In [None]:
# Development convenience: load IPython's autoreload extension and use mode 2,
# so that imported modules are reloaded before each cell is executed.
%load_ext autoreload
%autoreload 2

In [None]:
#| hide
from matching_separable_simuls.read_data import get_root_dir, read_margins, read_marriages, normalize_mus, \
    reshape_varcov
from matching_separable_simuls.estimate import generate_bases
from cupid_matching.matching_utils import Matching, _get_singles
import numpy as np
from typing import Tuple
from fastcore.test import test_eq

# matching_separable_simuls

> Simulations for the Galichon-Salanié paper on estimating separable matching models

## Install

```sh
pip install matching_separable_simuls
```

## summary

The package reads the marriage patterns from the Choo and Siow 2006 *Journal of Political Economy* paper for the non-reform states of the 1970 wave. It fits a parsimonious homoskedastic logit model. Then it generates `S` datasets from the fitted model and uses them to test the estimation methods presented in Galichon-Salanié 2022.

## reading the data

In [None]:
#| echo: false
# Folder holding the Choo and Siow data files, located relative to
# whatever directory `get_root_dir()` returns.
data_dir = (
    get_root_dir()
    / "matching_separable_simuls"
    / "ChooSiow70nNdata"
)

In [None]:
#| echo: false
# Load the margins and the marriage patterns (with their variance-covariance
# matrix) from the data folder.
(nx, my) = read_margins(data_dir)
(muxy, varmus) = read_marriages(data_dir)

# `muxy` is indexed by (type of man, type of woman); its shape gives the type counts.
(n_types_men, n_types_women) = muxy.shape

# Presumably the numbers of single men and single women by type, derived from
# the marriages and the margins -- NOTE(review): `_get_singles` is a private
# helper of `cupid_matching`; confirm its contract.
(mux0, mu0y) = _get_singles(muxy, nx, my)

print(
    f"\nThe data has {n_types_men} types of men and {n_types_women} types of women."
)


The data has 25 types of men and 25 types of women.


We reshape the variance-covariance matrix and we normalize the data to a unit total mass of households.

In [None]:
#| hide
# Regression checks on the data just loaded: a few known cell values and the
# size of the variance-covariance matrix. `test_eq` fails if the two arguments differ.
test_eq(muxy[3,4], 16730.0)
test_eq(nx[-1], 84399)
test_eq(my[7], 281192)
# 675 = 25 * 25 + 25 + 25 with the 25 x 25 types reported above
# (presumably marriages plus singles of each sex -- TODO confirm against `read_marriages`)
test_eq(varmus.shape, (675, 675))

In [None]:
#| echo: true
# the number of households in the population:
# the two margins added up, minus the marriages (presumably so that a couple
# counts as one household rather than two)
n_households_pop = (np.sum(nx) + np.sum(my)) - np.sum(muxy)

# bundle the marriage patterns and the margins, then rescale by the number of households
mus = Matching(muxy, nx, my)
mus_norm = normalize_mus(mus, n_households_pop)

# reshape the variance-covariance matrix, passing the same number of households
varcovs_norm = reshape_varcov(
    varmus,
    n_types_men,
    n_types_women,
    n_households_pop,
)

## fitting a basic model

First we need to generate some basis functions. We quantile-transform the margins, then we generate orthogonal polynomials on `[0,1]`. We also create the variables `1(x=y)` and `max(x-y, 0)`.

In [None]:
# Degrees passed to `generate_bases` for the x and y arguments
# (presumably the maximum polynomial degree on each side -- TODO confirm).
xdeg = 2
ydeg = 2
# FIXME: the call below is disabled because it raised
# `TypeError: return arrays must be of ArrayType` (see the stale output under this cell).
# Re-enable it once `generate_bases` is fixed, and clear the stale error output.
#base_functions = generate_bases(nx, my, xdeg, ydeg)

TypeError: return arrays must be of ArrayType

## generating artificial datasets

## fitting a homoskedastic logit model

## fitting a nested logit model