<a href="https://colab.research.google.com/github/garciawitulski/Econometria/blob/main/Ejercitaci%C3%B3n.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

# install the package
!pip install econtools
import econtools.metrics as mt
from sklearn import datasets, linear_model, metrics

!pip install linearmodels
from linearmodels.iv import IV2SLS

url = 'https://raw.githubusercontent.com/garciawitulski/Econometria/main/fertility.csv'

df = pd.read_csv(url)

df.head(10)


Unnamed: 0,morekids,boy1st,boy2nd,samesex,age,black,hispan,othrace,weeksworked,twogirls,twoboys
0,0,1,0,0,27,0,0,0,0,0,0
1,0,0,1,0,30,0,0,0,30,0,0
2,0,1,0,0,27,0,0,0,0,0,0
3,0,1,0,0,35,1,0,0,0,0,0
4,0,0,0,1,30,0,0,0,22,1,0
5,0,1,0,0,26,0,0,0,40,0,0
6,0,0,1,0,29,0,0,0,0,0,0
7,0,1,1,1,33,0,0,0,52,0,1
8,0,0,1,0,29,0,0,0,0,0,0
9,0,1,0,0,27,0,0,0,0,0,0


In [6]:
# Notation: Y = weeksworked, X = morekids, W = black, Z1 = twoboys, Z2 = twogirls
# NOTE(review): the line above lists W = black, but the regressions in this
# notebook use `age` as the control variable -- confirm which one is intended.

# (1)
# First stage: regress the endogenous regressor (morekids) on both instruments
# and the control, adding a constant and using robust standard errors.
first_stage_regressors = ['twoboys', 'twogirls', 'age']
model_1 = mt.reg(
    df,
    'morekids',
    first_stage_regressors,
    addcons=True,
    vce_type="robust",
)

print(model_1)

Dependent variable:	morekids
N:			254654
R-squared:		0.0151
Estimation method:	OLS
VCE method:		Robust
          coeff    se       t   p>t CI_low CI_high
twoboys   0.058 0.002  25.127 0.000  0.053   0.062
twogirls  0.079 0.002  32.947 0.000  0.074   0.084
age       0.014 0.000  51.877 0.000  0.014   0.015
_cons    -0.090 0.008 -10.664 0.000 -0.107  -0.074



In [10]:
# Store the first-stage fitted values as a new column.
df['morekids_hat'] = model_1.yhat
df.head(10)  # not the cell's last expression, so nothing is displayed here

# (2)
# Second stage: regress weeksworked on the fitted morekids and the control.
# NOTE(review): standard errors from a manually run second stage presumably do
# not account for morekids_hat being estimated -- compare with IV2SLS in (3).
second_stage_regressors = ['morekids_hat', 'age']
model_2 = mt.reg(
    df,
    'weeksworked',
    second_stage_regressors,
    addcons=True,
    vce_type="robust",
)
print(model_2)

Dependent variable:	weeksworked
N:			254654
R-squared:		0.0124
Estimation method:	OLS
VCE method:		Robust
              coeff    se      t   p>t CI_low CI_high
morekids_hat -5.590 1.240 -4.509 0.000 -8.020  -3.160
age           0.798 0.022 36.989 0.000  0.755   0.840
_cons        -3.100 0.377 -8.222 0.000 -3.839  -2.361



In [21]:
# (3)
# Estimate the model in a single step with IV2SLS()

from statsmodels.api import add_constant

# Add the intercept column only once. With has_constant='add' a new 'const'
# column is prepended on every call, so re-running this cell without the guard
# would leave duplicate 'const' columns and make df[['const', 'age']] rank deficient.
if 'const' not in df.columns:
    df = add_constant(df, has_constant='add')

# Dependent variable: weeksworked; exogenous regressors: constant and age;
# endogenous regressor: morekids; excluded instruments: twoboys and twogirls.
# cov_type='robust' spells out the covariance estimator reported in the summary.
model_ivreg = IV2SLS(
    df.weeksworked,
    exog=df[['const', 'age']],
    endog=df.morekids,
    instruments=df[['twoboys', 'twogirls']],
).fit(cov_type='robust')

print(model_ivreg)

# (4)
# There are two instruments (twoboys and twogirls) and one endogenous regressor (morekids),
# so the model is overidentified.

                          IV-2SLS Estimation Summary                          
Dep. Variable:            weeksworked   R-squared:                      0.0295
Estimator:                    IV-2SLS   Adj. R-squared:                 0.0295
No. Observations:              254654   F-statistic:                    3518.2
Date:                Tue, Nov 08 2022   P-value (F-stat)                0.0000
Time:                        16:27:30   Distribution:                  chi2(2)
Cov. Estimator:                robust                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
const         -3.0999     0.3737    -8.2953     0.0000     -3.8323     -2.3674
age            0.7977     0.0214     37.315     0.00