<a href="https://colab.research.google.com/github/garciawitulski/Econometria/blob/main/IV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

# install the package
!pip install econtools
import econtools.metrics as mt
from sklearn import datasets, linear_model, metrics

!pip install linearmodels
from linearmodels.iv import IV2SLS



In [2]:
# Location of the CigarettesSW data file in the course's GitHub repository.
url = 'https://raw.githubusercontent.com/garciawitulski/Econometria/main/CigarettesSW'

# Read the file into the DataFrame `df`, which the following cells extend.
df = pd.read_csv(url)

In [None]:
# Preview the first 10 rows of the freshly loaded data.
df.head(10)

In [4]:
# Build the derived variables used by the regressions below.
# Every monetary series is divided by `cpi`; the `ln*` columns are natural logs.
df['rprice'] = df.price / df.cpi                    # price deflated by cpi
df['lnpacks'] = np.log(df.packs)                    # log of packs
df['lnrprice'] = np.log(df.rprice)                  # log of the deflated price
df['salestax'] = (df.taxs - df.tax) / df.cpi        # (taxs - tax), deflated by cpi
df['rincome'] = df.income / df.population / df.cpi  # income per person, deflated by cpi
df['lnrincome'] = np.log(df.rincome)                # log of deflated income per person
df['cigtax'] = df.tax / df.cpi                      # tax deflated by cpi

In [None]:
# Keep only the 1995 cross-section.
# `.copy()` makes c1995 an independent DataFrame rather than a slice of `df`,
# so the columns that later cells add to it (fitted values, residuals) are
# written to c1995 itself and pandas does not raise SettingWithCopyWarning.
c1995 = df[df.year == 1995].copy()

c1995.head(10)

In [None]:
# Start by estimating the demand function by OLS, ignoring the endogeneity
# of the price.
cig_s0 = mt.reg(
    c1995,
    'lnpacks',
    ['lnrprice'],
    addcons=True,
    vce_type="robust",
)

print(cig_s0)


In [82]:
# First stage: regress the log real price on the instrument (salestax).
cig_s1 = mt.reg(c1995, 'lnrprice', ['salestax'], addcons=True, vce_type="robust")

print(cig_s1)

# Store the first-stage fitted values for use in the second stage.
# `assign` returns a new DataFrame instead of writing into c1995 in place, so
# no SettingWithCopyWarning is raised when c1995 is a filtered slice of `df`,
# and re-running this cell simply overwrites the column with the same values.
c1995 = c1995.assign(lcigp_pred=cig_s1.yhat)
c1995.head(10)

Dependent variable:	lnrprice
N:			48
R-squared:		0.4710
Estimation method:	OLS
VCE method:		Robust
         coeff    se       t   p>t CI_low CI_high
salestax 0.031 0.005   6.355 0.000  0.021   0.040
_cons    4.617 0.029 159.644 0.000  4.558   4.675



Unnamed: 0.1,const,Unnamed: 0,state,year,cpi,population,packs,income,tax,price,...,rprice,lnpacks,lnrprice,salestax,rincome,lnrincome,cigtax,lcigp_pred,resid_mod_rel,resid
48,1.0,49,AL,1995,1.524,4262731.0,101.085434,83903280,40.500004,158.371338,...,103.918206,4.615966,4.643604,0.921697,12.915347,2.558416,26.574807,4.644869,0.004822,-0.06453
49,1.0,50,AR,1995,1.524,2480121.0,111.042969,45995496,55.5,175.542511,...,115.18538,4.709917,4.746543,5.485019,12.169073,2.498898,36.417325,4.785095,-0.027529,0.177607
50,1.0,51,AZ,1995,1.524,4306908.0,71.95417,88870496,65.333328,198.607498,...,130.319887,4.276029,4.869992,6.205707,13.539638,2.605622,42.86964,4.807241,0.016169,-0.12851
51,1.0,52,CA,1995,1.524,31493524.0,56.859306,771470144,61.0,210.504669,...,138.12643,4.04058,4.928169,9.036307,16.073591,2.777178,40.026249,4.894222,0.051525,-0.337747
52,1.0,53,CO,1995,1.524,3738061.0,82.582924,92946544,44.0,167.350006,...,109.80972,4.413803,4.698749,0.0,16.315557,2.792119,28.871392,4.616546,0.023207,-0.261781
53,1.0,54,CT,1995,1.524,3265293.0,79.472191,104315120,74.0,218.280502,...,143.228683,4.375407,4.964443,8.107283,20.962359,3.042728,48.556433,4.865674,-0.010628,-0.031045
54,1.0,55,DE,1995,1.524,718265.0,124.466599,18237436,48.0,165.600006,...,108.661426,4.824037,4.688237,0.0,16.660734,2.813055,31.496064,4.616546,-0.014119,0.129155
55,1.0,56,FL,1995,1.524,14185403.0,93.074554,333525344,57.900002,187.717178,...,123.174007,4.533401,4.813598,6.972116,15.427737,2.736167,37.992129,4.830791,-0.017102,0.020217
56,1.0,57,GA,1995,1.524,7188538.0,97.474625,159800448,36.0,156.573074,...,102.738243,4.579592,4.632184,0.939249,14.586546,2.6801,23.622048,4.645408,0.007641,-0.149612
57,1.0,58,IA,1995,1.524,2840860.0,92.401596,60170928,60.0,190.889999,...,125.255911,4.526144,4.830359,5.964565,13.897988,2.631744,39.370081,4.799831,0.009059,0.063652


In [81]:
# Second stage: regress log packs on the fitted log price from the first stage.
# NOTE(review): this manual second stage is run as a plain OLS on a generated
# regressor; compare its standard errors with the IV2SLS estimate before
# using them for inference.
cig_s2 = mt.reg(
    c1995,
    'lnpacks',
    ['lcigp_pred'],
    addcons=True,
    vce_type="robust",
)
print(cig_s2)

Dependent variable:	lnpacks
N:			48
R-squared:		0.1525
Estimation method:	OLS
VCE method:		Robust
            coeff    se      t   p>t CI_low CI_high
lcigp_pred -1.084 0.334 -3.247 0.002 -1.755  -0.412
_cons       9.720 1.597  6.086 0.000  6.505  12.935



In [None]:
# Realizamos la estimacion de MC2E usando 'ivreg()'
from statsmodels.api import add_constant

c1995 = add_constant(c1995,has_constant='add')

c1995.head(10)

cig_ivreg = IV2SLS(c1995.lnpacks, exog=c1995.const, endog=c1995.lnrprice, instruments=c1995.salestax).fit()
print(cig_ivreg)

In [85]:

# 2SLS with income as a control.
# lnrincome is treated as exogenous (it was being used as its own instrument),
# so it belongs in `exog` together with the constant. Only lnrprice is
# endogenous, and salestax is the single excluded instrument. Declaring it this
# way makes the summary and the diagnostics describe the model correctly.
cig_ivreg2 = IV2SLS(
    c1995.lnpacks,
    exog=c1995[['const', 'lnrincome']],
    endog=c1995.lnrprice,
    instruments=c1995.salestax,
).fit()
print(cig_ivreg2)

# First-stage regression that corresponds to the model above.
mt.reg(c1995, 'lnrprice', ['salestax', 'lnrincome'], addcons=True, vce_type="robust")

Dependent variable:	lnrprice
N:			48
R-squared:		0.6389
Estimation method:	OLS
VCE method:		Robust
          coeff    se      t   p>t CI_low CI_high
salestax  0.027 0.004  6.688 0.000  0.019   0.036
lnrincome 0.389 0.065  5.953 0.000  0.258   0.521
_cons     3.591 0.173 20.789 0.000  3.243   3.939

In [None]:
# Over-identified 2SLS: one endogenous regressor (lnrprice) and two excluded
# instruments (salestax, cigtax).
# lnrincome is an exogenous control, so it is passed in `exog` rather than
# being listed as endogenous and instrumented by itself.
cig_ivreg3 = IV2SLS(
    c1995.lnpacks,
    exog=c1995[['const', 'lnrincome']],
    endog=c1995.lnrprice,
    instruments=c1995[['salestax', 'cigtax']],
).fit()
print(cig_ivreg3)

In [86]:
# First-stage regression used to assess instrument relevance:
# lnrprice on both instruments (salestax, cigtax) plus the lnrincome control.
mod_relevance = mt.reg(
    c1995,
    'lnrprice',
    ['salestax', 'cigtax', 'lnrincome'],
    addcons=True,
    vce_type="robust",
)
print(mod_relevance)

Dependent variable:	lnrprice
N:			48
R-squared:		0.9403
Estimation method:	OLS
VCE method:		Robust
          coeff    se      t   p>t CI_low CI_high
salestax  0.011 0.002  5.097 0.000  0.007   0.015
cigtax    0.009 0.001 10.752 0.000  0.008   0.011
lnrincome 0.108 0.040  2.732 0.009  0.028   0.188
_cons     4.103 0.088 46.425 0.000  3.925   4.281



In [87]:
# NOTE(review): ttest_ind is imported here but not used in any visible cell.
from scipy.stats import ttest_ind

# Joint test on the two instruments in the first-stage regression
# `mod_relevance`. The recorded output is a 2-tuple, presumably
# (F statistic, p-value) -- confirm against the econtools documentation.
mod_relevance.Ftest(['salestax', 'cigtax'])

(209.67626939646124, 1.1102230246251565e-16)

In [93]:
#Test J y Hausman-Wu
import statsmodels.formula.api as smf
import linearmodels.iv.model as lm

test_J = lm.IV2SLS(dependent=c1995["lnpacks"], exog=c1995["const"], endog=c1995[["lnrprice","lnrincome"]], instruments=c1995[["salestax","cigtax","lnrincome"]]).fit(cov_type="homoskedastic", debiased=True)
print(test_J.sargan)

# Test de Hausman-Wu
# Estimamos el modelo en su primer etapa
mod_relevance = smf.ols("lnrprice ~ salestax + cigtax + lnrincome", data=c1995).fit()

c1995['resid_mod_rel'] = mod_relevance.resid
c1995.head(10)

# Estimamos el modelo con los residuos

mod_test_HW = mt.reg(c1995, 'lnpacks', ['lnrprice','resid_mod_rel','lnrincome'], addcons=True,vce_type="robust")
print(mod_test_HW)


Dependent variable:	lnpacks
N:			48
R-squared:		0.4697
Estimation method:	OLS
VCE method:		Robust
               coeff    se      t   p>t CI_low CI_high
lnrprice      -1.277 0.228 -5.601 0.000 -1.737  -0.818
resid_mod_rel -1.565 0.836 -1.872 0.068 -3.250   0.120
lnrincome      0.280 0.247  1.134 0.263 -0.218   0.779
_cons          9.895 0.866 11.432 0.000  8.151  11.639

