In [10]:
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
import pandas as pd
import patsy as ps

## Ejemplo 5.1 Precios de la vivienda y distancia a un incinerador (kielmc.dta)

In [2]:
# Load the raw CRIME1 file: whitespace-separated values, no header row.
# sep=r'\s+' is the documented replacement for delim_whitespace=True,
# which is deprecated since pandas 2.2.
df = pd.read_csv('CRIME1.raw', sep=r'\s+', header=None)
# The file carries no column names, so they are assigned by position.
df.columns = [
    'narr86', 'nfarr86', 'nparr86', 'pcnv', 'avgsen', 'tottime',
    'ptime86', 'qemp86', 'inc86', 'durat', 'black', 'hispan',
    'born60', 'pcnvsq', 'pt86sq', 'inc86sq',
]
# Unrestricted model: narr86 regressed on all five explanatory variables.
# patsy builds the response (y) and design matrix (X) from the formula.
y, X = ps.dmatrices('narr86 ~ pcnv + avgsen +  tottime + ptime86 + qemp86', data=df, return_type='dataframe')
model = sm.OLS(y, X)  # Describe the model
results = model.fit()  # Fit the model
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                 narr86   R-squared:                       0.043
Model:                            OLS   Adj. R-squared:                  0.041
Method:                 Least Squares   F-statistic:                     24.29
Date:                Thu, 19 Sep 2019   Prob (F-statistic):           5.43e-24
Time:                        15:53:28   Log-Likelihood:                -3392.7
No. Observations:                2725   AIC:                             6797.
Df Residuals:                    2719   BIC:                             6833.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.7061      0.033     21.297      0.0

In [4]:
# Restricted model: narr86 regressed on pcnv, ptime86 and qemp86 only.
# y and X are rebuilt from the restricted formula before fitting.
y, X = ps.dmatrices(
    'narr86 ~ pcnv + ptime86 + qemp86',
    data=df,
    return_type='dataframe',
)
restModel = sm.OLS(endog=y, exog=X)  # Describe the restricted model
restResults = restModel.fit()  # Fit the restricted model
print(restResults.summary())


                            OLS Regression Results                            
Dep. Variable:                 narr86   R-squared:                       0.041
Model:                            OLS   Adj. R-squared:                  0.040
Method:                 Least Squares   F-statistic:                     39.10
Date:                Thu, 19 Sep 2019   Prob (F-statistic):           9.91e-25
Time:                        15:53:48   Log-Likelihood:                -3394.7
No. Observations:                2725   AIC:                             6797.
Df Residuals:                    2721   BIC:                             6821.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.7118      0.033     21.565      0.0

In [5]:
# LM test of the fitted model in `results` against `restResults`, which is
# passed as the restricted model; the returned tuple is printed as-is.
lm_test = results.compare_lm_test(restResults)
print("(LM,pValue,difference between the degree of freedom) = ", lm_test)

(LM,pValue,difference between the degree of freedom) =  (4.070729706281553, 0.13063281201643764, 2.0)


## Ejemplo 5.2 Errores estándar en una ecuación de peso al nacer (bwght.dta)

In [4]:
# Read the Stata file bwght.dta into a DataFrame and preview its first rows.
dfej5_2 = pd.read_stata('bwght.dta')
dfej5_2.head()

Unnamed: 0,faminc,cigtax,cigprice,bwght,fatheduc,motheduc,parity,male,white,cigs,lbwght,bwghtlbs,packs,lfaminc
0,13.5,16.5,122.300003,109,12.0,12.0,1,1,1,0,4.691348,6.8125,0.0,2.60269
1,7.5,16.5,122.300003,133,6.0,12.0,2,1,0,0,4.890349,8.3125,0.0,2.014903
2,0.5,16.5,122.300003,129,,12.0,2,0,0,0,4.859812,8.0625,0.0,-0.693147
3,15.5,16.5,122.300003,126,12.0,12.0,2,1,0,0,4.836282,7.875,0.0,2.74084
4,27.5,16.5,122.300003,134,14.0,12.0,2,1,1,0,4.89784,8.375,0.0,3.314186


In [7]:
# OLS of lbwght on cigs and lfaminc through the statsmodels formula interface;
# the fitted results object is kept and its summary table printed.
regdfej5_2 = smf.ols(
    formula='lbwght ~ cigs + lfaminc ',
    data=dfej5_2,
).fit()
print(regdfej5_2.summary())

                            OLS Regression Results                            
Dep. Variable:                 lbwght   R-squared:                       0.026
Model:                            OLS   Adj. R-squared:                  0.024
Method:                 Least Squares   F-statistic:                     18.31
Date:                Sat, 11 Jul 2020   Prob (F-statistic):           1.42e-08
Time:                        18:18:42   Log-Likelihood:                 349.39
No. Observations:                1388   AIC:                            -692.8
Df Residuals:                    1385   BIC:                            -677.1
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      4.7186      0.018    258.631      0.0

## Ejemplo 5.3 Modelo económico de la delincuencia (crime1.dta)

In [8]:
# Read the Stata file crime1.dta into a DataFrame and preview its first rows.
dfej5_3 = pd.read_stata('crime1.dta')
dfej5_3.head()

Unnamed: 0,narr86,nfarr86,nparr86,pcnv,avgsen,tottime,ptime86,qemp86,inc86,durat,black,hispan,born60,pcnvsq,pt86sq,inc86sq
0,0,0,0,0.38,17.6,35.200001,12,0.0,0.0,0.0,0,0,1,0.1444,144,0.0
1,2,2,0,0.44,0.0,0.0,0,1.0,0.8,0.0,0,1,0,0.1936,0,0.64
2,1,1,0,0.33,22.799999,22.799999,0,0.0,0.0,11.0,1,0,1,0.1089,0,0.0
3,2,2,1,0.25,0.0,0.0,5,2.0,8.8,0.0,0,1,1,0.0625,25,77.440002
4,1,1,0,0.0,0.0,0.0,0,2.0,8.1,1.0,0,0,0,0.0,0,65.610008


In [9]:
# OLS of narr86 on pcnv, avgsen, tottime, ptime86 and qemp86 through the
# statsmodels formula interface; the summary table is printed.
regdfej5_3 = smf.ols(
    formula='narr86 ~ pcnv + avgsen + tottime + ptime86 + qemp86',
    data=dfej5_3,
).fit()
print(regdfej5_3.summary())

                            OLS Regression Results                            
Dep. Variable:                 narr86   R-squared:                       0.043
Model:                            OLS   Adj. R-squared:                  0.041
Method:                 Least Squares   F-statistic:                     24.29
Date:                Sat, 11 Jul 2020   Prob (F-statistic):           5.43e-24
Time:                        18:21:22   Log-Likelihood:                -3392.7
No. Observations:                2725   AIC:                             6797.
Df Residuals:                    2719   BIC:                             6833.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.7061      0.033     21.297      0.0