# **Práctica de Evaluación de Impacto: de Stata a Python**

In [35]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind

In [36]:
# Load the hh_98 dataset from its Stata (.dta) file into a DataFrame.
# NOTE(review): absolute, machine-specific path — the notebook only runs where this file exists.
ruta_datos = "C:\\Users\\kike\\OneDrive - Universidad de Alcala\\Documentos\\uni\\Técnicas impact\\Práctica II\\hh_98.dta"
df = pd.read_stata(ruta_datos)


## La descripción de las variables es la siguiente:

| Variable    | Storage Type | Display Format | Variable Label                                                                 |
|-------------|--------------|----------------|--------------------------------------------------------------------------------|
| nh          | double       | %7.0f          | HH ID                                                                          |
| year        | float        | %9.0g          | Year of observation                                                            |
| villid      | double       | %9.0g          | Village ID                                                                     |
| thanaid     | double       | %9.0g          | Thana ID                                                                       |
| agehead     | float        | %3.0f          | Age of HH head: years                                                          |
| sexhead     | float        | %2.0f          | Gender of HH head: 1=M, 0=F                                                    |
| educhead    | float        | %2.0f          | Education of HH head: years                                                    |
| famsize     | float        | %9.2f          | HH size                                                                        |
| hhland      | float        | %9.0g          | HH land: decimals                                                              |
| hhasset     | float        | %9.0g          | HH total asset: Tk.                                                            |
| expfd       | float        | %9.0g          | HH per capita food expenditure: Tk/year                                        |
| expnfd      | float        | %9.0g          | HH per capita nonfood expenditure: Tk/year                                     |
| exptot      | float        | %9.0g          | HH per capita total expenditure: Tk/year                                       |
| dmmfd       | byte         | %8.0g          | HH has male microcredit participant: 1=Y, 0=N                                  |
| dfmfd       | byte         | %8.0g          | HH has female microcredit participant: 1=Y, 0=N                                |
| weight      | float        | %9.0g          | HH sampling weight                                                             |
| vaccess     | float        | %9.0g          | Village is accessible by road all year: 1=Y, 0=N                               |
| pcirr       | float        | %9.0g          | Proportion of village land irrigated                                           |
| rice        | float        | %9.3f          | Village price of rice: Tk./kg                                                  |
| wheat       | float        | %9.3f          | Village price of wheat: Tk./kg                                                 |
| milk        | float        | %9.3f          | Village price of milk: Tk./liter                                               |
| potato      | float        | %9.3f          | Village price of potato: Tk./kg                                                |
| egg         | float        | %9.3f          | Village price of egg: Tk./4 counts                                             |
| oil         | float        | %9.3f          | Village price of edible oil: Tk./kg                                            |
| lexptot     | float        | %9.0g          |                                                                                |
| lnland      | float        | %9.0g          |                                                                                |
| vill        | float        | %9.0g          |                                                                                |
| progvillm   | float        | %9.0g          |                                                                                |
| progvillf   | float        | %9.0g          |                                                                                |

In [37]:
# Derived variables added to df (in this column order):
#   log_exptot : log(1 + exptot)
#   log_hhland : log(1 + hhland / 100)
#   vill       : village key combining thanaid and villid
#   progvillf  : per-village maximum of dfmfd (1 if any household has a female participant)
#   progvillm  : per-village maximum of dmmfd (1 if any household has a male participant)
df["log_exptot"] = np.log(df["exptot"] + 1)
df["log_hhland"] = np.log(df["hhland"] / 100 + 1)
df["vill"] = 10 * df["thanaid"] + df["villid"]

# Broadcast the village-level maximum of each participation dummy back to every household.
for columna_programa, participacion in (("progvillf", "dfmfd"), ("progvillm", "dmmfd")):
    df[columna_programa] = df.groupby("vill")[participacion].transform("max")

df.head()

Unnamed: 0,nh,year,villid,thanaid,agehead,sexhead,educhead,famsize,hhland,hhasset,...,wheat,milk,potato,egg,oil,log_exptot,log_hhland,vill,progvillf,progvillm
0,11054.0,1.0,1.0,1.0,79.0,1.0,0.0,2.0,36.0,33295.0,...,8.120178,11.503587,8.547428,2.199215,40.600895,9.159501,0.307485,11.0,1,1
1,11061.0,1.0,1.0,1.0,43.0,1.0,6.0,4.0,116.0,180325.0,...,8.120178,11.503587,8.547428,2.199215,40.600895,9.863308,0.770108,11.0,1,1
2,11081.0,1.0,1.0,1.0,52.0,0.0,0.0,7.0,91.0,80735.0,...,8.120178,11.503587,8.547428,2.199215,40.600895,8.923725,0.647103,11.0,1,1
3,11101.0,1.0,1.0,1.0,48.0,1.0,0.0,7.0,8.0,16755.0,...,8.120178,11.503587,8.547428,2.199215,40.600895,8.582025,0.076961,11.0,1,1
4,12021.0,1.0,2.0,1.0,35.0,1.0,10.0,5.0,10.0,18795.0,...,6.090134,10.826905,6.868469,2.030045,43.307621,10.113386,0.09531,12.0,1,0


## **Prueba T de Student**

In [38]:
# Two-sample t-test of log_exptot between households in villages where no sampled
# household has a female participant (progvillf == 0) and villages where at least
# one does (progvillf == 1). ttest_ind is called with its defaults apart from
# `alternative`, as in the original cell.
grupo_0 = df.loc[df["progvillf"] == 0, "log_exptot"]
grupo_1 = df.loc[df["progvillf"] == 1, "log_exptot"]

# Alternatives are stated for the first sample relative to the second:
# "less" -> mean(grupo_0) < mean(grupo_1); "greater" -> mean(grupo_0) > mean(grupo_1).
kinds = ["less", "two-sided", "greater"]

# Build the whole table in one step instead of enlarging an empty DataFrame
# row by row; rounding to 4 decimals is applied once at the end.
p_values = pd.DataFrame(
    {"P-Values": [ttest_ind(grupo_0, grupo_1, alternative=kind).pvalue for kind in kinds]},
    index=kinds,
).round(4)
p_values



Unnamed: 0,P-Values
less,0.0224
two-sided,0.0448
greater,0.9776


## **a) ¿Cuál es el impacto de la participación en el programa de microcréditos de las ciudades?**

In [39]:
# Estimamos
from statsmodels.api import OLS, add_constant

In [40]:
# Bivariate OLS of log_exptot on progvillm (village-level male-participation dummy)
# with an intercept; no weights, default covariance.
x = add_constant(df["progvillm"])
model = OLS(df["log_exptot"], x).fit()
model.summary()

0,1,2,3
Dep. Variable:,log_exptot,R-squared:,0.002
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,2.296
Date:,"Sun, 16 Mar 2025",Prob (F-statistic):,0.13
Time:,18:02:25,Log-Likelihood:,-848.66
No. Observations:,1129,AIC:,1701.0
Df Residuals:,1127,BIC:,1711.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,8.4793,0.024,349.068,0.000,8.432,8.527
progvillm,-0.0474,0.031,-1.515,0.130,-0.109,0.014

0,1,2,3
Omnibus:,151.127,Durbin-Watson:,1.529
Prob(Omnibus):,0.0,Jarque-Bera (JB):,256.548
Skew:,0.868,Prob(JB):,1.9600000000000002e-56
Kurtosis:,4.561,Cond. No.,2.94


In [41]:
# Bivariate OLS of log_exptot on progvillf (village-level female-participation dummy)
# with an intercept; no weights, default covariance.
# Note: this rebinds `model`, replacing the progvillm fit from the previous cell.
x_2 = add_constant(df["progvillf"])
model = OLS(df["log_exptot"], x_2).fit()
model.summary()

0,1,2,3
Dep. Variable:,log_exptot,R-squared:,0.004
Model:,OLS,Adj. R-squared:,0.003
Method:,Least Squares,F-statistic:,4.035
Date:,"Sun, 16 Mar 2025",Prob (F-statistic):,0.0448
Time:,18:02:25,Log-Likelihood:,-847.79
No. Observations:,1129,AIC:,1700.0
Df Residuals:,1127,BIC:,1710.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,8.3285,0.063,132.843,0.000,8.206,8.452
progvillf,0.1298,0.065,2.009,0.045,0.003,0.257

0,1,2,3
Omnibus:,159.816,Durbin-Watson:,1.53
Prob(Omnibus):,0.0,Jarque-Bera (JB):,279.91
Skew:,0.897,Prob(JB):,1.65e-61
Kurtosis:,4.652,Cond. No.,8.09


## **b) ¿Cuál es el impacto de los microcréditos en las ciudades cuando las mujeres participan en el programa? ¿Y cuando participan los varones?**


In [42]:
# Part one: t-tests of log_exptot by household-level participation
# (dfmfd = female participant, dmmfd = male participant).
kinds = ["less", "two-sided", "greater"]


def _p_values_ttest(grupo_sin, grupo_con, columna, alternativas):
    """Return a one-column table of two-sample t-test p-values.

    Parameters
    ----------
    grupo_sin, grupo_con : array-like
        Outcome values for the two groups, passed to ``ttest_ind`` in this order,
        so "less" means mean(grupo_sin) < mean(grupo_con).
    columna : str
        Name of the resulting column.
    alternativas : sequence of str
        Values for ``ttest_ind``'s ``alternative`` argument; one row per value.

    Returns
    -------
    pandas.DataFrame
        Indexed by alternative, p-values rounded to 4 decimals.
    """
    return pd.DataFrame(
        {columna: [ttest_ind(grupo_sin, grupo_con, alternative=alt).pvalue
                   for alt in alternativas]},
        index=list(alternativas),
    ).round(4)


# Households without / with a female microcredit participant.
grupo_0_dfmdf = df.loc[df["dfmfd"] == 0, "log_exptot"]
grupo_1_dfmdf = df.loc[df["dfmfd"] == 1, "log_exptot"]
p_values_dfmfd = _p_values_ttest(grupo_0_dfmdf, grupo_1_dfmdf, "P values dfmfd", kinds)

# Households without / with a male microcredit participant.
# (Variable names are kept exactly as in the original cell.)
grupo_0_dmmf = df.loc[df["dmmfd"] == 0, "log_exptot"]
grupo_1_dmmdf = df.loc[df["dmmfd"] == 1, "log_exptot"]
p_values_dmmfd = _p_values_ttest(grupo_0_dmmf, grupo_1_dmmdf, "P values dmmfd", kinds)

# Put both columns side by side; the two tables share the same index (kinds).
all_pvalues = pd.concat([p_values_dfmfd, p_values_dmmfd], axis=1)

all_pvalues

Unnamed: 0,P values dfmfd,P values dmmfd
less,0.4339,0.8887
two-sided,0.8678,0.2226
greater,0.5661,0.1113


In [43]:
# Part two: bivariate OLS of log_exptot on dfmfd (household has a female
# participant) with an intercept; no weights, default covariance.
x_3 = add_constant(df["dfmfd"])
model_3 = OLS(df["log_exptot"], x_3).fit()
model_3.summary()

0,1,2,3
Dep. Variable:,log_exptot,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.001
Method:,Least Squares,F-statistic:,0.02772
Date:,"Sun, 16 Mar 2025",Prob (F-statistic):,0.868
Time:,18:02:25,Log-Likelihood:,-849.79
No. Observations:,1129,AIC:,1704.0
Df Residuals:,1127,BIC:,1714.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,8.4480,0.022,379.738,0.000,8.404,8.492
dfmfd,0.0051,0.031,0.166,0.868,-0.055,0.065

0,1,2,3
Omnibus:,156.596,Durbin-Watson:,1.527
Prob(Omnibus):,0.0,Jarque-Bera (JB):,268.435
Skew:,0.891,Prob(JB):,5.1300000000000005e-59
Kurtosis:,4.591,Cond. No.,2.69


In [44]:
from statsmodels.api import WLS

# Weighted least squares of log_exptot on female participation (dfmfd) plus
# household- and village-level controls, weighted by the sampling weight and
# reported with HC1 heteroskedasticity-robust standard errors.
controles = [
    "sexhead", "agehead", "educhead", "log_hhland",
    "vaccess", "pcirr", "rice", "wheat", "milk", "oil", "egg",
]
x_4 = add_constant(df[["dfmfd"] + controles])
model_4 = WLS(df["log_exptot"], x_4, weights=df["weight"]).fit(cov_type="HC1")
model_4.summary()

0,1,2,3
Dep. Variable:,log_exptot,R-squared:,0.248
Model:,WLS,Adj. R-squared:,0.24
Method:,Least Squares,F-statistic:,19.72
Date:,"Sun, 16 Mar 2025",Prob (F-statistic):,2e-39
Time:,18:02:25,Log-Likelihood:,-888.0
No. Observations:,1129,AIC:,1802.0
Df Residuals:,1116,BIC:,1867.0
Df Model:,12,,
Covariance Type:,HC1,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,7.5610,0.278,27.190,0.000,7.016,8.106
dfmfd,0.0655,0.035,1.877,0.060,-0.003,0.134
sexhead,-0.0331,0.065,-0.511,0.609,-0.160,0.094
agehead,0.0031,0.001,2.369,0.018,0.001,0.006
educhead,0.0493,0.006,8.142,0.000,0.037,0.061
log_hhland,0.2058,0.042,4.882,0.000,0.123,0.288
vaccess,-0.0295,0.050,-0.588,0.556,-0.128,0.069
pcirr,0.1081,0.061,1.771,0.077,-0.012,0.228
rice,0.0057,0.011,0.505,0.614,-0.016,0.028

0,1,2,3
Omnibus:,297.131,Durbin-Watson:,1.687
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1545.19
Skew:,1.117,Prob(JB):,0.0
Kurtosis:,8.278,Cond. No.,1020.0


In [45]:
# Weighted least squares of log_exptot on male participation (dmmfd) plus the
# same household- and village-level controls, weighted by the sampling weight
# and reported with HC1 heteroskedasticity-robust standard errors.
controles = [
    "sexhead", "agehead", "educhead", "log_hhland",
    "vaccess", "pcirr", "rice", "wheat", "milk", "oil", "egg",
]
x_5 = add_constant(df[["dmmfd"] + controles])
model_5 = WLS(df["log_exptot"], x_5, weights=df["weight"]).fit(cov_type="HC1")
model_5.summary()

0,1,2,3
Dep. Variable:,log_exptot,R-squared:,0.245
Model:,WLS,Adj. R-squared:,0.237
Method:,Least Squares,F-statistic:,19.94
Date:,"Sun, 16 Mar 2025",Prob (F-statistic):,6.8299999999999994e-40
Time:,18:02:25,Log-Likelihood:,-890.37
No. Observations:,1129,AIC:,1807.0
Df Residuals:,1116,BIC:,1872.0
Df Model:,12,,
Covariance Type:,HC1,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,7.5634,0.279,27.101,0.000,7.016,8.110
dmmfd,-0.0142,0.048,-0.295,0.768,-0.109,0.080
sexhead,-0.0330,0.065,-0.505,0.613,-0.161,0.095
agehead,0.0031,0.001,2.365,0.018,0.001,0.006
educhead,0.0484,0.006,8.141,0.000,0.037,0.060
log_hhland,0.1926,0.042,4.637,0.000,0.111,0.274
vaccess,-0.0326,0.051,-0.644,0.520,-0.132,0.067
pcirr,0.1194,0.061,1.958,0.050,-0.000,0.239
rice,0.0075,0.012,0.653,0.514,-0.015,0.030

0,1,2,3
Omnibus:,282.262,Durbin-Watson:,1.68
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1431.118
Skew:,1.063,Prob(JB):,1.72e-311
Kurtosis:,8.09,Cond. No.,1020.0


## **c) ¿Cuál es el efecto de ambos programas?**

In [46]:
# Weighted least squares of log_exptot on household participation (dfmfd) and
# village-level programme presence (progvillf) together, plus controls;
# sampling weights and HC1 heteroskedasticity-robust standard errors.
controles = [
    "sexhead", "agehead", "educhead", "log_hhland",
    "vaccess", "pcirr", "rice", "wheat", "milk", "oil", "egg",
]
x_6 = add_constant(df[["dfmfd", "progvillf"] + controles])
model_6 = WLS(df["log_exptot"], x_6, weights=df["weight"]).fit(cov_type="HC1")
model_6.summary()

0,1,2,3
Dep. Variable:,log_exptot,R-squared:,0.249
Model:,WLS,Adj. R-squared:,0.24
Method:,Least Squares,F-statistic:,18.34
Date:,"Sun, 16 Mar 2025",Prob (F-statistic):,4.2e-39
Time:,18:02:25,Log-Likelihood:,-887.16
No. Observations:,1129,AIC:,1802.0
Df Residuals:,1115,BIC:,1873.0
Df Model:,13,,
Covariance Type:,HC1,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,7.6405,0.263,29.074,0.000,7.125,8.156
dfmfd,0.0737,0.036,2.049,0.040,0.003,0.144
progvillf,-0.0747,0.107,-0.697,0.486,-0.285,0.135
sexhead,-0.0377,0.064,-0.587,0.557,-0.164,0.088
agehead,0.0030,0.001,2.344,0.019,0.000,0.006
educhead,0.0500,0.006,8.651,0.000,0.039,0.061
log_hhland,0.2041,0.040,5.042,0.000,0.125,0.283
vaccess,-0.0349,0.049,-0.705,0.481,-0.132,0.062
pcirr,0.1072,0.061,1.759,0.079,-0.012,0.227

0,1,2,3
Omnibus:,281.631,Durbin-Watson:,1.686
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1332.627
Skew:,1.082,Prob(JB):,4.1999999999999997e-290
Kurtosis:,7.862,Cond. No.,1060.0


## **d) Proponga una prueba para detectar si hay un efecto contaminación en los no participantes.**

In [47]:
# Spillover check: keep only households without a female participant
# (dfmfd == 0) and regress their log_exptot on progvillf, i.e. on whether their
# village has any female participant. Sampling weights, HC1 robust errors.
df_filterd = df.loc[df["dfmfd"] == 0]

endog = df_filterd["log_exptot"]
weights = df_filterd["weight"]
x_7 = add_constant(df_filterd["progvillf"])

model_7 = WLS(endog, x_7, weights=weights).fit(cov_type="HC1")
model_7.summary()

0,1,2,3
Dep. Variable:,log_exptot,R-squared:,0.0
Model:,WLS,Adj. R-squared:,-0.002
Method:,Least Squares,F-statistic:,0.003556
Date:,"Sun, 16 Mar 2025",Prob (F-statistic):,0.952
Time:,18:02:25,Log-Likelihood:,-519.91
No. Observations:,534,AIC:,1044.0
Df Residuals:,532,BIC:,1052.0
Df Model:,1,,
Covariance Type:,HC1,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,8.5268,0.121,70.595,0.000,8.290,8.764
progvillf,-0.0074,0.124,-0.060,0.952,-0.251,0.236

0,1,2,3
Omnibus:,131.881,Durbin-Watson:,1.741
Prob(Omnibus):,0.0,Jarque-Bera (JB):,422.619
Skew:,1.144,Prob(JB):,1.7e-92
Kurtosis:,6.71,Cond. No.,5.97
