In [None]:
# Clase 4 - Estimadores Causales en Secciones Transversales
# Profesora: Ana Díaz

import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats

# --------------------------
# Load data
# --------------------------
# Stata dataset for this class, read straight from the course repository.
url = (
    "https://raw.githubusercontent.com/adiazescobar/libro_cortes/"
    "main/dofile/04_ParametrosStata/04_data.dta"
)
df = pd.read_stata(url)

# Observed outcome: yd1 is kept where D is 1 and yd0 where D is 0
# (assumes D is coded 0/1 -- TODO confirm against the data file).
df["y"] = df["yd1"] * df["D"] + df["yd0"] * (1 - df["D"])

# --------------------------
# Descriptive statistics
# --------------------------
# Number of rows for each value of the treatment indicator D.
treatment_counts = df["D"].value_counts()
print(treatment_counts)

# Summary statistics of the observed outcome over the whole sample.
outcome_summary = df["y"].describe()
print(outcome_summary)

# Mean and standard deviation of the observed outcome for each value of D.
outcome_by_group = df.groupby("D")["y"].agg(["mean", "std"])
print(outcome_by_group)

# --------------------------
# Difference in means (t-test)
# --------------------------
# Observed outcomes split by treatment status.
treated = df.loc[df["D"] == 1, "y"]
control = df.loc[df["D"] == 0, "y"]

# Two-sample t-test on the observed outcome.
# NOTE(review): called with scipy's default options -- presumably the
# pooled-variance test; confirm that this is the intended variant.
ttest_result = stats.ttest_ind(treated, control)
t_stat = ttest_result.statistic
p_val = ttest_result.pvalue
print("t-test:", t_stat, "p-value:", p_val)

# --------------------------
# Simple regression
# --------------------------
# Regress the observed outcome on a constant and the treatment indicator.
X = sm.add_constant(df["D"])
ols_spec = sm.OLS(endog=df["y"], exog=X)

# HC1 requests heteroskedasticity-robust standard errors.
model = ols_spec.fit(cov_type="HC1")
print(model.summary())

# --------------------------
# Causal estimators
# --------------------------
# Unit-level treatment effect: difference between the two potential outcomes.
df["tau"] = df["yd1"].sub(df["yd0"])

def estimadores(tau, y, D):
    """Print and return the causal estimands and the naive comparison.

    Args:
        tau: Unit-level treatment effects (callers in this file pass
            ``yd1 - yd0``).
        y: Observed outcomes.
        D: Treatment indicator; rows with ``D == 1`` are treated and rows
            with ``D == 0`` are untreated.

    The inputs are used through boolean-mask indexing and ``.mean()``;
    assumes aligned pandas Series of equal length -- TODO confirm if other
    array types must be supported.

    Returns:
        dict with the keys ``"ate"``, ``"att"``, ``"atu"``, ``"naive"`` and
        ``"selection_bias"``. The same five numbers are also printed, in
        that order, after a header line.
    """
    # Build each group mask once and reuse it for both tau and y.
    treated = D == 1
    untreated = D == 0

    ate = tau.mean()
    att = tau[treated].mean()
    atu = tau[untreated].mean()

    # Naive estimator: raw difference in observed mean outcomes.
    naive = y[treated].mean() - y[untreated].mean()
    # Selection bias is reported as the gap between the naive estimate
    # and the ATT.
    selection_bias = naive - att

    print("--- Estimadores ---")
    print("ATE =", ate)
    print("ATT =", att)
    print("ATU =", atu)
    print("Naive =", naive)
    print("Sesgo de Selección =", selection_bias)

    # Returning the values lets callers reuse or test them; callers that
    # ignore the return value behave exactly as before.
    return {
        "ate": ate,
        "att": att,
        "atu": atu,
        "naive": naive,
        "selection_bias": selection_bias,
    }

# Baseline: estimators computed on the original sample.
estimadores(tau=df["tau"], y=df["y"], D=df["D"])

# --------------------------
# Experiment 1: Increase the sample size
# --------------------------
# Stack 10000 copies of the full dataset on top of each other (all copies
# are materialised in memory) and give the result a fresh 0..n-1 index.
replicas = [df] * 10000
df_expanded = pd.concat(replicas, ignore_index=True)

# Rebuild the observed outcome and the unit-level effect on the enlarged
# sample, then report the same estimators as for the original data.
df_expanded["y"] = (
    df_expanded["yd1"] * df_expanded["D"]
    + df_expanded["yd0"] * (1 - df_expanded["D"])
)
df_expanded["tau"] = df_expanded["yd1"].sub(df_expanded["yd0"])
estimadores(
    tau=df_expanded["tau"],
    y=df_expanded["y"],
    D=df_expanded["D"],
)

# --------------------------
# Experiment 2: Random assignment
# --------------------------
# Fix the global NumPy seed so the simulated assignment is reproducible.
np.random.seed(87634)
df_random = df.copy()

# One uniform draw per row; a row is treated when its draw exceeds 0.5.
uniform_draws = np.random.rand(len(df_random))
df_random["D"] = (uniform_draws > 0.5).astype(int)

# Observed outcome under the new assignment; the potential outcomes, and
# therefore tau, come unchanged from the original data.
df_random["y"] = (
    df_random["yd1"] * df_random["D"]
    + df_random["yd0"] * (1 - df_random["D"])
)
df_random["tau"] = df_random["yd1"].sub(df_random["yd0"])
estimadores(
    tau=df_random["tau"],
    y=df_random["y"],
    D=df_random["D"],
)
