In [None]:
import warnings
import random

import numpy as np
import seaborn as sns   
import pandas as pd
from sklearn.exceptions import ConvergenceWarning
import matplotlib.pyplot as plt

from estimation_utils import load_data, load_graph, estimate_effects

In [None]:
# Switch matplotlib to its built-in "ggplot" style sheet for the figures below.
plt.style.use("ggplot")

In [None]:
# Suppress ConvergenceWarning (from sklearn.exceptions) and FutureWarning for
# the rest of the session. Filters are registered in the same order as before.
for _muted_category in (ConvergenceWarning, FutureWarning):
    warnings.filterwarnings("ignore", category=_muted_category)

In [None]:
# One shared seed for NumPy's legacy global RNG and the stdlib `random` module.
seed = 42
np.random.seed(seed)
random.seed(seed)

In [None]:
# Fetch the data and the graph (in that order), then hand both to
# estimate_effects.
df, graph = load_data(), load_graph()
results = estimate_effects(df, graph)

# Cell output: `results` shown as a single-column table labelled "ATE".
# Presumably `results` maps estimator name -> point estimate; verify against
# estimation_utils.estimate_effects.
pd.DataFrame(results, index=[0]).rename(index={0: "ATE"}).T

In [None]:
from sklearn.linear_model import LogisticRegression
from estimation_utils import propensity_overlap_graph

# Logistic regression with balanced class weights and an iteration cap of 2000,
# passed to the overlap helper as its `ps_model`.
ps_model = LogisticRegression(class_weight="balanced", max_iter=2000)

# The helper's return value is bound to `_` so the notebook does not echo it.
# Presumably the figure is written to `figpath` — confirm in estimation_utils.
_ = propensity_overlap_graph(
    df,
    graph,
    figpath="figs/propensity_overlap.svg",
    method="backdoor.propensity_score_weighting",
    ps_model=ps_model,
)

In [None]:
# Second estimation pass: with return_cis=True the call is unpacked into three
# objects (results, ci_df, boot_df). n_boot=200 is presumably the number of
# bootstrap resamples — confirm in estimation_utils.
# NOTE(review): `results` from the earlier cell is rebound here, and `boot_df`
# is replaced by the contents of 'bootstraps/current.csv' in the following
# cell — confirm which source of bootstrap draws is meant to be authoritative.
results, ci_df, boot_df = estimate_effects(
    df,
    graph,
    n_boot=200,
    return_cis=True,
)

In [None]:
def _pretty_method_label(column: str) -> str:
    """Turn a raw bootstrap column name into a short display label.

    The leading dot-separated component is dropped and the remaining
    components are joined with spaces, title-cased, and underscores become
    spaces. A fixed list of substring rewrites is then applied in order.

    A name that contains no '.' is kept whole instead of collapsing to the
    empty string; otherwise several such columns would all be renamed to ''
    and collide.
    """
    parts = str(column).split(".")
    stem = " ".join(parts[1:]) if len(parts) > 1 else parts[0]
    label = stem.title().replace("_", " ")
    # Order matters: each rewrite sees the output of the previous one.
    rewrites = (
        ("Propensity", "Prop."),
        ("Linear Regression", "S-Learner"),
        ("T Learner", "T-Learner"),
        ("Distance Matching", "Distance Matching\n(Gower's Distance)"),
    )
    for old, new in rewrites:
        label = label.replace(old, new)
    return label


# Load the bootstrap draws from disk and relabel their columns for display.
# Re-reading the CSV on every run keeps this cell idempotent.
boot_df = pd.read_csv("bootstraps/current.csv").rename(columns=_pretty_method_label)

In [None]:
from scipy import stats
def _latex_escape(label) -> str:
    """Return ``label`` as a single-line string that is safe in LaTeX text mode.

    Runs of whitespace (including embedded newlines) collapse to one space and
    LaTeX special characters are escaped in a single pass, so nothing is
    escaped twice.
    """
    specials = str.maketrans({
        "\\": r"\textbackslash{}",
        "&": r"\&",
        "%": r"\%",
        "$": r"\$",
        "#": r"\#",
        "_": r"\_",
        "{": r"\{",
        "}": r"\}",
        "~": r"\textasciitilde{}",
        "^": r"\textasciicircum{}",
    })
    return " ".join(str(label).split()).translate(specials)


def df_to_latex_summary(df: pd.DataFrame) -> str:
    """Render per-column summary statistics of ``df`` as a LaTeX table.

    For every numeric column (missing values dropped) the table lists the
    mean, the sample standard deviation (``ddof=1``) and a two-sided 95%
    Student-t interval for the column mean, each formatted to two decimals.

    Column labels become the row names. They are whitespace-normalised and
    LaTeX-escaped so labels containing ``&``, ``%``, ``_`` or newlines cannot
    break the table. Columns are visited by position, so duplicate labels are
    each reported on their own row.

    Degenerate columns:
      * no non-missing values -> mean, std and CI are shown as ``--``;
      * exactly one value     -> std and CI are shown as ``--``;
      * zero variance         -> the CI collapses to ``[mean; mean]``.

    NOTE(review): the interval is built from ``std / sqrt(n)``, i.e. it is a
    confidence interval for the *mean of the column*. If the columns hold
    bootstrap replicates of an estimator, this interval narrows as more
    replicates are drawn and is not an interval for the estimate itself —
    confirm this is the quantity intended for the table.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose numeric columns are summarised; other columns are ignored.

    Returns
    -------
    str
        A complete ``table`` environment, terminated by a newline.
    """
    undefined = "--"
    body_rows = []
    for label, column in df.select_dtypes(include=[np.number]).items():
        values = column.dropna()
        n = len(values)

        mean_txt = std_txt = ci_txt = undefined
        if n >= 1:
            mean = values.mean()
            mean_txt = f"{mean:.2f}"
        if n >= 2:
            std = values.std(ddof=1)
            se = std / np.sqrt(n)
            if se == 0:
                # scipy returns NaN for scale=0; the interval is just the point.
                ci_lo = ci_hi = mean
            else:
                ci_lo, ci_hi = stats.t.interval(0.95, n - 1, loc=mean, scale=se)
            std_txt = f"{std:.2f}"
            ci_txt = f"[{ci_lo:.2f}; {ci_hi:.2f}]"

        body_rows.append(
            f"    {_latex_escape(label)} & {mean_txt} & {std_txt} & {ci_txt} \\\\\n"
        )

    header = (
        "\\begin{table}[htbp]\n"
        "  \\centering\n"
        "  \\caption{Summary statistics (mean, std, 95\\% CI)}\n"
        "  \\begin{tabular}{lccc}\n"
        "    \\hline\n"
        "    Method & Mean & Std & 95\\% CI \\\\\n"
        "    \\hline\n"
    )
    footer = (
        "    \\hline\n"
        "  \\end{tabular}\n"
        "\\end{table}\n"
    )
    return header + "".join(body_rows) + footer

# Print the LaTeX table source for the bootstrap frame; print() is used so the
# raw text (not its repr) can be copied out of the cell output.
print(df_to_latex_summary(df=boot_df))

In [None]:
# Box plot with one box per column of boot_df, saved as SVG and displayed.
fig = plt.figure(figsize=(30, 10))
# Raise the global font size before the axes are created so that all text
# drawn below picks it up. This rcParams change persists for later cells.
plt.rcParams.update({"font.size": 25})
ax = sns.boxplot(data=boot_df)
ax.set_ylabel("ATE")
plt.xticks(rotation=0)  # keep the tick labels horizontal
fig.tight_layout()
fig.savefig("figs/ate_comparison.svg", format="svg", bbox_inches="tight")
plt.show()

In [None]:
# Violin plot with one violin per column of boot_df, saved as SVG and displayed.
plt.figure(figsize=(30, 10))
# Raise the global font size before the axes are created so that all text
# drawn below picks it up. This rcParams change persists for later cells.
plt.rcParams.update({'font.size': 25})
sns.violinplot(data=boot_df)
plt.ylabel("ATE")
plt.xticks(rotation=0)  # keep the tick labels horizontal
plt.tight_layout()
# Saved under its own file name: the box-plot cell already writes
# figs/ate_comparison.svg, and reusing that path would silently overwrite it.
plt.savefig("figs/ate_comparison_violin.svg", format="svg", bbox_inches="tight")
plt.show()