In [74]:
import numpy as np
import pandas as pd
import time

from csdid.att_gt import ATTgt
from diff_diff import CallawaySantAnna
from did_multiplegt_dyn import DidMultiplegtDyn
# Seed NumPy's legacy global RNG; the np.random.choice / np.random.normal draws
# used to simulate the panel below consume this global stream.
np.random.seed(123)


In [75]:
# Synthetic staggered-adoption panel similar to the Callaway & Sant'Anna examples.
# One row per (id, year). Outcome model:
#   y = 2 + unit_fe + time_fe + true_tau * treated + noise

# Re-seed inside the cell (same seed as the import cell) so that re-running this
# cell on its own regenerates the identical panel instead of continuing the
# global random stream from wherever the previous run left it.
np.random.seed(123)

n_units = 1_000_000  # previously spelled 100_0000: same value, conventional grouping
n_periods = 10
years = np.arange(2010, 2010 + n_periods)

unit_ids = np.arange(n_units)
panel = pd.MultiIndex.from_product([unit_ids, years], names=['id', 'year'])
df = panel.to_frame(index=False)

# The unit-level columns below are broadcast positionally, which is only valid
# if rows are id-major (each unit's n_periods rows are contiguous, in id order).
assert len(df) == n_units * n_periods
assert np.array_equal(df['id'].to_numpy(), np.repeat(unit_ids, n_periods))

# Assign first treatment year or never treated (0)
treat_years = np.random.choice([0, 2012, 2014, 2016, 2018], size=n_units, p=[0.3, 0.2, 0.2, 0.2, 0.1])
# Repeat each unit's cohort once per period; avoids a 10M-row merge and the
# later in-place rename of a temporary 'g' column.
df['first_treat'] = np.repeat(treat_years, n_periods)

# Create treatment indicator: ever-treated unit, at or after its first treated year
df['treated'] = (df['first_treat'] > 0) & (df['year'] >= df['first_treat'])

# Unit and time fixed effects + treatment effect
unit_fe = np.random.normal(0, 1, size=n_units)
time_fe_by_period = np.linspace(-0.2, 0.2, n_periods)
time_fe = dict(zip(years, time_fe_by_period))  # year -> time effect lookup, kept for inspection
df['unit_fe'] = np.repeat(unit_fe, n_periods)         # one value per unit, repeated over its rows
df['time_fe'] = np.tile(time_fe_by_period, n_units)   # one value per year, cycled over units
true_tau = 1.5
noise = np.random.normal(0, 1, size=len(df))
df['y'] = 2 + df['unit_fe'] + df['time_fe'] + true_tau * df['treated'].astype(int) + noise

print('Rows:', len(df), 'Units:', df['id'].nunique())


Rows: 10000000 Units: 1000000


In [76]:
# diff_diff (CallawaySantAnna): time only the fit() call, then collapse the
# event-study output into a single post-treatment number.
cs = CallawaySantAnna()
fit_kwargs = {
    'outcome': 'y',
    'unit': 'id',
    'time': 'year',
    'first_treat': 'first_treat',
    'aggregate': 'event_study',
}
start = time.perf_counter()
cs_results = cs.fit(df, **fit_kwargs)
cs_time = time.perf_counter() - start

# Unweighted mean of the 'effect' entries whose event-study key is >= 0
post_effects = [
    estimate['effect']
    for horizon, estimate in cs_results.event_study_effects.items()
    if horizon >= 0
]
cs_att = float(np.mean(post_effects))
print(f'diff_diff CallawaySantAnna time: {cs_time:.2f}s')
print(f'diff_diff post ATT (avg): {cs_att:.3f}')


diff_diff CallawaySantAnna time: 4.67s
diff_diff post ATT (avg): 1.500


In [77]:
# csdid (ATTgt): the timer covers both building the estimator and fitting it.
# NOTE(review): the recorded output for this cell is a KeyboardInterrupt, i.e.
# the run on the full panel was stopped by hand — no timing was obtained.
start = time.perf_counter()
att_estimator = ATTgt(
    data=df,
    yname='y',
    tname='year',
    idname='id',
    gname='first_treat',
    control_group='never_treated',
)
att_out = att_estimator.fit(est_method='dr')
att_time = time.perf_counter() - start

# Aggregation is intentionally outside the timed region.
agg = att_out.aggte(typec='dynamic', na_rm=True)

def extract_dynamic(agg_obj):
    """Return the 'overall_att' entry of an aggregated result.

    Calls ``agg_obj.summ_attgt()`` and reads ``atte['overall_att']`` from
    whatever that call returns.
    """
    summarized = agg_obj.summ_attgt()
    overall = summarized.atte['overall_att']
    return overall


# Pull the overall ATT out of the dynamic aggregation and report it with the timing.
att_egt = extract_dynamic(agg)
print(
    f'csdid ATTgt time: {att_time:.2f}s',
    f'csdid post ATT (avg): {att_egt:.3f}',
    sep='\n',
)



KeyboardInterrupt: 

In [None]:
import polars as pl

# Convert the pandas panel to polars and add an Int32 copy of the boolean
# 'treated' column under the name 'D' (used as the treatment column below).
treatment_as_int = pl.col('treated').cast(pl.Int32).alias('D')
df_dcdh = pl.from_pandas(df).with_columns([treatment_as_int])

In [None]:
# Estimator settings. The recorded summary shows Effect_1..Effect_5 and
# Placebo_1..Placebo_3 rows, matching effects=5 / placebo=3.
dcdh_options = dict(
    outcome='y',
    group='id',
    time='year',
    treatment='D',
    effects=5,
    placebo=3,
    cluster='id',
)
model_dcdh = DidMultiplegtDyn(df=df_dcdh, **dcdh_options)

# Fit the model (kept as the last expression so the cell displays its return value)
model_dcdh.fit()

<did_multiplegt_dyn.did_multiplegt_dyn.DidMultiplegtDyn at 0x39a844f10>

In [None]:
# Call summary() on the model fitted in the previous cell and keep its return value.
# The recorded output shows the call also prints a table with Effect_*,
# Average_Total_Effect and Placebo_* rows.
dcdh_summary = model_dcdh.summary()

               Block  Estimate       SE     LB CI    UB CI         N  Switchers       N.w  Switchers.w
            Effect_1  1.498683 0.001970  1.494822 1.502545 2799244.0   699940.0 2799244.0     699940.0
            Effect_2  1.498378 0.001972  1.494514 1.502242 2799244.0   699940.0 2799244.0     699940.0
            Effect_3  1.498240 0.002233  1.493863 1.502617 1899984.0   600680.0 1899984.0     600680.0
            Effect_4  1.499132 0.002234  1.494753 1.503512 1899984.0   600680.0 1899984.0     600680.0
            Effect_5  1.501101 0.002816  1.495581 1.506620 1099456.0   400076.0 1099456.0     400076.0
Average_Total_Effect  1.498936 0.001666  1.495670 1.502201 7199924.0  3001316.0 7199924.0    3001316.0
           Placebo_1 -0.002198 0.001969 -0.006057 0.001662 2799244.0   699940.0 2799244.0     699940.0
           Placebo_2 -0.001164 0.002368 -0.005804 0.003477 1799244.0   499940.0 1799244.0     499940.0
           Placebo_3 -0.001583 0.002815 -0.007100 0.003934 1100060.0   40

np.float64(1.4989355006083422)