## Difference-in-Difference (DID)

Difference-in-Difference (DID) is probably one of the most well-known estimators. It compares the change in the outcome from the pre-experiment to the post-experiment period between the treatment group and the control group.

### Panel

In [1]:
from azcausal.data import CaliforniaProp99
from azcausal.core.error import Bootstrap
from azcausal.core.parallelize import Pool, Joblib
from azcausal.estimators.panel.did import DID


# Example data set with the columns Year, State, PacksPerCapita, treated,
# loaded in panel form.
panel = CaliforniaProp99().panel()

# Process pool that the error estimation below uses for parallelization.
pool = Joblib(n_jobs=5, progress=False)

# Difference-in-difference (DID) estimator, fitted on the panel.
estimator = DID()
result = estimator.fit(panel)

# Plot the fitted result.
estimator.plot(result, title="CaliforniaProp99", show=False)

# Run the bootstrap error estimation (101 samples) on the result,
# parallelized through the pool created above.
estimator.error(result, Bootstrap(n_samples=101), parallelize=pool)

# Print a summary of the estimate.
print(result.summary(title="CaliforniaProp99"))

╭──────────────────────────────────────────────────────────────────────────────╮
|                               CaliforniaProp99                               |
|                                    Panel                                     |
|  Time Periods: 31 (19/12)                                  total (pre/post)  |
|  Units: 39 (38/1)                                       total (contr/treat)  |
├──────────────────────────────────────────────────────────────────────────────┤
|                                     ATT                                      |
|  Effect (±SE): -27.35 (±3.1446)                                              |
|  Confidence Interval (95%): [-33.51 , -21.19]                           (-)  |
|  Observed: 60.35                                                             |
|  Counter Factual: 87.70                                                      |
├──────────────────────────────────────────────────────────────────────────────┤
|                           

### Regression

In [2]:
# The regression estimator can be fitted on either the panel or the data
# frame directly. Load the data frame and rename its columns to the generic
# names unit / time / outcome / intervention.
df = CaliforniaProp99().load().rename(
    columns={
        "State": "unit",
        "Year": "time",
        "PacksPerCapita": "outcome",
        "treated": "intervention",
    }
)

df.head(3)

Unnamed: 0,unit,time,outcome,intervention
0,Alabama,1970,89.800003,0
1,Arkansas,1970,100.300003,0
2,Colorado,1970,124.800003,0


In [3]:
from azcausal.estimators.panel.did import DIDRegressor

# Initialize the regression-based difference-in-difference (DID) estimator.
estimator = DIDRegressor()

# Fit the estimator directly on the data frame (columns: unit, time, outcome,
# intervention).
result = estimator.fit(df)

# Print a summary of the estimate, requesting a 95% confidence level.
print(result.summary(title="CaliforniaProp99", conf=95))

╭──────────────────────────────────────────────────────────────────────────────╮
|                               CaliforniaProp99                               |
|                                     ATT                                      |
|  Effect (±SE): -27.35 (±4.4095)                                              |
|  Confidence Interval (95%): [-35.99 , -18.71]                           (-)  |
|  Observed: 60.35                                                             |
|  Counter Factual: 87.70                                                      |
├──────────────────────────────────────────────────────────────────────────────┤
|                                  Percentage                                  |
|  Effect (±SE): -31.19 (±5.0279)                                              |
|  Confidence Interval (95%): [-41.04 , -21.33]                           (-)  |
|  Observed: 68.81                                                             |
|  Counter Factual: 100.00  

### Event Study

Exclude the relative time period -1 and express all DiD estimates relative to it.

In [4]:
from azcausal.estimators.panel.did import EventStudy

# Set up the event study: exclude=-1 leaves out time period -1 so that all
# DiD estimates are expressed relative to it.
# NOTE(review): n_pre=None presumably places no limit on the number of
# pre-periods considered — confirm against the EventStudy documentation.
estimator = EventStudy(n_pre=None, exclude=-1)

# The estimator can be fitted with either a data frame or a panel.
result = estimator.fit(df)

# Show the results in a plot.
estimator.plot(result)

Alternatively, assume that we also run DID for time periods before the experiment started.

In [5]:
from azcausal.estimators.panel.did import EventStudy

# Set up the event study again, this time without excluding a reference
# period (exclude=None).
# NOTE(review): n_pre=8 presumably requests DiD estimates for 8 periods before
# the experiment — confirm against the EventStudy documentation.
estimator = EventStudy(n_pre=8, exclude=None)

# The estimator can be fitted with either a data frame or a panel.
result = estimator.fit(df)

# Show the results in a plot.
estimator.plot(result)