In [1]:
import pandas as pd
import numpy as np
from linearmodels import PanelOLS
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns

In [30]:
def prepare_did_data(matched_df):
    """
    Build the panel frame used by the simple DiD regression.

    The input is not modified; a new frame is returned with two extra
    0/1 integer columns:

    * ``post``         -- 1 where ``ano >= treatment_year``, else 0.
    * ``treat_effect`` -- ``post`` multiplied by the indicator
      ``matched_control == 0``, so it is 1 only on post-period rows whose
      ``matched_control`` flag is 0.
      NOTE(review): presumably ``matched_control == 0`` marks treated
      units -- confirm against the matching step that produced the file.

    Parameters
    ----------
    matched_df : pandas.DataFrame
        Must contain the columns ``id_microrregiao``, ``ano``,
        ``treatment_year`` and ``matched_control``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``matched_df`` plus ``post`` and ``treat_effect``,
        indexed by ``(id_microrregiao, ano)``.
    """
    # Period indicator: the row's year is at or after the treatment year.
    is_post = (matched_df['ano'] >= matched_df['treatment_year']).astype(int)

    # Group indicator: rows whose matched_control flag equals 0.
    not_control = (matched_df['matched_control'] == 0).astype(int)

    # assign() returns a new frame, so the caller's data stays untouched.
    panel = matched_df.assign(post=is_post, treat_effect=is_post * not_control)

    # (entity, time) MultiIndex for the panel estimator.
    return panel.set_index(['id_microrregiao', 'ano'])

In [31]:
def run_did(df):
    """
    Estimate the simple DiD specification with PanelOLS.

    ``total_valor_producao`` is regressed on a constant and
    ``treat_effect``, with both entity and time effects switched on.
    The covariance estimator is ``'clustered'`` with
    ``cluster_entity=True``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding ``total_valor_producao`` and ``treat_effect``.
        Expected to carry the (entity, time) index set by
        ``prepare_did_data``.

    Returns
    -------
    The fitted results object returned by ``PanelOLS.fit``.
    """
    outcome = df['total_valor_producao']

    # Double brackets keep the regressor as a one-column frame before the
    # constant column is added.
    regressors = sm.add_constant(df[['treat_effect']])

    twfe = PanelOLS(
        dependent=outcome,
        exog=regressors,
        entity_effects=True,
        time_effects=True,
    )
    return twfe.fit(cov_type='clustered', cluster_entity=True)

In [36]:
# Pipeline: read the matched sample, derive the DiD indicators and panel
# index, then fit the model. Each stage keeps its own name so intermediate
# results can be inspected in later cells.
matched_df = pd.read_csv(filepath_or_buffer='../data/matched_data_for_did.csv')
did_data = prepare_did_data(matched_df=matched_df)
results = run_did(df=did_data)

In [37]:
# Heading (preceded by a blank line) followed by the full estimation summary.
print("\nDiD Results:", results.summary, sep="\n")


DiD Results:
                           PanelOLS Estimation Summary                            
Dep. Variable:     total_valor_producao   R-squared:                        0.0020
Estimator:                     PanelOLS   R-squared (Between):             -0.0024
No. Observations:                   843   R-squared (Within):              -0.0013
Date:                  Fri, Nov 08 2024   R-squared (Overall):           3.392e-05
Time:                          23:52:47   Log-likelihood                   -620.28
Cov. Estimator:               Clustered                                           
                                          F-statistic:                      1.0395
Entities:                           305   P-value                           0.3084
Avg Obs:                         2.7639   Distribution:                   F(1,522)
Min Obs:                         1.0000                                           
Max Obs:                         17.000   F-statistic (robust):          