In [1]:
import pandas as pd
import numpy as np
from linearmodels import PanelOLS
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
def prepare_did_data(matched_df):
    """
    Build the panel used by the simple difference-in-differences regression.

    Parameters
    ----------
    matched_df : pandas.DataFrame
        Must contain the columns 'pos_tratamento', 'matched_control',
        'id_microrregiao' and 'ano'. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of the input with an extra 'treat_effect' column, indexed by
        ('id_microrregiao', 'ano').
    """
    # Indicator that equals 1 on rows where matched_control == 0, else 0.
    # NOTE(review): presumably matched_control == 0 marks treated units — confirm.
    not_control = matched_df['matched_control'].eq(0).astype(int)

    panel = matched_df.copy()

    # DiD interaction term: pos_tratamento times the indicator above.
    # NOTE(review): assumes pos_tratamento is a 0/1 post-treatment flag — confirm.
    panel['treat_effect'] = panel['pos_tratamento'] * not_control

    # (entity, time) MultiIndex, as consumed by the panel estimator downstream.
    return panel.set_index(['id_microrregiao', 'ano'])

In [3]:
def run_did(df):
    """
    Fit the simple DiD specification with PanelOLS.

    Parameters
    ----------
    df : pandas.DataFrame
        Panel with 'total_valor_producao' and 'treat_effect' columns.
        Assumes a two-level (entity, time) index, as PanelOLS expects —
        TODO confirm against the caller.

    Returns
    -------
    The fitted results object returned by ``PanelOLS.fit``.
    """
    outcome = df['total_valor_producao']
    # Single regressor (the DiD interaction) plus an added constant column.
    regressors = sm.add_constant(df[['treat_effect']])

    did_model = PanelOLS(
        dependent=outcome,
        exog=regressors,
        entity_effects=True,
        time_effects=True,
    )

    # Clustered covariance, clustering by entity.
    return did_model.fit(cov_type='clustered', cluster_entity=True)

In [None]:
# Load the input CSV; the path is relative, so it depends on the working directory.
# NOTE(review): presumably this file holds the matched treated/control panel — confirm.
matched_df = pd.read_csv('../data/csv/QTD_EST_sugar_cane.csv')
# Add the 'treat_effect' column and set the ('id_microrregiao', 'ano') index.
did_data = prepare_did_data(matched_df)
# Fit the DiD model; `results` is read by the reporting cell that prints the summary.
results = run_did(did_data)

In [6]:
# Print a header (preceded by a blank line) and then the estimation summary table.
# `summary` is accessed as an attribute, not called; the printed table below confirms this.
print("\nDiD Results:")
print(results.summary)


DiD Results:
                           PanelOLS Estimation Summary                            
Dep. Variable:     total_valor_producao   R-squared:                        0.8033
Estimator:                     PanelOLS   R-squared (Between):             -0.0042
No. Observations:                   230   R-squared (Within):               0.5236
Date:                  Fri, Nov 22 2024   R-squared (Overall):             -0.0020
Time:                          20:31:58   Log-likelihood                   -1550.7
Cov. Estimator:               Clustered                                           
                                          F-statistic:                      400.28
Entities:                           124   P-value                           0.0000
Avg Obs:                         1.8548   Distribution:                    F(1,98)
Min Obs:                         1.0000                                           
Max Obs:                         23.000   F-statistic (robust):          