In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pmdarima as pm
from pmdarima import auto_arima
from pmdarima import model_selection


This notebook computes the normalized R-squared of the pre-treatment fit for each of our synthetic control models, and fits a baseline ARIMA model for each state for comparison.

In [None]:
def normalized_r_squared(actual, preds):
    """Return the R-squared of ``preds`` measured against ``actual``.

    Computed as ``1 - SS_res / SS_tot``, where ``SS_res`` is the sum of squared
    differences between ``actual`` and ``preds`` and ``SS_tot`` is the sum of
    squared deviations of ``actual`` from its own mean.

    Both arguments must support elementwise arithmetic and expose ``.sum()``
    (``actual`` must also expose ``.mean()``), e.g. NumPy arrays.
    """
    residuals = actual - preds
    deviations = actual - actual.mean()
    ss_res = (residuals ** 2).sum()
    ss_tot = (deviations ** 2).sum()
    return 1 - ss_res / ss_tot

In [None]:
# Pre-treatment R-squared of each state's synthetic control, keyed by state.
# The dict must exist under the name the loop writes to; previously only
# `r_squared` was initialised while the loop assigned into `r_squareds`.
r_squareds = {}
r_squared = r_squareds  # alias so either spelling resolves to the same dict

for state in rggi_states:
    synth_model = aug_synth_models[state][0]
    outcome_source = aug_synth_models[state][1]

    # Outcome matrices over the smoothed time period; Z1 holds the true values
    Z0, Z1 = outcome_source.make_outcome_mats(time_period=years_sma)
    # Synthetic control predictions built from Z0
    ts_synth = synth_model._synthetic(Z0=Z0)

    # Restrict both series to the pre-intervention period
    Z1_pre_treat = Z1[Z1.index < treatment_date]
    ts_synth_pre_treat = ts_synth[ts_synth.index < treatment_date]

    fit_quality = normalized_r_squared(
        actual=Z1_pre_treat.values,
        preds=ts_synth_pre_treat.values,
    )
    r_squareds[state] = np.round(fit_quality, 3)

We now compute a baseline ARIMA forecast of the moving average of per-capita emissions over the treatment period 2009-2013 for each state, together with 95% confidence intervals. We then compare these intervals to the synthetic control estimate for each state.

In [None]:
# Baseline ARIMA results per state: (fitted model, forecasts, confidence intervals, test RMSE)
arima_models = {}
# Month-start dates of the treatment (forecast) window, as 'YYYY-MM-DD' strings
treat_range = pd.date_range(start=treatment_date, end='2013-12-01', freq='MS').strftime('%Y-%m-%d').tolist()

for state in rggi_states:
    # Each comparison needs its own parentheses: `&` binds tighter than `>=`/`<=`,
    # so an unparenthesised `date >= start_date & (...)` evaluates `start_date & (...)` first.
    in_window = (
        (df_mo['state'] == state)
        & (df_mo['date'] >= start_date)
        & (df_mo['date'] <= '2013-12-01')
    )
    # Sort chronologically: the split below is positional and does not shuffle.
    state_emissions = df_mo.loc[in_window, ['date', 'co2_per_capita_sma']].sort_values('date')

    # Make train-test split on the treatment date: the last len(treat_range)
    # observations form the test set.
    # NOTE(review): assumes one row per month with no gaps in the window — confirm.
    train, test = model_selection.train_test_split(state_emissions, test_size=len(treat_range))

    # Fit on the emissions series only; the date column is not part of the model input.
    baseline_arima = auto_arima(train['co2_per_capita_sma'].values, trace=False)

    # Get preds, conf_int, and test RMSE for each state
    preds, conf_int = baseline_arima.predict(n_periods=len(test), return_conf_int=True)
    # Compare positionally (plain arrays) so pandas index alignment cannot distort the errors.
    forecast_errors = np.asarray(test['co2_per_capita_sma'], dtype=float) - np.asarray(preds, dtype=float)
    rmse = np.sqrt(np.mean(forecast_errors ** 2))

    arima_models[state] = (baseline_arima, preds, conf_int, rmse)
    


In [None]:
for state in rggi_states:
    # Get predictions, confidence intervals, and the stored test RMSE for this state.
    # The RMSE is read from arima_models rather than recomputed: a `test` variable
    # left over from the fitting loop would only hold the last state's data.
    pred_state = arima_models[state][1]
    conf_int_state = arima_models[state][2]
    rmse_state = arima_models[state][3]

    # Plot the forecasts on top of the synthetic control path plot
    # NOTE(review): assumes path_plot draws on the current pyplot figure and leaves it open — confirm.
    aug_synth_models[state][0].path_plot(time_period=years_sma, treatment_time=treatment_dates_ma[0])
    plt.plot(treat_range, pred_state, alpha=0.75, label='ARIMA forecast CO2 emissions')
    plt.fill_between(treat_range,
                conf_int_state[:, 0], conf_int_state[:, 1],
                alpha=0.1, color='b', label='95% Confidence Interval for ARIMA forecast')
    plt.title(f"{state} CO2 emissions per capita")
    plt.legend(loc='upper right')
    plt.ylabel("CO2 emissions per capita")
    plt.xlabel("Year")

    # f-string so the state name is interpolated instead of printing a literal "{state}"
    print(f"{state} test RMSE: {rmse_state:.3f}")
    