In [1]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.sandbox.regression.gmm import IV2SLS

In [2]:
# Directory holding the processed inputs, relative to the notebook's working directory.
data_path = "../data/processed_data/"
# low_memory=False makes pandas infer every column's dtype from the whole file
# instead of chunk by chunk, so no column ends up holding mixed types.
# NOTE(review): the saved output of this cell shows a warning raised during the
# read -- presumably pandas' DtypeWarning about mixed-type columns; confirm.
panel = pd.read_csv(data_path + "panel.csv", index_col=None, low_memory=False)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [3]:
# Replace the verbose CSR / enrollment headers with short, formula-friendly names.
short_names = {
    'Average Monthly Advanced CSR Payment for Consumers with 94%': 'csr_pay_94',
    'Average Monthly Advanced CSR Payment for Consumers with 87%': 'csr_pay_87',
    'Total Number of Consumers': 'csr_tot',
    'Number of Consumers with CSR AV of 94%': 'csr_tot_94',
    'Ever Enrolled Count': 'ever_enrolled_count',
}
panel = panel.rename(columns=short_names)

# Columns that make up the regressor matrix X.
regressor_cols = [
    'csr_pay_94',
    'EHBPercentTotalPremium',
    'act_value',
    'MetalLevel_Silver',
    'MetalLevel_Platinum',
    'csr_tot',
    'DP05_0015PE',
    'DP05_0069PE',
]

# Missing regressor values become 0; every column is then forced to float via a
# string round-trip.
X = panel[regressor_cols].fillna(0).astype(str).astype(float)

# Outcome column, coerced to float the same way. No fillna is applied here.
y = panel['ever_enrolled_count'].astype(str).astype(float)

In [4]:
# Difference-in-difference to predict enrollments in 2016
# One-hot encode Year and drop the 2017 indicator, leaving Year_2016 as the
# year dummy that enters the regression.
panel_1 = pd.get_dummies(panel, columns=['Year']).drop(columns=['Year_2017'])

# NOTE(review): in formula syntax `Year_2016*csr_pay_94` already expands to both
# main effects plus their interaction, so the separately listed `Year_2016` and
# `csr_pay_94` terms look redundant -- confirm before simplifying.
did_formula_2016 = (
    'ever_enrolled_count ~ Year_2016 + csr_pay_94 + Year_2016*csr_pay_94'
    ' + act_value + EHBPercentTotalPremium + MetalLevel_Silver +MetalLevel_Platinum'
    ' + csr_tot + csr_pay_94 + DP05_0015PE + DP05_0069PE'
)
mod = smf.ols(formula=did_formula_2016, data=panel_1)
res_1 = mod.fit()
print(res_1.summary())

# Cell output: the plain sum of every fitted coefficient, intercept included.
# NOTE(review): that sum equals a fitted value only for a row whose regressors
# are all 1 -- confirm this is the intended "prediction".
res_1.params.sum()

                             OLS Regression Results                            
Dep. Variable:     ever_enrolled_count   R-squared:                       0.198
Model:                             OLS   Adj. R-squared:                  0.197
Method:                  Least Squares   F-statistic:                     395.6
Date:                 Thu, 05 May 2022   Prob (F-statistic):               0.00
Time:                         17:54:51   Log-Likelihood:            -1.4330e+05
No. Observations:                14474   AIC:                         2.866e+05
Df Residuals:                    14464   BIC:                         2.867e+05
Df Model:                            9                                         
Covariance Type:             nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
Intercept         

9030.13304335452

In [5]:
# Difference-in-difference to predict enrollments in 2017
# One-hot encode Year and drop the 2016 indicator, leaving Year_2017 as the
# year dummy that enters the regression.
panel_2 = pd.get_dummies(panel, columns=['Year']).drop(columns=['Year_2016'])

# NOTE(review): in formula syntax `Year_2017*csr_pay_94` already expands to both
# main effects plus their interaction, so the separately listed `Year_2017` and
# `csr_pay_94` terms look redundant -- confirm before simplifying.
did_formula_2017 = (
    'ever_enrolled_count ~ Year_2017 + csr_pay_94 + Year_2017*csr_pay_94'
    ' + act_value + EHBPercentTotalPremium + MetalLevel_Silver +MetalLevel_Platinum'
    ' + csr_tot + csr_pay_94 + DP05_0015PE + DP05_0069PE'
)
mod = smf.ols(formula=did_formula_2017, data=panel_2)
res_2 = mod.fit()
print(res_2.summary())

# Cell output: the plain sum of every fitted coefficient, intercept included.
# NOTE(review): that sum equals a fitted value only for a row whose regressors
# are all 1 -- confirm this is the intended "prediction".
res_2.params.sum()

                             OLS Regression Results                            
Dep. Variable:     ever_enrolled_count   R-squared:                       0.198
Model:                             OLS   Adj. R-squared:                  0.197
Method:                  Least Squares   F-statistic:                     395.6
Date:                 Thu, 05 May 2022   Prob (F-statistic):               0.00
Time:                         17:54:51   Log-Likelihood:            -1.4330e+05
No. Observations:                14474   AIC:                         2.866e+05
Df Residuals:                    14464   BIC:                         2.867e+05
Df Model:                            9                                         
Covariance Type:             nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
Intercept         

10955.934849998644

In [6]:
# Difference between 2016 and 2017
# Each total is the sum of all fitted coefficients of the corresponding model.
# The totals are taken separately on purpose: subtracting the two params Series
# directly would align on coefficient labels, which differ between the models.
coef_total_2016 = res_1.params.sum()
coef_total_2017 = res_2.params.sum()
coef_total_2016 - coef_total_2017

-1925.8018066441236

In [7]:
# Hausman-style instrument for price (EHBPercentTotalPremium): each row gets the
# mean EHBPercentTotalPremium over all rows that share its IssuerId.
# NOTE(review): the original comment also mentioned csr_pay_94, but only
# EHBPercentTotalPremium is instrumented here.
#
# Projecting a vector onto a full set of group dummies, D (D'D)^-1 D' p, is the
# per-group mean of p. A groupby-transform gives the same numbers without
# materialising the dense n-by-n projection matrix, and does not depend on the
# dtype that get_dummies returns.
# Missing values follow pandas rules: NaNs are skipped inside a group's mean and
# rows whose IssuerId is missing receive NaN.
panel['demand_instruments0'] = (
    panel.groupby('IssuerId')['EHBPercentTotalPremium'].transform('mean')
)

In [8]:
# IV regression
# Instrument matrix: the regressors in X with EHBPercentTotalPremium removed and
# demand_instruments0 appended in its place.
X_instr = X.drop(columns='EHBPercentTotalPremium').assign(
    demand_instruments0=panel['demand_instruments0']
)

# Both design matrices get an explicit intercept column.
exog_with_const = sm.add_constant(X)
instr_with_const = sm.add_constant(X_instr)

iv_reg = IV2SLS(endog=y, exog=exog_with_const, instrument=instr_with_const)
res = iv_reg.fit()
print(res.summary())

# Cell output: sum of all fitted 2SLS coefficients, constant included.
res.params.sum()

                           IV2SLS Regression Results                           
Dep. Variable:     ever_enrolled_count   R-squared:                       0.190
Model:                          IV2SLS   Adj. R-squared:                  0.189
Method:                      Two Stage   F-statistic:                     433.7
                         Least Squares   Prob (F-statistic):               0.00
Date:                 Thu, 05 May 2022                                         
Time:                         17:54:53                                         
No. Observations:                14485                                         
Df Residuals:                    14476                                         
Df Model:                            8                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const             

  x = pd.concat(x[::order], 1)


7642.843461045832