In [1]:
import pandas as pd
import numpy as np
import math
import matplotlib.pylab as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.sandbox.regression.gmm import IV2SLS

In [2]:
data_path = "../data/processed_data/"
panel = pd.read_csv(data_path + "panel.csv", index_col=None)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [3]:
panel = panel.rename(columns={'Average Monthly Advanced CSR Payment for Consumers with 94%':'csr_pay_94',
                              'Average Monthly Advanced CSR Payment for Consumers with 87%':'csr_pay_87',
                              'Total Number of Consumers':'csr_tot',
                              'Number of Consumers with CSR AV of 94%':'csr_tot_94',
                             'Ever Enrolled Count':'ever_enrolled_count'})
X = panel[['csr_pay_94', 'EHBPercentTotalPremium', 'act_value', 'MetalLevel_Silver','MetalLevel_Platinum','csr_tot', 
           'DP05_0015PE' ,'DP05_0069PE']]
X = X.fillna(0)
X = X.astype(str).astype(float)
y = panel['ever_enrolled_count']
y = y.astype(str).astype(float)

In [4]:
# Difference-in-difference to predict enrollments in 2016
panel_1 = pd.get_dummies(panel, columns = ['Year'])
panel_1 = panel_1.drop(columns=['Year_2017'])
mod = smf.ols(formula='ever_enrolled_count ~ Year_2016 + csr_pay_94 + Year_2016*csr_pay_94 + act_value + EHBPercentTotalPremium + MetalLevel_Silver +MetalLevel_Platinum + csr_tot + csr_pay_94 + DP05_0015PE + DP05_0069PE', data=panel_1)
res_1 = mod.fit()
print(res_1.summary())
res_1.params.sum()

                             OLS Regression Results                            
Dep. Variable:     ever_enrolled_count   R-squared:                       0.198
Model:                             OLS   Adj. R-squared:                  0.197
Method:                  Least Squares   F-statistic:                     395.6
Date:                 Tue, 17 May 2022   Prob (F-statistic):               0.00
Time:                         08:24:41   Log-Likelihood:            -1.4330e+05
No. Observations:                14474   AIC:                         2.866e+05
Df Residuals:                    14464   BIC:                         2.867e+05
Df Model:                            9                                         
Covariance Type:             nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
Intercept         

9030.13304335452

In [5]:
# Difference-in-difference to predict enrollments in 2017
panel_2 = pd.get_dummies(panel, columns = ['Year'])
panel_2 = panel_2.drop(columns=['Year_2016'])
mod = smf.ols(formula='ever_enrolled_count ~ Year_2017 + csr_pay_94 + Year_2017*csr_pay_94 + act_value + EHBPercentTotalPremium + MetalLevel_Silver +MetalLevel_Platinum + csr_tot + csr_pay_94 + DP05_0015PE + DP05_0069PE', data=panel_2)
res_2 = mod.fit()
print(res_2.summary())
res_2.params.sum()

                             OLS Regression Results                            
Dep. Variable:     ever_enrolled_count   R-squared:                       0.198
Model:                             OLS   Adj. R-squared:                  0.197
Method:                  Least Squares   F-statistic:                     395.6
Date:                 Tue, 17 May 2022   Prob (F-statistic):               0.00
Time:                         08:24:41   Log-Likelihood:            -1.4330e+05
No. Observations:                14474   AIC:                         2.866e+05
Df Residuals:                    14464   BIC:                         2.867e+05
Df Model:                            9                                         
Covariance Type:             nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
Intercept         

10955.934849998644

In [6]:
# Difference between 2016 and 2017
res_1.params.sum() - res_2.params.sum()

-1925.8018066441236

In [7]:
# Hausman instrument for price and csr_pay_94
MktIds = np.array(pd.get_dummies(panel['IssuerId']))
MktIds2 = (MktIds.T).dot(MktIds)
dummies_proj = MktIds.dot( np.linalg.inv( MktIds2 ) ).dot( MktIds.T )
panel['demand_instruments0'] = dummies_proj.dot(panel['EHBPercentTotalPremium'])

In [8]:
# IV regression
X_instr = X.copy()
X_instr = X_instr.drop(columns='EHBPercentTotalPremium')
X_instr['demand_instruments0'] = panel['demand_instruments0']

iv_reg = IV2SLS(endog=y, exog=sm.add_constant(X), instrument=sm.add_constant(X_instr))
res = iv_reg.fit()
print(res.summary())
res.params.sum()

                           IV2SLS Regression Results                           
Dep. Variable:     ever_enrolled_count   R-squared:                       0.190
Model:                          IV2SLS   Adj. R-squared:                  0.189
Method:                      Two Stage   F-statistic:                     433.7
                         Least Squares   Prob (F-statistic):               0.00
Date:                 Tue, 17 May 2022                                         
Time:                         08:24:45                                         
No. Observations:                14485                                         
Df Residuals:                    14476                                         
Df Model:                            8                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const             

  x = pd.concat(x[::order], 1)


7642.843461045832

## Analysis with FPL groups

In [9]:
panel['FPL_50_150'] = (panel['S1701_C01_040E'] - panel['S1701_C01_038E'])/panel['S1701_C01_001E']
panel['FPL_150_200'] = (panel['S1701_C01_042E'] - panel['S1701_C01_040E'])/panel['S1701_C01_001E']
panel['FPL_200_250'] = (panel['S1701_C01_043E'] - panel['S1701_C01_042E'])/panel['S1701_C01_001E']
panel['FPL_250_400'] = (panel['S1701_C01_044E'] - panel['S1701_C01_043E'])/panel['S1701_C01_001E']
panel

Unnamed: 0,HIOS ID,Policy County FIPS Code,IssuerId,County,State,FIPS County Code,County Name,ever_enrolled_count,FirstTierUtilization,BeginPrimaryCareCostSharingAfterNumberOfVisits,...,DP05_0081PE,DP05_0085PE,DP05_0087PE,DP05_0088PE,DP05_0089PE,demand_instruments0,FPL_50_150,FPL_150_200,FPL_200_250,FPL_250_400
0,38344,2013,38344,2013,AK,2013.0,ALEUTIANS EAST,0.0,0.0,0.0,...,,,,,,0.000000,0.194774,0.139471,0.213917,0.138560
1,38344,2016,38344,2016,AK,2016.0,ALEUTIANS WEST,12.0,0.0,0.0,...,,,,,,0.000000,0.148629,0.113095,0.159452,0.147186
2,38344,2020,38344,2020,AK,2020.0,ANCHORAGE,2459.0,0.0,0.0,...,,,,,,0.000000,0.115528,0.060183,0.142944,0.140811
3,38344,2050,38344,2050,AK,2050.0,BETHEL,0.0,0.0,0.0,...,,,,,,0.000000,0.350889,0.116675,0.176711,0.090543
4,38344,2060,38344,2060,AK,2060.0,BRISTOL BAY BOROUGH,15.0,0.0,0.0,...,,,,,,0.000000,0.096320,0.073593,0.117965,0.149351
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14480,11269,56037,11269,56037,0,0.0,,1189.0,0.0,0.0,...,0.5,1.9,31025.0,51.8,48.2,0.021277,0.116500,0.074783,0.164276,0.149588
14481,11269,56039,11269,56039,0,0.0,,2906.0,0.0,0.0,...,0.0,0.5,16692.0,52.3,47.7,0.021277,0.131435,0.061774,0.123636,0.124819
14482,11269,56041,11269,56041,0,0.0,,669.0,0.0,0.0,...,0.0,2.1,14235.0,50.2,49.8,0.021277,0.186425,0.075428,0.181747,0.160405
14483,11269,56043,11269,56043,0,0.0,,369.0,0.0,0.0,...,0.0,2.5,6125.0,50.0,50.0,0.021277,0.187214,0.102758,0.151725,0.204897


In [10]:
panel['price'] = .25*panel['PREMI27'] +.25*panel['PREMI50'] + .25*panel['PREMI2C30'] + .25*panel['PREMC2C30']
panel['int_1'] = panel['FPL_50_150'] * panel['price']
panel['int_2'] = panel['FPL_150_200'] * panel['price']
panel['int_3'] = panel['FPL_200_250'] * panel['price']
panel['int_4'] = panel['FPL_250_400'] * panel['price']

In [11]:
panel1 = panel[panel['ever_enrolled_count'] > 0]
panel1['log_ever_enrolled_count'] = np.log(panel['ever_enrolled_count'])
# OLS
mod = smf.ols(formula='log_ever_enrolled_count ~ price + FPL_50_150 + FPL_150_200 + FPL_200_250 + FPL_250_400 + int_1 + int_2 + int_3 + int_4', data=panel1)
res_1 = mod.fit()
print(res_1.summary())
res_1.params.sum()

                               OLS Regression Results                              
Dep. Variable:     log_ever_enrolled_count   R-squared:                       0.101
Model:                                 OLS   Adj. R-squared:                  0.100
Method:                      Least Squares   F-statistic:                     167.1
Date:                     Tue, 17 May 2022   Prob (F-statistic):          1.75e-301
Time:                             08:24:45   Log-Likelihood:                -25102.
No. Observations:                    13439   AIC:                         5.022e+04
Df Residuals:                        13429   BIC:                         5.030e+04
Df Model:                                9                                         
Covariance Type:                 nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------


  result = getattr(ufunc, method)(*inputs, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  panel1['log_ever_enrolled_count'] = np.log(panel['ever_enrolled_count'])


-15.923569932150489

In [12]:
X = panel1[['FPL_50_150', 'FPL_150_200', 'FPL_200_250', 'FPL_250_400','int_1','int_2','int_3' ,'int_4', 'price']]
X = X.fillna(0)
X = X.astype(str).astype(float)
y = panel1['log_ever_enrolled_count']
y = y.astype(str).astype(float)
# IV regression
X_instr = X.copy()
X_instr = X_instr.drop(columns='price')
X_instr['demand_instruments0'] = panel['demand_instruments0']
iv_reg = IV2SLS(endog=y, exog=sm.add_constant(X), instrument=sm.add_constant(X_instr))
res = iv_reg.fit()
print(res.summary())
res.params.sum()

                             IV2SLS Regression Results                             
Dep. Variable:     log_ever_enrolled_count   R-squared:                    -279.595
Model:                              IV2SLS   Adj. R-squared:               -279.783
Method:                          Two Stage   F-statistic:                    0.5434
                             Least Squares   Prob (F-statistic):              0.844
Date:                     Tue, 17 May 2022                                         
Time:                             08:24:45                                         
No. Observations:                    13442                                         
Df Residuals:                        13432                                         
Df Model:                                9                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------


  x = pd.concat(x[::order], 1)


1842.9124910071648