In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [2]:
data_path = "../data/"
service_df = pd.read_csv(data_path + 'Service_Area_PUF_2019.csv', encoding='cp1252')
plan_df = pd.read_csv(data_path + 'Plan_Attributes_PUF_2019.csv', encoding='cp1252')
plan_summary = pd.read_csv('plan_summary.csv', index_col=None)

In [3]:
no_missing_values = plan_summary[(plan_summary['Missing_Values'] == 0) & (plan_summary['Unique_Values'] <= 34)]['Column_Name'].to_list()
no_missing_values.append('IssuerId')
no_missing_values

['BusinessYear',
 'SourceName',
 'MarketCoverage',
 'DentalOnlyPlan',
 'IsNewPlan',
 'PlanType',
 'MetalLevel',
 'DesignType',
 'QHPNonQHPTypeId',
 'CompositeRatingOffered',
 'ChildOnlyOffering',
 'PlanEffectiveDate',
 'OutOfCountryCoverage',
 'OutOfServiceAreaCoverage',
 'NationalNetwork',
 'CSRVariationType',
 'MultipleInNetworkTiers',
 'FirstTierUtilization',
 'InpatientCopaymentMaximumDays',
 'BeginPrimaryCareCostSharingAfterNumberOfVisits',
 'BeginPrimaryCareDeductibleCoinsuranceAfterNumberOfCopays',
 'IssuerId']

In [4]:
# Treat columns as continous
cleaned_plan_df = plan_df[no_missing_values]
continuous = ['FirstTierUtilization','BeginPrimaryCareCostSharingAfterNumberOfVisits','BeginPrimaryCareDeductibleCoinsuranceAfterNumberOfCopays']
cleaned_plan_df['FirstTierUtilization'] = cleaned_plan_df['FirstTierUtilization'].str.replace('%','')
cleaned_plan_df[continuous] = cleaned_plan_df[continuous].astype(str).astype(float) 

In [5]:
# Get CSRVariationType binary
categories = cleaned_plan_df['CSRVariationType'].value_counts()
four_largest = categories.head(4).index.to_list()
rest = categories[4:].to_list()
m = 1 
cleaned_plan_df['CSRVariationTypeBinary'] = 0
for j in range(len(four_largest)):
    cleaned_plan_df.loc[cleaned_plan_df.CSRVariationType == four_largest[j], 'CSRVariationTypeBinary'] = m
    m += 1
cleaned_plan_df.loc[cleaned_plan_df.CSRVariationTypeBinary == 0, 'CSRVariationTypeBinary'] = 5
cleaned_plan_df

Unnamed: 0,BusinessYear,SourceName,MarketCoverage,DentalOnlyPlan,IsNewPlan,PlanType,MetalLevel,DesignType,QHPNonQHPTypeId,CompositeRatingOffered,...,OutOfServiceAreaCoverage,NationalNetwork,CSRVariationType,MultipleInNetworkTiers,FirstTierUtilization,InpatientCopaymentMaximumDays,BeginPrimaryCareCostSharingAfterNumberOfVisits,BeginPrimaryCareDeductibleCoinsuranceAfterNumberOfCopays,IssuerId,CSRVariationTypeBinary
0,2019,HIOS,Individual,No,New,PPO,Silver,Not Applicable,On the Exchange,No,...,Yes,No,Zero Cost Sharing Plan Variation,No,100.0,0,0.0,0.0,38344,1
1,2019,HIOS,SHOP (Small Group),Yes,Existing,Indemnity,High,Not Applicable,Off the Exchange,No,...,Yes,Yes,Standard High Off Exchange Plan,No,100.0,0,0.0,0.0,21989,5
2,2019,HIOS,Individual,Yes,Existing,Indemnity,Low,Not Applicable,Both,No,...,Yes,Yes,Standard Low Off Exchange Plan,No,100.0,0,0.0,0.0,21989,5
3,2019,HIOS,Individual,Yes,Existing,Indemnity,Low,Not Applicable,Both,No,...,Yes,Yes,Standard Low On Exchange Plan,No,100.0,0,0.0,0.0,21989,5
4,2019,HIOS,SHOP (Small Group),Yes,Existing,Indemnity,High,Not Applicable,Off the Exchange,No,...,Yes,Yes,Standard High Off Exchange Plan,No,100.0,0,0.0,0.0,21989,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15685,2019,SERFF,Individual,Yes,Existing,PPO,Low,Not Applicable,Off the Exchange,No,...,Yes,Yes,Standard Low Off Exchange Plan,No,100.0,0,0.0,0.0,76526,5
15686,2019,SERFF,Individual,Yes,Existing,PPO,High,Not Applicable,Off the Exchange,No,...,Yes,Yes,Standard High Off Exchange Plan,No,100.0,0,0.0,0.0,76526,5
15687,2019,SERFF,Individual,Yes,Existing,PPO,Low,Not Applicable,On the Exchange,No,...,Yes,Yes,Standard Low On Exchange Plan,No,100.0,0,0.0,0.0,76526,5
15688,2019,SERFF,Individual,Yes,Existing,PPO,High,Not Applicable,On the Exchange,No,...,Yes,Yes,Standard High On Exchange Plan,No,100.0,0,0.0,0.0,76526,5


In [6]:
dummy_cols = ['BusinessYear',
 'SourceName',
 'MarketCoverage',
 'DentalOnlyPlan',
 'IsNewPlan',
 'PlanType',
 'MetalLevel',
 'DesignType',
 'QHPNonQHPTypeId',
 'CompositeRatingOffered',
 'ChildOnlyOffering',
 'PlanEffectiveDate',
 'OutOfCountryCoverage',
 'OutOfServiceAreaCoverage',
 'NationalNetwork',
 'MultipleInNetworkTiers',
 'InpatientCopaymentMaximumDays']
cleaned_plan_df = pd.get_dummies(cleaned_plan_df, columns = dummy_cols)
cleaned_plan_df

Unnamed: 0,CSRVariationType,FirstTierUtilization,BeginPrimaryCareCostSharingAfterNumberOfVisits,BeginPrimaryCareDeductibleCoinsuranceAfterNumberOfCopays,IssuerId,CSRVariationTypeBinary,BusinessYear_2019,SourceName_HIOS,SourceName_OPM,SourceName_SERFF,...,NationalNetwork_No,NationalNetwork_Yes,MultipleInNetworkTiers_No,MultipleInNetworkTiers_Yes,InpatientCopaymentMaximumDays_0,InpatientCopaymentMaximumDays_2,InpatientCopaymentMaximumDays_3,InpatientCopaymentMaximumDays_4,InpatientCopaymentMaximumDays_5,InpatientCopaymentMaximumDays_10
0,Zero Cost Sharing Plan Variation,100.0,0.0,0.0,38344,1,1,1,0,0,...,1,0,1,0,1,0,0,0,0,0
1,Standard High Off Exchange Plan,100.0,0.0,0.0,21989,5,1,1,0,0,...,0,1,1,0,1,0,0,0,0,0
2,Standard Low Off Exchange Plan,100.0,0.0,0.0,21989,5,1,1,0,0,...,0,1,1,0,1,0,0,0,0,0
3,Standard Low On Exchange Plan,100.0,0.0,0.0,21989,5,1,1,0,0,...,0,1,1,0,1,0,0,0,0,0
4,Standard High Off Exchange Plan,100.0,0.0,0.0,21989,5,1,1,0,0,...,0,1,1,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15685,Standard Low Off Exchange Plan,100.0,0.0,0.0,76526,5,1,0,0,1,...,0,1,1,0,1,0,0,0,0,0
15686,Standard High Off Exchange Plan,100.0,0.0,0.0,76526,5,1,0,0,1,...,0,1,1,0,1,0,0,0,0,0
15687,Standard Low On Exchange Plan,100.0,0.0,0.0,76526,5,1,0,0,1,...,0,1,1,0,1,0,0,0,0,0
15688,Standard High On Exchange Plan,100.0,0.0,0.0,76526,5,1,0,0,1,...,0,1,1,0,1,0,0,0,0,0


In [7]:
# Group by IssuerID and ServiceAreaID
service_df2 = service_df[['IssuerId','ServiceAreaId']]
merged = service_df2.merge(cleaned_plan_df, how='left', left_on='IssuerId', right_on='IssuerId')
merged

Unnamed: 0,IssuerId,ServiceAreaId,CSRVariationType,FirstTierUtilization,BeginPrimaryCareCostSharingAfterNumberOfVisits,BeginPrimaryCareDeductibleCoinsuranceAfterNumberOfCopays,CSRVariationTypeBinary,BusinessYear_2019,SourceName_HIOS,SourceName_OPM,...,NationalNetwork_No,NationalNetwork_Yes,MultipleInNetworkTiers_No,MultipleInNetworkTiers_Yes,InpatientCopaymentMaximumDays_0,InpatientCopaymentMaximumDays_2,InpatientCopaymentMaximumDays_3,InpatientCopaymentMaximumDays_4,InpatientCopaymentMaximumDays_5,InpatientCopaymentMaximumDays_10
0,21989,AKS001,Standard High Off Exchange Plan,100.0,0.0,0.0,5,1,1,0,...,0,1,1,0,1,0,0,0,0,0
1,21989,AKS001,Standard Low Off Exchange Plan,100.0,0.0,0.0,5,1,1,0,...,0,1,1,0,1,0,0,0,0,0
2,21989,AKS001,Standard Low On Exchange Plan,100.0,0.0,0.0,5,1,1,0,...,0,1,1,0,1,0,0,0,0,0
3,21989,AKS001,Standard High Off Exchange Plan,100.0,0.0,0.0,5,1,1,0,...,0,1,1,0,1,0,0,0,0,0
4,21989,AKS001,Standard High Off Exchange Plan,100.0,0.0,0.0,5,1,1,0,...,0,1,1,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
942569,76526,WVS001,Standard Low Off Exchange Plan,100.0,0.0,0.0,5,1,0,0,...,0,1,1,0,1,0,0,0,0,0
942570,76526,WVS001,Standard High Off Exchange Plan,100.0,0.0,0.0,5,1,0,0,...,0,1,1,0,1,0,0,0,0,0
942571,76526,WVS001,Standard Low On Exchange Plan,100.0,0.0,0.0,5,1,0,0,...,0,1,1,0,1,0,0,0,0,0
942572,76526,WVS001,Standard High On Exchange Plan,100.0,0.0,0.0,5,1,0,0,...,0,1,1,0,1,0,0,0,0,0


In [8]:
grouped = merged.groupby(['IssuerId', 'ServiceAreaId']).mean()
grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,FirstTierUtilization,BeginPrimaryCareCostSharingAfterNumberOfVisits,BeginPrimaryCareDeductibleCoinsuranceAfterNumberOfCopays,CSRVariationTypeBinary,BusinessYear_2019,SourceName_HIOS,SourceName_OPM,SourceName_SERFF,MarketCoverage_Individual,MarketCoverage_SHOP (Small Group),...,NationalNetwork_No,NationalNetwork_Yes,MultipleInNetworkTiers_No,MultipleInNetworkTiers_Yes,InpatientCopaymentMaximumDays_0,InpatientCopaymentMaximumDays_2,InpatientCopaymentMaximumDays_3,InpatientCopaymentMaximumDays_4,InpatientCopaymentMaximumDays_5,InpatientCopaymentMaximumDays_10
IssuerId,ServiceAreaId,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
10046,HIS001,100.0,0.00,0.000000,5.000000,1.0,0.0,0.0,1.0,1.000000,0.000000,...,0.000000,1.000000,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
10091,ORS002,100.0,0.25,0.000000,3.708333,1.0,0.0,0.0,1.0,1.000000,0.000000,...,0.000000,1.000000,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
10091,ORS003,100.0,0.25,0.000000,3.708333,1.0,0.0,0.0,1.0,1.000000,0.000000,...,0.000000,1.000000,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
10091,ORS005,100.0,0.25,0.000000,3.708333,1.0,0.0,0.0,1.0,1.000000,0.000000,...,0.000000,1.000000,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
10207,VAS001,100.0,0.00,0.000000,3.947368,1.0,0.0,0.0,1.0,0.684211,0.315789,...,0.894737,0.105263,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99969,OHS004,100.0,0.00,1.095092,3.539877,1.0,0.0,0.0,1.0,1.000000,0.000000,...,0.987730,0.012270,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
99969,OHS005,100.0,0.00,1.095092,3.539877,1.0,0.0,0.0,1.0,1.000000,0.000000,...,0.987730,0.012270,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
99969,OHS007,100.0,0.00,1.095092,3.539877,1.0,0.0,0.0,1.0,1.000000,0.000000,...,0.987730,0.012270,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
99969,OHS009,100.0,0.00,1.095092,3.539877,1.0,0.0,0.0,1.0,1.000000,0.000000,...,0.987730,0.012270,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


## Merge All

In [9]:
county = pd.read_csv('county_characteristics.csv')
county = county[county['FIPS County Code'].notna()]
service_df3 = service_df[['County','ServiceAreaId', 'IssuerId']]
service_df3.rename(columns={'County':'county_sid'}, inplace=True)
service_df3 = service_df3.drop_duplicates()
m1 = county.merge(service_df3, how='left', left_on='FIPS County Code', right_on='county_sid')
m1

Unnamed: 0,DP03_0001E,DP03_0002E,DP03_0003E,DP03_0004E,DP03_0005E,DP03_0006E,DP03_0007E,DP03_0008E,DP03_0009E,DP03_0010E,...,Number of Consumers with CSR (AV of 73%/87%/94%),Number of Consumers with CSR AV of 73%,Number of Consumers with CSR AV of 87%,Number of Consumers with CSR AV of 94%,Average Monthly Advanced CSR Payment for Consumers with 73%,Average Monthly Advanced CSR Payment for Consumers with 87%,Average Monthly Advanced CSR Payment for Consumers with 94%,county_sid,ServiceAreaId,IssuerId
0,42712,26008,25699,24262,1437,309,16704,25699,0,22074,...,1341,142,399,800,$13,$87,$117,1001.0,ALS001,18239.0
1,160301,93872,93640,87753,5887,232,66429,93640,0,82813,...,8342,1246,2426,4670,$13,$91,$121,1003.0,ALS001,18239.0
2,21476,10316,10316,8993,1323,0,11160,10316,0,9938,...,584,62,155,367,$13,$98,$132,1005.0,ALS001,18239.0
3,18496,9002,8997,8354,643,5,9494,8997,0,8514,...,477,58,121,298,$15,$101,$123,,,
4,46007,22969,22960,21593,1367,9,23038,22960,0,23370,...,1663,197,437,1029,$14,$96,$132,1009.0,ALS001,18239.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9691,18710,14646,14646,14335,311,0,4064,14646,0,8916,...,1411,428,484,499,$15,$115,$165,,,
9692,15257,10124,10124,9477,647,0,5133,10124,0,7705,...,356,79,116,161,$16,$133,$181,,,
9693,6577,4085,4085,3781,304,0,2492,4085,0,3250,...,173,33,54,86,$15,$138,$192,,,
9694,5862,3434,3434,3310,124,0,2428,3434,0,2830,...,112,26,24,62,$25,$170,$191,,,


In [10]:
m2 = m1.merge(grouped, how='left', on=['ServiceAreaId', 'IssuerId'])
m2

Unnamed: 0,DP03_0001E,DP03_0002E,DP03_0003E,DP03_0004E,DP03_0005E,DP03_0006E,DP03_0007E,DP03_0008E,DP03_0009E,DP03_0010E,...,NationalNetwork_No,NationalNetwork_Yes,MultipleInNetworkTiers_No,MultipleInNetworkTiers_Yes,InpatientCopaymentMaximumDays_0,InpatientCopaymentMaximumDays_2,InpatientCopaymentMaximumDays_3,InpatientCopaymentMaximumDays_4,InpatientCopaymentMaximumDays_5,InpatientCopaymentMaximumDays_10
0,42712,26008,25699,24262,1437,309,16704,25699,0,22074,...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,160301,93872,93640,87753,5887,232,66429,93640,0,82813,...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,21476,10316,10316,8993,1323,0,11160,10316,0,9938,...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,18496,9002,8997,8354,643,5,9494,8997,0,8514,...,,,,,,,,,,
4,46007,22969,22960,21593,1367,9,23038,22960,0,23370,...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9691,18710,14646,14646,14335,311,0,4064,14646,0,8916,...,,,,,,,,,,
9692,15257,10124,10124,9477,647,0,5133,10124,0,7705,...,,,,,,,,,,
9693,6577,4085,4085,3781,304,0,2492,4085,0,3250,...,,,,,,,,,,
9694,5862,3434,3434,3310,124,0,2428,3434,0,2830,...,,,,,,,,,,


In [11]:
data_path = "../data_subsidy/"
issuer_enroll = pd.read_csv(data_path + '2016-Issuer-Enrollment-Disenrollment-Report.csv')
issuer_enroll

Unnamed: 0,Tenant ID,HIOS ID,Policy County FIPS Code,Ever Enrolled Count,Females,Males,< 18 years old,18-34 years old,35-54 years old,> 55 years old,FPL: < 138%,FPL: >= 138% and <=250%,FPL: > 250% and <=400%,FPL: > 400%,FPL: Unknown,Tobacco Users
0,AK,38344,2013,*,*,*,0,*,*,*,*,*,*,0,*,0
1,AK,38344,2016,12,*,*,*,*,*,*,*,*,0,*,*,0
2,AK,38344,2020,2459,1319,1140,385,687,840,547,206,1148,643,115,347,19
3,AK,38344,2050,*,*,*,0,*,*,*,*,*,*,*,0,0
4,AK,38344,2060,15,*,*,*,*,*,*,0,*,*,0,*,*
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8791,WY,11269,56037,1273,717,556,191,337,401,344,288,539,291,38,117,91
8792,WY,11269,56039,3054,1534,1520,353,1262,962,477,428,1323,915,145,243,58
8793,WY,11269,56041,712,396,316,136,163,228,185,151,313,177,31,40,40
8794,WY,11269,56043,383,200,183,66,80,122,115,82,136,116,13,36,18


In [12]:
m3 = m2.merge(issuer_enroll, how='left', left_on=['FIPS County Code', 'IssuerId'], right_on=['Policy County FIPS Code','HIOS ID'])
m3

Unnamed: 0,DP03_0001E,DP03_0002E,DP03_0003E,DP03_0004E,DP03_0005E,DP03_0006E,DP03_0007E,DP03_0008E,DP03_0009E,DP03_0010E,...,< 18 years old,18-34 years old,35-54 years old,> 55 years old,FPL: < 138%,FPL: >= 138% and <=250%,FPL: > 250% and <=400%,FPL: > 400%,FPL: Unknown,Tobacco Users
0,42712,26008,25699,24262,1437,309,16704,25699,0,22074,...,,,,,,,,,,
1,160301,93872,93640,87753,5887,232,66429,93640,0,82813,...,,,,,,,,,,
2,21476,10316,10316,8993,1323,0,11160,10316,0,9938,...,,,,,,,,,,
3,18496,9002,8997,8354,643,5,9494,8997,0,8514,...,,,,,,,,,,
4,46007,22969,22960,21593,1367,9,23038,22960,0,23370,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9691,18710,14646,14646,14335,311,0,4064,14646,0,8916,...,,,,,,,,,,
9692,15257,10124,10124,9477,647,0,5133,10124,0,7705,...,,,,,,,,,,
9693,6577,4085,4085,3781,304,0,2492,4085,0,3250,...,,,,,,,,,,
9694,5862,3434,3434,3310,124,0,2428,3434,0,2830,...,,,,,,,,,,
