In [58]:
import statistics
import pandas as pd

# Background
Premium Stabilization Programs data can be found here: https://www.cms.gov/CCIIO/Programs-and-Initiatives/Premium-Stabilization-Programs/

Medical Loss Ratio data can be found here: https://www.cms.gov/CCIIO/Resources/Data-Resources/mlr.html

We can find Medical Loss Ratio data for specific insurance companies here: https://www.cms.gov/apps/mlr/mlr-search.aspx#/?state=All&reporting_year=2015

I have also downloaded the files in case they get removed from the websites later on.

# 2015 Benefit Year

Relevant files:
- /Premium-Stabilization-Programs/June-30-2016-RA-and-RI-Summary-Report-5CR-063016.pdf
- /MLR-Analysis/Medical-Loss-Ratio/MLR_DataFilesPUF_20161019/Part1_2_Summary_Data_Premium_Claims.csv

In the XLSX file, column D in row FED_REINS_PAYMENTS represents the reinsurance payment amount, column D and I in row FED_RISK_ADJ_NET_PAYMENTS represents the HHS risk adjustment transfer amount in the individual and small group markets, respectively.

In [71]:
# read in the data
data_2015 = pd.read_csv("Medical-Loss-Ratio/MLR_DataFilesPUF_20161019/Part1_2_Summary_Data_Premium_Claims.csv")

# only keep the rows that we are looking up
a_data_2015 = data_2015.loc[data_2015["ROW_LOOKUP_CODE"].isin({"FED_REINS_PAYMENTS", "FED_RISK_ADJ_NET_PAYMENTS"})]

# only keep the columns that contain numbers that we are looking up
b_data_2015 = a_data_2015[["MR_SUBMISSION_TEMPLATE_ID", "ROW_LOOKUP_CODE", "CMM_INDIVIDUAL_Q1", "CMM_SMALL_GROUP_Q1"]]

# remove rows that contain null values in *BOTH* columns that we are looking up
# (thus, threshold = 3 null columns to drop the column)
c_data_2015 = b_data_2015.dropna(thresh=3)

# drop duplicates in the CMM_INDIVIDUAL_Q1 and CMM_SMALL_GROUP_Q1 rows. 
# for some reason, companies are reported twice.
d_data_2015 = c_data_2015.drop_duplicates(subset=["CMM_INDIVIDUAL_Q1","CMM_SMALL_GROUP_Q1"])

# create one dataframe for reinsurance payments and another for risk adjustment payments
data_2015_reins = d_data_2015.loc[d_data_2015["ROW_LOOKUP_CODE"].isin({"FED_REINS_PAYMENTS"})]
data_2015_riskadj = d_data_2015.loc[d_data_2015["ROW_LOOKUP_CODE"].isin({"FED_RISK_ADJ_NET_PAYMENTS"})]


In [77]:
data_2015_reins_individual = data_2015_reins["CMM_INDIVIDUAL_Q1"].dropna()
data_2015_riskadj_individual = data_2015_riskadj["CMM_INDIVIDUAL_Q1"].dropna()
data_2015_riskadj_smallgroup = data_2015_riskadj["CMM_SMALL_GROUP_Q1"].dropna()

data_2015_reins_individual_nonzero = data_2015_riskadj["CMM_INDIVIDUAL_Q1"].where(data_2015_riskadj["CMM_INDIVIDUAL_Q1"] > 0).dropna()
data_2015_riskadj_individual_nonzero = data_2015_riskadj["CMM_INDIVIDUAL_Q1"].where(data_2015_riskadj["CMM_INDIVIDUAL_Q1"] > 0).dropna()
data_2015_riskadj_smallgroup_nonzero = data_2015_riskadj["CMM_SMALL_GROUP_Q1"].where(data_2015_riskadj["CMM_SMALL_GROUP_Q1"] > 0).dropna()

In [79]:
print("Reinsurance Payments 2015 Preview")
print(data_2015_reins.head())

print("\nAnalysis of Reinsurance Payments 2015")
print("Mean (with zeroes) = " + str(statistics.mean(data_2015_reins_individual)))
print("Mean (without zeroes) = " + str(statistics.mean(data_2015_reins_individual_nonzero)))
print("Median (with zeroes) = " + str(statistics.median(data_2015_reins_individual)))
print("Median (without zeroes) = " + str(statistics.median(data_2015_reins_individual_nonzero)))
print("StdDev (with zeroes) = " + str(statistics.stdev(data_2015_reins_individual)))
print("StdDev (without zeroes) = " + str(statistics.stdev(data_2015_reins_individual_nonzero)))
print("Variance (with zeroes) = " + str(statistics.variance(data_2015_reins_individual)))
print("Variance (without zeroes) = " + str(statistics.variance(data_2015_reins_individual_nonzero)))
print("Max Value = " + str(max(data_2015_reins_individual)))
print("Min Non-Zero Value = " + str(min(data_2015_reins_individual_nonzero)))
print("Sample Size (with zeroes) = " + str(len(data_2015_reins_individual)))
print("Sample Size (without zeroes) = " + str(len(data_2015_reins_individual_nonzero)))

print("\n\nRisk Adjustment Payments 2015 Preview")
print(data_2015_riskadj.head())
      
print("\nAnalysis of Individual Market Risk Adjustment Payments 2015")
print("Mean (with zeroes) = " + str(statistics.mean(data_2015_riskadj_individual)))
print("Mean (without zeroes) = " + str(statistics.mean(data_2015_riskadj_individual_nonzero)))
print("Median (with zeroes) = " + str(statistics.median(data_2015_riskadj_individual)))
print("Median (without zeroes) = " + str(statistics.median(data_2015_riskadj_individual_nonzero)))
print("StdDev (with zeroes) = " + str(statistics.stdev(data_2015_riskadj_individual)))
print("StdDev (without zeroes) = " + str(statistics.stdev(data_2015_riskadj_individual_nonzero)))
print("Variance (with zeroes) = " + str(statistics.variance(data_2015_riskadj_individual)))
print("Variance (without zeroes) = " + str(statistics.variance(data_2015_riskadj_individual_nonzero)))
print("Max Value = " + str(max(data_2015_riskadj_individual)))
print("Min Non-Zero Value = " + str(min(data_2015_riskadj_individual_nonzero)))
print("Sample Size (with zeroes) = " + str(len(data_2015_riskadj_individual)))
print("Sample Size (without zeroes) = " + str(len(data_2015_riskadj_individual_nonzero)))

print("\nAnalysis of Small Group Market Risk Adjustment Payments 2015")
print("Mean (with zeroes) = " + str(statistics.mean(data_2015_riskadj_smallgroup)))
print("Mean (without zeroes) = " + str(statistics.mean(data_2015_riskadj_smallgroup_nonzero)))
print("Median (with zeroes) = " + str(statistics.median(data_2015_riskadj_smallgroup)))
print("Median (without zeroes) = " + str(statistics.median(data_2015_riskadj_smallgroup_nonzero)))
print("StdDev (with zeroes) = " + str(statistics.stdev(data_2015_riskadj_smallgroup)))
print("StdDev (without zeroes) = " + str(statistics.stdev(data_2015_riskadj_smallgroup_nonzero)))
print("Variance (with zeroes) = " + str(statistics.variance(data_2015_riskadj_smallgroup)))
print("Variance (without zeroes) = " + str(statistics.variance(data_2015_riskadj_smallgroup_nonzero)))
print("Max Value = " + str(max(data_2015_riskadj_smallgroup)))
print("Min Non-Zero Value = " + str(min(data_2015_riskadj_smallgroup_nonzero)))
print("Sample Size (with zeroes) = " + str(len(data_2015_riskadj_smallgroup)))
print("Sample Size (without zeroes) = " + str(len(data_2015_riskadj_smallgroup_nonzero)))

Reinsurance Payments 2015 Preview
       MR_SUBMISSION_TEMPLATE_ID     ROW_LOOKUP_CODE  CMM_INDIVIDUAL_Q1  \
26170                     134669  FED_REINS_PAYMENTS         5518706.00   
40864                     135760  FED_REINS_PAYMENTS               0.00   
43468                     135842  FED_REINS_PAYMENTS         5970215.00   
43654                     135947  FED_REINS_PAYMENTS        12577778.91   
44863                     136114  FED_REINS_PAYMENTS        38644223.02   

       CMM_SMALL_GROUP_Q1  
26170                 NaN  
40864                 NaN  
43468                 NaN  
43654                 NaN  
44863                 NaN  

Analysis of Reinsurance Payments 2015
Mean (with zeroes) = 20058449.631
Mean (without zeroes) = 11036595.6855
Median (with zeroes) = 4524839.43
Median (without zeroes) = 2205492.365
StdDev (with zeroes) = 64489499.66906441
StdDev (without zeroes) = 28627589.44649752
Variance (with zeroes) = 4.15889556757e+15
Variance (without zeroes) = 8.195388

# Results of Basic Statistic Analysis

### Analysis of Reinsurance Payments 2015

Mean (with zeroes) = 20058449.631

Mean (without zeroes) = 11036595.6855

Median (with zeroes) = 4524839.43

Median (without zeroes) = 2205492.365

StdDev (with zeroes) = 64489499.66906441

StdDev (without zeroes) = 28627589.44649752

Variance (with zeroes) = 4.15889556757e+15

Variance (without zeroes) = 8.19538877517e+14

Max Value = 1059085035.93

Min Non-Zero Value = 370.93

Sample Size (with zeroes) = 537

Sample Size (without zeroes) = 326

### Analysis of Individual Market Risk Adjustment Payments 2015
Mean (with zeroes) = 271352.456617

Mean (without zeroes) = 11036595.6855

Median (with zeroes) = 0.0

Median (without zeroes) = 2205492.365

StdDev (with zeroes) = 24611688.38164475

StdDev (without zeroes) = 28627589.44649752

Variance (with zeroes) = 6.05735204995e+14

Variance (without zeroes) = 8.19538877517e+14

Max Value = 334055030.0

Min Non-Zero Value = 370.93

Sample Size (with zeroes) = 811

Sample Size (without zeroes) = 326

### Analysis of Small Group Market Risk Adjustment Payments 2015
Mean (with zeroes) = -186196.29992

Mean (without zeroes) = 5169059.17298

Median (with zeroes) = -40454.57

Median (without zeroes) = 830180.91

StdDev (with zeroes) = 14925509.12577084

StdDev (without zeroes) = 20060249.748418428

Variance (with zeroes) = 2.22770822663e+14

Variance (without zeroes) = 4.02413619969e+14

Max Value = 214603533.76

Min Non-Zero Value = 2012.29

Sample Size (with zeroes) = 790

Sample Size (without zeroes) = 256

### Next Steps
Based on this above data, what I want to do the following:
- see is why so many entries are zero
- see why some values are negative and some are positive whereas on the PDF all of the numbers are positive
- graph the distribution of values
- look into which specific companies are having the min/max values, and look at those company's other data