In [2]:
import pandas as pd
import numpy as np
import xlrd
import glob
import os

In [3]:
# this script parses important insurer information from an xlsx file 
# original data can be found here: https://www.cms.gov/apps/mlr/mlr-search.aspx

# author: Grace Guan
# date created: 11/24/17

In [4]:
def parse_input(df14, df15, df16, xls):
    """Append one insurer's figures to the DataFrame for its reporting year.

    The first sheet of the workbook is inspected; if cell (2, 0) does not read
    'Company Name:' the workbook is not in the recognised layout and nothing is
    appended. Otherwise the reporting year is read from cell (16, 1) (falling
    back to (17, 1) when that cell holds 'No') and a single row is appended, in
    place, to whichever of ``df14`` / ``df15`` / ``df16`` matches that year.
    Workbooks with any other reporting year are ignored.

    Parameters
    ----------
    df14, df15, df16 : pandas.DataFrame
        Accumulators for reporting years 2014, 2015 and 2016. Each must have
        exactly eight columns, in the order: HIOS id, company name, state,
        individual member months, small group member months, reinsurance,
        individual risk adjustment, small group risk adjustment.
    xls : object with a ``parse(sheet_index)`` method (e.g. pandas.ExcelFile)
        The workbook to read; sheets 0, 1 and 2 are used.

    Returns
    -------
    None
        The matching DataFrame is mutated in place.
    """
    sheet0 = xls.parse(0)

    # 2014, 2015, and 2016 format
    if sheet0.iloc[2, 0] != 'Company Name:':  # should say "Company Name:"
        return

    reporting_year = sheet0.iloc[16, 1]
    if reporting_year == 'No':
        reporting_year = sheet0.iloc[17, 1]

    # Normalise the year so that a numeric cell (2014 or 2014.0) is treated the
    # same as the text '2014'; anything that matched before still matches.
    year_key = str(reporting_year).strip()
    if year_key.endswith('.0'):
        year_key = year_key[:-2]

    # NOTE(review): 2016 is assumed to share the 2014/2015 cell positions, as
    # the "2014, 2015, and 2016 format" remark implies -- confirm against a
    # 2016 template.
    targets = {'2014': df14, '2015': df15, '2016': df16}
    target = targets.get(year_key)
    if target is None:
        return

    # individual member months is in section 1.7 59E (57,3) in sheet 1
    # small group member months is in section 1.7 59K (57,9) in sheet 1
    sheet1 = xls.parse(1)

    # reinsurance is in section 1.9 which is 15E in sheet 2
    # individual risk adjustment is in section 1.10 which is 16E in sheet 2
    # small group risk adjustment is in section 1.10 which is 16K in sheet 2
    sheet2 = xls.parse(2)

    # hios id is 11C (9,1), company name is 4C (2,1) and company state is
    # 12C (10,1), all in sheet 0
    target.loc[target.shape[0]] = [
        sheet0.iloc[9, 1],    # hios id
        sheet0.iloc[2, 1],    # company name
        sheet0.iloc[10, 1],   # company state
        sheet1.iloc[57, 3],   # individual member months
        sheet1.iloc[57, 9],   # small group member months
        sheet2.iloc[13, 3],   # reinsurance
        sheet2.iloc[14, 3],   # individual risk adjustment
        sheet2.iloc[14, 9],   # small group risk adjustment
    ]

In [5]:
# create the dataframes (one per reporting year) and fill them from the
# workbooks found in the input directory

# Column layout shared by all three yearly frames; the order must match the
# row that parse_input appends.
OUTPUT_COLUMNS = ('HIOS_ID', 'COMPANY_NAME', 'STATE',
                  'IND_MEMBER_MONTHS', 'SG_MEMBER_MONTHS',
                  'REINSURANCE', 'IND_RISK_ADJ', 'SG_RISK_ADJ')

# NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
INPUT_DIR = 'C:/Users/guanz/Desktop/new_input_2'
FILE_PATTERN = 'MLR_Template_Wyoming (44).xlsx'

errorcount = 0

mydf14 = pd.DataFrame(columns=OUTPUT_COLUMNS)
mydf15 = pd.DataFrame(columns=OUTPUT_COLUMNS)
mydf16 = pd.DataFrame(columns=OUTPUT_COLUMNS)

print(os.getcwd())
os.chdir(INPUT_DIR)
FileList = glob.glob(FILE_PATTERN)
print(FileList)

for File in FileList:
    try:
        # Open the workbook inside the try block: an xlrd.XLRDError raised
        # while opening the file (presumably what an encrypted workbook
        # produces -- confirm) is then counted instead of aborting the loop.
        # The with-statement closes the workbook handle after each file.
        with pd.ExcelFile(File) as myxls:
            parse_input(mydf14, mydf15, mydf16, myxls)
    except xlrd.XLRDError:
        errorcount = errorcount + 1


print("There were " + str(errorcount) + " encrypted files.")
print(mydf14)
print(mydf15)
print(mydf16)

C:\Users\guanz\Dropbox\0_Princeton\Fall_2017\JIW\TestModel\4_Insurer-Data-ETL
['input(1001)1.xlsx']
There were 0 encrypted files.
Empty DataFrame
Columns: [HIOS_ID, COMPANY_NAME, STATE, IND_MEMBER_MONTHS, SG_MEMBER_MONTHS, REINSURANCE, IND_RISK_ADJ, SG_RISK_ADJ]
Index: []
Empty DataFrame
Columns: [HIOS_ID, COMPANY_NAME, STATE, IND_MEMBER_MONTHS, SG_MEMBER_MONTHS, REINSURANCE, IND_RISK_ADJ, SG_RISK_ADJ]
Index: []
Empty DataFrame
Columns: [HIOS_ID, COMPANY_NAME, STATE, IND_MEMBER_MONTHS, SG_MEMBER_MONTHS, REINSURANCE, IND_RISK_ADJ, SG_RISK_ADJ]
Index: []


In [6]:
# Labels in the order they are assigned to consecutive integer keys below.
# presumably these name spreadsheet columns -- confirm against the template
ycoordlabels = [
    "1HealthInsuranceINDIVIDUALTotalasof12/31/14",
    "2HealthInsuranceINDIVIDUALTotalasof3/31/15",
    "3HealthInsuranceINDIVIDUALDualContracts(IncludedinTotalasof3/31/15)",
    "4HealthInsuranceINDIVIDUALDeferredPY1(Add)",
    "5HealthInsuranceINDIVIDUALDeferredCY(Subtract)",
    "2AHealthInsuranceINDIVIDUAL[RiskCorridors]Totalasof3/31/15",
    "6HealthInsuranceSMALLGROUPTotalasof12/31/14",
    "7HealthInsuranceSMALLGROUPTotalasof3/31/15",
    "8HealthInsuranceSMALLGROUPDualContracts(IncludedinTotalasof3/31/15)",
    "9HealthInsuranceSMALLGROUPDeferredPY1(Add)",
    "10HealthInsuranceSMALLGROUPDeferredCY(Subtract)",
    "7AHealthInsuranceSMALLGROUP[RiskCorridors]Totalasof3/31/15",
    "11HealthInsuranceLARGEGROUPTotalasof12/31/14",
    "12HealthInsuranceLARGEGROUPTotalasof3/31/15",
    "13HealthInsuranceLARGEGROUPDualContracts(IncludedinTotalasof3/31/15)",
    "14HealthInsuranceLARGEGROUPDeferredPY1(Add)",
    "15HealthInsuranceLARGEGROUPDeferredCY(Subtract)",
    "16Mini-MedINDIVIDUALTotalasof12/31/14",
    "17Mini-MedINDIVIDUALTotalasof3/31/15",
    "18Mini-MedINDIVIDUALDualContracts(IncludedinTotalasof3/31/15)",
    "19Mini-MedSMALLGROUPTotalasof12/31/14",
    "20Mini-MedSMALLGROUPTotalasof3/31/15",
    "21Mini-MedSMALLGROUPDualContracts(IncludedinTotalasof3/31/15)",
    "22Mini-MedLARGEGROUPTotalasof12/31/14",
    "23Mini-MedLARGEGROUPTotalasof3/31/15",
    "24Mini-MedLARGEGROUPDualContracts(IncludedinTotalasof3/31/15)",
    "25ExpatSMALLGROUPTotalasof12/31/14",
    "26ExpatSMALLGROUPTotalasof3/31/15",
    "27ExpatSMALLGROUPDualContracts(IncludedinTotalasof3/31/15)",
    "28ExpatSMALLGROUPDeferredPY1(Add)",
    "29ExpatSMALLGROUPDeferredCY(Subtract)",
    "30ExpatLARGEGROUPTotalasof12/31/14",
    "31ExpatLARGEGROUPTotalasof3/31/15",
    "32ExpatLARGEGROUPDualContracts(IncludedinTotalasof3/31/15)",
    "33ExpatLARGEGROUPDeferredPY1(Add)",
    "34ExpatLARGEGROUPDeferredCY(Subtract)",
    "35StudentHealthINDIVIDUALTotalasof12/31/14",
    "36StudentHealthINDIVIDUALTotalasof3/31/15",
    "37StudentHealthINDIVIDUALDualContracts(IncludedinTotalasof3/31/15)",
    "38StudentHealthINDIVIDUALDeferredPY1(Add)",
    "39StudentHealthINDIVIDUALDeferredCY(Subtract)",
    "40GovernmentProgramPlansTotalasof12/31/14",
    "41OtherHealthBusinessTotalasof12/31/14",
    "42Aggregate2%RuleTotalasof12/31/14",
    "43UninsuredPlansTotalasof12/31/14",
    "44GrandTotalTotalasof12/31/14",
]

# Map keys 2..47 to the 46 labels above, in list order.
ycoords = {}
for w, label in enumerate(ycoordlabels, start=2):
    ycoords[w] = label

In [9]:
# Line-item labels in the order they are assigned to consecutive integer keys.
# presumably these are the row labels of the "Part 1" sheet -- confirm
xcoordlabels1 = [
    "1.Premium",
    "1.1Totaldirectpremiumearned",
    "1.2Federalhighriskpools",
    "1.3Statehighriskpools",
    "1.4Netassumedlesscededreinsurancepremiumearned(excludeamountsalreadyreportedinLine1.1)",
    "1.5OtheradjustmentsduetoMLRcalculations-premium",
    "1.6Riskrevenue",
    "2.Claims",
    "2.1Totalincurredclaims(MLRFormPart2Line2.16)",
    "2.2Prescriptiondrugs(informationalonly;alreadyincludedintotalincurredclaimsabove)",
    "2.3Pharmaceuticalrebates(informationalonly;alreadyexcludedfromtotalincurredclaimsabove)",
    "2.4Statestoplossmarketstabilizationandclaim/censusbasedassessments(informationalonly;alreadyexcludedfromtotalincurredclaimsabove)",
    "2.5Netassumedlesscededclaimsincurred(excludeamountsalreadyreportedinLine2.1)",
    "2.6OtheradjustmentsduetoMLRcalculations–claimsincurred",
    "2.7Rebatespaid",
    "2.8EstimatedrebatesunpaidattheendofthepreviousMLRreportingyear",
    "2.9EstimatedrebatesunpaidattheendoftheMLRreportingyear",
    "2.10Fee-for-serviceandco-payrevenue(netofexpenses)",
    "2.11Allowablefraudreductionexpenses(MLRFormPart2Line2.17)",
    "3.FederalandStateTaxesandLicensingorRegulatoryFees",
    "3.1FederaltaxesandassessmentsincurredbythereportingissuerduringtheMLRreportingyear",
    "3.1aFederalincometaxesdeductiblefrompremiuminMLRcalculations",
    "3.1bPatientCenteredOutcomesResearchInstitute(PCORI)Fee",
    "3.1cAffordableCareActsection9010Fee",
    "3.1dOtherFederalTaxesandassessmentsdeductiblefrompremium",
    "3.2StateinsurancepremiumandothertaxesincurredbythereportingissuerduringtheMLRreportingyear(deductiblefrompremiuminMLRcalculation)",
    "3.2aStateincomeexcisebusinessandothertaxes",
    "3.2bStatepremiumtaxes",
    "3.2cCommunitybenefitexpendituresdeductiblefrompremiuminMLRcalculations",
    "3.3Regulatoryauthoritylicensesandfees",
    "3.3aFederalTransitionalReinsuranceProgramcontributions",
    "3.3bOtherFederalandStateregulatoryauthoritylicensesandfees",
    "4.HealthCareQualityImprovementExpensesIncurred",
    "4.1Improvehealthoutcomes",
    "4.2Activitiestopreventhospitalreadmission",
    "4.3Improvepatientsafetyandreducemedicalerrors",
    "4.4Wellnessandhealthpromotionactivities",
    "4.5Healthinformationtechnologyexpensesrelatedtoimprovinghealthcarequality",
    "4.6AllowableImplementationICD-10expenses(nottoexceed0.3%ofpremium)",
    "5.Non-ClaimsCosts",
    "5.1CostcontainmentexpensesnotincludedinqualityimprovementexpensesinSection4",
    "5.2Allotherclaimsadjustmentexpenses",
    "5.3Directsalessalariesandbenefits",
    "5.4Agentsandbrokersfeesandcommissions",
    "5.5Othertaxes",
    "5.5aTaxesandassessments(excludeamountsreportedinSection3orLine9)",
    "5.5bFinesandpenaltiesofregulatoryauthorities(excludeamountsreportedinLine3.3)",
    "5.6Othergeneralandadministrativeexpenses",
    "5.7Communitybenefitexpenditures(informationalonly;includeamountsreportedinLines3.2cand5.6)",
    "5.8ICD-10implementationexpenses(informationalonly;includeamountsreportedinLines4.6and5.6)",
    "6.Incomefromfeesofuninsuredplans",
    "7.OtherIndicatorsorinformation",
    "7.1Numberofpolicies/certificates",
    "7.2Numberofcoveredlives",
    "7.3Numberofgroups",
    "7.4Membermonths",
    "7.5Numberoflife-years",
    "8.Netinvestmentincomeandothergain/(loss)",
    "9.OtherFederalincometaxes(excludetaxesonLines3.1a-d)",
]

# Map keys 2..60 to the 59 labels above, in list order.
xcoordspart1 = {}
for w, label in enumerate(xcoordlabels1, start=2):
    xcoordspart1[w] = label

In [10]:
# Line-item labels in the order they are assigned to consecutive integer keys.
# presumably these are the row labels of the "Part 2" sheet -- confirm
xcoordlabels2 = [
    "1.Premium",
    "1.1Directpremiumwritten",
    "1.2Unearnedpremiumprioryear",
    "1.3UnearnedpremiumMLRReportingyear",
    "1.4Experienceratingrefunds(ratecredits)paid",
    "1.4aExperienceratingrefundswithallincurreddatespaidintheMLRreportingyear",
    "1.4bExperienceratingrefundsassociatedwithpremiumearnedonlyinthereportingyearandpaidthrough3/31ofthefollowingyear",
    "1.5Reserveforexperienceratingrefunds(ratecredits)MLRReportingyear",
    "1.6Reserveforexperienceratingrefunds(ratecredits)prioryear",
    "1.7Premiumbalanceswrittenoff",
    "1.8Groupconversioncharges",
    "1.9FederalTransitionalReinsuranceProgrampaymentsexpectedfromHHS(asindicatedbyHHSasof6/30)",
    "1.10FederalRiskAdjustmentProgramnetpaymentsexpectedfromHHS/(chargespayabletoHHS)(asindicatedbyHHSasof6/30)",
    "1.11FederalRiskCorridorsProgramnetpayments/(charges)",
    "1.12Premiumcededunder100%reinsurance(informationalonly;alreadyexcludedfromLines1.1-1.11)",
    "1.13Premiumassumedunder100%reinsurance(informationalonly;alreadyincludedinLines1.1-1.11)",
    "1.14AdvancepaymentsofthepremiumtaxcreditreceivedfromHHS(informationalonly;alreadyincludedinLines1.1-1.11)",
    "2.Claims",
    "2.1ClaimsPaid",
    "2.1aClaimspaidduringtheMLRreportingyearregardlessofincurreddate",
    "2.1bClaimsincurredonlyduringtheMLRreportingyearpaidthrough3/31ofthefollowingyear",
    "2.2Directclaimliability",
    "2.2aLiabilityasof12/31ofMLRreportingyearforallclaimsregardlessofincurreddate",
    "2.2bLiabilityforclaimsincurredonlyduringtheMLRreportingyearcalculatedasof3/31ofthefollowingyear",
    "2.3Directclaimliabilityprioryear",
    "2.4Directclaimreserves",
    "2.4aReservesasof12/31ofMLRreportingyearforallclaimsregardlessofincurreddate",
    "2.4bReservesforclaimsincurredonlyduringtheMLRreportingyearcalculatedasof3/31ofthefollowingyear",
    "2.5Directclaimreservesprioryear",
    "2.6Directcontractreserves",
    "2.6aDirectcontractreserves12/31column",
    "2.6bDirectcontractreserves3/31dualcontractdeferredcolumns",
    "2.7Directcontractreservesprioryear",
    "2.8Experienceratingrefunds(ratecredits)paid",
    "2.8aExperienceratingrefundswithallincurreddatespaidintheMLRreportingyear",
    "2.8bExperienceratingrefundsassociatedwithpremiumearnedonlyinthereportingyearandpaidthrough3/31ofthefollowingyear",
    "2.9Reserveforexperienceratingrefunds(ratecredits)",
    "2.9aReservedinMLRreportingyearregardlessofincurreddate",
    "2.9bReservesspecifictotheMLRreportingyearthrough3/31ofthefollowingyear",
    "2.10Reserveforexperienceratingrefunds(ratecredits)prioryear",
    "2.11Incurredmedicalincentivepoolandbonuses",
    "2.11aPaidmedicalincentivepoolsandbonusesMLRReportingyear",
    "2.11bAccruedmedicalincentivepoolsandbonusesMLRReportingyear",
    "2.11cAccruedmedicalincentivepoolsandbonusesprioryear",
    "2.12Nethealthcarereceivables",
    "2.12aHealthcarereceivablesMLRReportingyear",
    "2.12bHealthcarereceivablesprioryear",
    "2.13Contingentbenefitandlawsuitreserves",
    "2.14Groupconversioncharges",
    "2.15Blendedrateadjustment",
    "2.16Totalincurredclaims",
    "2.17Allowablefraudreductionexpense(thesmallerofLines2.17aor2.17b)",
    "2.17aTotalfraudreductionexpense",
    "2.17bTotalfraudrecoveriesthatreducedpaidclaimsinLine2.1",
    "2.18Advancepaymentsofcost-sharingreductions",
]

# Map keys 2..56 to the 55 labels above, in list order.
xcoordspart2 = {}
for w, label in enumerate(xcoordlabels2, start=2):
    xcoordspart2[w] = label