In [53]:
from __future__ import division
import re
from pandas_profiling import ProfileReport
import pandas as pd
import numpy as np
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report,confusion_matrix
from sklearn import preprocessing
from sklearn.model_selection import validation_curve
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SelectFromModel
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import pickle

### Set Parameters

In [54]:
# This flag will control whether models will be created by partition or for complete data set.
# Partition column is QualityStarCluster
create_models_by_partition = True
lasso_penalty = 0.1
GBT_max_depth = 1
GBT_n_estimators = 500

In [55]:
data_file =  "rand_2018.csv"
table_columns = ['a','b','c','d','e']      #dummy
new_model_columns = ['a','b','c','d','e']  #dummy

partition_column = 'QualityStarCluster'
without_partition_model_name = 'All'

drop_col_for_traintest = ['QualityStarCluster','Medicare_provider_number','Hospital_name','State',
                          'Hospital_Compare_5-star_rating_(October_2018,_NA=Not_Available)',
                          'holdout_sample','Class_3', 'Class_5', 'Class_6','Class_10','Relative_price_for_outpatient_services']
id_cols_for_prediction_table = drop_col_for_traintest
target_col = 'Class_5'
# Target cloumn can be Class_3, Class_5, Class_6, Class_10

# Output File names
featureimportance_table_file = "featureimportance_table.csv"
predictions_table_file = "predictions_table.csv"

# Model readable list of important variables
important_variables_file = 'important_variables.pkl'

# Output Directories
Output_results_directory = 'QoS_Output_Results/'
pickled_model_directory  = 'QoS_Pickled_Models/'
partitioned_models_directory = 'Partitioned_Models/'
unpartitioned_models_directory = 'Unpartitioned_Models/'


### 2. Call Data Prep Script

In [56]:
#%run ./1_Medxoom_code_data_prep_final.ipynb

### 3. Load data in Pandas dataframe

In [57]:
df = pd.read_csv(data_file)
#df.columns = table_columns
#df = df[new_model_columns]
print(df.shape)

(2950, 310)


In [58]:
df = df.fillna(0)

In [59]:
pd.set_option('display.max_columns', None)
df.head()

Unnamed: 0,Medicare_provider_number,Hospital_name,State,"Hospital_Compare_5-star_rating_(October_2018,_NA=Not_Available)",Number_of_outpatient_services,Relative_price_for_outpatient_services,tot_expenses_employee_benefits,tot_expenses_admin_and_general,tot_expenses_housekeeping,tot_expenses_dietary,capasset_bldngsfixtrs_end10only,capasset_moveequip_end10only,capasset_subtotal_end10only,capasset_total_end10only,net_expenses_anc_dvc_chgd_only10,totcosts_devices_chgd,inpat_charges_devices_chgd,inpat_charges_total,outpat_charges_devices_chgd,outpat_charges_total,mdcd_net_revenue_only10,chgs_uninsured_charity_only10,costs_uninsured_charity_only10,uncomp_uninsured_charity_only10,baddebt_hospcmplx_only10,mdcr_baddebt_hospcmplx_only10,mdcr_baddebt_ln2701_only10,nonmdcr_baddebt_hospcmplx_only10,nonmdcr_baddebt_costs_only10,nonmdcr_uncomp_costs_only10,unreimb_uncomp_costs_only10,chgs_charity_patients_only10,costs_charity_patients_only10,uncomp_charity_patients_only10,mdcr_inpat_discharges_HMO_only10,expns_admin_and_general_salary,expns_nursing_admin_salary,expns_inp_adultspeds_salary,expns_anc_oper_room_salary,expns_anc_radiology_diag_salary,expns_anc_lab_salary,expns_anc_resp_therapy_salary,expns_outp_emergency_salary,expns_subtotal_salary,total_salaries,expenses_empeebens_other,expns_admin_and_general_other,expns_housekeeping_other,expns_dietary_other,total_other_expenses,capasset_bldngsfixtrs_beginning,capasset_moveequip_beginning,capasset_subtotal_beginning,capasset_total_beginning,capasset_moveequip_purchases,capasset_subtotal_purchases,capasset_total_purchases,net_expenses_cap_bldgs_fixtures,net_expenses_cap_movable_equip,net_expenses_employee_benefits,net_expenses_admin_and_general,net_expenses_operation_of_plant,net_expenses_laundry_and_linen,net_expenses_housekeeping,net_expenses_dietary,net_expenses_nursing_admin,net_expenses_medical_records,net_expenses_inp_adultspeds,net_expenses_anc_oper_room,net_expenses_anc_radiology_diag,net_expenses_anc_lab,net_expenses_anc_resp_therapy,net_expenses_anc_phys_therapy,net_expenses_anc_medsupps_chgd,net_expenses_anc_drgs_chgd,net_expenses_outp_emergency,net_expenses_subtotal,net_expenses_total,totcosts_adultspeds,totcosts_oper_room,totcosts_radiology_diag,totcosts_lab,totcosts_resp_therapy,totcosts_phys_therapy,totcosts_medsupps_chgd,totcosts_drgs_chgd,totcosts_emergency,totcosts_subtotal,totcosts_total,inpat_charges_adultspeds,inpat_charges_oper_room,inpat_charges_radiology_diag,inpat_charges_lab,inpat_charges_resp_therapy,inpat_charges_phys_therapy,inpat_charges_medsupps_chgd,inpat_charges_drgs_chgd,inpat_charges_emergency,inpat_charges_subtotal,outpat_charges_oper_room,outpat_charges_radiology_diag,outpat_charges_lab,outpat_charges_resp_therapy,outpat_charges_phys_therapy,outpat_charges_medsupps_chgd,outpat_charges_drgs_chgd,outpat_charges_emergency,outpat_charges_obs_beds,outpat_charges_subtotal,mdcr_outpat_charges,mdcr_outpat_costs,mdcr_inpat_costs,mdcr_inpat_charges_adpeds,mdcr_inpat_charges_anc_outp_oth,mdcr_outpat_lesser_costchg,gross_patient_rev_inpat_hosp,gross_patient_rev_inpat_anc,gross_patient_rev_inpat,gross_patient_rev_outpat,gross_patient_rev,allowances_discounts,net_patient_rev,operating_expenses,net_income_srvcs_patients,other_income_other,other_income_all,net_income_srvcs_patients_other,net_income,mdcd_charges,mdcd_costs,bed_days_available,mdcr_hmo_inpat_days,mdcr_inpat_days,mdcd_inpat_days,all_inpat_days,mdcr_inpat_discharges_ln1,mdcr_inpat_discharges,mdcd_inpat_discharges_ln1,mdcd_inpat_discharges,all_inpat_discharges_ln1,all_inpat_discharges,wt_cy,n_hosp_years_reporting,source_2552_10,cash_on_hand_and_in_banks,accounts_receivable,inventory,tot_cur_assets_genfund,tot_fixed_assets_genfund,tot_other_assets_genfund,tot_assets_genfund,tot_cur_liab_genfund,tot_longterm_liab_genfund,tot_liab_genfund,tot_fundbal_genfund,tot_liab_fundbal_genfund,cost_to_charge_ratio_for_uncomp,adult_peds_beds,tot_adult_peds_beds,beds,beds_incl_subprov,employees_FTEs,type_hosp,ownership_nonprofit,ownership_forprofit,ownership_government,chow_cnt,cbsa_ind,in_compendium,share_of_time_period_with_report,days_in_reporting_period,total_expenses,total_fund_balances,total_current_assets,total_fixed_assets,total_other_assets,total_assets,total_current_liabilities,current_ratio,return_on_equity,days_in_net_accounts_receivable,days_in_gross_accounts_recvable,equity_financing,mdcr_inpat_costs_minus_passthru,mdcr_inpat_charges,mdcr_inpat_cost_to_charge_ratio,mdcr_outpat_cost_to_charge_ratio,mdcr_inpat_revs,mdcr_inpat_costs_worgan,mdcr_inpat_costs_per_day,mdcr_outpat_revs,total_margin,occupancy,income_cont_invest_approp,operating_revenues,operating_margin,mdcr_inpat_margin,mdcr_outpat_margin,mdcr_margin,mdcr_inpat_day_share,mdcrinclHMO_inpat_day_share,mdcd_inpat_day_share,mdcdinclHMO_inpat_day_share,nonmdcrmdcd_inpat_day_share,nonmdcrmdcdinHMO_inpat_day_share,inpat_totcosts_adultspeds,inpat_totcosts_oper_room,inpat_totcosts_radiology_diag,inpat_totcosts_lab,inpat_totcosts_resp_therapy,inpat_totcosts_phys_therapy,inpat_totcosts_medsupps_chgd,inpat_totcosts_devices_chgd,inpat_totcosts_drgs_chgd,inpat_totcosts_emergency,outpat_totcosts_oper_room,outpat_totcosts_radiology_diag,outpat_totcosts_lab,outpat_totcosts_resp_therapy,outpat_totcosts_phys_therapy,outpat_totcosts_medsupps_chgd,outpat_totcosts_devices_chgd,outpat_totcosts_drgs_chgd,outpat_totcosts_emergency,inpat_costs,inpat_totcosts,outpat_costs,outpat_totcosts,discharge_equivalents,internres_per_bed,major_teaching,minor_teaching,inprev,nonmdcr_inprev,nonmdcr_disc,nonmdcr_inprev_per_disc,inpat_length_of_stay,admin_costs,admin_cost_share,expenses_clinical,expenses_administrative,expenses_mixed,expenses_other,admin_cost_share_inclmixed,mdcr_rev_to_charges,mdcr_adv_charges_est,mdcr_adv_rev_est,commercial_rev_est,commercial_charges_est,commercial_rev_to_charges_est,commercial_to_mdcr_est,mdcr_inpat_costs_per_disc,mdcr_inpat_costs_CMI_nonmissing,total_salaries_bens,total_salaries_bens_annualized,employees_FTEs_share_time_gehalf,salaries_bens_per_FTE,discharge_equivalents_annualized,discharge_equivalents_per_FTE,total_uncomp_expnsshr_only10,charityun_uncomp_expnsshr_only10,charity_uncomp_expnsshr_only10,baddebt_nonmdcr_expnsshr_only10,mdcr_ipps_SCHMDH_addl_pymts,accumulated_depreciation,mdcd_inpat_discharge_share,mdcr_inpat_discharge_share,mdcdinclHMO_inpat_disch_share,mdcrinclHMO_inpat_disch_share,COVERED_CHARGES_STATE_SCORE_DRG871,MEDICARE_PAYMENTS_STATE_SCORE_DRG871,MEDICARE_REIMBURSEMENT_STATE_SCORE_DRG871,COVERED_CHARGES_NATIONAL_SCORE_DRG871,MEDICARE_PAYMENTS_NATIONAL_SCORE_DRG871,MEDICARE_REIMBURSEMENT_NATIONAL_SCORE_DRG871,COVERED_CHARGES_STATE_SCORE_DRG291,MEDICARE_PAYMENTS_STATE_SCORE_DRG291,MEDICARE_REIMBURSEMENT_STATE_SCORE_DRG291,COVERED_CHARGES_NATIONAL_SCORE_DRG291,MEDICARE_PAYMENTS_NATIONAL_SCORE_DRG291,MEDICARE_REIMBURSEMENT_NATIONAL_SCORE_DRG291,OPPS_Beneficiaries_Perc,OPPS_Comprehensive_APC_Services_Perc,OPPS_Total_Submitted_Charges_STATE_SCORE,OPPS_Medicare_Allowed_Amount_STATE_SCORE,OPPS_Medicare_Payment_Amount_STATE_SCORE,OPPS_Total_Submitted_Charges_NATIONAL_SCORE,OPPS_Medicare_Allowed_Amount_NATIONAL_SCORE,OPPS_Medicare_Payment_Amount_NATIONAL_SCORE,any_home_office_costs_Y,critical_access_hosp_hcr_Y,teach_hosp_hcr_Y,receives_dsh_only10_Y,teach_hosp_nurs_alld_hcr_only10_Y,receives_mdcd_DSH_YN_only10_Y,ipps_hosp_pos_Y,rural_urban_2.0,rural_urban_3.0,rural_urban_4.0,rural_urban_5.0,rural_urban_6.0,rural_urban_7.0,rural_urban_8.0,rural_urban_9.0,QualityStarCluster,Class_3,Class_5,Class_6,Class_10,holdout_sample
0,10001,Southeast Alabama Medical\nCenter,AL,2.0,54.0,122.0,14745010.0,37652600.0,2934629.0,4840752.0,93211330.0,233641000.0,543232000.0,543232000.0,24092630.0,29156520.0,32766020.0,883911500.0,22534110.0,881747400.0,17845170.0,26554970.0,4048695.0,3931622.0,103848100.0,1736042.0,2670835.0,101177200.0,16370120.0,22484430.0,22484430.0,28761460.0,6255187.0,6114318.0,2467.17144,17551320.0,2923136.0,25219590.0,10867070.0,3147645.0,3150904.0,2431303.0,10452030.0,122686900.0,165407500.0,12000160.0,20101280.0,423008.8,2797965.0,195939400.0,93949650.0,220683300.0,520675500.0,520675500.0,13512360.0,23363570.0,23363570.0,5605285.0,10657570.0,12155600.0,34655360.0,8596970.0,873532.13708,2944769.0,2407431.0,3670955.0,3250454.0,30536660.0,16504200.0,4975390.0,8568020.0,3058265.0,4229289.0,25573970.0,26272610.0,6780155.0,275521700.0,292858000.0,54738660.0,27481130.0,8222171.0,10894880.0,4113891.0,6130409.0,30256890.0,36549930.0,11073870.0,269328500.0,292009800.0,135238300.0,40485340.0,39073430.0,70303490.0,55942790.0,13933580.0,149053500.0,93194940.0,30033250.0,883911500.0,67084910.0,65970780.0,55222880.0,5801596.0,40950420.0,95900770.0,119406700.0,70445110.0,5779509.0,881747400.0,280848200.0,34672250.0,66299570.0,44176170.0,299213200.0,6801.520096,171026100.0,668536700.0,923955700.0,963596800.0,1887553000.0,1536553000.0,350999000.0,361347000.0,-10347890.0,3083566.0,17915260.0,7567365.0,7567365.0,119336500.0,20167300.0,120734.94252,13978.43999,36552.582538,12849.771935,96498.354049,6850.980289,6850.980289,2877.25067,2877.25067,20147.24246,20147.24246,1.0,1.0,1.0,28848950.0,299809200.0,8868561.0,95124330.0,153761400.0,144853900.0,393739600.0,48705970.0,97779740.0,146485700.0,247253900.0,393739600.0,0.152544,275.284932,275.284932,330.780822,330.780822,2398.780583,1.0,0,0,1,1,1.0,1,1.0,365.0,361346900.0,247253900.0,95124330.0,153761400.0,144853900.0,393739600.0,48705970.0,1.953032,0.030606,47.209389,57.974732,0.627963,66263010.0,359146300.0,0.184501,0.123456,76394950.0,66299570.0,1813.813492,41741580.0,0.020513,0.799258,7155685.0,361758600.0,0.001138,0.132147,0.169359,0.145295,0.37879,0.523646,0.133161,0.185284,0.48805,0.29107,54738660.0,10342850.0,3058412.0,6101890.0,3727343.0,1556347.0,18411170.0,17275600.0,16021840.0,3310011.0,17138280.0,5163760.0,4792991.0,386547.39731,4574062.0,11845720.0,11880920.0,20528100.0,7763864.0,168674200.0,168488400.0,106568100.0,100840100.0,32205.348619,0.019822,0,1,163914200.0,87519230.0,13296.262172,6582.242953,4.789656,41576770.0,0.150902,194234000.0,46017370.0,39904740.0,18336110.0,0.191538,0.18459,244746700.0,45177770.0,169815800.0,852156200.0,0.199278,1.07957,9677.384052,66299570.0,177407700.0,177407700.0,2398.780583,73957.438356,32205.348619,13.425717,0.062224,0.01088,0.016921,0.045303,7521096.0,-395473500.0,0.142811,0.340046,0.142811,0.462503,-0.134898,-0.094744,-0.064342,-0.12992,-0.275816,-0.263235,0.010861,-0.054909,-0.020237,-0.107015,-0.246323,-0.2292,0.054932,0.052822,0.108274,0.027922,0.032883,-0.08107,-0.131498,-0.129839,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1Star,1,1,1,1,0
1,390211,Sharon Regional Health\nSystem,PA,2.0,341.0,199.0,15206510.0,36953800.0,1699807.0,2558287.0,4183419.0,12051070.0,17181270.0,17181270.0,4110694.0,5478277.0,14468650.0,272825400.0,9987569.0,388060500.0,13480060.0,2245992.0,413884.4,413884.4,4633424.0,360012.0,553864.6,4079560.0,917537.6,1359963.0,4324356.0,2274533.0,442425.2,442425.2,702.772325,4882868.0,2879075.0,7888585.0,2913910.0,2841229.0,2736302.0,628569.4,2035331.0,47409160.0,48020690.0,13555880.0,32070930.0,723483.4,1699892.0,95164490.0,5466013.0,5627087.0,11672840.0,11672840.0,6438892.0,10802340.0,10802340.0,7164038.0,1131265.0,13846110.0,21390000.0,6209305.0,604130.99093,1727597.0,1282871.0,3168603.0,1561631.0,8827711.0,5618892.0,4645194.0,6186583.0,721263.9,2215134.0,3737015.0,8796879.0,2393159.0,124458700.0,125542400.0,23842630.0,10409970.0,11409240.0,9614208.0,1335786.0,4538594.0,4938745.0,14043630.0,5384015.0,123535200.0,125542400.0,40808740.0,42727880.0,13028670.0,30203790.0,15785840.0,4076615.0,5818225.0,22509850.0,9260108.0,272825400.0,57354400.0,55354680.0,55824370.0,2603278.0,7548569.0,7232422.0,43394140.0,26902900.0,3058457.0,388060500.0,80435970.0,11720190.0,17234400.0,10870790.0,67088950.0,6407.426459,40808740.0,204347300.0,272825400.0,388493100.0,661318500.0,520292100.0,141026400.0,143749300.0,-2722852.0,129830.9,1860557.0,-862298.4,-862298.4,97445600.0,18262300.0,72041.799413,3422.089058,9407.430803,3196.492032,32784.545764,2051.054325,2051.054325,572.183369,572.183369,7119.146276,7119.146276,1.0,1.0,1.0,546178.0,66974700.0,3697684.0,35964280.0,5127220.0,2262574.0,43354070.0,36457520.0,12282950.0,48740490.0,-5386412.0,43354070.0,0.187454,174.0,174.0,195.0,233.0,870.236256,1.0,0,1,0,2,1.0,1,1.0,365.0,143185200.0,-5386412.0,35964280.0,5127220.0,2262574.0,43354070.0,36457520.0,0.986471,0.160088,64.271223,36.965195,-0.124242,17224990.0,81070560.0,0.212469,0.145708,20215050.0,17234400.0,1831.998621,11973290.0,-0.006035,0.455077,12773.64,142874200.0,-0.006125,0.147447,0.021139,0.100463,0.286947,0.391328,0.0975,0.293823,0.615553,0.314849,23842630.0,4444305.0,2173735.0,3375470.0,1146684.0,1591553.0,2201786.0,3241027.0,4796675.0,1378662.0,5965669.0,9235508.0,6238738.0,189102.20268,2947041.0,2736959.0,2237250.0,9246955.0,4005353.0,66321250.0,64781850.0,62721210.0,58753330.0,13575.793571,0.0,0,0,54139090.0,33924040.0,5068.091951,6693.651103,4.605123,26120230.0,0.209871,69146970.0,28197280.0,28060490.0,1498025.0,0.289666,0.199301,58750340.0,11708970.0,81816880.0,340865600.0,0.240027,1.204346,8402.702962,17234400.0,61576570.0,61576570.0,870.236256,70758.453735,13575.793571,15.600124,0.030083,0.002879,0.003078,0.006383,0.0,-11442930.0,0.080372,0.288104,0.293934,0.38682,-0.40356,-0.2408,-0.194107,-0.370172,-0.253244,-0.25195,-0.455928,-0.213361,-0.147375,-0.422273,-0.218113,-0.201855,0.027972,0.027845,-0.398429,-0.071128,-0.069338,-0.385069,-0.091014,-0.08997,1,0,0,1,1,1,1,1,0,0,0,0,0,0,0,1Star,1,2,2,3,0
2,390204,Nazareth Hospital-Phila Pa,PA,2.0,225.0,188.0,5006613.0,40391960.0,2166873.0,3167694.0,37686100.0,54986710.0,104568100.0,104568100.0,5059962.0,6691412.0,8320547.0,396072400.0,2438082.0,310556700.0,30239400.0,15986650.0,3146124.0,3095007.0,14153970.0,785109.9,1207861.0,12946110.0,2970446.0,6297052.0,8812345.0,16228000.0,3387474.0,3326607.0,1525.577748,4251198.0,4147945.0,12818630.0,2261144.0,2703993.0,3004463.0,982394.7,5429989.0,58628680.0,58676430.0,4336992.0,36140750.0,899827.6,1887921.0,91230620.0,36104070.0,49158390.0,92786860.0,92786860.0,5828316.0,11781290.0,11781290.0,226115.2,2773628.0,4924137.0,37499980.0,7490859.0,487281.13531,2140583.0,922922.1,6293929.0,461997.9,16403050.0,2340738.0,5559204.0,5801819.0,1422494.0,1854217.0,8404209.0,5489772.0,7430541.0,143881200.0,144016000.0,34671000.0,8255405.0,10792420.0,11358260.0,2646918.0,3188362.0,10550680.0,7393418.0,15452100.0,139061900.0,139704100.0,108076800.0,19985180.0,17046940.0,62746890.0,20560910.0,2994528.0,23976340.0,8506009.0,18627880.0,396072400.0,22172740.0,37113810.0,55049680.0,1409607.0,5203198.0,14557170.0,19318100.0,58502670.0,16353130.0,310556700.0,52360110.0,9218980.0,27300760.0,33071140.0,82300220.0,3279.155537,108076800.0,253291800.0,396205300.0,310982200.0,707187300.0,558607400.0,148579800.0,149907100.0,-1327215.0,1378772.0,3139552.0,1812336.0,1693906.0,166419200.0,32754680.0,52579.079931,8865.406149,11359.28502,1272.854536,31442.83375,2015.169431,2015.169431,215.065732,215.065732,7574.151448,7574.151448,1.0,1.0,1.0,18657440.0,51266060.0,2518076.0,43694300.0,32664820.0,355451.7,76714600.0,16549070.0,29219460.0,45768520.0,30946060.0,76714600.0,0.196786,126.594519,126.594519,146.049314,194.049314,740.638134,1.0,1,0,0,0,1.0,1,1.0,365.0,149907100.0,30946060.0,43694300.0,32664820.0,355451.7,76714600.0,16549070.0,2.640287,0.054737,42.845588,26.459908,0.403392,27289400.0,122552300.0,0.222676,0.176069,28547970.0,27300760.0,2403.386998,7030948.0,0.011165,0.59801,440625.6,151278800.0,0.008285,0.043688,-0.3112,-0.026443,0.361268,0.643221,0.040482,0.221403,0.598251,0.135376,34671000.0,3913518.0,3396884.0,6050224.0,2477094.0,1164669.0,6366162.0,5175028.0,2260216.0,3731852.0,4341888.0,7395536.0,5308038.0,169823.73601,2023693.0,4184513.0,1516384.0,5133202.0,11720250.0,89855130.0,89666440.0,49513110.0,49395510.0,11746.604996,0.143967,0,1,79562700.0,51014730.0,5558.982016,9176.992109,4.151334,44255910.0,0.307586,79232970.0,49079040.0,14827590.0,958884.8,0.382498,0.20341,136511200.0,27767730.0,54350670.0,212558300.0,0.255698,1.257056,13547.624085,27300760.0,63013420.0,63013420.0,740.638134,85079.902698,11746.604996,15.860114,0.058785,0.020646,0.022191,0.019815,1698969.0,-71029280.0,0.028395,0.266059,0.132363,0.467478,-0.147982,0.006077,0.073428,-0.100287,-0.010414,-0.003617,0.026306,0.061566,0.093705,0.089791,0.055153,0.02382,0.030699,0.032183,0.166518,0.04735,0.042523,0.192425,0.024927,0.01941,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,1Star,1,2,2,3,0
3,390198,Millcreek Community Hospital,PA,2.0,209.0,143.0,7034446.0,7532066.0,407855.8,1300091.0,44389680.0,21218170.0,79069690.0,79069690.0,843766.9,1029487.0,1541655.0,60896410.0,823671.1,65121220.0,14913690.0,2005930.0,631800.0,631800.0,2929905.0,153091.9,235525.2,2694379.0,936436.8,1616307.0,1616307.0,2054000.0,679869.8,679869.8,579.720416,3549717.0,508857.7,5480243.0,868302.7,476381.1,3651536.0,176079.1,836183.2,23740450.0,23810830.0,6846405.0,3982351.0,78136.68,656092.9,32761080.0,43458380.0,20341190.0,77124520.0,77124520.0,876989.5,2258210.0,2258210.0,1547381.0,17734.23,7034446.0,6895514.0,5496100.0,235226.39783,407855.8,432123.0,514827.6,819732.5,6035033.0,2652053.0,652003.4,1913019.0,278687.2,1045528.0,87262.23,962601.4,891949.9,44370900.0,44816310.0,13550790.0,4454460.0,1649741.0,2562965.0,418994.3,1785864.0,318089.5,2517445.0,1679288.0,39819060.0,40488300.0,21627630.0,1962369.0,687149.4,5751330.0,1503257.0,1802712.0,2612821.0,6187818.0,1604277.0,60896410.0,8233966.0,5235084.0,6319231.0,866503.8,4598352.0,2977012.0,1702158.0,6231191.0,3349508.0,65121220.0,9286863.0,1920963.0,4531457.0,2988277.0,6581992.0,2671.623819,21782330.0,35972070.0,65470390.0,76039110.0,141509500.0,91649160.0,49860330.0,56571910.0,-6711580.0,3021346.0,8707632.0,1996051.0,1996051.0,39593860.0,12509820.0,46899.501241,3151.245913,3691.374601,1018.692904,22996.591943,742.895726,742.895726,179.942424,179.942424,3900.421021,3900.421021,1.0,1.0,1.0,31518190.0,6708597.0,1214876.0,45990670.0,38196210.0,43948330.0,128135200.0,12233410.0,3978437.0,16211850.0,111923400.0,128135200.0,0.316004,124.0,124.0,130.0,167.495891,361.202613,1.0,1,0,0,2,1.0,1,1.0,365.0,56571910.0,111923400.0,45990670.0,38196210.0,43948330.0,128135200.0,12233410.0,3.75943,0.017834,49.10994,17.303703,0.873479,4527766.0,10195130.0,0.44411,0.206847,6977501.0,4531457.0,1227.579849,2145718.0,0.034081,0.490338,4298249.0,54269710.0,-0.042421,0.350562,0.104746,0.292747,0.160518,0.297549,0.044298,0.456373,0.795184,0.246078,13550790.0,857297.8,191417.4,1221191.0,265788.9,502947.5,148682.6,670991.7,1974340.0,343826.7,3597162.0,1458323.0,1341774.0,153205.37658,1282917.0,169406.9,358495.6,543105.4,1335461.0,26261930.0,26192020.0,14591920.0,13439180.0,5901.735675,0.368503,1,0,21047310.0,14069810.0,3157.525295,4455.962052,5.895926,8230074.0,0.185484,25092420.0,8660974.0,9942921.0,1120004.0,0.256596,0.46829,16631360.0,7788293.0,18035130.0,48256420.0,0.373735,0.798086,6099.721561,4531457.0,30657240.0,30657240.0,361.202613,84875.459315,5901.735675,16.339128,0.028571,0.011168,0.012018,0.016553,431608.7,-40874720.0,0.046134,0.190466,0.396546,0.339096,-0.657445,-0.008216,0.035866,-0.638269,-0.024472,-0.038483,-0.719485,-0.043988,-0.00807,-0.702133,-0.049763,-0.071452,0.00374,0.003638,-0.446135,-0.097974,-0.094223,-0.433834,-0.117285,-0.114303,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1Star,1,1,1,1,0
4,390076,Brandywine Hospital,PA,2.0,71.0,156.0,4433410.0,29639980.0,1460756.0,2343273.0,49386810.0,14404920.0,71200270.0,71200270.0,5372125.0,7376825.0,59104720.0,501206300.0,26204950.0,388109500.0,29194680.0,1886201.0,241919.6,241919.6,8712513.0,388114.8,597100.0,8115412.0,1333985.0,1734676.0,1734676.0,2044972.0,400690.1,400690.1,1886.74666,3609340.0,3204260.0,12808060.0,2719074.0,2475895.0,1788743.0,932889.3,2747469.0,42001550.0,42125400.0,7182463.0,26030640.0,1460756.0,2343273.0,79554490.0,19826840.0,5754146.0,29505030.0,29505030.0,8650772.0,43555710.0,43555710.0,5126320.0,5816112.0,11523040.0,16620790.0,4324100.0,420145.89348,1460756.0,2329185.0,3306032.0,662677.9,14155700.0,5447605.0,3886174.0,4168407.0,1314237.0,952076.8,4568739.0,5117775.0,3486489.0,110782900.0,111391600.0,30147980.0,11348200.0,7342244.0,6538331.0,2416100.0,2602282.0,6217723.0,7159128.0,6847275.0,107224600.0,111391600.0,132438700.0,59708750.0,17655870.0,50234790.0,9977781.0,5693197.0,12606030.0,18473460.0,16069530.0,501206300.0,50191180.0,55364340.0,45975920.0,6593659.0,8472886.0,6208997.0,8869535.0,42120940.0,9484145.0,388109500.0,95694200.0,9130749.0,23313940.0,39018480.0,126796100.0,8241.948223,132438700.0,316158400.0,501206300.0,388109500.0,889315600.0,785576300.0,103739500.0,121679900.0,-17940410.0,623935.2,1851092.0,-16089320.0,-16389460.0,140308500.0,16980710.0,56574.979084,10116.238068,9959.22651,1029.853811,32573.05331,2004.487735,2004.487735,185.505616,185.505616,5976.664019,5976.664019,1.0,1.0,1.0,-3156646.0,32427760.0,3750819.0,20627960.0,56387100.0,-9100938.0,67914140.0,12543580.0,3602426.0,16146000.0,51768120.0,67914140.0,0.122414,140.0,140.0,155.0,171.0,570.601123,1.0,1,0,0,2,1.0,1,1.0,365.0,121679900.0,51768120.0,20627960.0,56387100.0,-9100938.0,67914140.0,12543580.0,1.644504,-0.316594,91.960562,13.30926,0.762258,23303980.0,171929400.0,0.135544,0.095416,20272590.0,23313940.0,2340.939215,7996167.0,-0.155217,0.57575,0.0,105590600.0,-0.155217,-0.150023,-0.141891,-0.147723,0.30575,0.30575,0.031617,0.261818,0.662633,0.432432,30147980.0,6165490.0,1775313.0,3413878.0,1454751.0,1045829.0,4165862.0,5110853.0,4836846.0,1890902.0,5182712.0,5566932.0,3124452.0,961349.2414,1556453.0,2051861.0,2265972.0,2322281.0,4956372.0,74226180.0,72525150.0,38818620.0,34699420.0,8836.179119,0.0,0,0,54248530.0,33975940.0,3972.176283,8553.482139,5.450039,20589500.0,0.185855,62802230.0,21596880.0,22449000.0,608743.2,0.25589,0.105629,238290800.0,43484280.0,46251620.0,479338800.0,0.09649,0.913486,11630.873803,23313940.0,49307870.0,49307870.0,570.601123,86413.899561,8836.179119,15.485737,0.014256,0.001988,0.003293,0.010963,0.0,-5237678.0,0.031038,0.335386,0.194766,0.335386,0.187978,-0.165861,-0.094818,0.25448,-0.179534,-0.159787,0.26738,-0.137158,-0.057475,0.345778,-0.142371,-0.117699,0.031634,0.031483,0.366559,0.023137,0.022303,0.396908,0.001233,-0.000361,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1Star,1,1,1,2,0


In [60]:
df.dtypes

Medicare_provider_number                                             int64
Hospital_name                                                       object
State                                                               object
Hospital_Compare_5-star_rating_(October_2018,_NA=Not_Available)    float64
Number_of_outpatient_services                                      float64
                                                                    ...   
Class_3                                                              int64
Class_5                                                              int64
Class_6                                                              int64
Class_10                                                             int64
holdout_sample                                                       int64
Length: 310, dtype: object

### 4. Split data into dictionary of dataframes by region

In [61]:
#Dictionary of dataframes by region
region_dataframes = {}
Region_list = []

if (create_models_by_partition == False):
    Region_list = [without_partition_model_name]
    region_dataframes[without_partition_model_name] = df

else:
    Region_list = df[partition_column].unique().tolist()
    for region in Region_list:
        region_dataframes[region] = df[df[partition_column]==region]

### 5. Data Preparation Functions

In [62]:
def data_prep_region(df_region):
    
    return df_region

In [63]:
def drop_columns_for_modeling_train(x_train):
    x_train = x_train.drop(drop_col_for_traintest, axis =1)
    return x_train

def drop_columns_for_modeling_test(x_test):
    x_test = x_test.drop(drop_col_for_traintest, axis =1)
    return x_test

In [64]:
#train test split
def train_test(df_region):
    Y=df_region[target_col]    
    x_train, x_test, y_train, y_test = train_test_split(df_region,Y,test_size=0.2, random_state=30, stratify=Y)
    return x_train, x_test, y_train, y_test

### 6. Variable Reduction Functions (Decision Tree and Lasso)

In [65]:
#variable reduction using Lasso
def lasso_variable_reduction(x_train, y_train):
    x_train = drop_columns_for_modeling_train(x_train)
    scaler = StandardScaler()
    scaler.fit(x_train.fillna(0))
    sel_ = SelectFromModel(LogisticRegression(C=lasso_penalty, penalty='l1', solver='saga', multi_class='multinomial', random_state = 20))
    sel_.fit(scaler.transform(x_train.fillna(0)), y_train)
    sel_.get_support()
    selected_feat = x_train.columns[(sel_.get_support())]
    
    #print((sel_.estimator_.coef_ != 0).ravel().tolist())
    #selected_features_lasso = x_train.columns[(sel_.estimator_.coef_ != 0).ravel().tolist()]
    
    
    all_variable_coef = dict(zip( x_train.columns.tolist(),sel_.estimator_.coef_[0]))
    
    selected_l_dict = { key:value for (key,value) in all_variable_coef.items() if value != 0}
    return selected_l_dict

### 7. Model Training and Prediction (GBT)

In [66]:
#Model Training GBT
def GBT(x_train, x_test, y_train, y_test, imp_variables):
    x_train = drop_columns_for_modeling_train(x_train)
    x_test = drop_columns_for_modeling_test(x_test)
    red_x_train = x_train[imp_variables]
    red_x_test  = x_test[imp_variables]
    print(imp_variables)
    GB = GradientBoostingClassifier(max_depth = GBT_max_depth, n_estimators=GBT_n_estimators, max_features = 'sqrt',random_state=20)  
    GB.fit(red_x_train,y_train)    
    predictions_train = GB.predict(red_x_train)   #[:,1]
    predictions_test = GB.predict(red_x_test)     #[:,1]
    selected_features_gbt_dict = dict(zip(red_x_train.columns, GB.feature_importances_))
    selected_gbt_features = { key:value for (key,value) in selected_features_gbt_dict.items() if value != 0}
    return predictions_train, predictions_test, selected_features_gbt_dict, GB

### 8. Save Output Tables (Prediction and Feature Importance)

In [67]:
def create_prediction_table(x_data, prediction_probability, traintest_flag, modelname):
    prediction_tab  = pd.DataFrame()
    prediction_tab['predict'] = prediction_probability
    prediction_tab.reset_index(drop=True)
    prediction_tab['TrainTest'] = traintest_flag
    prediction_tab['Model'] = modelname
    prediction_table = pd.concat([x_data[id_cols_for_prediction_table].reset_index(drop=True),prediction_tab.reset_index(drop=True)], axis=1)
    return prediction_table

In [68]:
def create_featureimportance_table(features, modelname, region_name):
    feature_tab = pd.DataFrame.from_dict(features, orient='index')
    feature_tab['Feature'] = feature_tab.index
    feature_tab.reset_index(drop=True,inplace=True)
    feature_tab['Importance'] = feature_tab[[0]]
    feature_tab.drop(0, axis =1, inplace = True)
    feature_tab['Model'] = modelname
    feature_tab[partition_column] = region_name
    return feature_tab

### 9. Save Pickled Models

In [69]:
def save_model(region,model):
    
    model_name = ''
    
    if (create_models_by_partition == True):
        model_name = pickled_model_directory+partitioned_models_directory+region+'_model.pkl'
    else:
        model_name = pickled_model_directory+unpartitioned_models_directory+region+'_model.pkl'
    
    #model_name = pickled_model_directory+region+'_model.pkl'
    with open(model_name,'wb') as fout:
        pickle.dump(model,fout)
    print(model_name)

In [70]:
def save_important_variables(important_variables):
    file_name = ''
    
    if (create_models_by_partition == True):
        file_name = pickled_model_directory+partitioned_models_directory+important_variables_file
    else:
        file_name = pickled_model_directory+unpartitioned_models_directory+important_variables_file
    
    with open(file_name,'wb') as fout:
        pickle.dump(important_variables,fout)

### 10. Initialize Data Structures

In [71]:
#initialize data structures
x_train = {}
x_test = {}
y_train = {}
y_test = {}
lasso_important_variables = {}
dt_important_variables = {}
gbt_important_variables = {}

predictions_train = {}
predictions_test = {}
predictions_table = pd.DataFrame()       #Final table with predictions
featureimportance_table = pd.DataFrame() #Final table with feature importance

### 11. Main function to run the Pipeline

In [72]:
for region in Region_list:
    print("\n\n")
    print(partition_column," = ", region)
    region_dataframes[region] = data_prep_region(region_dataframes[region])
    x_train[region], x_test[region], y_train[region], y_test[region] = train_test(region_dataframes[region])
    
    lasso_important_variables[region] = lasso_variable_reduction(x_train[region], y_train[region].values.ravel())
    print("\n")
    print("Lasso Reduced Variables", len(lasso_important_variables[region]))
    print(lasso_important_variables[region])
       
    predictions_train[region], predictions_test[region], gbt_important_variables[region], model  = GBT(x_train[region], x_test[region], y_train[region].values.ravel(), y_test[region].values.ravel(), sorted(lasso_important_variables[region].keys()))
    
    predictions_table = predictions_table.append(create_prediction_table(x_train[region], predictions_train[region], "train", "GBT"))
    predictions_table = predictions_table.append(create_prediction_table(x_test[region], predictions_test[region], "test", "GBT"))
    
    featureimportance_table = featureimportance_table.append(create_featureimportance_table(gbt_important_variables[region], 'GBT',region))
    featureimportance_table = featureimportance_table.append(create_featureimportance_table(lasso_important_variables[region], 'Lasso',region))
     
    save_model(region,model)
    
    




QualityStarCluster  =  1Star






Lasso Reduced Variables 34
{'tot_expenses_admin_and_general': -0.005155382911989753, 'outpat_charges_devices_chgd': 0.021007206927462187, 'unreimb_uncomp_costs_only10': -0.005021350549955747, 'expns_admin_and_general_other': -0.03151735154536486, 'net_expenses_anc_radiology_diag': -0.009512392936093982, 'totcosts_radiology_diag': -0.027021929658605413, 'totcosts_phys_therapy': -0.020375129101814885, 'totcosts_drgs_chgd': -0.007877880216239367, 'cost_to_charge_ratio_for_uncomp': 0.32625358354116374, 'type_hosp': -0.004073991245950051, 'ownership_forprofit': 0.007066007245726626, 'chow_cnt': -0.0597971168433246, 'mdcr_outpat_cost_to_charge_ratio': 0.37199647225661847, 'mdcr_inpat_costs_per_day': 0.035454096219755885, 'income_cont_invest_approp': -0.0008704761863366716, 'mdcr_outpat_margin': 0.012228800439025841, 'mdcr_margin': 0.17823928906130665, 'mdcr_inpat_day_share': -0.061418837673065094, 'outpat_totcosts_radiology_diag': -0.06055675862470908, 'outpat_totcosts_phys_therapy': -0.03





Lasso Reduced Variables 49
{'baddebt_hospcmplx_only10': -0.022642313981065202, 'nonmdcr_baddebt_hospcmplx_only10': -0.027788196864181925, 'nonmdcr_baddebt_costs_only10': -0.03214425190534158, 'nonmdcr_uncomp_costs_only10': -0.02539333215407444, 'unreimb_uncomp_costs_only10': -0.027034049872204646, 'costs_charity_patients_only10': -0.010979744935639666, 'uncomp_charity_patients_only10': -0.011124031943096635, 'totcosts_radiology_diag': -0.0010990228294628177, 'cost_to_charge_ratio_for_uncomp': 0.17719899505218326, 'ownership_nonprofit': -0.04360115484616565, 'ownership_forprofit': 0.028862817687612716, 'chow_cnt': -0.018896141031109377, 'cbsa_ind': -0.04870195345617415, 'days_in_gross_accounts_recvable': -0.003399559436478095, 'equity_financing': -0.023895274636729413, 'mdcr_inpat_cost_to_charge_ratio': -0.00794043638557236, 'mdcr_inpat_costs_per_day': 0.032483664876728234, 'occupancy': -0.044631551343727684, 'mdcr_outpat_margin': 0.0589192047401029, 'mdcr_margin': 0.0872621686232494,





Lasso Reduced Variables 39
{'baddebt_hospcmplx_only10': -0.029751974004092106, 'nonmdcr_baddebt_hospcmplx_only10': -0.011590538470120785, 'net_expenses_dietary': -0.004071318856730182, 'inpat_charges_lab': -0.005189950017801732, 'mdcr_outpat_charges': -0.011205541318327626, 'mdcr_inpat_charges_anc_outp_oth': -0.00568920594656817, 'mdcr_outpat_lesser_costchg': 0.02963225974662335, 'accounts_receivable': -0.002419068734417145, 'cost_to_charge_ratio_for_uncomp': 0.03639152126885579, 'type_hosp': 0.06707140631355227, 'ownership_nonprofit': -0.009868245353410266, 'ownership_forprofit': 0.05729275610801392, 'cbsa_ind': -0.07595306433543357, 'current_ratio': -0.06995308131339373, 'equity_financing': -0.07099736555431307, 'mdcr_inpat_costs_per_day': 0.013645707460172518, 'occupancy': -0.04511343767147357, 'operating_margin': -0.014847172097560028, 'mdcr_inpat_margin': 0.21986520273642998, 'mdcr_outpat_margin': 0.06804753492450301, 'mdcr_margin': 0.026983108335128264, 'mdcdinclHMO_inpat_day_s

In [73]:
featureimportance_table.reset_index(drop=True,inplace=True)
predictions_table.reset_index(drop=True,inplace=True)

In [74]:
if (create_models_by_partition == True):
    featureimportance_table.to_csv(Output_results_directory+partitioned_models_directory+featureimportance_table_file,index=False)
    predictions_table.to_csv(Output_results_directory+partitioned_models_directory+predictions_table_file,index=False)
else:
    featureimportance_table.to_csv(Output_results_directory+unpartitioned_models_directory+featureimportance_table_file,index=False)
    predictions_table.to_csv(Output_results_directory+unpartitioned_models_directory+predictions_table_file,index=False)

save_important_variables(lasso_important_variables)

In [75]:
featureimportance_table.head()

Unnamed: 0,Feature,Importance,Model,QualityStarCluster
0,OPPS_Medicare_Allowed_Amount_STATE_SCORE,0.025344,GBT,1Star
1,OPPS_Medicare_Payment_Amount_STATE_SCORE,0.024304,GBT,1Star
2,OPPS_Total_Submitted_Charges_NATIONAL_SCORE,0.070376,GBT,1Star
3,any_home_office_costs_Y,0.001344,GBT,1Star
4,charity_uncomp_expnsshr_only10,0.036091,GBT,1Star


In [76]:
predictions_table.head(100)

Unnamed: 0,QualityStarCluster,Medicare_provider_number,Hospital_name,State,"Hospital_Compare_5-star_rating_(October_2018,_NA=Not_Available)",holdout_sample,Class_3,Class_5,Class_6,Class_10,Relative_price_for_outpatient_services,predict,TrainTest,Model
0,1Star,330286,Good Samaritan Hospital,NY,1.0,0,1,2,2,3,191.0,4,train,GBT
1,1Star,30024,St. Josephs Hospital & Medical\nCtr,AZ,2.0,0,2,4,4,7,291.0,4,train,GBT
2,1Star,50616,St. Johns Pleasant Valley\nHospital,CA,2.0,0,3,5,6,9,375.0,5,train,GBT
3,1Star,100029,North Shore Medical Center\nAnd Fmc C,FL,1.0,0,3,5,6,10,563.0,5,train,GBT
4,1Star,450742,Baylor Scott & White - Lake\nPointe,TX,1.0,0,3,5,6,10,546.0,5,train,GBT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,1Star,140080,Presence Saint Francis\nHospital,IL,2.0,0,2,3,3,5,232.0,3,train,GBT
96,1Star,140164,Memorial Hospital Of\nCarbondale,IL,1.0,0,2,4,4,7,303.0,4,train,GBT
97,1Star,70031,The Griffin Hospital,CT,1.0,0,1,2,2,3,191.0,2,train,GBT
98,1Star,190008,Terrebonne General Medical\nCenter,LA,2.0,0,2,3,4,6,260.0,3,train,GBT


In [77]:
print(featureimportance_table.shape)
print(predictions_table.shape)

(244, 4)
(2950, 14)


In [78]:
predictions_table[target_col].value_counts(normalize=True) * 100

3    24.881356
4    24.847458
2    20.237288
1    15.220339
5    14.813559
Name: Class_5, dtype: float64