In [66]:
import pandas as pd
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides pandas diagnostics such as
# SettingWithCopyWarning; consider narrowing it to specific categories.
warnings.simplefilter('ignore')

In [67]:
# Catalogue of the HCRIS cells to extract. Each entry is a 5-tuple laid out as
#   (variable, WKSHT_CD, LINE_NUM, CLMN_NUM, source)
# where
#   variable   - column name the extracted value receives in the output table
#   WKSHT_CD   - worksheet code, matched as an exact string
#   LINE_NUM   - zero-padded line code, matched as an exact string
#   CLMN_NUM   - zero-padded column code, matched as an exact string
#   source     - 'numeric' or 'alpha': which raw table the value is looked up in
# The order of the entries determines the column order of the final dataset.
hcris_vars = [
    # variable                  WKSHT_CD   LINE_NUM  CLMN_NUM  source
    ('beds',                    'S300001', '01400',  '00200',  'numeric'),
    ('tot_charges',             'G300000', '00100',  '00100',  'numeric'),
    ('tot_discounts',           'G300000', '00200',  '00100',  'numeric'),
    ('tot_operating_exp',       'G300000', '00400',  '00100',  'numeric'),
    ('ip_charges',              'G200000', '00100',  '00100',  'numeric'),
    ('icu_charges',             'G200000', '01600',  '00100',  'numeric'),
    ('ancillary_charges',       'G200000', '01800',  '00100',  'numeric'),
    ('tot_discharges',          'S300001', '00100',  '01500',  'numeric'),
    ('mcare_discharges',        'S300001', '00100',  '01300',  'numeric'),
    ('mcaid_discharges',        'S300001', '00100',  '01400',  'numeric'),
    ('tot_mcare_payment',       'E00A18A', '05900',  '00100',  'numeric'),
    ('secondary_mcare_payment', 'E00A18A', '06000',  '00100',  'numeric'),
    ('street',                  'S200001', '00100',  '00100',  'alpha'),
    ('city',                    'S200001', '00200',  '00100',  'alpha'),
    ('state',                   'S200001', '00200',  '00200',  'alpha'),
    ('zip',                     'S200001', '00200',  '00300',  'alpha'),
    ('county',                  'S200001', '00200',  '00400',  'alpha'),
    ('hvbp_payment',            'E00A18A', '07093',  '00100',  'numeric'),
    ('hrrp_payment',            'E00A18A', '07094',  '00100',  'numeric'),
]




In [68]:
# Pull relevant data
# Placeholder for the combined HCRIS table. None means no data has been
# assembled yet; a later cell assigns the resulting DataFrame to this name.
final_hcris_v2010 = None

In [69]:
# Walk fiscal years 2010-2017, announcing each one and rebinding the three
# input-file paths for it. Nothing is read inside this loop; once it ends,
# `year` and the path names hold the values for the final year only.
for year in range(2010, 2018):
    print('Processing year:', year)
    alpha_p, numeric_p, report_p = (
        f"../../data/Input/HCRIS_v2010/HospitalFY{year}/hosp10_{year}_ALPHA.CSV",
        f"../../data/Input/HCRIS_v2010/HospitalFY{year}/hosp10_{year}_NMRC.CSV",
        f"../../data/Input/HCRIS_v2010/HospitalFY{year}/hosp10_{year}_RPT.CSV",
    )

Processing year: 2010
Processing year: 2011
Processing year: 2012
Processing year: 2013
Processing year: 2014
Processing year: 2015
Processing year: 2016
Processing year: 2017


In [70]:
# Build one provider-report table per fiscal year, stack them, and save.
#
# The read/merge steps must run once per year. The path variables are rebound
# on every iteration of a year loop, so code that runs after a paths-only loop
# sees just the last year (2017) and the saved CSV would contain a single year.
# The paths are therefore rebuilt here, per year, from the same templates.

# The variable lookup table does not depend on the year, so build it once.
hcris_vars_df = pd.DataFrame(hcris_vars, columns=['variable', 'WKSHT_CD', 'LINE_NUM', 'CLMN_NUM', 'source'])

# Raw-file layouts. The files are read with explicit `names`, so pandas treats
# the first line of each file as data rather than as a header.
item_columns = ['RPT_REC_NUM', 'WKSHT_CD', 'LINE_NUM', 'CLMN_NUM', 'ITM_VAL_NUM']
report_columns = ['RPT_REC_NUM', 'PRVDR_CTRL_TYPE_CD', 'PRVDR_NUM', 'NPI',
                  'RPT_STUS_CD', 'FY_BGN_DT', 'FY_END_DT', 'PROC_DT',
                  'INITL_RPT_SW', 'LAST_RPT_SW', 'TRNSMTL_NUM', 'FI_NUM',
                  'ADR_VNDR_CD', 'FI_CREAT_DT', 'UTIL_CD', 'NPR_DT',
                  'SPEC_IND', 'FI_RCPT_DT']

# Collect the yearly frames and concatenate once at the end. Starting from an
# empty list also makes this cell safe to re-run: no rows from a previous
# execution are carried over into the output.
yearly_reports = []

for year in range(2010, 2018):
    print('Reading HCRIS v2010 files for year:', year)
    alpha_p = f"../../data/Input/HCRIS_v2010/HospitalFY{year}/hosp10_{year}_ALPHA.CSV"
    numeric_p = f"../../data/Input/HCRIS_v2010/HospitalFY{year}/hosp10_{year}_NMRC.CSV"
    report_p = f"../../data/Input/HCRIS_v2010/HospitalFY{year}/hosp10_{year}_RPT.CSV"

    # Everything is read as str so the zero-padded worksheet/line/column codes
    # compare equal to the string codes listed in hcris_vars.
    HCRIS_alpha = pd.read_csv(alpha_p, names=item_columns, dtype=str)
    HCRIS_numeric = pd.read_csv(numeric_p, names=item_columns, dtype=str)
    HCRIS_report = pd.read_csv(report_p, names=report_columns, dtype=str)

    # .copy() gives an independent frame, so the column assignments below do
    # not write into a slice of HCRIS_report.
    final_reports = HCRIS_report[['RPT_REC_NUM', 'PRVDR_NUM', 'NPI', 'FY_BGN_DT', 'FY_END_DT', 'PROC_DT',
                                  'FI_CREAT_DT', 'RPT_STUS_CD']].copy()
    final_reports.columns = ['report', 'provider_number', 'npi', 'fy_start', 'fy_end', 'date_processed',
                             'date_created', 'status']
    final_reports['year'] = year

    # Attach each requested variable as a new column, keyed on the report id.
    for _, row in hcris_vars_df.iterrows():
        hcris_data = HCRIS_numeric if row['source'] == 'numeric' else HCRIS_alpha
        cell_mask = ((hcris_data['WKSHT_CD'] == row['WKSHT_CD']) &
                     (hcris_data['LINE_NUM'] == row['LINE_NUM']) &
                     (hcris_data['CLMN_NUM'] == row['CLMN_NUM']))
        val = hcris_data.loc[cell_mask, ['RPT_REC_NUM', 'ITM_VAL_NUM']].rename(
            columns={'RPT_REC_NUM': 'report', 'ITM_VAL_NUM': row['variable']})
        # Left join: reports without this cell keep a missing value.
        final_reports = final_reports.merge(val, on='report', how='left')
        if row['source'] == 'numeric':
            final_reports[row['variable']] = final_reports[row['variable']].astype(float)

    yearly_reports.append(final_reports)

final_hcris_v2010 = pd.concat(yearly_reports, ignore_index=True)

# Save final dataset
final_hcris_v2010.to_csv('../../data/Output/HCRIS_V2010.csv', index=False)

