In [211]:
# Data source: CMS Provider of Services file (hospital & non-hospital facilities)
# https://data.cms.gov/provider-characteristics/hospitals-and-other-facilities/provider-of-services-file-hospital-non-hospital-facilities

file = 'Provider_of_Services_File_Hospital_Non_Hospital_Facilities_Dataset_2022_Q4.csv'

# infer_schema_length=0 disables dtype inference so every column is loaded as a
# string (zero-padded codes such as '01' keep their leading zero).
# 'utf8-lossy' decodes leniently instead of failing on invalid UTF-8 bytes.
df = pl.read_csv(
    file,
    infer_schema_length=0,
    encoding='utf8-lossy',
)

In [212]:
# Keep only rows whose provider subtype code is one of the listed hospital subtypes.
# '03' (Religious Non-Medical Health Care Institutions) is intentionally left out.
subtype_filter = pl.col('PRVDR_CTGRY_SBTYP_CD').is_in(
    [
        '01',  # Short Term
        '02',  # Long Term
        '04',  # Psychiatric
        '05',  # Rehabilitation
        '06',  # Childrens Hospitals
        '07',  # Distinct Part Psychiatric Hospital
        '11',  # Critical Access Hospitals
        '20',  # Transplant Hospitals
        # Medicaid-only subtypes
        '22',  # Medicaid Only Short-Term Hospitals
        '23',  # Medicaid Only Childrens Hospitals
        '24',  # Medicaid Only Children's Psychiatric
        '25',  # Medicaid Only Psychiatric Hospitals
        '26',  # Medicaid Only Rehabilitation Hospitals
        '27',  # Medicaid Only Long-Term Hospitals
    ]
)

# Keep only provider category '01'.
# NOTE(review): presumably the hospital category -- confirm against the POS data dictionary.
category_code_filter = pl.col('PRVDR_CTGRY_CD').is_in(
    ['01']
)

# Keep only rows whose provider number is exactly six digits.
# Raw string: '\d' in a normal string literal is an invalid escape sequence
# (DeprecationWarning, SyntaxWarning on Python 3.12+); the pattern itself is unchanged.
prvdr_num_filter = pl.col('PRVDR_NUM').str.contains(r'^\d{6}$')

In [213]:
# Apply all three row predicates at once; a row survives only if every
# predicate evaluates to true for it.
df = df.filter(
    subtype_filter
    & category_code_filter
    & prvdr_num_filter
)

In [214]:
# Source column -> output column name.
# Insertion order matters: it is also the column order of the selected frame.
col_rename = dict(
    PRVDR_CTGRY_SBTYP_CD='category',
    CITY_NAME='city',
    FAC_NAME='name',
    PRVDR_NUM='id',
    STATE_CD='state',
    ST_ADR='addr',
    PHNE_NUM='phone',
    PGM_TRMNTN_CD='medicare_termination_status',
    GNRL_CNTL_TYPE_CD='control_type',
    ZIP_CD='zip',
    CBSA_URBN_RRL_IND='urban_rural',
)

In [215]:
# Keep only the source columns listed in col_rename, in that order.
df = df.select(list(col_rename))

In [216]:
# Swap the source column names for the short output names.
df = df.rename(mapping=col_rename)

In [217]:
# Control-type code -> label. Codes are the zero-padded positions '01'..'13'
# of the labels below, so the order of this sequence is significant.
control_type_map = {
    f'{position:02d}': label
    for position, label in enumerate(
        (
            'FOR PROFIT - INDIVIDUAL',
            'FOR PROFIT - PARTNERSHIP',
            'FOR PROFIT - CORPORATION',
            'NONPROFIT - CHURCH RELATED',
            'NONPROFIT - CORPORATION',
            'NONPROFIT - OTHER',
            'GOVERNMENT - STATE',
            'GOVERNMENT - COUNTY',
            'GOVERNMENT - CITY',
            'GOVERNMENT - CITY/COUNTY',
            'GOVERNMENT - HOSPITAL DISTRICT',
            'GOVERNMENT - FEDERAL',
            'FOR PROFIT - LIMITED LIABILITY CORPORATION',
        ),
        start=1,
    )
}

In [219]:
# Provider subtype code -> label. Codes are not contiguous, so each pair is
# spelled out explicitly.
category_map = dict([
    ('01', 'Short Term'),
    ('02', 'Long Term'),
    ('03', 'Religious Non-Medical Health Care Institutions'),
    ('04', 'Psychiatric'),
    ('05', 'Rehabilitation'),
    ('06', 'Childrens Hospitals'),
    ('07', 'Distinct Part Psychiatric Hospital'),
    ('11', 'Critical Access Hospitals'),
    ('20', 'Transplant Hospitals'),
    ('22', 'Medicaid Only Short-Term Hospitals'),
    ('23', 'Medicaid Only Childrens Hospitals'),
    ('24', "Medicaid Only Children's Psychiatric"),
    ('25', 'Medicaid Only Psychiatric Hospitals'),
    ('26', 'Medicaid Only Rehabilitation Hospitals'),
    ('27', 'Medicaid Only Long-Term Hospitals'),
])

In [220]:
# Termination code -> label. Codes are the zero-padded positions '00'..'11'
# of the labels below, so the order of this sequence is significant.
status_map = {
    f'{position:02d}': label
    for position, label in enumerate((
        'ACTIVE PROVIDER',
        'VOLUNTARY-MERGER, CLOSURE',
        'VOLUNTARY-DISSATISFACTION WITH REIMBURSEMENT',
        'VOLUNTARY-RISK OF INVOLUNTARY TERMINATION',
        'VOLUNTARY-OTHER REASON FOR WITHDRAWAL',
        'INVOLUNTARY-FAILURE TO MEET HEALTH/SAFETY REQ',
        'INVOLUNTARY-FAILURE TO MEET AGREEMENT',
        'OTHER-PROVIDER STATUS CHANGE',
        'NONPAYMENT OF FEES - CLIA Only',
        'REV/UNSUCCESSFUL PARTICIPATION IN PT - CLIA Only',
        'REV/OTHER REASON - CLIA Only',
        'INCOMPLETE CLIA APPLICATION INFORMATION - CLIA Only',
    ))
}

In [221]:
# Replace the coded values in three columns with their human-readable labels.
# NOTE(review): with map_dict's default (None), a code that is missing from its
# lookup should come out as null -- confirm that dropping unknown codes is intended.
df = df.with_columns([
    pl.col(column).map_dict(lookup)
    for column, lookup in (
        ('medicare_termination_status', status_map),
        ('control_type', control_type_map),
        ('category', category_map),
    )
])

In [227]:
# Preview the first five rows of the cleaned frame.
df.head(5)

category,city,name,id,state,addr,phone,medicare_termination_status,control_type,zip,urban_rural
str,str,str,str,str,str,str,str,str,str,str
"""Short Term""","""DOTHAN""","""SOUTHEAST HEAL…","""010001""","""AL""","""1108 ROSS CLAR…","""3347938701""","""ACTIVE PROVIDE…","""GOVERNMENT - C…","""36301""","""U"""
"""Short Term""","""BRIDGEPORT""","""NORTH JACKSON …","""010004""","""AL""","""47005 U S HIGH…","""2564372101""","""VOLUNTARY-MERG…","""GOVERNMENT - C…","""35740""","""R"""
"""Short Term""","""BOAZ""","""MARSHALL MEDIC…","""010005""","""AL""","""2505 U S HIGHW…","""2565938310""","""ACTIVE PROVIDE…","""GOVERNMENT - C…","""35957""","""R"""
"""Short Term""","""FLORENCE""","""NORTH ALABAMA …","""010006""","""AL""","""1701 VETERANS …","""2566291000""","""ACTIVE PROVIDE…","""NONPROFIT - CH…","""35630""","""U"""
"""Short Term""","""OPP""","""MIZELL MEMORIA…","""010007""","""AL""","""702 N MAIN ST""","""3344933541""","""ACTIVE PROVIDE…","""FOR PROFIT - P…","""36467""","""R"""


In [228]:
# Persist the cleaned hospital table as CSV in the working directory.
df.write_csv(file='hospital-2023-05-01.csv')