In [93]:
# Location of the hospital's published standard-charges workbook (.xlsx).
url = (
    "https://healthcare.ascension.org/-/media/project/ascension/healthcare/"
    "price-transparency-files/in/"
    "364492612_ascension-st-vincent-heart-center_standardcharges.xlsx"
)

In [94]:
!wget https://healthcare.ascension.org/-/media/project/ascension/healthcare/price-transparency-files/in/364492612_ascension-st-vincent-heart-center_standardcharges.xlsx

--2023-04-17 14:55:10--  https://healthcare.ascension.org/-/media/project/ascension/healthcare/price-transparency-files/in/364492612_ascension-st-vincent-heart-center_standardcharges.xlsx
Resolving healthcare.ascension.org (healthcare.ascension.org)... 2620:1ec:4e:1::40, 2620:1ec:4f:1::40, 13.107.237.40, ...
Connecting to healthcare.ascension.org (healthcare.ascension.org)|2620:1ec:4e:1::40|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4749263 (4.5M) [application/vnd.openxmlformats-officedocument.spreadsheetml.sheet]
Saving to: ‘364492612_ascension-st-vincent-heart-center_standardcharges.xlsx.1’


2023-04-17 14:55:15 (1.08 MB/s) - ‘364492612_ascension-st-vincent-heart-center_standardcharges.xlsx.1’ saved [4749263/4749263]



In [95]:
import polars as pl

In [97]:
# Options forwarded to the CSV parsing step of pl.read_excel.
read_csv_options = dict(
    skip_rows=1,              # skip the first line of the sheet
    infer_schema_length=0,    # no schema inference: columns are read as strings
    null_values=["N/A"],      # treat the literal "N/A" as null
)

In [98]:
# Load the "Standard Charges" sheet of the downloaded workbook, passing the
# parsing options defined in read_csv_options.
df = pl.read_excel(
    "364492612_ascension-st-vincent-heart-center_standardcharges.xlsx",
    sheet_name="Standard Charges",
    read_csv_options=read_csv_options,
)

In [99]:
# Discard the first three rows of the parsed sheet.
# NOTE(review): presumably these are non-data rows at the top of the sheet — confirm.
# This cell rebinds `df`, so running it twice drops three more rows; re-run the
# read_excel cell first if it needs to be executed again.
df = df.slice(3)

In [100]:
# Every column from position 6 onward; used later as the value columns of the melt.
# Captured before the first six columns are renamed.
payers = df.columns[6:]

In [101]:
# Display the first six column names (the ones that get renamed via rename_dict).
df.columns[:6]

['Facility_BU_ID',
 'Code_Type',
 'Code',
 'Description',
 'UB_Revenue_Code',
 'UB_Revenue_Description']

In [102]:
# Source column name -> snake_case name used in the rest of the notebook.
rename_dict = dict(
    Facility_BU_ID='hospital_ccn',
    Code_Type='code_type',
    Code='code_orig',
    Description='description',
    UB_Revenue_Code='rev_code',
    UB_Revenue_Description='rev_desc',
)

In [103]:
# Rename the six leading columns according to rename_dict.
df = df.rename(rename_dict)

In [133]:
def replace_code_type():
    """Build an expression that normalizes the ``code_type`` column.

    Maps '1-CDM' -> 'cdm', '2-CPT' -> 'cpt', '3-DRG' -> 'drg'; any other
    value becomes null.

    Returns:
        A polars expression aliased as ``code_type`` (it replaces the
        existing column when used in ``with_columns``).
    """
    col = pl.col('code_type')
    # Output values are wrapped in pl.lit so they are always string literals;
    # newer polars versions parse a bare str passed to then()/otherwise() as a
    # column name.
    col_expr = (
        pl
        .when(col == '1-CDM').then(pl.lit('cdm'))
        .when(col == '2-CPT').then(pl.lit('cpt'))
        .when(col == '3-DRG').then(pl.lit('drg'))
        .otherwise(None)
        .alias('code_type')
    )
    return col_expr

def code_cat():
    """Build an expression that classifies each value of the ``payer`` column.

    'Gross_Charge' -> 'gross', 'Cash_Charge' -> 'cash',
    'Min_Negotiated_Rate' -> 'min', 'Max_Negotiated_Rate' -> 'max';
    every other value is labelled with the literal string 'payer'.

    Returns:
        A polars expression aliased as ``payer_category``.
    """
    col = pl.col('payer')
    # pl.lit matters most for the fallback: a bare 'payer' string is read as
    # the *column* named payer by newer polars, which would copy the payer
    # name into payer_category instead of the constant label.
    col_expr = (
        pl.when(col == 'Gross_Charge').then(pl.lit('gross'))
        .when(col == 'Cash_Charge').then(pl.lit('cash'))
        .when(col == 'Min_Negotiated_Rate').then(pl.lit('min'))
        .when(col == 'Max_Negotiated_Rate').then(pl.lit('max'))
        .otherwise(pl.lit('payer'))
        .alias('payer_category')
    )
    return col_expr

def extract_prefix():
    """Build an expression deriving ``code_prefix`` from the normalized ``code_type``.

    'cpt' -> 'hcpcs_cpt', 'drg' -> 'ms-drg', 'cdm' -> 'cdm'; anything else
    becomes null. The comparisons use the lowercase values, so ``code_type``
    must already have been normalized (see ``replace_code_type``).

    Returns:
        A polars expression aliased as ``code_prefix``.
    """
    code_type = pl.col('code_type')
    # pl.lit keeps the outputs as string literals; newer polars treats a bare
    # str in then() as a column reference.
    col_expr = (
        pl.when(code_type == 'cpt').then(pl.lit('hcpcs_cpt'))
        .when(code_type == 'drg').then(pl.lit('ms-drg'))
        .when(code_type == 'cdm').then(pl.lit('cdm'))
        .otherwise(None)
        .alias('code_prefix')
    )
    return col_expr

def extract_rate_meth():
    """Build an expression that captures a textual rate as ``rate_method``.

    When the ``rate`` column equals the string '55% of gross charges', that
    same text is returned; every other row gets null.

    Returns:
        A polars expression aliased as ``rate_method``.
    """
    pct_of_gross = '55% of gross charges'
    # pl.lit: a bare str in then() would be looked up as a column name by
    # newer polars versions.
    col_expr = (
        pl.when(pl.col('rate') == pct_of_gross)
        .then(pl.lit(pct_of_gross))
        .otherwise(None)
        .alias('rate_method')
    )
    return col_expr

In [134]:
# Melt inputs: the six (renamed) leading columns identify a row; the columns
# captured earlier in `payers` hold the values to unpivot.
id_vars, value_vars = df.columns[:6], payers

In [140]:
# Reshape to long form (one row per id columns x payer column), drop empty
# rates, derive the helper columns, then preview ten rows.
(df
 .melt(id_vars, value_vars, value_name = 'rate', variable_name = 'payer')
 .filter(pl.col('rate').is_not_null())
 .with_columns([
     replace_code_type(),
     code_cat(),
     extract_rate_meth(),
 ])
 # Second step: extract_prefix() compares against the normalized code_type
 # produced above, so it cannot share the first with_columns call.
 .with_columns([
     extract_prefix(),
     # The textual rate was already captured in rate_method; null it out here so
     # the remaining values can be cast to float.
     pl.when(pl.col('rate') == '55% of gross charges').then(None).otherwise(pl.col('rate')).cast(float).alias('rate'),
 ])
).sample(10, seed=0)  # fixed seed so the preview is the same on every run

hospital_ccn,code_type,code_orig,description,rev_code,rev_desc,payer,rate,payer_category,rate_method,code_prefix
str,str,str,str,str,str,str,f64,str,str,str
"""46006""","""cdm""","""720333577""","""DYSPHAGIA EVAL...","""444""","""Evaluation or ...","""HUMANAACCES_HU...",300.0,"""payer""",,"""cdm"""
"""46006""","""cdm""","""717011724""","""NOREPINEPHRINE...","""250""","""Pharmacy""","""AETNAAMERH_AET...",301.22,"""payer""",,"""cdm"""
"""46006""","""cdm""","""702560245""","""00000000000059...","""278""","""Other implants...","""IHN_INDIANA_HE...",80443.44,"""payer""",,"""cdm"""
"""46006""","""cdm""","""709048170""","""00000000000042...","""278""","""Other implants...","""HLTHLNKHMO_HEA...",13765.0,"""payer""",,"""cdm"""
"""46006""","""cdm""","""709047873""","""00000000000047...","""272""","""Sterile suppli...","""PVCTR1_PATOKA_...",115.6,"""payer""",,"""cdm"""
"""46006""","""drg""","""839""","""Chemotherapy W...",,,"""Min_Negotiated...",9691.32,"""min""",,"""ms-drg"""
"""46006""","""drg""","""436""","""Malignancy Of ...",,,"""UHCMEDICARE_UH...",7450.27,"""payer""",,"""ms-drg"""
"""46006""","""cdm""","""717013472""","""DORZOLAMIDE HC...","""250""","""Pharmacy""","""HLTHLNKHMO_HEA...",48.22,"""payer""",,"""cdm"""
"""46006""","""drg""","""964""","""Other Multiple...",,,"""ANTHEMMCR_ANTH...",9705.01,"""payer""",,"""ms-drg"""
"""46006""","""cdm""","""712528050""","""MR CONTRAST GA...","""636""","""Drugs requirin...","""MULTIPLAN_MULT...",0.84,"""payer""",,"""cdm"""
