In [121]:
import polars as pl

filename = '74-2837441_KansasHeartHospital_standardcharges 6.csv'
tin = '74-2837441'
url = 'some_url'

In [122]:
df = pl.read_csv(filename, skip_rows = 3, null_values = [''])

Let's find the columns where there aren't any unique values.

In [123]:
cols_to_drop = []
for c in df.columns:
    if len(df[c].unique()) == 1:
        cols_to_drop.append(c)

So we can drop all these columns.

In [124]:
df = df.drop(cols_to_drop)

In [125]:
df = (df
 
 # combine HCPCS_CODE and CPT_CODE columns
 # rename to 'code'
 .with_columns(
     pl.col('CPT_CODE').fill_null(pl.col('HCPCS_CODE')).alias('code'))
      
 # drop previous columns
 .drop(['CPT_CODE', 'HCPCS_CODE'])
      
 # only select non-null codes
 .filter(pl.col('code').is_not_null())
      
)

In [126]:
df = df.rename({
    'PRIMARY_PROCEDURE_DESCRIPTION':'description',
    'MODIFIER':'modifier',
    'REV_CODE': 'rev_code',
    'NEGOTIATED_PRICE':'rate',
    'PAYER_NAME':'payer_desc',
    'NETWORK_NAME':'plan_desc',
})

Break the dataframe into payer and "other" parts.

In [127]:
df_payer = df.select([
    'description', 
    'modifier', 
    'rev_code', 
    'rate', 
    'payer_desc', 
    'plan_desc', 
    'code'
])

df_other = df.select(['description', 
                      'modifier', 
                      'rev_code', 
                      'rate',
                      'code',
                      'DEIDENTIFIED_MIN_PRICE', 
                      'DEIDENTIFIED_MAX_PRICE',
                      'GROSS_PRICE', 
                      'CASH_PRICE'
                     ])


In [128]:
df_other = df_other.melt(
    id_vars = ['description', 'modifier', 'rev_code', 'code'],
    value_vars = ['DEIDENTIFIED_MIN_PRICE', 'DEIDENTIFIED_MAX_PRICE', 'GROSS_PRICE', 'CASH_PRICE'],
    variable_name = 'payer_desc',
    value_name = 'rate'
)

In [129]:
# we need to add a column to df_other so that we can concat it
# with df_payer

df_other = df_other.with_columns(pl.lit('na').alias('plan_desc'))
df_other = df_other.select(df_payer.columns)
df = pl.concat([df_payer, df_other])

In [132]:
df = df.with_columns([
    
    # handle the fixed values
    pl.lit(filename).alias('filename'),
    pl.lit(url).alias('url'),
    pl.lit(tin).alias('hospital_tin'),
    pl.lit('hcpcs_cpt').alias('code_type'),
    
    # fill in the null modifiers
    pl.col('modifier').fill_null('na'),
   
    # handle payer category
    (pl.when(pl.col('payer_desc') == 'CASH_PRICE').then('cash')
     .when(pl.col('payer_desc') == 'GROSS_CHARGE').then('gross')
     .when(pl.col('payer_desc') == 'DEIDENTIFIED_MIN_PRICE').then('min')
     .when(pl.col('payer_desc') == 'DEIDENTIFIED_MAX_PRICE').then('max')
     .otherwise('payer')
    ).alias('payer_category')
    
    pl.when(pl.col('plan_name').str_contains('Advantage').then(True).otherwise(None))
    
])

In [133]:
df = df.unique()

In [134]:
df

description,modifier,rev_code,rate,payer_desc,plan_desc,code,filename,url,hospital_tin,code_type,payer_category
str,str,i64,f64,str,str,str,str,str,str,str,str
"""PHARMACY""","""na""",258,35.91,"""Aetna""","""Commercial""","""B5200""","""74-2837441_Kan...","""some_url""","""74-2837441""","""hcpcs_cpt""","""payer"""
"""PHARMACY""","""na""",258,51.3,"""United Healthc...","""Commercial""","""B5200""","""74-2837441_Kan...","""some_url""","""74-2837441""","""hcpcs_cpt""","""payer"""
"""PHARMACY""","""na""",258,41.04,"""WPPA""","""Commercial""","""B5200""","""74-2837441_Kan...","""some_url""","""74-2837441""","""hcpcs_cpt""","""payer"""
"""PROCALAMINE(AM...","""na""",258,0.0,"""Allwell From S...","""Allwell From S...","""B5200""","""74-2837441_Kan...","""some_url""","""74-2837441""","""hcpcs_cpt""","""payer"""
"""PROCALAMINE(AM...","""na""",258,0.0,"""Blue Cross""","""Commercial""","""B5200""","""74-2837441_Kan...","""some_url""","""74-2837441""","""hcpcs_cpt""","""payer"""
"""ICU REVENUE""","""na""",260,25.92,"""Aetna""","""Commercial""","""96366""","""74-2837441_Kan...","""some_url""","""74-2837441""","""hcpcs_cpt""","""payer"""
"""ICU REVENUE""","""na""",260,18.25,"""Aetna""","""Medicare Advan...","""96366""","""74-2837441_Kan...","""some_url""","""74-2837441""","""hcpcs_cpt""","""payer"""
"""ICU REVENUE""","""na""",260,20.48,"""Humana""","""Medicare""","""96366""","""74-2837441_Kan...","""some_url""","""74-2837441""","""hcpcs_cpt""","""payer"""
"""ICU REVENUE""","""na""",260,400.0,"""United Healthc...","""Commercial""","""96366""","""74-2837441_Kan...","""some_url""","""74-2837441""","""hcpcs_cpt""","""payer"""
"""ICU REVENUE""","""na""",260,40.0,"""United Healthc...","""Medicare""","""96366""","""74-2837441_Kan...","""some_url""","""74-2837441""","""hcpcs_cpt""","""payer"""
