In [465]:
import polars as pl

In [466]:
# Columns forming the row key of the output table. `pipeline` fills nulls
# with '' in whichever of these columns are present in the frame.
# Fix: the original had 'payer''plan' (no comma), which Python concatenates
# into the single string 'payerplan', so neither key was actually listed.
# NOTE(review): this list says 'modifiers' but rename_dict produces a
# 'modifier' column -- confirm which spelling the target schema uses.
primary_keys = [
    'hospital_id',
    'local_code',
    'code',
    'ms_drg',
    'apr_drg',
    'hcpcs_cpt',
    'modifiers',
    'ndc',
    'rev_code',
    'billing_class',
    'setting',
    'payer',
    'plan',
]

In [467]:
# Landing page URL written into the `hospital` update statement for every
# facility processed by `pipeline`.
transparency_page = (
    'https://www.wth.org/financial-assistance-billing/hospital-charges/'
)

In [468]:
# Free-text note written into the `hospital` update statement for every
# facility processed by `pipeline`.
# NOTE(review): the text contains misspellings ("seperately", "sperate",
# "budled"). It is kept verbatim because it is emitted into SQL; confirm
# whether it is meant to mirror the publisher's wording before correcting.
additional_notes = (
    '* $0.00 payment may indicate a line item is not seperately reimbursed '
    'by this specific Payer and may be included in the reimbursement for a '
    'sperate line item under budled pricing agreements.'
)

In [469]:
# One record per facility: the hospital identifier paired with the URL of
# its standard-charges CSV. All files live under the same upload folder, so
# only the id and the file's base name vary per entry.
hospital_data = [
    {
        'hospital_id': hospital_id,
        'url': f'https://www.wth.org/wp-content/uploads/2022/11/{csv_name}',
    }
    for hospital_id, csv_name in (
        ('441316', '58-1884314_CGH_standardcharges.csv'),
        ('440002', '62-6010402_JMCGH_standardcharges.csv'),
        ('440072', '82-5179383_Dyersburg_standardcharges.csv'),
        ('440061', '82-5187448_Volunteer_standardcharges.csv'),
        ('440060', '62-1753289_MGH_standardcharges.csv'),
        ('441320', '62-1624171_BGH_standardcharges.csv'),
    )
]

In [470]:
# Derive two extra fields on each hospital record, in place:
#   file_name    -- last path segment of the URL
#   hospital_ein -- the part of that file name before its first underscore
for row in hospital_data:
    file_name = row['url'].rsplit('/', 1)[-1]
    row['file_name'] = file_name
    row['hospital_ein'] = file_name.partition('_')[0]

In [471]:
# Keyword arguments forwarded to `pl.read_csv` by `pipeline`: the files are
# decoded as latin-1 and the listed placeholder strings are read as null.
read_csv_options = dict(
    encoding='latin-1',
    null_values=[
        'N/A',
        'Not Billed to Insurance',
        'Not billed to Insurance',
    ],
)

In [472]:
def payer_category():
    """Build the expression that derives ``payer_category`` from ``payer``.

    The ``payer`` column is compared against the fixed header labels below;
    any value that matches none of them is categorised as ``'payer'``.

    Returns:
        A polars expression aliased ``payer_category`` whose values are one
        of ``'gross'``, ``'cash'``, ``'min'``, ``'max'`` or ``'payer'``.
    """
    payer = pl.col('payer')

    # Category labels are wrapped in pl.lit so they are always treated as
    # string literals. A bare str passed to then()/otherwise() is interpreted
    # as a column name by newer polars releases.
    return (
        pl.when(payer == 'GROSS CHARGE')
        .then(pl.lit('gross'))
        .when(payer == 'DISCOUNTED CASH PRICE (UNINSURED)')
        .then(pl.lit('cash'))
        .when(payer == 'DISCOUNTED CASH PRICE (INSURED, SERVICE NOT COVERED BY INSURANCE)')
        .then(pl.lit('cash'))
        .when(payer == 'DE-IDENTIFIED MINIMUM NEGOTIATED RATE')
        .then(pl.lit('min'))
        .when(payer == 'DE-IDENTIFIED MAXIMUM NEGOTIATED RATE')
        .then(pl.lit('max'))
        .otherwise(pl.lit('payer'))
    ).alias('payer_category')

In [502]:
def contracting_method():
    """Build expressions classifying the raw ``standard_charge`` text.

    Both expressions read the ``standard_charge`` column:

    * ``contracting_method`` -- ``'per diem'`` when the text contains
      "day" (case-insensitive), null when it contains a dollar amount,
      ``'other'`` otherwise.
    * ``additional_payer_notes`` -- the original text when it is neither a
      per-diem nor a dollar amount, null otherwise.

    Returns:
        ``[contracting_method_expr, additional_payer_notes_expr]``.
    """
    charge = pl.col('standard_charge')

    is_per_diem = charge.str.to_lowercase().str.contains('day')
    # Raw string avoids invalid-escape warnings for `\$` and `\d`.
    # The previous pattern (`\d+\.?\d+?`) demanded at least two digits, so a
    # value such as "$5" was not recognised as an amount; the fractional part
    # is now an optional group instead.
    is_dollar_amount = (
        charge.str.replace_all(',', '').str.contains(r'\$\d+(?:\.\d+)?')
    )

    # Labels go through pl.lit so they are string literals on every polars
    # version (a bare str may be parsed as a column name).
    method_expr = (
        pl.when(is_per_diem).then(pl.lit('per diem'))
        .when(is_dollar_amount).then(None)
        .otherwise(pl.lit('other'))
    ).alias('contracting_method')

    notes_expr = (
        pl.when(is_per_diem).then(None)
        .when(is_dollar_amount).then(None)
        .otherwise(charge)
    ).alias('additional_payer_notes')

    return [method_expr, notes_expr]

In [503]:
def stdchg():
    """Build the expression that normalises the ``standard_charge`` column.

    The raw text has its first ``$`` removed, all commas removed, is
    lower-cased, has the first "per day" removed and is stripped of
    surrounding whitespace. The cleaned text is kept when the original
    mentions "day" or when the cleaned text is a plain decimal number;
    every other value becomes null.

    Returns:
        A polars expression aliased ``standard_charge`` (still a string
        column; no numeric cast is applied here).
    """
    raw = pl.col('standard_charge')

    is_per_diem = raw.str.to_lowercase().str.contains('day')

    cleaned = (
        raw
        .str.replace(r'\$', '')  # raw string: `\$` is a regex escape, not a Python one
        .str.replace_all(',', '')
        .str.to_lowercase()
        .str.replace('per day', '')
        .str.strip()
    )

    # The previous pattern (`^\d+\.?\d+?$`) required at least two digits, so
    # a single-digit charge such as "5" was nulled out. The fractional part
    # is now an optional group.
    is_number = cleaned.str.contains(r'^\d+(?:\.\d+)?$')

    # NOTE(review): the per-diem branch keeps the cleaned text even when it
    # is not purely numeric (only the literal "per day" is removed).
    # The former explicit `when(is_null).then(None)` branch was redundant
    # with `otherwise(None)` and has been dropped.
    return (
        pl.when(is_per_diem).then(cleaned)
        .when(is_number).then(cleaned)
        .otherwise(None)
    ).alias('standard_charge')

In [508]:
# Source-file header -> output column name. The keys double as the
# identifier (non-payer) columns when `pipeline` melts the frame.
# NOTE(review): 'MODIFIER' maps to 'modifier' while primary_keys lists
# 'modifiers' -- confirm which spelling the target schema expects.
rename_dict = dict((
    ('LINE TYPE', 'line_type'),
    ('CHARGE CODE/ PACKAGE', 'local_code'),
    ('CHARGE DESCRIPTION', 'description'),
    ('DRG', 'ms_drg'),
    ('APC', 'apc'),
    ('CPT®/HCPCS', 'hcpcs_cpt'),
    ('ALTERNATE CPT®/HCPCS', 'alternate_hcpcs_cpt'),
    ('REV CODE', 'rev_code'),
    ('MODIFIER', 'modifier'),
    ('NDC', 'ndc'),
))

# Accumulates one `update hospital ...` statement per `pipeline` call.
sql = list()

def _sql_quote(value):
    """Return ``value`` as a single-quoted SQL string literal.

    Embedded single quotes are doubled. The previous code relied on Python's
    ``repr`` (via the f-string ``=`` specifier), which switches to double
    quotes as soon as the value contains a ``'`` and so produced broken SQL.
    """
    return "'" + str(value).replace("'", "''") + "'"


def pipeline(row):
    """Download one hospital's standard-charges CSV and reshape it to long form.

    Side effect: appends one ``update hospital ...`` statement to the
    module-level ``sql`` list.

    Args:
        row: dict with the keys ``hospital_ein``, ``hospital_id``, ``url``
            and ``file_name``.

    Returns:
        A de-duplicated polars DataFrame with one row per
        (charge line, payer) pair, including the derived columns
        ``payer_category``, ``contracting_method``,
        ``additional_payer_notes`` and ``hospital_id``.
    """
    ein = row['hospital_ein']
    hospital_id = row['hospital_id']  # renamed from `id` to avoid shadowing the builtin
    url = row['url']
    file_name = row['file_name']

    df = pl.read_csv(url, **read_csv_options)

    # The second header cell of the file is split on '/' into month, day and
    # year. Month and day are zero-padded so single-digit parts still give a
    # YYYY-MM-DD style value.
    mm, dd, yy = df.columns[1].split('/')
    last_updated = f'{yy}-{mm.zfill(2)}-{dd.zfill(2)}'

    assignments = ', '.join(
        f'{column}={_sql_quote(value)}'
        for column, value in (
            ('ein', ein),
            ('file_name', file_name),
            ('last_updated', last_updated),
            ('url', url),
            ('transparency_page', transparency_page),
            ('additional_notes', additional_notes),
        )
    )
    query = f'update hospital set {assignments} where id={_sql_quote(hospital_id)}'
    sql.append(query)

    # Drop the first data row, promote the next row to be the column names,
    # then drop that row as well.
    df = df.slice(1,)
    df.columns = list(df[0].to_dicts()[0].values())
    df = df.slice(1,)

    # Every column that is not a known identifier column is treated as a
    # payer column and unpivoted into (payer, standard_charge) pairs.
    id_vars = list(rename_dict.keys())
    payers = [c for c in df.columns if c not in id_vars]

    df = df.melt(id_vars, payers, 'payer', 'standard_charge')

    df = df.filter(pl.col('standard_charge').is_not_null())

    df = df.rename(rename_dict)

    # Rows carrying an alternate CPT/HCPCS code are duplicated with the
    # alternate code placed in `hcpcs_cpt`.
    df_alt = (
        df
        .filter(pl.col('alternate_hcpcs_cpt').is_not_null())
        .drop('hcpcs_cpt')
        .rename({'alternate_hcpcs_cpt': 'hcpcs_cpt'})
    )

    df = df.drop('alternate_hcpcs_cpt')
    df = pl.concat([df, df_alt])

    # Key columns present in this frame; their nulls become ''.
    to_fill = [c for c in df.columns if c in primary_keys]

    df = df.with_columns([
        payer_category(),
        stdchg(),
        *contracting_method(),
        pl.lit(hospital_id).alias('hospital_id'),
        pl.col('rev_code').str.zfill(4),
        pl.col('hcpcs_cpt').str.to_uppercase(),
    ]).with_columns(
        pl.col(to_fill).fill_null('')
    )

    # NOTE(review): the 100-character cut-off on line_type presumably drops
    # free-text/footnote rows -- confirm against the source files.
    df = (
        df
        .filter(pl.col('line_type').str.lengths() < 100)
        .filter(pl.col('line_type').is_not_null())
    )

    df = df.unique()

    return df

In [510]:
# Run the pipeline once per hospital record and stack the resulting frames.
hospital_frames = list(map(pipeline, hospital_data))
df = pl.concat(hospital_frames)

In [512]:
# Persist the combined frame as CSV.
output_csv_path = '~/hpt/tenn.csv'
df.write_csv(output_csv_path)

In [500]:
# Write the accumulated hospital update statements, one per line, each
# terminated with a semicolon.
with open('tenn.sql', 'w+') as sql_file:
    sql_file.writelines(f'{statement};\n' for statement in sql)

In [464]:
# Count rows per key combination and sort ascending by that count.
key_columns = [
    'local_code',
    'hospital_id',
    'hcpcs_cpt',
    'ms_drg',
    'rev_code',
    'ndc',
    'payer',
]
(
    df
    .groupby(key_columns)
    .agg(pl.count())
    .sort('count')
)

local_code,hospital_id,hcpcs_cpt,ms_drg,rev_code,ndc,payer,count
str,str,str,str,str,str,str,u32
"""6743619""","""440002""","""C1713""","""""","""0278""","""""","""Cigna HealthSp...",1
"""5792261""","""440002""","""84132""","""""","""0301""","""""","""Bluecare TennC...",1
"""6544370""","""440072""","""A9562""","""""","""0250""","""""","""Bluecare TennC...",1
"""7509959-22745""","""440061""","""A9270""","""""","""0250""","""55111-0119-10""","""DE-IDENTIFIED ...",1
"""7500267-52450""","""440061""","""A9270""","""""","""0250""","""00168-0263-15""","""Humana ChoiceC...",1
"""7506360-8801""","""441320""","""A9270""","""""","""0250""","""64980-0135-01""","""DISCOUNTED CAS...",1
"""6579399""","""441316""","""C1781""","""""","""0278""","""""","""United Healthc...",1
"""7507510-23315""","""440002""","""""","""""","""0250""","""63323-0011-15""","""DISCOUNTED CAS...",1
"""6422338""","""440002""","""C1713""","""""","""0278""","""""","""UHC Community ...",1
"""957746""","""440072""","""80346""","""""","""0301""","""""","""Aetna All Plan...",1
