In [1]:
import polars as pl
from tqdm import tqdm

# Price-transparency landing page URL; it is written into every generated
# `update hospital` statement as the `transparency_page` value.
transparency_page = 'https://www.beaumont.org/patients-families/billing/pricing/beaumont-health-price-transparency-information'

In [2]:
def stdcharge():
    """Build the polars expression that cleans the raw ``standard_charge_`` column.

    The expression, in order:
      1. strips surrounding whitespace,
      2. removes the first dollar sign,
      3. removes every comma,
      4. removes every ``-``, ``#N/A``, ``#VALUE`` and ``!`` occurrence,
      5. maps values that end up empty to null.

    The result is aliased ``standard_charge`` and is still a string column
    (no numeric cast is performed here).

    NOTE(review): step 4 also removes a leading minus sign, so a negative
    amount would lose its sign -- confirm that is acceptable for this data.
    """
    raw = pl.col('standard_charge_')

    cleaned = (
        raw
        .str.strip()
        # Raw string: a backslash-dollar inside a normal literal is an invalid
        # escape sequence (a warning today, an error in a future Python).
        .str.replace(r'\$', '')
        .str.replace_all(',', '')
        .str.replace_all('-|#N/A|#VALUE|!', '')
    )

    # Placeholder-only cells are empty after cleaning; turn them into nulls so
    # callers can filter them out with is_not_null().
    is_blank = cleaned.str.lengths() == 0
    return pl.when(is_blank).then(None).otherwise(cleaned).alias('standard_charge')


### Facility rates

In [5]:
# Facility (hospital) standard-charge files, one entry per hospital:
#   'id'  -> value written to the `hospital_id` column and used in the SQL `where id=` clause
#   'url' -> download link of that hospital's standard-charges CSV
# NOTE: list order matters. The facility loading loop special-cases entries by
# position (idx 3 and idx 5), and the SQL-building cell zips this list with
# data_prof, asserting that the ids line up.
data_fac = [
    {'id':'230020', 'url': 'https://www.beaumont.org/docs/default-source/default-document-library/cdm-documents/2023/381405141_beaumont-hospital-dearborn-hospital_standardcharges.csv?sfvrsn=bffa40fb_3&download=true'},
    {'id':'230151', 'url': 'https://www.beaumont.org/docs/default-source/default-document-library/cdm-documents/2023/381426929_beaumont-hospital-farmington-hills-hospital_standardcharges.csv?sfvrsn=fdfa40fb_3&download=true'},
    {'id':'230089', 'url': 'https://www.beaumont.org/docs/default-source/default-document-library/cdm-documents/2023/381459362_beaumont-hospital-grosse-pointe-hospital_standardcharges.csv?sfvrsn=62fa40fb_5&download=true'},
    {'id':'230130', 'url': 'https://www.beaumont.org/docs/default-source/default-document-library/cdm-documents/2023/381459362_beaumont-hospital-royal-oak-hospital_standardcharges.csv?sfvrsn=5cfa40fb_7&download=true'},
    {'id':'230270', 'url': 'https://www.beaumont.org/docs/default-source/default-document-library/cdm-documents/2023/381405141_beaumont-hospital-taylor-hospital_standardcharges.csv?sfvrsn=99fa40fb_5&download=true'},
    {'id':'230176', 'url': 'https://www.beaumont.org/docs/default-source/default-document-library/cdm-documents/2023/381405141_beaumont-hospital-trenton-hospital_standardcharges.csv?sfvrsn=affa40fb_5&download=true'},
    {'id':'230269', 'url': 'https://www.beaumont.org/docs/default-source/default-document-library/cdm-documents/2023/381459362_beaumont-hospital-troy-hospital_standardcharges.csv?sfvrsn=4bfa40fb_3&download=true'},
    {'id':'230142', 'url': 'https://www.beaumont.org/docs/default-source/default-document-library/cdm-documents/2023/381405141_beaumont-hospital-wayne-hospital_standardcharges.csv?sfvrsn=c6fa40fb_1&download=true'},
]

In [6]:
# Source CSV header -> normalized column name. Only the headers actually
# present in a given file are renamed, and the renamed columns become the
# id columns of the melt. Both spellings of the revenue-code header are
# listed and map to the same target.
subs = dict([
    ('Code Type', 'line_type'),
    ('Procedure', 'local_code'),
    ('Code', 'code'),
    ('NDC', 'ndc'),
    ('Rev Code', 'rev_code'),
    ('REV CODE', 'rev_code'),
    ('Procedure Description', 'description'),
])

In [7]:
def extract_codes():
    """Build the expressions that derive billing codes from the ``code`` column.

    Returns:
        A ``(ms_drg, hcpcs_cpt)`` tuple of polars expressions:

        * ``ms_drg``    -- for values containing ``MSDRG``, the trailing run of
          digits that follows ``"MSDRG "``; null otherwise.
        * ``hcpcs_cpt`` -- the value itself when it is exactly five characters
          long; null otherwise.
    """
    code = pl.col('code')

    ms_drg = (
        pl.when(code.str.contains('MSDRG'))
        # Raw string so the digit class is not an invalid string escape.
        .then(code.str.extract(r'MSDRG (\d+)$'))
        .alias('ms_drg')
    )

    # Any 5-character code is taken as-is; no further format check is applied.
    hcpcs_cpt = pl.when(code.str.lengths() == 5).then(code).alias('hcpcs_cpt')

    return ms_drg, hcpcs_cpt


def payer_cat():
    """Build the ``payer_category`` expression for the facility files.

    The ``payer`` column holds the melted charge-column headers. Each header is
    classified by the first matching pattern (patterns are regexes, so ``|``
    means "or"):

        GROSS or RECONSTRUCTED CHARGE -> 'gross'
        MAXIMUM                       -> 'max'
        MINIMUM                       -> 'min'
        CASH PRICE                    -> 'cash'
        anything else                 -> 'payer'

    NOTE: a later cell redefines ``payer_cat`` with the patterns used by the
    professional files. Re-run this cell before re-running the facility loop.
    """
    payer = pl.col('payer')

    # pl.lit(...) makes the string outputs explicit literals. A bare string in
    # then()/otherwise() is parsed as a column name by newer polars releases.
    return (
        pl.when(payer.str.contains('GROSS|RECONSTRUCTED CHARGE')).then(pl.lit('gross'))
        .when(payer.str.contains('MAXIMUM')).then(pl.lit('max'))
        .when(payer.str.contains('MINIMUM')).then(pl.lit('min'))
        .when(payer.str.contains('CASH PRICE')).then(pl.lit('cash'))
        .otherwise(pl.lit('payer'))
        .alias('payer_category')
    )

In [8]:
# Download every facility CSV, normalize its headers, melt the per-payer charge
# columns into long format and collect the cleaned frames in `dfs`.
dfs = []

# enumerate(tqdm(...)) instead of tqdm(enumerate(...)): tqdm can then read
# len(data_fac) and display a total / ETA instead of a bare iteration counter.
for idx, row in enumerate(tqdm(data_fac)):
    url = row['url']
    id_ = row['id']

    read_csv_options = {'encoding': 'latin-1'}

    if idx == 3:
        # Entry 3 of data_fac (the Royal Oak file): skip the first line of the
        # file. Presumably an extra line precedes the header row -- TODO confirm.
        read_csv_options['skip_rows'] = 1

    df = pl.read_csv(url, **read_csv_options)

    if idx == 5:
        # Entry 5 of data_fac (the Trenton file): drop the column whose header is
        # the empty string so it is not melted as a payer.
        df = df.drop('')

    # Headers may carry stray whitespace; strip it before matching against `subs`.
    df = df.rename({c: c.strip() for c in df.columns})

    # Rename only the descriptive headers present in this file. Those become the
    # id columns; every remaining column is treated as a payer/charge column.
    df_subs = {c: s for c, s in subs.items() if c in df.columns}
    df = df.rename(df_subs)
    id_vars = list(df_subs.values())

    df = (
        df
        .melt(id_vars, variable_name = 'payer', value_name = 'standard_charge_')
        .with_columns([
            *extract_codes(),
            stdcharge(),
            payer_cat(),
            pl.lit('facility').alias('billing_class'),
            pl.lit(id_).alias('hospital_id')
        ])
        # Rows whose charge cleaned down to nothing carry no price information.
        .filter(pl.col('standard_charge').is_not_null())
    )

    dfs.append(df)

8it [00:37,  4.73s/it]


In [10]:
# Stack the per-file frames collected in `dfs` into a single frame.
df = pl.concat(dfs)

In [11]:
# Row count of the combined frame.
df.height

6664399

In [12]:
# Row count after removing exact duplicate rows; a value lower than df.height
# means the combined frame contains duplicates.
df.unique().height

6664035

In [9]:
# Write the combined facility rows to CSV.
# NOTE(review): no de-duplication happens here, while the recorded outputs of
# df.height (6664399) and df.unique().height (6664035) differ -- decide whether
# the export should use .unique(). This cell's execution count (9) also precedes
# the cells shown before it, so it was run out of order.
pl.concat(dfs).write_csv('beaumont_fac.csv')

### Professional rates

In [33]:
# Professional standard-charge files, one entry per hospital:
#   'id'  -> value written to the `hospital_id` column
#   'url' -> download link of that hospital's professional standard-charges CSV
# NOTE: entries must stay in the same order as data_fac; the SQL-building cell
# zips the two lists and asserts that the ids match pairwise.
data_prof = [
    {'id':'230020', 'url': 'https://www.beaumont.org/docs/default-source/default-document-library/cdm-documents/2023/381405141_beaumont-hospital-dearborn-professional_standardcharges.csv?sfvrsn=f8fb40fb_3&download=true'},
    {'id':'230151', 'url': 'https://www.beaumont.org/docs/default-source/default-document-library/cdm-documents/2023/381426929_beaumont-hospital-farmington-hills-professional_standardcharges.csv?sfvrsn=aafb40fb_3&download=true'},
    {'id':'230089', 'url': 'https://www.beaumont.org/docs/default-source/default-document-library/cdm-documents/2023/381459362_beaumont-hospital-grosse-pointe-professional_standardcharges.csv?sfvrsn=dfa40fb_3&download=true'},
    {'id':'230130', 'url': 'https://www.beaumont.org/docs/default-source/default-document-library/cdm-documents/2023/381459362_beaumont-hospital-royal-oak-professional_standardcharges.csv?sfvrsn=a1fb40fb_5&download=true'},
    {'id':'230270', 'url': 'https://www.beaumont.org/docs/default-source/default-document-library/cdm-documents/2023/381405141_beaumont-hospital-taylor-professional_standardcharges.csv?sfvrsn=25fa40fb_5&download=true'},
    {'id':'230176', 'url': 'https://www.beaumont.org/docs/default-source/default-document-library/cdm-documents/2023/381405141_beaumont-hospital-trenton-professional_standardcharges.csv?sfvrsn=c1fb40fb_5&download=true'},
    {'id':'230269', 'url': 'https://www.beaumont.org/docs/default-source/default-document-library/cdm-documents/2023/381459362_beaumont-hospital-troy-professional_standardcharges.csv?sfvrsn=b7fb40fb_5&download=true'},
    {'id':'230142', 'url': 'https://www.beaumont.org/docs/default-source/default-document-library/cdm-documents/2023/381405141_beaumont-hospital-wayne-professional_standardcharges.csv?sfvrsn=efa40fb_3&download=true'},
]

# Re-assigns the same URL that the first cell already stores in transparency_page.
transparency_page = 'https://www.beaumont.org/patients-families/billing/pricing/beaumont-health-price-transparency-information'

In [34]:
def payer_cat():
    """Build the ``payer_category`` expression for the professional files.

    The ``payer`` column holds the melted charge-column headers. Each header is
    classified by the first matching pattern (patterns are regexes, so ``|``
    means "or"):

        GROSS                         -> 'gross'
        DEIDENTIFIED MAX or Max Price -> 'max'
        DEIDENTIFIED MIN or Min Price -> 'min'
        CASH PRICE                    -> 'cash'
        anything else                 -> 'payer'

    NOTE: this replaces the earlier ``payer_cat`` defined for the facility
    files; the two differ only in the header patterns they match.
    """
    payer = pl.col('payer')

    # pl.lit(...) makes the string outputs explicit literals. A bare string in
    # then()/otherwise() is parsed as a column name by newer polars releases.
    return (
        pl.when(payer.str.contains('GROSS')).then(pl.lit('gross'))
        .when(payer.str.contains('DEIDENTIFIED MAX|Max Price')).then(pl.lit('max'))
        .when(payer.str.contains('DEIDENTIFIED MIN|Min Price')).then(pl.lit('min'))
        .when(payer.str.contains('CASH PRICE')).then(pl.lit('cash'))
        .otherwise(pl.lit('payer'))
        .alias('payer_category')
    )

In [40]:
# Download every professional CSV, melt the per-payer charge columns into long
# format and collect the cleaned frames in `dfs` (replacing the facility frames).
dfs = []

# Iterate the list directly: the index was unused, and wrapping the list itself
# (rather than an enumerate object) lets tqdm display a total / ETA.
for row in tqdm(data_prof):
    url = row['url']
    id_ = row['id']

    df = (pl
          # infer_schema_length = 0 reads every column as a string; '#N/A' cells become null.
          .read_csv(url, encoding = 'latin-1', infer_schema_length = 0, null_values = ['#N/A'])
          # TODO: an 'FS ID' column was previously dropped here; per the original
          # note it should instead be mapped to `line_type`. NOTE(review): if that
          # column is present it is currently melted as if it were a payer -- confirm.
          .rename({
              # The trailing space in this header is intentional; it must match the file.
              'CPT/HCPCS Code ':'hcpcs_cpt',
              'Beaumont Health Charge Code Description':'description',
              'NDC':'ndc',
          })
          .melt(id_vars = ['hcpcs_cpt', 'description', 'ndc'], variable_name = 'payer', value_name = 'standard_charge_')
          .with_columns([
              payer_cat(),
              stdcharge(),
              pl.lit('professional').alias('billing_class'),
              pl.lit(id_).alias('hospital_id')
          ])
          # Rows whose charge cleaned down to nothing carry no price information.
          .filter(pl.col('standard_charge').is_not_null())
         )

    dfs.append(df)

8it [00:24,  3.12s/it]


In [41]:
# Write the combined professional rows (the frames currently in `dfs`) to CSV.
pl.concat(dfs).write_csv('beaumont_prof.csv')

In [42]:
def _sql_str(value):
    """Render ``value`` as a single-quoted SQL string literal, doubling embedded quotes."""
    return "'" + str(value).replace("'", "''") + "'"


# Build one `update hospital` statement per hospital, combining the facility and
# professional file metadata (joined with '|'). Statements are collected in `qs`.

# zip() silently stops at the shorter list, so make a length mismatch loud.
assert len(data_fac) == len(data_prof)

last_updated = '2023-01-01'

qs = []
for row_fac, row_prof in zip(data_fac, data_prof):
    # Both lists must describe the same hospital at the same position.
    assert row_fac['id'] == row_prof['id']

    # Named hospital_id rather than `id` to avoid shadowing the builtin.
    hospital_id = row_fac['id']
    url_fac = row_fac['url']
    url_prof = row_prof['url']

    # File name = last path segment of the URL without its query string.
    file_name_fac = url_fac.split('/')[-1].split('?')[0]
    file_name_prof = url_prof.split('/')[-1].split('?')[0]
    file_name = f'{file_name_fac}|{file_name_prof}'

    # The EIN is the text before the first underscore of the facility file name,
    # reformatted as NN-NNNNNNN.
    ein = file_name_fac.split('_')[0]
    ein = f'{ein[:2]}-{ein[2:]}'

    stdchg_file_url = f'{url_fac}|{url_prof}'

    # Values are quoted with _sql_str instead of the f-string `=` specifier:
    # that specifier emits Python repr(), which is not SQL quoting (it switches
    # to double quotes or backslash escapes when a value contains a quote).
    q = f"""
update hospital set
ein={_sql_str(ein)},
transparency_page={_sql_str(transparency_page)},
file_name={_sql_str(file_name)},
last_updated={_sql_str(last_updated)},
stdchg_file_url={_sql_str(stdchg_file_url)}
where id={_sql_str(hospital_id)}
"""

    qs.append(q)

In [43]:
# Persist the generated statements, terminating each one with ';' and a newline.
with open('beaumont.sql', 'w+') as sql_file:
    sql_file.writelines(statement + ';\n' for statement in qs)