In [188]:
import polars as pl

### Expressions for defining columns

In [189]:
def payer_category():
    return (
        pl.when(pl.col('payer_name') == 'Gross_Charge').then('gross')
        .when(pl.col('payer_name') == 'Cash_Charge').then('cash')
        .when(pl.col('payer_name') == 'Min_Negotiated_Rate').then('min')
        .when(pl.col('payer_name') == 'Max_Negotiated_Rate').then('max')
        .otherwise('payer').alias('payer_category')
    )

def hcpcs_cpt_and_drg():
    
    hcpcs_cpt = pl.when(pl.col('line_type') == '2-CPT').then(pl.col('code')).alias('hcpcs_cpt')
    ms_drg = pl.when(pl.col('line_type') == '3-DRG').then(pl.col('code')).str.zfill(3).alias('ms_drg')
    
    return hcpcs_cpt, ms_drg

def all_charges():
    
    standard_charge_percent = (
        pl.col('standard_charge')
        .str.extract('(\d+)% of gross charges')
        .alias('standard_charge_percent')
    )
    
    contracting_method = (
        pl.when(pl.col('standard_charge').str.contains('\% of gross charges'))
        .then('percent of total billed charge')
        .alias('contracting_method')
    )
    
    standard_charge = (
        pl.when(pl.col('standard_charge').str.contains('\% of gross charges'))
        .then(None).otherwise(pl.col('standard_charge')).alias('standard_charge')
    )

    return standard_charge, standard_charge_percent, contracting_method

In [190]:
file = '474063406_Ascension Saint Thomas Highlands_standardcharges.xlsx'

# These are things I learned from trial and error working with these files
# and also examining them in Numbers
read_csv_options = {
    'skip_rows': 1, 
    'infer_schema_length': 0, 
    'null_values': ['N/A']
}

### Trying a single file: an easy and fast workflow

My workflow is to build a single parenned object like
```py
(
    df
    .transformation
    .transformation
    ...
)
```
and then keep adding transformations until I get the dataframe I like. I do this in a single cell.

So I started like

```py
(
    pl.read_excel(file, sheet_id = 2, read_csv_options = read_csv_options)
)
```
and noticed that I could slice off the first 3 rows, so then I did
```py
(
    pl.read_excel(file, sheet_id = 2, read_csv_options = read_csv_options)
    .slice(3,)
)
```
and so on. What I ended up with was this:

In [191]:
(
    pl.read_excel(file, sheet_id = 2, read_csv_options = read_csv_options)
    .slice(3,)
    .drop(['Facility_BU_ID', 'UB_Revenue_Description'])
    .rename({
        'Code_Type':'line_type',
        'Code':'code',
        'Description':'description',
        'UB_Revenue_Code':'rev_code',
    })
    .melt(
        ['line_type', 'code', 'description', 'rev_code'],
        variable_name = 'payer_name',
        value_name = 'standard_charge',
    )
    .filter(pl.col('standard_charge').is_not_null())
    .with_columns([
        payer_category(),
        pl.col('rev_code').str.zfill(4).keep_name(),
        *hcpcs_cpt_and_drg(),
        *all_charges(),
    ])
).sample(20)

line_type,code,description,rev_code,payer_name,standard_charge,payer_category,hcpcs_cpt,ms_drg,standard_charge_percent,contracting_method
str,str,str,str,str,str,str,str,str,str,str
"""1-CDM""","""30013417""","""PEG DRILL INBO…","""0272""","""Max_Negotiated…","""446.52""","""max""",,,,
"""3-DRG""","""674""","""Other Kidney A…",,"""H85_HUMANA_CHO…","""34625.3""","""payer""",,"""674""",,
"""2-CPT""","""86603""","""ANTIBODY ADENO…",,"""Max_Negotiated…","""21.54""","""max""","""86603""",,,
"""2-CPT""","""90870""","""ELECTROCONVULS…",,"""Max_Negotiated…","""172.65""","""max""","""90870""",,,
"""3-DRG""","""453""","""Combined Anter…",,"""H83_HUMANA_MED…","""69087.33""","""payer""",,"""453""",,
"""2-CPT""","""92273""","""FULL FIELD ERG…",,"""Min_Negotiated…","""76.84""","""min""","""92273""",,,
"""2-CPT""","""15630""","""DELAY FLAP/SCT…",,"""Max_Negotiated…","""1598.83""","""max""","""15630""",,,
"""1-CDM""","""42106119""","""XR SHOULDER 1 …","""0320""","""A09_AETNA_2005…","""223.6""","""payer""",,,,
"""1-CDM""","""1656775""","""ZIPRASIDONE 10…","""0636""","""C04_CIGNA_PPO_…","""110.56""","""payer""",,,,
"""3-DRG""","""698""","""Other Kidney A…",,"""C40_OPTUM_VA_2…","""12073.9""","""payer""",,"""698""",,


Let's turn this into a function so that we can process any excel file (assuming they're formatted the same way. We'll inspect this later.

In [192]:
def process(url):
    
    ### Putting my notes here as I go:
    
    ### Around the 20th file in: upon trying to cast 
    ### 'standard_charge' to float, I got:
    
    ### ComputeError: strict conversion from `str` to `f64` failed for value(s) 
    ### ["85% of gross charges", "65% of gross charges", … "82% of gross charges"]; 
    ### if you were trying to cast Utf8 to temporal dtypes, consider using `strptime`
    
    ### So this tells me that this a relatively uncommon thing, but that it does happen
    
    print(url)    
                
    df = (
        pl.read_excel(url, sheet_id = 2, read_csv_options = read_csv_options)
        .slice(3,)
        .drop(['Facility_BU_ID', 'UB_Revenue_Description'])
        .rename({
            'Code_Type':'line_type',
            'Code':'code',
            'Description':'description',
            'UB_Revenue_Code':'rev_code',
        })
        .melt(
            ['line_type', 'code', 'description', 'rev_code'],
            variable_name = 'payer_name',
            value_name = 'standard_charge',
        )
        .filter(pl.col('standard_charge').is_not_null())
        .with_columns([
            payer_category(),
            pl.col('rev_code').str.zfill(4).keep_name(),
            *hcpcs_cpt_and_drg(),
            *all_charges(),
        ])
    )
    
    return df

### Getting the MRF links from the Ascension page

In [193]:
from bs4 import BeautifulSoup
import requests

headers = {
    "Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"
}

mrf_page = 'https://healthcare.ascension.org/price-transparency/price-transparency-files'

resp = requests.get(mrf_page, headers = headers)

In [194]:
soup = BeautifulSoup(resp.content)
mrfs = []
base_url = 'https://healthcare.ascension.org'
for link in soup.find_all('a'):
    if '.xlsx' in link.get('href'):
        mrfs.append((base_url + link.get('href'), link.text))
        
print(f'The number of MRFs found was {len(mrfs)}')

The number of MRFs found was 116


Here's an example of what's in the list of MRF tuples: it's just the link to the MRF, plus the hospital name as shown on the Ascension website.

In [195]:
mrfs[12]

('https://healthcare.ascension.org/-/media/project/ascension/healthcare/price-transparency-files/fl/300577249_ascension-sacred-heart-gulf_standardcharges.xlsx',
 'Ascension Sacred Heart Gulf')

### Looping through all the Ascension MRFs

In [196]:
from tqdm import tqdm
from glob import glob
import os

In [None]:
for mrf_url, hospital_name in tqdm(mrfs):
    # Process each Excel file and save the processed data
    # in a .parquet file
    
    if not os.path.exists('data'):
        os.mkdir('data')
        
    basename = os.path.basename(mrf_url)
    
    path = f'data/{basename}'
    filename, ext = os.path.splitext(basename)
    
    parquet_name = f'{filename}.parquet'
    
    # Ascension won't serve you the binary unless you have the
    # right headers. So it's necessary to download the file with
    # requests and then open it up in a dataframe library.
    # This is fine, since we can then cache the files for later.
    
    if parquet_name in glob('data/*'):
        # We've already processed the file
        continue
    
    if path not in glob('data/*'):
        resp = requests.get(mrf_url, headers = headers)
        output = open(path, 'wb')
        output.write(resp.content)
        output.close()

    try:
        df = process(path)    
        df.write_parquet(f'data/{parquet_name}')
    except Exception as e:
        print(f"An error occurred: {e}")
        print(f"The problematic url was: {mrf_url}")

  0%|                                                                                                                                                     | 0/116 [00:00<?, ?it/s]

data/630578923_ascension-saint-vincents-east_standardcharges.xlsx


  1%|█▏                                                                                                                                           | 1/116 [00:12<23:06, 12.05s/it]

630578923_ascension-saint-vincents-east_standardcharges.parquet
data/630909073_saint-vincents-blount_standardcharges.xlsx


  2%|██▍                                                                                                                                          | 2/116 [00:17<15:51,  8.35s/it]

630909073_saint-vincents-blount_standardcharges.parquet
data/630288864_saint-vincents-birmingham_standardcharges.xlsx


  3%|███▋                                                                                                                                         | 3/116 [00:35<23:46, 12.62s/it]

630288864_saint-vincents-birmingham_standardcharges.parquet
data/810935368_saint-vincents-chilton_standardcharges.xlsx


  3%|████▊                                                                                                                                        | 4/116 [00:44<20:54, 11.20s/it]

810935368_saint-vincents-chilton_standardcharges.parquet
data/631146531_saint-vincents-saint-clair_standardcharges.xlsx


  4%|██████                                                                                                                                       | 5/116 [00:50<17:21,  9.38s/it]

631146531_saint-vincents-saint-clair_standardcharges.parquet
data/630288861_ascension-providence_standardcharges.xlsx


  5%|███████▎                                                                                                                                     | 6/116 [01:14<26:15, 14.32s/it]

630288861_ascension-providence_standardcharges.parquet
data/590624449_ascension-st-vincents-riverside_standardcharges.xlsx


  6%|████████▌                                                                                                                                    | 7/116 [02:08<49:27, 27.23s/it]

590624449_ascension-st-vincents-riverside_standardcharges.parquet
data/260479484_ascension-st-vincents-southside_standardcharges.xlsx


  7%|█████████▋                                                                                                                                   | 8/116 [02:36<49:37, 27.57s/it]

260479484_ascension-st-vincents-southside_standardcharges.parquet
data/461523194_ascension-st-vincents-clay-county_standardcharges.xlsx


  8%|██████████▉                                                                                                                                  | 9/116 [02:56<44:44, 25.09s/it]

461523194_ascension-st-vincents-clay-county_standardcharges.parquet
data/593650609_ascension-st-vincents-st-johns-county_standardcharges.xlsx


  9%|████████████                                                                                                                                | 10/116 [03:27<47:54, 27.11s/it]

593650609_ascension-st-vincents-st-johns-county_standardcharges.parquet
data/590634434_ascension-sacred-heart-pensacola_standardcharges.xlsx


  9%|█████████████▎                                                                                                                              | 11/116 [03:51<45:37, 26.07s/it]

590634434_ascension-sacred-heart-pensacola_standardcharges.parquet
data/721529708_ascension-sacred-heart-emerald-coast_standardcharges.xlsx


 10%|██████████████▍                                                                                                                             | 12/116 [03:58<35:17, 20.36s/it]

721529708_ascension-sacred-heart-emerald-coast_standardcharges.parquet
data/300577249_ascension-sacred-heart-gulf_standardcharges.xlsx


 11%|███████████████▋                                                                                                                            | 13/116 [04:04<27:24, 15.97s/it]

300577249_ascension-sacred-heart-gulf_standardcharges.parquet
data/900799724_ascension-sacred-heart-bay_standardcharges.xlsx


 12%|████████████████▉                                                                                                                           | 14/116 [04:14<24:02, 14.14s/it]

900799724_ascension-sacred-heart-bay_standardcharges.parquet
data/362596381_amita-health-alexian-brothers-medical-center-elk-grove-village-and-amita-health-rehabilitation-hospital-elk-grove-village_standardcharges.xlsx


 13%|██████████████████                                                                                                                          | 15/116 [04:37<27:58, 16.62s/it]

362596381_amita-health-alexian-brothers-medical-center-elk-grove-village-and-amita-health-rehabilitation-hospital-elk-grove-village_standardcharges.parquet
data/364251846_amita-health-st-alexius-medical-center-hoffman-estates-and-amita-health-women-and-childrens-hospital-hoffman-estates_standardcharges.xlsx


 14%|███████████████████▎                                                                                                                        | 16/116 [04:50<25:54, 15.54s/it]

364251846_amita-health-st-alexius-medical-center-hoffman-estates-and-amita-health-women-and-childrens-hospital-hoffman-estates_standardcharges.parquet
data/364251848_alexian-brothers-behavioral-health-hospital_standardcharges.xlsx


 15%|████████████████████▌                                                                                                                       | 17/116 [04:51<18:30, 11.21s/it]

364251848_alexian-brothers-behavioral-health-hospital_standardcharges.parquet
data/362235165_amita-health-holy-family-medical-center-des-plaines_standardcharges.xlsx


 16%|█████████████████████▋                                                                                                                      | 18/116 [05:00<17:10, 10.52s/it]

362235165_amita-health-holy-family-medical-center-des-plaines_standardcharges.parquet
data/362235165_amita-health-resurrection-medical-center-chicago_standardcharges.xlsx


 16%|██████████████████████▉                                                                                                                     | 19/116 [05:08<15:59,  9.89s/it]

362235165_amita-health-resurrection-medical-center-chicago_standardcharges.parquet
data/362235165_amita-health-saint-joseph-hospital-chicago_standardcharges.xlsx


 17%|████████████████████████▏                                                                                                                   | 20/116 [05:31<21:56, 13.71s/it]

362235165_amita-health-saint-joseph-hospital-chicago_standardcharges.parquet
data/362235165_amita-health-saints-mary-and-elizabeth-medical-center-chicago_standardcharges.xlsx


 18%|█████████████████████████▎                                                                                                                  | 21/116 [06:29<43:05, 27.22s/it]

362235165_amita-health-saints-mary-and-elizabeth-medical-center-chicago_standardcharges.parquet
data/364195126_amita-health-mercy-medical-center-aurora_standardcharges.xlsx


 19%|██████████████████████████▌                                                                                                                 | 22/116 [06:49<38:54, 24.84s/it]

364195126_amita-health-mercy-medical-center-aurora_standardcharges.parquet
data/362235165_amita-health-saint-francis-hospital-evanston_standardcharges.xlsx


 20%|███████████████████████████▊                                                                                                                | 23/116 [06:54<29:34, 19.08s/it]

362235165_amita-health-saint-francis-hospital-evanston_standardcharges.parquet
data/364195126_amita-health-saint-joseph-hospital-elgin_standardcharges.xlsx


 21%|████████████████████████████▉                                                                                                               | 24/116 [07:01<23:22, 15.25s/it]

364195126_amita-health-saint-joseph-hospital-elgin_standardcharges.parquet
data/364195126_amita-health-saint-joseph-medical-center-joliet_standardcharges.xlsx


 22%|██████████████████████████████▏                                                                                                             | 25/116 [07:28<28:45, 18.96s/it]

364195126_amita-health-saint-joseph-medical-center-joliet_standardcharges.parquet
data/364195126_amita-health-st-marys-hospital-kankakee_standardcharges.xlsx


 22%|███████████████████████████████▍                                                                                                            | 26/116 [07:53<31:07, 20.75s/it]

364195126_amita-health-st-marys-hospital-kankakee_standardcharges.parquet
data/364195126_amita-health-st-marys-hospital-kankakee_standardcharges.xlsx


 23%|████████████████████████████████▌                                                                                                           | 27/116 [08:18<32:38, 22.01s/it]

364195126_amita-health-st-marys-hospital-kankakee_standardcharges.parquet
data/350992717_ascension-st-vincent-kokomo-_standardcharges.xlsx


 24%|█████████████████████████████████▊                                                                                                          | 28/116 [08:35<30:03, 20.49s/it]

350992717_ascension-st-vincent-kokomo-_standardcharges.parquet
data/350869066_ascension-st-vincent-hospital_standardcharges.xlsx


 25%|███████████████████████████████████                                                                                                         | 29/116 [09:50<53:17, 36.76s/it]

350869066_ascension-st-vincent-hospital_standardcharges.parquet
data/460877261_ascension-st-vincent-anderson_standardcharges.xlsx


 26%|████████████████████████████████████▏                                                                                                       | 30/116 [10:22<50:33, 35.28s/it]

460877261_ascension-st-vincent-anderson_standardcharges.parquet
data/350869065_ascension-st-vincent-evansville_standardcharges.xlsx


 27%|█████████████████████████████████████▍                                                                                                      | 31/116 [10:55<48:56, 34.55s/it]

350869065_ascension-st-vincent-evansville_standardcharges.parquet
data/364492612_ascension-st-vincent-heart-center_standardcharges.xlsx


 28%|██████████████████████████████████████▌                                                                                                     | 32/116 [11:07<39:04, 27.91s/it]

364492612_ascension-st-vincent-heart-center_standardcharges.parquet
data/743107055_ascension-st-vincent-carmel_standardcharges.xlsx


 28%|███████████████████████████████████████▊                                                                                                    | 33/116 [11:30<36:34, 26.43s/it]

743107055_ascension-st-vincent-carmel_standardcharges.parquet
data/454243702_ascension-st-vincent-fishers_standardcharges.xlsx


 29%|█████████████████████████████████████████                                                                                                   | 34/116 [11:49<33:03, 24.19s/it]

454243702_ascension-st-vincent-fishers_standardcharges.parquet
data/352103153_ascension-st-vincent-randolph_standardcharges.xlsx


 30%|██████████████████████████████████████████▏                                                                                                 | 35/116 [11:56<25:40, 19.02s/it]

352103153_ascension-st-vincent-randolph_standardcharges.parquet
data/351841606_ascension-st-vincent-jennings_standardcharges.xlsx


 31%|███████████████████████████████████████████▍                                                                                                | 36/116 [12:05<21:19, 15.99s/it]

351841606_ascension-st-vincent-jennings_standardcharges.parquet
data/350784551_ascension-st-vincent-williamsport_standardcharges.xlsx


 32%|████████████████████████████████████████████▋                                                                                               | 37/116 [12:09<16:33, 12.58s/it]

350784551_ascension-st-vincent-williamsport_standardcharges.parquet
data/350876389_ascension-st-vincent-mercy_standardcharges.xlsx


 33%|█████████████████████████████████████████████▊                                                                                              | 38/116 [12:14<13:15, 10.19s/it]

350876389_ascension-st-vincent-mercy_standardcharges.parquet
data/352112529_ascension-st-vincent-clay_standardcharges.xlsx


 34%|███████████████████████████████████████████████                                                                                             | 39/116 [12:16<10:02,  7.82s/it]

352112529_ascension-st-vincent-clay_standardcharges.parquet
data/270847538_ascension-st-vincent-salem_standardcharges.xlsx


 34%|████████████████████████████████████████████████▎                                                                                           | 40/116 [12:24<09:48,  7.75s/it]

270847538_ascension-st-vincent-salem_standardcharges.parquet
data/351343019_ascension-st-vincent-warrick_standardcharges.xlsx


 35%|█████████████████████████████████████████████████▍                                                                                          | 41/116 [12:26<07:37,  6.10s/it]

An error occurred: Facility_BU_ID
The problematic url was: https://healthcare.ascension.org/-/media/project/ascension/healthcare/price-transparency-files/in/351343019_ascension-st-vincent-warrick_standardcharges.xlsx
data/272192831_ascension-st-vincent-dunn_standardcharges.xlsx


 36%|██████████████████████████████████████████████████▋                                                                                         | 42/116 [12:30<06:40,  5.42s/it]

272192831_ascension-st-vincent-dunn_standardcharges.parquet
data/351712001_ascension-st-vincent-seton-specialty-hospital_standardcharges.xlsx


 37%|███████████████████████████████████████████████████▉                                                                                        | 43/116 [12:30<04:43,  3.89s/it]

351712001_ascension-st-vincent-seton-specialty-hospital_standardcharges.parquet
data/351991390_ascension-st-vincent-naab-road-surgery-center_standardcharges.xlsx


 38%|█████████████████████████████████████████████████████                                                                                       | 44/116 [12:54<11:45,  9.80s/it]

351991390_ascension-st-vincent-naab-road-surgery-center_standardcharges.parquet
data/320014795_ascension-st-vincent-carmel-asc_standardcharges.xlsx


 39%|██████████████████████████████████████████████████████▎                                                                                     | 45/116 [13:18<16:30, 13.95s/it]

320014795_ascension-st-vincent-carmel-asc_standardcharges.parquet
data/320029881_ascension-st-vincent-endoscopy-center_standardcharges.xlsx


 40%|███████████████████████████████████████████████████████▌                                                                                    | 46/116 [13:41<19:33, 16.76s/it]

320029881_ascension-st-vincent-endoscopy-center_standardcharges.parquet
data/ascension-medical-group-st-vincent-urgent-care.xlsx
An error occurred: Facility_BU_ID
The problematic url was: https://healthcare.ascension.org/-/media/project/ascension/healthcare/price-transparency-files/in/ascension-medical-group-st-vincent-urgent-care.xlsx
data/481186704_ascension-via-christi-hospital-_standardcharges.xlsx


 41%|█████████████████████████████████████████████████████████▉                                                                                  | 48/116 [13:52<13:08, 11.60s/it]

481186704_ascension-via-christi-hospital-_standardcharges.parquet
data/480543778_ascension-via-christi-hospital_standardcharges.xlsx


 42%|███████████████████████████████████████████████████████████▏                                                                                | 49/116 [14:04<12:57, 11.61s/it]

480543778_ascension-via-christi-hospital_standardcharges.parquet
data/481172106_ascension-via-christi-st-francis-_standardcharges.xlsx


 43%|████████████████████████████████████████████████████████████▎                                                                               | 50/116 [14:54<23:55, 21.75s/it]

481172106_ascension-via-christi-st-francis-_standardcharges.parquet
data/271965272_ascension-via-christi-st-teresa_standardcharges.xlsx


 44%|█████████████████████████████████████████████████████████████▌                                                                              | 51/116 [15:12<22:27, 20.73s/it]

271965272_ascension-via-christi-st-teresa_standardcharges.parquet
data/721526400_wamego-health-center_standardcharges.xlsx


 45%|██████████████████████████████████████████████████████████████▊                                                                             | 52/116 [15:18<17:37, 16.52s/it]

721526400_wamego-health-center_standardcharges.parquet
data/481158274_ascension-via-christi-rehabiliatation-hospital_standardcharges.xlsx


 46%|███████████████████████████████████████████████████████████████▉                                                                            | 53/116 [15:26<14:52, 14.17s/it]

481158274_ascension-via-christi-rehabiliatation-hospital_standardcharges.parquet
data/520591657_saint-agnes-hospital_standardcharges.xlsx


 47%|█████████████████████████████████████████████████████████████████▏                                                                          | 54/116 [15:33<12:28, 12.07s/it]

520591657_saint-agnes-hospital_standardcharges.parquet
data/381358212_ascension-providence-hospital-southfield-campus_standardcharges.xlsx


 47%|██████████████████████████████████████████████████████████████████▍                                                                         | 55/116 [15:36<09:26,  9.29s/it]

An error occurred: Error: potential invalid date format.
The problematic url was: https://healthcare.ascension.org/-/media/project/ascension/healthcare/price-transparency-files/mi/381358212_ascension-providence-hospital-southfield-campus_standardcharges.xlsx
data/381358212_ascension-providence-hospital-novi-campus_standardcharges.xlsx


 48%|███████████████████████████████████████████████████████████████████▌                                                                        | 56/116 [15:38<07:12,  7.21s/it]

An error occurred: Error: potential invalid date format.
The problematic url was: https://healthcare.ascension.org/-/media/project/ascension/healthcare/price-transparency-files/mi/381358212_ascension-providence-hospital-novi-campus_standardcharges.xlsx
data/381359063_ascension-st-john-hospital_standardcharges.xlsx


 49%|████████████████████████████████████████████████████████████████████▊                                                                       | 57/116 [16:26<19:11, 19.51s/it]

381359063_ascension-st-john-hospital_standardcharges.parquet
data/383322109_ascension-macomb-oakland-hospital-warren-campus_standardcharges.xlsx


 50%|██████████████████████████████████████████████████████████████████████                                                                      | 58/116 [16:41<17:22, 17.97s/it]

383322109_ascension-macomb-oakland-hospital-warren-campus_standardcharges.parquet
data/383322109_ascension-macomb-oakland-hospital-madison-heights-campus_standardcharges.xlsx


 51%|███████████████████████████████████████████████████████████████████████▏                                                                    | 59/116 [16:46<13:35, 14.30s/it]

383322109_ascension-macomb-oakland-hospital-madison-heights-campus_standardcharges.parquet
data/383160564_ascension-river-district-hospital_standardcharges.xlsx


 52%|████████████████████████████████████████████████████████████████████████▍                                                                   | 60/116 [17:00<13:13, 14.17s/it]

383160564_ascension-river-district-hospital_standardcharges.parquet


 53%|█████████████████████████████████████████████████████████████████████████▌                                                                  | 61/116 [17:01<09:13, 10.07s/it]

data/381576680_ascension-brighton-center-for-recovery_standardcharges.xlsx
381576680_ascension-brighton-center-for-recovery_standardcharges.parquet
data/382377821_ascension-genesys-hospital_standardcharges.xlsx


 53%|██████████████████████████████████████████████████████████████████████████▊                                                                 | 62/116 [17:31<14:31, 16.14s/it]

382377821_ascension-genesys-hospital_standardcharges.parquet


Exception ignored in: <function Xlsx2csv.__del__ at 0x7f3ae9e774c0>
Traceback (most recent call last):
  File "/home/ubuntu/.local/lib/python3.9/site-packages/xlsx2csv.py", line 219, in __del__
    self.ziphandle.close()
AttributeError: 'Xlsx2csv' object has no attribute 'ziphandle'
 54%|████████████████████████████████████████████████████████████████████████████                                                                | 63/116 [17:38<11:40, 13.22s/it]

data/381360526_ascension-borgess-hospital-and-ascension-borgess-pipp-hospital_standardcharges.xlsx
An error occurred: Invalid xlsx file: data/381360526_ascension-borgess-hospital-and-ascension-borgess-pipp-hospital_standardcharges.xlsx
The problematic url was: https://healthcare.ascension.org/-/media/project/ascension/healthcare/price-transparency-files/mi/381360526_ascension-borgess-hospital-and-ascension-borgess-pipp-hospital_standardcharges.xlsx
data/381359180_ascension-borgess-allegan-hospital_standardcharges.xlsx


 55%|█████████████████████████████████████████████████████████████████████████████▏                                                              | 64/116 [17:39<08:27,  9.77s/it]

An error occurred: Error: potential invalid date format.
The problematic url was: https://healthcare.ascension.org/-/media/project/ascension/healthcare/price-transparency-files/mi/381359180_ascension-borgess-allegan-hospital_standardcharges.xlsx
data/381490190_ascension-borgess-lee-hospital_standardcharges.xlsx


In [None]:
dfs

### Some problematic files:

* 272192831_ascension-st-vincent-dunn_standardcharges.xlsx doesn't have a Facility_BU_ID column (everything else is the same)
* ascension-medical-group-st-vincent-urgent-care.xlsx doesn't match the schema at all

### A script for matching names to CCNs