In [1]:
from collections import namedtuple
from pathlib import Path, PosixPath

import pandas as pd

from src import COMPLETED

In [2]:
# Working directory of the running kernel; the notebook globs it for .xlsx workbooks.
CURRENT_DIR = Path.cwd()

In [3]:
def parse_cms_from_fname(fname: PosixPath) -> str:
    """Return the leading token of a workbook file name (used as the ``cms`` field).

    The base name is cut at the first ``'_'`` when it contains one, otherwise
    at the first ``'-'``; everything before that separator is kept. If the
    name contains neither separator, the whole base name is kept.

    Args:
        fname: Path whose ``.name`` is inspected; parent directories are ignored.

    Returns:
        The leading token with every ``'~$'`` occurrence removed.
    """
    basename = fname.name
    delimiter = '_' if '_' in basename else '-'
    leading_token, _, _ = basename.partition(delimiter)
    # NOTE(review): '~$' is presumably the prefix of Office temporary lock
    # files picked up by the glob -- confirm.
    return leading_token.replace('~$', '')

def parse_location(fname: PosixPath) -> str:
    """Return a short location label taken from a workbook file name.

    The second token of the base name is selected (tokens are separated by
    ``'_'`` when the name contains one, otherwise by ``'-'``) and a fixed set
    of boilerplate phrases is stripped from it.

    Args:
        fname: Path whose ``.name`` is inspected; parent directories are ignored.

    Returns:
        The second token with the boilerplate phrases removed.

    Raises:
        IndexError: If the base name has no second token.
    """
    basename = fname.name
    delimiter = '_' if '_' in basename else '-'
    location = basename.split(delimiter)[1]

    # Removal order is significant: 'regionalmedicalcenter' has to go before
    # 'medicalcenter', otherwise a stray 'regional' would be left behind.
    boilerplate = (
        'providenceregionalmedicalcenter',
        'regionalmedicalcenter',
        'providence',
        'medicalcenter',
    )
    for phrase in boilerplate:
        location = location.replace(phrase, '')
    return location

def add_prices(prices: pd.Series) -> float:
    """Sum prices by accumulating whole cents, then convert back to float.

    Each non-null price is scaled by 100 and rounded to the nearest integer
    number of cents; the integers are summed and the total is divided by 100.
    Summing integers avoids accumulating binary floating-point error across
    many prices.

    Args:
        prices: Iterable of numeric prices (a Series or any other iterable).
            Null entries (anything ``pd.isnull`` reports as null) count as zero.

    Returns:
        The total as a float; ``0.0`` when ``prices`` is empty.
    """
    # Scale first, then round: truncating with int(p) before multiplying
    # would discard the fractional part of every price (1.50 -> 100 cents).
    # float() normalises numpy scalars so round() yields a plain Python int.
    cents = [0 if pd.isnull(p) else round(float(p) * 100) for p in prices]
    return sum(cents) / 100.0

In [4]:
# One record per workbook: bare file name plus the two tokens parsed out of it.
File = namedtuple('File', ['fname', 'cms', 'location'])

# Collect every .xlsx file sitting directly in the working directory.
files = [
    File(f.name, parse_cms_from_fname(f), parse_location(f))
    for f in CURRENT_DIR.glob('*.xlsx')
]

files

[File(fname='501326_providencemountcarmel_standardcharges.xlsx', cms='501326', location='mountcarmel'),
 File(fname='500014_providenceregionalmedicalcentereverett_standardcharges.xlsx', cms='500014', location='everett'),
 File(fname='500002_providencestmarymedicalcenter_standardcharges.xlsx', cms='500002', location='stmary'),
 File(fname='500054_providencesacredheartmedicalcenter_standardcharges.xlsx', cms='500054', location='sacredheart'),
 File(fname='501309_providencestjosephmedicalcenterofchewelah_standardcharges.xlsx', cms='501309', location='stjosephofchewelah'),
 File(fname='500024_providencestpeterhospital_standardcharges.xlsx', cms='500024', location='stpeterhospital'),
 File(fname='500019_providencecentraliahospital_standardcharges.xlsx', cms='500019', location='centraliahospital'),
 File(fname='500077_providenceholyfamilyhospital_standardcharges.xlsx', cms='500077', location='holyfamilyhospital'),
 File(fname='500058-kadlecregionalmedicalcenter-standardcharges.xlsx', cms='50

In [5]:
# Print the column headers of both pricing sheets for every workbook so the
# layouts can be compared by eye. The first four rows of each sheet are skipped.
for f in files:
    # f.fname is a bare file name, so it is resolved against the working directory.
    # Both sheets are loaded before anything is printed.
    a, b = (
        pd.read_excel(f.fname, sheet_name=sheet, skiprows=[0, 1, 2, 3])
        for sheet in ('Gross Charges', 'Discount Cash Price - Gross')
    )

    print(f.fname, a.columns, b.columns, '-' * 80, sep='\n')

501326_providencemountcarmel_standardcharges.xlsx
Index(['HOSPITAL SYSTEM CHARGE CODE', 'CHARGE DESCRIPTION',
       'CPT(R)/HCPCS Code', 'WMC LOCATION (Unit Price) [IP/OP]',
       'WMC LOCATION (Base Price) [IP/OP]',
       'WMC LABOR AND DELIVERY (37074000) [IP/OP]',
       'WMC DIAGNOSTIC IMAGING (37076300) [IP/OP]',
       'WMC SURGERY CLINIC (37070780) [IP/OP]',
       'WMC EMERGENCY PHYSICIANS (37070101) [IP/OP]',
       'WMC AMBULATORY SURGERY SERVICES (37074300) [OP]',
       'WMC EMERGENCY SERVICES (37070100) [IP/OP]',
       'WMC INTENSIVE CARE UNIT (37060100) [IP]',
       'WMC IV THERAPY (37077150) [IP/OP]',
       'WMC RESPIRATORY THERAPY (37077200) [IP/OP]',
       'WMC CARDIOLOGY SERVICES (37075900) [OP]'],
      dtype='object')
Index(['HOSPITAL SYSTEM CHARGE CODE', 'CHARGE DESCRIPTION',
       'CPT(R)/HCPCS Code',
       'WMC LOCATION (Unit Price) [IP/OP] DISCOUNT CASH PRICE',
       'WMC LOCATION (Base Price) [IP/OP] DISCOUNT CASH PRICE',
       'WMC LABOR AND DELIVER