In [8]:
import psycopg2
import sqlite3
from pathlib import Path
import pandas as pd
import csv
from tqdm import tqdm

In [9]:
# Name of the table every cell below reads from and writes to.
# Alternative table name kept for reference:
# TABLE_NAME = "ReportedProcedures"
TABLE_NAME = "HospitalProcedure"

# Directory that is scanned for one sub-directory of CSV sheets per hospital.
INPUT_PATH = Path("processing", "database-input")

# Column descriptors, in the order the values are inserted from each CSV row.
COLUMNS = [
    {"name": column_name}
    for column_name in (
        "hospital_name",
        "cpt_code",
        "procedure_name",
        "average_charge",
    )
]


### Postgres (disabled)
# The SQL in this notebook (AUTOINCREMENT, "?" placeholders) targets SQLite, and
# `conn` is rebound to the SQLite connection in the SQLite section of this cell.
# A connection opened here would therefore never be used or closed, and would
# make the notebook fail whenever no local Postgres server is running.
# Uncomment only when deliberately targeting Postgres, and prefer reading the
# user/password from the environment instead of hard-coding them.
# conn = psycopg2.connect(database="HealthcareSense",
#                         user='felixhab', password='',
#                         host='127.0.0.1', port='5432'
# )
# conn.autocommit = True

### SQLite
# The database file is kept in a "database" folder next to the notebook;
# the folder is created on first run.
database_dir = Path(".", "database")
database_dir.mkdir(exist_ok=True)
database_path = database_dir.joinpath('healthcare_data.db')


def get_conn():
    """Open and return a new sqlite3 connection to the file at `database_path`."""
    connection = sqlite3.connect(database_path)
    return connection


# Connection shared by the cells below.
conn = get_conn()

In [10]:
cursor = conn.cursor()

# Start from a clean slate on every run. IF EXISTS turns the drop into a no-op
# on a brand-new database where the table has not been created yet, so this
# cell is safe under "Restart Kernel -> Run All".
sql0 = f"""DROP TABLE IF EXISTS {TABLE_NAME}"""
cursor.execute(sql0)

# Recreate the table with an auto-incrementing surrogate key followed by the
# four data columns, named after the entries of COLUMNS (in that order).
# NOTE(review): average_charge appears to be stored in cents -- a later cell
# multiplies it by .01 under a "cents to dollars" comment; confirm.
sql = f'''CREATE TABLE {TABLE_NAME}(
id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
{COLUMNS[0]["name"]} varchar(100) NOT NULL,
{COLUMNS[1]["name"]} int NOT NULL,
{COLUMNS[2]["name"]} varchar(300),
{COLUMNS[3]["name"]} int);'''
cursor.execute(sql)

conn.commit()


In [11]:
def get_names(columns):
    """Return a parenthesised SQL column list for the given column descriptors.

    Args:
        columns: iterable of dicts, each carrying the column name under "name".

    Returns:
        A string such as '("hospital_name", "cpt_code")', suitable for the
        column-list position of an INSERT statement.

    Names are emitted as double-quoted SQL identifiers (embedded double quotes
    are doubled). Building the list by hand instead of via str(tuple(...))
    avoids the trailing comma Python prints for one-element tuples, which is
    not valid SQL, and avoids passing single-quoted string literals where an
    identifier is expected.
    """
    quoted = ['"' + col["name"].replace('"', '""') + '"' for col in columns]
    return "(" + ", ".join(quoted) + ")"

cursor = conn.cursor()

# ### Works only in postgres
# for output_csv_path in INPUT_PATH.iterdir():
#     if output_csv_path.suffix != '.csv':
#         continue

#     sql2 = f'''COPY {TABLE_NAME}(hospital_name,cpt_code,
#     procedure_name,average_charge)
#     FROM '{output_csv_path.absolute()}'
#     DELIMITER ','
#     CSV HEADER;'''

#     # TODO: Handle filename escaping
#     try:
#         cursor.execute(sql2)
#     except:
#         print(f"Failed to load csv file {output_csv_path}")

# The INSERT statement does not depend on the file being loaded, so build it
# once. The number of "?" placeholders follows COLUMNS so the two cannot drift.
placeholders = ",".join("?" for _ in COLUMNS)
sql2 = f"INSERT INTO {TABLE_NAME} {get_names(COLUMNS)} VALUES({placeholders});"

# INPUT_PATH holds one directory per hospital; each contains CSV sheets.
for hospital in tqdm(INPUT_PATH.iterdir()):
    if not hospital.is_dir():
        continue
    for sheet in hospital.iterdir():
        # Skip sub-directories, files whose name starts with "~", and
        # anything that is not a .csv file.
        if sheet.is_dir() or sheet.name.startswith('~') or sheet.suffix != '.csv':
            continue

        # newline="" is what the csv module requires so that line breaks
        # inside quoted fields are parsed correctly.
        with open(sheet, newline="") as f:
            contents = csv.reader(f)
            # Skip CSV header
            next(contents, None)
            # Each remaining row must supply one value per entry in COLUMNS.
            cursor.executemany(sql2, contents)

conn.commit()

318it [00:00, 319.68it/s]


In [12]:
cursor = conn.cursor()

# Every row that has a CPT code, ordered by ascending average charge.
sql3 = (
    "\n"
    f"select * from {TABLE_NAME} \n"
    "WHERE cpt_code IS NOT NULL\n"
    "ORDER BY average_charge;\n"
)
result = pd.read_sql(sql3, conn)

# cents to dollars
result["average_charge"] *= 0.01

conn.commit()

result

Unnamed: 0,id,hospital_name,cpt_code,procedure_name,average_charge
0,1235,Childrens Hospital at Mission,87254,QS-VIRAL CULSTURE SHELL VIAL,0.0
1,1282,Childrens Hospital at Mission,83525,QS-INSULIN TOTAL,0.0
2,1355,Childrens Hospital at Mission,82374,CO2,0.0
3,1641,Childrens Hospital at Mission,94002,VENITLATOR INITIAL DAY,0.0
4,1805,Childrens Hospital at Mission,29105,APPLY SPLNT LONG ARM,0.0
...,...,...,...,...,...
219878,157969,O'Connor Hospital,33264,Hc Replace Icd Gen Multiple Lead,191188.0
219879,158038,O'Connor Hospital,33270,Hc Ins/rep Subq Defibrillator,191188.0
219880,32360,Emanuel Medical Center,33289,TCAT IMPL WRLS P-ART PRS SNR,202881.0
219881,199616,Scripps Memorial Hospital - La Jolla,69930,IMPLANT COCHLEAR DEVICE,205198.0


In [17]:
cursor = conn.cursor()

# The distinct CPT codes present in the table, in ascending order.
sql3 = (
    "\n"
    f"SELECT DISTINCT cpt_code from {TABLE_NAME} \n"
    "WHERE cpt_code IS NOT NULL\n"
    "ORDER BY cpt_code;\n"
    "\n"
)
result = pd.read_sql(sql3, conn)

conn.commit()

result

Unnamed: 0,cpt_code
0,103
1,120
2,124
3,140
4,142
...,...
8694,99605
8695,99606
8696,99607
8697,99995


In [6]:
# Close the shared database connection. Cells that query through `conn`
# have to run before this one (or `conn` must be reopened first).
conn.close()

In [7]:
# Display whatever DataFrame is currently bound to `result`; it is held in
# memory, so no open database connection is needed here.
result

Unnamed: 0,id,hospital_name,cpt_code,procedure_name,average_charge
0,1235,Childrens Hospital at Mission,87254,QS-VIRAL CULSTURE SHELL VIAL,0.0
1,1282,Childrens Hospital at Mission,83525,QS-INSULIN TOTAL,0.0
2,1355,Childrens Hospital at Mission,82374,CO2,0.0
3,1641,Childrens Hospital at Mission,94002,VENITLATOR INITIAL DAY,0.0
4,1805,Childrens Hospital at Mission,29105,APPLY SPLNT LONG ARM,0.0
...,...,...,...,...,...
219878,157969,O'Connor Hospital,33264,Hc Replace Icd Gen Multiple Lead,191188.0
219879,158038,O'Connor Hospital,33270,Hc Ins/rep Subq Defibrillator,191188.0
219880,32360,Emanuel Medical Center,33289,TCAT IMPL WRLS P-ART PRS SNR,202881.0
219881,199616,Scripps Memorial Hospital - La Jolla,69930,IMPLANT COCHLEAR DEVICE,205198.0
