In [8]:
import os
import sys
import time

import numpy as np
import pandas as pd
import psycopg

import credentials as cd
from data_cleaning import data_cleaning_hginfo

import warnings
# Suppress every Python warning raised from here on in this kernel session.
warnings.filterwarnings('ignore')

# Data Cleaning

In [9]:
# Command-line alternative for the two inputs, kept for reference:
# input_date = sys.argv[1]
# cms_file = sys.argv[2]
input_date = '2021-07-01'
cms_file = 'Hospital_General_Information-2021-07.csv'

# Absolute location of the CMS CSV file (directory prefix + file name).
file = f'/Users/arshmacbook/Desktop/36-614/Project/hospital_quality_files/{cms_file}'

# Project helper that loads and cleans the CMS file for the given date.
cms = data_cleaning_hginfo(input_date, file)

# 'Yes' / 'No' become True / False; any other value is mapped to NaN by Series.map.
yes_no_to_bool = {'Yes': True, 'No': False}
cms['emergency_services_provided'] = cms['Emergency Services'].map(yes_no_to_bool)

# Establishing SQL connection

In [10]:
# Open the PostgreSQL connection used by the rest of the notebook.
# The database name, user and password are read from the PGDATABASE, PGUSER and
# PGPASSWORD environment variables instead of being typed into the cell, so the
# cell is valid Python as committed and no secret is stored in the notebook.
# A variable that is not set raises KeyError naming the missing variable.
conn = psycopg.connect(
    host = "sculptor.stat.cmu.edu",
    dbname = os.environ["PGDATABASE"],
    user = os.environ["PGUSER"],
    password = os.environ["PGPASSWORD"],
)

In [11]:
# Cursor on the open connection; the later cells run their SQL statements through it.
cur = conn.cursor()

In [12]:
# Bookkeeping for the load: the rows that could not be inserted, plus running
# counts of successful and failed inserts (both start at zero).
error_rows_cms = pd.DataFrame()
num_rows_successfully_inserted_cms = num_rows_error_cms = 0

# Deleting all pre-existing rows in all tables

# Creating transaction

In [None]:
# Load every cleaned CMS row into the `ratings` table.
#
# The outer `conn.transaction()` wraps the whole load. Each row gets its own
# nested `conn.transaction()` block so that a failing INSERT only undoes that
# row and the loop can continue with the next one.
# NOTE(review): the original author's note says a row is added to `ratings`
# when the hospital already exists in the `address` table -- presumably a
# constraint on `ratings` rejects unknown hospitals; confirm against the schema.

# The statement text is the same for every row, so it is built once.
insert_rating_sql = (
    "INSERT into ratings "
    "(hospital_name, "
    "hospital_pk, "
    "collection_week, "
    "overall_quality_rating, "
    "type, "
    "emergency_services_provided) "
    "VALUES (%(hospital_name)s, "
    "%(hospital_pk)s, "
    "%(collection_week)s, "
    "%(overall_quality_rating)s, "
    "%(type)s, "
    "%(emergency_services_provided)s)"
)

# Rejected rows are collected as plain dicts and turned into a frame once after
# the loop. DataFrame.append was removed in pandas 2.0, and calling it inside
# the except handler would abort the whole load on the first bad row.
failed_rows_cms = []

with conn.transaction():
    for _, row in cms.iterrows():
        try:
            with conn.transaction():
                cur.execute(insert_rating_sql,
                            {'hospital_name' : str(row['Facility Name']),
                             'hospital_pk' : str(row['Facility ID']),
                             'collection_week' : str(row.collection_week),
                             'overall_quality_rating' : float(row['Hospital overall rating']),
                             'type' : str(row['Hospital Ownership']),
                             'emergency_services_provided' : row.emergency_services_provided})

        except Exception:
            # Deliberately broad: any failure (value conversion or database
            # error) sends the row to the error report instead of stopping the load.
            failed_rows_cms.append(dict(row))
            num_rows_error_cms += 1

        else:
            num_rows_successfully_inserted_cms += 1

# Add this run's failures to the error frame (kept as-is when nothing failed).
if failed_rows_cms:
    failed_frame_cms = pd.DataFrame(failed_rows_cms)
    if error_rows_cms.empty:
        error_rows_cms = failed_frame_cms
    else:
        error_rows_cms = pd.concat([error_rows_cms, failed_frame_cms], ignore_index = True)

# Committing the transaction

In [None]:
# Commit whatever transaction is currently open on the connection.
conn.commit()

# Creating CSV files for error rows

In [None]:
# Write the rows that failed to insert to a CSV in the current working directory,
# without the DataFrame index column.
error_rows_cms.to_csv("Error rows in CMS data set.csv", index = False)

# Printing the summary output

In [None]:
# Report how the load went. The row counts are printed next to their share of
# the cleaned CMS rows so that the "Number of rows" labels are accurate.
total_rows_cms = cms.shape[0]

# With no input rows the division would raise ZeroDivisionError; report 0 % instead.
if total_rows_cms:
    pct_inserted_cms = round(num_rows_successfully_inserted_cms / total_rows_cms * 100, 2)
    pct_error_cms = round(num_rows_error_cms / total_rows_cms * 100, 2)
else:
    pct_inserted_cms = 0.0
    pct_error_cms = 0.0

print("Number of rows successfully inserted:", num_rows_successfully_inserted_cms, f"({pct_inserted_cms} %)")
print("Number of rows unable to be inserted due to errors:", num_rows_error_cms, f"({pct_error_cms} %)")

# Analyses

In [None]:
def query_to_frame(cursor, query):
    """Run `query` on `cursor` and return the whole result set as a DataFrame.

    Column labels are taken from ``cursor.description`` so the frame carries the
    database column names rather than positional integers, and keeps those
    columns even when the query returns no rows.

    Parameters
    ----------
    cursor : an open database cursor (here the notebook-level psycopg cursor).
    query : str, the SQL text to execute.

    Returns
    -------
    pandas.DataFrame with one row per result row.
    """
    cursor.execute(query)
    column_names = [column.name for column in cursor.description]
    return pd.DataFrame(cursor.fetchall(), columns = column_names)


# Pull each table into memory in full for the analyses.
ratings = query_to_frame(cur, "select * from ratings")
capacity_info = query_to_frame(cur, "select * from capacity_info")
covid_info = query_to_frame(cur, "select * from covid_info")
address = query_to_frame(cur, "select * from address")

# Closing the SQL connection

In [15]:
# Close the database connection; `conn` and `cur` cannot be used after this cell.
conn.close()