In [21]:
import psycopg
import pandas as pd
import time
import credentials as cd
import numpy as np
from data_cleaning import data_cleaning_hginfo

import warnings
warnings.filterwarnings('ignore')

# Data Cleaning

In [22]:
# Input dates and the CMS "Hospital General Information" CSV each one is paired with.
input_date_1, cms_file_1 = '2021-07-01', 'Hospital_General_Information-2021-07.csv'
input_date_2, cms_file_2 = '2022-01-01', 'Hospital_General_Information-2022-01.csv'
input_date_3, cms_file_3 = '2022-10-01', 'Hospital_General_Information-2022-10.csv'

# Directory holding the CSV files -- edit for your machine. Keep the trailing
# slash: the file name is appended by plain string concatenation.
path = '/Users/arshmacbook/Desktop/36-614/data_engineering_project/hhs_weekly_data_files/'

# Only the first date/file pair is loaded here; swap in another pair to load it.
file = path + cms_file_1
cms = data_cleaning_hginfo(input_date_1, file)  # project helper; the date must match `file`

# Recode the 'Yes'/'No' flag as booleans. With a dict, Series.map turns any
# other value into NaN.
cms['emergency_services_provided'] = cms['Emergency Services'].map(
    {'Yes': True, 'No': False}
)

# Establishing SQL connection

In [23]:
# Open the database connection used by every later cell. Credentials are read
# from the local `credentials` module (imported as `cd`) rather than being
# written into the notebook -- point these at your own entries.
conn = psycopg.connect(
    host="sculptor.stat.cmu.edu",
    dbname=cd.arsh_dbname,
    user=cd.arsh_username,
    password=cd.arsh_password,
)

In [24]:
# Single cursor reused for every INSERT issued by the load loop further down.
cur = conn.cursor()

In [25]:
# Bookkeeping for the insert loop: running success / failure tallies and an
# (initially empty) frame for rows that could not be inserted.
num_rows_successfully_inserted_cms = num_rows_error_cms = 0
error_rows_cms = pd.DataFrame()

# Creating transaction

In [26]:
# Load every row of `cms` into the `ratings` table, one INSERT per row.
#
# Rows that fail are counted AND kept (with the error text) so they can be
# inspected afterwards; previously the exception was discarded and
# `error_rows_cms` stayed empty, so the error CSV never contained anything.

# Reset the tallies so that re-running this cell does not accumulate counts
# from an earlier run and skew the summary percentages.
num_rows_successfully_inserted_cms = 0
num_rows_error_cms = 0
error_records_cms = []  # one dict per rejected row; turned into a frame once, after the loop

with conn.transaction():
    for row_index, row in cms.iterrows():
        try:
            # Nested transaction block: psycopg implements it as a SAVEPOINT, so
            # a failing row is rolled back on its own without aborting the
            # enclosing transaction or the rows already inserted.
            with conn.transaction():

                # NOTE(review): presumably `ratings.hospital_pk` references the
                # address table, so hospitals missing there are rejected here --
                # confirm against the schema.
                cur.execute("INSERT into ratings "
                            "(hospital_name, "
                            "hospital_pk, "
                            "collection_week, "
                            "overall_quality_rating, "
                            "type, "
                            "emergency_services_provided) "
                            "VALUES (%(hospital_name)s, "
                            "%(hospital_pk)s, "
                            "%(collection_week)s, "
                            "%(overall_quality_rating)s, "
                            "%(type)s, "
                            "%(emergency_services_provided)s)",
                            {'hospital_name' : str(row['Facility Name']),
                             'hospital_pk' : str(row['Facility ID']),
                             'collection_week' : str(row.collection_week),
                             'overall_quality_rating' : float(row['Hospital overall rating']),
                             'type' : str(row['Hospital Ownership']),
                             'emergency_services_provided' : row.emergency_services_provided})

        except Exception as e:
            # Best-effort load: keep going, but remember which row failed and why.
            # This also catches conversion errors raised while building the
            # parameters (e.g. float() on a non-numeric rating).
            failed_row = row.to_dict()
            failed_row['insert_error'] = f"{type(e).__name__}: {e}"
            error_records_cms.append(failed_row)
            num_rows_error_cms += 1

        else:
            num_rows_successfully_inserted_cms += 1

# Build the error frame in one go; growing a DataFrame row by row is quadratic
# and DataFrame.append no longer exists in pandas 2.x.
error_rows_cms = pd.DataFrame(error_records_cms)

# Committing the transaction

In [27]:
# Commit any transaction still open on the connection.
# NOTE(review): the outermost `with conn.transaction():` block in the insert
# cell appears to commit on exit already (psycopg 3 transaction contexts), so
# this call is likely a harmless no-op -- confirm before relying on it.
conn.commit()

# Creating CSV files for error rows

In [28]:
# Write the `error_rows_cms` frame to a CSV in the current working directory,
# leaving out the DataFrame index column.
error_rows_cms.to_csv("Error rows in CMS data set.csv", index = False)

# Printing the summary output

In [29]:
# Summarise the load. Despite the "Number of rows" wording, each figure printed
# is a share of the rows in `cms`, expressed in percent.
# max(..., 1) keeps an empty `cms` from raising ZeroDivisionError; with no rows
# nothing was attempted, so both figures then print as 0.0.
total_rows_cms = max(cms.shape[0], 1)
print("Number of rows successfully inserted:", round(num_rows_successfully_inserted_cms / total_rows_cms * 100, 2), "%")
print("Number of rows unable to be inserted due to errors:", round(num_rows_error_cms / total_rows_cms * 100, 2), "%")

Number of rows successfully inserted: 71.21 %
Number of rows unable to be inserted due to errors: 28.79 %


# Closing the SQL connection

In [30]:
# Release the database connection; `conn` and `cur` cannot be used after this
# cell runs without reconnecting.
conn.close()