In [30]:
import csv
import sqlite3
import pandas as pd
import numpy as np

# Path of the SQLite database file; passed to create_connection() below.
database = "healthCC.db"

def create_connection(db_file):
    """ create a database connection to the SQLite database
        specified by db_file

    A failure to connect is printed rather than raised, so callers must
    check the result for None.

    :param db_file: database file
    :return: Connection object or None
    """
    try:
        return sqlite3.connect(db_file)
    except sqlite3.Error as e:
        # sqlite3.Error is the base class of the module's exceptions; the
        # bare name ``Error`` is not defined in this file.
        print(e)

    return None

def create_table(conn, create_table_sql):
    """ create a table from the create_table_sql statement

    SQLite errors raised while executing the statement are printed and
    swallowed; nothing is committed here.

    :param conn: Connection object
    :param create_table_sql: a CREATE TABLE statement
    :return: None
    """
    try:
        c = conn.cursor()
        c.execute(create_table_sql)
    except sqlite3.Error as e:
        # sqlite3.Error is the base class of the module's exceptions; the
        # bare name ``Error`` is not defined in this file.
        print(e)

## Create table queries
# Each statement uses IF NOT EXISTS, so executing it against a database that
# already contains the table is a no-op. None of the tables declares a
# FOREIGN KEY constraint; the *_id columns are plain integers.

# One row per hospital, keyed by provider_id.
create_hospital_table = """ CREATE TABLE IF NOT EXISTS hospitals (
                                        provider_id INTEGER PRIMARY KEY,
                                        name TEXT NOT NULL,
                                        type TEXT NOT NULL,
                                        phone INTEGER,
                                        rating INTEGER
                                    ); """
# Postal addresses, keyed by an integer id.
create_address_table = """ CREATE TABLE IF NOT EXISTS address (
                                        id INTEGER PRIMARY KEY,
                                        address TEXT NOT NULL,
                                        state TEXT,
                                        city TEXT,
                                        zip TEXT NOT NULL,
                                        county TEXT
                                    ); """
# Lookup table of ownership type labels.
create_ownership_type_table = """ CREATE TABLE IF NOT EXISTS ownership_type (
                                        id INTEGER PRIMARY KEY,
                                        ow_type TEXT NOT NULL
                                    ); """
# Link table pairing a hospital id with an address id (composite primary key).
create_hospital_address_table = """ CREATE TABLE IF NOT EXISTS hospital_address (
                                        hospital_id INTEGER NOT NULL,
                                        address_id INTEGER NOT NULL,
                                        PRIMARY KEY (hospital_id, address_id)
                                    ); """
# Link table pairing a hospital id with an ownership type id (composite primary key).
create_hospital_ownership_table = """ CREATE TABLE IF NOT EXISTS hospital_ownership (
                                        hospital_id INTEGER NOT NULL,
                                        ownership_id INTEGER NOT NULL,
                                        PRIMARY KEY (hospital_id, ownership_id)
                                    ); """
# Per-hospital comparison/flag values; every measure column is a nullable INTEGER.
create_hospital_comparison_table = """ CREATE TABLE IF NOT EXISTS hospital_comparison (
                                        id INTEGER NOT NULL PRIMARY KEY,
                                        hospital_id INTEGER NOT NULL,
                                        emergency_services INTEGER,
                                        ehr INTEGER, 
                                        mortality INTEGER, 
                                        safety INTEGER, 
                                        readmission INTEGER, 
                                        patient_experience INTEGER,
                                        effectiveness INTEGER,
                                        timeliness INTEGER,
                                        medical_imaging INTEGER
                                    ); """

# create a database connection
conn = create_connection(database)

if conn is not None:
    try:
        # create all tables
        for create_sql in (create_hospital_table,
                           create_address_table,
                           create_ownership_type_table,
                           create_hospital_address_table,
                           create_hospital_ownership_table,
                           create_hospital_comparison_table):
            create_table(conn, create_sql)

        # Commit only when a connection exists; calling commit()/close() on
        # None would raise AttributeError right after the error message.
        conn.commit()
    finally:
        conn.close()
else:
    print("Error! cannot create the database connection.")


# Reopen the database for the data-loading step that follows.
conn = create_connection(database)

def insert_hospital(cur, hospital_data):
    """ add one row to the hospitals table
    :param cur: cursor used to run the INSERT
    :param hospital_data: record indexed by the source column names
    :return: lastrowid of the cursor after the insert
    """
    # Source columns, in the order of the target columns
    # (provider_id, name, type, phone, rating).
    source_columns = ('Provider ID',
                      'Hospital Name',
                      'Hospital Type',
                      'Phone Number',
                      'Hospital overall rating')
    hp_sql = """INSERT INTO hospitals(provider_id, name, type, phone, rating)
            VALUES(?,?,?,?,?)"""

    values = tuple(hospital_data[column] for column in source_columns)
    cur.execute(hp_sql, values)

    return cur.lastrowid

def insert_address(cur, address_data):
    """ add one row to the address table
    :param cur: cursor used to run the INSERT
    :param address_data: record indexed by the source column names
    :return: lastrowid of the cursor after the insert
    """
    # Source columns, in the order of the target columns
    # (address, state, city, zip, county).
    source_columns = ('Address',
                      'State',
                      'City',
                      'ZIP Code',
                      'County Name')
    add_sql = """INSERT INTO address(address, state, city, zip, county) 
                VALUES(?,?,?,?,?)"""

    values = tuple(address_data[column] for column in source_columns)
    cur.execute(add_sql, values)

    return cur.lastrowid

def insert_hospital_address(conn, hospital_id, address_id):
    """ insert into hospital_address table 
    :param conn: object the statement is executed on. The script passes a
        cursor here; a connection works as well, because execute() on either
        one returns the cursor that ran the statement.
    :param hospital_id: hospital id
    :param address_id: address id
    :return: current inserted row id
    """
    ## 'Hospital Id','Address Id'
    hp_add_sql = """INSERT INTO hospital_address(hospital_id, address_id) 
                    VALUES(?,?)""" 
    hp_add_data = (hospital_id, address_id)

    # Use the argument that was passed in, not a module-level ``cur`` that
    # may not exist (or may belong to another connection).
    executed = conn.execute(hp_add_sql, hp_add_data)

    return executed.lastrowid

def insert_ownership_type(cur, ownership_data):
    """ look up an ownership type, inserting it when it is not stored yet
    :param cur: cursor used to run the SELECT and, if needed, the INSERT
    :param ownership_data: ownership type value to look up
    :return: first column of the existing row, or the lastrowid of the
        newly inserted row
    """
    ## 'Ownership Type'
    params = (ownership_data,)
    cur.execute("""SELECT * FROM ownership_type WHERE ow_type=?""", params)
    existing = cur.fetchone()

    # Reuse the stored row when the value is already present.
    if existing is not None:
        return existing[0]

    cur.execute("""INSERT INTO ownership_type('ow_type')
                        VALUES(?)""", params)
    return cur.lastrowid

def insert_hospital_ownership(cur, hospital_id, ow_id):
    """ add one row to the hospital_ownership table
    :param cur: cursor used to run the INSERT
    :param hospital_id: hospital id
    :param ow_id: ownership type id
    :return: lastrowid of the cursor after the insert
    """
    hp_ow_sql = """INSERT INTO hospital_ownership(hospital_id, ownership_id) 
                    VALUES(?,?)"""

    cur.execute(hp_ow_sql, (hospital_id, ow_id))
    return cur.lastrowid

def insert_hospital_comparison(cur, hp_comp_data, hp_id):
    """ add one row to the hospital_comparison table
    :param cur: cursor used to run the INSERT
    :param hp_comp_data: record indexed by the source column names
    :param hp_id: hospital id stored in the hospital_id column
    :return: lastrowid of the cursor after the insert
    """
    # Source columns, in the order of the target columns that follow
    # hospital_id in the INSERT statement.
    source_columns = ('Emergency Services',
                      'Meets criteria for meaningful use of EHRs',
                      'Mortality national comparison',
                      'Safety of care national comparison',
                      'Readmission national comparison',
                      'Patient experience national comparison',
                      'Effectiveness of care national comparison',
                      'Timeliness of care national comparison',
                      'Efficient use of medical imaging national comparison')
    hp_comp_sql = """INSERT INTO hospital_comparison(hospital_id, emergency_services, ehr, mortality, safety, readmission, 
                        patient_experience, effectiveness, timeliness, medical_imaging) 
                    VALUES(?,?,?,?,?,?,?,?,?,?)"""

    values = (hp_id,) + tuple(hp_comp_data[column] for column in source_columns)
    cur.execute(hp_comp_sql, values)

    return cur.lastrowid


chunksize = 500

# The value mappings do not depend on the chunk, so they are built once.
# Hospital Type ## Replace 'Acute Care Hospitals'=1, 'Critical Access Hospitals'=2, 'Childrens'=3
HType_mapping = {'Acute Care Hospitals': 1, 'Critical Access Hospitals': 2, 'Childrens': 3}
# 'Meets criteria for meaningful use of EHRs' ## Replace Y=1, nan=NaN
EHR_mapping = {'Y': 1, 'nan': np.nan}
# 'Emergency Services' ## Replace Yes=1, No=0
ES_mapping = {'Yes': 1, 'No': 0, 'nan': np.nan}
# Same as the national average = 0, Below the national average = -1, Above the national average = 1, Not Available = NaN
MC_mapping = {'Same as the national average': 0,
              'Below the national average': -1,
              'Above the national average': 1,
              'Not Available': np.nan}
# Columns that all share MC_mapping.
comparison_columns = ['Mortality national comparison',
                      'Safety of care national comparison',
                      'Readmission national comparison',
                      'Patient experience national comparison',
                      'Effectiveness of care national comparison',
                      'Timeliness of care national comparison',
                      'Efficient use of medical imaging national comparison']

cur = conn.cursor()
try:
    for df in pd.read_csv('data/hospital_gen_info.csv', chunksize=chunksize, iterator=True):
        # Assign the replaced column back instead of using inplace=True on a
        # column selection: that chained in-place form does not update ``df``
        # when pandas Copy-on-Write is active.
        df['Hospital Type'] = df['Hospital Type'].replace(HType_mapping)
        df['Meets criteria for meaningful use of EHRs'] = (
            df['Meets criteria for meaningful use of EHRs'].replace(EHR_mapping))
        df['Emergency Services'] = df['Emergency Services'].replace(ES_mapping)

        # 'Hospital overall rating ## Replace Not Available as NaN
        df['Hospital overall rating'] = df['Hospital overall rating'].replace({'Not Available': np.nan})

        for column in comparison_columns:
            df[column] = df[column].replace(MC_mapping)

        for index, row in df.iterrows():
            hospital_id = insert_hospital(cur, row)
            address_id = insert_address(cur, row)

            insert_hospital_address(cur, hospital_id, address_id)

            ow_id = insert_ownership_type(cur, row['Hospital Ownership'])

            insert_hospital_ownership(cur, hospital_id, ow_id)

            insert_hospital_comparison(cur, row, hospital_id)

    conn.commit()
except Exception:
    # hospitals.provider_id is a PRIMARY KEY, so loading a provider that is
    # already stored raises sqlite3.IntegrityError. Undo the partial load so
    # the database is left as it was, then let the error propagate.
    conn.rollback()
    raise
finally:
    conn.close()
       

IntegrityError: UNIQUE constraint failed: hospitals.provider_id