## **Dataset 2**: Diabetes Readmission Data
- Method: Storing CSV in PostgreSQL
- Source: [Diabetes 130-US Hospitals for Years 1999-2008 (UCI Machine Learning Repository)](https://archive.ics.uci.edu/dataset/296/diabetes+130-us+hospitals+for+years+1999-2008)

### Storing Data in PostgreSQL

In [1]:
# import required libraries for PostgreSQL connection
import pandas.io.sql as sqlio
from sqlalchemy import create_engine, text, exc
import psycopg2

In [2]:
# initialize the connection string to PostgreSQL
# SQLAlchemy URL: psycopg2 driver, user "dap", host 127.0.0.1, port 5432, database "postgres"
# NOTE(review): the credentials are hard-coded in the notebook — consider loading them from the environment
connection_string = "postgresql+psycopg2://dap:dap@127.0.0.1:5432/postgres"

In [3]:
# verify that the PostgreSQL server is reachable by querying its version string
try:
    engine = create_engine(connection_string) # build the engine for connection_string
    with engine.connect() as connection:
        # read_sql_query returns a DataFrame; the single row holds the version text
        server_version = sqlio.read_sql_query(
            text("SELECT VERSION();"),
            connection
        )
except exc.SQLAlchemyError as dbError:
    print("PostgreSQL Error", dbError)
else:
    print(server_version["version"].values[0])
finally:
    # test for the *name* 'engine' (a string key of locals()), so cleanup is
    # skipped safely when create_engine() itself failed
    if 'engine' in locals():
        engine.dispose() # Engine has no close(); dispose() releases its pooled connections

PostgreSQL 17.0 (Debian 17.0-1.pgdg120+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 12.2.0-14) 12.2.0, 64-bit


In [4]:
# create the diabetes_db database, but only when pg_database has no entry for it
# NOTE(review): the other cells in view connect to the "postgres" database rather than
# diabetes_db — confirm which database is meant to hold the data
try:
    engine = create_engine(connection_string) # engine for the server in connection_string
    # AUTOCOMMIT: PostgreSQL does not allow CREATE DATABASE inside a transaction block
    with engine.connect().execution_options(isolation_level="AUTOCOMMIT") as connection:
        exists_query = text("SELECT 1 FROM pg_database WHERE datname='diabetes_db';")
        # fetchone() yields a row when the database is already registered, otherwise None
        database_exists = connection.execute(exists_query).fetchone() is not None
        if not database_exists:
            connection.execute(text("CREATE DATABASE diabetes_db;"))
except exc.SQLAlchemyError as dbError:
    print("PostgreSQL Error:", dbError)
finally:
    # release the engine's connections whether or not the statements succeeded
    if 'engine' in locals():
        engine.dispose()

In [5]:
# statement that removes any earlier copy of the table, so this cell can be re-run
drop_table_query1 = "DROP TABLE IF EXISTS readmission_data;"

# DDL for the 'readmission_data' table, created in whichever database
# connection_string points to: INTEGER for the id/count columns,
# VARCHAR(255) for every other column
table_create_string = """
    CREATE TABLE IF NOT EXISTS readmission_data (
        encounter_id INTEGER,
        patient_nbr INTEGER,
        race VARCHAR(255),
        gender VARCHAR(255),
        age VARCHAR(255),
        weight VARCHAR(255),
        admission_type_id INTEGER,
        discharge_disposition_id INTEGER,
        admission_source_id INTEGER,
        time_in_hospital INTEGER,
        payer_code VARCHAR(255),
        medical_specialty VARCHAR(255),
        num_lab_procedures INTEGER,
        num_procedures INTEGER,
        num_medications INTEGER,
        number_outpatient INTEGER,
        number_emergency INTEGER,
        number_inpatient INTEGER,
        diag_1 VARCHAR(255),
        diag_2 VARCHAR(255),
        diag_3 VARCHAR(255),
        number_diagnoses INTEGER,
        max_glu_serum VARCHAR(255),
        A1Cresult VARCHAR(255),
        metformin VARCHAR(255),
        repaglinide VARCHAR(255),
        nateglinide VARCHAR(255),
        chlorpropamide VARCHAR(255),
        glimepiride VARCHAR(255),
        acetohexamide VARCHAR(255),
        glipizide VARCHAR(255),
        glyburide VARCHAR(255),
        tolbutamide VARCHAR(255),
        pioglitazone VARCHAR(255),
        rosiglitazone VARCHAR(255),
        acarbose VARCHAR(255),
        miglitol VARCHAR(255),
        troglitazone VARCHAR(255),
        tolazamide VARCHAR(255),
        examide VARCHAR(255),
        citoglipton VARCHAR(255),
        insulin VARCHAR(255),
        glyburide_metformin VARCHAR(255),
        glipizide_metformin VARCHAR(255),
        glimepiride_pioglitazone VARCHAR(255),
        metformin_rosiglitazone VARCHAR(255),
        metformin_pioglitazone VARCHAR(255),
        change VARCHAR(255),
        diabetesMed VARCHAR(255),
        readmitted VARCHAR(255)
    );
"""

try:
    engine = create_engine(connection_string) # engine for the server in connection_string
    with engine.connect().execution_options(isolation_level="AUTOCOMMIT") as connection:
        # order matters: drop the old table first, then create the fresh one
        for ddl_statement in (drop_table_query1, table_create_string):
            connection.execute(text(ddl_statement))
except exc.SQLAlchemyError as dbError:
    print("PostgreSQL Error:", dbError)
finally:
    # release the engine's connections whether or not the statements succeeded
    if 'engine' in locals():
        engine.dispose()

In [6]:
# psycopg2.connect() is given a plain "postgresql://" URI, without the SQLAlchemy
# "+psycopg2" driver suffix
# NOTE: this rebinds the notebook-level connection_string defined in an earlier cell
connection_string = "postgresql://dap:dap@127.0.0.1:5432/postgres"

# path of CSV file
csv_file_path = "readmission_diabetic_dataset2.csv"

# bind both handles up front so the finally block never touches an undefined name
conn = None
cursor = None

try:
    # connect to the PostgreSQL server
    conn = psycopg2.connect(connection_string)
    cursor = conn.cursor()

    # open CSV file and copy its contents into the PostgreSQL table
    with open(csv_file_path, 'r') as f:
        # skip the header row of the CSV file
        next(f)

        # bulk-load the remaining lines with COPY: fields are split on ',' and a
        # field equal to '?' is stored as NULL
        # NOTE(review): copy_from uses COPY's text format, which does not interpret
        # CSV quoting — assumes no field contains a quoted comma; confirm for this file
        # 'a1cresult' and 'diabetesmed' are lower-case because PostgreSQL folds the
        # unquoted names from the CREATE TABLE statement to lower case
        cursor.copy_from(f, 'readmission_data', sep=',', null='?', columns=(
            'encounter_id', 'patient_nbr', 'race', 'gender', 'age', 'weight',
            'admission_type_id', 'discharge_disposition_id', 'admission_source_id',
            'time_in_hospital', 'payer_code', 'medical_specialty', 'num_lab_procedures',
            'num_procedures', 'num_medications', 'number_outpatient', 'number_emergency',
            'number_inpatient', 'diag_1', 'diag_2', 'diag_3', 'number_diagnoses', 'max_glu_serum', 'a1cresult',
            'metformin', 'repaglinide', 'nateglinide', 'chlorpropamide', 'glimepiride', 'acetohexamide', 'glipizide',
            'glyburide', 'tolbutamide', 'pioglitazone', 'rosiglitazone', 'acarbose', 'miglitol', 'troglitazone',
            'tolazamide', 'examide', 'citoglipton', 'insulin', 'glyburide_metformin', 'glipizide_metformin',
            'glimepiride_pioglitazone', 'metformin_rosiglitazone', 'metformin_pioglitazone', 'change', 'diabetesmed', 'readmitted'
        ))
    # make the copied rows permanent
    conn.commit()
    print("CSV data successfully imported into 'readmission_data' table.")
except Exception as e:
    # any failure (connection, file access, COPY) is reported instead of raised;
    # the uncommitted work is discarded when the connection is closed below
    print(f"Error: {e}")
finally:
    # close only what was actually opened: the cursor may not exist if
    # conn.cursor() failed, and the connection must still be closed in that case
    if cursor is not None:
        cursor.close()
    if conn is not None:
        conn.close()

CSV data successfully imported into 'readmission_data' table.
