In [None]:
#Importing Dependencies

import getpass
from urllib.parse import quote

import pandas as pd
import psycopg2
from sqlalchemy import create_engine

In [None]:
#Importing datasets from parquet file

stroke_data_whole = pd.read_parquet('Resources/cleaned_dataset/clean_stroke.parquet.gzip')
stroke_data_no_null_bmi =pd.read_parquet('Resources/cleaned_dataset/clean_stroke_bmi_removed.parquet.gzip')

In [None]:
#Check columns (They are almost identical Dataframes, some rows were dropped that had null values in the second one)

stroke_data_whole.columns

In [None]:
# Drop columns for medical table

stroke_medical = stroke_data_whole.drop(columns = ['Ever_Married',
       'Work_Type', 'Residence_Type'])
stroke_medical.rename(columns = {'ID#':"Identifier"}, inplace = True)
stroke_medical.head()

In [None]:
#Drop columns for medical, no nulls table

stroke_medical_no_bmi = stroke_data_no_null_bmi.drop(columns = ['Ever_Married',
       'Work_Type', 'Residence_Type'])
stroke_medical_no_bmi.rename(columns = {'ID#':"Identifier"}, inplace = True)
stroke_medical_no_bmi.isnull().sum()

In [None]:
#Drop columns for personal table

stroke_personal = stroke_data_whole.drop(columns = ['Gender', 'Age', 'Hypertension', 'Heart_Disease',
       'Avg_Glucose_Lvl', 'BMI', 'Smoker'])
stroke_personal.rename(columns = {'ID#':"Identifier"}, inplace = True)
stroke_personal.head()

In [None]:
#Get Postgres Username

username = input("What is your Postgres Username? (postgres by default)")

In [None]:
#Get Postgres Password

password = getpass.getpass(prompt= "What is your Postgres Password?")

In [None]:
#Get Postgres port number

port = input("What is your Postgres Port number?")

In [None]:
#Create the connection

conn = f'postgresql://{username}:{password}@localhost:{port}/SanAntonio_Strok_Pred'.format(username, password, port)

In [None]:
#Create Engine

engine = create_engine(conn)

#Create three tables (Medical, Personal, Medical with no nulls)

stroke_medical.to_sql('medical', engine)
stroke_medical_no_bmi.to_sql('medical_no_bmi', engine)
stroke_personal.to_sql('personal', engine)

In [None]:
#Query joining Personal and Medical on Identifier

stroke_w_null = pd.read_sql_query('SELECT personal."Identifier", medical."Age", \
                           medical."Gender", personal."Work_Type", personal."Residence_Type", \
                           personal."Ever_Married", medical."Hypertension", medical."Heart_Disease",\
                           medical."Avg_Glucose_Lvl", medical."BMI", medical."Smoker", personal."Stroke"\
                                FROM personal\
                                INNER JOIN medical\
                                ON personal."Identifier" = medical."Identifier";', conn)
stroke_w_null.isnull().sum()

In [None]:
#Query joining Personal and Medical(no Nulls) on Identifier

stroke_no_null = pd.read_sql_query('SELECT personal."Identifier", medical_no_bmi."Age", \
                           medical_no_bmi."Gender", personal."Work_Type", personal."Residence_Type", \
                           personal."Ever_Married", medical_no_bmi."Hypertension", medical_no_bmi."Heart_Disease",\
                           medical_no_bmi."Avg_Glucose_Lvl", medical_no_bmi."BMI", medical_no_bmi."Smoker", personal."Stroke"\
                                FROM personal\
                                INNER JOIN medical_no_bmi\
                                ON personal."Identifier" = medical_no_bmi."Identifier";', conn)
stroke_no_null.isnull().sum()

In [None]:
stroke_w_null.head()

In [None]:
stroke_no_null.head()