In [1]:
# Dependencies and Setup
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Float
from sqlalchemy.orm import Session

from config import username, password, host, database

## Read in CSV files

In [2]:
# Locations of the two input CSVs (relative to this notebook)
race_filepath = '../../../machine_learning/Resources/race_data.csv'
non_race_filepath = '../../../machine_learning/Resources/non_race_data.csv'

# Load each CSV into its own DataFrame
race_df, non_race_df = (
    pd.read_csv(csv_path) for csv_path in (race_filepath, non_race_filepath)
)

In [5]:
# Preview the first rows of the race-level data to sanity-check the load
race_df.head()

Unnamed: 0,state,state_code,year,race,hispanic_origin,births_by_race,deaths_by_race,mmr_by_race,population_by_race,id,state_abbv,latitude,longitude
0,Alabama,1,2016,Black or African American,Not Hispanic or Latino,17989,14,77.83,0,US.AL,AL,32.318231,-86.902298
1,Alabama,1,2016,White,Not Hispanic or Latino,35319,18,50.96,0,US.AL,AL,32.318231,-86.902298
2,Alabama,1,2017,Black or African American,Not Hispanic or Latino,18354,25,136.21,704201,US.AL,AL,32.318231,-86.902298
3,Alabama,1,2017,White,Not Hispanic or Latino,34784,15,43.12,1655605,US.AL,AL,32.318231,-86.902298
4,Alabama,1,2018,Black or African American,Not Hispanic or Latino,17939,14,78.04,706754,US.AL,AL,32.318231,-86.902298


In [6]:
# Column dtypes and non-null counts; the Race table schema defined later mirrors these
race_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 388 entries, 0 to 387
Data columns (total 13 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   state               388 non-null    object 
 1   state_code          388 non-null    int64  
 2   year                388 non-null    int64  
 3   race                388 non-null    object 
 4   hispanic_origin     388 non-null    object 
 5   births_by_race      388 non-null    int64  
 6   deaths_by_race      388 non-null    int64  
 7   mmr_by_race         388 non-null    float64
 8   population_by_race  388 non-null    int64  
 9   id                  388 non-null    object 
 10  state_abbv          388 non-null    object 
 11  latitude            388 non-null    float64
 12  longitude           388 non-null    float64
dtypes: float64(3), int64(5), object(5)
memory usage: 39.5+ KB


In [7]:
# Preview the first rows of the state/year-level (non-race) data
non_race_df.head()

Unnamed: 0,year,state,id,state_code,latitude,longitude,deaths,births,maternal_mortality_ratio,population,...,prem_death_val,smoking_val,uninsured_val,all_determs_val,all_outcomes_val,chlamydia_val,prem_death_ri_val,teen_birth_val,primary_care_val,low_birthweight_val
0,2015,Alabama,US.AL,AL,32.318231,-86.902298,12.0,59657.0,20.11,2505795.0,...,10095.0,21.1,12.9,-0.371,-0.325,611.0,1.2,34.3,103.5,10.0
1,2016,Alabama,US.AL,AL,32.318231,-86.902298,35.0,59151.0,59.17,2507714.0,...,10097.0,21.4,11.1,-0.427,-0.366,600.2,1.1,32.0,116.4,10.1
2,2017,Alabama,US.AL,AL,32.318231,-86.902298,41.0,58941.0,69.56,2514911.0,...,10321.0,21.5,9.6,-0.427,-0.335,543.6,1.2,30.1,119.3,10.4
3,2018,Alabama,US.AL,AL,32.318231,-86.902298,31.0,57761.0,53.67,2523756.0,...,10720.0,20.9,9.3,-0.483,-0.356,553.6,1.1,28.4,122.8,10.3
4,2019,Alabama,US.AL,AL,32.318231,-86.902298,35.0,58615.0,59.71,2533668.0,...,10435.0,19.2,9.7,-0.437,-0.383,614.1,1.2,27.0,122.8,10.3


In [8]:
# Column dtypes and non-null counts; the NonRace table schema defined later mirrors these
non_race_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 308 entries, 0 to 307
Data columns (total 42 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   year                      308 non-null    int64  
 1   state                     308 non-null    object 
 2   id                        308 non-null    object 
 3   state_code                308 non-null    object 
 4   latitude                  308 non-null    float64
 5   longitude                 308 non-null    float64
 6   deaths                    308 non-null    float64
 7   births                    308 non-null    float64
 8   maternal_mortality_ratio  308 non-null    float64
 9   population                308 non-null    float64
 10  employer                  308 non-null    float64
 11  non_group                 308 non-null    float64
 12  medicaid                  308 non-null    float64
 13  medicare                  308 non-null    float64
 14  military  

## Connect to PostgreSQL database

In [9]:
# Declarative base class: the model classes below inherit from it, and its
# `Base.metadata` registry is what the later cells use to look up and create tables.
Base = declarative_base()

In [10]:
# Create table schema
class Race(Base):
    """ORM model for the ``race_data`` table.

    The columns mirror the 13 columns of ``race_df`` in the same order:
    int64 columns are declared ``Integer``, float64 columns ``Float`` and
    object (string) columns ``String(255)``.  The primary key is the
    composite of ``year``, ``race``, ``hispanic_origin`` and ``state_abbv``.
    """
    __tablename__ = 'race_data'
    # Intended to let this cell be re-run without SQLAlchemy rejecting the
    # 'race_data' table that is already registered on Base.metadata.
    __table_args__ = {'extend_existing': True}
    state = Column(String(255))
    # Numeric state code (1 for Alabama in the preview), not the abbreviation
    state_code = Column(Integer)
    year = Column(Integer, primary_key=True)
    race = Column(String(255), primary_key=True)
    hispanic_origin = Column(String(255), primary_key=True)
    births_by_race = Column(Integer)
    deaths_by_race = Column(Integer)
    mmr_by_race = Column(Float)
    # NOTE(review): the 2016 rows in the preview show 0 here — presumably a
    # placeholder for missing population data; confirm before using it.
    population_by_race = Column(Integer)
    # String identifier such as 'US.AL' (not a surrogate key)
    id = Column(String(255))
    # Two-letter state abbreviation (e.g. 'AL'); part of the primary key
    state_abbv = Column(String(255), primary_key=True)
    latitude = Column(Float)
    longitude = Column(Float)

In [12]:
class NonRace(Base):
    """ORM model for the ``non_race_data`` table.

    Declares the 42 columns of ``non_race_df`` in the same order.  The
    primary key is the composite of ``year`` and ``state_code``.  Apart from
    ``year`` and the string columns, every column is ``Float``, matching the
    float64 dtypes reported by ``non_race_df.info()`` for the columns it lists.
    """
    __tablename__ = 'non_race_data'
    # Intended to let this cell be re-run without SQLAlchemy rejecting the
    # 'non_race_data' table that is already registered on Base.metadata.
    __table_args__ = {'extend_existing': True}
    year = Column(Integer, primary_key=True)
    state = Column(String(255))
    # String identifier such as 'US.AL'
    id = Column(String(255))
    # Here state_code holds the two-letter abbreviation (e.g. 'AL'), unlike
    # race_data where state_code is numeric and the abbreviation is state_abbv.
    state_code = Column(String(255), primary_key=True)
    latitude = Column(Float)
    longitude = Column(Float)
    # deaths / births / population are float64 in the CSV, hence Float
    deaths = Column(Float)
    births = Column(Float)
    maternal_mortality_ratio = Column(Float)
    population = Column(Float)
    employer = Column(Float)
    non_group = Column(Float)
    medicaid = Column(Float)
    medicare = Column(Float)
    military = Column(Float)
    uninsured = Column(Float)
    # Columns with the *_val suffix follow, all declared Float
    air_pollution_val = Column(Float)
    cancer_death_val = Column(Float)
    cardio_death_val = Column(Float)
    child_pov_val = Column(Float)
    choles_check_val = Column(Float)
    dent_vis_val = Column(Float)
    dentists_val = Column(Float)
    diabetes_val = Column(Float)
    drug_deaths_val = Column(Float)
    health_stat_fem_val = Column(Float)
    immun_child_val = Column(Float)
    income_ineq_val = Column(Float)
    infant_mort_val = Column(Float)
    infect_dis_val = Column(Float)
    obesity_val = Column(Float)
    phys_inac_val = Column(Float)
    prem_death_val = Column(Float)
    smoking_val = Column(Float)
    # Distinct from the `uninsured` column declared above
    uninsured_val = Column(Float)
    all_determs_val = Column(Float)
    all_outcomes_val = Column(Float)
    chlamydia_val = Column(Float)
    prem_death_ri_val = Column(Float)
    teen_birth_val = Column(Float)
    primary_care_val = Column(Float)
    low_birthweight_val = Column(Float)

In [13]:
# Create Engine
# Percent-encode the credentials before embedding them in the URL: a raw
# '@', ':' or '/' in the username or password would otherwise be parsed as a
# URL delimiter and corrupt the connection string. Values made only of
# letters, digits and '_.-~' are left unchanged by quote_plus.
connect_string = (
    f'postgresql://{quote_plus(username)}:{quote_plus(password)}@{host}/{database}'
)
engine = create_engine(connect_string)

In [14]:
# Limit table creation to the race_data table registered on the declarative base
race_table = [Base.metadata.tables['race_data']]

# Issue CREATE TABLE for race_data; tables that already exist are left untouched
Base.metadata.create_all(bind=engine, tables=race_table)

In [15]:
# Limit table creation to the non_race_data table registered on the declarative base
non_race_table = [Base.metadata.tables['non_race_data']]

# Issue CREATE TABLE for non_race_data; tables that already exist are left untouched
Base.metadata.create_all(bind=engine, tables=non_race_table)

In [16]:
# Store data to SQL database
# Append into the table created from the Race model instead of using
# if_exists='replace': 'replace' drops that table and lets pandas recreate it
# from inferred dtypes, which throws away the declared composite primary key.
# Existing rows are deleted first, in the same transaction, so re-running this
# cell reloads the data rather than failing on duplicate keys.
# NOTE(review): assumes (year, race, hispanic_origin, state_abbv) is unique in
# the CSV; a duplicate would now surface as an IntegrityError.
with engine.begin() as conn:
    conn.execute(Base.metadata.tables['race_data'].delete())
    race_df.to_sql(name='race_data', con=conn, if_exists='append', index=False)

In [17]:
# Store data to SQL database
# Append into the table created from the NonRace model instead of using
# if_exists='replace': 'replace' drops that table and lets pandas recreate it
# from inferred dtypes, which throws away the declared (year, state_code)
# primary key. Existing rows are deleted first, in the same transaction, so
# re-running this cell reloads the data rather than failing on duplicate keys.
# NOTE(review): assumes (year, state_code) is unique in the CSV; a duplicate
# would now surface as an IntegrityError.
with engine.begin() as conn:
    conn.execute(Base.metadata.tables['non_race_data'].delete())
    non_race_df.to_sql(name='non_race_data', con=conn, if_exists='append', index=False)

In [18]:
# Check that the table exists in the database: read race_data back through
# the engine and preview its first rows
pd.read_sql_query('select * from race_data', con=engine).head()

Unnamed: 0,state,state_code,year,race,hispanic_origin,births_by_race,deaths_by_race,mmr_by_race,population_by_race,id,state_abbv,latitude,longitude
0,Alabama,1,2016,Black or African American,Not Hispanic or Latino,17989,14,77.83,0,US.AL,AL,32.318231,-86.902298
1,Alabama,1,2016,White,Not Hispanic or Latino,35319,18,50.96,0,US.AL,AL,32.318231,-86.902298
2,Alabama,1,2017,Black or African American,Not Hispanic or Latino,18354,25,136.21,704201,US.AL,AL,32.318231,-86.902298
3,Alabama,1,2017,White,Not Hispanic or Latino,34784,15,43.12,1655605,US.AL,AL,32.318231,-86.902298
4,Alabama,1,2018,Black or African American,Not Hispanic or Latino,17939,14,78.04,706754,US.AL,AL,32.318231,-86.902298


In [19]:
# Check that the table exists in the database: read non_race_data back through
# the engine and preview its first rows
pd.read_sql_query('select * from non_race_data', con=engine).head()

Unnamed: 0,year,state,id,state_code,latitude,longitude,deaths,births,maternal_mortality_ratio,population,...,prem_death_val,smoking_val,uninsured_val,all_determs_val,all_outcomes_val,chlamydia_val,prem_death_ri_val,teen_birth_val,primary_care_val,low_birthweight_val
0,2015,Alabama,US.AL,AL,32.318231,-86.902298,12.0,59657.0,20.11,2505795.0,...,10095.0,21.1,12.9,-0.371,-0.325,611.0,1.2,34.3,103.5,10.0
1,2016,Alabama,US.AL,AL,32.318231,-86.902298,35.0,59151.0,59.17,2507714.0,...,10097.0,21.4,11.1,-0.427,-0.366,600.2,1.1,32.0,116.4,10.1
2,2017,Alabama,US.AL,AL,32.318231,-86.902298,41.0,58941.0,69.56,2514911.0,...,10321.0,21.5,9.6,-0.427,-0.335,543.6,1.2,30.1,119.3,10.4
3,2018,Alabama,US.AL,AL,32.318231,-86.902298,31.0,57761.0,53.67,2523756.0,...,10720.0,20.9,9.3,-0.483,-0.356,553.6,1.1,28.4,122.8,10.3
4,2019,Alabama,US.AL,AL,32.318231,-86.902298,35.0,58615.0,59.71,2533668.0,...,10435.0,19.2,9.7,-0.437,-0.383,614.1,1.2,27.0,122.8,10.3
