In [10]:
# Dependencies and Setup
import pandas as pd

from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Float
from sqlalchemy.orm import Session

from config import username, password, host, database

In [11]:
# File path
cdc_mmr_filepath = '../output_file/2009_2019_CDC_MMR.csv'
us_ins_filepath = '../output_file/2009_2019_US_ins_cleaned.csv'

In [12]:
# Read in csv file for cdc data
cdc_mmr_df = pd.read_csv(cdc_mmr_filepath)

# Display dataframe
cdc_mmr_df.head()

Unnamed: 0,record_id,state,id,state_code,year,births,maternal_mortality_ratio,population
0,0,Alabama,US.AL,1,2015,59657,20.11,2505795
1,1,Alabama,US.AL,1,2016,59151,59.17,2507714
2,2,Alabama,US.AL,1,2017,58941,69.56,2514911
3,3,Alabama,US.AL,1,2018,57761,53.67,2523756
4,4,Alabama,US.AL,1,2019,58615,59.71,2533668


In [9]:
# Read in csv file for US insurance data
us_ins_df = pd.read_csv(us_ins_filepath)

# Reset index
us_ins_df.reset_index(inplace=True)
us_ins_df = us_ins_df.rename(columns={'index': 'id'})

#display dataframe
us_ins_df.head()

Unnamed: 0,id,location,year,employer,non_group,medicaid,medicare,military,uninsured,total
0,0,Alabama,2009,58.6,6.1,11.4,3.5,2.2,18.2,100.0
1,1,Alaska,2009,56.7,4.2,10.1,0.0,5.6,22.7,100.0
2,2,Arizona,2009,53.8,6.9,15.5,2.0,1.7,20.1,100.0
3,3,Arkansas,2009,54.1,5.8,11.8,3.6,1.5,23.1,100.0
4,4,California,2009,55.4,8.6,12.2,1.3,0.9,21.6,100.0


#### Connect to PostgresSQL database

In [13]:
# Sets an object to utilize the default declarative base in SQL Alchemy
Base = declarative_base()

In [14]:
# Create table schema for CDC data 
class CDC(Base):
    __tablename__ = 'mmr_us'
    record_id = Column(Integer, primary_key=True)
    state = Column(String(255))
    id = Column(String(255))
    state_code = Column(Integer)
    year = Column(Integer)
    deaths = Column(Integer)
    births = Column(Integer)
    maternal_mortality_ratio = Column(Float)
    population = Column(Integer)

In [12]:
# Create table schema for US insurance data
class Ins(Base):
    __tablename__ = 'ins_us'
    id = Column(Integer, primary_key=True)
    location = Column(String(255))
    year = Column(Integer)
    employer = Column(Float)
    non_group = Column(Float)
    medicaid = Column(Float)
    medicare = Column(Float)
    military = Column(Float)
    uninsured = Column(Float)
    total = Column(Integer)

In [15]:
# Create Engine
connect_string = f'postgresql://{username}:{password}@{host}/{database}'
engine = create_engine(connect_string)

In [16]:
# Specify table
cdc_table = [Base.metadata.tables['mmr_us']]

# Create (if not already in existence) the tables associated with our classes.
Base.metadata.create_all(engine, tables=cdc_table)

In [15]:
# Specify table
ins_table = [Base.metadata.tables['ins_us']]

# Create (if not already in existence) the tables associated with our classes.
Base.metadata.create_all(engine, tables=ins_table)

In [17]:
# Store data to SQL database
cdc_mmr_df.to_sql(name='mmr_us', con=engine, if_exists='replace', index=False)

In [30]:
# Store data to SQL database
us_ins_df.to_sql(name='ins_us', con=engine, if_exists='replace', index=False)

In [18]:
# Check of the table exists in database
pd.read_sql_query('select * from mmr_us', con=engine).head()

Unnamed: 0,record_id,state,id,state_code,year,births,maternal_mortality_ratio,population
0,0,Alabama,US.AL,1,2015,59657,20.11,2505795
1,1,Alabama,US.AL,1,2016,59151,59.17,2507714
2,2,Alabama,US.AL,1,2017,58941,69.56,2514911
3,3,Alabama,US.AL,1,2018,57761,53.67,2523756
4,4,Alabama,US.AL,1,2019,58615,59.71,2533668


In [33]:
# Check of the table exists in database
pd.read_sql_query('select * from ins_us', con=engine).head()

Unnamed: 0,location,year,employer,non_group,medicaid,medicare,military,uninsured,total
0,Alabama,2009,58.6,6.1,11.4,3.5,2.2,18.2,100.0
1,Alaska,2009,56.7,4.2,10.1,0.0,5.6,22.7,100.0
2,Arizona,2009,53.8,6.9,15.5,2.0,1.7,20.1,100.0
3,Arkansas,2009,54.1,5.8,11.8,3.6,1.5,23.1,100.0
4,California,2009,55.4,8.6,12.2,1.3,0.9,21.6,100.0
