# Import Dependencies and CSV File

This data was downloaded as a csv from the Gender Statistics library in the DataBank from the World Bank at https://databank.worldbank.org/data/reports.aspx?source=gender-statistics#. All countries were selected and the series called "Birth rate, crude (per 1,000 people)" was selected for the years 2016-2018. Unfortunately, only data for 2016 was available.

In [2]:
# Imports
import pandas as pd
from sqlalchemy import create_engine
import pymysql
pymysql.install_as_MySQLdb()
# from config import pswrd

In [3]:
# Store CSV into DataFrame
csv_file = "resources/data/birthrate/crude_birthrate.csv"
birthrate_df = pd.read_csv(csv_file)
birthrate_df.head()

Unnamed: 0,Series Name,Series Code,Country Name,Country Code,2016 [YR2016],2017 [YR2017],2018 [YR2018]
0,"Birth rate, crude (per 1,000 people)",SP.DYN.CBRT.IN,Afghanistan,AFG,33.214,..,..
1,"Birth rate, crude (per 1,000 people)",SP.DYN.CBRT.IN,Albania,ALB,11.816,..,..
2,"Birth rate, crude (per 1,000 people)",SP.DYN.CBRT.IN,Algeria,DZA,23.132,..,..
3,"Birth rate, crude (per 1,000 people)",SP.DYN.CBRT.IN,American Samoa,ASM,..,..,..
4,"Birth rate, crude (per 1,000 people)",SP.DYN.CBRT.IN,Andorra,AND,8.8,..,..


# Clean up new DF
* Create new df with select columns
* Update unclear column name
* Delete extra column

In [11]:
# Create New Data with Select Columns
new_birthrate_df = birthrate_df[['Country Name', 'Country Code', '2016 [YR2016]']].copy()
new_birthrate_df.head()

Unnamed: 0,Country Name,Country Code,2016 [YR2016]
0,Afghanistan,AFG,33.214
1,Albania,ALB,11.816
2,Algeria,DZA,23.132
3,American Samoa,ASM,..
4,Andorra,AND,8.8


In [13]:
new_birthrate_df['Birthrate per 1000 Women'] = new_birthrate_df['2016 [YR2016]']


In [34]:
del new_birthrate_df['2016 [YR2016]']


In [19]:
new_birthrate_df.head()

Unnamed: 0,Country Name,Country Code,Birthrate per 1000 Women
0,Afghanistan,AFG,33.214
1,Albania,ALB,11.816
2,Algeria,DZA,23.132
3,American Samoa,ASM,..
4,Andorra,AND,8.8


# Open engine to use sqlalchemy
* Connect to database
* Create new database
* Drop former table created previously
* Convert df
* Use pandas df to load csv to SQL database
* Confirm that data has been added

In [6]:
# Connect to local database
rds_connection_string = "root:Loqu1ta!!@127.0.0.1/"
engine = create_engine(f'mysql://{rds_connection_string}')
connection = engine.connect()
connection.execute('create database birthrate_db')


<sqlalchemy.engine.result.ResultProxy at 0x11f71b5c0>

In [29]:
rds_connection_string = "root:Loqu1ta!!@127.0.0.1/birthrate_db"
engine = create_engine(f'mysql://{rds_connection_string}')
connection = engine.connect()
connection.execute('drop table birthrate;')

<sqlalchemy.engine.result.ResultProxy at 0x11f946550>

In [30]:
# Check for tables
rds_connection_string = "root:Loqu1ta!!@127.0.0.1/birthrate_db"
engine = create_engine(f'mysql://{rds_connection_string}')
connection = engine.connect()
engine.table_names()

[]

In [32]:
# Use Pandas to load csv converted DataFrame into SQL database
new_birthrate_df.to_sql(name='birthrate', con=engine, if_exists='replace', index=False)


In [33]:
# Confirm data has been added by querying the table
pd.read_sql_query('select * from birthrate', con=engine).head()


Unnamed: 0,Country Name,Country Code,Birthrate per 1000 Women
0,Afghanistan,AFG,33.214
1,Albania,ALB,11.816
2,Algeria,DZA,23.132
3,American Samoa,ASM,..
4,Andorra,AND,8.8
