### Load Libraries/Dependencies

In [2]:
import os
from getpass import getpass
from urllib.parse import quote_plus

import pandas as pd
import pymysql
from sqlalchemy import create_engine

pymysql.install_as_MySQLdb()

### Extract CSV and JSON Files into DataFrames

In [16]:
# Path to the 2017 happiness CSV, relative to the notebook's working directory.
happiness_file = "resources/data/happiness/2017_Happiness.csv"
# Read the CSV into a DataFrame and preview the first five rows.
happiness_df = pd.read_csv(happiness_file)
happiness_df.head()

Unnamed: 0,Country,Happiness.Rank,Happiness.Score,Whisker.high,Whisker.low,Economy..GDP.per.Capita.,Family,Health..Life.Expectancy.,Freedom,Generosity,Trust..Government.Corruption.,Dystopia.Residual
0,Norway,1,7.537,7.594445,7.479556,1.616463,1.533524,0.796667,0.635423,0.362012,0.315964,2.277027
1,Denmark,2,7.522,7.581728,7.462272,1.482383,1.551122,0.792566,0.626007,0.35528,0.40077,2.313707
2,Iceland,3,7.504,7.62203,7.38597,1.480633,1.610574,0.833552,0.627163,0.47554,0.153527,2.322715
3,Switzerland,4,7.494,7.561772,7.426227,1.56498,1.516912,0.858131,0.620071,0.290549,0.367007,2.276716
4,Finland,5,7.469,7.527542,7.410458,1.443572,1.540247,0.809158,0.617951,0.245483,0.382612,2.430182


In [14]:
# Read the country facts JSON file into a DataFrame and preview it.
# Note: the coastline column comes through as "Coastline (coast" and holds a
# nested dict (e.g. {'area ratio)': 0}) instead of a plain number -- presumably
# the original "coast/area ratio" header was split during conversion; verify
# against the source file before using this column.

json_file = "resources/data/happiness/convertcsv.json"
country_facts_df = pd.read_json(json_file)
country_facts_df.head()


Unnamed: 0,Agriculture,Arable (%),Area (sq. mi.),Birthrate,Climate,Coastline (coast,Country,Crops (%),Deathrate,GDP ($ per capita),Industry,Infant mortality (per 1000 births),Literacy (%),Net migration,Other (%),Phones (per 1000),Pop. Density (per sq. mi.),Population,Region,Service
0,0.38,12.13,647500,46.6,1.0,{'area ratio)': 0},Afghanistan,0.22,20.34,700.0,0.24,163.07,36.0,23.06,87.65,3.2,48.0,31056997,ASIA (EX. NEAR EAST),0.38
1,0.232,21.09,28748,15.11,3.0,{'area ratio)': 1.26},Albania,4.42,5.22,4500.0,0.188,21.52,86.5,-4.93,74.49,71.2,124.6,3581655,EASTERN EUROPE,0.579
2,0.101,3.22,2381740,17.14,1.0,{'area ratio)': 0.04},Algeria,0.25,4.61,6000.0,0.6,31.0,70.0,-0.39,96.53,78.1,13.8,32930091,NORTHERN AFRICA,0.298
3,,10.0,199,22.46,2.0,{'area ratio)': 58.29},American Samoa,15.0,3.27,8000.0,,9.27,97.0,-20.71,75.0,259.5,290.4,57794,OCEANIA,
4,,2.22,468,8.71,3.0,{'area ratio)': 0},Andorra,0.0,6.25,19000.0,,4.05,100.0,6.6,97.78,497.2,152.1,71201,WESTERN EUROPE,


### Transform World Happiness DataFrame

In [44]:
# Columns to keep from the raw happiness data.
happiness_cols = [
    "Country",
    "Happiness.Score",
    "Economy..GDP.per.Capita.",
    "Family",
    "Health..Life.Expectancy.",
    "Freedom",
    "Generosity",
    "Trust..Government.Corruption.",
    "Dystopia.Residual",
]

# Raw CSV header -> easier-to-handle column name ("Country" is kept as-is).
happiness_renames = {
    "Happiness.Score": "Happiness_Score",
    "Economy..GDP.per.Capita.": "GDP_Per_Capita_Score",
    "Family": "Family_Score",
    "Health..Life.Expectancy.": "Health_Score",
    "Freedom": "Freedom_Score",
    "Generosity": "Generosity_Score",
    "Trust..Government.Corruption.": "Trust_Govt_Score",
    "Dystopia.Residual": "Dystopia_Score",
}

# Subset, rename, then keep only the first row seen for each country.
# Every step returns a new frame, so happiness_df itself is never modified.
happiness_transformed = (
    happiness_df[happiness_cols]
    .rename(columns=happiness_renames)
    .drop_duplicates(subset="Country")
)

happiness_transformed.head()

Unnamed: 0,Country,Happiness_Score,GDP_Per_Capita_Score,Family_Score,Health_Score,Freedom_Score,Generosity_Score,Trust_Govt_Score,Dystopia_Score
0,Norway,7.537,1.616463,1.533524,0.796667,0.635423,0.362012,0.315964,2.277027
1,Denmark,7.522,1.482383,1.551122,0.792566,0.626007,0.35528,0.40077,2.313707
2,Iceland,7.504,1.480633,1.610574,0.833552,0.627163,0.47554,0.153527,2.322715
3,Switzerland,7.494,1.56498,1.516912,0.858131,0.620071,0.290549,0.367007,2.276716
4,Finland,7.469,1.443572,1.540247,0.809158,0.617951,0.245483,0.382612,2.430182


### Transform Country Facts DataFrame

In [33]:
# Columns of interest from the country facts data.
facts_cols = [
    "Country",
    "Region",
    "Population",
    "Pop. Density (per sq. mi.)",
    "Area (sq. mi.)",
    "Birthrate",
    "Deathrate",
]

# Work on a copy of the subset so country_facts_df stays untouched.
country_facts_transformed = country_facts_df[facts_cols].copy()

# Replace the two headers that contain spaces and punctuation with plain
# identifiers; every other selected column keeps its original name.
country_facts_transformed = country_facts_transformed.rename(columns={
    "Pop. Density (per sq. mi.)": "pop_density",
    "Area (sq. mi.)": "Area_Sq_Miles",
})

country_facts_transformed.head()

Unnamed: 0,Country,Region,Population,pop_density,Area_Sq_Miles,Birthrate,Deathrate
0,Afghanistan,ASIA (EX. NEAR EAST),31056997,48.0,647500,46.6,20.34
1,Albania,EASTERN EUROPE,3581655,124.6,28748,15.11,5.22
2,Algeria,NORTHERN AFRICA,32930091,13.8,2381740,17.14,4.61
3,American Samoa,OCEANIA,57794,290.4,199,22.46,3.27
4,Andorra,WESTERN EUROPE,71201,152.1,468,8.71,6.25


### Create database connection

In [27]:
# Build the server-level MySQL URL from the environment so that no password is
# ever saved in the notebook. Set MYSQL_PASSWORD (and optionally MYSQL_USER /
# MYSQL_HOST) before launching Jupyter; when MYSQL_PASSWORD is unset, the
# password is prompted for interactively instead.
db_user = os.environ.get("MYSQL_USER", "root")
db_host = os.environ.get("MYSQL_HOST", "localhost")
db_password = os.environ.get("MYSQL_PASSWORD") or getpass(
    f"MySQL password for {db_user}@{db_host}: "
)

# quote_plus() percent-encodes characters such as "@", "/" or "!" that would
# otherwise break URL parsing. The trailing "/" leaves the database name open
# so a schema name can be appended later.
connection_string = f"{quote_plus(db_user)}:{quote_plus(db_password)}@{db_host}/"
engine = create_engine(f'mysql://{connection_string}')

connection = engine.connect()

In [28]:
# Create the happiness_db schema if it is missing, so the notebook also runs
# against a fresh MySQL server. IF NOT EXISTS makes this cell safe to re-run.
# Note that this schema will be replaced by the master DB in production.
connection.execute('CREATE SCHEMA IF NOT EXISTS happiness_db')

<sqlalchemy.engine.result.ResultProxy at 0x10486a62eb8>

## Note: the engine below points at `happiness_db` — switch it to the master DB before production use

In [30]:
# Engine bound to the happiness_db schema. It reuses the server-level
# connection_string (which ends in "/") so the credentials are defined in
# exactly one place and db_engine exists after a fresh Restart & Run All.
# NOTE: change the schema name to the master DB for production.
db_connection_string = f"{connection_string}happiness_db"
db_engine = create_engine(f'mysql://{db_connection_string}')

# Confirm tables
db_engine.table_names()

[]

### Load DataFrames into database

In [52]:
# Load the happiness DataFrame into the SQL db as table 'happiness'.
# if_exists='replace' drops any existing table of that name before inserting;
# index=False keeps the DataFrame index out of the table.
happiness_transformed.to_sql(name='happiness', con=db_engine, if_exists='replace', index=False)

In [36]:
# Load the country facts DataFrame into the SQL db as table 'country_facts',
# replacing the table if it already exists.
# NOTE(review): index=True also writes the DataFrame index as a column, unlike
# the happiness load, which passes index=False -- confirm this is intended.
country_facts_transformed.to_sql(name='country_facts', con=db_engine, if_exists='replace', index=True)

In [41]:
# List the tables again to confirm that both loads created their tables.
db_engine.table_names()

['country_facts', 'happiness']

### Query the Database to Confirm Everything Is Working

In [54]:
# Read every row back from the happiness table to verify the load.
results = db_engine.execute('select * from happiness')

In [55]:
# Print each row returned by the query above.
for item in results:
    print(item)

('Norway', 7.53700017929077, 1.61646318435669, 1.53352355957031, 0.796666502952576, 0.635422587394714, 0.36201223731041, 0.315963834524155, 2.27702665328979)
('Denmark', 7.52199983596802, 1.48238301277161, 1.55112159252167, 0.792565524578094, 0.626006722450256, 0.355280488729477, 0.40077006816864, 2.31370735168457)
('Iceland', 7.50400018692017, 1.480633020401, 1.6105740070343, 0.833552122116089, 0.627162635326385, 0.475540220737457, 0.153526559472084, 2.32271528244019)
('Switzerland', 7.49399995803833, 1.56497955322266, 1.51691174507141, 0.858131289482117, 0.620070576667786, 0.290549278259277, 0.367007285356522, 2.2767162322998)
('Finland', 7.4689998626709, 1.44357192516327, 1.5402467250824, 0.80915766954422, 0.617950856685638, 0.24548277258873, 0.38261154294014, 2.4301815032959)
('Netherlands', 7.3769998550415, 1.50394463539124, 1.42893922328949, 0.810696125030518, 0.585384488105774, 0.470489829778671, 0.282661825418472, 2.29480409622192)
('Canada', 7.31599998474121, 1.47920441627502,