In [1]:
# Dependencies
import pandas as pd
import numpy as np

In [2]:
# Relative paths of the input CSV files: the three yearly World Happiness
# Reports and the country coordinates lookup.
happiness15, happiness16, happiness17 = (
    "data_sets/2015_World_Happiness_Report.csv",
    "data_sets/2016_World_Happiness_Report.csv",
    "data_sets/2017_World_Happiness_Report.csv",
)
country_coordinates = "data_sets/Country_Coordinates.csv"

In [3]:
# Load every input CSV into its own DataFrame.
# No `encoding` argument is passed, so pandas' default decoding is used for all four files.
h15_df, h16_df, h17_df, coordinates_df = [
    pd.read_csv(csv_path)
    for csv_path in (happiness15, happiness16, happiness17, country_coordinates)
]

In [4]:
# 2015 report: keep the country, region, rank and score columns, and suffix the
# rank/score names with the year so they remain distinguishable once the yearly
# tables are merged on "Country".
chr15_df = h15_df[["Country", "Region", "Happiness Rank", "Happiness Score"]].rename(
    columns={
        "Happiness Rank": "Happiness Rank 2015",
        "Happiness Score": "Happiness Score 2015",
    }
)
chr15_df.head()
# Other quick checks: chr15_df.count(), chr15_df.dtypes

Unnamed: 0,Country,Region,Happiness Rank 2015,Happiness Score 2015
0,Switzerland,Western Europe,1,7.587
1,Iceland,Western Europe,2,7.561
2,Denmark,Western Europe,3,7.527
3,Norway,Western Europe,4,7.522
4,Canada,North America,5,7.427


In [5]:
# 2016 report: keep the country, rank and score columns, and suffix the
# rank/score names with the year so they remain distinguishable once the yearly
# tables are merged on "Country".
chr16_df = h16_df[["Country", "Happiness Rank", "Happiness Score"]].rename(
    columns={
        "Happiness Rank": "Happiness Rank 2016",
        "Happiness Score": "Happiness Score 2016",
    }
)
chr16_df.head()
# Other quick checks: chr16_df.count(), chr16_df.dtypes

Unnamed: 0,Country,Happiness Rank 2016,Happiness Score 2016
0,Denmark,1,7.526
1,Switzerland,2,7.509
2,Iceland,3,7.501
3,Norway,4,7.498
4,Finland,5,7.413


In [6]:
# 2017 report: keep the country, rank and score columns. This file names its
# columns with dots ("Happiness.Rank", "Happiness.Score"), so the rename also
# brings them in line with the "Happiness Rank <year>" pattern used for 2015/2016.
chr17_df = h17_df[["Country", "Happiness.Rank", "Happiness.Score"]].rename(
    columns={
        "Happiness.Rank": "Happiness Rank 2017",
        "Happiness.Score": "Happiness Score 2017",
    }
)
chr17_df.head()
# Other quick checks: chr17_df.count(), chr17_df.dtypes

Unnamed: 0,Country,Happiness Rank 2017,Happiness Score 2017
0,Norway,1,7.537
1,Denmark,2,7.522
2,Iceland,3,7.504
3,Switzerland,4,7.494
4,Finland,5,7.469


In [7]:
# Coordinates lookup: keep latitude, longitude and the country name, renaming
# "name" to "Country" so it can serve as the join key against the happiness tables.
cc_df = coordinates_df[["latitude", "longitude", "name"]].rename(
    columns={
        "latitude": "Latitude",
        "longitude": "Longitude",
        "name": "Country",
    }
)
cc_df.head()
# Other quick checks: cc_df.count(), cc_df.dtypes

Unnamed: 0,Latitude,Longitude,Country
0,42.546245,1.601554,Andorra
1,23.424076,53.847818,United Arab Emirates
2,33.93911,67.709953,Afghanistan
3,17.060816,-61.796428,Antigua and Barbuda
4,18.220554,-63.068615,Anguilla


In [8]:
# Combine the 2015 and 2016 tables on "Country"; the inner join keeps only
# countries present in both years.
happy1_df = chr15_df.merge(chr16_df, on="Country", how="inner")
happy1_df.head()
# Other quick check: happy1_df.count()

Unnamed: 0,Country,Region,Happiness Rank 2015,Happiness Score 2015,Happiness Rank 2016,Happiness Score 2016
0,Switzerland,Western Europe,1,7.587,2,7.509
1,Iceland,Western Europe,2,7.561,3,7.501
2,Denmark,Western Europe,3,7.527,1,7.526
3,Norway,Western Europe,4,7.522,4,7.498
4,Canada,North America,5,7.427,6,7.404


In [9]:
# Add the 2017 table to the 2015/2016 result on "Country"; the inner join keeps
# only countries present in all three years.
happy2_df = happy1_df.merge(chr17_df, on="Country", how="inner")
happy2_df.head()
# Other quick check: happy2_df.count()

Unnamed: 0,Country,Region,Happiness Rank 2015,Happiness Score 2015,Happiness Rank 2016,Happiness Score 2016,Happiness Rank 2017,Happiness Score 2017
0,Switzerland,Western Europe,1,7.587,2,7.509,4,7.494
1,Iceland,Western Europe,2,7.561,3,7.501,3,7.504
2,Denmark,Western Europe,3,7.527,1,7.526,2,7.522
3,Norway,Western Europe,4,7.522,4,7.498,1,7.537
4,Canada,North America,5,7.427,6,7.404,7,7.316


In [13]:
# Attach Latitude/Longitude to each country. The inner join keeps only countries
# whose name appears in both the happiness table and the coordinates table.
# Alternative noted by the original author: how="left" would keep every country
# from the happiness table; their note reports 146 countries there with only 140
# matching the coordinates file, so 6 rows would get NaN for Latitude and Longitude.
world_happy_df = happy2_df.merge(cc_df, on="Country", how="inner")
world_happy_df.head()
# Other quick check: world_happy_df.count()

Unnamed: 0,Country,Region,Happiness Rank 2015,Happiness Score 2015,Happiness Rank 2016,Happiness Score 2016,Happiness Rank 2017,Happiness Score 2017,Latitude,Longitude
0,Switzerland,Western Europe,1,7.587,2,7.509,4,7.494,46.818188,8.227512
1,Iceland,Western Europe,2,7.561,3,7.501,3,7.504,64.963051,-19.020835
2,Denmark,Western Europe,3,7.527,1,7.526,2,7.522,56.26392,9.501785
3,Norway,Western Europe,4,7.522,4,7.498,1,7.537,60.472024,8.468946
4,Canada,North America,5,7.427,6,7.404,7,7.316,56.130366,-106.346771


In [14]:
# Reorder the columns (region and location first, then rank/score per year) and
# round the three score columns to the thousandths place.
# Both steps are done in one expression that returns a new DataFrame: assigning
# into the column subset one column at a time is the chained-assignment pattern
# that triggers pandas' SettingWithCopyWarning. DataFrame.round with a dict
# rounds only the listed columns and leaves the others untouched.
world_happy_df = world_happy_df[
    [
        "Region",
        "Country",
        "Latitude",
        "Longitude",
        "Happiness Rank 2015",
        "Happiness Score 2015",
        "Happiness Rank 2016",
        "Happiness Score 2016",
        "Happiness Rank 2017",
        "Happiness Score 2017",
    ]
].round(
    {
        "Happiness Score 2015": 3,
        "Happiness Score 2016": 3,
        "Happiness Score 2017": 3,
    }
)
world_happy_df.head(10)
# Other quick check: world_happy_df.count()

Unnamed: 0,Region,Country,Latitude,Longitude,Happiness Rank 2015,Happiness Score 2015,Happiness Rank 2016,Happiness Score 2016,Happiness Rank 2017,Happiness Score 2017
0,Western Europe,Switzerland,46.818188,8.227512,1,7.587,2,7.509,4,7.494
1,Western Europe,Iceland,64.963051,-19.020835,2,7.561,3,7.501,3,7.504
2,Western Europe,Denmark,56.26392,9.501785,3,7.527,1,7.526,2,7.522
3,Western Europe,Norway,60.472024,8.468946,4,7.522,4,7.498,1,7.537
4,North America,Canada,56.130366,-106.346771,5,7.427,6,7.404,7,7.316
5,Western Europe,Finland,61.92411,25.748151,6,7.406,5,7.413,5,7.469
6,Western Europe,Netherlands,52.132633,5.291266,7,7.378,7,7.339,6,7.377
7,Western Europe,Sweden,60.128161,18.643501,8,7.364,10,7.291,9,7.284
8,Australia and New Zealand,New Zealand,-40.900557,174.885971,9,7.286,8,7.334,8,7.314
9,Australia and New Zealand,Australia,-25.274398,133.775136,10,7.284,9,7.313,10,7.284


In [15]:
# Write the merged table to disk. `index=False` is not passed, so pandas' default
# index handling applies to the output file.
world_happy_df.to_csv(path_or_buf="data_sets/world_happy.csv")