In [11]:
# Dependencies
import os
import csv
import pandas as pd

In [12]:
# Create and store file paths to all data sets

# UN World Happiness Report Data from Kaggle as: https://www.kaggle.com/alcidesoxa/world-happiness-report-2005-2018
world_happy_file = "Data/world-happiness-report-2005-2018 3.csv"

# Healthcare spending from World Bank as: https://data.worldbank.org/indicator/SH.XPD.CHEX.GD.ZS?name_desc=false
healthcare_spending_file = "Data/Health Expenditure.csv"

# Happiness vs Investment Data from Kaggle as: https://www.kaggle.com/andreyka2/happiness-and-investment
happyVSinvest_file = "Data/Inv_Hap.csv"

# Alcohol consumption from Kaggle as: https://www.kaggle.com/justmarkham/alcohol-consumption-by-country
alcohol_file = "Data/drinks.csv"

# Economic Freedom Data from Kaggle as: https://www.kaggle.com/gsutters/economic-freedom#efw_cc.csv
econ_free_file = "Data/efw_cc.csv"

# Military Expenditure Data from Kaggle as: https://www.kaggle.com/nitinsss/military-expenditure-of-countries-19602019
mil_spend_file = "Data/Military Expenditure.csv"

In [13]:
# Use Pandas to read data

# UN World Happiness Data
happiness_report = pd.read_csv(world_happy_file, delimiter=";")
# happiness_report_pd.head()            # view the data

# UN World Happiness Data
healthcare_spending = pd.read_csv(healthcare_spending_file, skiprows=4)
# healthcare_spending.head()            # view the data

# Happiness vs Investment by Country Data
happy_invest = pd.read_csv(happyVSinvest_file)
# happy_invest.head()            # view the data

# Alcohol Consumption by Country Data
alcohol = pd.read_csv(alcohol_file)
# alcohol.head()            # view the data

# Economic Freedom of the World Data
econ_free = pd.read_csv(econ_free_file)
# econ_free.head()            # view the data

# Military Expenditure by Country Data
mil_spend = pd.read_csv(mil_spend_file)
# mil_spend.head()            # view the data



In [14]:
# UN Happiness Report Data Frame
# Extract and arrange data table with 'Country Name' as lead column

#cols=["WVS round 1981-1984;Most people can be trusted"]
# happiness_report.drop(cols,axis=1,inplace=True)
happiness_report = happiness_report[["Country name","Year","Life Ladder","Log GDP per capita","Social support","Healthy life expectancy at birth","Freedom to make life choices","Generosity","Perceptions of corruption","Confidence in national government"]]
happiness_report = happiness_report.rename(columns={"Country name":"Country Name"})

happiness_report = happiness_report.loc[happiness_report["Year"]==2012]
happiness_report.head()

Unnamed: 0,Country Name,Year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Confidence in national government
4,Afghanistan,2012,3.782938,7.517126,0.520637,52.240002,0.530935,0.244273,0.77562,0.43544
15,Albania,2012,5.510124,9.246655,0.784502,66.959999,0.601512,-0.172262,0.847675,0.364894
24,Algeria,2012,5.604596,9.485086,0.839397,64.82,0.586663,-0.195859,0.690116,
30,Angola,2012,4.36025,8.699498,0.752593,53.200001,0.456029,-0.124065,0.9063,0.237091
39,Argentina,2012,6.468387,9.86396,0.901776,67.660004,0.747498,-0.143875,0.816546,0.418255


In [15]:
# Healthcare Spending Data Frame
# Extract and arrange data table with 'Country Name' as lead column

healthcare_spending = healthcare_spending[["Country Name","Country Code","Indicator Name", "2012"]]
healthcare_spending.head()

Unnamed: 0,Country Name,Country Code,Indicator Name,2012
0,Aruba,ABW,Current health expenditure (% of GDP),
1,Afghanistan,AFG,Current health expenditure (% of GDP),7.897168
2,Angola,AGO,Current health expenditure (% of GDP),2.69289
3,Albania,ALB,Current health expenditure (% of GDP),5.848301
4,Andorra,AND,Current health expenditure (% of GDP),10.046941


In [17]:
# Happiness vs Investment Data Frame
# Extract and arrange data table with 'Country Name' as lead column

# PR = Political Rights
# CL = Civil Liberties


happy_invest = happy_invest.rename(columns={"Country":"Country Name"})
happy_invest.head()

Unnamed: 0,Country Name,Region,Population,Happiness_Rank,Happiness_Score,Investment,PR,CL,Life_Expectancy
0,Afghanistan,Southern Asia,33736494.0,153,3.575,163102600.0,6,6,60.717171
1,Albania,Central and Eastern Europe,2880703.0,95,4.959,991259700.0,3,3,78.014463
2,Algeria,Middle East and Northern Africa,39871528.0,68,5.605,-403397100.0,6,5,75.042537
3,Angola,Sub-Saharan Africa,27859305.0,137,4.033,9282168000.0,6,6,52.666098
4,Argentina,Latin America and Caribbean,43417765.0,30,6.574,11758990000.0,2,2,76.33422


In [19]:
# Alcohol Consumption Data Frame
# Extract and arrange data table with 'Country Name' as lead column

#rename columns - reference defintion from data source documentation
#beer_servings = Beer (12oz)
#spirit_servings = Spirit (3oz)
#wine_servings = Wine (5oz)
alcohol = alcohol.rename(columns = {"country":"Country Name", "beer_servings":"Beer 12oz", 
                                    "spirit_servings":"Spirit 3oz", "wine_servings":"Wine 6oz", 
                                    "total_litres_of_pure_alcohol": "Pure Alcohol Litre",
                                   "continent": "Continent"})


alcohol.head()

Unnamed: 0,Country Name,Beer 12oz,Spirit 3oz,Wine 6oz,Pure Alcohol Litre,Continent
0,Namibia,376,3,1,6.8,Africa
1,Czech Republic,361,170,134,11.8,Europe
2,Gabon,347,98,59,8.9,Africa
3,Germany,346,117,175,11.3,Europe
4,Lithuania,343,244,56,12.9,Europe


In [21]:
# Economic Freedom of the World Data Frame
# Extract, reduce, and arrange data table with 'Country Name' as lead column

econ_free_reduced_df = pd.DataFrame(econ_free[["countries", "year", "ECONOMIC FREEDOM", "rank", 
                                                  "quartile", "2e_integrity_legal_system", "3c_inflation",
                                                  "5_regulation"]])

econ_free_reduced_df.columns = ["Country Name", "Year", "Econ Freedom Score", "Rank", "Quartile", 
                               "Legal System Integrity", "Inflation", "Regulation"]

# Retain only data from 2012
econ_free_reduced_df = econ_free_reduced_df.loc[econ_free_reduced_df["Year"]== 2012]

econ_free_reduced_df.head()

Unnamed: 0,Country Name,Year,Econ Freedom Score,Rank,Quartile,Legal System Integrity,Inflation,Regulation
648,Albania,2012,7.2,59.0,2.0,4.166667,9.594,6.619162
649,Algeria,2012,4.95,149.0,4.0,5.0,8.222,5.333293
650,Angola,2012,5.28,141.0,4.0,4.166667,7.942,5.458704
651,Argentina,2012,5.04,146.0,4.0,3.3,5.12,5.723699
652,Armenia,2012,7.75,13.0,1.0,5.0,9.488,7.651228


In [27]:
# Military Expenditure by Country Data Frame
# Extract, reduce, and arrange data table with 'Country Name' as lead column

mil_spend_reduced_df = pd.DataFrame(mil_spend[["Name", "2012"]])
mil_spend_reduced_df = mil_spend_reduced_df.rename(columns = {"Name":"Country Name"})
# mil_spend_reduced_df.head()
# Assume NaN values indicate no military expenditures - we will set NaN values to zero
mil_spend_reduced_df = mil_spend_reduced_df.fillna(0)
mil_spend_reduced_df.head()

Unnamed: 0,Country Name,2012
0,Aruba,0.0
1,Afghanistan,238583400.0
2,Angola,4144635000.0
3,Albania,183204700.0
4,Andorra,0.0


In [None]:
# Merge data with Country Name as the common ID.