# World Happiness Report Data Analysis

Here we investigate the WHR data for the 2021 WHR

In [147]:
#Import libraries that will be used
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import scipy.stats
import requests
import io
    
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
#Set working directory
os.chdir('C:/Users/Martin/Documents/GitHub/World-Happiness-Data/Martin_Alvarez-Kuglen')

#Import plotly express mapbox access token for choropleth
#from Creds import *
mapbox_access_token = 'pk.eyJ1IjoieWFuZ3k0IiwiYSI6ImNrbTh3OHZtZjFjM3kydXBtMGg0dnhybmoifQ.rEY_hQj3_tXG5PCxY-z_pQ'
px.set_mapbox_access_token(mapbox_access_token)


Begin by reading in the WHR and UN data for analysis

In [148]:
#read in world health report data
df = pd.read_excel("C:/Users/Martin/Documents/GitHub/DataPanelWHR2021C2.xls")
df.head()

Unnamed: 0,Country name,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect
0,Afghanistan,2008,3.72359,7.3701,0.450662,50.799999,0.718114,0.16764,0.881686,0.517637,0.258195
1,Afghanistan,2009,4.401778,7.539972,0.552308,51.200001,0.678896,0.190099,0.850035,0.583926,0.237092
2,Afghanistan,2010,4.758381,7.646709,0.539075,51.599998,0.600127,0.12059,0.706766,0.618265,0.275324
3,Afghanistan,2011,3.831719,7.619532,0.521104,51.919998,0.495901,0.162427,0.731109,0.611387,0.267175
4,Afghanistan,2012,3.782938,7.705479,0.520637,52.240002,0.530935,0.236032,0.77562,0.710385,0.267919


In [149]:
#read in UN data
df_un = pd.read_csv("C:/Users/Martin/Documents/GitHub/kiva_country_profile_variables.csv")
df_un.head()

Unnamed: 0,country,Region,Surface area (km2),Population in thousands (2017),"Population density (per km2, 2017)","Sex ratio (m per 100 f, 2017)",GDP: Gross domestic product (million current US$),"GDP growth rate (annual %, const. 2005 prices)",GDP per capita (current US$),Economy: Agriculture (% of GVA),...,Mobile-cellular subscriptions (per 100 inhabitants).1,Individuals using the Internet (per 100 inhabitants),Threatened species (number),Forested area (% of land area),CO2 emission estimates (million tons/tons per capita),"Energy production, primary (Petajoules)",Energy supply per capita (Gigajoules),"Pop. using improved drinking water (urban/rural, %)","Pop. using improved sanitation facilities (urban/rural, %)",Net Official Development Assist. received (% of GNI)
0,Afghanistan,SouthernAsia,652864,35530,54.4,106.3,20270,-2.4,623.2,23.3,...,8.3,42,2.1,9.8/0.3,63,5,78.2/47.0,45.1/27.0,21.43,-99
1,Albania,SouthernEurope,28748,2930,106.9,101.9,11541,2.6,3984.2,22.4,...,63.3,130,28.2,5.7/2.0,84,36,94.9/95.2,95.5/90.2,2.96,-99
2,Armenia,WesternAsia,29743,2930,102.9,88.8,10529,3.0,3489.1,19.0,...,58.2,114,11.7,5.5/1.8,48,46,100.0/100.0,96.2/78.2,3.17,-99
3,Azerbaijan,WesternAsia,86600,9828,118.9,99.3,53049,0.7,5438.7,6.7,...,77.0,97,13.5,37.5/3.9,2459,61,94.7/77.8,91.6/86.6,0.14,-99
4,Belize,CentralAmerica,22966,375,16.4,99.2,1721,1.2,4789.4,14.6,...,41.6,117,60.1,0.5/1.4,9,36,98.9/100.0,93.5/88.2,1.68,-99


In [150]:
#Clear out any non-numerical type data from the UN report
for col in df_un.columns[2:]:
    if df_un[col].dtype not in ['float64','int64']:
        df_un.drop(col,axis = 'columns', inplace = True)
df_un.columns

Index(['country', 'Region', 'Surface area (km2)',
       'Population in thousands (2017)', 'Population density (per km2, 2017)',
       'Sex ratio (m per 100 f, 2017)',
       'GDP: Gross domestic product (million current US$)',
       'GDP per capita (current US$)', 'Economy: Agriculture (% of GVA)',
       'Economy: Industry (% of GVA)',
       'Economy: Services and other activity (% of GVA)',
       'Employment: Agriculture (% of employed)',
       'Employment: Industry (% of employed)',
       'Employment: Services (% of employed)',
       'Agricultural production index (2004-2006=100)',
       'Food production index (2004-2006=100)',
       'International trade: Exports (million US$)',
       'International trade: Imports (million US$)',
       'International trade: Balance (million US$)',
       'Urban population (% of total population)',
       'Urban population growth rate (average annual %)',
       'Fertility rate, total (live births per woman)',
       'Infant mortality rat

In [151]:
#Reset the WHR dataframe to have a datetime index
df.set_index(pd.to_datetime(df['year'], yearfirst=True, format = '%Y'), inplace = True)
df

Unnamed: 0_level_0,Country name,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2008-01-01,Afghanistan,2008,3.723590,7.370100,0.450662,50.799999,0.718114,0.167640,0.881686,0.517637,0.258195
2009-01-01,Afghanistan,2009,4.401778,7.539972,0.552308,51.200001,0.678896,0.190099,0.850035,0.583926,0.237092
2010-01-01,Afghanistan,2010,4.758381,7.646709,0.539075,51.599998,0.600127,0.120590,0.706766,0.618265,0.275324
2011-01-01,Afghanistan,2011,3.831719,7.619532,0.521104,51.919998,0.495901,0.162427,0.731109,0.611387,0.267175
2012-01-01,Afghanistan,2012,3.782938,7.705479,0.520637,52.240002,0.530935,0.236032,0.775620,0.710385,0.267919
...,...,...,...,...,...,...,...,...,...,...,...
2016-01-01,Zimbabwe,2016,3.735400,7.984372,0.768425,54.400002,0.732971,-0.094634,0.723612,0.737636,0.208555
2017-01-01,Zimbabwe,2017,3.638300,8.015738,0.754147,55.000000,0.752826,-0.097645,0.751208,0.806428,0.224051
2018-01-01,Zimbabwe,2018,3.616480,8.048798,0.775388,55.599998,0.762675,-0.068427,0.844209,0.710119,0.211726
2019-01-01,Zimbabwe,2019,2.693523,7.950132,0.759162,56.200001,0.631908,-0.063791,0.830652,0.716004,0.235354


In [152]:
#Merge the two datasets along the country name columns
df_merge = df.merge(df_un, left_on = 'Country name', right_on = 'country')
df_merge.set_index(pd.to_datetime(df_merge['year'], yearfirst=True, format = '%Y'), inplace = True)
df_merge

Unnamed: 0_level_0,Country name,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,...,Infant mortality rate (per 1000 live births,Health: Total expenditure (% of GDP),Seats held by women in national parliaments %,Mobile-cellular subscriptions (per 100 inhabitants).1,Individuals using the Internet (per 100 inhabitants),Threatened species (number),CO2 emission estimates (million tons/tons per capita),"Energy production, primary (Petajoules)","Pop. using improved sanitation facilities (urban/rural, %)",Net Official Development Assist. received (% of GNI)
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2008-01-01,Afghanistan,2008,3.723590,7.370100,0.450662,50.799999,0.718114,0.167640,0.881686,0.517637,...,68.6,8.2,27.7,8.3,42,2.1,63,5,21.43,-99
2009-01-01,Afghanistan,2009,4.401778,7.539972,0.552308,51.200001,0.678896,0.190099,0.850035,0.583926,...,68.6,8.2,27.7,8.3,42,2.1,63,5,21.43,-99
2010-01-01,Afghanistan,2010,4.758381,7.646709,0.539075,51.599998,0.600127,0.120590,0.706766,0.618265,...,68.6,8.2,27.7,8.3,42,2.1,63,5,21.43,-99
2011-01-01,Afghanistan,2011,3.831719,7.619532,0.521104,51.919998,0.495901,0.162427,0.731109,0.611387,...,68.6,8.2,27.7,8.3,42,2.1,63,5,21.43,-99
2012-01-01,Afghanistan,2012,3.782938,7.705479,0.520637,52.240002,0.530935,0.236032,0.775620,0.710385,...,68.6,8.2,27.7,8.3,42,2.1,63,5,21.43,-99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-01-01,Zimbabwe,2016,3.735400,7.984372,0.768425,54.400002,0.732971,-0.094634,0.723612,0.737636,...,46.5,6.0,32.6,16.4,89,37.2,482,30,6.00,-99
2017-01-01,Zimbabwe,2017,3.638300,8.015738,0.754147,55.000000,0.752826,-0.097645,0.751208,0.806428,...,46.5,6.0,32.6,16.4,89,37.2,482,30,6.00,-99
2018-01-01,Zimbabwe,2018,3.616480,8.048798,0.775388,55.599998,0.762675,-0.068427,0.844209,0.710119,...,46.5,6.0,32.6,16.4,89,37.2,482,30,6.00,-99
2019-01-01,Zimbabwe,2019,2.693523,7.950132,0.759162,56.200001,0.631908,-0.063791,0.830652,0.716004,...,46.5,6.0,32.6,16.4,89,37.2,482,30,6.00,-99


In [153]:
#Create a visualization showing the number of countries represented each year
year_data = pd.Series(dtype = 'int64', name = "Number of Countries Included in Report")
for year in df.index.unique():    
    year_data = year_data.append(pd.Series(df.loc[year]['Country name'].unique().size, index = [year], dtype = 'int64'))
print(year_data.head())
fig = px.bar(year_data, labels={'index':'Year','value':'Number of Countries Included in Report'},
             hover_data={'variable':False})

fig.update_layout(showlegend=False)

fig.show()

2008-01-01    110
2009-01-01    114
2010-01-01    124
2011-01-01    146
2012-01-01    142
dtype: int64


In [154]:
# Downloading the csv file from your GitHub account
url = "https://gist.githubusercontent.com/tadast/8827699/raw/f5cac3d42d16b78348610fc4ec301e9234f82821/countries_codes_and_coordinates.csv" 
# Make sure the url is the raw version of the file on GitHub

download = requests.get(url).content

# Reading the downloaded content and turning it into a pandas dataframe

countryID = pd.read_csv(io.StringIO(download.decode()))

countryID.drop(['Alpha-2 code','Numeric code','Latitude (average)','Longitude (average)'],axis = 'columns', inplace = True)
# Printing out the first 5 rows of the dataframe
ac3 = []
for entry in countryID['Alpha-3 code']:
    ac3.append(str(entry).replace('"',''))
countryID['country code'] = ac3

print(countryID.head())
print(countryID['country code'][0])

          Country Alpha-3 code country code
0     Afghanistan        "AFG"          AFG
1         Albania        "ALB"          ALB
2         Algeria        "DZA"          DZA
3  American Samoa        "ASM"          ASM
4         Andorra        "AND"          AND
 AFG


In [155]:
#assign country codes to each country in the df DataFrame
df_country = pd.DataFrame()
countries = df['Country name'].unique()
cID = []
count = []
lifeladder = []
for country in countries:
    count.append(df[df['Country name']==country]['year'].count())
    lifeladder.append(df[df['Country name']==country]['Life Ladder'].mean())
    cID.append(''.join(countryID[countryID['Country'] == country]['country code'].to_list()))
    
df_country['Count'] = pd.Series(count, index = countries)
df_country['Life Ladder'] = pd.Series(lifeladder, index = countries)
df_country['Country'] = pd.Series(cID, index = countries)
df_country['Country name'] = countries

In [156]:
#construct the choropleth from the country dataframe created above
fig = px.choropleth(df_country, locations="Country name",
                    locationmode = 'country names',
                    color = "Count", # lifeExp is a column of gapminder
                    hover_name = df_country.index, # column to add to hover information
                    color_continuous_scale=px.colors.sequential.Plasma,
                    hover_data = {'Count':True,'Life Ladder':True,'Country':False, 'Country name':False},
                    title ='WHR: Number of Years Each Country Was Included',
                    labels={'Count':"Number of Years"})
fig.update_layout(
    title={
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})

fig.show()

In [157]:
#Construct a new dataframe to count the number of years each country is represented in the dataset
slice_ = pd.DataFrame(df['Country name'])
slice_['ones'] = np.ones(1949)
counts = pd.Series(dtype = 'float64')
for country in slice_['Country name'].unique():
    counts = counts.append(slice_[slice_['Country name'] == country]['ones'].cumsum())
df['Years Counted'] = counts
df.head()

Unnamed: 0_level_0,Country name,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect,Years Counted
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2008-01-01,Afghanistan,2008,3.72359,7.3701,0.450662,50.799999,0.718114,0.16764,0.881686,0.517637,0.258195,1.0
2009-01-01,Afghanistan,2009,4.401778,7.539972,0.552308,51.200001,0.678896,0.190099,0.850035,0.583926,0.237092,2.0
2010-01-01,Afghanistan,2010,4.758381,7.646709,0.539075,51.599998,0.600127,0.12059,0.706766,0.618265,0.275324,3.0
2011-01-01,Afghanistan,2011,3.831719,7.619532,0.521104,51.919998,0.495901,0.162427,0.731109,0.611387,0.267175,4.0
2012-01-01,Afghanistan,2012,3.782938,7.705479,0.520637,52.240002,0.530935,0.236032,0.77562,0.710385,0.267919,5.0


In [158]:
print(*df.columns, sep = "','")

Country name','year','Life Ladder','Log GDP per capita','Social support','Healthy life expectancy at birth','Freedom to make life choices','Generosity','Perceptions of corruption','Positive affect','Negative affect','Years Counted


In [159]:
#for each interesting variable we will calculate correlation values and put it in a new dataframe, corr_table

variables = ['year','Life Ladder','Log GDP per capita','Social support','Healthy life expectancy at birth',
             'Freedom to make life choices','Generosity','Perceptions of corruption','Positive affect','Negative affect']

corr_table = pd.DataFrame()
for var1 in variables:
    cor = []
    for var2 in variables:
        cor.append(df_merge[var1].corr(df_merge[var2]))
    corr_table[var1] = pd.Series(cor, index = variables)
corr_table

Unnamed: 0,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect
year,1.0,0.002022,0.092948,-0.034829,0.18933,0.263023,-0.056639,-0.123897,0.003025,0.286879
Life Ladder,0.002022,1.0,0.724779,0.616708,0.660138,0.382021,0.065873,0.022046,0.506809,-0.092551
Log GDP per capita,0.092948,0.724779,1.0,0.562385,0.781659,0.208053,-0.141147,0.049668,0.266041,0.012244
Social support,-0.034829,0.616708,0.562385,1.0,0.467395,0.313868,0.026853,0.191375,0.444999,-0.279893
Healthy life expectancy at birth,0.18933,0.660138,0.781659,0.467395,1.0,0.327909,-0.109194,-0.063143,0.301275,0.028169
Freedom to make life choices,0.263023,0.382021,0.208053,0.313868,0.327909,1.0,0.1646,-0.210182,0.558804,-0.096442
Generosity,-0.056639,0.065873,-0.141147,0.026853,-0.109194,0.1646,1.0,0.1373,0.243162,-0.066207
Perceptions of corruption,-0.123897,0.022046,0.049668,0.191375,-0.063143,-0.210182,0.1373,1.0,-0.035008,0.043603
Positive affect,0.003025,0.506809,0.266041,0.444999,0.301275,0.558804,0.243162,-0.035008,1.0,-0.30487
Negative affect,0.286879,-0.092551,0.012244,-0.279893,0.028169,-0.096442,-0.066207,0.043603,-0.30487,1.0


In [190]:
#create a heatmap figure to represent the correlations calculated above
fig = px.imshow(corr_table, title= "Table Heatmap of Data Correlations", 
               color_continuous_scale = 'RdBu',
                
                range_color = [-1,1])

#update layout to add a title, color bar title, customize
fig.update_layout(
    coloraxis_colorbar=dict(
    title="R Value",
    thicknessmode="pixels", thickness=50,
    lenmode="pixels", len=250,
    yanchor="top", y=1,
    ticks="outside"
                            ),
    title={
        'y':0.89,
        'x':0.485,
        'xanchor': 'center',
        'yanchor': 'top'})
pio.write_html(fig, 
"C:/Users/Martin/Documents/GitHub/World-Happiness-Data/Martin_Alvarez-Kuglen/Visualizations/WHRHeatmap_table.html", 
full_html=False)


fig.show()

In [119]:
pio.write_html(fig, "C:/Users/Martin/Documents/GitHub/heatmap.html", full_html=False)

In [197]:
#Use the interesting variables with correlations greater than 0.5 magnitude to construct scatter plots with regression line

variables = ['year','Life Ladder','Log GDP per capita','Social support','Healthy life expectancy at birth',
             'Freedom to make life choices','Generosity','Perceptions of corruption','Positive affect','Negative affect']
i = 0
useful_vars = []
for var1 in variables:
    for var2 in variables[i:]:
        if var1 != var2:
            if (corr_table[var1][var2] > 0.50) or (corr_table[var1][var2] < -0.50): 
                fig = px.scatter(df_merge, x = var1 , y= var2, trendline="ols",
                        title= "World Happiness Report: "+var1+" Versus "+var2)
                fig.data[1].line.color = 'red'
                pio.write_html(fig, 
                "C:/Users/Martin/Documents/GitHub/World-Happiness-Data/Martin_Alvarez-Kuglen/scatterplots/"+var1.replace(' ','_')+"_v_"+var2.replace(' ','_')+'.html', 
                full_html=False)

                fig.show()
                useful_vars.append((var1,var2,corr_table[var1][var2]))
    i = i+1


In [179]:
#same logic as the corr table above, but this time with two separate sets of variables

var_whr = ['Life Ladder', 'Social support', 'Positive affect',
                'Freedom to make life choices']

var_un = ['Surface area (km2)',
       'Population in thousands (2017)', 'Population density (per km2, 2017)',
       'Sex ratio (m per 100 f, 2017)',
       'GDP: Gross domestic product (million current US$)',
       'GDP per capita (current US$)', 'Economy: Agriculture (% of GVA)',
       'Economy: Industry (% of GVA)',
       'Economy: Services and other activity (% of GVA)',
       'Employment: Agriculture (% of employed)',
       'Employment: Industry (% of employed)',
       'Employment: Services (% of employed)',
       'Agricultural production index (2004-2006=100)',
       'Food production index (2004-2006=100)',
       'International trade: Exports (million US$)',
       'International trade: Imports (million US$)',
       'International trade: Balance (million US$)',
       'Urban population (% of total population)',
       'Urban population growth rate (average annual %)',
       'Fertility rate, total (live births per woman)',
       'Infant mortality rate (per 1000 live births',
       'Health: Total expenditure (% of GDP)',
       'Seats held by women in national parliaments %',
       'Mobile-cellular subscriptions (per 100 inhabitants).1',
       'Individuals using the Internet (per 100 inhabitants)',
       'Threatened species (number)',
       'CO2 emission estimates (million tons/tons per capita)',
       'Energy production, primary (Petajoules)',
       'Pop. using improved sanitation facilities (urban/rural, %)',
       'Net Official Development Assist. received (% of GNI)']

corr_table_2 = pd.DataFrame()
for var1 in var_whr:
    cor = []
    for var2 in var_un:
        cor.append(df_merge[var1].corr(df_merge[var2]))
    corr_table_2[var1] = pd.Series(cor, index = var_un)
corr_table_2

Unnamed: 0,Life Ladder,Social support,Positive affect,Freedom to make life choices
Surface area (km2),0.265861,0.178985,0.244723,0.164783
Population in thousands (2017),0.023807,-0.078183,0.127166,0.132413
"Population density (per km2, 2017)",-0.050277,-0.221087,-0.158265,0.004742
"Sex ratio (m per 100 f, 2017)",-0.038898,-0.163352,0.057487,0.045397
GDP: Gross domestic product (million current US$),0.260106,0.151649,0.204561,0.165298
GDP per capita (current US$),0.578725,0.37049,0.209008,0.165004
Economy: Agriculture (% of GVA),-0.54706,-0.462805,-0.246406,-0.104896
Economy: Industry (% of GVA),0.056105,0.167459,0.060348,-0.041782
Economy: Services and other activity (% of GVA),0.567226,0.389304,0.228658,0.148824
Employment: Agriculture (% of employed),-0.575183,-0.44956,-0.237591,-0.124593


In [191]:
#same as above, create a heatmap plot of the corr table
fig = px.imshow(corr_table_2, title= "Table Heatmap of Data Correlations", 
               color_continuous_scale = 'RdBu',
                
                range_color = [-1,1])

fig.update_layout(
    coloraxis_colorbar=dict(
    title="R Value",
    thicknessmode="pixels", thickness=50,
    lenmode="pixels", len=250,
    yanchor="top", y=1,
    ticks="outside"
                            ),
    title={
        'y':0.89,
        'x':0.485,
        'xanchor': 'center',
        'yanchor': 'top'})
pio.write_html(fig, 
"C:/Users/Martin/Documents/GitHub/World-Happiness-Data/Martin_Alvarez-Kuglen/Visualizations/WHR_UN_merged_Heatmap_table.html", 
full_html=False)

fig.show()

In [196]:
#same as above, create scatter plots for all variables with greater than 0.5 magnitude R values
var_whr = ['Life Ladder', 'Social support', 'Positive affect',
                'Freedom to make life choices']

var_un = ['Surface area (km2)',
       'Population in thousands (2017)', 'Population density (per km2, 2017)',
       'Sex ratio (m per 100 f, 2017)',
       'GDP: Gross domestic product (million current US$)',
       'GDP per capita (current US$)', 'Economy: Agriculture (% of GVA)',
       'Economy: Industry (% of GVA)',
       'Economy: Services and other activity (% of GVA)',
       'Employment: Agriculture (% of employed)',
       'Employment: Industry (% of employed)',
       'Employment: Services (% of employed)',
       'Agricultural production index (2004-2006=100)',
       'Food production index (2004-2006=100)',
       'International trade: Exports (million US$)',
       'International trade: Imports (million US$)',
       'International trade: Balance (million US$)',
       'Urban population (% of total population)',
       'Urban population growth rate (average annual %)',
       'Fertility rate, total (live births per woman)',
       'Infant mortality rate (per 1000 live births',
       'Health: Total expenditure (% of GDP)',
       'Seats held by women in national parliaments %',
       'Mobile-cellular subscriptions (per 100 inhabitants).1',
       'Individuals using the Internet (per 100 inhabitants)',
       'Threatened species (number)',
       'CO2 emission estimates (million tons/tons per capita)',
       'Energy production, primary (Petajoules)',
       'Pop. using improved sanitation facilities (urban/rural, %)',
       'Net Official Development Assist. received (% of GNI)']

i = 0
useful_vars_2 = []
for var1 in var_whr:
    for var2 in var_un:
        if var1 != var2:
            if (corr_table_2[var1][var2] > 0.50) or (corr_table_2[var1][var2] < -0.50): 
                fig = px.scatter(df_merge, x = var1 , y= var2, trendline="ols",
                        title= "World Happiness Report: "+var1+" Versus "+var2)
                fig.data[1].line.color = 'red'
                pio.write_html(fig, 
                "C:/Users/Martin/Documents/GitHub/World-Happiness-Data/Martin_Alvarez-Kuglen/scatterplots/"+var1.replace(' ','_')+"_v_"+var2.replace(' ','_').replace('.','').replace(':','').replace('%','').replace('$','')+'.html', 
                full_html=False)
                
                fig.show()
                useful_vars_2.append((var1,var2,corr_table_2[var1][var2]))
    i = i+1


FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/Martin/Documents/GitHub/World-Happiness-Data/Martin_Alvarez-Kuglen/scatterplots/Life_Ladder_v_Pop_using_improved_sanitation_facilities_(urban/rural,_).html'

In [185]:
#print out the values for viewing/putting on website
for entry in useful_vars:
    print(entry[0],"and",entry[1],"share a pearson correlation coefficient of", round(entry[2],2))

Life Ladder and Log GDP per capita share a pearson correlation coefficient of 0.72
Life Ladder and Social support share a pearson correlation coefficient of 0.62
Life Ladder and Healthy life expectancy at birth share a pearson correlation coefficient of 0.66
Life Ladder and Positive affect share a pearson correlation coefficient of 0.51
Log GDP per capita and Social support share a pearson correlation coefficient of 0.56
Log GDP per capita and Healthy life expectancy at birth share a pearson correlation coefficient of 0.78
Freedom to make life choices and Positive affect share a pearson correlation coefficient of 0.56


TypeError: type tuple doesn't define __round__ method

In [184]:
for entry in useful_vars_2:
    print(entry[0],"and",entry[1],"share a pearson correlation coefficient of", round(entry[2],2))

Life Ladder and GDP per capita (current US$) share a pearson correlation coefficient of 0.58
Life Ladder and Economy: Agriculture (% of GVA) share a pearson correlation coefficient of -0.55
Life Ladder and Economy: Services and other activity (% of GVA) share a pearson correlation coefficient of 0.57
Life Ladder and Employment: Agriculture (% of employed) share a pearson correlation coefficient of -0.58
Life Ladder and Employment: Services (% of employed) share a pearson correlation coefficient of 0.62
Life Ladder and Urban population (% of total population) share a pearson correlation coefficient of 0.64
Life Ladder and Fertility rate, total (live births per woman) share a pearson correlation coefficient of -0.55
Life Ladder and Infant mortality rate (per 1000 live births share a pearson correlation coefficient of -0.58
Life Ladder and Mobile-cellular subscriptions (per 100 inhabitants).1 share a pearson correlation coefficient of 0.62
Life Ladder and Pop. using improved sanitation fa