In [None]:
import pandas as pd
import seaborn as sb
import altair as alt
import numpy as np
import json

# INTERACTIVE VISUALIZATION
!pip install chart_studio
!pip install --upgrade plotly
import chart_studio.plotly as py 
import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

!pip install jupyter-dash
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output 

# Happiness Report w/ Real Numbers

In [None]:
# Load the real-valued happiness data and discard the "Unnamed: 0" column
# that comes along with the CSV.
df_real = pd.read_csv("happiness_real_data.csv").drop(columns="Unnamed: 0")
# Summary statistics of the numeric columns.
df_real.describe()

Unnamed: 0,Overall rank,Score,GDP (Billions),Social Score,Basic Human Needs,Foundation of Wellbeing,Opportunity,Life Expectancy,population,lat,lng,GDP per Capita
count,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0
mean,76.134328,5.469343,671.337985,69.768731,77.41291,71.924104,59.968955,73.580522,53714720.0,22.0249,19.30893,1.726267e-05
std,45.963174,1.138967,2529.571149,15.335755,18.118322,14.1304,16.010124,7.671875,173702100.0,25.457049,54.767562,2.416936e-05
min,1.0,2.853,2.478,31.06,21.31,36.69,20.73,54.022,366425.0,-41.0,-102.0,2.685274e-07
25%,35.25,4.566,19.62475,57.0175,66.21,61.4,48.99,68.0245,5165454.0,7.0,-2.0,1.983754e-06
50%,74.5,5.4495,66.247,71.06,83.38,72.57,58.34,75.361,11854700.0,24.5,22.0,5.916741e-06
75%,115.75,6.2455,383.964,83.145,90.895,83.5375,74.0075,79.0465,37991630.0,42.375,45.0,2.230575e-05
max,156.0,7.769,22939.58,92.73,98.07,93.39,88.66,84.91,1402112000.0,65.0,174.0,0.0001324914


In [None]:
# Peek at the first row to see every column and an example value.
df_real.iloc[:1]

Unnamed: 0,Overall rank,Score,Country,GDP (Billions),Social Score,Basic Human Needs,Foundation of Wellbeing,Opportunity,Life Expectancy,language,population,lat,lng,independent,landlocked,cca3,GDP per Capita,Region
0,1,7.769,Finland,296.016,91.89,96.22,91.29,88.15,82.312,"{'fin': 'Finnish', 'swe': 'Swedish'}",5530719,64.0,26.0,True,False,FIN,5.4e-05,Western Europe


In [None]:
# Distribution of Happiness: bar histogram of Score, binned in steps of 0.2
# over the range [0, 8.5].
alt.Chart(df_real).mark_bar().encode(
    x=alt.X("Score", bin=alt.Bin(extent=[0, 8.5], step=0.2)),
    y=alt.Y("count()"),
)

In [None]:
# PRETTY MAP TIME!
# World choropleth coloured by happiness Score; countries are located via the
# "cca3" column and the hover label shows the country name.
fig0 = px.choropleth(
    df_real,
    locations="cca3",
    color="Score",
    hover_name="Country",
    color_continuous_scale=px.colors.sequential.Plasma,
    scope="world",
)

# Wrap the figure in a JupyterDash app and render it inline in the notebook.
app = JupyterDash(__name__)
app.layout = html.Div(children=[dcc.Graph(figure=fig0)])
app.run_server(mode="inline")

<IPython.core.display.Javascript object>

In [None]:
# Happiness Score against GDP per capita: one bubble per country, coloured by
# region and sized by "Overall rank".
fig1 = px.scatter(
    df_real,
    x="GDP per Capita",
    y="Score",
    animation_group="Country",
    size="Overall rank",
    color="Region",
    hover_name="Country",
)

# Wrap the figure in a JupyterDash app and render it inline in the notebook.
app = JupyterDash(__name__)
app.layout = html.Div(children=[dcc.Graph(figure=fig1)])
app.run_server(mode="inline")

<IPython.core.display.Javascript object>

In [None]:
# World choropleth coloured by GDP per capita.
fig2 = px.choropleth(
    df_real,
    locations="cca3",
    color="GDP per Capita",
    hover_name="Country",
    color_continuous_scale=px.colors.sequential.Plasma,
    scope="world",
)

# Wrap the figure in a JupyterDash app and render it inline in the notebook.
app = JupyterDash(__name__)
app.layout = html.Div(children=[dcc.Graph(figure=fig2)])
app.run_server(mode="inline")

<IPython.core.display.Javascript object>

In [None]:
# Happiness Score against total GDP (in billions): one bubble per country,
# coloured by region and sized by "Overall rank".
fig3 = px.scatter(
    df_real,
    x="GDP (Billions)",
    y="Score",
    animation_group="Country",
    size="Overall rank",
    color="Region",
    hover_name="Country",
)

# Wrap the figure in a JupyterDash app and render it inline in the notebook.
app = JupyterDash(__name__)
app.layout = html.Div(children=[dcc.Graph(figure=fig3)])
app.run_server(mode="inline")

<IPython.core.display.Javascript object>

In [None]:
# World choropleth coloured by total GDP (in billions).
fig4 = px.choropleth(
    df_real,
    locations="cca3",
    color="GDP (Billions)",
    hover_name="Country",
    color_continuous_scale=px.colors.sequential.Plasma,
    scope="world",
)

# Wrap the figure in a JupyterDash app and render it inline in the notebook.
app = JupyterDash(__name__)
app.layout = html.Div(children=[dcc.Graph(figure=fig4)])
app.run_server(mode="inline")

<IPython.core.display.Javascript object>

In [None]:
# Happiness Score against Social Score: one bubble per country, coloured by
# region and sized by "Overall rank".
fig5 = px.scatter(
    df_real,
    x="Social Score",
    y="Score",
    animation_group="Country",
    size="Overall rank",
    color="Region",
    hover_name="Country",
)

# Wrap the figure in a JupyterDash app and render it inline in the notebook.
app = JupyterDash(__name__)
app.layout = html.Div(children=[dcc.Graph(figure=fig5)])
app.run_server(mode="inline")

<IPython.core.display.Javascript object>

In [None]:
# World choropleth coloured by Social Score.
fig6 = px.choropleth(
    df_real,
    locations="cca3",
    color="Social Score",
    hover_name="Country",
    color_continuous_scale=px.colors.sequential.Plasma,
    scope="world",
)

# Wrap the figure in a JupyterDash app and render it inline in the notebook.
app = JupyterDash(__name__)
app.layout = html.Div(children=[dcc.Graph(figure=fig6)])
app.run_server(mode="inline")

<IPython.core.display.Javascript object>

In [None]:
# Happiness Score against Life Expectancy: one bubble per country, coloured by
# region and sized by "Overall rank".
fig7 = px.scatter(
    df_real,
    x="Life Expectancy",
    y="Score",
    animation_group="Country",
    size="Overall rank",
    color="Region",
    hover_name="Country",
)

# Wrap the figure in a JupyterDash app and render it inline in the notebook.
app = JupyterDash(__name__)
app.layout = html.Div(children=[dcc.Graph(figure=fig7)])
app.run_server(mode="inline")

<IPython.core.display.Javascript object>

In [None]:
# World choropleth coloured by Life Expectancy.
fig8 = px.choropleth(
    df_real,
    locations="cca3",
    color="Life Expectancy",
    hover_name="Country",
    color_continuous_scale=px.colors.sequential.Plasma,
    scope="world",
)

# Wrap the figure in a JupyterDash app and render it inline in the notebook.
app = JupyterDash(__name__)
app.layout = html.Div(children=[dcc.Graph(figure=fig8)])
app.run_server(mode="inline")

<IPython.core.display.Javascript object>

In [None]:
# Averages by Region
# Start the per-region summary table with one row per distinct region, in the
# order the regions first appear in df_real.
df_avg = pd.DataFrame({"Region": df_real["Region"].unique()})

In [None]:
# Per-region mean of every indicator, computed in one pass straight from
# df_real. Rebuilding df_avg from the source frame (instead of merging onto
# whatever df_avg currently holds) keeps this cell idempotent: re-running it
# no longer produces suffixed duplicate columns such as "Score_x"/"Score_y".
region_avg_columns = [
    "Score",
    "GDP per Capita",
    "Social Score",
    "Basic Human Needs",
    "Foundation of Wellbeing",
    "Opportunity",
    "Life Expectancy",
]
# sort=False keeps the regions in order of first appearance in df_real, i.e.
# the same row order as df_real["Region"].unique(); reset_index() turns the
# group key back into an ordinary "Region" column.
df_avg = (
    df_real.groupby("Region", sort=False)[region_avg_columns]
    .mean()
    .reset_index()
)

In [None]:
# Pairwise correlation of the regional averages. The text "Region" column is
# excluded explicitly: pandas >= 2.0 no longer drops non-numeric columns
# silently in DataFrame.corr() and raises on them instead.
df_avg.select_dtypes(include="number").corr()

Unnamed: 0,Score,GDP per Capita,Social Score,Basic Human Needs,Foundation of Wellbeing,Opportunity,Life Expectancy
Score,1.0,0.915738,0.966939,0.892055,0.960799,0.9575,0.915779
GDP per Capita,0.915738,1.0,0.90057,0.783389,0.877464,0.951435,0.818689
Social Score,0.966939,0.90057,1.0,0.950753,0.988504,0.967837,0.950816
Basic Human Needs,0.892055,0.783389,0.950753,1.0,0.915191,0.849814,0.980666
Foundation of Wellbeing,0.960799,0.877464,0.988504,0.915191,1.0,0.960804,0.911354
Opportunity,0.9575,0.951435,0.967837,0.849814,0.960804,1.0,0.871558
Life Expectancy,0.915779,0.818689,0.950816,0.980666,0.911354,0.871558,1.0


In [None]:
# Life Expectancy against GDP per capita: one bubble per country, coloured by
# region and sized by "Overall rank".
fig100 = px.scatter(
    df_real,
    x="GDP per Capita",
    y="Life Expectancy",
    animation_group="Country",
    size="Overall rank",
    color="Region",
    hover_name="Country",
)

# Wrap the figure in a JupyterDash app and render it inline in the notebook.
app = JupyterDash(__name__)
app.layout = html.Div(children=[dcc.Graph(figure=fig100)])
app.run_server(mode="inline")

<IPython.core.display.Javascript object>

In [None]:
# 3-D scatter of GDP per capita (x), Social Score (y) and happiness Score (z),
# coloured by region and sized by "Overall rank".
fig101 = px.scatter_3d(
    df_real,
    x="GDP per Capita",
    y="Social Score",
    z="Score",
    animation_group="Country",
    size="Overall rank",
    color="Region",
    hover_name="Country",
)

# Wrap the figure in a JupyterDash app and render it inline in the notebook.
app = JupyterDash(__name__)
app.layout = html.Div(children=[dcc.Graph(figure=fig101)])
app.run_server(mode="inline")

<IPython.core.display.Javascript object>

# World Happiness Report Data

In [None]:
# Load the multi-year World Happiness Report data and discard the
# "Unnamed: 0" column that comes along with the CSV.
df = pd.read_csv("world_happiness_report_data.csv").drop(columns="Unnamed: 0")
# Summary statistics of the numeric columns.
df.describe()

Unnamed: 0,Overall rank,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption,Year,population,lat,lng
count,782.0,782.0,782.0,782.0,782.0,782.0,782.0,782.0,782.0,782.0,782.0,782.0
mean,78.69821,5.379018,0.916047,1.078392,0.612416,0.411091,0.218576,0.125418,2016.993606,49494760.0,22.044631,21.094233
std,45.182384,1.127456,0.40734,0.329548,0.248309,0.15288,0.122321,0.10575,1.417364,160981800.0,24.269468,54.714351
min,1.0,2.693,0.0,0.0,0.0,0.0,0.0,0.0,2015.0,366425.0,-41.0,-102.0
25%,40.0,4.50975,0.6065,0.869363,0.440183,0.309768,0.13,0.05425,2016.0,5084300.0,8.0,-2.0
50%,79.0,5.322,0.982205,1.124735,0.64731,0.431,0.201982,0.091033,2017.0,11818620.0,24.0,24.0
75%,118.0,6.1895,1.236187,1.32725,0.808,0.531,0.278832,0.155861,2018.0,37950800.0,41.0,46.6875
max,158.0,7.769,2.096,1.644,1.141,0.724,0.838075,0.55191,2019.0,1402112000.0,65.0,174.0


Wow, the standard deviations of the six main factor columns (GDP per capita through Perceptions of corruption) already appear in descending order.
I want to take a look at each column.


In [None]:
# Peek at the first row to see every column and an example value.
df.iloc[:1]

Unnamed: 0,Overall rank,Country,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption,Year,Region,language,population,lat,lng,independent,landlocked,cca3
0,1,Finland,7.769,1.34,1.587,0.986,0.596,0.153,0.393,2019,Western Europe,"{'fin': 'Finnish', 'swe': 'Swedish'}",5530719,64.0,26.0,True,False,FIN


What I can interpret from these two plots is that money matters.

In [None]:
# Is happiness about the same in different regions? Group by region and
# compare the mean Score, lowest first.
# At a glance, and from my own knowledge, the less developed regions of the
# world look less happy. Next, let's dig into some of the individual stats.
(
    df.groupby("Region")["Score"]
    .mean()
    .sort_values()
)

Region
Sub-Saharan Africa                 4.190153
Southern Asia                      4.580657
Southeastern Asia                  5.335227
Middle East and Northern Africa    5.346173
Central and Eastern Europe         5.428875
Eastern Asia                       5.651567
Latin America and Caribbean        6.023811
Western Europe                     6.745029
North America                      7.174700
Australia and New Zealand          7.294600
Name: Score, dtype: float64

In [None]:
# GDP: mean "GDP per capita" per region, lowest first.
(
    df.groupby("Region")["GDP per capita"]
    .mean()
    .sort_values()
)

Region
Sub-Saharan Africa                 0.448124
Southern Asia                      0.638813
Southeastern Asia                  0.893340
Latin America and Caribbean        0.941661
Central and Eastern Europe         1.018734
Middle East and Northern Africa    1.105559
Eastern Asia                       1.237695
Australia and New Zealand          1.356197
Western Europe                     1.368955
North America                      1.422037
Name: GDP per capita, dtype: float64

In [None]:
# Score against GDP per capita, animated over Year; one bubble per country,
# coloured by region and sized by "Overall rank".
fig9 = px.scatter(
    df,
    x="GDP per capita",
    y="Score",
    animation_frame="Year",
    animation_group="Country",
    size="Overall rank",
    color="Region",
    hover_name="Country",
)

# Wrap the figure in a JupyterDash app and render it inline in the notebook.
app = JupyterDash(__name__)
app.layout = html.Div(children=[dcc.Graph(figure=fig9)])
app.run_server(mode="inline")

<IPython.core.display.Javascript object>

In [None]:
# Social Support: mean "Social support" per region, lowest first.
(
    df.groupby("Region")["Social support"]
    .mean()
    .sort_values()
)

Region
Southern Asia                      0.803835
Sub-Saharan Africa                 0.845690
Middle East and Northern Africa    0.998324
Southeastern Asia                  1.093673
Central and Eastern Europe         1.171523
Latin America and Caribbean        1.187033
Eastern Asia                       1.199232
Western Europe                     1.338420
North America                      1.357991
Australia and New Zealand          1.424368
Name: Social support, dtype: float64

In [None]:
# Score against Social support, animated over Year; one bubble per country,
# coloured by region and sized by "Overall rank".
fig10 = px.scatter(
    df,
    x="Social support",
    y="Score",
    animation_frame="Year",
    animation_group="Country",
    size="Overall rank",
    color="Region",
    hover_name="Country",
)

# Wrap the figure in a JupyterDash app and render it inline in the notebook.
app = JupyterDash(__name__)
app.layout = html.Div(children=[dcc.Graph(figure=fig10)])
app.run_server(mode="inline")

<IPython.core.display.Javascript object>

In [None]:
# Healthy life expectancy: mean per region, lowest first.
(
    df.groupby("Region")["Healthy life expectancy"]
    .mean()
    .sort_values()
)

Region
Sub-Saharan Africa                 0.283879
Southern Asia                      0.513089
Southeastern Asia                  0.635452
Middle East and Northern Africa    0.671890
Latin America and Caribbean        0.676074
Central and Eastern Europe         0.695550
North America                      0.861086
Eastern Asia                       0.862612
Western Europe                     0.892135
Australia and New Zealand          0.903074
Name: Healthy life expectancy, dtype: float64

In [None]:
# Score against Healthy life expectancy, animated over Year; one bubble per
# country, coloured by region and sized by "Overall rank".
fig11 = px.scatter(
    df,
    x="Healthy life expectancy",
    y="Score",
    animation_frame="Year",
    animation_group="Country",
    size="Overall rank",
    color="Region",
    hover_name="Country",
)

# Wrap the figure in a JupyterDash app and render it inline in the notebook.
app = JupyterDash(__name__)
app.layout = html.Div(children=[dcc.Graph(figure=fig11)])
app.run_server(mode="inline")

<IPython.core.display.Javascript object>

In [None]:
# Freedom to make life choices: mean per region, lowest first.
(
    df.groupby("Region")["Freedom to make life choices"]
    .mean()
    .sort_values()
)

Region
Middle East and Northern Africa    0.350870
Central and Eastern Europe         0.352283
Sub-Saharan Africa                 0.353638
Southern Asia                      0.392867
Eastern Asia                       0.418185
Latin America and Caribbean        0.464330
Western Europe                     0.515215
Southeastern Asia                  0.543723
North America                      0.558918
Australia and New Zealand          0.611413
Name: Freedom to make life choices, dtype: float64

In [None]:
# Score against Freedom to make life choices, animated over Year; one bubble
# per country, coloured by region and sized by "Overall rank".
fig12 = px.scatter(
    df,
    x="Freedom to make life choices",
    y="Score",
    animation_frame="Year",
    animation_group="Country",
    size="Overall rank",
    color="Region",
    hover_name="Country",
)

# Wrap the figure in a JupyterDash app and render it inline in the notebook.
app = JupyterDash(__name__)
app.layout = html.Div(children=[dcc.Graph(figure=fig12)])
app.run_server(mode="inline")

<IPython.core.display.Javascript object>

In [None]:
# Generosity: mean per region, lowest first.
(
    df.groupby("Region")["Generosity"]
    .mean()
    .sort_values()
)

Region
Central and Eastern Europe         0.156806
Middle East and Northern Africa    0.178408
Latin America and Caribbean        0.185378
Eastern Asia                       0.199801
Sub-Saharan Africa                 0.207592
Western Europe                     0.271151
Southern Asia                      0.300888
North America                      0.372339
Southeastern Asia                  0.384195
Australia and New Zealand          0.424441
Name: Generosity, dtype: float64

In [None]:
# Score against Generosity, animated over Year; one bubble per country,
# coloured by region and sized by "Overall rank".
fig13 = px.scatter(
    df,
    x="Generosity",
    y="Score",
    animation_frame="Year",
    animation_group="Country",
    size="Overall rank",
    color="Region",
    hover_name="Country",
)

# Wrap the figure in a JupyterDash app and render it inline in the notebook.
app = JupyterDash(__name__)
app.layout = html.Div(children=[dcc.Graph(figure=fig13)])
app.run_server(mode="inline")

<IPython.core.display.Javascript object>

In [None]:
# Perceptions of corruption: mean per region, lowest first.
(
    df.groupby("Region")["Perceptions of corruption"]
    .mean()
    .sort_values()
)

Region
Central and Eastern Europe         0.075969
Latin America and Caribbean        0.092432
Southern Asia                      0.100357
Sub-Saharan Africa                 0.105985
Eastern Asia                       0.117992
Southeastern Asia                  0.136140
Middle East and Northern Africa    0.144609
Western Europe                     0.221963
North America                      0.223345
Australia and New Zealand          0.357294
Name: Perceptions of corruption, dtype: float64

In [None]:
# Language is an interesting one: I am going to see if English as an official
# language correlates with anything.
# The saved df.head(1) output shows "language" as text such as
# "{'fin': 'Finnish', 'swe': 'Swedish'}", so a literal substring search finds
# English. na=False marks rows with a missing language entry as False instead
# of raising the TypeError that `"English" in d` hits on NaN.
df["Speaks English"] = df["language"].str.contains("English", regex=False, na=False)

In [None]:
# Score against GDP per capita, coloured by the "Speaks English" flag and
# animated over Year. Doesn't look like there is any correlation with the flag.
fig14 = px.scatter(
    df,
    x="GDP per capita",
    y="Score",
    animation_frame="Year",
    animation_group="Country",
    size="Overall rank",
    color="Speaks English",
    hover_name="Country",
)

# Wrap the figure in a JupyterDash app and render it inline in the notebook.
app = JupyterDash(__name__)
app.layout = html.Div(children=[dcc.Graph(figure=fig14)])
app.run_server(mode="inline")

<IPython.core.display.Javascript object>

In [None]:
# Absolute distance from the equator, taken as the magnitude of the latitude.
df["Dist from Equator"] = df["lat"].abs()

In [None]:
# Distance from the equator against Score, animated over Year. The scatter
# looks noisy, but the correlation table saved further down in this notebook
# puts Score vs. "Dist from Equator" at about 0.50, a moderate positive
# relationship.
fig15 = px.scatter(
    df,
    x="Score",
    y="Dist from Equator",
    animation_frame="Year",
    animation_group="Country",
    size="Overall rank",
    color="Region",
    hover_name="Country",
)

# Wrap the figure in a JupyterDash app and render it inline in the notebook.
app = JupyterDash(__name__)
app.layout = html.Div(children=[dcc.Graph(figure=fig15)])
app.run_server(mode="inline")

<IPython.core.display.Javascript object>

In [None]:
# Pairwise correlation of the numeric and boolean columns. The text columns
# (e.g. Country, Region, language, cca3) are excluded explicitly: pandas >= 2.0
# no longer drops non-numeric columns silently in DataFrame.corr() and raises
# on them instead. Boolean columns are kept so flags such as "independent",
# "landlocked" and "Speaks English" still appear in the table.
df.select_dtypes(include=["number", "bool"]).corr()

Unnamed: 0,Overall rank,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption,Year,population,lat,lng,independent,landlocked,Speaks English,Dist from Equator
Overall rank,1.0,-0.992066,-0.794791,-0.644842,-0.743655,-0.537942,-0.117713,-0.372162,-0.007768,0.053882,-0.331933,0.186464,0.037043,0.214386,0.094786,-0.484403
Score,-0.992066,1.0,0.789284,0.648799,0.742456,0.551258,0.137578,0.397821,0.007065,-0.048664,0.344163,-0.173984,-0.031328,-0.218732,-0.088482,0.500633
GDP per capita,-0.794791,0.789284,1.0,0.585966,0.784338,0.340511,-0.01456,0.304181,0.019768,-0.018574,0.428778,0.010837,-0.091712,-0.271808,-0.091828,0.578201
Social support,-0.644842,0.648799,0.585966,1.0,0.57265,0.420361,-0.037262,0.126481,0.367431,-0.093306,0.230123,-0.077434,-0.030157,-0.060764,-0.032687,0.411152
Healthy life expectancy,-0.743655,0.742456,0.784338,0.57265,1.0,0.340745,0.010638,0.250463,0.130302,0.001132,0.471188,0.00702,-0.121733,-0.25901,-0.213855,0.573618
Freedom to make life choices,-0.537942,0.551258,0.340511,0.420361,0.340745,1.0,0.290706,0.45952,0.010353,0.050203,-0.012516,-0.008276,0.032967,-0.083569,0.146276,0.078588
Generosity,-0.117713,0.137578,-0.01456,-0.037262,0.010638,0.290706,1.0,0.318945,-0.192587,-0.046139,-0.019313,0.210941,-0.006378,-0.012073,0.220485,-0.037814
Perceptions of corruption,-0.372162,0.397821,0.304181,0.126481,0.250463,0.45952,0.318945,1.0,-0.122339,-0.087093,0.114446,0.117732,-0.045179,-0.034386,0.08887,0.194035
Year,-0.007768,0.007065,0.019768,0.367431,0.130302,0.010353,-0.192587,-0.122339,1.0,0.000733,-0.001179,0.00157,0.005725,0.010933,0.011106,0.005708
population,0.053882,-0.048664,-0.018574,-0.093306,0.001132,0.050203,-0.046139,-0.087093,0.000733,1.0,0.006775,0.162538,0.034232,-0.125195,0.108463,-0.043198


In [None]:
# How about independence? Only about 2% of the countries in each year
# (3-4 out of roughly 156) are not independent.
# That is not a big enough sample to tell whether independence makes a
# difference to a country's happiness score.
(
    df.groupby("Year")["independent"]
    .value_counts()
)

Year  independent
2015  True           155
      False            3
2016  True           153
      False            4
2017  True           152
      False            3
2018  True           153
      False            3
2019  True           153
      False            3
Name: independent, dtype: int64

In [None]:
# Not super useful, but it looks great: 3-D scatter of Perceptions of
# corruption (x), Score (y) and Healthy life expectancy (z), animated over
# Year, coloured by region and sized by "Overall rank".
fig16 = px.scatter_3d(
    df,
    x="Perceptions of corruption",
    y="Score",
    z="Healthy life expectancy",
    animation_frame="Year",
    animation_group="Country",
    size="Overall rank",
    color="Region",
    hover_name="Country",
)

# Wrap the figure in a JupyterDash app and render it inline in the notebook.
app = JupyterDash(__name__)
app.layout = html.Div(children=[dcc.Graph(figure=fig16)])
app.run_server(mode="inline")

<IPython.core.display.Javascript object>