In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

It is vital to understand the actual data inside the files provided here. All the columns start at 0 and end before 2. It means the values are scaled to some extent. Moreover, GDP for the whole country should not fall to 0. As I read the description to the dataset it occured that the least favourable value in the consecutive columns is taken as the Dystopia Level - 0. This means:
* the country with the 0 is the worst in this exact category,
* the country with only 0 values is a hypothetical Dystopia, the level of happiness is assumed as around 1.85 for this place (specified in the description),
* all the values are somehow scaled according to this,
* the values are said to be the sum of Dystopia Score and the rest of the partial scores,
* Dystopia is strictly relative - its state depends on the overall state of the whole world,
* Dystopia will always exist - there will be always some lowest value and when all the countries would have the same value, all the world would become Dystopia.

## Import the data

Not neat. First of all, the categories vary across the years. Secondly, even the unchanging columns have their names modified. I presented already fixed version. What is more, I had to provide the regions for the latter years. I did so based on the ones form previous years.

In [2]:
file = open('happiness_set/2015.csv')
d15 = pd.read_csv('happiness_set/2015.csv')
file.close()
d15.drop(columns = ["Standard Error", "Dystopia Residual"],inplace=True)
d15.columns = ["country", "region", "rank", "score", "GDP", "family", "life expectancy", "freedom", "trust", "generosity"]
d15["year"] = 2015
d15.head()

Unnamed: 0,country,region,rank,score,GDP,family,life expectancy,freedom,trust,generosity,year
0,Switzerland,Western Europe,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2015
1,Iceland,Western Europe,2,7.561,1.30232,1.40223,0.94784,0.62877,0.14145,0.4363,2015
2,Denmark,Western Europe,3,7.527,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2015
3,Norway,Western Europe,4,7.522,1.459,1.33095,0.88521,0.66973,0.36503,0.34699,2015
4,Canada,North America,5,7.427,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2015


In [3]:
file = open('happiness_set/2016.csv')
d16 = pd.read_csv('happiness_set/2016.csv')
file.close()
d16.drop(columns = ["Lower Confidence Interval", "Upper Confidence Interval","Dystopia Residual"],inplace=True)
d16.columns = ["country", "region", "rank", "score", "GDP", "family", "life expectancy", "freedom", "trust", "generosity"]
d16["year"] = 2016
d16.head()

Unnamed: 0,country,region,rank,score,GDP,family,life expectancy,freedom,trust,generosity,year
0,Denmark,Western Europe,1,7.526,1.44178,1.16374,0.79504,0.57941,0.44453,0.36171,2016
1,Switzerland,Western Europe,2,7.509,1.52733,1.14524,0.86303,0.58557,0.41203,0.28083,2016
2,Iceland,Western Europe,3,7.501,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2016
3,Norway,Western Europe,4,7.498,1.57744,1.1269,0.79579,0.59609,0.35776,0.37895,2016
4,Finland,Western Europe,5,7.413,1.40598,1.13464,0.81091,0.57104,0.41004,0.25492,2016


In [4]:
new_df = pd.concat([d16[["country", "region"]], d15[["country", "region"]]]).drop_duplicates().reset_index(drop=True)
new_df.head()

Unnamed: 0,country,region
0,Denmark,Western Europe
1,Switzerland,Western Europe
2,Iceland,Western Europe
3,Norway,Western Europe
4,Finland,Western Europe


In [5]:
file = open('happiness_set/2017.csv')
d17 = pd.read_csv('happiness_set/2017.csv')
file.close()
d17.drop(columns = ["Whisker.high", "Whisker.low","Dystopia.Residual"],inplace=True)
d17.columns = ["country", "rank", "score", "GDP", "family", "life expectancy", "freedom", "trust", "generosity"]
d17["year"] = 2017
d17 = pd.merge(d17, new_df, on='country')
d17 = d17[d16.columns]
d17.head()

Unnamed: 0,country,region,rank,score,GDP,family,life expectancy,freedom,trust,generosity,year
0,Norway,Western Europe,1,7.537,1.616463,1.533524,0.796667,0.635423,0.362012,0.315964,2017
1,Denmark,Western Europe,2,7.522,1.482383,1.551122,0.792566,0.626007,0.35528,0.40077,2017
2,Iceland,Western Europe,3,7.504,1.480633,1.610574,0.833552,0.627163,0.47554,0.153527,2017
3,Switzerland,Western Europe,4,7.494,1.56498,1.516912,0.858131,0.620071,0.290549,0.367007,2017
4,Finland,Western Europe,5,7.469,1.443572,1.540247,0.809158,0.617951,0.245483,0.382612,2017


In [6]:
file = open('happiness_set/2018.csv')
d18 = pd.read_csv('happiness_set/2018.csv')
file.close()
d18.columns = ["rank", "country", "score", "GDP", "family", "life expectancy", "freedom", "generosity", "trust"]
d18["year"] = 2018
d18 = pd.merge(d18, new_df, on='country')
d18 = d18[d16.columns]
d18.head()

Unnamed: 0,country,region,rank,score,GDP,family,life expectancy,freedom,trust,generosity,year
0,Finland,Western Europe,1,7.632,1.305,1.592,0.874,0.681,0.393,0.202,2018
1,Norway,Western Europe,2,7.594,1.456,1.582,0.861,0.686,0.34,0.286,2018
2,Denmark,Western Europe,3,7.555,1.351,1.59,0.868,0.683,0.408,0.284,2018
3,Iceland,Western Europe,4,7.495,1.343,1.644,0.914,0.677,0.138,0.353,2018
4,Switzerland,Western Europe,5,7.487,1.42,1.549,0.927,0.66,0.357,0.256,2018


In [7]:
file = open('happiness_set/2019.csv')
d19 = pd.read_csv('happiness_set/2019.csv')
file.close()
d19.columns = ["rank", "country", "score", "GDP", "family", "life expectancy", "freedom", "generosity", "trust"]
d19["year"] = 2019
d19 = pd.merge(d19, new_df, on='country')
d19 = d19[d16.columns]
d19.head()

Unnamed: 0,country,region,rank,score,GDP,family,life expectancy,freedom,trust,generosity,year
0,Finland,Western Europe,1,7.769,1.34,1.587,0.986,0.596,0.393,0.153,2019
1,Denmark,Western Europe,2,7.6,1.383,1.573,0.996,0.592,0.41,0.252,2019
2,Norway,Western Europe,3,7.554,1.488,1.582,1.028,0.603,0.341,0.271,2019
3,Iceland,Western Europe,4,7.494,1.38,1.624,1.026,0.591,0.118,0.354,2019
4,Netherlands,Western Europe,5,7.488,1.396,1.522,0.999,0.557,0.298,0.322,2019


In [8]:
d = d15.append(d16, ignore_index=True).append(d17, ignore_index=True).append(d18, ignore_index=True).append(d19, ignore_index=True)