# World Happiness Report

The World Happiness Report measures the state of global happiness based on various quality-of-life factors. Its results are used to inform the policy-making decisions of governments and organizations, and to help experts assess a nation's progress. In this notebook, we explore some trends from 2015 to 2019 and study the correlations in the 2019 dataset.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from datetime import datetime
import matplotlib.pyplot as plt

# Load the five yearly report files (2015-2019) from the working directory,
# one DataFrame per year.
df_2015, df_2016, df_2017, df_2018, df_2019 = map(
    pd.read_csv,
    ('2015.csv', '2016.csv', '2017.csv', '2018.csv', '2019.csv'),
)

## Data Cleaning

In [2]:
# Discard, in place, every row that holds at least one missing value.
for yearly_frame in (df_2015, df_2016, df_2017, df_2018, df_2019):
    yearly_frame.dropna(inplace=True)

In [3]:
# rename columns
#
# The yearly files do not share one column layout, so the columns of interest
# are selected by position and given a single, shared set of names.

column_names = ['country', 'happiness-rank', 'happiness-score', 'gdp-per-capita', 'social-support', 'healthy-life-expectancy', 'freedom', 'generosity', 'government-trust']

# For each frame, the i-th position listed below receives column_names[i].
# The renaming goes through DataFrame.rename rather than assigning into
# `df.columns.values`: a pandas Index is documented as immutable, and writing
# into its backing array is unsupported (it bypasses the Index and can leave
# label lookups out of sync with the visible column names).
for yearly_frame, positions in (
    (df_2015, [0, 2, 3, 5, 6, 7, 8, 10, 9]),
    (df_2016, [0, 2, 3, 6, 7, 8, 9, 11, 10]),
    (df_2017, [0, 1, 2, 5, 6, 7, 8, 9, 10]),
    (df_2018, [1, 0, 2, 3, 4, 5, 6, 7, 8]),
    (df_2019, [1, 0, 2, 3, 4, 5, 6, 7, 8]),
):
    # Current labels at the chosen positions, in the same order as column_names.
    current_labels = yearly_frame.columns[positions]
    yearly_frame.rename(columns=dict(zip(current_labels, column_names)), inplace=True)

# The 2015 and 2016 frames have a 'Region' column; lower-case its label so it
# follows the same naming style as the columns above.
df_2015.rename(columns={'Region': 'region'}, inplace = True)
df_2016.rename(columns={'Region': 'region'}, inplace = True)

In [4]:
#determine which countries do not have information on regions

# Every distinct country name that appears in any of the five yearly frames.
unique_countries = pd.concat([df_2015['country'], df_2016['country'], df_2017['country'], df_2018['country'], df_2019['country']]).unique()

# Country and region values taken from the 2015 and 2016 frames, the ones
# that have a 'region' column here. Both are concatenated in the same frame
# order, so they can be paired element by element.
countries_with_region = pd.concat([df_2015['country'], df_2016['country']])
region = pd.concat([df_2015['region'], df_2016['region']])

# Build the membership lookup once instead of recomputing `.unique()` for
# every country inside the comprehension; the resulting list and its order
# are the same.
known_countries = set(countries_with_region)
missing_countries = [country for country in unique_countries if country not in known_countries]

print (f'Countries with No Mapped Region: {missing_countries}')

Countries with No Mapped Region: ['Taiwan Province of China', 'Hong Kong S.A.R., China', 'Trinidad & Tobago', 'Northern Cyprus', 'North Macedonia', 'Gambia']


In [5]:
#set country to region map
# Start from the (country, region) pairs of the 2015 and 2016 frames; when a
# country occurs more than once, the last pairing wins.
country_region_map = dict(zip(countries_with_region, region))

# Manually assign a region to the countries reported above as having none.
country_region_map.update({
    'Taiwan Province of China': 'Eastern Asia',
    'Hong Kong S.A.R., China': 'Eastern Asia',
    'Trinidad & Tobago': 'Latin America and Caribbean',
    'Northern Cyprus': 'Middle East and Northern Africa',
    'North Macedonia': 'Central and Eastern Europe',
    'Gambia': 'Sub-Saharan Africa',
})

# add region column to 2017 - 2019 datasets
# (a country missing from the map would get NaN as its region)
df_2017['region'] = df_2017['country'].map(country_region_map)
df_2018['region'] = df_2018['country'].map(country_region_map)
df_2019['region'] = df_2019['country'].map(country_region_map)

# Guarded so that re-running this cell does not add 'region' a second time,
# which would make later `df[column_names]` selections return duplicate columns.
if 'region' not in column_names:
    column_names.append('region')

In [6]:
# add year column
frames_by_year = {
    2015: df_2015,
    2016: df_2016,
    2017: df_2017,
    2018: df_2018,
    2019: df_2019,
}
for report_year, yearly_frame in frames_by_year.items():
    yearly_frame['year'] = report_year

# Guarded so that re-running this cell does not add 'year' a second time,
# which would make the selections below return duplicate columns.
if 'year' not in column_names:
    column_names.append('year')

# compile dataframes
# Stack the shared columns of all years in chronological order. Each frame
# keeps its own row labels, so index values repeat across years.

df = pd.concat([yearly_frame[column_names] for yearly_frame in frames_by_year.values()])