Data on Happiness Index for years 2015-2019

In [101]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st

In [102]:
# Per-year source files, given relative to the notebook's working directory
happiness_2015_path = "Happiness_2015.csv"
happiness_2016_path = "Happiness_2016.csv"
happiness_2017_path = "Happiness_2017.csv"
happiness_2018_path = "Happiness_2018.csv"
happiness_2019_path = "Happiness_2019.csv"

# Load each year's file into its own DataFrame, in chronological order
(
    happiness_2015,
    happiness_2016,
    happiness_2017,
    happiness_2018,
    happiness_2019,
) = map(
    pd.read_csv,
    (
        happiness_2015_path,
        happiness_2016_path,
        happiness_2017_path,
        happiness_2018_path,
        happiness_2019_path,
    ),
)

In [103]:
# Left-join the 2016 table onto the 2015 table by country: every 2015 row is
# kept, and columns that exist in both years come back with _x / _y suffixes
combined_dataframe1 = happiness_2015.merge(happiness_2016, on='Country', how='left')

In [104]:
# Inspect the 2015+2016 merge; the shared columns show up with an _x suffix
# (2015 side) and a _y suffix (2016 side)
print(combined_dataframe1)

         Country  Year_x  Happiness Rank_x  Happiness Score_x  Year_y  \
0    Switzerland    2015                 1              7.587  2016.0   
1        Iceland    2015                 2              7.561  2016.0   
2        Denmark    2015                 3              7.527  2016.0   
3         Norway    2015                 4              7.522  2016.0   
4         Canada    2015                 5              7.427  2016.0   
..           ...     ...               ...                ...     ...   
153       Rwanda    2015               154              3.465  2016.0   
154        Benin    2015               155              3.340  2016.0   
155        Syria    2015               156              3.006  2016.0   
156      Burundi    2015               157              2.905  2016.0   
157         Togo    2015               158              2.839  2016.0   

     Happiness Rank_y  Happiness Score_y  
0                 2.0              7.509  
1                 3.0              7.

In [105]:
# Swap the _x / _y merge suffixes for explicit year labels.
# The resulting '2015' / '2016' columns hold the year value itself.
combined_dataframe1.rename(
    columns={
        'Country': 'Country',
        'Year_x': '2015',
        'Happiness Rank_x': 'Rank 2015',
        'Happiness Score_x': 'Score 2015',
        'Year_y': '2016',
        'Happiness Rank_y': 'Rank 2016',
        'Happiness Score_y': 'Score 2016',
    },
    inplace=True,
)

In [106]:
# Confirm that the first merge now carries year-specific column names
print(combined_dataframe1)

         Country  2015  Rank 2015  Score 2015    2016  Rank 2016  Score 2016
0    Switzerland  2015          1       7.587  2016.0        2.0       7.509
1        Iceland  2015          2       7.561  2016.0        3.0       7.501
2        Denmark  2015          3       7.527  2016.0        1.0       7.526
3         Norway  2015          4       7.522  2016.0        4.0       7.498
4         Canada  2015          5       7.427  2016.0        6.0       7.404
..           ...   ...        ...         ...     ...        ...         ...
153       Rwanda  2015        154       3.465  2016.0      152.0       3.515
154        Benin  2015        155       3.340  2016.0      153.0       3.484
155        Syria  2015        156       3.006  2016.0      156.0       3.069
156      Burundi  2015        157       2.905  2016.0      157.0       2.905
157         Togo  2015        158       2.839  2016.0      155.0       3.303

[158 rows x 7 columns]


In [107]:
# Extend the 2015-2016 frame with the 2017 table, again as a left join on country
combined_dataframe2 = combined_dataframe1.merge(happiness_2017, on='Country', how='left')

In [108]:
# Inspect the frame after adding 2017; the new columns still have their
# generic, un-suffixed names at this point
print(combined_dataframe2)

         Country  2015  Rank 2015  Score 2015    2016  Rank 2016  Score 2016  \
0    Switzerland  2015          1       7.587  2016.0        2.0       7.509   
1        Iceland  2015          2       7.561  2016.0        3.0       7.501   
2        Denmark  2015          3       7.527  2016.0        1.0       7.526   
3         Norway  2015          4       7.522  2016.0        4.0       7.498   
4         Canada  2015          5       7.427  2016.0        6.0       7.404   
..           ...   ...        ...         ...     ...        ...         ...   
153       Rwanda  2015        154       3.465  2016.0      152.0       3.515   
154        Benin  2015        155       3.340  2016.0      153.0       3.484   
155        Syria  2015        156       3.006  2016.0      156.0       3.069   
156      Burundi  2015        157       2.905  2016.0      157.0       2.905   
157         Togo  2015        158       2.839  2016.0      155.0       3.303   

       Year  Happiness Rank  Happiness 

In [109]:
# Tag the freshly merged, still-generic columns with their year (2017)
combined_dataframe2.rename(
    columns={
        'Year': '2017',
        'Happiness Rank': 'Rank 2017',
        'Happiness Score': 'Score 2017',
    },
    inplace=True,
)

In [110]:
# Inspection of the renamed 2015-2017 frame is currently disabled;
# uncomment the line below to view it.
# print(combined_dataframe2)

In [111]:
# Bring in the 2018 table with the same left join on country
combined_dataframe3 = combined_dataframe2.merge(happiness_2018, on='Country', how='left')

In [112]:
# Inspect the frame after adding 2018, including the column names it ended up with
print(combined_dataframe3)

         Country  2015  Rank 2015  Score 2015    2016  Rank 2016  Score 2016  \
0    Switzerland  2015          1       7.587  2016.0        2.0       7.509   
1        Iceland  2015          2       7.561  2016.0        3.0       7.501   
2        Denmark  2015          3       7.527  2016.0        1.0       7.526   
3         Norway  2015          4       7.522  2016.0        4.0       7.498   
4         Canada  2015          5       7.427  2016.0        6.0       7.404   
..           ...   ...        ...         ...     ...        ...         ...   
153       Rwanda  2015        154       3.465  2016.0      152.0       3.515   
154        Benin  2015        155       3.340  2016.0      153.0       3.484   
155        Syria  2015        156       3.006  2016.0      156.0       3.069   
156      Burundi  2015        157       2.905  2016.0      157.0       2.905   
157         Togo  2015        158       2.839  2016.0      155.0       3.303   

       2017  Rank 2017  Score 2017    Y

In [113]:
# Tag the freshly merged, still-generic columns with their year (2018)
combined_dataframe3.rename(
    columns={
        'Year': '2018',
        'Happiness Rank': 'Rank 2018',
        'Happiness Score': 'Score 2018',
    },
    inplace=True,
)

In [114]:
# Confirm that the 2018 columns were renamed in the merged frame
print(combined_dataframe3)

         Country  2015  Rank 2015  Score 2015    2016  Rank 2016  Score 2016  \
0    Switzerland  2015          1       7.587  2016.0        2.0       7.509   
1        Iceland  2015          2       7.561  2016.0        3.0       7.501   
2        Denmark  2015          3       7.527  2016.0        1.0       7.526   
3         Norway  2015          4       7.522  2016.0        4.0       7.498   
4         Canada  2015          5       7.427  2016.0        6.0       7.404   
..           ...   ...        ...         ...     ...        ...         ...   
153       Rwanda  2015        154       3.465  2016.0      152.0       3.515   
154        Benin  2015        155       3.340  2016.0      153.0       3.484   
155        Syria  2015        156       3.006  2016.0      156.0       3.069   
156      Burundi  2015        157       2.905  2016.0      157.0       2.905   
157         Togo  2015        158       2.839  2016.0      155.0       3.303   

       2017  Rank 2017  Score 2017    2

In [115]:
# Final left join on country: adding 2019 yields the full 2015-2019 frame
happiness_index = combined_dataframe3.merge(happiness_2019, on='Country', how='left')

In [116]:
# Tag the last batch of generic columns with their year (2019)
happiness_index.rename(
    columns={
        'Year': '2019',
        'Happiness Rank': 'Rank 2019',
        'Happiness Score': 'Score 2019',
    },
    inplace=True,
)

In [117]:
# Preview the first five rows of the fully merged 2015-2019 frame
happiness_index.head()

Unnamed: 0,Country,2015,Rank 2015,Score 2015,2016,Rank 2016,Score 2016,2017,Rank 2017,Score 2017,2018,Rank 2018,Score 2018,2019,Rank 2019,Score 2019
0,Switzerland,2015,1,7.587,2016.0,2.0,7.509,2017.0,4.0,7.494,2018.0,5.0,7.487,2019.0,6.0,7.48
1,Iceland,2015,2,7.561,2016.0,3.0,7.501,2017.0,3.0,7.504,2018.0,4.0,7.495,2019.0,4.0,7.494
2,Denmark,2015,3,7.527,2016.0,1.0,7.526,2017.0,2.0,7.522,2018.0,3.0,7.555,2019.0,2.0,7.6
3,Norway,2015,4,7.522,2016.0,4.0,7.498,2017.0,1.0,7.537,2018.0,2.0,7.594,2019.0,3.0,7.554
4,Canada,2015,5,7.427,2016.0,6.0,7.404,2017.0,7.0,7.316,2018.0,7.0,7.328,2019.0,9.0,7.278


In [118]:
# Remove the per-year "year" columns ('2015' ... '2019'). Going by the preview
# above they only hold the year value itself, which is already encoded in the
# neighbouring 'Rank <year>' / 'Score <year>' column names.
# The list has to be defined in this cell: with the assignment commented out,
# the drop raises NameError on a fresh kernel (Restart & Run All).
columns_to_remove = ['2015', '2016', '2017', '2018', '2019']
happiness_index.drop(columns=columns_to_remove, inplace=True)

In [119]:
# Preview the first five rows again, now without the per-year "year" columns
happiness_index.head()

Unnamed: 0,Country,Rank 2015,Score 2015,Rank 2016,Score 2016,Rank 2017,Score 2017,Rank 2018,Score 2018,Rank 2019,Score 2019
0,Switzerland,1,7.587,2.0,7.509,4.0,7.494,5.0,7.487,6.0,7.48
1,Iceland,2,7.561,3.0,7.501,3.0,7.504,4.0,7.495,4.0,7.494
2,Denmark,3,7.527,1.0,7.526,2.0,7.522,3.0,7.555,2.0,7.6
3,Norway,4,7.522,4.0,7.498,1.0,7.537,2.0,7.594,3.0,7.554
4,Canada,5,7.427,6.0,7.404,7.0,7.316,7.0,7.328,9.0,7.278


In [120]:
# Substitute 0 for every missing value, in place.
# NOTE(review): after this step a rank or score of 0 means "no data for that
# year", not a real measurement - confirm downstream consumers treat it so.
happiness_index.fillna(value=0, inplace=True)


In [121]:
# The ranks for 2016-2019 show up as floats (2.0, 3.0, ...) in the merged frame;
# convert each of those columns to plain integers, then preview the result
for rank_column in ('Rank 2016', 'Rank 2017', 'Rank 2018', 'Rank 2019'):
    happiness_index[rank_column] = happiness_index[rank_column].astype(int)
happiness_index.head()

Unnamed: 0,Country,Rank 2015,Score 2015,Rank 2016,Score 2016,Rank 2017,Score 2017,Rank 2018,Score 2018,Rank 2019,Score 2019
0,Switzerland,1,7.587,2,7.509,4,7.494,5,7.487,6,7.48
1,Iceland,2,7.561,3,7.501,3,7.504,4,7.495,4,7.494
2,Denmark,3,7.527,1,7.526,2,7.522,3,7.555,2,7.6
3,Norway,4,7.522,4,7.498,1,7.537,2,7.594,3,7.554
4,Canada,5,7.427,6,7.404,7,7.316,7,7.328,9,7.278


In [122]:
# Scan the distinct 2017 scores for placeholders or anything else that looks off
unique_values = pd.unique(happiness_index['Score 2017'])
print(unique_values)

[7.494 7.504 7.522 7.537 7.316 7.469 7.377 7.284 7.314 7.213 7.079 7.006
 6.578 6.993 6.635 6.863 6.977 6.891 6.648 6.714 0.    5.25  6.572 6.452
 6.951 6.652 6.375 6.442 6.599 6.609 6.454 6.357 6.424 6.344 6.403 6.527
 6.105 6.168 6.003 5.971 6.098 5.92  5.838 6.008 6.087 5.964 5.823 5.493
 5.819 5.758 5.902 6.071 5.715 5.569 5.973 6.084 5.293 5.525 5.963 5.311
 5.81  5.621 5.872 5.279 5.822 5.629 5.611 5.262 5.074 5.5   5.004 5.011
 5.234 5.269 5.336 5.237 5.273 4.514 5.825 5.395 5.195 5.85  5.43  5.235
 5.175 4.55  4.644 5.182 3.808 5.23  4.955 5.227 5.225 5.324 5.181 5.041
 4.805 4.775 4.608 4.692 4.096 4.497 4.829 4.12  3.875 3.533 4.315 4.139
 3.603 4.28  4.962 4.46  4.709 4.292 4.553 4.376 3.766 4.545 4.286 3.97
 4.44  4.695 4.714 4.735 3.593 3.795 4.19  4.291 4.081 4.535 4.465 4.028
 4.168 3.349 3.644 2.693 3.936 3.507 4.18  4.032 3.794 3.471 3.657 3.462
 2.905 3.495]


In [123]:
#export file into a cleaned dataset csv file
file_path = 'C:/Users/valra/Repository/MONU_project1/Cleaned_Datasets/cleaned_happiness_index.csv'
happiness_index.to_csv(file_path, index=False)