# The Happiness Project - Data Exploration and Cleaning



In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

import gmaps
import requests
import json

import statsmodels
import statsmodels.api as sm
from scipy.stats import linregress

### 2021 Happiness Data

In [2]:
# Load the 2021 World Happiness Report
raw_happiness_df = pd.read_csv("Data/world-happiness-report-2021.csv")

# Tag every row with the report year (stored as the string "2021") in the third column
raw_happiness_df.insert(2, "Year", "2021")

# Map the report's column headings to the shorter names used in this project
happiness_2021_renames = {
    'Country name': 'Country',
    'Regional indicator': 'Region',
    'Ladder score': 'Happiness Score',
    'Social support': 'Social Support',
    'Logged GDP per capita': 'GDP per Capita',
    'Healthy life expectancy': 'Life Expectancy',
    'Freedom to make life choices': 'Freedom',
    'Perceptions of corruption': 'Corruption',
}
raw_happiness_df = raw_happiness_df.rename(columns=happiness_2021_renames)

# Columns from the report that this analysis does not use
unused_2021_columns = [
    'Standard error of ladder score',
    'upperwhisker',
    'lowerwhisker',
    'Ladder score in Dystopia',
    'Explained by: Log GDP per capita',
    'Explained by: Social support',
    'Explained by: Healthy life expectancy',
    'Explained by: Freedom to make life choices',
    'Explained by: Generosity',
    'Explained by: Perceptions of corruption',
    'Dystopia + residual',
]
happiness_df = raw_happiness_df.drop(columns=unused_2021_columns)

# Save the cleaned table, then preview its first 20 rows
happiness_df.to_csv("Data/Final Dataframes/happiness_df.csv", index=False)
happiness_df.head(20)

Unnamed: 0,Country,Region,Year,Happiness Score,GDP per Capita,Social Support,Life Expectancy,Freedom,Generosity,Corruption
0,Finland,Western Europe,2021,7.842,10.775,0.954,72.0,0.949,-0.098,0.186
1,Denmark,Western Europe,2021,7.62,10.933,0.954,72.7,0.946,0.03,0.179
2,Switzerland,Western Europe,2021,7.571,11.117,0.942,74.4,0.919,0.025,0.292
3,Iceland,Western Europe,2021,7.554,10.878,0.983,73.0,0.955,0.16,0.673
4,Netherlands,Western Europe,2021,7.464,10.932,0.942,72.4,0.913,0.175,0.338
5,Norway,Western Europe,2021,7.392,11.053,0.954,73.3,0.96,0.093,0.27
6,Sweden,Western Europe,2021,7.363,10.867,0.934,72.7,0.945,0.086,0.237
7,Luxembourg,Western Europe,2021,7.324,11.647,0.908,72.6,0.907,-0.034,0.386
8,New Zealand,North America and ANZ,2021,7.277,10.643,0.948,73.4,0.929,0.134,0.242
9,Austria,Western Europe,2021,7.268,10.906,0.934,73.3,0.908,0.042,0.481


### Historical Happiness Data

In [3]:
# Load the historical World Happiness Report (2005-2020)
raw_happiness_time = pd.read_csv("Data/world-happiness-report.csv")

# Add a placeholder Region column (empty strings) as the third column
raw_happiness_time.insert(2, "Region", "")

# Map the report's column headings to the shorter names used in this project.
# Headings that are absent from the file are simply ignored by rename().
happiness_time_renames = {
    'Country name': 'Country',
    'year': 'Year',
    'Life Ladder': 'Happiness Score',
    'Social support': 'Social Support',
    'Healthy life expectancy at birth': 'Life Expectancy',
    'Log GDP per capita': 'GDP per Capita',
    'Healthy life expectancy': 'Life Expectancy',
    'Freedom to make life choices': 'Freedom',
    'Perceptions of corruption': 'Corruption',
}
raw_happiness_time = raw_happiness_time.rename(columns=happiness_time_renames)

# Remove the affect columns, which this analysis does not use
unused_time_columns = ['Positive affect', 'Negative affect']
happiness_time = raw_happiness_time.drop(columns=unused_time_columns)
happiness_time

Unnamed: 0,Country,Year,Region,Happiness Score,GDP per Capita,Social Support,Life Expectancy,Freedom,Generosity,Corruption
0,Afghanistan,2008,,3.724,7.370,0.451,50.80,0.718,0.168,0.882
1,Afghanistan,2009,,4.402,7.540,0.552,51.20,0.679,0.190,0.850
2,Afghanistan,2010,,4.758,7.647,0.539,51.60,0.600,0.121,0.707
3,Afghanistan,2011,,3.832,7.620,0.521,51.92,0.496,0.162,0.731
4,Afghanistan,2012,,3.783,7.705,0.521,52.24,0.531,0.236,0.776
...,...,...,...,...,...,...,...,...,...,...
1944,Zimbabwe,2016,,3.735,7.984,0.768,54.40,0.733,-0.095,0.724
1945,Zimbabwe,2017,,3.638,8.016,0.754,55.00,0.753,-0.098,0.751
1946,Zimbabwe,2018,,3.616,8.049,0.775,55.60,0.763,-0.068,0.844
1947,Zimbabwe,2019,,2.694,7.950,0.759,56.20,0.632,-0.064,0.831


In [4]:
# Append the 2021 data to the 2005-2020 DataFrame
happiness_time_final = pd.concat([happiness_time, happiness_df])

# The 2021 rows carry Year as a string, so normalise the whole column to int
happiness_time_final['Year'] = happiness_time_final['Year'].astype(int)

# Sort by Country and Year for clarity and renumber the rows from 0
happiness_time_final = happiness_time_final.sort_values(['Country', 'Year']).reset_index(drop=True)

# Add region names.
# Only the 2021 data has regions, so look each country up there by name.
# Using column labels (rather than positional iloc indices) keeps this correct
# if the column order ever changes, and avoids a slow row-by-row loop.
region_by_country = happiness_df.drop_duplicates('Country').set_index('Country')['Region']
looked_up_region = happiness_time_final['Country'].map(region_by_country)

# Countries absent from the 2021 data keep whatever Region value they already had
happiness_time_final['Region'] = looked_up_region.fillna(happiness_time_final['Region'])

# Write to csv and show a preview of the DataFrame
happiness_time_final.to_csv("Data/Final Dataframes/happiness_time_final.csv", index=False)
happiness_time_final

Unnamed: 0,Country,Year,Region,Happiness Score,GDP per Capita,Social Support,Life Expectancy,Freedom,Generosity,Corruption
0,Afghanistan,2008,South Asia,3.724,7.370,0.451,50.800,0.718,0.168,0.882
1,Afghanistan,2009,South Asia,4.402,7.540,0.552,51.200,0.679,0.190,0.850
2,Afghanistan,2010,South Asia,4.758,7.647,0.539,51.600,0.600,0.121,0.707
3,Afghanistan,2011,South Asia,3.832,7.620,0.521,51.920,0.496,0.162,0.731
4,Afghanistan,2012,South Asia,3.783,7.705,0.521,52.240,0.531,0.236,0.776
...,...,...,...,...,...,...,...,...,...,...
2093,Zimbabwe,2017,Sub-Saharan Africa,3.638,8.016,0.754,55.000,0.753,-0.098,0.751
2094,Zimbabwe,2018,Sub-Saharan Africa,3.616,8.049,0.775,55.600,0.763,-0.068,0.844
2095,Zimbabwe,2019,Sub-Saharan Africa,2.694,7.950,0.759,56.200,0.632,-0.064,0.831
2096,Zimbabwe,2020,Sub-Saharan Africa,3.160,7.829,0.717,56.800,0.643,-0.009,0.789


### Country Data

In [5]:
# Identify happiness_df countries whose names have no match in country_data.csv
country_data = pd.read_csv("Data/country_data.csv")

# An inner merge keeps only the countries present in both tables
common_countries = happiness_df.merge(country_data, on=["Country"])

# Everything in happiness_df that did not survive the merge is unmatched
matched_names = common_countries['Country']
is_unmatched = ~happiness_df['Country'].isin(matched_names)
missing_countries = happiness_df[is_unmatched]
missing_countries

Unnamed: 0,Country,Region,Year,Happiness Score,GDP per Capita,Social Support,Life Expectancy,Freedom,Generosity,Corruption
23,Taiwan Province of China,East Asia,2021,6.584,10.871,0.898,69.6,0.784,-0.07,0.721
32,Kosovo,Central and Eastern Europe,2021,6.372,9.318,0.821,63.813,0.869,0.257,0.917
64,Moldova,Commonwealth of Independent States,2021,5.766,9.454,0.857,65.699,0.822,-0.079,0.918
73,North Cyprus,Western Europe,2021,5.536,10.576,0.82,73.898,0.795,0.012,0.626
76,Hong Kong S.A.R. of China,East Asia,2021,5.477,11.0,0.836,76.82,0.717,0.067,0.403
82,Congo (Brazzaville),Sub-Saharan Africa,2021,5.342,8.117,0.636,58.221,0.695,-0.068,0.745
93,North Macedonia,Central and Eastern Europe,2021,5.101,9.693,0.805,65.474,0.751,0.038,0.905
99,Laos,Southeast Asia,2021,5.03,8.947,0.728,58.968,0.91,0.123,0.658
117,Iran,Middle East and North Africa,2021,4.721,9.584,0.71,66.3,0.608,0.218,0.714
124,Palestinian Territories,Middle East and North Africa,2021,4.517,8.485,0.826,62.25,0.653,-0.163,0.821


In [6]:
# COUNTRY DATA

# Rename the non-matching countries identified above.
# Each entry is [name used in country_data.csv, name used in happiness_df].
rename_list = [["Hong Kong", "Hong Kong S.A.R. of China"],
               ["Taiwan", "Taiwan Province of China"],
               ["Congo", "Congo (Brazzaville)"],
               ["Macedonia, the former Yugoslav Republic of", "North Macedonia"],
               ["Palestinian Territory, Occupied", "Palestinian Territories"],
               ["Lao People's Democratic Republic", "Laos"],
               ["Iran, Islamic Republic of", "Iran"],
               ["Tanzania, United Republic of", "Tanzania"],
               ["Moldova, Republic of", "Moldova"]]

# Change the country names to match happiness_df (names not listed are left as-is)
country_data["Country"] = country_data["Country"].replace(dict(rename_list))

# Merge with the happiness data; the join key is stated explicitly so that a
# future shared column cannot silently become part of the key
country_df = pd.merge(happiness_df[["Country", "Region", "Happiness Score"]], country_data,
                      how='left', on="Country")

# Add the Alpha-2 code to happiness_df to allow easier merging later on.
# The code is looked up by country name rather than copied by row position, so
# the values stay correct even if the merge above produced extra rows.
alpha2_by_country = country_df.drop_duplicates("Country").set_index("Country")["Alpha-2 code"]
happiness_df["Alpha-2 code"] = happiness_df["Country"].map(alpha2_by_country)
happiness_df.to_csv("Data/Final Dataframes/happiness_df.csv", index=False)

# Write to csv and display
country_df.to_csv("Data/Final Dataframes/country_df.csv", index=False)
country_df


Unnamed: 0,Country,Region,Happiness Score,Alpha-2 code,Alpha-3 code,Numeric code,Latitude (average),Longitude (average),Population 2020,Med. Age,Urban Pop %
0,Finland,Western Europe,7.842,FI,FIN,246.0,64.0,26.00,5542237.0,43.0,86%
1,Denmark,Western Europe,7.620,DK,DNK,208.0,56.0,10.00,5795780.0,42.0,88%
2,Switzerland,Western Europe,7.571,CH,CHE,756.0,47.0,8.00,8665615.0,43.0,74%
3,Iceland,Western Europe,7.554,IS,ISL,352.0,65.0,-18.00,341628.0,37.0,94%
4,Netherlands,Western Europe,7.464,NL,NLD,528.0,52.5,5.75,17141544.0,43.0,92%
...,...,...,...,...,...,...,...,...,...,...,...
144,Lesotho,Sub-Saharan Africa,3.512,LS,LSO,426.0,-29.5,28.50,2145194.0,24.0,31%
145,Botswana,Sub-Saharan Africa,3.467,BW,BWA,72.0,-22.0,24.00,2359585.0,24.0,73%
146,Rwanda,Sub-Saharan Africa,3.415,RW,RWA,646.0,-2.0,30.00,13005303.0,20.0,18%
147,Zimbabwe,Sub-Saharan Africa,3.145,ZW,ZWE,716.0,-20.0,30.00,14899771.0,19.0,38%


### Weather Data

In [7]:
# Temp and weather data (1991-2016) accessed from World Bank Climate Knowledge Portal
# Available at: https://climateknowledgeportal.worldbank.org/download-data
# Sunlight data: World Cities Ranked by Annual Sunshine Hours. Dataset accessed from Kaggle

# TEMPERATURE DATA
# Temp data units = degrees Celsius
temp_data = pd.read_csv("Data/Temp_data.csv")

# Take the mean of each country's monthly averages between 1991-2016.
# numeric_only=True skips text columns, which pandas >= 2.0 would otherwise
# reject with a TypeError instead of silently dropping them.
temp_data = temp_data.groupby(['ISO3']).mean(numeric_only=True).reset_index()
# Change column name and drop unnecessary columns
temp_data = temp_data.rename(columns={"Temperature - (Celsius)":"Average Temp"})
temp_data = temp_data.drop(['Year'], axis=1)

# RAINFALL DATA
# Rainfall data units = mm
rainfall_data = pd.read_csv("Data/Rainfall_data.csv")

# Take the mean of each country's monthly averages between 1991-2016
rainfall_data = rainfall_data.groupby(['ISO3']).mean(numeric_only=True).reset_index()
# Change column name and drop unnecessary columns
rainfall_data = rainfall_data.rename(columns={"Rainfall - (MM)":"Average Rainfall"})
rainfall_data = rainfall_data.drop(['Year'], axis=1)

# Merge the temp and rainfall data into weather_data
weather_data = pd.merge(temp_data, rainfall_data, on="ISO3")

# SUNLIGHT DATA
# Sunlight data units = annual sunlight hours
sun_data = pd.read_csv("Data/Sunlight_data.csv")
# Re-read the country table to attach an Alpha-3 code to each sunlight row
country_data = pd.read_csv("Data/country_data.csv")
sun_data = pd.merge(sun_data, country_data[['Country', "Alpha-3 code"]], on="Country")
sun_data = sun_data.drop(['Country'], axis=1)

# Add sunlight data to weather dataframe
weather_data = pd.merge(weather_data, sun_data, how="left", left_on="ISO3", right_on="Alpha-3 code")
weather_data = weather_data.drop(["Alpha-3 code"], axis=1)
weather_data.drop_duplicates(keep='first', inplace=True)

# Add happiness data
weather_data = pd.merge(country_df[['Country', 'Region', 'Happiness Score', 'Alpha-3 code']],
                        weather_data, how="left", right_on="ISO3", left_on="Alpha-3 code")
weather_data.drop("ISO3", axis=1, inplace=True)

# Write to csv and display.
# reset_index returns a new DataFrame, so the result has to be assigned back.
weather_data = weather_data.reset_index(drop=True)
weather_data.to_csv("Data/Final Dataframes/weather_data.csv", index=False)
weather_data

Unnamed: 0,Country,Region,Happiness Score,Alpha-3 code,Average Temp,Average Rainfall,Sunlight
0,Finland,Western Europe,7.842,FIN,2.402188,46.890103,1858.000000
1,Denmark,Western Europe,7.620,DNK,8.678213,61.730040,1739.000000
2,Switzerland,Western Europe,7.571,CHE,6.713588,127.114978,1566.000000
3,Iceland,Western Europe,7.554,ISL,2.307253,92.339246,1326.000000
4,Netherlands,Western Europe,7.464,NLD,10.316402,66.786411,1662.000000
...,...,...,...,...,...,...,...
144,Lesotho,Sub-Saharan Africa,3.512,LSO,13.468541,60.716254,
145,Botswana,Sub-Saharan Africa,3.467,BWA,22.295602,31.477969,3426.666667
146,Rwanda,Sub-Saharan Africa,3.415,RWA,20.012486,102.340034,
147,Zimbabwe,Sub-Saharan Africa,3.145,ZWE,22.040161,51.957842,3065.400000


### Mental Health Data

In [8]:
# MENTAL HEALTH DATA
# Source: WHO - Depression and Other Common Mental Disorders
# Available at: https://www.who.int/publications/i/item/depression-global-health-estimates

# Load data from mental_health.csv and convert rates to percentages
mental_health_data = pd.read_csv("Data/mental_health.csv", encoding='latin1')
for rate_column in ['Depression rate', 'Anxiety rate']:
    mental_health_data[rate_column] = mental_health_data[rate_column] * 100

# Merge with the happiness data, keeping every country in happiness_df
mental_health_data = pd.merge(happiness_df[['Country', 'Region', 'Happiness Score']], mental_health_data, on="Country", how="left")

# Write to csv and display.
# reset_index returns a new DataFrame, so the result has to be assigned back.
mental_health_data = mental_health_data.reset_index(drop=True)
mental_health_data.to_csv("Data/Final Dataframes/mental_health_data.csv", index=False)
mental_health_data


Unnamed: 0,Country,Region,Happiness Score,Depression rate,Anxiety rate
0,Finland,Western Europe,7.842,5.6,3.2
1,Denmark,Western Europe,7.620,5.0,4.9
2,Switzerland,Western Europe,7.571,5.0,4.9
3,Iceland,Western Europe,7.554,4.1,4.9
4,Netherlands,Western Europe,7.464,4.7,6.4
...,...,...,...,...,...
144,Lesotho,Sub-Saharan Africa,3.512,4.8,3.1
145,Botswana,Sub-Saharan Africa,3.467,4.7,3.1
146,Rwanda,Sub-Saharan Africa,3.415,3.8,3.2
147,Zimbabwe,Sub-Saharan Africa,3.145,4.0,2.8


### COVID Data

In [None]:
# COVID DATA API REQUEST

# Load country_data.csv
# NOTE(review): pandas reads the literal text "NA" as a missing value by default,
# so a country whose Alpha-2 code is "NA" may arrive here without a code - confirm.
country_data = pd.read_csv("Data/country_data.csv")

# Create a dataframe to store the COVID data.
# A list (not a set) is used so that the column order is deterministic.
covid_columns = ["Covid Cases", "Covid Deaths", "Covid Death Rate", "Covid Cases per Mil Pop", "Alpha-2 code"]
covid_data = pd.DataFrame(columns=covid_columns)
covid_data.insert(0, "Country", country_data["Country"])
base_url = "http://corona-api.com/countries/"

# Iterate through the countries, adding each country's COVID data
counter = 0
for index, row in country_data.iterrows():

    try:
        code = row["Alpha-2 code"]

        # Call the COVID data API; the timeout stops one stalled request from
        # hanging the whole loop
        response = requests.get(base_url + code.lower(), timeout=30).json()

        # Pull data from the response. Everything is extracted and converted
        # before the DataFrame is touched, so a failure part-way through cannot
        # leave a half-filled row behind.
        latest_data = response["data"]["latest_data"]
        calculated = latest_data["calculated"]
        covid_death_rate = calculated["death_rate"]
        record = {
            "Covid Cases": float(latest_data["confirmed"]),
            "Covid Deaths": float(latest_data["deaths"]),
            "Covid Cases per Mil Pop": float(calculated["cases_per_million_population"]),
            "Alpha-2 code": code,
            # The API may report no death rate; keep None in that case
            "Covid Death Rate": None if covid_death_rate is None else float(covid_death_rate),
        }

    except (AttributeError, KeyError, TypeError, ValueError, requests.RequestException):
        # AttributeError: missing (non-string) country code
        # KeyError/TypeError/ValueError: unexpected or non-JSON response content
        # RequestException: network or HTTP-level failure
        country = country_data.loc[index, "Country"]
        print(f"Error with country: {country}. No data found")
        continue

    for column, value in record.items():
        covid_data.loc[index, column] = value

    counter += 1
    print(f"{round((counter/len(covid_data)*100),2)}% Done")


covid_data.to_csv("Data/covid_data.csv", index=False)
covid_data

In [9]:
# Pull data from covid_data.csv (to save repeating API call)
data = pd.read_csv("Data/covid_data.csv")

# Rows without an Alpha-2 code cannot be attributed to a country. They are
# removed before merging because pandas matches missing keys with each other,
# which would pair every code-less happiness row with every code-less COVID row.
covid_with_code = data.dropna(subset=["Alpha-2 code"])

final_covid_data = pd.merge(happiness_df[["Country", "Region", "Happiness Score", "Alpha-2 code"]],
                            covid_with_code[["Covid Cases", "Covid Deaths", "Covid Death Rate", "Covid Cases per Mil Pop", "Alpha-2 code"]],
                            how="left", on="Alpha-2 code")

# Drop duplicates
final_covid_data.drop_duplicates(keep='last', inplace=True)

# Write to csv and display.
# reset_index returns a new DataFrame, so the result has to be assigned back.
final_covid_data = final_covid_data.reset_index(drop=True)
final_covid_data.to_csv("Data/Final Dataframes/final_covid_data.csv", index=False)
final_covid_data

Unnamed: 0,Country,Region,Happiness Score,Alpha-2 code,Covid Cases,Covid Deaths,Covid Death Rate,Covid Cases per Mil Pop
0,Finland,Western Europe,7.842,FI,87228.0,914.0,1.047829,31.0
1,Denmark,Western Europe,7.620,DK,252045.0,2489.0,0.987522,37.0
2,Switzerland,Western Europe,7.571,CH,659974.0,10633.0,1.611124,238.0
3,Iceland,Western Europe,7.554,IS,6472.0,29.0,0.448084,4.0
4,Netherlands,Western Europe,7.464,NL,1502107.0,17169.0,1.142994,792.0
...,...,...,...,...,...,...,...,...
156,Lesotho,Sub-Saharan Africa,3.512,LS,10731.0,316.0,2.944740,0.0
157,Botswana,Sub-Saharan Africa,3.467,BW,46934.0,712.0,1.517024,1.0
158,Rwanda,Sub-Saharan Africa,3.415,RW,25225.0,335.0,1.328048,2.0
159,Zimbabwe,Sub-Saharan Africa,3.145,ZW,38260.0,1568.0,4.098275,12.0


### Combined Data

In [10]:
# Build the combined table by left-joining each topic table onto happiness_df

# Country details, joined on the Alpha-2 code
country_columns = ["Alpha-2 code", "Alpha-3 code", "Numeric code",
                   "Latitude (average)", "Longitude (average)", "Population 2020",
                   "Med. Age", "Urban Pop %"]
final_df = happiness_df.merge(country_df[country_columns], how='left', on="Alpha-2 code")

# Weather, joined on the Alpha-3 code
weather_columns = ['Alpha-3 code', 'Average Temp', 'Average Rainfall', 'Sunlight']
final_df = final_df.merge(weather_data[weather_columns], on='Alpha-3 code', how="left")

# Mental health, joined on the country name
mental_health_columns = ["Country", "Depression rate", "Anxiety rate"]
final_df = final_df.merge(mental_health_data[mental_health_columns], how="left", on="Country")

# COVID, joined on the Alpha-2 code
covid_columns_to_merge = ["Covid Cases", "Covid Deaths", "Covid Death Rate", "Covid Cases per Mil Pop", "Alpha-2 code"]
final_df = final_df.merge(final_covid_data[covid_columns_to_merge], how="left", on="Alpha-2 code")

# Remove exact duplicate rows, then rows that have no Alpha-3 code
final_df = final_df.drop_duplicates(keep='last')
final_df = final_df.dropna(subset=["Alpha-3 code"])

# Write to csv (the row index is left as-is and is not written out)
final_df.to_csv("Data/Final Dataframes/final_df.csv", index=False)
final_df

Unnamed: 0,Country,Region,Year,Happiness Score,GDP per Capita,Social Support,Life Expectancy,Freedom,Generosity,Corruption,...,Urban Pop %,Average Temp,Average Rainfall,Sunlight,Depression rate,Anxiety rate,Covid Cases,Covid Deaths,Covid Death Rate,Covid Cases per Mil Pop
0,Finland,Western Europe,2021,7.842,10.775,0.954,72.000,0.949,-0.098,0.186,...,86%,2.402188,46.890103,1858.000000,5.6,3.2,87228.0,914.0,1.047829,31.0
1,Denmark,Western Europe,2021,7.620,10.933,0.954,72.700,0.946,0.030,0.179,...,88%,8.678213,61.730040,1739.000000,5.0,4.9,252045.0,2489.0,0.987522,37.0
2,Switzerland,Western Europe,2021,7.571,11.117,0.942,74.400,0.919,0.025,0.292,...,74%,6.713588,127.114978,1566.000000,5.0,4.9,659974.0,10633.0,1.611124,238.0
3,Iceland,Western Europe,2021,7.554,10.878,0.983,73.000,0.955,0.160,0.673,...,94%,2.307253,92.339246,1326.000000,4.1,4.9,6472.0,29.0,0.448084,4.0
4,Netherlands,Western Europe,2021,7.464,10.932,0.942,72.400,0.913,0.175,0.338,...,92%,10.316402,66.786411,1662.000000,4.7,6.4,1502107.0,17169.0,1.142994,792.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
186,Lesotho,Sub-Saharan Africa,2021,3.512,7.926,0.787,48.700,0.715,-0.131,0.915,...,31%,13.468541,60.716254,,4.8,3.1,10731.0,316.0,2.944740,0.0
187,Botswana,Sub-Saharan Africa,2021,3.467,9.782,0.784,59.269,0.824,-0.246,0.801,...,73%,22.295602,31.477969,3426.666667,4.7,3.1,46934.0,712.0,1.517024,1.0
188,Rwanda,Sub-Saharan Africa,2021,3.415,7.676,0.552,61.400,0.897,0.061,0.167,...,18%,20.012486,102.340034,,3.8,3.2,25225.0,335.0,1.328048,2.0
189,Zimbabwe,Sub-Saharan Africa,2021,3.145,7.943,0.750,56.201,0.677,-0.047,0.821,...,38%,22.040161,51.957842,3065.400000,4.0,2.8,38260.0,1568.0,4.098275,12.0


In [11]:
# List the column names of the combined DataFrame
final_df.columns

Index(['Country', 'Region', 'Year', 'Happiness Score', 'GDP per Capita',
       'Social Support', 'Life Expectancy', 'Freedom', 'Generosity',
       'Corruption', 'Alpha-2 code', 'Alpha-3 code', 'Numeric code',
       'Latitude (average)', 'Longitude (average)', 'Population 2020',
       'Med. Age', 'Urban Pop %', 'Average Temp', 'Average Rainfall',
       'Sunlight', 'Depression rate', 'Anxiety rate', 'Covid Cases',
       'Covid Deaths', 'Covid Death Rate', 'Covid Cases per Mil Pop'],
      dtype='object')

### How Happy is Australia?

In [12]:
# Keep only the Australia rows of the happiness-over-time table
is_australia = happiness_time_final['Country'].eq('Australia')
australia_happiness = happiness_time_final[is_australia]

# Save the subset to csv
australia_happiness.to_csv("Data/Final Dataframes/australia_happiness.csv", index=False)

# Display the Australia subset
australia_happiness

Unnamed: 0,Country,Year,Region,Happiness Score,GDP per Capita,Social Support,Life Expectancy,Freedom,Generosity,Corruption
71,Australia,2005,North America and ANZ,7.341,10.659,0.968,71.4,0.935,,0.39
72,Australia,2007,North America and ANZ,7.285,10.703,0.965,71.72,0.891,0.347,0.513
73,Australia,2008,North America and ANZ,7.254,10.719,0.947,71.88,0.916,0.305,0.431
74,Australia,2010,North America and ANZ,7.45,10.722,0.955,72.2,0.932,0.317,0.366
75,Australia,2011,North America and ANZ,7.406,10.733,0.967,72.3,0.945,0.369,0.382
76,Australia,2012,North America and ANZ,7.196,10.754,0.945,72.4,0.935,0.274,0.368
77,Australia,2013,North America and ANZ,7.364,10.762,0.928,72.5,0.933,0.269,0.432
78,Australia,2014,North America and ANZ,7.289,10.772,0.924,72.6,0.923,0.319,0.442
79,Australia,2015,North America and ANZ,7.309,10.779,0.952,72.7,0.922,0.332,0.357
80,Australia,2016,North America and ANZ,7.25,10.791,0.942,73.0,0.922,0.239,0.399


In [13]:
# Keep only the Finland rows of the happiness-over-time table
is_finland = happiness_time_final['Country'].eq('Finland')
finland_happiness = happiness_time_final[is_finland]

# Save the subset to csv
finland_happiness.to_csv("Data/Final Dataframes/finland_happiness.csv", index=False)

# Display the Finland subset
finland_happiness

Unnamed: 0,Country,Year,Region,Happiness Score,GDP per Capita,Social Support,Life Expectancy,Freedom,Generosity,Corruption
591,Finland,2006,Western Europe,7.672,10.745,0.965,69.76,0.969,-0.005,0.132
592,Finland,2008,Western Europe,7.671,10.796,0.951,70.08,0.934,0.028,0.217
593,Finland,2010,Western Europe,7.393,10.734,0.935,70.4,0.916,0.091,0.413
594,Finland,2011,Western Europe,7.354,10.754,0.938,70.64,0.936,0.101,0.32
595,Finland,2012,Western Europe,7.42,10.735,0.928,70.88,0.921,-0.001,0.361
596,Finland,2013,Western Europe,7.445,10.722,0.941,71.12,0.919,0.04,0.306
597,Finland,2014,Western Europe,7.385,10.714,0.952,71.36,0.933,-0.001,0.265
598,Finland,2015,Western Europe,7.448,10.716,0.948,71.6,0.93,0.111,0.223
599,Finland,2016,Western Europe,7.66,10.74,0.954,71.7,0.948,-0.027,0.25
600,Finland,2017,Western Europe,7.788,10.768,0.964,71.8,0.962,-0.002,0.192


In [14]:
# Keep only the Afghanistan rows of the happiness-over-time table
is_afghanistan = happiness_time_final['Country'].eq('Afghanistan')
afghanistan_happiness = happiness_time_final[is_afghanistan]

# Save the subset to csv
afghanistan_happiness.to_csv("Data/Final Dataframes/afghanistan_happiness.csv", index=False)

# Display the Afghanistan subset
afghanistan_happiness

Unnamed: 0,Country,Year,Region,Happiness Score,GDP per Capita,Social Support,Life Expectancy,Freedom,Generosity,Corruption
0,Afghanistan,2008,South Asia,3.724,7.37,0.451,50.8,0.718,0.168,0.882
1,Afghanistan,2009,South Asia,4.402,7.54,0.552,51.2,0.679,0.19,0.85
2,Afghanistan,2010,South Asia,4.758,7.647,0.539,51.6,0.6,0.121,0.707
3,Afghanistan,2011,South Asia,3.832,7.62,0.521,51.92,0.496,0.162,0.731
4,Afghanistan,2012,South Asia,3.783,7.705,0.521,52.24,0.531,0.236,0.776
5,Afghanistan,2013,South Asia,3.572,7.725,0.484,52.56,0.578,0.061,0.823
6,Afghanistan,2014,South Asia,3.131,7.718,0.526,52.88,0.509,0.104,0.871
7,Afghanistan,2015,South Asia,3.983,7.702,0.529,53.2,0.389,0.08,0.881
8,Afghanistan,2016,South Asia,4.22,7.697,0.559,53.0,0.523,0.042,0.793
9,Afghanistan,2017,South Asia,2.662,7.697,0.491,52.8,0.427,-0.121,0.954


In [15]:
# Average every numeric column of final_df within each Region.
# numeric_only=True skips text columns (e.g. Country, the Alpha codes), which
# pandas >= 2.0 would otherwise reject with a TypeError instead of dropping them.
avg_scores_region = final_df.groupby(["Region"]).mean(numeric_only=True)

# Order the regions from lowest to highest average happiness
avg_scores_region = avg_scores_region.sort_values(['Happiness Score'])

# Write to csv, keeping the Region index as the first column
avg_scores_region.to_csv("Data/Final Dataframes/avg_scores_region.csv", index=True)

# Show avg_scores_region dataframe
avg_scores_region

Unnamed: 0_level_0,Happiness Score,GDP per Capita,Social Support,Life Expectancy,Freedom,Generosity,Corruption,Numeric code,Latitude (average),Longitude (average),...,Med. Age,Average Temp,Average Rainfall,Sunlight,Depression rate,Anxiety rate,Covid Cases,Covid Deaths,Covid Death Rate,Covid Cases per Mil Pop
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
South Asia,4.441857,8.682571,0.703429,62.681,0.765,0.042714,0.797429,303.714286,20.75,77.142857,...,26.571429,21.726608,110.86685,2666.85,3.871429,3.6,3096945.0,35984.428571,1.578694,2382.142857
Sub-Saharan Africa,4.494472,8.075194,0.69675,55.886472,0.723194,0.013444,0.765944,468.25,-1.881481,16.566667,...,19.911765,24.506397,87.960519,2808.552299,4.088235,2.905882,83475.06,2162.657143,2.086923,77.628571
Middle East and North Africa,5.219765,9.666118,0.797647,65.609118,0.716471,-0.079765,0.762235,492.235294,29.274753,33.708318,...,28.823529,21.68341,15.875942,3169.393939,4.485714,4.328571,768151.6,12013.0,2.946022,800.352941
Southeast Asia,5.407556,9.421444,0.820333,64.888444,0.909,0.156333,0.709111,470.444444,10.651856,108.033333,...,31.0,25.744684,186.250798,2281.465,3.8625,3.3875,382077.4,7598.111111,1.033729,227.666667
Commonwealth of Independent States,5.467,9.401833,0.8725,65.0095,0.816917,-0.036,0.725083,469.916667,45.041667,55.25,...,33.166667,7.651667,39.090331,2204.843056,4.72,3.31,741233.2,15229.333333,1.485216,250.0
East Asia,5.810333,10.367667,0.8605,71.252167,0.7635,-0.062333,0.683333,326.0,33.291667,118.44445,...,40.833333,7.61387,79.797445,2244.92,4.2,3.166667,142820.0,2839.5,1.911665,219.333333
Latin America and Caribbean,5.90805,9.37,0.8395,67.07605,0.83175,-0.0677,0.7926,363.85,0.429165,-74.887505,...,29.2,22.380169,144.942421,2359.395444,4.59,5.51,1439313.0,46123.8,2.781387,1694.1
Central and Eastern Europe,5.984765,10.109059,0.887412,68.338412,0.797059,-0.078941,0.850529,423.352941,43.553924,20.441176,...,41.1875,10.22158,65.354174,2093.794118,5.133333,3.706667,587750.8,14001.0,2.350437,445.0
Western Europe,6.914905,10.822714,0.914476,73.033095,0.858714,-0.00319,0.523095,423.238095,46.218248,8.031743,...,41.238095,10.39748,69.870678,2141.621667,5.005,5.12,1391980.0,31479.05,1.694386,977.45
North America and ANZ,7.1285,10.8095,0.9335,72.325,0.89875,0.12,0.44925,388.5,7.5,28.75,...,38.75,8.410639,69.482876,2411.0875,5.475,6.375,8601372.0,153976.0,1.951053,2741.5


In [16]:
# Happiness score per country over time, ordered by year
map_columns = ['Year', 'Country', 'Happiness Score']
happiness_map_time = happiness_time_final[map_columns].sort_values(['Year'])

# Save to csv
happiness_map_time.to_csv("Data/Final Dataframes/happiness_map_time.csv", index=False)

# Display the happiness_map_time dataframe
happiness_map_time

Unnamed: 0,Year,Country,Happiness Score
1415,2005,Pakistan,5.225
1961,2005,United Kingdom,6.984
1505,2005,Poland,5.587
1041,2005,Lebanon,5.491
482,2005,Denmark,8.019
...,...,...,...
533,2021,Ecuador,5.764
1459,2021,Panama,6.180
1444,2021,Palestinian Territories,4.517
1429,2021,Pakistan,4.934


In [17]:
# Global average scores per year.
# numeric_only=True skips the text columns (Country, Region), which
# pandas >= 2.0 would otherwise reject with a TypeError instead of dropping them.
avg_scores = happiness_time_final.groupby(["Year"]).mean(numeric_only=True)

# Write to csv. Year lives in the index after the groupby, so the index must be
# written out; otherwise the rows in the file cannot be tied back to a year.
avg_scores.to_csv("Data/Final Dataframes/avg_scores.csv", index=True)

# Show avg_scores dataframe
avg_scores

Unnamed: 0_level_0,Happiness Score,GDP per Capita,Social Support,Life Expectancy,Freedom,Generosity,Corruption
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2005,6.446259,10.118667,0.897407,67.007407,0.82952,0.256,0.71588
2006,5.196899,9.044284,0.835809,60.1475,0.730477,0.006654,0.755747
2007,5.418275,9.172951,0.80777,61.305267,0.687337,0.014833,0.792133
2008,5.418509,9.167273,0.784391,61.249954,0.688355,0.021486,0.764121
2009,5.457667,9.263805,0.819071,62.418928,0.687402,-0.005372,0.763387
2010,5.496806,9.393846,0.831867,63.069025,0.708262,0.003382,0.757235
2011,5.424082,9.286848,0.8028,62.212121,0.732186,-0.014438,0.755203
2012,5.443754,9.390593,0.809063,63.216993,0.711241,-0.00206,0.757955
2013,5.394562,9.373235,0.806555,63.326481,0.727897,-0.000158,0.763492
2014,5.389021,9.370972,0.805641,63.29595,0.734457,0.020261,0.738331


In [18]:
# Count the non-missing values in each column for every year
len_data_year = happiness_time_final.groupby(['Year']).count()

# Write to csv. Year lives in the index after the groupby, so the index must be
# written out; otherwise the rows in the file cannot be tied back to a year.
len_data_year.to_csv("Data/Final Dataframes/len_data_year.csv", index=True)

# Show len_data_year dataframe
len_data_year

Unnamed: 0_level_0,Country,Region,Happiness Score,GDP per Capita,Social Support,Life Expectancy,Freedom,Generosity,Corruption
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2005,27,27,27,27,27,27,25,1,25
2006,89,89,89,88,89,88,86,81,83
2007,102,102,102,102,100,101,101,102,98
2008,110,110,110,110,110,108,107,109,107
2009,114,114,114,113,113,111,112,113,111
2010,124,124,124,123,120,121,122,123,119
2011,146,146,146,145,145,141,145,144,138
2012,142,142,142,140,142,136,141,133,132
2013,137,137,137,136,137,133,136,133,130
2014,145,145,145,142,142,140,138,138,136
