### The Happiness Project
* 

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.express as px

import gmaps
import requests
import json


from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key
from api_keys import g_key

In [None]:
# Load the World Happiness Report 2021 data
raw_happiness_df = pd.read_csv("Data/world-happiness-report-2021.csv")

# Tag every row with the report year.
# The year is stored as an integer (not the string "2021") so it has the same
# type as the integer years of the 2005-2020 data it is later combined with;
# mixing str and int years breaks sorting and grouping by "Year".
raw_happiness_df.insert(2, "Year", 2021)

# Rename columns to the short names used throughout the notebook
raw_happiness_df = raw_happiness_df.rename(columns={'Country name': 'Country', 
                                                'Regional indicator': 'Region',
                                                'Ladder score': 'Happiness Score',
                                                'Social support': 'Social Support',                                                    
                                                'Logged GDP per capita': 'GDP per Capita',
                                                'Healthy life expectancy': 'Life Expectancy',
                                                'Freedom to make life choices': 'Freedom',
                                                'Perceptions of corruption': 'Corruption'})

# Drop the error-bar and "Explained by" columns, which are not used below
happiness_df = raw_happiness_df.drop(columns=['Standard error of ladder score', 
                                              'upperwhisker', 
                                              'lowerwhisker',
                                              'Ladder score in Dystopia',
                                              'Explained by: Log GDP per capita',
                                              'Explained by: Social support',
                                              'Explained by: Healthy life expectancy',
                                              'Explained by: Freedom to make life choices',
                                              'Explained by: Generosity',
                                              'Explained by: Perceptions of corruption',
                                              'Dystopia + residual'])
# Show preview of DataFrame
happiness_df

In [None]:
# Keep only the two columns the 2021 world map needs
happiness_map_data = happiness_df.loc[:, ['Country', 'Happiness Score']]

# Build a choropleth keyed on country names and coloured by happiness score
fig = px.choropleth(
    data_frame=happiness_map_data,
    locations="Country",
    locationmode="country names",
    color="Happiness Score",
    hover_name="Country",
    color_continuous_scale=px.colors.sequential.Plasma,
)

# Render the map
fig.show()

### Happiness data over time: import data files, stats and visualisation

In [None]:
# Load the World Happiness Report history (2005-2020)
raw_happiness_time = pd.read_csv("Data/world-happiness-report.csv")

# Add an empty "Region" column at position 2 so the layout lines up with the 2021 frame
raw_happiness_time.insert(2, "Region", "")

# Mapping from the CSV headers to the short column names used in this notebook
time_column_names = {
    'Country name': 'Country',
    'year': 'Year',
    'Life Ladder': 'Happiness Score',
    'Social support': 'Social Support',
    'Healthy life expectancy at birth': 'Life Expectancy',
    'Log GDP per capita': 'GDP per Capita',
    'Healthy life expectancy': 'Life Expectancy',
    'Freedom to make life choices': 'Freedom',
    'Perceptions of corruption': 'Corruption',
}
raw_happiness_time = raw_happiness_time.rename(columns=time_column_names)

# The affect columns are not used in this analysis
happiness_time = raw_happiness_time.drop(columns=['Positive affect', 'Negative affect'])
happiness_time

In [None]:
# Make sure the historic years are integers
happiness_time['Year'] = happiness_time['Year'].astype(int)

# Append the 2021 report to the 2005-2020 history.
# ignore_index gives the combined frame one unique running index instead of
# two overlapping 0..N ranges.
happiness_time_final = pd.concat([happiness_time, happiness_df], ignore_index=True)

# The 2021 frame may carry its year as the string "2021"; cast the combined
# column so every year is an int and sorting/grouping by "Year" is consistent.
happiness_time_final['Year'] = happiness_time_final['Year'].astype(int)

# Sort by Country and Year for readability
happiness_time_final = happiness_time_final.sort_values(['Country', 'Year'])

# Show dataframe preview
happiness_time_final

In [None]:
# Average scores per country across all available years.
# numeric_only=True skips text columns such as "Region"; without it recent
# pandas versions raise a TypeError when averaging strings.
avg_scores = happiness_time_final.groupby(["Country"]).mean(numeric_only=True)
avg_scores

In [None]:
# All yearly rows for Australia
australia_happiness = happiness_time_final.loc[happiness_time_final['Country'].eq('Australia')]
australia_happiness

In [None]:
# All yearly rows for Finland
finland_happiness = happiness_time_final.loc[happiness_time_final['Country'].eq('Finland')]
finland_happiness

In [None]:
# All yearly rows for Afghanistan
afghanistan_happiness = happiness_time_final.loc[happiness_time_final['Country'].eq('Afghanistan')]
afghanistan_happiness

In [None]:
# Line chart of Australia's happiness score by year
x_axis = australia_happiness['Year']
y_axis = australia_happiness['Happiness Score']
plt.plot(x_axis, y_axis)

# Title and axis labels
plt.title('Australias Happiness score evolution 2005-2021')
plt.xlabel('Year')
plt.ylabel('Score')

plt.show()

In [None]:
# Compare the happiness score history of Australia, Finland and Afghanistan
x_axis = australia_happiness['Year']
y_axis = australia_happiness['Happiness Score']
x_axis2 = finland_happiness['Year']
y_axis2 = finland_happiness['Happiness Score']
x_axis3 = afghanistan_happiness['Year']
y_axis3 = afghanistan_happiness['Happiness Score']

# One line per country, drawn in the same order as the legend entries
for years, scores in ((x_axis, y_axis), (x_axis2, y_axis2), (x_axis3, y_axis3)):
    plt.plot(years, scores)

# Legend location code 7 is 'center right'
plt.legend(['Australia', 'Finland', 'Afghanistan'], loc='center right')
plt.title('Australia, Afghanistan and Finland Happiness Score evolution 2005-2021')
plt.xlabel('Year')
plt.ylabel('Score')
plt.show()

In [None]:
# Global average scores per year.
# numeric_only=True skips text columns such as "Country" and "Region"; without
# it recent pandas versions raise a TypeError when averaging strings.
avg_scores = happiness_time_final.groupby(["Year"]).mean(numeric_only=True)
avg_scores

In [None]:
# Yearly global averages: happiness score and GDP per capita on the same axes
for column in ('Happiness Score', 'GDP per Capita'):
    plt.plot(avg_scores.index, avg_scores[column])

plt.legend(['Happiness Score', 'GDP per Capita'])
plt.title('Happiness score evolution 2005-2021')
plt.xlabel('Year')
plt.ylabel('Score')
plt.show()

print("Money can not buy happiness... Really?")

In [None]:
# Count the non-null entries in every column for each year
len_data_year = happiness_time_final.groupby(by=['Year']).count()

# Caveat for the reader about uneven country coverage between years
print("We have to take in consideration that not every year has the same amount of studied countries.")
print("This explains higher scores in 2005 (27 countries vs 149 in 2021). So, the year 2005 should not be considered as it is not representative enough.")

len_data_year

In [None]:
# Scratch cell: the stray expression `s` referenced an undefined name and
# raised a NameError when run, so it has been removed. Nothing to execute here.

In [None]:
# Inspect the dtypes of the columns used for the animated map.
# Reads straight from happiness_time_final so the cell does not depend on a
# variable that is only created further down the notebook.
happiness_time_final[['Year', 'Country', 'Happiness Score']].dtypes

In [None]:
# Happiness score per country and year for the animated map.
# .copy() makes this an independent frame so the column assignment below does
# not write into a slice of happiness_time_final.
happiness_map_time = happiness_time_final[['Year', 'Country', 'Happiness Score']].copy()

# astype/sort_values return new objects, so their results must be assigned.
# Plotly creates animation frames in order of first appearance, so the rows
# are sorted by year to make the slider run chronologically.
happiness_map_time['Year'] = happiness_map_time['Year'].astype(int)
happiness_map_time = happiness_map_time.sort_values(['Year'])

# Set happiness time map parameters
fig = px.choropleth(happiness_map_time, 
                     locations="Country", 
                     locationmode = "country names",
                     color = "Happiness Score",
                     hover_name = "Country",
                     color_continuous_scale = px.colors.sequential.Plasma,
                     animation_frame = "Year")

# Show map
fig.show()

### Things to consider:
Correlation vs. causation: what makes us happy vs. what can result from happiness.

In [None]:
# Happiness Score (x) against GDP per Capita (y) from the 2021 report
x_axis = happiness_df['Happiness Score']
y_axis = happiness_df['GDP per Capita']

# Scatter the observations
plt.scatter(x_axis, y_axis)

# Least-squares fit of y on x, drawn as a red line with its equation
slope, intercept, rvalue, pvalue, stderr = linregress(x_axis, y_axis)
regress_values = x_axis.mul(slope).add(intercept)
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
plt.plot(x_axis, regress_values, "r-")
plt.annotate(line_eq, (5.7, 8), fontsize=15, color="red")

# Title and axis labels
plt.title('Happiness vs. GDP per Capita')
plt.xlabel('Happiness Score')
plt.ylabel('GDP per Capita')

# Write the figure to disk before it is shown
plt.savefig("images/Happiness_GDP_per_capita.png")

# Report the correlation coefficient, then render the figure
print(f"The r-value is {rvalue}")
plt.show()

In [None]:
# Happiness Score (x) against Social Support (y) from the 2021 report
x_axis = happiness_df['Happiness Score']
y_axis = happiness_df['Social Support']

# Scatter the observations
plt.scatter(x_axis, y_axis)

# Least-squares fit of y on x, drawn as a red line with its equation
slope, intercept, rvalue, pvalue, stderr = linregress(x_axis, y_axis)
regress_values = x_axis.mul(slope).add(intercept)
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
plt.plot(x_axis, regress_values, "r-")
plt.annotate(line_eq, (5.5, 0.7), fontsize=15, color="red")

# Title and axis labels
plt.title('Happiness vs. Social support')
plt.xlabel('Happiness Score')
plt.ylabel('Social Support')

# Write the figure to disk before it is shown
plt.savefig("images/Happiness_Social_Support.png")

# Report the correlation coefficient, then render the figure
print(f"The r-value is {rvalue}")
plt.show()

In [None]:
# Happiness Score (x) against Life Expectancy (y) from the 2021 report
x_axis = happiness_df['Happiness Score']
y_axis = happiness_df['Life Expectancy']

# Scatter the observations
plt.scatter(x_axis, y_axis)

# Least-squares fit of y on x, drawn as a red line with its equation
slope, intercept, rvalue, pvalue, stderr = linregress(x_axis, y_axis)
regress_values = x_axis.mul(slope).add(intercept)
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
plt.plot(x_axis, regress_values, "r-")
plt.annotate(line_eq, (5.5, 59), fontsize=15, color="red")

# Title and axis labels
plt.title('Happiness vs. Life Expectancy')
plt.xlabel('Happiness Score')
plt.ylabel('Life Expectancy')

# Write the figure to disk before it is shown
plt.savefig("images/Happiness_Life_Expectancy.png")

# Report the correlation coefficient, then render the figure
print(f"The r-value is {rvalue}")
plt.show()

In [None]:
# Happiness Score (x) against Freedom (y) from the 2021 report
x_axis = happiness_df['Happiness Score']
y_axis = happiness_df['Freedom']

# Scatter the observations
plt.scatter(x_axis, y_axis)

# Least-squares fit of y on x, drawn as a red line with its equation
slope, intercept, rvalue, pvalue, stderr = linregress(x_axis, y_axis)
regress_values = x_axis.mul(slope).add(intercept)
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
plt.plot(x_axis, regress_values, "r-")
plt.annotate(line_eq, (5.7, 0.61), fontsize=15, color="red")

# Title and axis labels
plt.title('Happiness vs. Freedom')
plt.xlabel('Happiness Score')
plt.ylabel('Freedom')

# Write the figure to disk before it is shown
plt.savefig("images/Happiness_Freedom.png")

# Report the correlation coefficient, then render the figure
print(f"The r-value is {rvalue}")
plt.show()

In [None]:
# Happiness Score (x) against Generosity (y) from the 2021 report
x_axis = happiness_df['Happiness Score']
y_axis = happiness_df['Generosity']

# Scatter the observations
plt.scatter(x_axis, y_axis)

# Least-squares fit of y on x, drawn as a red line with its equation
slope, intercept, rvalue, pvalue, stderr = linregress(x_axis, y_axis)
regress_values = x_axis.mul(slope).add(intercept)
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
plt.plot(x_axis, regress_values, "r-")
plt.annotate(line_eq, (2.5, 0.22), fontsize=15, color="red")

# Title and axis labels
plt.title('Happiness vs. Generosity')
plt.xlabel('Happiness Score')
plt.ylabel('Generosity')

# Write the figure to disk before it is shown
plt.savefig("images/Happiness_Generosity.png")

# Report the correlation coefficient, then render the figure
print(f"The r-value is {rvalue}")
plt.show()

In [None]:
# Happiness Score (x) against Corruption (y) from the 2021 report
x_axis = happiness_df['Happiness Score']
y_axis = happiness_df['Corruption']

# Scatter the observations
plt.scatter(x_axis, y_axis)

# Least-squares fit of y on x, drawn as a red line with its equation
slope, intercept, rvalue, pvalue, stderr = linregress(x_axis, y_axis)
regress_values = x_axis.mul(slope).add(intercept)
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
plt.plot(x_axis, regress_values, "r-")
plt.annotate(line_eq, (2.5, 0.22), fontsize=15, color="red")

# Title and axis labels
plt.title('Happiness vs. Corruption')
plt.xlabel('Happiness Score')
plt.ylabel('Corruption')

# Write the figure to disk before it is shown
plt.savefig("images/Happiness_Corruption.png")

# Report the correlation coefficient, then render the figure
print(f"The r-value is {rvalue}")
plt.show()

In [None]:
# Load the GDP-per-capita table (one column per year)
GDP_per_capita_df = pd.read_csv("Data/World_GDP_per_capita_1960-2019.csv")

# Keep only the country name and the 2019 figures, under notebook-friendly names
GDP_per_capita_2019_df = (
    GDP_per_capita_df[['Country Name', '2019']]
    .rename(columns={'Country Name': 'Country', '2019': '2019 GDP per Capita'})
)

# Preview the first rows
GDP_per_capita_2019_df.head()

In [None]:
# Load the total-GDP table (one column per year)
GDP_df = pd.read_csv("Data/World_GDP_1960-2019.csv")

# Keep only the country name and the 2019 GDP figures, under notebook-friendly names
GDP_2019_df = (
    GDP_df[['Country Name', '2019']]
    .rename(columns={'Country Name': 'Country', '2019': '2019 GDP'})
)

# Preview the first rows
GDP_2019_df.head()

In [None]:
# Merge happiness, 2019 GDP per Capita and 2019 GDP dataframes on country name.
# Inner joins keep only countries present in all three sources.
happiness_GDP_2019_df = pd.merge(happiness_df, GDP_per_capita_2019_df, how='inner', on="Country") 
happiness_GDP_2019_df = pd.merge(happiness_GDP_2019_df, GDP_2019_df, how='inner', on="Country") 

# Drop countries that have no 2019 value in either GDP column: a single NaN
# makes linregress return NaN for slope, intercept and r-value in the
# regression cells that use this frame.
happiness_GDP_2019_df = (
    happiness_GDP_2019_df
    .dropna(subset=['2019 GDP per Capita', '2019 GDP'])
    .reset_index(drop=True)
)

# Show dataframe preview
happiness_GDP_2019_df.head()

In [None]:
# Happiness Score (x) against 2019 GDP per Capita (y) from the merged frame
x_axis = happiness_GDP_2019_df['Happiness Score']
y_axis = happiness_GDP_2019_df['2019 GDP per Capita']

# Scatter the observations
plt.scatter(x_axis, y_axis)

# Least-squares fit of y on x, drawn as a red line with its equation
slope, intercept, rvalue, pvalue, stderr = linregress(x_axis, y_axis)
regress_values = x_axis.mul(slope).add(intercept)
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
plt.plot(x_axis, regress_values, "r-")
plt.annotate(line_eq, (3.5, 20_000), fontsize=15, color="red")

# Title and axis labels
plt.title('Happiness vs. 2019 GDP per Capita')
plt.xlabel('Happiness Score')
plt.ylabel('2019 GDP per Capita')

# Write the figure to disk before it is shown
plt.savefig("images/Happiness_2019_GDP_per_Capita.png")

# Report the correlation coefficient, then render the figure
print(f"The r-value is {rvalue}")
plt.show()

In [None]:
# Re-create the previous scatter plot (same x_axis / y_axis) with a
# logarithmic y-axis.
plt.scatter(x_axis, y_axis)
ax = plt.gca()
ax.set_yscale('log')

# Fit the regression on log10(y) rather than y. A straight line fitted to the
# raw values is curved on a log axis and is clipped wherever it drops to zero
# or below; fitting in log space gives a straight, fully visible line and an
# r-value that describes the relationship actually shown.
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_axis, np.log10(y_axis))
regress_values = 10 ** (x_axis * slope + intercept)  # back to data units for plotting
line_eq = "log10(y) = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.plot(x_axis,regress_values,"r-")
plt.annotate(line_eq,(3.5,20000),fontsize=15,color="red")

# Add title and labels
plt.title('Happiness vs. 2019 GDP per Capita')
plt.xlabel('Happiness Score')
plt.ylabel('2019 GDP per Capita')          

# Save under its own file name so the linear-scale figure is not overwritten
plt.savefig("images/Happiness_2019_GDP_per_Capita_log.png")

# Print the r-value of the log-space fit, then show the plot
print(f"The r-value is {rvalue}")
plt.show()

In [None]:
# Happiness Score (x) against total 2019 GDP (y) from the merged frame
x_axis = happiness_GDP_2019_df['Happiness Score']
y_axis = happiness_GDP_2019_df['2019 GDP']

# Scatter the observations
plt.scatter(x_axis, y_axis)

# Least-squares fit of y on x, drawn as a red line with its equation
slope, intercept, rvalue, pvalue, stderr = linregress(x_axis, y_axis)
regress_values = x_axis.mul(slope).add(intercept)
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
plt.plot(x_axis, regress_values, "r-")
plt.annotate(line_eq, (3, 1_000_000_000_000), fontsize=15, color="red")

# Title and axis labels
plt.title('Happiness vs. 2019 GDP')
plt.xlabel('Happiness Score')
plt.ylabel('2019 GDP')

# Write the figure to disk before it is shown
plt.savefig("images/Happiness_2019_GDP.png")

# Report the correlation coefficient, then render the figure
print(f"The r-value is {rvalue}")
plt.show()

In [None]:
# Hide the extreme outliers by capping the visible y-range.
# This only crops the view: every point, outliers included, still takes part
# in the regression below.
axes = plt.gca()
axes.set_ylim([0,4500000000000])

# Create scatter plot (re-uses x_axis / y_axis from the previous cell)
plt.scatter(x_axis, y_axis)

# Create linear regression equation and line to plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_axis, y_axis)
regress_values = x_axis * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.plot(x_axis,regress_values,"r-")
plt.annotate(line_eq,(3,1000000000000),fontsize=15,color="red")

# Add title and labels
plt.title('Happiness vs. 2019 GDP')
plt.xlabel('Happiness Score')
plt.ylabel('2019 GDP')          

# Save under its own file name so the full-range figure is not overwritten
plt.savefig("images/Happiness_2019_GDP_cropped.png")

# Print the r-value, then show the plot
print(f"The r-value is {rvalue}")
plt.show()



In [None]:
# Re-create the previous scatter plot (same x_axis / y_axis) with a
# logarithmic y-axis.
plt.scatter(x_axis, y_axis)
ax = plt.gca()
ax.set_yscale('log')

# Fit the regression on log10(y) rather than y. A straight line fitted to the
# raw values is curved on a log axis and is clipped wherever it drops to zero
# or below; fitting in log space gives a straight, fully visible line and an
# r-value that describes the relationship actually shown.
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_axis, np.log10(y_axis))
regress_values = 10 ** (x_axis * slope + intercept)  # back to data units for plotting
line_eq = "log10(y) = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.plot(x_axis,regress_values,"r-")
# Anchor the label at the same point as the other 2019 GDP plots; the original
# y=0.5 is far outside the range of GDP values.
plt.annotate(line_eq,(3,1000000000000),fontsize=15,color="red")

# Add title and labels
plt.title('Happiness vs. 2019 GDP')
plt.xlabel('Happiness Score')
plt.ylabel('2019 GDP')          

# Save under its own file name so the linear-scale figure is not overwritten
plt.savefig("images/Happiness_2019_GDP_log.png")

# Print the r-value of the log-space fit, then show the plot
print(f"The r-value is {rvalue}")
plt.show()

In [None]:
# Rank countries by 2019 GDP, largest first, to find the two outliers
happiness_GDP_2019_sorted_df = happiness_GDP_2019_df.sort_values(by='2019 GDP', ascending=False)

# State the finding and show the ten largest economies
print('The two extreme outliers in the Happiness vs. 2019 GDP scatter plot are the United States and China')
happiness_GDP_2019_sorted_df.head(10)

In [None]:
# Load country_data.csv (country names with their alpha-2 codes)
country_data = pd.read_csv("Data/country_data.csv").rename(columns={"Alpha-2 code":"Code"})

# Create a dataframe to store the COVID data.
# The columns are an ordered list (a set has no defined order) and every value
# below is written by column NAME, so each figure is guaranteed to land in the
# right column. The frame shares country_data's index, so `index` from
# iterrows() addresses the matching row.
covid_columns = ["Covid Cases", "Covid Deaths", "Covid Death Rate", "Covid Cases per Mil Pop"]
covid_data = country_data[["Country"]].reindex(columns=["Country"] + covid_columns)

base_url = "http://corona-api.com/countries/"

# Iterate through the countries, adding each country's COVID data
counter = 0
for index, row in country_data.iterrows():
    country = row["Country"]
    code = row["Code"]

    # A missing code is read by pandas as NaN (a float), which has no .lower()
    if not isinstance(code, str):
        print(f"Error with country: {country}. No data found")
        continue

    try:
        # Call the COVID data API; the timeout stops one stalled request
        # from hanging the whole loop
        response = requests.get(base_url + code.lower(), timeout=10)
        response.raise_for_status()
        latest_data = response.json()["data"]["latest_data"]
        calculated = latest_data["calculated"]

        # Pull the figures out of the response
        covid_cases = float(latest_data["confirmed"])
        covid_deaths = float(latest_data["deaths"])
        cases_per_mil_pop = float(calculated["cases_per_million_population"])
        covid_death_rate = calculated["death_rate"]
        # The API can report no death rate; store that as NaN
        if covid_death_rate is not None:
            covid_death_rate = float(covid_death_rate)
        else:
            covid_death_rate = float("nan")
    except (requests.RequestException, KeyError, TypeError, ValueError):
        # Network/HTTP failure, invalid JSON, or an unexpected payload shape.
        # Only these are caught so that Ctrl-C and real bugs still surface.
        print(f"Error with country: {country}. No data found")
        continue

    covid_data.loc[index, "Covid Cases"] = covid_cases
    covid_data.loc[index, "Covid Deaths"] = covid_deaths
    covid_data.loc[index, "Covid Death Rate"] = covid_death_rate
    covid_data.loc[index, "Covid Cases per Mil Pop"] = cases_per_mil_pop

    counter += 1
    print(f"{round((counter/len(covid_data)*100),2)}% Done")
        
covid_data.to_csv("Data/covid_data.csv", index=False)
covid_data

In [None]:
# Work-in-progress cell: cast the COVID case counts to float and inspect them.
# NOTE(review): `test_df` is not defined anywhere in the visible notebook, so
# this cell raises NameError on a fresh run. The commented-out code below reads
# both 'Happiness Score' and 'Covid Cases' from it, so it is presumably a merge
# of happiness_df and covid_data on "Country" -- TODO confirm and create it.
# NOTE(review): the assignment replaces `test_df` with the single
# "Covid Cases" column, so the other columns are no longer reachable through
# `test_df` afterwards.
test_df = test_df["Covid Cases"].astype(float)#, "Covid Deaths", "Covid Death Rate", "Covid Cases per Mil Pop"]].astype(float)

test_df.info()

# Draft of a Happiness vs. Covid Cases regression plot, currently disabled.
# NOTE(review): the title, axis labels and output file name below were copied
# from the Generosity plot and would need updating before this is enabled.

# Set values for x and y axis
# y_axis = test_df['Happiness Score']
# x_axis = test_df['Covid Cases']

# # Create line plot
# plt.scatter(x_axis, y_axis)

# Create linear regression equation and line to plot
# (slope, intercept, rvalue, pvalue, stderr) = linregress(x_axis, y_axis)
# regress_values = x_axis * slope + intercept
# line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
# plt.plot(x_axis,regress_values,"r-")
# plt.annotate(line_eq,(2.5,0.22),fontsize=15,color="red")

# # Add title and labels to line plot
# plt.title('Happiness vs. Generosity')
# plt.xlabel('Happiness Score')
# plt.ylabel('Generosity')          

# # Save plotted figure as .png
# plt.savefig("images/Happiness_Generosity.png")

# # Show line plot
# print(f"The r-value is {rvalue}")
# plt.show()