# Import Libraries

In [49]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Import and Process Data

In [50]:
# Load the five yearly happiness CSVs and reshape them to one shared schema.
#
# Column names are assigned by position, so each list below must match the
# column order of its CSV once the bookkeeping columns have been dropped.
# The 2015-2016 layout lists Corruption before Generosity; the later layouts
# list Generosity first.
COLUMNS_2015_2016 = ['Country', 'Happiness_Score', 'GDP_per_Capita', 'Social_Support', 'Life_Expectancy', 'Freedom', 'Corruption', 'Generosity', 'Dystopia_Residual']
COLUMNS_2017 = ['Country', 'Happiness_Score', 'GDP_per_Capita', 'Social_Support', 'Life_Expectancy', 'Freedom', 'Generosity', 'Corruption', 'Dystopia_Residual']
# The 2018-2019 layout carries no residual column; it is derived in the helper.
COLUMNS_2018_2019 = COLUMNS_2017[:-1]

# Factors subtracted from the score to derive the residual, in summation order.
RESIDUAL_FACTORS = ['GDP_per_Capita', 'Social_Support', 'Life_Expectancy', 'Freedom', 'Generosity', 'Corruption']


def _tidy_happiness_year(raw, year, dropped_columns, column_names):
    """Return one year's table in the shared schema.

    Parameters
    ----------
    raw : pandas.DataFrame
        The table as read from that year's CSV. It is not modified.
    year : int
        Calendar year stored in the new ``year`` column (as a timestamp for
        1 January of that year).
    dropped_columns : list of str
        Columns removed before the positional rename.
    column_names : list of str
        New names, assigned by position to the remaining columns.

    Returns
    -------
    pandas.DataFrame
        A new frame with the renamed columns, a ``Dystopia_Residual`` column
        (derived when ``column_names`` does not provide one) and ``year``.
    """
    tidy = raw.drop(columns=dropped_columns)
    tidy.columns = column_names
    if 'Dystopia_Residual' not in tidy.columns:
        # Residual = score minus the sum of the factors. Plain `+` is used so
        # a missing factor yields a missing residual rather than a partial sum.
        explained = tidy[RESIDUAL_FACTORS[0]]
        for factor in RESIDUAL_FACTORS[1:]:
            explained = explained + tidy[factor]
        tidy['Dystopia_Residual'] = tidy['Happiness_Score'] - explained
    tidy['year'] = pd.to_datetime(str(year), format='%Y')
    return tidy


world_hap_2015_raw = pd.read_csv('data/2015.csv')
# The region lookup is taken from the 2015 file, before Region is dropped.
country_and_region = world_hap_2015_raw[['Country', 'Region']]

world_hap_2015 = _tidy_happiness_year(world_hap_2015_raw, 2015, ['Region', 'Happiness Rank', 'Standard Error'], COLUMNS_2015_2016)
world_hap_2016 = _tidy_happiness_year(pd.read_csv('data/2016.csv'), 2016, ['Region', 'Happiness Rank', 'Lower Confidence Interval', 'Upper Confidence Interval'], COLUMNS_2015_2016)
world_hap_2017 = _tidy_happiness_year(pd.read_csv('data/2017.csv'), 2017, ['Happiness.Rank', 'Whisker.high', 'Whisker.low'], COLUMNS_2017)
world_hap_2018 = _tidy_happiness_year(pd.read_csv('data/2018.csv'), 2018, ['Overall rank'], COLUMNS_2018_2019)
world_hap_2019 = _tidy_happiness_year(pd.read_csv('data/2019.csv'), 2019, ['Overall rank'], COLUMNS_2018_2019)

# The yearly frames share the same columns in different orders. sort=True
# pins the alphabetical column order shown in the recorded outputs and
# silences the FutureWarning about the changing default.
world_hap = pd.concat([world_hap_2015, world_hap_2016, world_hap_2017, world_hap_2018, world_hap_2019], sort=True)
# Inner join: rows whose Country does not appear in the 2015 lookup are dropped.
world_hap = world_hap.merge(country_and_region, how='inner', on='Country')

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




In [51]:
# Load the GDP contribution table; its columns are previewed in the next cell.
gdp_contribution_path = 'data/GDP_Contribution.csv'
gdp_contribution = pd.read_csv(gdp_contribution_path)

In [52]:
# Preview the first five rows of the GDP contribution table.
gdp_contribution.head(n=5)

Unnamed: 0,Country,Year,GDP,Agriculture,Industry,Manufacturing,Services,CAGR
0,Afghanistan,2019,19.3,26.0,14.0,7.0,55.5,0.021765258
1,Albania,2019,15.3,18.0,20.0,6.0,48.6,0.02831735
2,Algeria,2019,171.1,12.0,37.0,24.0,46.2,0.006644461
3,American Samoa,2019,0.6,,,,,0.0
4,Andorra,2019,3.2,,,,,-0.006713433


In [53]:
# Attach the GDP contribution columns to every happiness row. The join key is
# Country alone: the happiness `year` is not matched against the GDP table's
# `Year`.
#
# This cell overwrites `world_hap`, so columns added by a previous run are
# dropped first; otherwise re-running it would produce `_x`/`_y` duplicates.
gdp_columns = [column for column in gdp_contribution.columns if column != 'Country']
world_hap = (
    world_hap
    .drop(columns=gdp_columns, errors='ignore')
    .merge(gdp_contribution, how='inner', on='Country')
)

In [54]:
# Preview the first five rows of the merged happiness/GDP table.
world_hap.head(n=5)

Unnamed: 0,Corruption,Country,Dystopia_Residual,Freedom,GDP_per_Capita,Generosity,Happiness_Score,Life_Expectancy,Social_Support,year,Region,Year,GDP,Agriculture,Industry,Manufacturing,Services,CAGR
0,0.41978,Switzerland,2.51738,0.66557,1.39651,0.29678,7.587,0.94143,1.34951,2015-01-01,Western Europe,2019,703.1,1.0,26.0,19.0,70.9,0.020874971
1,0.41203,Switzerland,2.69463,0.58557,1.52733,0.28083,7.509,0.86303,1.14524,2016-01-01,Western Europe,2019,703.1,1.0,26.0,19.0,70.9,0.020874971
2,0.367007,Switzerland,2.276716,0.620071,1.56498,0.290549,7.494,0.858131,1.516912,2017-01-01,Western Europe,2019,703.1,1.0,26.0,19.0,70.9,0.020874971
3,0.357,Switzerland,2.318,0.66,1.42,0.256,7.487,0.927,1.549,2018-01-01,Western Europe,2019,703.1,1.0,26.0,19.0,70.9,0.020874971
4,0.343,Switzerland,2.272,0.572,1.452,0.263,7.48,1.052,1.526,2019-01-01,Western Europe,2019,703.1,1.0,26.0,19.0,70.9,0.020874971


In [55]:
# Report the (rows, columns) size of the merged table.
merged_shape = world_hap.shape
print(merged_shape)

(670, 18)


# Data Exploration

## Average Happiness by Region

In [56]:
# Mean happiness score per region, happiest first. The string 'mean' selects
# pandas' built-in groupby mean, which skips missing values just as
# np.nanmean did, without the deprecation warning that recent pandas versions
# emit when a NumPy callable is passed to agg.
world_hap.groupby('Region').agg({'Happiness_Score': 'mean'}).sort_values(by='Happiness_Score', ascending=False)

Unnamed: 0_level_0,Happiness_Score
Region,Unnamed: 1_level_1
Australia and New Zealand,7.2946
North America,7.1747
Western Europe,6.78933
Latin America and Caribbean,6.03695
Middle East and Northern Africa,5.757423
Central and Eastern Europe,5.406944
Eastern Asia,5.391333
Southeastern Asia,5.389475
Southern Asia,4.580657
Sub-Saharan Africa,4.14986


From the table above, the happiest regions are Australia and New Zealand, North America, and Western Europe.

## Largest Contributor to Happiness by Region

In [57]:
# Express each factor as a share of the country's happiness score, then
# average those shares (and the raw score) per region.
CONTRIBUTION_FACTORS = ['GDP_per_Capita', 'Freedom', 'Generosity', 'Life_Expectancy', 'Social_Support', 'Corruption']

# Work on a copy so `world_hap` keeps the unscaled factor values.
world_hap_contribution = world_hap.copy()

# Divide every factor column by the row's Happiness_Score in one vectorised
# step (axis=0 aligns the score with the rows).
world_hap_contribution[CONTRIBUTION_FACTORS] = world_hap_contribution[CONTRIBUTION_FACTORS].div(
    world_hap_contribution['Happiness_Score'], axis=0
)

# The groupby mean skips missing values, matching the previous np.nanmean
# aggregation. Regions are ordered by mean happiness score, highest first.
(
    world_hap_contribution
    .groupby('Region')[['Happiness_Score'] + CONTRIBUTION_FACTORS]
    .mean()
    .sort_values(by='Happiness_Score', ascending=False)
)

Unnamed: 0_level_0,Happiness_Score,GDP_per_Capita,Freedom,Generosity,Life_Expectancy,Social_Support,Corruption
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Australia and New Zealand,7.2946,0.185924,0.083815,0.05817,0.123828,0.195298,0.048964
North America,7.1747,0.198453,0.077772,0.051791,0.12,0.189521,0.030853
Western Europe,6.78933,0.204042,0.075299,0.039213,0.133277,0.199343,0.031705
Latin America and Caribbean,6.03695,0.152515,0.077632,0.033436,0.112723,0.196396,0.015921
Middle East and Northern Africa,5.757423,0.210425,0.067444,0.027924,0.123057,0.187589,0.027172
Central and Eastern Europe,5.406944,0.189726,0.064081,0.028495,0.130675,0.213325,0.015365
Eastern Asia,5.391333,0.201748,0.085606,0.028589,0.145198,0.2324,0.015009
Southeastern Asia,5.389475,0.164794,0.103366,0.075341,0.120011,0.208772,0.02349
Southern Asia,4.580657,0.139403,0.085216,0.065971,0.111988,0.17393,0.021575
Sub-Saharan Africa,4.14986,0.105729,0.084838,0.051203,0.071062,0.203129,0.025151


In [58]:
# Remove the columns that are not contribution shares. Reassigning with
# errors='ignore' (instead of dropping in place) keeps the cell re-runnable:
# a second run finds the columns already gone and does nothing.
world_hap_contribution = world_hap_contribution.drop(
    columns=['Happiness_Score', 'Dystopia_Residual', 'Agriculture', 'Manufacturing', 'Services', 'Industry', 'Year'],
    errors='ignore',
)

# Mean share per region (pivot_table's default aggregation is the mean).
# The value columns are named explicitly so the result does not depend on
# pandas discarding the remaining non-numeric columns.
happiness_contribution_table = pd.pivot_table(
    data=world_hap_contribution,
    index=['Region'],
    values=['Corruption', 'Freedom', 'GDP_per_Capita', 'Generosity', 'Life_Expectancy', 'Social_Support'],
)
happiness_contribution_table

Unnamed: 0_level_0,Corruption,Freedom,GDP_per_Capita,Generosity,Life_Expectancy,Social_Support
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Australia and New Zealand,0.048964,0.083815,0.185924,0.05817,0.123828,0.195298
Central and Eastern Europe,0.015365,0.064081,0.189726,0.028495,0.130675,0.213325
Eastern Asia,0.015009,0.085606,0.201748,0.028589,0.145198,0.2324
Latin America and Caribbean,0.015921,0.077632,0.152515,0.033436,0.112723,0.196396
Middle East and Northern Africa,0.027172,0.067444,0.210425,0.027924,0.123057,0.187589
North America,0.030853,0.077772,0.198453,0.051791,0.12,0.189521
Southeastern Asia,0.02349,0.103366,0.164794,0.075341,0.120011,0.208772
Southern Asia,0.021575,0.085216,0.139403,0.065971,0.111988,0.17393
Sub-Saharan Africa,0.025151,0.084838,0.105729,0.051203,0.071062,0.203129
Western Europe,0.031705,0.075299,0.204042,0.039213,0.133277,0.199343


In [59]:
# Overall (all-region) mean share of each factor, printed one per line in
# this column order.
for factor in ['Corruption', 'Freedom', 'GDP_per_Capita', 'Generosity', 'Life_Expectancy', 'Social_Support']:
    overall_share = world_hap_contribution[factor].mean()
    print(overall_share)

0.023061172779986868
0.07764533184079905
0.16416718391372379
0.04187712981021684
0.11143550704400644
0.20096114708831858


## Data Analysis

We want to find the industries that contribute the most to happiness in various countries and regions. From there, our goal is to identify any glaring trends in the relationship between GDP, industries, and happiness.