In [1]:
# Dependencies
import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress

In [2]:
# Load/Read World Happiness Data 
HappinessIndex_summary = "Data sets/From Vivi/HappinessIndex.csv"
HappinessIndex = pd.read_csv(HappinessIndex_summary)
HappinessIndex.set_index('Country name', inplace=True)
HappinessIndex.head(3)

Unnamed: 0_level_0,Code,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption
Country name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Afghanistan,AFG,2008,3.72,7.14,0.45,50.8,0.72,0.18,0.88
Afghanistan,AFG,2009,4.4,7.31,0.55,51.2,0.68,0.2,0.85
Afghanistan,AFG,2010,4.76,7.42,0.54,51.6,0.6,0.13,0.71


In [3]:
# Filter Y2007 and sort by greatest GDP
HappinessIndex = HappinessIndex.loc[HappinessIndex['year'] == 2017]
HappinessIndex = HappinessIndex.sort_values('Log GDP per capita', ascending=False) 
HappinessIndex.head(1)

Unnamed: 0_level_0,Code,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption
Country name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Luxembourg,LUX,2017,7.06,11.44,0.91,72.6,0.9,0.05,0.33


In [4]:
# Check countries 
LowHappy_HighGDP = HappinessIndex[(HappinessIndex["Life Ladder"]<=6) & (HappinessIndex["Log GDP per capita"]>=10)] 
# LowHappy_HighGDP.to_csv('LowHappy_HighGDP.csv')
LowHappy_HighGDP.head(20)

Unnamed: 0_level_0,Code,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption
Country name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Japan,JPN,2017,5.91,10.57,0.88,74.9,0.85,-0.21,0.66
South Korea,KOR,2017,5.87,10.49,0.81,73.3,0.54,0.01,0.85
Estonia,EST,2017,5.94,10.31,0.94,68.4,0.86,-0.1,0.67
Portugal,PRT,2017,5.71,10.24,0.9,72.2,0.91,-0.17,0.88
Turkey,TUR,2017,5.61,10.13,0.88,66.4,0.64,-0.24,0.67
Latvia,LVA,2017,5.98,10.13,0.9,66.5,0.7,-0.15,0.8
Russia,RUS,2017,5.58,10.12,0.9,63.9,0.73,-0.15,0.86
Greece,GRC,2017,5.15,10.11,0.75,72.2,0.44,-0.29,0.87
Kazakhstan,KAZ,2017,5.88,10.09,0.91,64.0,0.75,-0.04,0.76
Croatia,HRV,2017,5.34,10.04,0.77,69.6,0.72,-0.1,0.89


Outliers (Low Happiness, High GDP): South Korea, Russia, Croatia, Hungary

In [5]:
# Check countries 
HighHappy_LowGDP = HappinessIndex[(HappinessIndex["Life Ladder"]>=6) & (HappinessIndex["Log GDP per capita"]<=9.5)] 
# LowHappy_HighGDP.to_csv('LowHappy_HighGDP.csv')
HighHappy_LowGDP.head(20)

Unnamed: 0_level_0,Code,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption
Country name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Colombia,COL,2017,6.16,9.49,0.91,67.4,0.84,-0.16,0.88
Kosovo,KXK,2017,6.15,9.19,0.79,,0.86,0.12,0.93
Guatemala,GTM,2017,6.33,8.91,0.83,64.5,0.91,-0.06,0.8
El Salvador,SLV,2017,6.34,8.89,0.83,65.8,0.76,-0.17,0.78
Uzbekistan,UZB,2017,6.42,8.71,0.94,64.8,0.99,0.12,0.46
Nicaragua,NIC,2017,6.48,8.55,0.84,67.2,0.92,0.01,0.67
Honduras,HND,2017,6.02,8.4,0.84,67.0,0.9,0.08,0.78


Outliers (Low GDP, High Happy): All 

In [6]:
HappinessIndex["ColC"] = HappinessIndex["Log GDP per capita"].subtract(HappinessIndex["Life Ladder"], fill_value=0)
HappinessIndex.sort_values('ColC', ascending=False)

Unnamed: 0_level_0,Code,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,ColC
Country name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Botswana,BWA,2017,3.50,9.69,0.77,58.2,0.82,-0.25,0.73,6.19
Egypt,EGY,2017,3.93,9.28,0.64,61.4,0.59,-0.16,,5.35
Iraq,IRQ,2017,4.46,9.67,0.70,59.6,0.63,-0.03,0.76,5.21
Tunisia,TUN,2017,4.12,9.30,0.72,66.6,0.48,-0.23,0.87,5.18
Iran,IRN,2017,4.72,9.86,0.71,65.7,0.73,0.19,0.71,5.14
...,...,...,...,...,...,...,...,...,...,...
Honduras,HND,2017,6.02,8.40,0.84,67.0,0.90,0.08,0.78,2.38
Uzbekistan,UZB,2017,6.42,8.71,0.94,64.8,0.99,0.12,0.46,2.29
Niger,NER,2017,4.62,6.83,0.58,53.0,0.68,-0.02,0.78,2.21
Tajikistan,TJK,2017,5.83,7.98,0.66,63.8,0.83,0.12,0.72,2.15


In [18]:
# df = HappinessIndex.
HappinessIndex['total']=HappinessIndex.iloc[:2:3].difference()

AttributeError: 'DataFrame' object has no attribute 'difference'