### IMPORTS

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import math


from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.datasets import load_iris
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier

import warnings
# Install a blanket 'ignore' filter: every warning raised after this point is
# suppressed for the rest of the kernel session, including deprecation notices.
warnings.filterwarnings('ignore')

### World Happiness Dataset 
The following is the raw World Happiness Index dataset as downloaded from the World Happiness Report (WHR). It contains multiple rows per country, covering roughly 5-7 years each. We are only interested in the year 2015, so we will first clean the dataset and keep only what we need.

In [2]:
# Read the raw WHR panel workbook; the first spreadsheet row holds the column names.
world_happiness_df = pd.read_excel(
    io='./Datasets/World_Happiness/DataPanelWHR2021C2_1.xls',
    header=0,
)

In [3]:
# Summary statistics for the numeric columns, rendered as a rich table.
display(world_happiness_df.describe())
# DataFrame.info() prints its report to stdout and returns None, so it is called
# directly; wrapping it in display() would render a stray "None" after the report.
world_happiness_df.info()

Unnamed: 0,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect
count,1949.0,1949.0,1913.0,1936.0,1894.0,1917.0,1860.0,1839.0,1927.0,1933.0
mean,2013.216008,5.466707,9.368459,0.812553,63.359375,0.742567,0.000108,0.747111,0.709998,0.268552
std,4.166828,1.115717,1.154091,0.11848,7.510244,0.142104,0.162221,0.186793,0.107106,0.085176
min,2005.0,2.375092,6.635322,0.290184,32.299999,0.257534,-0.33504,0.035198,0.32169,0.082737
25%,2010.0,4.640079,8.463744,0.74939,58.685,0.647048,-0.112973,0.690305,0.625373,0.206403
50%,2013.0,5.386025,9.460323,0.835167,65.199997,0.763476,-0.025393,0.802428,0.722391,0.258117
75%,2017.0,6.283498,10.352778,0.905291,68.589998,0.85603,0.090967,0.871942,0.799276,0.319716
max,2020.0,8.018934,11.648169,0.987343,77.099998,0.985178,0.698099,0.983276,0.943621,0.70459


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1949 entries, 0 to 1948
Data columns (total 11 columns):
Country name                        1949 non-null object
year                                1949 non-null int64
Life Ladder                         1949 non-null float64
Log GDP per capita                  1913 non-null float64
Social support                      1936 non-null float64
Healthy life expectancy at birth    1894 non-null float64
Freedom to make life choices        1917 non-null float64
Generosity                          1860 non-null float64
Perceptions of corruption           1839 non-null float64
Positive affect                     1927 non-null float64
Negative affect                     1933 non-null float64
dtypes: float64(9), int64(1), object(1)
memory usage: 167.6+ KB


None

We see that several columns have null/empty values, so we have some data cleaning to do.

In [4]:
# Preview the first rows of the raw panel (head() returns 5 rows by default).
display(world_happiness_df.head())

Unnamed: 0,Country name,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect
0,Afghanistan,2008,3.72359,7.3701,0.450662,50.799999,0.718114,0.16764,0.881686,0.517637,0.258195
1,Afghanistan,2009,4.401778,7.539972,0.552308,51.200001,0.678896,0.190099,0.850035,0.583926,0.237092
2,Afghanistan,2010,4.758381,7.646709,0.539075,51.599998,0.600127,0.12059,0.706766,0.618265,0.275324
3,Afghanistan,2011,3.831719,7.619532,0.521104,51.919998,0.495901,0.162427,0.731109,0.611387,0.267175
4,Afghanistan,2012,3.782938,7.705479,0.520637,52.240002,0.530935,0.236032,0.77562,0.710385,0.267919


In [5]:
# Preview the last rows of the raw panel (tail() returns 5 rows by default).
display(world_happiness_df.tail())

Unnamed: 0,Country name,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect
1944,Zimbabwe,2016,3.7354,7.984372,0.768425,54.400002,0.732971,-0.094634,0.723612,0.737636,0.208555
1945,Zimbabwe,2017,3.6383,8.015738,0.754147,55.0,0.752826,-0.097645,0.751208,0.806428,0.224051
1946,Zimbabwe,2018,3.61648,8.048798,0.775388,55.599998,0.762675,-0.068427,0.844209,0.710119,0.211726
1947,Zimbabwe,2019,2.693523,7.950132,0.759162,56.200001,0.631908,-0.063791,0.830652,0.716004,0.235354
1948,Zimbabwe,2020,3.159802,7.828757,0.717243,56.799999,0.643303,-0.008696,0.788523,0.702573,0.345736


### Trim the dataset to use what we need

In [6]:
# Keep only the 2015 observations. The explicit .copy() makes the subset an
# independent DataFrame, so any later in-place cleaning of it cannot be treated
# as a chained assignment on a slice of the raw frame (a warning that would be
# hidden here because all warnings are filtered out at import time).
world_happiness_df_2015 = world_happiness_df.loc[world_happiness_df["year"] == 2015].copy()
# Summary statistics of the 2015 subset, shown as the cell's output.
world_happiness_df_2015.describe()

Unnamed: 0,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect
count,143.0,143.0,140.0,142.0,139.0,140.0,139.0,133.0,142.0,142.0
mean,2015.0,5.404037,9.396412,0.798211,63.665073,0.748704,0.0197,0.736524,0.709288,0.278709
std,0.0,1.116106,1.171639,0.125879,7.16715,0.136429,0.166145,0.195702,0.106159,0.085422
min,2015.0,2.701591,6.934621,0.434389,46.0,0.388928,-0.271978,0.094604,0.36944,0.103494
25%,2015.0,4.614304,8.470333,0.728894,58.25,0.655594,-0.094748,0.673476,0.62542,0.214518
50%,2015.0,5.344383,9.452952,0.825523,65.5,0.775885,-0.02866,0.809943,0.713898,0.27449
75%,2015.0,6.279204,10.34723,0.900397,68.400002,0.850633,0.111643,0.862374,0.800305,0.331707
max,2015.0,7.603434,11.616853,0.987343,75.900002,0.979937,0.68756,0.961651,0.910497,0.642589
