## Predict Hapiness Rank using Ordinal Regression

In [109]:
# Install needed libraries
!pip install pandas
!pip install numpy
!pip install statsmodels
!pip install sklearn

import pandas as pd
import numpy as np
from statsmodels.miscmodels.ordinal_model import OrderedModel
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.model_selection import train_test_split



### Data Processing

#### Select Features

In [137]:
data = pd.read_csv('../normalized_data.csv')

# Top 4 features from PCA
# data = data[['Social support', 'Log GDP per capita', 'Healthy life expectancy at birth', 'Freedom to make life choices', 'Happiness_Score_Percentile']]

# Top 7 features from PCA
# data = data[['Social support', 'Log GDP per capita', 'Healthy life expectancy at birth', 'Freedom to make life choices', 'Negative affect', 'Perceptions of corruption', 'Positive affect', 'Happiness_Score_Percentile']]

# All 10 features
data['Country'] = data['Country'].astype('category')
data['Country'] = data['Country'].cat.codes
data = data.drop(['Percentile Ranges'], axis=1)
data = data.drop(['Unnamed: 0'], axis=1)
data = data.drop(["Happiness Score"], axis=1)
data = data.drop(["Country"], axis=1)
data = data.drop(["Year"], axis=1)
data = data.drop(["Freedom_Rating"], axis=1)
data = data.drop(["Life Expectancy"], axis=1)

# Happiness Score from 1 - 10
data['Happiness_Score_Percentile'] = abs(data['Happiness_Score_Percentile'] - 9)

# Binary happiness score
data['Happiness_Score_Percentile'] = data['Happiness_Score_Percentile'].transform(lambda x: pd.cut(x, bins=2, labels=[0,1]))
data = data.dropna()
data.iloc[0]

Log GDP per capita                  -1.565348
Social support                      -2.888644
Healthy life expectancy at birth    -1.697393
Freedom to make life choices        -1.111509
Generosity                           0.371831
Perceptions of corruption            0.355938
Positive affect                     -0.992580
Negative affect                      0.034729
Confidence in national government    0.080438
GDP                                 -0.300283
Happiness_Score_Percentile           0.000000
Name: 0, dtype: float64

#### Split Data into Testing and Training

In [138]:
train, test = train_test_split(data, test_size=0.2)

y_train = train['Happiness_Score_Percentile']
x_train = train.drop(['Happiness_Score_Percentile'], axis=1)
y_test = test['Happiness_Score_Percentile']
x_test = test.drop(['Happiness_Score_Percentile'], axis=1)

assert len(x_train) == len(y_train)
assert len(x_test) == len(y_test)
assert len(x_train.iloc[0]) == len(x_test.iloc[0])
print(len(x_train), 'train examples')
print(len(x_test), 'test examples')

879 train examples
220 test examples


### Run Ordinal Regression

In [139]:
mod_prob = OrderedModel(y_train, x_train, distr='logit')
res_prob = mod_prob.fit(method='bfgs')
res_prob.summary()

Optimization terminated successfully.
         Current function value: 0.301491
         Iterations: 57
         Function evaluations: 58
         Gradient evaluations: 58


0,1,2,3
Dep. Variable:,Happiness_Score_Percentile,Log-Likelihood:,-265.01
Model:,OrderedModel,AIC:,552.0
Method:,Maximum Likelihood,BIC:,604.6
Date:,"Thu, 01 Dec 2022",,
Time:,14:14:43,,
No. Observations:,879,,
Df Residuals:,868,,
Df Model:,11,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Log GDP per capita,0.5068,0.227,2.233,0.026,0.062,0.952
Social support,0.8989,0.179,5.010,0.000,0.547,1.251
Healthy life expectancy at birth,1.0878,0.208,5.242,0.000,0.681,1.495
Freedom to make life choices,0.7271,0.178,4.082,0.000,0.378,1.076
Generosity,0.0047,0.114,0.041,0.967,-0.218,0.228
Perceptions of corruption,-0.5829,0.201,-2.898,0.004,-0.977,-0.189
Positive affect,0.4854,0.138,3.523,0.000,0.215,0.755
Negative affect,-0.1368,0.131,-1.044,0.297,-0.394,0.120
Confidence in national government,-0.6675,0.170,-3.931,0.000,-1.000,-0.335


In [140]:
y_pred = res_prob.model.predict(res_prob.params, x_test)
y_pred = np.argmax(y_pred, axis=1)

print("RMS: ", (mean_squared_error(y_test, y_pred))**0.5)
print("Accuracy: ", accuracy_score(y_test, y_pred))

RMS:  0.30151134457776363
Accuracy:  0.9090909090909091


  xb = xb[:, None]
