# Elastic Net on Happiness Data
### Alisha Coffey

In [1]:
import numpy as np
import pandas as pd

## Loading Cleaned Data

In [2]:
hap = pd.read_csv("Data/Clean Data/happinessData.csv")

hap.head()

Unnamed: 0,country,happiness_rank,happiness_score,economy_gdp_per_capita,health_life_expectancy,freedom,trust_government_corruption,generosity,year
0,Switzerland,1,7.587,1.39651,0.94143,0.66557,0.41978,0.29678,2015
1,Iceland,2,7.561,1.30232,0.94784,0.62877,0.14145,0.4363,2015
2,Denmark,3,7.527,1.32548,0.87464,0.64938,0.48357,0.34139,2015
3,Norway,4,7.522,1.459,0.88521,0.66973,0.36503,0.34699,2015
4,Canada,5,7.427,1.32629,0.90563,0.63297,0.32957,0.45811,2015


In [3]:
hap_trim = hap[["country","happiness_rank","happiness_score","economy_gdp_per_capita","health_life_expectancy","freedom","trust_government_corruption","generosity"]]

hap_trim

Unnamed: 0,country,happiness_rank,happiness_score,economy_gdp_per_capita,health_life_expectancy,freedom,trust_government_corruption,generosity
0,Switzerland,1,7.587,1.39651,0.941430,0.66557,0.41978,0.29678
1,Iceland,2,7.561,1.30232,0.947840,0.62877,0.14145,0.43630
2,Denmark,3,7.527,1.32548,0.874640,0.64938,0.48357,0.34139
3,Norway,4,7.522,1.45900,0.885210,0.66973,0.36503,0.34699
4,Canada,5,7.427,1.32629,0.905630,0.63297,0.32957,0.45811
...,...,...,...,...,...,...,...,...
1078,Lesotho,145,3.512,7.92600,0.007796,0.71500,0.91500,-0.13100
1079,Botswana,146,3.467,9.78200,0.378964,0.82400,0.80100,-0.24600
1080,Rwanda,147,3.415,7.67600,0.453802,0.89700,0.16700,0.06100
1081,Zimbabwe,148,3.145,7.94300,0.271220,0.67700,0.82100,-0.04700


In [4]:
print(len(hap_trim["country"].unique()),"countries")

172 countries


In [5]:
features = hap_trim.drop(columns=["country","happiness_score","happiness_rank"])
features.head()

Unnamed: 0,economy_gdp_per_capita,health_life_expectancy,freedom,trust_government_corruption,generosity
0,1.39651,0.94143,0.66557,0.41978,0.29678
1,1.30232,0.94784,0.62877,0.14145,0.4363
2,1.32548,0.87464,0.64938,0.48357,0.34139
3,1.459,0.88521,0.66973,0.36503,0.34699
4,1.32629,0.90563,0.63297,0.32957,0.45811


In [6]:
happy_score = hap_trim["happiness_score"].values.reshape(-1,1)
happy_score

array([[7.587],
       [7.561],
       [7.527],
       ...,
       [3.415],
       [3.145],
       [2.523]])

In [7]:
from sklearn.model_selection import train_test_split

feat_train, feat_test,hap_train, hap_test = train_test_split(features, happy_score, random_state=1234)

In [8]:
from sklearn.preprocessing import StandardScaler
allfeat_scaler = StandardScaler().fit(features)
allhap_scaler = StandardScaler().fit(happy_score)

feat_scaler = StandardScaler().fit(feat_train)
hap_scaler = StandardScaler().fit(hap_train)

feat_train_scaled = feat_scaler.transform(feat_train)
feat_test_scaled = feat_scaler.transform(feat_test)
hap_train_scaled = hap_scaler.transform(hap_train)
hap_test_scaled = hap_scaler.transform(hap_test)

## Finding the best alpha value

In [9]:
from sklearn.linear_model import ElasticNet, ElasticNetCV
from sklearn.metrics import mean_squared_error

# set alphas
alphas = [0.0005,0.001,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]

# Scale data to fit gaussian distribution
features_scaled = allfeat_scaler.transform(features)
happy_score_scaled = allhap_scaler.transform(happy_score)

# Find the best alpha value to get the best predictive model
for a in alphas:
    elastic_net = ElasticNet(alpha=a,l1_ratio=0.5).fit(features_scaled, happy_score_scaled)   
    score = elastic_net.score(features_scaled, happy_score_scaled)
    pred_happy = elastic_net.predict(features_scaled)
    mse = mean_squared_error(happy_score_scaled, pred_happy)   
    print("Alpha:{0:.4f}, R2:{1:.2f}, MSE:{2:.2f}".format(a, score, mse))

Alpha:0.0005, R2:0.65, MSE:0.35
Alpha:0.0010, R2:0.65, MSE:0.35
Alpha:0.1000, R2:0.63, MSE:0.37
Alpha:0.2000, R2:0.59, MSE:0.41
Alpha:0.3000, R2:0.55, MSE:0.45
Alpha:0.4000, R2:0.51, MSE:0.49
Alpha:0.5000, R2:0.46, MSE:0.54
Alpha:0.6000, R2:0.40, MSE:0.60
Alpha:0.7000, R2:0.35, MSE:0.65
Alpha:0.8000, R2:0.31, MSE:0.69
Alpha:0.9000, R2:0.26, MSE:0.74
Alpha:1.0000, R2:0.22, MSE:0.78


#### Looks like alpha = 0.001 is the best

### Run model on training data with optimal alpha

In [10]:
elastic_net = ElasticNet(alpha=0.001).fit(feat_train_scaled, hap_train_scaled)
pred_happy = elastic_net.predict(feat_test_scaled)
score = elastic_net.score(feat_test_scaled, hap_test_scaled)
mse = mean_squared_error(hap_test_scaled, pred_happy)

print("R2:{0:.3f}, MSE:{1:.2f}".format(score, mse))

R2:0.646, MSE:0.37


### Cross-validation

In [11]:
elastic_cv=ElasticNetCV(alphas=alphas, cv=5)
model = elastic_cv.fit(feat_train_scaled, hap_train_scaled)

print("Aplha:{0:.3f}, R2:{1:.3f}, Intercept:{2:.2f}".format(model.alpha_,
                                                       elastic_cv.score(feat_test_scaled, hap_test_scaled), 
                                                       model.intercept_))

Aplha:0.001, R2:0.646, Intercept:0.00


  return f(*args, **kwargs)


## Plug in values to make prediction
Spits out a scaled happiness value, so need to find a way to reconvert happiness score back to regular value, or match with rank to see if it is a high or low happiness score

In [12]:
print(features_scaled[0])

# Testing to see which to use for inverse transformation
print(allfeat_scaler.inverse_transform(features_scaled[0]))
print(feat_scaler.inverse_transform(features_scaled[0]))

[-0.48613259  1.37179642  0.67498782  0.41810336  0.85485897]
[1.39651 0.94143 0.66557 0.41978 0.29678]
[1.36723486 0.94517367 0.66539375 0.41736966 0.29200696]


In [13]:
x = elastic_net.predict([[-0.48613259,  1.37179642,  0.67498782,  0.41810336,  0.85485897]]) # first row, Switzerland
print(x)

# unscale the number to get actual score
print(allhap_scaler.inverse_transform(x))

[1.26676158]
[6.82822771]


In [14]:
hap_trim["scaled_happiness_score"] = happy_score_scaled.flatten().tolist()
hap_trim

Unnamed: 0,country,happiness_rank,happiness_score,economy_gdp_per_capita,health_life_expectancy,freedom,trust_government_corruption,generosity,scaled_happiness_score
0,Switzerland,1,7.587,1.39651,0.941430,0.66557,0.41978,0.29678,1.945552
1,Iceland,2,7.561,1.30232,0.947840,0.62877,0.14145,0.43630,1.922293
2,Denmark,3,7.527,1.32548,0.874640,0.64938,0.48357,0.34139,1.891877
3,Norway,4,7.522,1.45900,0.885210,0.66973,0.36503,0.34699,1.887404
4,Canada,5,7.427,1.32629,0.905630,0.63297,0.32957,0.45811,1.802418
...,...,...,...,...,...,...,...,...,...
1078,Lesotho,145,3.512,7.92600,0.007796,0.71500,0.91500,-0.13100,-1.699904
1079,Botswana,146,3.467,9.78200,0.378964,0.82400,0.80100,-0.24600,-1.740161
1080,Rwanda,147,3.415,7.67600,0.453802,0.89700,0.16700,0.06100,-1.786680
1081,Zimbabwe,148,3.145,7.94300,0.271220,0.67700,0.82100,-0.04700,-2.028219


## Happiness levels
When a user inputs their values and gets a happiness score, it needs to be defined in some way. I came up with this scale by evenly dividing the happiness scores. So, the user can get a numeric score and a qualitative value (relative to the happiness rank of the other countries in the data).

In [15]:
x=np.linspace(min(hap_trim["happiness_score"]),max(hap_trim["happiness_score"]),5)
print(x[0],"= Relatively Unhappy")
print(x[1],"= Relatively Low Happiness")
print(x[2],"= Relatively Moderately Happy")
print(x[3],"= Relatively Happy")
print(x[4],"= Relatively Extremely Happy")

2.523 = Relatively Unhappy
3.85275 = Relatively Low Happiness
5.182499999999999 = Relatively Moderately Happy
6.51225 = Relatively Happy
7.842 = Relatively Extremely Happy


In [16]:
# Save happiness levels
happiness_levels = [[x[0],"Relatively Unhappy"],
                   [x[1],"Relatively Low Happiness"],
                   [x[2],"Relatively Moderately Happy"],
                  [x[3],"Relatively Happy"],
                  [x[4],"Relatively Extremely Happy"]]

## Saving the model with pickle

In [18]:
import pickle

save_objects = (elastic_net,allfeat_scaler, allhap_scaler,happiness_levels)

pickle.dump(save_objects, open("Data/elasticnet_model.pkl", 'wb'))


# To load objects, call like so 
# model, feature_scaler, happiness_scaler, happiness_levels = pickle.load(open("elasticnet_model.pkl", 'rb'))