# Random Forest Regressor

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

In [2]:
df = pd.read_pickle('data/happiness_data.pkl')

In [3]:
years_train = [2015, 2016]
years_test = [2017]
train_features = df.loc[df['Year'].isin(years_train)]
test_features = df.loc[df['Year'].isin(years_test)]
train_features.reset_index(inplace=True, drop=True)
test_features.reset_index(inplace=True, drop=True)

In [4]:
df_train = train_features.drop(columns=['Country', 'Year', 'Score', 'Low', 'High', 'Economy', 'Family', 'Health', 'Freedom', 'Trust', 'Generosity', 'Dystopia'], axis=1)
df_test = test_features.drop(columns=['Country', 'Year', 'Score', 'Low', 'High', 'Economy', 'Family', 'Health', 'Freedom', 'Trust', 'Generosity', 'Dystopia'], axis=1)

In [5]:
df_economy_train = pd.read_pickle('data/economy_train.pkl')
df_family_train = pd.read_pickle('data/family_train.pkl')
df_health_train = pd.read_pickle('data/health_train.pkl')
df_freedom_train = pd.read_pickle('data/freedom_train.pkl')
df_trust_train = pd.read_pickle('data/trust_train.pkl')
df_generosity_train = pd.read_pickle('data/generosity_train.pkl')
df_dystopia_train = pd.read_pickle('data/dystopia_train.pkl')

df_economy_test = pd.read_pickle('data/economy_test.pkl')
df_family_test = pd.read_pickle('data/family_test.pkl')
df_health_test = pd.read_pickle('data/health_test.pkl')
df_freedom_test = pd.read_pickle('data/freedom_test.pkl')
df_trust_test = pd.read_pickle('data/trust_test.pkl')
df_generosity_test = pd.read_pickle('data/generosity_test.pkl')
df_dystopia_test = pd.read_pickle('data/dystopia_test.pkl')

In [6]:
#Fetching training and test data set
score_train = pd.DataFrame(train_features['Score'])
economy_train = pd.DataFrame(train_features['Economy'])
family_train = pd.DataFrame(train_features['Family'])
health_train = pd.DataFrame(train_features['Health'])
freedom_train = pd.DataFrame(train_features['Freedom'])
trust_train = pd.DataFrame(train_features['Trust'])
generosity_train = pd.DataFrame(train_features['Generosity'])
dystopia_train = pd.DataFrame(train_features['Dystopia'])

score_test = pd.DataFrame(test_features['Score'])
economy_test = pd.DataFrame(test_features['Economy'])
family_test = pd.DataFrame(test_features['Family'])
health_test = pd.DataFrame(test_features['Health'])
freedom_test = pd.DataFrame(test_features['Freedom'])
trust_test = pd.DataFrame(test_features['Trust'])
generosity_test = pd.DataFrame(test_features['Generosity'])
dystopia_test = pd.DataFrame(test_features['Dystopia'])

low_score = pd.DataFrame(test_features['Low'])
high_score = pd.DataFrame(test_features['High'])

In [7]:
#Training the model to predict Economy
rf = RandomForestRegressor(n_estimators=1000)
rf.fit(df_economy_train, economy_train.values.ravel())
economy_predictions = rf.predict(df_economy_test)
errors = abs(economy_predictions - economy_test.values.ravel())
accuracy = r2_score(economy_test.values.ravel(), economy_predictions)
print('R2 score: ', accuracy)
errors

R2 score:  0.5090830135167679


array([0.37302523, 0.04351398, 0.29056479, 0.07337236, 0.17993471,
       0.08847884, 0.1324919 , 0.14960051, 0.14566048, 0.63195845,
       0.16236124, 0.01925813, 0.77897148, 0.0382536 , 0.03678216,
       0.02503701, 0.13738889, 0.25033202, 0.20789347, 0.54881904,
       0.23673881, 0.15874227, 0.13569388, 0.12366617, 0.84935968,
       0.08412453, 0.15954888, 0.05662031, 0.16102183, 0.16535745,
       0.04827169, 0.05914529, 0.00490653, 0.17377164, 0.18623579,
       0.05113468, 0.05741853, 0.56971102, 0.40997244, 0.1190006 ,
       0.1272892 , 0.21795606, 0.05511752, 0.69712781, 0.06686359,
       0.04614292, 0.47236507, 0.00165949, 0.04463907, 0.12837671,
       0.02328602, 0.14555216, 0.01706344, 0.04218228, 0.15161498,
       0.06550751, 0.07949989, 0.0998224 , 0.01660845, 0.16549348,
       0.07510775, 0.626942  , 0.09269356, 0.0299494 , 0.20441295,
       0.02122202, 0.48480829, 0.25781747, 0.29938533, 0.07340854,
       1.03086934, 0.15023826, 0.1333252 , 0.62995267, 0.07279

In [8]:
#Training the model to predict Family
rf = RandomForestRegressor(n_estimators=1000)
rf.fit(df_family_train, family_train.values.ravel())
family_predictions = rf.predict(df_family_test)
errors = abs(family_predictions - family_test.values.ravel())
accuracy = r2_score(family_test.values.ravel(), family_predictions)
print('R2 score: ', accuracy)
errors

R2 score:  -0.9937385337482858


array([0.1596803 , 0.06947372, 0.30801634, 0.340761  , 0.50238007,
       0.34873779, 0.38401476, 0.37710378, 0.25265453, 0.35874953,
       0.06311483, 0.33068611, 0.39627664, 0.02740754, 0.44852175,
       0.36585756, 0.32869269, 0.33911815, 0.58699845, 0.50249787,
       0.47190698, 0.1955665 , 0.32603264, 0.32022597, 0.41426933,
       0.25541479, 0.26449966, 0.22176005, 0.46109786, 0.56062584,
       0.02805123, 0.29462698, 0.34038941, 0.3962248 , 0.44230422,
       0.32443233, 0.25192919, 0.46283177, 0.23995977, 0.34956236,
       0.30088789, 0.42920635, 0.04359693, 0.36244093, 0.29254959,
       0.36113579, 0.38569278, 0.28572568, 0.02652375, 0.24771435,
       0.12576199, 0.51231568, 0.17681669, 0.3550894 , 0.18289759,
       0.4652591 , 0.2776226 , 0.40085076, 0.48120863, 0.42252517,
       0.29105927, 0.49859615, 0.26644503, 0.39550756, 0.33563809,
       0.40761181, 0.3199203 , 0.2199607 , 0.46951761, 0.39591817,
       0.42354291, 0.306656  , 0.08834283, 0.36375365, 0.51119

In [9]:
#Training the model to predict Health
rf = RandomForestRegressor(n_estimators=1000)
rf.fit(df_health_train, health_train.values.ravel())
health_predictions = rf.predict(df_health_test)
errors = abs(health_predictions - family_test.values.ravel())
accuracy = r2_score(health_test.values.ravel(), health_predictions)
print('R2 score: ', accuracy)
errors

R2 score:  0.9406205798031272


array([0.32437504, 0.05996145, 0.55177189, 0.92676591, 0.734456  ,
       0.32075097, 0.67036036, 0.67393371, 0.56743029, 0.61688198,
       0.17488001, 0.71003014, 0.62726889, 0.20208652, 0.79684001,
       0.69469335, 0.30634292, 0.99684475, 0.77374978, 0.71779734,
       0.79945422, 0.34351934, 0.48438735, 0.76400584, 0.67331513,
       0.84645996, 0.47628513, 0.43749796, 0.72901559, 0.65159954,
       0.20425874, 0.29551741, 0.60305823, 0.76225306, 0.81227953,
       0.62108049, 0.52674157, 0.70750927, 0.48910224, 0.67355207,
       0.53389608, 0.80366809, 0.09942785, 0.69666676, 0.52809944,
       0.45848993, 0.68683774, 0.53754775, 0.31524948, 0.50794608,
       0.54518558, 0.75477838, 0.333914  , 0.71141412, 0.42325666,
       0.69254924, 0.49722357, 0.56812979, 0.70381176, 0.53185862,
       0.59969359, 0.72261684, 0.76093631, 0.53317659, 0.54662148,
       0.67937899, 0.40312356, 0.56664553, 0.75226647, 0.67529456,
       0.6165476 , 0.54595401, 0.17464567, 0.59804076, 1.07487

In [10]:
#Training the model to predict Freedom
rf = RandomForestRegressor(n_estimators=1000)
rf.fit(df_freedom_train, freedom_train.values.ravel())
freedom_predictions = rf.predict(df_freedom_test)
errors = abs(freedom_predictions - freedom_test.values.ravel())
accuracy = r2_score(freedom_test.values.ravel(), freedom_predictions)
print('R2 score: ', accuracy)
errors

R2 score:  0.4745037601357649


array([1.16973118e-01, 8.16071213e-02, 7.49054139e-02, 1.65110078e-01,
       1.47871015e-01, 4.51513036e-02, 6.74563295e-02, 8.47653511e-02,
       1.26981551e-02, 8.54565238e-02, 4.87089167e-02, 6.99165498e-02,
       1.24550023e-01, 1.14271702e-01, 5.23474605e-02, 2.02449755e-01,
       4.91318154e-02, 7.81026778e-02, 1.17461503e-01, 1.10893235e-01,
       9.63425860e-02, 1.95016127e-01, 1.72700829e-01, 3.39799683e-02,
       1.93994555e-01, 7.71191107e-02, 1.04251235e-01, 4.13454928e-03,
       9.37391644e-02, 2.09714925e-01, 6.57058961e-02, 6.52414359e-02,
       7.37022283e-03, 9.14744708e-02, 1.10994605e-01, 2.30842342e-02,
       4.42313600e-02, 1.63861161e-03, 9.10503676e-02, 1.02945460e-01,
       8.40393220e-02, 3.96743563e-02, 1.53428506e-02, 1.26600176e-01,
       1.16961438e-01, 1.00334083e-01, 1.42519927e-01, 3.44948085e-02,
       2.23020502e-01, 1.99033578e-02, 3.12454055e-01, 1.39087455e-01,
       5.62203755e-02, 5.28737445e-02, 7.57855952e-02, 9.34537252e-02,
      

In [11]:
# Training the model to predict Trust
rf = RandomForestRegressor(n_estimators=1000)
rf.fit(df_trust_train, trust_train.values.ravel())
trust_predictions = rf.predict(df_trust_test)
errors = abs(trust_predictions - trust_test.values.ravel())
accuracy = r2_score(trust_test.values.ravel(), trust_predictions)
print('R2 score: ', accuracy)
errors

R2 score:  0.6453096926008854


array([0.0288845 , 0.0717066 , 0.00442185, 0.08114539, 0.0389842 ,
       0.0447361 , 0.0696654 , 0.02528808, 0.00618216, 0.05564134,
       0.00773028, 0.00754583, 0.0840626 , 0.04948806, 0.04665751,
       0.02522312, 0.0693598 , 0.01610808, 0.0155218 , 0.10142739,
       0.00736261, 0.03995758, 0.02006859, 0.026575  , 0.01633302,
       0.0241185 , 0.00831241, 0.06756205, 0.03425978, 0.03017287,
       0.0089855 , 0.05766517, 0.13548305, 0.18677014, 0.01619071,
       0.05034354, 0.00977002, 0.00041429, 0.03761888, 0.15220608,
       0.0298999 , 0.1857437 , 0.04343572, 0.1212743 , 0.0446467 ,
       0.01119813, 0.01803741, 0.02011244, 0.03358071, 0.03213591,
       0.06483053, 0.00463438, 0.0022823 , 0.06025038, 0.02202732,
       0.03296917, 0.03961444, 0.05523474, 0.03526234, 0.01168734,
       0.02527894, 0.01939712, 0.0108441 , 0.0498226 , 0.01087597,
       0.01557846, 0.02569037, 0.03861624, 0.05267012, 0.10350728,
       0.00360197, 0.13415559, 0.0815443 , 0.06638733, 0.02115

In [12]:
#Training the model to predict Generosity
rf = RandomForestRegressor(n_estimators=1000)
rf.fit(df_generosity_train, generosity_train.values.ravel())
generosity_predictions = rf.predict(df_generosity_test)
errors = abs(generosity_predictions - generosity_test.values.ravel())
accuracy = r2_score(generosity_test.values.ravel(), generosity_predictions)
print('R2 score: ', accuracy)
errors

R2 score:  0.4851269060859984


array([4.41807899e-02, 2.30831029e-02, 7.81675538e-02, 7.69780524e-02,
       2.46553463e-02, 7.21263899e-02, 1.06004894e-01, 1.67000742e-03,
       1.00896231e-01, 6.69966192e-02, 7.98074518e-02, 1.42411247e-02,
       3.79093136e-02, 3.77040017e-02, 1.26861301e-01, 6.88942799e-03,
       5.49644896e-02, 4.80737119e-02, 6.45147452e-02, 8.09032540e-02,
       4.43617394e-02, 6.10940339e-02, 1.07910153e-01, 1.76421225e-02,
       1.92144356e-01, 1.43159398e-02, 1.03623049e-01, 1.56852952e-01,
       1.71528573e-03, 1.01384820e-02, 2.80005159e-02, 1.03287804e-02,
       1.65725923e-01, 3.85102295e-05, 2.33829746e-02, 1.77450295e-02,
       8.90032487e-02, 2.09075484e-01, 1.09633812e-01, 3.68905838e-02,
       2.39994142e-02, 5.15915164e-02, 8.87908907e-02, 8.96339612e-02,
       6.41219384e-02, 2.04436288e-01, 4.96304084e-03, 6.35266132e-02,
       2.13354167e-01, 3.02249562e-02, 1.52777733e-01, 6.75417739e-02,
       6.15665456e-02, 3.34363469e-01, 1.91299393e-02, 1.10226545e-01,
      

In [13]:
#Training the model to predict Dystopia
rf = RandomForestRegressor(n_estimators=1000)
rf.fit(df_dystopia_train, dystopia_train.values.ravel())
dystopia_predictions = rf.predict(df_dystopia_test)
errors = abs(dystopia_predictions - dystopia_test.values.ravel())
accuracy = r2_score(dystopia_test.values.ravel(), dystopia_predictions)
print('R2 score: ', accuracy)
errors

R2 score:  -0.12445306782229704


array([2.24960378e-01, 5.73716102e-01, 4.75672415e-01, 4.82072898e-01,
       7.41360777e-02, 5.58879444e-01, 2.68607061e-01, 3.02982506e-04,
       3.37690473e-01, 4.21769860e-01, 1.65740230e-01, 3.10325490e-01,
       1.17913218e-01, 2.41351071e-01, 1.09259213e+00, 1.20464559e-01,
       1.66824905e-01, 1.40670104e+00, 2.82555527e-01, 8.36175148e-01,
       2.16097658e-01, 1.69308932e-01, 7.52647278e-01, 2.27047542e-01,
       3.22671152e-01, 1.72978114e-01, 2.19851601e-01, 5.75353040e-01,
       2.40908405e-01, 6.94668045e-01, 1.51455832e-01, 3.48492374e-01,
       1.30013443e-01, 1.13789388e-01, 9.80691385e-01, 2.56988532e-01,
       2.75253620e-01, 6.19655126e-01, 1.39413645e-03, 2.91553120e-01,
       1.39517463e-01, 6.28189056e-01, 4.50942847e-01, 2.08070935e-01,
       4.60185463e-01, 2.49297859e-01, 5.78658051e-01, 6.08900463e-01,
       5.90726851e-01, 2.13579820e-01, 5.33445666e-01, 1.27142920e-01,
       6.77354641e-01, 8.82973084e-01, 1.01239253e+00, 4.02811572e-01,
      

In [14]:
#Predicting Score
predictions = economy_predictions + family_predictions + health_predictions + freedom_predictions + trust_predictions + generosity_predictions + dystopia_predictions
errors = abs(predictions - score_test.values.ravel())
accuracy = r2_score(score_test.values.ravel(), predictions)
print('R2 score: ', accuracy)
mape = 100 * (errors / score_test.values.ravel())
accuracy = 100 - np.mean(mape)
print('Accuracy: ', accuracy)
errors

R2 score:  0.6850981908722135
Accuracy:  90.85115015885188


array([6.16180528e-01, 4.40463874e-01, 9.39759040e-01, 5.18788388e-01,
       8.30219078e-01, 5.09412512e-01, 4.95186475e-01, 6.67871979e-01,
       7.76218512e-02, 5.89681085e-01, 3.30948136e-01, 1.21105161e-01,
       1.44689963e+00, 2.26495087e-01, 4.28408205e-01, 3.45120284e-01,
       1.77548091e-01, 6.86731031e-01, 1.07349371e+00, 8.61027834e-02,
       6.12508076e-01, 5.62750922e-01, 1.22578414e-01, 6.11727312e-01,
       2.01490822e+00, 1.55149990e-01, 6.46978565e-01, 6.12164047e-01,
       3.60763890e-01, 1.66527289e+00, 2.81001263e-01, 3.21845259e-02,
       8.07350510e-02, 7.38352242e-01, 2.95626249e-01, 1.94385694e-01,
       4.70480739e-01, 1.28095822e-01, 4.75216224e-02, 9.94418082e-01,
       3.69576668e-01, 2.53895662e-01, 4.33420313e-01, 1.21242668e+00,
       1.39447010e-02, 1.29056715e-01, 1.52798739e+00, 4.29803245e-01,
       6.99963142e-01, 2.29448513e-01, 1.02491402e+00, 7.10772596e-01,
       4.90151747e-01, 2.29550521e-01, 8.10558744e-01, 2.42820884e-01,
      

In [15]:
#Find accuracy
rows = len(predictions)
tp = 0
for i in range(rows):
    if(predictions[i] >= low_score.loc[i, 'Low'] and predictions[i] <= high_score.loc[i, 'High']):
        tp = tp + 1
print('Correct margin: ', tp/rows*100)

Correct margin:  10.687022900763358
