# Random Forest Regressor

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

In [2]:
df = pd.read_pickle('data/happiness_data.pkl')

In [3]:
years_train = [2015, 2016]
years_test = [2017]
train_features = df.loc[df['Year'].isin(years_train)]
test_features = df.loc[df['Year'].isin(years_test)]
train_features.reset_index(inplace=True, drop=True)
test_features.reset_index(inplace=True, drop=True)

In [4]:
df_train = train_features.drop(columns=['Country', 'Year', 'Score', 'Low', 'High', 'Economy', 'Family', 'Health', 'Freedom', 'Trust', 'Generosity', 'Dystopia'], axis=1)
df_test = test_features.drop(columns=['Country', 'Year', 'Score', 'Low', 'High', 'Economy', 'Family', 'Health', 'Freedom', 'Trust', 'Generosity', 'Dystopia'], axis=1)

In [5]:
df_economy_train = pd.read_pickle('data/economy_train.pkl')
df_family_train = pd.read_pickle('data/family_train.pkl')
df_health_train = pd.read_pickle('data/health_train.pkl')
df_freedom_train = pd.read_pickle('data/freedom_train.pkl')
df_trust_train = pd.read_pickle('data/trust_train.pkl')
df_generosity_train = pd.read_pickle('data/generosity_train.pkl')
df_dystopia_train = pd.read_pickle('data/dystopia_train.pkl')

df_economy_test = pd.read_pickle('data/economy_test.pkl')
df_family_test = pd.read_pickle('data/family_test.pkl')
df_health_test = pd.read_pickle('data/health_test.pkl')
df_freedom_test = pd.read_pickle('data/freedom_test.pkl')
df_trust_test = pd.read_pickle('data/trust_test.pkl')
df_generosity_test = pd.read_pickle('data/generosity_test.pkl')
df_dystopia_test = pd.read_pickle('data/dystopia_test.pkl')

In [6]:
#Fetching training and test data set
score_train = pd.DataFrame(train_features['Score'])
economy_train = pd.DataFrame(train_features['Economy'])
family_train = pd.DataFrame(train_features['Family'])
health_train = pd.DataFrame(train_features['Health'])
freedom_train = pd.DataFrame(train_features['Freedom'])
trust_train = pd.DataFrame(train_features['Trust'])
generosity_train = pd.DataFrame(train_features['Generosity'])
dystopia_train = pd.DataFrame(train_features['Dystopia'])

score_test = pd.DataFrame(test_features['Score'])
economy_test = pd.DataFrame(test_features['Economy'])
family_test = pd.DataFrame(test_features['Family'])
health_test = pd.DataFrame(test_features['Health'])
freedom_test = pd.DataFrame(test_features['Freedom'])
trust_test = pd.DataFrame(test_features['Trust'])
generosity_test = pd.DataFrame(test_features['Generosity'])
dystopia_test = pd.DataFrame(test_features['Dystopia'])

low_score = pd.DataFrame(test_features['Low'])
high_score = pd.DataFrame(test_features['High'])

In [7]:
#Training the model to predict Economy
rf = RandomForestRegressor(n_estimators=1000)
rf.fit(df_economy_train, economy_train.values.ravel())
economy_predictions = rf.predict(df_economy_test)
errors = abs(economy_predictions - economy_test.values.ravel())
accuracy = r2_score(economy_test.values.ravel(), economy_predictions)
print('R2 score: ', accuracy)
errors

R2 score:  0.8011810490600753


array([0.28128615, 0.04887999, 0.35424542, 0.25869368, 0.16565907,
       0.01416061, 0.13752795, 0.16258517, 0.23321855, 0.12331026,
       0.11893856, 0.01289885, 0.26037445, 0.14457007, 0.02209197,
       0.02455511, 0.06168311, 0.19095812, 0.15707195, 0.03089379,
       0.10788329, 0.03500172, 0.16227226, 0.11876961, 0.15738033,
       0.0840184 , 0.12958824, 0.08862237, 0.12450529, 0.17009608,
       0.02618214, 0.17972535, 0.04097858, 0.28372149, 0.23215588,
       0.04010389, 0.05643364, 0.0031423 , 0.43687974, 0.1373319 ,
       0.10191174, 0.29240838, 0.04975082, 0.22419331, 0.06258755,
       0.19693899, 0.41872359, 0.05095899, 0.1646235 , 0.13566747,
       0.01401038, 0.2029701 , 0.01325626, 0.04832675, 0.18212621,
       0.02327387, 0.11348377, 0.11104275, 0.01826944, 0.2620519 ,
       0.08349639, 0.18953714, 0.19772038, 0.02594357, 0.1679868 ,
       0.02885056, 0.00855403, 0.44732522, 0.16340911, 0.07656149,
       0.33815172, 0.10501023, 0.19949977, 0.03000258, 0.05924

In [8]:
#Training the model to predict Family
rf = RandomForestRegressor(n_estimators=1000)
rf.fit(df_family_train, family_train.values.ravel())
family_predictions = rf.predict(df_family_test)
errors = abs(family_predictions - family_test.values.ravel())
accuracy = r2_score(family_test.values.ravel(), family_predictions)
print('R2 score: ', accuracy)
errors

R2 score:  -0.7245571435658835


array([0.16104484, 0.04453599, 0.30817099, 0.33618781, 0.51523975,
       0.21308102, 0.31772129, 0.26090819, 0.26520404, 0.25361714,
       0.11583196, 0.28412225, 0.28202366, 0.08607852, 0.46234204,
       0.39260782, 0.13059454, 0.41635974, 0.45525006, 0.32224642,
       0.43490146, 0.22209074, 0.19128739, 0.18697499, 0.33459474,
       0.38703877, 0.29422052, 0.17948634, 0.44998369, 0.43745302,
       0.07212601, 0.19362619, 0.27416469, 0.37266334, 0.52212068,
       0.26728364, 0.27697889, 0.30705594, 0.14751729, 0.38965119,
       0.23233108, 0.24648344, 0.28497941, 0.29368477, 0.18933391,
       0.39540874, 0.27809765, 0.25485625, 0.0511106 , 0.16703998,
       0.1182362 , 0.42538865, 0.03577211, 0.34964948, 0.1303999 ,
       0.35680757, 0.21198973, 0.3158821 , 0.49032055, 0.48136777,
       0.27068761, 0.30673835, 0.24189556, 0.23129843, 0.23193958,
       0.37292938, 0.18039796, 0.30024507, 0.54067635, 0.41200098,
       0.29800482, 0.22464605, 0.0150208 , 0.26698307, 0.73572

In [9]:
#Training the model to predict Health
rf = RandomForestRegressor(n_estimators=1000)
rf.fit(df_health_train, health_train.values.ravel())
health_predictions = rf.predict(df_health_test)
errors = abs(health_predictions - family_test.values.ravel())
accuracy = r2_score(health_test.values.ravel(), health_predictions)
print('R2 score: ', accuracy)
errors

R2 score:  0.9426239171013356


array([0.33048409, 0.06637281, 0.54802821, 0.96342981, 0.73501836,
       0.32899807, 0.64094826, 0.64557029, 0.58159373, 0.61905727,
       0.18063304, 0.72136374, 0.61531271, 0.2520551 , 0.79076177,
       0.70321037, 0.31504852, 0.96056888, 0.75628353, 0.68516848,
       0.83847814, 0.41318509, 0.48729635, 0.74278267, 0.64745425,
       0.82466198, 0.5002117 , 0.42637579, 0.7197827 , 0.67806758,
       0.23407089, 0.28162269, 0.60289774, 0.74672895, 0.81384964,
       0.57645931, 0.52814692, 0.71582153, 0.49836524, 0.68248172,
       0.5171169 , 0.78342875, 0.1061793 , 0.61413056, 0.55193305,
       0.42921934, 0.67546721, 0.58575951, 0.31776315, 0.51027417,
       0.60376778, 0.72381053, 0.36167135, 0.72550162, 0.4272089 ,
       0.69852317, 0.50475254, 0.56573539, 0.74029274, 0.54164906,
       0.6006918 , 0.71541848, 0.73352409, 0.47164393, 0.57014785,
       0.6478541 , 0.37560196, 0.56929819, 0.71410456, 0.71190656,
       0.61237285, 0.5669097 , 0.2089037 , 0.60219876, 1.10552

In [10]:
#Training the model to predict Freedom
rf = RandomForestRegressor(n_estimators=1000)
rf.fit(df_freedom_train, freedom_train.values.ravel())
freedom_predictions = rf.predict(df_freedom_test)
errors = abs(freedom_predictions - freedom_test.values.ravel())
accuracy = r2_score(freedom_test.values.ravel(), freedom_predictions)
print('R2 score: ', accuracy)
errors

R2 score:  0.4585019201331324


array([0.14320105, 0.0007657 , 0.13595956, 0.21547377, 0.08014981,
       0.08590278, 0.05632094, 0.01529636, 0.07933114, 0.07054965,
       0.01303811, 0.16579035, 0.0361452 , 0.13926173, 0.01811406,
       0.11141601, 0.15598904, 0.10431913, 0.14045374, 0.21095574,
       0.05746451, 0.25243059, 0.13969343, 0.07738002, 0.17585207,
       0.08741425, 0.06175139, 0.0170512 , 0.00205103, 0.29556373,
       0.0292328 , 0.00381213, 0.02420247, 0.09849571, 0.05010515,
       0.01006173, 0.07525696, 0.066273  , 0.03957569, 0.14445836,
       0.04163555, 0.0704491 , 0.002001  , 0.03990266, 0.15084382,
       0.1267873 , 0.21287885, 0.04724268, 0.21911335, 0.13807498,
       0.24918775, 0.0658309 , 0.00794661, 0.0298318 , 0.09697714,
       0.03911623, 0.06882554, 0.15717735, 0.12851702, 0.02879077,
       0.02616229, 0.0199616 , 0.09088801, 0.06038993, 0.0479068 ,
       0.09473953, 0.05860658, 0.08058203, 0.08982784, 0.20686137,
       0.05571074, 0.09412796, 0.08271202, 0.03629471, 0.01395

In [11]:
# Training the model to predict Trust
rf = RandomForestRegressor(n_estimators=1000)
rf.fit(df_trust_train, trust_train.values.ravel())
trust_predictions = rf.predict(df_trust_test)
errors = abs(trust_predictions - trust_test.values.ravel())
accuracy = r2_score(trust_test.values.ravel(), trust_predictions)
print('R2 score: ', accuracy)
errors

R2 score:  0.36840870827314987


array([0.03132313, 0.08976472, 0.01858799, 0.04034574, 0.03136157,
       0.07709533, 0.11338414, 0.00603841, 0.06844955, 0.01704135,
       0.01152909, 0.04992003, 0.06474118, 0.04720926, 0.02921724,
       0.03308537, 0.12358435, 0.00342979, 0.05119622, 0.08399109,
       0.05354081, 0.08562166, 0.03961963, 0.05374445, 0.2156055 ,
       0.04737737, 0.07165526, 0.06539685, 0.08468298, 0.03743314,
       0.09645018, 0.05181266, 0.15537262, 0.12975411, 0.02401161,
       0.02496229, 0.02304343, 0.0227266 , 0.00039155, 0.11323653,
       0.02868942, 0.12200471, 0.03487592, 0.11853458, 0.09357381,
       0.05180606, 0.01688026, 0.02547349, 0.00633877, 0.04119205,
       0.15223146, 0.01507394, 0.00866697, 0.11668221, 0.07019858,
       0.01541424, 0.02755314, 0.07620752, 0.02613434, 0.06479418,
       0.01053953, 0.00550155, 0.03742699, 0.10398305, 0.03983913,
       0.0051759 , 0.15602745, 0.26138447, 0.07392447, 0.02215593,
       0.00373075, 0.14576937, 0.04361284, 0.0405151 , 0.03992

In [12]:
#Training the model to predict Generosity
rf = RandomForestRegressor(n_estimators=1000)
rf.fit(df_generosity_train, generosity_train.values.ravel())
generosity_predictions = rf.predict(df_generosity_test)
errors = abs(generosity_predictions - generosity_test.values.ravel())
accuracy = r2_score(generosity_test.values.ravel(), generosity_predictions)
print('R2 score: ', accuracy)
errors

R2 score:  0.15732064603166285


array([0.10113204, 0.06689501, 0.11356106, 0.05395914, 0.04838709,
       0.08761972, 0.1459179 , 0.03685553, 0.11146218, 0.03941212,
       0.14712504, 0.01809066, 0.01323907, 0.17842719, 0.14568245,
       0.10424755, 0.14810996, 0.07332284, 0.00422078, 0.07267134,
       0.03292065, 0.0276349 , 0.14989753, 0.00138275, 0.23145541,
       0.02354003, 0.07491295, 0.20139139, 0.05117166, 0.01074933,
       0.01403544, 0.06651837, 0.20557637, 0.00607271, 0.02078571,
       0.04848868, 0.23143604, 0.21923303, 0.10177343, 0.02477187,
       0.04784926, 0.11229528, 0.05302493, 0.09195145, 0.09994276,
       0.18582686, 0.02668138, 0.06241613, 0.21433678, 0.16320577,
       0.16314189, 0.06856817, 0.11012064, 0.36447301, 0.01393864,
       0.11746091, 0.00268209, 0.04227488, 0.07526642, 0.14672709,
       0.02824395, 0.02759302, 0.16976215, 0.12950516, 0.02255691,
       0.04345482, 0.05085943, 0.08677039, 0.03009387, 0.11698501,
       0.03073298, 0.04726781, 0.02560993, 0.05400559, 0.24292

In [13]:
#Training the model to predict Dystopia
rf = RandomForestRegressor(n_estimators=1000)
rf.fit(df_dystopia_train, dystopia_train.values.ravel())
dystopia_predictions = rf.predict(df_dystopia_test)
errors = abs(dystopia_predictions - dystopia_test.values.ravel())
accuracy = r2_score(dystopia_test.values.ravel(), dystopia_predictions)
print('R2 score: ', accuracy)
errors

R2 score:  -0.16754626085820967


array([0.02063425, 0.78965976, 0.12554662, 0.53320913, 0.34191132,
       0.63197462, 0.30747208, 0.29802445, 0.47911895, 0.55117817,
       0.18953281, 0.56786515, 0.03055348, 0.27341296, 0.82140765,
       0.06216816, 0.41121762, 1.70559533, 0.24365563, 0.95942157,
       0.14611214, 0.4907297 , 0.42655582, 0.02632599, 0.08743083,
       0.06162282, 0.30559622, 0.55874819, 0.28908539, 0.24708998,
       0.50126166, 0.31955398, 0.26533405, 0.01920367, 0.3540137 ,
       0.38140494, 0.55835698, 0.9197747 , 0.51217979, 0.12482683,
       0.3978859 , 0.30018868, 0.48718058, 0.28915159, 0.65417284,
       0.61477376, 0.70597316, 0.42947183, 0.51766495, 0.21904807,
       0.35761666, 0.04534031, 0.79286839, 0.84589837, 0.74437705,
       0.33206686, 0.27656347, 0.62090359, 0.59399623, 1.11770007,
       0.52382121, 0.72136788, 0.55717891, 0.33883028, 0.47614302,
       0.17571078, 0.58473604, 0.64630686, 0.61203664, 0.01663829,
       0.61768132, 0.1568959 , 0.24624312, 0.07793915, 0.39134

In [14]:
#Predicting Score
predictions = economy_predictions + family_predictions + health_predictions + freedom_predictions + trust_predictions + generosity_predictions + dystopia_predictions
errors = abs(predictions - score_test.values.ravel())
accuracy = r2_score(score_test.values.ravel(), predictions)
print('R2 score: ', accuracy)
mape = 100 * (errors / score_test.values.ravel())
accuracy = 100 - np.mean(mape)
print('Accuracy: ', accuracy)
errors

R2 score:  0.7574300525148794
Accuracy:  91.44490470012306


array([2.84357044e-01, 8.13727499e-01, 3.25193807e-01, 3.39057731e-01,
       3.29487768e-01, 6.95881939e-01, 4.38148285e-01, 1.53844334e-01,
       2.69641943e-01, 1.77254861e-01, 5.91298996e-01, 4.90974081e-01,
       6.31677700e-01, 5.63537415e-01, 2.41208343e-01, 2.29132890e-01,
       5.23245480e-01, 9.90387893e-01, 4.97250621e-01, 1.01454907e+00,
       4.44433894e-01, 7.33860556e-01, 8.76841216e-02, 2.86890516e-01,
       1.02773246e+00, 5.27255213e-03, 7.06856864e-01, 7.44516125e-01,
       6.00535529e-02, 1.12308367e+00, 6.91675015e-01, 5.97960075e-02,
       1.18983303e-01, 8.98248983e-01, 3.92245001e-01, 1.61903376e-01,
       5.03859732e-01, 9.44820259e-01, 6.73484570e-01, 6.35812545e-01,
       6.69693059e-02, 8.21447382e-02, 7.69804523e-01, 4.19999409e-01,
       3.95989201e-01, 3.87185888e-01, 1.58585584e+00, 2.77153464e-01,
       6.87653424e-01, 5.80762388e-01, 8.69823790e-01, 6.79576420e-01,
       8.01024125e-01, 2.86711599e-01, 6.63008875e-01, 1.39306155e-01,
      

In [15]:
#Find accuracy
rows = len(predictions)
tp = 0
for i in range(rows):
    if(predictions[i] >= low_score.loc[i, 'Low'] and predictions[i] <= high_score.loc[i, 'High']):
        tp = tp + 1
print('Correct margin: ', tp/rows*100)

Correct margin:  13.740458015267176
