In [1]:
#import dependencies

import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [2]:
# Load every cleaned CSV the notebook works from.

# World Happiness Report tables, one file per report year (2020 back to 2015).
WH20, WH19, WH18, WH17, WH16, WH15 = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Clean_Data/World_Happiness_2020.csv',
        'Clean_Data/World_Happiness_2019.csv',
        'Clean_Data/World_Happiness_2018.csv',
        'Clean_Data/World_Happiness_2017.csv',
        'Clean_Data/World_Happiness_2016.csv',
        'Clean_Data/World_Happiness_2015.csv',
    )
)

# Remaining source tables: life expectancy, CPI, GDP, military expenditure
# and the freedom index.
Life_Exp, CPI, GDP_History, Mil_Exp, Free = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Clean_Data/Life_Expectancy_History.csv',
        'Clean_Data/CPI_History.csv',
        'Clean_Data/GDP_History.csv',
        'Clean_Data/Military_Expenditure.csv',
        'Clean_Data/Freedom_Index_08-17.csv',
    )
)

In [3]:
# Keep only the country name and the happiness score from each yearly report.
# The score column is named differently from one report file to the next.
WH201 = WH20[["Country", "Ladder score"]].copy()
WH191 = WH19[["Country", "Score"]].copy()
WH181 = WH18[["Country", "Score"]].copy()
WH171 = WH17[["Country", "Happiness.Score"]].copy()
WH161 = WH16[["Country", "Happiness Score"]].copy()
WH151 = WH15[["Country", "Happiness Score"]].copy()

# Year columns pulled from each of the year-indexed factor tables.
FACTOR_YEARS = ["2013", "2014", "2015", "2016", "2017", "2018"]


def _prefixed_years(table, prefix):
    """Return "Country" plus the FACTOR_YEARS columns of `table`.

    Each year column is renamed to "<prefix> <year>" (e.g. "LE 2013") so the
    factor tables no longer share column names once they are merged.
    """
    renamed = {year: f"{prefix} {year}" for year in FACTOR_YEARS}
    return table[["Country", *FACTOR_YEARS]].rename(columns=renamed)


# Other factors
Life_Exp_df = _prefixed_years(Life_Exp, "LE")

# Each CPI column is selected exactly once; listing "CPI 2013 Score" twice
# would put a duplicated column label (and a duplicated feature) into the
# merged table.
# NOTE(review): unlike the other factors, no 2017/2018 CPI columns are
# selected here -- confirm whether CPI_History.csv provides them.
CPI_df = CPI[["Country", "CPI 2013 Score", "CPI 2014 Score", "CPI 2015 Score", "CPI 2016 Score"]]

GDP_History_df = _prefixed_years(GDP_History, "GDP")

Mil_Exp_df = _prefixed_years(Mil_Exp, "ME")


In [4]:
# Build one happiness table with a column per year, keeping only the
# countries that appear in every yearly report (inner joins on "Country").
Happy_df = WH201
for yearly_scores in (WH191, WH181, WH171, WH161, WH151):
    Happy_df = Happy_df.merge(yearly_scores, how="inner", on="Country")

# merge() disambiguates clashing score columns with its default _x/_y
# suffixes: "Score_x"/"Score_y" come from WH191/WH181 and
# "Happiness Score_x"/"Happiness Score_y" from WH161/WH151.
# NOTE(review): the labels run two years behind the report files (the 2020
# file becomes "2018", ..., the 2015 file becomes "2013") -- confirm this
# offset is intentional.
Happy_df = Happy_df.rename(columns={
    "Ladder score": "2018",
    "Score_x": "2017",
    "Score_y": "2016",
    "Happiness.Score": "2015",
    "Happiness Score_x": "2014",
    "Happiness Score_y": "2013",
})
Happy_df = Happy_df[["Country", "2013", "2014", "2015", "2016", "2017", "2018"]]

# Attach every factor table as extra columns, again keeping only countries
# present in all of them.
Final_df = Happy_df
for factor_table in (Life_Exp_df, CPI_df, GDP_History_df, Mil_Exp_df):
    Final_df = Final_df.merge(factor_table, how="inner", on="Country")

# Drop every row holding a NaN or an infinite value in any column.
# `axis` is passed by keyword: DataFrame.any() only accepts it as a keyword
# argument on pandas 2.x, where the positional form `.any(1)` raises TypeError.
has_bad_value = Final_df.isin([np.nan, np.inf, -np.inf]).any(axis=1)
Final_df = Final_df[~has_bad_value]



In [5]:
# Fit a linear regression that predicts the six yearly happiness scores
# from the remaining factor columns.
# NOTE(review): this cell rebinds Happy_df and Final_df, so running it a
# second time without re-running the previous cell fails on the missing
# "Country" column.
happiness_cols = ["2013", "2014", "2015", "2016", "2017", "2018"]

Happy_df = Final_df[happiness_cols].copy()                       # targets
Country = pd.Series(Final_df["Country"], index=Final_df.index)   # kept to label predictions later
Final_df = Final_df.drop(columns=["Country"] + happiness_cols)   # features only

# Ordered (unshuffled) split: the last 33% of rows form the test set.
# NOTE(review): random_state has no effect while shuffle=False.
X_train, X_test, y_train, y_test = train_test_split(
    Final_df,
    Happy_df,
    test_size=0.33,
    random_state=42,
    shuffle=False,
)

CLF = LinearRegression()
CLF.fit(X_train, y_train)
preds = CLF.predict(X_test)

# Root-mean-squared error of the test-set predictions
rmse_ = np.sqrt(mean_squared_error(y_test, preds))

# Show the RMSE as the cell output
rmse_

0.7449094869361969

In [6]:
# Put the test-set predictions in a table: one column per predicted year,
# taken from the matching column of `preds`.
prediction_labels = ["_2013", "_2014", "_2015", "_2016", "_2017", "_2018"]
pred = pd.DataFrame(
    {label: preds[:, position] for position, label in enumerate(prediction_labels)}
)

# Label each row with the country of the corresponding test-set row.
pred["Country"] = Country.loc[X_test.index].to_numpy()

# Write the table to disk
pred.to_csv("Predictions.csv")