In [None]:
import time
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import seaborn as sns
import tensorflow as tf
print(tf.__version__)
import gc
#from talos.model.layers import hidden_layers
from keras import optimizers
from keras.wrappers.scikit_learn import KerasRegressor, KerasClassifier
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, LocallyConnected1D, Conv1D, Reshape
from keras.callbacks import ModelCheckpoint, History
from keras.activations import relu, elu, linear
from keras import backend as K
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score, KFold, train_test_split
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score, KFold, train_test_split

from tqdm import trange

# Global plotting defaults for every figure in this notebook.
# matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8-*"
# and 3.8 removed the old names, so use whichever spelling this install knows.
# If neither is listed we fall back to the original name so the error surfaces.
_darkgrid_style = next(
    (name for name in ("seaborn-v0_8-darkgrid", "seaborn-darkgrid")
     if name in plt.style.available),
    "seaborn-darkgrid",
)
plt.style.use(_darkgrid_style)
mpl.rcParams.update({
    'figure.figsize': [10.0, 4.0],
    'figure.dpi': 80,
    'savefig.dpi': 100,
    'font.size': 12,
})

# TensorFlow Neural Network on World Happiness Report (WHR) Data

In [None]:
# Load the pickled World Happiness table.
# NOTE(review): unpickling executes arbitrary code; only load files you trust.
data_pd = pd.read_pickle("../data/world_happiness.pickle")

# Drop the first two columns and keep the rest as a plain NumPy array.
data = data_pd.to_numpy()[:, 2:]

# Show the full frame as the cell output.
data_pd

In [None]:
# First remaining column is the regression target; all others are features.
output_data = data[:, 0]
input_data = data[:, 1:]

# Rows are datapoints, columns are input parameters.
nr_datapoints, nr_params = input_data.shape[0], input_data.shape[1]
nr_params, nr_datapoints

In [None]:
def accuracy_score(Y_test, Y_pred):
    """Return the fraction of predictions that exactly equal the targets.

    Both inputs are converted to flat NumPy arrays before comparison, so
    plain Python lists and column vectors of shape (n, 1) are compared
    element by element instead of being compared as a whole (lists) or
    broadcast to an (n, n) matrix (column vector vs. 1-D array).

    Parameters
    ----------
    Y_test : array-like
        Reference values.
    Y_pred : array-like
        Predicted values; must contain as many elements as ``Y_test``.

    Returns
    -------
    float
        Share of matching elements in [0, 1]; ``nan`` for empty input.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of elements.
    """
    y_true = np.asarray(Y_test).ravel()
    y_hat = np.asarray(Y_pred).ravel()
    if y_true.size != y_hat.size:
        raise ValueError(
            f"Y_test and Y_pred must have the same number of elements, "
            f"got {y_true.size} and {y_hat.size}"
        )
    if y_true.size == 0:
        # 0/0 is undefined; return nan without emitting a runtime warning.
        return float("nan")
    return float(np.mean(y_true == y_hat))

# Hyperparameter Optimization

In [None]:
# Hyperparameter grid: every combination of these values is trained.
all_params = dict([
    ("hidden_layers", [1, 2, 4]),
    ("neurons", [4, 8, 16]),
    ("epochs", [100, 200, 400, 800]),
    ("lr", [0.01, 0.001, 0.0001]),
])

# Total number of grid points = product of the number of options per key.
options_per_key = list(map(len, all_params.values()))
nr_runs = np.prod(options_per_key)
nr_runs

In [None]:
def make_model(params):
    """Return a zero-argument builder for a dense regression network.

    Parameters
    ----------
    params : dict
        Must provide ``"lr"`` (Adam learning rate), ``"neurons"`` (units per
        dense layer) and ``"hidden_layers"`` (number of dense layers before
        the single-unit output layer).

    Returns
    -------
    callable
        ``nn_reg()``, which builds and compiles a fresh ``Sequential`` model
        on every call; suitable as ``build_fn`` for ``KerasRegressor``.
    """
    # Read eagerly so a missing key is reported when the builder is created.
    lr = params["lr"]

    def nn_reg():
        # Create the optimizer per build: an optimizer instance keeps state
        # (iteration count, moment estimates) and must not be shared between
        # independently trained models.
        opti = optimizers.Adam(amsgrad=True, lr=lr)
        model = Sequential()
        # NOTE(review): the first layer is linear, so hidden_layers == 1
        # yields a purely linear model - confirm this is intended.
        model.add(Dense(params["neurons"], input_shape=(nr_params,), activation="linear", kernel_initializer='random_uniform'))
        for _ in range(params["hidden_layers"] - 1):
            model.add(Dense(params["neurons"], activation="relu", kernel_initializer='random_uniform'))
        model.add(Dense(1, activation="linear", kernel_initializer='random_uniform'))
        model.compile(optimizer=opti, loss="mean_squared_error",  metrics=["mse"])
        return model
    return nn_reg

In [None]:
# Grid search: for every hyperparameter combination, train `nr_averages`
# networks on fresh random 90/10 train/test splits and record the mean test R2.
hp_dict = {
    "r2_score" : [],
    "r2_std" : [],
    "hidden_layers" : [],
    "neurons" : [],
    "lr" : [],
    "epochs" : []
}

nr_averages = 40

k = 0
t0 = time.time()
for hidden_layers in all_params["hidden_layers"]:
    for neurons in all_params["neurons"]:
        for epochs in all_params["epochs"]:
            for lr in all_params["lr"]:
                params = {
                    "hidden_layers" : hidden_layers,
                    "neurons" : neurons,
                    "epochs" : epochs,
                    "lr" : lr
                }

                # NOTE(review): inputs are not standardized here, unlike the
                # later single-configuration fit - confirm this is intended.
                r2_runs = []
                for i in range(nr_averages):
                    X_train, X_test, Y_train, Y_test = train_test_split(input_data, output_data, test_size=0.1)
                    reg = KerasRegressor(build_fn=make_model(params), batch_size=32, epochs=epochs, validation_split=0, verbose=0)
                    reg.fit(X_train, Y_train)
                    Y_pred = reg.predict(X_test)
                    r2_runs.append(r2_score(Y_test, Y_pred))
                    # Release the model and reset the Keras session so memory
                    # does not grow over the thousands of fits in this search.
                    del reg
                    gc.collect()
                    K.clear_session()
                r2_runs = np.asarray(r2_runs)

                # Append the whole row only after all runs finished, so an
                # interrupted search never leaves columns of unequal length.
                hp_dict["hidden_layers"].append(hidden_layers)
                hp_dict["neurons"].append(neurons)
                hp_dict["epochs"].append(epochs)
                hp_dict["lr"].append(lr)
                hp_dict["r2_score"].append(r2_runs.mean())
                hp_dict["r2_std"].append((r2_runs**2).mean())  # Placeholder for std. Atm just <r2^2>.

                k += 1
                print(f"{100*k/nr_runs:.1f} %, {(time.time()-t0)/60:.1f} m")


In [None]:
# Rank all grid points by mean R2, best first.
hp_df = pd.DataFrame(hp_dict).sort_values(by="r2_score", ascending=False)

# hp_dict["r2_std"] holds <r2^2>; convert to std = sqrt(<r2^2> - <r2>^2).
# Rounding can make the variance marginally negative when all runs agree,
# which would turn into NaN under the square root, so clip it at zero.
r2_variance = (hp_df["r2_std"] - hp_df["r2_score"]**2).clip(lower=0)
hp_df["r2_std"] = np.sqrt(r2_variance)
hp_df

In [None]:
# Persist the ranked hyperparameter table so the search need not be re-run.
hyperparam_pickle_path = "../data/WHR_hyperparam.pickle"
hp_df.to_pickle(hyperparam_pickle_path)

In [None]:
# Box plots of the mean R2 score, grouped by network width (left) and
# depth (right). The groups and tick labels are both taken from the values
# present in hp_df, so they cannot drift apart from the searched grid.
fig, ax = plt.subplots(1, 2, figsize=(10, 4), sharey=True)
panels = [("neurons", "Neurons per layer"), ("hidden_layers", "Nr of Layers")]
for axis, (column, xlabel) in zip(ax, panels):
    levels = sorted(hp_df[column].unique().tolist())
    scores = [hp_df.loc[hp_df[column] == level, "r2_score"] for level in levels]
    axis.boxplot(scores)
    # Separate calls work on every matplotlib version; the two-argument
    # set_xticks(ticks, labels) form is only available from 3.5 on.
    axis.set_xticks(list(range(1, len(levels) + 1)))
    axis.set_xticklabels([str(level) for level in levels])
    axis.set_xlabel(xlabel)
ax[0].set_ylabel("R2 Score")
ax[0].set_ylim(0, 1)  # y-axis is shared, so this applies to both panels
fig.tight_layout()
fig.savefig("../figs/WHR_TF_boxplot.pdf")

In [None]:
# Mean R2 for every (learning rate, epochs) pair, averaged over all network
# shapes. Rows follow `lrs`, columns follow `epochs`; both are read from
# hp_df so they always match the values that were actually searched.
lrs = sorted(hp_df["lr"].unique().tolist())
epochs = sorted(hp_df["epochs"].unique().tolist())
asdf = (
    hp_df.pivot_table(index="lr", columns="epochs", values="r2_score", aggfunc="mean")
    .reindex(index=lrs, columns=epochs)
    .to_numpy()
)

In [None]:
# Heatmap of mean R2 over learning rate (rows) and epochs (columns).
heat_fig, heat_ax = plt.subplots(figsize=(8, 5))
sns.heatmap(
    asdf,
    annot=True,
    xticklabels=epochs,
    yticklabels=lrs,
    vmin=0.7,
    vmax=0.8,
    cmap="Purples",
    square=True,
    ax=heat_ax,
)
# Explicit limits put the first row at the bottom and keep all rows visible.
heat_ax.set_ylim(0, 3)
heat_ax.set_ylabel("Learning Rate")
heat_ax.set_xlabel("Epochs")
heat_fig.tight_layout()
#plt.savefig("../figs/CC_TF_lr_epoch_heatmap", bbox_inches="tight")

# Casual NN fit

In [None]:
# The R2 metric is defined first because nn_reg() references it when it
# compiles the model; defining it afterwards makes a fresh Run All fail with
# a NameError at nn_reg().summary().
def r2_keras(y_true, y_pred):
    """Coefficient of determination built from Keras backend ops.

    Returns 1 - SS_res / (SS_tot + epsilon) for the tensors passed in, where
    SS_res is the summed squared residual and SS_tot the summed squared
    deviation of ``y_true`` from its mean. ``K.epsilon()`` keeps the division
    finite when ``y_true`` is constant.
    """
    SS_res = K.sum(K.square(y_true - y_pred))
    SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
    return ( 1 - SS_res/(SS_tot + K.epsilon()) )


def nn_reg():
    """Build and compile a small dense regression network.

    The network has four ReLU layers of two units each followed by a
    single linear output unit. It is compiled with MSE loss and reports
    ``mse`` and ``r2_keras`` as metrics.
    """
    # Create the optimizer per build: this builder is called once for every
    # repeated fit, and an optimizer instance carries training state that
    # must not be shared between independently trained models.
    opti = optimizers.Adam(amsgrad=True, lr=0.0001)
    model = Sequential()
    model.add(Dense(2, input_shape=(nr_params,), activation="relu", kernel_initializer='random_uniform'))
    for _ in range(4 - 1):
        model.add(Dense(2, activation="relu", kernel_initializer='random_uniform'))
    model.add(Dense(1, activation="linear", kernel_initializer='random_uniform'))
    model.compile(optimizer=opti, loss="mean_squared_error",  metrics=["mse", r2_keras])
    return model

nn_reg().summary()

In [None]:
# Train the fixed network `nr_averages` times on independent random splits
# and keep the object returned by each fit for later plotting.
nr_averages = 40
r2_NN = np.zeros(1)
r22_NN = np.zeros(1)

histories = []

for _ in trange(nr_averages):
    # Fresh 90/10 split for this repetition.
    X_train, X_test, Y_train, Y_test = train_test_split(input_data, output_data, test_size=0.1)

    # Standardize using statistics of the training part only.
    Scaler = preprocessing.StandardScaler()
    X_train_scaled = Scaler.fit_transform(X_train)
    X_test_scaled = Scaler.transform(X_test)

    # validation_split=0.1 is forwarded to Keras fit.
    reg = KerasRegressor(
        build_fn=nn_reg,
        batch_size=32,
        epochs=100,
        validation_split=0.1,
        verbose=0,
    )
    history = reg.fit(X_train_scaled, Y_train)
    histories.append(history)

In [None]:
# One curve per repetition: validation R2 recorded after every epoch.
for i in range(nr_averages):
    val_r2_curve = histories[i].history["val_r2_keras"]
    plt.plot(val_r2_curve)