In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import  make_scorer
import itertools
import numpy as np
import concurrent.futures
from threading import Lock

import tensorflow as tf
import keras.backend as K
import math

from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import  make_scorer

In [None]:
# Global matplotlib styling shared by every figure in the notebook:
# large fonts, wide figures and thicker lines.
SMALL_SIZE = 30
MEDIUM_SIZE = 30
BIGGER_SIZE = 30

plt.rc('font', size=SMALL_SIZE)                                   # default text size
plt.rc('axes', titlesize=SMALL_SIZE, labelsize=MEDIUM_SIZE)       # axes title and x/y labels
plt.rc('xtick', labelsize=SMALL_SIZE)                             # x tick labels
plt.rc('ytick', labelsize=SMALL_SIZE)                             # y tick labels
plt.rc('legend', fontsize=SMALL_SIZE)                             # legend entries
plt.rc('figure', titlesize=BIGGER_SIZE, figsize=(15, 8))          # figure title and default size
plt.rc('lines', linewidth=2)                                      # default line width


## Preprocessing

Load the CUP dataset

In [None]:
# Column layout used for the CUP training file: an ID, ten inputs (a..j) and two targets.
header = ['ID', *'abcdefghij', 'Class_x', 'Class_y']
# The first 7 lines of the file are skipped (presumably a comment preamble - verify against the file).
df = pd.read_csv("./CUP/ML-CUP21-TR.csv", header=None, delimiter=',', skiprows=7, names=header)
# Promote the ID values to the (unnamed) row index and remove the ID column in one step.
df.index = df.pop('ID').values

In [None]:
# Show the loaded data frame as a quick visual check of the parsing.
df

In [None]:
# Per-column summary statistics, useful to spot scale differences between inputs and targets.
df.describe()

In [None]:
# Every column except the two targets is an input feature.
target_columns = ['Class_x', 'Class_y']
col = [name for name in df.columns if name not in target_columns]
x = df[col].values
y = df[target_columns].values

Training/Test splitting with the hold-out approach (90%-10%)

In [None]:
# Hold-out split: 10% of the rows are set aside as the internal test set.
# The fixed random_state keeps the split identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.1,
    random_state=0,
)
print(y_train.shape, y_test.shape)

MEE definition

In [None]:
def mean_euclidean_error_tf(y_true, y_pred):
    """Mean Euclidean Error (MEE) expressed with Keras backend operations.

    The element-wise difference is squared and summed over the last axis,
    the square root of that sum is taken, and the result is averaged.

    Args:
        y_true: target tensor.
        y_pred: prediction tensor.

    Returns:
        The mean of the Euclidean distances, as produced by ``K.mean``.
    """
    residual = y_pred - y_true
    distances = K.sqrt(K.sum(K.square(residual), axis=-1))
    return K.mean(distances)

In [None]:
def mean_euclidean_error(y_true, y_pred):
    """Mean Euclidean Error (MEE) computed with NumPy.

    Args:
        y_true: array-like of targets; the last axis holds the output components.
        y_pred: array-like of predictions, broadcastable against ``y_true``.

    Returns:
        The mean over all samples of the Euclidean distance between each
        prediction and its target.
    """
    # Coerce to arrays so plain Python lists work too; subtracting two lists
    # raises TypeError.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.mean(np.sqrt(np.sum(np.square(y_pred - y_true), axis=-1)))

In [None]:
def report(results, n_top=3):
    """Print the top-ranked candidates of a ``cv_results_``-style mapping.

    Args:
        results: mapping with the keys ``rank_test_score``, ``mean_test_score``,
            ``std_test_score`` (arrays) and ``params`` (sequence of dicts).
        n_top: number of ranks to print, starting from rank 1. Candidates tied
            on a rank are all printed.
    """
    for rank in range(1, n_top + 1):
        tied = np.flatnonzero(results['rank_test_score'] == rank)
        for idx in tied:
            mean_score = results['mean_test_score'][idx]
            std_score = results['std_test_score'][idx]
            print(f"Model with rank: {rank}")
            print(f"Mean validation score: {mean_score:.3f} (std: {std_score:.3f})")
            print(f"Parameters: {results['params'][idx]}")
            print("")

In [None]:
# Scorer that wraps MEE for GridSearchCV. With greater_is_better=False scikit-learn negates
# the value so that "higher is better" still holds; the scores shown by report() are therefore negative MEE.
score = make_scorer(mean_euclidean_error, greater_is_better = False)

Next we define our own grid search. For each hyper-parameter combination, the fits on the individual CV splits run in parallel using futures from the `concurrent.futures` module. Model building is serialized by acquiring a lock, which is necessary to guarantee the same weight initialization for every model.
We also use the EarlyStopping callback to stop NN training after 50 epochs without improvement of the validation MEE.


In [None]:
def parallel_cv(list_split, iter, d, x_train, y_train, lock, epochs, batch_size):
  """Train and score one cross-validation fold for one hyper-parameter combination.

  Args:
    list_split: fold table; ``list_split[iter, 0]`` holds the training row
      indices and ``list_split[iter, 1]`` the validation row indices.
    iter: index of the fold to run.
    d: keyword arguments forwarded to ``build_model``.
    x_train, y_train: full training arrays, indexed with the fold indices.
    lock: lock held while the model is built, so models are created one at a time.
    epochs: maximum number of training epochs.
    batch_size: batch size passed to ``fit``.

  Returns:
    Tuple ``(val_loss, val_MEE, train_MEE)`` taken at the epoch with the
    lowest validation MEE.
  """
  # Model construction is serialized across the worker threads.
  with lock:
    model = build_model(**d)

  train_idx = list_split[iter, 0]
  val_idx = list_split[iter, 1]
  x_train_cv, y_train_cv = x_train[train_idx], y_train[train_idx]
  x_val_cv, y_val_cv = x_train[val_idx], y_train[val_idx]

  # Stop after 50 epochs without improvement of the validation MEE.
  early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_mean_euclidean_error_tf', patience=50)

  history = model.fit(
      x_train_cv, y_train_cv,
      epochs=epochs,
      batch_size=batch_size,
      validation_data=(x_val_cv, y_val_cv),
      verbose=0,
      callbacks=[early_stop],
  ).history

  # Report the best epoch (1-based) and return the metrics recorded there.
  best_epoch = np.argmin(history['val_mean_euclidean_error_tf'])
  print(best_epoch + 1)

  return (
      history['val_loss'][best_epoch],
      history['val_mean_euclidean_error_tf'][best_epoch],
      history['mean_euclidean_error_tf'][best_epoch],
  )

In [None]:
def grid_search(x_train, y_train, param_grid, fold, epochs, batch_size):
  """Exhaustive grid search with cross-validation for the Keras models.

  Every combination of the values in ``param_grid`` is evaluated on all the
  splits produced by ``fold``; the folds of one combination are fitted in
  parallel threads through ``parallel_cv``.

  Args:
    x_train, y_train: training arrays.
    param_grid: dict mapping each ``build_model`` argument name to a sequence
      of candidate values.
    fold: splitter exposing ``split(x, y)`` (e.g. a scikit-learn ``KFold``).
    epochs: maximum number of epochs for every fit.
    batch_size: batch size for every fit.

  Returns:
    List with one dict per combination, holding the hyper-parameters plus the
    mean/std over folds of validation MEE, validation loss and training MEE.
  """
  totale_iter = 1
  for values in param_grid.values():
    totale_iter *= len(values)

  # Materialize the folds once so that every combination is scored on the same splits.
  folds = list(fold.split(x_train, y_train))
  # Fill an explicit (n_folds, 2) object table: parallel_cv indexes it as [fold, 0/1],
  # and np.array(list_of_pairs, dtype=object) depends on how the index arrays happen to be shaped.
  list_split = np.empty((len(folds), 2), dtype=object)
  for row, (train_index, test_index) in enumerate(folds):
    list_split[row, 0] = train_index
    list_split[row, 1] = test_index

  cv_results = []
  # A single pool for the whole search, closed on exit. Creating a new executor
  # per fold and never shutting it down leaked worker threads on every combination.
  # One worker per fold keeps all the folds of a combination running concurrently.
  with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, len(folds))) as executor:
    for done, params in enumerate(itertools.product(*param_grid.values()), start=1):
      d = dict(zip(param_grid.keys(), params))

      # Fresh lock per combination, used by parallel_cv to serialize model building.
      lock = Lock()
      futures = [
          executor.submit(parallel_cv, list_split, i, d, x_train, y_train, lock, epochs, batch_size)
          for i in range(len(folds))
      ]

      # Each result is (val_loss, val_MEE, train_MEE); wait for every fold before
      # adding the summary keys to `d`, which the workers still read as build_model kwargs.
      fold_scores = [f.result() for f in futures]
      losses = [s[0] for s in fold_scores]
      euclidean_error = [s[1] for s in fold_scores]
      tr_euclidean_error = [s[2] for s in fold_scores]

      d['mean_euclidean_error'] = np.mean(euclidean_error)
      d['std_euclidean_error'] = np.std(euclidean_error)

      d['mean_val_loss'] = np.mean(losses)
      d['std_val_loss'] = np.std(losses)

      d['mean_training_MEE'] = np.mean(tr_euclidean_error)
      d['std_training_MEE'] = np.std(tr_euclidean_error)

      cv_results.append(d)
      print('ITERAZIONE NUMERO ' + str(done)+ '   su '+ str(totale_iter)+ ' totali')

  return cv_results

## KNN

Hyper-parameters tuning using GridSearchCV from Scikit-learn

In [None]:
param_grid = {'n_neighbors': range(2,200),
              'weights':['uniform', 'distance'],
              'p': [1, 2]}


# Bound to its own name: assigning the search object to `grid_search` would overwrite the
# notebook's `grid_search` function, which the neural-network sections call later.
knn_search = GridSearchCV(KNeighborsRegressor(), param_grid, cv=KFold(n_splits=5, shuffle=True, random_state=0), scoring=score, verbose=4)

knn_search.fit(x_train, y_train)
report(knn_search.cv_results_, n_top=5)

Model fitting and TR/TS prediction

In [None]:
# Final KNN model refitted on the whole training split
# (hyper-parameters presumably taken from the search report above).
knn = KNeighborsRegressor(
    n_neighbors=9,
    p=1,
    weights='distance',
)
knn.fit(x_train, y_train)

In [None]:
# MEE of the KNN model on the hold-out test split.
y_pred_knn= knn.predict(x_test)
mean_euclidean_error(y_test, y_pred_knn)

In [None]:
# MEE of the KNN model on the training split it was fitted on.
y_pred_train_knn= knn.predict(x_train)
mean_euclidean_error(y_train, y_pred_train_knn)

## SVM

Hyper-parameters tuning using GridSearchCV from Scikit-learn

In [None]:
# One sub-grid per kernel so that kernel-specific parameters (gamma, degree) are only
# combined with the kernels that use them. The `estimator__` prefix addresses the
# parameters of the regressor wrapped by MultiOutputRegressor.
param_grid = [{
    # linear kernel: only C and epsilon
    'estimator__C':[0.1, 1, 10, 100], 
    'estimator__kernel':[ 'linear'],
    'estimator__epsilon':[0.01, 0.1, 1, 10]},
    {
    # RBF kernel: adds gamma
    'estimator__C':[0.1, 1, 10, 100],
    'estimator__gamma':['scale','auto'], 
    'estimator__kernel':['rbf'],
    'estimator__epsilon':[0.01, 0.1, 1, 10]},
    {
    # polynomial kernel: adds gamma and degree
    'estimator__C':[0.1, 1, 10, 100],
    'estimator__gamma':['scale', 'auto'], 
    'estimator__kernel':[ 'poly'],
    'estimator__degree': [1, 2, 3, 4, 5],
    'estimator__epsilon':[0.01, 0.1, 1, 10]}
]

In [None]:
# SVR handles a single output, so MultiOutputRegressor fits one SVR per target column.
# Bound to its own name: assigning the search object to `grid_search` would overwrite the
# notebook's `grid_search` function, which the neural-network sections call later.
svr_search = GridSearchCV(MultiOutputRegressor(SVR()), param_grid, cv=KFold(n_splits=5, shuffle=True, random_state=0), scoring=score, verbose=4)

svr_search.fit(x_train, y_train)
report(svr_search.cv_results_, n_top=5)

Running a finer grid

In [None]:
# Finer grid around the RBF kernel.
# C starts at 0.5, not 0: SVR requires a strictly positive C, so every fit with
# C=0 would fail and only add failed candidates to the search.
param_grid_1 = {
    'estimator__C':np.arange (0.5, 15, 0.5),
    'estimator__gamma':['scale','auto'], 
    'estimator__kernel':['rbf'],
    'estimator__epsilon':np.arange (0, 1.05, 0.05)}

In [None]:
# Bound to its own name: assigning the search object to `grid_search` would overwrite the
# notebook's `grid_search` function, which the neural-network sections call later.
svr_fine_search = GridSearchCV(MultiOutputRegressor(SVR()), param_grid_1, cv=KFold(n_splits=5, shuffle=True, random_state=0), scoring=score, verbose=4)

svr_fine_search.fit(x_train, y_train)
report(svr_fine_search.cv_results_,n_top=5)

Model fitting and TR/TS prediction

In [None]:
# Final SVR model (one RBF regressor per output) refitted on the whole training split.
svr_base = SVR(kernel='rbf', gamma='auto', C=10, epsilon=0.5)
mor = MultiOutputRegressor(svr_base)
mor.fit(x_train, y_train)

In [None]:
# Predict on both splits first, then report training and hold-out test MEE.
y_pred_train = mor.predict(x_train)
y_pred_svm = mor.predict(x_test)
print('MEE Train: ', mean_euclidean_error(y_train, y_pred_train))
print('MEE Test: ', mean_euclidean_error(y_test, y_pred_svm))

## LBE (Linear Basis Expansion)

Hyper-parameters tuning using GridSearchCV from Scikit-learn

In [None]:
# Polynomial feature expansion followed by a ridge-regularized linear model.
pipe= Pipeline(steps=[('lbe',PolynomialFeatures()),('ridge',Ridge(random_state=0))])
# Keys use the `<step name>__<parameter>` convention to reach the pipeline steps.
param_grid={
    'lbe__degree':[2, 3, 4, 5, 6],
    'ridge__solver':['saga'],
    'ridge__alpha':[10, 1, 0, 0.1, 0.01, 0.001],
    'lbe__interaction_only':[True, False]
}  

In [None]:
# Bound to its own name: assigning the search object to `grid_search` would overwrite the
# notebook's `grid_search` function, which the neural-network sections call later.
lbe_search= GridSearchCV(pipe, param_grid=param_grid, scoring=score, cv=KFold(n_splits=5, shuffle=True, random_state=0), verbose=4)
lbe_search.fit(x_train, y_train)
report(lbe_search.cv_results_, n_top=5)

Model fitting and TR/TS prediction

In [None]:
# Final LBE model: degree-4 interaction-only expansion feeding a ridge regressor,
# refitted on the whole training split.
pipe = Pipeline(steps=[
    ('lbe', PolynomialFeatures(degree=4, interaction_only=True)),
    ('ridge', Ridge(alpha=10, solver='saga', random_state=0)),
])
pipe.fit(x_train, y_train)

In [None]:
# Predict on both splits first, then report training and hold-out test MEE.
y_pred_train = pipe.predict(x_train)
y_pred = pipe.predict(x_test)
print('MEE Train: ', mean_euclidean_error(y_train, y_pred_train))
print('MEE Test: ', mean_euclidean_error(y_test, y_pred))

## Random Forest

Hyper-parameters tuning using GridSearchCV from Scikit-learn

In [None]:
param_grid = {'max_depth': range(5,60,5),
              'n_estimators':[100],
              'min_samples_split': range(2,22,2),
              'min_samples_leaf':range(2,22,2),
              'max_features': range(2,11,1),
              'bootstrap' : [True, False]}



# Bound to its own name: assigning the search object to `grid_search` would overwrite the
# notebook's `grid_search` function, which the neural-network sections call later.
rf_search = GridSearchCV(RandomForestRegressor(random_state=0, criterion='squared_error'), param_grid=param_grid, 
                         cv=KFold(n_splits=5, shuffle=True, random_state=0), scoring=score)

rf_search.fit(x_train, y_train)
report(rf_search.cv_results_, n_top=5)

Model fitting and TR/TS prediction

In [None]:
# Final random forest refitted on the whole training split.
regr = RandomForestRegressor(
    n_estimators=100,
    criterion='squared_error',
    max_depth=20,
    min_samples_split=2,
    min_samples_leaf=2,
    max_features=3,
    bootstrap=False,
    random_state=0,
)
regr.fit(x_train, y_train)

In [None]:
# MEE of the random forest on the training split it was fitted on.
y_pred_train = regr.predict(x_train)
mean_euclidean_error(y_train, y_pred_train)

In [None]:
# MEE of the random forest on the hold-out test split.
y_pred_forest = regr.predict(x_test)
mean_euclidean_error(y_test, y_pred_forest)

## NN SGD optimizer

Next we define **build_model** function in order to build our Neural Network. In this function we use:

*   A seed to get reproducible results. 
*   L2 regularization term added to the loss function
*   SGD optimizer to train our NN
*   MEE as loss and metric

To solve this task it is useful to build a multilayer architecture using the Tanh activation function for the hidden layers' units. The 2 output units instead use a linear activation function, as required by the regression task.

In [None]:
def build_model(weight_init=0.2, weight_distr=0, activ='relu',layer=1, unit=4, eta=0.2, alpha=0.5, lambd=0):
  """Build and compile a fully connected network trained with SGD.

  Args:
    weight_init: half-width of the uniform range (``weight_distr == 0``) or
      standard deviation of the normal distribution (``weight_distr == 1``)
      used to initialize kernels and biases.
    weight_distr: 0 for uniform, 1 for normal, any other value for Glorot normal.
    activ: activation function of the hidden layers.
    layer: number of hidden layers.
    unit: number of units in each hidden layer.
    eta: learning rate.
    alpha: momentum coefficient.
    lambd: L2 regularization coefficient applied to the layer kernels.

  Returns:
    A compiled ``Sequential`` model with 10 inputs and 2 linear outputs, using
    MEE as both loss and metric.
  """
  # Fixed seed so that repeated builds start from reproducible weights.
  tf.random.set_seed(0)

  if weight_distr==0:
    init= tf.keras.initializers.RandomUniform(minval=-weight_init, maxval=weight_init)
  elif weight_distr==1:
    init= tf.keras.initializers.RandomNormal(mean=0., stddev=weight_init)
  else:
    init= tf.keras.initializers.GlorotNormal()
  # NOTE(review): the same unseeded initializer instance is shared by every layer;
  # some Keras versions warn about this - confirm it behaves as intended on the target version.

  reg= tf.keras.regularizers.l2(l2=lambd)

  model= tf.keras.models.Sequential()
  # Explicit shape tuple: a bare int is not accepted as a shape by every Keras version.
  model.add(tf.keras.layers.Input(shape=(10,)))
  for _ in range(layer):
    # Honor `activ`; it used to be ignored because the activation was hard-coded to 'tanh'.
    model.add(tf.keras.layers.Dense(unit, activation=activ, kernel_initializer=init, bias_initializer=init, kernel_regularizer=reg))
  model.add(tf.keras.layers.Dense(2, activation='linear', kernel_initializer=init, bias_initializer=init, kernel_regularizer=reg))

  opt= tf.keras.optimizers.SGD(learning_rate=eta, momentum=alpha, nesterov=False)
  model.compile(loss=mean_euclidean_error_tf,
                optimizer=opt,
                metrics=[mean_euclidean_error_tf])

  return model

### Batch mode

Hyper-parameter tuning for the NN in batch mode. The parameter grids explored are shown one after the other; only the last one assigned is used by the search.



In [None]:
# Record of a grid that was explored. NOTE: this assignment is overwritten by the
# next one in this cell, so it has no effect when the cell is run as is.
param_grid={
    'weight_init': [0.2 ,0.3, 0.4],
    'weight_distr': [1],
    'unit': [30, 40],
    'layer':[3],
    'eta': [0.01, 0.03, 0.05, 0.07],
    'alpha': [0.9, 0.95],
    'lambd': [0.0005, 0.001, 0.005, 0.01],
    'activ': ['tanh']
}

# Grid in effect after running this cell (last assignment wins).
param_grid={
    'weight_init': [0.2,0.4],
    'weight_distr': [1],
    'unit': [10,20,30,40,50],
    'layer':[2,3],
    'eta': [0.0005, 0.001, 0.005, 0.01, 0.05],
    'alpha': [0.8, 0.9, 0.95, 0.975],
    'lambd': [0.0005, 0.001, 0.005, 0.01],
    'activ': ['tanh']
}

In [None]:
%%time
# Full-batch search: batch_size is the number of training rows, so every epoch performs a single weight update.
cv_results= grid_search(x_train, y_train, param_grid, fold=KFold(n_splits=4, shuffle=True, random_state=0), epochs=800, batch_size=(len(x_train)))

In [None]:
# Raw search output: one dict per hyper-parameter combination.
cv_results

In [None]:
# Rank the combinations by mean validation MEE; ties are broken by the smaller std across folds.
sorted_result = sorted(
    cv_results,
    key=lambda res: (res['mean_euclidean_error'], res['std_euclidean_error']),
)
best_5_result = sorted_result[:5]
best_5_result

In [None]:
# The top-ranked combination is the one used for the final retraining.
best_model_par=best_5_result[0]
best_model_par

Best parameter combo retrieved from the grid

In [None]:
# Hard-coded best combination (presumably pasted from a past run of the search).
# It replaces the value computed in the previous cell, so the retraining below can be
# run without repeating the grid search.
best_model_par={'weight_init': 0.4,
  'weight_distr': 1,
  'unit': 40,
  'layer': 3,
  'eta': 0.03,
  'alpha': 0.95,
  'lambd': 0.001,
  'activ': 'tanh',
  'mean_euclidean_error': 1.050774022936821,
  'std_euclidean_error': 0.03191835462576261,
  'mean_val_loss': 1.6136361360549927,
  'std_val_loss': 0.14172841075355544,
  'mean_training_MEE': 0.8342830985784531,
  'std_training_MEE': 0.07254623048895122}

Then we retrain on the whole TR set, stopping the NN training when the training MEE reaches the mean training MEE obtained in the CV fitting.

In [None]:
err=best_model_par['mean_training_MEE']
class haltCallback(tf.keras.callbacks.Callback):
    """Keras callback that halts training once the training MEE reaches a threshold.

    Args:
        threshold: MEE value at or below which training is stopped. Defaults to
            the notebook-level ``err`` (mean training MEE of the CV fits), read
            when the callback is instantiated.
    """

    def __init__(self, threshold=None):
        super().__init__()
        # Capture the threshold now, so later reassignments of `err` cannot change
        # the behavior of an already created callback.
        self.threshold = err if threshold is None else threshold

    def on_epoch_end(self, epoch, logs=None):
        # `logs=None` avoids a mutable default dict shared between calls.
        current = (logs or {}).get('mean_euclidean_error_tf')
        # Skip the check when the metric is missing: `None <= float` raises TypeError.
        if current is not None and current <= self.threshold:
            print("\n\n\nReached tr value so cancelling training!\n\n\n")
            self.model.stop_training = True


In [None]:
trainingStopCallback = haltCallback()
d = best_model_par
model_best = build_model(
    weight_init=d['weight_init'],
    weight_distr=d['weight_distr'],
    activ=d['activ'],
    layer=d['layer'],
    unit=d['unit'],
    eta=d['eta'],
    alpha=d['alpha'],
    lambd=d['lambd'],
)

# The hold-out test split is passed as validation data only to record its curve;
# the stop callback looks at the training metric.
val_best = (x_test, y_test)
result_best = model_best.fit(
    x=x_train,
    y=y_train,
    epochs=800,
    batch_size=len(x_train),
    validation_data=val_best,
    shuffle=True,
    callbacks=[trainingStopCallback],
)

MEE and Loss plot followed by TR and TS prediction performance

In [None]:
# Learning curves of the final fit: one figure for MEE, one for the loss.
# Each entry: (training key, validation key, title, y-axis label).
curves = (
    ('mean_euclidean_error_tf', 'val_mean_euclidean_error_tf', 'model MEE', 'MEE'),
    ('loss', 'val_loss', 'model loss', 'loss'),
)
for train_key, val_key, title, y_label in curves:
    plt.figure(figsize=(15,8))
    plt.plot(result_best.history[train_key])
    plt.plot(result_best.history[val_key], linestyle='--')
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['training', 'test'], loc='upper right')
    plt.show()

In [None]:
# Loss and MEE metric on the training split, evaluated in a single batch.
model_best.evaluate(x_train, y_train,batch_size=(len(x_train)))

In [None]:
# Loss and MEE metric on the hold-out test split, evaluated in a single batch.
model_best.evaluate(x_test, y_test,batch_size=(len(x_test)))

### Mini batch 100

Hyper-parameters tuning for NN with mini-batch mode of size 100.


In [None]:
param_grid={
    'weight_init': [0.2, 0.3, 0.4],
    'weight_distr': [1],
    'unit': [10,20,30,40,50],
    'layer':[2,3],
    'eta': [0.0001, 0.0005, 0.001, 0.005, 0.0075, 0.01, 0.025, 0.05],
    # Must be a list like every other entry: the search takes len() of each value and
    # feeds them to itertools.product, so a bare 0 raises TypeError.
    'alpha': [0],
    'lambd': [0.0001,0.0005, 0.001, 0.005, 0.01, 0.05],
    'activ': ['tanh']}

In [None]:
%%time
# Mini-batch search: the weights are updated every 100 samples.
cv_results= grid_search(x_train, y_train, param_grid, fold=KFold(n_splits=4, shuffle=True, random_state=0), epochs=800, batch_size=100)

In [None]:
# Raw search output: one dict per hyper-parameter combination.
cv_results

In [None]:
# Rank the combinations by mean validation MEE; ties are broken by the smaller std across folds.
sorted_result = sorted(
    cv_results,
    key=lambda res: (res['mean_euclidean_error'], res['std_euclidean_error']),
)
best_5_result = sorted_result[:5]
best_5_result

In [None]:
# The top-ranked combination is the one used for the final retraining.
best_model_par=best_5_result[0]
best_model_par

In [None]:
# Hard-coded best combination for mini-batch mode (presumably pasted from a past run
# of the search). It replaces the value computed in the previous cell.
best_model_par={'weight_init': 0.2,
  'weight_distr': 1,
  'unit': 50,
  'layer': 3,
  'eta': 0.025,
  'alpha':0,
  'lambd': 0.0005,
  'activ': 'tanh',
  'mean_euclidean_error': 1.0905110239982605,
  'std_euclidean_error': 0.022719327275489965,
  'mean_val_loss': 1.2269698977470398,
  'std_val_loss': 0.019988722933869295,
  'mean_training_MEE': 1.0076722502708435,
  'std_training_MEE': 0.02095401036698643}
  

Then we retrain on the whole TR set, stopping the NN training when the training MEE reaches the mean training MEE obtained in the CV fitting.

In [None]:
err=best_model_par['mean_training_MEE']
class haltCallback(tf.keras.callbacks.Callback):
    """Keras callback that halts training once the training MEE reaches a threshold.

    Args:
        threshold: MEE value at or below which training is stopped. Defaults to
            the notebook-level ``err`` (mean training MEE of the CV fits), read
            when the callback is instantiated.
    """

    def __init__(self, threshold=None):
        super().__init__()
        # Capture the threshold now, so later reassignments of `err` cannot change
        # the behavior of an already created callback.
        self.threshold = err if threshold is None else threshold

    def on_epoch_end(self, epoch, logs=None):
        # `logs=None` avoids a mutable default dict shared between calls.
        current = (logs or {}).get('mean_euclidean_error_tf')
        # Skip the check when the metric is missing: `None <= float` raises TypeError.
        if current is not None and current <= self.threshold:
            print("\n\n\nReached tr value so cancelling training!\n\n\n")
            self.model.stop_training = True

In [None]:
trainingStopCallback = haltCallback()
d = best_model_par
model_best = build_model(
    weight_init=d['weight_init'],
    weight_distr=d['weight_distr'],
    activ=d['activ'],
    layer=d['layer'],
    unit=d['unit'],
    eta=d['eta'],
    alpha=d['alpha'],
    lambd=d['lambd'],
)

# The hold-out test split is passed as validation data only to record its curve;
# the stop callback looks at the training metric.
val_best = (x_test, y_test)
result_best = model_best.fit(
    x=x_train,
    y=y_train,
    epochs=800,
    batch_size=100,
    validation_data=val_best,
    shuffle=True,
    callbacks=[trainingStopCallback],
)

MEE and Loss plot followed by TR and TS prediction performance

In [None]:
# Learning curves of the final fit: one figure for MEE, one for the loss.
# Each entry: (training key, validation key, validation line style, title, y-axis label).
curves = (
    ('mean_euclidean_error_tf', 'val_mean_euclidean_error_tf', (0, (5, 1)), 'model MEE', 'MEE'),
    ('loss', 'val_loss', '--', 'model loss', 'loss'),
)
for train_key, val_key, val_style, title, y_label in curves:
    plt.figure(figsize=(15,8))
    plt.plot(result_best.history[train_key])
    plt.plot(result_best.history[val_key], linestyle=val_style)
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['training', 'test'], loc='upper right')
    plt.show()

In [None]:
# Loss and MEE metric on the training split, evaluated in a single batch.
model_best.evaluate(x_train, y_train,batch_size=(len(x_train)))

In [None]:
# Loss and MEE metric on the hold-out test split, evaluated in a single batch.
model_best.evaluate(x_test, y_test,batch_size=(len(x_test)))

### Online mode

Hyper-parameters tuning for NN with online mode.


In [None]:
# Grid for online training: momentum is fixed to 0 and the learning rates are much
# smaller than in the batch grids, since the weights are updated after every sample.
param_grid={
    'weight_init': [0.2, 0.4],
    'weight_distr': [1],
    'unit': [10,20,30,40,50],
    'eta': [0.000001,0.000005,0.00001,0.00005,0.0001,0.00025,0.0005],
    'alpha':[0],
    'layer':[2,3],
    'lambd': [0.0005, 0.001, 0.005, 0.01, 0.05],
    'activ': ['tanh']
}


In [None]:
%%time
# Online search: batch_size=1 updates the weights after every sample; at most 400 epochs per fit.
cv_results= grid_search(x_train, y_train, param_grid, fold=KFold(n_splits=4, shuffle=True, random_state=0), epochs=400, batch_size=1)

In [None]:
# Raw search output: one dict per hyper-parameter combination.
cv_results

In [None]:
# Rank the combinations by mean validation MEE; ties are broken by the smaller std across folds.
sorted_result = sorted(
    cv_results,
    key=lambda res: (res['mean_euclidean_error'], res['std_euclidean_error']),
)
best_5_result = sorted_result[:5]
best_5_result

In [None]:
# The top-ranked combination is the one used for the final retraining.
best_model_par=best_5_result[0]
best_model_par

In [None]:
# Hard-coded best combination for online mode (presumably pasted from a past run
# of the search). It replaces the value computed in the previous cell.
best_model_par={'weight_init': 0.2,
  'weight_distr': 1,
  'unit': 50,
  'eta': 0.0005,
  'alpha': 0,
  'layer': 2,
  'lambd': 0.001,
  'activ': 'tanh',
  'mean_euclidean_error': 1.0894718170166016,
  'std_euclidean_error': 0.01959021261526786,
  'mean_training_MEE': 0.9664344638586044,
  'std_training_MEE': 0.027382713585267594,
  'mean_val_loss': 1.2418962121009827,
  'std_val_loss': 0.019059742747835607}

Then we retrain on the whole TR set, stopping the NN training when the training MEE reaches the mean training MEE obtained in the CV fitting.

In [None]:
err=best_model_par['mean_training_MEE']

class haltCallback(tf.keras.callbacks.Callback):
    """Keras callback that halts training once the training MEE reaches a threshold.

    Args:
        threshold: MEE value at or below which training is stopped. Defaults to
            the notebook-level ``err`` (mean training MEE of the CV fits), read
            when the callback is instantiated.
    """

    def __init__(self, threshold=None):
        super().__init__()
        # Capture the threshold now, so later reassignments of `err` cannot change
        # the behavior of an already created callback.
        self.threshold = err if threshold is None else threshold

    def on_epoch_end(self, epoch, logs=None):
        # `logs=None` avoids a mutable default dict shared between calls.
        current = (logs or {}).get('mean_euclidean_error_tf')
        # Skip the check when the metric is missing: `None <= float` raises TypeError.
        if current is not None and current <= self.threshold:
            print("\n\n\nReached tr value so cancelling training!\n\n\n")
            self.model.stop_training = True

In [None]:
trainingStopCallback = haltCallback()
d = best_model_par
model_best = build_model(
    weight_init=d['weight_init'],
    weight_distr=d['weight_distr'],
    activ=d['activ'],
    layer=d['layer'],
    unit=d['unit'],
    eta=d['eta'],
    alpha=d['alpha'],
    lambd=d['lambd'],
)

# The hold-out test split is passed as validation data only to record its curve;
# the stop callback looks at the training metric.
val_best = (x_test, y_test)
result_best = model_best.fit(
    x=x_train,
    y=y_train,
    epochs=400,
    batch_size=1,
    validation_data=val_best,
    shuffle=True,
    callbacks=[trainingStopCallback],
)

MEE and Loss plot followed by TR and TS prediction performance

In [None]:
# Learning curves of the final fit: one figure for MEE, one for the loss.
# Each entry: (training key, validation key, title, y-axis label).
curves = (
    ('mean_euclidean_error_tf', 'val_mean_euclidean_error_tf', 'model MEE', 'MEE'),
    ('loss', 'val_loss', 'model loss', 'loss'),
)
for train_key, val_key, title, y_label in curves:
    plt.figure(figsize=(15,8))
    plt.plot(result_best.history[train_key])
    plt.plot(result_best.history[val_key], linestyle='--')
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['training', 'test'], loc='upper right')
    plt.show()

In [None]:
# Loss and MEE metric on the training split, evaluated in a single batch.
model_best.evaluate(x_train, y_train,batch_size=(len(x_train)))

In [None]:
# Loss and MEE metric on the hold-out test split, evaluated in a single batch.
model_best.evaluate(x_test, y_test,batch_size=(len(x_test)))

## ADAM optimizer

Next we define **build_model** also for ADAM optimizer

In [None]:
def build_model(weight_init=0.2, weight_distr=0, activ='relu',layer=1, unit=4, eta=0.2, alpha=0.5, lambd=0, beta_1=0.9, beta_2=0.999):
  """Build and compile a fully connected network trained with Adam.

  Args:
    weight_init: half-width of the uniform range (``weight_distr == 0``) or
      standard deviation of the normal distribution (``weight_distr == 1``)
      used to initialize kernels and biases.
    weight_distr: 0 for uniform, 1 for normal, any other value for Glorot normal.
    activ: activation function of the hidden layers.
    layer: number of hidden layers.
    unit: number of units in each hidden layer.
    eta: learning rate.
    alpha: accepted but not used by this Adam variant.
    lambd: L2 regularization coefficient applied to the layer kernels.
    beta_1: Adam ``beta_1`` coefficient.
    beta_2: Adam ``beta_2`` coefficient.

  Returns:
    A compiled ``Sequential`` model with 10 inputs and 2 linear outputs, using
    MEE as both loss and metric.
  """
  # Fixed seed so that repeated builds start from reproducible weights.
  tf.random.set_seed(0)

  if weight_distr==0:
    init= tf.keras.initializers.RandomUniform(minval=-weight_init, maxval=weight_init)
  elif weight_distr==1:
    init= tf.keras.initializers.RandomNormal(mean=0., stddev=weight_init)
  else:
    init= tf.keras.initializers.GlorotNormal()
  # NOTE(review): the same unseeded initializer instance is shared by every layer;
  # some Keras versions warn about this - confirm it behaves as intended on the target version.

  reg= tf.keras.regularizers.l2(l2=lambd)

  model= tf.keras.models.Sequential()
  # Explicit shape tuple: a bare int is not accepted as a shape by every Keras version.
  model.add(tf.keras.layers.Input(shape=(10,)))
  for _ in range(layer):
    # Honor `activ`; it used to be ignored because the activation was hard-coded to 'tanh'.
    model.add(tf.keras.layers.Dense(unit, activation=activ, kernel_initializer=init, bias_initializer=init, kernel_regularizer=reg))
  model.add(tf.keras.layers.Dense(2, activation='linear', kernel_initializer=init, bias_initializer=init, kernel_regularizer=reg))

  opt= tf.keras.optimizers.Adam(learning_rate=eta, beta_1=beta_1, beta_2=beta_2)
  model.compile(loss=mean_euclidean_error_tf,
                optimizer=opt,
                metrics=[mean_euclidean_error_tf])

  return model

Hyper-parameter tuning for the NN with the ADAM optimizer in batch mode. The parameter grids explored are shown one after the other; only the last one assigned is used by the search.



In [None]:
# Record of a grid that was explored (varies both beta_1 and beta_2 at a fixed eta).
# NOTE: overwritten by the following assignments in this cell.
param_grid={
    'weight_init': [0.2, 0.3, 0.4],
    'weight_distr': [1],
    'unit': [30,40,50],
    'layer':[3],
    'eta': [0.025],
    'beta_1': [0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6],
    'beta_2': [0.999, 0.9, 0.8, 0.7, 0.6],
    'lambd': [0.00075, 0.0005,  0.001, 0.005],
    'activ': ['tanh']
}

# Record of a second explored grid. NOTE: also overwritten by the next assignment.
param_grid={
    'weight_init': [0.2, 0.3, 0.4],
    'weight_distr': [1],
    'unit': [20,30,40,50],
    'layer':[3],
    'eta': [0.0075, 0.01, 0.025],
    'beta_1': [0.9, 0.8, 0.7, 0.6],
    'beta_2': [0.999],
    'lambd': [0.00075, 0.0005,  0.001, 0.005],
    'activ': ['tanh']
}

# Grid in effect after running this cell (last assignment wins).
param_grid={
    'weight_init': [0.2, 0.4],
    'weight_distr': [1],
    'unit': [10,20,30,40,50],
    'layer':[2,3],
    'eta': [0.0001, 0.0005, 0.001, 0.005, 0.01],
    'beta_1': [0.9, 0.8, 0.7, 0.6],
    'beta_2': [0.999],
    'lambd': [0.0005,  0.001, 0.005,  0.01],
    'activ': ['tanh']
}

In [None]:
%%time
# Full-batch search with the Adam variant of build_model; at most 600 epochs per fit.
cv_results= grid_search(x_train, y_train, param_grid, fold=KFold(n_splits=4, shuffle=True, random_state=0), epochs=600, batch_size=len(x_train))

In [None]:
# Raw search output: one dict per hyper-parameter combination.
cv_results

In [None]:
# Rank the combinations by mean validation MEE; ties are broken by the smaller std across folds.
sorted_result = sorted(
    cv_results,
    key=lambda res: (res['mean_euclidean_error'], res['std_euclidean_error']),
)
best_5_result = sorted_result[:5]
best_5_result

In [None]:
# The top-ranked combination is the one used for the final retraining.
best_model_par=best_5_result[0]
best_model_par

In [None]:
# Hard-coded best combination for Adam (presumably pasted from a past run of the search).
# The training-MEE entries use the key names emitted by `grid_search`
# ('mean_training_MEE' / 'std_training_MEE'); the retraining step reads
# best_model_par['mean_training_MEE'] and raised KeyError with the old
# 'mean_loss_euclidean_error' / 'std_loss_euclidean_error' names.
best_model_par= {'weight_init': 0.4,
  'weight_distr': 1,
  'unit': 40,
  'layer': 3,
  'eta': 0.025,
  'beta_1': 0.9,
  'beta_2': 0.7,
  'lambd': 0.001,
  'activ': 'tanh',
  'mean_euclidean_error': 1.041769653558731,
  'std_euclidean_error': 0.01717042196876029,
  'mean_val_loss': 1.3092704713344574,
  'std_val_loss': 0.050951337200595484,
  'mean_training_MEE': 0.8765210807323456,
  'std_training_MEE': 0.04217124598293542}

Then we retrain on the whole TR set, stopping the NN training when the training MEE reaches the mean training MEE obtained in the CV fitting.

In [None]:
err=best_model_par['mean_training_MEE']

class haltCallback(tf.keras.callbacks.Callback):
    """Keras callback that halts training once the training MEE reaches a threshold.

    Args:
        threshold: MEE value at or below which training is stopped. Defaults to
            the notebook-level ``err`` (mean training MEE of the CV fits), read
            when the callback is instantiated.
    """

    def __init__(self, threshold=None):
        super().__init__()
        # Capture the threshold now, so later reassignments of `err` cannot change
        # the behavior of an already created callback.
        self.threshold = err if threshold is None else threshold

    def on_epoch_end(self, epoch, logs=None):
        # `logs=None` avoids a mutable default dict shared between calls.
        current = (logs or {}).get('mean_euclidean_error_tf')
        # Skip the check when the metric is missing: `None <= float` raises TypeError.
        if current is not None and current <= self.threshold:
            print("\n\n\nReached tr value so cancelling training!\n\n\n")
            self.model.stop_training = True

trainingStopCallback = haltCallback()


In [None]:
d = best_model_par
model_best = build_model(
    weight_init=d['weight_init'],
    weight_distr=d['weight_distr'],
    activ=d['activ'],
    layer=d['layer'],
    unit=d['unit'],
    eta=d['eta'],
    lambd=d['lambd'],
    beta_1=d['beta_1'],
    beta_2=d['beta_2'],
)

# The hold-out test split is passed as validation data only to record its curve;
# the stop callback looks at the training metric.
val_best = (x_test, y_test)
result_best = model_best.fit(
    x=x_train,
    y=y_train,
    epochs=800,
    batch_size=len(x_train),
    validation_data=val_best,
    shuffle=True,
    callbacks=[trainingStopCallback],
)

MEE and Loss plot followed by TR and TS prediction performance

In [None]:
# Learning curves of the final fit: one figure for MEE, one for the loss.
# Each entry: (training key, validation key, title, y-axis label).
curves = (
    ('mean_euclidean_error_tf', 'val_mean_euclidean_error_tf', 'model MEE', 'MEE'),
    ('loss', 'val_loss', 'model loss', 'loss'),
)
for train_key, val_key, title, y_label in curves:
    plt.figure(figsize=(15,8))
    plt.plot(result_best.history[train_key])
    plt.plot(result_best.history[val_key], linestyle='--')
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['training', 'test'], loc='upper right')
    plt.show()

In [None]:
# Loss and MEE metric on the training split, evaluated in a single batch.
model_best.evaluate(x_train, y_train,batch_size=(len(x_train)))

In [None]:
# Loss and MEE metric on the hold-out test split, evaluated in a single batch.
model_best.evaluate(x_test, y_test,batch_size=(len(x_test)))