In [None]:
from time import time
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, KFold
from sklearn.neural_network import MLPRegressor
from sklearn.base import clone
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
RANDOM_STATE = 0
MODEL = MLPRegressor()

In [None]:
# Load the ML-CUP22 development and blind-test CSVs; the first 7 lines of each file are skipped.
tr = pd.read_csv('ML-CUP22-TR.csv', skiprows=7, header=None)
# Drop column 0 and shift the remaining column labels down by one so they start at 0.
# NOTE(review): column 0 is presumably a row id — confirm against the data description.
tr = tr.drop([0], axis=1).rename(columns={i:i-1 for i in tr.columns})
blind_ts = pd.read_csv('ML-CUP22-TS.csv', skiprows=7, header=None)
blind_ts = blind_ts.drop([0], axis=1).rename(columns={i:i-1 for i in blind_ts.columns})

X_blind = blind_ts.values
X = tr.iloc[:,:9].values  # first 9 columns: input features
Y = tr.iloc[:,9:].values  # remaining columns: targets (two of them are used below)
# HOLD-OUT DESIGNER SET - TEST SET
X_tr, X_tt, Y_tr, Y_tt = train_test_split(
    X, Y, test_size=0.3, random_state=RANDOM_STATE, shuffle=True)
# HOLD-OUT TRAINING - VALIDATION
X_inner_tr, X_val, Y_inner_tr, Y_val = train_test_split(
    X_tr, Y_tr, test_size=0.2, random_state=RANDOM_STATE, shuffle=True)
# we choose a val size of 0.2*X_tr.shape[0], to save 80% of data for the actual training

# DIVIDING THE 2 TARGETS Y
y1_tr, y2_tr = Y_tr[:,0], Y_tr[:,1]
y1_inner_tr, y2_inner_tr = Y_inner_tr[:,0], Y_inner_tr[:,1]
y1_val, y2_val = Y_val[:,0], Y_val[:,1]
y1_tt, y2_tt = Y_tt[:,0], Y_tt[:,1]

# RESCALING OF THE DATA (MEAN=0, SIGMA=1)
# The scaler is fitted on the design set only and then applied to EVERY input
# matrix used later. X_inner_tr and X_val were split off before scaling, so they
# must be transformed explicitly: otherwise the final models would be trained on
# raw features and evaluated on standardized ones (X_tt).
scaler = StandardScaler()
scaler.fit(X_tr)
X_tr = scaler.transform(X_tr)
X_inner_tr = scaler.transform(X_inner_tr)
X_val = scaler.transform(X_val)
X_tt = scaler.transform(X_tt)
X_blind = scaler.transform(X_blind)

In [None]:
def search_function(X_tr, y_tr, model, type_of_search, n_iter):
    '''
    Run a 5-fold cross-validated hyper-parameter search, so that the grid/random
    search does not have to be repeated in each cell.

    The search space is NOT an argument: it is read from the module-level
    variable `param_grid`, which must be assigned before calling this function.

    Parameters
    ----------
    X_tr, y_tr : inputs and targets handed to the search's `fit`.
    model : estimator to tune.
    type_of_search : 'grid' (GridSearchCV) or 'rand' (RandomizedSearchCV).
    n_iter : number of sampled settings; only used when type_of_search == 'rand'.

    Returns
    -------
    (search, results) : the fitted search object and a DataFrame built from its
        `cv_results_`, sorted by `rank_test_score`.

    Raises
    ------
    ValueError : if type_of_search is neither 'grid' nor 'rand'.
    '''
    # Fail early with a clear message instead of hitting an unbound variable later.
    if type_of_search not in ('rand', 'grid'):
        raise ValueError(
            f"type_of_search must be 'rand' or 'grid', got {type_of_search!r}")

    # Settings shared by both kinds of search.
    # not a repeatedKfold, would have been too time consuming
    cv = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
    scoring = 'neg_mean_absolute_error'

    if type_of_search == 'rand':
        grid = RandomizedSearchCV(
            model,
            param_distributions=param_grid,
            cv=cv,
            n_jobs=-1,
            n_iter=n_iter,
            refit=scoring,
            scoring=scoring,
            random_state=RANDOM_STATE,
        )
    else:
        grid = GridSearchCV(
            model,
            param_grid=param_grid,
            scoring=scoring,
            cv=cv,
            n_jobs=-1,
            refit=scoring,
        )
    grid.fit(X_tr, y_tr)
    return grid, pd.DataFrame(grid.cv_results_).sort_values(by='rank_test_score', axis=0)

def MSE_vs_Epochs(model, X_tr, X_val, y_tr, y_val, epochs, plotting):
    '''
    Track the training and validation MSE of a network, epoch by epoch.

    A fresh clone of `model` is trained with one `partial_fit` call per epoch and
    the MSE on (X_tr, y_tr) and (X_val, y_val) is recorded after every call.
    With epochs==0 the number of epochs is chosen automatically: `model` itself
    is fitted on the training data and its `n_iter_` is used.

    Returns (tr_err_curve, val_err_curve). When plotting==True two figures are
    also drawn (the MSE curves and the clone's loss curve) and the return value
    becomes (tr_err_curve, val_err_curve, fig_learn_curve, fig_loss).
    '''
    n_epochs = epochs
    if n_epochs == 0:
        # Let the model's own stopping rule decide how many epochs to replay.
        model.fit(X_tr, y_tr)
        n_epochs = model.n_iter_

    learner = clone(model)
    learner.warm_start = True
    tr_err_curve = []
    val_err_curve = []
    for _ in range(n_epochs):
        learner.partial_fit(X_tr, y_tr)
        tr_err_curve.append(mean_squared_error(y_tr, learner.predict(X_tr)))
        val_err_curve.append(mean_squared_error(y_val, learner.predict(X_val)))

    if not plotting == True:
        return tr_err_curve, val_err_curve

    title_size, text_size = 20, 15

    # Figure 1: training vs validation MSE.
    fig_learn_curve = plt.figure()
    plt.plot(tr_err_curve, linewidth=3, label='MSE vs #epochs training')
    plt.plot(val_err_curve, linewidth=3, linestyle='--', label='MSE vs #epochs validation')
    plt.grid()
    plt.legend()
    plt.title('MSE vs Epochs', fontsize=title_size)
    plt.xlabel('Epoch', fontsize=text_size)
    plt.ylabel('MSE', fontsize=text_size)
    plt.xticks(fontsize=text_size)
    plt.yticks(fontsize=text_size)

    # Figure 2: loss recorded by the incrementally trained clone.
    fig_loss = plt.figure()
    plt.grid()
    plt.title('Loss curve', fontsize=title_size)
    plt.plot(learner.loss_curve_, linewidth=3)
    plt.xlabel('Epoch', fontsize=text_size)
    plt.ylabel('Loss', fontsize=text_size)
    plt.xticks(fontsize=text_size)
    plt.yticks(fontsize=text_size)
    return tr_err_curve, val_err_curve, fig_learn_curve, fig_loss

def mean_learning_curve(repetitions, model, X_tr, X_val, y_tr, y_val, epochs):
    '''
    Repeat the epoch-by-epoch evaluation of the model `repetitions` times and
    plot the mean training/validation MSE per epoch with a +/- one std dev band.

    The repeated runs use an unseeded clone of `model` (random_state=None), so
    each repetition starts from a different initialization. The caller's model
    is never fitted or modified.

    Parameters
    ----------
    repetitions : number of independent runs to average (must be >= 1).
    model : estimator to evaluate; only its parameters are used.
    X_tr, y_tr : data used for training.
    X_val, y_val : data used for the validation curve.
    epochs : number of epochs per run, forwarded to MSE_vs_Epochs (0 lets each
        run pick its own number of epochs).

    Returns
    -------
    (fig, mean_err_tr_per_epoch, mean_err_val_per_epoch, std_err_tr_per_epoch,
     std_err_val_per_epoch, model_to_return), where model_to_return is a new
    model with the caller's original random_state, trained with partial_fit up
    to the epoch where the mean validation MSE is lowest.

    Raises
    ------
    ValueError : if repetitions is smaller than 1.
    '''
    if repetitions < 1:
        raise ValueError('repetitions must be at least 1')

    # Remember the caller's seed before working on an unseeded copy, so that it
    # can be restored on the model that is handed back.
    original_random_state = model.random_state
    unseeded_model = clone(model)
    unseeded_model.random_state = None

    tr_runs, val_runs = [], []
    for _ in range(repetitions):
        tr_err_curve, val_err_curve = MSE_vs_Epochs(
            unseeded_model, X_tr, X_val, y_tr, y_val, epochs, False)
        tr_runs.append(tr_err_curve)
        val_runs.append(val_err_curve)

    # One row per repetition, one column per epoch; runs of different length
    # are padded with NaN by pandas and skipped by mean/std below.
    tr_err_curves = pd.DataFrame(tr_runs)
    val_err_curves = pd.DataFrame(val_runs)
    epoch_v = np.arange(1, tr_err_curves.shape[1] + 1, step=1)
    mean_err_tr_per_epoch = tr_err_curves.mean(axis=0).to_numpy()
    mean_err_val_per_epoch = val_err_curves.mean(axis=0).to_numpy()
    # ddof=0 keeps the population standard deviation used so far.
    std_err_tr_per_epoch = tr_err_curves.std(axis=0, ddof=0).to_numpy()
    std_err_val_per_epoch = val_err_curves.std(axis=0, ddof=0).to_numpy()

    fig = plt.figure()
    plt.plot(epoch_v, mean_err_val_per_epoch, c='r', linestyle='--', label='MSE vs #epochs validation')
    plt.plot(epoch_v, mean_err_tr_per_epoch, c='b', label='MSE vs #epochs training')
    plt.fill_between(epoch_v, mean_err_tr_per_epoch-std_err_tr_per_epoch, mean_err_tr_per_epoch+std_err_tr_per_epoch, color='b', alpha=.1)
    plt.fill_between(epoch_v, mean_err_val_per_epoch-std_err_val_per_epoch, mean_err_val_per_epoch+std_err_val_per_epoch, color='r', alpha=.1)
    plt.grid()
    plt.legend()
    plt.title('Learning curves', fontsize=20)
    plt.xlabel('Epochs', fontsize=15)
    plt.ylabel('MSE', fontsize=15)

    # Epoch (1-based) at which the mean validation MSE is lowest.
    epoch_mean_err_val = epoch_v[np.argmin(mean_err_val_per_epoch)]

    # Final model: same hyper-parameters and the caller's original seed,
    # trained only up to the selected epoch.
    model_to_return = clone(model)
    model_to_return.random_state = original_random_state
    for _ in range(epoch_mean_err_val):
        model_to_return.partial_fit(X_tr, y_tr)
    return fig, mean_err_tr_per_epoch, mean_err_val_per_epoch, std_err_tr_per_epoch, std_err_val_per_epoch, model_to_return
    
    
def mee(X_tt, Y_true, Y_pred):
    '''
    Compute the Mean Euclidean Error between true and predicted targets.

    For every row the Euclidean distance between Y_true and Y_pred is taken over
    ALL target columns (two in this notebook, but any number is supported); the
    distances are summed and divided by the number of rows of X_tt.

    Parameters
    ----------
    X_tt : array with a `shape`; only its number of rows is used.
    Y_true, Y_pred : 2-D arrays of identical shape (n_samples, n_outputs).

    Raises
    ------
    ValueError : if Y_true and Y_pred do not have the same shape.
    '''
    Y_true = np.asarray(Y_true)
    Y_pred = np.asarray(Y_pred)
    if Y_true.shape != Y_pred.shape:
        raise ValueError(
            f'Y_true and Y_pred must have the same shape, got {Y_true.shape} and {Y_pred.shape}')
    # Row-wise Euclidean norm of the residuals.
    distances = np.linalg.norm(Y_true - Y_pred, axis=1)
    return distances.sum() / X_tt.shape[0]

# Using invscaling learning rate

## 1 MLP network:
a single model, with two outputs, to solve the task

In [None]:
%%time
# Search space for the invscaling experiments. search_function does not take it
# as an argument: it reads this module-level `param_grid` implicitly.
param_grid = {
    'activation': ['tanh', 'logistic'],
    'alpha': [0, 0.01, 0.1],
    'batch_size': [100], # mini batch
    'early_stopping': [False], 
    'hidden_layer_sizes': [10, 100, (10,10), (10,10,10)],
    'learning_rate': ['invscaling'],
    'learning_rate_init': [0.1, 0.01, 0.001], # higher values because we decided to work with invscaling
    'max_iter': [5000],
    'momentum': [0], # we choose to avoid momentum, it can be noisy for mini batch/online
    'n_iter_no_change': [50],
    'nesterovs_momentum': [False], # we set it to false because we are not using full batch
    'power_t': [0.3, 0.4, 0.5, 0.6], # the power we want to explore for the invscaling
    'solver': ['sgd'],
    'tol': [1e-3],
    'random_state': [RANDOM_STATE]
}

# Grid search on both targets at once (Y_tr); the trailing 0 is n_iter, which is unused for 'grid'.
grid_all, df_sorted_all = search_function(X_tr, Y_tr, MODEL, 'grid', 0)

In [None]:
# Save the ranked cross-validation results of the two-output invscaling search (row index omitted).
df_sorted_all.to_csv('MLP_CUP_grid_results/df_sorted_all.csv', index=False)

In [None]:
# Display the best estimator found by the two-output invscaling grid search.
grid_all.best_estimator_

In [None]:
# Show the full parameter dictionaries (no column-width truncation) of the ten best-ranked settings.
pd.options.display.max_colwidth = None
df_sorted_all['params'].head(10)

In [None]:
%%time
# Work on an unfitted copy of the best two-output estimator.
model_all = clone(grid_all.best_estimator_)
# epochs=0: the model is fitted first and its n_iter_ sets the number of epochs replayed;
# plotting=True makes the call return the two figures as well.
tr_err_curve, val_err_curve, fig1, fig2 = MSE_vs_Epochs(model_all, X_inner_tr, X_val, Y_inner_tr, Y_val, 0, True)
fig1.savefig('images_MLP_CUP/model_Y/invscaling_model_Y_learn_curve.pdf')
fig2.savefig('images_MLP_CUP/model_Y/invscaling_model_Y_loss.pdf')
# Average the curves over 100 unseeded runs of the same length; the last returned item is the
# model trained up to the epoch with the lowest mean validation MSE.
fig, mean_err_tr_per_epoch, mean_err_val_per_epoch, std_err_tr_per_epoch, std_err_val_per_epoch, final_model_all = mean_learning_curve(100, model_all, X_inner_tr, X_val, Y_inner_tr, Y_val, len(tr_err_curve))
fig.savefig('images_MLP_CUP/model_Y/invscaling_model_Y_mean_learn_curve.pdf')

### MEE obtained (assessment)

In [None]:
# Predictions of the single two-output network on the test, inner-training and validation inputs.
Y_pred_tt, Y_pred_inner_tr, Y_pred_val = (
    final_model_all.predict(features) for features in (X_tt, X_inner_tr, X_val)
)
# Mean Euclidean Error on each of the three sets, one line per set.
print(
    f'MEE test = {mee(X_tt, Y_tt, Y_pred_tt)}',
    f'MEE inner training = {mee(X_inner_tr, Y_inner_tr, Y_pred_inner_tr)}',
    f'MEE validation = {mee(X_val, Y_val, Y_pred_val)}',
    sep='\n',
)

## 2 MLP networks

### - y1

In [None]:
%%time
# Search space for the invscaling experiments on a single target.
# search_function reads this module-level `param_grid` implicitly.
param_grid = {
    'activation': ['tanh', 'logistic'],
    'alpha': [0.001, 0.01, 0.1],
    'batch_size': [100],
    'early_stopping': [False],
    'hidden_layer_sizes': [10, 100, (10,10), (10,10,10)],
    'learning_rate': ['invscaling'], 
    'learning_rate_init': [0.1, 0.01, 0.001],
    'max_iter': [5000],
    'momentum': [0], 
    'n_iter_no_change': [50],
    'nesterovs_momentum': [False],
    'power_t': [0.3, 0.4, 0.5, 0.6],
    'solver': ['sgd'],
    'tol': [1e-3],
    'random_state': [RANDOM_STATE]
}

# Grid search for the first target only (y1_tr); the trailing 0 is n_iter, which is unused for 'grid'.
grid1, df_sorted1 = search_function(X_tr, y1_tr, MODEL, 'grid', 0)

In [None]:
# Save the ranked cross-validation results of the y1 invscaling search (row index omitted).
df_sorted1.to_csv('MLP_CUP_grid_results/df_sorted1.csv', index=False)

In [None]:
# Work on an unfitted copy of the best y1 estimator.
model1 = clone(grid1.best_estimator_)
# epochs=0: the model is fitted first and its n_iter_ sets the number of epochs replayed;
# plotting=True makes the call return the two figures as well.
tr_err_curve, val_err_curve, fig1, fig2 = MSE_vs_Epochs(model1, X_inner_tr, X_val, y1_inner_tr, y1_val, 0, True)
fig1.savefig('images_MLP_CUP/model_y1/invscaling_model_y1_learn_curve.pdf')
fig2.savefig('images_MLP_CUP/model_y1/invscaling_model_y1_loss.pdf')
# Average the curves over 10 unseeded runs of the same length; the last returned item is the
# model trained up to the epoch with the lowest mean validation MSE.
fig, mean_err_tr_per_epoch, mean_err_val_per_epoch, std_err_tr_per_epoch, std_err_val_per_epoch, final_model1 = mean_learning_curve(10, model1, X_inner_tr, X_val, y1_inner_tr, y1_val, len(val_err_curve))
fig.savefig('images_MLP_CUP/model_y1/invscaling_model_y1.pdf')

### - y2

In [None]:
%%time
# Search space for the invscaling experiments on a single target.
# search_function reads this module-level `param_grid` implicitly.
param_grid = {
    'activation': ['tanh', 'logistic'],
    'alpha': [0.001, 0.01, 0.1],
    'batch_size': [100],
    'early_stopping': [False],
    'hidden_layer_sizes': [10, 100, (10,10), (10,10,10)],
    'learning_rate': ['invscaling'], 
    'learning_rate_init': [0.1, 0.01, 0.001],
    'max_iter': [5000],
    'momentum': [0], 
    'n_iter_no_change': [50],
    'nesterovs_momentum': [False],
    'power_t': [0.3, 0.4, 0.5, 0.6],
    'solver': ['sgd'],
    'tol': [1e-3],
    'random_state': [RANDOM_STATE]
}

# Grid search for the second target only (y2_tr); the trailing 0 is n_iter, which is unused for 'grid'.
grid2, df_sorted2 = search_function(X_tr, y2_tr, MODEL, 'grid', 0)

In [None]:
# Save the ranked cross-validation results of the y2 invscaling search (row index omitted).
df_sorted2.to_csv('MLP_CUP_grid_results/df_sorted2.csv', index=False)

In [None]:
# Work on an unfitted copy of the best y2 estimator.
model2 = clone(grid2.best_estimator_)
# epochs=0: the model is fitted first and its n_iter_ sets the number of epochs replayed;
# plotting=True makes the call return the two figures as well.
tr_err_curve, val_err_curve, fig1, fig2 = MSE_vs_Epochs(model2, X_inner_tr, X_val, y2_inner_tr, y2_val, 0, True)
fig1.savefig('images_MLP_CUP/model_y2/invscaling_model_y2_learn_curve.pdf')
fig2.savefig('images_MLP_CUP/model_y2/invscaling_model_y2_loss.pdf')
# Average the curves over 10 unseeded runs of the same length; the last returned item is the
# model trained up to the epoch with the lowest mean validation MSE.
fig, mean_err_tr_per_epoch, mean_err_val_per_epoch, std_err_tr_per_epoch, std_err_val_per_epoch, final_model2 = mean_learning_curve(10, model2, X_inner_tr, X_val, y2_inner_tr, y2_val, len(val_err_curve))
# plt.savefig writes the current figure, i.e. the one created last (inside mean_learning_curve).
plt.savefig('images_MLP_CUP/model_y2/invscaling_model_y2.pdf')

### MEE obtained (assessment)

In [None]:
# Stack the two single-target predictions side by side: column 0 is y1, column 1 is y2.
Y_pred_inner_tr = np.column_stack([final_model1.predict(X_inner_tr), final_model2.predict(X_inner_tr)])
Y_pred_val = np.column_stack([final_model1.predict(X_val), final_model2.predict(X_val)])
Y_pred_tt = np.column_stack([final_model1.predict(X_tt), final_model2.predict(X_tt)])

# Per-target MAE, read from the stacked predictions instead of predicting a second time.
print(f'MAE inner training y1 = {mean_absolute_error(y1_inner_tr, Y_pred_inner_tr[:, 0])}')
print(f'MAE inner training y2 = {mean_absolute_error(y2_inner_tr, Y_pred_inner_tr[:, 1])}\n')

print(f'MAE validation y1 = {mean_absolute_error(y1_val, Y_pred_val[:, 0])}')
print(f'MAE validation y2 = {mean_absolute_error(y2_val, Y_pred_val[:, 1])}\n')

print(f'MAE test y1 = {mean_absolute_error(y1_tt, Y_pred_tt[:, 0])}')
print(f'MAE test y2 = {mean_absolute_error(y2_tt, Y_pred_tt[:, 1])}\n')

# Mean Euclidean Error of the combined two-column prediction.
print(
    f'MEE test = {mee(X_tt, Y_tt, Y_pred_tt)}',
    f'MEE inner training = {mee(X_inner_tr, Y_inner_tr, Y_pred_inner_tr)}',
    f'MEE validation = {mee(X_val, Y_val, Y_pred_val)}',
    sep='\n',
)

# More investigations using other learning rate types (n_iter_no_change = 50)

## y1

In [None]:
%%time
# Search space with constant/adaptive learning rates and optional momentum.
# search_function reads this module-level `param_grid` implicitly.
param_grid = {
    'hidden_layer_sizes' : [1,10,100],
    'activation' : ['relu', 'logistic', 'tanh'],
    'solver' : ['sgd'],
    'alpha' : [0, 1e-5, 1e-4, 1e-3, 1e-2],
    'batch_size' : [100],
    'learning_rate' : ['constant', 'adaptive'],
    'learning_rate_init' : [1e-4, 1e-3, 1e-2, 1e-1],
    'max_iter' : [5000], 
    'tol' : [1e-4], 
    'momentum' : [0, 0.01, 0.1], 
    'nesterovs_momentum' : [False], # the batch size is closer to online learning than to the full number of points
    'early_stopping' : [False], # not needed, given the initial hold-out
    'n_iter_no_change' : [50],
    'random_state' : [RANDOM_STATE]
}

# Grid search for the first target only (y1_tr); the trailing 0 is n_iter, which is unused for 'grid'.
grid1_1, df_sorted1_1 = search_function(X_tr, y1_tr, MODEL, 'grid', 0)

In [None]:
# Save the ranked cross-validation results of the y1 constant/adaptive search (row index omitted).
df_sorted1_1.to_csv('MLP_CUP_grid_results/df_sorted1_1.csv', index=False)

In [None]:
# Work on an unfitted copy of the best y1 estimator from the constant/adaptive search.
model1_1 = clone(grid1_1.best_estimator_)
# Unlike the other cells, the number of epochs is fixed to 1000 here instead of being
# derived from the model's own stopping point (epochs=0).
tr_err_curve, val_err_curve, fig1, fig2 = MSE_vs_Epochs(model1_1, X_inner_tr, X_val, y1_inner_tr, y1_val, 1000, True)
fig1.savefig('images_MLP_CUP/model_y1/other_lr_model_y1_learn_curve.pdf')
fig2.savefig('images_MLP_CUP/model_y1/other_lr_model_y1_loss.pdf')
# Average the curves over 10 unseeded runs of the same length; the last returned item is the
# model trained up to the epoch with the lowest mean validation MSE.
fig, mean_err_tr_per_epoch, mean_err_val_per_epoch, std_err_tr_per_epoch, std_err_val_per_epoch, final_model1_1 = mean_learning_curve(10, model1_1, X_inner_tr, X_val, y1_inner_tr, y1_val, len(val_err_curve))
# plt.savefig writes the current figure, i.e. the one created last (inside mean_learning_curve).
plt.savefig('images_MLP_CUP/model_y1/other_lr_model1_1y1.pdf')

## y2

In [None]:
%%time
# Search space with constant/adaptive learning rates and optional momentum.
# search_function reads this module-level `param_grid` implicitly.
param_grid = {
    'hidden_layer_sizes' : [1,10,100],
    'activation' : ['relu', 'logistic', 'tanh'],
    'solver' : ['sgd'],
    'alpha' : [0, 1e-5, 1e-4, 1e-3, 1e-2],
    'batch_size' : [100],
    'learning_rate' : ['constant', 'adaptive'],
    'learning_rate_init' : [1e-4, 1e-3, 1e-2, 1e-1],
    'max_iter' : [5000], 
    'tol' : [1e-4], 
    'momentum' : [0, 0.01, 0.1], 
    'nesterovs_momentum' : [False], # the batch size is closer to online learning than to the full number of points
    'early_stopping' : [False], # not needed, given the initial hold-out
    'n_iter_no_change' : [50],
    'random_state' : [RANDOM_STATE]
}

# Grid search for the second target only (y2_tr); the trailing 0 is n_iter, which is unused for 'grid'.
grid2_1, df_sorted2_1 = search_function(X_tr, y2_tr, MODEL, 'grid', 0)

In [None]:
# Save the ranked cross-validation results of the y2 constant/adaptive search (row index omitted).
df_sorted2_1.to_csv('MLP_CUP_grid_results/df_sorted2_1.csv', index=False)

In [None]:
# Work on an unfitted copy of the best y2 estimator from the constant/adaptive search.
# This must come from grid2_1 (the y2 search of this section); cloning grid1 would
# silently reuse the invscaling model that was tuned for y1.
model2_1 = clone(grid2_1.best_estimator_)
# epochs=0: the model is fitted first and its n_iter_ sets the number of epochs replayed;
# plotting=True makes the call return the two figures as well.
tr_err_curve, val_err_curve, fig1, fig2 = MSE_vs_Epochs(model2_1, X_inner_tr, X_val, y2_inner_tr, y2_val, 0, True)
fig1.savefig('images_MLP_CUP/model_y2/other_lr_model_y2_learn_curve.pdf')
fig2.savefig('images_MLP_CUP/model_y2/other_lr_model_y2_loss.pdf')
# Average the curves over 10 unseeded runs of the same length; the last returned item is the
# model trained up to the epoch with the lowest mean validation MSE.
fig, mean_err_tr_per_epoch, mean_err_val_per_epoch, std_err_tr_per_epoch, std_err_val_per_epoch, final_model2_1 = mean_learning_curve(10, model2_1, X_inner_tr, X_val, y2_inner_tr, y2_val, len(val_err_curve))
# NOTE(review): the file name ends in 'y1' although this is the y2 model; the path is
# left unchanged so existing references to it keep working.
fig.savefig('images_MLP_CUP/model_y2/other_lr_model2_1y1.pdf')

### MEE obtained (assessment)

In [None]:
# Stack the two single-target predictions side by side: column 0 is y1, column 1 is y2.
Y_pred_inner_tr = np.column_stack([final_model1_1.predict(X_inner_tr), final_model2_1.predict(X_inner_tr)])
Y_pred_val = np.column_stack([final_model1_1.predict(X_val), final_model2_1.predict(X_val)])
Y_pred_tt = np.column_stack([final_model1_1.predict(X_tt), final_model2_1.predict(X_tt)])

# Per-target MAE, read from the stacked predictions instead of predicting a second time.
print(f'MAE inner training y1 = {mean_absolute_error(y1_inner_tr, Y_pred_inner_tr[:, 0])}')
print(f'MAE inner training y2 = {mean_absolute_error(y2_inner_tr, Y_pred_inner_tr[:, 1])}\n')

print(f'MAE validation y1 = {mean_absolute_error(y1_val, Y_pred_val[:, 0])}')
print(f'MAE validation y2 = {mean_absolute_error(y2_val, Y_pred_val[:, 1])}\n')

print(f'MAE test y1 = {mean_absolute_error(y1_tt, Y_pred_tt[:, 0])}')
print(f'MAE test y2 = {mean_absolute_error(y2_tt, Y_pred_tt[:, 1])}\n')

# Mean Euclidean Error of the combined two-column prediction.
print(
    f'MEE test = {mee(X_tt, Y_tt, Y_pred_tt)}',
    f'MEE inner training = {mee(X_inner_tr, Y_inner_tr, Y_pred_inner_tr)}',
    f'MEE validation = {mee(X_val, Y_val, Y_pred_val)}',
    sep='\n',
)

## Single network MLP

In [None]:
%%time
# Search space with constant/adaptive learning rates and optional momentum.
# search_function reads this module-level `param_grid` implicitly.
param_grid = {
    'hidden_layer_sizes' : [1,10,100],
    'activation' : ['relu', 'logistic', 'tanh'],
    'solver' : ['sgd'],
    'alpha' : [0, 1e-5, 1e-4, 1e-3, 1e-2],
    'batch_size' : [100],
    'learning_rate' : ['constant', 'adaptive'],
    'learning_rate_init' : [1e-4, 1e-3, 1e-2, 1e-1],
    'max_iter' : [5000], 
    'tol' : [1e-4], 
    'momentum' : [0, 0.01, 0.1], 
    'nesterovs_momentum' : [False], # the batch size is closer to online learning than to the full number of points
    'early_stopping' : [False], # not needed, given the initial hold-out
    'n_iter_no_change' : [50],
    'random_state' : [RANDOM_STATE]
}

# Grid search on both targets at once (Y_tr); the trailing 0 is n_iter, which is unused for 'grid'.
grid_all_1, df_sorted_all_1 = search_function(X_tr, Y_tr, MODEL, 'grid', 0)

In [None]:
# Parameters with mean and std of the CV score for the ten best-ranked settings.
df_sorted_all_1.loc[:, ['params', 'mean_test_score', 'std_test_score']].head(10)

In [None]:
model_all_1 = clone(grid_all_1.best_estimator_)
tr_err_curve, val_err_curve = MSE_vs_Epochs(model_all_1, X_inner_tr, X_val, Y_inner_tr, Y_val, 0, True)
fig, mean_err_tr_per_epoch, mean_err_val_per_epoch, std_err_tr_per_epoch, std_err_val_per_epoch = mean_learning_curve(10, model_all_1, X_inner_tr, X_val, Y_inner_tr, Y_val, 0)
plt.savefig('images/model_1Y.pdf')

### MEE obtained (assessment)

In [None]:
Y_pred = np.column_stack([final_model1_1.predict(X_tt), final_model2_1.predict(X_tt)])
mee(X_tt, Y_tt, Y_pred)

# n_iter_no_change = 50 was chosen to enforce strict convergence conditions, but the runs took 4 hours: in many cases the MLP reaches 5000 iterations without ever satisfying the required n_iter_no_change. Let's lower n_iter_no_change to 10, raise tol to 1e-3 and reduce max_iter to 2000

## y1

In [None]:
%%time
# Cheaper search space: looser tolerance, fewer iterations and a shorter patience.
# search_function reads this module-level `param_grid` implicitly.
param_grid = {
    'hidden_layer_sizes' : [10, 100, (10,10), (10,10,10)],
    'activation' : ['relu', 'logistic', 'tanh'],
    'solver' : ['sgd'],
    'alpha' : [0, 1e-5, 1e-4, 1e-3],
    'batch_size' : [100],
    'learning_rate' : ['constant', 'adaptive'],
    'learning_rate_init' : [1e-3, 1e-2, 1e-1],
    'max_iter' : [2000], 
    'tol' : [1e-3], 
    'momentum' : [0, 0.01, 0.1], 
    'nesterovs_momentum' : [False], # the batch size is closer to online learning than to the full number of points
    'early_stopping' : [False], # not needed, given the initial hold-out
    'n_iter_no_change' : [10],
    'random_state' : [RANDOM_STATE]
}

# Grid search for the first target only (y1_tr); the trailing 0 is n_iter, which is unused for 'grid'.
grid1_2, df_sorted1_2 = search_function(X_tr, y1_tr, MODEL, 'grid', 0)

In [None]:
# Save the ranked cross-validation results of the cheaper y1 search (row index omitted).
df_sorted1_2.to_csv('MLP_CUP_grid_results/df_sorted1_2.csv', index=False)

In [None]:
# Work on an unfitted copy of the best y1 estimator from the cheaper search.
model1_2 = clone(grid1_2.best_estimator_)
# epochs=0: the model is fitted first and its n_iter_ sets the number of epochs replayed;
# plotting=True makes the call return the two figures as well.
tr_err_curve, val_err_curve, fig1, fig2 = MSE_vs_Epochs(model1_2, X_inner_tr, X_val, y1_inner_tr, y1_val, 0, True)
fig1.savefig('images_MLP_CUP/model_y1/other_lr_lessruns_model_y1_learn_curve.pdf')
fig2.savefig('images_MLP_CUP/model_y1/other_lr_lessruns_model_y1_loss.pdf')
# Average the curves over 10 unseeded runs of the same length; the last returned item is the
# model trained up to the epoch with the lowest mean validation MSE.
fig, mean_err_tr_per_epoch, mean_err_val_per_epoch, std_err_tr_per_epoch, std_err_val_per_epoch, final_model1_2 = mean_learning_curve(10, model1_2, X_inner_tr, X_val, y1_inner_tr, y1_val, len(val_err_curve))
# plt.savefig writes the current figure, i.e. the one created last (inside mean_learning_curve).
plt.savefig('images_MLP_CUP/model_y1/other_lr_lessruns_model1_2y1.pdf')

## y2

In [None]:
%%time
# Cheaper search space: looser tolerance, fewer iterations and a shorter patience.
# search_function reads this module-level `param_grid` implicitly.
param_grid = {
    'hidden_layer_sizes' : [10, 100, (10,10), (10,10,10)],
    'activation' : ['relu', 'logistic', 'tanh'],
    'solver' : ['sgd'],
    'alpha' : [0, 1e-5, 1e-4, 1e-3],
    'batch_size' : [100],
    'learning_rate' : ['constant', 'adaptive'],
    'learning_rate_init' : [1e-3, 1e-2, 1e-1],
    'max_iter' : [2000], 
    'tol' : [1e-3], 
    'momentum' : [0, 0.01, 0.1], 
    'nesterovs_momentum' : [False], # the batch size is closer to online learning than to the full number of points
    'early_stopping' : [False], # not needed, given the initial hold-out
    'n_iter_no_change' : [10],
    'random_state' : [RANDOM_STATE]
}

# Grid search for the second target only (y2_tr); the trailing 0 is n_iter, which is unused for 'grid'.
grid2_2, df_sorted2_2 = search_function(X_tr, y2_tr, MODEL, 'grid', 0)

In [None]:
# Save the ranked cross-validation results of the cheaper y2 search (row index omitted).
df_sorted2_2.to_csv('MLP_CUP_grid_results/df_sorted2_2.csv', index=False)

In [None]:
# Display the best estimator found by the cheaper y2 grid search.
grid2_2.best_estimator_

In [None]:
# Work on an unfitted copy of the best y2 estimator from the cheaper search.
model2_2 = clone(grid2_2.best_estimator_)
# epochs=0: the model is fitted first and its n_iter_ sets the number of epochs replayed;
# plotting=True makes the call return the two figures as well.
tr_err_curve, val_err_curve, fig1, fig2 = MSE_vs_Epochs(model2_2, X_inner_tr, X_val, y2_inner_tr, y2_val, 0, True)
fig1.savefig('images_MLP_CUP/model_y2/other_lr_lessruns_model_y2_learn_curve.pdf')
fig2.savefig('images_MLP_CUP/model_y2/other_lr_lessruns_model_y2_loss.pdf')
# Average the curves over 10 unseeded runs of the same length; the last returned item is the
# model trained up to the epoch with the lowest mean validation MSE.
fig, mean_err_tr_per_epoch, mean_err_val_per_epoch, std_err_tr_per_epoch, std_err_val_per_epoch, final_model2_2 = mean_learning_curve(10, model2_2, X_inner_tr, X_val, y2_inner_tr, y2_val, len(val_err_curve))
# plt.savefig writes the current figure, i.e. the one created last (inside mean_learning_curve).
plt.savefig('images_MLP_CUP/model_y2/other_lr_lessruns_model2_2y2.pdf')

### MEE obtained (assessment)

In [None]:
# Stack the two single-target predictions side by side: column 0 is y1, column 1 is y2.
Y_pred_inner_tr = np.column_stack([final_model1_2.predict(X_inner_tr), final_model2_2.predict(X_inner_tr)])
Y_pred_val = np.column_stack([final_model1_2.predict(X_val), final_model2_2.predict(X_val)])
Y_pred_tt = np.column_stack([final_model1_2.predict(X_tt), final_model2_2.predict(X_tt)])

# Per-target MAE, read from the stacked predictions instead of predicting a second time.
print(f'MAE inner training y1 = {mean_absolute_error(y1_inner_tr, Y_pred_inner_tr[:, 0])}')
print(f'MAE inner training y2 = {mean_absolute_error(y2_inner_tr, Y_pred_inner_tr[:, 1])}\n')

print(f'MAE validation y1 = {mean_absolute_error(y1_val, Y_pred_val[:, 0])}')
print(f'MAE validation y2 = {mean_absolute_error(y2_val, Y_pred_val[:, 1])}\n')

print(f'MAE test y1 = {mean_absolute_error(y1_tt, Y_pred_tt[:, 0])}')
print(f'MAE test y2 = {mean_absolute_error(y2_tt, Y_pred_tt[:, 1])}\n')

# Mean Euclidean Error of the combined two-column prediction.
print(
    f'MEE test = {mee(X_tt, Y_tt, Y_pred_tt)}',
    f'MEE inner training = {mee(X_inner_tr, Y_inner_tr, Y_pred_inner_tr)}',
    f'MEE validation = {mee(X_val, Y_val, Y_pred_val)}',
    sep='\n',
)

## Single network MLP


In [None]:
%%time
# Cheaper search space: looser tolerance, fewer iterations and a shorter patience.
# search_function reads this module-level `param_grid` implicitly.
param_grid = {
    'hidden_layer_sizes' : [10, 100, (10,10), (10,10,10)],
    'activation' : ['relu', 'logistic', 'tanh'],
    'solver' : ['sgd'],
    'alpha' : [0, 1e-5, 1e-4, 1e-3],
    'batch_size' : [100],
    'learning_rate' : ['constant', 'adaptive'],
    'learning_rate_init' : [1e-3, 1e-2, 1e-1],
    'max_iter' : [2000], 
    'tol' : [1e-3], 
    'momentum' : [0, 0.01, 0.1], 
    'nesterovs_momentum' : [False], # the batch size is closer to online learning than to the full number of points
    'early_stopping' : [False], # not needed, given the initial hold-out
    'n_iter_no_change' : [10],
    'random_state' : [RANDOM_STATE]
}

# Grid search on both targets at once (Y_tr); the trailing 0 is n_iter, which is unused for 'grid'.
grid_all_2, df_sorted_all_2 = search_function(X_tr, Y_tr, MODEL, 'grid', 0)

In [None]:
# Save the ranked cross-validation results of the cheaper two-output search (row index omitted).
df_sorted_all_2.to_csv('MLP_CUP_grid_results/df_sorted_all_2.csv', index=False)

In [None]:
# Display the best estimator found by the cheaper two-output grid search.
grid_all_2.best_estimator_

In [None]:
# Work on an unfitted copy of the best two-output estimator from the cheaper search.
model_all_2 = clone(grid_all_2.best_estimator_)
# epochs=0: the model is fitted first and its n_iter_ sets the number of epochs replayed;
# plotting=True makes the call return the two figures as well.
tr_err_curve, val_err_curve, fig1, fig2 = MSE_vs_Epochs(model_all_2, X_inner_tr, X_val, Y_inner_tr, Y_val, 0, True)
fig1.savefig('images_MLP_CUP/model_Y/other_lr_lessruns_model_Y_learn_curve.pdf')
fig2.savefig('images_MLP_CUP/model_Y/other_lr_lessruns_model_Y_loss.pdf')
# Average the curves over 10 unseeded runs of the same length; the last returned item is the
# model trained up to the epoch with the lowest mean validation MSE.
fig, mean_err_tr_per_epoch, mean_err_val_per_epoch, std_err_tr_per_epoch, std_err_val_per_epoch, final_model_all_2 = mean_learning_curve(10, model_all_2, X_inner_tr, X_val, Y_inner_tr, Y_val, len(val_err_curve))
# plt.savefig writes the current figure, i.e. the one created last (inside mean_learning_curve).
plt.savefig('images_MLP_CUP/model_Y/other_lr_lessruns_model_2Y.pdf')

### MEE obtained (assessment)

In [None]:
# Predictions of the single two-output network on the test, inner-training and validation inputs.
Y_pred_tt, Y_pred_inner_tr, Y_pred_val = (
    final_model_all_2.predict(features) for features in (X_tt, X_inner_tr, X_val)
)

# Mean Euclidean Error on each of the three sets, one line per set.
print(
    f'MEE test = {mee(X_tt, Y_tt, Y_pred_tt)}',
    f'MEE inner training = {mee(X_inner_tr, Y_inner_tr, Y_pred_inner_tr)}',
    f'MEE validation = {mee(X_val, Y_val, Y_pred_val)}',
    sep='\n',
)