# Neural Network with Keras

## Import Libraries

In [None]:
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.optimizers import SGD
import keras
from keras.utils import plot_model
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, KFold


In [None]:
from sklearn.metrics import mean_squared_error
import keras.backend as K


In [None]:
from scikeras.wrappers import KerasRegressor

## Read the Dataset

In [None]:
def ReadFile(s):
    """Load the labelled ML-CUP CSV file at path `s` into a DataFrame.

    The first 7 lines of the file are skipped, and the remaining rows are
    read as comma-separated values with the fixed column names
    Id, i1..i10, Y1..Y3. The 'Id' column becomes the index.
    """
    input_names = [f'i{k}' for k in range(1, 11)]
    target_names = ['Y1', 'Y2', 'Y3']
    frame = pd.read_csv(
        s,
        sep=",",
        names=['Id'] + input_names + target_names,
        skiprows=7,
    )
    return frame.set_index('Id')



In [None]:
# Load the labelled training set; the path is relative to the working directory.
data=ReadFile("Dataset_Cup/ML-CUP23-TR.csv")

In [None]:
# Display the loaded training DataFrame.
data

The dataset is split into feature values and target values

In [None]:
# 'Id' is the index, so positional columns 0-9 are the inputs i1..i10
# and positional columns 10-12 are the targets Y1..Y3.
featureTrain=data.iloc[:,0:10]
TargetTrain=data.iloc[:,10:13]


In [None]:
# Display the target columns.
TargetTrain

In [None]:
# Display the feature columns.
featureTrain

## Definition of the function to compute the MEE

In [None]:
def mean_euclidean_error(y_true, y_pred):
    """Keras metric: Euclidean distance between each target and its prediction.

    For every sample the squared differences are summed over the last
    (output) axis and the square root is taken, giving the Euclidean norm of
    the error vector. One value per sample is returned; Keras averages the
    per-sample values of a function metric, which yields the Mean Euclidean
    Error (MEE).

    The squared differences must be summed, not averaged, over the output
    axis: averaging gives a per-sample RMSE, which under-reports the
    Euclidean error by a factor of sqrt(number of outputs).
    """
    return K.sqrt(K.sum(K.square(y_pred - y_true), axis=-1))

## Function to create the NN

In [None]:
def get_Model(h_units,learning_rate,momentum,num_hidden_layers):
    """Build and compile a fully connected regression network.

    Parameters:
        h_units: number of units in every hidden layer.
        learning_rate: learning rate passed to the SGD optimizer.
        momentum: momentum passed to the SGD optimizer.
        num_hidden_layers: total number of tanh hidden layers. The first one
            is always created, so values below 1 still give one hidden layer.

    Returns:
        A compiled Sequential model with 10 inputs, tanh hidden layers and a
        3-unit linear output, trained with MSE loss and reporting
        `mean_euclidean_error` as a metric.
    """
    # The first hidden layer also declares the input size.
    stack = [Dense(h_units, activation='tanh', input_dim=10)]
    # Remaining hidden layers, if any.
    stack.extend(
        Dense(h_units, activation='tanh') for _ in range(num_hidden_layers - 1)
    )
    # Linear output layer, one unit per target.
    stack.append(Dense(units=3, activation='linear'))

    network = Sequential()
    for layer in stack:
        network.add(layer)

    network.compile(
        optimizer=SGD(learning_rate=learning_rate, momentum=momentum),
        loss='mean_squared_error',
        metrics=[mean_euclidean_error],
    )
    return network

## Model Selection: Grid Search with 3-Fold Cross-Validation

In [None]:
def model_selection(x, y):
    """Grid-search the hyperparameters of `get_Model` with 3-fold CV.

    The grid covers learning rate, batch size, hidden units, momentum and
    number of hidden layers; each candidate is trained for 250 epochs.
    The best score and parameters are printed, followed by the mean and
    standard deviation of the test score for every combination.

    NOTE(review): no `scoring` argument is given, so candidates are ranked by
    the estimator's own `score` method (presumably R^2 for KerasRegressor),
    not by the MEE metric. Confirm this is intended.

    Parameters:
        x: feature matrix used for the search.
        y: target matrix used for the search.

    Returns:
        The dictionary of best parameters found by the grid search.
    """
    regressor = KerasRegressor(model=get_Model, verbose=0, epochs=250)

    # `model__*` keys are routed to get_Model; `batch_size` goes to fit.
    search_space = {
        "model__learning_rate": [0.002, 0.01, 0.0008],
        "batch_size": [8, 16, 32],
        "model__h_units": [30, 50, 100],
        "model__momentum": [0.0, 0.5, 0.8],
        'model__num_hidden_layers': [1, 2, 3],
    }

    search = GridSearchCV(
        estimator=regressor,
        param_grid=search_space,
        n_jobs=-1,
        cv=3,
        verbose=4,
    )
    fitted = search.fit(x, y)

    # Summarize results.
    print("Best: %f using %s" % (fitted.best_score_, fitted.best_params_))
    report = fitted.cv_results_
    per_candidate = zip(
        report['mean_test_score'],
        report['std_test_score'],
        report['params'],
    )
    for avg_score, score_std, candidate in per_candidate:
        print("%f (%f) with: %r" % (avg_score, score_std, candidate))
    return fitted.best_params_

In [None]:
# Hold out 25% of the labelled data as an internal test set; the fixed
# random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(featureTrain.values, TargetTrain.values, test_size=0.25,random_state=32)


### Time to perform model selection: about 3 hours
Best hyperparameters found:
 * h_units (units per hidden layer): 100
 * batch_size: 8
 * epochs: 250 (fixed, not part of the grid)
 * learning_rate: 0.002
 * num_hidden_layers: 3
 * momentum: 0.5

In [None]:
# Run the first grid search on the training split only and keep the best parameters.
bestParame=model_selection(X_train,y_train)

## Model Evaluation

Build the model with the hyperparameters selected by the grid search

In [None]:
# NOTE(review): in the visible cells `model` is rebound by a later
# get_Model_2 call before any fit, so this instance is never trained.
model=get_Model(100,0.002,0.5,3) # Parameters chosen from the Grid Search result

In [None]:
# NOTE(review): n_epochs is not referenced by the visible fit call, which
# passes its own epochs value; confirm whether it is still needed.
n_epochs=250

# Adding Regularization (Weight Decay) and Dropout

Second model selection (time: about 30 minutes).
Results:
 * Dropout rate: 0
 * Regularization (weight decay): 0.001

In [None]:
# Model using the hyperparameters chosen by the first model selection, plus
# additional ones still to be selected (dropout_rate, weight_decay).
def get_Model_2(dropout_rate,weight_decay,activation='tanh'):
    """Build and compile the 3-hidden-layer network with optional regularization.

    Parameters:
        dropout_rate: rate of the Dropout layer added after every hidden
            layer; when 0, no Dropout layers are added.
        weight_decay: weight decay passed to the SGD optimizer.
        activation: activation of the hidden layers. It defaults to 'tanh'
            so the function can be called by a grid search that only
            supplies dropout_rate and weight_decay.

    Returns:
        A compiled Sequential model with 10 inputs, three 100-unit hidden
        layers and a 3-unit linear output, trained with MSE loss and
        reporting `mean_euclidean_error` as a metric.
    """
    model=Sequential()
    model.add(Dense(100,activation=activation,input_dim=10))
    if dropout_rate!=0:
        model.add(Dropout(rate=dropout_rate))
    # Two more hidden layers, for three in total.
    for _ in range(1, 3):
        model.add(Dense(100,activation=activation))
        if dropout_rate!=0:
            model.add(Dropout(rate=dropout_rate))

    model.add(Dense(units=3,activation='linear'))

    # NOTE(review): the learning rate here is 0.0002, while the notebook
    # reports 0.002 as the first grid search's result; confirm which is intended.
    opt = SGD(learning_rate=0.0002,momentum=0.5,weight_decay=weight_decay)
    model.compile(optimizer=opt, loss='mean_squared_error', metrics=[mean_euclidean_error])
    return model

In [None]:
def model_selection2(x, y):
    """Grid-search dropout rate and weight decay for `get_Model_2` with 3-fold CV.

    Each candidate is trained for 600 epochs. The best score and parameters
    are printed, followed by the mean and standard deviation of the test
    score for every combination.

    Parameters:
        x: feature matrix used for the search.
        y: target matrix used for the search.

    Returns:
        The dictionary of best parameters found by the grid search.
    """
    # The hidden-layer activation is not searched, so it is fixed here;
    # otherwise get_Model_2 would be called without it.
    model=KerasRegressor(model=get_Model_2,model__activation='tanh',verbose=0,epochs=600)

    param_grid={
        'model__dropout_rate': [0, 0.1,0.03],
        "model__weight_decay": [0.0001, 0.001, 0.01],
    }

    grid=GridSearchCV(estimator=model,param_grid=param_grid,n_jobs=-1,cv=3,verbose=4)
    grid_result=grid.fit(x,y)

    # Summarize results.
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    candidates = grid_result.cv_results_['params']
    for mean, stdev, param in zip(means, stds, candidates):
        print("%f (%f) with: %r" % (mean, stdev, param))
    return grid_result.best_params_

In [None]:
# Run the second grid search (dropout rate and weight decay) on the training split.
bestParam2=model_selection2(X_train,y_train)

In [None]:
# Final model: no dropout, weight decay 0.001, tanh hidden layers.
model=get_Model_2(0,0.001,activation='tanh')

## Training the New Model

In [None]:
# Train for 450 epochs with batch size 8, using 20% of the training split
# for validation; `hist` keeps the per-epoch loss and metric values.
hist=model.fit(X_train,y_train,epochs=450,batch_size=8,validation_split=0.2)

In [None]:
# Plot the training and validation curves per epoch:
# first the MEE metric, then the MSE loss.
for metric_key, plot_title, y_label in (
    ('mean_euclidean_error', 'model mean euclidean error', 'MEE'),
    ('loss', 'model loss', 'MSE'),
):
    plt.plot(hist.history[metric_key], label='train')
    plt.plot(hist.history['val_' + metric_key], label='Validation', ls='--')
    plt.title(plot_title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(loc='center right')
    plt.grid()
    plt.show()

# Results on the Training, Validation and Test Sets

In [None]:
# Per-epoch series recorded during training: loss (MSE) and MEE metric,
# for the training data and for the validation split.
tr_los=hist.history['loss']
val_los=hist.history['val_loss']
tr_mee=hist.history['mean_euclidean_error']
val_mee=hist.history['val_mean_euclidean_error']

In [None]:
# Report the MSE of the last training epoch.
print("Training Set MSE:", tr_los[-1])
print("Validation Set MSE:", val_los[-1 ])


In [None]:

# Report the MEE metric of the last training epoch.
print("Training Set MEE:", tr_mee[-1])
print("Validation Set MEE:", val_mee[-1 ])

In [None]:
# Evaluate the trained model on the internal test set.
predictions = model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
# Mean Euclidean Error: the Euclidean norm of each sample's error vector,
# averaged over the samples. sqrt(mean(square)) over all elements would be
# the global RMSE, not the MEE.
mee=np.mean(np.linalg.norm(predictions - y_test, axis=1))

In [None]:
# Report the metrics computed on the internal test set.
print("Test Set MSE:",mse)
print("Test Set MEE:",mee)


# Writing the Predictions for the Blind Test Set

In [None]:
def ReadTest(s):
    """Load the unlabelled (blind test) ML-CUP CSV file at path `s`.

    The first 7 lines of the file are skipped, and the remaining rows are
    read as comma-separated values with the fixed column names Id, i1..i10.
    The 'Id' column becomes the index.
    """
    header = ['Id']
    header.extend('i' + str(k) for k in range(1, 11))
    table = pd.read_csv(s, sep=",", names=header, skiprows=7)
    return table.set_index('Id')

In [None]:
# Load the blind test set; the path is relative to the working directory.
datasetTest=ReadTest('Dataset_Cup/ML-CUP23-TS.csv')

In [None]:
# Predict the three targets for every blind-test row.
# NOTE(review): the model was fitted on NumPy arrays (`.values`) but is given
# a DataFrame here; confirm the installed Keras version accepts it.
ypred=model.predict(datasetTest)

In [None]:
# Display the blind-test predictions.
ypred

In [None]:
def write_results(y_pred):
    """Write the blind-test predictions to "Big_Human_ML-CUP23-TS.csv".

    The file starts with four '#' header lines (team members, team name,
    dataset name, date), followed by one line per prediction in the form
    `id,y1,y2,y3`, with ids starting at 1. It is created in the current
    working directory and overwritten if it already exists.

    Parameters:
        y_pred: sequence of exactly 900 predictions, each indexable at
            positions 0, 1 and 2.

    Raises:
        AssertionError: if `y_pred` does not contain exactly 900 rows.
    """
    # Raised explicitly (not via `assert`) so the check survives `python -O`.
    if len(y_pred) != 900:
        raise AssertionError("Not enough data were predicted!")

    file = "Big_Human_ML-CUP23-TS.csv"
    # The `with` block closes the file; no explicit close() is needed.
    with open(file, "w", encoding="utf-8") as f:
        print("# Francesco Caprari \t Francesco Botrugno \t Agnese Camici", file=f)
        print("# Big_Human", file=f)
        print("# ML-CUP23", file=f)
        print("# 15/01/2023", file=f)

        for pred_id, p in enumerate(y_pred, start=1):
            print("{},{},{},{}".format(pred_id, p[0], p[1], p[2]), file=f)

In [None]:
# Save the blind-test predictions to the submission CSV file.
write_results(ypred)