# Preliminary operations

In [1]:
# import main libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from collections import defaultdict
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.preprocessing import StandardScaler

from sklearn.neural_network import MLPRegressor

from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import KFold

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

In [2]:
# Mount Google Drive (Colab only) so the dataset files stored under MyDrive can be read.
# NOTE(review): the relative "drive/MyDrive/..." paths used later presumably rely on the
# working directory being /content — confirm if this is run outside a default Colab session.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Columns to be predicted; every remaining column is used as an input feature.
target_cols = ["std", "mfcc_kur", "mfcc_kur_w2"]

# --- training split ---
df_data_train = pd.read_excel(
    "drive/MyDrive/Progetto Data Mining 2/CODICE PROGETTO/Outliers/DATASET NO OUTLIERS/df_prep_TRAIN_no_outliers.xlsx",
    index_col="Unnamed: 0",
)

# features as a plain NumPy array, targets kept as a DataFrame
X_train = df_data_train.drop(columns=target_cols).values
y_train = df_data_train[target_cols]

# --- test split ---
df_data_test = pd.read_excel(
    "drive/MyDrive/Progetto Data Mining 2/CODICE PROGETTO/Outliers/DATASET NO OUTLIERS/df_prep_TEST_no_outliers.xlsx",
    index_col="Unnamed: 0",
)

# same feature/target separation as for the training split
X_test = df_data_test.drop(columns=target_cols).values
y_test = df_data_test[target_cols]


# Hyperparameter Tuning

In [None]:
# Base estimator: an MLP regressor with a fixed seed so every candidate
# starts from a repeatable weight initialisation.
reg = MLPRegressor(
    random_state=0,
)

# Discrete search space: 4 * 3 * 2 * 3 = 72 combinations.
# NOTE(review): per the scikit-learn docs, `learning_rate` is only used when
# solver="sgd"; with the default solver the two values should give identical
# fits, so this dimension likely doubles the search cost for nothing — confirm
# and consider dropping it (kept here so best_params_/cv_results_ keep their keys).
param_grid = {
    "hidden_layer_sizes": [(16,), (32,), (64,), (128,)],
    "activation": ["logistic", "tanh", "relu"],
    "learning_rate": ["constant", "adaptive"],
    "alpha": [0.0001, 0.01, 1],
}

# Total number of distinct combinations in the (all-list) search space.
n_candidates = int(np.prod([len(values) for values in param_grid.values()]))

# 10-fold cross-validation; folds are contiguous because shuffling is left off.
# NOTE(review): if the rows of the training file are ordered in any meaningful
# way, consider KFold(shuffle=True, random_state=...) — not changed here
# because it would alter the recorded scores.
KF = KFold(
    n_splits=10,
)

# Randomised search over the space above. `n_iter` is capped at the size of
# the space: asking for more iterations than there are combinations only
# triggers a warning and falls back to evaluating every combination once.
# Scoring is left at its default, i.e. the estimator's own `score` method.
grid = RandomizedSearchCV(
    reg,
    n_iter=min(100, n_candidates),
    param_distributions=param_grid,
    cv=KF,
    n_jobs=-1,
    refit=True,
    verbose=2,
    random_state=0,
)

# run the search; with refit=True the best configuration is refitted on all of X_train
grid.fit(X_train, y_train)



Fitting 10 folds for each of 72 candidates, totalling 720 fits




In [None]:
# Display the hyperparameter combination selected by the search
grid.best_params_

{'learning_rate': 'constant',
 'hidden_layer_sizes': (128,),
 'alpha': 1,
 'activation': 'relu'}

In [None]:
# Report, in order: the selected hyperparameters, the estimator refitted with
# them, and the best cross-validated score found by the search.
for summary_item in (grid.best_params_, grid.best_estimator_, grid.best_score_):
    print(summary_item)

{'learning_rate': 'constant', 'hidden_layer_sizes': (128,), 'alpha': 1, 'activation': 'relu'}
MLPRegressor(alpha=1, hidden_layer_sizes=(128,), random_state=0)
0.8236732546766554


In [None]:
# Turn the search's cv_results_ dictionary into a table (one column per key) and display it
grid_scores = pd.DataFrame.from_dict(grid.cv_results_)
grid_scores

In [None]:
# Save the search results table to an Excel file (relative path, so it lands in the current working directory)
grid_scores.to_excel("tuning_scores_MLP_reg.xlsx")

# MLP Regressor

In [4]:
# Final model: the explicitly set hyperparameters match the best estimator
# printed by the tuning section; everything else stays at library defaults.
regr = MLPRegressor(
    hidden_layer_sizes=(128,),
    alpha=1,
    random_state=0,
)

In [5]:
# Train on the training split, then predict the targets for the test split
# (fit returns the estimator itself, so the calls can be chained).
y_pred_test = regr.fit(X_train, y_train).predict(X_test)



In [6]:
# Test-set R², MAE and MSE, each printed as a single aggregate value
# (the metrics' default handling of multiple target columns).
for metric in (r2_score, mean_absolute_error, mean_squared_error):
    print(metric(y_test, y_pred_test))

0.7996215731310888
0.29884895778754356
0.19413433740753963


In [8]:
# Test-set R², MAE and MSE broken down per target column
# (one value per column of y_test, in column order).
for metric in (r2_score, mean_absolute_error, mean_squared_error):
    print(metric(y_test, y_pred_test, multioutput="raw_values"))

[0.81145683 0.74750421 0.83990368]
[0.26975873 0.32862031 0.29816783]
[0.17287768 0.24669263 0.1628327 ]
