# Fourth Delivery - Hyperparameter Tuning
## Multilayer Perceptron Tuning
The aim of this fourth delivery will be to optimize properly the multilayer perceptron.

**First**
After preprocessing the data, we use a grid search in order to find the most optimized parameters. We use the precision_score (tp / (tp + fp)) as the main measure. https://scikit-learn.org/stable/modules/generated/sklearn.metrics.precision_score.html#sklearn.metrics.precision_score

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import roc_curve, auc, roc_auc_score, make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
import time, sys

In [2]:
data = pd.read_csv("../datasets/UJIIndoorLoc/UJIIndoorLoc_B0-ID-01.csv")
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5249 entries, 0 to 5248
Columns: 521 entries, WAP001 to ID
dtypes: int64(521)
memory usage: 20.9 MB


In [3]:
data.head(5)

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP512,WAP513,WAP514,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,ID
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,122
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,102
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,110
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,111
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,107


In [4]:
X = data.iloc[:, :-1]
y = data.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1, stratify=y)

In [5]:
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
skf = StratifiedKFold(n_splits=9)
param_grid = {'solver': ['adam', 'sgd','lbfgs'],
              'activation':['logistic','relu'],
              'max_iter': [5,10,15], 
              'alpha': 10.0 ** -np.arange(1, 4), 
              'hidden_layer_sizes':np.arange(20, 50), 
              #'random_state':[0,1,2,3,4,5,6,7,8,9]
             }
scorers = {
    'precision_score': make_scorer(precision_score, average = 'weighted'),
    'roc_auc_score': make_scorer(roc_auc_score),
#    'recall_score': make_scorer(recall_score),
    'accuracy_score': make_scorer(accuracy_score)
}

In [7]:
mlp = MLPClassifier(max_iter = sys.maxsize, verbose=10)
# mlp_grid = GridSearchCV(mlp, param_grid, cv=skf, scoring = scorers, refit = make_scorer(precision_score, average = 'weighted'), n_jobs=-1)
mlp_grid = GridSearchCV(mlp, param_grid, cv=skf, n_jobs=-1)

In [8]:
t1 = time.time()
mlp_grid.fit(X_train, y_train)
t2 = time.time()

Iteration 1, loss = 5.51295014
Iteration 2, loss = 5.34888041
Iteration 3, loss = 5.04538180
Iteration 4, loss = 4.67464477
Iteration 5, loss = 4.31212252
Iteration 6, loss = 3.97678428
Iteration 7, loss = 3.67958275
Iteration 8, loss = 3.42439972
Iteration 9, loss = 3.20821191
Iteration 10, loss = 3.02230075
Iteration 11, loss = 2.85480534
Iteration 12, loss = 2.70996067
Iteration 13, loss = 2.58041978
Iteration 14, loss = 2.46579184
Iteration 15, loss = 2.35929215




In [9]:
fit_time = round(t2-t1, 3)
print("Fitting time:{}".format(fit_time))

Fitting time:3011.917


In [10]:
mlp_grid.best_params_  

{'activation': 'relu',
 'alpha': 0.01,
 'hidden_layer_sizes': 49,
 'max_iter': 15,
 'solver': 'adam'}


mlp_grid.best_params_

{'activation': 'relu',
 'alpha': 0.01,
 'hidden_layer_sizes': 4,
 'max_iter': 15,
 'random_state': 8,
 'solver': 'lbfgs'}

In [24]:
y_pred = mlp_grid.predict(X_test)
y_pred_prob = mlp_grid.predict_proba(X_test)

In [25]:
auc = roc_auc_score(y_test, y_pred_prob, multi_class='ovr')

In [31]:
print("AUC: ", auc*100)

AUC:  99.22874562903961


In [30]:
# print("Accuracy: ", round(100 * accuracy_score(y_test, y_pred), 4))

Accuracy:  47.0476


In [28]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

         102       0.00      0.00      0.00         2
         106       1.00      1.00      1.00         1
         107       0.00      0.00      0.00         1
         110       1.00      1.00      1.00         1
         111       0.00      0.00      0.00         2
         112       0.00      0.00      0.00         2
         113       1.00      0.50      0.67         2
         114       0.00      0.00      0.00         2
         115       1.00      1.00      1.00         1
         116       0.50      1.00      0.67         1
         117       0.00      0.00      0.00         1
         118       0.00      0.00      0.00         1
         119       1.00      1.00      1.00         2
         120       0.14      1.00      0.25         4
         121       0.00      0.00      0.00         2
         122       0.00      0.00      0.00         2
         123       0.00      0.00      0.00         2
         125       0.00    

  _warn_prf(average, modifier, msg_start, len(result))
