# Multi-Layer Perceptron Binary Classifier using Scikit-Learn

## load

In [17]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from utils import load_data_from_json, array_to_rgb_image

In [3]:
RAW_DATA = load_data_from_json('.')
inputs = np.array(RAW_DATA["data"], dtype=float)
labels = np.array(RAW_DATA["labels"], dtype=float)
# Perform any transformations of the input data here
# --------------------------------------------------
maxval = inputs.max(axis=1)
minval = inputs.min(axis = 1)
norm01 = (inputs - minval[:, np.newaxis]) / (maxval - minval)[:,np.newaxis]

## Training

The cells below train the model and plot the loss as a function of the number of training epochs.

An epoch is defined as one whole pass through the training set.

In [7]:
# Modify the model arguments in the dictionary
# --------------------------------------------
model_args = {
    "hidden_layer_sizes": (10,),
    "activation": "relu",
    "batch_size": 128,
    "max_iter": 25,
    "shuffle": True,
    "early_stopping": False
}

In [8]:
X_train, X_test, y_train, y_test = train_test_split(norm01, labels, shuffle=True, train_size = 0.25)
model = MLPClassifier(**model_args)
trained_model = model.fit(X_train, y_train)



In [None]:
fig, ax = plt.subplots()
ax.plot(trained_model.loss_curve_, "o-")
ax.set_xlabel("epoch")
ax.set_ylabel("train loss")
ax.set_yscale("log")

## grid search

In [12]:
# GridSearchCV?

In [None]:
X_train, X_test, y_train, y_test = train_test_split(norm01, labels, shuffle=True, train_size = 0.25)
modelGS = MLPClassifier(**model_args)

params = {
    # "alpha": [0.001, 0.0001, 0.00001],
    "learning_rate_init": [0.01, 0.001, 0.0001],
    "batch_size" : [16,32,64,128]
}
gs = GridSearchCV(
    estimator=modelGS, param_grid=params,
    scoring="f1", n_jobs=-1, verbose=4, cv=3)

trained_models = gs.fit(X_train, y_train)
print('finished')

Fitting 3 folds for each of 12 candidates, totalling 36 fits


In [20]:
summary = pd.DataFrame(trained_models.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_batch_size,param_learning_rate_init,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,14.216239,0.558752,0.036321,0.005956,16,0.01,"{'batch_size': 16, 'learning_rate_init': 0.01}",0.769231,0.0,0.133333,0.300855,0.335635,8
1,14.055275,0.451894,0.030856,0.004167,16,0.001,"{'batch_size': 16, 'learning_rate_init': 0.001}",0.8,0.8,0.814815,0.804938,0.006984,2
2,13.726961,0.025211,0.025285,0.001933,16,0.0001,"{'batch_size': 16, 'learning_rate_init': 0.0001}",0.8,0.882353,0.8,0.827451,0.038822,1
3,9.25196,0.475714,0.030375,0.001408,32,0.01,"{'batch_size': 32, 'learning_rate_init': 0.01}",0.0,0.0,0.764706,0.254902,0.360486,11
4,7.30593,1.763113,0.031768,0.008734,32,0.001,"{'batch_size': 32, 'learning_rate_init': 0.001}",0.75,0.365854,0.695652,0.603835,0.169735,3
5,8.099898,0.089751,0.037203,0.00789,32,0.0001,"{'batch_size': 32, 'learning_rate_init': 0.0001}",0.0,0.866667,0.846154,0.57094,0.403803,4
6,6.003544,0.169404,0.036663,0.009562,64,0.01,"{'batch_size': 64, 'learning_rate_init': 0.01}",0.526316,0.0,0.0,0.175439,0.248108,12
7,5.742509,0.035559,0.026767,0.001794,64,0.001,"{'batch_size': 64, 'learning_rate_init': 0.001}",0.0,0.810811,0.0,0.27027,0.38222,10
8,5.753796,0.330734,0.022192,0.004084,64,0.0001,"{'batch_size': 64, 'learning_rate_init': 0.0001}",0.352941,0.571429,0.740741,0.555037,0.158742,5
9,3.408301,0.780383,0.022143,0.007086,128,0.01,"{'batch_size': 128, 'learning_rate_init': 0.01}",0.133333,0.0,0.695652,0.276329,0.301462,9


In [24]:
from sklearn.metrics import precision_recall_fscore_support, matthews_corrcoef, classification_report
best_model = trained_models.best_estimator_

y_pred_class = best_model.predict(X_test)
y_pred_prob = best_model.predict_proba(X_test)[:, 1]

precision, recall, fscore, support = precision_recall_fscore_support(
        y_test, y_pred_class, average="binary"
)
matthews = matthews_corrcoef(y_test, y_pred_class)

print(classification_report(y_test, y_pred_class, target_names=["no ship (0)", "ship (1)"]))
print(f"matthews: {matthews:.2g}")

              precision    recall  f1-score   support

 no ship (0)       0.75      1.00      0.86      2843
    ship (1)       0.00      0.00      0.00       957

    accuracy                           0.75      3800
   macro avg       0.37      0.50      0.43      3800
weighted avg       0.56      0.75      0.64      3800

matthews: 0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Metrics

Modify the cells below to compute some more useful classification metrics.

In [None]:
from sklearn.metrics import accuracy_score, ConfusionMatrixDisplay

In [None]:
# The predicted class (0 or 1)
y_pred_class = trained_model.predict(X_test)

# The predicted probability (between 0 and 1)
y_pred_prob = trained_model.predict_proba(X_test)[:, 1]

In [None]:
accuracy = accuracy_score(y_test, y_pred_class)
f"The accuracy is: {accuracy}"

In [None]:
ConfusionMatrixDisplay.from_predictions(y_test, y_pred_class)

In [None]:
from sklearn.metrics import cohen_kappa_score
cohen_kappa_score(y_test, y_pred_class)

In [None]:
from sklearn.metrics import balanced_accuracy_score
balanced_accuracy_score(y_test, y_pred_class, adjusted = True)

In [None]:
from sklearn.metrics import top_k_accuracy_score
top_k_accuracy_score(y_test, y_pred_class)

precision, recall & f-score, matthews coeff

## Reproducibility check

You should modify the code below so that the training phase is reproducible.

To check this, train two identical models, and pass the parameters of the trained models into ``check_parameters_match``.

**Hint:** You will need to add an extra argument to ``train_test_split``, as well as ``MLPClassifier``.

In [None]:
from utils import check_parameters_match
# check_parameters_match?

In [None]:
# FIRST MODEL
SEED = 123456789
rng = np.random.RandomState(SEED)
X_train, X_test, y_train, y_test = train_test_split(inputs, labels, shuffle=True, random_state = rng)
model = MLPClassifier(**model_args, random_state=rng)
trained_model = model.fit(X_train, y_train)
parameters_1 = trained_model.coefs_ + trained_model.intercepts_

In [None]:
# SECOND MODEL
rng = np.random.RandomState(SEED)
X_train, X_test, y_train, y_test = train_test_split(inputs, labels, shuffle=True, random_state=rng)
model = MLPClassifier(**model_args, random_state=rng)
trained_model = model.fit(X_train, y_train)
parameters_2 = trained_model.coefs_ + trained_model.intercepts_

In [None]:
assert check_parameters_match(parameters_1, parameters_2), "Models are not identical!"