# Multi-Layer Perceptron Binary Classifier using Scikit-Learn

In [1]:
from itertools import product

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import (
    classification_report,
    precision_recall_fscore_support,
    matthews_corrcoef,
    ConfusionMatrixDisplay,
    RocCurveDisplay,
    DetCurveDisplay,
    PrecisionRecallDisplay,
)

from utils import load_data_from_json, array_to_rgb_image

In [2]:
# Load the raw dataset once; later cells read RAW_DATA["data"] and
# RAW_DATA["labels"].
# NOTE(review): the recorded output of this cell is a FileNotFoundError for
# 'data/shipsnet.json' — presumably the loader reads that path relative to the
# working directory; confirm before re-running.
RAW_DATA = load_data_from_json()

FileNotFoundError: [Errno 2] No such file or directory: 'data/shipsnet.json'

## Example of training and classification metrics

In [None]:
# Build the feature matrix and label vector from the raw dataset.
inputs = np.array(RAW_DATA["data"], dtype=float)
labels = np.array(RAW_DATA["labels"], dtype=float)

# NOTE: rescaling here is preferred over adding a standardisation transform to
# the pipeline. Dividing by 255 and subtracting 0.5 centres the values around
# zero (assumes 8-bit pixel intensities — TODO confirm against the loader).
inputs = inputs / 255 - 0.5

# No random_state is given, so the split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(inputs, labels, shuffle=True)

# `learning_rate="adaptive"` was removed: scikit-learn only applies the
# learning-rate schedule when solver="sgd", so it had no effect with adam.
model = MLPClassifier(
    hidden_layer_sizes=(10,),
    activation="relu",
    solver="adam",
    batch_size=32,
    max_iter=50,
    shuffle=True,
    random_state=None,
    early_stopping=True,
)
trained_model = model.fit(X_train, y_train)

# Plot the recorded training loss on a logarithmic y-axis.
fig, ax = plt.subplots()
ax.plot(trained_model.loss_curve_, "o-")
ax.set_xlabel("epoch")
ax.set_ylabel("train loss")
ax.set_yscale("log")

In [None]:
# Column 1 of predict_proba is kept as the score; predict gives hard labels.
y_pred_prob = trained_model.predict_proba(X_test)[:, 1]
y_pred_class = trained_model.predict(X_test)

# Binary-averaged scores plus the Matthews correlation coefficient.
binary_scores = precision_recall_fscore_support(
    y_test, y_pred_class, average="binary"
)
precision, recall, fscore, support = binary_scores
matthews = matthews_corrcoef(y_test, y_pred_class)

class_names = ["no ship (0)", "ship (1)"]
print(classification_report(y_test, y_pred_class, target_names=class_names))
print(f"matthews: {matthews:.2g}")

In [None]:
# One panel per diagnostic display on a 2x2 grid.
fig, axes = plt.subplots(2, 2, figsize=(8, 8))

# The confusion matrix is fed the hard labels; the three curve displays are
# fed the scores in y_pred_prob.
panels = [
    (ConfusionMatrixDisplay, y_pred_class),
    (RocCurveDisplay, y_pred_prob),
    (DetCurveDisplay, y_pred_prob),
    (PrecisionRecallDisplay, y_pred_prob),
]
for ax, (metric, y_pred) in zip(axes.flatten(), panels):
    disp = metric.from_predictions(y_test, y_pred, ax=ax)
    # Title each panel with the display class name minus "Display".
    ax.set_title(metric.__name__.replace("Display", ""))

fig.tight_layout()

## Reproducibility check

In [None]:
def reproducible_train(seed) -> list[np.ndarray]:
    """Train a small MLP with all randomness drawn from one seeded generator.

    A single ``np.random.RandomState`` built from *seed* is passed to both the
    train/test split and the classifier.

    Args:
        seed: Value handed to ``np.random.RandomState``.

    Returns:
        The fitted model's ``coefs_`` followed by its ``intercepts_``,
        concatenated into one list.
    """
    rng = np.random.RandomState(seed)

    # Same scaling as the rest of the notebook: divide by 255, shift by 0.5.
    features = np.array(RAW_DATA["data"], dtype=float) / 255 - 0.5
    targets = np.array(RAW_DATA["labels"], dtype=float)

    # Only the training partition is used here; the test partition is discarded.
    X_train, _, y_train, _ = train_test_split(
        features, targets, random_state=rng, shuffle=True
    )

    # max_iter=2: presumably kept tiny so the check runs quickly.
    classifier = MLPClassifier(
        hidden_layer_sizes=(10,),
        shuffle=True,
        random_state=rng,
        max_iter=2,
    )
    fitted = classifier.fit(X_train, y_train)

    return fitted.coefs_ + fitted.intercepts_

# Train twice from the same seed; every parameter array must match.
seed = 123456789

params_1, params_2 = (reproducible_train(seed) for _ in range(2))

assert all(np.allclose(a, b) for a, b in zip(params_1, params_2))

## Hyperparameter scan

In [None]:
# Base estimator for the scan. `learning_rate="adaptive"` was removed:
# scikit-learn only applies the learning-rate schedule when solver="sgd", so
# it had no effect with adam.
model = MLPClassifier(
    hidden_layer_sizes=(10,),
    solver="adam",
    activation="relu",
    batch_size=32,
    max_iter=50,
    shuffle=True,
    early_stopping=True,
)

# Grid over the L2 penalty (alpha) and the initial learning rate: 3 x 3 = 9
# combinations.
params = {
    "alpha": [0.001, 0.0001, 0.00001],
    "learning_rate_init": [0.01, 0.001, 0.0001],
}

# 3-fold cross-validated search scored by F1, run on 4 parallel jobs.
gs = GridSearchCV(
    estimator=model,
    param_grid=params,
    scoring="f1",
    n_jobs=4,
    verbose=4,
    cv=3,
)

In [None]:
# Rebuild the scaled inputs and labels, then draw a fresh (unseeded) split.
labels = np.array(RAW_DATA["labels"], dtype=float)
inputs = np.array(RAW_DATA["data"], dtype=float) / 255 - 0.5
X_train, X_test, y_train, y_test = train_test_split(inputs, labels, shuffle=True)

# Run the grid search on the training partition and tabulate cv_results_.
trained_models = gs.fit(X_train, y_train)
summary = pd.DataFrame(trained_models.cv_results_)

# Bare expression so the notebook renders the table as the cell output.
summary

In [None]:
# Evaluate the best estimator from the grid search on the held-out test split.
best_model = trained_models.best_estimator_

# Column 1 of predict_proba is kept as the score; predict gives hard labels.
y_pred_prob = best_model.predict_proba(X_test)[:, 1]
y_pred_class = best_model.predict(X_test)

# Binary-averaged scores plus the Matthews correlation coefficient.
binary_scores = precision_recall_fscore_support(
    y_test, y_pred_class, average="binary"
)
precision, recall, fscore, support = binary_scores
matthews = matthews_corrcoef(y_test, y_pred_class)

class_names = ["no ship (0)", "ship (1)"]
print(classification_report(y_test, y_pred_class, target_names=class_names))
print(f"matthews: {matthews:.2g}")