# Multi-Layer Perceptron Binary Classifier using Scikit-Learn

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split

from utils import load_data_from_json, array_to_rgb_image

In [None]:
# Load the raw dataset with the project helper imported from utils; later
# cells read its "data" and "labels" entries.
RAW_DATA = load_data_from_json()

In [None]:
# Convert the "data" and "labels" entries of the raw dataset into float
# NumPy arrays, in that order.
inputs, labels = (
    np.array(RAW_DATA[field], dtype=float) for field in ("data", "labels")
)

# Perform any transformations of the input data here
# --------------------------------------------------
# inputs = ...

In [None]:
# Bring up the help for MLPClassifier for info on model_args
#MLPClassifier?

In [None]:
# Modify the model arguments in the dictionary
# --------------------------------------------
# Keyword arguments that are unpacked into the MLPClassifier constructor.
model_args = dict(
    hidden_layer_sizes=(10,),
    activation="relu",
    batch_size=128,
    max_iter=25,
    shuffle=True,
    early_stopping=False,
)

## Training

The cells below train the model and plot the loss as a function of the number of training epochs.

An epoch is defined as one whole pass through the training set.

In [None]:
# Randomly partition the data into a training set and a held-out test set.
X_train, X_test, y_train, y_test = train_test_split(
    inputs,
    labels,
    shuffle=True,
)

# Build the classifier from the settings in model_args and fit it on the
# training portion only; the test portion is kept for the metrics cells.
model = MLPClassifier(**model_args)
trained_model = model.fit(X_train, y_train)

In [None]:
# Training loss recorded by the fitted model during training.
loss_history = trained_model.loss_curve_

fig, ax = plt.subplots()
ax.plot(loss_history, "o-")
ax.set_ylabel("train loss")
ax.set_xlabel("epoch")
# Show the loss axis on a logarithmic scale.
ax.set_yscale("log")

## Metrics

Modify the cells below to compute some more useful classification metrics.

In [None]:
from sklearn.metrics import accuracy_score, ConfusionMatrixDisplay

In [None]:
# Per-class probabilities for every test sample; the second column is kept
# as the predicted probability (between 0 and 1).
class_probabilities = trained_model.predict_proba(X_test)
y_pred_prob = class_probabilities[:, 1]

# Hard class prediction (0 or 1) for every test sample.
y_pred_class = trained_model.predict(X_test)

In [None]:
# Score the hard class predictions against the held-out test labels.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_class)
f"The accuracy is: {accuracy}"

In [None]:
# Draw the confusion matrix of test labels versus predicted classes.
ConfusionMatrixDisplay.from_predictions(y_true=y_test, y_pred=y_pred_class)

## Reproducibility check

You should modify the code below so that the training phase is reproducible.

To check this, train two identical models, and pass the parameters of the trained models into ``check_parameters_match``.

**Hint:** You will need to add an extra argument to ``train_test_split``, as well as ``MLPClassifier``.

In [None]:
from utils import check_parameters_match

check_parameters_match?

In [None]:
# Fixed seed used to construct the random generators in the cells below.
SEED = 123_456_789

In [None]:
# FIRST MODEL

# Fresh generator built from SEED. It must actually be handed to every
# source of randomness in this cell, otherwise the run is not repeatable.
rng = np.random.RandomState(SEED)

# Seeded split: the same samples land in train/test on every run.
X_train, X_test, y_train, y_test = train_test_split(
    inputs, labels, shuffle=True, random_state=rng
)

# Seeded classifier: weight initialisation and mini-batch shuffling draw
# from the same generator, continuing after the split.
model = MLPClassifier(**model_args, random_state=rng)

trained_model = model.fit(X_train, y_train)

# Concatenate the list of weight matrices with the list of bias vectors so
# all learned parameters can be compared in one pass.
parameters_1 = trained_model.coefs_ + trained_model.intercepts_

In [None]:
# SECOND MODEL

# Re-create the generator from SEED so this cell starts from a known random
# state; it must be passed to every source of randomness used here.
rng = np.random.RandomState(SEED)

# Seeded split: the same samples land in train/test on every run.
X_train, X_test, y_train, y_test = train_test_split(
    inputs, labels, shuffle=True, random_state=rng
)

# Seeded classifier: weight initialisation and mini-batch shuffling draw
# from the same generator, continuing after the split.
model = MLPClassifier(**model_args, random_state=rng)

trained_model = model.fit(X_train, y_train)

# Concatenate the list of weight matrices with the list of bias vectors so
# all learned parameters can be compared in one pass.
parameters_2 = trained_model.coefs_ + trained_model.intercepts_

In [None]:
# Compare the learned parameters of the two runs and fail loudly on mismatch.
models_match = check_parameters_match(parameters_1, parameters_2)
assert models_match, "Models are not identical!"