## Parsing the input

In [None]:
import pandas as pd
from scipy.io.arff import loadarff
from sklearn.model_selection import KFold, StratifiedKFold
import numpy as np
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Load 'breast.w.arff'; loadarff returns a tuple whose first element holds the records.
raw_data = loadarff('breast.w.arff')
# Convert to a pandas DataFrame and drop every row that has a missing value.
df_data = pd.DataFrame(raw_data[0]).dropna()
# Encode the class labels (stored as byte strings) as integers: malignant -> 1, benign -> 0.
# The encoded column is assigned back explicitly instead of calling
# `replace(..., inplace=True)` on the selected column: that form operates on an
# intermediate object, triggers a chained-assignment warning in recent pandas and
# leaves `df_data` unchanged under Copy-on-Write. `astype(int)` guarantees an integer
# dtype and raises if a label outside the mapping is present.
df_data = df_data.assign(
    Class=df_data['Class'].map({b'malignant': 1, b'benign': 0}).astype(int)
)

### Confusion matrices of an MLP with two hidden layers (sizes 3 and 2), with and without early stopping

In [None]:
# Compare two MLP classifiers (hidden layers of sizes 3 and 2) that differ only in
# the use of early stopping. The out-of-fold predictions of a 5-fold stratified
# cross-validation are pooled into one confusion matrix per classifier.
data, target = df_data.drop(columns='Class').values, df_data['Class'].values

# A fixed random_state makes the runs reproducible and gives both models the same
# seed, so differences between the matrices are attributable to early stopping
# rather than to random initialisation.
# max_iter is raised for the model without early stopping so that it can converge.
clasf1 = MLPClassifier(hidden_layer_sizes=(3, 2), early_stopping=False, alpha=0.1,
                       max_iter=2000, random_state=0)
clasf2 = MLPClassifier(hidden_layer_sizes=(3, 2), early_stopping=True, alpha=0.1,
                       random_state=0)

# Stratified CV keeps the class proportions in every fold, which suits classification.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

# Per-fold results are gathered in lists and concatenated once at the end instead
# of re-allocating a growing array with np.append on every fold.
fold_predictions1, fold_predictions2, fold_targets = [], [], []

for train_subset, test_subset in kf.split(data, target):
    X_train, X_test = data[train_subset], data[test_subset]
    Y_train, Y_test = target[train_subset], target[test_subset]

    # Train on the fold's training part, then store the predictions for its test part.
    fold_predictions1.append(clasf1.fit(X_train, Y_train).predict(X_test))  # no early stopping
    fold_predictions2.append(clasf2.fit(X_train, Y_train).predict(X_test))  # early stopping

    fold_targets.append(Y_test)  # store the actual values

predicted_targets1 = np.concatenate(fold_predictions1)
predicted_targets2 = np.concatenate(fold_predictions2)
actual_targets = np.concatenate(fold_targets)

cnf_matrix1 = confusion_matrix(actual_targets, predicted_targets1, labels=clasf1.classes_)
cnf_matrix2 = confusion_matrix(actual_targets, predicted_targets2, labels=clasf2.classes_)

# One figure per classifier, drawn with the same colour map.
for cnf_matrix, clasf, title in (
    (cnf_matrix1, clasf1, "Without early stopping"),
    (cnf_matrix2, clasf2, "With early stopping"),
):
    ConfusionMatrixDisplay(cnf_matrix, display_labels=clasf.classes_).plot(cmap=plt.get_cmap('PuBuGn'))
    plt.title(title)
    plt.show()

### Boxplots of the distribution of the residuals, with and without regularization

In [None]:
# Compare the absolute residuals of MLP regressors (hidden layers of sizes 3 and 2)
# trained with different L2 regularization strengths, using 5-fold cross-validation.
raw_data = loadarff('kin8nm.arff')
df_data = pd.DataFrame(raw_data[0])  # converting data to a pandas DataFrame

data, target = df_data.drop(columns='y').values, df_data['y'].values

kf = KFold(n_splits=5, shuffle=True, random_state=0)

# MLP regressors with three regularization strengths and one without regularization
# (alpha = 0). A shared, fixed random_state makes the experiment reproducible and
# leaves alpha as the only difference between the models.
regr1 = MLPRegressor(hidden_layer_sizes=(3, 2), alpha=0.1, random_state=0)
regr2 = MLPRegressor(hidden_layer_sizes=(3, 2), alpha=1, random_state=0)
regr3 = MLPRegressor(hidden_layer_sizes=(3, 2), alpha=10, random_state=0)
regr4 = MLPRegressor(hidden_layer_sizes=(3, 2), alpha=0, random_state=0)

residues1, residues2, residues3, residues4 = [], [], [], []

# Pair each regressor with the list that accumulates its residuals so that one
# loop handles all of them instead of four copy-pasted sequences.
models = ((regr1, residues1), (regr2, residues2), (regr3, residues3), (regr4, residues4))

for train_subset, test_subset in kf.split(data):
    X_train, X_test = data[train_subset], data[test_subset]
    Y_train, Y_test = target[train_subset], target[test_subset]

    for regr, residues in models:
        regr.fit(X_train, Y_train)
        # store the residue for each prediction: absolute value of (actual - predicted)
        residues.extend(np.absolute(Y_test - regr.predict(X_test)))

fig, ax = plt.subplots()
ax.set_title("Residues Distribution")
ax.boxplot([residues1, residues2, residues3, residues4])
# Tick labels are set on the axes rather than through boxplot's `labels` keyword,
# which is deprecated in newer Matplotlib releases; this form works in both.
ax.set_xticklabels(["alpha 0.1", "alpha 1", "alpha 10", "Without regularization"])
plt.show()