# MLP with Keras

## Loading the packages

In [None]:
import numpy as np
import matplotlib.pyplot as pl

## The Dataset
The following script lets you create a 2D dataset interactively. Move the mouse pointer over the plot and press the `b` key to add a point of class A (blue, label -1) or the `r` key to add a point of class B (red, label 1); the point is placed at the current pointer position. You can create as many points as you like. The final dataset therefore contains three values per point: the x coordinate (-1.2 ≤ x ≤ 1.2), the y coordinate (-1.2 ≤ y ≤ 1.2) and the class ∈ {1, -1}.

In [1]:
!pip install ipympl

Defaulting to user installation because normal site-packages is not writeable


<font color="red">**For this to work on Colab, you will need to restart your session after the installation (Runtime -> Restart session; in the French interface: Exécution -> Redémarrer la session)**</font>

<font color="orange">**Make sure to add a large number of points; otherwise the cross-validation folds will be very small**</font>

In [None]:
%matplotlib widget

from google.colab import output
output.enable_custom_widget_manager()

fig = pl.figure(figsize=(6,6))
pl.title("Input Dataset")
pl.xlim((-1.2,1.2))
pl.ylim((-1.2,1.2))

dataset = []

def on_press(event):
    """Record a labelled point at the pointer position when 'b' or 'r' is pressed.

    'b' stores the point with label -1 and draws it in blue, 'r' stores it
    with label 1 and draws it in red. Any other key is ignored.

    Args:
        event: Matplotlib key-press event; its ``key``, ``xdata`` and ``ydata``
            attributes are read.
    """
    # xdata/ydata are None when the pointer is not over the axes; storing such
    # a point would put None coordinates into the dataset.
    if event.xdata is None or event.ydata is None:
        return

    # key -> (class label, marker colour)
    classes = {'b': (-1, 'blue'), 'r': (1, 'red')}
    if event.key not in classes:
        return

    label, color = classes[event.key]
    dataset.append((event.xdata, event.ydata, label))
    pl.scatter(event.xdata, event.ydata, color=color)
    pl.draw()

# Attach the event handler: on_press is called for every 'key_press_event'
# emitted by this figure's canvas. The trailing ';' hides the returned
# connection id from the cell output.
fig.canvas.mpl_connect('key_press_event', on_press);

## Show the dataset

In [None]:
# Convert the recorded (x, y, label) tuples to a float array with exactly
# three columns. reshape(-1, 3) keeps the array 2-D even when no point was
# recorded, so the column slicing in the following cells stays valid.
dataset = np.array(dataset, dtype=float).reshape(-1, 3)

# Points recorded without coordinates (None -> NaN) cannot be used for
# training; drop them.
dataset = dataset[~np.isnan(dataset).any(axis=1)]
dataset

In [None]:
# Columns 0 and 1 hold the (x, y) coordinates, column 2 the class label
input_data = dataset[:, :2]
output_data = dataset[:, 2]

In [None]:
# Network inputs: the x and y coordinates of every recorded point
input_data

In [None]:
# Training targets: the class label of every recorded point (-1 for blue, 1 for red)
output_data

In [None]:
# Switch from the interactive widget backend (used to collect the points)
# to the inline backend for the remaining plots.
%matplotlib inline

In [None]:
import keras
from keras import layers
from sklearn.model_selection import KFold

pl.clf()

SEED = 123
keras.utils.set_random_seed(SEED)

# With shuffle=True and no random_state, every call to kf.split() draws a new
# permutation, so the folds plotted here, the folds used for training and the
# folds used for evaluation would all be different. An integer random_state
# makes each kf.split() call return the same folds.
kf = KFold(n_splits=3, shuffle=True, random_state=SEED)

for i, (train_index, test_index) in enumerate(kf.split(input_data)):
  pl.figure(figsize=(4,4))

  # Training points in their class colour (blue: label <= 0, red: label > 0)
  train_colors = ['r' if d > 0 else 'b' for d in output_data[train_index]]
  pl.scatter(input_data[train_index, 0], input_data[train_index, 1], c=train_colors, s=100)

  # Validation points in gray, whatever their class
  pl.scatter(input_data[test_index, 0], input_data[test_index, 1], c='gray', s=100)

  pl.title(f'Split {i + 1}, validation fold in gray.')
  pl.show()

In [None]:
def create_model(hidden_units=2, learning_rate=0.01, momentum=0.99):
  """Build and compile a fresh MLP: 2 inputs, one tanh hidden layer, 1 tanh output.

  Calling it without arguments gives the baseline network (2 hidden nodes,
  SGD with learning rate 0.01 and momentum 0.99, MSE loss).

  Args:
    hidden_units: number of neurons in the hidden layer
      (try e.g. 2, 4, 8, 32, 128).
    learning_rate: SGD learning rate (try e.g. 0.1, 0.01, 0.001, 0.0001).
    momentum: SGD momentum (try e.g. 0, 0.8, 0.9, 0.99).

  Returns:
    The compiled, untrained keras.Sequential model.
  """
  mlp = keras.Sequential([
      # The input shape must be given as a tuple (without the batch dimension)
      layers.Input(shape=(2,)),
      layers.Dense(hidden_units, activation="tanh"),
      layers.Dense(1, activation="tanh"),
  ])

  mlp.compile(
      optimizer=keras.optimizers.SGD(learning_rate=learning_rate, momentum=momentum),
      loss="mse",
  )

  return mlp

# Build one instance only to inspect the architecture and parameter count;
# the training loop below creates its own model for every fold.
mlp = create_model()
mlp.summary()

In [None]:
# One training history and one trained model per cross-validation fold
history_list, trained_mlp = [], []

for train_index, test_index in kf.split(input_data):
  # Start from a brand-new model for every fold: calling fit() on an already
  # trained model would continue its previous training.
  mlp = create_model()

  validation_fold = (input_data[test_index], output_data[test_index])
  history = mlp.fit(
      x=input_data[train_index],
      y=output_data[train_index],
      validation_data=validation_fold,
      epochs=400,
  )

  trained_mlp.append(mlp)
  history_list.append(history)

# Plot training history

In [None]:
# Stack the per-fold loss curves: one row per fold, one column per epoch
train_losses = np.array([fold.history['loss'] for fold in history_list])
val_losses = np.array([fold.history['val_loss'] for fold in history_list])

# Per-epoch mean and spread across the folds
mean_train_loss, std_train_loss = np.mean(train_losses, axis=0), np.std(train_losses, axis=0)
mean_val_loss, std_val_loss = np.mean(val_losses, axis=0), np.std(val_losses, axis=0)

# Draw each curve as a mean line with a +/- one standard deviation band
curves = (
    ('Training', mean_train_loss, std_train_loss),
    ('Validation', mean_val_loss, std_val_loss),
)
for curve_name, curve_mean, curve_std in curves:
    pl.plot(curve_mean, label=f'{curve_name} Loss (Mean)')
    pl.fill_between(
        range(len(curve_mean)),
        curve_mean - curve_std,
        curve_mean + curve_std,
        alpha=0.3,
        label=f'{curve_name} Loss (Std)',
    )

pl.xlabel('Epochs')
pl.ylabel('Loss')
pl.legend()

pl.show()

# Plot the models' feature-space outputs

In [None]:
# The evaluation grid is identical for every model, so build it once.
# linspace includes both end points: 25 samples from -1.2 to 1.2 in steps of 0.1.
grid_min, grid_max, grid_step = -1.2, 1.2, 0.1
grid_size = int(round((grid_max - grid_min) / grid_step)) + 1
input_x = np.linspace(grid_min, grid_max, grid_size)
input_y = np.linspace(grid_min, grid_max, grid_size)
input_x_matrix, input_y_matrix = np.meshgrid(input_x, input_y)
inputs_xy = np.column_stack((input_x_matrix.ravel(), input_y_matrix.ravel()))

# imshow's extent describes the outer edges of the image, so extend it by half
# a cell on every side: each cell is then centred on the point it was computed at.
half_cell = grid_step / 2
heatmap_extent = (grid_min - half_cell, grid_max + half_cell,
                  grid_min - half_cell, grid_max + half_cell)

point_colors = ['r' if d > 0 else 'b' for d in output_data]

for idx, model in enumerate(trained_mlp):
    # predict() returns a NumPy array; verbose=0 hides the progress bar
    output_values = model.predict(inputs_xy, verbose=0)
    output_matrix = np.reshape(output_values, input_x_matrix.shape)

    pl.figure(figsize=(8,8))
    # origin='lower' puts row 0 (the smallest y) at the bottom of the image
    img = pl.imshow(output_matrix, origin='lower', interpolation='None', extent=heatmap_extent, cmap="turbo")
    pl.colorbar(img, shrink=0.7)
    pl.scatter(input_data[:,0], input_data[:,1], c=point_colors, s=100, edgecolors='black')
    pl.title(f'MLP {idx+1} feature space output')
    pl.show()

# Performance

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix, f1_score
import seaborn as sns

def plot_confusion_matrix(confusion_matrix, title):
    """Show a confusion matrix as an annotated heatmap.

    Args:
        confusion_matrix: array of counts; it is cast to int before plotting.
            Rows are labelled as true classes, columns as predicted classes.
        title: title displayed above the heatmap.
    """
    class_names = ["Blue", "Red"]
    counts = confusion_matrix.astype(int)

    pl.figure(figsize=(8, 6))
    sns.heatmap(
        counts,
        annot=True,
        fmt="d",
        cmap="Blues",
        cbar=False,
        xticklabels=class_names,
        yticklabels=class_names,
    )
    pl.title(title)
    pl.xlabel('Predicted')
    pl.ylabel('True')
    pl.show()

f1_scores = []
# Sum of the per-fold confusion matrices (shown as the "global" matrix below)
mean_confusion_matrix = np.zeros((2, 2))

# NOTE: trained_mlp[i] is only scored on unseen points if kf.split() returns
# the same folds here as it did during training, which requires kf to use a
# fixed integer random_state.
for i, (train_index, test_index) in enumerate(kf.split(input_data)):
    # Threshold the tanh output at 0 to get class 0 (blue) / 1 (red);
    # ravel() turns the (n, 1) prediction column into a flat label vector.
    predictions = (trained_mlp[i].predict(input_data[test_index]) > 0).astype(int).ravel()
    true_labels = (output_data[test_index] > 0).astype(int)

    # labels=[0, 1] forces a 2x2 matrix even when a fold contains only one
    # class; a smaller matrix would silently broadcast into every cell of
    # the accumulator.
    cm = confusion_matrix(true_labels, predictions, labels=[0, 1])
    mean_confusion_matrix += cm

    plot_confusion_matrix(cm, f'Confusion Matrix - Fold {i + 1}')

    # F1 score of the positive (red) class on this fold
    f1 = f1_score(true_labels, predictions)
    f1_scores.append(f1)
    print(f"F1 Score - Fold {i + 1}: {f1}")

# Plot the confusion matrix accumulated over all folds
plot_confusion_matrix(mean_confusion_matrix, 'Global confusion matrix')

# Calculate and display the mean F1 score across all folds
mean_f1_score = np.mean(f1_scores)
print(f"Mean F1 Score across all folds: {mean_f1_score}")

# Exercise

Please try changing the hyperparameters (number of neurons, number of layers, learning rate, momentum, number of epochs...) and observe the impact on the training and validation loss, on convergence, and on computation time. For instance, check whether overfitting appears when you use a large number of neurons (e.g. 128) in the hidden layer.

You can also experiment with different datasets (clear separation between classes, unbalanced...)
