In [4]:
# Query the current working directory, then move two levels up so that the
# relative data path and the project-local imports used by later cells resolve
# (presumably this lands on the project root -- TODO confirm against the repo layout).
# NOTE: the relative %cd is not idempotent; re-running this cell moves up two more levels.
%pwd
%cd ../..

/Users/aflamant/Documents/courses/2024-2025/mémoire/03-code/memoire/MLP


In [5]:
RANDOM_STATE = 42
import numpy as np

import os
import copy
import torch
from torch import nn
from torch.utils.data import random_split, DataLoader, Subset
from torch.nn import functional as F
from sklearn.model_selection import KFold
from sklearn.metrics import (d2_absolute_error_score as D2,
                             r2_score as R2,
                             mean_absolute_percentage_error as MAPE)

from dataset import TenBarsCantileverTrussSingleEADataset

import mlflow
import matplotlib.pyplot as plt
from models.architecture import MultiLayerPerceptron
from models.processing import StandardScaler

# Seed every source of randomness used in this notebook. NumPy drives the
# dataset subsampling; PyTorch drives weight initialisation, random_split and
# DataLoader shuffling, so it must be seeded too for runs to be repeatable.
np.random.seed(RANDOM_STATE)
torch.manual_seed(RANDOM_STATE)

# Pick the best available accelerator: CUDA first, then Apple MPS, else CPU.
device = torch.device(
    'cuda' if torch.cuda.is_available()
    else 'mps' if torch.backends.mps.is_available()
    else 'cpu'
)

# 1. Load the data


In [6]:
data_path = "./data/dataset/cantilever/data.hdf5"
_ds = TenBarsCantileverTrussSingleEADataset(data_path)

# Work on a random sample of 25000 distinct entries of the full dataset.
all_indices = np.arange(len(_ds))
sampled_indices = np.random.choice(all_indices, 25000, replace=False)
ds = _ds[sampled_indices]

# Input / target sizes are read off the first sample (element 0 = input, element 1 = target).
in_dim = len(ds[0][0])
out_dim = len(ds[0][1])

print(f"Dataset size: {len(ds)}")
print(f"  Sample dimension: {in_dim}")
print(f"  Target dimension: {out_dim}")

Dataset size: 25000
  Sample dimension: 31
  Target dimension: 1


# 2. Training and Validation routine

In [43]:
def train(model, train_ds, val_ds, lr, n_epochs, batch_size, verbose=True, plot=False):
    """Train ``model`` with Adam on an MSE loss and evaluate it after every epoch.

    Inputs and targets are standardised with scalers fitted on ``train_ds`` only.
    The loss and the "MSE" metric are computed on standardised values, whereas
    MAPE, R2 and D2 are computed on targets mapped back to their original scale.
    Every metric is computed per batch and averaged over the batches of an epoch.
    Per-epoch metrics are logged to MLflow, and the trained model is logged as an
    MLflow artifact at the end.

    Relies on the module-level names ``device``, ``in_dim`` and ``out_dim``.

    Args:
        model: ``torch.nn.Module`` to optimise (moved to ``device``).
        train_ds: Training dataset; each batch must unpack into five elements,
            of which the first two are the inputs and the targets.
        val_ds: Validation dataset with the same item layout as ``train_ds``.
        lr: Learning rate passed to Adam.
        n_epochs: Number of passes over ``train_ds``.
        batch_size: Batch size used for both data loaders.
        verbose: If true, print a summary line every 25 epochs.
        plot: If true, draw the Loss / MAPE / R2 / D2 curves once training is done.

    Returns:
        dict mapping ``'train_losses'``, ``'val_losses'`` and
        ``'{train,val}_{MSE,MAPE,R2,D2}'`` to lists holding one value per epoch.
    """
    metric_names = ('loss', 'MSE', 'MAPE', 'R2', 'D2')

    model = model.to(device)
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    # Evaluation does not benefit from shuffling; a fixed order keeps the
    # batch-averaged validation metrics comparable from one epoch to the next.
    val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False)

    # Fit the scalers on the training data only, so that no statistics of the
    # validation data leak into the preprocessing.
    x_scaler = StandardScaler(in_dim).to(device)
    y_scaler = StandardScaler(out_dim).to(device)
    for x, y, _, _, _ in train_dl:
        x_scaler.partial_fit(x.to(device))
        y_scaler.partial_fit(y.to(device))

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    def _batch_metrics(loss, y, y_pred):
        """Return the metrics of one batch given standardised targets and predictions."""
        y_raw = y_scaler.inverse_transform(y).cpu().detach()
        y_pred_raw = y_scaler.inverse_transform(y_pred).cpu().detach()
        return {
            'loss': loss.item(),
            'MSE': F.mse_loss(y_pred, y).item(),
            'MAPE': MAPE(y_raw, y_pred_raw),
            'R2': R2(y_raw, y_pred_raw),
            'D2': D2(y_raw, y_pred_raw),
        }

    def _format_metrics(means):
        """Format the epoch means of one split for the progress line."""
        return (f"Loss: {means['loss']:1.4f}   "
                f"MSE: {means['MSE']:1.4f}   "
                f"MAPE: {means['MAPE']:1.4f}   "
                f"R2: {means['R2']: 1.4f}   "
                f"D2: {means['D2']: 1.4f}")

    # history[split][metric] -> list with one averaged value per epoch.
    history = {split: {name: [] for name in metric_names} for split in ('train', 'val')}

    for epoch in range(n_epochs):
        model.train()
        train_batches = []
        for x, y, _, _, _ in train_dl:
            x, y = x.to(device), y.to(device)

            x = x_scaler.transform(x)
            y = y_scaler.transform(y)

            optimizer.zero_grad()
            y_pred = model(x)
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step()

            train_batches.append(_batch_metrics(loss, y, y_pred))

        model.eval()
        val_batches = []
        # No gradients are needed for evaluation; skipping the autograd graph
        # saves memory and time.
        with torch.no_grad():
            for x, y, _, _, _ in val_dl:
                x, y = x.to(device), y.to(device)

                x = x_scaler.transform(x)
                y = y_scaler.transform(y)

                y_pred = model(x)
                loss = criterion(y_pred, y)

                val_batches.append(_batch_metrics(loss, y, y_pred))

        epoch_means = {
            'train': {name: np.mean([b[name] for b in train_batches]) for name in metric_names},
            'val': {name: np.mean([b[name] for b in val_batches]) for name in metric_names},
        }

        # Logging
        for split in ('train', 'val'):
            for name in metric_names:
                mlflow.log_metric(f"{split} {name}", epoch_means[split][name], step=epoch)
                history[split][name].append(epoch_means[split][name])

        if verbose and (epoch + 1) % 25 == 0:
            print(f"[Epoch] {epoch + 1:{len(str(n_epochs))}d}/{n_epochs:d}  "
                  f"TRAIN   {_format_metrics(epoch_means['train'])}"
                  f"  ||  "
                  f"VALIDATION   {_format_metrics(epoch_means['val'])}")

    if plot:
        _, axs = plt.subplots(1, 4, figsize=(24, 8))
        # (title, metric, use a plain log scale?) for each panel, left to right.
        panels = (("Loss", 'loss', True),
                  ("MAPE", 'MAPE', True),
                  ("R2", 'R2', False),
                  ("D2", 'D2', False))
        for ax, (title, name, log_scale) in zip(axs, panels):
            ax.set_title(title)
            ax.plot(history['train'][name], label='Training')
            ax.plot(history['val'][name], label='Validation')
            if log_scale:
                ax.set_yscale('log')
            else:
                # Exponential axis: stretches the region close to 1, where the
                # score curves saturate.
                ax.set_yscale('function', functions=(lambda v: 10 ** v, lambda v: np.log10(v)))
                ax.set_ylim(0, 1.0)
            # Label and legend go on the panel itself, after its curves exist.
            ax.set_xlabel("Epoch")
            ax.legend()

    # `x` is the last batch seen above (already standardised when it comes from
    # an epoch loop); it serves as the input example for the logged model.
    input_example = x.cpu().detach().numpy()
    with torch.no_grad():
        output_example = model(x).cpu().numpy()
    signature = mlflow.models.infer_signature(input_example, output_example)
    mlflow.pytorch.log_model(
        pytorch_model=model,
        input_example=input_example,
        artifact_path='model',
        signature=signature,
    )

    # Set a tag that we can use to remind ourselves what this run was for
    # mlflow.set_tag("motivation", "First test on MLFlow")

    results = {}
    for name in metric_names:
        # The loss histories keep their historical plural key ('train_losses').
        key = 'losses' if name == 'loss' else name
        for split in ('train', 'val'):
            results[f"{split}_{key}"] = history[split][name]
    return results

In [44]:
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000/")
# No experiment is selected here: the run is recorded in whichever MLflow
# experiment is currently active.

with mlflow.start_run(run_name="seventh test"):
    # Seed the split explicitly so the 80/20 train/validation partition is the
    # same every time this cell is executed.
    split_generator = torch.Generator().manual_seed(RANDOM_STATE)
    ds_1, ds_2 = random_split(ds, (.8, .2), generator=split_generator)

    results = train(model=MultiLayerPerceptron(in_dim, out_dim,
                                               40, 3,
                                               nn.ReLU),
                    train_ds=ds_1,
                    val_ds=ds_2,
                    lr=4e-4,
                    n_epochs=200,
                    batch_size=2048,
                    verbose=True)

[Epoch]  25/200  TRAIN   Loss: 0.1326   MSE: 0.1326   MAPE: 0.2717   R2:  0.8675   D2:  0.7059  ||  VALIDATION   Loss: 0.1184   MSE: 0.1184   MAPE: 0.2736   R2:  0.8817   D2:  0.7171
[Epoch]  50/200  TRAIN   Loss: 0.0661   MSE: 0.0661   MAPE: 0.1714   R2:  0.9338   D2:  0.8140  ||  VALIDATION   Loss: 0.0593   MSE: 0.0593   MAPE: 0.1785   R2:  0.9412   D2:  0.8234
[Epoch]  75/200  TRAIN   Loss: 0.0436   MSE: 0.0436   MAPE: 0.1248   R2:  0.9564   D2:  0.8647  ||  VALIDATION   Loss: 0.0418   MSE: 0.0418   MAPE: 0.1322   R2:  0.9590   D2:  0.8701
[Epoch] 100/200  TRAIN   Loss: 0.0325   MSE: 0.0325   MAPE: 0.0977   R2:  0.9675   D2:  0.8902  ||  VALIDATION   Loss: 0.0299   MSE: 0.0299   MAPE: 0.1032   R2:  0.9701   D2:  0.8935
[Epoch] 125/200  TRAIN   Loss: 0.0257   MSE: 0.0257   MAPE: 0.0816   R2:  0.9743   D2:  0.9066  ||  VALIDATION   Loss: 0.0257   MSE: 0.0257   MAPE: 0.0846   R2:  0.9747   D2:  0.9097
[Epoch] 150/200  TRAIN   Loss: 0.0215   MSE: 0.0215   MAPE: 0.0734   R2:  0.9785   D2

# 2. Hyperparameter tuning
Considering a 10-bar cantilever dataset, we want to predict the EA of the bars, which is assumed to be a single common value.
The model is an MLP; the hyperparameters to tune are:
- Activation function
- Learning rate
- Number of layers
- Number of neurons per layer

In [None]:
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000/")
mlflow.set_experiment("MLP_Cantilever_Capacity")

# Model capacity tuning
n_neurons_values = [10, 15, 20, 25, 30, 35, 40]
n_layers_values = [1, 2, 3, 4, 5]

# Optimisation settings shared by every run of the search; they mirror the
# values used for the single training run earlier in this notebook.
TUNING_LR = 4e-4
TUNING_N_EPOCHS = 200
TUNING_BATCH_SIZE = 2048


def run_config(config, fit_ds, eval_ds, run_name, extra_params=None):
    """Train one (n_layers, n_neurons) configuration in its own MLflow run.

    Returns the score of the run: the MSE on ``eval_ds`` after the last epoch
    (lower is better).
    """
    n_layers, n_neurons = config
    # NOTE(review): the single-run cell earlier in this notebook passes
    # (40, 3) at these two positions, which suggests the constructor may expect
    # (n_neurons, n_layers) -- confirm against MultiLayerPerceptron's signature.
    model = MultiLayerPerceptron(in_dim, out_dim,
                                 n_layers, n_neurons,
                                 nn.ReLU)
    with mlflow.start_run(run_name=run_name):
        mlflow.log_params({
            "n_layers": n_layers,
            "n_neurons": n_neurons,
            "lr": TUNING_LR,
            "n_epochs": TUNING_N_EPOCHS,
            "batch_size": TUNING_BATCH_SIZE,
            **(extra_params or {}),
        })
        # verbose=False: the per-epoch metrics are already logged to MLflow, and
        # printing them for every run of the grid would flood the cell output.
        results = train(model, fit_ds, eval_ds,
                        lr=TUNING_LR,
                        n_epochs=TUNING_N_EPOCHS,
                        batch_size=TUNING_BATCH_SIZE,
                        verbose=False)
    return results['val_MSE'][-1]


# Nested cross-validation: the inner loop selects the capacity, the outer loop
# estimates how well the selected capacity generalises to unseen data.
outer_cv = KFold(n_splits=3, shuffle=True, random_state=RANDOM_STATE)
outer_configs = []
outer_scores = []
for outer_fold_id, (outer_train_idx, test_idx) in enumerate(outer_cv.split(np.arange(len(_ds)))):
    # Plain Python ints keep the dataset indexing on its scalar code path.
    outer_train_ds = Subset(_ds, outer_train_idx.tolist())
    test_ds = Subset(_ds, test_idx.tolist())

    inner_cv = KFold(n_splits=3, shuffle=True, random_state=RANDOM_STATE + outer_fold_id)
    # Materialise the inner folds once: every configuration is then scored on
    # exactly the same splits of the outer training set.
    inner_folds = [(fit_idx.tolist(), val_idx.tolist())
                   for fit_idx, val_idx in inner_cv.split(np.arange(len(outer_train_ds)))]

    inner_scores = {}
    for n_neurons in n_neurons_values:
        for n_layers in n_layers_values:
            config = (n_layers, n_neurons)
            inner_scores[config] = []
            for inner_fold_id, (fit_idx, val_idx) in enumerate(inner_folds):
                # Always subset the *outer* training set; it must stay intact
                # for the other folds and for the final refit below.
                inner_train_ds = Subset(outer_train_ds, fit_idx)
                val_ds = Subset(outer_train_ds, val_idx)

                score = run_config(
                    config, inner_train_ds, val_ds,
                    run_name=f"outer{outer_fold_id}_inner{inner_fold_id}_L{n_layers}_N{n_neurons}",
                    extra_params={"outer_fold": outer_fold_id, "inner_fold": inner_fold_id},
                )
                inner_scores[config].append(score)

    # Select the configuration with the lowest mean validation score over the inner folds.
    mean_inner_scores = {config: np.mean(scores) for config, scores in inner_scores.items()}
    best_config = min(mean_inner_scores, key=mean_inner_scores.get)
    best_score = mean_inner_scores[best_config]

    # Refit the selected configuration on the whole outer training set and
    # score it on the held-out outer test fold.
    outer_score = run_config(
        best_config, outer_train_ds, test_ds,
        run_name=f"outer{outer_fold_id}_best_L{best_config[0]}_N{best_config[1]}",
        extra_params={"outer_fold": outer_fold_id, "stage": "outer_test"},
    )

    outer_configs.append(best_config)
    outer_scores.append(outer_score)

# 3. Training the foundation model
We will train our model with the whole dataset to create a foundation model that will have learnt all the specifics of the problem.

# 4. Prediction on real data
We will use as *real* data, data with shared multiplicative noise:
$$\varepsilon \sim \mathcal N \left( \mu = 1, \sigma = 0.0025 \right)$$

such that $\hat x = x \cdot \varepsilon$ has a 95% chance of being within ±0.5% of the true value ($2\sigma = 0.005$), which is the same order of magnitude as the error observed with HBM sensors.

This noise will be applied to a set of data from which a subset will be extracted for fine-tuning.

## a. Non fine-tuned prediction
Scores using the foundation model for prediction

## b. Fine-tuned model
We will fine-tune the foundation model using the subset of real data as input.


### I. Experiment on the size of fine-tuning set
These experiments will help us determine how many real examples are needed for *sufficient* fine-tuning.

##### Without PINN

##### With PINN