In [1]:
# NOTE(review): this looks like an accidental IDE auto-import; `elements` is not
# referenced anywhere in the visible notebook — confirm and remove.
from sqlalchemy.sql.base import elements
# Print the current working directory, then move two directories up.
# NOTE(review): `%cd ../..` is relative, so re-running this cell moves up two
# more levels each time; run it once per kernel.
%pwd
%cd ../..

/Users/aflamant/Documents/courses/2024-2025/mémoire/03-code/memoire/MLP


In [2]:
# General
import numpy as np
import re
import ast
import torch
import mlflow
import matplotlib.pyplot as plt

from torch import nn
from torch.distributions import Normal
from torch.nn import functional as F
from torch.utils.data import random_split, DataLoader, Subset
from sklearn.metrics import d2_absolute_error_score as D2
from sklearn.model_selection import KFold
import torchmetrics.regression as R

# Model
from MLP.loss import StiffnessToLoadLoss, construct_k_from_ea
from MLP.dataset import TenBarsCantileverTrussSingleEADataset
from MLP.models.architecture import MultiLayerPerceptron
from MLP.models.processing import StandardScaler

# Setup randomness
# Seed PyTorch as well as NumPy: random_split, DataLoader shuffling and the
# noise sampling used during training all draw from the torch RNG.
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)
torch.manual_seed(RANDOM_STATE)

# Prefer CUDA, then Apple MPS, and fall back to the CPU.
device = torch.device(
    'cuda' if torch.cuda.is_available()
    else 'mps' if torch.backends.mps.is_available()
    else 'cpu'
)

In [3]:
data_path = "./data/dataset/cantilever/data.hdf5"
VAL_SIZE = 25_000  # number of samples held out for validation

_ds = TenBarsCantileverTrussSingleEADataset(data_path)
ds = _ds  # alias: the rest of the notebook refers to `ds`

# Use an explicitly seeded generator so the train/validation partition is the
# same after every kernel restart (random_split uses the torch RNG).
split_generator = torch.Generator().manual_seed(RANDOM_STATE)
train_ds, val_ds = random_split(ds, (len(ds) - VAL_SIZE, VAL_SIZE), generator=split_generator)

# The first two entries of a sample are the model input and the target.
in_dim = len(ds[0][0])
out_dim = len(ds[0][1])

print(f"Dataset size: {len(ds)}")
print(f"  Sample dimension: {in_dim}")
print(f"  Target dimension: {out_dim}")
print()
print(f"Train dataset size: {len(train_ds)}")
print()
print(f"Validation dataset size: {len(val_ds)}")

Dataset size: 150000
  Sample dimension: 31
  Target dimension: 1

Train dataset size: 125000

Validation dataset size: 25000


In [6]:
# Held-out noised test set; exposed under both names used later in the notebook.
test_dataset = TenBarsCantileverTrussSingleEADataset("./data/dataset/real_cantilever_test/data.hdf5")
test_ds = test_dataset

# The reference structure is the last sample of the test set.
last_index = len(test_dataset) - 1
reference = test_dataset[last_index]

In [None]:
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000/")
mlflow.set_experiment("MLP_Cantilever_noise_test")

# Regression metrics; only MAPE is used in this cell.
CCC = R.ConcordanceCorrCoef().to(device)
R2 = R.R2Score(multioutput='uniform_average').to(device)
MAPE = R.MeanAbsolutePercentageError().to(device)
MSE = R.MeanSquaredError().to(device)

print("Starting training")

with mlflow.start_run():
    # Hyper-parameters of the foundation model.
    LR = 1e-4
    N_NEURONS = 40
    N_LAYERS = 3
    N_EPOCHS = 2_500
    BATCH_SIZE = 2048
    ACTIVATION = nn.Tanh
    ACTIVATION_PARAMS = {}

    device = torch.device(
        'cuda' if torch.cuda.is_available()
        else 'mps' if torch.backends.mps.is_available()
        else 'cpu'
    )

    model = MultiLayerPerceptron(in_dim, out_dim, N_NEURONS, N_LAYERS, ACTIVATION,
                                 ACTIVATION_PARAMS).to(device)

    # Number of trainable parameters, logged as the model "capacity".
    n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    mlflow.log_params({
        'n_neurons': N_NEURONS, 'n_layers': N_LAYERS, 'lr': LR,
        'activation': ACTIVATION.__name__, "capacity": n_params, "n_epochs": N_EPOCHS,
    })

    train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=BATCH_SIZE, drop_last=False)
    test_dl = DataLoader(test_ds, batch_size=BATCH_SIZE, drop_last=False)

    # Fit the input/target scalers incrementally on the (noise-free) training set.
    x_scaler = StandardScaler(in_dim).to(device)
    y_scaler = StandardScaler(out_dim).to(device)
    for x, y, _, _, _ in train_dl:
        x_scaler.partial_fit(x.to(device))
        y_scaler.partial_fit(y.to(device))

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)

    # Per-epoch histories (filled at the end of every epoch).
    train_losses = []
    val_losses = []
    test_losses = []
    train_MAPE = []
    val_MAPE = []
    test_MAPE = []

    # Multiplicative input noise drawn from N(1, std), applied to training batches only.
    noise = True
    std = 0.0025
    normal = Normal(1, std)
    for epoch in range(N_EPOCHS):
        model.train()
        train_loss_epoch = []
        train_MAPE_epoch = []

        for batch in train_dl:
            x, y, _, _, _ = batch
            x, y = x.to(device), y.to(device)

            if noise:
                # One independent factor per element of the batch.
                e = normal.sample(sample_shape=x.shape).to(device)
                x = x * e

            x = x_scaler.transform(x)
            y = y_scaler.transform(y)

            optimizer.zero_grad()
            y_pred = model(x)
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step()

            # MAPE is reported in the original (unscaled) target units.
            y_unscaled = y_scaler.inverse_transform(y).cpu().detach()
            y_pred_unscaled = y_scaler.inverse_transform(y_pred).cpu().detach()

            train_loss_epoch.append(loss.item())
            train_MAPE_epoch.append(MAPE(y_pred_unscaled, y_unscaled).item())

        # Evaluation passes: no gradients are needed, so skip building autograd graphs.
        model.eval()
        val_loss_epoch = []
        val_MAPE_epoch = []
        test_loss_epoch = []
        test_MAPE_epoch = []
        with torch.no_grad():
            for batch in val_dl:
                x, y, _, _, _ = batch
                x, y = x.to(device), y.to(device)

                x = x_scaler.transform(x)
                y = y_scaler.transform(y)

                y_pred = model(x)
                loss = criterion(y_pred, y)

                y_unscaled = y_scaler.inverse_transform(y).cpu().detach()
                y_pred_unscaled = y_scaler.inverse_transform(y_pred).cpu().detach()

                val_loss_epoch.append(loss.item())
                val_MAPE_epoch.append(MAPE(y_pred_unscaled, y_unscaled).item())

            for batch in test_dl:
                x, y, _, _, _ = batch
                x, y = x.to(device), y.to(device)

                x = x_scaler.transform(x)
                y = y_scaler.transform(y)

                y_pred = model(x)
                loss = criterion(y_pred, y)

                y_unscaled = y_scaler.inverse_transform(y).cpu().detach()
                y_pred_unscaled = y_scaler.inverse_transform(y_pred).cpu().detach()

                test_loss_epoch.append(loss.item())
                test_MAPE_epoch.append(MAPE(y_pred_unscaled, y_unscaled).item())

        mean_train_loss = np.mean(train_loss_epoch)
        mean_train_MAPE = np.mean(train_MAPE_epoch)

        mean_val_loss = np.mean(val_loss_epoch)
        mean_val_MAPE = np.mean(val_MAPE_epoch)

        mean_test_loss = np.mean(test_loss_epoch)
        mean_test_MAPE = np.mean(test_MAPE_epoch)

        train_losses.append(mean_train_loss)
        val_losses.append(mean_val_loss)
        test_losses.append(mean_test_loss)
        train_MAPE.append(mean_train_MAPE)
        val_MAPE.append(mean_val_MAPE)
        test_MAPE.append(mean_test_MAPE)

        # Logging
        mlflow.log_metrics({
            "train_loss": mean_train_loss,
            "train_mape": mean_train_MAPE,

            "val_loss": mean_val_loss,
            "val_mape": mean_val_MAPE,

            "test_loss": mean_test_loss,
            "test_mape": mean_test_MAPE
        }, step=epoch)

        if (epoch + 1) % 100 == 0:
            print(f"[Epoch] {epoch + 1:{len(str(N_EPOCHS))}d}/{N_EPOCHS:d}", end='  ')
            print(f"TRAIN", end='   ')
            print(f"Loss: {mean_train_loss:1.4f}", end='   ')
            print(f"MAPE: {mean_train_MAPE:1.4f}", end='   ')
            print(" ## ", end='')
            print(f"VALIDATION", end='   ')
            print(f"Loss: {mean_val_loss:1.4f}", end='   ')
            # Last field ends the line so that successive reports do not run together.
            print(f"MAPE: {mean_val_MAPE:1.4f}")

    # `x` and `y_pred` below are the last test batch (already scaled) and its prediction;
    # they are only used as examples to infer the input/output schemas.
    with torch.no_grad():
        signature = mlflow.models.infer_signature(x.cpu().detach().numpy(), model(x).cpu().detach().numpy())

    # Log all models
    mlflow.pytorch.log_model(
        pytorch_model=model,
        input_example=x.cpu().detach().numpy(),
        artifact_path='model',
        signature=signature,
    )

    signature = mlflow.models.infer_signature(x.cpu().detach().numpy(), x_scaler.transform(x).cpu().detach().numpy())
    mlflow.pytorch.log_model(
        pytorch_model=x_scaler,
        artifact_path='x_scaler',
        signature=signature,
    )

    signature = mlflow.models.infer_signature(y_pred.cpu().detach().numpy(),
                                              y_scaler.transform(y_pred).cpu().detach().numpy())
    mlflow.pytorch.log_model(
        pytorch_model=y_scaler,
        artifact_path='y_scaler',
        signature=signature,
    )

Starting training


# 5. Prediction on real data
As *real* data, we will use data corrupted by shared multiplicative noise:
$$\varepsilon \sim \mathcal N \left( \mu = 1, \sigma = 0.0025 \right)$$

so that $\hat x = x \cdot \varepsilon$ has a roughly 95% chance of lying within ±0.5% of the true value (i.e. within $2\sigma$), which is the same order of magnitude as the error observed with HBM sensors.

This noise will be applied to a set of data from which a subset will be extracted for fine-tuning. The noised features are:
- Displacement
- Bar strain
- Bar forces

The fine-tuning will be done on a dataset comprised of noised data from a single test to "capture" experiment "noise".

We will then test it on a different structure with similar loss distribution.

In [51]:
# Noised datasets: one experiment for fine-tuning, a different one for testing.
fine_tuning_dataset = TenBarsCantileverTrussSingleEADataset("./data/dataset/real_cantilever_train/data.hdf5")
test_dataset = TenBarsCantileverTrussSingleEADataset("./data/dataset/real_cantilever_test/data.hdf5")

# Keep every 5th fine-tuning sample; the whole test set is used for evaluation.
every_fifth = np.arange(len(fine_tuning_dataset))[::5]
train_ds = Subset(fine_tuning_dataset, every_fifth)
test_ds = test_dataset

# Reference structure: last sample of the test set.
reference = test_dataset[len(test_dataset) - 1]

In [48]:
# Regression metrics from torchmetrics, each moved to the active device.
CCC, R2, MAPE, MSE = (
    metric.to(device)
    for metric in (
        R.ConcordanceCorrCoef(),
        R.R2Score(multioutput='uniform_average'),
        R.MeanAbsolutePercentageError(),
        R.MeanSquaredError(),
    )
)

## a. Non fine-tuned prediction
Scores using the foundation model for prediction

In [10]:
class Model(nn.Module):
    """End-to-end predictor: scale the input, run the network, un-scale the output.

    Args:
        x_scaler: object exposing ``transform`` for the model inputs.
        model: the network applied to the scaled inputs.
        y_scaler: object exposing ``inverse_transform`` for the model outputs.
    """

    def __init__(self, x_scaler, model, y_scaler):
        super().__init__()
        self.x_scaler, self.model, self.y_scaler = x_scaler, model, y_scaler

    def forward(self, x):
        """Return the prediction for raw input ``x`` in the original target scale."""
        scaled_input = self.x_scaler.transform(x)
        scaled_output = self.model(scaled_input)
        return self.y_scaler.inverse_transform(scaled_output)

In [11]:
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000/")

# MLflow run holding the foundation model and its two scalers.
artifact_dir = 'runs:/32ba2bcaf225416e82c55ae552dbb628'

# Artifacts are loaded in the order expected by Model(x_scaler, model, y_scaler).
model = Model(*(
    mlflow.pytorch.load_model(f"{artifact_dir}/{artifact_name}")
    for artifact_name in ("x_scaler", "model", "y_scaler")
))

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

In [12]:
# Inference only: the baseline prediction does not need an autograd graph.
with torch.no_grad():
    y_pred_init = model(reference[0].to(device))

In [13]:
# MAPE of the non-fine-tuned prediction against the reference target.
reference_target = reference[1].to(device)
error_init = MAPE(y_pred_init.reshape(1).to(device), reference_target)

In [15]:
# One line each: prediction, expected value (both scaled by 1e-6) and error in percent.
print(
    f"Prediction: {y_pred_init.item() * 1e-6:.3f} MN",
    f"Expected: {reference[1].to(device).item() * 1e-6:.3f} MN",
    f"MAPE: {error_init * 100:.3f}%",
    sep="\n",
)

Prediction: 2053.750 MN
Expected: 420.000 MN
MAPE: 388.988%


## b. Fine-tuned model
We will fine-tune the foundation model using the subset of real data as input.


### I. Experiment on the size of fine-tuning set
These experiments will help us determine how many real examples are needed for *sufficient* fine-tuning.

##### Without PINN

In [45]:
def finetune_no_pinn(train_ds, test_ds, reference, train_size, n_epoch=100, verbose=False):
    """Fine-tune the foundation model with a data-only (MSE) loss and log the run to MLflow.

    The foundation network and its scalers are reloaded from the MLflow run
    hard-coded in ``artifact_dir``, so every call starts from the same weights.
    The scalers are used as loaded; only the network is optimised.

    Args:
        train_ds: dataset used for fine-tuning; each sample unpacks to five items,
            of which the first two (input, target) are used here.
        test_ds: dataset evaluated after every epoch (as a single batch).
        reference: one sample (same five-item layout) scored after every epoch.
        train_size: number of fine-tuning samples; logged and used to cap the batch size.
        n_epoch: number of fine-tuning epochs.
        verbose: if True, print a progress line every 100 epochs.

    Returns:
        None. All metrics are logged to the active MLflow experiment.
    """
    artifact_dir = 'runs:/32ba2bcaf225416e82c55ae552dbb628'
    x_scaler = mlflow.pytorch.load_model(f"{artifact_dir}/x_scaler")
    model = mlflow.pytorch.load_model(f"{artifact_dir}/model")
    y_scaler = mlflow.pytorch.load_model(f"{artifact_dir}/y_scaler")

    MAPE = R.MeanAbsolutePercentageError().to(device)

    with mlflow.start_run():
        # Architecture values are logged for bookkeeping only; the network itself is loaded above.
        LR = 1e-4
        N_NEURONS = 40
        N_LAYERS = 3
        N_EPOCHS = n_epoch
        BATCH_SIZE = min(8, train_size)
        ACTIVATION = nn.Tanh

        n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        mlflow.log_params({
            'n_neurons': N_NEURONS, 'n_layers': N_LAYERS, 'lr': LR,
            'activation': ACTIVATION.__name__, "capacity": n_params, "n_epochs": N_EPOCHS,
            'FEI': False, 'train_size': train_size
        })

        train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, drop_last=False)
        # The whole evaluation set is processed as one batch.
        val_dl = DataLoader(test_ds, batch_size=len(test_ds), shuffle=True, )

        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=LR)

        for epoch in range(N_EPOCHS):
            model.train()
            train_loss_epoch = []
            train_MAPE_epoch = []

            for batch in train_dl:
                x, y, _, _, _ = batch
                x, y = x.to(device), y.to(device)

                x = x_scaler.transform(x)
                y = y_scaler.transform(y)

                optimizer.zero_grad()
                y_pred = model(x)
                loss = criterion(y_pred, y)
                loss.backward()
                optimizer.step()

                # MAPE is computed in the original (unscaled) target units.
                y_unscaled = y_scaler.inverse_transform(y).cpu().detach()
                y_pred_unscaled = y_scaler.inverse_transform(y_pred).cpu().detach()

                train_loss_epoch.append(loss.item())
                train_MAPE_epoch.append(MAPE(y_pred_unscaled, y_unscaled).item())

            # Evaluation: gradients are not needed, so avoid building an autograd
            # graph over the full-size evaluation batch.
            model.eval()
            val_loss_epoch = []
            val_MAPE_epoch = []
            with torch.no_grad():
                for batch in val_dl:
                    x, y, _, _, _ = batch
                    x, y = x.to(device), y.to(device)

                    x = x_scaler.transform(x)
                    y = y_scaler.transform(y)

                    y_pred = model(x)
                    loss = criterion(y_pred, y)

                    y_unscaled = y_scaler.inverse_transform(y).cpu().detach()
                    y_pred_unscaled = y_scaler.inverse_transform(y_pred).cpu().detach()

                    val_loss_epoch.append(loss.item())
                    val_MAPE_epoch.append(MAPE(y_pred_unscaled, y_unscaled).item())

                # Reference structure score
                x, y, _, _, _ = reference
                x = x_scaler.transform(x.to(device))
                y_pred = model(x)
                y_pred = y_scaler.inverse_transform(y_pred)

                # Log a plain float rather than a device tensor.
                reference_MAPE = MAPE(y_pred, y.to(device)).item()

            mean_train_loss = np.mean(train_loss_epoch)
            mean_train_MAPE = np.mean(train_MAPE_epoch)

            mean_val_loss = np.mean(val_loss_epoch)
            mean_val_MAPE = np.mean(val_MAPE_epoch)

            # Logging
            mlflow.log_metrics({
                "train_loss": mean_train_loss,
                "train_mape": mean_train_MAPE,

                "val_loss": mean_val_loss,
                "val_mape": mean_val_MAPE,

                "reference_MAPE": reference_MAPE,
            }, step=epoch + 1)

            if (epoch + 1) % 100 == 0 and verbose:
                print(f"[Epoch] {epoch + 1:{len(str(N_EPOCHS))}d}/{N_EPOCHS:d}", end='  ')
                print(f"TRAIN", end='   ')
                print(f"Loss: {mean_train_loss:1.4f}", end='   ')
                print(f"MAPE: {mean_train_MAPE:1.4f}", end='   ')
                print(" ## ", end='')
                print(f"VALIDATION", end='   ')
                print(f"Loss: {mean_val_loss:1.4f}", end='   ')
                # Last field ends the line so that successive reports do not run together.
                print(f"MAPE: {mean_val_MAPE:1.4f}")

In [53]:
import logging

mlflow.set_tracking_uri(uri="http://127.0.0.1:5000/")
mlflow.set_experiment("MLP_Cantilever_finetuning_2")

# Only show MLflow messages of level WARNING and above during the sweep.
logging.getLogger("mlflow").setLevel(logging.WARNING)

print("Start")

# Sweep the fine-tuning set size (6, 16, 26, ...); each subset is spread
# evenly over train_ds with linspace.
n_available = len(train_ds)
for subset_size in range(6, n_available + 1, 10):
    idx = np.linspace(0, n_available - 1, subset_size, dtype=int)
    finetune_no_pinn(Subset(train_ds, idx), test_ds, reference, subset_size, n_epoch=100, verbose=False)

Start


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run chill-rook-883 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/f5074c84573c40cf8152840695ec5d36
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run zealous-snipe-211 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/cb51518c42974ed48a7b54e4133aee69
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run monumental-moose-759 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/29c30e4e72c7406b8dc05f607d3e1643
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run debonair-frog-566 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/7f1bb110c35f46d980c47b3b66ba0cfe
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run nervous-auk-821 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/605999669d9248d99c79c958288e5375
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run gifted-shark-33 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/d0a73867cc4b4a2bb28418fe878c61aa
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run caring-pug-58 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/55e39b9012484551bceec80269d98822
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run enchanting-hawk-441 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/fe63d01a4e2842d49097d212ee7d2787
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run agreeable-dog-711 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/c8876acfeeb046a1903a9cff493a85e5
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run whimsical-lamb-28 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/188e6a252138468abf49586c9ef4ce3f
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run blushing-midge-704 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/1dca719c37e14d8a92e74972fc0f013e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run invincible-boar-330 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/38b712509e6e43a6b7e386c937e778b9
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run auspicious-seal-605 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/c3070ae7e4e84c5ca1f4df5aa4f7cbee
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run puzzled-shrew-815 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/a2ca84d23554415e8c95fddbf3214fd9
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run placid-bug-742 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/d478f523703143ebb14038fc391930b6
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


##### With PINN

In [57]:
# Ten index pairs, one row per element, created directly on the target device.
# NOTE(review): presumably (start node, end node) of each bar — confirm against construct_k_from_ea.
elems = torch.tensor(
    [
        [0, 1], [1, 2],
        [3, 4], [4, 5],
        [1, 4], [2, 5],
        [0, 4], [3, 1],
        [1, 5], [4, 2],
    ],
    device=device,
)
# NOTE(review): presumably the indices of the constrained degrees of freedom — confirm.
supports = torch.tensor([0, 1, 6, 7], device=device)

In [58]:
def finetune_with_pinn(train_ds, val_ds, reference, train_size, n_epoch=100, verbose=False):
    """Fine-tune the foundation model with a data loss plus a physics loss and log to MLflow.

    The total loss is ``MSE(ea, ea_pred) + physics_loss / physics_loss_scale`` where the
    physics term is ``StiffnessToLoadLoss`` evaluated on the stiffness built by
    ``construct_k_from_ea`` from the un-scaled prediction, and ``physics_loss_scale`` is the
    value of the physics loss on the very first training batch.

    The foundation network and its scalers are reloaded from the MLflow run hard-coded in
    ``artifact_dir``, so every call starts from the same weights. Uses the module-level
    ``device``, ``elems`` and ``supports``.

    Args:
        train_ds: fine-tuning dataset; each sample unpacks to ``(data, ea, nodes, u, q)``.
        val_ds: dataset evaluated (never trained on) after every epoch, as a single batch.
        reference: one sample with the same layout, scored after every epoch.
        train_size: number of fine-tuning samples; logged and used to cap the batch size.
        n_epoch: number of fine-tuning epochs.
        verbose: if True, print a progress line every 10 epochs.

    Returns:
        None. All metrics are logged to the active MLflow experiment.
    """
    artifact_dir = 'runs:/32ba2bcaf225416e82c55ae552dbb628'
    x_scaler = mlflow.pytorch.load_model(f"{artifact_dir}/x_scaler")
    model = mlflow.pytorch.load_model(f"{artifact_dir}/model")
    y_scaler = mlflow.pytorch.load_model(f"{artifact_dir}/y_scaler")

    MAPE = R.MeanAbsolutePercentageError().to(device)

    with mlflow.start_run():
        # Architecture values are logged for bookkeeping only; the network itself is loaded above.
        LR = 1e-4
        N_NEURONS = 40
        N_LAYERS = 3
        N_EPOCHS = n_epoch
        BATCH_SIZE = min(8, train_size)
        ACTIVATION = nn.Tanh

        n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        mlflow.log_params({
            'n_neurons': N_NEURONS, 'n_layers': N_LAYERS, 'lr': LR,
            'activation': ACTIVATION.__name__, "capacity": n_params, "n_epochs": N_EPOCHS,
            'FEI': True, 'train_size': train_size
        })

        train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, drop_last=False)
        # The whole validation set is processed as one batch.
        val_dl = DataLoader(val_ds, batch_size=len(val_ds), shuffle=True, )

        criterion = nn.MSELoss()
        physics_criterion = StiffnessToLoadLoss()
        physics_loss_scale = None  # set from the first training batch
        optimizer = torch.optim.Adam(model.parameters(), lr=LR)

        for epoch in range(N_EPOCHS):
            model.train()
            train_loss_epoch = []
            train_data_loss_epoch = []
            train_physics_loss_epoch = []
            train_MAPE_epoch = []
            for batch in train_dl:
                data, ea, nodes, u, q = batch

                data, ea = data.to(device), ea.to(device)
                nodes, u, q = nodes.to(device), u.to(device), q.to(device)

                data = x_scaler.transform(data)
                ea = y_scaler.transform(ea)
                ea_unscaled = y_scaler.inverse_transform(ea)

                optimizer.zero_grad()
                ea_pred = model(data)
                # The stiffness is built from the prediction in its original scale.
                ea_pred_unscaled = y_scaler.inverse_transform(ea_pred)

                stiffness_pred = construct_k_from_ea(ea_pred_unscaled, nodes, elems, supports, device=device)

                data_loss = criterion(ea_pred, ea)
                physics_loss = physics_criterion(stiffness_pred, u, q)

                # Normalise the physics term by its initial magnitude so both terms start
                # on a comparable scale; fall back to 1.0 to avoid dividing by zero.
                if physics_loss_scale is None:
                    physics_loss_scale = physics_loss.item() or 1.0
                physics_loss = physics_loss / physics_loss_scale

                loss = data_loss + physics_loss
                loss.backward()
                optimizer.step()

                train_loss_epoch.append(loss.item())
                train_data_loss_epoch.append(data_loss.item())
                train_physics_loss_epoch.append(physics_loss.item())
                train_MAPE_epoch.append(MAPE(ea_pred_unscaled, ea_unscaled).item())

            # Validation must never update the weights: no backward pass, no optimizer
            # step, and no autograd graph.
            model.eval()
            val_loss_epoch = []
            val_data_loss_epoch = []
            val_physics_loss_epoch = []
            val_MAPE_epoch = []
            with torch.no_grad():
                for batch in val_dl:
                    data, ea, nodes, u, q = batch

                    data, ea = data.to(device), ea.to(device)
                    nodes, u, q = nodes.to(device), u.to(device), q.to(device)

                    data = x_scaler.transform(data)
                    ea = y_scaler.transform(ea)
                    ea_unscaled = y_scaler.inverse_transform(ea)

                    ea_pred = model(data)
                    ea_pred_unscaled = y_scaler.inverse_transform(ea_pred)

                    stiffness_pred = construct_k_from_ea(ea_pred_unscaled, nodes, elems, supports, device=device)

                    data_loss = criterion(ea_pred, ea)
                    physics_loss = physics_criterion(stiffness_pred, u, q)
                    physics_loss = physics_loss / physics_loss_scale

                    loss = data_loss + physics_loss

                    val_loss_epoch.append(loss.item())
                    val_data_loss_epoch.append(data_loss.item())
                    val_physics_loss_epoch.append(physics_loss.item())
                    val_MAPE_epoch.append(MAPE(ea_pred_unscaled, ea_unscaled).item())

                # Reference structure score
                data, ea, _, _, _ = reference
                data = x_scaler.transform(data.to(device))
                ea_pred = model(data)
                ea_pred = y_scaler.inverse_transform(ea_pred)

                # Log a plain float rather than a device tensor.
                reference_MAPE = MAPE(ea_pred, ea.to(device)).item()

            mean_train_loss = np.mean(train_loss_epoch)
            mean_train_MAPE = np.mean(train_MAPE_epoch)
            mean_train_data_loss = np.mean(train_data_loss_epoch)
            mean_train_physics_loss = np.mean(train_physics_loss_epoch)

            mean_val_loss = np.mean(val_loss_epoch)
            mean_val_MAPE = np.mean(val_MAPE_epoch)
            mean_val_data_loss = np.mean(val_data_loss_epoch)
            mean_val_physics_loss = np.mean(val_physics_loss_epoch)

            # Logging
            mlflow.log_metrics({
                "train_loss": mean_train_loss,
                "train_mape": mean_train_MAPE,
                "train_data_loss": mean_train_data_loss,
                "train_physics_loss": mean_train_physics_loss,

                "val_loss": mean_val_loss,
                "val_mape": mean_val_MAPE,
                "val_data_loss": mean_val_data_loss,
                "val_physics_loss": mean_val_physics_loss,

                "reference_MAPE": reference_MAPE,
            }, step=epoch + 1)

            if (epoch + 1) % 10 == 0 and verbose:
                print(f"[Epoch] {epoch + 1:{len(str(N_EPOCHS))}d}/{N_EPOCHS:d}", end='  ')
                print(f"TRAIN", end='   ')
                print(f"Loss: {mean_train_loss:1.4f}", end='   ')
                print(f"Data Loss: {mean_train_data_loss:1.4f}", end='   ')
                print(f"Physics Loss: {mean_train_physics_loss:1.4f}", end='   ')
                print(f"MAPE: {mean_train_MAPE:1.4f}", end='   ')
                print(" ## ", end='')
                print(f"VALIDATION", end='   ')
                print(f"Loss: {mean_val_loss:1.4f}", end='   ')
                print(f"Data Loss: {mean_val_data_loss:1.4f}", end='   ')
                print(f"Physics Loss: {mean_val_physics_loss:1.4f}", end='   ')
                # Last field ends the line so that successive reports do not run together.
                print(f"MAPE: {mean_val_MAPE:1.4f}")

In [59]:
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000/")
mlflow.set_experiment("MLP_Cantilever_finetuning_2")

print("Start")
# Same size sweep as the data-only experiment (6, 16, 26, ... samples),
# this time with the physics-informed loss and 200 epochs per run.
pool_size = len(train_ds)
for n_samples in range(6, pool_size + 1, 10):
    idx = np.linspace(0, pool_size - 1, n_samples, dtype=int)
    finetune_with_pinn(Subset(train_ds, idx), test_ds, reference, n_samples, n_epoch=200, verbose=False)

Start


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run silent-bee-498 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/78e16cf076cc4ecbaa4aff8b50acb327
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run skillful-rook-196 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/96bcd775ebbb475ea8d9490fbbfc70b4
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run glamorous-auk-939 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/8ff778d74cbf47eaa1e0f57ccc5a47cf
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run rumbling-rat-79 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/a198d220fbf249c9bdec22dbba07fb0a
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run angry-crow-792 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/d1fd4e282f714f02af622bfe125efab6
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run honorable-bee-525 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/55d86c5b47944a6e9c3bfc50b1723025
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run dapper-newt-237 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/b56f8cb4a2d943168ed2053ea9ac02e4
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run redolent-croc-847 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/879bf802014f432eb6d2da5f4c507c64
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run luxuriant-panda-802 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/9665ae544fc84553874c0c56ba3b970a
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run bustling-perch-756 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/151f0561f9b3427386aa0b5ddef63cb4
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run rogue-chimp-496 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/5f89260a344d4dd68c6b2638276e9b2c
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run amazing-quail-385 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/1a243d34408b4184b5f8fbecbe495464
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run debonair-hawk-563 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/bc5b452f45614b998d420f9011459ddf
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run victorious-elk-296 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/8ed8a2bb840d4627aea7c7eebe0f32ff
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

🏃 View run likeable-whale-398 at: http://127.0.0.1:5000/#/experiments/792915948073873252/runs/264905066621435c83986a7726fcc048
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/792915948073873252


# Final test

In [None]:
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000/")

# NOTE(review): the fine-tuning sweeps in this notebook log to
# "MLP_Cantilever_finetuning_2"; confirm that the un-suffixed experiment is the
# one intended here.
EXPERIMENT_NAME = "MLP_Cantilever_finetuning"
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
if experiment is None:
    # get_experiment_by_name returns None for an unknown name; fail with a clear message
    # instead of an AttributeError on `.experiment_id`.
    raise ValueError(f"MLflow experiment {EXPERIMENT_NAME!r} not found on the tracking server")
runs_df = mlflow.search_runs(experiment_ids=[experiment.experiment_id])

# Keep only the runs that carry no 'health' tag, then collect their names.
runs_df = runs_df[runs_df['tags.health'].isnull()]
all_run_names = runs_df["tags.mlflow.runName"].dropna().tolist()
