In [None]:
# ============================================================================
# PHASE 3: EXPORT VAL PREDICTIONS (ALIGNMENT-SAFE)
# ============================================================================
import torch
import torchvision
import numpy as np
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm

# Framed banner announcing the export phase (blank line, rule, title, rule).
print("\n" + "=" * 80, "EXPORTING VAL PREDICTIONS FOR FUSION", "=" * 80, sep="\n")

from src.data.label_mapping import CANONICAL_CLASSES, reorder_probs_to_canonical
from src.export.model_exporter import export_predictions

# Index-returning dataset wrapper for alignment safety
class IndexedImageDataset(Dataset):
    """Image dataset that yields ``(image, real_idx)`` pairs for alignment checks.

    Each item is loaded the same way ``ImageDataset`` loads it for training and
    validation: read the file, scale to ``[0, 1]``, resize to ``resize`` and
    only then apply ``transform``. Without the resize step the exported
    probabilities would come from a different preprocessing pipeline than the
    one the model was trained and validated with.
    """

    def __init__(self, X_paths, indices, transform=None, resize=(128, 128)):
        """
        Args:
            X_paths: Full path array for entire df_full
            indices: Subset indices to use (e.g., splits["val_idx"])
            transform: Image transform applied after the resize step
            resize: ``(height, width)`` applied before ``transform``; the
                default mirrors the 128x128 resize done by ``ImageDataset``.
                Pass ``None`` to skip resizing.
        """
        self.X_paths = X_paths
        self.indices = indices
        self.transform = transform
        self.resize = resize

    def __len__(self):
        """Number of samples in the subset (not in the full path array)."""
        return len(self.indices)

    def __getitem__(self, i):
        """Return ``(image_tensor, real_idx)`` for the i-th entry of ``indices``."""
        # Position in df_full / X_paths, returned alongside the image so the
        # caller can verify row alignment after batching.
        real_idx = int(self.indices[i])
        img_path = self.X_paths[real_idx]

        # Load and preprocess image (read -> float/255 -> resize -> transform)
        x = torchvision.io.read_image(img_path)
        x = x.float() / 255

        if self.resize is not None:
            # Bilinear interpolation by default, same call as in ImageDataset
            x = torchvision.transforms.functional.resize(x, self.resize)

        if self.transform:
            x = self.transform(x)

        return x, real_idx

# One image path per df_full row, so a positional row index doubles as an index
# into the path array.
X_full = build_image_paths(df_full, np.arange(len(df_full)))
assert len(X_full) == len(df_full), (
    f"Path array length mismatch: {len(X_full)} != {len(df_full)}"
)

# Validation subset that also reports each sample's df_full position.
val_dataset_indexed = IndexedImageDataset(
    X_full,
    splits["val_idx"],
    transform=preprocess,
)
# shuffle=False: the order check further down relies on batches arriving in
# splits["val_idx"] order.
val_loader_indexed = DataLoader(
    val_dataset_indexed,
    batch_size=64,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)

# Restore the best checkpoint.
# - map_location=device lets a state dict saved on one device load on another.
# - weights_only=True restricts unpickling to tensors/containers, matching the
#   other checkpoint loads in this notebook.
print(f"Loading best model from {save_dir}/best_model.pt")
model.load_state_dict(
    torch.load(f"{save_dir}/best_model.pt", map_location=device, weights_only=True)
)
# Inference mode for dropout / batch-norm layers
model.eval()

# Per-batch softmax probabilities and the df_full positions reported by the
# loader for those same rows.
val_probs_list = []
val_seen_idx_list = []

print("Running inference on val split...")
with torch.no_grad():
    for images, indices in tqdm(val_loader_indexed, desc="Val Inference"):
        outputs = model(images.to(device))
        probs = outputs.softmax(dim=1)
        val_probs_list.append(probs.cpu().numpy())
        val_seen_idx_list.append(indices.cpu().numpy())

# Stitch the batches back together: row k of val_probs belongs to val_idx[k]
val_probs = np.concatenate(val_probs_list, axis=0)
val_idx = np.concatenate(val_seen_idx_list)

# Defensive checks: what the loader produced must be exactly splits["val_idx"]
# (same count, same members, same order).
print("\nAlignment verification:")
print(f"  Collected {len(val_idx)} samples")
print(f"  Expected {len(splits['val_idx'])} samples")

assert len(val_idx) == len(splits["val_idx"]), (
    f"Sample count mismatch: {len(val_idx)} != {len(splits['val_idx'])}"
)
assert np.array_equal(
    np.sort(val_idx), np.sort(splits["val_idx"])
), "Index set mismatch"
assert np.array_equal(
    val_idx, splits["val_idx"]
), "Index order mismatch (shuffle=False violation)"
print("  ✓ Alignment verified")

# Ground-truth labels for the collected rows. They are read from y_full, the
# "prdtypecode" Series loaded from the labels CSV and used to fit the encoder;
# df_full is loaded from the separate features CSV, so looking "prdtypecode" up
# there is not guaranteed to work. Both are indexed positionally with the same
# split indices elsewhere in this notebook.
val_labels = y_full.iloc[val_idx].values

# Verify encoder classes match model output shape
assert len(encoder.classes_) == val_probs.shape[1], f"Encoder classes ({len(encoder.classes_)}) != probs shape[1] ({val_probs.shape[1]})"

# Reorder probabilities to canonical class order
print("\nReordering probabilities to canonical class order...")
val_probs_aligned = reorder_probs_to_canonical(val_probs, encoder.classes_, CANONICAL_CLASSES)
print(f"  Input shape: {val_probs.shape} → Output shape: {val_probs_aligned.shape}")

# Hand the aligned probabilities, row indices and labels to export_predictions.
print("\nExporting predictions...")
export_result = export_predictions(
    out_dir="artifacts/exports",
    model_name="resnet50",
    split_name="val",
    idx=val_idx,
    split_signature=sig,
    probs=val_probs_aligned,
    classes=CANONICAL_CLASSES,
    y_true=val_labels,
    # Free-form provenance stored with the export
    extra_meta=dict(
        model_architecture="ResNet50",
        checkpoint=f"{save_dir}/best_model.pt",
        image_size="128x128",
        batch_size=64,
    ),
)

# Echo selected fields of the export result, one aligned line per field.
print("\n" + "="*80)
print("EXPORT SUMMARY")
print("="*80)
for summary_label, result_key in (
    ("NPZ file:     ", "npz_path"),
    ("Metadata:     ", "meta_json_path"),
    ("Classes_fp:   ", "classes_fp"),
    ("Split_sig:    ", "split_signature"),
    ("Num samples:  ", "num_samples"),
):
    print(f"{summary_label}{export_result[result_key]}")
print("="*80)

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split
from torchvision.models import resnet50
from sklearn.preprocessing import LabelEncoder
from tqdm.notebook import tqdm


In [None]:
def load_csv_from_gdrive(share_url: str, **read_csv_kwargs) -> pd.DataFrame:
    """Load a CSV file from a shared Google Drive link.

    Args:
        share_url: Google Drive share link. Both the ``.../file/d/<id>/...``
            form and the ``...?id=<id>`` form are accepted.
        **read_csv_kwargs: Forwarded unchanged to ``pandas.read_csv``.

    Returns:
        The parsed CSV as a DataFrame.

    Raises:
        ValueError: If no file id can be found in ``share_url``.
    """
    import re  # local import: only needed to parse the link

    # Prefer the "/d/<id>" path segment, fall back to an "id=<id>" query value.
    # The character classes stop at "/", "?", "&" and "#" so trailing path or
    # query parts never leak into the id.
    match = (
        re.search(r"/d/([^/?#]+)", share_url)
        or re.search(r"[?&]id=([^&#]+)", share_url)
    )
    if match is None:
        raise ValueError(f"Could not find a Google Drive file id in: {share_url!r}")

    file_id = match.group(1)
    download_url = f"https://drive.google.com/uc?id={file_id}"
    return pd.read_csv(download_url, **read_csv_kwargs)

# Google Drive share links in the ".../file/d/<id>/view" form expected by
# load_csv_from_gdrive.
# X_TEST_URL is not referenced in the cells shown here; presumably the test
# features — TODO confirm.
X_TEST_URL = "https://drive.google.com/file/d/1jHFn8xfMdRUX6W4AQUZq4zIOvkR-biuK/view?usp=sharing"
# Loaded into df_full (features table).
X_TRAIN_URL = "https://drive.google.com/file/d/1geSiJTTjamysiSbJ8-W9gR1kv-x6HyEd/view?usp=drive_link"
# Loaded into y_full (its "prdtypecode" column holds the labels).
Y_TRAIN_URL = "https://drive.google.com/file/d/16czWmLR5Ff0s5aYIqy1rHT7hc6Gcpfw3/view?usp=sharing"

In [None]:
# Load full dataset
import numpy as np
import hashlib
import json

# Features and labels come from two separate CSVs; the rest of the notebook
# indexes both positionally with the same split indices.
df_full = load_csv_from_gdrive(X_TRAIN_URL, index_col=0)
labels_frame = load_csv_from_gdrive(Y_TRAIN_URL, index_col=0)
y_full = labels_frame['prdtypecode']

print(f"Total data loaded: {len(df_full):,} samples")
print(f"Unique classes: {y_full.nunique()}")

# Canonical train/val/test index arrays plus a signature identifying them
from src.data.split_manager import load_splits, split_signature

splits = load_splits(verbose=True)
sig = split_signature(splits)

print("\nCanonical split sizes:")
for split_label, split_key in (
    ("Train:", "train_idx"),
    ("Val:  ", "val_idx"),
    ("Test: ", "test_idx"),
):
    print(f"  {split_label} {len(splits[split_key]):,}")
print(f"\nSplit signature: {sig}")

# Label encoding: the encoder only ever sees DEV rows (train + val), never test.
print("\n" + "="*80)
print("LABEL ENCODING (FIT ON DEV ONLY)")
print("="*80)

dev_idx = np.concatenate((splits["train_idx"], splits["val_idx"]))

from sklearn.preprocessing import LabelEncoder
# fit() returns the encoder itself, so construction and fitting can be chained
encoder = LabelEncoder().fit(y_full.iloc[dev_idx])

num_classes = len(encoder.classes_)
print("LabelEncoder fitted on DEV only (train+val).")
print(f"Number of classes: {num_classes}")
assert num_classes == 27, f"Expected 27 classes, got {num_classes}"

# Short fingerprint of the class order: first 16 hex chars of the SHA-256 of
# the JSON-serialised class list.
classes_json = json.dumps(encoder.classes_.tolist())
classes_digest = hashlib.sha256(classes_json.encode("utf-8"))
classes_fp = classes_digest.hexdigest()[:16]
print(f"Classes fingerprint: {classes_fp}")
print("="*80)

# Root folder used as the prefix of every image path, and short aliases for
# the three canonical index arrays.
IMG_ROOT_PATH = "/content/images"
train_indices, val_indices, test_indices = (
    splits[split_key] for split_key in ("train_idx", "val_idx", "test_idx")
)

# Create image path arrays
def build_image_paths(df, indices):
    """Build the image file path for each selected row of ``df``.

    Args:
        df: DataFrame with ``imageid`` and ``productid`` columns.
        indices: Positional row indices (used with ``df.iloc``).

    Returns:
        NumPy array of path strings, one per selected row, in the order of
        ``indices``. Paths are rooted at the module-level ``IMG_ROOT_PATH``.
    """
    rows = df.iloc[indices]
    image_ids = rows['imageid'].astype('str')
    product_ids = rows['productid'].astype('str')

    # <root>/images/image_train/image_<imageid>_product_<productid>.jpg
    prefix = IMG_ROOT_PATH + "/images/image_train/image_"
    return np.array(prefix + image_ids + "_product_" + product_ids + ".jpg")

# Image paths for each split, in train / val / test order
X_train, X_val, X_test = (
    build_image_paths(df_full, split_idx)
    for split_idx in (train_indices, val_indices, test_indices)
)

# Integer-encoded labels for each split, same order
y_train, y_val, y_test = (
    encoder.transform(y_full.iloc[split_idx])
    for split_idx in (train_indices, val_indices, test_indices)
)

# Sanity check on split sizes: per-split share of the total, then a hard
# assertion on the expected overall sample count.
total_samples = sum(len(paths) for paths in (X_train, X_val, X_test))
pct_train, pct_val, pct_test = (
    100 * len(paths) / total_samples for paths in (X_train, X_val, X_test)
)

print("\nData splits created:")
for row_prefix, split_paths, split_pct in (
    ("  Training:   ", X_train, pct_train),
    ("  Validation: ", X_val, pct_val),
    ("  Test:       ", X_test, pct_test),
):
    print(f"{row_prefix}{len(split_paths):6,} samples ({split_pct:.1f}%)")
print(f"  Total:      {total_samples:6,}")
assert total_samples == 84916, f"Expected 84916, got {total_samples}"

print(f"\nSplit signature: {sig}")
print(f"Classes fingerprint: {classes_fp}")
print("\nIMPORTANT: Using canonical splits for consistent evaluation")


In [None]:
!pip install -q gdown

# Google Drive file id for images.zip
FILE_ID = "15ZkS0iTQ7j3mHpxil4mABlXwP-jAN_zi"

# Download images.zip into /content/tmp
!mkdir -p /content/tmp /content/images
!gdown --id $FILE_ID -O /content/tmp/images.zip

# Unzip into /content/images
!unzip -q -o /content/tmp/images.zip -d /content/images

IMG_ROOT = "/content/images"

Downloading...
From (original): https://drive.google.com/uc?id=15ZkS0iTQ7j3mHpxil4mABlXwP-jAN_zi
From (redirected): https://drive.google.com/uc?id=15ZkS0iTQ7j3mHpxil4mABlXwP-jAN_zi&confirm=t&uuid=c5758e2a-196e-4e0f-a623-1a0e0ff52e54
To: /content/tmp/images.zip
100% 2.56G/2.56G [00:46<00:00, 55.2MB/s]


In [None]:
from google.colab import drive
import os

# Mount Google Drive, then set up the Drive locations used by the notebook.
drive.mount("/content/drive/")

# NOTE(review): this path spells the Drive root "My Drive" while the two below
# use "MyDrive" — kept as-is, confirm both resolve on the target runtime.
file_path = "/content/drive/My Drive/DS_rakuten/train_tags.csv"
tags = pd.read_csv(file_path)

# TensorBoard run directories are created underneath this folder
tensorboard_folder = "/content/drive/MyDrive/DS_rakuten/tensorboard_backup/"

# Checkpoint directory, created if it does not exist yet
save_dir = "/content/drive/MyDrive/DS_rakuten/checkpoints"
os.makedirs(save_dir, exist_ok=True)

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


# Importation du modèle et définition des Datasets utilisés

In [None]:
from torchvision.models import ResNet50_Weights, resnet50
from torchsummary import summary

import torch
import torch.nn as nn

# ResNet-50 initialised with torchvision's DEFAULT pretrained weights
weights = ResNet50_Weights.DEFAULT
model = resnet50(weights=weights)

# Replace the classifier head:
# - dropout to limit overfitting
# - batch normalisation to reduce sensitivity to the learning rate and overfitting
model.fc = nn.Sequential(
    nn.BatchNorm1d(2048),
    nn.Dropout(0.3),
    nn.Linear(2048, 512),
    nn.ReLU(),
    nn.BatchNorm1d(512),
    nn.Dropout(0.3),
    nn.Linear(512, num_classes)
)

# Use the GPU when one is attached, otherwise fall back to CPU so the notebook
# still runs (slowly) instead of failing on the first .to(device).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [None]:
from torch.utils.data import DataLoader, Dataset
import torch
import torchvision
from PIL import Image
from torchvision import transforms


class ImageDataset(Dataset):
    """Map-style dataset pairing image files with labels.

    ``X`` holds image paths and ``y`` the matching labels. Each image is read,
    scaled to ``[0, 1]``, resized to 128x128 and finally passed through
    ``transform`` when one is given.
    """

    def __init__(self, X, y, transform=None):
        self.X, self.y, self.transform = X, y, transform

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        pixels = torchvision.io.read_image(self.X[idx]).float() / 255
        # resize() interpolates bilinearly by default
        pixels = torchvision.transforms.functional.resize(pixels, (128, 128))
        label = self.y[idx]
        if not self.transform:
            return pixels, label
        return self.transform(pixels), label


# Preprocessing pipeline bundled with the pretrained weights
preprocess = weights.transforms()

# Training-time augmentation, followed by the same preprocessing as validation
augmentations = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    # small random zoom
    transforms.RandomResizedCrop(128, scale=(0.9, 1)),
]
train_transforms = transforms.Compose([*augmentations, preprocess])

# Training batches are shuffled; validation keeps the dataset order
train_set = ImageDataset(X_train, y_train, transform=train_transforms)
val_set = ImageDataset(X_val, y_val, transform=preprocess)
train_loader = DataLoader(train_set, batch_size=64, shuffle=True, num_workers=0)
val_loader = DataLoader(val_set, batch_size=64, shuffle=False, num_workers=0)

# Entraînement du CLF uniquement

In [None]:


# Freeze every parameter of the network ...
for param in model.parameters():
    param.requires_grad = False

# ... then re-enable the classifier head: this stage trains the head only, and
# with the head frozen as well there would be nothing left to optimise.
for param in model.fc.parameters():
    param.requires_grad = True

# Layer-by-layer summary for 3x128x128 inputs, run on the device the model is on
summary(model, input_size=(3,128,128), device=device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 64, 64]           9,408
       BatchNorm2d-2           [-1, 64, 64, 64]             128
              ReLU-3           [-1, 64, 64, 64]               0
         MaxPool2d-4           [-1, 64, 32, 32]               0
            Conv2d-5           [-1, 64, 32, 32]           4,096
       BatchNorm2d-6           [-1, 64, 32, 32]             128
              ReLU-7           [-1, 64, 32, 32]               0
            Conv2d-8           [-1, 64, 32, 32]          36,864
       BatchNorm2d-9           [-1, 64, 32, 32]             128
             ReLU-10           [-1, 64, 32, 32]               0
           Conv2d-11          [-1, 256, 32, 32]          16,384
      BatchNorm2d-12          [-1, 256, 32, 32]             512
           Conv2d-13          [-1, 256, 32, 32]          16,384
      BatchNorm2d-14          [-1, 256,

In [None]:
# Print every registered sub-module, each preceded by a dashed rule.
rule = '-' * 50
for module_name, submodule in model.named_modules():
    print(rule)
    print(module_name, ":", submodule)

--------------------------------------------------
 : ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
      

# Entraînement du CLF + bloc 4

In [None]:
# Reload the best checkpoint of the classifier-only stage.
# map_location=device keeps the load working when the checkpoint was written
# from a different device than the one in use now.
model.load_state_dict(
    torch.load(f"{save_dir}/best_model_clf.pt", map_location=device, weights_only=True)
)
model.eval();

In [None]:
# Unfreeze layer4: requires_grad_ flips the flag on every parameter of the
# module (the trailing ";" hides the module repr the call returns).
model.layer4.requires_grad_(True);

In [None]:
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir=tensorboard_folder+"fine_tuning_l4/")

# This stage fine-tunes layer4 together with the classifier head. Make sure
# both are trainable: Adam skips parameters that never receive a gradient, so a
# module left frozen by an earlier cell would silently stay unchanged.
for stage_module in (model.layer4, model.fc):
    stage_module.requires_grad_(True)

# Parameter groups with their own learning rates (smaller for the backbone block)
param_groups = [
    {"params": model.layer4.parameters(), "lr": 1e-4},
    {"params": model.fc.parameters(),      "lr": 1e-3}
]

optimizer = optim.Adam(param_groups)

# Multiply the learning rates by 0.2 when the validation loss stops improving
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                mode='min',
                                                factor=0.2,
                                                patience=1)

early_stopper = EarlyStopper(patience=3, min_delta=1e-4)


epochs = 25
best_val_loss = float('inf')

try:
    for epoch in range(epochs):

        model.train()
        loss_total = 0
        progress_bar = tqdm(
            train_loader, desc="Epoch {:1d}".format(epoch+1), leave=True, disable=False
        )

        for i, (X_batch, y_batch) in enumerate(progress_bar):

            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            optimizer.zero_grad()

            y_pred = model(X_batch)

            loss = criterion(y_pred, y_batch)

            loss.backward()
            # Cap the global gradient norm at 1.0 before the update
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            loss_total += loss.item()

            # Running mean of the training loss over the batches seen so far
            progress_bar.set_postfix(
                {
                    "training_loss": "{:.3f}".format(loss_total/(i+1))}
            )

        # Evaluate on the validation set; eval mode switches layers such as
        # batch-norm and dropout to their inference behaviour
        model.eval()
        val_loss, val_f1 = evaluate()
        train_loss = loss_total/len(train_loader)
        # Report the losses at the end of the epoch
        print(f"Epoch {epoch+1}/{epochs}, Training Loss: {train_loss:.3f}, Validation Loss: {val_loss:.3f}, Validation f1 weighted: {val_f1:.3f}")
        # Keep the checkpoint with the lowest validation loss
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), f"{save_dir}/best_model_ft_l4.pt")
        # Log to TensorBoard
        writer.add_scalar("Loss/train", train_loss, epoch)
        writer.add_scalar("Loss/validation", val_loss, epoch)
        writer.add_scalar("F1/validation", val_f1, epoch)
        # Stop the whole run if early stopping triggers
        if early_stopper.early_stop(val_loss):
            break
        # Scheduler update
        scheduler.step(val_loss)
finally:
    # Flush and release the TensorBoard event file even when the run is
    # interrupted, so the logged scalars are not lost.
    writer.close()

# Entraînement du CLF + bloc 4 + bloc 3

In [None]:
# Reload the checkpoint with the best validation loss.
# NOTE(review): the original comment speaks of resuming from the *previous*
# stage, yet this file name is the one written by the layer3+layer4 training
# loop — confirm which checkpoint is intended.
# map_location=device keeps the load working across devices.
model.load_state_dict(
    torch.load(f"{save_dir}/best_model_loss_ft_l4_l3.pt", map_location=device, weights_only=True)
)
model.eval();

In [None]:
# Loss function used by the training loops and by evaluate()
criterion = nn.CrossEntropyLoss()

# Quick forward pass on a single training batch
X_batch, y_batch = next(iter(train_loader))
y_pred = model(X_batch.to(device))

# Last expression of the cell: displays the loss on that batch
criterion(y_pred, y_batch.to(device))

tensor(0.3751, device='cuda:0', grad_fn=<NllLossBackward0>)

In [None]:
from sklearn.metrics import f1_score

class EarlyStopper:
    """Signal when the validation loss has stopped improving.

    A loss strictly below the best value seen so far becomes the new best and
    resets the counter. A loss above ``best + min_delta`` increments the
    counter. Anything in between leaves the state untouched.
    """

    def __init__(self, patience=1, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = float('inf')

    def early_stop(self, validation_loss):
        """Update the state with ``validation_loss``; return True to stop."""
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
            return False

        tolerated = self.min_validation_loss + self.min_delta
        if validation_loss > tolerated:
            self.counter += 1
            return self.counter >= self.patience

        return False

def evaluate():
    """Score the global ``model`` on the global ``val_loader``.

    Gradients are disabled for the pass. The function does not change the
    model's train/eval mode itself.

    Returns:
        tuple: ``(mean_loss, weighted_f1)`` where ``mean_loss`` is the average
        of the per-batch ``criterion`` values and ``weighted_f1`` is
        ``f1_score(..., average='weighted')`` over all validation samples.
    """
    running_loss = 0.0
    predicted, expected = [], []

    with torch.no_grad():
        for batch_images, batch_labels in val_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            running_loss += criterion(logits, batch_labels).item()

            # logits -> class ids, moved to CPU for sklearn
            predicted.extend(logits.argmax(dim=1).cpu().numpy())
            expected.extend(batch_labels.cpu().numpy())

    # mean over batches, not over samples
    mean_loss = running_loss / len(val_loader)
    weighted_f1 = f1_score(expected, predicted, average='weighted')

    return mean_loss, weighted_f1

# Score the currently loaded weights on the validation loader (eval mode first,
# since evaluate() does not switch modes itself).
model.eval()
val_loss, val_f1 = evaluate()

In [None]:
# Current learning rate of each optimizer parameter group
for group_no, group in enumerate(optimizer.param_groups):
    print("LR groupe {}: {}".format(group_no, group['lr']))


LR groupe 0: 2e-05
LR groupe 1: 6e-05
LR groupe 2: 0.0002


In [None]:
# Display the weighted validation F1 currently held in `val_f1`.
val_f1

0.6641959702870337

In [None]:
# Display the weighted validation F1 currently held in `val_f1`
# (same expression as the neighbouring cells, shown after another evaluation).
val_f1

0.6607377066190683

In [None]:
# Display the weighted validation F1 currently held in `val_f1`
# (repeat of the preceding cell; the recorded output is identical).
val_f1

0.6607377066190683

In [None]:
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir=tensorboard_folder+"fine_tuning_l4_l3/")

# This stage fine-tunes layer3, layer4 and the classifier head. Make sure all
# three are trainable: Adam skips parameters that never receive a gradient, so
# a module left frozen by an earlier cell would silently stay unchanged.
for stage_module in (model.layer3, model.layer4, model.fc):
    stage_module.requires_grad_(True)

# Parameter groups with their own learning rates (smaller for earlier blocks)
param_groups = [
    {"params": model.layer3.parameters(), "lr": 2e-5},
    {"params": model.layer4.parameters(), "lr": 6e-5},
    {"params": model.fc.parameters(),     "lr": 2e-4},
]

optimizer = optim.Adam(param_groups)


# Multiply the learning rates by 0.2 when the validation loss stops improving
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                mode='min',
                                                factor=0.2,
                                                patience=1)

early_stopper = EarlyStopper(patience=3, min_delta=1e-4)


epochs = 25
best_val_loss = float('inf')
# F1 is "higher is better": start from -inf and keep the maximum
best_f1 = float('-inf')

try:
    for epoch in range(epochs):

        model.train()
        loss_total = 0
        progress_bar = tqdm(
            train_loader, desc="Epoch {:1d}".format(epoch+1), leave=True, disable=False
        )

        for i, (X_batch, y_batch) in enumerate(progress_bar):

            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            optimizer.zero_grad()

            y_pred = model(X_batch)

            loss = criterion(y_pred, y_batch)

            loss.backward()
            # Cap the global gradient norm at 1.0 before the update
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            loss_total += loss.item()

            # Running mean of the training loss over the batches seen so far
            progress_bar.set_postfix(
                {
                    "training_loss": "{:.3f}".format(loss_total/(i+1))}
            )

        # Evaluate on the validation set; eval mode switches layers such as
        # batch-norm and dropout to their inference behaviour
        model.eval()
        val_loss, val_f1 = evaluate()
        train_loss = loss_total/len(train_loader)
        # Report the losses at the end of the epoch
        print(f"Epoch {epoch+1}/{epochs}, Training Loss: {train_loss:.3f}, Validation Loss: {val_loss:.3f}, Validation f1 weighted: {val_f1:.3f}")
        # Keep the checkpoint with the lowest validation loss ...
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), f"{save_dir}/best_model_loss_ft_l4_l3.pt")
        # ... and, separately, the one with the highest validation F1
        if val_f1 > best_f1:
            best_f1 = val_f1
            torch.save(model.state_dict(), f"{save_dir}/best_model_f1_ft_l4_l3.pt")
        # Log to TensorBoard
        writer.add_scalar("Loss/train", train_loss, epoch)
        writer.add_scalar("Loss/validation", val_loss, epoch)
        writer.add_scalar("F1/validation", val_f1, epoch)
        # Stop the whole run if early stopping triggers
        if early_stopper.early_stop(val_loss):
            break
        # Scheduler update
        scheduler.step(val_loss)
finally:
    # Flush and release the TensorBoard event file even when the run is
    # interrupted, so the logged scalars are not lost.
    writer.close()

Epoch 1:   0%|          | 0/842 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir=tensorboard_folder+"fine_tuning_l4/")

# This stage fine-tunes layer4 together with the classifier head. Make sure
# both are trainable: Adam skips parameters that never receive a gradient, so a
# module left frozen by an earlier cell would silently stay unchanged.
for stage_module in (model.layer4, model.fc):
    stage_module.requires_grad_(True)

# Parameter groups with their own learning rates (smaller for the backbone block)
param_groups = [
    {"params": model.layer4.parameters(), "lr": 1e-4},
    {"params": model.fc.parameters(),      "lr": 1e-3}
]

optimizer = optim.Adam(param_groups)

# Multiply the learning rates by 0.2 when the validation loss stops improving
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                mode='min',
                                                factor=0.2,
                                                patience=1)

early_stopper = EarlyStopper(patience=3, min_delta=1e-4)


epochs = 25
best_val_loss = float('inf')

try:
    for epoch in range(epochs):

        model.train()
        loss_total = 0
        progress_bar = tqdm(
            train_loader, desc="Epoch {:1d}".format(epoch+1), leave=True, disable=False
        )

        for i, (X_batch, y_batch) in enumerate(progress_bar):

            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            optimizer.zero_grad()

            y_pred = model(X_batch)

            loss = criterion(y_pred, y_batch)

            loss.backward()
            # Cap the global gradient norm at 1.0 before the update
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            loss_total += loss.item()

            # Running mean of the training loss over the batches seen so far
            progress_bar.set_postfix(
                {
                    "training_loss": "{:.3f}".format(loss_total/(i+1))}
            )

        # Evaluate on the validation set; eval mode switches layers such as
        # batch-norm and dropout to their inference behaviour
        model.eval()
        val_loss, val_f1 = evaluate()
        train_loss = loss_total/len(train_loader)
        # Report the losses at the end of the epoch
        print(f"Epoch {epoch+1}/{epochs}, Training Loss: {train_loss:.3f}, Validation Loss: {val_loss:.3f}, Validation f1 weighted: {val_f1:.3f}")
        # Keep the checkpoint with the lowest validation loss
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), f"{save_dir}/best_model_ft_l4.pt")
        # Log to TensorBoard
        writer.add_scalar("Loss/train", train_loss, epoch)
        writer.add_scalar("Loss/validation", val_loss, epoch)
        writer.add_scalar("F1/validation", val_f1, epoch)
        # Stop the whole run if early stopping triggers
        if early_stopper.early_stop(val_loss):
            break
        # Scheduler update
        scheduler.step(val_loss)
finally:
    # Flush and release the TensorBoard event file even when the run is
    # interrupted, so the logged scalars are not lost.
    writer.close()

Epoch 1:   0%|          | 0/842 [00:00<?, ?it/s]

Epoch 1/25, Training Loss: 0.983, Validation Loss: 1.222, Validation f1 weighted: 0.634


Epoch 2:   0%|          | 0/842 [00:00<?, ?it/s]

Epoch 2/25, Training Loss: 0.910, Validation Loss: 1.232, Validation f1 weighted: 0.634


Epoch 3:   0%|          | 0/842 [00:00<?, ?it/s]

Epoch 3/25, Training Loss: 0.844, Validation Loss: 1.248, Validation f1 weighted: 0.639


Epoch 4:   0%|          | 0/842 [00:00<?, ?it/s]

Epoch 4/25, Training Loss: 0.717, Validation Loss: 1.241, Validation f1 weighted: 0.650


In [None]:
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

# TensorBoard writer for this run; events go to "<tensorboard_folder>fine_tuning_xx/".
writer = SummaryWriter(log_dir=tensorboard_folder+"fine_tuning_xx/")

class EarlyStopper:
    """Patience-based early stopping on the validation loss.

    ``early_stop`` returns True once the loss has exceeded
    ``best + min_delta`` on ``patience`` calls since the last new best.
    """

    def __init__(self, patience=1, min_delta=0):
        self.patience, self.min_delta = patience, min_delta
        self.counter = 0
        self.min_validation_loss = float('inf')

    def early_stop(self, validation_loss):
        """Record ``validation_loss`` and tell whether training should stop."""
        improved = validation_loss < self.min_validation_loss
        if improved:
            # New best: remember it and start counting from zero again
            self.min_validation_loss = validation_loss
            self.counter = 0
            return False

        if not validation_loss > (self.min_validation_loss + self.min_delta):
            # Within the tolerated band: neither an improvement nor a strike
            return False

        self.counter += 1
        return self.counter >= self.patience



# Adam over all model parameters, with a ReduceLROnPlateau scheduler
optimizer = optim.Adam(model.parameters(), lr=0.00001)

# Multiply the learning rate by 0.1 when the validation loss stops improving
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                mode='min',
                                                factor=0.1,
                                                patience=2)

early_stopper = EarlyStopper(patience=4, min_delta=1e-4)


epochs = 25
best_val_loss = float('inf')

try:
    for epoch in range(epochs):

        model.train()
        loss_total = 0
        progress_bar = tqdm(
            train_loader, desc="Epoch {:1d}".format(epoch+1), leave=True, disable=False
        )

        for i, (X_batch, y_batch) in enumerate(progress_bar):

            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            optimizer.zero_grad()

            y_pred = model(X_batch)

            loss = criterion(y_pred, y_batch)

            loss.backward()
            # Cap the global gradient norm at 1.0 before the update
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            loss_total += loss.item()

            # Running mean of the training loss over the batches seen so far
            progress_bar.set_postfix(
                {
                    "training_loss": "{:.3f}".format(loss_total/(i+1))}
            )

        # Evaluate on the validation set; eval mode switches layers such as
        # batch-norm and dropout to their inference behaviour
        model.eval()
        val_loss, val_f1 = evaluate()
        train_loss = loss_total/len(train_loader)
        # Report the losses at the end of the epoch
        print(f"Epoch {epoch+1}/{epochs}, Training Loss: {train_loss:.3f}, Validation Loss: {val_loss:.3f}, Validation f1 weighted: {val_f1:.3f}")
        # Keep the checkpoint with the lowest validation loss
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), f"{save_dir}/best_model.pt")
        # Log to TensorBoard
        writer.add_scalar("Loss/train", train_loss, epoch)
        writer.add_scalar("Loss/validation", val_loss, epoch)
        writer.add_scalar("F1/validation", val_f1, epoch)
        # Stop the whole run if early stopping triggers
        if early_stopper.early_stop(val_loss):
            break
        # Scheduler update
        scheduler.step(val_loss)
finally:
    # Flush and release the TensorBoard event file even when the run is
    # interrupted, so the logged scalars are not lost.
    writer.close()

Epoch 1:   0%|          | 0/842 [00:00<?, ?it/s]

Epoch 1/25, Training Loss: 1.300, Validation Loss: 1.354, Validation f1 weighted: 0.587


Epoch 2:   0%|          | 0/842 [00:00<?, ?it/s]

Epoch 2/25, Training Loss: 1.299, Validation Loss: 1.355, Validation f1 weighted: 0.588


Epoch 3:   0%|          | 0/842 [00:00<?, ?it/s]

Epoch 3/25, Training Loss: 1.297, Validation Loss: 1.359, Validation f1 weighted: 0.586


Epoch 4:   0%|          | 0/842 [00:00<?, ?it/s]

Epoch 4/25, Training Loss: 1.293, Validation Loss: 1.354, Validation f1 weighted: 0.588


Epoch 5:   0%|          | 0/842 [00:00<?, ?it/s]

Epoch 5/25, Training Loss: 1.297, Validation Loss: 1.354, Validation f1 weighted: 0.589


Epoch 6:   0%|          | 0/842 [00:00<?, ?it/s]

Epoch 6/25, Training Loss: 1.298, Validation Loss: 1.358, Validation f1 weighted: 0.586


Epoch 7:   0%|          | 0/842 [00:00<?, ?it/s]

Epoch 7/25, Training Loss: 1.301, Validation Loss: 1.352, Validation f1 weighted: 0.589


Epoch 8:   0%|          | 0/842 [00:00<?, ?it/s]

Epoch 8/25, Training Loss: 1.299, Validation Loss: 1.351, Validation f1 weighted: 0.589


Epoch 9:   0%|          | 0/842 [00:00<?, ?it/s]

Epoch 9/25, Training Loss: 1.295, Validation Loss: 1.359, Validation f1 weighted: 0.588


Epoch 10:   0%|          | 0/842 [00:00<?, ?it/s]

Epoch 10/25, Training Loss: 1.292, Validation Loss: 1.355, Validation f1 weighted: 0.590


Epoch 11:   0%|          | 0/842 [00:00<?, ?it/s]

Epoch 11/25, Training Loss: 1.296, Validation Loss: 1.355, Validation f1 weighted: 0.587


Epoch 12:   0%|          | 0/842 [00:00<?, ?it/s]

KeyboardInterrupt: 