# Swin Transformer

## 1. Définition du Modèle

In [1]:
import torch
import torch.nn as nn
import timm

class RakutenSwin(nn.Module):
    """Swin Transformer image classifier for the Rakuten dataset.

    A timm backbone is created with ``num_classes=0`` and ``global_pool='avg'``
    and its output is fed to a small MLP head
    (LayerNorm -> Dropout -> Linear(512) -> GELU -> Dropout -> Linear).

    Args:
        model_name: Name of the timm architecture passed to ``timm.create_model``.
        num_classes: Size of the final linear layer (number of output logits).
        pretrained: Forwarded to ``timm.create_model``.
        drop_path_rate: Forwarded to ``timm.create_model`` (the stochastic-depth
            rate, per the original docstring).
    """

    def __init__(self, model_name='swin_base_patch4_window7_224', num_classes=27,
                 pretrained=True, drop_path_rate=0.3):
        super().__init__()

        # Plain metadata kept on the module for later inspection.
        self.model_name = model_name
        self.num_classes = num_classes

        self.backbone = timm.create_model(
            model_name,
            pretrained=pretrained,
            num_classes=0,
            global_pool='avg',
            drop_path_rate=drop_path_rate,
        )

        # The head is sized from whatever feature width the backbone reports.
        width = self.backbone.num_features
        head_layers = [
            nn.LayerNorm(width),
            nn.Dropout(p=0.5),
            nn.Linear(width, 512),
            nn.GELU(),
            nn.Dropout(p=0.3),
            nn.Linear(512, num_classes),
        ]
        self.head = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the backbone on ``x`` and return the head's logits."""
        features = self.backbone(x)
        return self.head(features)


print("Modèle RakutenSwin défini")

Modèle RakutenSwin défini


  from .autonotebook import tqdm as notebook_tqdm


## 2. Configuration

In [2]:
import sys
from pathlib import Path
import pandas as pd
from torch.utils.data import DataLoader
from torchvision import transforms
from sklearn.metrics import classification_report, accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import json
from datetime import datetime

# Paths: make the repository root and its scripts/ folder importable.
# The root is taken to be two levels above the current working directory.
project_root = Path.cwd().parent.parent
for import_dir in (project_root, project_root / "scripts"):
    # Each entry is prepended, so scripts/ ends up ahead of the root on sys.path.
    sys.path.insert(0, str(import_dir))

from src.rakuten_image.datasets import RakutenImageDataset
from load_data import get_split_data_unified

# Imported here (rather than relying on a later cell) because they are needed
# to seed the random number generators below.
import random

import numpy as np

# Timestamp used to version every file written by this run
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Configuration: paths, model choice and every hyper-parameter read by the
# cells below.
CONFIG = {
    "data_dir": project_root / "data" / "raw",
    "img_dir": project_root / "data" / "raw" / "images" / "image_train",
    "checkpoint_dir": project_root / "checkpoints" / "swin_final",
    "output_dir": project_root / "models" / "swin",
    "timestamp": timestamp,
    "model_name": "swin_base_patch4_window7_224",
    "img_size": 224,
    "batch_size": 32,
    "num_epochs": 30,
    "learning_rate": 5e-5,
    "weight_decay": 0.05,
    "random_state": 42,
    "early_stopping_patience": 5,
    "drop_path_rate": 0.3,
    "mixup_alpha": 0.8,
    "cutmix_alpha": 1.0,
    "label_smoothing": 0.1,
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "num_workers": 4,
    # Mixed precision is only meaningful on CUDA: the notebook's autocast and
    # GradScaler calls are hard-wired to the 'cuda' device type, so AMP is
    # switched off automatically when the run falls back to CPU.
    "use_amp": torch.cuda.is_available()
}

# Seed the Python, NumPy and PyTorch generators so that shuffling,
# augmentation and dropout are repeatable from a fresh kernel.
# NOTE(review): timm's Mixup presumably draws from NumPy's global RNG - confirm.
random.seed(CONFIG["random_state"])
np.random.seed(CONFIG["random_state"])
torch.manual_seed(CONFIG["random_state"])

device = torch.device(CONFIG["device"])
print(f"Device: {device}")
print(f"Repertoire images: {CONFIG['img_dir']}")
print(f"Timestamp: {timestamp}")

Device: cuda
Repertoire images: c:\Users\xiaos\Documents\GitHub\DS_rakuten\data\raw\images\image_train
Timestamp: 20260105_114525


## 3. Chargement des Données

In [3]:
# Load the project's unified train / validation / test splits
X_train, X_val, X_test, y_train, y_val, y_test = get_split_data_unified()


def _with_target(features, target):
    """Return a copy of ``features`` with ``target`` stored as 'prdtypecode'."""
    frame = features.copy()
    frame['prdtypecode'] = target
    return frame


df_train = _with_target(X_train, y_train)
df_val = _with_target(X_val, y_val)
df_test = _with_target(X_test, y_test)

print(f"Train: {len(df_train):,} | Val: {len(df_val):,} | Test: {len(df_test):,}")

# Label encoding: the encoder is fitted on the training split only and then
# applied unchanged to all three splits.
label_encoder = LabelEncoder()
label_encoder.fit(df_train['prdtypecode'])

for split_frame in (df_train, df_val, df_test):
    split_frame['encoded_label'] = label_encoder.transform(split_frame['prdtypecode'])

num_classes = len(label_encoder.classes_)

Train: 61,351 | Val: 10,827 | Test: 12,738


## 4. Préparation des DataLoaders

In [4]:
# Transforms
image_side = CONFIG["img_size"]
# Per-channel normalization constants
# (presumably the statistics the pretrained backbone expects - TODO confirm).
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Training pipeline: resize, random flip and RandAugment, then tensor + normalize.
train_transform = transforms.Compose([
    transforms.Resize((image_side, image_side)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandAugment(num_ops=2, magnitude=9),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
])

# Evaluation pipeline (validation and test): no random augmentation.
val_transform = transforms.Compose([
    transforms.Resize((image_side, image_side)),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
])

# Datasets, built in train / val / test order; all read the 'encoded_label' column.
train_dataset, val_dataset, test_dataset = (
    RakutenImageDataset(frame, CONFIG["img_dir"], pipeline, "encoded_label")
    for frame, pipeline in (
        (df_train, train_transform),
        (df_val, val_transform),
        (df_test, val_transform),
    )
)

# DataLoaders: only the training loader shuffles and drops the incomplete last batch.
shared_loader_args = dict(
    batch_size=CONFIG["batch_size"],
    num_workers=CONFIG["num_workers"],
    pin_memory=True,
)
train_loader = DataLoader(train_dataset, shuffle=True, drop_last=True, **shared_loader_args)
val_loader = DataLoader(val_dataset, shuffle=False, **shared_loader_args)
test_loader = DataLoader(test_dataset, shuffle=False, **shared_loader_args)

print(f"Batches - Train: {len(train_loader)} | Val: {len(val_loader)} | Test: {len(test_loader)}")

Pre-building image paths...
✓ Cached 61351 image paths
✓ Labels already zero-indexed: 27 classes
Verifying image files...
✓ All 61351 images verified successfully
Pre-building image paths...
✓ Cached 61351 image paths
✓ RakutenImageDataset initialized: 61351 samples, 27 classes
Pre-building image paths...
✓ Cached 10827 image paths
✓ Labels already zero-indexed: 27 classes
Verifying image files...
✓ All 10827 images verified successfully
Pre-building image paths...
✓ Cached 10827 image paths
✓ RakutenImageDataset initialized: 10827 samples, 27 classes
Pre-building image paths...
✓ Cached 12738 image paths
✓ Labels already zero-indexed: 27 classes
Verifying image files...
✓ All 12738 images verified successfully
Pre-building image paths...
✓ Cached 12738 image paths
✓ RakutenImageDataset initialized: 12738 samples, 27 classes
Batches - Train: 1917 | Val: 339 | Test: 399


## 5. Initialisation du Modèle et Optimiseur

In [5]:
from timm.data.mixup import Mixup
from timm.loss import SoftTargetCrossEntropy

# Model, moved to the configured device
swin_kwargs = dict(
    model_name=CONFIG["model_name"],
    num_classes=num_classes,
    pretrained=True,
    drop_path_rate=CONFIG["drop_path_rate"],
)
model = RakutenSwin(**swin_kwargs).to(device)

# Mixup / CutMix, applied to every training batch (prob=1.0)
mixup_fn = Mixup(
    mixup_alpha=CONFIG["mixup_alpha"],
    cutmix_alpha=CONFIG["cutmix_alpha"],
    prob=1.0,
    switch_prob=0.5,
    mode='batch',
    label_smoothing=CONFIG["label_smoothing"],
    num_classes=num_classes,
)

# Training uses the soft-target loss on the labels produced by mixup_fn;
# validation uses plain cross-entropy on integer labels.
criterion_train = SoftTargetCrossEntropy()
criterion_val = nn.CrossEntropyLoss()

# Optimizer and scheduler: AdamW with a cosine schedule spanning num_epochs
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=CONFIG["learning_rate"],
    weight_decay=CONFIG["weight_decay"],
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=CONFIG["num_epochs"],
    eta_min=1e-6,
)

# The gradient scaler only exists when mixed precision is enabled.
scaler = None
if CONFIG["use_amp"]:
    scaler = torch.amp.GradScaler('cuda')

CONFIG["checkpoint_dir"].mkdir(parents=True, exist_ok=True)
print("Modèle et optimiseur initialisés")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Modèle et optimiseur initialisés


## 6. Entraînement

In [6]:
# Early-stopping state and per-epoch metric history
best_val_acc = 0.0
patience_counter = 0
history = {"train_loss": [], "val_loss": [], "val_acc": [], "val_f1": []}

# Checkpoint file name, versioned with the run timestamp
model_filename = f"swin_best_{CONFIG['timestamp']}.pth"

for epoch in range(CONFIG["num_epochs"]):
    print(f"\nEpoch {epoch + 1}/{CONFIG['num_epochs']}")

    # ------------------------------------------------------------------
    # Training
    # ------------------------------------------------------------------
    model.train()
    train_loss = 0.0

    for images, labels in tqdm(train_loader, desc="Entrainement"):
        images, labels = images.to(device), labels.to(device)
        # Mixup/CutMix rewrites both the images and the labels
        # (presumably into soft targets, given the SoftTargetCrossEntropy loss).
        images, labels = mixup_fn(images, labels)

        optimizer.zero_grad()

        # A single forward pass for both precisions: autocast is a no-op
        # when enabled=False.
        with torch.amp.autocast(device_type="cuda", enabled=CONFIG["use_amp"]):
            outputs = model(images)
            loss = criterion_train(outputs, labels)

        if CONFIG["use_amp"]:
            scaler.scale(loss).backward()
            # Gradients are unscaled before clipping so that max_norm applies
            # to their true magnitude.
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
            optimizer.step()

        train_loss += loss.item()

    # The training loader drops its incomplete last batch, so every batch has
    # the same size and a plain mean over batches is a per-sample mean.
    avg_train_loss = train_loss / len(train_loader)

    # ------------------------------------------------------------------
    # Validation
    # ------------------------------------------------------------------
    model.eval()
    val_loss = 0.0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc="Validation"):
            images, labels = images.to(device), labels.to(device)

            with torch.amp.autocast(device_type="cuda", enabled=CONFIG["use_amp"]):
                outputs = model(images)
                loss = criterion_val(outputs, labels)

            # The validation loader keeps its smaller last batch, so each
            # batch mean is weighted by its sample count; averaging batch
            # means directly would over-weight the samples of that batch.
            val_loss += loss.item() * labels.size(0)
            all_preds.extend(torch.argmax(outputs, dim=-1).cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    avg_val_loss = val_loss / max(len(all_labels), 1)
    val_accuracy = 100.0 * accuracy_score(all_labels, all_preds)
    val_f1 = f1_score(all_labels, all_preds, average='weighted')

    # Record the epoch in the history
    history["train_loss"].append(avg_train_loss)
    history["val_loss"].append(avg_val_loss)
    history["val_acc"].append(val_accuracy)
    history["val_f1"].append(val_f1)

    print(f"Loss - Train: {avg_train_loss:.4f} | Val: {avg_val_loss:.4f}")
    print(f"Val Acc: {val_accuracy:.2f}% | F1: {val_f1:.4f}")

    # Model selection and early stopping are both driven by validation accuracy.
    if val_accuracy > best_val_acc:
        best_val_acc = val_accuracy
        patience_counter = 0
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'val_acc': val_accuracy,
            'val_f1': val_f1,
            'timestamp': CONFIG['timestamp']
        }, CONFIG["checkpoint_dir"] / model_filename)
        print(f"Meilleur modele sauvegarde: {model_filename}")
    else:
        patience_counter += 1
        if patience_counter >= CONFIG["early_stopping_patience"]:
            print(f"Arret precoce apres {epoch + 1} epochs")
            break

    # One scheduler step per completed epoch
    scheduler.step()

print(f"\nEntrainement termine. Meilleure Val Acc: {best_val_acc:.2f}%")


Epoch 1/30


Entrainement: 100%|██████████| 1917/1917 [10:18<00:00,  3.10it/s]
Validation: 100%|██████████| 339/339 [00:42<00:00,  8.04it/s]


Loss - Train: 2.3455 | Val: 1.2224
Val Acc: 64.42% | F1: 0.6305
Meilleur modele sauvegarde: swin_best_20260105_114525.pth

Epoch 2/30


Entrainement: 100%|██████████| 1917/1917 [09:48<00:00,  3.26it/s]
Validation: 100%|██████████| 339/339 [00:40<00:00,  8.37it/s]


Loss - Train: 2.1104 | Val: 1.1275
Val Acc: 67.40% | F1: 0.6678
Meilleur modele sauvegarde: swin_best_20260105_114525.pth

Epoch 3/30


Entrainement: 100%|██████████| 1917/1917 [09:27<00:00,  3.38it/s]
Validation: 100%|██████████| 339/339 [00:43<00:00,  7.83it/s]


Loss - Train: 2.0277 | Val: 1.0703
Val Acc: 70.14% | F1: 0.6950
Meilleur modele sauvegarde: swin_best_20260105_114525.pth

Epoch 4/30


Entrainement: 100%|██████████| 1917/1917 [10:16<00:00,  3.11it/s]
Validation: 100%|██████████| 339/339 [00:40<00:00,  8.30it/s]


Loss - Train: 1.9509 | Val: 1.0406
Val Acc: 70.49% | F1: 0.6991
Meilleur modele sauvegarde: swin_best_20260105_114525.pth

Epoch 5/30


Entrainement: 100%|██████████| 1917/1917 [09:33<00:00,  3.34it/s]
Validation: 100%|██████████| 339/339 [00:44<00:00,  7.54it/s]


Loss - Train: 1.9012 | Val: 1.0008
Val Acc: 71.85% | F1: 0.7115
Meilleur modele sauvegarde: swin_best_20260105_114525.pth

Epoch 6/30


Entrainement: 100%|██████████| 1917/1917 [10:49<00:00,  2.95it/s]
Validation: 100%|██████████| 339/339 [00:46<00:00,  7.32it/s]


Loss - Train: 1.8540 | Val: 0.9959
Val Acc: 72.51% | F1: 0.7205
Meilleur modele sauvegarde: swin_best_20260105_114525.pth

Epoch 7/30


Entrainement: 100%|██████████| 1917/1917 [10:32<00:00,  3.03it/s]
Validation: 100%|██████████| 339/339 [00:46<00:00,  7.31it/s]


Loss - Train: 1.8238 | Val: 0.9680
Val Acc: 73.22% | F1: 0.7263
Meilleur modele sauvegarde: swin_best_20260105_114525.pth

Epoch 8/30


Entrainement: 100%|██████████| 1917/1917 [10:16<00:00,  3.11it/s]
Validation: 100%|██████████| 339/339 [00:41<00:00,  8.24it/s]


Loss - Train: 1.7934 | Val: 0.9628
Val Acc: 73.48% | F1: 0.7290
Meilleur modele sauvegarde: swin_best_20260105_114525.pth

Epoch 9/30


Entrainement: 100%|██████████| 1917/1917 [09:33<00:00,  3.34it/s]
Validation: 100%|██████████| 339/339 [00:41<00:00,  8.23it/s]


Loss - Train: 1.7641 | Val: 0.9630
Val Acc: 73.70% | F1: 0.7351
Meilleur modele sauvegarde: swin_best_20260105_114525.pth

Epoch 10/30


Entrainement: 100%|██████████| 1917/1917 [09:32<00:00,  3.35it/s]
Validation: 100%|██████████| 339/339 [00:40<00:00,  8.27it/s]


Loss - Train: 1.7246 | Val: 0.9364
Val Acc: 74.73% | F1: 0.7429
Meilleur modele sauvegarde: swin_best_20260105_114525.pth

Epoch 11/30


Entrainement: 100%|██████████| 1917/1917 [10:25<00:00,  3.07it/s]
Validation: 100%|██████████| 339/339 [00:45<00:00,  7.49it/s]


Loss - Train: 1.6997 | Val: 0.9465
Val Acc: 74.09% | F1: 0.7374

Epoch 12/30


Entrainement: 100%|██████████| 1917/1917 [09:46<00:00,  3.27it/s]
Validation: 100%|██████████| 339/339 [00:41<00:00,  8.19it/s]


Loss - Train: 1.6684 | Val: 0.9471
Val Acc: 74.56% | F1: 0.7416

Epoch 13/30


Entrainement: 100%|██████████| 1917/1917 [10:55<00:00,  2.92it/s]
Validation: 100%|██████████| 339/339 [00:51<00:00,  6.64it/s]


Loss - Train: 1.6399 | Val: 0.9555
Val Acc: 74.52% | F1: 0.7410

Epoch 14/30


Entrainement: 100%|██████████| 1917/1917 [12:17<00:00,  2.60it/s]
Validation: 100%|██████████| 339/339 [00:51<00:00,  6.55it/s]


Loss - Train: 1.6170 | Val: 0.9569
Val Acc: 74.55% | F1: 0.7414

Epoch 15/30


Entrainement: 100%|██████████| 1917/1917 [12:53<00:00,  2.48it/s]
Validation: 100%|██████████| 339/339 [00:57<00:00,  5.92it/s]

Loss - Train: 1.6015 | Val: 0.9795
Val Acc: 74.23% | F1: 0.7394
Arret precoce apres 15 epochs

Entrainement termine. Meilleure Val Acc: 74.73%





## 7. Évaluation Finale

In [7]:
import numpy as np

def _predict_probabilities(loader, desc):
    """Run the global ``model`` over ``loader`` and collect its predictions.

    Args:
        loader: DataLoader yielding ``(images, labels)`` batches.
        desc: Label shown on the tqdm progress bar.

    Returns:
        Tuple ``(probs, labels)``: the vertically stacked softmax outputs and
        the concatenated labels, both as NumPy arrays.
    """
    prob_chunks, label_chunks = [], []
    with torch.no_grad():
        for images, labels in tqdm(loader, desc=desc):
            images = images.to(device)

            # autocast is a no-op when enabled=False, so one code path serves
            # both precisions.
            with torch.amp.autocast(device_type="cuda", enabled=CONFIG["use_amp"]):
                outputs = model(images)

            # Under autocast the logits may be half precision; they are cast
            # to float32 before the softmax so that the exported
            # probabilities are not stored as float16.
            probs = torch.softmax(outputs.float(), dim=-1).cpu().numpy()
            prob_chunks.append(probs)
            label_chunks.append(labels.numpy())

    return np.vstack(prob_chunks), np.concatenate(label_chunks)


def _export_predictions(split, probs, labels, preds):
    """Save probabilities, labels and predictions of ``split`` as .npy files."""
    for kind, array in (("probs", probs), ("labels", labels), ("preds", preds)):
        np.save(CONFIG["output_dir"] / f"img_swin_{kind}_{split}_{CONFIG['timestamp']}.npy", array)


# Load the best checkpoint written during training
model_filename = f"swin_best_{CONFIG['timestamp']}.pth"
# map_location lets a checkpoint saved on GPU be reloaded on a CPU-only machine.
# SECURITY: weights_only=False unpickles arbitrary objects - only load
# checkpoints produced by this notebook. It is presumably required because the
# checkpoint stores metric values next to the state dict, which may be NumPy
# scalars (TODO confirm).
checkpoint = torch.load(CONFIG["checkpoint_dir"] / model_filename,
                        map_location=device, weights_only=False)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

print(f"Modele charge: {model_filename}")
print(f"Epoch: {checkpoint['epoch']} | Val Acc: {checkpoint['val_acc']:.2f}% | Val F1: {checkpoint['val_f1']:.4f}")

# Prepare the output directory
CONFIG["output_dir"].mkdir(parents=True, exist_ok=True)

# =========================================================================
# Evaluation on the VALIDATION set and export of its predictions
# =========================================================================
print("\n" + "="*70)
print("EVALUATION SUR VALIDATION SET")
print("="*70)

val_probs, val_labels = _predict_probabilities(val_loader, "Validation")
val_preds = val_probs.argmax(axis=1)

val_acc = 100.0 * accuracy_score(val_labels, val_preds)
val_f1 = f1_score(val_labels, val_preds, average='weighted')

print(f"Val Acc: {val_acc:.2f}% | Val F1: {val_f1:.4f}")

# Export validation predictions (.npy)
_export_predictions("val", val_probs, val_labels, val_preds)
print(f"Predictions validation exportees (.npy)")

# =========================================================================
# Evaluation on the TEST set and export of its predictions
# =========================================================================
print("\n" + "="*70)
print("EVALUATION SUR TEST SET")
print("="*70)

test_probs, test_labels = _predict_probabilities(test_loader, "Test")
test_preds = test_probs.argmax(axis=1)

test_acc = 100.0 * accuracy_score(test_labels, test_preds)
test_f1 = f1_score(test_labels, test_preds, average='weighted')

print(f"Test Acc: {test_acc:.2f}% | Test F1: {test_f1:.4f}")
print("\nRapport de classification (Test):")
# Class ids in this report are the encoded labels, not the original prdtypecode values.
print(classification_report(test_labels, test_preds, digits=4))

# Export test predictions (.npy)
_export_predictions("test", test_probs, test_labels, test_preds)
print(f"Predictions test exportees (.npy)")

# =========================================================================
# Save the summary metrics (JSON)
# =========================================================================
results = {
    "timestamp": CONFIG["timestamp"],
    "model_name": CONFIG["model_name"],
    "best_epoch": int(checkpoint['epoch']),
    "val_acc": float(val_acc),
    "val_f1": float(val_f1),
    "test_acc": float(test_acc),
    "test_f1": float(test_f1),
    "num_classes": int(num_classes),
    "train_samples": len(df_train),
    "val_samples": len(df_val),
    "test_samples": len(df_test)
}

results_filename = f"swin_results_{CONFIG['timestamp']}.json"
with open(CONFIG["output_dir"] / results_filename, "w") as f:
    json.dump(results, f, indent=2)

print(f"\nResultats sauvegardes: {results_filename}")
print("="*70)

Modele charge: swin_best_20260105_114525.pth
Epoch: 10 | Val Acc: 74.73% | Val F1: 0.7429

EVALUATION SUR VALIDATION SET


Validation: 100%|██████████| 339/339 [00:53<00:00,  6.30it/s]


Val Acc: 74.73% | Val F1: 0.7429
Predictions validation exportees (.npy)

EVALUATION SUR TEST SET


Test: 100%|██████████| 399/399 [01:00<00:00,  6.56it/s]

Test Acc: 73.17% | Test F1: 0.7264

Rapport de classification (Test):
              precision    recall  f1-score   support

           0     0.6774    0.7195    0.6978       467
           1     0.7209    0.7074    0.7141       376
           2     0.6393    0.5556    0.5945       252
           3     0.7194    0.8000    0.7576       125
           4     0.6718    0.7556    0.7113       401
           5     0.9529    0.9562    0.9545       593
           6     0.6341    0.4522    0.5279       115
           7     0.5162    0.4583    0.4855       731
           8     0.4173    0.3730    0.3939       311
           9     0.7258    0.8811    0.7959       757
          10     0.7969    0.8430    0.8193       121
          11     0.5862    0.5455    0.5651       374
          12     0.6301    0.5679    0.5974       486
          13     0.6409    0.6544    0.6476       761
          14     0.8031    0.8901    0.8443       646
          15     0.8083    0.8083    0.8083       120
          1


