In [1]:
import os
import time
import json
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score

#  CONFIGURATION "NUIT BLANCHE" 
PROJECT_ROOT = r"C:\Users\amisf\Desktop\datascientest_projet"
IMG_DIR = r"C:\Users\amisf\Desktop\datascientest_projet\data\raw\images\images\image_train"
OUTPUT_DIR = os.path.join(PROJECT_ROOT, "implementation", "outputs")
BATCH_SIZE = 32
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
scaler = torch.amp.GradScaler('cuda')

print(f"üåô D√âMARRAGE MODE NUIT (ConvNeXt Tiny) SUR : {DEVICE}")
print("üõ°Ô∏è S√âCURIT√âS : Anti-Crash + Early Stopping (5 √©poques) + Workers=0 (Anti-Freeze)")

# --- 2. DATASET ---
# Features and targets are read with their first CSV column as index and
# joined on that index (pd.merge default: inner join).
csv_path = os.path.join(PROJECT_ROOT, "data", "raw")
df_x = pd.read_csv(os.path.join(csv_path, "X_train_update.csv"), index_col=0)
df_y = pd.read_csv(os.path.join(csv_path, "Y_train_CVw08PX.csv"), index_col=0)
df = pd.merge(df_x, df_y, left_index=True, right_index=True)
# Build the image path of every row from its two id columns. Iterating the
# columns directly keeps each id in its own column dtype and avoids creating
# one Series per row, which is what a row-wise apply(axis=1) does.
df['path'] = [
    os.path.join(IMG_DIR, f"image_{image_id}_product_{product_id}.jpg")
    for image_id, product_id in zip(df['imageid'], df['productid'])
]

# Map the raw product type codes to contiguous integer class ids.
le = LabelEncoder()
df['label_encoded'] = le.fit_transform(df['prdtypecode'])
NUM_CLASSES = len(le.classes_)
# Stratified 80/20 split with a fixed seed so the validation set is reproducible.
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['label_encoded'], random_state=42)

class ConvNextDataset(Dataset):
    """Image-classification dataset backed by a DataFrame.

    Every row must provide a ``path`` column (location of the image file) and
    a ``label_encoded`` column (integer class id). Items are returned as
    ``(image_tensor, label_tensor)`` pairs.

    Args:
        df: Source rows. The index is reset so items are addressed by
            position ``0 .. len(df) - 1``.
        mode: ``'train'`` selects the augmenting pipeline; any other value
            selects the deterministic evaluation pipeline.
    """

    def __init__(self, df, mode='train'):
        self.df = df.reset_index(drop=True)
        self.mode = mode
        # Training pipeline: resize, then random crop/flip/rotation/colour
        # jitter before conversion to a normalized tensor.
        self.transform_train = transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.RandomCrop(224),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(15),
            transforms.ColorJitter(brightness=0.2, contrast=0.2),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        # Evaluation pipeline: same geometry but with a fixed center crop and
        # no random augmentation.
        self.transform_val = transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    def __len__(self):
        """Return the number of rows (samples) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label_tensor)`` for the row at position *idx*.

        An image that cannot be opened or decoded is replaced by a black
        224x224 RGB image so that a long run is not aborted by one bad file.
        """
        row = self.df.iloc[idx]
        try:
            # The context manager releases the file handle as soon as the
            # pixels have been decoded into the converted copy.
            with Image.open(row['path']) as handle:
                img = handle.convert("RGB")
        except Exception:
            # Deliberate best effort, but limited to Exception so that
            # KeyboardInterrupt / SystemExit still stop the run.
            img = Image.new('RGB', (224, 224), (0, 0, 0))

        transform = self.transform_train if self.mode == 'train' else self.transform_val
        return transform(img), torch.tensor(row['label_encoded'], dtype=torch.long)

# Mandatory stable configuration: num_workers=0 keeps data loading in the main
# process (the startup banner labels this the "Anti-Freeze" setting).
loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=0, pin_memory=True)

# Training batches are reshuffled every epoch; validation order stays fixed.
train_dataset = ConvNextDataset(train_df, 'train')
val_dataset = ConvNextDataset(val_df, 'val')
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

# Model: ConvNeXt Tiny loaded with the "IMAGENET1K_V1" weights.
print("üîß Initialisation ConvNeXt Tiny...")
model = models.convnext_tiny(weights="IMAGENET1K_V1")

# Replace the last classifier layer with a small MLP head whose output size is
# the number of classes in this dataset.
n_inputs = model.classifier[2].in_features
head_layers = [
    nn.BatchNorm1d(n_inputs),
    nn.Linear(n_inputs, 1024),
    nn.GELU(),
    nn.Dropout(0.3),
    nn.Linear(1024, NUM_CLASSES),
]
model.classifier[2] = nn.Sequential(*head_layers)

model = model.to(DEVICE)

# --- 4. TRAINING SETUP ---
# Objective: cross-entropy with label smoothing.
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

# AdamW over every parameter of the model.
optimizer = optim.AdamW(
    model.parameters(),
    lr=5e-5,
    weight_decay=0.01,
)

# mode='max': the monitored score is expected to increase. The learning rate
# is halved (factor=0.5) once that score plateaus (patience=2).
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.5,
    patience=2,
)

# --- 5. MAIN LOOP ---
# Train for at most MAX_EPOCHS epochs, validate after each one, keep the
# checkpoint with the best weighted F1 and stop early once the score has not
# improved for PATIENCE_LIMIT consecutive epochs.
MAX_EPOCHS = 100
PATIENCE_LIMIT = 5

best_f1 = 0.0
patience_counter = 0
# Mixed precision is only used on CUDA; elsewhere autocast is an explicit no-op.
use_amp = DEVICE.type == "cuda"

print(f"üî• D√âMARRAGE DU MARATHON ({MAX_EPOCHS} √©poques max)...")

for epoch in range(MAX_EPOCHS):
    model.train()
    # Running sum of the batch losses of this epoch (kept for inspection; it
    # is not part of the epoch summary printed below).
    loss_ep = 0.0
    t0 = time.time()

    for i, (imgs, lbls) in enumerate(train_loader):
        imgs, lbls = imgs.to(DEVICE, non_blocking=True), lbls.to(DEVICE, non_blocking=True)

        optimizer.zero_grad()
        with torch.amp.autocast('cuda', enabled=use_amp):
            out = model(imgs)
            loss = criterion(out, lbls)

        if torch.isnan(loss):
            print(f"\n‚ùå ARR√äT D'URGENCE : Loss NaN √† l'√©poque {epoch+1}.")
            # exit() is a helper injected by the `site` module for interactive
            # sessions and terminates with status 0. Raising SystemExit
            # directly works everywhere and reports the failure to the caller.
            raise SystemExit(1)

        # Scaled backward pass; the scaler then steps the optimizer and
        # updates its scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        loss_ep += loss.item()

        # Progress line, overwritten in place every 100 batches.
        if i % 100 == 0:
            print(f"   ‚è≥ Ep {epoch+1} | Batch {i} | Loss: {loss.item():.4f}", end="\r")

    # VALIDATION
    model.eval()
    preds, targets = [], []
    with torch.no_grad():
        for imgs, lbls in val_loader:
            imgs = imgs.to(DEVICE, non_blocking=True)
            with torch.amp.autocast('cuda', enabled=use_amp):
                out = model(imgs)
            # Predicted class = index of the highest logit.
            _, p = torch.max(out, 1)
            preds.extend(p.cpu().numpy())
            targets.extend(lbls.cpu().numpy())

    val_f1 = f1_score(targets, preds, average='weighted')
    # Elapsed time covers both the training and the validation pass.
    duree = time.time() - t0
    current_lr = optimizer.param_groups[0]['lr']

    if val_f1 > best_f1:
        # New best score: reset patience, save the weights and a small JSON log.
        best_f1 = val_f1
        patience_counter = 0
        torch.save(model.state_dict(), os.path.join(OUTPUT_DIR, "livrable_model_convnext_NIGHT_BEST.pth"))

        # float() guarantees a plain Python number for JSON serialization.
        meta_data = {"epoch": epoch+1, "f1_score": float(best_f1), "model": "ConvNeXt Tiny"}
        with open(os.path.join(OUTPUT_DIR, "night_run_log.json"), 'w') as f:
            json.dump(meta_data, f)
        status = "üíæ SAUV√â (NEW RECORD)"
    else:
        patience_counter += 1
        status = f"‚ö†Ô∏è Pas mieux ({patience_counter}/{PATIENCE_LIMIT})"

    print(f"\n‚úÖ FIN EP {epoch+1}/{MAX_EPOCHS} | Time: {duree:.0f}s | F1 Val: {val_f1:.4f} | LR: {current_lr:.1e} | {status}")

    # The scheduler monitors the validation F1 (created with mode='max').
    scheduler.step(val_f1)

    # Early stopping.
    if patience_counter >= PATIENCE_LIMIT:
        print(f"\nüõë ARR√äT AUTOMATIQUE : Le score stagne depuis {PATIENCE_LIMIT} √©poques.")
        print(f"   -> Meilleur score final : {best_f1:.4f}")
        break

print(f"üèÜ NUIT TERMIN√âE. R√©sultat Final : {best_f1:.4f}")

🌙 DÉMARRAGE MODE NUIT (ConvNeXt Tiny) SUR : cuda
🛡️ SÉCURITÉS : Anti-Crash + Early Stopping (5 époques) + Workers=0 (Anti-Freeze)
🔧 Initialisation ConvNeXt Tiny...
🔥 DÉMARRAGE DU MARATHON (100 époques max)...
   ⏳ Ep 1 | Batch 2100 | Loss: 1.7143
✅ FIN EP 1/100 | Time: 602s | F1 Val: 0.6470 | LR: 5.0e-05 | 💾 SAUVÉ (NEW RECORD)
   ⏳ Ep 2 | Batch 2100 | Loss: 1.7966
✅ FIN EP 2/100 | Time: 595s | F1 Val: 0.6796 | LR: 5.0e-05 | 💾 SAUVÉ (NEW RECORD)
   ⏳ Ep 3 | Batch 2100 | Loss: 1.3312
✅ FIN EP 3/100 | Time: 596s | F1 Val: 0.6928 | LR: 5.0e-05 | 💾 SAUVÉ (NEW RECORD)
   ⏳ Ep 4 | Batch 2100 | Loss: 1.3248
✅ FIN EP 4/100 | Time: 599s | F1 Val: 0.6973 | LR: 5.0e-05 | 💾 SAUVÉ (NEW RECORD)
   ⏳ Ep 5 | Batch 2100 | Loss: 1.0885
✅ FIN EP 5/100 | Time: 592s | F1 Val: 0.7020 | LR: 5.0e-05 | 💾 SAUVÉ (NEW RECORD)
   ⏳ Ep 6 | Batch 2100 | Loss: 1.0652
✅ FIN EP 6/100 | Time: 592s | F1 Val: 0.6987 | LR: 5.0e-05 | ⚠️ Pas mieux (1/5)
   ‚è