In [8]:
print("LA STRAT√âGIE GAGNANTE : LE FINE-TUNING MAINTENANT , UTILISE QUE CE QUI EST ECRIT EN DESSOUS SI TU A CE MOD√®LE A ENTRAINER")

LA STRAT√âGIE GAGNANTE : LE FINE-TUNING MAINTENANT , UTILISE QUE CE QUI EST ECRIT EN DESSOUS SI TU A CE MOD√®LE A ENTRAINER


In [10]:
import os
import json
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
from torch.cuda.amp import GradScaler, autocast

# --- CONFIGURATION ---
PROJECT_ROOT = r"C:\Users\amisf\Desktop\datascientest_projet"
IMG_DIR = r"C:\Users\amisf\Desktop\datascientest_projet\data\raw\images\images\image_train"
OUTPUT_DIR = os.path.join(PROJECT_ROOT, "implementation", "outputs")
BATCH_SIZE = 128 # On profite de la 4070
DEVICE = torch.device("cuda")

print(f"üöÄ RECONSTRUCTION DU MODELE LEGENDAIRE (91%) SUR : {DEVICE}")

# --- DATASET ---
# On utilise la transformation STANDARD ImageNet (celle utilis√©e pour cr√©er les .npy implicitement)
trans = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224), # Le zoom standard ResNet
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

csv_path = os.path.join(PROJECT_ROOT, "data", "raw")
df_x = pd.read_csv(os.path.join(csv_path, "X_train_update.csv"), index_col=0)
df_y = pd.read_csv(os.path.join(csv_path, "Y_train_CVw08PX.csv"), index_col=0)
df = pd.merge(df_x, df_y, left_index=True, right_index=True)
df['path'] = df.apply(lambda x: os.path.join(IMG_DIR, f"image_{x['imageid']}_product_{x['productid']}.jpg"), axis=1)

le = LabelEncoder()
df['label_encoded'] = le.fit_transform(df['prdtypecode'])
NUM_CLASSES = len(le.classes_)

train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['label_encoded'], random_state=42)

class LegendDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform
    def __len__(self): return len(self.df)
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        try: img = Image.open(row['path']).convert("RGB")
        except: img = Image.new('RGB', (224, 224), (0, 0, 0))
        if self.transform: img = self.transform(img)
        return img, torch.tensor(row['label_encoded'], dtype=torch.long)

train_loader = DataLoader(LegendDataset(train_df, trans), batch_size=BATCH_SIZE, shuffle=True, num_workers=0, pin_memory=True)
val_loader = DataLoader(LegendDataset(val_df, trans), batch_size=BATCH_SIZE, shuffle=False, num_workers=0, pin_memory=True)

# --- ARCHITECTURE EXACTE DU LOG (Rang DL 1) ---
# L:[2048, 1024, 512] | Opt:adam | Act:gelu | Drop:0.2

class LegendMLP(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()
        # Reconstruction exacte de ta classe ModularMLP gagnante
        self.net = nn.Sequential(
            # Couche 1 : 2048 -> 2048 (Input ResNet -> Hidden 1)
            nn.Linear(input_dim, 2048),
            nn.BatchNorm1d(2048),
            nn.GELU(), # Activation GELU (C'√©tait le secret !)
            nn.Dropout(0.2),
            
            # Couche 2 : 2048 -> 1024
            nn.Linear(2048, 1024),
            nn.BatchNorm1d(1024),
            nn.GELU(),
            nn.Dropout(0.2),
            
            # Couche 3 : 1024 -> 512
            nn.Linear(1024, 512),
            nn.BatchNorm1d(512),
            nn.GELU(),
            nn.Dropout(0.2),
            
            # Sortie : 512 -> 27
            nn.Linear(512, num_classes)
        )
        
    def forward(self, x):
        return self.net(x)

# Assemblage avec ResNet50
resnet = models.resnet50(weights="IMAGENET1K_V1")

# ON G√àLE TOUT LE RESNET (Comme si on utilisait des .npy fixes)
for param in resnet.parameters():
    param.requires_grad = False

# On remplace la t√™te
resnet.fc = LegendMLP(2048, NUM_CLASSES)
model = resnet.to(DEVICE)

print("üß† Architecture 'Legend' (GELU + Config Exacte) reconstruite.")

# --- ENTRAINEMENT (REPLIQUE DU LOG) ---
criterion = nn.CrossEntropyLoss()
# Optimiseur ADAM (Pas AdamW, comme dans ton log)
optimizer = optim.Adam(model.fc.parameters(), lr=0.001) 
scaler = GradScaler()

print("üî• D√©marrage (Objectif : Retrouver les 91%)...")
EPOCHS = 20 # Comme dans ton log
best_f1 = 0.0

for epoch in range(EPOCHS):
    model.train()
    t0 = time.time()
    
    for i, (imgs, lbls) in enumerate(train_loader):
        imgs, lbls = imgs.to(DEVICE), lbls.to(DEVICE)
        
        optimizer.zero_grad()
        with autocast():
            out = model(imgs)
            loss = criterion(out, lbls)
            
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        
        if i % 50 == 0: print(f"   ‚è≥ Ep {epoch+1} | Batch {i} | Loss: {loss.item():.4f}", end="\r")

    # Validation
    model.eval()
    preds, targets = [], []
    with torch.no_grad():
        for imgs, lbls in val_loader:
            imgs = imgs.to(DEVICE)
            with autocast(): out = model(imgs)
            _, p = torch.max(out, 1)
            preds.extend(p.cpu().numpy())
            targets.extend(lbls.cpu().numpy())
    
    val_f1 = f1_score(targets, preds, average='weighted')
    duree = time.time() - t0
    
    print(f"\n‚úÖ FIN EP {epoch+1} | Time: {duree:.0f}s | F1: {val_f1:.4f}")
    
    # Sauvegarde syst√©matique si record
    if val_f1 > best_f1:
        best_f1 = val_f1
        torch.save(model.state_dict(), os.path.join(OUTPUT_DIR, "livrable_model_resnet_reborn.pth"))
        
        # Metadata pour le Voting
        meta_data = {
            "model_name": "ResNet50 Legend 91%", 
            "class_mapping": {int(i): str(c) for i, c in enumerate(le.classes_)}
        }
        with open(os.path.join(OUTPUT_DIR, "livrable_resnet_reborn_metadata.json"), 'w') as f:
            json.dump(meta_data, f, indent=4)
        print("   üíæ Record Sauvegard√©.")

print(f"üèÜ Fini. Score Final : {best_f1:.4f}")

üöÄ RECONSTRUCTION DU MODELE LEGENDAIRE (91%) SUR : cuda
üß† Architecture 'Legend' (GELU + Config Exacte) reconstruite.
üî• D√©marrage (Objectif : Retrouver les 91%)...


  scaler = GradScaler()
  with autocast():


   ‚è≥ Ep 1 | Batch 500 | Loss: 1.7137

  with autocast(): out = model(imgs)



‚úÖ FIN EP 1 | Time: 415s | F1: 0.5418
   üíæ Record Sauvegard√©.


  with autocast():


   ‚è≥ Ep 2 | Batch 500 | Loss: 1.2292

  with autocast(): out = model(imgs)



‚úÖ FIN EP 2 | Time: 399s | F1: 0.5878
   üíæ Record Sauvegard√©.


  with autocast():


   ‚è≥ Ep 3 | Batch 500 | Loss: 1.1363

  with autocast(): out = model(imgs)



‚úÖ FIN EP 3 | Time: 394s | F1: 0.5984
   üíæ Record Sauvegard√©.


  with autocast():


   ‚è≥ Ep 4 | Batch 500 | Loss: 0.8193

  with autocast(): out = model(imgs)



‚úÖ FIN EP 4 | Time: 393s | F1: 0.5994
   üíæ Record Sauvegard√©.


  with autocast():


   ‚è≥ Ep 5 | Batch 500 | Loss: 0.7502

  with autocast(): out = model(imgs)



‚úÖ FIN EP 5 | Time: 394s | F1: 0.6014
   üíæ Record Sauvegard√©.


  with autocast():


   ‚è≥ Ep 6 | Batch 500 | Loss: 0.8910

  with autocast(): out = model(imgs)



‚úÖ FIN EP 6 | Time: 393s | F1: 0.6171
   üíæ Record Sauvegard√©.


  with autocast():


   ‚è≥ Ep 7 | Batch 500 | Loss: 0.7637

  with autocast(): out = model(imgs)



‚úÖ FIN EP 7 | Time: 398s | F1: 0.6074


  with autocast():


   ‚è≥ Ep 8 | Batch 100 | Loss: 0.6717


KeyboardInterrupt



In [12]:
import os
import json
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from torchvision import models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score

# --- CONFIGURATION ---
PROJECT_ROOT = r"C:\Users\amisf\Desktop\datascientest_projet"
OUTPUT_DIR = os.path.join(PROJECT_ROOT, "implementation", "outputs")
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"üöÄ OP√âRATION PHOENIX SUR : {DEVICE}")

# 1. CHARGEMENT DU TR√âSOR (.NPY)
print("üì• Chargement des features pr√©-calcul√©es (3.1 Go)...")
try:
    # On charge les fichiers du 5 Janvier
    X_all = np.load(os.path.join(OUTPUT_DIR, 'train_features_resnet50_augmented.npy'))
    y_all_raw = np.load(os.path.join(OUTPUT_DIR, 'train_labels_augmented.npy'))
    print(f"‚úÖ Features charg√©es : {X_all.shape}")
except Exception as e:
    raise FileNotFoundError(f"‚ùå Impossible de charger les .npy : {e}")

# 2. ENCODAGE & SPLIT
# On doit r√©-encoder les labels proprement pour √™tre coh√©rent avec le projet
le = LabelEncoder()
y_all = le.fit_transform(y_all_raw)
NUM_CLASSES = len(le.classes_)

print("‚úÇÔ∏è Cr√©ation du Train/Val...")
X_train, X_val, y_train, y_val = train_test_split(
    X_all, y_all, test_size=0.2, random_state=42, stratify=y_all
)

# Conversion en Tenseurs PyTorch (Directement en VRAM pour vitesse lumi√®re)
train_ds = TensorDataset(
    torch.tensor(X_train, dtype=torch.float32).to(DEVICE), 
    torch.tensor(y_train, dtype=torch.long).to(DEVICE)
)
val_ds = TensorDataset(
    torch.tensor(X_val, dtype=torch.float32).to(DEVICE), 
    torch.tensor(y_val, dtype=torch.long).to(DEVICE)
)

# Gros batch size car ce ne sont pas des images, juste des vecteurs
train_loader = DataLoader(train_ds, batch_size=4096, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=4096, shuffle=False)

# 3. L'ARCHITECTURE "LEGEND" (Celle de ton log √† 91%)
# Config : [2048, 1024, 512], Adam, GELU, Drop 0.2
class LegendMLP(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 2048), nn.BatchNorm1d(2048), nn.GELU(), nn.Dropout(0.2),
            nn.Linear(2048, 1024), nn.BatchNorm1d(1024), nn.GELU(), nn.Dropout(0.2),
            nn.Linear(1024, 512),  nn.BatchNorm1d(512),  nn.GELU(), nn.Dropout(0.2),
            nn.Linear(512, num_classes)
        )
    def forward(self, x): return self.net(x)

mlp = LegendMLP(2048, NUM_CLASSES).to(DEVICE)
print("üß† Cerveau MLP initialis√©.")

# 4. ENTRAINEMENT √âCLAIR (Sur les features, pas les images)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(mlp.parameters(), lr=0.001) # Adam standard comme avant

print("üî• D√©marrage Entra√Ænement PHOENIX (√áa va aller tr√®s vite)...")
EPOCHS = 20
best_f1 = 0.0

for epoch in range(EPOCHS):
    mlp.train()
    t0 = time.time()
    
    for bx, by in train_loader:
        optimizer.zero_grad()
        out = mlp(bx)
        loss = criterion(out, by)
        loss.backward()
        optimizer.step()

    # Validation
    mlp.eval()
    preds, targets = [], []
    with torch.no_grad():
        for bx, by in val_loader:
            out = mlp(bx)
            _, p = torch.max(out, 1)
            preds.extend(p.cpu().numpy())
            targets.extend(by.cpu().numpy())
            
    val_f1 = f1_score(targets, preds, average='weighted')
    duree = time.time() - t0
    print(f"‚úÖ EP {epoch+1}/{EPOCHS} | Time: {duree:.1f}s | F1: {val_f1:.4f}")
    
    if val_f1 > best_f1:
        best_f1 = val_f1
        # On sauvegarde juste les poids du MLP pour l'instant
        torch.save(mlp.state_dict(), os.path.join(OUTPUT_DIR, "temp_phoenix_mlp.pth"))

print(f"üèÜ Score PHOENIX atteint : {best_f1:.4f}")

# 5. ASSEMBLAGE FINAL (FUSION CORPS + T√äTE) & SAUVEGARDE POUR VOTING
print("\nüèóÔ∏è Assemblage du mod√®le final pour le Voting...")

# On prend un ResNet vierge
full_model = models.resnet50(weights="IMAGENET1K_V1")
# On g√®le le corps (puisqu'on a entra√Æn√© le MLP sur des features fig√©es)
for param in full_model.parameters():
    param.requires_grad = False

# On lui greffe notre MLP entra√Æn√©
full_model.fc = LegendMLP(2048, NUM_CLASSES)
full_model.fc.load_state_dict(torch.load(os.path.join(OUTPUT_DIR, "temp_phoenix_mlp.pth")))

# On sauvegarde le TOUT (Corps + T√™te) sous le nom que le Voting attend
final_path = os.path.join(OUTPUT_DIR, "livrable_model_resnet_reborn.pth")
torch.save(full_model.state_dict(), final_path)

# Metadata
meta_data = {
    "model_name": "ResNet50 Phoenix (From NPY)", 
    "class_mapping": {int(i): str(c) for i, c in enumerate(le.classes_)}
}
with open(os.path.join(OUTPUT_DIR, "livrable_resnet_reborn_metadata.json"), 'w') as f:
    json.dump(meta_data, f, indent=4)

print(f"üíæ SAUVEGARDE TERMIN√âE : {final_path}")
print("üöÄ Tu peux lancer le Voting, le champion est de retour !")

üöÄ OP√âRATION PHOENIX SUR : cuda
üì• Chargement des features pr√©-calcul√©es (3.1 Go)...
‚úÖ Features charg√©es : (405000, 2048)
‚úÇÔ∏è Cr√©ation du Train/Val...
üß† Cerveau MLP initialis√©.
üî• D√©marrage Entra√Ænement PHOENIX (√áa va aller tr√®s vite)...
‚úÖ EP 1/20 | Time: 3.6s | F1: 0.6585
‚úÖ EP 2/20 | Time: 3.7s | F1: 0.7392
‚úÖ EP 3/20 | Time: 3.7s | F1: 0.7783
‚úÖ EP 4/20 | Time: 3.6s | F1: 0.8136
‚úÖ EP 5/20 | Time: 3.7s | F1: 0.8340
‚úÖ EP 6/20 | Time: 3.8s | F1: 0.8496
‚úÖ EP 7/20 | Time: 3.7s | F1: 0.8693
‚úÖ EP 8/20 | Time: 4.6s | F1: 0.8800
‚úÖ EP 9/20 | Time: 3.7s | F1: 0.8829
‚úÖ EP 10/20 | Time: 3.7s | F1: 0.8826
‚úÖ EP 11/20 | Time: 4.1s | F1: 0.8936
‚úÖ EP 12/20 | Time: 3.7s | F1: 0.8999
‚úÖ EP 13/20 | Time: 3.8s | F1: 0.9026
‚úÖ EP 14/20 | Time: 3.6s | F1: 0.9023
‚úÖ EP 15/20 | Time: 3.7s | F1: 0.9064
‚úÖ EP 16/20 | Time: 3.6s | F1: 0.9104
‚úÖ EP 17/20 | Time: 3.7s | F1: 0.9085
‚úÖ EP 18/20 | Time: 3.7s | F1: 0.9072
‚úÖ EP 19/20 | Time: 3.8s | F1: 0.9125
‚úÖ EP