# PrimeurVision - Fine-tuning YOLOv8

Fine-tuning d'un modele **YOLOv8n pre-entraine sur COCO** pour detecter nos 6 classes de fruits/legumes.

**Entrainement en 2 phases :**
1. **Backbone gele** : on entraine uniquement la tete de detection
2. **Fine-tuning complet** : on degele tout et on affine avec un learning rate plus faible

**Classes** : carotte, aubergine, citron, pomme_de_terre, radis, tomate

## 1. Installation des dependances

In [None]:
!pip install ultralytics -q

## 2. Imports

In [None]:
import os
import shutil
import random
import glob
import yaml
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
from ultralytics import YOLO

In [None]:
# --- HYPERPARAMETERS ---
# Central place for the settings read by the training cells further down.

# Phase 1: frozen backbone
PHASE1_EPOCHS = 10  # passed as `epochs` to the phase-1 model.train() call
PHASE1_LR = 1e-2  # passed as `lr0` to the phase-1 model.train() call
FREEZE_LAYERS = 10  # passed as `freeze` to the phase-1 model.train() call

# Phase 2: full fine-tuning
PHASE2_EPOCHS = 40  # passed as `epochs` to the phase-2 model.train() call
PHASE2_LR = 1e-3  # passed as `lr0` in phase 2; 10x smaller than PHASE1_LR

# Settings shared by both phases
IMG_SIZE = 640  # passed as `imgsz` to both model.train() calls
BATCH_SIZE = 16  # passed as `batch` to both model.train() calls
PATIENCE = 10  # passed as `patience` to both model.train() calls
CONF_THRESHOLD = 0.25  # NOTE(review): not read by any cell visible here; presumably used at inference time

## 3. Montage Google Drive et chargement du dataset

Uploadez le dossier `dataset/` sur Google Drive dans `My Drive/PrimeurVision/dataset/`.

In [None]:
from google.colab import drive

# Mount Google Drive so the dataset uploaded by the user becomes readable.
drive.mount('/content/drive')

DATASET_SRC = '/content/drive/MyDrive/PrimeurVision/dataset'
WORK_DIR = '/content/dataset'

# Check the source BEFORE wiping the working copy: otherwise a wrong Drive
# path would delete a valid local dataset and then fail on the copy.
if not os.path.isdir(DATASET_SRC):
    raise FileNotFoundError(f"Dataset introuvable sur Drive : {DATASET_SRC}")

# Work on a local copy to speed up training; start from a clean directory
# so files from a previous run cannot linger.
if os.path.exists(WORK_DIR):
    shutil.rmtree(WORK_DIR)
shutil.copytree(DATASET_SRC, WORK_DIR)

# Load the dataset configuration
data_yaml_path = os.path.join(WORK_DIR, 'data.yaml')
with open(data_yaml_path, 'r', encoding='utf-8') as f:
    data_config = yaml.safe_load(f)

# A YOLO data.yaml may declare `names` either as a list or as an
# {id: name} mapping. The following cells call CLASS_NAMES.get(class_id),
# so always expose an int-keyed dict. data_config itself is left untouched.
raw_names = data_config['names']
if isinstance(raw_names, dict):
    CLASS_NAMES = {int(class_id): name for class_id, name in raw_names.items()}
else:
    CLASS_NAMES = dict(enumerate(raw_names))
NUM_CLASSES = len(CLASS_NAMES)

# Summary: class list and number of files found in each split folder
print(f"{NUM_CLASSES} classes : {CLASS_NAMES}")
for split in ['train', 'val', 'test']:
    img_dir = os.path.join(WORK_DIR, 'images', split)
    if os.path.exists(img_dir):
        print(f"  {split}: {len(os.listdir(img_dir))} images")

## 4. Exploration du dataset

Affiche 6 images aléatoires avec leurs bounding boxes pour vérifier visuellement que les annotations sont correctes, puis montre la distribution des classes (nombre de boxes par classe) pour chaque split.

In [None]:
# Box/label colours; plot_image_with_boxes picks one with `class_id % len(COLORS)`.
COLORS = ['#FF8C00', '#9B59B6', '#FFD700', '#8B4513', '#E74C3C', '#FF4444']

def parse_yolo_label(label_path):
    """Read a YOLO label file and return its annotations.

    Every line made of exactly five whitespace-separated fields is turned
    into a ``(class_id, x_center, y_center, width, height)`` tuple, with
    the class id as an int and the four box values as floats. Lines with
    any other number of fields (blank lines included) are skipped.

    Args:
        label_path: Path of the label text file.

    Returns:
        A list of annotation tuples, in file order.

    Raises:
        ValueError: If a five-field line holds a non-numeric value.
    """
    boxes = []
    with open(label_path, 'r') as handle:
        for raw_line in handle:
            fields = raw_line.strip().split()
            if len(fields) != 5:
                continue
            label = int(fields[0])
            xc, yc, bw, bh = (float(value) for value in fields[1:])
            boxes.append((label, xc, yc, bw, bh))
    return boxes

def plot_image_with_boxes(img_path, label_path, ax):
    """Render an image on ``ax`` and overlay its YOLO annotations.

    Each annotation read from ``label_path`` is drawn as an unfilled
    rectangle, with the class name printed just above its top-left corner.
    Box values are multiplied by the image width/height to obtain pixel
    coordinates. The axes decorations are hidden at the end.

    Args:
        img_path: Path of the image to display.
        label_path: Path of the matching YOLO label file.
        ax: Matplotlib axes that receives the image and the overlays.
    """
    picture = Image.open(img_path)
    img_w, img_h = picture.size
    ax.imshow(picture)

    for class_id, xc, yc, bw, bh in parse_yolo_label(label_path):
        # Colours are reused cyclically when there are more classes than colours.
        edge = COLORS[class_id % len(COLORS)]
        # Labels store the box centre; Rectangle wants the top-left corner.
        left = (xc - bw / 2) * img_w
        top = (yc - bh / 2) * img_h
        ax.add_patch(
            patches.Rectangle(
                (left, top), bw * img_w, bh * img_h,
                linewidth=2, edgecolor=edge, facecolor='none',
            )
        )
        ax.text(
            left, top - 5, CLASS_NAMES.get(class_id, '?'),
            color=edge, fontsize=10, fontweight='bold', backgroundcolor='black',
        )
    ax.axis('off')

# Show up to 6 random images from the train split
train_images_dir = os.path.join(WORK_DIR, 'images', 'train')
train_labels_dir = os.path.join(WORK_DIR, 'labels', 'train')

image_files = sorted(glob.glob(os.path.join(train_images_dir, '*.jpg')))
sample_images = random.sample(image_files, min(6, len(image_files)))

fig, axes = plt.subplots(2, 3, figsize=(15, 10))
# Hide every axes up front: cells left unused (fewer than 6 images, or an
# image without a label file) would otherwise show an empty frame with ticks.
for ax in axes.flatten():
    ax.axis('off')
for ax, img_path in zip(axes.flatten(), sample_images):
    base_name = os.path.splitext(os.path.basename(img_path))[0]
    label_path = os.path.join(train_labels_dir, base_name + '.txt')
    if os.path.exists(label_path):
        plot_image_with_boxes(img_path, label_path, ax)

plt.suptitle('Echantillon du dataset', fontsize=14)
plt.tight_layout()
plt.show()

# Class distribution: number of boxes per class, for each existing split
for split in ['train', 'val', 'test']:
    labels_dir = os.path.join(WORK_DIR, 'labels', split)
    if not os.path.exists(labels_dir):
        continue
    # Count by class id first so the report can be ordered by id, which
    # keeps the printed order stable whatever order glob returns the files in.
    id_counts = {}
    for lf in glob.glob(os.path.join(labels_dir, '*.txt')):
        for class_id, *_ in parse_yolo_label(lf):
            id_counts[class_id] = id_counts.get(class_id, 0) + 1
    class_counts = {
        CLASS_NAMES.get(class_id, f'class_{class_id}'): count
        for class_id, count in sorted(id_counts.items())
    }
    print(f"[{split}] {class_counts}")

## 5. Creation du split test (si necessaire)

Si le dossier `test/` n'existe pas, on le cree en prenant 10% du train.

In [ ]:
test_images_dir = os.path.join(WORK_DIR, 'images', 'test')
test_labels_dir = os.path.join(WORK_DIR, 'labels', 'test')

if not os.path.exists(test_images_dir):
    os.makedirs(test_images_dir, exist_ok=True)
    os.makedirs(test_labels_dir, exist_ok=True)

    # Sorted listing + fixed seed: the same images are selected on every run.
    train_imgs = sorted(glob.glob(os.path.join(WORK_DIR, 'images', 'train', '*.jpg')))
    random.seed(42)
    random.shuffle(train_imgs)

    # 10% of the train images, at least one when any image is available.
    n_test = max(1, int(len(train_imgs) * 0.1))
    test_imgs = train_imgs[:n_test]
    for img_path in test_imgs:
        base_name = os.path.splitext(os.path.basename(img_path))[0]
        label_path = os.path.join(WORK_DIR, 'labels', 'train', base_name + '.txt')
        shutil.move(img_path, os.path.join(test_images_dir, os.path.basename(img_path)))
        # The label is moved only when it exists; an image without a label
        # file is moved on its own.
        if os.path.exists(label_path):
            shutil.move(label_path, os.path.join(test_labels_dir, base_name + '.txt'))
    # Report what was really moved: n_test is at least 1 even when the
    # train folder contains no image at all.
    print(f"{len(test_imgs)} images deplacees vers test/")
else:
    print(f"Test existant : {len(os.listdir(test_images_dir))} images")

# Per-split file counts after the (possible) move
for split in ['train', 'val', 'test']:
    img_dir = os.path.join(WORK_DIR, 'images', split)
    if os.path.exists(img_dir):
        print(f"  {split}: {len(os.listdir(img_dir))} images")

## 6. Mise a jour de data.yaml

In [None]:
# Point the dataset config at the local working copy; the split folders
# are given relative to `path`.
data_config['path'] = WORK_DIR
data_config['train'] = 'images/train'
data_config['val'] = 'images/val'
data_config['test'] = 'images/test'

with open(data_yaml_path, 'w') as f:
    yaml.dump(data_config, f, default_flow_style=False)

# Print the file as written, for a visual check. A `with` block closes the
# handle instead of leaving that to the garbage collector.
with open(data_yaml_path, 'r') as f:
    print(f.read())

## 7. Entrainement en 2 phases

Entraînement phase 1 — Charge YOLOv8n pré-entraîné et l'entraîne avec le backbone gelé (seule la tête de détection apprend). Cela permet d'adapter le modèle à nos classes sans casser les features déjà apprises.

Entraînement phase 2 — Reprend le meilleur modèle de la phase 1 et dégèle tout pour affiner l'ensemble du réseau avec un learning rate plus faible.

In [None]:
# Phase 1: frozen backbone, starting from the COCO-pretrained YOLOv8n weights
model = YOLO('yolov8n.pt')

results_phase1 = model.train(
    data=data_yaml_path,
    epochs=PHASE1_EPOCHS,
    imgsz=IMG_SIZE,
    batch=BATCH_SIZE,
    # With the default optimizer='auto', Ultralytics ignores lr0 and picks
    # its own optimizer and learning rate. Naming the optimizer is what
    # makes PHASE1_LR actually apply.
    optimizer='SGD',
    lr0=PHASE1_LR,
    freeze=FREEZE_LAYERS,
    name='primeurvision_phase1',
    # Re-running this cell must overwrite the same run folder: without
    # exist_ok the name is auto-incremented and the phase-2 cell, which
    # loads runs/detect/primeurvision_phase1/weights/best.pt, would pick
    # up stale weights from the first run.
    exist_ok=True,
    patience=PATIENCE,
    save=True,
    plots=True
)

In [None]:
# Phase 2: full fine-tuning, starting from the best phase-1 checkpoint
best_phase1 = os.path.join('runs', 'detect', 'primeurvision_phase1', 'weights', 'best.pt')
# Fail with an explicit message when phase 1 has not produced its weights.
if not os.path.isfile(best_phase1):
    raise FileNotFoundError(
        f"Poids de la phase 1 introuvables : {best_phase1}. "
        "Executez d'abord la cellule de la phase 1."
    )
model = YOLO(best_phase1)

results_phase2 = model.train(
    data=data_yaml_path,
    epochs=PHASE2_EPOCHS,
    imgsz=IMG_SIZE,
    batch=BATCH_SIZE,
    # With the default optimizer='auto', Ultralytics ignores lr0, so the
    # lower learning rate intended for this phase would never be used.
    # Naming the optimizer makes PHASE2_LR actually apply.
    optimizer='SGD',
    lr0=PHASE2_LR,
    freeze=0,  # nothing frozen: the whole network is trained
    name='primeurvision_phase2',
    # Overwrite the same run folder on re-run so that RESULTS_DIR below
    # always designates the latest run instead of an auto-incremented copy.
    exist_ok=True,
    patience=PATIENCE,
    save=True,
    plots=True
)

# Folder read by the following cells (plots, final weights)
RESULTS_DIR = os.path.join('runs', 'detect', 'primeurvision_phase2')

## 8. Courbes d'entrainement

In [ ]:
# Display the plots found in RESULTS_DIR, each one only when its file
# exists: first the training curves, then the confusion matrix.
results_img = os.path.join(RESULTS_DIR, 'results.png')
confusion_img = os.path.join(RESULTS_DIR, 'confusion_matrix.png')

for plot_path, fig_size, caption in (
    (results_img, (18, 8), 'Courbes d\'entrainement (phase 2)'),
    (confusion_img, (8, 8), 'Matrice de confusion (validation)'),
):
    if not os.path.exists(plot_path):
        continue
    plt.figure(figsize=fig_size)
    plt.imshow(Image.open(plot_path))
    plt.axis('off')
    plt.title(caption)
    plt.show()

## 9. Sauvegarde du modele sur Drive

In [None]:
# Destination folder on Google Drive (created when missing)
save_dir = '/content/drive/MyDrive/PrimeurVision/models'
os.makedirs(save_dir, exist_ok=True)

# Copy the best phase-2 weights under a project-specific file name
best_model_path = os.path.join(RESULTS_DIR, 'weights', 'best.pt')
drive_model_path = os.path.join(save_dir, 'best_yolov8n_primeurvision.pt')
shutil.copy2(best_model_path, drive_model_path)

# Copy the training curves and the confusion matrix, skipping absent files
for fname in ('results.png', 'confusion_matrix.png'):
    src = os.path.join(RESULTS_DIR, fname)
    if not os.path.exists(src):
        continue
    shutil.copy2(src, os.path.join(save_dir, fname))

print(f"Modele et courbes sauvegardes dans : {save_dir}")