# PrimeurVision — Évaluation du modèle YOLOv8

Évaluation du **modèle final retenu** sur le **jeu de test** (36 images, jamais vues pendant l'entraînement).

**Modèle** : YOLOv8n (v1) — meilleure généralisation malgré une capacité moindre que YOLOv8s (v2 trop grand pour 166 images d'entraînement → overfitting)

**Métriques** : mAP@50, mAP@50-95, Précision, Recall, AP par classe

**Classes** : carotte (0), aubergine (1), citron (2), pomme_de_terre (3), radis (4), tomate (5)

## 1. Installation

In [43]:
!pip install ultralytics -q

## 2. Imports

In [44]:
import os
import shutil
import random
import glob
import yaml
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
from ultralytics import YOLO

## 3. Chargement du modèle et du dataset

**Colab** : charge depuis Google Drive.  
**Local** : charge depuis `models/best_yolov8n_primeurvision.pt` dans le projet.

In [45]:
import sys
import pathlib

# Environment setup: resolve the model / dataset locations (Colab or local),
# rewrite data.yaml so its paths point at WORK_DIR, then load the model.
ON_COLAB = 'google.colab' in sys.modules or 'google.colab' in str(sys.path)

if ON_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')
    # PROJECT_ROOT is defined on Colab as well: later cells save figures
    # under PROJECT_ROOT/docs/img and would otherwise raise NameError.
    PROJECT_ROOT = pathlib.Path('/content/drive/MyDrive/PrimeurVision')
    # Final selected model: v1 (YOLOv8n)
    MODEL_PATH  = '/content/drive/MyDrive/PrimeurVision/models/best_yolov8n_primeurvision.pt'
    DATASET_SRC = '/content/drive/MyDrive/PrimeurVision/dataset'
    MODELS_DIR  = '/content/drive/MyDrive/PrimeurVision/models'
    WORK_DIR    = '/content/dataset'
    # Always start from a fresh copy of the dataset on the Colab VM.
    if os.path.exists(WORK_DIR):
        shutil.rmtree(WORK_DIR)
    shutil.copytree(DATASET_SRC, WORK_DIR)
else:
    # Locate the project root: the current directory or its parent,
    # whichever contains dataset/data.yaml.
    cwd = pathlib.Path(os.getcwd())
    PROJECT_ROOT = None
    for candidate in [cwd, cwd.parent]:
        if (candidate / 'dataset' / 'data.yaml').exists():
            PROJECT_ROOT = candidate
            break
    if PROJECT_ROOT is None:
        raise FileNotFoundError("Impossible de trouver la racine du projet.")

    WORK_DIR   = str(PROJECT_ROOT / 'dataset')
    MODELS_DIR = str(PROJECT_ROOT / 'models')
    # Final selected model: v1 (YOLOv8n), which generalizes better
    MODEL_PATH = str(PROJECT_ROOT / 'models' / 'best_yolov8n_primeurvision.pt')
    print(f"Racine projet : {PROJECT_ROOT}")
    print(f"Mod√®le        : {MODEL_PATH}")
    print(f"Mod√®le existe : {os.path.exists(MODEL_PATH)}")

# Confidence threshold shared by validation and prediction cells.
CONF_THRESHOLD = 0.25

data_yaml_path = os.path.join(WORK_DIR, 'data.yaml')
with open(data_yaml_path, 'r', encoding='utf-8') as f:
    data_config = yaml.safe_load(f)

# Point the dataset config at WORK_DIR.
# NOTE(review): when running locally this rewrites the project's own
# dataset/data.yaml in place with an absolute path; confirm this is intended.
data_config['path']  = WORK_DIR
data_config['train'] = 'images/train'
data_config['val']   = 'images/val'
data_config['test']  = 'images/test'
with open(data_yaml_path, 'w', encoding='utf-8') as f:
    yaml.dump(data_config, f, default_flow_style=False)

# The rest of the notebook uses CLASS_NAMES as an {id: name} mapping
# (.items(), .values(), .get()); normalize a list-style `names` entry.
_names = data_config['names']
CLASS_NAMES = dict(enumerate(_names)) if isinstance(_names, (list, tuple)) else _names

model = YOLO(MODEL_PATH)
print(f"Classes : {list(CLASS_NAMES.values())}")
print(f"Images de test : {len(os.listdir(os.path.join(WORK_DIR, 'images', 'test')))}")

Racine projet : /Users/eugenie/Desktop/M2-SISE/13 - Deep learning - Machine learning - Computer Vision/projet_computer_vision
Mod√®le        : /Users/eugenie/Desktop/M2-SISE/13 - Deep learning - Machine learning - Computer Vision/projet_computer_vision/models/best_yolov8n_primeurvision.pt
Mod√®le existe : True
Classes : ['carotte', 'aubergine', 'citron', 'pomme_de_terre', 'radis', 'tomate']
Images de test : 36


## 4. Évaluation quantitative sur le test

In [46]:
# Run validation on the held-out test split and print a summary.
# NOTE(review): conf=CONF_THRESHOLD (0.25) filters low-confidence predictions
# before the metrics are computed, which may lower the reported mAP compared
# with the validator's default threshold; confirm this is intended.
metrics = model.val(data=data_yaml_path, split='test', conf=CONF_THRESHOLD)

print("=" * 45)
print("  R√âSULTATS SUR LE JEU DE TEST")
print("=" * 45)
print(f"  mAP@50    : {metrics.box.map50:.4f}")
print(f"  mAP@50-95 : {metrics.box.map:.4f}")
print(f"  Pr√©cision : {metrics.box.mp:.4f}")
print(f"  Recall    : {metrics.box.mr:.4f}")
print("-" * 45)
print("  AP@50 par classe :")
# metrics.box.ap50 has one entry per class listed in
# metrics.box.ap_class_index, in that order. Map it back to class ids so a
# class missing from the split does not shift the values of the others.
ap50_by_class = {
    int(class_id): float(metrics.box.ap50[pos])
    for pos, class_id in enumerate(metrics.box.ap_class_index)
}
for i, name in CLASS_NAMES.items():
    ap50 = ap50_by_class.get(i, 0)  # 0 when the class has no AP entry
    bar  = '‚ñà' * int(ap50 * 20)  # text bar, 20 characters for AP = 1.0
    print(f"  {name:20s} : {ap50:.4f}  {bar}")
print("=" * 45)

Ultralytics 8.4.14 üöÄ Python-3.11.14 torch-2.10.0 CPU (Apple M3)
Model summary (fused): 73 layers, 3,006,818 parameters, 0 gradients, 8.1 GFLOPs
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.0¬±0.0 ms, read: 345.6¬±477.1 MB/s, size: 89.7 KB)
[K[34m[1mval: [0mScanning /Users/eugenie/Desktop/M2-SISE/13 - Deep learning - Machine learning - Computer Vision/projet_computer_vision/dataset/labels/test.cache... 36 images, 0 backgrounds, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 36/36 18.9Mit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 3/3 1.4s/it 4.3s3.7ss
                   all         36        482      0.502      0.431      0.455      0.311
               carotte          8        232      0.457      0.228       0.33      0.184
             aubergine          7         30      0.643        0.3      0.482      0.328
                citron          9         49      0.444     

## 5. Matrice de confusion et courbes

In [47]:
def _show_saved_plot(png_path, figsize, title):
    """Display a saved PNG in a matplotlib figure.

    Returns True when the file exists and was shown, False otherwise.
    """
    if not os.path.exists(png_path):
        return False
    plt.figure(figsize=figsize)
    plt.imshow(Image.open(png_path))
    plt.axis('off')
    plt.title(title)
    plt.show()
    return True


# Directory where the validation run stored its plots.
eval_dir = str(metrics.save_dir)

# Confusion matrix: try the normalized version first, then the raw one,
# and show only the first that exists.
for fname in ['confusion_matrix_normalized.png', 'confusion_matrix.png']:
    confusion_img = os.path.join(eval_dir, fname)
    if _show_saved_plot(confusion_img, (8, 8), 'Matrice de confusion ‚Äî jeu de test'):
        break

# Precision-recall curve
pr_img = os.path.join(eval_dir, 'PR_curve.png')
_show_saved_plot(pr_img, (10, 6), 'Courbe Pr√©cision-Rappel ‚Äî jeu de test')

# Training curves (read from the models/ directory)
curves_img = os.path.join(MODELS_DIR, 'results.png')
if not _show_saved_plot(curves_img, (18, 8), "Courbes d'entra√Ænement (phase 2)"):
    print(f"(courbes d'entra√Ænement non trouv√©es dans {MODELS_DIR})")

<Figure size 800x800 with 1 Axes>

<Figure size 1800x800 with 1 Axes>

## 6. Résultats qualitatifs — Prédictions réussies

Exemples de détections correctes sur le jeu de test.

In [48]:
# glob returns files in arbitrary order; sort so the evaluation order (and
# therefore tie-breaking between equally scored images) is reproducible.
test_images = sorted(glob.glob(os.path.join(WORK_DIR, 'images', 'test', '*.jpg')))

def load_gt_classes(img_path):
    """Return the set of ground-truth class ids for an image.

    The YOLO label file is located by swapping the image's ``images``
    directory for ``labels`` and its extension for ``.txt``. Only the
    innermost directory named exactly ``images`` is swapped, so a parent
    folder whose name merely contains "images" is left untouched, and any
    image extension (not just ``.jpg``) is handled.

    Args:
        img_path: Path to an image file.

    Returns:
        Set of int class ids (first field of every non-blank label line),
        or an empty set when no label file exists.
    """
    stem, _ = os.path.splitext(os.path.normpath(img_path))
    parts = stem.split(os.sep)
    # Walk the directory components from the innermost outwards; the last
    # element is the file name and is never rewritten.
    for idx in range(len(parts) - 2, -1, -1):
        if parts[idx] == 'images':
            parts[idx] = 'labels'
            break
    label_path = os.sep.join(parts) + '.txt'
    if not os.path.exists(label_path):
        return set()
    with open(label_path) as f:
        return {int(line.split()[0]) for line in f if line.strip()}

def score_image(img_path, result):
    """
    Success score for one image.

    The base term is the fraction of ground-truth classes that appear among
    the predicted classes. It is scaled down by up to 30% according to the
    share of predicted classes that are not in the ground truth, and a bonus
    of 0.1 times the mean detection confidence is added. Images without
    ground-truth classes score 0.0.
    """
    expected  = load_gt_classes(img_path)
    has_boxes = len(result.boxes) > 0
    predicted = {int(c) for c in result.boxes.cls} if has_boxes else set()
    if not expected:
        return 0.0

    hits         = len(expected & predicted)
    spurious     = len(predicted - expected)
    recall_score = hits / len(expected)
    # max(..., 1) avoids dividing by zero when nothing was predicted.
    penalty      = spurious / max(len(predicted), 1)
    avg_conf     = float(result.boxes.conf.mean()) if has_boxes else 0.0
    return recall_score * (1 - 0.3 * penalty) + 0.1 * avg_conf

# Run the model on every test image and score each prediction.
all_scored = []
for img_path in test_images:
    result = model.predict(img_path, conf=CONF_THRESHOLD, verbose=False)[0]
    score  = score_image(img_path, result)
    all_scored.append((img_path, result, score))

# Sort from best to worst; later cells rely on this ordering.
all_scored.sort(key=lambda x: x[2], reverse=True)
best = all_scored[:3]

fig, axes = plt.subplots(1, 3, figsize=(18, 6))
for ax, (img_path, result, score) in zip(axes, best):
    # Reverse the channel axis of the rendered image before display
    # (presumably BGR -> RGB; confirm against result.plot()).
    ax.imshow(result.plot()[:, :, ::-1])
    gt = load_gt_classes(img_path)
    ax.set_title(
        f"GT: {set(CLASS_NAMES[c] for c in gt)}  |  score: {score:.2f}",
        fontsize=11, color='green', fontweight='bold'
    )
    ax.axis('off')
# Blank out panels left unused when fewer than three images are available.
for ax in axes[len(best):]:
    ax.axis('off')

plt.suptitle('Pr√©dictions les plus r√©ussies ‚Äî jeu de test', fontsize=14)
plt.tight_layout()
# PROJECT_ROOT is only defined in the local branch of the setup cell, so
# derive the project directory from MODELS_DIR (<project>/models in both
# setups) and make sure the output folder exists before saving.
out_dir = os.path.join(os.path.dirname(MODELS_DIR), 'docs', 'img')
os.makedirs(out_dir, exist_ok=True)
plt.savefig(os.path.join(out_dir, 'predictions_reussies.png'),
            dpi=150, bbox_inches='tight')
plt.show()

<Figure size 1800x600 with 3 Axes>

## 7. Analyse des erreurs — Cas difficiles

On identifie les images où le modèle détecte peu ou avec une faible confiance. Ces cas révèlent les limites du modèle : objets partiellement visibles, occlusions, angles atypiques, ou classes sous-représentées dans le dataset.

In [49]:
# all_scored is already sorted from best to worst (computed in the previous cell).
# Images without ground truth (empty label) are excluded: their score of 0
# does not reflect a detection failure.
scored_with_gt = [(p, r, s) for p, r, s in all_scored if load_gt_classes(p)]

# Hard cases = lowest scores among annotated images
worst = scored_with_gt[-3:][::-1]  # 3 worst, from worst to least bad

fig, axes = plt.subplots(1, 3, figsize=(18, 6))
for ax, (img_path, result, score) in zip(axes, worst):
    # Reverse the channel axis of the rendered image before display
    # (presumably BGR -> RGB; confirm against result.plot()).
    ax.imshow(result.plot()[:, :, ::-1])
    gt     = load_gt_classes(img_path)
    pred   = set(int(c) for c in result.boxes.cls) if len(result.boxes) > 0 else set()
    missed = gt - pred  # ground-truth classes with no prediction at all
    ax.set_title(
        f"GT: {set(CLASS_NAMES[c] for c in gt)}\n"
        f"Manqu√©s: {set(CLASS_NAMES[c] for c in missed) if missed else chr(8709)}  |  score: {score:.2f}",
        fontsize=11, color='crimson', fontweight='bold'
    )
    ax.axis('off')
# Blank out panels left unused when fewer than three images are available.
for ax in axes[len(worst):]:
    ax.axis('off')

plt.suptitle('Cas difficiles ‚Äî pr√©dictions les plus √©loign√©es du ground truth', fontsize=14)
plt.tight_layout()
# PROJECT_ROOT is only defined in the local branch of the setup cell, so
# derive the project directory from MODELS_DIR (<project>/models in both
# setups) and make sure the output folder exists before saving.
out_dir = os.path.join(os.path.dirname(MODELS_DIR), 'docs', 'img')
os.makedirs(out_dir, exist_ok=True)
plt.savefig(os.path.join(out_dir, 'predictions_difficiles.png'),
            dpi=150, bbox_inches='tight')
plt.show()

# Summary
n_zero = sum(1 for _, r, _ in all_scored if len(r.boxes) == 0)
n_ok   = sum(1 for _, _, s in all_scored if s >= 0.9)
print(f"Images sans aucune d√©tection      : {n_zero}/{len(all_scored)}")
print(f"Images bien d√©tect√©es (score‚â•0.9) : {n_ok}/{len(all_scored)}")

<Figure size 1800x600 with 3 Axes>

Images sans aucune d√©tection      : 1/36
Images bien d√©tect√©es (score‚â•0.9) : 23/36


## 8. Distribution des confiances et des classes détectées

In [50]:
# One bar color per class, in class-id order.
COLORS = ['#FF8C00', '#9B59B6', '#FFD700', '#8B4513', '#E74C3C', '#FF4444']

all_confs    = []
# Start every known class at zero so classes with no detection still get a bar.
class_counts = {v: 0 for v in CLASS_NAMES.values()}

# Reuse the predictions stored in all_scored (one per entry of test_images,
# made with the same CONF_THRESHOLD) instead of running inference a second
# time over the whole test set.
for _, result, _ in all_scored:
    for box in result.boxes:
        all_confs.append(float(box.conf))
        name = CLASS_NAMES.get(int(box.cls), f'class_{int(box.cls)}')
        class_counts[name] = class_counts.get(name, 0) + 1

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Histogram of detection confidence scores
ax1.hist(all_confs, bins=20, color='steelblue', edgecolor='white')
ax1.axvline(CONF_THRESHOLD, color='red', linestyle='--', label=f'Seuil = {CONF_THRESHOLD}')
ax1.set_title('Distribution des scores de confiance (test)')
ax1.set_xlabel('Confiance')
ax1.set_ylabel('Nombre de d√©tections')
ax1.legend()

# Number of detections per predicted class (lists rather than dict views,
# so matplotlib receives plain sequences)
bars = ax2.bar(list(class_counts.keys()), list(class_counts.values()), color=COLORS)
ax2.bar_label(bars)
ax2.set_title('D√©tections par classe (jeu de test)')
ax2.set_xlabel('Classe')
ax2.set_ylabel('Nb d√©tections')
ax2.tick_params(axis='x', rotation=30)

plt.tight_layout()
plt.show()

print(f"Total d√©tections : {len(all_confs)}")
# Mean and median are only meaningful when there is at least one detection.
if all_confs:
    print(f"Confiance moyenne : {np.mean(all_confs):.3f}")
    print(f"Confiance m√©diane : {np.median(all_confs):.3f}")

<Figure size 1400x500 with 2 Axes>

Total d√©tections : 361
Confiance moyenne : 0.522
Confiance m√©diane : 0.482
