# Model Testing on Real Data

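This notebook evaluates every trained YOLO OBB checkpoint (mixed-training, fine-tuned, and baseline models) on the held-out real-data test set and saves the resulting metrics.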

## 1. Environment Setup

In [2]:
import json
import os
import random
import shutil
from datetime import datetime
from pathlib import Path

import numpy as np
import yaml
from tqdm import tqdm
from ultralytics import YOLO

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/home/jovyan/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


## 2. Configuration & Utilities

In [7]:
# Evaluation configuration: experiment-tracking placeholders first, then the
# test-data settings consumed by create_test_yaml().

# Passed as `project=` to model.val() in the evaluation loop.
WANDB_PROJECT = "YOUR_WANDB_PROJECT"
# NOTE(review): not referenced by any cell visible in this notebook - confirm it is still needed.
WANDB_ENTITY = "YOUR_WANDB_ENTITY"

# Root folder of the test dataset; images are read from its "images" subfolder.
TEST_DATASET_PATH = Path("last-test")
# File extensions treated as test images.
IMG_EXTS = [
    ".jpg",
    ".jpeg",
    ".png",
]

def create_test_yaml(test_path, run_name, class_names=None):
    """
    Creates the image list and dataset YAML file for the test dataset.

    Every file under ``test_path / "images"`` (searched recursively) whose
    extension matches ``IMG_EXTS`` (case-insensitively) is written, one
    absolute path per line, to ``{run_name}_test.txt``. A dataset YAML that
    points its train/val/test splits at that list is then written to
    ``{run_name}_test_dataset.yaml``. Both files are created in the current
    working directory.

    Args:
        test_path (str | Path): Root folder of the test dataset.
        run_name (str): Prefix used for the generated file names and log line.
        class_names (list[str] | None): Class names for the dataset. Defaults
            to ``['object']`` (a single class).

    Returns:
        Path: Path of the generated dataset YAML file.

    Raises:
        FileNotFoundError: If the images folder does not exist or contains no
            file with a supported extension.
    """
    images_dir = Path(test_path) / "images"
    if not images_dir.is_dir():
        raise FileNotFoundError(f"Test images directory not found: {images_dir.resolve()}")

    # Compare suffixes case-insensitively so files such as IMG_001.JPG are not
    # silently dropped on case-sensitive file systems.
    valid_exts = {ext.lower() for ext in IMG_EXTS}
    test_images = sorted(
        p for p in images_dir.rglob("*")
        if p.is_file() and p.suffix.lower() in valid_exts
    )
    if not test_images:
        raise FileNotFoundError(
            f"No images with extensions {sorted(valid_exts)} found under {images_dir.resolve()}"
        )
    print(f"[{run_name}] Total Test Images: {len(test_images)}")

    # Image list file: one absolute image path per line
    test_txt_path = Path(f"{run_name}_test.txt")
    with open(test_txt_path, 'w', encoding="utf-8") as f:
        f.write('\n'.join(str(p.resolve()) for p in test_images))

    # Dataset YAML. All three splits point at the same list: 'train' is only
    # present because the key is required, evaluation reads 'val' / 'test'.
    names = list(class_names) if class_names else ["object"]
    list_file = str(test_txt_path.resolve())
    dataset_config = {
        "path": str(Path.cwd()),
        "train": list_file,
        "val": list_file,
        "test": list_file,
        "nc": len(names),
        "names": names,
    }

    yaml_path = Path(f"{run_name}_test_dataset.yaml")
    with open(yaml_path, 'w', encoding="utf-8") as f:
        # Serialise through PyYAML so paths containing characters such as
        # ' #' or ': ' are quoted instead of corrupting the file.
        yaml.safe_dump(dataset_config, f, sort_keys=False, allow_unicode=True)

    return yaml_path

## 3. Evaluation Loop

In [8]:
# Run Evaluation on All Models
# Checkpoint paths are relative to the notebook's working directory.
models_to_test = [
    "YOLO_OBB_Mixed_Training_Results/yolo26l_mixed_5pct.pt",
    "YOLO_OBB_Mixed_Training_Results/yolo26l_mixed_10pct.pt",
    "YOLO_OBB_Mixed_Training_Results/yolo26l_mixed_20pct.pt",
    "YOLO_OBB_Mixed_Training_Results/yolo26l_mixed_30pct.pt",
    "YOLO_OBB_Mixed_Training_Results/yolo26l_mixed_40pct.pt",
    "YOLO_OBB_FineTuning_Results/yolo_obb_ft_5pct.pt",
    "YOLO_OBB_FineTuning_Results/yolo_obb_ft_10pct.pt",
    "YOLO_OBB_FineTuning_Results/yolo_obb_ft_20pct.pt",
    "YOLO_OBB_FineTuning_Results/yolo_obb_ft_30pct.pt",
    "YOLO_OBB_FineTuning_Results/yolo_obb_ft_40pct.pt",
    "YOLO_OBB_FineTuning_Results/yolo_obb_ft_50pct.pt",
    "YOLO_OBB_FineTuning_Results/yolo_obb_scratch_alpha_baseline_40.pt",
    "YOLO_OBB_Mixed_Training_Results/YOLO26l_OBB_Real_Data_only.pt",
    "YOLO_OBB_Mixed_Training_Results/YOLO26l_OBB_E1_Synthesized_only.pt",
]

# model path -> {"map50", "map50-95", "precision", "recall"}
results_summary = {}
# Models that were skipped (file missing) or raised during evaluation;
# reported once after the loop so they are not lost in the validation logs.
not_evaluated = []

test_yaml = create_test_yaml(TEST_DATASET_PATH, "final_test")

for model_name in models_to_test:
    model_path = Path(model_name)

    if not model_path.exists():
        print(f"Model {model_name} not found at {model_path.resolve()}. Skipping.")
        not_evaluated.append(model_name)
        continue

    print(f"\nEvaluating {model_name}...")
    try:
        model = YOLO(model_path)
        # Name the run after the checkpoint file, e.g. yolo_obb_ft_5pct.
        # (parent.parent.name is '' for '<dir>/<file>.pt', which made every
        # run share the name 'test_'.)
        run_name = model_path.stem

        metrics = model.val(data=str(test_yaml), split='test', project=WANDB_PROJECT, name=f"test_{run_name}")

        # Cast to plain floats so the summary is JSON-serialisable regardless
        # of the numeric type the metrics object returns.
        box = metrics.box
        results_summary[model_name] = {
            "map50": float(box.map50),
            "map50-95": float(box.map),
            "precision": float(box.mp),  # mean precision
            "recall": float(box.mr),     # mean recall
        }
        res = results_summary[model_name]
        print(f"Result for {model_name}:")
        print(f"  mAP50={res['map50']:.4f}, mAP50-95={res['map50-95']:.4f}")
        print(f"  Precision={res['precision']:.4f}, Recall={res['recall']:.4f}")

    except Exception as e:
        # Deliberately best-effort: one broken checkpoint must not abort the
        # evaluation of the remaining models.
        not_evaluated.append(model_name)
        print(f"Error evaluating {model_name}: {type(e).__name__}: {e}")

if not_evaluated:
    print(f"\n{len(not_evaluated)} of {len(models_to_test)} models were not evaluated:")
    for skipped_model in not_evaluated:
        print(f"  - {skipped_model}")

# Print the final summary for every successfully evaluated model
print("\n=== FINAL RESULTS ===")
if not results_summary:
    print("No models were evaluated successfully; the saved result files will be empty.")
for m, res in results_summary.items():
    print(f"{m}:")
    print(f"  mAP50={res['map50']:.4f}, mAP50-95={res['map50-95']:.4f}")
    print(f"  Precision={res['precision']:.4f}, Recall={res['recall']:.4f}")

# Save results to file (json / datetime are imported in the notebook's import cell)

# Results live inside the test dataset folder; create it if it doesn't exist
results_dir = TEST_DATASET_PATH / "test_results_last"
results_dir.mkdir(parents=True, exist_ok=True)

# Take the time once so the file names and the "Test Date" line always agree
run_time = datetime.now()
timestamp = run_time.strftime("%Y%m%d_%H%M%S")

# Save as JSON
json_file = results_dir / f"test_results_{timestamp}.json"
with open(json_file, 'w', encoding="utf-8") as f:
    json.dump(results_summary, f, indent=4)
print(f"\nResults saved to: {json_file}")

# Save as formatted text file
txt_file = results_dir / f"test_results_{timestamp}.txt"
with open(txt_file, 'w', encoding="utf-8") as f:
    f.write("=== MODEL EVALUATION RESULTS ===\n")
    f.write(f"Test Date: {run_time.strftime('%Y-%m-%d %H:%M:%S')}\n\n")

    for m, res in results_summary.items():
        f.write(f"Model: {m}\n")
        f.write(f"  mAP50:     {res['map50']:.4f}\n")
        f.write(f"  mAP50-95:  {res['map50-95']:.4f}\n")
        f.write(f"  Precision: {res['precision']:.4f}\n")
        f.write(f"  Recall:    {res['recall']:.4f}\n")
        f.write("\n")
print(f"Results saved to: {txt_file}")

[final_test] Total Test Images: 199

Evaluating YOLO_OBB_Mixed_Training_Results/yolo26l_mixed_5pct.pt...
Ultralytics 8.4.12 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (NVIDIA H100 NVL MIG 1g.24gb, 22144MiB)
YOLO26l-obb summary (fused): 200 layers, 25,594,962 parameters, 0 gradients, 89.8 GFLOPs
[KDownloading https://ultralytics.com/assets/Arial.ttf to '/home/jovyan/.config/Ultralytics/Arial.ttf': 100% ━━━━━━━━━━━━ 755.1KB 84.7KB/s 8.9s 8.9s<0.3ss
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 24.3±6.5 MB/s, size: 62.2 KB)
[K[34m[1mval: [0mScanning /home/jovyan/last-test/labels... 199 images, 0 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 199/199 403.2it/s 0.5s0.1s
[34m[1mval: [0mNew cache created: /home/jovyan/last-test/labels.cache
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 13/13 4.3it/s 3.1s0.2s
                   all        199       2418      0.907      0.883      0.938      0.724
Speed: 1.3ms p