# Section 1: Improved Logo Detector

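This notebook fine-tunes a YOLO11n logo detector with logo-safe augmentation settings, evaluates it on the validation and test splits, and compares the results against the baseline metrics stored in `baseline_info.json`.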



In [None]:
import json
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import torch
import warnings
warnings.filterwarnings('ignore')

# Notebook-wide configuration: plotting style, compute device, random seeds,
# and the baseline run's metadata that later cells compare against.

# Use one consistent look for every figure in this notebook.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Pick the fastest available backend: CUDA GPU first, then Apple-Silicon MPS,
# then CPU. Checking only MPS would silently fall back to CPU on CUDA machines.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Seed NumPy and PyTorch so repeated runs of this notebook are comparable.
np.random.seed(42)
torch.manual_seed(42)

# baseline_info.json is read for the baseline metrics, the class list, the
# dataset config path and the baseline weights path used further down.
with open('baseline_info.json', 'r', encoding='utf-8') as f:
    baseline_info = json.load(f)

print("Baseline metrics:")
print(f"  Val mAP50: {baseline_info['metrics']['val_map50']:.4f}")
print(f"  Test mAP50: {baseline_info['metrics']['test_map50']:.4f}")


In [None]:
# Import Ultralytics, installing it on demand, then create the model that the
# training cell fine-tunes.
try:
    from ultralytics import YOLO
    print("Ultralytics already installed")
except ImportError:
    print("Installing ultralytics...")
    import subprocess
    import sys
    # Run pip through the interpreter that backs this kernel. A bare "pip" is
    # resolved via PATH and may install into a different environment, in which
    # case the import below would still fail.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "ultralytics"])
    from ultralytics import YOLO

# Dataset config path and class names come from the baseline run's metadata.
config_path = Path(baseline_info['dataset_config'])
all_classes = baseline_info['classes']

print("Initializing YOLO11")
# Start from the pretrained YOLO11 nano checkpoint.
improved_model = YOLO('yolo11n.pt')

print(f"Model initialized. Device: {device}")
print(f"Number of classes: {len(all_classes)}")


## Training with Improvements


In [None]:
# Fine-tune the detector with augmentation settings chosen to keep logos
# recognisable (no mirroring, shearing, perspective warps or vertical flips).
print("Starting improved model training...")
print(f"Dataset config: {config_path}")

improved_results = improved_model.train(
    # Core data & device settings
    data=str(config_path),
    epochs=40,             # Sufficient time for rare classes to converge
    imgsz=640,
    batch=8,
    device=str(device),
    patience=15,           # Higher patience to allow minority classes to catch up

    # Loss gains: these equal the Ultralytics defaults and are spelled out only
    # so the configuration is explicit; change them here to re-balance
    # localisation vs. classification.
    box=7.5,               # Box loss gain (default 7.5)
    cls=0.5,               # Classification loss gain (default 0.5)
    dfl=1.5,               # Distribution focal loss gain (default 1.5)

    # Augmentation: the "logo-safe" suite
    hsv_h=0.015,           # Restored for color robustness
    hsv_s=0.7,
    hsv_v=0.4,
    degrees=3.0,           # Subtle rotation only
    translate=0.1,         # Shift objects to learn position independence
    scale=0.3,             # Reduced to prevent logos from becoming too tiny
    mosaic=1.0,            # High density learning for small objects
    # NOTE(review): the Ultralytics docs describe copy_paste as a
    # segmentation-only augmentation. If the dataset has box-only labels this
    # setting may have no effect on class balance -- confirm.
    copy_paste=0.3,        # Intended to help balance SAP/FedEx by reusing instances

    # Explicitly disabled (the "risky" ones for logos)
    shear=0.0,             # Prevents distortion of circles/squares
    perspective=0.0,       # Prevents unrealistic 3D warping
    flipud=0.0,            # Logos should not be upside down
    # Must be set to 0.0 explicitly: leaving the argument out keeps the
    # library default of 0.5, i.e. half the training images would be mirrored.
    fliplr=0.0,            # No horizontal flips (mirrored logos/text are invalid)
    mixup=0.0,             # Keep features sharp for small objects

    # Final optimization
    close_mosaic=10,       # Disable mosaic in last 10 epochs to sharpen edges
    val=True,              # Ensure validation runs to monitor F1-score
    plots=True             # Generate result plots for final analysis
)
print("\nImproved training completed!")
print(f"Results saved to: {improved_results.save_dir}")


In [None]:
# Reload the best checkpoint written by the training run and score it on the
# validation and test splits of the same dataset config.
best_weights = improved_results.save_dir / 'weights' / 'best.pt'
improved_best = YOLO(best_weights)

# --- Validation split ---
print("Evaluating improved model on validation set...")
val_metrics_improved = improved_best.val(data=str(config_path), split='val')
print(f"\nImproved Validation mAP50: {val_metrics_improved.box.map50:.4f}")
print(f"Improved Validation mAP50-95: {val_metrics_improved.box.map:.4f}")

# --- Test split ---
print("\nEvaluating improved model on test set...")
test_metrics_improved = improved_best.val(data=str(config_path), split='test')
print(f"\nImproved Test mAP50: {test_metrics_improved.box.map50:.4f}")
print(f"Improved Test mAP50-95: {test_metrics_improved.box.map:.4f}")

# Collect the headline numbers as plain floats so they serialise to JSON.
improved_metrics = {
    'val_map50': float(val_metrics_improved.box.map50),
    'val_map50_95': float(val_metrics_improved.box.map),
    'test_map50': float(test_metrics_improved.box.map50),
    'test_map50_95': float(test_metrics_improved.box.map),
}

with open('improved_metrics.json', 'w') as f:
    json.dump(improved_metrics, f, indent=2)

print("\nImproved metrics saved to improved_metrics.json")

# Side-by-side table: one row per metric, baseline vs. improved vs. delta.
heavy_rule = "=" * 60
comparison_rows = (
    ('Val mAP50', 'val_map50'),
    ('Val mAP50-95', 'val_map50_95'),
    ('Test mAP50', 'test_map50'),
    ('Test mAP50-95', 'test_map50_95'),
)

print("\n" + heavy_rule)
print("COMPARISON: Baseline vs Improved")
print(heavy_rule)
print(f"{'Metric':<25} {'Baseline':<15} {'Improved':<15} {'Improvement':<15}")
print("-" * 60)
for row_label, metric_key in comparison_rows:
    before = baseline_info['metrics'][metric_key]
    after = improved_metrics[metric_key]
    print(f"{row_label:<25} {before:<15.4f} {after:<15.4f} {after - before:+.4f}")
print(heavy_rule)


## Visualize Training Results - Improved Model


In [None]:
# Plot the improved run's training curves and, when the baseline run's
# results.csv is available, compare the two runs epoch by epoch.
import pandas as pd

results_csv = improved_results.save_dir / 'results.csv'
if results_csv.exists():
    results_df = pd.read_csv(results_csv)
    # Some Ultralytics releases pad the CSV header names with spaces; strip
    # them so the column lookups below work with either format.
    results_df.columns = results_df.columns.str.strip()

    # ---- Figure 1: improved model training curves ----
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))

    # Each entry: (axis, y-label, title, [(csv column, legend label, colour)])
    curve_panels = [
        (axes[0, 0], 'Box Loss', 'Improved Model: Box Loss',
         [('train/box_loss', 'Train Box Loss', None),
          ('val/box_loss', 'Val Box Loss', None)]),
        (axes[1, 0], 'Classification Loss', 'Improved Model: Classification Loss',
         [('train/cls_loss', 'Train Cls Loss', None),
          ('val/cls_loss', 'Val Cls Loss', None)]),
        (axes[1, 1], 'mAP50', 'Improved Model: Validation mAP50',
         [('metrics/mAP50(B)', 'mAP50', 'green')]),
    ]
    for ax, ylabel, title, series in curve_panels:
        for column, label, color in series:
            style = {'color': color} if color else {}
            ax.plot(results_df['epoch'], results_df[column], label=label, linewidth=2, **style)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.legend()
        ax.grid(True, alpha=0.3)

    # Only three panels are drawn; hide the fourth instead of showing an
    # empty frame.
    axes[0, 1].axis('off')

    fig.tight_layout()
    fig.savefig('improved_training_curves.png', dpi=300, bbox_inches='tight')
    plt.show()

    print("Training curves saved to improved_training_curves.png")

    # ---- Figure 2: improved vs. baseline curves ----
    # The baseline results.csv is looked up two directories above the
    # baseline weights file recorded in baseline_info.json.
    baseline_csv = Path(baseline_info['model_path']).parent.parent / 'results.csv'
    if baseline_csv.exists():
        baseline_df = pd.read_csv(baseline_csv)
        baseline_df.columns = baseline_df.columns.str.strip()

        fig, axes = plt.subplots(2, 2, figsize=(16, 12))

        # Total validation loss = box + cls + dfl. Leaving the DFL term out
        # would make the "total" curve understate the loss.
        baseline_total = (baseline_df['val/box_loss'] + baseline_df['val/cls_loss']
                          + baseline_df['val/dfl_loss'])
        improved_total = (results_df['val/box_loss'] + results_df['val/cls_loss']
                          + results_df['val/dfl_loss'])

        # Each entry: (axis, baseline y, improved y, legend suffix, y-label, title)
        comparison_panels = [
            (axes[0, 0], baseline_df['val/box_loss'], results_df['val/box_loss'],
             'Val Box Loss', 'Validation Box Loss', 'Box Loss Comparison'),
            (axes[0, 1], baseline_df['metrics/mAP50(B)'], results_df['metrics/mAP50(B)'],
             'mAP50', 'mAP50', 'mAP50 Comparison'),
            (axes[1, 0], baseline_total, improved_total,
             'Total Val Loss', 'Total Validation Loss', 'Total Loss Comparison'),
        ]
        for ax, baseline_y, improved_y, suffix, ylabel, title in comparison_panels:
            ax.plot(baseline_df['epoch'], baseline_y,
                    label=f'Baseline {suffix}', linewidth=2, linestyle='--', alpha=0.7)
            ax.plot(results_df['epoch'], improved_y,
                    label=f'Improved {suffix}', linewidth=2)
            ax.set_xlabel('Epoch')
            ax.set_ylabel(ylabel)
            ax.set_title(title)
            ax.legend()
            ax.grid(True, alpha=0.3)

        axes[1, 1].axis('off')  # unused fourth panel

        # Save this figure explicitly: plt.savefig() acts on the *current*
        # figure, which would be the bar chart created below, so the curve
        # comparison would otherwise never be written to disk.
        fig.tight_layout()
        fig.savefig('model_comparison_curves.png', dpi=300, bbox_inches='tight')

        # ---- Figure 3: final metrics bar chart ----
        # Each value is [baseline, improved].
        metrics_comparison = {
            'Val mAP50': [baseline_info['metrics']['val_map50'], improved_metrics['val_map50']],
            'Test mAP50': [baseline_info['metrics']['test_map50'], improved_metrics['test_map50']],
            'Val mAP50-95': [baseline_info['metrics']['val_map50_95'], improved_metrics['val_map50_95']],
            'Test mAP50-95': [baseline_info['metrics']['test_map50_95'], improved_metrics['test_map50_95']]
        }

        x = np.arange(len(metrics_comparison))
        width = 0.35
        fig2, ax = plt.subplots(figsize=(12, 6))
        baseline_vals = [v[0] for v in metrics_comparison.values()]
        improved_vals = [v[1] for v in metrics_comparison.values()]
        ax.bar(x - width/2, baseline_vals, width, label='Baseline', alpha=0.8)
        ax.bar(x + width/2, improved_vals, width, label='Improved', alpha=0.8)
        ax.set_ylabel('mAP Score')
        ax.set_title('Model Performance Comparison')
        ax.set_xticks(x)
        ax.set_xticklabels(list(metrics_comparison.keys()))
        ax.legend()
        ax.grid(True, alpha=0.3, axis='y')
        fig2.tight_layout()
        fig2.savefig('model_comparison.png', dpi=300, bbox_inches='tight')
        plt.show()

        print("Comparison plots saved to model_comparison.png")
        print("Comparison curves saved to model_comparison_curves.png")
else:
    print("Results CSV not found. Training may still be in progress.")


In [None]:
# Detailed 2x3 dashboard for the improved run: the three loss components
# (train vs. val) on the top row; mAP50, mAP50-95 and F1 on the bottom row.
import pandas as pd

results_csv = improved_results.save_dir / 'results.csv'

if results_csv.exists():
    results_df = pd.read_csv(results_csv)
    # Some Ultralytics releases pad the CSV header names with spaces; strip
    # them so the column lookups below work with either format.
    results_df.columns = results_df.columns.str.strip()

    # F1 derived from the precision/recall columns logged per epoch; the small
    # epsilon avoids a 0/0 when both are zero.
    results_df['metrics/F1(B)'] = 2 * (results_df['metrics/precision(B)'] * results_df['metrics/recall(B)']) / \
                                 (results_df['metrics/precision(B)'] + results_df['metrics/recall(B)'] + 1e-6)

    fig, axes = plt.subplots(2, 3, figsize=(20, 12))

    # Each entry: (axis, y-label, title, [(csv column, legend label, colour)])
    panels = [
        (axes[0, 0], 'Box Loss', 'Box Loss: Training vs Validation',
         [('train/box_loss', 'Train Box Loss', None),
          ('val/box_loss', 'Val Box Loss', None)]),
        (axes[0, 1], 'Classification Loss', 'Classification Loss: Training vs Validation',
         [('train/cls_loss', 'Train Cls Loss', None),
          ('val/cls_loss', 'Val Cls Loss', None)]),
        (axes[0, 2], 'dfl Loss', 'dfl Loss: Training vs Validation',
         [('train/dfl_loss', 'Train dfl Loss', None),
          ('val/dfl_loss', 'Val dfl Loss', None)]),
        (axes[1, 0], 'mAP50', 'Validation mAP50 Over Time',
         [('metrics/mAP50(B)', 'mAP50', 'green')]),
        (axes[1, 1], 'mAP50-95', 'Validation mAP50-95 Over Time',
         [('metrics/mAP50-95(B)', 'mAP50-95', 'green')]),
        (axes[1, 2], 'F1 Score', 'Model Reliability (F1 Score)',
         [('metrics/F1(B)', 'F1 Score', 'orange')]),
    ]
    for ax, ylabel, title, series in panels:
        for column, label, color in series:
            style = {'color': color} if color else {}
            ax.plot(results_df['epoch'], results_df[column], label=label, linewidth=2, **style)
        # Every panel, including F1, gets the same labels and grid so the
        # dashboard reads consistently.
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.legend()
        ax.grid(True, alpha=0.3)

    fig.tight_layout()
    fig.savefig('improved0_training_curves.png', dpi=300, bbox_inches='tight')
    plt.show()

    print("Training curves saved to improved0_training_curves.png")
else:
    print("Results CSV not found. Training may still be in progress.")

In [None]:
# Run the improved model on a small sample of test images and draw the
# predicted boxes in a 3x4 grid.
from matplotlib.patches import Rectangle

YOLO_DATASET = Path("yolo_dataset")
MAX_PREVIEW_IMAGES = 12   # 3 x 4 grid below
TITLE_MAX_CHARS = 40      # longer file names are truncated in subplot titles

# glob() order is filesystem-dependent; sort so the same images are shown on
# every run.
test_image_files = sorted((YOLO_DATASET / 'test' / 'images').glob("*.jpg"))[:MAX_PREVIEW_IMAGES]

if not test_image_files:
    # Nothing to predict on; skip instead of handing an empty source list to
    # the model and rendering a grid of blank panels.
    print(f"No .jpg test images found in {YOLO_DATASET / 'test' / 'images'}; skipping prediction preview.")
else:
    predictions = improved_best.predict(
        source=[str(f) for f in test_image_files],
        conf=0.25,
        iou=0.45,
        save=False,
        show=False
    )

    fig, axes = plt.subplots(3, 4, figsize=(20, 15))
    axes = axes.flatten()

    # Turn every panel off up front so unused ones (fewer than 12 images)
    # do not show as empty frames.
    for ax in axes:
        ax.axis('off')

    for ax, img_file, pred in zip(axes, test_image_files, predictions):
        # Close the file handle as soon as the image has been handed to
        # matplotlib.
        with Image.open(img_file) as img:
            ax.imshow(img)

        # Add an ellipsis only when the name was actually shortened.
        file_name = img_file.name
        if len(file_name) > TITLE_MAX_CHARS:
            file_name = f"{file_name[:TITLE_MAX_CHARS]}..."
        ax.set_title(file_name, fontsize=8)

        # Draw predictions
        if pred.boxes is not None and len(pred.boxes) > 0:
            boxes = pred.boxes.xyxy.cpu().numpy()
            confidences = pred.boxes.conf.cpu().numpy()
            classes = pred.boxes.cls.cpu().numpy().astype(int)

            for (x1, y1, x2, y2), conf, cls in zip(boxes, confidences, classes):
                class_name = all_classes[int(cls)]

                # Box outline plus a "<class> <confidence>" tag above it.
                rect = Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                 edgecolor='cyan', facecolor='none')
                ax.add_patch(rect)
                ax.text(x1, y1 - 5, f"{class_name} {conf:.2f}",
                        color='cyan', fontsize=7,
                        bbox=dict(boxstyle='round,pad=0.3', facecolor='black', alpha=0.7))

    fig.tight_layout()
    fig.savefig('improved_test_predictions.png', dpi=300, bbox_inches='tight')
    plt.show()

    print("Improved test predictions saved to improved_test_predictions.png")


In [None]:
# Persist everything the next section needs: the best weights path, class
# list, dataset config, and both the improved and baseline metrics.
improved_info = {
    'model_path': str(improved_results.save_dir / 'weights' / 'best.pt'),
    'classes': all_classes,
    'num_classes': len(all_classes),
    'metrics': improved_metrics,
    'dataset_config': str(config_path),
    'baseline_metrics': baseline_info['metrics']
}

with open('improved_info.json', 'w') as f:
    json.dump(improved_info, f, indent=2)

print("\nImproved model information saved to improved_info.json")
print("\n" + "="*50)
print("IMPROVED MODEL COMPLETE")
print("="*50)

baseline_test_map50 = baseline_info['metrics']['test_map50']
improved_test_map50 = improved_metrics['test_map50']
absolute_gain = improved_test_map50 - baseline_test_map50

# A relative change is undefined when the baseline scored 0; report "n/a"
# instead of raising ZeroDivisionError at the very end of the notebook.
if baseline_test_map50:
    relative_gain = f"{(improved_test_map50 / baseline_test_map50 - 1) * 100:+.2f}%"
else:
    relative_gain = "n/a"

print(f"Baseline Test mAP50: {baseline_test_map50:.4f}")
print(f"Improved Test mAP50: {improved_test_map50:.4f}")
print(f"Improvement: {absolute_gain:+.4f} ({relative_gain})")
