# 🌶️ Bell Pepper Detection with YOLOv8

This notebook trains a specialized YOLOv8 model for bell pepper detection and quality assessment.

## 📋 Steps Overview:
1. **Setup Environment** - Install dependencies
2. **Download Dataset** - Download your YOLO dataset from Google Drive
3. **Configure Training** - Set training parameters
4. **Train Model** - Train YOLOv8 on bell pepper data
5. **Validate Results** - Test and evaluate the model
6. **Export Model** - Download trained model for production

---


## 🔧 Step 1: Setup Environment


In [None]:
# Install required packages
%pip install ultralytics
%pip install roboflow
%pip install wandb  # For experiment tracking (optional)

# Import necessary libraries
import os
import torch
import yaml
from ultralytics import YOLO
from IPython.display import Image, display
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import shutil
import zipfile

# Report the PyTorch build and whether a CUDA device can be used for training.
cuda_ready = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ready}")
if not cuda_ready:
    print("⚠️ No GPU detected. Training will be slower on CPU.")
else:
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"CUDA version: {torch.version.cuda}")


## 📁 Step 2: Download Dataset from Google Drive

Since you've uploaded your dataset to Google Drive, we'll download it directly to Colab.


In [None]:
# Download dataset from Google Drive
import gdown
import zipfile
import os
from pathlib import Path

# Google Drive file ID of the zipped YOLO dataset
file_id = "1PZYraaZ4q-G_N0fQz_GXlD8nNvdOoFNJ"
url = f"https://drive.google.com/uc?id={file_id}"
zip_name = "bell_pepper_yolo_dataset.zip"

print("📥 Downloading Bell Pepper YOLO Dataset from Google Drive...")
print("=" * 55)

# Download the ZIP file. gdown may return None instead of raising when the
# download fails (e.g. the file is not shared publicly), so check explicitly
# rather than failing later with a confusing error from ZipFile.
downloaded = gdown.download(url, zip_name, quiet=False)
if downloaded is None or not os.path.exists(zip_name):
    raise RuntimeError(
        "Dataset download failed. Check that the Google Drive file is shared "
        "with 'Anyone with the link' and that the file ID is correct."
    )

# Extract the dataset into the current working directory
print("\n📂 Extracting dataset...")
with zipfile.ZipFile(zip_name, 'r') as zip_ref:
    zip_ref.extractall(".")

# Clean up ZIP file
os.remove(zip_name)

print("✅ Dataset downloaded and extracted successfully!")


def _print_tree(top, max_files, skip_top=False):
    """Print an indented listing of `top` with at most `max_files` files per directory.

    When `skip_top` is true the top directory itself is not listed and its
    children are printed at indentation level zero.
    """
    top = Path(top)
    for root, _dirs, files in os.walk(top):
        depth = len(Path(root).relative_to(top).parts)
        if skip_top:
            if depth == 0:
                continue
            depth -= 1
        indent = " " * 2 * depth
        print(f"{indent}{os.path.basename(root)}/")
        subindent = " " * 2 * (depth + 1)
        for file in files[:max_files]:
            print(f"{subindent}{file}")
        if len(files) > max_files:
            print(f"{subindent}... and {len(files) - max_files} more files")


# Find the dataset directory (it might be nested)
dataset_path = None
possible_paths = ["yolo_dataset", "dataset", "bell_pepper_dataset"]

# Check current directory for extracted files
extracted_items = [item for item in os.listdir(".") if os.path.isdir(item)]
print(f"\n📁 Extracted directories: {extracted_items}")

# Try the conventional dataset directory names first, then every other
# directory, so detection does not depend on os.listdir() ordering.
candidates = [name for name in possible_paths if name in extracted_items]
candidates += [name for name in extracted_items if name not in possible_paths]

for item in candidates:
    item_path = Path(item)
    # Check if this directory contains the expected YOLO structure
    if (item_path / "images").exists() or (item_path / "dataset.yaml").exists():
        dataset_path = str(item_path)
        print(f"✅ Found dataset directory: {dataset_path}")
        break

# If not found in direct subdirectories, look deeper for a dataset.yaml
if dataset_path is None:
    for root, _dirs, files in os.walk("."):
        if "dataset.yaml" in files:
            dataset_path = root
            print(f"✅ Found dataset directory: {dataset_path}")
            break

if dataset_path is None:
    print("❌ Dataset directory not found! Let's explore the structure...")
    # Show all extracted contents (first 5 files per directory)
    _print_tree(".", max_files=5, skip_top=True)
else:
    # Show the found dataset structure (first 3 files per directory)
    print(f"\n📁 Dataset structure in {dataset_path}:")
    _print_tree(dataset_path, max_files=3)


In [None]:
# Verify dataset structure and preview samples
import cv2
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import yaml

def verify_dataset_structure(dataset_path):
    """Check that `dataset_path` follows the YOLO directory layout.

    The layout is considered valid when `images/train`, `images/val`,
    `labels/train` and `labels/val` are directories and `dataset.yaml`
    exists and parses to a mapping. Progress and the final verdict are
    printed.

    Args:
        dataset_path: Dataset root as a string or Path, or None.

    Returns:
        A `(config, is_valid)` tuple. `config` is the mapping loaded from
        `dataset.yaml`, or None when the file is missing or is not a
        mapping. `is_valid` is True only when every check passed.
    """
    if dataset_path is None:
        print("❌ No dataset path provided!")
        return None, False

    dataset_path = Path(dataset_path)

    required_dirs = (
        'images/train', 'images/val',
        'labels/train', 'labels/val',
    )

    print("🔍 Dataset Structure Verification:")
    print("=" * 40)

    all_good = True
    for dir_name in required_dirs:
        dir_path = dataset_path / dir_name
        if dir_path.is_dir():
            file_count = sum(1 for _ in dir_path.glob('*'))
            print(f"✅ {dir_name}: {file_count} files")
        else:
            print(f"❌ {dir_name}: Missing!")
            all_good = False

    # Check for dataset.yaml
    config = None
    yaml_path = dataset_path / 'dataset.yaml'
    if yaml_path.exists():
        with open(yaml_path, 'r', encoding='utf-8') as f:
            loaded = yaml.safe_load(f)

        # safe_load returns None for an empty file and may return a list or
        # scalar; only a mapping is usable as a dataset configuration.
        if isinstance(loaded, dict):
            config = loaded
            print("✅ dataset.yaml: Found")
            print("\n📊 Dataset Configuration:")
            print(f"   Classes: {config.get('nc', 'Unknown')}")
            print(f"   Class names: {config.get('names', 'Unknown')}")
            print(f"   Train images: {config.get('train_images', 'Unknown')}")
            print(f"   Val images: {config.get('val_images', 'Unknown')}")
        else:
            print("❌ dataset.yaml: Empty or not a YAML mapping!")
            all_good = False
    else:
        print("❌ dataset.yaml: Missing!")
        all_good = False

    # Always report the verdict (previously skipped whenever dataset.yaml existed).
    if all_good:
        print("\n🎉 Dataset structure is correct!")
    else:
        print("\n⚠️ Dataset structure issues detected. Please fix before training.")

    return config, all_good

# Run the verification only when an earlier cell has produced a dataset path;
# otherwise record an explicit "not verified" state for the following cells.
if globals().get('dataset_path') is None:
    print("⚠️ Dataset path not found. Please run the previous cell first.")
    config, dataset_valid = None, False
else:
    config, dataset_valid = verify_dataset_structure(dataset_path)


In [None]:
# Fix dataset structure if needed
if not dataset_valid and dataset_path is not None:
    import shutil

    print("🔧 Attempting to fix dataset structure...")

    # Sometimes the dataset is nested in a subdirectory or laid out differently,
    # so search the whole tree for images/, labels/ and YAML files.
    dataset_root = Path(dataset_path)

    images_dirs = [d for d in dataset_root.rglob("images") if d.is_dir()]
    labels_dirs = [d for d in dataset_root.rglob("labels") if d.is_dir()]
    yaml_files = list(dataset_root.rglob("*.yaml"))

    print(f"Found images directories: {[str(d) for d in images_dirs]}")
    print(f"Found labels directories: {[str(d) for d in labels_dirs]}")
    print(f"Found YAML files: {[str(f) for f in yaml_files]}")

    # Re-root the dataset at the first directory that holds both images/ and
    # labels/ side by side (every images/ directory is considered, not just
    # the first one found).
    for images_dir in images_dirs:
        nested_root = images_dir.parent
        if (nested_root / "labels").is_dir():
            dataset_path = str(nested_root)
            print(f"🔄 Updated dataset path to: {dataset_path}")

            # Re-verify with the new path
            config, dataset_valid = verify_dataset_structure(dataset_path)
            break

    # If still not valid, try to create a proper structure
    if not dataset_valid:
        print("🔨 Attempting to restructure dataset...")

        # Only a small sample is copied here; the split itself is left for later.
        sample_limit = 10

        new_dataset_path = "bell_pepper_yolo_dataset"
        new_dataset = Path(new_dataset_path)
        train_dir = new_dataset / "images" / "train"
        train_labels_dir = new_dataset / "labels" / "train"

        for split_dir in (
            train_dir,
            new_dataset / "images" / "val",
            train_labels_dir,
            new_dataset / "labels" / "val",
        ):
            split_dir.mkdir(parents=True, exist_ok=True)

        def _copy_unless_same(src, dst):
            """Copy `src` to `dst`, skipping the copy when both name the same file."""
            # The source tree may already be the target directory (the archive
            # can extract to the same name); copy2 would raise SameFileError.
            if dst.exists() and os.path.samefile(src, dst):
                return
            shutil.copy2(src, dst)

        # If we have any YAML files, copy the first one
        if yaml_files:
            _copy_unless_same(yaml_files[0], new_dataset / "dataset.yaml")
            print(f"✅ Copied dataset configuration: {yaml_files[0].name}")

        # Collect images, and index label files by stem so that each copied
        # image is paired with its own annotation file.
        source_root = Path(dataset_path)
        all_images = sorted(
            list(source_root.rglob("*.jpg")) + list(source_root.rglob("*.png"))
        )
        label_by_stem = {}
        for lbl in sorted(source_root.rglob("*.txt")):
            label_by_stem.setdefault(lbl.stem, lbl)

        if label_by_stem:
            print(f"Found {len(label_by_stem)} label files")

        if all_images:
            print(f"Found {len(all_images)} image files")
            # For now, put the sample in train (you can split later)
            sample_images = all_images[:sample_limit]
            copied_labels = 0
            for img in sample_images:
                _copy_unless_same(img, train_dir / img.name)
                lbl = label_by_stem.get(img.stem)
                if lbl is not None:
                    _copy_unless_same(lbl, train_labels_dir / lbl.name)
                    copied_labels += 1
            print(
                f"Copied {len(sample_images)} sample images and "
                f"{copied_labels} matching labels to train"
            )

        # Update dataset path
        dataset_path = new_dataset_path
        print(f"🔄 Created new dataset structure at: {dataset_path}")

        # Re-verify
        config, dataset_valid = verify_dataset_structure(dataset_path)

if dataset_valid:
    print("🎉 Dataset is now ready for training!")
else:
    print("⚠️ Dataset structure still needs manual fixing.")
    print("Please check the extracted files and ensure proper YOLO format.")


In [None]:
# Preview sample images with annotations
def preview_samples(dataset_path, num_samples=4):
    """Show training images with their YOLO bounding boxes drawn on top.

    Images are read from `<dataset_path>/images/train` and labels from
    `<dataset_path>/labels/train/<image stem>.txt`. Class names come from the
    module-level `config` mapping when it provides `names`; otherwise generic
    `bell_pepper_<i>` names are used.

    Args:
        dataset_path: Dataset root as a string or Path.
        num_samples: Maximum number of images to display (two per row).

    Returns:
        None. Prints a warning and returns early when no training images exist.
    """
    dataset_path = Path(dataset_path)
    images_dir = dataset_path / 'images' / 'train'
    labels_dir = dataset_path / 'labels' / 'train'

    # Sorted so repeated runs preview the same files.
    sample_images = sorted(p for p in images_dir.glob('*') if p.is_file())[:num_samples]
    if not sample_images:
        print(f"⚠️ No training images found in {images_dir}")
        return

    # Size the grid from the number of samples (the default of 4 gives 2x2).
    n_cols = 2
    n_rows = (len(sample_images) + n_cols - 1) // n_cols
    _fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 6 * n_rows), squeeze=False)
    axes = axes.flatten()
    for ax in axes:
        ax.axis('off')  # also hides any unused cells of the grid

    # Load class names from config, falling back to generic names
    class_names = config.get('names') if config else None
    if not class_names:
        class_names = [f'bell_pepper_{i}' for i in range(6)]

    for i, img_path in enumerate(sample_images):
        ax = axes[i]

        # cv2.imread returns None for files it cannot decode
        img = cv2.imread(str(img_path))
        if img is None:
            ax.set_title(f"Sample {i+1} (unreadable): {img_path.name}", fontsize=10)
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        height, width = img.shape[:2]
        ax.imshow(img)

        # Draw bounding boxes if the corresponding label file exists
        bbox_count = 0
        label_path = labels_dir / f"{img_path.stem}.txt"
        if label_path.exists():
            with open(label_path, 'r') as f:
                lines = f.readlines()

            for line in lines:
                parts = line.strip().split()
                if len(parts) != 5:
                    continue
                try:
                    class_id, x_center, y_center, bbox_width, bbox_height = map(float, parts)
                except ValueError:
                    continue  # skip malformed annotation lines

                # Convert normalized YOLO coordinates to pixels
                x_center *= width
                y_center *= height
                bbox_width *= width
                bbox_height *= height

                # Top-left corner of the box
                x = x_center - bbox_width / 2
                y = y_center - bbox_height / 2

                rect = patches.Rectangle((x, y), bbox_width, bbox_height,
                                         linewidth=2, edgecolor='red', facecolor='none')
                ax.add_patch(rect)

                # `names` may be a list or an index-keyed dict
                class_index = int(class_id)
                try:
                    class_name = class_names[class_index]
                except (IndexError, KeyError):
                    class_name = f"Class {class_index}"
                ax.text(x, y-5, class_name, color='red', fontsize=8,
                        bbox=dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.8))
                bbox_count += 1

        ax.set_title(f"Sample {i+1} ({bbox_count} peppers): {img_path.name}", fontsize=10)

    plt.tight_layout()
    plt.show()

# Render the preview only after a successful verification that loaded a config.
preview_ready = dataset_valid and config
if preview_ready:
    print("🖼️ Sample Images with Annotations:")
    preview_samples(dataset_path)


## ⚙️ Step 3: Configure Training Parameters


In [None]:
# Training configuration optimized for your dataset
TRAINING_CONFIG = {
    # Model settings - Using nano model for faster training with limited specs
    'model_size': 'yolov8n.pt',  # Options: yolov8n.pt, yolov8s.pt, yolov8m.pt

    # Training parameters optimized for your 620 image dataset
    'epochs': 150,              # Good for your dataset size
    'batch_size': 16,           # Will adjust based on GPU memory
    'img_size': 640,            # Standard YOLO input size
    'lr': 0.01,                 # Learning rate
    'patience': 50,             # Early stopping patience

    # Data augmentation for bell peppers
    'augment': True,            # Enable data augmentation
    'hsv_h': 0.015,            # HSV-Hue augmentation (good for color varieties)
    'hsv_s': 0.7,              # HSV-Saturation augmentation
    'hsv_v': 0.4,              # HSV-Value augmentation
    'degrees': 10.0,           # Small rotation (peppers can be oriented differently)
    'translate': 0.1,          # Translation
    'scale': 0.5,              # Scale variation
    'shear': 0.0,              # No shear (keeps pepper shape)
    'perspective': 0.0,        # No perspective (keeps pepper shape)
    'flipud': 0.0,             # No vertical flip (peppers hang down)
    'fliplr': 0.5,             # Horizontal flip (peppers can be on either side)
    'mosaic': 1.0,             # Mosaic augmentation (great for detection)
    'mixup': 0.0,              # No mixup for better bounding box learning

    # Output settings
    'project': 'bell_pepper_training',  # Project name
    'name': 'yolov8_bell_pepper_v1',    # Experiment name
    'save_period': 25,                  # Save checkpoint every 25 epochs
}


def pick_batch_size(gpu_memory_gb, default=16):
    """Choose a batch size for the given amount of GPU memory.

    Args:
        gpu_memory_gb: Total GPU memory in GB.
        default: Batch size to keep when no adjustment applies.

    Returns:
        A `(batch_size, note)` tuple; `note` is a message describing the
        adjustment, or None when `default` is kept.
    """
    # The tightest limit is tested first; testing "< 6" first would make the
    # "< 4" case unreachable.
    if gpu_memory_gb < 4:
        return 4, "⚠️ Reduced batch size to 4 due to very limited GPU memory"
    if gpu_memory_gb < 6:
        return 8, "⚠️ Reduced batch size to 8 due to limited GPU memory"
    if gpu_memory_gb > 12:
        return 32, "🚀 Increased batch size to 32 for better training speed"
    return default, None


print("🔧 Training Configuration for Bell Pepper Detection:")
print("=" * 55)
for key, value in TRAINING_CONFIG.items():
    print(f"{key:20}: {value}")

# Adjust batch size based on available GPU memory
if torch.cuda.is_available():
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)  # GB
    print(f"\n💾 GPU Memory: {gpu_memory:.1f} GB")

    batch_size, batch_note = pick_batch_size(gpu_memory, TRAINING_CONFIG['batch_size'])
    TRAINING_CONFIG['batch_size'] = batch_size
    if batch_note is not None:
        print(batch_note)
else:
    TRAINING_CONFIG['batch_size'] = 4
    TRAINING_CONFIG['epochs'] = 100
    print("⚠️ CPU training detected - reduced batch size and epochs")

print(f"\n🎯 Final batch size: {TRAINING_CONFIG['batch_size']}")

# Only print dataset info if config is available. The lookup goes through
# globals() so this cell also works when the verification cell has not run.
loaded_config = globals().get('config')
if loaded_config is not None:
    print(f"📊 Dataset: {loaded_config.get('train_images', 'Unknown')} train, {loaded_config.get('val_images', 'Unknown')} val images")
    print(f"🏷️ Classes: {loaded_config.get('nc', 'Unknown')} bell pepper varieties")
else:
    print("⚠️ Dataset configuration not loaded. Please verify dataset structure first.")
    print("📊 Dataset: Configuration pending...")
    print("🏷️ Classes: Will be determined after dataset verification")


In [None]:
# Fix dataset.yaml paths before training
print("🔧 Fixing dataset.yaml paths for training...")
print("=" * 40)

if dataset_path is None:
    print("❌ Dataset path not found. Please run the previous cells first.")
else:
    import random
    import shutil

    # Absolute dataset root, so the config does not depend on the working directory
    current_dataset_path = Path(dataset_path).absolute()
    yaml_path = Path(dataset_path) / 'dataset.yaml'

    if yaml_path.exists():
        with open(yaml_path, 'r') as f:
            dataset_config = yaml.safe_load(f)
        # An empty file loads as None; treat anything but a mapping as "no settings".
        if not isinstance(dataset_config, dict):
            dataset_config = {}

        # Keep the class count and the class names consistent with each other
        # when only one of them is present in the file.
        class_names = dataset_config.get('names')
        num_classes = dataset_config.get('nc')
        if num_classes is None:
            num_classes = len(class_names) if class_names else 6
        if not class_names:
            class_names = [f'bell_pepper_{i+1}' for i in range(num_classes)]

        # Create corrected dataset.yaml with absolute paths
        corrected_config = {
            'path': str(current_dataset_path),  # Dataset root dir (absolute path)
            'train': 'images/train',  # Train images (relative to 'path')
            'val': 'images/val',      # Val images (relative to 'path')
            'nc': num_classes,        # Number of classes
            'names': class_names,     # Class names
        }

        # Write corrected dataset.yaml
        with open(yaml_path, 'w') as f:
            yaml.dump(corrected_config, f, default_flow_style=False)

        print("✅ Updated dataset.yaml with correct paths:")
        print(f"   Path: {corrected_config['path']}")
        print(f"   Train: {corrected_config['train']}")
        print(f"   Val: {corrected_config['val']}")
        print(f"   Classes: {corrected_config['nc']}")
        print(f"   Names: {corrected_config['names']}")

        # Verify the paths exist
        train_path = current_dataset_path / 'images' / 'train'
        val_path = current_dataset_path / 'images' / 'val'
        train_labels_path = current_dataset_path / 'labels' / 'train'
        val_labels_path = current_dataset_path / 'labels' / 'val'

        train_images = len(list(train_path.glob('*'))) if train_path.exists() else 0
        val_images = len(list(val_path.glob('*'))) if val_path.exists() else 0
        train_labels = len(list(train_labels_path.glob('*.txt'))) if train_labels_path.exists() else 0
        val_labels = len(list(val_labels_path.glob('*.txt'))) if val_labels_path.exists() else 0

        print("\n📊 Dataset Statistics:")
        print(f"   Train images: {train_images}")
        print(f"   Train labels: {train_labels}")
        print(f"   Val images: {val_images}")
        print(f"   Val labels: {val_labels}")

        # Check if paths are valid
        if train_images == 0 or val_images == 0:
            print("\n⚠️ Warning: Some image directories are empty!")
            print("Searching for images in dataset...")

            # Find all images and label files in the dataset (sorted so the
            # split below is reproducible)
            all_images = sorted(
                list(current_dataset_path.rglob("*.jpg"))
                + list(current_dataset_path.rglob("*.png"))
            )
            all_labels = sorted(current_dataset_path.rglob("*.txt"))

            print(f"Total images found: {len(all_images)}")
            print(f"Total txt files found: {len(all_labels)}")

            if all_images:
                print("\n📁 Image file locations (first 5):")
                for img in all_images[:5]:
                    rel_path = img.relative_to(current_dataset_path)
                    print(f"   {rel_path}")

                # Try to organize files if needed
                if train_images == 0:
                    print("\n🔨 Attempting to organize dataset...")

                    # Create directories if they don't exist
                    for split_dir in (train_path, val_path, train_labels_path, val_labels_path):
                        split_dir.mkdir(parents=True, exist_ok=True)

                    # Leave out images that already sit in a split directory:
                    # re-splitting them would leak validation images into train
                    # and copying a file onto itself raises SameFileError.
                    placed_dirs = {train_path, val_path}
                    pending_images = [img for img in all_images if img.parent not in placed_dirs]

                    # 80/20 split with a fixed seed (same seed as training)
                    random.Random(42).shuffle(pending_images)
                    split_idx = int(0.8 * len(pending_images))
                    train_imgs = pending_images[:split_idx]
                    val_imgs = pending_images[split_idx:]

                    # Index labels by file name once instead of rescanning the
                    # label list for every image; the first match wins.
                    label_by_name = {}
                    for label_file in all_labels:
                        label_by_name.setdefault(label_file.name, label_file)

                    split_plan = (
                        (train_imgs, train_path, train_labels_path),
                        (val_imgs, val_path, val_labels_path),
                    )
                    for split_imgs, images_dir, labels_dir in split_plan:
                        for img in split_imgs:
                            shutil.copy2(img, images_dir / img.name)
                            label_file = label_by_name.get(img.stem + '.txt')
                            if label_file is not None and label_file.parent != labels_dir:
                                shutil.copy2(label_file, labels_dir / label_file.name)

                    print("✅ Organized dataset:")
                    print(f"   Copied {len(train_imgs)} images to train")
                    print(f"   Copied {len(val_imgs)} images to val")

                    # Recount so the readiness message reflects the result
                    train_images = len(list(train_path.glob('*')))
                    val_images = len(list(val_path.glob('*')))

        if train_images and val_images:
            print("\n🎯 Dataset ready for training!")
        else:
            print("\n⚠️ Dataset is not ready: train or val images are still missing.")
        print(f"📄 Using dataset config: {yaml_path}")

    else:
        print(f"❌ dataset.yaml not found at {yaml_path}")
        print("Creating a basic dataset.yaml file...")

        # Create a basic dataset.yaml
        basic_config = {
            'path': str(current_dataset_path),
            'train': 'images/train',
            'val': 'images/val',
            'nc': 6,
            'names': [f'bell_pepper_{i+1}' for i in range(6)]
        }

        with open(yaml_path, 'w') as f:
            yaml.dump(basic_config, f, default_flow_style=False)

        print(f"✅ Created basic dataset.yaml at {yaml_path}")


## 🚀 Step 4: Train the Bell Pepper Detection Model


In [None]:
# Initialize and train the bell pepper detection model
print("🚀 Starting Bell Pepper YOLOv8 Training...")
print("=" * 50)

# Fail early with a clear message instead of training on "None/dataset.yaml".
if dataset_path is None:
    raise RuntimeError("Dataset path is not set. Run the dataset cells before training.")
data_yaml = Path(dataset_path) / 'dataset.yaml'

# The dataset config may be missing, or may lack the optional count keys
# (a plain YOLO dataset.yaml has no 'train_images' entry), so read it defensively.
loaded_config = globals().get('config')
dataset_info = loaded_config if isinstance(loaded_config, dict) else {}

# Load the base model
model = YOLO(TRAINING_CONFIG['model_size'])

# Print model info
print(f"📱 Model: {TRAINING_CONFIG['model_size']}")
print(f"🔢 Parameters: {sum(p.numel() for p in model.model.parameters()):,}")
print(f"📊 Dataset: {dataset_info.get('nc', 'Unknown')} classes, {dataset_info.get('train_images', 'Unknown')} train images")
print(f"⚙️ Batch size: {TRAINING_CONFIG['batch_size']}, Epochs: {TRAINING_CONFIG['epochs']}")

print("\n🎬 Starting training process...")
print("This may take 30-60 minutes depending on your GPU...")

# Augmentation settings are passed through under the same names they have in
# TRAINING_CONFIG.
augmentation_keys = (
    'augment', 'hsv_h', 'hsv_s', 'hsv_v', 'degrees', 'translate', 'scale',
    'shear', 'perspective', 'flipud', 'fliplr', 'mosaic', 'mixup',
)
augmentation_args = {key: TRAINING_CONFIG[key] for key in augmentation_keys}

# Start training with optimized settings for bell peppers
results = model.train(
    data=str(data_yaml),
    epochs=TRAINING_CONFIG['epochs'],
    batch=TRAINING_CONFIG['batch_size'],
    imgsz=TRAINING_CONFIG['img_size'],
    lr0=TRAINING_CONFIG['lr'],
    patience=TRAINING_CONFIG['patience'],

    # Output settings
    project=TRAINING_CONFIG['project'],
    name=TRAINING_CONFIG['name'],
    save_period=TRAINING_CONFIG['save_period'],

    # Additional settings for optimal training
    device=0 if torch.cuda.is_available() else 'cpu',
    workers=4,
    verbose=True,
    seed=42,  # For reproducible results
    optimizer='AdamW',  # Good optimizer for small datasets
    close_mosaic=10,    # Disable mosaic in last 10 epochs for better accuracy

    # Data augmentation optimized for bell peppers
    **augmentation_args,
)

print("\n🎉 Training completed successfully!")
print("📁 Training results and model weights have been saved.")

# Display training summary
metrics = getattr(results, 'results_dict', None)
if metrics is not None:
    print("\n📊 Final Training Metrics:")
    if 'metrics/mAP50(B)' in metrics:
        print(f"   mAP@0.5: {metrics['metrics/mAP50(B)']:.3f}")
    if 'metrics/mAP50-95(B)' in metrics:
        print(f"   mAP@0.5:0.95: {metrics['metrics/mAP50-95(B)']:.3f}")


## 📊 Step 5: Analyze Training Results


In [None]:
# Find and display training results
from IPython.display import Image, display
import pandas as pd

# Find the latest training run directory
project_dir = Path(TRAINING_CONFIG['project'])
run_dirs = [
    run for run in project_dir.glob(f"{TRAINING_CONFIG['name']}*") if run.is_dir()
]
if not run_dirs:
    # max() over an empty sequence would raise an unhelpful ValueError
    raise FileNotFoundError(
        f"No training run matching '{TRAINING_CONFIG['name']}*' found in "
        f"'{project_dir}'. Run the training cell first."
    )
latest_run = max(run_dirs, key=os.path.getctime)

print(f"📁 Training results saved in: {latest_run}")
print(f"🎯 Model files location: {latest_run}/weights/")

# Plots expected in the run directory
results_img = latest_run / 'results.png'
confusion_matrix = latest_run / 'confusion_matrix.png'
f1_curve = latest_run / 'F1_curve.png'
pr_curve = latest_run / 'PR_curve.png'
val_batch = latest_run / 'val_batch0_pred.jpg'

# (file, heading, display width, message printed when the file is missing or
# None to stay silent)
training_artifacts = (
    (results_img, "\n📈 Training Results and Loss Curves:", 800, "⚠️ Results image not found"),
    (confusion_matrix, "\n🔍 Confusion Matrix:", 600, "⚠️ Confusion matrix not found"),
    (f1_curve, "\n📊 F1-Confidence Curve:", 600, None),
    (pr_curve, "\n📊 Precision-Recall Curve:", 600, None),
    (val_batch, "\n🎯 Model Predictions on Validation Images:", 800, "⚠️ Validation predictions not found"),
)

for artifact_path, heading, width, missing_message in training_artifacts:
    if artifact_path.exists():
        print(heading)
        display(Image(str(artifact_path), width=width))
    elif missing_message is not None:
        print(missing_message)


In [None]:
# Test the trained model and evaluate performance
import random

# Load the best trained model
best_model_path = latest_run / 'weights' / 'best.pt'
if not best_model_path.exists():
    raise FileNotFoundError(
        f"Best model weights not found at {best_model_path}. "
        "Check that training finished successfully."
    )
trained_model = YOLO(str(best_model_path))

print(f"🧠 Loaded best trained model: {best_model_path}")

# Validate the model on the validation set
print("\n📊 Running Model Validation...")
val_results = trained_model.val(data=f"{dataset_path}/dataset.yaml")

# Print detailed performance metrics
print("\n🎯 Bell Pepper Detection Performance:")
print("=" * 45)
print(f"mAP@0.5     : {val_results.box.map50:.3f} ({val_results.box.map50*100:.1f}%)")
print(f"mAP@0.5:0.95: {val_results.box.map:.3f} ({val_results.box.map*100:.1f}%)")
print(f"Precision   : {val_results.box.mp:.3f} ({val_results.box.mp*100:.1f}%)")
print(f"Recall      : {val_results.box.mr:.3f} ({val_results.box.mr*100:.1f}%)")

# Performance interpretation: the first band whose threshold is exceeded wins
map50 = val_results.box.map50
performance_bands = (
    (0.9, "🌟 Excellent - Ready for production!",
     "Your model is performing exceptionally well!"),
    (0.8, "🎉 Very Good - Great for most applications",
     "Excellent results! Consider testing on real-world data."),
    (0.7, "👍 Good - Suitable for many use cases",
     "Good performance. Fine-tune confidence threshold as needed."),
    (0.6, "⚠️ Fair - May need improvement",
     "Consider training longer or adding more diverse data."),
)
performance = "❌ Needs Improvement"
recommendation = "Try training with more epochs, better data, or larger model."
for threshold, band_label, band_advice in performance_bands:
    if map50 > threshold:
        performance, recommendation = band_label, band_advice
        break

print(f"\nOverall Performance: {performance}")
print(f"Recommendation: {recommendation}")

# Calculate additional metrics
total_params = sum(p.numel() for p in trained_model.model.parameters())
model_size_mb = os.path.getsize(best_model_path) / (1024 * 1024)

print("\n📱 Model Specifications:")
print(f"   Parameters: {total_params:,}")
print(f"   Model size: {model_size_mb:.1f} MB")
print(f"   Classes: {val_results.names}")

# Class-wise performance if available.
# `names` maps class index -> name and the per-class AP arrays are ordered by
# `ap_class_index`, so iterate that index list rather than enumerating `names`
# (which would yield the integer keys). `ap50` holds the AP at IoU 0.5, which
# is what the heading reports.
class_indices = getattr(val_results.box, 'ap_class_index', None)
if class_indices is not None and len(class_indices) > 0:
    print("\n📊 Per-Class Performance (mAP@0.5):")
    per_class_ap50 = val_results.box.ap50
    for position, class_index in enumerate(class_indices):
        if position >= len(per_class_ap50):
            break
        class_name = val_results.names[int(class_index)]
        class_map = float(per_class_ap50[position])
        print(f"   {class_name}: {class_map:.3f} ({class_map*100:.1f}%)")


In [None]:
# Test model on sample validation images
import random
from collections import Counter

val_images = list((Path(dataset_path) / 'images' / 'val').glob('*'))
test_images = random.sample(val_images, min(6, len(val_images)))

print("🔮 Model Predictions on Sample Validation Images:")
print("=" * 55)

detection_summary = []

if not test_images:
    print("⚠️ No validation images found - skipping prediction preview.")
else:
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    axes = axes.flatten()
    for ax in axes:
        ax.axis('off')  # also hides cells left unused when fewer than 6 images exist

    for i, img_path in enumerate(test_images):
        # Run inference (low confidence threshold to surface more detections).
        # Stored under its own name so the training `results` is not overwritten.
        predictions = trained_model(str(img_path), conf=0.25)
        result = predictions[0]

        # result.plot() returns a BGR array (OpenCV channel order); matplotlib
        # expects RGB, so reverse the channel axis before showing it.
        annotated_img = result.plot()[..., ::-1]

        ax = axes[i]
        ax.imshow(annotated_img)
        ax.set_title(f"Test {i+1}: {img_path.name}", fontsize=10)

        # Count detections by class
        detections_count = Counter()
        total_dets = 0
        if result.boxes is not None:
            for box in result.boxes:
                cls = int(box.cls.cpu().numpy()[0])
                detections_count[result.names[cls]] += 1
            total_dets = len(result.boxes)

            # Add detection count to title
            ax.set_title(f"Test {i+1} ({total_dets} peppers): {img_path.name}", fontsize=10)

        detection_summary.append({
            'image': img_path.name,
            'total_detections': total_dets,
            'detections': dict(detections_count),
        })

    plt.tight_layout()
    plt.show()

# Print detection summary
print("\n📋 Detection Summary:")
total_detected = 0
class_totals = Counter()

for summary in detection_summary:
    img_name = summary['image']
    total_dets = summary['total_detections']
    total_detected += total_dets

    print(f"\n📸 {img_name}: {total_dets} bell pepper(s) detected")

    for class_name, count in summary['detections'].items():
        print(f"   - {class_name}: {count}")
    class_totals.update(summary['detections'])

print("\n🎯 Overall Test Results:")
print(f"   Total images tested: {len(detection_summary)}")
print(f"   Total bell peppers detected: {total_detected}")
# Guard the average: there may be no validation images at all.
if detection_summary:
    print(f"   Average per image: {total_detected/len(detection_summary):.1f}")

if class_totals:
    print("   Detected classes:")
    for class_name, count in class_totals.items():
        print(f"     - {class_name}: {count} instances")


## 📦 Step 6: Export and Download Your Trained Model


In [None]:
# Assemble the files the Flask app needs into a single deployment directory
print("📦 Creating Deployment Package...")
print("=" * 35)

deployment_dir = Path('bell_pepper_model_deployment')
deployment_dir.mkdir(exist_ok=True)

# (source file, name inside the deployment directory, confirmation message):
# the best weights, the training arguments, and the dataset class configuration.
deployment_files = (
    (best_model_path, 'bell_pepper_model.pt', "✅ Copied best model: bell_pepper_model.pt"),
    (latest_run / 'args.yaml', 'training_config.yaml', "✅ Copied training configuration"),
    (f"{dataset_path}/dataset.yaml", 'dataset_classes.yaml', "✅ Copied dataset classes configuration"),
)
for source_file, target_name, done_message in deployment_files:
    shutil.copy2(source_file, deployment_dir / target_name)
    print(done_message)

# Create comprehensive model information file (README.md inside the deployment folder)

# The standard library is enough for a timestamp; this avoids relying on a
# `pd` alias being in scope when this cell runs.
from datetime import datetime

training_date = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

# Ultralytics metrics objects expose `names` as an {index: name} mapping
# (see the DetMetrics reference). Iterating a dict yields its integer keys,
# which would list "- 0", "- 1", ... instead of the class names, so take the
# values. A plain sequence of names is passed through unchanged.
class_names = val_results.names
if isinstance(class_names, dict):
    class_names = list(class_names.values())
classes_section = '\n'.join(f'- {name}' for name in class_names)

# NOTE: the template below is an f-string. Literal braces inside the embedded
# code samples are therefore written as {{ and }} so they reach the README as
# single braces.
model_info = f'''# 🌶️ Bell Pepper YOLOv8 Detection Model

## Model Information
- **Model Type**: YOLOv8 Nano (optimized for speed and efficiency)
- **Training Date**: {training_date}
- **Dataset Size**: {config['train_images']} training, {config['val_images']} validation images
- **Classes**: {config['nc']} bell pepper varieties
- **Image Size**: {TRAINING_CONFIG['img_size']}px
- **Epochs Trained**: {TRAINING_CONFIG['epochs']}

## Performance Metrics
- **mAP@0.5**: {val_results.box.map50:.3f} ({val_results.box.map50*100:.1f}%)
- **mAP@0.5:0.95**: {val_results.box.map:.3f} ({val_results.box.map*100:.1f}%)
- **Precision**: {val_results.box.mp:.3f} ({val_results.box.mp*100:.1f}%)
- **Recall**: {val_results.box.mr:.3f} ({val_results.box.mr*100:.1f}%)
- **Model Size**: {model_size_mb:.1f} MB
- **Parameters**: {total_params:,}

## Classes Detected
{classes_section}

## Usage in Python
```python
from ultralytics import YOLO

# Load your trained model
model = YOLO('bell_pepper_model.pt')

# Run inference on an image
results = model('path/to/bell_pepper_image.jpg')

# Process results
for result in results:
    boxes = result.boxes  # Bounding boxes
    for box in boxes:
        conf = box.conf.item()  # Confidence score
        cls = box.cls.item()    # Class ID
        class_name = result.names[int(cls)]  # Class name
        print(f"Detected {{class_name}} with {{conf:.2f}} confidence")
```

## Integration with Your Flask App

### Step 1: Update Model Loading
Replace the model loading in your `app.py`:

```python
# Replace this line:
MODELS = {{
    'object_detection': YOLO('yolov8n.pt'),  # General model
    # ...
}}

# With this:
MODELS = {{
    'bell_pepper_detection': YOLO('models/bell_pepper_model.pt'),  # Your trained model
    'ripeness_detection': YOLO('models/bell_pepper_model.pt'),     # Same model for ripeness
    'disease_detection': None  # Add disease model later if needed
}}
```

### Step 2: Update Detection Logic
In your upload function, change:

```python
# From:
results = MODELS['object_detection'](filepath)

# To:
results = MODELS['bell_pepper_detection'](filepath)
```

### Step 3: Update Bell Pepper Detection
Your model now automatically detects bell peppers, so update the detection logic:

```python
# All detections from your model are bell peppers
bell_peppers_detected = len(result.boxes) > 0 if result.boxes is not None else False

# Each detection will have a specific bell pepper class
for box in result.boxes:
    cls = int(box.cls.cpu().numpy()[0])
    class_name = result.names[cls]  # This will be bell_pepper_1, bell_pepper_2, etc.
    conf = float(box.conf.cpu().numpy()[0])

    # Use confidence threshold
    if conf > 0.5:  # Adjust this threshold as needed
        # Process detected bell pepper
        pepper_variety = class_name  # The specific variety/color
```

## Recommended Confidence Thresholds
- **High Precision**: 0.7+ (fewer false positives)
- **Balanced**: 0.5 (good balance)
- **High Recall**: 0.3+ (catch more peppers, but may have false positives)

## Model Performance Analysis
- **Best for**: Bell pepper detection and variety classification
- **Strengths**: Fast inference, good accuracy on bell peppers
- **Use cases**: Quality control, agricultural automation, inventory management

## Troubleshooting
1. **Low detections**: Lower confidence threshold to 0.3-0.4
2. **Too many false positives**: Increase confidence threshold to 0.6-0.7
3. **Wrong varieties**: Your model distinguishes {config['nc']} varieties - check class mappings
4. **Performance issues**: Model is optimized for speed, but ensure GPU is available

## Next Steps
1. Copy `bell_pepper_model.pt` to your Flask app's `models/` directory
2. Update your Flask app using the integration code above
3. Test with real bell pepper images
4. Fine-tune confidence thresholds based on your specific needs
5. Consider collecting more data for classes with lower performance

---
**Created with ❤️ using YOLOv8 and the RGBD Pepper Dataset**
'''

# Write the comprehensive README (explicit UTF-8 because the text contains emoji)
with open(deployment_dir / 'README.md', 'w', encoding='utf-8') as f:
    f.write(model_info)

print("✅ Created comprehensive model documentation")

# Create a quick integration script (integrate.py inside the deployment folder).
# NOTE: the template is an f-string, so `{{name}}` is emitted as `{name}` and
# `\\n` is emitted as the two characters `\n`. The generated script therefore
# contains ordinary f-strings and newline escapes.
integration_script = f'''#!/usr/bin/env python3
"""
Quick integration script for Flask app
Run this script to automatically update your Flask app
"""

import os
import shutil
from pathlib import Path

def integrate_bell_pepper_model():
    print("🌶️ Integrating Bell Pepper Model with Flask App")
    print("=" * 50)

    # Check if model file exists
    model_file = "bell_pepper_model.pt"
    if not os.path.exists(model_file):
        print(f"❌ Model file not found: {{model_file}}")
        return False

    # Create models directory in Flask app
    models_dir = Path("../models")  # Adjust path as needed
    models_dir.mkdir(exist_ok=True)

    # Copy model file
    shutil.copy2(model_file, models_dir / model_file)
    print(f"✅ Copied {{model_file}} to {{models_dir}}")

    print("\\n🎯 Next Steps:")
    print("1. Update your Flask app.py model loading code")
    print("2. Test with bell pepper images")
    print("3. Adjust confidence thresholds as needed")
    print("\\nSee README.md for detailed integration instructions!")

    return True

if __name__ == "__main__":
    integrate_bell_pepper_model()
'''

# The script text contains emoji, so write it as UTF-8 explicitly (matching the
# README write) instead of relying on the platform's default encoding.
with open(deployment_dir / 'integrate.py', 'w', encoding='utf-8') as f:
    f.write(integration_script)

print("✅ Created integration script")

# List all files in deployment package.
# These are ordinary f-strings printed directly, so placeholders use single
# braces and newlines use a single backslash. Doubled braces or backslashes
# here would print the placeholder text literally.
package_files = sorted(deployment_dir.glob('*'))  # evaluated once, stable order

print("\n📁 Deployment Package Contents:")
for file in package_files:
    size_mb = file.stat().st_size / (1024 * 1024)
    print(f"   - {file.name} ({size_mb:.1f} MB)")

print("\n📊 Package Summary:")
print(f"   Location: {deployment_dir}")
print(f"   Total files: {len(package_files)}")
print(f"   Main model: bell_pepper_model.pt ({model_size_mb:.1f} MB)")
print("   Ready for production deployment! 🚀")


In [None]:
# Bundle the deployment folder into one ZIP archive and download it
import zipfile

print("📥 Creating Downloadable Package...")

# Archive base name; ".zip" is appended to form the actual file name
zip_filename = 'bell_pepper_yolov8_trained_model'
archive_path = zip_filename + '.zip'

# Each entry of the deployment folder is stored under its bare file name
with zipfile.ZipFile(archive_path, mode='w', compression=zipfile.ZIP_DEFLATED) as archive:
    for member in deployment_dir.glob('*'):
        member_name = member.name
        archive.write(member, member_name)
        print("   Added: " + member_name)

print("✅ Created deployment package: " + archive_path)

# Download the archive through google.colab's files helper
from google.colab import files

print("\n⬇️ Downloading your trained bell pepper model...")
files.download(archive_path)

# Closing report: package contents, follow-up steps, headline metrics, tips
print("\n🎉 SUCCESS! Your Bell Pepper Detection Model is Ready!")
print("=" * 60)
for line in (
    "📦 Downloaded Package Contains:",
    "   • bell_pepper_model.pt - Your trained model",
    "   • README.md - Complete integration guide",
    "   • integrate.py - Quick integration script",
    "   • Configuration files",
):
    print(line)

for line in (
    "\n🚀 Next Steps:",
    "1. Extract the downloaded ZIP file",
    "2. Copy 'bell_pepper_model.pt' to your Flask app's models/ folder",
    "3. Follow the integration instructions in README.md",
    "4. Test your specialized bell pepper detector!",
):
    print(line)

# Metrics section: values come from the validation results and earlier cells
print("\n📊 Your Model Performance:")
map50_percent = val_results.box.map50 * 100
rating_label = performance.split(' - ')[0]  # text before the first " - "
print(f"   • mAP@0.5: {map50_percent:.1f}% - {rating_label}")
print(f"   • Model Size: {model_size_mb:.1f} MB")
print(f"   • Classes: {config['nc']} bell pepper varieties")
print("   • Ready for production use! 🌶️")

for line in (
    "\n💡 Tips for Best Results:",
    "   • Use confidence threshold of 0.5 for balanced results",
    "   • Lower to 0.3 for higher recall (more detections)",
    "   • Raise to 0.7 for higher precision (fewer false positives)",
    "   • Your model now detects specific bell pepper varieties!",
):
    print(line)

for line in (
    "\n🎯 Integration Summary:",
    "   Your specialized model will significantly outperform",
    "   the generic YOLO model for bell pepper detection!",
    "   Thank you for using the RGBD Pepper dataset! 📸🌶️",
):
    print(line)
