# **Download the required Datasets**

In [None]:
import requests
from tqdm import tqdm
import time
import random
import os

def download_kaggle_dataset(url, output, timeout=30, chunk_size=1024 * 1024):
    """
    Stream a file from `url` to `output`, showing a tqdm progress bar.

    url (str): Download URL; surrounding whitespace is ignored.
    output (str): Destination file path. Its parent directory must already exist.
    timeout (float): Seconds to wait for the connection / for each read before giving up.
    chunk_size (int): Number of bytes requested per streamed chunk.

    Returns True when the file was downloaded, False otherwise. Failures are
    reported with a printed message instead of being raised, so a loop over
    several datasets keeps going after one bad download.
    """
    url = url.strip()
    # Download into a temporary sibling file so a failed or interrupted transfer
    # never leaves a truncated archive at the final path.
    partial_output = f"{output}.part"

    try:
        # The context manager releases the connection even if the transfer fails;
        # without a timeout a stalled server would hang the cell indefinitely.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()

            # content-length may be absent; tqdm then shows a counter without a total
            total_size = int(response.headers.get("content-length", 0))

            with open(partial_output, "wb") as file, tqdm(
                total=total_size, unit="iB", unit_scale=True, desc=f"Downloading -> {output}"
            ) as bar:
                for data in response.iter_content(chunk_size):
                    file.write(data)
                    bar.update(len(data))

        os.replace(partial_output, output)
        print(f"✅ Downloaded to {output}")
        return True

    except Exception as e:
        print(f"❌ Failed to download {url}: {e}")
        if os.path.exists(partial_output):
            os.remove(partial_output)
        return False

#########################################################################
#########################################################################

# List of datasets to download.
# To add one: open the dataset on Kaggle, click Download -> cURL, and copy only the dataset URL.
# "output" is where the zip is saved; "category" records the domain (mix / sea / air / land).
datasets_list = [
    {"url": "https://www.kaggle.com/api/v1/datasets/download/iamsouravbanerjee/animal-image-dataset-90-different-animals",
     "output": "/content/datasets/90-mix-animals.zip",
     "category": "mix"},

    {"url": "https://www.kaggle.com/api/v1/datasets/download/antoreepjana/animals-detection-images-dataset",
     "output": "/content/datasets/animals-detection-images-dataset.zip",
     "category": "mix"},

    {"url": "https://www.kaggle.com/api/v1/datasets/download/sharansmenon/aquarium-dataset",
     "output": "/content/datasets/aquarium-dataset.zip",
     "category": "sea"},

    {"url": "https://www.kaggle.com/api/v1/datasets/download/wenewone/cub2002011",
     "output": "/content/datasets/cub2002011.zip",
     "category": "air"},

    {"url": "https://www.kaggle.com/api/v1/datasets/download/duyminhle/nabirds",
     "output": "/content/datasets/nabirds.zip",
     "category": "air"},

    {"url": "https://www.kaggle.com/api/v1/datasets/download/alessiocorrado99/animals10",
     "output": "/content/datasets/animals10.zip",
     "category": "land"},
]

# Create the 'datasets' directory that every "output" path above points into
os.makedirs("/content/datasets", exist_ok=True)

# Download the datasets one by one. download_kaggle_dataset reports its own
# failures, so one bad URL does not stop the remaining downloads.
for position, dataset in enumerate(datasets_list, start=1):
    download_kaggle_dataset(dataset["url"], dataset["output"])

    # Random delay of 5–15 seconds between downloads; pointless after the last one
    if position < len(datasets_list):
        delay = random.uniform(5, 15)
        print(f"⏳ Waiting {delay:.2f} seconds before next download...")
        time.sleep(delay)


# **Unzip into categorical paths**

In [None]:
import zipfile
import os

def unzip_datasets(datasets_list):
    """
    Extract every archive described in `datasets_list`.

    datasets_list (list[dict]): One dict per archive with the keys
        "input_path" (zip file to read) and "output_path" (directory to
        extract into; created if missing).

    A failure on one archive is printed and does not stop the others.
    """
    for entry in datasets_list:
        input_path, output_path = entry["input_path"], entry["output_path"]

        try:
            # The target directory is created before the archive is opened,
            # so it exists even when the archive itself turns out to be unusable.
            os.makedirs(output_path, exist_ok=True)
            print(f"Unzipping {input_path} -> {output_path} ...")

            archive = zipfile.ZipFile(input_path, 'r')
            with archive:
                archive.extractall(output_path)

            print(f"✅ Done: {input_path}\n")
        except Exception as e:
            print(f"❌ Failed to unzip {input_path}: {e}\n")


#########################################################################
#########################################################################

# (archive, extraction directory) pairs: every downloaded zip is unpacked into
# /content/data/<category>/<dataset name>.
datasets_to_unzip = [
    {"input_path": archive, "output_path": target}
    for archive, target in (
        ("/content/datasets/90-mix-animals.zip",
         "/content/data/mix/90-mix-animals"),
        ("/content/datasets/animals-detection-images-dataset.zip",
         "/content/data/mix/animals-detection-images-dataset"),
        ("/content/datasets/animals10.zip",
         "/content/data/land/animals10"),
        ("/content/datasets/aquarium-dataset.zip",
         "/content/data/sea/aquarium-dataset"),
        ("/content/datasets/cub2002011.zip",
         "/content/data/air/cub2002011"),
        ("/content/datasets/nabirds.zip",
         "/content/data/air/nabirds"),
    )
]

unzip_datasets(datasets_to_unzip)

In [3]:
# Delete the directory holding the downloaded zip archives (irreversible).
!rm -rf /content/datasets

# **Prepare the Datasets for YOLOv11**

In [None]:
# Install required packages.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -q ultralytics opencv-python pyyaml

In [4]:
# Prepare multiple animal datasets for YOLOv11 training

import os
import shutil
import yaml
import cv2
import json
from pathlib import Path
from collections import defaultdict
import random
import numpy as np
from IPython.display import display, HTML
import matplotlib.pyplot as plt

def prepare_yolo_dataset(base_path="/content/data", output_path="/content/yolo_dataset",
                        train_split=0.8, val_split=0.1, test_split=0.1,
                        max_samples_per_class=None):
    """
    Merge the source datasets under `base_path` into one YOLO-format dataset.

    Every sample becomes one image in `<output_path>/<split>/images` and one
    single-line label file in `<output_path>/<split>/labels`. Samples without a
    bounding box are labelled with a box covering the whole image.

    base_path (str): Path to the base data directory
    output_path (str): Path where organized YOLO dataset will be saved
    train_split (float): Proportion of data for training
    val_split (float): Proportion of data for validation
    test_split (float): Proportion of data for testing. The test split always
        receives whatever remains after train and val, so this value is only
        used to warn when the three fractions do not sum to 1.
    max_samples_per_class (int): Maximum samples per class (for balancing)

    Returns (output_path, sorted_classes, splits): the output directory as a
    Path, the alphabetically sorted class names (list index == YOLO class id),
    and a dict mapping 'train'/'val'/'test' to lists of
    (image_path, class_name, bbox) tuples.
    """

    # Create output directory structure
    output_path = Path(output_path)
    for split in ['train', 'val', 'test']:
        (output_path / split / 'images').mkdir(parents=True, exist_ok=True)
        (output_path / split / 'labels').mkdir(parents=True, exist_ok=True)

    # Surface inconsistent fractions instead of silently ignoring test_split
    split_total = train_split + val_split + test_split
    if abs(split_total - 1.0) > 1e-6:
        print(f"⚠️  Split fractions sum to {split_total:.3f} instead of 1.0; "
              f"the test split receives whatever remains after train and val.")

    # Class names are collected by the per-dataset processors
    class_names = set()
    all_data = []
    dataset_stats = {}

    print("🔍 Processing datasets...")
    print("=" * 50)

    # One row per source dataset:
    # (stats key, dataset root, processor, start message, sample noun, not-found message)
    base_path = Path(base_path)
    sources = [
        ('CUB-200-2011', base_path / "air" / "cub2002011" / "CUB_200_2011", process_cub_dataset,
         "🦅 Processing CUB-200-2011 dataset...", "bird",
         "❌ CUB-200-2011 dataset not found"),
        ('NABirds', base_path / "air" / "nabirds", process_nabirds_dataset,
         "🐦 Processing NABirds dataset...", "bird",
         "❌ NABirds dataset not found"),
        ('Animals10', base_path / "land" / "animals10", process_animals10_dataset,
         "🐾 Processing Animals10 dataset...", "land animal",
         "❌ Animals10 dataset not found"),
        ('90-mix-animals', base_path / "mix" / "90-mix-animals", process_mix_animals_dataset,
         "🦓 Processing 90-mix-animals dataset...", "mixed animal",
         "❌ 90-mix-animals dataset not found"),
        ('Animals-Detection', base_path / "mix" / "animals-detection-images-dataset", process_detection_dataset,
         "🎯 Processing animals-detection-images dataset...", "detection",
         "❌ Animals-detection-images dataset not found"),
        ('Aquarium', base_path / "sea" / "aquarium-dataset" / "Aquarium Combined", process_aquarium_dataset,
         "🐠 Processing Aquarium dataset...", "aquarium",
         "❌ Aquarium dataset not found"),
    ]

    for stats_key, source_path, processor, start_msg, noun, missing_msg in sources:
        if not source_path.exists():
            print(missing_msg)
            continue

        print(start_msg)
        samples = processor(source_path, class_names)
        all_data.extend(samples)
        dataset_stats[stats_key] = len(samples)
        print(f"   ✅ Found {len(samples)} {noun} samples")

    print("=" * 50)

    # Balance dataset if needed
    if max_samples_per_class:
        all_data = balance_dataset(all_data, max_samples_per_class)

    # Create class mapping with all species (alphabetical order defines the class ids)
    sorted_classes = sorted(list(class_names))
    class_to_id = {class_name: idx for idx, class_name in enumerate(sorted_classes)}

    # Display dataset statistics
    display_dataset_stats(dataset_stats, sorted_classes, all_data)

    # Shuffle and split data; the test split takes everything after train and val
    random.shuffle(all_data)
    train_end = int(len(all_data) * train_split)
    val_end = train_end + int(len(all_data) * val_split)

    splits = {
        'train': all_data[:train_end],
        'val': all_data[train_end:val_end],
        'test': all_data[val_end:]
    }

    # Copy files and create labels
    for split_name, split_data in splits.items():
        print(f"📁 Processing {split_name} split: {len(split_data)} samples")
        skipped = 0

        for idx, (img_path, class_name, bbox) in enumerate(split_data):
            if not os.path.exists(img_path):
                skipped += 1
                continue

            try:
                # Resolve the class id first so a bad class never leaves a copied
                # image next to an empty label file
                class_id = class_to_id[class_name]

                # Copy image under a split-prefixed, zero-padded name
                img_ext = Path(img_path).suffix
                sample_stem = f"{split_name}_{idx:06d}"
                new_img_path = output_path / split_name / 'images' / f"{sample_stem}{img_ext}"
                shutil.copy2(img_path, new_img_path)

                # Create YOLO label file: "<class_id> <x_center> <y_center> <width> <height>"
                label_path = output_path / split_name / 'labels' / f"{sample_stem}.txt"
                with open(label_path, 'w') as f:
                    # If bbox is provided, use it; otherwise create full image bbox
                    if bbox:
                        x_center, y_center, width, height = bbox
                        f.write(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")
                    else:
                        # Full image annotation (for classification datasets)
                        f.write(f"{class_id} 0.5 0.5 1.0 1.0\n")
            except Exception as e:
                skipped += 1
                print(f"⚠️  Error processing {img_path}: {str(e)}")
                continue

        # The split sizes reported below count planned samples; make skipped ones visible
        if skipped:
            print(f"   ⚠️  {skipped} of {len(split_data)} samples were skipped (missing file or copy error)")

    # Create dataset.yaml file with ALL species classes
    create_dataset_yaml(output_path, sorted_classes, dataset_stats)

    # Display completion summary
    print("\n" + "=" * 60)
    print("🎉 DATASET PREPARATION COMPLETE!")
    print("=" * 60)
    print(f"📊 Total Classes: {len(sorted_classes)}")
    print(f"📁 Output Directory: {output_path}")
    print(f"🚂 Train: {len(splits['train'])}")
    print(f"✅ Val: {len(splits['val'])}")
    print(f"🧪 Test: {len(splits['test'])}")
    print(f"📄 Config File: {output_path}/dataset.yaml")

    return output_path, sorted_classes, splits

#########################################
#########################################

def display_dataset_stats(dataset_stats, sorted_classes, all_data):
    """
    Print a per-dataset and per-class summary of the collected samples.

    dataset_stats (dict): Source dataset name -> number of samples it contributed.
    sorted_classes (list): All class names; only its length is reported.
    all_data (list): (image_path, class_name, bbox) tuples used for the
        per-class counts. These counts may be lower than the dataset totals
        when the caller passes data that was already balanced.
    """
    print("📊 DATASET STATISTICS")
    print("=" * 50)

    # Dataset breakdown
    total_samples = sum(dataset_stats.values())
    for dataset_name, count in dataset_stats.items():
        # A dataset directory can exist yet yield zero samples; avoid dividing by zero
        percentage = (count / total_samples) * 100 if total_samples else 0.0
        print(f"{dataset_name:20}: {count:6d} samples ({percentage:5.1f}%)")

    print(f"{'TOTAL':20}: {total_samples:6d} samples")
    print(f"\n🏷️  Total Classes: {len(sorted_classes)}")

    # Show class distribution
    class_counts = defaultdict(int)
    for _, class_name, _ in all_data:
        class_counts[class_name] += 1

    print("\n📈 Top 10 Classes by Sample Count:")
    sorted_classes_by_count = sorted(class_counts.items(), key=lambda x: x[1], reverse=True)
    for rank, (class_name, count) in enumerate(sorted_classes_by_count[:10], start=1):
        print(f"  {rank:2d}. {class_name:25}: {count:4d} samples")

    if len(sorted_classes_by_count) > 10:
        print(f"  ... and {len(sorted_classes_by_count) - 10} more classes")

#########################################
#########################################

def balance_dataset(all_data, max_samples_per_class):
    """
    Cap every class at `max_samples_per_class` samples.

    all_data (list): Sample tuples whose second element is the class name.
    max_samples_per_class (int): Upper bound per class; larger classes are
        randomly down-sampled, smaller ones are kept untouched.

    Returns a new list grouped by class, in order of each class's first appearance.
    """
    # Group samples by class name, keeping first-seen class order
    per_class = {}
    for sample in all_data:
        per_class.setdefault(sample[1], []).append(sample)

    balanced_data = []
    for bucket in per_class.values():
        over_limit = len(bucket) > max_samples_per_class
        balanced_data += random.sample(bucket, max_samples_per_class) if over_limit else bucket

    print(f"⚖️  Dataset balanced: {len(all_data)} -> {len(balanced_data)} samples")
    return balanced_data

#########################################
#########################################

def process_cub_dataset(cub_path, class_names):
    """
    Collect samples from a CUB-200-2011 directory.

    Reads classes.txt, bounding_boxes.txt, image_class_labels.txt and
    images.txt from `cub_path`; any of them may be missing.

    cub_path (Path): Directory containing the metadata files and an "images" folder.
    class_names (set): Updated in place with every class name that has at least one sample.

    Returns a list of (image_path, class_name, bbox) tuples. bbox is a
    normalized YOLO box (x_center, y_center, width, height), or None when no
    box is listed or the image could not be read to get its size.

    Note: dots are removed from class names, so "001.Some_Bird" becomes "001Some_Bird".
    """
    def read_rows(txt_file, min_fields):
        # Whitespace-split rows with at least `min_fields` fields; a missing file yields no rows
        if not txt_file.exists():
            return []
        with open(txt_file, 'r') as f:
            rows = [line.strip().split() for line in f]
        return [parts for parts in rows if len(parts) >= min_fields]

    data = []
    images_path = cub_path / "images"

    # class id -> species name
    class_id_to_name = {
        parts[0]: ' '.join(parts[1:]).replace(' ', '_').replace('.', '').replace('-', '_')
        for parts in read_rows(cub_path / "classes.txt", 2)
    }

    # image id -> (x, y, w, h); used below as top-left corner plus size, in pixels
    bboxes = {
        parts[0]: tuple(map(float, parts[1:5]))
        for parts in read_rows(cub_path / "bounding_boxes.txt", 5)
    }

    # image id -> class id
    img_to_class = {parts[0]: parts[1] for parts in read_rows(cub_path / "image_class_labels.txt", 2)}

    for parts in read_rows(cub_path / "images.txt", 2):
        img_id, img_path = parts[0], parts[1]

        full_img_path = images_path / img_path
        if not full_img_path.exists() or img_id not in img_to_class:
            continue

        class_id = img_to_class[img_id]
        if class_id in class_id_to_name:
            class_name = class_id_to_name[class_id]
        else:
            # Fallback to folder name, dropping a leading "<number>." prefix if present
            folder = img_path.split('/')[0]
            class_name = folder.split('.', 1)[1] if '.' in folder else folder
            class_name = class_name.replace(' ', '_').replace('-', '_')

        class_names.add(class_name)

        # Convert bbox to YOLO format if available; the image is read only to get its size
        bbox = None
        if img_id in bboxes:
            try:
                img = cv2.imread(str(full_img_path))
                if img is not None:
                    img_h, img_w = img.shape[:2]
                    x, y, w, h = bboxes[img_id]
                    bbox = ((x + w / 2) / img_w, (y + h / 2) / img_h, w / img_w, h / img_h)
            except Exception as e:
                # Best effort: keep the sample without a box, but say why.
                # Unlike a bare except, this still lets Ctrl-C interrupt the scan.
                print(f"⚠️  Could not compute bbox for {full_img_path}: {e}")

        data.append((str(full_img_path), class_name, bbox))

    return data

#########################################
#########################################

def process_nabirds_dataset(nabirds_path, class_names):
    """
    Collect samples from a NABirds directory.

    Reads classes.txt, image_class_labels.txt and images.txt from
    `nabirds_path`; a missing file is treated as empty.

    nabirds_path (Path): Directory containing the metadata files and an "images" folder.
    class_names (set): Updated in place with the class of every labelled image
        listed in images.txt, even when the image file itself is missing.

    Returns a list of (image_path, class_name, None) tuples; no bounding boxes are read.
    """
    def split_rows(txt_file):
        # Whitespace-split rows with at least two fields; a missing file yields no rows
        if not txt_file.exists():
            return []
        with open(txt_file, 'r') as handle:
            rows = [raw.strip().split() for raw in handle]
        return [row for row in rows if len(row) >= 2]

    # class id -> species name with spaces and hyphens turned into underscores
    id_to_species = {
        row[0]: ' '.join(row[1:]).replace(' ', '_').replace('-', '_')
        for row in split_rows(nabirds_path / "classes.txt")
    }
    # image id -> class id
    label_of = {row[0]: row[1] for row in split_rows(nabirds_path / "image_class_labels.txt")}
    # image id -> path relative to the images folder
    rel_path_of = {row[0]: row[1] for row in split_rows(nabirds_path / "images.txt")}

    image_root = nabirds_path / "images"
    samples = []
    for image_id, rel_path in rel_path_of.items():
        # Both lookups must succeed: the image needs a label and the label a known name
        species = id_to_species.get(label_of.get(image_id))
        if species is None:
            continue
        class_names.add(species)

        candidate = image_root / rel_path
        if candidate.exists():
            samples.append((str(candidate), species, None))

    return samples

#########################################
#########################################

def process_animals10_dataset(animals10_path, class_names):
    """
    Collect samples from an Animals10 directory (`<animals10_path>/raw-img/<class>/*.jpeg`).

    animals10_path (Path): Dataset root containing the "raw-img" folder.
    class_names (set): Updated in place with the class of every class folder found.

    Returns a list of (image_path, class_name, None) tuples. Only files
    matching "*.jpeg" are collected.
    """
    # Folder names are Italian; unknown folder names are kept unchanged
    italian_to_english = dict(
        cane='dog',
        cavallo='horse',
        elefante='elephant',
        farfalla='butterfly',
        gallina='chicken',
        gatto='cat',
        mucca='cow',
        pecora='sheep',
        ragno='spider',
        scoiattolo='squirrel',
    )

    raw_img_path = animals10_path / "raw-img"
    if not raw_img_path.exists():
        return []

    samples = []
    for folder in raw_img_path.iterdir():
        if not folder.is_dir():
            continue
        label = italian_to_english.get(folder.name, folder.name)
        class_names.add(label)
        samples.extend((str(image), label, None) for image in folder.glob("*.jpeg"))

    return samples

#########################################
#########################################

def process_mix_animals_dataset(mix_path, class_names):
    """
    Collect samples from the 90-mix-animals dataset (`<mix_path>/animals/animals/<class>/*.jpg`).

    mix_path (Path): Dataset root.
    class_names (set): Updated in place with the class of every class folder found.

    Returns a list of (image_path, class_name, None) tuples. Only files
    matching "*.jpg" are collected.
    """
    animals_path = mix_path / "animals" / "animals"
    if not animals_path.exists():
        return []

    samples = []
    class_folders = (entry for entry in animals_path.iterdir() if entry.is_dir())
    for folder in class_folders:
        # Folder name is the label, with hyphens and spaces turned into underscores
        label = folder.name.replace('-', '_').replace(' ', '_')
        class_names.add(label)
        samples += [(str(image), label, None) for image in folder.glob("*.jpg")]

    return samples

#########################################
#########################################

def process_detection_dataset(detection_path, class_names):
    """
    Collect samples from the animals-detection-images dataset.

    Expected layout: `<detection_path>/<train|test>/<class>/*.jpg`, with an
    optional `<class>/Label/<image stem>.txt` file per image.

    detection_path (Path): Dataset root.
    class_names (set): Updated in place with the class of every class folder found.

    Returns a list of (image_path, class_name, bbox) tuples. bbox holds fields
    2-5 of the first label line, taken as (x_center, y_center, width, height);
    it is None when there is no label file or its first line does not parse as
    five numbers. Further label lines are ignored, so each image yields at
    most one box.
    NOTE(review): the values are passed through unchanged, which assumes the
    labels are already normalized YOLO boxes — confirm against the dataset.
    """
    def first_bbox(label_file):
        # Best effort: any read/parse problem means "no box" rather than a failed scan
        if not label_file.exists():
            return None
        try:
            with open(label_file, 'r') as f:
                parts = f.readline().split()
            if len(parts) >= 5:
                _, x_center, y_center, width, height = map(float, parts[:5])
                return (x_center, y_center, width, height)
        except Exception:
            # Narrower than a bare except, so Ctrl-C still interrupts the scan
            return None
        return None

    data = []

    for split in ['train', 'test']:
        split_path = detection_path / split
        if not split_path.exists():
            continue

        for class_folder in split_path.iterdir():
            if not class_folder.is_dir():
                continue

            class_name = class_folder.name.replace(' ', '_').replace('-', '_')
            class_names.add(class_name)

            # Process images and labels
            for img_file in class_folder.glob("*.jpg"):
                label_file = class_folder / "Label" / (img_file.stem + ".txt")
                data.append((str(img_file), class_name, first_bbox(label_file)))

    return data

#########################################
#########################################

def process_aquarium_dataset(aquarium_path, class_names):
    """
    Collect samples from the Aquarium dataset, all labelled with the single class "fish".

    aquarium_path (Path): Directory containing "train", "test" and/or "valid" folders.
    class_names (set): "fish" is added in place, whether or not any image is found.

    Returns a list of (image_path, "fish", None) tuples for the "*.jpg" files
    located directly inside each split folder (sub-folders are not searched).
    """
    # More specific fish classification could be added here
    label = "fish"
    class_names.add(label)

    samples = []
    for split_name in ('train', 'test', 'valid'):
        split_dir = aquarium_path / split_name
        if not split_dir.exists():
            continue
        samples += [
            (str(image), label, None)
            for image in split_dir.glob("*.jpg")
            if not str(image.parent).endswith('.ipynb_checkpoints')
        ]

    return samples

#########################################
#########################################

def create_dataset_yaml(output_path, class_names, dataset_stats):
    """
    Write `dataset.yaml` and `classes.txt` into `output_path`.

    output_path (Path): Root of the prepared dataset; also stored as the yaml's 'path'.
    class_names (list): Class names in class-id order (index == class id).
    dataset_stats (dict): Source dataset name -> sample count, stored as metadata.

    dataset.yaml holds the split locations, 'nc', 'names' and two metadata
    sections. classes.txt holds one "<id>: <name>" line per class.
    """
    num_classes = len(class_names)

    # Additional metadata describing where the samples came from
    dataset_info = {
        'description': 'Multi-domain animal detection dataset',
        'total_classes': num_classes,
        'total_samples': sum(dataset_stats.values()),
        'source_datasets': list(dataset_stats.keys()),
        'domains': ['air', 'land', 'sea', 'mixed'],
    }

    # Key order is preserved in the file because of sort_keys=False below
    yaml_content = {
        'path': str(output_path),
        'train': 'train/images',
        'val': 'val/images',
        'test': 'test/images',
        'nc': num_classes,
        'names': class_names,  # ALL species as individual classes
        'dataset_info': dataset_info,
        'source_stats': dataset_stats,  # Source dataset breakdown
    }

    with open(output_path / 'dataset.yaml', 'w') as f:
        yaml.dump(yaml_content, f, default_flow_style=False, sort_keys=False)

    print(f"📄 Created dataset.yaml with {len(class_names)} individual species classes")

    # Also create a detailed class list file
    with open(output_path / 'classes.txt', 'w') as f:
        f.writelines(f"{i}: {class_name}\n" for i, class_name in enumerate(class_names))

    print(f"📝 Created classes.txt with detailed class mapping")

#########################################
#########################################

def visualize_sample_images(dataset_path, num_samples=9):
    """
    Show a grid of random training images titled with their class names.

    dataset_path (str | Path): Root of the prepared dataset; must contain
        dataset.yaml plus train/images and train/labels.
    num_samples (int): Maximum number of images to show. The grid is three
        columns wide and grows by rows as needed.

    Only "*.jpg" and "*.jpeg" files are considered. The class is read from the
    first line of the matching label file; images without a usable label are
    titled "unlabeled", and images that cannot be read are marked as such.
    """
    dataset_path = Path(dataset_path)
    train_images_path = dataset_path / 'train' / 'images'
    train_labels_path = dataset_path / 'train' / 'labels'

    # Load class names
    with open(dataset_path / 'dataset.yaml', 'r') as f:
        config = yaml.safe_load(f)
    class_names = config['names']

    # Get random sample images
    image_files = list(train_images_path.glob('*.jpg')) + list(train_images_path.glob('*.jpeg'))
    sample_files = random.sample(image_files, min(num_samples, len(image_files)))
    if not sample_files:
        print("⚠️  No training images found to visualize")
        return

    # Size the grid from the sample count so num_samples > 9 cannot index past the axes
    n_cols = 3
    n_rows = -(-len(sample_files) // n_cols)  # ceiling division
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows), squeeze=False)
    axes = axes.flatten()

    # Hide every axis up front so unused grid cells stay blank
    for ax in axes:
        ax.axis('off')

    for ax, img_file in zip(axes, sample_files):
        # Load image; cv2.imread returns None instead of raising on unreadable files
        img = cv2.imread(str(img_file))
        if img is None:
            ax.set_title(f"Unreadable: {img_file.name}", fontsize=10)
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Load corresponding label (first line only)
        label_file = train_labels_path / (img_file.stem + '.txt')
        class_label = "unlabeled"
        if label_file.exists():
            with open(label_file, 'r') as f:
                line = f.readline().strip()
            if line:
                class_label = class_names[int(line.split()[0])]

        ax.imshow(img)
        ax.set_title(f"Class: {class_label}", fontsize=10)

    plt.tight_layout()
    plt.show()

#########################################
#########################################

if __name__ == "__main__":
    print("🚀 Starting YOLOv11 Dataset Preparation")
    print("=" * 60)

    # Seed both RNGs so the balancing sample and the shuffle are reproducible
    random.seed(42)
    np.random.seed(42)

    # Build the dataset, then preview it; any failure is reported rather than raised
    try:
        dataset_path, classes, splits = prepare_yolo_dataset(
            "/content/data",          # base_path: root folder of all the datasets
            "/content/yolo_dataset",  # output_path: where the prepared data is written
            0.8,                      # train_split
            0.1,                      # val_split
            0.1,                      # test_split
            max_samples_per_class=1000,  # Optional: balance dataset
        )

        print(f"\n🎯 Sample classes: {classes[:10]}")

        # Visualize sample images
        print("\n🖼️  Visualizing sample images...")
        visualize_sample_images(dataset_path)

    except Exception as e:
        print(f"❌ Error: {str(e)}")
        print("Make sure your data directory structure matches the expected format.")

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Delete the extracted source datasets (irreversible).
!rm -rf /content/data

## **Clean classes.txt**

In [None]:
import re

# Path to classes.txt (the cleaned list overwrites the uncleaned file)
in_file = "/content/yolo_dataset/classes.txt"
out_file = "/content/yolo_dataset/classes.txt"

# Load the .txt in 'lines'
with open(in_file, "r") as f:
    lines = f.readlines()

cleaned = []
for line in lines:
    line = line.strip()
    if not line:
        continue

    # First run: lines look like "<id>: <name>". After this cell has run once
    # the file holds bare names, so only strip the prefix when it is really a
    # numeric id. This keeps the cell safe to re-run and preserves names that
    # contain ':' themselves.
    prefix, separator, remainder = line.partition(":")
    name = remainder.strip() if separator and prefix.strip().isdigit() else line

    # Remove the leading numeric prefix of the class name (like 001)
    name = re.sub(r"^\d+", "", name)
    cleaned.append(name)

# Write cleaned file
with open(out_file, "w") as f:
    f.write("\n".join(cleaned))

print(f"✅ Cleaned classes written to: {out_file}")


## **Clean dataset.yaml**

In [None]:
import re
from collections import Counter

import yaml

yaml_file = "/content/yolo_dataset/dataset.yaml"

# Load yaml
with open(yaml_file, "r") as f:
    data = yaml.safe_load(f)

# Clean names list: drop the leading numeric prefix (like 001) from every class name.
# The list order is left untouched, so class ids keep their meaning.
cleaned_names = [re.sub(r"^\d+", "", name) for name in data["names"]]
data["names"] = cleaned_names

# Stripping prefixes can make two different classes share one name; report it
# instead of letting the ambiguity go unnoticed.
duplicate_names = sorted(name for name, count in Counter(cleaned_names).items() if count > 1)
if duplicate_names:
    print(f"⚠️  {len(duplicate_names)} class names are no longer unique after cleaning, "
          f"e.g. {duplicate_names[:5]}")

# Save back
with open(yaml_file, "w") as f:
    yaml.dump(data, f, sort_keys=False)

print("✅ Fixed dataset.yaml. First 10 names:")
print(data["names"][:10])


# **Train the Model**

In [None]:
from ultralytics import YOLO

# Load YOLOv11 model
model = YOLO('yolo11m.pt')

# Train the model.
# project/name pin the run directory to /content/yolo_dataset/runs/detect/train,
# the location the evaluation and inference cells load best.pt from. Without
# them the run directory depends on the working directory and library settings.
# exist_ok=True reuses that directory on a re-run instead of creating train2, train3, ...
results = model.train(
    data='/content/yolo_dataset/dataset.yaml',
    epochs=100,
    imgsz=640,
    batch=16,
    project='/content/yolo_dataset/runs/detect',
    name='train',
    exist_ok=True,
)

# **Evaluate the model**

In [None]:
from ultralytics import YOLO

# Load the best checkpoint of the training run
model = YOLO("/content/yolo_dataset/runs/detect/train/weights/best.pt")

# Validate on the held-out test split (this gives precision, recall, mAP, confusion matrix, etc.).
# `data` must be the dataset.yaml written during dataset preparation, and imgsz
# matches the 640 used for training and inference so the metrics are comparable.
metrics = model.val(
    data="/content/yolo_dataset/dataset.yaml",
    split="test",
    imgsz=640
)

print(metrics)  # shows metrics dict

# **Inference**

In [None]:
from google.colab import files
from ultralytics import YOLO

# Load your trained model
model = YOLO("/content/yolo_dataset/runs/detect/train/weights/best.pt")

# Upload one or more images through the Colab file picker
uploaded = files.upload()

# Run inference (object detection) on every uploaded file; `uploaded` is keyed by file name
for filename in uploaded:
    results = model.predict(
        source=filename,
        save=True,   # save annotated image(s)
        conf=0.25,   # confidence threshold
        imgsz=640,
    )
    print(f"Processed {filename}")


### **Optional Step - Exporting the Trained Model (for Deployment & Portability)**

In [None]:
# Training saves the model in PyTorch format (.pt).
# Exporting converts it to other formats, e.g. model.export(format="onnx"):
# ONNX → open format, runs on many platforms (good for production).
# TorchScript → optimized PyTorch model (faster inference).
# TensorRT / CoreML → for NVIDIA GPUs / Apple devices.

In [None]:
# IoU (Intersection over Union) = how much the model's predicted box overlaps the real box (ground truth).
# mAP (mean Average Precision) = the model's average detection score across all classes and overlap (IoU) thresholds.
# Export = translating your model into another “language” so other systems can use it.