In [None]:
# Install required packages
!pip install ultralytics roboflow -q

# Import necessary libraries
from ultralytics import YOLO
from roboflow import Roboflow
import os
import shutil
import yaml
import glob
from pathlib import Path
import random
import json
from PIL import Image
import cv2
import numpy as np

In [None]:
# Roboflow API key, read from the ROBOFLOW_API_KEY environment variable so the
# secret is never stored in the notebook itself.
# NOTE(review): a literal key used to be committed on this line; treat it as
# leaked and revoke/rotate it in the Roboflow account settings.
api_key = os.environ.get("ROBOFLOW_API_KEY", "")
if not api_key:
    # Warn rather than raise so this cell still runs; the download helpers
    # below pass this value straight to Roboflow(api_key=...).
    print("ROBOFLOW_API_KEY is not set - export it before running the download cells.")

# Download one version of a Roboflow object detection project
def download_detection_dataset(api_key, workspace, project_name, version, format_type="yolov8", location="./datasets"):
    """Download a Roboflow project version in a detection export format.

    Args:
        api_key: Key passed to ``Roboflow(api_key=...)``.
        workspace: Workspace identifier passed to ``rf.workspace``.
        project_name: Project identifier; also used as the download sub-folder.
        version: Version passed to ``project.version``.
        format_type: Export format handed to ``download`` (default ``"yolov8"``).
        location: Parent folder; files go to ``{location}/{project_name}``.

    Returns:
        tuple: ``(dataset, target_dir)`` where ``dataset`` is whatever
        ``download`` returned and ``target_dir`` is the download folder.
    """
    target_dir = f"{location}/{project_name}"
    client = Roboflow(api_key=api_key)
    release = client.workspace(workspace).project(project_name).version(version)
    return release.download(format_type, location=target_dir), target_dir

# Download one version of a Roboflow classification project
def download_classification_dataset(api_key, workspace, project_name, version, location="./datasets"):
    """Download a Roboflow project version using the ``"folder"`` export format.

    Args:
        api_key: Key passed to ``Roboflow(api_key=...)``.
        workspace: Workspace identifier passed to ``rf.workspace``.
        project_name: Project identifier; also used as the download sub-folder.
        version: Version passed to ``project.version``.
        location: Parent folder; files go to ``{location}/{project_name}``.

    Returns:
        tuple: ``(dataset, target_dir)`` where ``dataset`` is whatever
        ``download`` returned and ``target_dir`` is the download folder.
    """
    target_dir = f"{location}/{project_name}"
    selected_version = (
        Roboflow(api_key=api_key)
        .workspace(workspace)
        .project(project_name)
        .version(version)
    )
    downloaded = selected_version.download("folder", location=target_dir)
    return downloaded, target_dir

# Fetch version 1 of the footprint classification project into ./footprint_dataset
print("Downloading Footprint Classification dataset...")
footprint_dataset, footprint_path = download_classification_dataset(
    api_key, "sml-project-hfi0w", "footprint-classification", 1, location="./footprint_dataset"
)

In [None]:
# Identify classes in the footprint classification dataset: every sub-directory
# of the train split is one class.
# os.listdir returns entries in arbitrary order, and the position of a name in
# this list is later used as its class index, so sort to keep the indices
# stable across runs and machines.
footprint_classes = sorted(
    class_dir
    for class_dir in os.listdir(f"{footprint_path}/train")
    if os.path.isdir(f"{footprint_path}/train/{class_dir}")
)

print(f"Footprint classification classes: {footprint_classes}")

In [None]:
# Fetch the three object detection projects, each into its own top-level folder
print("Downloading object detection datasets...")

# Dataset 2: Wild Animal Foot Prints
dataset2, dataset2_path = download_detection_dataset(
    api_key, "traffic-bfp0e", "wild-animal-foot-prints", 1, location="./dataset2"
)

# Dataset 3: Fred's Test
dataset3, dataset3_path = download_detection_dataset(
    api_key, "fredstest", "othsgdsreq", 38, location="./dataset3"
)

# Dataset 4: New Animal Detection
dataset4, dataset4_path = download_detection_dataset(
    api_key, "lavhini", "new_animaldetection", 1, location="./dataset4"
)

print("All datasets downloaded successfully!")

In [None]:
# Load class information from detection datasets
def get_classes_from_yaml(yaml_path):
    """Return the class names declared in a dataset YAML file as a list.

    Args:
        yaml_path: Path to the YAML file to read.

    Returns:
        list: The value of the ``names`` key. When ``names`` is an
        ``{index: name}`` mapping it is flattened in ascending index order.
        An empty list is returned when the file is empty or has no ``names``.
    """
    with open(yaml_path, 'r') as f:
        data = yaml.safe_load(f) or {}  # an empty document parses to None

    names = data.get('names') or []
    if isinstance(names, dict):
        # Flatten the mapping so callers can always concatenate and index the
        # result like a list.
        # NOTE(review): assumes the indices are contiguous from 0, so list
        # position equals class id - confirm for any mapping-style YAML.
        return [names[key] for key in sorted(names, key=int)]
    return list(names)

# Read the class names from each downloaded dataset's data.yaml
dataset2_classes, dataset3_classes, dataset4_classes = [
    get_classes_from_yaml(f"{dataset_root}/data.yaml")
    for dataset_root in (dataset2_path, dataset3_path, dataset4_path)
]

print(f"Dataset2 classes: {dataset2_classes}")
print(f"Dataset3 classes: {dataset3_classes}")
print(f"Dataset4 classes: {dataset4_classes}")


In [None]:
# Root of the merged dataset; every split gets an images/ and a labels/ folder
combined_dataset_path = "./combined_dataset"
os.makedirs(combined_dataset_path, exist_ok=True)

for dir_name in ("train", "valid", "test"):
    for sub_dir in ("images", "labels"):
        os.makedirs(f"{combined_dataset_path}/{dir_name}/{sub_dir}", exist_ok=True)

# Unified class list: concatenate all sources, then drop repeated names while
# keeping the first occurrence (dict keys preserve insertion order)
unified_classes = list(dict.fromkeys(
    footprint_classes + dataset2_classes + dataset3_classes + dataset4_classes
))
print(f"Unified classes ({len(unified_classes)}): {unified_classes}")

# Per-source lookup from a dataset's own class index to the unified class index
class_mappings = {
    source_key: {
        local_id: unified_classes.index(class_name)
        for local_id, class_name in enumerate(source_classes)
    }
    for source_key, source_classes in (
        ("footprint", footprint_classes),
        ("dataset2", dataset2_classes),
        ("dataset3", dataset3_classes),
        ("dataset4", dataset4_classes),
    )
}


In [None]:
# Function to convert classification images to object detection format
def convert_classification_to_detection(image_path, label_id, output_img_path, output_label_path):
    """Write an image plus a label whose single box covers the whole image.

    Args:
        image_path: Source image, read with ``cv2.imread``.
        label_id: Class id written as the first field of the label line.
        output_img_path: Destination for the image, written with ``cv2.imwrite``.
        output_label_path: Destination for the one-line label text file.

    Returns:
        bool: ``True`` when both the image and the label were written;
        ``False`` when the image could not be read or could not be written
        (no label file is created in either case).
    """
    img = cv2.imread(image_path)
    if img is None:
        return False

    # imwrite signals failure through its return value; stop here so a label is
    # never left behind without its image.
    if not cv2.imwrite(output_img_path, img):
        return False

    # Label format: class_id x_center y_center width height, normalized to [0, 1].
    # The box is centred and spans the full image.
    x_center, y_center = 0.5, 0.5
    w, h = 1.0, 1.0
    with open(output_label_path, 'w') as f:
        f.write(f"{label_id} {x_center} {y_center} {w} {h}\n")

    return True

In [None]:
# Rewrite a detection label file with class ids translated through `mapping`
def process_detection_label_file(src_file, dst_file, mapping):
    """Copy a label file, replacing each row's class id via ``mapping``.

    Rows with fewer than five whitespace-separated fields, and rows whose
    class id is not a key of ``mapping``, are left out of the output. Kept
    rows are re-joined with single spaces. The destination file is always
    written, even when no row is kept.

    Args:
        src_file: Path of the label file to read.
        dst_file: Path of the label file to write.
        mapping: Dict from source class id (int) to the replacement id.
    """
    with open(src_file, 'r') as reader:
        raw_rows = reader.readlines()

    remapped_rows = []
    for fields in (row.split() for row in raw_rows):
        if len(fields) < 5:  # need at least: class_id x y w h
            continue
        source_id = int(fields[0])
        if source_id not in mapping:
            continue
        remapped_rows.append(' '.join([str(mapping[source_id]), *fields[1:]]) + '\n')

    with open(dst_file, 'w') as writer:
        writer.writelines(remapped_rows)

In [None]:
# Turn every footprint classification image into a detection sample, split by split
for split in ("train", "valid", "test"):
    split_dir = split  # source and destination use the same split names
    if not os.path.exists(f"{footprint_path}/{split_dir}"):
        print(f"Split {split_dir} not found in footprint dataset")
        continue

    # One sub-folder per class; the folder's position in footprint_classes is its local id
    for class_idx, class_name in enumerate(footprint_classes):
        class_dir = f"{footprint_path}/{split_dir}/{class_name}"
        if not os.path.exists(class_dir):
            continue

        # Collect .jpg, then .jpeg, then .png files of this class
        class_images = [
            found
            for extension in ("jpg", "jpeg", "png")
            for found in glob.glob(f"{class_dir}/*.{extension}")
        ]
        for img_file in class_images:
            # Prefix with source and class name to keep file names unique in the merged folder
            img_filename = f"footprint_{class_name}_{os.path.basename(img_file)}"
            label_stem = os.path.splitext(img_filename)[0]
            out_img_path = f"{combined_dataset_path}/{split}/images/{img_filename}"
            out_label_path = f"{combined_dataset_path}/{split}/labels/{label_stem}.txt"

            # Translate the local class index to the unified one, then write image + label
            mapped_class_id = class_mappings["footprint"][class_idx]
            convert_classification_to_detection(img_file, mapped_class_id, out_img_path, out_label_path)


In [None]:

# Download folder of each detection dataset, keyed like class_mappings
dataset_paths = dict(
    dataset2=dataset2_path,
    dataset3=dataset3_path,
    dataset4=dataset4_path,
)

# Copy one split of a detection dataset into the combined dataset
def combine_detection_datasets(dataset_path, dataset_key, split, class_mapping):
    """Copy a split's images and remapped labels into ``combined_dataset_path``.

    Every file under ``{dataset_path}/{split}/images`` is copied with a
    ``{dataset_key}_`` prefix. When a label file with the same stem exists it
    is rewritten through ``process_detection_label_file`` under the same
    prefix. If either the images or the labels folder is missing, a message
    is printed and nothing is copied.

    Args:
        dataset_path: Root folder of the source dataset.
        dataset_key: Prefix added to every copied file name.
        split: Split folder name, used for both source and destination.
        class_mapping: Class id mapping forwarded to the label rewriter.
    """
    src_images = f"{dataset_path}/{split}/images"
    src_labels = f"{dataset_path}/{split}/labels"

    if not (os.path.exists(src_images) and os.path.exists(src_labels)):
        print(f"Skipping {dataset_key} {split} - directory not found")
        return

    dst_split = f"{combined_dataset_path}/{split}"
    for src_image in glob.glob(f"{src_images}/*"):
        image_name = os.path.basename(src_image)
        # The dataset prefix avoids file name clashes between sources
        shutil.copy(src_image, f"{dst_split}/images/{dataset_key}_{image_name}")

        label_name = os.path.splitext(image_name)[0] + ".txt"
        src_label = f"{src_labels}/{label_name}"
        if not os.path.exists(src_label):
            continue  # image without a label file: only the image is copied

        # Remap class ids to the unified class list while copying the label
        process_detection_label_file(
            src_label, f"{dst_split}/labels/{dataset_key}_{label_name}", class_mapping
        )

# Merge every split of every detection dataset into the combined dataset
for dataset_key in dataset_paths:
    dataset_path = dataset_paths[dataset_key]
    print(f"Processing {dataset_key}...")
    for split in ("train", "valid", "test"):
        combine_detection_datasets(
            dataset_path, dataset_key, split, class_mappings[dataset_key]
        )

print("Datasets successfully combined!")

In [None]:
# Create YAML configuration file for the combined dataset
yaml_content = {
    # Write an absolute dataset root.
    # NOTE(review): Ultralytics is understood to resolve a relative `path`
    # against its configured datasets directory rather than the notebook's
    # working directory, which makes "./combined_dataset" point at the wrong
    # folder - confirm against the dataset YAML documentation.
    'path': os.path.abspath(combined_dataset_path),
    'train': 'train/images',
    'val': 'valid/images',
    'test': 'test/images',
    'nc': len(unified_classes),
    'names': unified_classes
}

# Write YAML file (sort_keys=False keeps the key order used above)
yaml_path = f"{combined_dataset_path}/data.yaml"
with open(yaml_path, 'w') as f:
    yaml.dump(yaml_content, f, sort_keys=False)

print(f"Created data.yaml with {len(unified_classes)} classes")


In [None]:
# Train YOLOv8 model with the combined dataset
# Load a pre-trained YOLOv8 model for transfer learning
model = YOLO('yolov8n.pt')  # Use 'yolov8s.pt' or other variants for better performance

# Set training parameters.
# NOTE(review): `fl_gamma=0.0` and `label_smoothing=0.0` were removed from this
# call. Recent Ultralytics releases reportedly reject `fl_gamma` as an invalid
# argument (and warn on `label_smoothing`), and the package is installed
# unpinned above. Both were set to 0.0, so dropping them should not change
# training - confirm against the installed version's train settings.
results = model.train(
    data=yaml_path,
    epochs=100,
    imgsz=640,
    batch=16,  # Adjust based on your GPU memory
    patience=10,
    save_period=10,
    device=0,  # Use GPU
    workers=8,  # Adjust based on your CPU cores
    pretrained=True,
    optimizer='SGD',  # or 'Adam'
    lr0=0.01,
    lrf=0.01,
    momentum=0.937,
    weight_decay=0.0005,
    warmup_epochs=3.0,
    warmup_momentum=0.8,
    warmup_bias_lr=0.1,
    box=7.5,
    cls=0.5,
    dfl=1.5,
    nbs=64,
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
    translate=0.1,
    scale=0.5,
    fliplr=0.5,
    flipud=0.0,
    mosaic=1.0,
    mixup=0.0,
    copy_paste=0.0,
    amp=True  # Use mixed precision for faster training
)


In [None]:
# Run validation and print the box metrics, one per line
metrics = model.val()
for metric_label, metric_attr in (
    ("mAP50-95", "map"),
    ("mAP50", "map50"),
    ("Precision", "p"),
    ("Recall", "r"),
):
    print(f"{metric_label}: {getattr(metrics.box, metric_attr)}")


In [None]:
# Persist the trained model: ONNX export first, then the PyTorch checkpoint
model.export(format="onnx")
model.save("animal_footprint_detector.pt")

print("Training and evaluation complete!")