In [None]:
# Install required packages
!pip install ultralytics roboflow -q

# Import necessary libraries
from ultralytics import YOLO
from roboflow import Roboflow
import os
import shutil
import yaml
import glob
from pathlib import Path
import random
import json
from PIL import Image
import cv2
import numpy as np

In [None]:
# Your Roboflow API key.
# Read from the ROBOFLOW_API_KEY environment variable so the secret never has to
# be typed into (and saved with) the notebook. Falls back to an empty string,
# the previous placeholder value, when the variable is not set.
api_key = os.environ.get("ROBOFLOW_API_KEY", "")

# Function to download object detection dataset from Roboflow
def download_detection_dataset(api_key, workspace, project_name, version, format_type="yolov8", location="./datasets"):
    """Download one version of a Roboflow project in a detection export format.

    Args:
        api_key: Roboflow API key passed to the ``Roboflow`` client.
        workspace: Workspace identifier that owns the project.
        project_name: Project identifier; also used as the download sub-folder name.
        version: Project version to download.
        format_type: Export format passed to ``download`` (default ``"yolov8"``).
        location: Parent directory; files go to ``<location>/<project_name>``.

    Returns:
        Tuple ``(dataset, path)``: the object returned by the ``download`` call
        and the ``<location>/<project_name>`` path string.
    """
    destination = f"{location}/{project_name}"
    client = Roboflow(api_key=api_key)
    project_version = client.workspace(workspace).project(project_name).version(version)
    dataset = project_version.download(format_type, location=destination)
    return dataset, destination

# Function to download classification dataset from Roboflow
def download_classification_dataset(api_key, workspace, project_name, version, location="./datasets"):
    """Download one version of a Roboflow project using the ``"folder"`` export.

    Args:
        api_key: Roboflow API key passed to the ``Roboflow`` client.
        workspace: Workspace identifier that owns the project.
        project_name: Project identifier; also used as the download sub-folder name.
        version: Project version to download.
        location: Parent directory; files go to ``<location>/<project_name>``.

    Returns:
        Tuple ``(dataset, path)``: the object returned by the ``download`` call
        and the ``<location>/<project_name>`` path string.
    """
    destination = f"{location}/{project_name}"
    project = Roboflow(api_key=api_key).workspace(workspace).project(project_name)
    dataset = project.version(version).download("folder", location=destination)
    return dataset, destination

# Fetch the footprint classification dataset (one sub-folder per class)
print("Downloading Footprint Classification dataset...")
footprint_dataset, footprint_path = download_classification_dataset(
    api_key, "sml-project-hfi0w", "footprint-classification", 1,
    location="./footprint_dataset",
)

In [None]:
# Identify classes in the footprint classification dataset.
# Every sub-directory of train/ is one class. os.listdir() returns entries in
# arbitrary, filesystem-dependent order, so the names are sorted to keep the
# class -> index assignment built from this list reproducible between runs.
footprint_classes = sorted(
    class_dir
    for class_dir in os.listdir(f"{footprint_path}/train")
    if os.path.isdir(f"{footprint_path}/train/{class_dir}")
)

print(f"Footprint classification classes: {footprint_classes}")

In [None]:
# Fetch the three object detection datasets (default "yolov8" export)
print("Downloading object detection datasets...")

# Dataset 2: Wild Animal Foot Prints
dataset2, dataset2_path = download_detection_dataset(
    api_key, "traffic-bfp0e", "wild-animal-foot-prints", 1, location="./dataset2"
)

# Dataset 3: Fred's Test
dataset3, dataset3_path = download_detection_dataset(
    api_key, "fredstest", "othsgdsreq", 38, location="./dataset3"
)

# Dataset 4: New Animal Detection
dataset4, dataset4_path = download_detection_dataset(
    api_key, "lavhini", "new_animaldetection", 1, location="./dataset4"
)

print("All datasets downloaded successfully!")

In [None]:
# Load class information from detection datasets
def get_classes_from_yaml(yaml_path):
    """Return the class names stored under ``names`` in a dataset YAML file.

    Args:
        yaml_path: Path to the ``data.yaml`` file.

    Returns:
        A list of class names. A ``names`` mapping (``{index: name}``) is
        converted to a list ordered by index, so callers can always treat the
        result as a list. Returns ``[]`` when the file is empty or has no
        ``names`` entry.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(yaml_path, 'r') as f:
        # An empty document parses to None; treat it as "no keys".
        data = yaml.safe_load(f) or {}

    names = data.get('names', [])
    if isinstance(names, dict):
        # {0: 'cat', 1: 'dog'} form -> ['cat', 'dog'], ordered by key
        return [names[key] for key in sorted(names)]
    return list(names) if names is not None else []

# Each detection dataset keeps its class list in a data.yaml at its root
DATA_YAML_NAME = "data.yaml"
dataset2_classes = get_classes_from_yaml(f"{dataset2_path}/{DATA_YAML_NAME}")
dataset3_classes = get_classes_from_yaml(f"{dataset3_path}/{DATA_YAML_NAME}")
dataset4_classes = get_classes_from_yaml(f"{dataset4_path}/{DATA_YAML_NAME}")

# Show what was found so mismatched or overlapping names are easy to spot
print(f"Dataset2 classes: {dataset2_classes}")
print(f"Dataset3 classes: {dataset3_classes}")
print(f"Dataset4 classes: {dataset4_classes}")


In [None]:
# Root of the merged dataset: <root>/<split>/{images,labels}
combined_dataset_path = "./combined_dataset"
os.makedirs(combined_dataset_path, exist_ok=True)

for dir_name in ["train", "valid", "test"]:
    for sub_dir in ["images", "labels"]:
        os.makedirs(f"{combined_dataset_path}/{dir_name}/{sub_dir}", exist_ok=True)

# Concatenate every dataset's class names and drop repeats
# (dict.fromkeys keeps the first occurrence of each name, in order)
unified_classes = list(dict.fromkeys(
    footprint_classes + dataset2_classes + dataset3_classes + dataset4_classes
))
print(f"Unified classes ({len(unified_classes)}): {unified_classes}")

# For each dataset: local class index -> index of the same name in unified_classes
_unified_index = {name: idx for idx, name in enumerate(unified_classes)}
class_mappings = {
    key: {local_idx: _unified_index[name] for local_idx, name in enumerate(names)}
    for key, names in (
        ("footprint", footprint_classes),
        ("dataset2", dataset2_classes),
        ("dataset3", dataset3_classes),
        ("dataset4", dataset4_classes),
    )
}


In [None]:
# Function to convert classification images to object detection format
def convert_classification_to_detection(image_path, label_id, output_img_path, output_label_path):
    """Turn one classification image into a detection sample.

    The image is read with OpenCV and written to ``output_img_path``; a label
    file is then written with a single box covering the whole image.

    Args:
        image_path: Source image file.
        label_id: Class id written as the first field of the label line.
        output_img_path: Destination image path.
        output_label_path: Destination label (.txt) path.

    Returns:
        True when both the image and its label were written; False when the
        image could not be read or could not be written. No label file is
        created in the False case, so no label is left without its image.
    """
    img = cv2.imread(image_path)
    if img is None:
        # Missing file or not decodable as an image
        return False

    # imwrite reports failure through its return value; check it so a failed
    # write is not followed by a label for an image that does not exist.
    if not cv2.imwrite(output_img_path, img):
        return False

    # Label format: class_id x_center y_center width height, normalized to [0, 1].
    # The box is centred and spans the full image.
    x_center, y_center = 0.5, 0.5
    w, h = 1.0, 1.0

    with open(output_label_path, 'w') as f:
        f.write(f"{label_id} {x_center} {y_center} {w} {h}\n")

    return True

In [None]:
# Function to process and remap label files for detection datasets
def process_detection_label_file(src_file, dst_file, mapping):
    """Copy a label file while translating its class ids.

    Args:
        src_file: Path of the label file to read.
        dst_file: Path of the label file to write.
        mapping: Dict from original class id (int) to new class id.

    Lines with fewer than five whitespace-separated fields, and lines whose
    class id is not a key of ``mapping``, are left out. Kept lines are
    re-joined with single spaces.
    """
    with open(src_file, 'r') as src:
        raw_lines = src.readlines()

    remapped = []
    for raw in raw_lines:
        fields = raw.strip().split()
        if len(fields) < 5:
            # Not a "class_id x y w h" record
            continue
        old_id = int(fields[0])
        if old_id not in mapping:
            continue
        remapped.append(' '.join([str(mapping[old_id])] + fields[1:]) + '\n')

    with open(dst_file, 'w') as dst:
        dst.writelines(remapped)

In [None]:
# Convert the footprint classification images into detection samples,
# split by split and class by class
for split in ["train", "valid", "test"]:
    split_dir = split  # source and destination use the same split folder names
    if not os.path.exists(f"{footprint_path}/{split_dir}"):
        print(f"Split {split_dir} not found in footprint dataset")
        continue

    for class_idx, class_name in enumerate(footprint_classes):
        class_dir = f"{footprint_path}/{split_dir}/{class_name}"
        if not os.path.exists(class_dir):
            continue

        # Index of this class in the unified class list
        mapped_class_id = class_mappings["footprint"][class_idx]

        # Gather .jpg, then .jpeg, then .png files of this class
        class_images = [
            path
            for pattern in ("*.jpg", "*.jpeg", "*.png")
            for path in glob.glob(f"{class_dir}/{pattern}")
        ]
        for img_file in class_images:
            # Prefix with dataset and class so names cannot clash in the merged folder
            img_filename = f"footprint_{class_name}_{os.path.basename(img_file)}"
            label_stem = os.path.splitext(img_filename)[0]
            out_img_path = f"{combined_dataset_path}/{split}/images/{img_filename}"
            out_label_path = f"{combined_dataset_path}/{split}/labels/{label_stem}.txt"

            convert_classification_to_detection(img_file, mapped_class_id, out_img_path, out_label_path)


In [None]:

# Detection datasets to merge, keyed by the same names used in class_mappings
dataset_paths = dict(
    dataset2=dataset2_path,
    dataset3=dataset3_path,
    dataset4=dataset4_path,
)

# Function to combine detection datasets
def combine_detection_datasets(dataset_path, dataset_key, split, class_mapping):
    """Copy one split of a detection dataset into the combined dataset.

    Args:
        dataset_path: Root directory of the source dataset.
        dataset_key: Prefix added to every copied file name to avoid clashes.
        split: Split folder name, used for both source and destination.
        class_mapping: Dict from source class id to unified class id, passed
            on to ``process_detection_label_file``.

    Every file in ``<dataset_path>/<split>/images`` is copied. A label file is
    written only for images that have a matching ``.txt`` in the source
    ``labels`` folder. Prints a message and returns when either source folder
    is missing.
    """
    images_path = f"{dataset_path}/{split}/images"
    labels_path = f"{dataset_path}/{split}/labels"

    if not (os.path.exists(images_path) and os.path.exists(labels_path)):
        print(f"Skipping {dataset_key} {split} - directory not found")
        return

    out_images = f"{combined_dataset_path}/{split}/images"
    out_labels = f"{combined_dataset_path}/{split}/labels"

    for src_image in glob.glob(f"{images_path}/*"):
        image_name = os.path.basename(src_image)
        shutil.copy(src_image, f"{out_images}/{dataset_key}_{image_name}")

        # The label shares the image's base name, with a .txt extension
        label_name = os.path.splitext(image_name)[0] + ".txt"
        src_label = f"{labels_path}/{label_name}"
        if not os.path.exists(src_label):
            continue

        # Rewrite class ids so they index into the unified class list
        process_detection_label_file(src_label, f"{out_labels}/{dataset_key}_{label_name}", class_mapping)

# Merge every split of every detection dataset into the combined dataset
for dataset_key, dataset_path in dataset_paths.items():
    print(f"Processing {dataset_key}...")
    key_mapping = class_mappings[dataset_key]  # source class id -> unified class id
    for split in ("train", "valid", "test"):
        combine_detection_datasets(dataset_path, dataset_key, split, key_mapping)

print("Datasets successfully combined!")

In [None]:
# Create YAML configuration file for the combined dataset.
# 'path' is written as an absolute path: a relative dataset root is not
# guaranteed to be resolved against the notebook's working directory by the
# training framework, which leads to "dataset not found" errors.
yaml_content = {
    'path': os.path.abspath(combined_dataset_path),
    'train': 'train/images',
    'val': 'valid/images',
    'test': 'test/images',
    'nc': len(unified_classes),
    'names': unified_classes
}

# Write YAML file (sort_keys=False keeps the key order used above)
yaml_path = f"{combined_dataset_path}/data.yaml"
with open(yaml_path, 'w') as f:
    yaml.dump(yaml_content, f, sort_keys=False)

print(f"Created data.yaml with {len(unified_classes)} classes")


In [None]:
# Install PyTorch and its companion packages with pip (shell escape, runs outside Python)
!pip install torch torchvision torchaudio
# torch is imported here rather than in the first cell; the training cell
# below calls torch.cuda.is_available() to choose the device.
import torch

In [None]:
# For the classification portion:
# Train the classifier on the folder-format footprint dataset. The data
# argument used to be `dataset_path`, which at this point is only the leftover
# loop variable from the dataset-combining cell (the last detection dataset),
# not a classification dataset.
model_cls = YOLO('yolov8n-cls.pt')  # Classification model
results_cls = model_cls.train(
    data=footprint_path,
    epochs=100,
    imgsz=640,
    batch=16,
    device=0 if torch.cuda.is_available() else 'cpu',  # first GPU when available
    workers=8,
    patience=50,
    save=True,
    project='footprint_classification',
    name='yolov8_classification',
    pretrained=True,
    optimizer='Adam',
    lr0=0.001,
    lrf=0.01,
    momentum=0.937,
    weight_decay=0.0005,
    warmup_epochs=3.0,
    warmup_momentum=0.8,
    warmup_bias_lr=0.1,
    verbose=True,
    seed=42
)

In [None]:
# Function to ensure dataset has multiple classes for classification
def _copy_first_files(src_dir, dst_dir, limit):
    """Create ``dst_dir`` and copy up to ``limit`` regular files from ``src_dir``; return the count."""
    os.makedirs(dst_dir, exist_ok=True)
    if not os.path.isdir(src_dir):
        return 0
    # Only regular files: shutil.copy fails on sub-directories. Sorted so the
    # same files are picked on every run.
    files = sorted(
        name for name in os.listdir(src_dir)
        if os.path.isfile(f"{src_dir}/{name}")
    )
    for name in files[:limit]:
        shutil.copy(f"{src_dir}/{name}", f"{dst_dir}/{name}")
    return min(limit, len(files))


def check_and_fix_classification_dataset(dataset_path):
    """Make sure a folder-style classification dataset has at least two classes.

    Sub-directories of ``<dataset_path>/train`` are treated as classes. When
    only one is found, a ``dummy_class`` folder is created in ``train`` (up to
    10 files copied from the first class) and in each existing ``valid`` /
    ``test`` split (up to 5 files), for testing purposes only.

    Args:
        dataset_path: Dataset root containing ``train`` and optionally
            ``valid`` / ``test``.

    Raises:
        ValueError: If ``train`` has no class folders, because there is
            nothing to build the dummy class from.
        OSError: If ``train`` cannot be listed.
    """
    train_path = f"{dataset_path}/train"
    # Sorted so "the first class" does not depend on filesystem listing order
    classes = sorted(d for d in os.listdir(train_path) if os.path.isdir(f"{train_path}/{d}"))

    print(f"Found classes: {classes}")

    if len(classes) >= 2:
        return
    if not classes:
        raise ValueError(f"No class folders found in {train_path}; cannot create a dummy class.")

    print(f"Error: Only found {len(classes)} classes. Creating a dummy class for testing.")
    dummy_class = "dummy_class"
    first_class = classes[0]

    copied = _copy_first_files(f"{train_path}/{first_class}", f"{train_path}/{dummy_class}", 10)
    print(f"Created dummy class with {copied} images")

    # Mirror the dummy class into the other splits that exist
    for split in ["valid", "test"]:
        split_path = f"{dataset_path}/{split}"
        if os.path.exists(split_path):
            _copy_first_files(f"{split_path}/{first_class}", f"{split_path}/{dummy_class}", 5)

In [None]:
# Evaluate the model
# The only model trained above is the classifier `model_cls` (no `model`
# variable exists on a fresh kernel), so validate that one and report
# classification accuracy; box/mAP metrics belong to detection models.
metrics = model_cls.val()
print(f"Top-1 accuracy: {metrics.top1}")
print(f"Top-5 accuracy: {metrics.top5}")


In [None]:
# Save the model
# Uses `model_cls`, the model trained above; `model` is not defined in the
# cells before this one. The output file name is kept unchanged.
model_cls.export(format='onnx')  # Export to ONNX format
model_cls.save('animal_footprint_detector.pt')  # Save PyTorch model

print("Training and evaluation complete!")

------------------------------------------------


In [None]:
# Install required packages
# !pip install ultralytics roboflow -q

# Import necessary libraries
from ultralytics import YOLO
from roboflow import Roboflow
import os
import shutil
import yaml
import glob
import random
from pathlib import Path

# Your Roboflow API key.
# Read from the ROBOFLOW_API_KEY environment variable so the secret is not
# stored in the notebook; falls back to the empty placeholder when unset.
api_key = os.environ.get("ROBOFLOW_API_KEY", "")

# 1. DOWNLOAD ALL DATASETS IN THEIR CORRECT FORMAT

# Function to download dataset from Roboflow in appropriate format
def download_dataset(api_key, workspace, project_name, version, format_type="yolov8", location="./datasets"):
    """Download one version of a Roboflow project in the requested export format.

    Args:
        api_key: Roboflow API key passed to the ``Roboflow`` client.
        workspace: Workspace identifier that owns the project.
        project_name: Project identifier; also used as the download sub-folder name.
        version: Project version to download.
        format_type: Export format passed to ``download`` (default ``"yolov8"``).
        location: Parent directory; files go to ``<location>/<project_name>``.

    Returns:
        Tuple ``(dataset, path)``: the object returned by the ``download`` call
        and the ``<location>/<project_name>`` path string.
    """
    destination = f"{location}/{project_name}"
    client = Roboflow(api_key=api_key)
    project_version = client.workspace(workspace).project(project_name).version(version)
    dataset = project_version.download(format_type, location=destination)
    return dataset, destination

# All downloads share one parent folder
DOWNLOAD_ROOT = "./downloaded_datasets"
os.makedirs(DOWNLOAD_ROOT, exist_ok=True)

# Dataset 1: Footprint Classification - classification data, so it uses the "folder" export
footprint_dataset, footprint_path = download_dataset(
    api_key, "sml-project-hfi0w", "footprint-classification", 1,
    format_type="folder", location=DOWNLOAD_ROOT,
)

# The remaining datasets are detection datasets and use the "yolov8" export
# Dataset 2: Wild Animal Foot Prints
dataset2, dataset2_path = download_dataset(
    api_key, "traffic-bfp0e", "wild-animal-foot-prints", 1,
    format_type="yolov8", location=DOWNLOAD_ROOT,
)

# Dataset 3: Fred's Test
dataset3, dataset3_path = download_dataset(
    api_key, "fredstest", "othsgdsreq", 38,
    format_type="yolov8", location=DOWNLOAD_ROOT,
)

# Dataset 4: New Animal Detection
dataset4, dataset4_path = download_dataset(
    api_key, "lavhini", "new_animaldetection", 1,
    format_type="yolov8", location=DOWNLOAD_ROOT,
)

print("All datasets downloaded successfully!")

# 2. EXAMINE DATASET FORMATS AND EXTRACT CLASS INFORMATION

# For classification dataset, get classes from folder structure.
# Each sub-directory of train/ is a class. The names are sorted because
# os.listdir() returns them in arbitrary order, and the class ids assigned
# further down depend on the order of this list.
footprint_classes = []
try:
    footprint_train_path = os.path.join(footprint_path, "train")
    if os.path.exists(footprint_train_path):
        footprint_classes = sorted(
            d for d in os.listdir(footprint_train_path)
            if os.path.isdir(os.path.join(footprint_train_path, d))
        )
    print(f"Footprint classification classes: {footprint_classes}")
except Exception as e:
    # Best effort: on failure the list stays empty and later steps skip this dataset
    print(f"Error getting footprint classes: {e}")

# For detection datasets, get classes from data.yaml
def get_classes_from_yaml(yaml_path):
    """Return the class names stored under ``names`` in a dataset YAML file.

    Args:
        yaml_path: Path to the ``data.yaml`` file.

    Returns:
        A list of class names. A ``names`` mapping (``{index: name}``) is
        converted to a list ordered by index so callers can index it by class
        id. Returns ``[]`` when the file is empty, has no ``names`` entry, or
        cannot be read or parsed (the error is printed, not raised).
    """
    try:
        with open(yaml_path, 'r') as f:
            # An empty document parses to None; treat it as "no keys"
            data = yaml.safe_load(f) or {}
        names = data.get('names', [])
        if isinstance(names, dict):
            # {0: 'cat', 1: 'dog'} form -> ['cat', 'dog'], ordered by key
            return [names[key] for key in sorted(names)]
        return list(names) if names is not None else []
    except Exception as e:
        print(f"Error reading YAML file {yaml_path}: {e}")
        return []

# Each detection dataset keeps its class list in a data.yaml at its root
DATA_YAML_NAME = "data.yaml"
dataset2_classes = get_classes_from_yaml(f"{dataset2_path}/{DATA_YAML_NAME}")
dataset3_classes = get_classes_from_yaml(f"{dataset3_path}/{DATA_YAML_NAME}")
dataset4_classes = get_classes_from_yaml(f"{dataset4_path}/{DATA_YAML_NAME}")

# Show the class list found for each dataset
print(f"Dataset2 classes (Wild Animal Foot Prints): {dataset2_classes}")
print(f"Dataset3 classes (Fred's Test): {dataset3_classes}")
print(f"Dataset4 classes (New Animal Detection): {dataset4_classes}")

# 3. DECIDE ON THE MODEL TYPE: DETECTION is better since most datasets are detection format

# Unified class list: every name once, in order of first appearance
all_classes = []
for class_list in (footprint_classes, dataset2_classes, dataset3_classes, dataset4_classes):
    for cls in class_list:
        if cls in all_classes:
            continue
        all_classes.append(cls)

print(f"Combined unique classes ({len(all_classes)}): {all_classes}")

# 4. PREPARE COMBINED DATASET DIRECTORY
# Layout: <root>/<split>/images and <root>/<split>/labels
combined_dataset_path = "./combined_animal_dataset"
os.makedirs(combined_dataset_path, exist_ok=True)

for split in ["train", "valid", "test"]:
    for kind in ("images", "labels"):
        os.makedirs(f"{combined_dataset_path}/{split}/{kind}", exist_ok=True)

# 5. TRANSFORM CLASSIFICATION DATASET TO DETECTION FORMAT
# For the footprint classification dataset, we need to create object detection style annotations

def convert_classification_to_detection(image_path, class_name, class_id, output_img_path, output_label_path):
    """Turn one classification image into a detection sample.

    Copies the image to ``output_img_path`` and writes a one-line label file
    whose single box covers the whole image.

    Args:
        image_path: Source image file.
        class_name: Name of the class; not used by the function body.
        class_id: Class id written as the first field of the label line.
        output_img_path: Destination image path.
        output_label_path: Destination label (.txt) path.

    Returns:
        Always True.
    """
    shutil.copy(image_path, output_img_path)

    # Label format: class_id x_center y_center width height (normalized 0-1);
    # centre (0.5, 0.5) with width and height 1.0 spans the full image.
    annotation = f"{class_id} 0.5 0.5 1.0 1.0\n"
    with open(output_label_path, 'w') as label_file:
        label_file.write(annotation)

    return True

# Convert the footprint classification dataset into detection samples.
# class_to_idx: class name -> position in the unified class list
class_to_idx = dict(zip(all_classes, range(len(all_classes))))

# Nothing to do when no footprint classes were found
if footprint_classes:
    for split in ["train", "valid", "test"]:
        src_split = split  # source and destination use the same split folder names
        src_dir = os.path.join(footprint_path, src_split)

        if not os.path.exists(src_dir):
            print(f"Split {src_split} not found in footprint dataset")
            continue

        for class_name in footprint_classes:
            class_dir = os.path.join(src_dir, class_name)
            if not os.path.exists(class_dir):
                continue

            # Id of this class in the unified class list
            class_id = class_to_idx[class_name]

            # Gather .jpg, then .jpeg, then .png files of this class
            class_images = [
                path
                for pattern in ("*.jpg", "*.jpeg", "*.png")
                for path in glob.glob(f"{class_dir}/{pattern}")
            ]
            for img_file in class_images:
                # Prefix with dataset and class so names cannot clash in the merged folder
                base_name = os.path.basename(img_file)
                unique_name = f"footprint_{class_name}_{base_name}"
                label_name = os.path.splitext(unique_name)[0] + ".txt"

                out_img_path = os.path.join(combined_dataset_path, split, "images", unique_name)
                out_label_path = os.path.join(combined_dataset_path, split, "labels", label_name)

                convert_classification_to_detection(img_file, class_name, class_id, out_img_path, out_label_path)

# 6. COPY AND ADAPT DETECTION DATASETS
def process_detection_dataset(dataset_path, dataset_name, class_mapping):
    """Copy a detection dataset into the combined dataset, remapping class ids.

    Args:
        dataset_path: Root directory of the source dataset.
        dataset_name: Prefix added to every copied image and label file name.
        class_mapping: Sequence of the source dataset's class names, indexed by
            the class id used in its label files.

    For each of the ``train``, ``valid`` and ``test`` splits, every file in the
    split's ``images`` folder is copied. When a matching ``.txt`` label exists,
    a new label file is written in which each class id is replaced by the id of
    the same class name in ``class_to_idx``. Label lines with fewer than five
    fields, with a class id at or beyond ``len(class_mapping)``, or with a
    class name missing from ``class_to_idx`` are left out. Splits lacking an
    ``images`` or ``labels`` folder are reported and skipped.
    """
    for split_src in ("train", "valid", "test"):
        images_dir = os.path.join(dataset_path, split_src, "images")
        labels_dir = os.path.join(dataset_path, split_src, "labels")

        if not (os.path.exists(images_dir) and os.path.exists(labels_dir)):
            print(f"Skipping {dataset_name} {split_src} - directory not found")
            continue

        # Destination uses the same split name as the source
        out_images_dir = os.path.join(combined_dataset_path, split_src, "images")
        out_labels_dir = os.path.join(combined_dataset_path, split_src, "labels")

        for src_image in glob.glob(f"{images_dir}/*"):
            image_name = os.path.basename(src_image)
            stem = os.path.splitext(image_name)[0]

            shutil.copy(src_image, os.path.join(out_images_dir, f"{dataset_name}_{image_name}"))

            src_label = os.path.join(labels_dir, f"{stem}.txt")
            if not os.path.exists(src_label):
                continue

            with open(src_label, 'r') as src:
                raw_lines = src.readlines()

            remapped = []
            for raw in raw_lines:
                fields = raw.strip().split()
                if len(fields) < 5:
                    # Not a "class_id x y w h" record
                    continue
                old_class_id = int(fields[0])
                if old_class_id >= len(class_mapping):
                    continue
                # Source id -> class name -> unified id
                new_class_id = class_to_idx.get(class_mapping[old_class_id])
                if new_class_id is None:
                    continue
                fields[0] = str(new_class_id)
                remapped.append(' '.join(fields) + '\n')

            with open(os.path.join(out_labels_dir, f"{dataset_name}_{stem}.txt"), 'w') as dst:
                dst.writelines(remapped)

# Merge each detection dataset, skipping any whose class list is empty
for _src_path, _prefix, _class_names in (
    (dataset2_path, "wild_footprints", dataset2_classes),
    (dataset3_path, "freds_test", dataset3_classes),
    (dataset4_path, "new_animal", dataset4_classes),
):
    if _class_names:
        process_detection_dataset(_src_path, _prefix, _class_names)

# 7. CREATE YAML CONFIG FILE FOR THE COMBINED DATASET
# Dataset root is stored as an absolute path; split entries are relative to it
data_yaml = dict(
    path=os.path.abspath(combined_dataset_path),
    train='train/images',
    val='valid/images',
    test='test/images',
    nc=len(all_classes),
    names=all_classes,
)

# Write data.yaml into the dataset root
data_yaml_file = os.path.join(combined_dataset_path, 'data.yaml')
with open(data_yaml_file, 'w') as f:
    yaml.dump(data_yaml, f, default_flow_style=False)

# 8. TRAIN THE MODEL WITH PROPER PARAMETERS
import torch

# Pick the device once; several settings below depend on whether a GPU is present
has_gpu = torch.cuda.is_available()
device = 0 if has_gpu else 'cpu'
print(f"Training on device: {device}")

# Detection model, since every dataset has been converted to detection format
model = YOLO('yolov8n.pt')  # smallest variant; use s/m/l/x for better performance

train_args = dict(
    data=os.path.join(combined_dataset_path, 'data.yaml'),
    epochs=100,
    imgsz=640,
    batch=16 if has_gpu else 8,    # smaller batch on CPU
    patience=20,                   # early-stopping patience
    save_period=10,                # checkpoint every 10 epochs
    device=device,
    workers=8 if has_gpu else 2,   # fewer workers on CPU
    pretrained=True,
    optimizer='SGD',               # or 'Adam'
    lr0=0.01,                      # initial learning rate
    lrf=0.01,                      # final learning rate as a fraction of lr0
    momentum=0.937,
    weight_decay=0.0005,
    warmup_epochs=3.0,
    warmup_momentum=0.8,
    warmup_bias_lr=0.1,
    box=7.5,
    cls=0.5,
    dfl=1.5,
    val=True,                      # run validation
    amp=has_gpu,                   # mixed precision only with a GPU
    verbose=True,
    project='animal_detection',
    name='combined_dataset',
)
results = model.train(**train_args)

# 9. EVALUATE MODEL
metrics = model.val()
print(f"Validation results:")
print(f"mAP50-95: {metrics.box.map:.4f}")
print(f"mAP50: {metrics.box.map50:.4f}")
# box.p / box.r hold one value per class and cannot be formatted with ':.4f';
# box.mp / box.mr are the mean precision and recall over all classes.
print(f"Precision: {metrics.box.mp:.4f}")
print(f"Recall: {metrics.box.mr:.4f}")

# 10. SAVE THE MODEL
# export() returns the path of the file it wrote; report that path instead of
# building one from `model.name`, which is not an attribute of the model object.
onnx_path = model.export(format='onnx')  # Export to ONNX format
print(f"Model saved to {onnx_path}")