# Introduction
This file will be used to format the training set for the different tests

We will begin by clearing folders that have been created by this same script. So we can get a fresh setup

In [7]:
%reset -f
import os
import shutil
import random
from tqdm import tqdm  # For the progress bar

# Path to the 'dataset' folder
dataset_folder = "dataset"

# List of folders to keep
# folders_to_keep = ["images", "labels"]
folders_to_keep = []

# Iterate through all items in the dataset folder
for item in os.listdir(dataset_folder):
    item_path = os.path.join(dataset_folder, item)
    
    # Check if the item is not in the keep list
    if item not in folders_to_keep:
        # Remove the folder or file
        if os.path.isdir(item_path):
            shutil.rmtree(item_path)  # Remove directories
            print(f"Removed folder: {item_path}")
        else:
            os.remove(item_path)  # Remove files
            print(f"Removed file: {item_path}")



Removed folder: dataset/labels
Removed folder: dataset/images


In [8]:
from ultralytics.utils.downloads import download
from pathlib import Path

# Define the dataset folder path
dataset_dir = Path().parent / "dataset"  # Set dataset path relative to the script

# Ensure the dataset folder exists
dataset_dir.mkdir(parents=True, exist_ok=True)

# Download labels
segments = True  # segment or box labels
url = 'https://github.com/ultralytics/assets/releases/download/v0.0.0/'
urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')]  # labels
download(urls, dir=dataset_dir / 'labels')

# Download images
urls = [
    'http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
    'http://images.cocodataset.org/zips/val2017.zip'  # 1G, 5k images
    # ,'http://images.cocodataset.org/zips/test2017.zip'  # 7G, 41k images (optional)
]
download(urls, dir=dataset_dir / 'images', threads=3)

print(f"Labels saved to {dataset_dir / 'labels'}")
print(f"Images saved to {dataset_dir / 'images'}")


Downloading https://ultralytics.com/assets/coco2017labels-segments.zip to 'dataset/labels/coco2017labels-segments.zip'...


100%|██████████| 169M/169M [00:04<00:00, 41.3MB/s] 
Unzipping dataset/labels/coco2017labels-segments.zip to /home/tyche/ForeHelm/training/COCO/dataset/labels/coco...: 100%|██████████| 122232/122232 [00:09<00:00, 13228.27file/s]

Downloading http://images.cocodataset.org/zips/val2017.zip to 'dataset/images/val2017.zip'...
Downloading http://images.cocodataset.org/zips/train2017.zip to 'dataset/images/train2017.zip'...





Labels saved to dataset/labels
Images saved to dataset/images


# Image reordering
Copies all the images from ~/datasets/coco into the /dataset folder

In [9]:
%reset -f
import os
import shutil
from tqdm import tqdm

# Define source directories
coco_train_images = os.path.expanduser("dataset/images/train2017")
coco_val_images = os.path.expanduser("dataset/images/val2017")
coco_train_labels = os.path.expanduser("dataset/labels/coco/labels/train2017")
coco_val_labels = os.path.expanduser("dataset/labels/coco/labels/val2017")

# Define target directories
dataset_images = "dataset/images"
dataset_labels = "dataset/labels"

# Ensure target directories exist
os.makedirs(dataset_images, exist_ok=True)
os.makedirs(dataset_labels, exist_ok=True)

# Helper function to copy files
def copy_files(source_dir, target_dir, file_extension, phase_name):
    """
    Copy files with a specific extension from source_dir to target_dir.

    Args:
        source_dir (str): Source directory path.
        target_dir (str): Target directory path.
        file_extension (str): File extension to filter.
        phase_name (str): Name of the phase (e.g., 'train', 'val') for progress.
    """
    files = [f for f in os.listdir(source_dir) if f.endswith(file_extension)]
    
    with tqdm(total=len(files), desc=f"Copying {phase_name} files", unit="file") as pbar:
        for file_name in files:
            src_path = os.path.relpath(os.path.join(source_dir, file_name))
            dst_path = os.path.relpath(os.path.join(target_dir, file_name))
            shutil.move(src_path, dst_path)
            pbar.update(1)

# Copy files
copy_files(coco_train_images, dataset_images, ".jpg", "Training Images")
copy_files(coco_val_images, dataset_images, ".jpg", "Validation Images")
copy_files(coco_train_labels, dataset_labels, ".txt", "Training Labels")
copy_files(coco_val_labels, dataset_labels, ".txt", "Validation Labels")

print("Files successfully copied to dataset/images and dataset/labels with relative paths.")




Copying Training Images files: 100%|██████████| 118287/118287 [00:03<00:00, 34947.97file/s]
Copying Validation Images files: 100%|██████████| 5000/5000 [00:00<00:00, 32437.49file/s]
Copying Training Labels files: 100%|██████████| 117266/117266 [00:03<00:00, 34319.79file/s]
Copying Validation Labels files: 100%|██████████| 4952/4952 [00:00<00:00, 37771.54file/s]

Files successfully copied to dataset/images and dataset/labels with relative paths.





In [10]:
import os

# Directories to clean
directories = [
    "dataset/images",
    "dataset/labels"
]

def clean_directory(directory, extensions=[".jpg", ".txt"]):
    """
    Removes all files and folders from a directory except those with the specified extensions.
    
    Args:
        directory (str): The directory to clean.
        extensions (list): The file extensions to keep (default: [".jpg", ".txt"]).
    """
    for item in os.listdir(directory):
        item_path = os.path.join(directory, item)
        # Check if the item is a file and doesn't have a desired extension
        if os.path.isfile(item_path) and not any(item.lower().endswith(ext) for ext in extensions):
            os.remove(item_path)  # Remove the file
        # If the item is a folder, remove it
        elif os.path.isdir(item_path):
            for root, dirs, files in os.walk(item_path, topdown=False):
                for name in files:
                    os.remove(os.path.join(root, name))
                for name in dirs:
                    os.rmdir(os.path.join(root, name))
            os.rmdir(item_path)  # Remove the directory itself

# Clean each directory
for directory in directories:
    if os.path.exists(directory):
        clean_directory(directory)
        print(f"Cleaned directory: {directory}")
    else:
        print(f"Directory does not exist: {directory}")

print("Cleaning complete!")


Cleaned directory: dataset/images
Cleaned directory: dataset/labels
Cleaning complete!


Reformat dataset to include only vehicles.
The original dataset contains the labels of:

    ["aeroplane", "bicyclebike", "bird", "boat", "bottle", "bus",
    "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
    "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

A new filtered label directory has been created so that only remain the vehicles:

    ["car", "bus", "motorbike", "bicyclebike"]

These new labels are stored in dataset/labels_filtered

In [12]:
# Directory containing YOLO label .txt files
label_dir = "dataset/labels"  # Replace with your label directory path

# Allowed Class IDs for vehicle-related objects
ALLOWED_CLASSES = {0, 1, 2, 3, 4, 5, 6, 7}  # person, bicycle, car, motorcycle, airplane, bus, train, truck

# Directory to save filtered labels
output_dir = "dataset/labels_filtered"
os.makedirs(output_dir, exist_ok=True)

def filter_labels(label_file):
    """
    Reads a YOLO label file, filters out unwanted classes,
    and writes the remaining labels to a new file.
    """
    input_path = os.path.join(label_dir, label_file)
    output_path = os.path.join(output_dir, label_file)

    with open(input_path, "r") as infile, open(output_path, "w") as outfile:
        for line in infile:
            parts = line.split()
            class_id = int(parts[0])  # Extract class ID
            if class_id in ALLOWED_CLASSES:
                # Write the line if class ID is allowed
                outfile.write(line)

# List all .txt files in the label directory
label_files = [f for f in os.listdir(label_dir) if f.endswith(".txt")]

# Process all .txt files with a progress bar
with tqdm(total=len(label_files), desc="Filtering Labels", unit="file") as pbar:
    for file_name in label_files:
        filter_labels(file_name)
        pbar.update(1)

print(f"Filtered labels saved in: {output_dir}")

Filtering Labels: 100%|██████████| 122218/122218 [00:40<00:00, 2998.12file/s]

Filtered labels saved in: dataset/labels_filtered





Remove empty labels

In [13]:
# Define the path to the labels folder
labels_folder = "dataset/labels_filtered"

# List all .txt files in the labels folder
label_files = [f for f in os.listdir(labels_folder) if f.endswith('.txt')]

# Initialize a counter for removed files
removed_count = 0

# Check each label file and remove it if it's empty
for label_file in label_files:
    label_path = os.path.join(labels_folder, label_file)
    if os.path.getsize(label_path) == 0:  # Check if the file size is 0 bytes
        os.remove(label_path)  # Remove the empty file
        removed_count += 1
        # print(f"Removed empty label: {label_file}")

# Output the result
print(f"Total empty labels removed: {removed_count}")



Total empty labels removed: 45800


Counting images and filtered_labels

In [14]:
print(os.getcwd())
# Define the paths to the images and labels folders
images_folder = "dataset/images"
labels_folder = "dataset/labels_filtered"

# List all files in the images and labels folders
image_files = [f for f in os.listdir(images_folder) if f.endswith('.jpg')]
label_files = [f for f in os.listdir(labels_folder) if f.endswith('.txt')]

# Count the total number of images and labels
num_images = len(image_files)
num_labels = len(label_files)

# Check for matching files (base filenames without extensions)
image_basenames = {os.path.splitext(f)[0] for f in image_files}
label_basenames = {os.path.splitext(f)[0] for f in label_files}

# Count matched and unmatched files
matched_files = image_basenames & label_basenames
unmatched_images = image_basenames - label_basenames
unmatched_labels = label_basenames - image_basenames

print(f"Total images: {num_images}")
print(f"Total labels: {num_labels}")
print(f"Matched files: {len(matched_files)}")
print(f"Unmatched images: {len(unmatched_images)}")
print(f"Unmatched labels: {len(unmatched_labels)}")

# Optionally print the unmatched files
if unmatched_images:
    print("Unmatched images (no corresponding label):")
    for img in unmatched_images:
        print(f"  {img}")

if unmatched_labels:
    print("Unmatched labels (no corresponding image):")
    for lbl in unmatched_labels:
        print(f"  {lbl}")


/home/tyche/ForeHelm/training/COCO
Total images: 123287
Total labels: 76418
Matched files: 76418
Unmatched images: 46869
Unmatched labels: 0
Unmatched images (no corresponding label):
  000000264017
  000000030997
  000000379077
  000000092187
  000000177609
  000000243773
  000000007936
  000000305871
  000000019559
  000000120744
  000000035146
  000000508481
  000000567914
  000000165410
  000000531322
  000000151387
  000000543447
  000000347879
  000000076087
  000000459401
  000000427771
  000000310655
  000000115159
  000000077479
  000000574499
  000000133594
  000000414648
  000000219121
  000000436426
  000000450305
  000000396909
  000000492572
  000000216682
  000000405604
  000000185930
  000000572055
  000000218456
  000000078107
  000000528868
  000000335925
  000000394759
  000000399542
  000000319749
  000000125438
  000000106750
  000000485044
  000000523664
  000000035763
  000000168375
  000000558576
  000000421229
  000000234942
  000000452968
  000000280424
  0000

Create the folder of images_filtered with a reduced number of unlabellel images.
The ratio of labelled images and unlabelled images has been set to 50/50. 

In [15]:
import random

# Paths
images_folder = "dataset/images"
labels_folder = "dataset/labels_filtered"
output_folder = "dataset/images_filtered"

# Ratio of labeled and unlabeled images
r_label = 50
r_unlabel = 100 - r_label

# Ensure output directory exists
os.makedirs(output_folder, exist_ok=True)

# Get all image files and corresponding label files
image_files = [f for f in os.listdir(images_folder) if f.endswith('.jpg')]
label_files = [f for f in os.listdir(labels_folder) if f.endswith('.txt')]

# Get the base filenames (without extensions) for labels
label_basenames = {os.path.splitext(label)[0] for label in label_files}

# Separate labeled and unlabeled images
labeled_images = [img for img in image_files if os.path.splitext(img)[0] in label_basenames]
unlabeled_images = [img for img in image_files if os.path.splitext(img)[0] not in label_basenames]

# Check counts
num_labeled = len(labeled_images)
num_unlabeled_to_select = min(int(num_labeled * r_unlabel / r_label), len(unlabeled_images))

# Randomly select the required number of unlabeled images
selected_unlabeled_images = random.sample(unlabeled_images, num_unlabeled_to_select)

# Combine labeled and selected unlabeled images
images_to_copy = labeled_images + selected_unlabeled_images

# Copy labeled and selected unlabeled images to the output folder with a progress bar
with tqdm(total=len(images_to_copy), desc="Copying Images", unit="file") as pbar:
    for img in images_to_copy:
        src_path = os.path.join(images_folder, img)
        dst_path = os.path.join(output_folder, img)
        shutil.copy(src_path, dst_path)
        pbar.update(1)

# Output results
print(f"Total labeled images: {num_labeled}")
print(f"Total unlabeled images selected: {len(selected_unlabeled_images)}")
print(f"Total images in 'images_filtered': {len(os.listdir(output_folder))}")



Copying Images: 100%|██████████| 123287/123287 [03:08<00:00, 654.09file/s]


Total labeled images: 76418
Total unlabeled images selected: 46869
Total images in 'images_filtered': 123287


# Create Train, Validation and Test image sets
From the image and labels ("dataset/images", "dataset/labels_filtered")
Create the test, validation and test sets.


- Training is stored in ("dataset/train/images", "dataset/train/labels")
- Validation is stored in ("dataset/valid/images", "dataset/valid/labels")
- Test is stored in ("dataset/test/images", "dataset/test/labels")


In [2]:
import os
import shutil
import random
from tqdm import tqdm  # For progress bar

The following code takes the unfiltered images and create the corresponding training, validation and test set in the following folders:
- dataset/train
- dataset/valid
- dataset/test

In [17]:
import os
import shutil
import random
from tqdm import tqdm

# Split data into 20% train, 5% validation, and 5% test
train_perc = 0.25
valid_perc = 0.03
test_perc = 0.03

# Define folder paths
images_folder = "dataset/images"  # Folder containing filtered images
labels_folder = "dataset/labels"  # Folder containing filtered labels

train_images_folder = "dataset/train/images"
train_labels_folder = "dataset/train/labels"

valid_images_folder = "dataset/valid/images"
valid_labels_folder = "dataset/valid/labels"

test_images_folder = "dataset/test/images"
test_labels_folder = "dataset/test/labels"

# Create output directories
for folder in [train_images_folder, train_labels_folder,
               valid_images_folder, valid_labels_folder,
               test_images_folder, test_labels_folder]:
    if os.path.exists(folder):
        # Remove the folder and its contents
        shutil.rmtree(folder, ignore_errors=True)
    
    os.makedirs(folder, exist_ok=True)

# Get a list of all images
image_files = sorted(os.listdir(images_folder))

# Create a list of images with and without labels
data = []
for image_file in image_files:
    label_file = os.path.splitext(image_file)[0] + ".txt"
    if os.path.exists(os.path.join(labels_folder, label_file)):
        data.append((image_file, label_file))  # Image has a corresponding label
    else:
        data.append((image_file, None))  # Image has no label (no objects detected)

# Shuffle the data
random.shuffle(data)

# Calculate splits
total_data = len(data)
train_split = int(train_perc * total_data)
valid_split = train_split + int(valid_perc * total_data)
test_split = valid_split + int(test_perc * total_data)

# Allocate data
train_data = data[:train_split]
valid_data = data[train_split:valid_split]
test_data = data[valid_split:test_split]

# Function to copy images and labels with a progress bar
def copy_files(data, dest_images_folder, dest_labels_folder, phase_name):
    with tqdm(total=len(data), desc=f"Copying {phase_name}") as pbar:
        for image_file, label_file in data:
            # Copy the image file
            shutil.copy(os.path.join(images_folder, image_file), os.path.join(dest_images_folder, image_file))
            # Copy the label file if it exists
            if label_file:
                shutil.copy(os.path.join(labels_folder, label_file), os.path.join(dest_labels_folder, label_file))
            # Update progress bar
            pbar.update(1)

# Copy data to respective folders
copy_files(train_data, train_images_folder, train_labels_folder, "Training Data")
copy_files(valid_data, valid_images_folder, valid_labels_folder, "Validation Data")
copy_files(test_data, test_images_folder, test_labels_folder, "Testing Data")

print("Dataset split complete!")
print(f"Training data: {len(train_data)} images")
print(f"Validation data: {len(valid_data)} images")
print(f"Testing data: {len(test_data)} images")


Copying Training Data: 100%|██████████| 98629/98629 [04:42<00:00, 349.68it/s]
Copying Validation Data: 100%|██████████| 12329/12329 [00:32<00:00, 375.18it/s]
Copying Testing Data: 100%|██████████| 12329/12329 [00:33<00:00, 371.48it/s]

Dataset split complete!
Training data: 98629 images
Validation data: 12329 images
Testing data: 12329 images





The following code takes the filtered images and create the corresponding training, validation and test set in the following folders:
- dataset/train_filtered
- dataset/valid_filtered
- dataset/test_filtered


In [1]:
import os
import shutil
import random
from tqdm import tqdm

# Split data into 20% train, 5% validation, and 5% test
train_perc = 0.8
valid_perc = 0.1
test_perc = 0.1

# Define folder paths
images_folder = "dataset/images_filtered"  # Folder containing filtered images
labels_folder = "dataset/labels_filtered"  # Folder containing filtered labels

train_images_folder = "dataset/train_filtered/images"
train_labels_folder = "dataset/train_filtered/labels"

valid_images_folder = "dataset/valid_filtered/images"
valid_labels_folder = "dataset/valid_filtered/labels"

test_images_folder = "dataset/test_filtered/images"
test_labels_folder = "dataset/test_filtered/labels"

# Create output directories
for folder in [train_images_folder, train_labels_folder,
               valid_images_folder, valid_labels_folder,
               test_images_folder, test_labels_folder]:
    if os.path.exists(folder):
        # Remove the folder and its contents
        shutil.rmtree(folder, ignore_errors=True)
    
    os.makedirs(folder, exist_ok=True)

# Get a list of all images
image_files = sorted(os.listdir(images_folder))

# Create a list of images with and without labels
data = []
for image_file in image_files:
    label_file = os.path.splitext(image_file)[0] + ".txt"
    if os.path.exists(os.path.join(labels_folder, label_file)):
        data.append((image_file, label_file))  # Image has a corresponding label
    else:
        data.append((image_file, None))  # Image has no label (no objects detected)

# Shuffle the data
random.shuffle(data)

# Calculate splits
total_data = len(data)
train_split = int(train_perc * total_data)
valid_split = train_split + int(valid_perc * total_data)
test_split = valid_split + int(test_perc * total_data)

# Allocate data
train_data = data[:train_split]
valid_data = data[train_split:valid_split]
test_data = data[valid_split:test_split]

# Function to copy images and labels with a progress bar
def copy_files(data, dest_images_folder, dest_labels_folder, phase_name):
    with tqdm(total=len(data), desc=f"Copying {phase_name}") as pbar:
        for image_file, label_file in data:
            # Copy the image file
            shutil.copy(os.path.join(images_folder, image_file), os.path.join(dest_images_folder, image_file))
            # Copy the label file if it exists
            if label_file:
                shutil.copy(os.path.join(labels_folder, label_file), os.path.join(dest_labels_folder, label_file))
            # Update progress bar
            pbar.update(1)

# Copy data to respective folders
copy_files(train_data, train_images_folder, train_labels_folder, "Training Data")
copy_files(valid_data, valid_images_folder, valid_labels_folder, "Validation Data")
copy_files(test_data, test_images_folder, test_labels_folder, "Testing Data")

print("Dataset split complete!")
print(f"Training data: {len(train_data)} images")
print(f"Validation data: {len(valid_data)} images")
print(f"Testing data: {len(test_data)} images")


Copying Training Data: 100%|██████████| 98629/98629 [01:38<00:00, 1002.93it/s]
Copying Validation Data: 100%|██████████| 12328/12328 [00:24<00:00, 501.46it/s]
Copying Testing Data: 100%|██████████| 12328/12328 [00:23<00:00, 518.70it/s]

Dataset split complete!
Training data: 98629 images
Validation data: 12328 images
Testing data: 12328 images





In [10]:
import os
from collections import defaultdict
from tqdm import tqdm

# Directories to process
directories = [
    "dataset/labels",
    "dataset/train/labels",
    "dataset/valid/labels",
    "dataset/test/labels",
    
    "dataset/labels_filtered",
    "dataset/train_filtered/labels",
    "dataset/valid_filtered/labels",
    "dataset/test_filtered/labels",
]

def count_labels_in_directory(directory):
    """
    Counts the number of each class in a directory of YOLO label files with a progress bar.
    
    Args:
        directory (str): Path to the directory containing label files.
        
    Returns:
        dict: A dictionary with class IDs as keys and counts as values.
    """
    class_counts = defaultdict(int)
    
    # Get all .txt files in the directory
    label_files = [f for f in os.listdir(directory) if f.endswith(".txt")]
    
    # Process each file with a progress bar
    with tqdm(total=len(label_files), desc=f"Processing {directory}", unit="file") as pbar:
        for label_file in label_files:
            label_path = os.path.join(directory, label_file)
            with open(label_path, "r") as file:
                for line in file:
                    parts = line.split()
                    class_id = int(parts[0])  # Extract class ID
                    class_counts[class_id] += 1  # Increment the count for the class ID
            pbar.update(1)
    
    return class_counts

# Count labels for each directory
results = {}

for directory in directories:
    if os.path.exists(directory):
        results[directory] = count_labels_in_directory(directory)
    else:
        results[directory] = None  # Directory does not exist

# Display the results
print("\nClass counts per directory:")
for directory, class_counts in results.items():
    print(f"\nDirectory: {directory}")
    if class_counts is not None:
        for class_id, count in sorted(class_counts.items()):
            print(f"  Class {class_id}: {count}")
    else:
        print("  Directory does not exist or contains no labels.")



Processing dataset/labels: 100%|██████████| 122218/122218 [00:25<00:00, 4851.66file/s]
Processing dataset/train/labels: 100%|██████████| 85551/85551 [00:20<00:00, 4276.11file/s]
Processing dataset/valid/labels: 100%|██████████| 18317/18317 [00:03<00:00, 4807.46file/s]
Processing dataset/test/labels: 100%|██████████| 18350/18350 [00:03<00:00, 5028.28file/s]
Processing dataset/labels_filtered: 100%|██████████| 76418/76418 [00:08<00:00, 8916.71file/s] 
Processing dataset/train_filtered/labels: 100%|██████████| 53609/53609 [00:08<00:00, 6230.13file/s]
Processing dataset/valid_filtered/labels: 100%|██████████| 11330/11330 [00:00<00:00, 47554.80file/s]
Processing dataset/test_filtered/labels: 100%|██████████| 11479/11479 [00:00<00:00, 51541.40file/s]


Class counts per directory:

Directory: dataset/labels
  Class 0: 268029
  Class 1: 7370
  Class 2: 45449
  Class 3: 9021
  Class 4: 5272
  Class 5: 6344
  Class 6: 4760
  Class 7: 10384
  Class 8: 11000
  Class 9: 13476
  Class 10: 1966
  Class 11: 2058
  Class 12: 1343
  Class 13: 10231
  Class 14: 10969
  Class 15: 4968
  Class 16: 5718
  Class 17: 6839
  Class 18: 9577
  Class 19: 8386
  Class 20: 5736
  Class 21: 1365
  Class 22: 5535
  Class 23: 5360
  Class 24: 9084
  Class 25: 11672
  Class 26: 12882
  Class 27: 6700
  Class 28: 6411
  Class 29: 2796
  Class 30: 6864
  Class 31: 2750
  Class 32: 6559
  Class 33: 9129
  Class 34: 3418
  Class 35: 3895
  Class 36: 5715
  Class 37: 6362
  Class 38: 5032
  Class 39: 25083
  Class 40: 8180
  Class 41: 21469
  Class 42: 5689
  Class 43: 8085
  Class 44: 6412
  Class 45: 14946
  Class 46: 9565
  Class 47: 6012
  Class 48: 4533
  Class 49: 6587
  Class 50: 7573
  Class 51: 8123
  Class 52: 3008
  Class 53: 6091
  Class 54: 7333
  Clas




# Load YOLO Model and Begin Training!!

In [11]:
from ultralytics import YOLO

In [13]:
model16 = YOLO("yolo11n.yaml")
model16.train(
    data="train_filtered.yaml",
    project="./16_scratch_filtered",
    pretrained=False,  
    epochs = 100,
    patience=10, 
    save_period=5,
    # time=2,
    # cache="ram",
    batch=16,
    exist_ok=True,
    resume=True, 
    plots=True
    )

New https://pypi.org/project/ultralytics/8.3.55 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.54 🚀 Python-3.12.3 torch-2.5.1+cu124 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11n.yaml, data=train_filtered.yaml, epochs=100, time=None, patience=10, batch=16, imgsz=640, save=True, save_period=10, cache=False, device=None, workers=8, project=./16_scratch_filtered, name=train, exist_ok=False, pretrained=False, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=None, amp=True, fraction=1, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_ma

[34m[1mtrain: [0mScanning /home/tyche/ForeHelm/YOLO_training/COCO/dataset/train_filtered/labels... 45812 images, 28160 backgrounds, 0 corrupt: 100%|██████████| 73972/73972 [01:13<00:00, 1005.87it/s]


[34m[1mtrain: [0mNew cache created: /home/tyche/ForeHelm/YOLO_training/COCO/dataset/train_filtered/labels.cache


[34m[1mval: [0mScanning /home/tyche/ForeHelm/YOLO_training/COCO/dataset/valid_filtered/labels.cache... 15341 images, 9316 backgrounds, 0 corrupt: 100%|██████████| 24657/24657 [00:00<?, ?it/s]


Plotting labels to 16_scratch_filtered/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1m16_scratch_filtered/train[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      1/100      3.08G      3.053      4.116      3.456         34        640: 100%|██████████| 4624/4624 [13:34<00:00,  5.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 771/771 [02:14<00:00,  5.74it/s]


: 

In [None]:
modelX = YOLO("yolo11n.yaml")
modelX.train(
    data="train_filtered.yaml",
    project="./X_scratch_filtered",
    pretrained=False,  
    epochs = 100,
    patience=10, 
    save_period=10,
    batch=-1,
    resume=True, 
    save=True,
    fraction=1
    )

# Validating the YOLO models
The following code is used to validate and compare the different YOLO models that have been trained

This code tries the default and pretrained yolo11n model with the COCO dataset

In [3]:
from ultralytics import YOLO

# Load a model

model = YOLO("16_scratch_filtered/train/weights/best.pt")
validation_results = model.val(data="train_filtered.yaml", device="0", plots=True)

Ultralytics 8.3.55 🚀 Python-3.12.3 torch-2.5.1+cu124 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
YOLO11n summary (fused): 238 layers, 2,583,712 parameters, 0 gradients, 6.3 GFLOPs


[34m[1mval: [0mScanning /home/tyche/ForeHelm/training/COCO/dataset/valid_filtered/labels.cache... 7641 images, 4687 backgrounds, 0 corrupt: 100%|██████████| 12328/12328 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 771/771 [01:15<00:00, 10.15it/s]


                   all      12328      35853      0.751      0.576      0.659      0.468
                person       6674      26998      0.813      0.631       0.74      0.511
               bicycle        323        696      0.694      0.398      0.471      0.264
                   car       1280       4603      0.713       0.49      0.575      0.364
            motorcycle        370        907      0.743      0.535      0.624      0.393
              airplane        329        585      0.853      0.692      0.778      0.588
                   bus        412        631      0.785      0.675      0.773      0.631
                 train        368        446      0.808      0.765      0.827      0.659
                 truck        627        987      0.596       0.42      0.483      0.339
Speed: 0.3ms preprocess, 2.6ms inference, 0.0ms loss, 0.8ms postprocess per image
Results saved to [1m/home/tyche/ForeHelm/runs/detect/val11[0m


In [4]:
# Load a model
model = YOLO("yolo11n.pt")
validation_results = model.val(data="train_filtered.yaml", device="0", plots=True)

Ultralytics 8.3.55 🚀 Python-3.12.3 torch-2.5.1+cu124 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
YOLO11n summary (fused): 238 layers, 2,616,248 parameters, 0 gradients, 6.5 GFLOPs


[34m[1mval: [0mScanning /home/tyche/ForeHelm/training/COCO/dataset/valid_filtered/labels.cache... 7641 images, 4687 backgrounds, 0 corrupt: 100%|██████████| 12328/12328 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 771/771 [01:32<00:00,  8.36it/s]


                   all      12328      35853      0.792      0.634      0.723      0.531
                person       6674      26998      0.816      0.654      0.759      0.533
               bicycle        323        696      0.712      0.457       0.55      0.334
                   car       1280       4603      0.741       0.51      0.613      0.397
            motorcycle        370        907      0.822      0.592      0.706      0.464
              airplane        329        585      0.858       0.79      0.859      0.677
                   bus        412        631      0.834      0.753      0.836      0.696
                 train        368        446       0.87      0.836      0.893      0.721
                 truck        627        987      0.683      0.476      0.572      0.424
Speed: 0.5ms preprocess, 3.3ms inference, 0.0ms loss, 1.1ms postprocess per image
Results saved to [1m/home/tyche/ForeHelm/runs/detect/val12[0m


In [None]:
# Validate the model
validation_results.box.map  # map50-95
validation_results.box.map50  # map50
validation_results.box.map75  # map75
validation_results.box.maps  # a list contains map50-95 of each category