In [28]:
import os
import glob
import xml.etree.ElementTree as ET

# Folders (relative to the working directory) that contain the XML annotations
annotation_dirs = [
    'Annotations 1',
    'Annotations 2',
    'Annotations 3',
]

# Working directory of the kernel; the relative folders above are joined onto it
current_directory = os.getcwd()

# Extract classes from XML files
def extract_classes_from_xml(directory):
    """Collect the distinct object class names used in a folder of XML files.

    Args:
        directory: Folder whose ``*.xml`` files are parsed (sub-folders are
            not searched).

    Returns:
        set: The text of the ``<name>`` child of every ``<object>`` element
        found in those files.
    """
    xml_pattern = os.path.join(directory, '*.xml')
    return {
        element.find('name').text
        for path in glob.glob(xml_pattern)
        for element in ET.parse(path).getroot().iter('object')
    }


# Union of the class names found in every annotation directory
discovered_classes = set()
for annotation_dir in annotation_dirs:
    discovered_classes |= extract_classes_from_xml(
        os.path.join(current_directory, annotation_dir)
    )

# Sorting fixes the position of each class, and that position is later used
# as the numeric class id
all_classes = sorted(discovered_classes)
print(f"All classes: {all_classes}")

# Convert XML to YOLO format
def convert_to_yolo_format(xml_file, output_file, class_list):
    """Convert one XML annotation file into a YOLO-format label file.

    Every ``<object>`` becomes one line
    ``class_id x_center y_center width height`` in which the four box values
    are divided by the image size read from ``<size>``. Box corners are put
    in ascending order and clamped to the image first, so every written value
    lies in ``[0, 1]``; a box that has no area left after clamping is skipped
    with a warning.

    Args:
        xml_file: Path of the XML annotation to read.
        output_file: Path of the ``.txt`` label file to write.
        class_list: Sequence of class names; the position of a name is its
            class id.

    Returns:
        None. No output file is written when the image size is missing or
        not positive.

    Raises:
        ValueError: If an object's class name is not present in ``class_list``.
    """
    root = ET.parse(xml_file).getroot()

    size = root.find('size')
    width_node = size.find('width') if size is not None else None
    height_node = size.find('height') if size is not None else None
    if width_node is None or height_node is None:
        # Without the image size the box cannot be normalised
        print(f"Warning: Image size is missing in {xml_file}. Skipping.")
        return

    image_width = int(width_node.text)
    image_height = int(height_node.text)

    if image_width <= 0 or image_height <= 0:
        # Avoid dividing by zero during normalisation
        print(f"Warning: Image dimensions are zero in {xml_file}. Skipping.")
        return

    with open(output_file, 'w') as out_file:
        for obj in root.iter('object'):
            class_name = obj.find('name').text
            class_id = class_list.index(class_name)
            bndbox = obj.find('bndbox')

            # Order the corners so swapped min/max values cannot produce a
            # negative width or height
            x_low, x_high = sorted((float(bndbox.find('xmin').text),
                                    float(bndbox.find('xmax').text)))
            y_low, y_high = sorted((float(bndbox.find('ymin').text),
                                    float(bndbox.find('ymax').text)))

            # Clamp to the image so the normalised values stay within [0, 1]
            xmin = min(max(x_low, 0.0), image_width)
            xmax = min(max(x_high, 0.0), image_width)
            ymin = min(max(y_low, 0.0), image_height)
            ymax = min(max(y_high, 0.0), image_height)

            if xmax <= xmin or ymax <= ymin:
                print(f"Warning: Skipping empty box for '{class_name}' in {xml_file}.")
                continue

            # Normalize coordinates to values between 0 and 1
            x_center = (xmin + xmax) / 2.0 / image_width
            y_center = (ymin + ymax) / 2.0 / image_height
            width = (xmax - xmin) / image_width
            height = (ymax - ymin) / image_height

            # Write to the output file in YOLO format
            out_file.write(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")

# Destination folder for the converted label files (created if absent)
output_dir = os.path.join(current_directory, 'YOLO_annotations')
os.makedirs(output_dir, exist_ok=True)

# Turn every XML file of every annotation folder into a same-named .txt label
for annotation_dir in annotation_dirs:
    xml_pattern = os.path.join(current_directory, annotation_dir, '*.xml')
    for xml_file in glob.glob(xml_pattern):
        stem, _extension = os.path.splitext(os.path.basename(xml_file))
        convert_to_yolo_format(
            xml_file,
            os.path.join(output_dir, stem + '.txt'),
            all_classes,
        )

print("Conversion to YOLO format completed.")


All classes: ['basil', 'beef', 'dw', 'ham', 'hamburger', 'olive', 'olives', 'onion', 'pepper', 'pepperoni', 'pesto', 'pineapple', 'pizza', 'soup']
Conversion to YOLO format completed.


In [29]:
import random
from sklearn.model_selection import train_test_split

# The current directory where the YOLO annotations are located
current_directory = os.getcwd()
yolo_annotations_dir = os.path.join(current_directory, 'YOLO_annotations')

# Share of the data assigned to each set
train_ratio = 0.7  # 70% for training
val_ratio = 0.15   # 15% for validation
test_ratio = 0.15  # 15% for testing

# Create directories for the sets
train_dir = os.path.join(yolo_annotations_dir, 'train')
val_dir = os.path.join(yolo_annotations_dir, 'val')
test_dir = os.path.join(yolo_annotations_dir, 'test')

for split_dir in (train_dir, val_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)

# List the annotation files that have not been assigned to a set yet.
# The listing is sorted because glob returns files in arbitrary order; with an
# unsorted list the fixed random_state below would not reproduce the same split.
annotation_files = sorted(glob.glob(os.path.join(yolo_annotations_dir, '*.txt')))

if not annotation_files:
    # Typical when this cell is re-run after the files were already moved;
    # an empty list cannot be split, so leave the existing sets alone
    print("No unsplit YOLO annotation files found; existing train/val/test sets left untouched.")
else:
    # First carve off validation + test together, then divide that remainder
    train_files, holdout_files = train_test_split(
        annotation_files, test_size=val_ratio + test_ratio, random_state=42
    )
    val_files, test_files = train_test_split(
        holdout_files, test_size=test_ratio / (val_ratio + test_ratio), random_state=42
    )

    # Move annotation files to their respective sets
    for split_files, split_dir in (
        (train_files, train_dir),
        (val_files, val_dir),
        (test_files, test_dir),
    ):
        for annotation_file in split_files:
            # os.replace overwrites an existing destination on every platform,
            # whereas os.rename raises FileExistsError on Windows in that case
            os.replace(annotation_file, os.path.join(split_dir, os.path.basename(annotation_file)))

    print("YOLO annotations split into training, validation, and testing sets.")


YOLO annotations split into training, validation, and testing sets.


In [33]:
import cv2
import numpy as np
import shutil

# Input directory for original images and base output directory
input_directory = 'Images'
output_base_directory = 'Processed_Images'

# Directories for the existing YOLO annotation sets
annotation_dirs = {
    'train': 'YOLO_annotations/train',
    'val': 'YOLO_annotations/val',
    'test': 'YOLO_annotations/test'
}

# Output directories for images and labels, one entry per dataset split
output_image_dirs = {split: os.path.join(output_base_directory, 'images', split) for split in annotation_dirs}
output_label_dirs = {split: os.path.join(output_base_directory, 'labels', split) for split in annotation_dirs}

# Ensure all output directories exist.
# The loop variable is not called `dir`: at notebook top level that name would
# keep shadowing the built-in dir() for the rest of the kernel session.
for split_output_dir in [*output_image_dirs.values(), *output_label_dirs.values()]:
    os.makedirs(split_output_dir, exist_ok=True)

# Target size for resizing; resize_and_pad reads index 0 as height and index 1 as width
target_size = (640, 640)

# Resize and pad images to maintain aspect ratio
def resize_and_pad(image, target_size):
    """Scale an image to fit ``target_size`` and centre it on a black canvas.

    Args:
        image: Array whose first two dimensions are height and width.
        target_size: Sequence read as ``(height, width)`` of the output.

    Returns:
        A ``uint8`` array of shape ``(target_size[0], target_size[1], 3)``
        holding the resized image surrounded by zero-valued padding.
    """
    target_h, target_w = target_size[0], target_size[1]
    src_h, src_w = image.shape[:2]

    # One factor for both axes keeps the aspect ratio intact
    factor = min(target_h / src_h, target_w / src_w)
    scaled_w = int(src_w * factor)
    scaled_h = int(src_h * factor)
    scaled = cv2.resize(image, (scaled_w, scaled_h))

    # Split the leftover space evenly (floor division) to centre the image
    top = (target_h - scaled_h) // 2
    left = (target_w - scaled_w) // 2

    canvas = np.zeros((target_h, target_w, 3), dtype=np.uint8)
    canvas[top:top + scaled_h, left:left + scaled_w] = scaled
    return canvas

# Process and save images
def process_and_save_images(file_list, output_image_dir, ann_dir):
    """Letterbox every listed image and save it into ``output_image_dir``.

    Images are read from ``input_directory`` and passed through
    ``resize_and_pad`` with ``target_size``. When an image cannot be loaded,
    its annotation file in ``ann_dir`` is deleted so that no label is left
    without an image.

    Args:
        file_list: Image file names (not paths) inside ``input_directory``.
        output_image_dir: Folder that receives the processed images.
        ann_dir: Folder holding the ``.txt`` annotations of the same set.

    Returns:
        dict: Maps the base name (without extension) of every successfully
        processed image to its original ``(height, width)``. The label
        coordinates must be transformed with these values to stay aligned
        with the padded image.
    """
    original_shapes = {}
    for filename in file_list:
        input_path = os.path.join(input_directory, filename)
        output_path = os.path.join(output_image_dir, filename)
        stem = os.path.splitext(filename)[0]

        image = cv2.imread(input_path)
        if image is None:
            print(f"Warning: Unable to load image at {input_path}. Deleting corresponding annotation.")
            ann_file_path = os.path.join(ann_dir, stem + '.txt')
            if os.path.exists(ann_file_path):
                os.remove(ann_file_path)
            continue

        original_shapes[stem] = image.shape[:2]
        # resize_and_pad already returns a uint8 image, so it is written as-is;
        # converting to float32 / 255 and back may truncate pixel values by one
        cv2.imwrite(output_path, resize_and_pad(image, target_size))
    return original_shapes


def _find_image_filename(stem, extensions=('.jpg', '.jpeg', '.png')):
    """Return the file name of the image in ``input_directory`` for an annotation stem.

    Falls back to ``stem + '.jpg'`` when no candidate exists, so the caller
    still reports the missing image.
    """
    for extension in extensions:
        if os.path.exists(os.path.join(input_directory, stem + extension)):
            return stem + extension
    return stem + '.jpg'


def _letterbox_label_line(line, image_shape, target_size):
    """Re-express one YOLO label line in the coordinates of the padded image.

    The input values are normalised by the original image size; the output
    values are normalised by ``target_size`` and account for the scaling and
    padding applied by ``resize_and_pad``. Lines that do not have exactly
    five fields are returned unchanged (stripped).
    """
    fields = line.split()
    if len(fields) != 5:
        return line.strip()

    src_h, src_w = image_shape
    target_h, target_w = target_size[0], target_size[1]

    # Same geometry as resize_and_pad: uniform scale, then centred padding
    scale = min(target_h / src_h, target_w / src_w)
    new_w, new_h = int(src_w * scale), int(src_h * scale)
    pad_top = (target_h - new_h) // 2
    pad_left = (target_w - new_w) // 2

    x_center, y_center, box_w, box_h = (float(value) for value in fields[1:])
    x_center = (x_center * new_w + pad_left) / target_w
    y_center = (y_center * new_h + pad_top) / target_h
    box_w = box_w * new_w / target_w
    box_h = box_h * new_h / target_h
    return f"{fields[0]} {x_center:.6f} {y_center:.6f} {box_w:.6f} {box_h:.6f}"


# Process images and write the matching labels for every set
for set_name, ann_dir in annotation_dirs.items():
    # List all annotation files in the directory
    ann_files = glob.glob(os.path.join(ann_dir, '*.txt'))

    # Resolve the image that belongs to each annotation (.jpg, .jpeg or .png)
    img_files = [_find_image_filename(os.path.splitext(os.path.basename(f))[0]) for f in ann_files]

    # Process and save the images to the corresponding set directory
    original_shapes = process_and_save_images(img_files, output_image_dirs[set_name], ann_dir)

    # Write the remaining annotations (those whose image loaded) to the label directory
    for ann_file in glob.glob(os.path.join(ann_dir, '*.txt')):
        dst_annotation_path = os.path.join(output_label_dirs[set_name], os.path.basename(ann_file))
        image_shape = original_shapes.get(os.path.splitext(os.path.basename(ann_file))[0])

        if image_shape is None:
            # No size information available for this label; copy it untouched
            shutil.copy(ann_file, dst_annotation_path)
            continue

        # The source labels are left unmodified so the cell can be re-run safely
        with open(ann_file) as src_labels:
            label_lines = src_labels.read().splitlines()
        with open(dst_annotation_path, 'w') as dst_labels:
            for label_line in label_lines:
                if label_line.strip():
                    dst_labels.write(_letterbox_label_line(label_line, image_shape, target_size) + "\n")

print("Image and annotation processing and splitting completed.")

Image and annotation processing and splitting completed.


In [34]:
# Base directories for images and labels (adjust with your path).
# Built with os.path.join: a literal backslash is only a path separator on
# Windows, and "\i" / "\l" are invalid escape sequences in a normal string.
base_image_directory = os.path.join('Processed_Images', 'images')
base_label_directory = os.path.join('Processed_Images', 'labels')

# Subdirectories
subdirs = ['train', 'val', 'test']

# Check matching files
def check_matching_files(image_dir, label_dir):
    """Print which images lack a label file and which labels lack an image.

    Files are matched by base name. Images are the entries of ``image_dir``
    ending in ``.jpg``, ``.png`` or ``.jpeg``; labels are the entries of
    ``label_dir`` ending in ``.txt``.

    Args:
        image_dir: Folder containing the image files.
        label_dir: Folder containing the label files.

    Returns:
        None. The result is reported with ``print``.
    """
    image_stems = set()
    for entry in os.listdir(image_dir):
        if entry.endswith(('.jpg', '.png', '.jpeg')):
            image_stems.add(os.path.splitext(entry)[0])

    label_stems = set()
    for entry in os.listdir(label_dir):
        if entry.endswith('.txt'):
            label_stems.add(os.path.splitext(entry)[0])

    images_without_label = image_stems - label_stems
    labels_without_image = label_stems - image_stems

    if images_without_label:
        print(f"Missing label files in {os.path.basename(label_dir)}: {images_without_label}")

    if labels_without_image:
        print(f"Missing image files in {os.path.basename(image_dir)}: {labels_without_image}")

    if not (images_without_label or labels_without_image):
        print(f"All images in {os.path.basename(image_dir)} have corresponding label files.")

# Run the image/label consistency check on every dataset split
for subdir in subdirs:
    check_matching_files(
        os.path.join(base_image_directory, subdir),
        os.path.join(base_label_directory, subdir),
    )


All images in train have corresponding label files.
All images in val have corresponding label files.
All images in test have corresponding label files.


In [35]:
# Root folders of the processed dataset: the images and their label files
base_image_directory, base_label_directory = (
    'Processed_Images/images',
    'Processed_Images/labels',
)

# Dataset splits to verify
subdirs = ['train', 'val', 'test']

# Check if each label has a corresponding image file
def check_labels_have_images(image_dir, label_dir):
    """Print whether every label file has an image with the same base name.

    Images are the entries of ``image_dir`` ending in ``.jpg``, ``.png`` or
    ``.jpeg``; labels are the entries of ``label_dir`` ending in ``.txt``.

    Args:
        image_dir: Folder containing the image files.
        label_dir: Folder containing the label files.

    Returns:
        None. The result is reported with ``print``.
    """
    def stems(folder, suffixes):
        # Base names of the folder entries that end with one of the suffixes
        return {os.path.splitext(entry)[0] for entry in os.listdir(folder) if entry.endswith(suffixes)}

    image_stems = stems(image_dir, ('.jpg', '.png', '.jpeg'))
    label_stems = stems(label_dir, '.txt')
    labels_without_image = label_stems - image_stems

    if not labels_without_image:
        print(f"All labels in '{os.path.basename(label_dir)}' have corresponding image files.")
        return

    print(f"In '{os.path.basename(label_dir)}', the following labels have no corresponding images: {labels_without_image}")

# Verify, split by split, that no label file is missing its image
for subdir in subdirs:
    check_labels_have_images(
        os.path.join(base_image_directory, subdir),
        os.path.join(base_label_directory, subdir),
    )

All labels in 'train' have corresponding image files.
All labels in 'val' have corresponding image files.
All labels in 'test' have corresponding image files.


Training the YOLOv5 model from the terminal:

python train.py --img 640 --batch 16 --epochs 50 --data data.yaml --cfg yolov5s.yaml --weights yolov5s.pt --name yolov5_custom

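Testing on the test split (the weights path below points at the run folder `yolov5_custom2`; adjust it to the run folder your training actually produced):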

python val.py --weights runs/train/yolov5_custom2/weights/best.pt --data data.yaml --img 640 --task test