In [None]:
from PIL import Image, ImageDraw
import os
import glob
import numpy as np
import shutil

# Check if training set bounding boxes are drawn correctly

In [None]:
def draw_bbox(image,
              images_dir='../data/logo_detection/images/',
              bboxes_dir='../data/logo_detection/labels/'):
    """
    Draw the annotated bounding boxes on an image and display the result.

    Args:
        image (str): Filename (with extension) of the image to process,
                     relative to `images_dir`.
        images_dir (str): Directory that contains the image.
                          Defaults to '../data/logo_detection/images/'.
        bboxes_dir (str): Directory that contains the annotation file.
                          Defaults to '../data/logo_detection/labels/'.

    The annotation file has the same base name as the image with the extension
    replaced by '.txt'. Each non-blank line in it has the format:

        label x_center y_center width height

    where the four numbers are ratios of the image width and height and the
    fields are separated by any whitespace. Blank lines are ignored.

    Every box is converted from (center, width, height) to top-left and
    bottom-right pixel coordinates and drawn as a red rectangle with Pillow.
    The annotated image is then displayed with Image.show().

    Raises:
        FileNotFoundError: If the image or its annotation file does not exist.
        ValueError: If a non-blank annotation line does not have exactly five
                    fields, or a coordinate field is not a number.

    Example:
        draw_bbox('test_image.jpg')

    This reads 'test_image.jpg' and 'test_image.txt' from the default
    directories, draws the boxes, and displays the image.
    """

    image_path = os.path.join(images_dir, image)
    # Swap only the file extension; a plain str.replace on the full path could
    # also touch a directory name and would miss extensions other than '.jpg'.
    bbox_path = os.path.join(bboxes_dir, os.path.splitext(image)[0] + '.txt')

    # Open the image and create ImageDraw object for drawing
    canvas = Image.open(image_path)
    draw = ImageDraw.Draw(canvas)

    # The image size is the same for every box, so read it once.
    W, H = canvas.size

    with open(bbox_path, 'r') as f:
        for line_number, line in enumerate(f, start=1):
            # split() without arguments tolerates tabs, repeated spaces and
            # the trailing newline.
            fields = line.split()
            if not fields:
                # Blank line (commonly the last line of the file)
                continue
            if len(fields) != 5:
                raise ValueError(
                    f'{bbox_path}:{line_number}: expected 5 fields '
                    f'(label x_center y_center width height), got {len(fields)}'
                )

            # The label is not needed for drawing
            _label, x, y, w, h = fields
            x, y, w, h = float(x), float(y), float(w), float(h)

            # Convert center position, width, height into
            # top-left and bottom-right coordinates
            x1 = (x - w/2) * W
            y1 = (y - h/2) * H
            x2 = (x + w/2) * W
            y2 = (y + h/2) * H

            # Draw the bounding box with red lines
            draw.rectangle((x1, y1, x2, y2),
                           outline=(255, 0, 0), # Red in RGB
                           width=1)             # Line width
    canvas.show()

In [None]:
# Visual sanity check on a single training image
sample_image = '32.jpg'
draw_bbox(sample_image)

# Prepare dir structure for YOLO

In [None]:
# Create images/{train,val,test} and labels/{train,val,test} under the dataset
# root. Nothing is created when the dataset root itself is missing.
# exist_ok=True keeps this cell re-runnable: without it a second run raises
# FileExistsError because the folders are already there.
if os.path.exists('../data/logo_detection/'):
    for folder in ['images', 'labels']:
        for split in ['train', 'val', 'test']:
            os.makedirs(f'../data/logo_detection/{folder}/{split}', exist_ok=True)

# Splitting data into Train, Validation and Test sets

In [None]:
def get_filenames(dir):
    """
    Collect the names of the '.jpg' files located directly inside a directory.

    Args:
        dir (str): Directory to search. Sub-directories are not searched.

    Returns:
        set: Base filenames (directory part stripped) of every path that
             matches '<dir>/*.jpg'. Empty when nothing matches.
    """
    pattern = os.path.join(dir, '*.jpg')
    return {os.path.basename(match) for match in glob.glob(pattern)}

In [None]:
# get_filenames returns a set, and the iteration order of a set of strings can
# differ between Python processes (string hashes are randomized per process).
# Sorting gives the array a fixed starting order, so the seeded shuffle that
# follows produces the same split after every kernel restart.
logo_image_names = get_filenames('../data/logo_detection/images/')
logo_images = np.array(sorted(logo_image_names))

##### Shuffle data

In [None]:
# Seed NumPy's global random state so the in-place shuffle below gives the
# same permutation whenever it starts from the same input order.
SHUFFLE_SEED = 42
np.random.seed(SHUFFLE_SEED)
np.random.shuffle(logo_images)

##### Copy images to Training, Validation and Test directories

In [None]:
def split_dataset(image_names, train_size, val_size, data_dir='../data/logo_detection'):
    """
    Split images (and their bounding box files) into training, validation and
    testing sets and copy them into the matching sub-folders.

    Args:
        image_names (sequence of str): Filenames of the images to split. They are
                            read from '<data_dir>/images/'. The order of this
                            sequence decides the split: the first part goes to
                            training, the next to validation, the rest to testing.

        train_size (float): Proportion of the total dataset to be used for training.
                            This should be a float between 0.0 and 1.0.

        val_size (float): Proportion of the total dataset to be used for validation.
                          This should be a float between 0.0 and 1.0.
                          The testing set receives whatever remains.

        data_dir (str): Dataset root that holds the 'images' and 'labels' folders.
                        Defaults to '../data/logo_detection'.

    Each image and its annotation file (same base name, '.txt' extension, read
    from '<data_dir>/labels/') are copied to '<data_dir>/images/<split>' and
    '<data_dir>/labels/<split>', where <split> is 'train', 'val' or 'test'.
    The split folders are created if they do not exist yet.

    If an image has no annotation file it is skipped and a message is printed.
    A skipped image still occupies its position in the sequence, so the split
    it would have gone to ends up one item smaller.

    Returns:
        None

    Raises:
        OSError: If copying a file fails for a reason other than a missing
                 annotation file (e.g. missing image, permission problem).

    Example:
        split_dataset(logo_images, train_size = 0.8, val_size = 0.1)

    This splits 'logo_images' 80% / 10% / 10% into training, validation and
    testing sets and copies the files into the corresponding sub-folders.
    """

    images_dir = os.path.join(data_dir, 'images')
    labels_dir = os.path.join(data_dir, 'labels')

    # Convert the proportions into item counts; the test set is the remainder.
    total = len(image_names)
    train_count = round(total * train_size)
    val_count = round(total * val_size)

    # Make sure every destination folder exists. shutil.copy treats a missing
    # destination as a *file name*, so without this each copy would silently
    # overwrite a single file called 'train', 'val' or 'test'.
    for folder in (images_dir, labels_dir):
        for split in ('train', 'val', 'test'):
            os.makedirs(os.path.join(folder, split), exist_ok=True)

    skipping_counter = 0

    for i, image_name in enumerate(image_names):

        # Bounding box filename: same base name, '.txt' extension
        bbox_name = os.path.splitext(image_name)[0] + '.txt'

        # Split into train, val, or test
        if i < train_count:
            split = 'train'
        elif i < train_count + val_count:
            split = 'val'
        else:
            split = 'test'

        # Source paths
        source_image_path = os.path.join(images_dir, image_name)
        source_bbox_path = os.path.join(labels_dir, bbox_name)

        # Check the one condition we intend to tolerate explicitly, instead of
        # catching every exception and reporting it as a missing bounding box.
        if not os.path.isfile(source_bbox_path):
            skipping_counter += 1
            print(f'image {source_image_path} has no bounding box associated with it.\nThis image will not be used in training.\nSkipping...')
            continue

        # Copy the image first: if that fails, no orphan label is left behind.
        shutil.copy(source_image_path, os.path.join(images_dir, split))
        shutil.copy(source_bbox_path, os.path.join(labels_dir, split))

    print(f'Skipped {skipping_counter} images due to lack of bounding box.')

In [None]:
# 80% training, 10% validation; the remaining 10% becomes the test set.
split_dataset(logo_images, train_size=0.8, val_size=0.1)

# Training model

To train the model, run:

python <yolo_PATH>/yolov5/train.py --data custom_logo_training.yaml --weights yolov5x.pt --epochs 300 --batch 64 --freeze 10

Adjust the number of epochs and the batch size to suit your needs.