In [12]:
# %% [markdown]
# # VOC to YOLO Converter and YOLOv5 Training
# 
# This notebook walks through the process of:
# 1. Downloading the dataset using Kaggle API.
# 2. Converting Pascal VOC annotations into YOLO format.
# 3. Setting up YOLOv5 for object detection.
# 4. Training the YOLOv5 model on the custom dataset.
# 5. Performing inference on test images.

#### Step 1: Get the Dataset from Kaggle

In [14]:
# %% [bash]
# Install the Kaggle API
%%bash
%pip install kaggle

# %% [bash]
# Create a Kaggle directory and set the API key
mkdir -p ~/.kaggle
cp kaggle.json ~/.kaggle/kaggle.json
chmod 600 ~/.kaggle/kaggle.json

# %% [bash]
# Download the dataset from Kaggle
kaggle datasets download -d andrewmvd/road-sign-detection

# %% [bash]
# Unzip the downloaded dataset and organize it into the required folders
unzip -q road-sign-detection.zip -d data
mv ./data/annotations ./data/labels

SyntaxError: invalid syntax (3794774139.py, line 8)

#### Step 2: Set Up YOLOv5

In [None]:
# %% [bash]
# Clone the YOLOv5 repository and install the necessary dependencies
%%bash
git clone https://github.com/ultralytics/yolov5
cd yolov5 
pip install -r requirements.txt

#### Step 3: Convert Pascal VOC Annotations to YOLO Format

In [None]:
import os
import xml.etree.ElementTree as ET

def convert_box(image_size, box):
    """
    Convert Pascal VOC bounding box to YOLO format.
    
    Args:
    - image_size: tuple (width, height) of the image.
    - box: list of bounding box coordinates [xmin, xmax, ymin, ymax].
    
    Returns:
    - Normalized (x_center, y_center, width, height) in YOLO format.
    """
    image_w, image_h = image_size
    xmin, xmax, ymin, ymax = box
    
    # Center x, y coordinates
    x_center = (xmin + xmax) / 2.0
    y_center = (ymin + ymax) / 2.0
    
    # Width and height of the box
    box_width = xmax - xmin
    box_height = ymax - ymin
    
    # Normalize coordinates
    x_center /= image_w
    y_center /= image_h
    box_width /= image_w
    box_height /= image_h
    
    return x_center, y_center, box_width, box_height

def convert_voc_to_yolo(label_dir='./data/labels', output_dir='./data/labels'):
    """
    Convert Pascal VOC annotations (XML) to YOLO format and save them.
    
    Args:
    - label_dir: Directory where the VOC XML files are stored.
    - output_dir: Directory where the YOLO .txt files will be saved.
    """
    class_names = ['trafficlight', 'speedlimit', 'crosswalk', 'stop']
    os.makedirs(output_dir, exist_ok=True)
    
    for annotation_file in os.listdir(label_dir):
        file_base, file_ext = os.path.splitext(annotation_file)
        
        if file_ext != '.xml':
            continue
        
        input_file_path = os.path.join(label_dir, annotation_file)
        output_file_path = os.path.join(output_dir, f'{file_base}.txt')
        
        try:
            tree = ET.parse(input_file_path)
            root = tree.getroot()

            # Extract image size
            size_element = root.find('size')
            image_w = int(size_element.find('width').text)
            image_h = int(size_element.find('height').text)
            
            with open(output_file_path, 'w') as output_file:
                for obj in root.iter('object'):
                    class_name = obj.find('name').text
                    if class_name not in class_names:
                        continue  # Skip classes not in the predefined list
                    
                    # Skip difficult objects
                    difficult = obj.find('difficult')
                    if difficult is not None and int(difficult.text) == 1:
                        continue
                    
                    # Get bounding box coordinates
                    xml_box = obj.find('bndbox')
                    bounding_box = [
                        float(xml_box.find(tag).text) for tag in ('xmin', 'xmax', 'ymin', 'ymax')
                    ]
                    
                    # Convert bounding box to YOLO format
                    yolo_box = convert_box((image_w, image_h), bounding_box)
                    class_id = class_names.index(class_name)
                    
                    # Write the YOLO format annotation to file
                    output_file.write(f"{class_id} " + " ".join(f"{val:.6f}" for val in yolo_box) + '\n')
        
        except ET.ParseError:
            print(f"Error parsing XML file: {input_file_path}")
        except Exception as e:
            print(f"Error processing file {input_file_path}: {e}")

# Run the VOC to YOLO conversion
convert_voc_to_yolo()


In [None]:
# %% [bash]
# Create the custom YAML file for training
%%bash
echo "
path: ../data
train: images  # directory containing training images
val: images    # directory containing validation images

# Define the class names
names:
  0: trafficlight
  1: speedlimit
  2: crosswalk
  3: stop
" > yolov5/customVOC.yaml

In [None]:
# %% [bash]
# Start training the YOLOv5 model
%%bash
cd yolov5
python train.py --img 320 --batch 16 --epochs 500 --data customVOC.yaml --weights yolov5s.pt --workers 2

In [None]:
import torch
import cv2
import matplotlib.pyplot as plt
import numpy as np
import itertools
import os

In [None]:
# Load the best trained model
model = torch.hub.load('ultralytics/yolov5', 'custom', path='yolov5/runs/train/exp/weights/best.pt', force_reload=True)

In [None]:
# Plot predictions on random images from the dataset
fig, ax = plt.subplots(2, 4, figsize=(20, 10))
imgs = os.listdir('./data/images')

for idx in itertools.product(range(2), range(4)): 
    imgname = np.random.choice(imgs)
    img = cv2.imread(f'./data/images/{imgname}')
    
    # Perform inference and render the result
    results = model(img)
    ax[idx[0], idx[1]].imshow(cv2.cvtColor(np.squeeze(results.render()), cv2.COLOR_BGR2RGB))
    ax[idx[0], idx[1]].set_title(f"Prediction: {imgname}")
    ax[idx[0], idx[1]].axis('off')

plt.tight_layout()
plt.show()