## FireNet YOLOv5 custom data set training

Adapted from https://blog.paperspace.com/train-yolov5-custom-data/#convert-the-annotations-into-the-yolo-v5-format
## Set up code

In [1]:
# Fetch the YOLOv5 sources into ./yolov5. If that directory already exists,
# git aborts with "destination path 'yolov5' already exists" and nothing is
# cloned again.
!git clone https://github.com/ultralytics/yolov5

fatal: destination path 'yolov5' already exists and is not an empty directory.


In [2]:
# Install the dependencies listed by the cloned YOLOv5 repository.
%pip install -r yolov5/requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [5]:
# scikit-learn provides train_test_split, which is imported further down.
%pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.2.2-cp311-cp311-win_amd64.whl (8.3 MB)
     ---------------------------------------- 8.3/8.3 MB 2.2 MB/s eta 0:00:00
Collecting joblib>=1.1.1
  Using cached joblib-1.2.0-py3-none-any.whl (297 kB)
Collecting threadpoolctl>=2.0.0
  Using cached threadpoolctl-3.1.0-py3-none-any.whl (14 kB)
Installing collected packages: threadpoolctl, joblib, scikit-learn
Successfully installed joblib-1.2.0 scikit-learn-1.2.2 threadpoolctl-3.1.0
Note: you may need to restart the kernel to use updated packages.


In [21]:
import torch
from IPython.display import Image  # for displaying images
import os 
import random
import shutil
from sklearn.model_selection import train_test_split
import xml.etree.ElementTree as ET
from xml.dom import minidom
from tqdm import tqdm
from PIL import Image, ImageDraw
import numpy as np
import matplotlib.pyplot as plt

import csv

# Seed Python's `random` module so that anything drawn from it is repeatable.
random.seed(108)

In [8]:
# Function to get the data from XML Annotation
def extract_info_from_xml(xml_file):
    """Read one annotation XML file into a plain dictionary.

    Only the direct children of the document root are inspected:

    * ``filename`` -> its text is stored under ``'filename'``.
    * ``size``     -> every child is converted with ``int`` and the values
      are stored, in document order, as a tuple under ``'image_size'``.
    * ``object``   -> one dict per element is appended to ``'bboxes'``; the
      ``name`` child becomes ``'class'`` and every child of ``bndbox`` is
      stored under its own tag name as an ``int``.

    Args:
        xml_file: Anything ``ET.parse`` accepts (a path or a file object).

    Returns:
        dict: Always contains ``'bboxes'`` (possibly empty). ``'filename'``
        and ``'image_size'`` are present only when the matching elements
        exist in the document.
    """
    annotation = {'bboxes': []}

    for node in ET.parse(xml_file).getroot():
        tag = node.tag

        if tag == "filename":
            annotation['filename'] = node.text

        elif tag == "size":
            # Order of the tuple follows the order of the child elements.
            annotation['image_size'] = tuple(int(dim.text) for dim in node)

        elif tag == "object":
            box = {}
            for field in node:
                if field.tag == "name":
                    box["class"] = field.text
                elif field.tag == "bndbox":
                    # One integer entry per coordinate element, keyed by tag.
                    box.update((coord.tag, int(coord.text)) for coord in field)
            annotation['bboxes'].append(box)

    return annotation

In [20]:
# Parse a single annotation as a quick check of extract_info_from_xml.
# The path is assembled from separate components: the former literal
# '..\data\FireNet_ObjectDetection' contained the invalid escape sequences
# '\d' and '\F' and only resolved correctly with Windows path separators.
example_xml_file = os.path.join('..', 'data', 'FireNet_ObjectDetection', "0000.xml")
print(extract_info_from_xml(example_xml_file))

{'bboxes': [{'class': 'Fire_Extinguisher', 'xmin': 265, 'ymin': 1018, 'xmax': 761, 'ymax': 1950}, {'class': 'Fire_Suppression_Signage', 'xmin': 495, 'ymin': 773, 'xmax': 692, 'ymax': 958}], 'filename': '0000.jpg', 'image_size': (1536, 2048, 3)}


In [37]:
# Build the class-name -> class-id lookup from the labels CSV.
# The path is assembled from components because the former literal '..\data'
# contained the invalid escape sequence '\d'.
labels_file = os.path.join('..', 'data', "FireNet_labels.csv")
# newline='' is what the csv module asks for when it is handed a file object.
with open(labels_file, mode='r', newline='') as in_file:
    reader = csv.reader(in_file)
    next(reader, None)  # Skipping header (tolerates a completely empty file)
    # Second column is used as the key and first column as the value; the
    # values stay strings exactly as read. Blank lines yield empty rows and
    # are skipped instead of raising IndexError.
    class_name_to_id_mapping = {rows[1]: rows[0] for rows in reader if rows}

In [54]:
# Convert the info dict to the required yolo format and write it to disk
def convert_to_yolov5(info_dict, output_dir=None, class_mapping=None):
    """Write the bounding boxes of one image as a YOLOv5 label file.

    Each box becomes one line ``<class_id> <x_center> <y_center> <width>
    <height>`` where the four geometry values are divided by the image
    width/height and formatted with three decimals. The file is named after
    the image (extension swapped for ``.txt``) and is always written, even
    when no box could be converted.

    Args:
        info_dict: Dictionary with ``'bboxes'`` (list of dicts holding
            ``'class'``, ``'xmin'``, ``'ymin'``, ``'xmax'``, ``'ymax'``),
            ``'filename'`` and ``'image_size'`` whose first two entries are
            used as width and height.
        output_dir: Directory that receives the label file. Defaults to the
            ``FireNet_ObjectDetection`` folder under ``../data``.
        class_mapping: Mapping from class name to class id. Defaults to the
            notebook-level ``class_name_to_id_mapping``.

    Returns:
        None. The result is the file written to ``output_dir``.
    """
    if output_dir is None:
        # Assembled from components; a literal with backslashes would contain
        # invalid escape sequences and be Windows-only.
        output_dir = os.path.join("..", "data", "FireNet_ObjectDetection")
    if class_mapping is None:
        class_mapping = class_name_to_id_mapping

    print_buffer = []

    # For each bounding box
    for b in info_dict["bboxes"]:
        try:
            class_id = class_mapping[b["class"]]
        except KeyError:
            print("Invalid Class. Must be one from ", class_mapping.keys())
            # Skip this box: falling through would either hit an unbound
            # class_id or silently reuse the id of the previous box.
            continue

        # Transform the bbox co-ordinates as per the format required by YOLO v5
        b_center_x = (b["xmin"] + b["xmax"]) / 2
        b_center_y = (b["ymin"] + b["ymax"]) / 2
        b_width    = (b["xmax"] - b["xmin"])
        b_height   = (b["ymax"] - b["ymin"])

        # Normalise the co-ordinates by the dimensions of the image.
        # Only the first two entries are needed, so a size without a channel
        # count is accepted as well.
        image_w, image_h = info_dict["image_size"][:2]
        b_center_x /= image_w
        b_center_y /= image_h
        b_width    /= image_w
        b_height   /= image_h

        # Queue the bbox details for writing
        print_buffer.append("{} {:.3f} {:.3f} {:.3f} {:.3f}".format(class_id, b_center_x, b_center_y, b_width, b_height))

    # Name of the file which we have to save. splitext swaps only the real
    # extension, so "jpg" inside the stem is left alone and non-jpg images
    # are never overwritten by their own labels.
    stem, _ext = os.path.splitext(info_dict["filename"])
    save_file_name = os.path.join(output_dir, stem + ".txt")

    # Save the annotation to disk; the context manager closes the handle.
    with open(save_file_name, "w") as out_file:
        print("\n".join(print_buffer), file=out_file)

In [56]:
# Directory holding the XML annotations (and receiving the generated labels).
# Built from components: the former literal '..\data\FireNet_ObjectDetection'
# relied on the invalid escape sequences '\d' and '\F' and was repeated for
# every use.
annotations_dir = os.path.join('..', 'data', 'FireNet_ObjectDetection')

# Get the annotations
annotations = [os.path.join(annotations_dir, x) for x in os.listdir(annotations_dir) if x.endswith(".xml")]
annotations.sort()

# Convert and save the annotations
for ann in tqdm(annotations):
    info_dict = extract_info_from_xml(ann)
    convert_to_yolov5(info_dict)

# From here on `annotations` lists the generated .txt label files rather than
# the XML sources. os.listdir returns entries in arbitrary order, so the list
# is sorted to keep anything derived from it repeatable.
annotations = sorted(os.path.join(annotations_dir, x) for x in os.listdir(annotations_dir) if x.endswith(".txt"))

100%|██████████| 1452/1452 [00:01<00:00, 1180.85it/s]


..\data\FireNet_ObjectDetection\0000.txt
