In [1]:
# Jupyter notebook for training YOLO as a baseline model.
# Note: this notebook was run on Google Colab; dependency errors may occur in
# other environments.

In [2]:
 # Start from installing kaggle
 !pip install -q kaggle

In [None]:
# Upload Kaggle credentials (kaggle.json) through the Colab file picker
from google.colab import files

# Bind the result to a name: if files.upload() were the last expression of the
# cell, the notebook would echo the uploaded credentials.
uploaded_files = files.upload()

# A later cell copies kaggle.json, so fail early if no file with exactly that
# name was uploaded.
assert 'kaggle.json' in uploaded_files, 'Expected a file named kaggle.json'

In [6]:
!cp kaggle.json ~/.kaggle/

In [7]:
# Download the dataset archive (military-aircraft-recognition-dataset.zip) from
# Kaggle into the current working directory
!kaggle datasets download -d khlaifiabilel/military-aircraft-recognition-dataset

Downloading military-aircraft-recognition-dataset.zip to /content
 99% 1.12G/1.13G [00:09<00:00, 123MB/s]
100% 1.13G/1.13G [00:09<00:00, 132MB/s]


In [None]:
# Unzip dataset to data folder
!unzip military-aircraft-recognition-dataset.zip -d "data"

In [9]:
# Paths
# Everything lives under the folder the archive was unzipped into.
ROOT_DIR = "data/"

# Images and the two annotation variants
IMAGE_DIR = f"{ROOT_DIR}JPEGImages/"
HORIZONTAL_BB = f"{ROOT_DIR}Annotations/Horizontal Bounding Boxes/"
ORIENTED_BB = f"{ROOT_DIR}Annotations/Oriented Bounding Boxes/"

# Text files listing the ids that belong to each split
TRAIN_SET_TXT = f"{ROOT_DIR}ImageSets/Main/train.txt"
TEST_SET_TXT = f"{ROOT_DIR}ImageSets/Main/test.txt"

In [10]:
# Imports
import random
import os
import xml.etree.ElementTree as ET
import shutil
import yaml

import cv2
import pandas as pd
import numpy as np
import seaborn as sns

from matplotlib import pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid

In [11]:
# Seed Python's built-in RNG so the random sampling done later in the notebook
# is repeatable
random.seed(a=1991)

In [12]:
# Reading xml files
# XML reader
def parse_xml(xml_file_path: str):
    """Read one annotation XML file and return its contents as a dict.

    The returned dict has the keys 'filename', 'img_size' (a dict with integer
    'width', 'height' and 'depth'), 'segmented' (int), 'bboxes' (a list with
    one dict per <object> element holding its 'name' and the integer
    'xmin', 'ymin', 'xmax', 'ymax' of its <bndbox>) and 'database'.
    """
    document = ET.parse(xml_file_path)

    def as_int(parent, tag):
        # Text of the child element `tag`, converted to int
        return int(parent.find(tag).text)

    file_name = document.find('filename').text

    image_size = {
        tag: as_int(document.find('size'), tag)
        for tag in ('width', 'height', 'depth')
    }

    segmented_flag = as_int(document, 'segmented')

    boxes = []
    for obj in document.findall('object'):
        box_node = obj.find('bndbox')
        box = {'name': obj.find('name').text}
        box.update(
            (tag, as_int(box_node, tag))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        boxes.append(box)

    return dict(
        filename=file_name,
        img_size=image_size,
        segmented=segmented_flag,
        bboxes=boxes,
        database=document.find('source').find('database').text,
    )

def get_xml_file_path(filename, option='HORIZONTAL'):
    """Prefix `filename` with the annotation folder selected by `option`.

    'HORIZONTAL' selects HORIZONTAL_BB and 'ORIENTED' selects ORIENTED_BB.
    Any other value yields None.
    """
    if option == 'ORIENTED':
        return ORIENTED_BB + filename
    if option == 'HORIZONTAL':
        return HORIZONTAL_BB + filename
    return None

In [13]:
# Class labels; a label's position in this list is its YOLO class index
classes = [
    'A19', 'A1', 'A20', 'A16', 'A5', 'A13', 'A15', 'A3', 'A17', 'A11',
    'A14', 'A8', 'A2', 'A10', 'A9', 'A4', 'A18', 'A7', 'A12', 'A6',
]

# Label -> integer index lookup
classes_map = dict(zip(classes, range(len(classes))))
classes_map

{'A19': 0,
 'A1': 1,
 'A20': 2,
 'A16': 3,
 'A5': 4,
 'A13': 5,
 'A15': 6,
 'A3': 7,
 'A17': 8,
 'A11': 9,
 'A14': 10,
 'A8': 11,
 'A2': 12,
 'A10': 13,
 'A9': 14,
 'A4': 15,
 'A18': 16,
 'A7': 17,
 'A12': 18,
 'A6': 19}

In [14]:
# Get train and test ids
def _read_id_list(list_file_path):
    """Return the non-empty, whitespace-stripped lines of a split list file."""
    with open(list_file_path, 'r') as file:
        stripped_lines = (line.strip() for line in file.read().splitlines())
        # Skip blank lines (e.g. a trailing empty line) so they do not become
        # empty ids, which would later resolve to ".xml" / ".jpg".
        return [line for line in stripped_lines if line]


def get_train_set_ids():
    """Return the image ids listed in TRAIN_SET_TXT, in file order."""
    return _read_id_list(TRAIN_SET_TXT)


def get_test_set_ids():
    """Return the image ids listed in TEST_SET_TXT, in file order."""
    return _read_id_list(TEST_SET_TXT)

# Load both splits once and report how many ids each one holds
train_set_ids = get_train_set_ids()
test_set_ids = get_test_set_ids()

print(
    f'Size of train set is: {len(train_set_ids)}',
    f'Size of test set is: {len(test_set_ids)}',
    f'Total images: {len(train_set_ids) + len(test_set_ids)}',
    sep='\n',
)

Size of train set is: 1331
Size of test set is: 2511
Total images: 3842


In [15]:
# Helper functions
# Loading and displaying images

def get_image(image_path: str):
    """Load the image at `image_path` and return it converted from BGR to RGB.

    Raises:
        FileNotFoundError: if cv2.imread could not read the file. It returns
            None in that case instead of raising, which would otherwise only
            surface as an error inside cv2.cvtColor.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)


def get_image_shape(image_path: str):
    """Return the `.shape` of the image loaded from `image_path` with cv2.imread.

    Raises:
        FileNotFoundError: if cv2.imread could not read the file. It returns
            None in that case instead of raising, which would otherwise only
            surface as an AttributeError on `.shape`.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    return img.shape

def get_image_from_id(obj_id: str):
    """Load the image stored as IMAGE_DIR + obj_id + ".jpg" via get_image."""
    return get_image(IMAGE_DIR + obj_id + ".jpg")

def display_image(image_name: str):
    """Show the file `image_name` from IMAGE_DIR with matplotlib's imshow."""
    rgb_image = get_image(IMAGE_DIR + image_name)
    plt.imshow(rgb_image)

def get_random_img_names(count=16):
    """Return `count` distinct entry names sampled at random from IMAGE_DIR.

    The directory listing is sorted before sampling because os.listdir returns
    entries in arbitrary order; without sorting, the same random seed could
    select different files on different machines.

    Args:
        count: number of names to return (defaults to 16).

    Raises:
        ValueError: from random.sample when IMAGE_DIR holds fewer than `count`
            entries.
    """
    return random.sample(sorted(os.listdir(IMAGE_DIR)), count)

In [16]:
# Convert the info dict to the required yolo format and write it to disk
def convert_to_yolov5(info_dict, obj_id, save_folder):
    """Write the bounding boxes of one image as a YOLOv5 label file.

    Each box in info_dict["bboxes"] becomes one line
    "<class_id> <x_center> <y_center> <width> <height>", where the four
    geometry values are divided by the image width/height and formatted with
    three decimals. The lines are written to <save_folder>/<obj_id>.txt.

    Args:
        info_dict: annotation dict as produced by parse_xml; only "bboxes" is
            read here.
        obj_id: image id; IMAGE_DIR + obj_id + ".jpg" supplies the image
            dimensions used for normalisation.
        save_folder: directory that receives the label file.

    Boxes whose class name is not a key of classes_map are reported with a
    printed message and left out of the label file.
    """
    print_buffer = []
    bboxes = info_dict["bboxes"]

    # The image size is identical for every box, so read the image once rather
    # than once per box (and not at all when there is nothing to convert).
    if bboxes:
        image_h, image_w = get_image_shape(IMAGE_DIR + obj_id + ".jpg")[:2]

    # For each bounding box
    for b in bboxes:
        try:
            class_id = classes_map[b["name"]]
        except KeyError:
            print("Invalid Class. Must be one from ", classes_map.keys())
            # Skip this box: carrying on would use a class_id that is either
            # undefined or left over from the previous box.
            continue

        # Transform the bbox co-ordinates as per the format required by YOLO v5
        b_center_x = (b["xmin"] + b["xmax"]) / 2
        b_center_y = (b["ymin"] + b["ymax"]) / 2
        b_width = b["xmax"] - b["xmin"]
        b_height = b["ymax"] - b["ymin"]

        # Normalise the co-ordinates by the dimensions of the image
        b_center_x /= image_w
        b_center_y /= image_h
        b_width /= image_w
        b_height /= image_h

        # Queue the bbox details for the label file
        print_buffer.append("{} {:.3f} {:.3f} {:.3f} {:.3f}".format(class_id, b_center_x, b_center_y, b_width, b_height))

    # Name of the file which we have to save
    save_file_name = os.path.join(save_folder, obj_id + ".txt")

    # Save the annotation to disk (an empty file when no box was written)
    with open(save_file_name, 'w') as fl:
        fl.write("\n".join(print_buffer))

In [17]:
# Create appropriate folders
!mkdir "data for yolo"
!mkdir "data for yolo/labels"
!mkdir "data for yolo/images"
!mkdir "data for yolo/labels/train"
!mkdir "data for yolo/labels/test"
!mkdir "data for yolo/images/train"
!mkdir "data for yolo/images/test"

In [19]:
# Convert dataset to yolo format: one label file per image id, for both splits
for split_ids, labels_dir in (
    (train_set_ids, "data for yolo/labels/train/"),
    (test_set_ids, "data for yolo/labels/test/"),
):
    for obj_id in split_ids:
        info_dict = parse_xml(HORIZONTAL_BB + str(obj_id) + ".xml")
        convert_to_yolov5(info_dict, str(obj_id), labels_dir)

In [20]:
# Move images into the YOLO folder layout
def move_files_to_folder(list_of_files, destination_folder):
    """Move every path in `list_of_files` into `destination_folder`.

    A file that is no longer at its source path but already present in the
    destination is skipped, so calling the function again after a completed
    run is a no-op.

    Raises:
        Exception: whatever shutil.move raised for the first file that could
            not be moved; the offending path is printed before re-raising.
    """
    for f in list_of_files:
        target = os.path.join(destination_folder, os.path.basename(f))
        if not os.path.exists(f) and os.path.exists(target):
            # Already moved by an earlier run
            continue
        try:
            shutil.move(f, destination_folder)
        except Exception:
            # Report which file failed, then surface the real error instead of
            # masking it behind an AssertionError.
            print(f)
            raise

# Image path of every id, grouped by split
train_img_files = [f"{IMAGE_DIR}{obj_id}.jpg" for obj_id in train_set_ids]
test_img_files = [f"{IMAGE_DIR}{obj_id}.jpg" for obj_id in test_set_ids]

# Move the splits into their folders
for img_files, images_dir in (
    (train_img_files, 'data for yolo/images/train'),
    (test_img_files, 'data for yolo/images/test'),
):
    move_files_to_folder(img_files, images_dir)

In [21]:
# Inverse of classes_map: integer class index -> class label
class_id_to_name_mapping = {
    class_id: name for name, class_id in classes_map.items()
}

In [22]:
# Make .yaml file for yolo
d = {
    # NOTE(review): relative path; presumably resolved by YOLOv5 against the
    # cloned yolov5/ folder, which would make it point at "data for yolo/"
    # next to it. Confirm.
    'path': '../data for yolo/',
    'train': 'images/train',
    # The test split is used as the validation set
    'val': 'images/test',
    # Derived from the mapping so the class count cannot drift from the names
    'nc': len(class_id_to_name_mapping),
    'names': class_id_to_name_mapping
    }

with open('dataset.yml', 'w') as yaml_file:
    yaml.dump(d, yaml_file, default_flow_style=False)

In [25]:
# Hyperparameters that are written to the hyp yaml file
hyps = dict(
    # Optimizer and learning-rate schedule
    lr0=0.01,  # initial learning rate (SGD=1E-2, Adam=1E-3)
    lrf=0.01,  # final OneCycleLR learning rate (lr0 * lrf)
    momentum=0.937,  # SGD momentum/Adam beta1
    weight_decay=0.0005,  # optimizer weight decay 5e-4
    warmup_epochs=3.0,  # warmup epochs (fractions ok)
    warmup_momentum=0.8,  # warmup initial momentum
    warmup_bias_lr=0.1,  # warmup initial bias lr
    # Loss gains and thresholds
    box=0.05,  # box loss gain
    cls=0.5,  # cls loss gain
    cls_pw=1.0,  # cls BCELoss positive_weight
    obj=1.0,  # obj loss gain (scale with pixels)
    obj_pw=1.0,  # obj BCELoss positive_weight
    iou_t=0.2,  # IoU training threshold
    anchor_t=4.0,  # anchor-multiple threshold
    # anchors=3,  # anchors per output layer (0 to ignore)
    fl_gamma=0.0,  # focal loss gamma (efficientDet default gamma=1.5)
    # Augmentation
    hsv_h=0.015,  # image HSV-Hue augmentation (fraction)
    hsv_s=0.5,  # image HSV-Saturation augmentation (fraction)
    hsv_v=0.4,  # image HSV-Value augmentation (fraction)
    degrees=0.0,  # image rotation (+/- deg)
    translate=0.1,  # image translation (+/- fraction)
    scale=0.4,  # image scale (+/- gain)
    shear=0.0,  # image shear (+/- deg)
    perspective=0.0,  # image perspective (+/- fraction), range 0-0.001
    flipud=0.0,  # image flip up-down (probability)
    fliplr=0.0,  # image flip left-right (probability)
    mosaic=1.0,  # image mosaic (probability)
    mixup=0.0,  # image mixup (probability)
    copy_paste=0.0,  # segment copy-paste (probability)
)

# Save the hyperparameters to hyps.yml (dumped with default_flow_style=False)
with open('hyps.yml', mode='w') as hyp_file:
    yaml.dump(hyps, stream=hyp_file, default_flow_style=False)

In [23]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.0.199-py3-none-any.whl (644 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/644.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.4/644.5 kB[0m [31m4.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m644.5/644.5 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1 (from ultralytics)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Installing collected packages: thop, ultralytics
Successfully installed thop-0.1.1.post2209072238 ultralytics-8.0.199


In [24]:
# Clone yolo git repo
!git clone https://github.com/ultralytics/yolov5  # clone
!pip install -r yolov5/requirements.txt  # install


Cloning into 'yolov5'...
remote: Enumerating objects: 16008, done.[K
remote: Counting objects: 100% (41/41), done.[K
remote: Compressing objects: 100% (28/28), done.[K
remote: Total 16008 (delta 22), reused 22 (delta 13), pack-reused 15967[K
Receiving objects: 100% (16008/16008), 14.66 MiB | 20.97 MiB/s, done.
Resolving deltas: 100% (10984/10984), done.
Collecting gitpython>=3.1.30 (from -r yolov5/requirements.txt (line 5))
  Downloading GitPython-3.1.37-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.0/190.0 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting gitdb<5,>=4.0.1 (from gitpython>=3.1.30->-r yolov5/requirements.txt (line 5))
  Downloading gitdb-4.0.10-py3-none-any.whl (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython>=3.1.30->-r yolov5/requirements.txt (line 5))
  Downloading smmap-5.

In [29]:
# Train YOLOv5 for 75 epochs
#   --img 800 / --batch 24 : image size and batch size passed to train.py
#   --data dataset.yml     : dataset description written earlier in this notebook
#   --hyp hyps.yml         : hyperparameter file written earlier in this notebook
#   --weights yolov5m.pt   : initial weights
#   --name 'mar results'   : run name (the training log reports project=yolov5/runs/train)
#   --patience 50          : NOTE(review): presumably the early-stopping patience in epochs; confirm
!python yolov5/train.py --img 800 --batch 24 --epochs 75 --data dataset.yml --name 'mar results' --patience 50 --weights yolov5m.pt --hyp hyps.yml

[34m[1mtrain: [0mweights=yolov5m.pt, cfg=, data=dataset.yml, hyp=hyps.yml, epochs=75, batch_size=24, imgsz=800, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=yolov5/runs/train, name=mar results, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=50, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v7.0-227-ge4df1ec Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)

[34m[1mhyperparameters: [0manchor_t=4.0, box=0.05, cls=0.5, cls_pw=1.0, copy_paste=0.0, degrees=0.0, fl_gamma=0.0, fliplr=0.0, flipud=0.0, hsv_h=0.015, hsv_s=0.5, hsv_v=0.4, iou_t=0.2, lr0=0.01, lrf=0.01, mixup=0.0, momentum=0.937, mosaic=