## Download Data

In [None]:
# !gdown --fuzzy 'https://drive.google.com/file/d/1mqtONldpayaQ97SH1p_UfdoH6nywVfYt/view?usp=sharing'

In [None]:
import pandas as pd

# Load the detection results from the local Parquet file into a DataFrame.
df = pd.read_parquet('detection_results_validation.parquet')
# Bare expression: as the last statement of the cell it renders the DataFrame.
df

In [None]:
# Keep only the rows whose class id is 0 (person). `.copy()` makes the result an
# independent frame, so the relabel below does not assign into a slice of the
# original DataFrame (which triggers pandas' SettingWithCopyWarning).
df = df[df['classid'] == 0].copy()
# Every remaining row is class 0, so the whole column is relabelled at once.
# Replacing the column (rather than writing strings into it through a mask)
# also avoids setting a string into what is presumably a numeric column.
df['classid'] = 'person'
# Bare expression: renders the filtered DataFrame as the cell output.
df

In [None]:
# Work on a small subset: keep at most the first 1000 rows.
# NOTE(review): this caps rows, not images, so the last image in the subset may
# end up with only part of its boxes -- confirm this is acceptable.
df = df.iloc[:1000]

## Convert Data to VOC format

In [None]:
import os
import xml.etree.ElementTree as ET
import pandas as pd

def _add_text_child(parent, tag, text):
    """Append a ``<tag>`` child holding ``str(text)`` to *parent* and return it."""
    child = ET.SubElement(parent, tag)
    child.text = str(text)
    return child


def create_voc_xml(filename, objects, output_dir, width=1024, height=1024, depth=3):
    """Write a Pascal VOC style annotation XML file for one image.

    Args:
        filename: Image path or name. Only its basename is stored in the
            ``<filename>`` element; the same basename with its extension
            replaced by ``.xml`` is used as the output file name.
        objects: Iterable of mappings, each providing the keys ``classid``,
            ``x1``, ``y1``, ``x2`` and ``y2``. One ``<object>`` element is
            written per mapping, with the values converted via ``str``.
        output_dir: Directory that receives the XML file. It is created
            (including parents) when it does not exist.
        width: Value written to ``<size>/<width>``.
        height: Value written to ``<size>/<height>``.
        depth: Value written to ``<size>/<depth>``.

    The ``width``/``height``/``depth`` defaults reproduce the previously
    hard-coded 1024x1024x3 size; pass the real image dimensions when known.
    """
    annotation = ET.Element('annotation')
    _add_text_child(annotation, 'filename', os.path.basename(filename))

    # Image size block.
    size = ET.SubElement(annotation, 'size')
    _add_text_child(size, 'width', width)
    _add_text_child(size, 'height', height)
    _add_text_child(size, 'depth', depth)

    # One <object> per detection. pose/truncated/difficult are not available
    # in the input mappings, so fixed placeholder values are written.
    for obj in objects:
        obj_elem = ET.SubElement(annotation, 'object')
        _add_text_child(obj_elem, 'name', obj['classid'])
        _add_text_child(obj_elem, 'pose', 'Unspecified')
        _add_text_child(obj_elem, 'truncated', '0')
        _add_text_child(obj_elem, 'difficult', '0')

        bndbox = ET.SubElement(obj_elem, 'bndbox')
        # Map the x1/y1/x2/y2 input keys onto the VOC corner tags.
        for tag, key in (('xmin', 'x1'), ('ymin', 'y1'), ('xmax', 'x2'), ('ymax', 'y2')):
            _add_text_child(bndbox, tag, obj[key])

    os.makedirs(output_dir, exist_ok=True)

    # <output_dir>/<image basename without extension>.xml
    stem = os.path.splitext(os.path.basename(filename))[0]
    output_file = os.path.join(output_dir, stem + '.xml')

    ET.ElementTree(annotation).write(output_file, encoding='utf-8', xml_declaration=True)


output_directory = 'pascal_voc_annotations'

# One row per image: the filename and the list of its detections as row dicts.
# The groups are iterated directly instead of going through
# `groupby(...).apply(...)`: applying a function that also sees the grouping
# column is deprecated in recent pandas, and the apply/reset_index chain breaks
# on an empty frame. The resulting table has the same two columns.
grouped_df = pd.DataFrame(
    [(name, group.to_dict('records')) for name, group in df.groupby('filename')],
    columns=['filename', 'objects'],
)

# Write one VOC XML file per image.
for filename, objects in zip(grouped_df['filename'], grouped_df['objects']):
    create_voc_xml(filename, objects, output_directory)

## Plot XML to verify conversion is correct

In [None]:
import cv2
import matplotlib.pyplot as plt

def plot_image_with_bounding_boxes(xml_path, image_folder):
    """Display an image with the boxes from its VOC annotation drawn on top.

    Args:
        xml_path: Path to the annotation XML file. Its ``<filename>`` element
            names the image and each ``<object>`` supplies a ``<name>`` and a
            ``<bndbox>`` with ``xmin``/``ymin``/``xmax``/``ymax``.
        image_folder: Directory in which the image named by the XML is looked up.

    Raises:
        FileNotFoundError: If OpenCV cannot read the image at the resolved path.
    """
    root = ET.parse(xml_path).getroot()

    # Resolve the image from the name recorded in the annotation.
    image_path = os.path.join(image_folder, root.find('filename').text)

    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than with an opaque cvtColor error.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    # OpenCV loads BGR; matplotlib expects RGB.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    for obj in root.findall('object'):
        bndbox = obj.find('bndbox')
        # Go through float first so coordinates written as e.g. "12.0" are
        # accepted; OpenCV drawing calls need integer pixel positions.
        xmin, ymin, xmax, ymax = (
            int(round(float(bndbox.find(tag).text)))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        class_id = obj.find('name').text

        # Box outline, then the class label slightly above its top-left corner.
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
        cv2.putText(image, class_id, (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    # Show the annotated image without axes.
    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    plt.axis('off')
    plt.show()

In [None]:
# Visual sanity check: draw one converted annotation over its source image.
xml_path = 'pascal_voc_annotations/011417767312812e.xml'
image_folder = '/workspace/yolo_v8_training/oiv7_full/validation/'
plot_image_with_bounding_boxes(xml_path=xml_path, image_folder=image_folder)

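# Class-id mapping, kept for reference (presumably that of the detection results;
# only class 0, person, is used in this notebook):
# {0: 'person', 1: 'head', 2: 'hand', 3: 'face'}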

## Load Into IceVision

In [None]:
from icevision.all import *

# VOC bounding-box parser: reads the generated XML annotations and looks up the
# images in the validation image directory.
parser = parsers.VOCBBoxParser(
    images_dir='/workspace/yolo_v8_training/oiv7_full/validation/',
    annotations_dir='pascal_voc_annotations/',
)

In [None]:
# Parse the annotations into two record collections: training and validation.
# No splitter is passed, so the parser's default split is used (80:20 according
# to the original note -- confirm against the installed IceVision version).
train_records, valid_records = parser.parse() # Defaults to 80:20 split
# Bare expression: displays the class map held by the parser.
parser.class_map

In [None]:
# Target image size, shared by the training and validation pipelines below.
image_size = 384

# Training: augmentation transforms (presize 512, final size `image_size`)
# followed by normalisation.
train_tfms = tfms.A.Adapter(
    list(tfms.A.aug_tfms(size=image_size, presize=512)) + [tfms.A.Normalize()]
)

# Validation: resize-and-pad to `image_size` followed by normalisation.
valid_tfms = tfms.A.Adapter(
    list(tfms.A.resize_and_pad(image_size)) + [tfms.A.Normalize()]
)

In [None]:
# Pair each record collection with its transform pipeline.
train_ds = Dataset(train_records, train_tfms)
valid_ds = Dataset(valid_records, valid_tfms)

In [None]:
%matplotlib inline
# Show an element of the train_ds with augmentation transformations applied.
# Index 0 is fetched three times on purpose: each access goes through the
# training transforms again, so the three panels presumably show different
# augmentations of the same record.
samples = [train_ds[0] for _ in range(3)]
show_samples(samples, ncols=3)

In [None]:
# Model family and backbone: Ultralytics YOLOv5, small variant.
model_type = models.ultralytics.yolov5
backbone = model_type.backbones.small

# Extra keyword arguments forwarded to the model constructor.
extra_args = {'img_size': image_size}

# Pretrained backbone; the number of classes is taken from the parser's class map.
model = model_type.model(
    backbone=backbone(pretrained=True),
    num_classes=len(parser.class_map),
    **extra_args
)

In [None]:
# Data loaders built through the model type's own factory functions.
# Both use batches of 8 and 4 workers; only the training loader shuffles.
train_dl = model_type.train_dl(train_ds, batch_size=8, num_workers=4, shuffle=True)
valid_dl = model_type.valid_dl(valid_ds, batch_size=8, num_workers=4, shuffle=False)

In [None]:
# Display the first batch of the validation loader, four images per row.
model_type.show_batch(first(valid_dl), ncols=4)

In [None]:
# Evaluate with the COCO metric computed on bounding boxes.
metrics = [COCOMetric(metric_type=COCOMetricType.bbox)]

# fastai learner wired to the train/validation loaders, the model and the metric.
learn = model_type.fastai.learner(dls=[train_dl, valid_dl], model=model, metrics=metrics)

In [None]:
# Run the learner's learning-rate finder before choosing the rate used for training.
learn.lr_find()

In [None]:
# Fine-tune with freeze_epochs=5. The positional 20 and 0.001 are presumably the
# epoch count and base learning rate -- confirm against fastai's fine_tune signature.
learn.fine_tune(20, 0.001, freeze_epochs=5)

In [None]:
# Visualise the model's output on the validation dataset with a detection threshold of 0.5.
model_type.show_results(model, valid_ds, detection_threshold=.5)

In [None]:
# Plot 10 validation samples sorted by "loss_total"; the returned samples,
# predictions and loss statistics are kept for further inspection.
sorted_samples, sorted_preds, losses_stats = model_type.interp.plot_top_losses(model, valid_ds, sort_by="loss_total", n_samples=10)

In [None]:
from icevision.models.checkpoint import *
# Save the trained model together with the metadata needed to rebuild it.
# `img_size` is taken from `image_size` (the value the transforms and the model
# were built with) instead of repeating the literal, so the checkpoint cannot
# record a size that differs from the one actually used for training.
# NOTE(review): model_name/backbone_name and the icevision version string are
# still literals -- keep them in sync with `model_type`, `backbone` and the
# installed package.
save_icevision_checkpoint(model,
                        model_name='ultralytics.yolov5',
                        backbone_name='small',
                        img_size=image_size,
                        classes=parser.class_map.get_classes(),
                        filename='./models/model_checkpoint.pth',
                        meta={'icevision_version': '0.12.0'})