In [10]:
import torch
import torch.nn as nn
import json
import pandas as pd
import numpy as np
from torch.utils.data import Dataset
from PIL import Image, ImageDraw, ImageOps
import torchvision.transforms as transforms
import torchvision.transforms.functional as TF
from torchvision.transforms import v2
import matplotlib.pyplot as plt
import os
import random
import math

Mount the Google Drive folder that holds the dataset (Google Colab)

In [8]:
# Colab-only step: attach Google Drive at /content/drive/ so the dataset
# folder listed in the next cell becomes visible on the local filesystem.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [9]:
# Sanity check: list the dataset folder on the mounted Drive (path is relative to the current working directory).
!ls ./drive/MyDrive/ML_IDA24_USDCdataset

README.dataset.txt   submission.csv	   test_images	 usdc_train.json
README.roboflow.txt  test_file_names.json  train_images


Main paths

In [12]:
# Absolute paths: Drive is mounted at /content/drive/ and a later cell changes
# the working directory into the yolov5 checkout, which would silently break
# paths written relative to /content.
DATASET_ROOT = '/content/drive/MyDrive/ML_IDA24_USDCdataset'
TRAIN_DATA_DIR = DATASET_ROOT + '/train_images/train_images'
TEST_DATA_DIR = DATASET_ROOT + '/test_images/test_images'

In [15]:
# Sanity check: list the training images ({...} interpolates the Python variable into the shell command).
!ls {TRAIN_DATA_DIR}

1478019952686311006_jpg.rf.JLSB3LP2Q4RuGHYKqfF6.jpg
1478019953180167674_jpg.rf.azslsZnM8FLQPu3QWLTl.jpg
1478019953689774621_jpg.rf.UpOkvtBppZZaLfvVZ0JX.jpg
1478019954685370994_jpg.rf.Jah8pEPis5n1x6iQCtmT.jpg
1478019955185244088_jpg.rf.gJpj2eCO1Dd7Sic9WlhE.jpg
1478019955679801306_jpg.rf.QnW1eAO23mmDm9ap1gm7.jpg
1478019956186247611_jpg.rf.GnF8WwC0ZJpj6eNo46lF.jpg
1478019957180061202_jpg.rf.YHpll4d5Wk2IKGKflOe3.jpg
1478019957687018435_jpg.rf.fkmnRl8x2gQznYHV13Bp.jpg
1478019958179775471_jpg.rf.YbZnPudSlDQsDo3YPZUh.jpg
1478019958682197101_jpg.rf.XCoBnRWpmUTZFpYFGG3Y.jpg
1478019959187829768_jpg.rf.UdTiXYVJQPlo7cvSdGVU.jpg
1478019960680764792_jpg.rf.JzgjCNvlnf4KVqsEyFn6.jpg
1478019961182003465_jpg.rf.1YoPELIQZqEgJpEQITU3.jpg
1478019961680640592_jpg.rf.QdTm8KjQ6HYfMEiUIcP6.jpg
1478019962681840550_jpg.rf.9SaPKKeOtxgtGv5hULvq.jpg
1478019963181283434_jpg.rf.dmpxlfrl1sH1A5bYrYwR.jpg
1478019963682173845_jpg.rf.fBeA4q8BG2i9HeZL4ELP.jpg
1478019964181479375_jpg.rf.ZY6JR1OkA8sBeFkxfFWg.jpg
147801996468

In [16]:
# Sanity check: list the test images ({...} interpolates the Python variable into the shell command).
!ls {TEST_DATA_DIR}

Prepare repo & weights for YOLOv5 model

In [17]:
if not os.path.exists('yolov5'):
    !git clone https://github.com/ultralytics/yolov5.git
%cd yolov5/
!pip install -r requirements.txt

Cloning into 'yolov5'...
remote: Enumerating objects: 17067, done.[K
remote: Counting objects: 100% (45/45), done.[K
remote: Compressing objects: 100% (37/37), done.[K
remote: Total 17067 (delta 22), reused 23 (delta 8), pack-reused 17022 (from 1)[K
Receiving objects: 100% (17067/17067), 15.69 MiB | 21.83 MiB/s, done.
Resolving deltas: 100% (11716/11716), done.
/content/yolov5
Collecting thop>=0.1.1 (from -r requirements.txt (line 14))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Collecting ultralytics>=8.2.34 (from -r requirements.txt (line 18))
  Downloading ultralytics-8.3.38-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics>=8.2.34->-r requirements.txt (line 18))
  Downloading ultralytics_thop-2.0.12-py3-none-any.whl.metadata (9.4 kB)
Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Downloading ultralytics-8.3.38-py3-none-any.whl (896 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [3

Vanilla YOLOv5 tuning for custom dataset

YOLOv5 tuning for custom dataset with custom augmentations

Utility functions

In [11]:
def create_dataset(images_df: pd.DataFrame, annotations_df: pd.DataFrame) -> list:
    """Attach to every image record the annotations that reference it.

    Args:
        images_df: frame providing the ``image_id`` and ``file_name`` columns.
        annotations_df: frame providing the ``image_id``, ``category_id``,
            ``bbox`` and ``area`` columns.

    Returns:
        A list with one entry per row of ``images_df`` (same order), each of the
        form ``{'file_name': ..., 'annotations': [{'class', 'bbox', 'area'}, ...]}``.
        Annotations keep their order from ``annotations_df``; an image that no
        annotation references gets an empty list.
    """
    # Bucket the annotations by image in one pass; re-filtering the whole frame
    # for every image costs O(images x annotations).
    annotations_by_image = {}
    for _, ann_row in annotations_df.iterrows():
        annotations_by_image.setdefault(ann_row['image_id'], []).append({
            'class' : ann_row['category_id'],
            'bbox' : ann_row['bbox'],
            'area' : ann_row['area']
        })

    train_dataset = []
    for _, img_row in images_df.iterrows():
        train_dataset.append({
            'file_name' : img_row['file_name'],
            # Copy so that two rows sharing an image_id never share one list object.
            'annotations' : list(annotations_by_image.get(img_row['image_id'], []))
        })
    return train_dataset


class LocalContrastNorm:
    """Local Contrast Normalization.

    Subtracts from every pixel the mean of its ``kernel_size`` x ``kernel_size``
    neighbourhood and divides by the neighbourhood standard deviation, which is
    floored at 1 so that flat regions are not amplified.

    Args:
        kernel_size: side of the square averaging window.
        eps: constant added under the square root for numerical stability.

    NOTE(review): an even ``kernel_size`` combined with ``padding=kernel_size//2``
    makes the pooled maps one pixel larger than the input, so the subtraction
    would not line up — presumably only odd sizes are intended; confirm.
    NOTE(review): for inputs scaled to [0, 1] the local standard deviation can
    never reach 1, so the floor always applies and only the local mean is
    removed — confirm this is the intended behaviour.
    """
    def __init__(self, kernel_size=3, eps=1e-5):
        self.kernel_size = kernel_size
        self.eps = eps

    def __call__(self, pic):
        """Return the normalised copy of tensor ``pic``; the input is not modified."""
        pad = self.kernel_size // 2
        # `nn.functional` is used because the notebook never imports the `F` alias.
        mean = nn.functional.avg_pool2d(pic, self.kernel_size, stride=1, padding=pad)
        centered = pic - mean
        variance = nn.functional.avg_pool2d(centered ** 2, self.kernel_size, stride=1, padding=pad)
        # Floor the deviation at 1 (out-of-place equivalent of `std[std < 1] = 1`).
        std = torch.clamp(torch.sqrt(variance + self.eps), min=1)
        return centered / std


def plot_image(image, bboxes, labels, classes_colors):
    """Outline every box on ``image`` and display the result with matplotlib.

    Each box is ``(xmin, ymin, width, height)``. Box ``i`` is outlined in
    ``classes_colors[labels[i]]``. The rectangles are drawn directly onto the
    ``image`` object that was passed in.
    """
    for idx, (left, top, box_w, box_h) in enumerate(bboxes):
        corners = [left, top, left + box_w, top + box_h]
        outline_color = classes_colors[labels[idx]]
        ImageDraw.Draw(image).rectangle(corners, outline=outline_color, width=3)
    plt.imshow(image)
    plt.show()


def rotate_point(point: tuple, old_img_size: tuple, new_img_size: tuple,  angle: float):
    """Map a point of the original image to its position in the rotated image.

    The point is rotated by ``-angle`` degrees about the centre of the original
    ``(width, height)`` canvas and then re-expressed relative to the centre of
    the new ``(width, height)`` canvas.

    Returns:
        ``(x, y)`` of the point in the new image's coordinates.
    """
    px, py = point
    old_w, old_h = old_img_size
    theta = math.radians(-angle)
    new_w, new_h = new_img_size

    # Offset of the point from the centre of the source canvas.
    dx = px - old_w / 2
    dy = py - old_h / 2

    # Plain 2-D rotation of that offset, then shift onto the target canvas centre.
    x_rot = dx * math.cos(theta) - dy * math.sin(theta)
    y_rot = dx * math.sin(theta) + dy * math.cos(theta)

    return x_rot + new_w / 2, y_rot + new_h / 2


def rotate_bbox(bbox, old_img_size: tuple, new_img_size: tuple, angle: float):
    """Return the axis-aligned box enclosing ``bbox`` after the image rotation.

    ``bbox`` is ``(x_min, y_min, width, height)``. Its four corners are passed
    through ``rotate_point`` and the result is the smallest upright rectangle
    containing them, again as ``[x_min, y_min, width, height]``.
    """
    left, top, box_w, box_h = bbox
    corners = (
        (left, top),
        (left, top + box_h),
        (left + box_w, top + box_h),
        (left + box_w, top),
    )

    moved = [rotate_point(corner, old_img_size, new_img_size, angle) for corner in corners]
    xs = [pt[0] for pt in moved]
    ys = [pt[1] for pt in moved]

    new_left = min(xs)
    new_top = min(ys)
    return [new_left, new_top, max(xs) - new_left, max(ys) - new_top]

Dataset preparation

In [None]:
# The annotation file sits next to the image folders on the mounted Drive (see
# the folder listing earlier in the notebook). An absolute path keeps this cell
# working after the working directory has been changed to the yolov5 checkout.
ANNOTATIONS_PATH = '/content/drive/MyDrive/ML_IDA24_USDCdataset/usdc_train.json'
with open(ANNOTATIONS_PATH, 'r', encoding='utf-8') as file:
    train_data = json.load(file)

In [None]:
# Class-index -> class-name lookup, keyed by `id - 1`.
classes_dict = {
    category['id'] - 1: category['name']
    for category in train_data['categories']
}
# Drop the entry that came from category id 0 (raises KeyError if it is absent).
classes_dict.pop(-1)
classes_dict

In [None]:
# Outline colour per class index, used when drawing bounding boxes.
classes_colors = [
    'red',
    'blue',
    'green',
    'violet',
    'gold',
    'brown',
    'darkseagreen',
    'aquamarine',
    'olive',
    'plum',
    'sandybrown',
]

In [None]:
# Tabulate the raw annotations and keep only the columns used downstream.
unused_annotation_columns = ['id', 'segmentation', 'iscrowd', 'confidence', 'score']
annotations_df = (
    pd.DataFrame(train_data['annotations'])
    .drop(columns=unused_annotation_columns)
)
annotations_df.head()

In [None]:
# Which category ids actually occur in the annotations?
pd.unique(annotations_df['category_id'])

In [None]:
# Shift category ids down by one so they match the `id - 1` keys of classes_dict.
# The values are derived from the raw annotations instead of from the column
# itself, so re-running this cell does not shift the ids a second time.
annotations_df['category_id'] = [ann['category_id'] - 1 for ann in train_data['annotations']]

In [None]:
# Tabulate the image records, keep only id + file name, and name the key
# column `image_id` so it lines up with the annotations table.
unused_image_columns = ['license', 'height', 'width', 'date_captured']
images_df = (
    pd.DataFrame(train_data['images'])
    .drop(columns=unused_image_columns)
    .rename(columns={'id' : 'image_id'})
)
images_df.head()

In [None]:
# Per-image samples: file name plus the list of its annotations.
train_annotations = create_dataset(images_df=images_df, annotations_df=annotations_df)

In [None]:
class DetectionDataset(Dataset):
    """Detection dataset yielding an image together with its boxes and labels.

    Args:
        annotations: sequence of samples, each a mapping with a ``'file_name'``
            and an ``'annotations'`` list whose items carry ``'bbox'`` and
            ``'class'``.
        img_dir: directory the image files are read from.
        transform: optional object exposing ``forward(image, bboxes)`` that
            returns the transformed ``(image, bboxes)`` pair.
    """
    def __init__(self, annotations: list, img_dir: str, transform=None):
        self.annotations = annotations
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Number of samples."""
        return len(self.annotations)

    def _image_path(self, file_name: str) -> str:
        """Resolve an image file name against this dataset's own directory."""
        # Use the directory given to the constructor, not the notebook-level
        # TRAIN_DATA_DIR, so the same class also serves other image folders.
        return os.path.join(self.img_dir, file_name)

    def __getitem__(self, idx):
        """Return ``(image, {'bboxes': float32 tensor, 'labels': int64 tensor})``."""
        sample = self.annotations[idx]
        image = Image.open(self._image_path(sample['file_name']))

        objects = sample['annotations']
        # reshape(-1, 4) keeps an (N, 4) layout even when an image has no boxes.
        bboxes = torch.tensor([obj['bbox'] for obj in objects],
                              dtype=torch.float32).reshape(-1, 4)
        labels = torch.tensor([obj['class'] for obj in objects], dtype=torch.int64)

        if self.transform:
            image, bboxes = self.transform.forward(image, bboxes)

        return image, {'bboxes' : bboxes, 'labels' : labels}

In [None]:
# Untransformed dataset over the training images (no augmentation attached).
train_dataset = DetectionDataset(annotations=train_annotations, img_dir=TRAIN_DATA_DIR)

In [None]:
# Fetch the sample once: each index access reloads the image, and once a random
# transform is attached separate accesses would return an image and boxes that
# come from different augmentation draws.
SAMPLE_IDX = 2323
image, target = train_dataset[SAMPLE_IDX]
bboxes = target['bboxes']
labels = target['labels']

plot_image(image, bboxes, labels, classes_colors)

Local Contrast Normalization

In [None]:
class BasicTransform:
    """Random augmentation of a PIL image together with its ``[x, y, w, h]`` boxes.

    Steps, in order: horizontal mirror (with probability ``flip_prob``), rotation
    by a random whole number of degrees in ``[-max_angle, max_angle]`` with canvas
    expansion, colour jitter, random inversion, optional resize, conversion to a
    tensor and optional local contrast normalisation.

    Args:
        resize: size passed to ``transforms.Resize``; falsy disables resizing.
        lcn: apply ``LocalContrastNorm(kernel_size=7)`` to the final tensor.
        flip_prob: probability of the horizontal mirror (default 0.5).
        max_angle: largest absolute rotation angle in degrees; must be an
            integer because the angle is drawn with ``random.randint`` (default 30).
    """
    def __init__(self, resize=None, lcn=False, flip_prob=0.5, max_angle=30):
        self.resize = resize
        self.lcn = lcn
        self.flip_prob = flip_prob
        self.max_angle = max_angle
        # Photometric ops do not depend on the sample, so build them once here
        # instead of on every call.
        self.color_jitter = v2.ColorJitter(brightness=(0.4, 2), contrast=(1, 6),
                                           saturation=(0, 2.5), hue=(-0.25,0.25))
        self.random_invert = v2.RandomInvert(0.3)

    def forward(self, img, bboxes):
        """Augment ``img`` and return ``(image_tensor, transformed_bboxes)``.

        ``bboxes`` is expected to be a float tensor with one ``[x, y, w, h]`` row
        per box. The caller's tensor is left untouched; a transformed copy is
        returned.
        """
        # Work on a copy so the caller's tensor is never modified in place.
        bboxes = bboxes.clone()

        if random.uniform(0, 1) < self.flip_prob:
            img = ImageOps.mirror(img)
            img_width = img.size[0]
            for i, bbox in enumerate(bboxes):
                x, y, w, h = bbox.tolist()
                # Mirroring moves the left edge to where the right edge used to be.
                bboxes[i] = torch.tensor([img_width - x - w, y, w, h], dtype=torch.float32)

        angle = random.randint(-self.max_angle, self.max_angle)
        # expand=1 grows the canvas so no part of the rotated image is cut off.
        r_image = img.rotate(angle, expand=1)
        for i, bbox in enumerate(bboxes):
            # Plain Python floats, like the mirror branch above, before the
            # result is stored back as float32.
            bboxes[i] = torch.tensor(rotate_bbox(bbox.tolist(), img.size, r_image.size, angle),
                                     dtype=torch.float32)

        r_image = self.color_jitter(r_image)
        r_image = self.random_invert(r_image)

        if self.resize:
            src_w, src_h = r_image.size
            r_image = transforms.Resize(self.resize)(r_image)
            dst_w, dst_h = r_image.size
            scale_x = dst_w / src_w
            scale_y = dst_h / src_h
            for i, bbox in enumerate(bboxes):
                x, y, w, h = bbox.tolist()
                bboxes[i] = torch.tensor([x * scale_x, y * scale_y, w * scale_x, h * scale_y],
                                         dtype=torch.float32)

        r_image = transforms.ToTensor()(r_image)

        if self.lcn:
            r_image = LocalContrastNorm(kernel_size=7)(r_image)

        return r_image, bboxes