## Dataset Preparation

#### - Paths

In [1]:
# Dataset
import os
import pathlib

# Root directory of the COCO dataset.
# Set the COCO_DATASET_PATH environment variable to point the notebook at a
# different location; when it is unset (or empty) the original default is used.
dataset_path = pathlib.Path(
    os.environ.get("COCO_DATASET_PATH")
    or "/home/emvasilopoulos/projects/datasets/coco128"
).expanduser()

# Layout below the dataset root, as used by the rest of this notebook.
images_path = dataset_path / "images"
train_images_path = images_path / "train2017"
val_images_path = images_path / "val2017"
annotations_path = dataset_path / "annotations"

#### - Download (Run once)

In [None]:
import custom_yolo_lib.dataset.coco.downloader

# Short alias for the downloader module used throughout this cell.
coco_downloader = custom_yolo_lib.dataset.coco.downloader

# Create the images and annotations directories if they are missing.
for required_dir in (images_path, annotations_path):
    required_dir.mkdir(parents=True, exist_ok=True)

# Each download is guarded by a glob on its target directory: the call is
# skipped as soon as one matching file is found there.
annotations_present = any(annotations_path.glob("*.json"))
if not annotations_present:
    coco_downloader.download_train_val_annotations_2017(annotations_path)

# NOTE(review): both image download calls receive the *other* split's directory
# as second argument; what that argument means is not visible from this
# notebook -- confirm against the downloader's signature.
train_images_present = any(train_images_path.glob("*.jpg"))
if not train_images_present:
    coco_downloader.download_train_images_2017(train_images_path, val_images_path)

val_images_present = any(val_images_path.glob("*.jpg"))
if not val_images_present:
    coco_downloader.download_val_images_2017(val_images_path, train_images_path)

print("Manually unzip and organize the downloaded data")

#### - Parse raw COCO annotations into the repo's format (Run once)

In [None]:
import custom_yolo_lib.dataset.coco.raw_annotations_parser
import custom_yolo_lib.dataset.coco.tasks.utils

val_annotations_path = annotations_path / "instances_val2017.json"
train_annotations_path = annotations_path / "instances_train2017.json"

# Short alias for the task utilities used inside the loop.
coco_task_utils = custom_yolo_lib.dataset.coco.tasks.utils

# Raw annotation file per split; processed in this order (val first, then train).
raw_annotation_files = {
    "val": val_annotations_path,
    "train": train_annotations_path,
}

for split, raw_file in raw_annotation_files.items():
    # Parse the raw COCO "instances" annotations for this split.
    raw_parser = custom_yolo_lib.dataset.coco.raw_annotations_parser.RawCOCOAnnotationsParser(raw_file)
    raw_parser.parse_data()

    # Ask the task utilities for the grouped JSON file name of this split.
    grouped_name = coco_task_utils.get_task_file(
        "instances",
        split,
        "2017",
        is_grouped=True,
        filetype=coco_task_utils.AnnotationsType.json,
    )

    # Write the parsed data next to the raw file, then convert that JSON to CSV.
    grouped_annotations_path = raw_file.parent / grouped_name
    raw_parser.write_data(grouped_annotations_path)
    coco_task_utils.convert_grouped_instances_json_to_csv(grouped_annotations_path)


## Training - Model, Dataset, Configurations

In [2]:
import torch
import custom_yolo_lib.model.bundled
import custom_yolo_lib.dataset.coco.tasks.instances
import custom_yolo_lib.image_size

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
image_size = custom_yolo_lib.image_size.ImageSize(640, 640)

# 80 is presumably the number of COCO object classes -- TODO confirm against YOLOModel.
model = custom_yolo_lib.model.bundled.YOLOModel(80, training=True)
model.to(device)

# Both splits are read from the same dataset root with the same image size and device.
coco_instances_cls = custom_yolo_lib.dataset.coco.tasks.instances.COCOInstances2017
val_dataset = coco_instances_cls(dataset_path, "val", expected_image_size=image_size, device=device)
train_dataset = coco_instances_cls(dataset_path, "train", expected_image_size=image_size, device=device)

# Training batches are shuffled; validation batches keep the dataset order.
training_loader = torch.utils.data.DataLoader(train_dataset, batch_size=2, shuffle=True)
validation_loader = torch.utils.data.DataLoader(val_dataset, batch_size=2, shuffle=False)

In [7]:
# Smoke test: run a single validation batch through the model and print the
# shape of one anchor output for each of the three prediction objects it returns.
#
# `validation_loader`, `model` and `device` are the objects created in the
# training setup cell; the validation dataset and loader are reused instead of
# being instantiated a second time with identical arguments.
with torch.no_grad():  # shapes only -- no need to record an autograd graph
    for images, targets in validation_loader:
        images = images.to(device)
        # Targets are not consumed by the forward pass; they are moved as well so
        # that both halves of the batch live on the same device after this cell.
        targets = targets.to(device)
        predictions_s, predictions_m, predictions_l = model(images)
        print(predictions_s.anchor1_output.shape)
        print(predictions_m.anchor2_output.shape)
        print(predictions_l.anchor3_output.shape)
        break  # a single batch is enough for a shape check


torch.Size([2, 85, 80, 80])
torch.Size([2, 85, 40, 40])
torch.Size([2, 85, 20, 20])
