In [16]:
import sys
import torch
import locale
from torch import cuda
import skimage.io as io
from pathlib import Path
import supervision as sv
from ultralytics import YOLO
from matplotlib import pyplot as plt
# Monkeypatch: every later call to locale.getpreferredencoding() returns "UTF-8",
# regardless of the arguments it would normally accept or the system locale.
# NOTE(review): presumably a workaround for hosted runtimes whose default locale
# is not UTF-8 -- confirm it is still needed before keeping it.
locale.getpreferredencoding = lambda: "UTF-8"

In [17]:
# Detect whether the notebook runs on Google Colab or in a local environment.
# The probe package is spelled ``google.colab``; a misspelled module name can
# never be imported, which would pin the flag to False on every platform.
try:
    import google.colab  # noqa: F401 -- imported only to probe availability
    IN_COLLAB = True
except ImportError:
    # Only "package not installed" means "not on Colab"; any other failure
    # should surface instead of being silently swallowed by a bare except.
    IN_COLLAB = False

# Root directories: absolute on Colab, relative to the working directory locally.
path_map = (
    {'DS_ROOT': Path('/datasets'), 'PROC_ROOT': Path('/processed')}
    if IN_COLLAB
    else {'DS_ROOT': Path('./datasets'), 'PROC_ROOT': Path('./processed')}
)

# Source data: the VOC2020 folder with its image and annotation sub-folders.
path_map.update(DS_ONE=path_map['DS_ROOT'] / 'VOC2020')
path_map.update(
    DS_ONE_IMG=path_map['DS_ONE'] / 'JPEGImages',
    DS_ONE_ANN=path_map['DS_ONE'] / 'Annotations',
)

# Processed dataset: output folder plus the dataset description file inside it.
path_map.update(DS_ONE_PROC=path_map['PROC_ROOT'] / 'VOC2020')
path_map.update(DS_ONE_YAML=path_map['DS_ONE_PROC'] / 'data.yaml')

# Confirm file paths
def create_dirs():
    """Resolve every directory entry of ``path_map`` and make sure it exists.

    Entries that are not ``Path`` objects, or that carry a file suffix
    (such as ``data.yaml``), are left untouched. Each remaining entry is
    replaced in ``path_map`` by its resolved form, created on disk
    (including missing parents) and printed.
    """
    print('Creating dir structure...')
    for key in path_map:
        entry = path_map[key]
        # Skip anything that is not a suffix-less Path (i.e. not a directory entry).
        if not isinstance(entry, Path) or entry.suffix:
            continue
        resolved = entry.resolve()
        # Re-assigning an existing key does not change the dict's size, so it
        # is safe to do while iterating.
        path_map[key] = resolved
        resolved.mkdir(parents=True, exist_ok=True)
        print('  ', resolved)
    print('Done!')

# Build the directory tree as soon as the paths are defined.
create_dirs()

Creating dir structure...
   /home/evan/Documents/code/school/SmokeDetection/datasets
   /home/evan/Documents/code/school/SmokeDetection/processed
   /home/evan/Documents/code/school/SmokeDetection/datasets/VOC2020
   /home/evan/Documents/code/school/SmokeDetection/datasets/VOC2020/JPEGImages
   /home/evan/Documents/code/school/SmokeDetection/datasets/VOC2020/Annotations
   /home/evan/Documents/code/school/SmokeDetection/processed/VOC2020
Done!


In [10]:
def make_yaml(yaml_path, class_map):
    """Write a YOLO-style dataset description file.

    Args:
        yaml_path: Destination of the YAML file (str or Path). The resolved
            parent directory of this file is recorded as the dataset root
            under the ``path:`` key.
        class_map: Mapping of class id -> class name, written one entry per
            line under ``names:``.

    The ``train:`` and ``val:`` entries are relative to the dataset root.
    """
    yaml_path = Path(yaml_path)
    # The dataset root is derived from the file location so the function does
    # not depend on any notebook-level variable.
    dataset_root = yaml_path.resolve().parent
    rel_train = 'images/train'
    rel_val = 'images/val'

    lines = [
        f"path: {dataset_root}",
        f"train: {rel_train}",
        f"val: {rel_val}",
        "names:",
    ]
    lines.extend(f"  {class_id}: {name}" for class_id, name in class_map.items())

    # Explicit encoding keeps the output identical across platforms.
    with open(yaml_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(lines) + "\n")

In [None]:
def make_classes(names=None):
    """Build the ``{class_id: class_name}`` mapping used for the dataset YAML.

    Args:
        names: Ordered iterable of class names; the position of each name
            becomes its integer class id. A single string is treated as one
            class name rather than being split into characters.

    Returns:
        dict mapping consecutive integer ids (starting at 0) to class names.

    Raises:
        NotImplementedError: if ``names`` is omitted, because the project's
            class list has not been defined yet.
    """
    # TODO: actually make the classes -- supply the dataset's real class list
    # here (or pass it in) once it is known.
    if names is None:
        raise NotImplementedError(
            "make_classes() has no default class list yet; pass the class names explicitly"
        )
    if isinstance(names, str):
        names = [names]
    return dict(enumerate(names))

In [None]:
# Build the class map, then write the dataset description file.
# make_classes must be *called*: binding the bare name would hand the function
# object itself to the YAML writer, which expects a mapping.
classes = make_classes()
# The writer defined in this notebook is named make_yaml.
make_yaml(path_map['DS_ONE_YAML'], classes)

In [None]:
def check_equal(ds, expected_num: int|None = None):
    assert(len(ds.image_paths) == len(ds.annotations))
    if expected_num is not None:
        assert(len(ds.image_paths) == expected_num)

In [None]:
# Load the source dataset through supervision's YOLO reader.
# NOTE(review): the JPEGImages / Annotations folder names follow the Pascal VOC
# layout -- confirm the annotation files really are in YOLO format.
ds = sv.DetectionDataset.from_yolo(
    data_yaml_path=path_map['DS_ONE_YAML'],
    annotations_directory_path=path_map['DS_ONE_ANN'],
    images_directory_path=path_map['DS_ONE_IMG'],
)

# Sanity check: image and annotation counts must agree before going further.
check_equal(ds)
print(type(ds))

In [None]:
def split_data(ds, split=0.7, shuffle=True, seed=None):
  """Delegate to ``ds.split`` and return whatever it returns.

  Args:
      ds: Object exposing ``split(split_ratio=..., shuffle=..., random_state=...)``.
      split: Forwarded as ``split_ratio``.
      shuffle: Forwarded as ``shuffle``.
      seed: Forwarded as ``random_state``.
  """
  split_kwargs = {
      'split_ratio': split,
      'shuffle': shuffle,
      'random_state': seed,
  }
  return ds.split(**split_kwargs)

def split_70_25_5(ds, seed):
  """Split a dataset into train / validation / test parts.

  The first split uses a ratio of 0.7; the remainder is split again with a
  ratio of 0.83, i.e. roughly 25% and 5% of the original dataset. The same
  ``seed`` is used for both splits.

  Returns:
      Tuple ``(train, val, test)``.
  """
  train_part, holdout = split_data(ds, split=0.7, seed=seed)
  val_part, test_part = split_data(holdout, split=0.83, seed=seed)
  return train_part, val_part, test_part

In [None]:
# Partition the dataset; the literal 0 is the seed handed to both splits.
ds_train, ds_val, ds_test = split_70_25_5(ds, 0)

# Every partition must still pair each image with an annotation.
for _subset in (ds_train, ds_test, ds_val):
    check_equal(_subset)