In [2]:
import os

# Dataset

In [5]:
dataset_dir = '/Users/apple/YOLO/datasets/coco8/'
img_dir = os.path.join(dataset_dir, 'images')
label_dir = os.path.join(dataset_dir, 'labels')

train_img_dir = os.path.join(img_dir, 'train')
val_img_dir = os.path.join(img_dir, 'val')
list_train_filenames = [file.split('.')[0] for file in os.listdir(train_img_dir)]
list_val_filenames = [file.split('.')[0] for file in os.listdir(val_img_dir)]

In [4]:
list_train_filenames

['000000000009', '000000000034', '000000000030', '000000000025']

In [35]:
import torch
from utils import LetterBox
import numpy as np
import cv2

def pre_transform(im):
    same_shapes = len({x.shape for x in im}) == 1
    letterbox = LetterBox(
        [640, 640],
        auto=False,
        stride=32,
    )
    return [letterbox(image=x) for x in im]

def preprocess(im):
    not_tensor = not isinstance(im, torch.Tensor)
    if not_tensor:
        im = np.stack(pre_transform(im))
        im = im[..., ::-1].transpose((0, 3, 1, 2))  # BGR to RGB, BHWC to BCHW, (n, 3, h, w)
        im = np.ascontiguousarray(im)  # contiguous
        im = torch.from_numpy(im)

    if not_tensor:
        im = im / 255  # 0 - 255 to 0.0 - 1.0
    return im

class MyDataset(torch.utils.data.Dataset):
    def __init__(self, dataset_dir, split, list_filenames, transform=None):
        self.dataset_dir = dataset_dir
        self.split = split
        self.list_filenames = list_filenames
        self.transform = transform
        self.img_dir = os.path.join(dataset_dir, 'images', split)
        self.label_dir = os.path.join(dataset_dir, 'labels', split)
    
    def __len__(self):
        return len(self.list_filenames)
    
    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.list_filenames[idx] + '.jpg')
        label_path = os.path.join(self.label_dir, self.list_filenames[idx] + '.txt')
        img = cv2.imread(img_path)
        img = preprocess([img])

        with open(label_path, 'r') as f:
            labels = f.readlines()
            
        labels = [label.strip().split() for label in labels]
        cls = torch.tensor([int(label[0]) for label in labels])
        box = torch.stack([torch.tensor([float(label[1]), float(label[2]), float(label[3]), float(label[4])]) for label in labels])
        img = img.squeeze(0)
        if self.transform:
            img = self.transform(img)
            
        inputs = {
            'img': img,
            'cls': cls,
            'box': box,
        }
        return inputs
        
train_dataset = MyDataset(dataset_dir, 'train', list_train_filenames)
val_dataset = MyDataset(dataset_dir, 'val', list_val_filenames)

In [36]:
import torch

batch = {
    'img': torch.randn(4, 3, 640, 640),
    'cls': torch.tensor([[22., 22., 45., 45., 50., 45., 49., 49., 49., 49., 45., 45., 58., 75., 23., 23., 45., 45., 49., 49., 49., 49.]]).T,
    'bboxes': torch.randn(22, 4),
    'batch_idx': torch.tensor([0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 3., 3., 3., 3., 3., 3., 3., 3.])
}


In [37]:
# train_dataset[0]

In [43]:
def collate_fn(batch):
    img = torch.stack([sample['img'] for sample in batch])
    cls = torch.cat([sample['cls'] for sample in batch])
    box = torch.cat([sample['box'] for sample in batch])
    batch_idx = torch.cat([torch.tensor([i] * len(sample['cls'])) for i, sample in enumerate(batch)])
    return {
        'img': img,
        'cls': cls,
        'bboxes': box,
        'batch_idx': batch_idx,
    }

In [44]:
batch = collate_fn([train_dataset[0], train_dataset[1]])

In [45]:
batch['img'].shape

torch.Size([2, 3, 640, 640])

# Init Model

In [46]:
import ultralytics
args = ultralytics.utils.IterableSimpleNamespace(
box=7.5,
cls=0.5,
dfl=1.5,
)

In [48]:
from ultralytics.nn.tasks import DetectionModel

model = DetectionModel("/Users/apple/YOLO/ultralytics/ultralytics/cfg/models/11/yolo11.yaml", nc=1000, verbose=False )
model.args = args
model.criterion = model.init_criterion()

loss, loss_items  = model(batch)

Overriding model.yaml nc=80 with nc=1000
