In [1]:
from pathlib import Path

import torch
import fiftyone as fo

from src.data.torch_dataset import make_tmod_dataloader
from src.data.fo_dataset import load_all_splits_as_one

dataset_root = Path("data")
dataset = load_all_splits_as_one(dataset_root, ['train', 'valid', 'test'])

image_paths, sample_ids = zip(*[(s.filepath, s.id) for s in dataset])

 100% |█████████████████| 360/360 [297.8ms elapsed, 0s remaining, 1.2K samples/s]  
 100% |█████████████████| 101/101 [95.7ms elapsed, 0s remaining, 1.1K samples/s]   
 100% |███████████████████| 51/51 [58.3ms elapsed, 0s remaining, 875.0 samples/s]  


In [2]:
dataloader = make_tmod_dataloader(image_paths, sample_ids, batch_size=32, num_workers=4)

In [3]:
print("Building model")
model = torch.nn.Sequential(
    torch.nn.Conv2d(3, 32, 5), # 636 
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(2), # 318
    torch.nn.Conv2d(32, 64, 3), # 316
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(2), # 158
    torch.nn.Flatten(), 
    torch.nn.Linear(64 * 158 * 158, 128),
    torch.nn.ReLU(),
    torch.nn.Linear(128, 4)
)

model.to('mps')
loss_fn = torch.nn.L1Loss(reduction='none')
optim = torch.optim.Adam(model.parameters(), lr=1e-3)

def full_iteration(dataloader, dataset, model, loss_fn, optim):
    for imgs, img_ids in pb(dataloader):
        # get bounding box labels
        bounding_boxes_gt = dataset[img_ids].values('detections.detections.bounding_box')
        null_bboxes = []
        for i, bb in enumerate(bounding_boxes_gt):
            if bb is None:
                null_bboxes.append(i)
                bounding_boxes_gt[i] = [[-1, -1, -1, -1]]
        bounding_boxes_gt = torch.tensor(bounding_boxes_gt).squeeze(1)

        # move to GPU
        imgs = imgs.to('mps')
        bounding_boxes_gt = bounding_boxes_gt.to('mps')

        # training
        optim.zero_grad()
        bbox_preds = model(imgs)
        loss = loss_fn(bbox_preds, bounding_boxes_gt)
        loss = loss.mean(dim=1)
        # mask out samples with no bounding box
        for i in null_bboxes:
            loss[i] = 0
        loss = loss.mean()
        loss.backward()
        optim.step()
        print(loss.item())
        return loss.item()

Building model


In [5]:
print("Starting training")
with fo.ProgressBar() as pb:
    for epoch in range(10):
        print(f"Epoch {epoch}")
        full_iteration(dataloader, dataset, model, loss_fn, optim)

Starting training
Epoch 0
   0% ||------------------|  0/16 [5.4s elapsed, ? remaining, ? samples/s] 

In [None]:
loss

tensor([0.0000, 0.3980], device='mps:0', grad_fn=<CopySlices>)

In [None]:
mask = torch.Tensor([1 if bb != None else 0 for bb in bounding_boxes_gt])
for i, bb in enumerate(bounding_boxes_gt):
    if bb is None:
        bounding_boxes_gt[i] = [[-1, -1, -1, -1]]
bounding_boxes_gt = torch.tensor(bounding_boxes_gt).squeeze(1)

# move to GPU
imgs = imgs.to('mps')
bounding_boxes_gt = bounding_boxes_gt.to('mps')

# training
optim.zero_grad()
print(imgs.shape)
bbox_preds = model(imgs)
loss = loss_fn(bbox_preds, bounding_boxes_gt)
print(loss)
print(bounding_boxes_gt)
loss = (loss * mask)
print(f'loss after masking')
print(loss)

  bounding_boxes_gt = torch.tensor(bounding_boxes_gt).squeeze(1)


torch.Size([2, 3, 640, 640])
tensor([[1.0318, 1.0833, 0.9429, 0.9933],
        [0.8398, 0.1728, 0.1817, 0.4675]], device='mps:0',
       grad_fn=<AbsBackward0>)
tensor([[-1.0000, -1.0000, -1.0000, -1.0000],
        [ 0.9094,  0.2219,  0.0906,  0.5188]], device='mps:0')


RuntimeError: The size of tensor a (4) must match the size of tensor b (2) at non-singleton dimension 1

In [None]:
mask

[False, False]