In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell executes. Edits to the local mini_yolov3
# package then take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from mini_yolov3.model import MiniYOLOV3
from mini_yolov3.trainer import Trainer
import torch
from torchvision.transforms import v2
from mini_yolov3.dataset import SVHNDataset
from torch.utils.data import Subset

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Anchor-box priors handed to the model: one row per anchor, a single anchor here.
# NOTE(review): presumably (width, height) as fractions of the image size --
# confirm against MiniYOLOV3.
# torch.tensor(...) with an explicit dtype is the documented factory for
# building a tensor from literal data; torch.Tensor(...) is the legacy type
# constructor and hides the dtype.
anchors = torch.tensor([[0.2, 0.8]], dtype=torch.float32)

In [4]:
# Seed the global RNG before the model is built so weight initialisation is
# repeatable after Restart Kernel -> Run All.
SEED = 0
torch.manual_seed(SEED)

# Prefer the Apple-silicon (MPS) backend when it is available and fall back to
# the CPU otherwise, instead of crashing on machines without MPS.
# NOTE(review): assumes Trainer follows the model's device -- confirm.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# 32x32 inputs, 10 classes, and the single anchor defined above.
model = MiniYOLOV3(
    image_size=32,
    num_classes=10,
    anchors=anchors,
)

# Module.to() returns the module itself, so leaving it as the last expression
# both moves the parameters to `device` and displays the layer summary.
model.to(device)

MiniYOLOV3(
  (conv): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
    (2): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
    (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (4): ReLU()
    (5): Conv2d(32, 15, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  )
)

In [5]:
# Image preprocessing: convert to a float32 image tensor with rescaled values,
# then resize to the 32x32 input size the model was built with.
# v2.ToTensor() is deprecated in torchvision's v2 API; ToImage followed by
# ToDtype(torch.float32, scale=True) is its documented replacement.
image_transform = v2.Compose([
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
    v2.Resize((32, 32)),
])

# The full training split gets its own name, so `train_dataset` is not
# rebound from one kind of object to another.
full_train_dataset = SVHNDataset(split="train", image_transform=image_transform)

# Keep only the first sample.
# NOTE(review): presumably a one-example subset for a quick overfitting /
# sanity run of the training loop -- widen the range for real training.
train_dataset = Subset(full_train_dataset, range(1))



In [6]:
# Look at the bounding boxes stored under the "bbox" key of the first sample.
# NOTE(review): the values look normalised to the image size; the coordinate
# convention (corner vs. centre, w/h order) should be confirmed in SVHNDataset.
first_sample = train_dataset[0]
first_sample["bbox"]

tensor([[0.3551, 0.0217, 0.1963, 0.8696],
        [0.5327, 0.0652, 0.1495, 0.8696]])

In [7]:
# Optimisation settings gathered in one place so they are easy to tune.
# NOTE(review): lambda_coord / lambda_noobj look like YOLO-style loss weights
# for the box-coordinate and no-object terms -- confirm in Trainer.
hyperparams = {
    "num_epochs": 5,
    "lr": 3e-4,
    "lambda_coord": 5.0,
    "lambda_noobj": 0.5,
}

# Bind the model and the dataset to a trainer using those settings.
trainer = Trainer(model=model, train_dataset=train_dataset, **hyperparams)

In [8]:
# Run the training loop and keep what it returns; the saved output suggests
# this is a list with one loss value per epoch. Leaving the name as the last
# expression displays it as the cell result.
loss_history = trainer.train()
loss_history

  0%|          | 0/5 [00:00<?, ?it/s]

pred:  tensor([[ 0.4471,  0.4353,  0.0988,  0.0919],
        [ 0.5109,  0.4891, -0.0147,  0.0146]], device='mps:0',
       grad_fn=<AsStridedBackward0>)
target:  tensor([[ 2.0327e-01,  2.0652e-01, -1.8869e-02,  8.3382e-02],
        [ 1.0748e-01,  1.0000e-08, -2.9080e-01,  8.3382e-02]], device='mps:0')


100%|██████████| 5/5 [00:00<00:00,  5.07it/s, loss=1.75]

pred:  tensor([[ 0.4413,  0.4354,  0.0940,  0.0715],
        [ 0.5058,  0.4856, -0.0308,  0.0161]], device='mps:0',
       grad_fn=<AsStridedBackward0>)
target:  tensor([[ 2.0327e-01,  2.0652e-01, -1.8869e-02,  8.3382e-02],
        [ 1.0748e-01,  1.0000e-08, -2.9080e-01,  8.3382e-02]], device='mps:0')
pred:  tensor([[ 0.4319,  0.4310,  0.0814,  0.0549],
        [ 0.5037,  0.4852, -0.0444,  0.0171]], device='mps:0',
       grad_fn=<AsStridedBackward0>)
target:  tensor([[ 2.0327e-01,  2.0652e-01, -1.8869e-02,  8.3382e-02],
        [ 1.0748e-01,  1.0000e-08, -2.9080e-01,  8.3382e-02]], device='mps:0')
pred:  tensor([[ 0.4229,  0.4264,  0.0721,  0.0406],
        [ 0.5010,  0.4856, -0.0483,  0.0136]], device='mps:0',
       grad_fn=<AsStridedBackward0>)
target:  tensor([[ 2.0327e-01,  2.0652e-01, -1.8869e-02,  8.3382e-02],
        [ 1.0748e-01,  1.0000e-08, -2.9080e-01,  8.3382e-02]], device='mps:0')
pred:  tensor([[ 0.4142,  0.4234,  0.0682,  0.0250],
        [ 0.4966,  0.4846, -0.0562,  0




[1.7461978197097778,
 1.7397525310516357,
 1.7399100065231323,
 1.7446318864822388,
 1.7473220825195312]