In [3]:
import sys
from pathlib import Path

In [4]:
# Load IPython's autoreload extension so edited project modules are picked up
# without restarting the kernel.
%load_ext autoreload
# Mode 3 reloads all modules before each execution and also picks up objects newly
# added to them; -p reports reload activity via print (per the IPython autoreload
# docs — NOTE(review): confirm the installed IPython version supports -p).
%autoreload 3 -p
# Render matplotlib figures inline in the cell output.
%matplotlib inline

## imports

In [5]:
import pprint

In [6]:
import torch
import torchvision
from torch import nn

In [7]:
from yolov1.models.backbone_factory import BackboneFactory

In [8]:
import yolov1
from yolov1.config import parse_config

## configs

In [65]:
def load_config(config_path="../../yolov1/src/yolov1/configs/default.yaml"):
    """Parse a config file, pretty-print its contents, and return the config.

    Args:
        config_path: Path handed to ``parse_config`` as ``config_file``.
            Defaults to the ``default.yaml`` used throughout this notebook;
            the default is a relative path (presumably resolved against the
            notebook's working directory — verify if the notebook is moved).

    Returns:
        The config object produced by ``parse_config``.
    """
    config = parse_config(config_file=config_path)
    # Dump the parsed values so the notebook records which settings were used.
    pprint.pp(config.dict())
    return config

In [66]:
# Parse the default config; load_config() also pretty-prints it, which produces the dump below.
# NOTE(review): the recorded dump contains absolute, machine-specific paths
# (data.root, training.checkpoints_dir) — consider clearing the output before sharing.
config = load_config()

{'data': {'root': '/home/ajkdrag/workspace/projects/architectures-impl-pytorch/.data',
          'train': 'train',
          'val': 'valid',
          'names': ['Ambulance', 'Bus', 'Car', 'Motorcycle', 'Truck'],
          'augmentations': {'apply': False,
                            'horizontal_flip': 0.8,
                            'vertical_flip': 0.0,
                            'brightness_contrast': 0.8,
                            'shift_scale_rotate': 0.0,
                            'random_crop': 0.0,
                            'random_crop_dims': (448.0, 448.0)}},
 'training': {'epochs': 200,
              'dls_kwargs': {'pin_memory': True,
                             'batch_size': 32,
                             'drop_last': True,
                             'num_workers': 2},
              'optim_kwargs': {'lr': 0.0001, 'weight_decay': 0.0001},
              'checkpoints_dir': '/home/ajkdrag/workspace/projects/architectures-impl-pytorch/checkpoints',
              'sav

## model architecture

In [67]:
from yolov1.models.arch import YOLOv1

In [74]:
# Override the loaded setting so the model built next uses freeze_backbone=False.
# NOTE(review): this mutates the shared config object in place; re-run the
# load_config() cell to get the file's value back.
config.model.freeze_backbone = False

In [75]:
# Build the YOLOv1 model from the `model` section of the config.
# The recorded log shows pretrained timm/resnet18.a1_in1k weights being loaded from the Hugging Face hub.
model = YOLOv1(config.model)

Loading pretrained weights from Hugging Face hub (timm/resnet18.a1_in1k)
[timm/resnet18.a1_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.


In [76]:
model

YOLOv1(
  (backbone): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (drop_block): Identity()
        (act1): ReLU(inplace=True)
        (aa): Identity()
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act2): ReLU(inplace=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1

In [77]:
from yolov1.utils.general import count_parameters

In [78]:
# Parameter count of the model built above with freeze_backbone=False.
# NOTE(review): whether count_parameters counts all parameters or only trainable
# ones is not visible from this notebook.
count_parameters(model)

63565332

In [71]:
# NOTE(review): duplicate of the previous cell, but its recorded output comes from an
# earlier execution (In [71], before freeze_backbone was overridden in In [74]).
# The two recorded counts differ by 63565332 - 52388820 = 11176512, which matches a
# ResNet-18 without its classifier head — presumably the backbone was frozen in that
# run and count_parameters counts only trainable parameters. TODO confirm, then
# re-run or delete this cell so the notebook survives Restart & Run All.
count_parameters(model)

52388820

In [63]:
# Smoke test: push a random batch (2 samples, 3 channels, 224x224) through the model.
batch_224 = torch.rand(2, 3, 224, 224)
out = model(batch_224)

In [64]:
out.shape

torch.Size([2, 7, 7, 10])

In [15]:
import math

In [16]:
# Pretrained ResNet-50 backbone. num_classes=0 and global_pool="" are passed so that
# (presumably, as in timm) no classifier or pooling is applied — TODO confirm
# against BackboneFactory.
backbone = BackboneFactory.create_backbone(
    "resnet50",
    pretrained=True,
    num_classes=0,
    global_pool="",
)

Loading pretrained weights from Hugging Face hub (timm/resnet50.a1_in1k)
[timm/resnet50.a1_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.


In [17]:
# Side length of the backbone's output feature map for a square input image.
INPUT_SIZE = 448  # matches the 448x448 random batch fed to the backbone further down
BACKBONE_STRIDE = 32  # presumably the backbone's total downsampling factor — TODO confirm
backbone_out_scale = math.ceil(INPUT_SIZE / BACKBONE_STRIDE)

In [18]:
backbone_out_scale

14

In [19]:
# Widths of the two hidden fully connected layers used by the detector head below.
detector_hidden_sizes = [1024, 512]

In [20]:
# Detector head: flatten the backbone feature map and map it to the YOLO grid
# through three fully connected layers.
backbone_out_channels = 2048  # presumably ResNet-50's final channel count — TODO confirm
backbone_out_units = backbone_out_channels * backbone_out_scale**2

hidden_a = detector_hidden_sizes[0]
hidden_b = detector_hidden_sizes[1]

# Per grid cell: B * 5 + nc values (presumably B boxes of 5 numbers plus nc class scores).
cell_outputs = config.model.B * 5 + config.model.nc
grid_outputs = config.model.S * config.model.S * cell_outputs

detector = nn.Sequential(
    nn.Flatten(),
    nn.Linear(backbone_out_units, hidden_a),
    nn.LeakyReLU(0.1),
    nn.Dropout(0.2),
    nn.Linear(hidden_a, hidden_b),
    # nn.BatchNorm1d(hidden_b),  # left disabled
    nn.LeakyReLU(0.1),
    nn.Dropout(0.2),
    nn.Linear(hidden_b, grid_outputs),
    # YOLOOutputLayer(),  # left disabled
)

In [99]:
detector

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=401408, out_features=1024, bias=True)
  (2): LeakyReLU(negative_slope=0.1)
  (3): Dropout(p=0.2, inplace=False)
  (4): Linear(in_features=1024, out_features=512, bias=True)
  (5): LeakyReLU(negative_slope=0.1)
  (6): Dropout(p=0.2, inplace=False)
  (7): Linear(in_features=512, out_features=490, bias=True)
)

In [105]:
# Attach the detector head as the backbone's `fc` attribute.
# NOTE(review): this mutates `backbone` in place, and assumes the backbone's forward
# pass hands its unpooled feature map to `fc` (as timm's ResNet appears to) — TODO confirm.
backbone.fc = detector

In [106]:
# Smoke test: push a random batch (2 samples, 3 channels, 448x448) through the
# backbone with the detector head attached.
batch_448 = torch.rand(2, 3, 448, 448)
out = backbone(batch_448)

In [79]:
# NOTE(review): the recorded output below is from In [79], i.e. it was produced before
# the In [105]/[106] cells above were last run, so it does not describe this `out`.
# The detector ends in Linear(512 -> 490) with no reshape layer enabled, so a fresh
# run presumably prints torch.Size([2, 490]) — re-run to confirm.
out.shape

torch.Size([2, 7, 7, 10])