# Inspect YOLOv3 Model

In [2]:
import os
from pprint import pprint
import numpy as np
from PIL import Image
from matplotlib import pyplot as plt
import torch
from torch import Tensor
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import Subset
from torchvision.datasets import VOCDetection, wrap_dataset_for_transforms_v2
from torchvision.transforms import v2
from torchvision.tv_tensors._dataset_wrapper import VOC_DETECTION_CATEGORIES, VOC_DETECTION_CATEGORY_TO_IDX
from torchvision import tv_tensors
from torchvision.utils import draw_bounding_boxes
from torchvision.ops import box_convert, clip_boxes_to_image, nms, batched_nms
from torchmetrics.detection import MeanAveragePrecision
from torchinfo import summary
import sys
sys.path.append('../')
from models.yolov3 import Yolov3Config, Yolov3
# from dataloaders.voc import Resize, Voc2Yolov3  # TODO: uncomment after implementation
from evaluator import DetEvaluator
from torch.utils.tensorboard import SummaryWriter
# Visualization palette: one RGB tuple per entry, 20 entries in total.
palette = [
    (106, 0, 228), (119, 11, 32), (165, 42, 42), (0, 0, 192), (197, 226, 255),
    (0, 60, 100), (0, 0, 142), (255, 77, 255), (153, 69, 1), (120, 166, 157),
    (0, 182, 199), (0, 226, 252), (182, 182, 255), (0, 0, 230), (220, 20, 60),
    (163, 255, 0), (0, 82, 0), (3, 95, 161), (0, 80, 100), (183, 130, 88),
]

## Inspect Structure & In/Out Shape

In [13]:
# Input resolution and detection-head hyperparameters used to build the model.
img_h, img_w = 416, 416
n_class = 80  # 80 for coco, 20 for voc
n_scale = 3
n_anchor_per_scale = 3
# Anchor sizes grouped per scale: n_scale groups of n_anchor_per_scale pairs.
# NOTE(review): pairs are presumably (width, height) in input-image pixels -- confirm against Yolov3Config.
anchors = (
    ((10, 13), (16, 30), (33, 23)),
    ((30, 61), (62, 45), (59, 119)),
    ((116, 90), (156, 198), (373, 326)),
)

model_args = dict(
    img_h=img_h,
    img_w=img_w,
    n_class=n_class,
    n_scale=n_scale,
    n_anchor_per_scale=n_anchor_per_scale,
    anchors=anchors,
)

model_config = Yolov3Config(**model_args)
model = Yolov3(model_config)

batch_size = 1
# Use the GPU when one is available, otherwise fall back to CPU so the cell
# still runs on CPU-only machines instead of raising.
summary_device = 'cuda' if torch.cuda.is_available() else 'cpu'
summary(model, input_size=[(batch_size, 3, img_h, img_w),], device=summary_device,
        col_names=("input_size", "output_size", "num_params", "kernel_size", "mult_adds", "trainable"), verbose=2,
        depth=4, row_settings=("depth", "var_names"));

number of parameters: 61.95M
Layer (type (var_name):depth-idx)                       Input Shape               Output Shape              Param #                   Kernel Shape              Mult-Adds                 Trainable
Yolov3 (Yolov3)                                         [1, 3, 416, 416]          [1, 3, 52, 52, 85]        --                        --                        --                        True
├─Darknet53Backbone (backbone): 1-1                     [1, 3, 416, 416]          [1, 256, 52, 52]          --                        --                        --                        True
│    └─conv0.conv.weight                                                                                    ├─864                     [32, 3, 3, 3]
│    └─conv0.bn.weight                                                                                      ├─32                      [32]
│    └─conv0.bn.bias                                                                                      

In [14]:
# Export the model graph to TensorBoard using a random dummy batch.
imgs = torch.randn(batch_size, 3, img_h, img_w)
# The dummy batch is created on CPU, so the model is moved to CPU to match.
model.to('cpu')
# The context manager closes (and flushes) the writer even if add_graph raises.
with SummaryWriter() as writer:
    writer.add_graph(model, [imgs,])

In [16]:
# Inspect the shapes returned by model.generate() in evaluation mode.
model.eval()
imgs = torch.randn(batch_size, 3, img_h, img_w)
# Only shapes are inspected here, so autograd tracking is disabled to avoid
# building an unused graph.
with torch.no_grad():
    logits = model.generate(imgs)
print("Output when eval():")
for logit in logits:
    print(logit.shape)

Output when eval():
torch.Size([1, 10647, 85])
torch.Size([1, 3, 52, 52, 85])
torch.Size([1, 3, 26, 26, 85])
torch.Size([1, 3, 13, 13, 85])


In [17]:
# Inspect the shapes returned by a plain forward pass in training mode.
model.train()
imgs = torch.randn(batch_size, 3, img_h, img_w)
logits = model(imgs)

print("Output when train():")
for shape in (scale_out.shape for scale_out in logits):
    print(shape)

Output when train():
torch.Size([1, 3, 52, 52, 85])
torch.Size([1, 3, 26, 26, 85])
torch.Size([1, 3, 13, 13, 85])


In [18]:
# Tally the element counts of every parameter that has requires_grad=True.
model.train()
trainable_sizes = [param.numel() for param in model.parameters() if param.requires_grad]
total_params = sum(trainable_sizes)
print(f"Number of trainable parameters: {total_params}")

Number of trainable parameters: 61949149


## Inspect state_dict

In [19]:
import tempfile

# Round-trip the state_dict through a temporary checkpoint file so the keys
# listed below are the ones read back from a saved checkpoint.
# A TemporaryDirectory is used rather than NamedTemporaryFile because
# re-opening a still-open NamedTemporaryFile by name fails on Windows.
with tempfile.TemporaryDirectory() as tmp_dir:
    ckpt_path = os.path.join(tmp_dir, 'state_dict.pth')
    torch.save(model.state_dict(), ckpt_path)
    # map_location='cpu' makes the load independent of the device the model was
    # saved from; weights_only=True restricts unpickling to tensors and plain
    # containers instead of arbitrary pickled objects.
    state_dict = torch.load(ckpt_path, map_location='cpu', weights_only=True)

# Print the keys of the state_dict
for key in state_dict:
    print(key)


anchors_scale3
anchors_scale4
anchors_scale5
backbone.conv0.conv.weight
backbone.conv0.bn.weight
backbone.conv0.bn.bias
backbone.conv0.bn.running_mean
backbone.conv0.bn.running_var
backbone.conv0.bn.num_batches_tracked
backbone.conv1.conv.weight
backbone.conv1.bn.weight
backbone.conv1.bn.bias
backbone.conv1.bn.running_mean
backbone.conv1.bn.running_var
backbone.conv1.bn.num_batches_tracked
backbone.stage1_block1.conv1x1.conv.weight
backbone.stage1_block1.conv1x1.bn.weight
backbone.stage1_block1.conv1x1.bn.bias
backbone.stage1_block1.conv1x1.bn.running_mean
backbone.stage1_block1.conv1x1.bn.running_var
backbone.stage1_block1.conv1x1.bn.num_batches_tracked
backbone.stage1_block1.conv3x3.conv.weight
backbone.stage1_block1.conv3x3.bn.weight
backbone.stage1_block1.conv3x3.bn.bias
backbone.stage1_block1.conv3x3.bn.running_mean
backbone.stage1_block1.conv3x3.bn.running_var
backbone.stage1_block1.conv3x3.bn.num_batches_tracked
backbone.conv2.conv.weight
backbone.conv2.bn.weight
backbone.conv2.

## Inspect gradients

In [None]:
# TODO