# Inspect Darknet19 Model

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torchinfo import summary
import sys
sys.path.append('../')
from torch.utils.tensorboard import SummaryWriter

## Inspect Structure & In/Out Shape

In [2]:
from models.darknet19 import Darknet19Config, Darknet19

# Classification setup inspected in this section: 224x224 inputs, 1000 classes.
img_h = img_w = 224
n_class = 1000
model_args = {'img_h': img_h, 'img_w': img_w, 'n_class': n_class}

model_config = Darknet19Config(**model_args)
model = Darknet19(model_config)

# Per-layer report for a single dummy image, computed on CPU.
batch_size = 1
summary(
    model,
    input_size=[(batch_size, 3, img_h, img_w)],
    device='cpu',
    col_names=(
        "input_size",
        "output_size",
        "num_params",
        "kernel_size",
        "mult_adds",
        "trainable",
    ),
    verbose=2,
    depth=4,
    row_settings=("depth", "var_names"),
);

number of parameters: 20.84M
Layer (type (var_name):depth-idx)             Input Shape               Output Shape              Param #                   Kernel Shape              Mult-Adds                 Trainable
Darknet19 (Darknet19)                         [1, 3, 224, 224]          [1, 1000]                 --                        --                        --                        True
├─Darknet19Backbone (backbone): 1-1           [1, 3, 224, 224]          [1, 1024, 7, 7]           --                        --                        --                        True
│    └─conv1.conv.weight                                                                          ├─864                     [32, 3, 3, 3]
│    └─conv1.bn.weight                                                                            ├─32                      [32]
│    └─conv1.bn.bias                                                                              ├─32                      [32]
│    └─conv2.conv.weight   

In [3]:
# Dummy batch used to trace the model; `imgs` and `targets` are reused by the
# forward-pass check in the following cell.
imgs = torch.randn(batch_size, 3, img_h, img_w)
targets = torch.randint(0, n_class, (batch_size,))

# Use the writer as a context manager so it is closed (and its event file
# flushed) even when tracing the graph raises.
with SummaryWriter() as writer:
    writer.add_graph(model, [imgs, targets])

In [4]:
# Calling the model with targets yields both the logits and the loss.
logits, loss = model(imgs, targets)
print(logits.shape, loss.shape, sep="\n")

torch.Size([1, 1000])
torch.Size([])


## Inspect state_dict

In [4]:
import os
import tempfile

# Round-trip the weights through a file on disk.
# A path inside a TemporaryDirectory is used instead of re-opening a
# NamedTemporaryFile by name: a second open of a still-open temporary file
# fails on Windows, whereas this works on every platform and is still cleaned
# up automatically when the `with` block exits.
with tempfile.TemporaryDirectory() as temp_dir:
    state_dict_path = os.path.join(temp_dir, 'state_dict.pth')
    torch.save(model.state_dict(), state_dict_path)
    # A state_dict only holds tensors, so the restricted unpickler is enough;
    # map_location keeps the loaded copy on CPU regardless of where it was saved.
    state_dict = torch.load(state_dict_path, map_location='cpu', weights_only=True)

# Print the keys of the state_dict
for key in state_dict:
    print(key)


backbone.conv1.conv.weight
backbone.conv1.bn.weight
backbone.conv1.bn.bias
backbone.conv1.bn.running_mean
backbone.conv1.bn.running_var
backbone.conv1.bn.num_batches_tracked
backbone.conv2.conv.weight
backbone.conv2.bn.weight
backbone.conv2.bn.bias
backbone.conv2.bn.running_mean
backbone.conv2.bn.running_var
backbone.conv2.bn.num_batches_tracked
backbone.conv3.conv.weight
backbone.conv3.bn.weight
backbone.conv3.bn.bias
backbone.conv3.bn.running_mean
backbone.conv3.bn.running_var
backbone.conv3.bn.num_batches_tracked
backbone.conv4.conv.weight
backbone.conv4.bn.weight
backbone.conv4.bn.bias
backbone.conv4.bn.running_mean
backbone.conv4.bn.running_var
backbone.conv4.bn.num_batches_tracked
backbone.conv5.conv.weight
backbone.conv5.bn.weight
backbone.conv5.bn.bias
backbone.conv5.bn.running_mean
backbone.conv5.bn.running_var
backbone.conv5.bn.num_batches_tracked
backbone.conv6.conv.weight
backbone.conv6.bn.weight
backbone.conv6.bn.bias
backbone.conv6.bn.running_mean
backbone.conv6.bn.runnin

## Inspect Gradients

In [3]:
class DebugDependencyDarknet19(Darknet19):
    """Darknet19 whose loss is replaced by a dummy that reads a single image.

    The loss only looks at the logits of the image at position
    ``loss_img_idx`` in the batch, which makes it possible to check which
    inputs a backward pass actually depends on.
    """

    def __init__(self, loss_img_idx, *args, **kwargs):
        """Build the underlying Darknet19 and remember which image feeds the loss.

        Args:
            loss_img_idx: batch index of the image whose logits form the loss.
            *args, **kwargs: forwarded unchanged to ``Darknet19.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.loss_img_idx = loss_img_idx

    def _compute_loss(self, logits: Tensor, targets: Tensor) -> Tensor:
        """Return the dummy loss: the sum of all logits of the selected image.

        Args:
            logits (Tensor): size(N, n_class)
            targets (Tensor): size(N,); unused by this dummy loss
        Returns:
            loss (Tensor): scalar tensor
        """
        selected_logits = logits[self.loss_img_idx]
        return torch.sum(selected_logits)


To verify that the model does not inadvertently mix information across the batch dimension, use a loss that depends only on the i-th image, run the backward pass all the way to the input, and check that only the i-th input receives a non-zero gradient.

In [4]:
# Batch for the dependency check; X tracks gradients so that the backward
# pass can be followed all the way down to the input images.
batch_size = 5
X = torch.randn((batch_size, 3, img_h, img_w), requires_grad=True)
Y = torch.randint(low=0, high=n_class, size=(batch_size,))

In [5]:
print("Setup:")
# Seed before building the model, since kaiming_normal_ weight init is random.
torch.manual_seed(1337)
loss_img_idx = 2
debug_dependency_model = DebugDependencyDarknet19(loss_img_idx, model_config)
# Eval mode: in training mode batchnorm would mix statistics between the
# images of a batch, which would defeat the independence check.
debug_dependency_model.eval()

# Optimizer settings.
optimizer_type, device_type, use_fused = 'adam', 'cpu', False
learning_rate, weight_decay = 3e-4, 5e-4
beta1, beta2 = 0.9, 0.999
optimizer = debug_dependency_model.configure_optimizers(
    optimizer_type,
    learning_rate,
    (beta1, beta2),
    weight_decay,
    device_type,
    use_fused,
)

print("\n\nForward pass:")
logits, loss = debug_dependency_model(X, Y)
print(f"{logits.shape=}")
print(f"{loss=}")

print("\n\nBackward pass & Gradients of X::")
# Clear stale gradients on both the parameters and the input before backprop.
optimizer.zero_grad(set_to_none=True)
X.grad = None
loss.backward()

print(f"{X.grad.shape=}")
# Only the image that feeds the dummy loss should show a non-zero gradient.
for idx_img in range(batch_size):
    grad_is_zero = X.grad[idx_img].eq(0).all()
    print(f"Zero gradient of {idx_img}'th img: {grad_is_zero}")

print("\n\nOptimizer step:")
optimizer.step()
print(f"Loss after optimized: {debug_dependency_model(X, Y)[1]}")

Setup:
number of parameters: 20.84M
num decayed parameter tensors: 19, with 20,826,976 parameters
num non-decayed parameter tensors: 37, with 15,400 parameters
using fused Adam: False


Forward pass:
logits.shape=torch.Size([5, 1000])
loss=tensor(9.1461, grad_fn=<SumBackward0>)


Backward pass & Gradients of X::
X.grad.shape=torch.Size([5, 3, 224, 224])
Zero gradient of 0'th img: True
Zero gradient of 1'th img: True
Zero gradient of 2'th img: False
Zero gradient of 3'th img: True
Zero gradient of 4'th img: True


Optimizer step:
Loss after optimized: -259.7261047363281


## Extract Features

In [2]:
from torchvision.models.feature_extraction import get_graph_node_names
from torchvision.models.feature_extraction import create_feature_extractor
from models.darknet19 import Darknet19Config, Darknet19Backbone

# Backbone only (no classification head), configured for 416x416 inputs.
img_h = img_w = 416
backbone = Darknet19Backbone(
    Darknet19Config(img_h=img_h, img_w=img_w)
)

In [3]:
# Node names that can be requested from the feature extractor, listed
# separately for the train-mode and eval-mode graphs.
train_nodes, eval_nodes = get_graph_node_names(backbone)
print(f"{train_nodes=}", f"{eval_nodes=}", sep="\n")

train_nodes=['x', 'conv1.conv', 'conv1.bn', 'conv1.leaky_relu', 'maxpool1', 'conv2.conv', 'conv2.bn', 'conv2.leaky_relu', 'maxpool2', 'conv3.conv', 'conv3.bn', 'conv3.leaky_relu', 'conv4.conv', 'conv4.bn', 'conv4.leaky_relu', 'conv5.conv', 'conv5.bn', 'conv5.leaky_relu', 'maxpool3', 'conv6.conv', 'conv6.bn', 'conv6.leaky_relu', 'conv7.conv', 'conv7.bn', 'conv7.leaky_relu', 'conv8.conv', 'conv8.bn', 'conv8.leaky_relu', 'maxpool4', 'conv9.conv', 'conv9.bn', 'conv9.leaky_relu', 'conv10.conv', 'conv10.bn', 'conv10.leaky_relu', 'conv11.conv', 'conv11.bn', 'conv11.leaky_relu', 'conv12.conv', 'conv12.bn', 'conv12.leaky_relu', 'conv13.conv', 'conv13.bn', 'conv13.leaky_relu', 'maxpool5', 'conv14.conv', 'conv14.bn', 'conv14.leaky_relu', 'conv15.conv', 'conv15.bn', 'conv15.leaky_relu', 'conv16.conv', 'conv16.bn', 'conv16.leaky_relu', 'conv17.conv', 'conv17.bn', 'conv17.leaky_relu', 'conv18.conv', 'conv18.bn', 'conv18.leaky_relu']
eval_nodes=['x', 'conv1.conv', 'conv1.bn', 'conv1.leaky_relu', 'max

  torch.has_cuda,
  torch.has_cudnn,
  torch.has_mps,
  torch.has_mkldnn,


In [5]:
# Expose the outputs of conv13 and conv18 under their own node names.
backbone_with_feat = create_feature_extractor(
    backbone,
    return_nodes=dict(conv13='conv13', conv18='conv18'),
)

# Run one random image through the extractor and report what comes back.
imgs = torch.randn(1, 3, img_h, img_w)
feat = backbone_with_feat(imgs)
print(
    f"{feat.keys()=}",
    f"{feat['conv13'].shape=}",
    f"{feat['conv18'].shape=}",
    sep="\n",
)

feat.keys()=dict_keys(['conv13', 'conv18'])
feat['conv13'].shape=torch.Size([1, 512, 26, 26])
feat['conv18'].shape=torch.Size([1, 1024, 13, 13])


In [6]:
# Per-layer report of the feature extractor for a single dummy image on CPU.
batch_size = 1
summary(
    backbone_with_feat,
    input_size=[(batch_size, 3, img_h, img_w)],
    device='cpu',
    col_names=(
        "input_size",
        "output_size",
        "num_params",
        "kernel_size",
        "mult_adds",
        "trainable",
    ),
    verbose=2,
    depth=4,
    row_settings=("depth", "var_names"),
);

Layer (type (var_name):depth-idx)        Input Shape               Output Shape              Param #                   Kernel Shape              Mult-Adds                 Trainable
Darknet19Backbone (Darknet19Backbone)    [1, 3, 416, 416]          [1, 1024, 13, 13]         --                        --                        --                        True
├─Module (conv1): 1-1                    --                        --                        --                        --                        --                        True
│    └─conv.weight                                                                           ├─864                     [32, 3, 3, 3]
│    └─bn.weight                                                                             ├─32                      [32]
│    └─bn.bias                                                                               └─32                      [32]
│    └─Conv2d (conv): 2-1                [1, 3, 416, 416]          [1, 32, 416, 416]  

## Inspect Results

In [6]:
from models.darknet19 import Darknet19Config, Darknet19

# Evaluate on the GPU when one is available; fall back to CPU so the cell
# still runs on machines without CUDA instead of failing in `model.to`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
from_ckpt = '../out/darknet19_imagenet2012/20240306-064919/ckpt_last.pt'

# Load onto CPU first; the model is moved to `device` once its weights are set.
checkpoint = torch.load(from_ckpt, map_location='cpu')

# Rebuild the model with the arguments stored in the checkpoint so that the
# architecture matches the saved weights.
checkpoint_model_args = checkpoint['model_args']
model_config = Darknet19Config(**checkpoint_model_args)
model = Darknet19(model_config)
state_dict = checkpoint['model']
model.load_state_dict(state_dict)
model = model.to(device)
# Switch to inference behaviour; as the last expression of the cell this also
# displays the module structure.
model.eval()

number of parameters: 20.84M


Darknet19(
  (backbone): Darknet19Backbone(
    (conv1): Darknet19Conv2d(
      (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=1e-06, momentum=0.01, affine=True, track_running_stats=True)
    )
    (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Darknet19Conv2d(
      (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=1e-06, momentum=0.01, affine=True, track_running_stats=True)
    )
    (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv3): Darknet19Conv2d(
      (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(128, eps=1e-06, momentum=0.01, affine=True, track_running_stats=True)
    )
    (conv4): Darknet19Conv2d(
      (conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
   

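Modify the transform below so that it matches the preprocessing of the checkpoint being evaluated; the image size and the normalization mean/std are read from the config stored in the checkpoint.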

In [9]:
from torch.utils.data.dataloader import DataLoader
import torchvision
from torchvision.transforms import v2

data_dir = '../data/imagenet2012'

# Preprocessing parameters are taken from the config stored in the checkpoint.
ckpt_config = checkpoint['config']
img_h, img_w = ckpt_config['img_h'], ckpt_config['img_w']
imgs_mean, imgs_std = ckpt_config['imgs_mean'], ckpt_config['imgs_std']

# Resize, center-crop to (img_h, img_w), scale to float32 and normalize.
# Disabled alternative for the resize size: int(max(img_h, img_w) * 256 / 224),
# optionally with max_size=max(img_h, img_w) + 1 (max_size must be > size).
eval_transform = v2.Compose([
    v2.ToImage(),
    v2.Resize(size=max(img_h, img_w), antialias=True),
    v2.CenterCrop(size=(img_h, img_w)),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(mean=imgs_mean, std=imgs_std),
])

dataset = torchvision.datasets.ImageNet(data_dir, split='val', transform=eval_transform)
dataloader = DataLoader(
    dataset,
    batch_size=256,
    shuffle=False,
    num_workers=8,
    pin_memory=True,
)

In [10]:
from tqdm import tqdm

with torch.inference_mode():
    # Running counts of correct top-1 / top-5 predictions and of images seen.
    acc1 = acc5 = 0.0
    n_seen = 0
    # Asynchronous host-to-device copies are only requested for CUDA devices.
    async_copy = 'cuda' in device
    for X, Y in tqdm(dataloader):
        X = X.to(device, non_blocking=async_copy)
        Y = Y.to(device, non_blocking=async_copy)
        logits, loss = model(X, Y)
        # Indices of the 5 highest logits per image, best first: (N, 5).
        top5 = logits.topk(5, dim=1, largest=True, sorted=True).indices
        # hits[i, k] is True when the k-th best guess for image i is its label.
        hits = top5.eq(Y.view(-1, 1))
        n_seen += X.size(0)
        acc1 += hits[:, :1].float().sum().item()
        acc5 += hits[:, :5].float().sum().item()
    print(acc1, acc5)
    # Turn the raw counts into percentages of the images seen.
    acc1 = 100 * acc1 / n_seen
    acc5 = 100 * acc5 / n_seen
    print(f"Top-1 accuracy: {acc1:.2f}%")
    print(f"Top-5 accuracy: {acc5:.2f}%")


100%|██████████| 196/196 [00:19<00:00,  9.91it/s]

36588.0 45721.0
Top-1 accuracy: 73.18%
Top-5 accuracy: 91.44%





### Results

| Runs                                          | Acc1   | Acc5   | Duration |
|-----------------------------------------------|--------|--------|----------|
| 20240301-133108/ckpt_last.pt (overfit-no-aug) | 63.23% | 85.00% | 26.0s    |
| 20240301-062829/ckpt_last.pt (simple-aug)     | 73.03% | 91.28% | 21.4s    |
| 20240303-055517/ckpt_last.pt (448 simple-aug) | 75.88% | 93.01% | 1m0.2s   |
| 20240306-064919/ckpt_last.pt (timm-aug)       | 73.18% | 91.44% | 25.7s    |
