In [1]:
import numpy as np
import torch

In [2]:
from torch.utils.data import DataLoader

In [3]:
from datasets import PascalVOC
from postprocess import DecodeCenter, NMS, RestoreCoords
from metrics import AP
from msra_resnet import get_pose_net
from transforms import LetterBox, ToUnit, Normalize

In [4]:
# enable cuDNN benchmark mode (its convolution auto-tuner) for this session
torch.backends.cudnn.benchmark = True

In [5]:
# ResNet-18 based CenterNet: 18 layers, three output heads ('hm', 'wh', 'reg')
# and a head_conv of 64.
model = get_pose_net(
    num_layers=18,
    heads={'hm': 20, 'wh': 2, 'reg': 2},
    head_conv=64,
)

=> loading pretrained model https://download.pytorch.org/models/resnet18-5c106cde.pth


In [6]:
# Load the pretrained weights stored under the checkpoint's 'model' key.
# map_location='cpu' makes deserialization independent of the device the
# checkpoint was saved from (e.g. a GPU index that does not exist on this
# machine); the model itself is moved to its target device in a later cell.
# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
state_dict_path = 'resnet18_pascal.pth'
state_dict = torch.load(state_dict_path, map_location='cpu')['model']
model.load_state_dict(state_dict)

<All keys matched successfully>

In [7]:
# Use GPU acceleration when CUDA is available; otherwise fall back to the CPU
# so the notebook still runs on machines without a GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)
# eval mode puts layers such as BatchNorm into inference behaviour
model.eval()

PoseResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True

In [8]:
# Image preprocessing pipeline, listed in the order the steps were declared:
# letterbox to `input_size`, ToUnit, then Normalize with the mean/std below.
input_size = 384
mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
transforms = [
    LetterBox(input_size),
    ToUnit(),
    Normalize(mean, std),
]

In [9]:
# Parse the PascalVOC 'test' split and prepare the dataloader.
dataset_path = 'VOCdevkit'
batch_size = 32
num_workers = 4

dataset = PascalVOC(dataset_path, 'test', transforms=transforms)
# dataset-level values that the mAP calculator receives later
num_classes = len(dataset.CLASSES)
k_true = dataset.max_objects
easy_objects = dataset.easy_objects

dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=True,
)

In [10]:
# Postprocessing components; they are handed to the AP metric in a later cell.
output_stride = 4  # CenterNet-specific parameter
k_pred = 100  # number of bboxes the model generates per image before any filtering
decode, filt, restore = (
    DecodeCenter(input_size, output_stride, k_pred),
    NMS(),
    RestoreCoords(input_size),
)

In [11]:
# Create the mAP calculator from the dataset values, the postprocessing
# components and the device chosen earlier.
metric = AP(
    k_true, k_pred, num_classes, easy_objects,
    decode, filt, restore, device,
)

In [12]:
# Run inference over the whole dataset without tracking gradients; the metric
# determines true positives and false positives for each batch as it goes.
with torch.no_grad():
    for data in dataloader:
        # permute((0, 3, 1, 2)) reorders the image axes before the forward pass;
        # presumably NHWC -> NCHW — confirm against the dataset's output layout.
        # non_blocking=True pairs with the DataLoader's pin_memory=True so the
        # host-to-device copy can be issued asynchronously.
        x = data['image'].permute((0, 3, 1, 2)).to(device, non_blocking=True)
        y = model(x)[0]  # only the first element of the model output is scored
        metric.accumulate(y, data)

In [13]:
# compute mAP after inference is done for the entire dataset
# (run this only once the evaluation loop has finished; presumably it combines
# what metric.accumulate() gathered per batch — confirm in metrics.AP)
mAP = metric.compute()

In [14]:
# display the mAP value computed in the previous cell
print(mAP)

0.6658356189727783


In [15]:
# clear intermediate results
# NOTE(review): presumably needed before the evaluation loop is run again so
# that batches are not accumulated twice — confirm against metrics.AP.reset
metric.reset()