# Image encoding benchmark

Visualize pretrainer results on a given image and measure the encoding time.

First, build the model:

In [7]:
import sys
sys.path.append("../")
from multitudinous.utils.model_builder import build_img_backbone
import torch

BACKBONE_VARIANT = "se_resnet50"

# Prefer the GPU when CUDA is available; otherwise everything runs on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the image backbone for the selected variant.
# NOTE(review): the second argument (4) presumably is the number of input
# channels (RGB + depth) and the third a checkpoint to load weights from --
# confirm against build_img_backbone.
model = build_img_backbone(
    BACKBONE_VARIANT,
    4,
    "../se_resnet50_ae/img_backbone_15.pth",
)

# Place the model on the chosen device and switch it to evaluation mode.
model = model.to(device)
model.eval()

# Count every element of every parameter tensor registered on the model.
total_params = sum(
    tensor.numel()
    for tensor in model.parameters()
)

# Show the architecture followed by the parameter count.
print(model)

print(f"Total number of parameters: {total_params}")

ResNet(
  (conv1): Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): SEBottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1

Now, create the dataset and dataloader instances:

In [8]:
from multitudinous.utils.dataset_builder import build_img_dataset

# Instantiate the "tum_rgbd" dataset from its directory (path is relative to
# this notebook).
dataset = build_img_dataset(
    "tum_rgbd",
    "../../data/tum_rgbd1",
)

# Serve the dataset one sample per batch, in shuffled order.
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=1,
    shuffle=True,
)

Now, run inference on every image in the dataset and measure the average encoding time:

In [9]:
import time

import torch

# Benchmark the forward pass of `model` over every batch yielded by
# `dataloader` and report the average time per batch and the throughput.
# Relies on `model`, `device`, `dataset`, `dataloader` and BACKBONE_VARIANT
# defined by the previous cells.

# Untimed forward passes executed on the first batch before measuring, so that
# one-off start-up work is not charged to the first timed sample.
WARMUP_RUNS = 5

# CUDA events can only be recorded when running on a GPU. `device` falls back
# to "cpu" when CUDA is unavailable, so choose the timer accordingly.
use_cuda_timer = device.type == "cuda"
if use_cuda_timer:
    starter = torch.cuda.Event(enable_timing=True)
    ender = torch.cuda.Event(enable_timing=True)

# metrics
total_imgs = len(dataset)
total_elapsed_time = 0
curr_sample = 0
avg_encoding_time = 0

for rgb, depth in dataloader:

    # move the images to the device
    rgb = rgb.to(device)
    depth = depth.to(device)
    # Insert an axis at position 1 of depth and concatenate with rgb along it.
    # NOTE(review): assumes rgb is (batch, 3, H, W) and depth is (batch, H, W),
    # giving a 4-channel RGB-D input -- confirm against the dataset.
    depth = depth.unsqueeze(1)
    rgbd = torch.cat((rgb, depth), dim=1)

    # run inference
    with torch.no_grad():
        if curr_sample == 0:
            # warm-up: results are discarded and not timed
            for _ in range(WARMUP_RUNS):
                model(rgbd)
            if use_cuda_timer:
                torch.cuda.synchronize()

        if use_cuda_timer:
            starter.record() # start recording
            output = model(rgbd)
            ender.record()
            # wait for the GPU to finish so the elapsed time is valid
            torch.cuda.synchronize()
            encoding_time = starter.elapsed_time(ender) # milliseconds
        else:
            start_time = time.perf_counter()
            output = model(rgbd)
            encoding_time = (time.perf_counter() - start_time) * 1000 # s -> ms

    total_elapsed_time += encoding_time
    curr_sample += 1

    if curr_sample % 100 == 0:
        avg_encoding_time = total_elapsed_time/curr_sample
        print(f"Sample {curr_sample}/{total_imgs} - Avg. encoding time: {avg_encoding_time} ms")

print(f"\n\n** {BACKBONE_VARIANT} METRICS **")
if curr_sample > 0:
    avg_encoding_time = total_elapsed_time/curr_sample
    print(f"Avg. encoding time: {avg_encoding_time} ms")
    # One timed forward pass per batch, so this is batches per second.
    print(f"Avg. throughput: {1/(avg_encoding_time/1000)} fps")
    # The per-batch tensors only exist if the loop ran at least once.
    del rgb, depth, rgbd, output
else:
    print("No samples were processed; nothing to report.")

# Drop the notebook-level references to the model and the dataset. The earlier
# cells must be re-run before this cell can be executed again.
del model, dataset

Sample 100/2890 - Avg. encoding time: 37.57482913970947 ms
Sample 200/2890 - Avg. encoding time: 37.53007165908814 ms
Sample 300/2890 - Avg. encoding time: 37.53169342041016 ms
Sample 400/2890 - Avg. encoding time: 37.53664779663086 ms
Sample 500/2890 - Avg. encoding time: 37.537389030456545 ms
Sample 600/2890 - Avg. encoding time: 37.54853165308634 ms
Sample 700/2890 - Avg. encoding time: 37.561709752764024 ms
Sample 800/2890 - Avg. encoding time: 37.5742326259613 ms
Sample 900/2890 - Avg. encoding time: 37.58298943413629 ms
Sample 1000/2890 - Avg. encoding time: 37.58961904144287 ms
Sample 1100/2890 - Avg. encoding time: 37.59559854680842 ms
Sample 1200/2890 - Avg. encoding time: 37.60112257639567 ms
Sample 1300/2890 - Avg. encoding time: 37.60481145125169 ms
Sample 1400/2890 - Avg. encoding time: 37.608991426740374 ms
Sample 1500/2890 - Avg. encoding time: 37.61288556162516 ms
Sample 1600/2890 - Avg. encoding time: 37.615627882480624 ms
Sample 1700/2890 - Avg. encoding time: 37.6183