In [7]:
%matplotlib inline

import torch
import matplotlib.pyplot as plt
import numpy as np
import torchvision
from torchvision import transforms
import torch.nn.functional as F
import torch.optim as optim
from torchsummary import summary
import time
from torchvision.models import resnet50, ResNet50_Weights, ResNet152_Weights, EfficientNet_B7_Weights

### Puprose

Measure inference time for some pretrained models for example ResNet152

In [2]:
#Select device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [3]:
#Network model
model = torchvision.models.resnet152(weights=ResNet152_Weights.IMAGENET1K_V2).to(device)

#Change last fully conected layer to a one with 1 outputs. This layer is trainable.
model.fc = torch.nn.Sequential(
               torch.nn.Linear(2048, 128),
               torch.nn.ReLU(inplace=True),
               torch.nn.Linear(128, 1),
               torch.nn.Sigmoid()).to(device)

In [9]:
optimal_batch_size = 2
dummy_input = torch.randn(optimal_batch_size, 3,224,224, dtype=torch.float).to(device)

In [12]:
# INIT LOGGERS
starter, ender = torch.cuda.Event(enable_timing=True), torch.cuda.Event(enable_timing=True)
repetitions = 300
timings=np.zeros((repetitions,1))

#GPU-WARM-UP
for _ in range(10):
    
    #Forward passto model
    _ = model(dummy_input)

# MEASURE PERFORMANCE
with torch.no_grad():
    for rep in range(repetitions):
        starter.record()
        _ = model(dummy_input)
        ender.record()
        # WAIT FOR GPU SYNC
        torch.cuda.synchronize()
        curr_time = starter.elapsed_time(ender)
        timings[rep] = curr_time
mean_syn = np.sum(timings) / repetitions
std_syn = np.std(timings)
print(f'{mean_syn} ms')

15.221631495157878 ms


### ToDo

### References

- https://deci.ai/blog/measure-inference-time-deep-neural-networks/