In [1]:
import torch
import torchvision.models as models
from torch.profiler import profile, record_function, ProfilerActivity

In [2]:
# CPU workload used by the profiling cells that follow: a ResNet-18 built with
# torchvision's default arguments and a random batch of five 3x224x224 tensors.
# No seed is set, so weights and inputs differ on every run.
model = models.resnet18()
inputs = torch.randn((5, 3, 224, 224))

In [3]:
# Show only the batch shape: echoing the whole tensor prints pages of random
# values that say nothing about the workload being profiled.
inputs.shape


torch.Size([5, 3, 224, 224])

# Using profiler to analyze execution time

In [4]:
# Profile a single forward pass on the CPU. record_shapes=True keeps operator
# input shapes so the results can later be grouped by shape; the
# record_function scope labels the whole pass as "model_inference" in the report.
with profile(
    activities=[ProfilerActivity.CPU],
    record_shapes=True,
) as prof, record_function("model_inference"):
    model(inputs)

In [5]:
# Summarise the profiled events per name and print the 10 rows with the
# largest total CPU time (sort_by="cpu_time_total", row_limit=10).
print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                  model_inference         5.05%      32.434ms       100.00%     642.076ms     642.076ms             1  
                     aten::conv2d         0.31%       2.021ms        59.09%     379.415ms      18.971ms            20  
                aten::convolution         0.95%       6.104ms        58.78%     377.394ms      18.870ms            20  
               aten::_convolution         0.55%       3.544ms        57.83%     371.290ms      18.564ms            20  
         aten::mkldnn_convolution        56.94%     365.566ms        57.27%     367.746ms      18.387ms            20  
                 aten::batch_norm       

In [6]:
# Same report, but events are additionally split by operator input shape (see
# the "Input Shapes" column). Shapes are available because the profiling run
# above was started with record_shapes=True.
print(prof.key_averages(group_by_input_shape=True).table(sort_by="cpu_time_total", row_limit=10))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------------------------------------------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls                                                                      Input Shapes  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------------------------------------------------  
                  model_inference         5.05%      32.434ms       100.00%     642.076ms     642.076ms             1                                                                                []  
                     aten::conv2d         0.23%       1.504ms        16.39%     105.227ms     105.227ms             1                             [[5, 3, 224, 224], [64, 3, 7, 7], [], [], [], 

In [7]:
# GPU variant: rebuild the model and a single-image batch, move both to the
# default CUDA device, and record CPU as well as CUDA activity.
# NOTE(review): no warm-up pass precedes the profiled call, so one-time
# CUDA/cuDNN start-up cost is likely included in these timings - confirm.
model = models.resnet18().cuda()
inputs = torch.randn(1, 3, 224, 224).cuda()

with profile(
    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
    record_shapes=True,
) as prof, record_function("model_inference"):
    model(inputs)

# Rank the report by total CUDA time and keep the top 10 rows.
print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))


---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                  model_inference         0.67%      14.477ms       100.00%        2.160s        2.160s     195.000us         0.01%        1.577s        1.577s             1  
                     aten::conv2d         0.03%     655.000us        88.21%        1.905s      95.260ms      66.000us         0.00%        1.349s      67.456ms            20  
                aten::convolution         0.06%       1.362ms        88.18%        1.905s      95.228ms      68.000us  

# Using profiler to analyze memory consumption

In [8]:
# Back to the CPU workload (fresh model, batch of five images), this time with
# profile_memory=True so the report gains the "CPU Mem" / "Self CPU Mem" columns.
model = models.resnet18()
inputs = torch.randn(5, 3, 224, 224)

with profile(
    activities=[ProfilerActivity.CPU],
    profile_memory=True,
    record_shapes=True,
) as prof:
    model(inputs)

# First table: ranked by memory attributed to the operator itself.
print(prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10))
# Second table: ranked by the operator's total "CPU Mem" figure.
print(prof.key_averages().table(sort_by="cpu_memory_usage", row_limit=10))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                      aten::empty         0.36%       2.005ms         0.36%       2.005ms      10.025us      94.86 Mb      94.86 Mb           200  
    aten::max_pool2d_with_indices        19.11%     106.389ms        19.11%     106.389ms     106.389ms      11.48 Mb      11.48 Mb             1  
                      aten::addmm         0.11%     591.000us         0.12%     661.000us     661.000us      19.53 Kb      19.53 Kb             1  
                       aten::mean         0.02%      98.000us         0.11%     591.000us     591.000us      10.

# Using tracing functionality

In [9]:
# Rebuild the GPU workload (single image) and profile CPU + CUDA activity so
# the run can be exported as a trace file.
model = models.resnet18().cuda()
inputs = torch.randn(1, 3, 224, 224).cuda()

with profile(
    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
) as prof:
    model(inputs)

# Writes "trace.json" relative to the current working directory, in the
# Chrome trace format (per the method name).
prof.export_chrome_trace("trace.json")

# Examining stack traces

In [11]:
# Profile with Python stack capture enabled (with_stack=True).
# NOTE(review): this cell does not define `model` / `inputs`; it relies on the
# CUDA model and input created by the tracing cell above, and the execution
# count skips In [10] - re-check with Restart & Run All.
with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], with_stack=True) as prof:
    model(inputs)

# Aggregate by operator and call stack (up to 5 frames, group_by_stack_n=5),
# then show the 2 entries with the highest self CUDA time.
print(prof.key_averages(group_by_stack_n=5).table(sort_by="self_cuda_time_total", row_limit=2))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
          aten::cudnn_convolution         3.42%      28.724ms         3.42%      28.724ms       1.436ms       3.093ms        60.77%       3.093ms     154.650us            20  
           aten::cudnn_batch_norm        94.53%     793.439ms        94.68%     794.623ms      39.731ms     889.000us        17.47%       1.162ms      58.100us            20  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  