In [1]:
import torch
import torchvision.models as models
from torch.profiler import profile, record_function, ProfilerActivity

In [2]:
model = models.resnet18()
inputs = torch.randn(5, 3, 224, 224)

In [3]:
with profile(activities=[ProfilerActivity.CPU], record_shapes=True) as prof:
    with record_function("model_inference"):
        model(inputs)

STAGE:2023-06-03 06:24:37 147292:147292 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2023-06-03 06:24:37 147292:147292 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-06-03 06:24:37 147292:147292 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


In [4]:
print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                  model_inference         1.39%       1.024ms       100.00%      73.821ms      73.821ms             1  
                     aten::conv2d         3.09%       2.283ms        69.68%      51.437ms       2.572ms            20  
                aten::convolution         0.35%     259.000us        69.56%      51.349ms       2.567ms            20  
               aten::_convolution         0.27%     203.000us        69.21%      51.090ms       2.554ms            20  
         aten::mkldnn_convolution        68.68%      50.701ms        68.93%      50.887ms       2.544ms            20  
                 aten::batch_norm       

In [5]:
print(prof.key_averages(group_by_input_shape=True).table(sort_by="cpu_time_total", row_limit=10))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------------------------------------------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls                                                                      Input Shapes  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------------------------------------------------  
                  model_inference         1.39%       1.024ms       100.00%      73.821ms      73.821ms             1                                                                                []  
                     aten::conv2d         0.02%      15.000us        17.12%      12.640ms       4.213ms             3                            [[5, 512, 7, 7], [512, 512, 3, 3], [], [], [], 

In [6]:
model = models.resnet18().cuda()
inputs = torch.randn(5, 3, 224, 224).cuda()

with profile(activities=[
        ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True) as prof:
    with record_function("model_inference"):
        model(inputs)

print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))

STAGE:2023-06-03 06:27:16 147292:147292 ActivityProfilerController.cpp:311] Completed Stage: Warm Up


-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                        model_inference        -0.02%    -368.000us       100.00%        1.574s        1.574s       0.000us         0.00%       2.055ms       2.055ms             1  
                                      aten::convolution         0.01%     161.000us        96.17%        1.513s      75.673ms       0.000us         0.00%       1.607ms      80.350us            20  
         

STAGE:2023-06-03 06:27:18 147292:147292 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-06-03 06:27:18 147292:147292 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


In [7]:
model = models.resnet18()
inputs = torch.randn(5, 3, 224, 224)

with profile(activities=[ProfilerActivity.CPU],
        profile_memory=True, record_shapes=True) as prof:
    model(inputs)

print(prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10))

# (omitting some columns)
# ---------------------------------  ------------  ------------  ------------
#                              Name       CPU Mem  Self CPU Mem    # of Calls
# ---------------------------------  ------------  ------------  ------------
#                       aten::empty      94.79 Mb      94.79 Mb           121
#     aten::max_pool2d_with_indices      11.48 Mb      11.48 Mb             1
#                       aten::addmm      19.53 Kb      19.53 Kb             1
#               aten::empty_strided         572 b         572 b            25
#                     aten::resize_         240 b         240 b             6
#                         aten::abs         480 b         240 b             4
#                         aten::add         160 b         160 b            20
#               aten::masked_select         120 b         112 b             1
#                          aten::ne         122 b          53 b             6
#                          aten::eq          60 b          30 b             2
# ---------------------------------  ------------  ------------  ------------
# Self CPU time total: 53.064ms

print(prof.key_averages().table(sort_by="cpu_memory_usage", row_limit=10))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                      aten::empty         0.41%     220.000us         0.41%     220.000us       1.100us      90.04 Mb      90.04 Mb           200  
    aten::max_pool2d_with_indices         4.75%       2.538ms         4.75%       2.538ms       2.538ms      11.48 Mb      11.48 Mb             1  
                 aten::empty_like         0.08%      45.000us         0.12%      62.000us       3.100us      47.37 Mb       4.79 Mb            20  
                      aten::addmm         0.69%     368.000us         0.71%     381.000us     381.000us      19.

STAGE:2023-06-03 06:29:14 147292:147292 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2023-06-03 06:29:14 147292:147292 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-06-03 06:29:14 147292:147292 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


In [8]:
model = models.resnet18().cuda()
inputs = torch.randn(5, 3, 224, 224).cuda()

with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prof:
    model(inputs)

prof.export_chrome_trace("trace.json")

STAGE:2023-06-03 06:29:43 147292:147292 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2023-06-03 06:29:43 147292:147292 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-06-03 06:29:43 147292:147292 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


In [9]:
prof.events()

[<FunctionEvent id=1537 name=aten::conv2d device_type=DeviceType.CPU node_id=-1 cpu_time=435.000us start_us=234 end_us=669 cpu_children=[1538] cuda_time=77.000us name=aten::conv2d thread=1 input_shapes=[] cpu_memory_usage=0 cuda_memory_usage=0 is_async=False is_remote=False seq_nr=210 is_legacy=False>,
 <FunctionEvent id=1538 name=aten::convolution device_type=DeviceType.CPU node_id=-1 cpu_time=430.000us start_us=238 end_us=668 cpu_children=[1539] cuda_time=77.000us name=aten::convolution thread=1 input_shapes=[] cpu_memory_usage=0 cuda_memory_usage=0 is_async=False is_remote=False seq_nr=210 is_legacy=False>,
 <FunctionEvent id=1539 name=aten::_convolution device_type=DeviceType.CPU node_id=-1 cpu_time=409.000us start_us=255 end_us=664 cpu_children=[1540] cuda_time=77.000us name=aten::_convolution thread=1 input_shapes=[] cpu_memory_usage=0 cuda_memory_usage=0 is_async=False is_remote=False seq_nr=-1 is_legacy=False>,
 <FunctionEvent id=1540 name=aten::cudnn_convolution device_type=De

In [12]:
# with profile(activities=[ProfilerActivity.CPU], record_shapes=True) as prof:
with record_function("model_inference"):
    model(inputs)

In [11]:
del prof

In [20]:
print(profile().key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10))

AssertionError: 