In [11]:
import torch
import torchvision.models as models
from torch.profiler import profile, ProfilerActivity

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook also runs on machines without CUDA. To force a CPU-only comparison,
# set `device = torch.device("cpu")` by hand.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ResNet-18 (default constructor arguments) and a batch of 5 random
# 3 x 224 x 224 inputs, both placed on the same device.
model = models.resnet18().to(device)
inputs = torch.randn(5, 3, 224, 224).to(device)


In [12]:


# Profile a single forward pass, recording CPU-side events only.
# `record_shapes=True` keeps operator input shapes so the results can be
# grouped by shape afterwards.
prof = profile(activities=[ProfilerActivity.CPU], record_shapes=True)
with prof:
    model(inputs)

In [13]:
# Ten most expensive operators, ranked by total CPU time.
cpu_summary = prof.key_averages().table(sort_by="cpu_time_total", row_limit=10)
print(cpu_summary)

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                     aten::conv2d         0.00%      61.000us        96.44%        3.276s     163.804ms            20  
                aten::convolution         0.01%     194.000us        96.44%        3.276s     163.801ms            20  
               aten::_convolution         0.03%     975.000us        96.43%        3.276s     163.791ms            20  
          aten::cudnn_convolution        96.40%        3.275s        96.40%        3.275s     163.743ms            20  
                 aten::batch_norm         0.00%     144.000us         2.81%      95.497ms       4.775ms            20  
     aten::_batch_norm_impl_index       

In [14]:
# Same ranking, but averaged separately per distinct set of input shapes
# (available because the profile was recorded with `record_shapes=True`).
averages_by_shape = prof.key_averages(group_by_input_shape=True)
print(averages_by_shape.table(sort_by="cpu_time_total", row_limit=30))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------------------------------------------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls                                                                      Input Shapes  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------------------------------------------------  
                     aten::conv2d         0.00%       8.000us        95.20%        3.234s        3.234s             1                             [[5, 3, 224, 224], [64, 3, 7, 7], [], [], [], [], []]  
                aten::convolution         0.00%      34.000us        95.20%        3.234s        3.234s             1                     [[5, 3, 224, 224], [64, 3, 7, 7], [], [], [], [], [], 

In [15]:
# Profile one forward pass again, this time recording CUDA activity alongside
# the CPU events.
prof = profile(
    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
    record_shapes=True,
)
with prof:
    model(inputs)

cuda_summary = prof.key_averages().table(sort_by="cpu_time_total", row_limit=10)
print(cuda_summary)

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                     aten::conv2d         2.86%     102.000us        38.59%       1.375ms      68.750us      56.000us         0.02%     245.555ms      12.278ms            20  
                 aten::batch_norm         2.19%      78.000us        38.25%       1.363ms      68.150us      51.000us         0.02%       1.405ms      70.250us            20  
     aten::_batch_norm_impl_index         3.37%     120.000us        36.07%       1.285ms      64.250us      52.000us  

In [16]:
# The profiler can also be driven manually with start()/stop() instead of a
# `with` block, e.g. to bracket only the region of interest.
prof = profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True)
prof.start()
try:
    model(inputs)
finally:
    # Always stop the profiler, even if the forward pass raises; otherwise it
    # would be left running after this cell.
    prof.stop()

print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                 aten::batch_norm         2.36%      75.000us        40.21%       1.280ms      64.000us     100.000us         0.03%       5.137ms     256.850us            20  
     aten::_batch_norm_impl_index         3.52%     112.000us        37.86%       1.205ms      60.250us      65.000us         0.02%       5.037ms     251.850us            20  
                     aten::conv2d         2.95%      94.000us        35.85%       1.141ms      57.050us      69.000us  

In [17]:
# Same CPU + CUDA profile, additionally tracking memory
# (`profile_memory=True`).
prof = profile(
    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
    record_shapes=True,
    profile_memory=True,
)
with prof:
    model(inputs)

In [18]:
# Write the recorded events to a JSON trace file that can be loaded in the
# chrome://tracing viewer.
trace_path = "trace.json"
prof.export_chrome_trace(trace_path)


Then open the exported "trace.json" (written to the notebook's working directory, here s4_debugging_and_logging/exercise_files/) in Chrome by navigating to chrome://tracing and loading the file.

In [19]:
# Repeat the forward pass ten times to check whether the numbers change.
# `prof` is not created in this cell: it re-enters the profiler object left
# over from an earlier cell, and `prof.step()` signals each step boundary.
with prof:
    for step_idx in range(10):
        model(inputs)
        prof.step()

In [20]:
# Top 10 operators by total CPU time, aggregated over the repeated runs.
repeat_summary = prof.key_averages().table(sort_by="cpu_time_total", row_limit=10)
print(repeat_summary)

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                 aten::batch_norm         2.51%     824.000us        42.93%      14.112ms      70.560us       1.913ms         3.93%      17.784ms      88.920us           0 b           0 b     490.45 Mb           0 b           200  
     aten::_batch_norm_impl_index         3.80%       1.249ms        4

### Tensorboard

In [23]:
from torch.profiler import profile, tensorboard_trace_handler
# Profile ten forward passes and pass the finished trace to the TensorBoard
# handler, which is configured with the ./log/test directory.
prof = profile(
    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
    record_shapes=True,
    profile_memory=True,
    on_trace_ready=tensorboard_trace_handler("./log/test"),
)
with prof:
    for step_idx in range(10):
        model(inputs)
        prof.step()


In [1]:
import torch
import torchvision.models as models
from torch.profiler import profile, ProfilerActivity, tensorboard_trace_handler

# Use the GPU when one is available and fall back to the CPU otherwise, so this
# cell also runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ResNet-18 and a batch of 5 random 3 x 224 x 224 inputs on the same device.
model = models.resnet18().to(device)
inputs = torch.randn(5, 3, 224, 224).to(device)

# CPU + CUDA profiler with shape and memory tracking; the TensorBoard handler
# is configured with the ./log/resnet18 directory.
prof = profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
               record_shapes=True,
               profile_memory=True,
               on_trace_ready=tensorboard_trace_handler("./log/resnet18"))

# Ten profiled forward passes; `prof.step()` signals each step boundary.
with prof:
    for i in range(10):
        model(inputs)
        prof.step()

Now view the output in "log/resnet18/*.pt.trace.json" by running:

tensorboard --logdir=./log

in the terminal 

In [3]:
# ResNet-34 for comparison with the ResNet-18 run. The model is placed on
# whatever device `inputs` lives on, so model and data cannot end up on
# different devices.
model2 = models.resnet34().to(inputs.device)

# Same profiler configuration as before, with the TensorBoard handler pointed
# at a separate ./log/resnet34 directory.
prof = profile(
    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
    record_shapes=True,
    profile_memory=True,
    on_trace_ready=tensorboard_trace_handler("./log/resnet34"),
)
with prof:
    for i in range(10):
        model2(inputs)
        prof.step()

In [4]:
# Top 10 operators by total CPU time for the ResNet-34 runs.
resnet34_summary = prof.key_averages().table(sort_by="cpu_time_total", row_limit=10)
print(resnet34_summary)

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                 aten::batch_norm         3.10%       1.745ms        43.07%      24.241ms      67.336us       1.055ms         0.38%      86.509ms     240.303us           0 b           0 b     731.55 Mb           0 b           360  
     aten::_batch_norm_impl_index         3.51%       1.977ms        3

In [5]:
# `import logging` alone does not load the `logging.config` submodule; the
# attribute only exists if some other import already pulled it in. Import it
# explicitly so this cell also works on a fresh kernel.
import logging
import logging.config

logging.config

<module 'logging.config' from 'c:\\Users\\david\\anaconda3\\envs\\mlops-env\\Lib\\logging\\config.py'>

In [70]:
import torch

# Edge length of one small block, and edge length of the grid of blocks.
size = 2
size2 = 5

# One row per block (size2 * size2 rows), each holding the block's
# size * size values. Despite the name, the tensor starts out as all zeros.
random_tensor = torch.zeros((size2 * size2, size * size))

# Mark a few rows with their own index so they are easy to spot after the
# tensor is rearranged.
for row in (1, 8, 3):
    random_tensor[row, :] = row


In [71]:
# View the rows as a size2 x size2 grid of size x size blocks.
random2 = random_tensor.view(size2, size2, size, size)

# Swap the grid-column axis with the in-block row axis, giving
# (size2, size, size2, size), then merge the first two axes.
random3 = random2.permute(0, 2, 1, 3).flatten(end_dim=1)

# Merge the remaining axes so each original row appears as a size x size tile
# inside a single 2-D tensor.
random4 = random3.flatten(start_dim=1)


In [72]:
# Display the tiled 2-D tensor built in the previous cell.
random4

tensor([[0., 0., 1., 1., 0., 0., 3., 3., 0., 0.],
        [0., 0., 1., 1., 0., 0., 3., 3., 0., 0.],
        [0., 0., 0., 0., 0., 0., 8., 8., 0., 0.],
        [0., 0., 0., 0., 0., 0., 8., 8., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [19]:
# NOTE(review): this cell needs `random_tensor` to hold 280 * 280 = 78,400
# elements, but the definition visible in this notebook has
# (size2 * size2) * (size * size) = 100. It appears to rely on an earlier
# kernel state -- confirm, or redefine the tensor, before re-running.
random2 = random_tensor.view(280, 280)
# First 28 entries of column 1.
random2[:28, 1]

tensor([0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])