In [1]:
import torch
import numpy as np
import torch.nn as nn
import torch.autograd.profiler as profiler


class ProfileTargetModule(nn.Module):
    def __init__(self, in_features: int, out_features: int, bias: bool = True, bn: bool = True):
        super(ProfileTargetModule, self).__init__()
        self.conv = nn.Conv2d(in_features, out_features, kernel_size=3, padding=1, bias=bias)
        self.bn = nn.BatchNorm2d(out_features)
        
    def forward(self, input):
        with profiler.record_function("CONV FORWARD"):
            out = self.conv(input)
            out = self.bn(out)

        with profiler.record_function("SVD"):
            u, s, vh = np.linalg.svd(out.cpu().detach().numpy())
            s = torch.from_numpy(s).cuda()
            
        return out, s

In [2]:
x = torch.rand(1, 3, 128, 128).cuda()
model = ProfileTargetModule(3, 8, True, True).cuda()
out, s = model(x) # Warm-up

with profiler.profile(with_stack=True, profile_memory=True) as prof:
    out, s = model(x)
    
print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))

------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                 SVD        88.04%      29.726ms        89.81%      30.324ms      30.324ms          -4 b    -512.00 Kb       4.00 Kb           0 b             1  
                        CONV FORWARD         0.52%     174.000us         8.70%       2.937ms       2.937ms          -4 b          -4 b       1.00 Mb           0 b             1  
                        aten::conv2d         0.01%       4.000us         4.55%       1.537ms       1.537

STAGE:2022-11-07 15:19:22 1269393:1269393 ActivityProfilerController.cpp:294] Completed Stage: Warm Up
STAGE:2022-11-07 15:19:22 1269393:1269393 ActivityProfilerController.cpp:300] Completed Stage: Collection


In [3]:
print(prof.key_averages(group_by_stack_n=5).table(sort_by="cpu_time_total", row_limit=3))

------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ---------------------------------------------------------------------------  
                                Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  Source Location                                                              
------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ---------------------------------------------------------------------------  
                                 SVD        88.04%      29.726ms        89.81%      30.324ms      30.324ms          -4 b    -512.00 Kb       4.00 Kb           0 b             1  ...-in method _record_function_enter of PyCapsule obj

In [4]:
x = torch.rand(1, 3, 128, 128).cuda()
model = ProfileTargetModule(3, 8, True, True).cuda()
out, s = model(x)

with profiler.profile(with_stack=True, use_cuda=True, profile_memory=True) as prof:
    out, s = model(x)
    
print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))

------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                 SVD        96.31%      29.281ms        97.57%      29.665ms      29.665ms      29.412ms        96.17%      29.838ms      29.838ms          -4 b    -512.26 Kb       4.00 Kb           0 b             1  
                        CONV FORWARD         0.65%     197

STAGE:2022-11-07 15:20:42 1269393:1269393 ActivityProfilerController.cpp:294] Completed Stage: Warm Up
STAGE:2022-11-07 15:20:42 1269393:1269393 ActivityProfilerController.cpp:300] Completed Stage: Collection


In [5]:
class ProfileTargetModule(nn.Module):
    def __init__(self, in_features: int, out_features: int, bias: bool = True, bn: bool = True):
        super(ProfileTargetModule, self).__init__()
        self.conv = nn.Conv2d(in_features, out_features, kernel_size=3, padding=1, bias=bias)
        self.bn = nn.BatchNorm2d(out_features)
        
    def forward(self, input):
        with profiler.record_function("CONV FORWARD"):
            out = self.conv(input)
            out = self.bn(out)

        with profiler.record_function("SVD"):
            u, s, vh = torch.svd(out)
            
        return out, s

In [6]:
x = torch.rand(1, 3, 128, 128).cuda()
model = ProfileTargetModule(3, 8, True, True).cuda()
out, s = model(x)

with profiler.profile(with_stack=True, use_cuda=True, profile_memory=True) as prof:
    out, s = model(x)
    
print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))

STAGE:2022-11-07 15:22:29 1269393:1269393 ActivityProfilerController.cpp:294] Completed Stage: Warm Up
STAGE:2022-11-07 15:22:29 1269393:1269393 ActivityProfilerController.cpp:300] Completed Stage: Collection


------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                 SVD         1.38%     378.000us        97.44%      26.617ms      26.617ms     359.000us         1.31%      26.622ms      26.622ms          -4 b        -268 b       1.00 Mb           0 b             1  
                           aten::svd         0.16%      45