In [1]:
import os; os.environ["ACCELERATE_DISABLE_RICH"] = "1"
import sys
import torch
import torchvision
from torch.utils import benchmark
import torch.nn.functional as F
import torch.optim as optim
import torch.nn as nn
import copy

from collections import namedtuple

import torchvision.datasets as datasets
import torchvision.transforms as transforms

from torch.profiler import profile, record_function, ProfilerActivity
from pathlib import Path

from typing import List, Optional, Callable, Tuple, Dict, Literal, Set 
# Make sure exercises are in the path
orig_dir = os.getcwd()
chapter = r"chapter3_training_at_scale"
# Everything before the chapter name in the current directory is used as the
# prefix under which `<chapter>/exercises` is looked up.
exercises_dir = Path(f"{orig_dir.split(chapter)[0]}/{chapter}/exercises").resolve()
# NOTE(review): this directory name does not match the `part1_gpus` package
# imported below and looks carried over from another chapter -- confirm.
section_dir = exercises_dir / "part7_toy_models_of_superposition"
if str(exercises_dir) not in sys.path: sys.path.append(str(exercises_dir))

import part1_gpus.tests as tests

# Add root dir, so we can import from chapter 0 material
root_dir = exercises_dir.parent.parent.resolve()
if str(root_dir) not in sys.path: sys.path.append(str(root_dir))
# The chapter 0 import is performed from the repository root. The original
# working directory is restored in `finally` so that a failed import does not
# leave the kernel sitting in `root_dir` for every later cell.
os.chdir(root_dir)
try:
    from chapter0_fundamentals.exercises.part3_resnets.solutions import ResNet34
finally:
    os.chdir(orig_dir)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# True when this code runs as the top-level script/notebook rather than on import.
MAIN = __name__ == "__main__"

In [4]:
# Profile one CPU forward pass of a pretrained ResNet-18 on a random batch of
# five 3x224x224 inputs, then list the ten entries with the largest total CPU time.
model = torchvision.models.resnet18(weights='IMAGENET1K_V1')
inputs = torch.randn(5, 3, 224, 224)

# `record_function` labels the forward pass so it appears as its own
# "model_inference" row in the profiler table.
with profile(
    activities=[ProfilerActivity.CPU],
    record_shapes=True,
) as prof, record_function("model_inference"):
    model(inputs)

print(
    prof.key_averages().table(
        sort_by="cpu_time_total",
        row_limit=10,
    )
)

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                  model_inference         7.91%       1.898ms       100.00%      24.007ms      24.007ms             1  
                     aten::conv2d         0.25%      59.000us        67.68%      16.248ms     812.400us            20  
                aten::convolution         0.69%     166.000us        67.43%      16.189ms     809.450us            20  
               aten::_convolution         0.53%     127.000us        66.74%      16.023ms     801.150us            20  
         aten::mkldnn_convolution        65.72%      15.777ms        66.21%      15.896ms     794.800us            20  
                 aten::batch_norm       

STAGE:2023-06-19 10:09:11 3166:3166 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2023-06-19 10:09:11 3166:3166 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-06-19 10:09:11 3166:3166 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


In [9]:
# GPU: profile the same forward pass twice -- once with autograd tracking left
# on and once under torch.inference_mode() -- and print both tables with the
# same sort key so they can be compared row by row.
def _profile_cuda_forward(model: torch.nn.Module, inputs: torch.Tensor, use_inference_mode: bool) -> profile:
    """Profile a single forward pass, recording both CPU and CUDA activity.

    Args:
        model: Module to call on `inputs`.
        inputs: Batch passed to `model`.
        use_inference_mode: When True the forward pass runs under
            `torch.inference_mode()`; when False autograd tracking stays on.

    Returns:
        The finished profiler object, ready for `key_averages()`.
    """
    with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True) as prof:
        with record_function("model_inference"):
            # `mode=False` turns the context manager into a no-op, so both
            # variants share exactly the same code path.
            with torch.inference_mode(mode=use_inference_mode):
                model(inputs)
    return prof


# One model and one input batch are shared by both runs, so the two tables
# describe the same workload.
model = torchvision.models.resnet18(weights='IMAGENET1K_V1').cuda()
inputs = torch.randn(5, 3, 224, 224).cuda()

print('--- Without inference mode (compute gradients) ---')
prof = _profile_cuda_forward(model, inputs, use_inference_mode=False)
print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))

# Still on the GPU; only the autograd mode differs from the run above.
print('--- With inference mode (don\'t compute gradients) ---')
prof = _profile_cuda_forward(model, inputs, use_inference_mode=True)
# Sorted by CUDA time like the first table (this one was previously sorted by CPU time).
print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))

--- GPU table ---


STAGE:2023-06-19 10:13:13 3166:3166 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2023-06-19 10:13:13 3166:3166 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-06-19 10:13:13 3166:3166 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                        model_inference        22.67%       1.203ms        99.87%       5.299ms       5.299ms       0.000us         0.00%       1.583ms       1.583ms             1  
                                      aten::convolution         2.75%     146.000us        35.71%       1.895ms      94.750us       0.000us         0.00%       1.050ms      52.500us            20  
         

STAGE:2023-06-19 10:13:13 3166:3166 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2023-06-19 10:13:13 3166:3166 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-06-19 10:13:13 3166:3166 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                        model_inference        23.06%       1.098ms        99.85%       4.755ms       4.755ms       0.000us         0.00%       1.580ms       1.580ms             1  
                                           aten::conv2d         2.10%     100.000us        39.04%       1.859ms      92.950us       0.000us         0.00%       1.013ms      50.650us            20  
         