In [47]:
from resnet import resnet18, resnet34, resnet50, resnet101, resnet152
from resnet import resnet18_bottleneck, resnet34_bottleneck, resnet50_bottleneck, resnet101_bottleneck, resnet152_bottleneck
import numpy as np
import torch
from torch import nn
from torch.profiler import profile, record_function, ProfilerActivity
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import time

# Profiling resnets on GPU - Training and Data Loading

In [48]:
# --- Experiment configuration -------------------------------------------
# Where the dataset is stored and how many DataLoader worker processes to use.
path = "./data"
workers = 2

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Optimisation hyperparameters.
lr = 1e-4
epochs = 100
batch_size = 64

In [49]:
# Preprocessing pipeline: convert each image to a tensor, then normalise every
# one of the three channels with mean 0.5 and standard deviation 0.5.
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR-100 training split rooted at `path` (download=True is passed through).
trainset = torchvision.datasets.CIFAR100(
    root=path,
    train=True,
    download=True,
    transform=transform_train,
)

# Shuffled mini-batches of `batch_size` samples, loaded by `workers` subprocesses.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=workers,
)


Files already downloaded and verified


In [50]:
# Profile the cost of fetching one mini-batch from the loader and copying it
# to `device`. The iterator is created before the profiled region begins.
trainloader_enum = enumerate(trainloader)

# `device` may have fallen back to the CPU, so only request CUDA activity
# (and sort the report by CUDA time) when we are really running on a GPU.
profiled_activities = [ProfilerActivity.CPU]
if device.type == "cuda":
    profiled_activities.append(ProfilerActivity.CUDA)

with profile(activities=profiled_activities, record_shapes=True) as prof:
    with record_function("data_load"):
        batch_idx, (inputs, targets) = next(trainloader_enum)
        inputs, targets = inputs.to(device), targets.to(device)

sort_key = "cuda_time_total" if device.type == "cuda" else "cpu_time_total"
print(prof.key_averages().table(sort_by=sort_key, row_limit=15))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                              data_load         0.62%     489.000us        99.95%      78.892ms      78.892ms       0.000us         0.00%      68.000us      68.000us             1  
                                               aten::to         6.20%       4.891ms        13.66%      10.778ms       1.796ms       0.000us         0.00%      68.000us      11.333us             6  
         

In [51]:
# Build the network and move it onto the selected device.
net = resnet50_bottleneck(large_mode=True).to(device)

# Cross-entropy loss, an Adam optimiser over all network parameters, and a
# schedule that scales the learning rate by `gamma` at milestones 40 and 80
# (counted in calls to `scheduler.step()`).
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=lr)
scheduler = optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[40, 80],
    gamma=0.1,
)

In [None]:
# Train `net` for `epochs` passes over `trainloader`, printing the mean loss of
# every 100 mini-batches and the total wall-clock training time at the end.

# CUDA work is queued asynchronously, so drain anything still pending from
# earlier cells before starting the clock.
if device.type == "cuda":
    torch.cuda.synchronize()
start = time.perf_counter()

# Train
for epoch in range(0, epochs):
    net.train()
    print('\nEpoch: %d' % epoch)
    running_loss = 0.0

    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs,labels)
        loss.backward()
        optimizer.step()

        # .item() copies the scalar loss back to the host as a Python float.
        running_loss += loss.item()

        # Drop the references to this batch before the next one is loaded.
        del inputs, labels

        # Report (and reset) the running average once every 100 mini-batches.
        if i % 100 == 99:
            print(f'Average loss: {running_loss / 100}')
            running_loss = 0.0

    # The learning-rate schedule advances once per epoch.
    scheduler.step()

# Wait for the last queued GPU kernels (e.g. the final optimizer step) to
# finish so they are included in the measured time.
if device.type == "cuda":
    torch.cuda.synchronize()
stop = time.perf_counter()

print(f'Total training time: {stop-start}s')


Epoch: 0
Average loss: 4.602475605010986
Average loss: 4.5941401290893555
Average loss: 4.592751932144165
Average loss: 4.59194890499115
Average loss: 4.588705639839173
Average loss: 4.584306879043579
Average loss: 4.581297531127929

Epoch: 1
Average loss: 4.5835423040390015
Average loss: 4.587651910781861
Average loss: 4.580157947540283
Average loss: 4.57881489276886
Average loss: 4.582628808021545
Average loss: 4.576735816001892
Average loss: 4.572474827766419

Epoch: 2
Average loss: 4.573218035697937
Average loss: 4.56801595211029
Average loss: 4.5822995042800905
Average loss: 4.585709176063538
Average loss: 4.577975063323975
Average loss: 4.575355472564698
Average loss: 4.570017056465149

Epoch: 3
Average loss: 4.575529613494873
Average loss: 4.573502645492554
Average loss: 4.572918243408203
Average loss: 4.571292500495911
Average loss: 4.564483857154846
Average loss: 4.562680125236511
Average loss: 4.569099345207214

Epoch: 4
Average loss: 4.56195634841919
Average loss: 4.5687694