# Benchmarking basic CNN training

In [None]:
import time
import random
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models

In [None]:
# Benchmark configuration shared by the cells below.
batch_size = 128  # samples per mini-batch requested from the DataLoader
num_iters = 10  # mini-batches per epoch (dataset length is batch_size * num_iters)
# Target device for the model and the input batches. PyTorch interprets a
# bare integer as a CUDA device ordinal, so 0 selects the first GPU.
device = 0


class SyntheticDataset(Dataset):
    """Map-style dataset that fabricates a random sample on every access.

    Nothing is read from disk: each lookup draws a fresh random tensor and
    a fresh random integer label, so the requested index has no influence
    on the returned sample.
    """

    def __len__(self):
        """Return the number of samples in one epoch.

        The length is derived from the module-level ``batch_size`` and
        ``num_iters`` settings.
        """
        return num_iters * batch_size

    def __getitem__(self, idx):
        """Return a freshly generated ``(image, label)`` pair.

        Args:
            idx: Requested sample index; ignored.

        Returns:
            A tuple of a ``torch.randn`` tensor of shape ``(3, 224, 224)``
            and a Python ``int`` drawn uniformly from ``0`` to ``999``
            inclusive.
        """
        image = torch.randn(3, 224, 224)
        label = random.randint(0, 999)
        return image, label
    

# Single dataset instance that feeds the loader below.
train_set = SyntheticDataset()

# Samples are generated on access, so the loader is left unshuffled.
# Batches are assembled by 12 worker processes.
train_loader = DataLoader(dataset=train_set, batch_size=batch_size,
                          shuffle=False, num_workers=12)

In [None]:
# Build a ResNet-50 (no pretrained weights are requested) and place it on the
# benchmark device. Module.to() returns the module itself, so the chained
# call leaves `model` bound to the moved network.
model = models.resnet50().to(device)

In [None]:
# SGD over all model parameters; only the learning rate is set, every other
# optimizer option is left at its library default.
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [None]:
def benchmark_step(model, imgs, labels):
    """Run one training iteration: forward, loss, backward, parameter update.

    Uses the module-level ``optimizer`` to clear the gradients and to apply
    the update.

    Args:
        model: Network called on ``imgs`` to produce the predictions.
        imgs: Input batch passed to ``model``.
        labels: Targets passed to ``F.cross_entropy`` with the predictions.

    Returns:
        None.
    """
    optimizer.zero_grad()
    batch_loss = F.cross_entropy(model(imgs), labels)
    batch_loss.backward()
    optimizer.step()

In [None]:
num_epochs = 5
imgs_sec = []  # throughput of each epoch, in images per second

# CUDA kernels are queued asynchronously, so the host clock is only
# meaningful once the device has finished its pending work. torch.device()
# resolves a bare integer such as 0 to a CUDA ordinal.
on_gpu = torch.device(device).type == 'cuda'

for epoch in range(num_epochs):
    # Drain work left over from before the timed region starts.
    if on_gpu:
        torch.cuda.synchronize(device)
    # perf_counter() is monotonic, unlike the wall clock from time.time().
    t0 = time.perf_counter()

    # NOTE: iterating train_loader happens inside the timed region, so any
    # per-epoch start-up cost of the loader is part of the measurement.
    num_imgs = 0
    for imgs, labels in train_loader:
        imgs = imgs.to(device)
        labels = labels.to(device)
        benchmark_step(model, imgs, labels)
        # Count what was actually processed instead of assuming
        # batch_size * num_iters.
        num_imgs += imgs.size(0)

    # Wait for the last queued kernels before stopping the timer.
    if on_gpu:
        torch.cuda.synchronize(device)
    dt = time.perf_counter() - t0
    imgs_sec.append(num_imgs / dt)

    print(f' * Epoch {epoch:2d}: '
          f'{imgs_sec[-1]:.2f} images/sec per GPU')