In [3]:
import os
if not (os.path.exists('./NaturalImageNetTest/') and os.path.exists('./NaturalImageNetTrain/')):
    # !wget https://zenodo.org/record/5846979/files/NaturalImageNetTest.zip?download=1
    # !wget https://zenodo.org/record/5846979/files/NaturalImageNetTrain.zip?download=1
    !unzip -q NaturalImageNetTest.zip?download=1
    !unzip -q NaturalImageNetTrain.zip?download=1

#torch
import time
import torch
from torch.nn import Conv2d, MaxPool2d, AvgPool2d
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler
from torchvision import datasets, transforms
from torchvision.utils import save_image, make_grid
from torchinfo import summary
import matplotlib.pyplot as plt
import numpy as np
# set the seed for reproducibility
rng_seed = 90
torch.manual_seed(rng_seed)

import torch_directml  # directml plugin
device = torch_directml.device(0)  # 0 for discrete, 1 for integrated
print(torch_directml.device_name(0))

%reload_ext watermark
%watermark

AMD Radeon RX 6900 XT 
Last updated: 2023-03-29T14:14:28.278123+01:00

Python implementation: CPython
Python version       : 3.8.13
IPython version      : 8.6.0

Compiler    : GCC 11.2.0
OS          : Linux
Release     : 5.10.16.3-microsoft-standard-WSL2
Machine     : x86_64
Processor   : x86_64
CPU cores   : 32
Architecture: 64bit



In [4]:
mean = torch.Tensor([0.485, 0.456, 0.406])
std = torch.Tensor([0.229, 0.224, 0.225])
transform = transforms.Compose(
        [
            transforms.Resize(256),
            transforms.CenterCrop(256),
            transforms.ToTensor(),
            transforms.Normalize(mean.tolist(), std.tolist()),
        ]
    )
train_path = './NaturalImageNetTrain'
test_path = './NaturalImageNetTest'

train_dataset = datasets.ImageFolder(train_path, transform=transform)
test_dataset = datasets.ImageFolder(test_path, transform=transform)

# Create train val split
n = len(train_dataset)
n_val = int(n/10)
train_set, val_set = torch.utils.data.random_split(train_dataset, [n-n_val, n_val])

batch_size = 128

loader_train = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=16)
loader_val = DataLoader(val_set, batch_size=batch_size, shuffle=True, num_workers=16)
loader_test = DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=16)

In [5]:
class ResidualBlock(nn.Module): 
    def __init__(self, inchannel, outchannel, stride=1): 
        super(ResidualBlock, self).__init__() 
        self.left = nn.Sequential(Conv2d(inchannel, outchannel, kernel_size=3, 
                                         stride=stride, padding=1, bias=False), 
                                  nn.BatchNorm2d(outchannel), 
                                  nn.ReLU(inplace=True), 
                                  Conv2d(outchannel, outchannel, kernel_size=3, 
                                         stride=1, padding=1, bias=False), 
                                  nn.BatchNorm2d(outchannel)) 
        self.shortcut = nn.Sequential()
        if stride != 1 or inchannel != outchannel: 
            self.shortcut = nn.Sequential(Conv2d(inchannel, outchannel, 
                                                 kernel_size=1, stride=stride, 
                                                 padding = 0, bias=False), 
                                          nn.BatchNorm2d(outchannel) ) 
            
    def forward(self, x): 
        out = self.left(x) 
        out += self.shortcut(x) 
        out = F.relu(out) 
        return out


class ResNet(nn.Module):
    
    def __init__(self, ResidualBlock, num_classes = 20):
        super(ResNet, self).__init__()
        self.inchannel = 8
        self.conv1 = nn.Sequential(Conv2d(3, 8, kernel_size = 3, stride = 1,
                                            padding = 1, bias = False), 
                                  nn.BatchNorm2d(8), 
                                  nn.ReLU())
        self.layer1 = self.make_layer(ResidualBlock, 8, 2, stride = 2)
        self.layer2 = self.make_layer(ResidualBlock, 16, 2, stride = 2)
        self.layer3 = self.make_layer(ResidualBlock, 32, 2, stride = 2)
        self.layer4 = self.make_layer(ResidualBlock, 64, 2, stride = 2)
        self.layer5 = self.make_layer(ResidualBlock, 128, 2, stride = 2)
        self.layer6 = self.make_layer(ResidualBlock, 256, 2, stride = 2)
        self.maxpool = MaxPool2d(4)
        self.fc = nn.Linear(256, num_classes)
        
    
    def make_layer(self, block, channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.inchannel, channels, stride))
            self.inchannel = channels
        return nn.Sequential(*layers)
    
    
    def forward(self, x):

        x = self.conv1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.maxpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

def MyResNet():
    return ResNet(ResidualBlock)

### Train functions

In [12]:
dtype = torch.float32
print_every = 1

def train_part(model, optimizer, epochs=1):
    model = model.to(device=device)
    for e in range(epochs):
        start = time.time()
        for t, (x, y) in enumerate(loader_train):
            model.train()
            x = x.to(device=device, dtype=dtype)
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = F.cross_entropy(scores, y)

            optimizer.zero_grad()
            print('\nBefore loss.backward()')
            for param in optimizer.param_groups[0]['params']:
                # print(param)
                print('Grad', param.grad)            

            loss.backward()
            print('\nAfter loss.backward(), before optimiser.step()')
            for param in optimizer.param_groups[0]['params']:
#                 print(param)
                print('Grad', param.grad)

            optimizer.step()
            print('\nAfter optimiser.step()')
            for param in optimizer.param_groups[0]['params']:
                # print(param)
                print('Grad', param.grad)

            end = time.time()
            if t % print_every == 0:
                print('Epoch: %d, Iteration %d, loss = %.4f, epoch time = %d s' %
                      (e, t, loss.item(), end-start), end='\r')
            raise Exception

#### Train

In [13]:
# define and train the network
model = MyResNet()
print('device:', device)
print(summary(model, input_size=(batch_size, 3, 256, 256)))

optimizer = optim.Adamax(model.parameters(), lr=1e-4, weight_decay=1e-7) 
# optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-7)

params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Total number of parameters is: {}".format(params))

train_part(model, optimizer, epochs = 10)


device: privateuseone:0
Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [128, 20]                 --
├─Sequential: 1-1                        [128, 8, 256, 256]        --
│    └─Conv2d: 2-1                       [128, 8, 256, 256]        216
│    └─BatchNorm2d: 2-2                  [128, 8, 256, 256]        16
│    └─ReLU: 2-3                         [128, 8, 256, 256]        --
├─Sequential: 1-2                        [128, 8, 128, 128]        --
│    └─ResidualBlock: 2-4                [128, 8, 128, 128]        --
│    │    └─Sequential: 3-1              [128, 8, 128, 128]        1,184
│    │    └─Sequential: 3-2              [128, 8, 128, 128]        80
│    └─ResidualBlock: 2-5                [128, 8, 128, 128]        --
│    │    └─Sequential: 3-3              [128, 8, 128, 128]        1,184
│    │    └─Sequential: 3-4              [128, 8, 128, 128]        --
├─Sequential: 1-3                        [128, 16, 64,

Exception: 