In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

print(torch.__version__)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

2.1.0+cu118
cuda


In [2]:
class Bottleneck(nn.Module):
    expansion = 4
    def __init__(self, in_channels, out_channels, downsample=None, stride=1):
        super(Bottleneck, self).__init__()
        
        # original paper places the stride here but pytorch and other implementations place it in the 3x3 convolution layer conv2
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.batch_norm1 = nn.BatchNorm2d(out_channels)
        
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.batch_norm2 = nn.BatchNorm2d(out_channels)
        
        self.conv3 = nn.Conv2d(out_channels, out_channels*self.expansion, kernel_size=1, stride=1, padding=0)
        self.batch_norm3 = nn.BatchNorm2d(out_channels*self.expansion)
        
        self.downsample = downsample
        self.stride = stride
        self.relu = nn.ReLU()
        
    def forward(self, x):
        identity = x.clone()

        x = self.conv1(x)
        x = self.batch_norm1(x)
        x = self.relu(x)

        x = self.conv2(x)
        x = self.batch_norm2(x)
        x = self.relu(x)
        
        x = self.batch_norm3(self.conv3(x))

        #downsample identity if needed
        if self.downsample is not None:
            identity = self.downsample(identity)

        #add shortcut/skip connection
        x+=identity
        x=self.relu(x)
        
        return x

In [3]:
class ResNet(nn.Module):
    def __init__(self, ResBlock, layer_list, num_classes, num_channels=3):
        super(ResNet, self).__init__()
        
        self.in_channels = 64
        
        self.conv1 = nn.Conv2d(num_channels, self.in_channels, kernel_size=7, stride=2, padding=3, bias=False)
        self.batch_norm1 = nn.BatchNorm2d(self.in_channels)
        self.relu = nn.ReLU()
        self.max_pool = nn.MaxPool2d(kernel_size = 3, stride=2, padding=1)
        
        self.layer1 = self._make_layer(ResBlock, layer_list[0], out_channels=64)
        self.layer2 = self._make_layer(ResBlock, layer_list[1], out_channels=128, stride=2)
        self.layer3 = self._make_layer(ResBlock, layer_list[2], out_channels=256, stride=2)
        self.layer4 = self._make_layer(ResBlock, layer_list[3], out_channels=512, stride=2)
        
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(512*ResBlock.expansion, num_classes)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.batch_norm1(x)
        x = self.relu(x)
        x = self.max_pool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        
        return x
        
    def _make_layer(self, ResBlock, blocks, out_channels, stride=1):
        _downsample = None
        layers = []
        
        if stride != 1 or self.in_channels != out_channels*ResBlock.expansion:
            _downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels*ResBlock.expansion, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels*ResBlock.expansion)
            )
            
        layers.append(ResBlock(self.in_channels, out_channels, downsample=_downsample, stride=stride))
        self.in_channels = out_channels*ResBlock.expansion
        
        for i in range(blocks-1):
            layers.append(ResBlock(self.in_channels, out_channels))
            
        return nn.Sequential(*layers)


In [4]:
def ResNet50(num_classes, channels=1):
    return ResNet(Bottleneck, [3,4,6,3], num_classes, channels)

In [5]:
model = ResNet50(10, 1).to(device)

In [6]:
model

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (batch_norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
  (max_pool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
      (batch_norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (batch_norm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
      (batch_norm3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
        (1): BatchNorm2d(256, eps=1e-05, mom

In [7]:
import torch.optim as optim
import torchvision.transforms as transforms

from torch.utils.data import DataLoader

from torchvision import datasets
from torchvision import transforms

from PIL import Image



train_dataset = datasets.MNIST(root='data', 
                               train=True, 
                               transform=transforms.ToTensor(),
                               download=True)

train_loader = DataLoader(dataset=train_dataset, 
                          batch_size=100, 
                          shuffle=True)

test_dataset = datasets.MNIST(root='data', 
                              train=False, 
                              transform=transforms.ToTensor())

test_loader = DataLoader(dataset=test_dataset, 
                         batch_size=1, 
                         shuffle=False)

# Checking the dataset
for images, labels in train_loader:  
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break


Image batch dimensions: torch.Size([100, 1, 28, 28])
Image label dimensions: torch.Size([100])


In [8]:
def checkpoint(model, filename):
    torch.save(model.state_dict(), filename)
    
def resume(model, filename):
    model.load_state_dict(torch.load(filename))

In [9]:
# Model
print('Building model with device:', device)

# Hyper-parameters
num_epochs = 5
start_epoch = 0
learning_rate = 0.001

# Loss and optimizer
lossfn = nn.CrossEntropyLoss()

print(model.parameters())

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# For updating learning rate
def update_lr(optimizer, lr):
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch

# Train the model
total_step = len(train_loader)
current_lr = learning_rate

if start_epoch > 0:
    resume_epoch = start_epoch - 1
    resume(model, f"epoch-{resume_epoch}.pth")

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = lossfn(outputs, labels)

        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print ("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Decay learning rate
    if (epoch+1) % 20 == 0:
        current_lr /= 3
        update_lr(optimizer, current_lr)

    model.eval()

    correct = 0
    total = 0

    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
    print(f"End of epoch {epoch}: accuracy = {(100 * correct / total)}%")
    checkpoint(model, f"checkpoints/2.epoch-{epoch}.pth")

# Save the model checkpoint
torch.save(model.state_dict(), 'resnet.ckpt')


Building model with device: cuda
<generator object Module.parameters at 0x0000019AF163BA00>
Epoch [1/5], Step [100/600] Loss: 0.1633
Epoch [1/5], Step [200/600] Loss: 0.0713
Epoch [1/5], Step [300/600] Loss: 0.0720
Epoch [1/5], Step [400/600] Loss: 0.0737
Epoch [1/5], Step [500/600] Loss: 0.0836
Epoch [1/5], Step [600/600] Loss: 0.0212
End of epoch 0: accuracy = 97.57%
Epoch [2/5], Step [100/600] Loss: 2.1478
Epoch [2/5], Step [200/600] Loss: 1.5738
Epoch [2/5], Step [300/600] Loss: 1.0998
Epoch [2/5], Step [400/600] Loss: 0.4753
Epoch [2/5], Step [500/600] Loss: 0.4997
Epoch [2/5], Step [600/600] Loss: 0.1128
End of epoch 1: accuracy = 91.46%
Epoch [3/5], Step [100/600] Loss: 0.1435
Epoch [3/5], Step [200/600] Loss: 0.3276
Epoch [3/5], Step [300/600] Loss: 0.0778
Epoch [3/5], Step [400/600] Loss: 0.1141
Epoch [3/5], Step [500/600] Loss: 0.0781
Epoch [3/5], Step [600/600] Loss: 0.0982
End of epoch 2: accuracy = 95.68%
Epoch [4/5], Step [100/600] Loss: 0.1658
Epoch [4/5], Step [200/600]

In [17]:
#End of epoch 21: accuracy = 99.32%

with torch.no_grad():
    indexOfMisGuess = {0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: [], 9: []}
    misGuessesLabel = {0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: [], 9: []}
    correct = 0
    total = 0
    index = 0
    for image, label in test_loader:
        
        image = image.to(device)

        label = label.to(device)


        outputs = model(image)
        _, predicted = torch.max(outputs.data, 1)
        total += label.size(0)
        correct += (predicted == label)

        if(predicted != label):
            indexOfMisGuess[label.item()] += [index]
            misGuessesLabel[label.item()] += [predicted.item()]
        
        index += 1

    print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))

Accuracy of the model on the test images: tensor([97.4900], device='cuda:0') %


In [18]:
indexOfMisGuess

{0: [1621, 4065, 6532, 6597, 6651, 8325, 9634],
 1: [716, 900, 1527, 1909, 2018, 2182, 2343, 3906, 4201, 4507, 6783, 6883],
 2: [321, 583, 990, 2462, 2488],
 3: [381,
  449,
  938,
  1114,
  1166,
  1290,
  1463,
  1681,
  2109,
  2405,
  2921,
  2927,
  2945,
  2952,
  2953,
  3475,
  3549,
  4740,
  4990,
  5067,
  5878,
  5955,
  6011,
  6023,
  6065,
  6564,
  7233,
  7849,
  8246,
  9163,
  9636],
 4: [115,
  247,
  376,
  447,
  532,
  610,
  707,
  712,
  740,
  1242,
  1270,
  1279,
  1357,
  1453,
  1549,
  1956,
  1963,
  2043,
  2053,
  2090,
  2130,
  2148,
  2447,
  2514,
  2678,
  2778,
  3114,
  3288,
  3533,
  3726,
  3780,
  3941,
  3996,
  4194,
  4265,
  4745,
  4783,
  4860,
  5159,
  5201,
  5842,
  5888,
  5936,
  7434,
  8520,
  8527,
  9792],
 5: [1299,
  1393,
  1911,
  2125,
  3558,
  3565,
  3778,
  4763,
  5769,
  7842,
  7850,
  9729,
  9770,
  9982],
 6: [259,
  445,
  1014,
  1181,
  2135,
  3030,
  3422,
  3520,
  3749,
  3853,
  4699,
  4814,
  6847,
  

In [19]:
misGuessesLabel

{0: [6, 2, 2, 7, 6, 6, 8],
 1: [7, 5, 5, 7, 2, 2, 7, 2, 9, 7, 6, 2],
 2: [7, 7, 7, 0, 0],
 3: [7,
  5,
  5,
  8,
  8,
  5,
  7,
  7,
  8,
  2,
  2,
  2,
  7,
  5,
  5,
  7,
  2,
  5,
  2,
  2,
  5,
  8,
  5,
  5,
  8,
  7,
  2,
  2,
  9,
  8,
  5],
 4: [9,
  2,
  9,
  9,
  9,
  6,
  9,
  9,
  9,
  9,
  9,
  9,
  9,
  9,
  6,
  9,
  9,
  8,
  9,
  9,
  9,
  9,
  9,
  9,
  9,
  0,
  6,
  9,
  9,
  9,
  6,
  6,
  9,
  9,
  9,
  9,
  9,
  9,
  9,
  9,
  9,
  0,
  9,
  9,
  9,
  9,
  9],
 5: [7, 1, 6, 9, 0, 6, 8, 6, 6, 8, 8, 6, 6, 6],
 6: [0, 0, 8, 1, 1, 0, 0, 4, 0, 0, 1, 0, 4, 2, 5, 0],
 7: [9,
  1,
  2,
  8,
  2,
  9,
  9,
  2,
  2,
  9,
  2,
  2,
  2,
  0,
  9,
  2,
  3,
  9,
  2,
  9,
  9,
  0,
  9,
  2,
  2,
  2,
  2,
  2,
  2],
 8: [2,
  0,
  6,
  9,
  2,
  2,
  6,
  9,
  2,
  6,
  7,
  2,
  0,
  2,
  0,
  0,
  2,
  0,
  5,
  2,
  9,
  0,
  0,
  5,
  2,
  2,
  2,
  2,
  7,
  9,
  2,
  0,
  0,
  6,
  7,
  0,
  0,
  0,
  0,
  0,
  0,
  9,
  5,
  7,
  2,
  0,
  2,
  9,
  9,
  0,
  0,
  0