Attempt at adding a layer to the previous ResNet model created following [kaggle notebook ResNet for MNIST with PyTorch](https://www.kaggle.com/readilen/resnet-for-mnist-with-pytorch?scriptVersionId=6942243) and the [PyTorch Tutorial on implementation of a ResNet model](https://pytorch-tutorial.readthedocs.io/en/latest/tutorial/chapter03_intermediate/3_2_2_cnn_resnet_cifar10/) to see what happens, if the accuracy increases. Code is due to [Liu Kuangs's extensive code](https://github.com/kuangliu).

In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

### Dataset
***

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [8]:
transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor()])

In [9]:
train_dataset = torchvision.datasets.CIFAR10(root='../cifar-10-batches-py/',
                                             train=True, 
                                             transform=transform)

test_dataset = torchvision.datasets.CIFAR10(root='../cifar-10-batches-py/',
                                            train=False, 
                                            transform=transforms.ToTensor())

In [10]:
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=100, 
                                           shuffle=True) #Original tutorial has shuffle=True

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=100, 
                                          shuffle=False)

### Creating classes and functions related to ResNet with 4 layers

In [49]:
def conv3x3(in_channels, out_channels, stride=1):
    print("function conv3x3")
    print("In channels ", in_channels)
    print("Out channels ", out_channels)
    return nn.Conv2d(in_channels,
                     out_channels,
                     kernel_size=3,
                     stride=stride,
                     padding=1,
                     bias=False)

In [50]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
    
    def forward(self, x):
        print("ResidualBlock forward")
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        
        if self.downsample:
            residual = self.downsample(x)
            
        out += residual
        out = self.relu(out)
        
        return out

In [74]:
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 16
        print("conv")
        self.conv = conv3x3(3, 16)  # 1 when using mnist, 3 when using cifar10
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        
        print("\nlayer1")
        self.layer1 = self.make_layer(block, 16, layers[0])
        print("\nlayer2")
        self.layer2 = self.make_layer(block, 32, layers[1], 2)
        print("\nlayer3")
        self.layer3 = self.make_layer(block, 64, layers[2], 2)
        print("\nlayer4")
        self.layer4 = self.make_layer(block, 128, layers[3], 2)
        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64, num_classes)
        
    
    def make_layer(self, block, out_channels, blocks, stride=1):
        
        downsample = None
        
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        
        for i in range(1, blocks):
            layers.append(block(out_channels, out_channels))
        
        return nn.Sequential(*layers)
    
    def forward(self, x):
        print("ResNet forward")
        print(x.shape)
        print(type(x))
        print(self.conv.weight)
        print(self.conv.weight.shape)
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        
        return out

### Training model
***

In [75]:
net_args = {
    "block" : ResidualBlock,
    "layers": [2, 2, 2, 2]
}

model = ResNet(**net_args).to(device)

conv
function conv3x3
In channels  3
Out channels  16

layer1
function conv3x3
In channels  16
Out channels  16
function conv3x3
In channels  16
Out channels  16
function conv3x3
In channels  16
Out channels  16
function conv3x3
In channels  16
Out channels  16

layer2
function conv3x3
In channels  16
Out channels  32
function conv3x3
In channels  16
Out channels  32
function conv3x3
In channels  32
Out channels  32
function conv3x3
In channels  32
Out channels  32
function conv3x3
In channels  32
Out channels  32

layer3
function conv3x3
In channels  32
Out channels  64
function conv3x3
In channels  32
Out channels  64
function conv3x3
In channels  64
Out channels  64
function conv3x3
In channels  64
Out channels  64
function conv3x3
In channels  64
Out channels  64

layer4
function conv3x3
In channels  64
Out channels  128
function conv3x3
In channels  64
Out channels  128
function conv3x3
In channels  128
Out channels  128
function conv3x3
In channels  128
Out channels  128
function

In [60]:
num_epochs = 80
learning_rate = 0.001

In [61]:
error = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [62]:
def update_lr(optimizer, lr):    
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [30]:
total_step = len(train_loader)
curr_lr = learning_rate

In [76]:
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.resize_(100, 1, 32, 32)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = error(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print ("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Decay learning rate
    if (epoch+1) % 20 == 0:
        curr_lr /= 3
        update_lr(optimizer, curr_lr)

ResNet forward
torch.Size([100, 1, 32, 32])
<class 'torch.Tensor'>
Parameter containing:
tensor([[[[-7.4735e-02,  1.3674e-01,  6.0271e-02],
          [ 1.7892e-01, -1.8771e-01, -1.4566e-01],
          [ 7.8874e-02,  3.9991e-02,  1.2911e-01]],

         [[-3.7161e-02, -8.2767e-02,  7.3420e-02],
          [ 1.3777e-01, -7.3602e-02, -1.1098e-01],
          [-3.8359e-02,  4.4810e-02, -1.6561e-01]],

         [[ 1.4400e-01, -1.2958e-01,  2.7100e-02],
          [-8.7010e-03,  6.8463e-02,  1.3647e-01],
          [-1.0501e-01, -1.2591e-01, -7.0783e-02]]],


        [[[-1.6773e-01,  1.2532e-01, -1.0948e-01],
          [-1.0701e-01, -1.4782e-01, -1.4778e-01],
          [-1.6182e-01,  4.3212e-02, -1.5102e-01]],

         [[-1.6138e-01,  1.0512e-01, -6.4897e-02],
          [ 6.8898e-02, -9.2888e-02,  1.8196e-01],
          [ 5.6517e-02, -1.6841e-02, -1.2339e-01]],

         [[-1.8738e-01,  1.8412e-01, -1.3791e-01],
          [-6.1126e-02,  1.8902e-01, -5.1872e-02],
          [ 8.8434e-02, -1.4609e

RuntimeError: Given groups=1, weight of size [16, 3, 3, 3], expected input[100, 1, 32, 32] to have 3 channels, but got 1 channels instead

In [21]:
type(images)

torch.Tensor

In [None]:
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.resize_(100, 1, 32, 32).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))