<a href="https://colab.research.google.com/github/ayushkoirala/Handwriting-recognition-system/blob/master/resnet18_chihuahua_vs_muffindataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch                                           
from torchvision import datasets, models, transforms    
import torch.nn as nn                                   
from torch.nn import functional as F                   
import torch.optim as optim                             

In [None]:
class BasicBlock(nn.Module):
    """Residual block built from two 3x3 convolutions.

    Main path: conv3x3 -> BN -> ReLU -> conv3x3 -> BN.  The block input is
    added back through ``shortcut`` and a final ReLU is applied to the sum.
    """

    # The block emits EXPANSION * out_planes channels (no widening here).
    EXPANSION = 1

    def __init__(self, in_planes, out_planes, stride=1):
        super().__init__()
        conv3x3 = dict(kernel_size=3, padding=1, bias=False)

        # Only the first convolution may change the spatial resolution.
        self.conv1 = nn.Conv2d(in_planes, out_planes, stride=stride, **conv3x3)
        self.bn1 = nn.BatchNorm2d(out_planes)
        self.conv2 = nn.Conv2d(out_planes, out_planes, stride=1, **conv3x3)
        self.bn2 = nn.BatchNorm2d(out_planes)

        if stride == 1 and in_planes == out_planes:
            # Input and output already have the same shape: identity shortcut.
            self.shortcut = nn.Sequential()
        else:
            # Otherwise project the input with a strided 1x1 convolution so
            # it can be added to the main path.
            projection = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
            self.shortcut = nn.Sequential(projection, nn.BatchNorm2d(out_planes))

    def forward(self, x):
        hidden = self.conv1(x)
        hidden = F.relu(self.bn1(hidden))
        hidden = self.bn2(self.conv2(hidden))
        hidden += self.shortcut(x)
        return F.relu(hidden)

In [None]:
class BottleneckBlock(nn.Module):
    """Residual block with three convolutions (1x1, 3x3, 1x1).

    This is the heavier block style, used for ResNet50 and up.  The last 1x1
    convolution widens the output to ``EXPANSION * planes`` channels; the
    input is added back through ``shortcut`` before the final ReLU.
    """

    # Output channels = EXPANSION * planes.
    EXPANSION = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.EXPANSION * planes

        # 1x1 in, 3x3 in the middle (the only strided conv), 1x1 out.
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        if stride == 1 and in_planes == out_planes:
            # Shapes already agree: identity shortcut.
            self.shortcut = nn.Sequential()
        else:
            # Reshape the input with a strided 1x1 convolution so the
            # residual addition is well defined.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = F.relu(self.bn2(self.conv2(hidden)))
        hidden = self.bn3(self.conv3(hidden))
        hidden += self.shortcut(x)
        return F.relu(hidden)

In [None]:
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Per-channel normalisation: (x - 0.5) / 0.5 on each of the three channels.
normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

# Training pipeline: resize to the 32x32 input fed to the network,
# convert the image to a tensor, then normalise.
train_transforms = transforms.Compose(
    [transforms.Resize((32,32)), transforms.ToTensor(), normalize]
)

# Validation uses exactly the same preprocessing as training.
validation_transforms = transforms.Compose(
    [transforms.Resize((32,32)), transforms.ToTensor(), normalize]
)

print("Train transforms:", train_transforms)

# One ImageFolder per split, each paired with its own transform pipeline.
image_datasets = {
    split: datasets.ImageFolder(root, pipeline)
    for split, root, pipeline in (
        ('train', '/content/c-m-dataset/training/', train_transforms),
        ('validation', '/content/c-m-dataset/testing/', validation_transforms),
    )
}

Train transforms: Compose(
    Resize(size=(32, 32), interpolation=bilinear, max_size=None, antialias=None)
    ToTensor()
    Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
)


In [None]:
# Summarise both splits (number of images, root directory, transform pipeline).
print("==Train Dataset==\n", image_datasets["train"])
print()
# Print the held-out split under the validation header; this line previously
# printed the training dataset a second time.
print("==Validation Dataset==\n", image_datasets["validation"])

==Train Dataset==
 Dataset ImageFolder
    Number of datapoints: 16
    Root location: /content/c-m-dataset/training/
    StandardTransform
Transform: Compose(
               Resize(size=(32, 32), interpolation=bilinear, max_size=None, antialias=None)
               ToTensor()
               Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
           )

==Validation Dataset==
 Dataset ImageFolder
    Number of datapoints: 16
    Root location: /content/c-m-dataset/training/
    StandardTransform
Transform: Compose(
               Resize(size=(32, 32), interpolation=bilinear, max_size=None, antialias=None)
               ToTensor()
               Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
           )


In [None]:
# One DataLoader per split: batches of 2 images, loaded by 2 worker processes.
# Only the training split is shuffled; validation keeps its dataset order.
dataloaders = {
    split: torch.utils.data.DataLoader(
        image_datasets[split],
        batch_size=2,
        shuffle=(split == 'train'),
        num_workers=2,
    )
    for split in ('train', 'validation')
}

print("Train loader:", dataloaders["train"])
print("Validation loader:", dataloaders["validation"])

Train loader: <torch.utils.data.dataloader.DataLoader object at 0x7f705a0c7e10>
Validation loader: <torch.utils.data.dataloader.DataLoader object at 0x7f705a885550>


In [None]:
# Pull a single batch from the training loader to inspect it. As the last
# expression of the cell, the resulting [inputs, labels] pair is displayed.
next(iter(dataloaders["train"]))

[tensor([[[[-0.0588, -0.0588, -0.0588,  ..., -0.3490, -0.3725, -0.3725],
           [ 0.0196, -0.0039, -0.0353,  ..., -0.3255, -0.3412, -0.3725],
           [ 0.0510,  0.0275, -0.0039,  ..., -0.2941, -0.3333, -0.3569],
           ...,
           [ 0.2706,  0.2471,  0.2000,  ..., -0.4275, -0.3569, -0.3176],
           [ 0.2784,  0.2549,  0.2235,  ..., -0.4510, -0.3647, -0.3333],
           [ 0.2549,  0.2392,  0.2314,  ..., -0.4588, -0.3882, -0.3647]],
 
          [[-0.0588, -0.0510, -0.0667,  ..., -0.2863, -0.3020, -0.3333],
           [-0.0196, -0.0196, -0.0431,  ..., -0.2941, -0.3176, -0.3412],
           [ 0.0118,  0.0039, -0.0196,  ..., -0.2941, -0.3020, -0.3333],
           ...,
           [ 0.0431, -0.0039, -0.0275,  ..., -0.6078, -0.5373, -0.5137],
           [ 0.0039, -0.0353, -0.0353,  ..., -0.6314, -0.5529, -0.5137],
           [-0.1059, -0.1137, -0.1294,  ..., -0.6471, -0.5686, -0.5294]],
 
          [[ 0.1843,  0.1843,  0.1451,  ...,  0.0039, -0.0196, -0.0667],
           [ 

In [None]:
import os  # interact with the os. in our case, we want to view the file system

# Show what sits in the dataset root and in each split's folder
# (the split folders hold one sub-directory per class).
for _label, _folder in (
    ("Data contents:", "/content/c-m-dataset/"),
    ("Train contents:", "/content/c-m-dataset/training/"),
    ("Validation contents:", "/content/c-m-dataset/testing/"),
):
    print(_label, os.listdir(_folder))

Data contents: ['testing', 'training']
Train contents: ['muffin', 'chihuahua']
Validation contents: ['muffin', 'chihuahua']


In [None]:
class ResNet(nn.Module):
    """ResNet: a 3x3 stem, four residual stages, global average pooling and a linear classifier.

    block: residual block class; it must expose ``EXPANSION`` and be
        constructible as ``block(in_planes, planes, stride)``
    num_blocks: indexable with the number of blocks for each of the four stages
    num_classes: number of outputs of the final linear layer
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Channel count entering the next block; _make_layer advances it.
        self.in_planes = 64

        # Stem: a single 3x3 convolution that keeps the input resolution.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        # Four residual stages; every stage after the first halves the
        # resolution (stride 2) and doubles the base width.
        self.layer1 = self._make_layer(block, planes=64, num_blocks=num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, planes=128, num_blocks=num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, planes=256, num_blocks=num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, planes=512, num_blocks=num_blocks[3], stride=2)

        # Head: pool each feature map to 1x1, then one fully connected layer.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.linear = nn.Linear(512 * block.EXPANSION, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the remaining ones stride 1."""
        stage_strides = [stride, *([1] * (num_blocks - 1))]
        stage = []
        for block_stride in stage_strides:
            stage.append(block(self.in_planes, planes, block_stride))
            # The next block consumes what this one produced.
            self.in_planes = planes * block.EXPANSION
        return nn.Sequential(*stage)

    def forward(self, x):
        features = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)
        pooled = self.avgpool(features)
        # Collapse (batch, channels, 1, 1) to (batch, channels) for the linear layer.
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)

def ResNet18(num_classes = 10):
    '''
    Build a ResNet made of BasicBlocks, two per stage.

    Layer count:
      stem convolution:                                   1
      4 stages x 2 blocks x 2 convolutions per block:    16
      final fully connected layer:                        1
      total:                                             18
    '''
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet(block=BasicBlock, num_blocks=blocks_per_stage, num_classes=num_classes)

In [None]:
# Size the classifier head from the data: ImageFolder lists one entry in
# `classes` per class sub-folder (here 'muffin' and 'chihuahua'), so the
# network emits one logit per real class instead of ResNet18's default of 10.
num_classes = len(image_datasets['train'].classes)
resnet = ResNet18(num_classes=num_classes).to(device)

# Cross-entropy loss for single-label classification.
# Both names are kept so code referring to either one keeps working.
criterion = nn.CrossEntropyLoss()
loss_function = criterion

# Adam over every parameter of the network, with a learning rate of 0.001.
params_to_update = resnet.parameters()
optimizer = optim.Adam(params_to_update, lr=0.001)
# Alternative: Stochastic Gradient Descent, with the same learning rate of 0.001.
#optimizer = optim.SGD(params_to_update, lr=0.001)

In [None]:
# Progress bars for notebooks. `tqdm.notebook` is imported directly because the
# old `tqdm.tnrange` / `tqdm.tqdm_notebook` aliases emit a deprecation warning.
from tqdm.notebook import tqdm, trange

def train_model(model, dataloaders, loss_function, optimizer, num_epochs):
    """
    Trains a model using the given loss function and optimizer, for a certain number of epochs.

    Every epoch runs a 'train' phase (weights are updated) followed by a
    'validation' phase (evaluation only), and prints the average loss and the
    accuracy of each phase.

    model: a PyTorch neural network
    dataloaders: dict with 'train' and 'validation' entries, each an iterable of (inputs, labels) batches
    loss_function: a mathematical function that compares predictions and labels to return an error
    optimizer: the optimizer whose step() updates the model's weights during the 'train' phase
    num_epochs: the number of times to run through the full training dataset
    """
    # Send each batch to wherever the model's weights live, so the function
    # does not depend on a module-level `device` variable.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    # train for n epochs. an epoch is a full iteration through our dataset
    for epoch in trange(num_epochs, desc="Total progress", unit="epoch"):
        # print a header
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('----------------')

        # first train over the dataset and update weights; at the end, calculate our validation performance
        for phase in ['train', 'validation']:
            is_training = phase == 'train'
            # train(True) puts the model in training mode, train(False) is eval()
            model.train(is_training)

            # totals accumulated over the whole phase (not a single batch)
            running_loss = 0.0
            running_corrects = 0
            samples_seen = 0

            # iterate through the inputs and labels in our dataloader
            # (the tqdm wrapper is to display a progress bar)
            for inputs, labels in tqdm(dataloaders[phase], desc=phase, unit="batch", leave=False):
                # move inputs and labels to the model's device (GPU or CPU)
                inputs = inputs.to(model_device)
                labels = labels.to(model_device)
                batch_size = inputs.size(0)

                # Track gradients only while training; validation needs no
                # autograd graph, which saves memory and time.
                with torch.set_grad_enabled(is_training):
                    # FORWARD PASS
                    outputs = model(inputs)
                    # compute the error of the model's predictions
                    loss = loss_function(outputs, labels)

                    if is_training:
                        # BACKWARD PASS
                        optimizer.zero_grad()  # clear the previous gradients
                        loss.backward()        # backpropagate the current error gradients
                        optimizer.step()       # update the weights (i.e. do the learning)

                # track our accumulated loss (loss is a batch mean, so weight it by batch size)
                running_loss += loss.item() * batch_size
                # track number of correct to compute accuracy
                _, preds = torch.max(outputs, 1)
                running_corrects += (preds == labels).sum().item()
                samples_seen += batch_size

            # Average over the samples actually processed in this phase; the
            # max() guard avoids a division by zero on an empty loader.
            denominator = max(samples_seen, 1)
            epoch_loss = running_loss / denominator
            epoch_acc = running_corrects / denominator
            print(f'{phase} error: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}')

In [None]:
# Train the network for ten epochs with the cross-entropy loss and the
# optimizer configured earlier in the notebook.
train_model(
    model=resnet,
    dataloaders=dataloaders,
    loss_function=criterion,
    optimizer=optimizer,
    num_epochs=10,
)
# NOTE: the checkpointing calls below are disabled; they refer to names
# (best_model, *_history) that this notebook never defines.
# torch.save(best_model.state_dict(),'resnet18-cifar-10.pth')
# torch.save(val_acc_history,'resnet-18-cifar-10-val-acc.pth')
# torch.save(train_acc_history,'resnet-18-cifar-10-train-acc.pth')
# torch.save(val_loss_history,'resnet-18-cifar-10-val-loss.pth')
# torch.save(train_loss_history,'resnet-18-cifar-10-train-loss.pth')

  if sys.path[0] == '':


Total progress:   0%|          | 0/10 [00:00<?, ?epoch/s]

Epoch 1/10
----------------


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


train:   0%|          | 0/8 [00:00<?, ?batch/s]

train error: 2.0230, Accuracy: 0.1875


validation:   0%|          | 0/2 [00:00<?, ?batch/s]

validation error: 0.9200, Accuracy: 0.7500
Epoch 2/10
----------------


train:   0%|          | 0/8 [00:00<?, ?batch/s]

train error: 0.5310, Accuracy: 0.6875


validation:   0%|          | 0/2 [00:00<?, ?batch/s]

validation error: 1.9957, Accuracy: 0.5000
Epoch 3/10
----------------


train:   0%|          | 0/8 [00:00<?, ?batch/s]

train error: 0.6613, Accuracy: 0.7500


validation:   0%|          | 0/2 [00:00<?, ?batch/s]

validation error: 1.1006, Accuracy: 0.5000
Epoch 4/10
----------------


train:   0%|          | 0/8 [00:00<?, ?batch/s]

train error: 0.5296, Accuracy: 0.6875


validation:   0%|          | 0/2 [00:00<?, ?batch/s]

validation error: 0.9922, Accuracy: 0.7500
Epoch 5/10
----------------


train:   0%|          | 0/8 [00:00<?, ?batch/s]

train error: 0.6189, Accuracy: 0.6250


validation:   0%|          | 0/2 [00:00<?, ?batch/s]

validation error: 0.6398, Accuracy: 0.7500
Epoch 6/10
----------------


train:   0%|          | 0/8 [00:00<?, ?batch/s]

train error: 0.6853, Accuracy: 0.8125


validation:   0%|          | 0/2 [00:00<?, ?batch/s]

validation error: 0.0417, Accuracy: 1.0000
Epoch 7/10
----------------


train:   0%|          | 0/8 [00:00<?, ?batch/s]

train error: 0.4026, Accuracy: 0.8125


validation:   0%|          | 0/2 [00:00<?, ?batch/s]

validation error: 0.1625, Accuracy: 1.0000
Epoch 8/10
----------------


train:   0%|          | 0/8 [00:00<?, ?batch/s]

train error: 0.3403, Accuracy: 0.9375


validation:   0%|          | 0/2 [00:00<?, ?batch/s]

validation error: 0.2066, Accuracy: 1.0000
Epoch 9/10
----------------


train:   0%|          | 0/8 [00:00<?, ?batch/s]

train error: 0.3077, Accuracy: 0.8750


validation:   0%|          | 0/2 [00:00<?, ?batch/s]

validation error: 0.0596, Accuracy: 1.0000
Epoch 10/10
----------------


train:   0%|          | 0/8 [00:00<?, ?batch/s]

train error: 0.2037, Accuracy: 0.9375


validation:   0%|          | 0/2 [00:00<?, ?batch/s]

validation error: 0.1290, Accuracy: 1.0000
