# Chapter 3 : Convolutional Neural Networks

## Improving our earlier model using convolutions

In [10]:
# imports
import torch, torchvision
import torch.utils.data as data
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torchvision import transforms
from torchvision.datasets import ImageFolder
from PIL import Image, ImageFile

ImageFile.LOAD_TRUNCATED_IMAGES = True

In [20]:
# select the compute device: CUDA when it is available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(device)

cuda


## Importing Data

In [3]:
# dataset split directories, given relative to the notebook's working directory
train_path = "../Chapter 2 - Image Clasification with Pytorch/train"
val_path = "../Chapter 2 - Image Clasification with Pytorch/val"
test_path = "../Chapter 2 - Image Clasification with Pytorch/test"

# preprocessing settings: target (height, width) and per-channel normalisation
# statistics (these are the commonly used ImageNet RGB mean / std values)
image_size = (64, 64)
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# transform pipeline: resize -> tensor -> per-channel normalisation
preprocessing_steps = [
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
transformation = transforms.Compose(preprocessing_steps)


In [4]:
# validity filter used while scanning image folders
def check_image(path):
    """Return True when PIL can open ``path`` as an image, False otherwise.

    Only ``Image.open`` is attempted, so this checks that the file can be
    identified as an image; it does not decode the pixel data.

    Args:
        path: location of the candidate image file.

    Returns:
        bool: True if the file opened without raising, False if opening
        raised any ordinary exception.
    """
    try:
        # Image.open leaves the underlying file open; the context manager
        # closes it so scanning a large folder does not leak file handles.
        with Image.open(path):
            return True
    except Exception:
        # Best-effort filter: unreadable files are skipped, but
        # KeyboardInterrupt / SystemExit are allowed to propagate.
        return False

In [6]:
# build one ImageFolder dataset per split (train, val, test, in that order);
# every split shares the same transform pipeline and file-validity filter
train_data, val_data, test_data = [
    ImageFolder(root=split_root, transform=transformation, is_valid_file=check_image)
    for split_root in (train_path, val_path, test_path)
]

In [11]:
# number of samples per mini-batch
batch_size = 128

# setting up Data Loaders
# The training loader is shuffled every epoch: ImageFolder lists samples class
# by class, so unshuffled batches would be (almost) single-class and give the
# optimizer heavily biased gradient estimates.
train_loader = data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
# evaluation loaders keep a fixed order so their metrics are reproducible
val_loader = data.DataLoader(val_data, batch_size=batch_size)
test_loader = data.DataLoader(test_data, batch_size=batch_size)

## Creating Model

In [12]:
class AlexNet(nn.Module):
    """AlexNet-style CNN: convolutional features, 6x6 adaptive pooling, MLP head.

    Args:
        num_classes: number of outputs of the final linear layer (default 2).
    """

    def __init__(self, num_classes=2):
        super().__init__()

        # block 1 : convolutional feature extractor (5 conv layers, 3 max-pools)
        conv_stack = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        # pools every feature map to a fixed 6x6 grid, so the classifier's
        # input width does not depend on the input image resolution
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        # block 2 : fully connected classifier with dropout on the first two layers
        flat_width = 256 * 6 * 6
        head = [
            nn.Dropout(p=0.5),
            nn.Linear(flat_width, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Run a batch through features, pooling and classifier.

        Returns the classifier output, one row of ``num_classes`` values per
        input sample.
        """
        pooled = self.avgpool(self.features(x))
        # keep the batch dimension, collapse everything else into one axis
        flat = pooled.flatten(start_dim=1)
        return self.classifier(flat)


In [21]:
# instantiate the model with its default arguments
alexnet = AlexNet()

# move the model to the selected device; as the last expression of the
# cell, the returned module's repr is what the notebook displays
alexnet.to(device)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.5, inplace=False)
    (4

In [17]:
# loss function: cross-entropy with its default settings
loss_fnc = nn.CrossEntropyLoss()

In [18]:
# Adam optimizer over the parameters of the current `alexnet` instance
# NOTE(review): the optimizer only updates the parameter tensors handed to it
# here, so this cell must be re-run whenever `alexnet` is re-created. The saved
# execution counts (this cell In [18], the model cell In [21]) suggest the model
# was rebuilt afterwards, which would explain a validation loss that is
# identical in every epoch -- confirm with Restart Kernel & Run All.
optimizer = optim.Adam(alexnet.parameters(), lr=0.001)

## Training Loop

In [27]:
# defining train function
def train(model: nn.Module, loss_fn, optimizer, train_batches, val_batches, epochs=20, device="cpu"):
    """Train ``model`` and print loss / accuracy after every epoch.

    Args:
        model: network to optimise; switched between train and eval mode here.
        loss_fn: callable ``loss_fn(output, targets)`` returning a scalar loss.
            It is treated as a per-batch mean (it is multiplied by the batch
            size when accumulated) -- TODO confirm for custom loss functions.
        optimizer: optimizer whose ``zero_grad`` / ``step`` are called once per
            training batch; it must already hold ``model``'s parameters.
        train_batches: iterable of ``(inputs, targets)`` batches that exposes a
            ``.dataset`` attribute with a length (e.g. a ``DataLoader``).
        val_batches: same contract as ``train_batches``, used for evaluation.
        epochs: number of passes over ``train_batches``.
        device: device every batch is moved to before the forward pass.

    Returns:
        None. One summary line is printed per epoch, then a completion banner.
    """
    for epoch in range(1, epochs + 1):

        # Stage 1 : Training
        model.train()
        train_loss = 0.0

        for inputs, targets in train_batches:
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            output = model(inputs)

            # Back-propagate the loss exactly as loss_fn returns it. Scaling it
            # by the batch size first would scale every gradient by that factor.
            loss = loss_fn(output, targets)
            loss.backward()
            optimizer.step()

            # accumulate the summed loss of this batch (batch loss * batch size)
            train_loss += loss.item() * inputs.size(0)
        # average training loss per sample
        train_loss /= len(train_batches.dataset)

        # Stage 2 : Evaluation
        model.eval()
        val_loss = 0.0
        num_correct = 0
        total_examples = 0

        # no gradients are needed for evaluation; skipping graph construction
        # saves memory and time
        with torch.no_grad():
            for inputs, targets in val_batches:
                inputs, targets = inputs.to(device), targets.to(device)

                output = model(inputs)
                val_loss += loss_fn(output, targets).item() * inputs.size(0)

                # the predicted class is the index of the largest output along
                # dim 1 (dim 0 is the batch); softmax would not change the arg-max
                predicted = output.argmax(dim=1)
                num_correct += (predicted == targets).sum().item()
                total_examples += targets.size(0)
        # average validation loss per sample
        val_loss /= len(val_batches.dataset)

        # printing after completing the epoch
        print(f"Epoch : {epoch}\t| Training Loss : {train_loss:.4f} | Validation Loss : {val_loss:.4f} | Accuracy : {(num_correct/total_examples)*100:.2f}% |")
    print("=== TRAINING COMPLETE ===")



In [28]:
# run 10 training epochs on the selected device, validating after each epoch
train(alexnet, loss_fnc, optimizer, train_loader, val_loader, epochs=10, device=device)



Epoch : 1	| Training Loss : 0.6930 | Validation Loss : 0.6911 | Accuracy : 82.86% |
Epoch : 2	| Training Loss : 0.6927 | Validation Loss : 0.6911 | Accuracy : 82.86% |
Epoch : 3	| Training Loss : 0.6929 | Validation Loss : 0.6911 | Accuracy : 82.86% |
Epoch : 4	| Training Loss : 0.6931 | Validation Loss : 0.6911 | Accuracy : 82.86% |
Epoch : 5	| Training Loss : 0.6929 | Validation Loss : 0.6911 | Accuracy : 82.86% |
Epoch : 6	| Training Loss : 0.6932 | Validation Loss : 0.6911 | Accuracy : 82.86% |
Epoch : 7	| Training Loss : 0.6929 | Validation Loss : 0.6911 | Accuracy : 82.86% |
Epoch : 8	| Training Loss : 0.6930 | Validation Loss : 0.6911 | Accuracy : 82.86% |
Epoch : 9	| Training Loss : 0.6928 | Validation Loss : 0.6911 | Accuracy : 82.86% |
Epoch : 10	| Training Loss : 0.6929 | Validation Loss : 0.6911 | Accuracy : 82.86% |
=== TRAINING COMPLETE ===


## Using Pre-Trained Networks

In [29]:
import torchvision.models as models

# load torchvision's AlexNet with pretrained weights (1000 output classes)
# NOTE(review): this rebinds `alexnet`, so the custom model built earlier in
# the notebook is no longer reachable under that name.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` -- check against the installed version before changing.
alexnet = models.alexnet(num_classes=1000, pretrained=True)

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to C:\Users\angad/.cache\torch\hub\checkpoints\alexnet-owt-7be5be79.pth
100.0%


In [30]:
# fetch ResNet-50 through torch.hub together with its pretrained weights;
# without `pretrained=True` the entry point returns a randomly initialised
# network, which is not what this "pre-trained networks" section is about
resnet50 = torch.hub.load('pytorch/vision', 'resnet50', pretrained=True)

Downloading: "https://github.com/pytorch/vision/zipball/main" to C:\Users\angad/.cache\torch\hub\main.zip


In [31]:
# show the layer structure of the model currently bound to `alexnet`
print(alexnet)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [32]:
# show the layer structure of the ResNet-50 loaded through torch.hub
print(resnet50)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 