# Image Classification using a CNN from Scratch in PyTorch
https://youtu.be/9OHlgDjaE2I

In [25]:
# Libraries

import os
import numpy as np
import torch
import glob
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import torchvision, pathlib

In [26]:
# GPU check: the bare expression below is shown as the cell output and reports
# whether PyTorch can use a CUDA device on this machine.
torch.cuda.is_available()

True

In [27]:
# Image preprocessing pipeline

transformer = transforms.Compose(
    [
        # Bring every image to a common 150x150 px resolution.
        transforms.Resize(size=(150, 150)),
        # Random left-right mirroring adds variation to the images the model sees.
        transforms.RandomHorizontalFlip(),
        # Image with 0-255 values -> float tensor with 0-1 values.
        transforms.ToTensor(),
        # Per channel (x - 0.5) / 0.5, which maps the 0-1 range onto -1..1.
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

In [28]:
# Dataloader

train_path = "./archive/seg_train/seg_train"
test_path = "./archive/seg_test/seg_test"

# Evaluation has to be deterministic, so the test set gets the same resize /
# tensor conversion / normalization as the training pipeline but WITHOUT the
# random horizontal flip. Keep these values in sync with `transformer`.
test_transformer = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])

# ImageFolder derives the label of each image from its sub-directory name.
train_dataset = torchvision.datasets.ImageFolder(train_path, transform=transformer)
test_dataset = torchvision.datasets.ImageFolder(test_path, transform=test_transformer)

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
# Sample order has no effect on accuracy, so the test set is not shuffled.
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)

# Count the samples the datasets actually contain instead of globbing for
# "*.jpg": ImageFolder also accepts other image extensions, and a mismatch
# would skew the accuracy/loss averages that are divided by these counts.
train_count = len(train_dataset)
test_count = len(test_dataset)
print(f" Training Data: {train_count}, Testing Data: {test_count}")

 Training Data: 14034, Testing Data: 3000


In [29]:
# Categories in the Dataset
# Only sub-directories are classes (stray files such as .DS_Store are skipped).
# os.listdir/os.scandir return entries in arbitrary order, so sort them: this is
# also the order in which ImageFolder assigns the integer labels 0..N-1.
for catg in sorted(entry.name for entry in os.scandir(train_path) if entry.is_dir()):
    print(f" --> {catg}")

 --> buildings
 --> forest
 --> glacier
 --> mountain
 --> sea
 --> street


# CNN Network

In [30]:
class ConvNet(nn.Module):
    """Small three-block CNN classifier for 150x150 RGB images.

    Layout:
        conv(3->12)  -> batch-norm -> ReLU -> 2x2 max-pool
        conv(12->20) -> batch-norm -> ReLU
        conv(20->32) -> batch-norm -> ReLU
        flatten -> linear(32*75*75 -> num_of_classes)

    Every convolution uses kernel 3, stride 1, padding 1, so it preserves the
    spatial size (output size = ((w - f + 2P) / s) + 1). Only the single
    max-pool halves it: 150x150 -> 75x75.

    Args:
        num_of_classes: number of output logits (defaults to 6).
    """

    def __init__(self, num_of_classes = 6):
        super().__init__()

        # Shapes below are written as (N, C, H, W) for a batch of N images;
        # the expected input is (N, 3, 150, 150).

        # 1st convolution block
        self.conv1 = nn.Conv2d(in_channels=3,
                               out_channels=12,  # 12 feature maps
                               kernel_size=3,
                               stride=1,
                               padding=1)
        self.batch_normal_1 = nn.BatchNorm2d(num_features=12)  # (N, 12, 150, 150)
        self.relu_1 = nn.ReLU()  # non-linearity
        # Halves height and width: (N, 12, 150, 150) -> (N, 12, 75, 75)
        self.max_pool = nn.MaxPool2d(kernel_size=2)

        # 2nd convolution block
        self.conv2 = nn.Conv2d(in_channels=12,
                               out_channels=20,  # 20 feature maps
                               kernel_size=3,
                               stride=1,
                               padding=1)
        self.batch_normal_2 = nn.BatchNorm2d(num_features=20)  # (N, 20, 75, 75)
        self.relu_2 = nn.ReLU()  # non-linearity

        # 3rd convolution block
        self.conv3 = nn.Conv2d(in_channels=20,
                               out_channels=32,  # 32 feature maps
                               kernel_size=3,
                               stride=1,
                               padding=1)
        self.batch_normal_3 = nn.BatchNorm2d(num_features=32)  # (N, 32, 75, 75)
        self.relu_3 = nn.ReLU()  # non-linearity

        # Fully connected classifier over the flattened (32, 75, 75) feature maps
        self.fc = nn.Linear(in_features=32*75*75, out_features=num_of_classes)

    def forward(self, output):
        """Run the forward pass.

        Args:
            output: input image batch of shape (N, 3, 150, 150). The parameter
                keeps its historical name so keyword callers keep working.

        Returns:
            Tensor of shape (N, num_of_classes) holding the raw outputs of the
            final linear layer (no softmax is applied here).

        Raises:
            RuntimeError: if the spatial size is not 150x150, because the
                flattened features then no longer match the linear layer.
        """
        output = self.conv1(output)
        output = self.batch_normal_1(output)
        output = self.relu_1(output)

        output = self.max_pool(output)

        output = self.conv2(output)
        output = self.batch_normal_2(output)
        output = self.relu_2(output)

        output = self.conv3(output)
        output = self.batch_normal_3(output)
        output = self.relu_3(output)

        # Flatten per sample and keep the batch dimension fixed. The previous
        # view(-1, 32*75*75) silently folded extra pixels of a wrong-sized
        # input into the batch dimension instead of failing.
        output = torch.flatten(output, 1)
        output = self.fc(output)

        return output

In [31]:
# Model, loss, optimizer and training length

num_of_epochs = 30  # full passes over the training set
model = ConvNet(num_of_classes=6)
loss_function = nn.CrossEntropyLoss()
optimizer = Adam(params=model.parameters(), lr=1e-3, weight_decay=1e-3)

# CHAT GPT DOCUMENTATION

-> This code defines a CNN class called `ConvNet` that takes an input image and applies three convolutional layers, each with kernel size 3, stride 1 and padding 1.
Each convolutional layer is followed by batch normalization and a ReLU activation function; max pooling is applied only once, after the first convolutional block.

-> The input to the first convolutional layer is an image with 3 channels (RGB) and the output is 12 channels.
The batch normalization is applied to the output of the first convolutional layer with 12 features. The ReLU activation function is applied to introduce non-linearity to the model.
The max pooling reduces the size of the output by a factor of 2.

-> Similarly, the second and third convolutional layers are applied, with 20 and 32 output channels, respectively.
Batch normalization and the ReLU activation function are applied to the output of each of these layers as well; no further max pooling is applied, so the feature maps stay at 75x75.

Finally, the output of the third convolutional layer is reshaped and passed through a fully connected layer that has an output of `num_of_classes` neurons.

-> After this, the model is instantiated, and an optimizer and a loss function are defined. The optimizer used is Adam with a learning rate of 0.001 and weight decay of 0.001.
 The loss function used is `nn.CrossEntropyLoss()`. The number of training epochs is set to 30.


In [32]:
# Train on the GPU when one is present, otherwise fall back to the CPU, and
# keep the model and every batch on the same device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Normalise the epoch metrics by the number of samples the loaders really serve.
num_train_samples = len(train_loader.dataset)
num_test_samples = len(test_loader.dataset)

best_accuracy = 0

for epoch in range(num_of_epochs):
    # ---- Training pass over the training dataset ----
    model.train()
    train_accuracy = 0
    train_loss = 0.0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        # The loss is a batch mean; weight it by the batch size so the epoch
        # value is a true per-sample mean even when the last batch is smaller.
        train_loss += loss.item() * images.size(0)
        prediction = outputs.argmax(dim=1)
        train_accuracy += int((prediction == labels).sum())

    train_accuracy /= num_train_samples
    train_loss /= num_train_samples

    # ---- Evaluation on the testing dataset ----
    model.eval()
    test_accuracy = 0

    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            prediction = outputs.argmax(dim=1)
            test_accuracy += int((prediction == labels).sum())

    test_accuracy /= num_test_samples

    print(f"Epoch: {epoch} =====> Train Loss: {train_loss:.2f} =====> Train Accuracy: {train_accuracy:.2f} =====> Test Accuracy: {test_accuracy:.3f}")

    # Write the checkpoint only when this epoch beats the best test accuracy so
    # far; saving unconditionally would leave the LAST model in the file.
    if test_accuracy > best_accuracy:
        best_accuracy = test_accuracy
        torch.save(model.state_dict(), 'best_model.pth')


Epoch: 0 =====> Train Loss: 9.56 =====> Train Accuracy: 0.54 =====> Test Accuracy: 0.609
Epoch: 1 =====> Train Loss: 1.29 =====> Train Accuracy: 0.72 =====> Test Accuracy: 0.622
Epoch: 2 =====> Train Loss: 0.99 =====> Train Accuracy: 0.77 =====> Test Accuracy: 0.689
Epoch: 3 =====> Train Loss: 0.70 =====> Train Accuracy: 0.83 =====> Test Accuracy: 0.624
Epoch: 4 =====> Train Loss: 0.51 =====> Train Accuracy: 0.86 =====> Test Accuracy: 0.709
Epoch: 5 =====> Train Loss: 0.37 =====> Train Accuracy: 0.90 =====> Test Accuracy: 0.754
Epoch: 6 =====> Train Loss: 0.29 =====> Train Accuracy: 0.92 =====> Test Accuracy: 0.746
Epoch: 7 =====> Train Loss: 0.26 =====> Train Accuracy: 0.93 =====> Test Accuracy: 0.738
Epoch: 8 =====> Train Loss: 0.13 =====> Train Accuracy: 0.96 =====> Test Accuracy: 0.749
Epoch: 9 =====> Train Loss: 0.09 =====> Train Accuracy: 0.97 =====> Test Accuracy: 0.743
Epoch: 10 =====> Train Loss: 0.32 =====> Train Accuracy: 0.92 =====> Test Accuracy: 0.693
Epoch: 11 =====> Tra