<a href="https://colab.research.google.com/github/clashgamer123/SOC_Pytorch/blob/main/Pizza_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Mount Google Drive in Colab to access the dataset.

In [None]:
# Colab-only helper that exposes Google Drive to this runtime.
from google.colab import drive
# Mount the drive at /content/drive; the data set paths used later live under it.
drive.mount('/content/drive')

Mounted at /content/drive


Import all the required modules and functions.

In [None]:
import os
import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split, Dataset
from torchvision import datasets, transforms

import matplotlib.pyplot as plt
import numpy as np
import matplotlib.image as mpimg
from random import shuffle

Get the data using a simple function.
Now comes the important part. We need to represent the data as a list of tuples, each tuple containing the image tensor and the label of the respective image. This lets us load the data conveniently with DataLoader.

In [None]:
def get_data(path):
    """Load every image in a directory into one float tensor.

    Each file in ``path`` is read with ``matplotlib.image.imread``, shifted by
    its own minimum, divided by 255 and rearranged from (H, W, C) to
    (C, H, W).

    Args:
        path: Directory containing only image files. All images must share
            the same height, width and channel count so they can be stacked.

    Returns:
        A tensor of shape (N, C, H, W) with one entry per file, ordered by
        file name. An empty 1-D tensor when the directory has no files.
    """
    images = []
    # Sort so that the sample order (and any index-based split made by the
    # caller) is reproducible; os.listdir returns entries in arbitrary order.
    for file_name in sorted(os.listdir(path)):
        pixels = mpimg.imread(os.path.join(path, file_name))
        # imread may return a read-only array, in which case torch.from_numpy
        # warns about non-writable memory; copying first avoids that.
        image = torch.from_numpy(np.array(pixels))
        # NOTE(review): dividing by 255 assumes uint8 pixel data (e.g. JPEG);
        # confirm there are no float-valued images (e.g. PNG) in the folder.
        image = (image - image.min()) / 255
        images.append(image.permute(2, 0, 1))
    if not images:
        return torch.tensor([])
    # Stack once at the end: calling torch.cat per image re-copied the whole
    # accumulated tensor on every iteration.
    return torch.stack(images)

# Source folders on the mounted drive, one per class.
pizza_path = '/content/drive/MyDrive/Colab Notebooks/Assignment_1/Data_Set/pizza'
not_pizza_path = '/content/drive/MyDrive/Colab Notebooks/Assignment_1/Data_Set/not_pizza'

# Positive class: every pizza image is paired with the label 1.0.
pizza_data = get_data(pizza_path)
pizza_size = pizza_data.size(0)
pizza_labels = torch.ones(pizza_size)
pizza_data_set = list(zip(pizza_data, pizza_labels))

# Negative class: every non-pizza image is paired with the label 0.0.
not_pizza_data = get_data(not_pizza_path)
not_pizza_size = not_pizza_data.size(0)
not_pizza_labels = torch.zeros(not_pizza_size)
not_pizza_data_set = list(zip(not_pizza_data, not_pizza_labels))



  image = torch.from_numpy(image)


Since our dataset is quite small,
we are going to use DATA AUGMENTATION to artificially increase the size of our data.
 We mainly use the transforms module from torchvision.

In [None]:

# Assuming you have custom dataset classes for your data
class CustomDataset(Dataset):
    """Dataset over an indexable collection of (image, label) pairs.

    Args:
        data: Any object supporting ``len()`` and integer indexing that
            yields ``(image, label)`` pairs.
        transform: Optional callable applied to the image (never to the
            label) each time an item is fetched.
    """

    def __init__(self, data, transform=None):
        self.transform = transform
        self.data = data

    def __len__(self):
        """Return the number of (image, label) pairs."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the pair at ``idx`` with the transform applied to the image."""
        sample, target = self.data[idx]
        if not self.transform:
            return sample, target
        return self.transform(sample), target

Now augment the data, prepare the cumulative data and then load it using DataLoader.

In [None]:

# Index-based split of the raw (image, label) lists:
# 400 pizza + 250 not-pizza for training, the next 100 of each for testing.
train_data_set_u = pizza_data_set[0:400] + not_pizza_data_set[0:250]
test_data_set_u = pizza_data_set[400:500] + not_pizza_data_set[250:350]

# Building blocks shared by every pipeline.
resize = transforms.Resize((128, 128))
normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

# Base pipeline for the un-augmented copy of the data.
transform1 = transforms.Compose([resize, normalize])

# Random augmentations. These act on the resized, not yet normalized image.
augment_transform_1 = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=30),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
])

augment_transform_2 = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.8),
    transforms.RandomRotation(degrees=45),
])


def _with_augmentation(augment):
    """Return a resize -> augment -> normalize pipeline.

    Augmentation has to run before Normalize: ColorJitter expects float
    images in [0, 1] and clamps to that range, so applying it to data that
    was already normalized to [-1, 1] would zero out every negative value.
    """
    return transforms.Compose([resize, augment, normalize])


# Un-augmented datasets.
train_data_set = CustomDataset(train_data_set_u, transform=transform1)
test_data_set = CustomDataset(test_data_set_u, transform=transform1)

# Augmented views of the same raw samples; the random transforms are
# re-drawn every time an item is fetched.
train_augmented_1 = CustomDataset(
    train_data_set_u, transform=_with_augmentation(augment_transform_1))
train_augmented_2 = CustomDataset(
    train_data_set_u, transform=_with_augmentation(augment_transform_2))

# NOTE(review): the test split is augmented as well, so accuracy is measured
# partly on randomly transformed images. Kept so the combined test set still
# has 3 * 200 = 600 samples, which the evaluation cell relies on.
test_augmented_1 = CustomDataset(
    test_data_set_u, transform=_with_augmentation(augment_transform_1))
test_augmented_2 = CustomDataset(
    test_data_set_u, transform=_with_augmentation(augment_transform_2))

# Combine original and augmented datasets (Dataset "+" concatenates them).
train_data_set = train_data_set + train_augmented_1 + train_augmented_2
test_data_set = test_data_set + test_augmented_1 + test_augmented_2

# Create DataLoaders.
train_data_loader = DataLoader(train_data_set, batch_size=5, shuffle=True)
test_data_loader = DataLoader(test_data_set, batch_size=20, shuffle=True)


Define our model using the nn.Sequential().
We are using 3 convolutional layers.
BatchNorm2d normalizes the input to each layer by subtracting the mean and dividing by the standard deviation of the activations in a mini-batch. This ensures that the inputs to each layer have a mean of zero and a standard deviation of one, which helps mitigate issues like vanishing and exploding gradients.

In [None]:
def _conv_stage(in_channels, out_channels):
    """Return the layers of one Conv -> BatchNorm -> ReLU -> MaxPool stage."""
    return [
        nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),
    ]


# Three conv stages, each halving height and width, followed by a two-layer
# classifier head. The first Linear layer expects 128 feature maps of 16x16,
# i.e. a 128x128 input image.
model = nn.Sequential(
    *_conv_stage(3, 32),
    *_conv_stage(32, 64),
    *_conv_stage(64, 128),
    nn.Flatten(),
    nn.Linear(128 * 16 * 16, 128),
    nn.ReLU(),
    nn.Linear(128, 1),
    nn.Sigmoid(),  # single probability per image
)

In [None]:
# Binary cross-entropy computed on probabilities (the network ends in a sigmoid).
loss_criterion = nn.BCELoss()
# Adam over every parameter of the network.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

Train the model.

In [None]:
# Number of training samples, taken from the loader instead of a hard-coded
# constant so the average stays correct if the data set size changes.
num_train_samples = len(train_data_loader.dataset)

# Make sure BatchNorm uses (and updates from) batch statistics, even if this
# cell is re-run after the model was switched to evaluation mode.
model.train()

for epoch in range(5):
    cum_loss = 0.0
    for inputs, labels in train_data_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        # The model outputs shape (batch, 1); give the labels the same shape.
        loss = loss_criterion(outputs, labels.unsqueeze(1).float())
        loss.backward()
        optimizer.step()
        # The loss is a batch mean, so weight it by the batch size. .item()
        # keeps the running total a plain float, detached from autograd.
        cum_loss += loss.item() * len(inputs)
    cum_loss = cum_loss / num_train_samples
    print(f'Loss at epoch: {epoch+1} :  {cum_loss}')

KeyboardInterrupt: 

In [None]:
# Number of test samples, taken from the loader instead of a hard-coded constant.
num_test_samples = len(test_data_loader.dataset)
correct_matches = 0

# Evaluation mode: BatchNorm uses its running statistics, so a prediction no
# longer depends on which other images share the batch.
model.eval()
with torch.no_grad():  # no gradients are needed for scoring
    for data, labels in test_data_loader:
        # squeeze(1) removes only the output dimension; a bare squeeze()
        # would also drop the batch dimension for a batch of one.
        outputs = model(data).squeeze(1)
        predictions = (outputs >= 0.5).int()
        correct_matches += (predictions == labels).sum().item()

print(f'Accuracy of the model = {(correct_matches/num_test_samples)*100:0.4f}')

# Accuracy upto 90 percent can be achieved.

Accuracy of the model = 85.6667
