In [None]:
import matplotlib.pyplot as plt
import cv2
from google.colab.patches import cv2_imshow
from PIL import Image
import numpy as np
import os
import torch
import torch.nn as nn
import torchvision.transforms as T
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset
from torchvision.datasets import ImageFolder
from sklearn.metrics import confusion_matrix, classification_report
import pandas as pd


# Mount Google Drive into the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

# Once the mount has succeeded, the Drive files are available under
# "/content/drive/My Drive".
# Make that folder the current working directory.
os.chdir("/content/drive/My Drive/")
# os.listdir()  # uncomment to check that the Drive contents are visible

Mounted at /content/drive


In [None]:
# Notebook-wide settings used by the functions defined in the cells below.
top_path = os.getcwd()      # current working directory; trained models are saved here
path = top_path + '/Data'   # dataset root folder
img_size = 64               # images are resized to img_size x img_size pixels
test_ratio = 0.2            # fraction of the dataset held out for testing
print(path)

/content/drive/My Drive/Data


This function pre-processes the images: it resizes them, converts them to grayscale, normalizes them, and splits the dataset into a training set and a testing set.

In [None]:

def pre_processing():
    """Build the training and testing data loaders for the image dataset.

    Every image found under the module-level ``path`` is resized to
    ``img_size`` x ``img_size``, converted to single-channel grayscale, turned
    into a tensor and normalised with the mean/std reported by
    ``cal_mean_std``. The dataset is then split into train/test subsets with a
    stratified split governed by ``test_ratio``.

    Returns:
        tuple: ``(train_loader, test_loader, my_categories)``.
        ``my_categories`` is the list of class names taken from the dataset
        itself, so ``my_categories[i]`` is the name of integer label ``i``.
    """
    # Get Mean and STD. The category listing that cal_mean_std also returns is
    # deliberately ignored: the names are taken from the dataset below.
    mean, std, _ = cal_mean_std()

    # Pre_Processing
    image_transforms = T.Compose([
        T.Resize((img_size, img_size)),
        T.Grayscale(),
        T.ToTensor(),
        T.Normalize([mean], [std]),
    ])

    # Get Images
    dataset = ImageFolder(path, transform=image_transforms)

    # Class names in the same order as the integer labels ImageFolder assigned.
    # A plain directory listing is not guaranteed to come back in that order,
    # which would mislabel the confusion matrix and the classification report.
    my_categories = list(dataset.classes)

    # Stratified split: every category keeps the same proportion of images in
    # the training and testing subsets. random_state makes the split repeatable.
    print("length of dataset = ", len(dataset))
    train_idx, test_idx = train_test_split(list(range(len(dataset))), test_size=test_ratio, stratify=dataset.targets,
                                           random_state=0)
    train = Subset(dataset, train_idx)
    test = Subset(dataset, test_idx)

    print("length of training set = ", len(train))
    print("length of testing set = ", len(test))

    # Load data. The test loader delivers the whole test subset as one batch.
    train_loader = torch.utils.data.DataLoader(train, batch_size=32, shuffle=True, num_workers=2)
    test_loader = torch.utils.data.DataLoader(test, batch_size=len(test), shuffle=False, num_workers=2)

    return train_loader, test_loader, my_categories

def cal_mean_std():
    """Compute the grayscale pixel mean and standard deviation of the dataset.

    Every image below the module-level ``path`` (one sub-directory per
    category) is converted to grayscale, resized to ``img_size`` x
    ``img_size`` and scaled to the [0, 1] range. The statistics are computed
    over all pixels of all images, and are meant to be used in the
    normalization step of the pre-processing.

    The working directory is never changed, so the function does not depend
    on (or alter) where the notebook is currently pointing.

    Returns:
        tuple: ``(data_mean, data_std, my_categories)`` where
        ``my_categories`` is the alphabetically sorted list of category
        directory names.

    Raises:
        ValueError: if no image file is found under ``path``.
    """
    # Only sub-directories are categories; stray files such as .DS_Store at
    # this level are ignored. Sorting makes the order deterministic.
    my_categories = sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )

    cnt = 0
    data_sum = 0.0
    data_sq_sum = 0.0
    for category in my_categories:
        # get directory for each category
        sub_path = os.path.join(path, category)
        for file_name in os.listdir(sub_path):
            file_path = os.path.join(sub_path, file_name)
            # Skip hidden files (e.g. .DS_Store) and anything that is not a file.
            if file_name.startswith('.') or not os.path.isfile(file_path):
                continue
            # The context manager releases the file handle as soon as the
            # converted copy has been produced.
            with Image.open(file_path) as raw_image:
                img = raw_image.convert('L').resize((img_size, img_size))
            img = np.asarray(img, dtype=np.float64) / 255.0
            data_sum += img.sum()
            data_sq_sum += np.square(img).sum()
            cnt += 1

    if cnt == 0:
        raise ValueError("no images found under '{}'".format(path))

    n_pixels = cnt * img_size * img_size
    data_mean = data_sum / n_pixels
    # Dataset-wide standard deviation via E[x^2] - E[x]^2. Averaging the
    # per-image standard deviations would ignore brightness differences
    # between images. The clamp guards against a tiny negative value caused
    # by floating point rounding.
    variance = max(data_sq_sum / n_pixels - data_mean ** 2, 0.0)
    data_std = float(np.sqrt(variance))
    return data_mean, data_std, my_categories

I have 3 Convolutional Networks which I will be comparing. The different CNN model architectures are defined in the following code block.

In [None]:

# My proposed first model
class CNN1(nn.Module):
    """CNN with six convolutional blocks and two max-pooling stages.

    Each block is Conv2d(3x3, padding=1) -> BatchNorm2d -> LeakyReLU; only the
    last two blocks are followed by a 2x2 max-pool. The first linear layer
    expects 16 * 16 * 128 features, i.e. a single-channel 64x64 input image
    (64 -> 32 -> 16 after the two pooling stages).

    Args:
        num_classes: size of the output layer. Defaults to 5, the value that
            was previously hard-coded.
    """

    def __init__(self, num_classes=5):
        super().__init__()
        # Define the convolutional layers. The blocks are unpacked into one
        # flat Sequential so the parameter names (state_dict keys) stay
        # "convolutional_layers.<index>.*".
        self.convolutional_layers = nn.Sequential(
            # 1st layer, input channels 1 => Grayscale
            *self._conv_block(1, 64),
            # 2nd layer
            *self._conv_block(64, 128),
            # 3rd layer
            *self._conv_block(128, 128),
            # 4th layer
            *self._conv_block(128, 64),
            # 5th layer, halves the spatial size
            *self._conv_block(64, 64, pool=True),
            # 6th layer, halves the spatial size again
            *self._conv_block(64, 128, pool=True),
        )

        # Define the Fully connected/ linear layers:
        self.linear_layers = nn.Sequential(
            # Randomly zeros out some elements of input tensor
            nn.Dropout(p=0.1),
            nn.Linear(16 * 16 * 128, 1000),
            nn.ReLU(inplace=True),

            nn.Linear(1000, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.1),

            # The output layer: one score per class
            nn.Linear(512, num_classes)
        )

    @staticmethod
    def _conv_block(in_channels, out_channels, pool=False):
        """Return the layers of one Conv -> BatchNorm -> LeakyReLU (-> MaxPool) block."""
        layers = [
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),  # for normalization, and to accelerate the process
            nn.LeakyReLU(inplace=True),  # activation function
        ]
        if pool:
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return layers

    def forward(self, tensor):
        """Return the class scores, shape (batch, num_classes), for a batch of images."""
        # calling the convolutional layers defined in my class
        tensor = self.convolutional_layers(tensor)
        # flatten everything except the batch dimension: tensor.size(0) is the
        # batch size and -1 lets view work out the remaining length
        tensor = tensor.view(tensor.size(0), -1)
        # calling the linear layers defined in my class
        tensor = self.linear_layers(tensor)
        return tensor


# My proposed 2nd model or Main/Shallow model
class CNN2(nn.Module):
    """Shallow CNN with three convolutional blocks and two max-pooling stages.

    Each block is Conv2d(3x3, padding=1) -> BatchNorm2d -> LeakyReLU; the
    second and third blocks are followed by a 2x2 max-pool. The first linear
    layer expects 16 * 16 * 128 features, i.e. a single-channel 64x64 input
    image (64 -> 32 -> 16 after the two pooling stages).

    Args:
        num_classes: size of the output layer. Defaults to 5, the value that
            was previously hard-coded.
    """

    def __init__(self, num_classes=5):
        super().__init__()
        # Define the convolutional layers. The blocks are unpacked into one
        # flat Sequential so the parameter names (state_dict keys) stay
        # "convolutional_layers.<index>.*".
        self.convolutional_layers = nn.Sequential(
            # 1st layer, input channels 1 => Grayscale
            # output: (batch, 64, 64, 64) for a 64x64 input
            *self._conv_block(1, 64),
            # 2nd layer, output: (batch, 64, 32, 32)
            *self._conv_block(64, 64, pool=True),
            # 3rd layer, output: (batch, 128, 16, 16)
            *self._conv_block(64, 128, pool=True),
        )

        # Define the Fully connected/ linear layers:
        self.linear_layers = nn.Sequential(
            # Randomly zeros out some elements of input tensor
            nn.Dropout(p=0.1),
            nn.Linear(16 * 16 * 128, 1000),
            nn.ReLU(inplace=True),

            nn.Linear(1000, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.1),

            # The output layer: one score per class
            nn.Linear(512, num_classes)
        )

    @staticmethod
    def _conv_block(in_channels, out_channels, pool=False):
        """Return the layers of one Conv -> BatchNorm -> LeakyReLU (-> MaxPool) block."""
        layers = [
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),  # for normalization, and to accelerate the process
            nn.LeakyReLU(inplace=True),  # activation function
        ]
        if pool:
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return layers

    def forward(self, tensor):
        """Return the class scores, shape (batch, num_classes), for a batch of images."""
        # calling the convolutional layers defined in my class
        tensor = self.convolutional_layers(tensor)
        # flatten everything except the batch dimension: tensor.size(0) is the
        # batch size and -1 lets view work out the remaining length
        tensor = tensor.view(tensor.size(0), -1)
        # calling the linear layers defined in my class
        tensor = self.linear_layers(tensor)
        return tensor


# My proposed 3rd model
class CNN3(nn.Module):
    """CNN with six convolutional blocks and four max-pooling stages.

    Each block is Conv2d(3x3, padding=1) -> BatchNorm2d -> LeakyReLU; blocks
    three to six are each followed by a 2x2 max-pool. The first linear layer
    expects 4 * 4 * 128 features, i.e. a single-channel 64x64 input image
    (64 -> 32 -> 16 -> 8 -> 4 after the four pooling stages).

    Args:
        num_classes: size of the output layer. Defaults to 5, the value that
            was previously hard-coded.
    """

    def __init__(self, num_classes=5):
        super().__init__()
        # Define the convolutional layers. The blocks are unpacked into one
        # flat Sequential so the parameter names (state_dict keys) stay
        # "convolutional_layers.<index>.*".
        self.convolutional_layers = nn.Sequential(
            # 1st layer, input channels 1 => Grayscale
            *self._conv_block(1, 64),
            # 2nd layer
            *self._conv_block(64, 128),
            # 3rd layer, first pooling stage
            *self._conv_block(128, 128, pool=True),
            # 4th layer, second pooling stage
            *self._conv_block(128, 64, pool=True),
            # 5th layer, third pooling stage
            *self._conv_block(64, 64, pool=True),
            # 6th layer, fourth pooling stage
            *self._conv_block(64, 128, pool=True),
        )

        # Define the Fully connected/ linear layers:
        self.linear_layers = nn.Sequential(
            # Randomly zeros out some elements of input tensor
            nn.Dropout(p=0.1),
            nn.Linear(4 * 4 * 128, 1000),
            nn.ReLU(inplace=True),

            nn.Linear(1000, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.1),

            # The output layer: one score per class
            nn.Linear(512, num_classes)
        )

    @staticmethod
    def _conv_block(in_channels, out_channels, pool=False):
        """Return the layers of one Conv -> BatchNorm -> LeakyReLU (-> MaxPool) block."""
        layers = [
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),  # for normalization, and to accelerate the process
            nn.LeakyReLU(inplace=True),  # activation function
        ]
        if pool:
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return layers

    def forward(self, tensor):
        """Return the class scores, shape (batch, num_classes), for a batch of images."""
        # calling the convolutional layers defined in my class
        tensor = self.convolutional_layers(tensor)
        # flatten everything except the batch dimension: tensor.size(0) is the
        # batch size and -1 lets view work out the remaining length
        tensor = tensor.view(tensor.size(0), -1)
        # calling the linear layers defined in my class
        tensor = self.linear_layers(tensor)
        return tensor

This method defines the logic for training the model on the given dataset

In [None]:
def run(model, train_loader, learning_rate, epochs):
    """Train a freshly constructed network and return it.

    The network is optimised with Adam on a cross-entropy loss. After every
    batch the loss and the accuracy on that batch are printed.

    Args:
        model: zero-argument callable (typically an ``nn.Module`` subclass)
            that builds the network to train.
        train_loader: sized iterable yielding ``(images, labels)`` batches.
        learning_rate: learning rate handed to the Adam optimiser.
        epochs: number of full passes over ``train_loader``.

    Returns:
        The trained network, left in training mode.
    """
    # create an instance of my convolutional network class
    cnn = model()
    # Make sure dropout and batch-norm run in training mode, whatever state
    # the factory returned the network in.
    cnn.train()
    lossCriterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)
    total_step = len(train_loader)
    for epoch in range(epochs):
        for i, (images, labels) in enumerate(train_loader):
            output = cnn(images)
            loss = lossCriterion(output, labels)

            # Applying backpropagation and optimisation
            optimizer.zero_grad()  # clear the gradients left over from the previous step
            loss.backward()
            optimizer.step()

            # Calculating the accuracy of the cnn during the training process
            totalImages = labels.size(0)
            # max_indexes holds the highest-scoring class for every image;
            # detach() keeps this bookkeeping out of the autograd graph
            _, max_indexes = torch.max(output.detach(), 1)
            imagesClassifiedCorrectly = (max_indexes == labels).sum().item()

            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
                  .format(epoch + 1, epochs, i + 1, total_step, loss.item(),
                          (imagesClassifiedCorrectly / totalImages) * 100))
    return cnn




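This is the main driver function: it trains the CNN architecture selected by `model_no_` (one of the three proposed models), saves the trained model, and evaluates it on the test set. All three models are trained on the same dataset.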

In [None]:
def CNN_project_01(model_no_):
    """Train, save and evaluate the architecture selected by ``model_no_``.

    ``1`` selects CNN1, ``2`` selects CNN2 and any other value selects CNN3.
    The trained model is written to ``trained_model_<n>.pth`` under
    ``top_path`` and then evaluated on the test loader.
    """
    train_loader, test_loader, my_categories = pre_processing()

    # Pick the architecture and the matching output file.
    if model_no_ == 1:
        architecture = CNN1
        file_name = '/trained_model_1.pth'
    elif model_no_ == 2:
        architecture = CNN2
        file_name = '/trained_model_2.pth'
    else:
        architecture = CNN3
        file_name = '/trained_model_3.pth'

    # Same hyper-parameters for every architecture so the results are comparable.
    cnn_model = run(architecture, train_loader, learning_rate=0.0001, epochs=8)
    torch.save(cnn_model, top_path + file_name)

    evaluate(cnn_model, test_loader, my_categories)


In this code block, we define how a trained model is evaluated on the test dataset (overall accuracy, confusion matrix and per-class report), together with a helper that loads an already trained model and evaluates it.

In [None]:

def evaluate(cnnModel, test_loader, my_categories):
    """Evaluate a trained model and print accuracy, confusion matrix and report.

    Labels and predictions are collected over every batch of ``test_loader``,
    so the printed statistics cover the whole test set regardless of the
    loader's batch size.

    Args:
        cnnModel: trained ``nn.Module``; it is switched to evaluation mode.
        test_loader: iterable yielding ``(images, labels)`` batches.
        my_categories: class names, where position ``i`` names label ``i``.

    Raises:
        ValueError: if ``test_loader`` yields no batches.
    """
    # Set the CNN module in evaluation mode:
    cnnModel.eval()

    batch_labels = []
    batch_predictions = []
    # disable the gradient calculation in test mode
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = cnnModel(images)
            # index of the highest score = predicted class
            _, max_idxs = torch.max(outputs, 1)
            batch_labels.append(labels)
            batch_predictions.append(max_idxs)

    if not batch_labels:
        raise ValueError("test_loader yielded no batches; nothing to evaluate")

    # Integer class indices for the complete test set (not just the last batch).
    test_labels = torch.cat(batch_labels).cpu().numpy()
    pre_labels = torch.cat(batch_predictions).cpu().numpy()

    totalImages = int(test_labels.shape[0])
    classifiedCorrectly = int((test_labels == pre_labels).sum())

    print('\nTest Accuracy of the model on the {} test images: {} %'
          .format((totalImages),
                  (classifiedCorrectly / totalImages) * 100))

    # Passing the full label set keeps the matrix len(my_categories) square
    # even if some class never shows up in the labels or the predictions.
    class_ids = list(range(len(my_categories)))
    conf_Mat = confusion_matrix(test_labels, pre_labels, labels=class_ids)
    print('\n Confusion Matrix\n')
    print(conf_Mat, '\n')

    df = pd.DataFrame(conf_Mat, index=my_categories, columns=my_categories)
    print(df, '\n\n')
    print(classification_report(test_labels, pre_labels, labels=class_ids, target_names=my_categories))


def CNN_load_trained_model(model_no_):
    """Load a previously saved model and evaluate it on the test set.

    ``1`` loads ``trained_model_1.pth``, ``2`` loads ``trained_model_2.pth``
    and any other value loads ``trained_model_3.pth``, all from ``top_path``.
    """
    # Only the test loader and the class names are needed here.
    _, test_loader, my_categories = pre_processing()

    if model_no_ == 1:
        file_name = '/trained_model_1.pth'
    elif model_no_ == 2:
        file_name = '/trained_model_2.pth'
    else:
        file_name = '/trained_model_3.pth'

    # NOTE(review): torch.load unpickles the file, so only load model files
    # that come from a trusted source.
    model = torch.load(top_path + file_name)
    evaluate(model, test_loader, my_categories)


Main Driver Method

In [None]:
# Train, save and evaluate each of the three architectures in turn.
for model_no in (1, 2, 3):
  CNN_project_01(model_no)

length of dataset =  1999
length of training set =  1599
length of testing set =  400
Epoch [1/8], Step [1/50], Loss: 1.6541, Accuracy: 21.88%
Epoch [1/8], Step [2/50], Loss: 1.6053, Accuracy: 25.00%
Epoch [1/8], Step [3/50], Loss: 1.6495, Accuracy: 28.12%
Epoch [1/8], Step [4/50], Loss: 1.6122, Accuracy: 18.75%
Epoch [1/8], Step [5/50], Loss: 1.5337, Accuracy: 43.75%
Epoch [1/8], Step [6/50], Loss: 1.4767, Accuracy: 40.62%
Epoch [1/8], Step [7/50], Loss: 1.4095, Accuracy: 59.38%
Epoch [1/8], Step [8/50], Loss: 1.4314, Accuracy: 43.75%
Epoch [1/8], Step [9/50], Loss: 1.3524, Accuracy: 59.38%
Epoch [1/8], Step [10/50], Loss: 1.3739, Accuracy: 56.25%
Epoch [1/8], Step [11/50], Loss: 1.3721, Accuracy: 43.75%
Epoch [1/8], Step [12/50], Loss: 1.3056, Accuracy: 53.12%
Epoch [1/8], Step [13/50], Loss: 1.2156, Accuracy: 59.38%
Epoch [1/8], Step [14/50], Loss: 1.2971, Accuracy: 34.38%
Epoch [1/8], Step [15/50], Loss: 1.2771, Accuracy: 43.75%
Epoch [1/8], Step [16/50], Loss: 1.1870, Accuracy: 62

In [None]:

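# To evaluate a previously saved model without retraining, uncomment the call
# below and pass the model number (1, 2 or 3):
# CNN_load_trained_model(1)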