In [2]:
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import shutil
import random
from tqdm.notebook import tqdm
from sklearn.metrics import classification_report, confusion_matrix
import math
from torch.utils.data import DataLoader, Subset



In [3]:
#If you are using google colab, mount your google drive
from google.colab import drive
drive.mount('/content/MyDrive')


Mounted at /content/MyDrive


In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
device

device(type='cuda')

In [6]:
model_conv = models.alexnet(pretrained = True)

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:01<00:00, 130MB/s]


In [7]:
for param in model_conv.parameters():
  param.requires_grad = False

In [8]:
print(model_conv)



AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [9]:
num_classes = 3
model_conv.classifier[6] = nn.Linear(in_features=4096, out_features=num_classes)


In [10]:
print(model_conv)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [11]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_conv.classifier.parameters(), lr=0.01)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)


In [12]:
def train_model(dataloaders, model, criterion, optimizer, scheduler, num_epochs, device):
    model.to(device)
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in tqdm(range(num_epochs)):
        print(f"Epoch {epoch+1}/{num_epochs} started")

        # Training phase
        model.train()
        train_labels_all = []
        train_preds_all = []
        train_loss = 0.0
        for inputs, labels in dataloaders['train']:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()

            # Forward pass
            with torch.set_grad_enabled(True):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

            train_loss += loss.item() * inputs.size(0)
            train_labels_all.extend(labels.cpu().numpy())
            train_preds_all.extend(preds.cpu().numpy())

        train_loss /= len(dataloaders['train'].dataset)
        print(f"Training Loss: {train_loss:.4f}")

        # Print training confusion matrix and classification report
        print("Training - Confusion Matrix and Classification Report")
        print(confusion_matrix(train_labels_all, train_preds_all))
        print(classification_report(train_labels_all, train_preds_all))

        # Validation phase
        model.eval()
        valid_labels_all = []
        valid_preds_all = []
        valid_loss = 0.0
        for inputs, labels in dataloaders['valid']:
            inputs = inputs.to(device)
            labels = labels.to(device)

            with torch.set_grad_enabled(False):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

            valid_loss += loss.item() * inputs.size(0)
            valid_labels_all.extend(labels.cpu().numpy())
            valid_preds_all.extend(preds.cpu().numpy())

        valid_loss /= len(dataloaders['valid'].dataset)
        valid_acc = (np.array(valid_labels_all) == np.array(valid_preds_all)).mean()
        print(f"Validation Loss: {valid_loss:.4f}, Validation Accuracy: {valid_acc:.4f}")

        # Print validation confusion matrix and classification report
        print("Validation - Confusion Matrix and Classification Report")
        print(confusion_matrix(valid_labels_all, valid_preds_all))
        print(classification_report(valid_labels_all, valid_preds_all))

        # Check if this is the best model so far
        if valid_acc > best_acc:
            best_acc = valid_acc
            best_model_wts = copy.deepcopy(model.state_dict())

        # Step the scheduler
        scheduler.step()

    # Load best model weights
    model.load_state_dict(best_model_wts)
    return model


In [None]:
#File manipulation to create the dataset


DATA_DIR = "" # Path to the dataset
TARGET_DIR = "" # Path to the processed dataset

TRAIN_DIR = f"{DATA_DIR}/training"
TRAIN_LABELS_PATH = f"{DATA_DIR}/training_labels.txt"
TEST_DIR = f"{DATA_DIR}/test"
TEST_LABELS_PATH = f"{DATA_DIR}/test_labels.txt"

VALID_SPLIT = 0.2

shutil.rmtree(TARGET_DIR, ignore_errors=True)

with open(TRAIN_LABELS_PATH) as f:
    train_labels = f.read().splitlines()

with open(TEST_LABELS_PATH) as f:
    test_labels = f.read().splitlines()

for label in set(train_labels):
    os.makedirs(f"{TARGET_DIR}/train/class-{label}", exist_ok=True)
    os.makedirs(f"{TARGET_DIR}/valid/class-{label}", exist_ok=True)
    os.makedirs(f"{TARGET_DIR}/test/class-{label}", exist_ok=True)

train_data = [(idx + 1, label) for idx, label in enumerate(train_labels)]
test_data = [(idx + 1, label) for idx, label in enumerate(test_labels)]

random.shuffle(train_data)

num_val_items = int(len(train_data) * VALID_SPLIT)

for idx, label in train_data[:num_val_items]:
    file_path = f"{TARGET_DIR}/valid/class-{label}/img{idx}.jpg"
    shutil.copyfile(f"{TRAIN_DIR}/tr{idx}.jpg", file_path)

for idx, label in train_data[num_val_items:]:
    file_path = f"{TARGET_DIR}/train/class-{label}/img{idx}.jpg"
    shutil.copyfile(f"{TRAIN_DIR}/tr{idx}.jpg", file_path)

for idx, label in test_data:
    file_path = f"{TARGET_DIR}/test/class-{label}/img{idx}.jpg"
    shutil.copyfile(f"{TEST_DIR}/ts{idx}.jpg", file_path)


In [15]:
def calculate_mean_std(data_dir, batch_size=32):
    transform = transforms.Compose([
        transforms.ToTensor()
    ])

    dataset = datasets.ImageFolder(data_dir, transform=transform)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    mean = 0.
    std = 0.
    nb_samples = 0.

    for data, _ in loader:
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)
        mean += data.mean(2).sum(0)
        std += data.std(2).sum(0)
        nb_samples += batch_samples

    mean /= nb_samples
    std /= nb_samples

    return mean.numpy(), std.numpy()

data = f'{TARGET_DIR}/train'
mean, std = calculate_mean_std(data)
print(f"Calculated mean: {mean}")
print(f"Calculated std: {std}")


Calculated mean: [0.7017949 0.6766075 0.7341904]
Calculated std: [0.13982332 0.16244218 0.10425418]


In [16]:
#Data augmentation with normalization

data_transforms = {
    'train': transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(227),
        transforms.Normalize(mean, std)
    ]),
    'valid': transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(227),
        transforms.Normalize(mean, std)
    ]),
    'test': transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(227),
        transforms.Normalize(mean, std)
    ])
}

image_datasets = {x: datasets.ImageFolder(os.path.join(TARGET_DIR, x), data_transforms[x])
                  for x in ['train', 'valid', 'test']}


dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x],
  batch_size = 4, shuffle = True)
  for x in ['train', 'valid']}


In [17]:
def print_image_sizes(dataloaders):
    for phase, dataloader in dataloaders.items():
        images, _ = next(iter(dataloader))
        print(f"{phase} image sizes: {images.shape}")
print_image_sizes(dataloaders)

train image sizes: torch.Size([4, 3, 227, 227])
valid image sizes: torch.Size([4, 3, 227, 227])


In [18]:
trained_model = train_model(dataloaders, model_conv, criterion, optimizer, exp_lr_scheduler, 5, device)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1/5 started
Training Loss: 1.1094
Training - Confusion Matrix and Classification Report
[[39  8  4]
 [ 4 54  7]
 [ 8  5 20]]
              precision    recall  f1-score   support

           0       0.76      0.76      0.76        51
           1       0.81      0.83      0.82        65
           2       0.65      0.61      0.62        33

    accuracy                           0.76       149
   macro avg       0.74      0.73      0.74       149
weighted avg       0.76      0.76      0.76       149

Validation Loss: 1.9322, Validation Accuracy: 0.8378
Validation - Confusion Matrix and Classification Report
[[ 8  0  1]
 [ 3 18  2]
 [ 0  0  5]]
              precision    recall  f1-score   support

           0       0.73      0.89      0.80         9
           1       1.00      0.78      0.88        23
           2       0.62      1.00      0.77         5

    accuracy                           0.84        37
   macro avg       0.78      0.89      0.82        37
weighted avg    

In [19]:
dataloader_test = torch.utils.data.DataLoader(image_datasets["test"],
                                                  batch_size=32,
                                                  shuffle=True)


In [20]:
def test_model(model, dataloader, device):
    model.eval()
    all_preds = []
    all_labels = []
    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        with torch.no_grad():
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    return np.array(all_preds), np.array(all_labels)

test_preds, test_labels = test_model(trained_model, dataloader_test, device)
print("Confusion Matrix:")
print(confusion_matrix(test_labels, test_preds))
print("\nClassification Report:")
print(classification_report(test_labels, test_preds))


Confusion Matrix:
[[48  0  0]
 [ 4 51  2]
 [ 1  6 32]]

Classification Report:
              precision    recall  f1-score   support

           0       0.91      1.00      0.95        48
           1       0.89      0.89      0.89        57
           2       0.94      0.82      0.88        39

    accuracy                           0.91       144
   macro avg       0.91      0.91      0.91       144
weighted avg       0.91      0.91      0.91       144



In [21]:
# Find the smallest class in the dataset

num_classes = 3
smallest_class_size=math.inf
smallest_class = None


for i in range(num_classes):
  train_path = f"{TARGET_DIR}/train/class-{i+1}"
  valid_path = f"{TARGET_DIR}/valid/class-{i+1}"

  train_file_list= os.listdir(train_path)
  valid_file_list= os.listdir(valid_path)
  num_files = len(train_file_list) + len(valid_file_list)

  if num_files < smallest_class_size:
      smallest_class_size = num_files
      smallest_class = i+1


print(f"Smallest class is class-{smallest_class} with {smallest_class_size} files.")




Smallest class is class-3 with 38 files.


In [25]:
#Undersample the dataset

PROCESSED_DIR =""# Path to the processed dataset
BALANCED_DIR = "" # Path to the balanced dataset

valid_split = 0.2


os.makedirs(BALANCED_DIR, exist_ok=True)
for i in range(1, num_classes + 1):
    os.makedirs(f"{BALANCED_DIR}/train/class-{i}", exist_ok=True)
    os.makedirs(f"{BALANCED_DIR}/valid/class-{i}", exist_ok=True)
    os.makedirs(f"{BALANCED_DIR}/test/class-{i}", exist_ok=True)

for i in range(1, num_classes + 1):

    train_path = f"{PROCESSED_DIR}/train/class-{i}"
    valid_path = f"{PROCESSED_DIR}/valid/class-{i}"
    test_path = f"{PROCESSED_DIR}/test/class-{i}"

    target_train_path = f"{BALANCED_DIR}/train/class-{i}"
    target_valid_path = f"{BALANCED_DIR}/valid/class-{i}"
    target_test_path = f"{BALANCED_DIR}/test/class-{i}"


    train_file_list = os.listdir(train_path)
    valid_file_list = os.listdir(valid_path)
    test_file_list = os.listdir(test_path)

    random.shuffle(train_file_list)
    random.shuffle(valid_file_list)

    num_val_items = int(smallest_class_size * valid_split)
    num_train_items = smallest_class_size - num_val_items

    selected_train_images = train_file_list[:num_train_items]
    selected_valid_images = valid_file_list[:num_val_items]

    for file_name in selected_train_images:
        shutil.copy(os.path.join(train_path, file_name), os.path.join(target_train_path, file_name))

    for file_name in selected_valid_images:
        shutil.copy(os.path.join(valid_path, file_name), os.path.join(target_valid_path, file_name))

    for file_name in test_file_list:
        shutil.copy(os.path.join(test_path, file_name), os.path.join(target_test_path, file_name))

In [26]:
#Data augmentation and transformation with normalization and undersampling

data_transforms = {
    'train': transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(227),
        transforms.Normalize(mean, std)
    ]),
    'valid': transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(227),
        transforms.Normalize(mean, std)
    ]),
    'test': transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(227),
        transforms.Normalize(mean, std)
    ])
}

image_datasets_norm_samp = {x: datasets.ImageFolder(os.path.join(BALANCED_DIR, x), data_transforms[x])
                  for x in ['train', 'valid', 'test']}


dataloaders_norm_samp = {x: torch.utils.data.DataLoader(image_datasets_norm_samp[x],
  batch_size = 4, shuffle = True)
  for x in ['train', 'valid']}



In [27]:
trained_model_norm_under_sampled = train_model(dataloaders_norm_samp, model_conv, criterion, optimizer, exp_lr_scheduler, 5, device)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1/5 started
Training Loss: 0.1142
Training - Confusion Matrix and Classification Report
[[29  1  1]
 [ 0 31  0]
 [ 0  2 29]]
              precision    recall  f1-score   support

           0       1.00      0.94      0.97        31
           1       0.91      1.00      0.95        31
           2       0.97      0.94      0.95        31

    accuracy                           0.96        93
   macro avg       0.96      0.96      0.96        93
weighted avg       0.96      0.96      0.96        93

Validation Loss: 0.7006, Validation Accuracy: 0.8947
Validation - Confusion Matrix and Classification Report
[[6 0 1]
 [1 6 0]
 [0 0 5]]
              precision    recall  f1-score   support

           0       0.86      0.86      0.86         7
           1       1.00      0.86      0.92         7
           2       0.83      1.00      0.91         5

    accuracy                           0.89        19
   macro avg       0.90      0.90      0.90        19
weighted avg       0.90  

In [28]:
dataloader_test_norm_samp = torch.utils.data.DataLoader(image_datasets_norm_samp["test"],
                                                  batch_size=32,
                                                  shuffle=True)

In [29]:
test_preds, test_labels = test_model(trained_model_norm_under_sampled, dataloader_test_norm_samp, device)
print("Confusion Matrix:")
print(confusion_matrix(test_labels, test_preds))
print("\nClassification Report:")
print(classification_report(test_labels, test_preds))

Confusion Matrix:
[[48  0  0]
 [ 2 54  1]
 [ 0  6 33]]

Classification Report:
              precision    recall  f1-score   support

           0       0.96      1.00      0.98        48
           1       0.90      0.95      0.92        57
           2       0.97      0.85      0.90        39

    accuracy                           0.94       144
   macro avg       0.94      0.93      0.94       144
weighted avg       0.94      0.94      0.94       144



In [30]:
#Data augmentation and transformation with undersampling and no normalization

data_transforms = {
    'train': transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(227)
    ]),
    'valid': transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(227)
    ]),
    'test': transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(227)
    ])
}

image_datasets_undersampled = {x: datasets.ImageFolder(os.path.join(BALANCED_DIR, x), data_transforms[x])
                  for x in ['train', 'valid', 'test']}


dataloaders_undersampled = {x: torch.utils.data.DataLoader(image_datasets_undersampled[x],
  batch_size = 4, shuffle = True)
  for x in ['train', 'valid']}


In [33]:
trained_model_undersamp = train_model(dataloaders_undersampled, model_conv, criterion, optimizer, exp_lr_scheduler, 5, device)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1/5 started
Training Loss: 1.0964
Training - Confusion Matrix and Classification Report
[[23  7  1]
 [ 2 28  1]
 [ 0  6 25]]
              precision    recall  f1-score   support

           0       0.92      0.74      0.82        31
           1       0.68      0.90      0.78        31
           2       0.93      0.81      0.86        31

    accuracy                           0.82        93
   macro avg       0.84      0.82      0.82        93
weighted avg       0.84      0.82      0.82        93

Validation Loss: 1.3588, Validation Accuracy: 0.8421
Validation - Confusion Matrix and Classification Report
[[5 2 0]
 [1 6 0]
 [0 0 5]]
              precision    recall  f1-score   support

           0       0.83      0.71      0.77         7
           1       0.75      0.86      0.80         7
           2       1.00      1.00      1.00         5

    accuracy                           0.84        19
   macro avg       0.86      0.86      0.86        19
weighted avg       0.85  

In [35]:
dataloader_test_undersampled = torch.utils.data.DataLoader(image_datasets_undersampled["test"],
                                                  batch_size=32,
                                                  shuffle=True)

test_preds, test_labels = test_model(trained_model_undersamp, dataloader_test_undersampled, device)
print("Confusion Matrix:")
print(confusion_matrix(test_labels, test_preds))
print("\nClassification Report:")
print(classification_report(test_labels, test_preds))

Confusion Matrix:
[[36  8  4]
 [ 3 46  8]
 [ 0  5 34]]

Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.75      0.83        48
           1       0.78      0.81      0.79        57
           2       0.74      0.87      0.80        39

    accuracy                           0.81       144
   macro avg       0.81      0.81      0.81       144
weighted avg       0.82      0.81      0.81       144

