In [1]:
# select the compute device: CUDA when a usable GPU is present, otherwise CPU
import torch
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cpu


In [7]:
from pathlib import Path

# root folder that holds one sub-directory per dataset split
data_path = Path('data_ss/')

# split directories: <root>/train, <root>/test, <root>/val
train_dir, test_dir, val_dir = (
    data_path.joinpath(split_name) for split_name in ('train', 'test', 'val')
)


In [8]:
# data transformation
import torchvision
from torchvision import transforms

# default pretrained weights for the MobileNetV2 model
weights = torchvision.models.MobileNet_V2_Weights.DEFAULT

# base transform (preprocessing preset) that ships with those weights
base_transform = weights.transforms()

# training transform: light augmentation followed by the weights' preset.
# (RandomPerspective and TrivialAugmentWide were tried and are disabled.)
# NOTE(review): the preset printed for these weights resizes to 232 and
# center-crops to 224, so the 224x224 output of RandomResizedCrop is scaled
# up and cropped a second time here — confirm this extra zoom is intended.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(size=224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(
        brightness=0.15,
        contrast=0.15,
        saturation=0.15,
        hue=0.05,
    ),
    base_transform,  # resize / crop / normalise as expected by the weights
])

# test and validation images get only the deterministic preset
test_val_transform = base_transform



In [9]:
# show both pipelines, one after the other
print(train_transform, test_val_transform, sep='\n')



Compose(
    RandomResizedCrop(size=(224, 224), scale=(0.8, 1.0), ratio=(0.75, 1.3333), interpolation=bilinear, antialias=True)
    RandomHorizontalFlip(p=0.5)
    RandomRotation(degrees=[-15.0, 15.0], interpolation=nearest, expand=False, fill=0)
    ColorJitter(brightness=(0.85, 1.15), contrast=(0.85, 1.15), saturation=(0.85, 1.15), hue=(-0.05, 0.05))
    ImageClassification(
    crop_size=[224]
    resize_size=[232]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)
)
ImageClassification(
    crop_size=[224]
    resize_size=[232]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)


In [10]:
# train, test, validation data
from torchvision import datasets

# training images go through the augmenting pipeline;
# test and validation images share the plain preset transform
train_data = datasets.ImageFolder(root=train_dir, transform=train_transform)
test_data = datasets.ImageFolder(root=test_dir, transform=test_val_transform)
val_data = datasets.ImageFolder(root=val_dir, transform=test_val_transform)



In [11]:
# show the training dataset summary (number of datapoints, root, transform)
print(train_data)

Dataset ImageFolder
    Number of datapoints: 6744
    Root location: data_ss\train
    StandardTransform
Transform: Compose(
               RandomResizedCrop(size=(224, 224), scale=(0.8, 1.0), ratio=(0.75, 1.3333), interpolation=bilinear, antialias=True)
               RandomHorizontalFlip(p=0.5)
               RandomRotation(degrees=[-15.0, 15.0], interpolation=nearest, expand=False, fill=0)
               ColorJitter(brightness=(0.85, 1.15), contrast=(0.85, 1.15), saturation=(0.85, 1.15), hue=(-0.05, 0.05))
               ImageClassification(
               crop_size=[224]
               resize_size=[232]
               mean=[0.485, 0.456, 0.406]
               std=[0.229, 0.224, 0.225]
               interpolation=InterpolationMode.BILINEAR
           )
           )


In [12]:
# batch size and number of classes
BATCH_SIZE = 32  # samples per mini-batch, shared by every DataLoader below

# class labels exposed by the training ImageFolder dataset
class_names = train_data.classes
# print(len(class_names))
# print(class_names)
NUM_CLASSES = len(class_names)  # used as the classifier's output width
print(NUM_CLASSES)


23


In [13]:
# train, test, val dataloaders for model
from torch.utils.data import DataLoader

# Pinned (page-locked) host memory only helps when batches are copied to a
# CUDA device, so it is switched on automatically from `device` instead of
# having to edit the three calls by hand when a GPU is available.
use_pinned_memory = str(device).startswith('cuda')

# only the training split is shuffled; test/val keep a fixed order
train_dataloader = DataLoader(
    dataset=train_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=0,
    pin_memory=use_pinned_memory)
test_dataloader = DataLoader(
    dataset=test_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=0,
    pin_memory=use_pinned_memory)
val_dataloader = DataLoader(
    dataset=val_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=0,
    pin_memory=use_pinned_memory)



In [14]:
# confirm the three loader objects were created
print(train_dataloader, test_dataloader, val_dataloader, sep='\n')

<torch.utils.data.dataloader.DataLoader object at 0x000002CF475E6A50>
<torch.utils.data.dataloader.DataLoader object at 0x000002CF47D9C050>
<torch.utils.data.dataloader.DataLoader object at 0x000002CF47D9C2D0>


In [15]:
# MobileNetV2 with Transfer Learning:
# build the network with the pretrained weights, then place it on `device`
model_MoSE = torchvision.models.mobilenet_v2(weights=weights)
model_MoSE = model_MoSE.to(device)
print(model_MoSE)


MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [16]:
import torchinfo
from torchinfo import summary

# layer-by-layer overview of the unmodified pretrained model
summary(
    model_MoSE,
    input_size=(32, 3, 224, 224),  # (batch, channels, height, width)
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)



Layer (type (var_name))                       Input Shape          Output Shape         Param #              Trainable
MobileNetV2 (MobileNetV2)                     [32, 3, 224, 224]    [32, 1000]           --                   True
├─Sequential (features)                       [32, 3, 224, 224]    [32, 1280, 7, 7]     --                   True
│    └─Conv2dNormActivation (0)               [32, 3, 224, 224]    [32, 32, 112, 112]   --                   True
│    │    └─Conv2d (0)                        [32, 3, 224, 224]    [32, 32, 112, 112]   864                  True
│    │    └─BatchNorm2d (1)                   [32, 32, 112, 112]   [32, 32, 112, 112]   64                   True
│    │    └─ReLU6 (2)                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─InvertedResidual (1)                   [32, 32, 112, 112]   [32, 16, 112, 112]   --                   True
│    │    └─Sequential (conv)                 [32, 32, 112, 112]   [32, 16, 112, 112]

In [17]:
# added for a further efficiency improvement
# adding SE (Squeeze-and-Excitation) block
# This is an attention mechanism that helps the model weigh which feature channels are most relevant.

class SEBlock(torch.nn.Module):
    """Squeeze-and-Excitation channel attention block.

    Each channel of the input feature map is averaged to a single value
    ("squeeze"), passed through a two-layer bottleneck ending in a sigmoid
    ("excitation"), and the resulting per-channel gate in (0, 1) rescales
    the input.

    Args:
        in_channels: number of channels C of the input feature map.
        reduction_ratio: factor by which the bottleneck narrows C. The hidden
            width is ``max(1, in_channels // reduction_ratio)``.

    Shape:
        Input ``(B, C, H, W)`` -> output ``(B, C, H, W)``.
    """

    def __init__(self, in_channels, reduction_ratio=16):
        super().__init__()
        # Clamp to at least one hidden unit: with in_channels < reduction_ratio
        # the integer division yields 0, which would create an empty bottleneck
        # and make the gate a constant instead of something learnable.
        hidden_channels = max(1, in_channels // reduction_ratio)

        # Attribute names and Sequential indices are unchanged so that
        # state_dict keys stay the same.
        self.avg_pool = torch.nn.AdaptiveAvgPool2d(1)
        self.fc = torch.nn.Sequential(
            torch.nn.Linear(in_channels, hidden_channels, bias=False),
            torch.nn.ReLU(inplace=True),
            torch.nn.Linear(hidden_channels, in_channels, bias=False),
            torch.nn.Sigmoid()
        )

    def forward(self, x):
        """Return ``x`` rescaled channel-wise by the learned gate."""
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)  # squeeze: (B, C, 1, 1) -> (B, C)
        y = self.fc(y).view(b, c, 1, 1)  # excitation: per-channel gate
        return x * y  # gate broadcasts over the spatial dimensions
    
    

In [18]:
# keeping base model in feature extraction mode:
# switch off gradient tracking for every parameter the model currently has
for frozen_param in model_MoSE.parameters():
    frozen_param.requires_grad_(False)



In [19]:
# adding SE block before final classifier of MobileNetV2 model
# last feature of MobileNetV2 model has output_channels = 1280
# Seed first so the SE block's random initialisation is reproducible.
torch.manual_seed(42)
# The block is created after the backbone was frozen, so its weights stay
# trainable. It must be moved to `device` explicitly: the rest of the model
# already lives there and a CPU-resident block would fail on CUDA.
model_MoSE.features.add_module(
    'se_block_custom',
    SEBlock(in_channels=1280, reduction_ratio=16).to(device=device))



In [20]:
# changing the output classifier:
# swap the pretrained head for a NUM_CLASSES-way head (seeded for repeatable init)
torch.manual_seed(42)
model_MoSE.classifier = torch.nn.Sequential(
    torch.nn.Dropout(p=0.3, inplace=True),
    torch.nn.Linear(1280, NUM_CLASSES),
).to(device)



In [21]:
# summary of new model
# MobileNetV2 with SE block and the replaced classifier head

summary(
    model_MoSE,
    input_size=(32, 3, 224, 224),  # (batch, channels, height, width)
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)



Layer (type (var_name))                       Input Shape          Output Shape         Param #              Trainable
MobileNetV2 (MobileNetV2)                     [32, 3, 224, 224]    [32, 23]             --                   Partial
├─Sequential (features)                       [32, 3, 224, 224]    [32, 1280, 7, 7]     --                   Partial
│    └─Conv2dNormActivation (0)               [32, 3, 224, 224]    [32, 32, 112, 112]   --                   False
│    │    └─Conv2d (0)                        [32, 3, 224, 224]    [32, 32, 112, 112]   (864)                False
│    │    └─BatchNorm2d (1)                   [32, 32, 112, 112]   [32, 32, 112, 112]   (64)                 False
│    │    └─ReLU6 (2)                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─InvertedResidual (1)                   [32, 32, 112, 112]   [32, 16, 112, 112]   --                   False
│    │    └─Sequential (conv)                 [32, 32, 112, 112]   [32, 16,

In [22]:
from torch import nn

# loss function for model_MoSE (multi-class classification on raw logits)
loss_fn = nn.CrossEntropyLoss()

# optimization function: Adam over the model's parameters
optimizer_fn = torch.optim.Adam(
    model_MoSE.parameters(),
    lr=5e-4,
    weight_decay=1e-5,
)



In [23]:
# TRAINING STEP

from torchmetrics.classification import MulticlassAccuracy, MulticlassPrecision, MulticlassRecall, MulticlassF1Score


def train_step(model: nn.Module,
               train_dataloader: torch.utils.data.DataLoader,
               loss_fn: nn.Module,
               optimizer_fn: torch.optim.Optimizer,
               device: torch.device = device
               ):
    """Train ``model`` for one pass over ``train_dataloader``.

    Args:
        model: network to optimise; it is switched to training mode.
        train_dataloader: yields ``(inputs, targets)`` batches.
        loss_fn: callable mapping ``(logits, targets)`` to a scalar loss.
        optimizer_fn: optimiser that updates the model's parameters.
        device: device the batches are moved to before the forward pass.

    Returns:
        ``(train_loss, train_accuracy)``: the batch losses averaged over the
        number of batches, and the macro-averaged accuracy over all samples
        seen. The class count is read from the module-level ``NUM_CLASSES``.
    """
    # NOTE(review): train() also puts BatchNorm layers into training mode, so
    # their running statistics keep updating even where the weights were
    # frozen with requires_grad=False — confirm this is intended.
    model.train()
    train_loss = 0.0
    metric_accuracy = MulticlassAccuracy(
        num_classes=NUM_CLASSES, average='macro').to(device)

    for X, y in train_dataloader:
        X = X.to(device)
        y = y.to(device)

        y_pred = model(X)  # forward pass (raw logits)
        loss = loss_fn(y_pred, y)  # loss
        train_loss += loss.item()

        optimizer_fn.zero_grad()  # clear gradients from the previous batch
        loss.backward()  # back propagation
        optimizer_fn.step()  # parameter update

        # Softmax is monotonic, so the argmax of the logits already is the
        # predicted class (same approach as test_step / val_step).
        y_pred_class = y_pred.detach().argmax(dim=1)
        metric_accuracy.update(y_pred_class, y)

    train_loss /= len(train_dataloader)
    train_accuracy = metric_accuracy.compute().item()
    return train_loss, train_accuracy



In [24]:
# TESTING STEP
def test_step(model: nn.Module,
              test_dataloader: torch.utils.data.DataLoader,
              loss_fn: nn.Module,
              device: torch.device = device
              ):
    """Evaluate ``model`` on ``test_dataloader`` without updating it.

    Args:
        model: network to evaluate; it is switched to eval mode.
        test_dataloader: yields ``(inputs, targets)`` batches.
        loss_fn: callable mapping ``(logits, targets)`` to a scalar loss.
        device: device the batches and metric objects are moved to.

    Returns:
        ``(test_loss, metrics)``: the batch losses averaged over the number of
        batches, and a dict with keys ``test_accuracy``, ``test_precision``,
        ``test_recall`` and ``test_f1_score``. All four use ``'weighted'``
        averaging over the module-level ``NUM_CLASSES`` classes.
    """
    model.eval()

    average_type = 'weighted'
    # one metric object per reported score, keyed by its name in the result
    scorers = {
        'test_accuracy': MulticlassAccuracy(
            num_classes=NUM_CLASSES, average=average_type).to(device),
        'test_precision': MulticlassPrecision(
            num_classes=NUM_CLASSES, average=average_type, zero_division=0).to(device),
        'test_recall': MulticlassRecall(
            num_classes=NUM_CLASSES, average=average_type, zero_division=0).to(device),
        'test_f1_score': MulticlassF1Score(
            num_classes=NUM_CLASSES, average=average_type, zero_division=0).to(device),
    }

    running_loss = 0
    with torch.inference_mode():
        for inputs, targets in test_dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            logits = model(inputs)  # forward pass
            running_loss += loss_fn(logits, targets).item()

            # predicted class = index of the largest logit
            predicted = logits.argmax(dim=1)
            for scorer in scorers.values():
                scorer.update(predicted, targets)

    test_loss = running_loss / len(test_dataloader)
    metrics = {
        score_name: scorer.compute().item()
        for score_name, scorer in scorers.items()
    }

    return test_loss, metrics



In [25]:
# VALIDATION STEP
def val_step(model: nn.Module,
             val_dataloader: torch.utils.data.DataLoader,
             loss_fn: nn.Module,
             device: torch.device = device
             ):
    """Evaluate ``model`` on ``val_dataloader`` without updating it.

    Args:
        model: network to evaluate; it is switched to eval mode.
        val_dataloader: yields ``(inputs, targets)`` batches.
        loss_fn: callable mapping ``(logits, targets)`` to a scalar loss.
        device: device the batches and the metric object are moved to.

    Returns:
        ``(val_loss, val_accuracy)``: the batch losses averaged over the
        number of batches, and the macro-averaged accuracy over the
        module-level ``NUM_CLASSES`` classes.
    """
    model.eval()

    accuracy_tracker = MulticlassAccuracy(
        num_classes=NUM_CLASSES, average='macro').to(device)

    running_loss = 0
    with torch.inference_mode():
        for inputs, targets in val_dataloader:
            inputs, targets = inputs.to(device), targets.to(device)

            logits = model(inputs)
            running_loss += loss_fn(logits, targets).item()

            # predicted class = index of the largest logit
            accuracy_tracker.update(logits.argmax(dim=1), targets)

    val_loss = running_loss / len(val_dataloader)
    val_accuracy = accuracy_tracker.compute().item()
    return val_loss, val_accuracy



In [26]:
# model_mobnetv2 TRAINING
from tqdm.auto import tqdm


def model_train(model: nn.Module,
                train_dataloader: torch.utils.data.DataLoader,
                val_dataloader: torch.utils.data.DataLoader,
                test_dataloader: torch.utils.data.DataLoader,
                loss_fn: nn.Module,
                optimizer_fn: torch.optim.Optimizer,
                epochs: int,
                device: torch.device = device
                ):
    """Run the train / validate / test cycle for ``epochs`` epochs.

    Every epoch calls ``train_step``, ``val_step`` and ``test_step`` in that
    order, prints a one-line report and appends the numbers to the history.

    Args:
        model: network to train.
        train_dataloader: batches used by ``train_step``.
        val_dataloader: batches used by ``val_step``.
        test_dataloader: batches used by ``test_step``.
        loss_fn: loss shared by all three steps.
        optimizer_fn: optimiser used by ``train_step``.
        epochs: number of epochs to run.
        device: device passed through to the three steps.

    Returns:
        dict mapping each metric name to a list with one value per epoch.
    """
    test_metric_names = ('test_accuracy', 'test_precision',
                         'test_recall', 'test_f1_score')
    results = {
        metric_name: []
        for metric_name in ('train_loss', 'train_accuracy',
                            'val_loss', 'val_accuracy',
                            'test_loss', *test_metric_names)
    }

    best_val_accuracy = 0.0  # highest validation accuracy seen so far

    for epoch in tqdm(range(epochs), desc="Training Epochs"):
        train_loss, train_accuracy = train_step(
            model, train_dataloader, loss_fn, optimizer_fn, device)
        val_loss, val_accuracy = val_step(
            model, val_dataloader, loss_fn, device)
        test_loss, test_metrics = test_step(
            model, test_dataloader, loss_fn, device)

        epoch_report = (
            f"Epoch: {epoch+1:02d} | "
            f"Loss: {train_loss:.4f} | Acc: {train_accuracy:.4f} | "
            f"Val Loss: {val_loss:.4f} | Val Acc: {val_accuracy:.4f} | "
            f"Test Accuracy: {test_metrics['test_accuracy']:.4f} |"
            f"Test Recall: {test_metrics['test_recall']:.4f} |"
            f"Test Precision: {test_metrics['test_precision']:.4f} |"
            f"Test F1: {test_metrics['test_f1_score']:.4f}"
        )
        print(epoch_report)

        # Track the best validation accuracy; this is where a checkpoint
        # could be written when a new best is reached, e.g.
        # torch.save(model.state_dict(), f'best_model_epoch_{epoch+1}.pth')
        best_val_accuracy = max(best_val_accuracy, val_accuracy)

        # record this epoch's numbers
        scalar_history = (
            ('train_loss', train_loss), ('train_accuracy', train_accuracy),
            ('val_loss', val_loss), ('val_accuracy', val_accuracy),
            ('test_loss', test_loss),
        )
        for metric_name, value in scalar_history:
            results[metric_name].append(value)
        for metric_name in test_metric_names:
            results[metric_name].append(test_metrics[metric_name])

    return results



  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# train model_MoSE for 50 epochs and keep the per-epoch history that
# model_train returns
model_moSE_results = model_train(model=model_MoSE,
                                 train_dataloader=train_dataloader,
                                 val_dataloader=val_dataloader,
                                 test_dataloader=test_dataloader,
                                 loss_fn=loss_fn,
                                 optimizer_fn=optimizer_fn,
                                 epochs=50,
                                 device=device
                                 )

# last expression of the cell: display the recorded history
model_moSE_results

