<h1> Cuda verification </h1>

In [173]:
import torch

print(torch.version.cuda)
print(torch.cuda.get_device_name(0))

12.8
NVIDIA GeForce RTX 4060 Laptop GPU


<h1>Augmentation pipeline </h1>

In [107]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

training_pipeline = A.Compose([
    A.Resize(height=224, width=224),
    A.SquareSymmetry(p=0.5),
    A.RandomCrop(height=180, width=180,  p=1.0),
    A.Resize(height=224, width=224),
    A.RandomRain(slant_range=(-15,15), drop_length=15, drop_width=1, rain_type="default", blur_value=7 ,p=0.3),
    A.RandomBrightnessContrast(brightness_limit=(-0.3, 0.3), contrast_limit=(-0.2, 0.2), p=0.3),
    A.Rotate(limit=45, p=0.5),
    A.GaussNoise(std_range=(0.1, 0.2), per_channel=True  ,p=0.5),
    A.ColorJitter(brightness=(0.8, 1.1), contrast=(0.8, 1.1), saturation=(0.8, 1.1), hue=(-0.5, 0.5)),
    A.OneOf([
        A.CoarseDropout(num_holes_range=(1, 8), hole_height_range=(0.1, 0.25),
                        hole_width_range=(0.1, 0.25), fill=0, p=0.5),
        A.GridDropout(ratio=0.5, random_offset=True,  p=0.5)
    ]),
    A.Normalize(mean=(0.485, 0.456, 0.406),std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

validation_pipeline = A.Compose([
    A.Resize(height=224, width=224),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

<h2>Data preparation</h2>

In [175]:
import random
import shutil
from tqdm import tqdm
from pathlib import Path

def prepare_image_datasets(source_dir, base_dir, split_ratios=(0.7, 0.2, 0.1)):

     if not (0.999 < sum(split_ratios) < 1.001):
         raise ValueError('split_ratios must sum to 1')

     source_path = Path(source_dir)
     base_path = Path(base_dir)

     if not source_path.is_dir():
         print(f'Source directory {source_path.name} does not exist')
         return

     train_path = base_path / 'train'
     test_path = base_path / 'test'
     val_path = base_path / 'val'

     class_names = [d.name for d in source_path.iterdir() if d.is_dir()]

     if not class_names:
         print(f'Source directory {source_path.name} does not contain any class names')
         return

     for directory in [train_path, test_path, val_path]:
         for class_name in class_names:
             (directory / class_name).mkdir(parents=True, exist_ok=True)

     for class_name in class_names:

         class_source_path = source_path / class_name

         files = [f for f in class_source_path.iterdir() if f.is_file()]

         random.shuffle(files)

         total_files = len(files)
         train_end = int(total_files * split_ratios[0])
         test_end = train_end + int(total_files * split_ratios[1])

         split_data = {
             'train': (files[:train_end], train_path),
             'test': (files[train_end:test_end], test_path),
             'val': (files[test_end:], val_path)
         }

         print(f"Copying {class_name} to {base_path}")

         for split_name, (file_list, destination_path) in split_data.items():

             dest_class_path = destination_path / class_name

             for file_path in tqdm(file_list, desc=f'Copying {split_name} files'):

                 shutil.copy2(file_path, dest_class_path / file_path.name)


In [167]:
import cv2
from torchvision import datasets

class CustomDataset(datasets.ImageFolder):

    def __getitem__(self, index):

        path, target = self.samples[index]

        image = cv2.imread(path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']

        return image, target

    def __len__(self):
        return len(self.samples)

data = CustomDataset(root='dataset', transform=training_pipeline)

# image, path = data[0]
#
# print(path)
#
# image = image.permute(1,2,0).numpy()
#
# image = (image * np
#          .array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406]))
# image = np.clip(image, 0, 1)
# image = (image * 255).astype(np.uint8)
#
# plt.imshow(image)
# plt.show()

<h2> Data splitting </h2>

In [174]:
source_data_dir = r'C:\Users\LANCE\OneDrive\Desktop\rice_pest\Finalized_datasets'
base_data_dir = r'C:\Users\LANCE\OneDrive\Desktop\rice_pest\dataset'

if not source_data_dir or not base_data_dir:
    raise ValueError('Source and base data directory not found')

prepare_image_datasets(source_data_dir, base_data_dir, split_ratios=(0.7, 0.2, 0.1))


Copying train files: 100%|██████████| 1362/1362 [00:09<00:00, 151.11it/s]
Copying test files: 100%|██████████| 389/389 [00:02<00:00, 151.45it/s]
Copying val files: 100%|██████████| 195/195 [00:01<00:00, 157.64it/s]
Copying train files: 100%|██████████| 763/763 [00:05<00:00, 149.81it/s]
Copying test files: 100%|██████████| 218/218 [00:01<00:00, 164.58it/s]
Copying val files: 100%|██████████| 109/109 [00:00<00:00, 156.36it/s]
Copying train files: 100%|██████████| 1204/1204 [00:07<00:00, 153.75it/s]
Copying test files: 100%|██████████| 344/344 [00:02<00:00, 164.47it/s]
Copying val files: 100%|██████████| 172/172 [00:01<00:00, 159.31it/s]
Copying train files: 100%|██████████| 671/671 [00:04<00:00, 161.03it/s]
Copying test files: 100%|██████████| 191/191 [00:01<00:00, 149.55it/s]
Copying val files: 100%|██████████| 97/97 [00:00<00:00, 151.51it/s]
Copying train files: 100%|██████████| 410/410 [00:02<00:00, 157.82it/s]
Copying test files: 100%|██████████| 117/117 [00:00<00:00, 154.45it/s]
Cop

<h2> Training functions and hyperparameters</h2>

In [244]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

class LSoftmax(nn.Module):

    def __init__(self, input_features, number_of_classes, m=4):
        super(LSoftmax, self).__init__()
        self.input_features = input_features
        self.number_of_classes = number_of_classes
        self.m = m

        if m <= 1:
            raise ValueError('m must be greater than 1')

        # Initialization of weights through xavier uniform.
        self.weight = nn.Parameter(torch.FloatTensor(input_features, number_of_classes))
        nn.init.xavier_uniform_(self.weight)


    # Forward propagation
    def forward(self, x, labels):

        # normalization of weights and input vector features.
        normalize_weight = F.normalize(self.weight, p=2, dim=1)
        normalize_x = F.normalize(x, p=2, dim=1)

        # Cosine similarity: perform DOT product multiplication between weights and input vectors.
        cos_theta = F.linear(normalize_x, normalize_weight.t())
        cos_theta = cos_theta.clamp(-1,1)

        # Convert cosine similarity into actual angles.
        theta = torch.acos(cos_theta)

        # Gathered the correct classes as one vector and reshape it in to a 2d with one column.
        target_theta = theta.gather(1, labels.view(-1,1))

        # applies the margin to the correct classes
        m_theta = self.m * target_theta
        k = (m_theta / math.pi).floor()

        # Compute the angular margin to get a new hard target score for prediction
        psi_theta = ((-1)**k) * torch.cos(m_theta) - (2*k)

        # Scattered the new values got from psi_theta to its corresponding classes
        final_logits = cos_theta.scatter(1, labels.view(-1,1), psi_theta)

        return final_logits


In [144]:
t = torch.tensor([[1, 2, 3, 4],
                  [5,6,7,8]])

t.gather(1, torch.tensor())

# t = torch.tensor([[1, 2, 3, 4],
#                   [5,6,7,8]])
#
# print(t.view(-1,1))

RuntimeError: Index tensor must have the same number of dimensions as input tensor

<h3> LSmax Test </h3>

In [277]:
NUM_FEATURES = 1280
NUM_CLASSES = 6
BATCH_SIZE = 4
MARGIN = 4

lsoftmax_layer = LSoftmax(
    input_features=NUM_FEATURES,
    number_of_classes=NUM_CLASSES,
    m=MARGIN
)

dummy_features = torch.randn(BATCH_SIZE, NUM_FEATURES)
dummy_labels = torch.randint(0, NUM_CLASSES, (BATCH_SIZE,))
res = lsoftmax_layer(dummy_features, dummy_labels)

print("Shape of the output logits:", res.shape)
print("\nOutput Logits Tensor (first 2 rows):")
print(res.detach().numpy()[:2])

Shape of the output logits: torch.Size([4, 6])

Output Logits Tensor (first 2 rows):
[[-2.2175212  -0.46530092  0.4388179  -0.13423419  0.6960334   0.20117837]
 [ 0.06377348  0.31151363  0.52479124  0.12865415 -3.1037827   0.04310565]]


In [109]:
import torchvision.models as models

class PestClassifierMobileNetV2(nn.Module):

    def __init__(self,num_classes , margin):
        super(PestClassifierMobileNetV2, self).__init__()

        # get model
        self.base_model = models.mobilenet_v2(weights='MobileNet_V2_Weights.IMAGENET1K_V2')

        # Froze feature extraction layer to retain weights.
        for params in self.base_model.parameters():
            params.requires_grad = False

        # get number of features
        num_filters = self.base_model.classifier[1].in_features

        # Hyperparameter tuning: Instantiate LSoftmax as new customized final layer.
        self.base_model.classifier[1] = LSoftmax(
            input_features=num_filters,
            number_of_classes=num_classes,
            m=margin
        )

    # Forward propagation
    def forward(self, x, labels=None):

        if labels is None:
            return self.base_model(x)

        # Passed the features in feature extraction layer.
        features = self.base_model.features(x)

        # Adaptive average pooling: returns a 2d scaled vector features.
        features = F.adaptive_avg_pool2d(features, (1,1)).reshape(features.shape[0], -1)

        # Implementation of the LSoftmax through passing the feature vectors for classification.
        logits = self.base_model.classifier[1](features)

        # return the result logits
        return logits


In [156]:
def train_and_test(model, optimizer, criterion, train_dataloader, test_dataloader, num_epoch, device):

    for epoch in range(num_epoch):

        model.train()
        running_train_loss = 0.0

        # Training phase
        for data, labels in tqdm(train_dataloader, desc=f"Train epoch: {epoch+1}"):

            data, target = data.to(device), labels.to(device)
            optimizer.zero_grad()
            output = model(data, labels)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            running_train_loss += loss.item()

        training_loss = running_train_loss / len(train_dataloader)

        # Testing phase

        model.eval()
        running_test_loss = 0.0
        correct_predictions = 0
        total_samples = 0

        with torch.no_grad():

            for inputs, labels in tqdm(test_dataloader, desc=f"Test epoch: {epoch+1}"):
                inputs, labels = inputs.to(device), labels.to(device)

                output_loss = model(inputs, labels)
                loss = criterion(output_loss, labels)
                running_test_loss += loss.item()

                output_predictions = model(inputs, labels=None)
                _, predicted = torch.max(output_predictions.data, 1)
                total_samples += labels.size(0)
                correct_predictions += (predicted == labels).sum().item()


        testing_loss = running_test_loss / len(test_dataloader)
        accuracy = 100 * correct_predictions / total_samples

        print(f'Epoch {epoch+1}/{num_epoch} | Train Loss: {training_loss} | Test Loss: {testing_loss} | Accuracy: {accuracy}')

    print('Training done!')

In [117]:
model = models.mobilenet_v2(weights='MobileNet_V2_Weights.IMAGENET1K_V2')

# print(model.classifier[1])
print(model.features)

Sequential(
  (0): Conv2dNormActivation(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU6(inplace=True)
  )
  (1): InvertedResidual(
    (conv): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
      )
      (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (2): InvertedResidual(
    (conv): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (

In [145]:
print(model.features[0])

Conv2dNormActivation(
  (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU6(inplace=True)
)


In [108]:
import torch

a = torch.randn(4, 1280, 1, 1)
a.reshape(4, -1)

tensor([[ 1.6195, -0.1782, -2.5715,  ...,  0.0331,  0.7603,  0.0035],
        [-0.2862,  1.4578,  1.0089,  ...,  1.1253,  0.0103,  1.5974],
        [-0.3662,  2.0849, -0.4080,  ..., -0.4553, -1.3057, -0.6453],
        [ 1.8317, -1.2072,  0.1279,  ..., -0.2502, -2.0353,  0.0071]])

In [69]:
import torch.nn as nn

m = nn.AdaptiveAvgPool2d((5, 7))
input = torch.randn(1, 64, 8, 9)
output = m(input)
# print(input)
print(output)

tensor([[[[ 0.2691,  0.9643,  0.8347,  ..., -0.1645, -0.1972, -0.2539],
          [ 0.2757,  0.9418,  0.9279,  ...,  0.5813,  0.1480, -0.1334],
          [ 0.0424,  0.0234, -0.4845,  ...,  0.5998, -0.1160,  0.5142],
          [ 0.4355,  0.0480, -0.6159,  ...,  0.0204, -0.6934, -0.3013],
          [ 0.2712,  0.3419, -0.3261,  ..., -0.7096, -0.8251, -0.5811]],

         [[ 0.5099, -0.4436, -0.1795,  ...,  0.9180,  0.1221, -0.5859],
          [ 0.2151, -0.2515, -0.0462,  ...,  0.3873,  0.2008, -0.3096],
          [-0.2853, -0.7343, -0.6029,  ...,  0.3461,  1.0835,  0.1555],
          [-0.5955, -0.3730,  0.0737,  ...,  0.0337,  0.1940, -0.0271],
          [-0.3843, -0.3897, -0.2840,  ..., -0.3347, -0.0284, -0.2131]],

         [[ 0.2297,  0.6255,  0.5641,  ..., -0.9883, -1.5511, -0.7901],
          [ 0.3697,  0.2164,  0.4150,  ..., -0.0471, -0.8920, -0.4873],
          [ 0.2315,  0.2039,  0.3022,  ...,  0.9991,  0.1681,  0.1191],
          [-0.1260,  0.3516,  0.5540,  ...,  0.0779, -0.3429


