<h1> Cuda verification </h1>

In [173]:
import torch

print(torch.version.cuda)
print(torch.cuda.get_device_name(0))

12.8
NVIDIA GeForce RTX 4060 Laptop GPU


<h1>Augmentation pipeline </h1>

In [107]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

training_pipeline = A.Compose([
    A.Resize(height=224, width=224),
    A.SquareSymmetry(p=0.5),
    A.RandomCrop(height=180, width=180,  p=1.0),
    A.Resize(height=224, width=224),
    A.RandomRain(slant_range=(-15,15), drop_length=15, drop_width=1, rain_type="default", blur_value=7 ,p=0.3),
    A.RandomBrightnessContrast(brightness_limit=(-0.3, 0.3), contrast_limit=(-0.2, 0.2), p=0.3),
    A.Rotate(limit=45, p=0.5),
    A.GaussNoise(std_range=(0.1, 0.2), per_channel=True  ,p=0.5),
    A.ColorJitter(brightness=(0.8, 1.1), contrast=(0.8, 1.1), saturation=(0.8, 1.1), hue=(-0.5, 0.5)),
    A.OneOf([
        A.CoarseDropout(num_holes_range=(1, 8), hole_height_range=(0.1, 0.25),
                        hole_width_range=(0.1, 0.25), fill=0, p=0.5),
        A.GridDropout(ratio=0.5, random_offset=True,  p=0.5)
    ]),
    A.Normalize(mean=(0.485, 0.456, 0.406),std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

validation_pipeline = A.Compose([
    A.Resize(height=224, width=224),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

<h2>Data preparation</h2>

In [175]:
import random
import shutil
from tqdm import tqdm
from pathlib import Path

def prepare_image_datasets(source_dir, base_dir, split_ratios=(0.7, 0.2, 0.1)):

     if not (0.999 < sum(split_ratios) < 1.001):
         raise ValueError('split_ratios must sum to 1')

     source_path = Path(source_dir)
     base_path = Path(base_dir)

     if not source_path.is_dir():
         print(f'Source directory {source_path.name} does not exist')
         return

     train_path = base_path / 'train'
     test_path = base_path / 'test'
     val_path = base_path / 'val'

     class_names = [d.name for d in source_path.iterdir() if d.is_dir()]

     if not class_names:
         print(f'Source directory {source_path.name} does not contain any class names')
         return

     for directory in [train_path, test_path, val_path]:
         for class_name in class_names:
             (directory / class_name).mkdir(parents=True, exist_ok=True)

     for class_name in class_names:

         class_source_path = source_path / class_name

         files = [f for f in class_source_path.iterdir() if f.is_file()]

         random.shuffle(files)

         total_files = len(files)
         train_end = int(total_files * split_ratios[0])
         test_end = train_end + int(total_files * split_ratios[1])

         split_data = {
             'train': (files[:train_end], train_path),
             'test': (files[train_end:test_end], test_path),
             'val': (files[test_end:], val_path)
         }

         print(f"Copying {class_name} to {base_path}")

         for split_name, (file_list, destination_path) in split_data.items():

             dest_class_path = destination_path / class_name

             for file_path in tqdm(file_list, desc=f'Copying {split_name} files'):

                 shutil.copy2(file_path, dest_class_path / file_path.name)


In [167]:
import cv2
from torchvision import datasets

class CustomDataset(datasets.ImageFolder):

    def __getitem__(self, index):

        path, target = self.samples[index]

        image = cv2.imread(path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']

        return image, target

    def __len__(self):
        return len(self.samples)

data = CustomDataset(root='dataset', transform=training_pipeline)

# image, path = data[0]
#
# print(path)
#
# image = image.permute(1,2,0).numpy()
#
# image = (image * np
#          .array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406]))
# image = np.clip(image, 0, 1)
# image = (image * 255).astype(np.uint8)
#
# plt.imshow(image)
# plt.show()

<h2> Data splitting </h2>

In [174]:
source_data_dir = r'C:\Users\LANCE\OneDrive\Desktop\rice_pest\Finalized_datasets'
base_data_dir = r'C:\Users\LANCE\OneDrive\Desktop\rice_pest\dataset'

if not source_data_dir or not base_data_dir:
    raise ValueError('Source and base data directory not found')

prepare_image_datasets(source_data_dir, base_data_dir, split_ratios=(0.7, 0.2, 0.1))


Copying train files: 100%|██████████| 1362/1362 [00:09<00:00, 151.11it/s]
Copying test files: 100%|██████████| 389/389 [00:02<00:00, 151.45it/s]
Copying val files: 100%|██████████| 195/195 [00:01<00:00, 157.64it/s]
Copying train files: 100%|██████████| 763/763 [00:05<00:00, 149.81it/s]
Copying test files: 100%|██████████| 218/218 [00:01<00:00, 164.58it/s]
Copying val files: 100%|██████████| 109/109 [00:00<00:00, 156.36it/s]
Copying train files: 100%|██████████| 1204/1204 [00:07<00:00, 153.75it/s]
Copying test files: 100%|██████████| 344/344 [00:02<00:00, 164.47it/s]
Copying val files: 100%|██████████| 172/172 [00:01<00:00, 159.31it/s]
Copying train files: 100%|██████████| 671/671 [00:04<00:00, 161.03it/s]
Copying test files: 100%|██████████| 191/191 [00:01<00:00, 149.55it/s]
Copying val files: 100%|██████████| 97/97 [00:00<00:00, 151.51it/s]
Copying train files: 100%|██████████| 410/410 [00:02<00:00, 157.82it/s]
Copying test files: 100%|██████████| 117/117 [00:00<00:00, 154.45it/s]
Cop

<h2> Training functions and hyperparameters</h2>

In [244]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

class LSoftmax(nn.Module):

    def __init__(self, input_features, number_of_classes, m=4):
        super(LSoftmax, self).__init__()
        self.input_features = input_features
        self.number_of_classes = number_of_classes
        self.m = m

        if m <= 1:
            raise ValueError('m must be greater than 1')

        self.weight = nn.Parameter(torch.FloatTensor(input_features, number_of_classes))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, x, labels):

        normalize_weight = F.normalize(self.weight, p=2, dim=1)
        normalize_x = F.normalize(x, p=2, dim=1)

        cos_theta = F.linear(normalize_x, normalize_weight.t())
        cos_theta = cos_theta.clamp(-1,1)

        theta = torch.acos(cos_theta)

        target_theta = theta.gather(1, labels.view(-1,1))

        m_theta = self.m * target_theta
        k = (m_theta / math.pi).floor()

        psi_theta = ((-1)**k) * torch.cos(m_theta) - (2*k)

        final_logits = cos_theta.scatter(1, labels.view(-1,1), psi_theta)

        return final_logits


In [229]:
t = torch.tensor([[1, 2], [3, 4]])
torch.gather(t, 1, torch.tensor([[0, 0], [1, 0]]))

tensor([[1, 1],
        [4, 3]])

<h3> LSmax Test </h3>

In [277]:
NUM_FEATURES = 1280
NUM_CLASSES = 6
BATCH_SIZE = 4
MARGIN = 4

lsoftmax_layer = LSoftmax(
    input_features=NUM_FEATURES,
    number_of_classes=NUM_CLASSES,
    m=MARGIN
)

dummy_features = torch.randn(BATCH_SIZE, NUM_FEATURES)
dummy_labels = torch.randint(0, NUM_CLASSES, (BATCH_SIZE,))
res = lsoftmax_layer(dummy_features, dummy_labels)

print("Shape of the output logits:", res.shape)
print("\nOutput Logits Tensor (first 2 rows):")
print(res.detach().numpy()[:2])

Shape of the output logits: torch.Size([4, 6])

Output Logits Tensor (first 2 rows):
[[-2.2175212  -0.46530092  0.4388179  -0.13423419  0.6960334   0.20117837]
 [ 0.06377348  0.31151363  0.52479124  0.12865415 -3.1037827   0.04310565]]
