In [None]:
# Colab-only helper used to attach Google Drive to the runtime's filesystem.
from google.colab import drive
# The dataset and model paths used in this notebook all live under this mount point.
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
import os
import zipfile

In [None]:
# Names of the three dataset splits stored in the shared Drive folder.
ha = ["NB_train", "NB_val","NB_test"]
# Drive path of each split, in the same order as `ha` (train, val, test).
dataset_folder = [
    f"/content/drive/MyDrive/YBIGTA 신입플/Datasets/{split_name}" for split_name in ha
]

We start by testing our data augmentations on a simple custom CNN model, which should hopefully train quickly.

In [None]:
!pip install torch torchvision

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-

In [None]:
# I want to start importing the base packages that we need to train our CNN
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, random_split, Subset
from sklearn.metrics import f1_score
import numpy as np
from collections import defaultdict
import torch.nn.functional as F
from PIL import Image

In [None]:
# We'll start training with a smaller version of the data to see how the CNN performs.
# Base model: no data augmentation. Every split gets its own (identical)
# resize -> tensor -> normalize pipeline.
base_transform = {
    split: transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    for split in ('train', 'val', 'test')
}

In [None]:
# Blurring: a Gaussian blur is added to the training pipeline only; the
# validation and test pipelines stay resize -> tensor -> normalize.
# The dict is bound to `blur_transform` (the name `preprocess_and_saved`
# reads) so that it no longer overwrites the augmentation-free
# `base_transform` defined in the previous cell.
blur_transform = {
    'train': transforms.Compose([
        transforms.Resize((224,224)),
        # Kernel is fixed at 5x5; sigma is sampled from [0.1, 2.0] on each call.
        transforms.GaussianBlur(kernel_size=(5, 5), sigma=(0.1, 2.0)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    'val': transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    'test': transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
}

In [None]:
# Color Jittering + Blurring
# Only the training pipeline is augmented; 'val' and 'test' each get their own
# plain resize -> tensor -> normalize pipeline.
additional_transform = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ColorJitter(brightness=0.05, contrast=0.05, saturation=0.05, hue=0.01),
        transforms.GaussianBlur(kernel_size=(5, 5), sigma=(0.1, 2.0)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    **{
        eval_split: transforms.Compose([
            transforms.Resize((224,224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        for eval_split in ('val', 'test')
    },
}

In [None]:
# Adding Affine Transformations + Blurring
# The training images get a small random rotation / shift / zoom / shear and
# then a Gaussian blur; the validation and test images are only resized,
# converted to tensors and normalized.
affine_transform = dict(
    train=transforms.Compose([
        transforms.Resize((224,224)),
        transforms.RandomAffine(degrees=5, translate=(0.02, 0.02), scale=(0.98, 1.02), shear=5),
        transforms.GaussianBlur(kernel_size=(5,5), sigma=(0.1, 2.0)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    val=transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    test=transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
)


In [None]:
def preprocess_and_save(dataset_folder, save_folder, transform=None):
    """Transform every image under ``dataset_folder`` and save the tensors.

    The directory tree below ``dataset_folder`` is mirrored under
    ``save_folder``: an image ``<name>`` is written with ``torch.save`` as
    ``<name>.pt`` in the matching sub-directory.

    Args:
        dataset_folder: Root directory that is walked recursively.
        save_folder: Root directory receiving the ``.pt`` files; created if
            it does not exist.
        transform: Callable applied to each image after conversion to RGB.
            When omitted, ``base_transform["train"]`` is used, which is the
            pipeline this function was hard-wired to before.

    Note:
        The transform runs exactly once per image, so any random
        augmentation inside it is frozen into the saved tensor instead of
        being re-sampled every epoch.
    """
    if transform is None:
        transform = base_transform["train"]

    os.makedirs(save_folder, exist_ok=True)

    for subdir, _, files in os.walk(dataset_folder):
        # Sub-directory of the output tree that mirrors the one being walked.
        target_dir = os.path.join(save_folder, os.path.relpath(subdir, dataset_folder))
        for file in files:
            # The context manager closes the underlying file handle right
            # away; otherwise one handle per image stays open until it is
            # garbage-collected.
            with Image.open(os.path.join(subdir, file)) as raw_image:
                tensor = transform(raw_image.convert("RGB"))
            os.makedirs(target_dir, exist_ok=True)
            torch.save(tensor, os.path.join(target_dir, file + '.pt'))


In [None]:
# Write the preprocessed tensors for the train, val and test splits
# (dataset_folder[0], [1] and [2] respectively) to their output folders.
for split_index, output_dir in enumerate((
    "/content/drive/MyDrive/YBIGTA 신입플/TransformedDatasets/base_NB_train",
    "/content/drive/MyDrive/YBIGTA 신입플/TransformedDatasets/NB_val",
    "/content/drive/MyDrive/YBIGTA 신입플/TransformedDatasets/NB_test",
)):
    preprocess_and_save(dataset_folder[split_index], output_dir)

In [None]:
def preprocess_and_saved(dataset_folder, save_folder, transform=None):
    """Blur-augmented counterpart of the preprocessing step.

    Walks ``dataset_folder`` recursively, applies ``transform`` to every
    image (converted to RGB first) and stores the result with ``torch.save``
    as ``<name>.pt`` in the mirrored sub-directory of ``save_folder``.

    Args:
        dataset_folder: Root directory that is walked recursively.
        save_folder: Root directory receiving the ``.pt`` files; created if
            it does not exist.
        transform: Callable applied to each RGB image. When omitted,
            ``blur_transform["train"]`` is used, which is the pipeline this
            function was hard-wired to before.

    Note:
        The transform runs once per image, so the randomly sampled blur is
        baked into the saved tensor rather than re-sampled every epoch.
    """
    if transform is None:
        transform = blur_transform["train"]

    os.makedirs(save_folder, exist_ok=True)

    for subdir, _, files in os.walk(dataset_folder):
        # Sub-directory of the output tree that mirrors the one being walked.
        target_dir = os.path.join(save_folder, os.path.relpath(subdir, dataset_folder))
        for file in files:
            # Close the image file as soon as the tensor has been produced
            # instead of leaving one open handle per image.
            with Image.open(os.path.join(subdir, file)) as raw_image:
                tensor = transform(raw_image.convert("RGB"))
            os.makedirs(target_dir, exist_ok=True)
            torch.save(tensor, os.path.join(target_dir, file + '.pt'))

In [None]:
# Write the blurred version of the training split (dataset_folder[0]).
# NOTE(review): the output folder ends in "_b6", while the dataset cell further
# down reads ".../blur_NB_train" -- confirm which folder is the intended one.
preprocess_and_saved(
    dataset_folder=dataset_folder[0],
    save_folder="/content/drive/MyDrive/YBIGTA 신입플/TransformedDatasets/blur_NB_train_b6",
)

I ran this code multiple times with slight adjustments to create the custom models. The model defined below was used on a dataset resized to 128 x 128; later on, when we trained our EfficientNet, we had to readjust those sizes to 224 x 224. Please keep this in mind if you intend to reuse this code later on!

Baseline Model \\
- Custom
- A model that performed well with ImageNet


In [None]:
class CustomCNN(nn.Module):
    """Small three-stage convolutional classifier.

    Each stage is ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2)``, so the
    spatial size is halved three times while the channels grow
    3 -> 32 -> 64 -> 128. The flattened features go through a 512-unit hidden
    layer with dropout and a final ``num_classes``-way linear layer.

    ``fc1`` is sized for a 128-channel 16x16 feature map, i.e. 128x128 input
    images. Inputs of any other resolution (for example the 224x224 images
    produced by this notebook's transforms) are average-pooled to 16x16
    before flattening instead of failing with a shape mismatch. The extra
    pooling has no parameters, so existing state dicts load unchanged, and it
    is skipped entirely for 128x128 inputs.

    Args:
        num_classes: Number of output classes.
    """

    def __init__(self, num_classes: int = 12):
        super(CustomCNN, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # 128 channels on a 16x16 grid (128x128 input halved three times).
        self.fc1_input_size = 128 * 16 * 16

        self.fc1 = nn.Linear(self.fc1_input_size, 512)
        self.fc2 = nn.Linear(512, num_classes)

        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, num_classes)``.

        Args:
            x: Image batch of shape ``(batch, 3, H, W)``.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        # Bring the feature map to the 16x16 grid fc1 expects; untouched when
        # the input was 128x128.
        if tuple(x.shape[-2:]) != (16, 16):
            x = F.adaptive_avg_pool2d(x, (16, 16))

        x = x.view(x.size(0), -1)

        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)

        # Log-probabilities. nn.CrossEntropyLoss applies log-softmax again,
        # which leaves already-normalized log-probabilities unchanged.
        return F.log_softmax(x, dim=1)

# Build the 12-class custom CNN and place it on the GPU when one is
# available, otherwise on the CPU.
num_classes = 12
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
base_model = CustomCNN(num_classes=num_classes).to(device)

In [None]:
# Loss function and optimizer used for every training / evaluation cell below.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=base_model.parameters(), lr=1e-3)

In [None]:
# Folder on the mounted Drive that holds the trained model weights.
model_save_path = "/content/drive/MyDrive/YBIGTA/Models"

# Make sure the folder (and any missing parent) exists before saving into it.
if not os.path.isdir(model_save_path):
    os.makedirs(model_save_path, exist_ok=True)

In [None]:
class PreprocessedDataset(torch.utils.data.Dataset):
    """Dataset over tensors that were written to disk with ``torch.save``.

    ``folder`` is walked recursively and every ``*.pt`` file becomes one
    sample. The name of the directory that directly contains a file is its
    class; class names are sorted alphabetically and mapped to the integer
    labels ``0..len(classes) - 1``.

    Only ``.pt`` files are collected so that stray files in the tree are not
    handed to ``torch.load``, and the paths are sorted so the sample order
    does not depend on the order in which the filesystem lists entries.

    Attributes are documented inline in ``__init__``.

    Note:
        The label mapping is derived from the folders present under
        ``folder``, so separately constructed splits only share label indices
        when they contain the same set of class folders.
    """

    def __init__(self, folder):
        # Sorted list of every saved tensor file below `folder`.
        self.file_paths = sorted(
            os.path.join(dirpath, filename)
            for dirpath, _, filenames in os.walk(folder)
            for filename in filenames
            if filename.endswith('.pt')
        )
        # Alphabetically sorted class (parent folder) names.
        self.classes = sorted(set(os.path.basename(os.path.dirname(fp)) for fp in self.file_paths))
        # Class name -> integer label.
        self.class_to_idx = {cls_name: idx for idx, cls_name in enumerate(self.classes)}

    def __len__(self):
        """Return the number of samples."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` from disk and return ``(tensor, label)``."""
        path = self.file_paths[idx]
        image = torch.load(path)
        label = self.get_label_from_path(path)
        return image, label

    def get_label_from_path(self, path):
        """Return the integer label for ``path`` based on its parent folder name."""
        # Get the class name (folder name) from the path
        class_name = os.path.basename(os.path.dirname(path))
        # Map the class name to an integer label
        return self.class_to_idx[class_name]

# Create one dataset per preprocessed folder: four training variants (base,
# blur, colour-jitter "additional", affine) plus the validation and test sets.
base_train_dataset = PreprocessedDataset(
    folder="/content/drive/MyDrive/YBIGTA 신입플/TransformedDatasets/base_NB_train"
)
blur_train_dataset = PreprocessedDataset(
    folder="/content/drive/MyDrive/YBIGTA 신입플/TransformedDatasets/blur_NB_train"
)
additional_train_dataset = PreprocessedDataset(
    folder="/content/drive/MyDrive/YBIGTA 신입플/TransformedDatasets/additional_NB_train"
)
affine_train_dataset = PreprocessedDataset(
    folder="/content/drive/MyDrive/YBIGTA 신입플/TransformedDatasets/Affine_NB_train"
)
val_dataset = PreprocessedDataset(
    folder="/content/drive/MyDrive/YBIGTA 신입플/TransformedDatasets/NB_val"
)
test_dataset = PreprocessedDataset(
    folder="/content/drive/MyDrive/YBIGTA 신입플/TransformedDatasets/NB_test"
)

In [None]:
# Training loaders are reshuffled every epoch; the validation and test loaders
# keep a fixed order. All of them use batches of 32 and one worker process.
_train_loader_args = dict(batch_size=32, shuffle=True, num_workers=1)
_eval_loader_args = dict(batch_size=32, shuffle=False, num_workers=1)

base_train_loader = DataLoader(base_train_dataset, **_train_loader_args)
blur_train_loader = DataLoader(blur_train_dataset, **_train_loader_args)
affine_train_loader = DataLoader(affine_train_dataset, **_train_loader_args)
additional_train_loader = DataLoader(additional_train_dataset, **_train_loader_args)
val_loader = DataLoader(val_dataset, **_eval_loader_args)
test_loader = DataLoader(test_dataset, **_eval_loader_args)

In [None]:
# Per-experiment lookup tables keyed by split. Every experiment has its own
# training set but shares the same validation and test sets.
blur_dataloaders = dict(train=blur_train_loader, val=val_loader, test=test_loader)
blur_image_datasets = dict(train=blur_train_dataset, val=val_dataset, test=test_dataset)

affine_dataloaders = dict(train=affine_train_loader, val=val_loader, test=test_loader)
affine_image_datasets = dict(train=affine_train_dataset, val=val_dataset, test=test_dataset)

additional_dataloaders = dict(train=additional_train_loader, val=val_loader, test=test_loader)
additional_image_datasets = dict(train=additional_train_dataset, val=val_dataset, test=test_dataset)

In [None]:
# Lookup tables for the augmentation-free baseline, keyed by split.
base_dataloaders = dict(train=base_train_loader, val=val_loader, test=test_loader)
base_image_datasets = dict(train=base_train_dataset, val=val_dataset, test=test_dataset)

In [None]:
# Ask PyTorch to release GPU memory it has cached but is not currently using,
# before the training cell below starts.
torch.cuda.empty_cache()

This is the code used to train the individual models that are tested below. Some were retrained and retested, which is why one of the code blocks might not show any output!

In [None]:
import gc

# Train `base_model` on the affine-augmented training split with gradient
# accumulation, validate after every epoch, then save the final weights.
num_epochs = 10
accumulation_steps = 4  # mini-batches whose gradients are combined per optimizer step

for epoch in range(num_epochs):
    print(f'Epoch {epoch+1}/{num_epochs}')

    # ---- training ----
    base_model.train()
    running_loss = 0.0
    running_corrects = 0
    num_train_batches = len(affine_dataloaders['train'])

    optimizer.zero_grad()

    for i, (inputs, labels) in enumerate(affine_dataloaders['train']):
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = base_model(inputs)
        loss = criterion(outputs, labels)

        # Scale the loss so the accumulated gradient is an average over the
        # group instead of a sum that grows with `accumulation_steps`.
        (loss / accumulation_steps).backward()

        # Step after every full group, and also after the very last batch:
        # otherwise the gradients of a trailing partial group would be wiped
        # by the next epoch's zero_grad() without ever being applied.
        if (i + 1) % accumulation_steps == 0 or (i + 1) == num_train_batches:
            optimizer.step()
            optimizer.zero_grad()

        # Statistics use the unscaled per-batch loss, weighted by batch size.
        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        running_corrects += (preds == labels).sum().item()

    epoch_loss = running_loss / len(affine_image_datasets['train'])
    epoch_acc = running_corrects / len(affine_image_datasets['train'])
    print(f'Training Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

    # ---- validation ----
    base_model.eval()
    val_loss = 0.0
    val_corrects = 0

    with torch.no_grad():
        for inputs, labels in affine_dataloaders['val']:
            inputs, labels = inputs.to(device), labels.to(device)
            # Crude progress marker: one "i" per validation batch.
            print("i",end="")

            outputs = base_model(inputs)
            loss = criterion(outputs, labels)

            val_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            val_corrects += (preds == labels).sum().item()

    val_epoch_loss = val_loss / len(affine_image_datasets['val'])
    val_epoch_acc = val_corrects / len(affine_image_datasets['val'])
    print(f'Validation Loss: {val_epoch_loss:.4f} Acc: {val_epoch_acc:.4f}')

    # Release cached GPU memory once per epoch; doing this (plus a full
    # gc.collect()) after every batch only slows training down.
    torch.cuda.empty_cache()

# Only the weights from the last epoch are saved.
model_save_path = "/content/drive/MyDrive/YBIGTA/Models/affine_augment_custom_model.pth"
torch.save(base_model.state_dict(), model_save_path)
print(f"Model saved to {model_save_path}")

Epoch 1/10
Training Loss: 2.4309 Acc: 0.1463
iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiValidation Loss: 2.2265 Acc: 0.2244
Epoch 2/10
Training Loss: 2.2143 Acc: 0.2252
iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiValidation Loss: 2.1160 Acc: 0.2561
Epoch 3/10
Training Loss: 2.0898 Acc: 0.2687
iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiValidation Loss: 2.0376 Acc: 0.2722
Epoch 4/10
Training Loss: 1.9764 Acc: 0.3096
iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiValidation Loss: 1.9928 Acc: 0.2989
Epoch 5/10
Training Loss: 1.8666 Acc: 0.3458
iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiValidation Loss: 1.9829 Acc: 0.2978
Epoch 6/10
Training Loss: 1.7432 Acc: 0.3943
iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiValidation Loss: 2.0549 Acc: 0.3000
Epoch 7/10
Training Loss: 1.6260 Acc: 0.4432
iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiValidation Loss: 2.0832 Acc: 0.3044
Epoch 8/10
Training Loss: 1.4961 A

Testing the models that were created above, by loading their saved weights in the cells below

In [None]:
# Ask PyTorch to release GPU memory it has cached but is not currently using,
# before the evaluation cells below run.
torch.cuda.empty_cache()

In [None]:
# Evaluate the "base" checkpoint on the test split: loss and top-1/2/3 accuracy.
# NOTE(review): this checkpoint is read from ".../YBIGTA 신입플/Models" while the
# other cells use ".../YBIGTA/Models" -- confirm which folder is correct.
model_save_path = "/content/drive/MyDrive/YBIGTA 신입플/Models/base_augment_custom_model.pth"
# map_location lets a checkpoint saved on a GPU runtime load on a CPU-only one.
base_model.load_state_dict(torch.load(model_save_path, map_location=device))
base_model = base_model.to(device)

base_model.eval()

running_loss = 0.0
running_corrects_top1 = 0
running_corrects_top2 = 0
running_corrects_top3 = 0

with torch.no_grad():
    for inputs, labels in base_dataloaders['test']:
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = base_model(inputs)
        loss = criterion(outputs, labels)

        # Weight the batch-mean loss by the batch size so the final average
        # is per sample even when the last batch is smaller.
        running_loss += loss.item() * inputs.size(0)

        # Indices of the three highest-scoring classes, best first.
        _, top3_preds = torch.topk(outputs, 3, dim=1)

        _, preds = torch.max(outputs, 1)
        running_corrects_top1 += (preds == labels).sum().item()

        # A sample counts as correct when its label is among the first k columns.
        correct_top2 = top3_preds[:, :2].eq(labels.view(-1, 1).expand_as(top3_preds[:, :2]))
        running_corrects_top2 += correct_top2.any(dim=1).sum().item()

        correct_top3 = top3_preds.eq(labels.view(-1, 1).expand_as(top3_preds))
        running_corrects_top3 += correct_top3.any(dim=1).sum().item()


test_loss = running_loss / len(base_image_datasets['test'])
test_acc_top1 = running_corrects_top1 / len(base_image_datasets['test'])
test_acc_top2 = running_corrects_top2 / len(base_image_datasets['test'])
test_acc_top3 = running_corrects_top3 / len(base_image_datasets['test'])

print(f'Test Loss: {test_loss:.4f}')
print(f'Top-1 Acc: {test_acc_top1:.4f}')
print(f'Top-2 Acc: {test_acc_top2:.4f}')
print(f'Top-3 Acc: {test_acc_top3:.4f}')

In [None]:
# Evaluate the blur-augmented checkpoint on the test split: loss and top-1/2/3 accuracy.
model_save_path = "/content/drive/MyDrive/YBIGTA/Models/blur_augment_custom_model.pth"
# map_location lets a checkpoint saved on a GPU runtime load on a CPU-only one.
base_model.load_state_dict(torch.load(model_save_path, map_location=device))
base_model = base_model.to(device)

base_model.eval()

running_loss = 0.0
running_corrects_top1 = 0
running_corrects_top2 = 0
running_corrects_top3 = 0

with torch.no_grad():
    for inputs, labels in base_dataloaders['test']:
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = base_model(inputs)
        loss = criterion(outputs, labels)

        # Weight the batch-mean loss by the batch size so the final average
        # is per sample even when the last batch is smaller.
        running_loss += loss.item() * inputs.size(0)

        # Indices of the three highest-scoring classes, best first.
        _, top3_preds = torch.topk(outputs, 3, dim=1)

        _, preds = torch.max(outputs, 1)
        running_corrects_top1 += (preds == labels).sum().item()

        # A sample counts as correct when its label is among the first k columns.
        correct_top2 = top3_preds[:, :2].eq(labels.view(-1, 1).expand_as(top3_preds[:, :2]))
        running_corrects_top2 += correct_top2.any(dim=1).sum().item()

        correct_top3 = top3_preds.eq(labels.view(-1, 1).expand_as(top3_preds))
        running_corrects_top3 += correct_top3.any(dim=1).sum().item()

test_loss = running_loss / len(base_image_datasets['test'])
test_acc_top1 = running_corrects_top1 / len(base_image_datasets['test'])
test_acc_top2 = running_corrects_top2 / len(base_image_datasets['test'])
test_acc_top3 = running_corrects_top3 / len(base_image_datasets['test'])

print(f'Test Loss: {test_loss:.4f}')
print(f'Top-1 Acc: {test_acc_top1:.4f}')
print(f'Top-2 Acc: {test_acc_top2:.4f}')
print(f'Top-3 Acc: {test_acc_top3:.4f}')

Test Loss: 2.4773
Top-1 Acc: 0.3174
Top-2 Acc: 0.4940
Top-3 Acc: 0.6188


In [None]:
# Evaluate the colour-jitter + blur ("additional") checkpoint on the test
# split: loss and top-1/2/3 accuracy.
model_save_path = "/content/drive/MyDrive/YBIGTA/Models/additional_augment_custom_model.pth"
# map_location lets a checkpoint saved on a GPU runtime load on a CPU-only one.
base_model.load_state_dict(torch.load(model_save_path, map_location=device))
base_model = base_model.to(device)

base_model.eval()

running_loss = 0.0
running_corrects_top1 = 0
running_corrects_top2 = 0
running_corrects_top3 = 0

with torch.no_grad():
    for inputs, labels in base_dataloaders['test']:
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = base_model(inputs)
        loss = criterion(outputs, labels)

        # Weight the batch-mean loss by the batch size so the final average
        # is per sample even when the last batch is smaller.
        running_loss += loss.item() * inputs.size(0)

        # Indices of the three highest-scoring classes, best first.
        _, top3_preds = torch.topk(outputs, 3, dim=1)

        _, preds = torch.max(outputs, 1)
        running_corrects_top1 += (preds == labels).sum().item()

        # A sample counts as correct when its label is among the first k columns.
        correct_top2 = top3_preds[:, :2].eq(labels.view(-1, 1).expand_as(top3_preds[:, :2]))
        running_corrects_top2 += correct_top2.any(dim=1).sum().item()

        correct_top3 = top3_preds.eq(labels.view(-1, 1).expand_as(top3_preds))
        running_corrects_top3 += correct_top3.any(dim=1).sum().item()

test_loss = running_loss / len(base_image_datasets['test'])
test_acc_top1 = running_corrects_top1 / len(base_image_datasets['test'])
test_acc_top2 = running_corrects_top2 / len(base_image_datasets['test'])
test_acc_top3 = running_corrects_top3 / len(base_image_datasets['test'])

print(f'Test Loss: {test_loss:.4f}')
print(f'Top-1 Acc: {test_acc_top1:.4f}')
print(f'Top-2 Acc: {test_acc_top2:.4f}')
print(f'Top-3 Acc: {test_acc_top3:.4f}')

Test Loss: 12.3173
Top-1 Acc: 0.1048
Top-2 Acc: 0.2006
Top-3 Acc: 0.2581
