In [62]:
import numpy as np
import torch
from PIL import Image
import torchvision.transforms as transforms
import pandas as pd
import os
from torch import nn
from matplotlib import pyplot as plt
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

In [33]:
# Record the installed PyTorch build so the run environment can be reproduced.
print(torch.__version__)

2.3.0+cu118


In [34]:
# Query CUDA availability once and derive both the flag and the device from it.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print('We are using GPU.' if use_cuda else 'We are using CPU.')

We are using GPU.


In [70]:
class CustomImageFolder(torch.utils.data.Dataset):
    """Multi-label image dataset backed by an image directory and a CSV index.

    Column 0 of the CSV is used as the image file name (joined onto ``root``)
    and column 1 as a whitespace-separated list of integer class labels.
    """

    def __init__(self, root, csv_file, transform=None, target_transform=None):
        """Load the CSV index and remember the transforms.

        Args:
            root: Directory the image file names in the CSV are relative to.
            csv_file: Path to the CSV file; its first row is treated as the header.
            transform: Optional callable applied to the loaded RGB PIL image.
            target_transform: Optional callable applied to the parsed label list.
        """
        self.root = root
        # NOTE(review): on_bad_lines='skip' silently drops every row pandas
        # cannot parse, so len(self) may be smaller than the number of rows in
        # the file - confirm that this data loss is intended.
        self.df = pd.read_csv(csv_file, header=0, on_bad_lines='skip')
        self.transform = transform
        self.target_transform = target_transform

    def parse_labels(self, label_str):
        """Turn a whitespace-separated label cell into a list of ints.

        Args:
            label_str: The raw cell value, e.g. ``"8 3 13"``. Non-string
                values (such as an integer cell) are accepted as well.

        Returns:
            list[int]: The labels in the order they appear in the cell.

        Raises:
            ValueError: If any token is not a valid integer literal.
        """
        # str() keeps this working when pandas infers the label column as
        # numeric (every row holding a single label), in which case the cell
        # has no .split() method.
        return [int(label) for label in str(label_str).split()]

    def __getitem__(self, index):
        """Return the ``(image, labels)`` pair stored at row ``index``.

        The image is converted to RGB and passed through ``transform`` when
        one was given; the labels are parsed with ``parse_labels`` and passed
        through ``target_transform`` when one was given.
        """
        img_path = os.path.join(self.root, self.df.iloc[index, 0])
        label_str = self.df.iloc[index, 1]

        # The context manager closes the underlying file as soon as the pixel
        # data has been converted, instead of leaving the handle to the
        # garbage collector; convert() returns an independent image.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert('RGB')

        if self.transform is not None:
            img = self.transform(img)

        label = self.parse_labels(label_str)

        if self.target_transform is not None:
            label = self.target_transform(label)

        return img, label

    def __len__(self):
        """Number of rows that were successfully read from the CSV."""
        return len(self.df)
    
# Dataset locations, given relative to the notebook's working directory.
root = "COMP5329S1A2Dataset/data"
csv_file = "COMP5329S1A2Dataset/train.csv"

# Preprocessing applied to every image: resize to a fixed 224x224 and convert
# the PIL image to a tensor.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# No target_transform is passed, so labels are returned as plain lists of ints.
dataset = CustomImageFolder(root=root, csv_file=csv_file, transform=transform)

In [72]:
# Sanity check: show the tensor shape and parsed labels of the first five samples.
for i in range(5):
    sample = dataset[i]
    image, labels = sample
    print("Sample", i)
    print("Image shape:", image.shape)
    print("Label:", labels)
    print()

Sample 0
Image shape: torch.Size([3, 224, 224])
Label: [1]

Sample 1
Image shape: torch.Size([3, 224, 224])
Label: [1, 19]

Sample 2
Image shape: torch.Size([3, 224, 224])
Label: [1]

Sample 3
Image shape: torch.Size([3, 224, 224])
Label: [8, 3, 13]

Sample 4
Image shape: torch.Size([3, 224, 224])
Label: [8, 3, 7]



In [9]:
class AlexNet(nn.Module):
    """AlexNet-style CNN: five conv layers, adaptive pooling, three linear layers.

    Args:
        num_classes: Width of the final linear layer (number of output logits).
        stem_stride: Stride of the first 11x11 convolution.
    """

    def __init__(self, num_classes=1000, stem_stride=4):
        super().__init__()

        # Stage 1: large-kernel stem followed by max pooling.
        stem = [
            nn.Conv2d(3, 64, kernel_size=11, stride=stem_stride, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        # Stage 2: 5x5 convolution followed by max pooling.
        middle = [
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        # Stage 3: three stacked 3x3 convolutions, then a final max pooling.
        head = [
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        # The stages are flattened into one Sequential so module indices
        # (features.0, features.3, ...) stay as a flat 0..12 sequence.
        self.features = nn.Sequential(*stem, *middle, *head)

        # Pool every feature map to 6x6 regardless of the input resolution, so
        # the classifier input size is fixed.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        flat_features = 256 * 6 * 6
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(flat_features, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Map a batch of images to ``num_classes`` logits per image."""
        pooled = self.avgpool(self.features(x))
        # Flatten everything except the batch dimension before the linear layers.
        return self.classifier(torch.flatten(pooled, 1))

    # back-propagation is handled by PyTorch