In [14]:
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import time

In [15]:
# Set absolute path
nb_path = os.path.abspath("a2.ipynb")
# nb_path = "/content/drive/MyDrive/Colab Notebooks/cisc351-a2-fashion-classifier/images/a2.ipynb"  # For Google Colab
DIR_PATH = os.path.dirname(nb_path)

# Load and preview the dataset
train_df = pd.read_csv(os.path.join(DIR_PATH, 'train.csv'), sep='\t')
test_df = pd.read_csv(os.path.join(DIR_PATH, 'test.csv'), sep='\t')

df = pd.concat([train_df, test_df], axis=0)
display(df.head())

Unnamed: 0,imageid,label,productname
0,2653,Bags,Murcia Women Leather Office Grey Bag
1,55997,Others,Colorbar Velvet Matte Temptation Lipstick 24MA
2,2640,Shoes,Carlton London Men Brown Formal Shoes
3,40565,Topwear,W Women Maroon Kurta
4,38932,Bottomwear,Gini and Jony Girls Pink Leggings


### Preprocessing

In [16]:
# Make sure these are the only categories that appear in the dataset
labels = set({"Topwear", "Bottomwear", "Innerwear", "Bags", "Watches", "Jewellery", "Eyewear", "Wallets", "Shoes", "Sandal", "Makeup", "Fragrance", "Others"})
unique_labels = df["label"].unique()
assert(labels == set(unique_labels))

print("* FREQUENCY BY CATEGORY *")
print(df["label"].value_counts(ascending=False))

* FREQUENCY BY CATEGORY *
label
Topwear       15401
Shoes          7344
Others         6230
Bags           3055
Bottomwear     2693
Watches        2542
Innerwear      1808
Jewellery      1080
Eyewear        1073
Fragrance      1012
Sandal          963
Wallets         933
Makeup          307
Name: count, dtype: int64


### Load the data

In [17]:
# Check if CUDA is available
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using CUDA device.")
# Check if Mac GPU acceleration is available
elif torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using MPS device.")
else:
    device = torch.device("cpu")
    print("MPS device not found.")

Using MPS device.


In [18]:
class_labels = ["Topwear", "Bottomwear", "Innerwear", "Bags", "Watches", "Jewellery", "Eyewear", "Wallets", "Shoes", "Sandal", "Makeup", "Fragrance", "Others"]
label_dict = {label: index for index, label in enumerate(class_labels)}

class FashionDataset(Dataset):
    def __init__(self, csv_file, images_dir, transform=None):
        """
        Args:
            csv_file (string): path to csv file with `imageid` (file name) and `label`.
            images_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.df = pd.read_csv(os.path.join(DIR_PATH, csv_file), sep='\t')
        self.df["label"] = self.df["label"].apply(lambda x: label_dict[x])  # convert the labels to numbers
        self.images_dir = os.path.join(DIR_PATH, images_dir)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = f"{self.df.iloc[idx, 0]}.jpg"
        img_path = os.path.join(self.images_dir, img_name)
        image = Image.open(img_path)
        image = image.convert('RGB')  # some images are in grayscale
        label = self.df.iloc[idx, 1]  # label is the second column

        if self.transform:
            image = self.transform(image)

        return image, label



In [19]:

def get_mean_std(loader: DataLoader):
    """
    Compute the mean and standard deviation of the dataset for normalization.
    Args:
        loader (DataLoader): DataLoader with images to compute the mean and std of.
    """
    mean = 0
    std = 0
    img_count = 0
    for images, _ in loader:
        images = images.view(images.size(0), images.size(1), -1)
        mean += images.mean(2).sum(0)
        std += images.std(2).sum(0)
        img_count += images.size(0)
    mean /= img_count
    std /= img_count
    return mean, std

# Define the transformations for the initial loader to compute the mean and std
transform = transforms.Compose([
    transforms.Resize((72, 72)),
    transforms.ToTensor(),
])

batch_size = 256

# Create a loader for computing the mean and std of the dataset, which we will use for normalization
train_data = FashionDataset("train.csv", "images", transform=transform)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
mean, std = get_mean_std(train_loader)

# Define the transformations for the actual train and test loaders
transform = transforms.Compose([
    transforms.Resize((72, 72)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# Create the datasets
train_data = FashionDataset("train.csv", "images", transform=transform)
test_data = FashionDataset("test.csv", "images", transform=transform)

# Create the loaders
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, pin_memory=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True, pin_memory=True)

### Build CNN

In [20]:
# Build a CNN model to classify the images
class FashionClassifierCNN(nn.Module):
    def __init__(self):
        super(FashionClassifierCNN, self).__init__()
        # [(input - filter + 2*pad) / stride] + 1
        # 72x72x3
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5)  # Input channels, output channels, kernel size
        # 68x68x16
        self.pool = nn.MaxPool2d(2, 2)
        # 34x34x16
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5)
        # 30x30x32
        # apply max pooling again
        # 15x15x32
        self.fc1 = nn.Linear(in_features=15*15*32, out_features=120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 13)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # Flatten all dimensions except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = FashionClassifierCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

### Train CNN

In [21]:
# If you want to load the model from the file
# model = FashionClassifierCNN().to(device)
# model.load_state_dict(torch.load(os.path.join(DIR_PATH, "model.pt")))

epochs = 100
no_improvement_streak = 0
patience = 20
best_loss = float('inf')
stop = False
time_start = time.time()

for epoch in range(epochs):
    total_loss = 0.0
    # For each batch
    for i, data in enumerate(train_loader):
        inputs, labels = data
        inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        optimizer.zero_grad()  # Reset the gradients
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        if i % 10 == 9:
            avg_loss = total_loss / 10
            time_elapsed = time.time() - time_start
            time_left = (time_elapsed * (len(train_loader) - i) / (i + 1)
                         + (epochs - epoch - 1) * len(train_loader))
            print(f"[epoch {epoch + 1}/{epochs}, batch {i + 1}/{len(train_loader)}]   loss: {avg_loss:.3f}   time left: {time_left:.0f}s")
            total_loss = 0.0

            # Early stopping mechanism
            if avg_loss < best_loss:
                # save the model if it's the best so far
                torch.save(model.state_dict(), os.path.join(DIR_PATH, "model.pt"))
                best_loss = avg_loss
                no_improvement_streak = 0
            else:
                no_improvement_streak += 1
                
        if no_improvement_streak == patience:
            stop = True
            break
    if stop:
        print("Early stopping!")
        break

print(f"Finished training. Best model (loss: {best_loss:.3f} saved to model.pt")

[epoch 1/100, batch 10/158]   loss: 1.883   time left: 15669s
[epoch 1/100, batch 20/158]   loss: 1.137   time left: 15669s
[epoch 1/100, batch 30/158]   loss: 0.816   time left: 15667s
[epoch 1/100, batch 40/158]   loss: 0.668   time left: 15665s
[epoch 1/100, batch 50/158]   loss: 0.576   time left: 15663s
[epoch 1/100, batch 60/158]   loss: 0.538   time left: 15661s
[epoch 1/100, batch 70/158]   loss: 0.524   time left: 15659s
[epoch 1/100, batch 80/158]   loss: 0.503   time left: 15657s
[epoch 1/100, batch 90/158]   loss: 0.441   time left: 15655s
[epoch 1/100, batch 100/158]   loss: 0.413   time left: 15653s
[epoch 1/100, batch 110/158]   loss: 0.422   time left: 15651s
[epoch 1/100, batch 120/158]   loss: 0.415   time left: 15650s
[epoch 1/100, batch 130/158]   loss: 0.396   time left: 15648s
[epoch 1/100, batch 140/158]   loss: 0.363   time left: 15646s
[epoch 1/100, batch 150/158]   loss: 0.335   time left: 15644s
[epoch 2/100, batch 10/158]   loss: 0.337   time left: 15974s
[e

### Test CNN

In [23]:
# If you want to load the model from the file
# model = FashionClassifierCNN().to(device)
# model.load_state_dict(torch.load(os.path.join(DIR_PATH, "model.pt")))

correct = 0
total = 0

with torch.no_grad():
    # For each batch
    for data in test_loader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy of the network on the {total} test images: {100 * correct / total}%")

Accuracy of the network on the 4000 test images: 94.9%
