In [1]:
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.model_selection import train_test_split
from PIL import Image
import time

# Set absolute path
nb_path = os.path.abspath("a2.ipynb")
# nb_path = "/content/drive/MyDrive/Colab Notebooks/cisc351-a2-fashion-classifier/images/a2.ipynb"  # For Google Colab
DIR_PATH = os.path.dirname(nb_path)

### Exploration

In [2]:
# Load and preview the dataset
train_df = pd.read_csv(os.path.join(DIR_PATH, 'train.csv'), sep='\t')
test_df = pd.read_csv(os.path.join(DIR_PATH, 'test.csv'), sep='\t')

df = pd.concat([train_df, test_df], axis=0)
display(df.head())

Unnamed: 0,imageid,label,productname
0,2653,Bags,Murcia Women Leather Office Grey Bag
1,55997,Others,Colorbar Velvet Matte Temptation Lipstick 24MA
2,2640,Shoes,Carlton London Men Brown Formal Shoes
3,40565,Topwear,W Women Maroon Kurta
4,38932,Bottomwear,Gini and Jony Girls Pink Leggings


In [3]:
# Make sure these are the only categories that appear in the dataset
labels = set({"Topwear", "Bottomwear", "Innerwear", "Bags", "Watches", "Jewellery", "Eyewear", "Wallets", "Shoes", "Sandal", "Makeup", "Fragrance", "Others"})
unique_labels = df["label"].unique()
assert(labels == set(unique_labels))

print("* FREQUENCY BY CATEGORY *")
print(df["label"].value_counts(ascending=False))

* FREQUENCY BY CATEGORY *
label
Topwear       15401
Shoes          7344
Others         6230
Bags           3055
Bottomwear     2693
Watches        2542
Innerwear      1808
Jewellery      1080
Eyewear        1073
Fragrance      1012
Sandal          963
Wallets         933
Makeup          307
Name: count, dtype: int64


In [4]:
# Check if CUDA is available
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using CUDA device.")
# Check if Mac GPU acceleration is available
elif torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using MPS device.")
else:
    device = torch.device("cpu")
    print("GPU not available.")

Using MPS device.


### Load the data

In [5]:
class_labels = ["Topwear", "Bottomwear", "Innerwear", "Bags", "Watches", "Jewellery", "Eyewear", "Wallets", "Shoes", "Sandal", "Makeup", "Fragrance", "Others"]
label_dict = {label: index for index, label in enumerate(class_labels)}

class FashionDataset(Dataset):
    def __init__(self, csv_file, images_dir, transform=None):
        """
        Args:
            csv_file (string): path to csv file with `imageid` (file name) and `label`.
            images_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.df = pd.read_csv(os.path.join(DIR_PATH, csv_file), sep='\t')
        self.df["label"] = self.df["label"].apply(lambda x: label_dict[x])  # convert the labels to numbers
        self.images_dir = os.path.join(DIR_PATH, images_dir)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = f"{self.df.iloc[idx, 0]}.jpg"
        img_path = os.path.join(self.images_dir, img_name)
        image = Image.open(img_path)
        image = image.convert('RGB')  # some images are in grayscale
        label = self.df.iloc[idx, 1]  # label is the second column

        if self.transform:
            image = self.transform(image)

        return image, label

def get_mean_std(loader: DataLoader):
    """
    Compute the mean and standard deviation of the dataset for normalization.
    Args:
        loader (DataLoader): DataLoader with images to compute the mean and std of.
    """
    mean = 0
    std = 0
    img_count = 0
    for images, _ in loader:
        images = images.view(images.size(0), images.size(1), -1)
        mean += images.mean(2).sum(0)
        std += images.std(2).sum(0)
        img_count += images.size(0)
    mean /= img_count
    std /= img_count
    return mean, std

# Define the transformations for the initial loader to compute the mean and std
transform = transforms.Compose([
    transforms.Resize((72, 72)),
    transforms.ToTensor(),
])

# Create a loader for computing the mean and std of the dataset, which we will use for normalization
train_data = FashionDataset("train.csv", "images", transform=transform)
train_loader = DataLoader(train_data, batch_size=256, shuffle=True)
mean, std = get_mean_std(train_loader)

# Model 1 - Baseline CNN

### Create data loaders

In [39]:
batch_size = 128

# Define the transformations for the actual train and test loaders
transform = transforms.Compose([
    transforms.Resize((72, 72)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# Create the datasets
train_data = FashionDataset("train.csv", "images", transform=transform)
test_data = FashionDataset("test.csv", "images", transform=transform)

validation_data, test_data = train_test_split(test_data, test_size=0.5)  # split the test data into validation and test sets

# Create the loaders
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, pin_memory=True)
validation_loader = DataLoader(validation_data, batch_size=batch_size, shuffle=False, pin_memory=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, pin_memory=True)

### Build CNN #1

In [45]:
# Build a CNN model to classify the images
class FashionClassifierCNN(nn.Module):
    def __init__(self):
        super(FashionClassifierCNN, self).__init__()
        # [(input - filter + 2*pad) / stride] + 1
        # 72x72x3
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5)  # Input channels, output channels, kernel size
        # 68x68x16
        self.pool = nn.MaxPool2d(2, 2)
        # 34x34x16
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5)
        # 30x30x32
        # apply max pooling again
        # 15x15x32
        self.fc1 = nn.Linear(in_features=15*15*32, out_features=120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 13)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # Flatten all dimensions except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = FashionClassifierCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

#### Rationale for my CNN architecture
My CNN has two convolutional layers, each followed by a max pooling layer, and then three fully connected layers. This is a very standard architecture for image classification. The convolutional layers capture the low-level features of the images, such as edges and corners, and the last three fully-connected layers are capture the high-level features, such as the shape of the clothing item.

- I chose the ReLU activation function because it is simple and efficient for introducing non-linearity into the model.
- I chose the cross-entropy loss function (which applies a softmax activation function automatically) for the output layer because it is a standard choice for multi-class classification problems.
- I chose the Adam optimizer with the standard learning rate of 0.001 because it adjusts the learning rate during training to improve training speed and performance.

### Train CNN #1

In [46]:
# If you want to load the model from the file
# model.load_state_dict(torch.load(os.path.join(DIR_PATH, "model1.pt")))

epochs = 100
no_improvement_streak = 0
patience = 5
best_loss = float('inf')
time_start = time.time()

for epoch in range(epochs):
    model.train()  # Set the model to training mode
    train_loss = 0.0

    # For each batch in the training set
    for i, data in enumerate(train_loader):
        inputs, labels = data
        inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        optimizer.zero_grad()  # Reset the gradients
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # After each epoch, evaluate the model on the validation set
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # Disable gradient computation
        validation_loss = 0.0
        for i, data in enumerate(validation_loader):
            inputs, labels = data
            inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            validation_loss += loss.item()

    time_elapsed = time.time() - time_start
    time_left = (epochs - epoch - 1) / (epoch + 1) * time_elapsed
    avg_train_loss = train_loss / len(train_loader)
    avg_validation_loss = validation_loss / len(validation_loader)
    
    print(f"[epoch {epoch+1}, time remaining: {time_left / 60:.1f} min]   train loss: {avg_train_loss:.3f}   validation loss: {avg_validation_loss:.3f}")

    # Early stopping mechanism based on validation loss
    if avg_validation_loss < best_loss:
        # save the model if it's the best so far
        torch.save(model.state_dict(), os.path.join(DIR_PATH, "model1.pt"))
        best_loss = avg_validation_loss
        no_improvement_streak = 0
    else:
        no_improvement_streak += 1

    if no_improvement_streak == patience:
        print("Early stopping!")
        break

print(f"Finished training. Best model (validation loss: {best_loss:.3f}) saved to model1.pt")

[epoch 1, time remaining: 53.5 min]   train loss: 0.500   validation loss: 0.270
[epoch 2, time remaining: 51.7 min]   train loss: 0.235   validation loss: 0.249
[epoch 3, time remaining: 51.0 min]   train loss: 0.178   validation loss: 0.189
[epoch 4, time remaining: 50.2 min]   train loss: 0.140   validation loss: 0.200
[epoch 5, time remaining: 49.2 min]   train loss: 0.107   validation loss: 0.211
[epoch 6, time remaining: 48.2 min]   train loss: 0.083   validation loss: 0.205
[epoch 7, time remaining: 47.6 min]   train loss: 0.066   validation loss: 0.230
[epoch 8, time remaining: 47.2 min]   train loss: 0.055   validation loss: 0.240
Early stopping!
Finished training. Best model (validation loss: 0.189) saved to model.pt


### Test CNN #1

In [47]:
# If you want to load the model from the file
# model = FashionClassifierCNN().to(device)
# model.load_state_dict(torch.load(os.path.join(DIR_PATH, "model1.pt")))

correct = 0
total = 0

with torch.no_grad():
    # For each batch
    for data in test_loader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy of the network on the {total} test images: {100 * correct / total}%")

Accuracy of the network on the 2000 test images: 95.6%


#### Conclusion on the performance of the baseline model
The baseline model performed very well, correctly classifying 95.6% of test samples. I used early stopping to halt training when there was no improvement in the validation loss for 5 epochs, and according to this rule training stopped after just 8 epochs -- this was potentially concerning. The training loss continued to decrease steadily while the validation loss plateaued, suggesting that the model quickly began overfitting the training data. I will address this in the next model through data augmentation.

# Model 2 - CNN with Data Augmentation

### Create data loaders

In [11]:
batch_size = 128

# Define the transformations for randomized data augmentation
train_transform = transforms.Compose([
    transforms.RandomRotation(10),  # Slight rotation, but not too much because clothing images are usually upright
    transforms.RandomHorizontalFlip(),  # Do not use vertical flip for the same reason as above
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),  # Adjust for different lighting conditions
    transforms.Resize((72, 72)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])
test_transform = transforms.Compose([
    transforms.Resize((72, 72)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# Create the datasets
train_data = FashionDataset("train.csv", "images", transform=train_transform)
test_data = FashionDataset("test.csv", "images", transform=test_transform)

validation_data, test_data = train_test_split(test_data, test_size=0.5)  # split the test data into validation and test sets

# Create the loaders
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, pin_memory=True)
validation_loader = DataLoader(validation_data, batch_size=batch_size, shuffle=False, pin_memory=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, pin_memory=True)

### Build CNN #2

In [12]:
# Build a CNN model to classify the images
class FashionClassifierCNN3(nn.Module):
    def __init__(self):
        super(FashionClassifierCNN3, self).__init__()
        # [(input - filter + 2*pad) / stride] + 1
        # 72x72x3
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5)  # Input channels, output channels, kernel size
        # 68x68x16
        self.pool = nn.MaxPool2d(2, 2)
        # 34x34x16
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5)
        # 30x30x32
        # apply max pooling again
        # 15x15x32
        self.fc1 = nn.Linear(in_features=15*15*32, out_features=120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 13)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # Flatten all dimensions except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = FashionClassifierCNN3().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

### Train CNN #2

In [13]:
# If you want to load the model from the file
# model.load_state_dict(torch.load(os.path.join(DIR_PATH, "model2.pt")))

epochs = 100
no_improvement_streak = 0
patience = 5
best_loss = float('inf')
time_start = time.time()

for epoch in range(epochs):
    model.train()  # Set the model to training mode
    train_loss = 0.0

    # For each batch in the training set
    for i, data in enumerate(train_loader):
        inputs, labels = data
        inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        optimizer.zero_grad()  # Reset the gradients
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # After each epoch, evaluate the model on the validation set
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # Disable gradient computation
        validation_loss = 0.0
        for i, data in enumerate(validation_loader):
            inputs, labels = data
            inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            validation_loss += loss.item()

    time_elapsed = time.time() - time_start
    time_left = (epochs - epoch - 1) / (epoch + 1) * time_elapsed
    avg_train_loss = train_loss / len(train_loader)
    avg_validation_loss = validation_loss / len(validation_loader)
    
    print(f"[epoch {epoch+1}, time remaining: {time_left / 60:.1f} min]   train loss: {avg_train_loss:.3f}   validation loss: {avg_validation_loss:.3f}")

    # Early stopping mechanism based on validation loss
    if avg_validation_loss < best_loss:
        # save the model if it's the best so far
        torch.save(model.state_dict(), os.path.join(DIR_PATH, "model2.pt"))
        best_loss = avg_validation_loss
        no_improvement_streak = 0
    else:
        no_improvement_streak += 1

    if no_improvement_streak == patience:
        print("Early stopping!")
        break

print(f"Finished training. Best model (validation loss: {best_loss:.3f}) saved to model2.pt")

[epoch 1, time remaining: 71.6 min]   train loss: 0.594   validation loss: 0.306
[epoch 2, time remaining: 69.9 min]   train loss: 0.320   validation loss: 0.238
[epoch 3, time remaining: 69.4 min]   train loss: 0.258   validation loss: 0.224
[epoch 4, time remaining: 68.7 min]   train loss: 0.225   validation loss: 0.222
[epoch 5, time remaining: 67.8 min]   train loss: 0.199   validation loss: 0.188
[epoch 6, time remaining: 67.0 min]   train loss: 0.180   validation loss: 0.180
[epoch 7, time remaining: 66.5 min]   train loss: 0.163   validation loss: 0.169
[epoch 8, time remaining: 66.6 min]   train loss: 0.152   validation loss: 0.176
[epoch 9, time remaining: 67.3 min]   train loss: 0.140   validation loss: 0.163
[epoch 10, time remaining: 66.6 min]   train loss: 0.130   validation loss: 0.170
[epoch 11, time remaining: 66.1 min]   train loss: 0.123   validation loss: 0.173
[epoch 12, time remaining: 65.7 min]   train loss: 0.116   validation loss: 0.175
[epoch 13, time remaining

### Test CNN #2

In [14]:
# If you want to load the model from the file
# model = FashionClassifierCNN().to(device)
# model.load_state_dict(torch.load(os.path.join(DIR_PATH, "model2.pt")))

correct = 0
total = 0

with torch.no_grad():
    # For each batch
    for data in test_loader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy of the network on the {total} test images: {100 * correct / total}%")

Accuracy of the network on the 2000 test images: 94.75%


#### Conclusion on the performance of the model with data augmentation
The model with data augmentation performed slightly worse on test data than the first model, correctly classifying 94.75% of test samples. However, the training and validation losses decreased together throughout training, and the model did not begin overfitting the training data until after more than 15 epochs. This is a significant improvement over the baseline model, which began overfitting after only around 3 epochs.

The training loss stayed higher for longer in this model because the images were randomly transformed each epoch, so the model did not see the exact same image twice. The fact that the ratio between the training and validation loss was closer to 1 for much of the training process means the model with data augmentation may generalize better to new data.

So, while the model with data augmentation did not perform better than the baseline model on the test data, it is likely a better model overall because its lower validation loss and higher ratio between training and validation loss indicates it is less prone to overfitting.

# Model 3 - Hyperparameter Tuning

### Create data loaders

In [16]:
batch_size = 128

# Define the transformations
transform = transforms.Compose([
    transforms.Resize((72, 72)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# Create the datasets
train_data = FashionDataset("train.csv", "images", transform=transform)
test_data = FashionDataset("test.csv", "images", transform=transform)

validation_data, test_data = train_test_split(test_data, test_size=0.5)  # split the test data into validation and test sets

# Create the loaders
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, pin_memory=True)
validation_loader = DataLoader(validation_data, batch_size=batch_size, shuffle=False, pin_memory=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, pin_memory=True)

### Build CNN #3

In [17]:
# Build a CNN model to classify the images
class FashionClassifierCNN3(nn.Module):
    def __init__(self):
        super(FashionClassifierCNN3, self).__init__()
        # [(input - filter + 2*pad) / stride] + 1
        # 72x72x3
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5)  # Input channels, output channels, kernel size
        # 68x68x16
        self.pool = nn.MaxPool2d(2, 2)
        # 34x34x16
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5)
        # 30x30x32
        # apply max pooling again
        # 15x15x32
        self.fc1 = nn.Linear(in_features=15*15*32, out_features=120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 13)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # Flatten all dimensions except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

### Train CNN #3

In [18]:
# If you want to load the model from the file
# model.load_state_dict(torch.load(os.path.join(DIR_PATH, "model3.pt")))
def train_model3(model):
    epochs = 20
    no_improvement_streak = 0
    patience = 5
    best_loss = float('inf')
    time_start = time.time()

    for epoch in range(epochs):
        model.train()  # Set the model to training mode
        train_loss = 0.0

        # For each batch in the training set
        for i, data in enumerate(train_loader):
            inputs, labels = data
            inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)
            optimizer.zero_grad()  # Reset the gradients
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # After each epoch, evaluate the model on the validation set
        model.eval()  # Set the model to evaluation mode
        with torch.no_grad():  # Disable gradient computation
            validation_loss = 0.0
            for i, data in enumerate(validation_loader):
                inputs, labels = data
                inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                validation_loss += loss.item()

        time_elapsed = time.time() - time_start
        time_left = (epochs - epoch - 1) / (epoch + 1) * time_elapsed
        avg_train_loss = train_loss / len(train_loader)
        avg_validation_loss = validation_loss / len(validation_loader)
        
        print(f"[epoch {epoch+1}, time remaining: {time_left / 60:.1f} min]   train loss: {avg_train_loss:.3f}   validation loss: {avg_validation_loss:.3f}")

        # Early stopping mechanism based on validation loss
        if avg_validation_loss < best_loss:
            # save the model if it's the best so far
            torch.save(model.state_dict(), os.path.join(DIR_PATH, f"model3_{lr}.pt"))
            best_loss = avg_validation_loss
            no_improvement_streak = 0
        else:
            no_improvement_streak += 1

        if no_improvement_streak == patience:
            print("Early stopping!")
            break

    print(f"Finished training. Best model (validation loss: {best_loss:.3f}) saved to model3_{lr}.pt")


learning_rates = [0.1, 0.01, 0.001, 0.0001, 0.00001]
for lr in learning_rates:
    print(f"Training model with learning rate {lr}")
    model = FashionClassifierCNN3().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    train_model3(model)
    print("\n")


Training model with learning rate 0.1
[epoch 1, time remaining: 9.6 min]   train loss: 314.035   validation loss: 2.084
[epoch 2, time remaining: 8.7 min]   train loss: 2.058   validation loss: 2.077
[epoch 3, time remaining: 8.2 min]   train loss: 2.058   validation loss: 2.086
[epoch 4, time remaining: 7.7 min]   train loss: 2.059   validation loss: 2.080
[epoch 5, time remaining: 7.2 min]   train loss: 2.059   validation loss: 2.080
[epoch 6, time remaining: 6.8 min]   train loss: 2.059   validation loss: 2.080
[epoch 7, time remaining: 6.3 min]   train loss: 2.058   validation loss: 2.079
Early stopping!
Finished training. Best model (validation loss: 2.077) saved to model3_0.1.pt


Training model with learning rate 0.01
[epoch 1, time remaining: 9.0 min]   train loss: 2.124   validation loss: 2.083
[epoch 2, time remaining: 8.6 min]   train loss: 2.056   validation loss: 2.076
[epoch 3, time remaining: 8.1 min]   train loss: 2.056   validation loss: 2.078
[epoch 4, time remaining:

Let's keep training the model with lr=1e-5 because it was still improving when training ended.

In [22]:
# load model from model3_1e-05.pt
model = FashionClassifierCNN3().to(device)
model.load_state_dict(torch.load(os.path.join(DIR_PATH, "model3_1e-05.pt")))

# Set all parameters to require gradients so that we can continue training
for param in model.parameters():
    param.requires_grad = True

optimizer = optim.Adam(model.parameters(), lr=1e-5)
train_model3(model)

[epoch 1, time remaining: 8.6 min]   train loss: 0.319   validation loss: 0.349
[epoch 2, time remaining: 7.9 min]   train loss: 0.313   validation loss: 0.345
[epoch 3, time remaining: 7.4 min]   train loss: 0.308   validation loss: 0.339
[epoch 4, time remaining: 6.9 min]   train loss: 0.303   validation loss: 0.333
[epoch 5, time remaining: 6.6 min]   train loss: 0.298   validation loss: 0.328
[epoch 6, time remaining: 6.2 min]   train loss: 0.293   validation loss: 0.326
[epoch 7, time remaining: 5.8 min]   train loss: 0.289   validation loss: 0.317
[epoch 8, time remaining: 5.4 min]   train loss: 0.284   validation loss: 0.314
[epoch 9, time remaining: 4.9 min]   train loss: 0.281   validation loss: 0.316
[epoch 10, time remaining: 4.5 min]   train loss: 0.276   validation loss: 0.308
[epoch 11, time remaining: 4.1 min]   train loss: 0.273   validation loss: 0.304
[epoch 12, time remaining: 3.7 min]   train loss: 0.270   validation loss: 0.298
[epoch 13, time remaining: 3.3 min]  

In [23]:
train_model3(model)

[epoch 1, time remaining: 8.8 min]   train loss: 0.242   validation loss: 0.278
[epoch 2, time remaining: 8.3 min]   train loss: 0.239   validation loss: 0.275
[epoch 3, time remaining: 7.8 min]   train loss: 0.237   validation loss: 0.286
[epoch 4, time remaining: 7.4 min]   train loss: 0.234   validation loss: 0.273
[epoch 5, time remaining: 6.9 min]   train loss: 0.232   validation loss: 0.269
[epoch 6, time remaining: 6.4 min]   train loss: 0.229   validation loss: 0.272
[epoch 7, time remaining: 6.0 min]   train loss: 0.227   validation loss: 0.272
[epoch 8, time remaining: 5.5 min]   train loss: 0.224   validation loss: 0.262
[epoch 9, time remaining: 5.0 min]   train loss: 0.223   validation loss: 0.267
[epoch 10, time remaining: 4.6 min]   train loss: 0.220   validation loss: 0.260
[epoch 11, time remaining: 4.1 min]   train loss: 0.218   validation loss: 0.263
[epoch 12, time remaining: 3.7 min]   train loss: 0.218   validation loss: 0.252
[epoch 13, time remaining: 3.2 min]  

### Test CNN #3

In [24]:
# If you want to load the model from the file
# model = FashionClassifierCNN().to(device)
# model.load_state_dict(torch.load(os.path.join(DIR_PATH, "model3.pt")))
for lr in learning_rates:
    model = FashionClassifierCNN3().to(device)
    model.load_state_dict(torch.load(os.path.join(DIR_PATH, f"model3_{lr}.pt")))

    correct = 0
    total = 0

    with torch.no_grad():
        # For each batch
        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"(lr={lr}) Accuracy of the network on the {total} test images: {100 * correct / total}%")

(lr=0.1) Accuracy of the network on the 2000 test images: 35.0%
(lr=0.01) Accuracy of the network on the 2000 test images: 35.0%
(lr=0.001) Accuracy of the network on the 2000 test images: 94.95%
(lr=0.0001) Accuracy of the network on the 2000 test images: 94.6%
(lr=1e-05) Accuracy of the network on the 2000 test images: 92.65%


### Conclusion on the performance of the model with hyperparameter tuning
I chose 5 learning rates separated by orders of magnitude, and the model with the learning rate of 0.001 performed the best. 0.001 is accepted as the default learning rate for Adam. The model with this learning rate correctly classified 94.95% of test samples, which is nearly the same as the model with data augmentation.

The models with large learning rates 0.1 and 0.01 performed horribly with accuracies of 35% each. This is because the learning rate was too high, and the model was not able to converge to a good solution -- the training and validation losses hardly changed over the training process, exhibiting the model's frustration.

The models with small learning rates 0.0001 and 0.00001 performed better than the models with large learning rates, but slightly worse than the model with the default learning rate. The two models, especially the 1e-5 model, also took significantly longer to train than the other models, marking the point where the precision that comes with a smaller learning rate is not worth the time it takes to train.

In the end, it is not surprising that the model with the default learning rate performed the best because it balances speed and accuracy. 0.001 was likely chosen after testing extensively on diverse datasets.