In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import random_split, DataLoader

# Preprocessing pipeline, applied to each image as it is read from a dataset
transform = transforms.Compose(
    [
        # force a 28x28 spatial size
        transforms.Resize((28, 28)),
        # convert the image to a PyTorch tensor with values in [0, 1]
        transforms.ToTensor(),
        # (x - 0.5) / 0.5 maps [0, 1] onto [-1, 1]
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# Load the FashionMNIST training and test splits.
#   root      --> directory the data is stored in (download target if needed)
#   train     --> which portion of the dataset to load (training or testing)
#   download  --> download the dataset if it is not present in root
#   transform --> transformation applied to each image as it is accessed
full_train_dataset = torchvision.datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

test_dataset = torchvision.datasets.FashionMNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

In [None]:
def train_val_split(train_ratio, batch_size=512, num_workers=2, seed=None,
                    dataset=None, test_set=None):
    """Split the training data into train/validation subsets and build loaders.

    Args:
        train_ratio: Fraction of the training data kept for training, in
            (0, 1]. The remainder becomes the validation set, so 1.0 yields
            an empty validation set.
        batch_size: Number of samples per batch for all three loaders.
        num_workers: Number of worker processes each loader uses.
        seed: Optional integer. When given, the split and the training
            shuffle order are drawn from a dedicated generator seeded with
            it, so repeated calls produce the same split. When None, the
            global torch RNG is used.
        dataset: Dataset to split. Defaults to the notebook-level
            ``full_train_dataset``.
        test_set: Held-out test dataset. Defaults to the notebook-level
            ``test_dataset``.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``.

    Raises:
        ValueError: If ``train_ratio`` is outside (0, 1] or leaves the
            training subset empty.
    """
    if not 0.0 < train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be in (0, 1], got {train_ratio!r}")

    # Fall back to the datasets loaded at the top of the notebook.
    if dataset is None:
        dataset = full_train_dataset
    if test_set is None:
        test_set = test_dataset

    train_size = int(train_ratio * len(dataset))
    val_size = len(dataset) - train_size
    if train_size == 0:
        raise ValueError(
            f"train_ratio={train_ratio!r} leaves no training samples "
            f"out of {len(dataset)}"
        )

    # A private generator keeps a seeded split independent of the global RNG.
    generator = None if seed is None else torch.Generator().manual_seed(seed)
    if generator is None:
        train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
    else:
        train_dataset, val_dataset = random_split(
            dataset, [train_size, val_size], generator=generator
        )

    # Only the training loader shuffles; validation and test keep a fixed
    # order so evaluation is consistent between runs.
    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True,
        num_workers=num_workers, generator=generator,
    )
    val_loader = DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers
    )
    test_loader = DataLoader(
        test_set, batch_size=batch_size, shuffle=False, num_workers=num_workers
    )

    print(f"Train: {len(train_dataset)} | Val: {len(val_dataset)} | Test: {len(test_set)}")

    return train_loader, val_loader, test_loader

In [29]:
import torch.nn.functional as F

class FashionMNISTNet(nn.Module):
    """Three-stage convolutional classifier producing 10 class scores.

    Feature-map sizes (channels x height x width) for a 1x28x28 input:
        conv1 -> 32x28x28, pool -> 32x14x14
        conv2 -> 64x16x16, pool -> 64x8x8   (padding=2 grows 14x14 to 16x16)
        conv3 -> 64x8x8,   pool -> 64x4x4
        flatten -> 1024, then 256 -> 128 -> 10 fully connected
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: 3x3 convolutions with stride 1
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=2)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        # One 2x2 max-pool shared by all three stages (halves height and width)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head
        self.fc1 = nn.Linear(1024, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)

    def forward(self, x):
        """Return one row of 10 unnormalised class scores per input sample."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(F.relu(conv(x)))
        x = x.flatten(start_dim=1)
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        # No ReLU or softmax on the last layer: raw scores are returned.
        return self.fc3(x)
        
# Build one instance and print its layer summary as a sanity check
net = FashionMNISTNet()
print(net)

FashionMNISTNet(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=1024, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=10, bias=True)
)


In [23]:
def evaluate_model(model, test_loader, device):
    """Measure and print the classification accuracy of ``model`` on a loader.

    Args:
        model: Network returning one row of class scores per input sample.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        The accuracy as a percentage, or None when the loader yields no
        samples (nothing to score). The result is also printed.
    """
    correct = 0
    total = 0

    # Score in eval mode, then restore whatever mode the caller had set.
    was_training = model.training
    model.eval()
    try:
        # No gradients are needed for scoring.
        with torch.no_grad():
            for inputs, labels in test_loader:
                # Move the batch to the target device
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Predicted class = index of the highest score in each row
                predicted = model(inputs).argmax(dim=1)

                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        model.train(was_training)

    # An empty loader would otherwise divide by zero.
    if total == 0:
        print("Accuracy: n/a (no samples)")
        return None

    accuracy = 100 * correct / total
    print(f"Accuracy: {accuracy:.2f}%")
    return accuracy

In [22]:
def train_net(model, train_loader, val_loader, device, criterion, optimizer, num_epochs=15):
    """Train ``model`` and report loss and validation accuracy per epoch.

    Args:
        model: Network to optimise, updated in place.
        train_loader: Iterable yielding ``(inputs, labels)`` training batches.
        val_loader: Iterable yielding ``(inputs, labels)`` validation batches.
            It may be empty, in which case no validation accuracy is reported.
        device: Device each batch is moved to before the forward pass.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer bound to the parameters of ``model``.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        A list with one dict per epoch, holding ``"epoch"`` (1-based),
        ``"loss"`` (mean batch loss) and ``"val_acc"`` (percentage, or None
        when ``val_loader`` yielded no samples).

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    history = []
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        # Fail with a clear message instead of an unbound loop variable.
        if num_batches == 0:
            raise ValueError("train_loader yielded no batches")
        avg_loss = running_loss / num_batches

        # ---validation---
        model.eval()
        correct, total = _count_correct(model, val_loader, device)
        if total == 0:
            val_acc = None
            print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}")
        else:
            val_acc = 100 * correct / total
            print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}, Val Acc: {val_acc:.2f}%")

        history.append({"epoch": epoch + 1, "loss": avg_loss, "val_acc": val_acc})

    return history


def _count_correct(model, loader, device):
    """Return ``(correct, total)`` prediction counts for ``model`` over ``loader``."""
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            # Predicted class = index of the highest score in each row
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct, total

                


In [24]:
# 1. Trained with no validation set

# Fix the RNG seed before building the model so the initial weights, the
# random split and the shuffle order are reproducible when this cell is
# rerun (CUDA kernels may still introduce small run-to-run differences).
torch.manual_seed(0)
no_val_net = FashionMNISTNet()

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
no_val_net.to(device)

# Define the loss function and the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(no_val_net.parameters(), lr=0.1)

# train_ratio=1.0 keeps every training sample, leaving the validation set empty
train_loader, val_loader, test_loader = train_val_split(train_ratio=1.0)

train_net(no_val_net, train_loader, val_loader, device, criterion, optimizer)

evaluate_model(no_val_net, test_loader, device)


Using device: cuda
Train: 60000 | Val: 0 | Test: 10000
Epoch [1/15], Loss: 2.3023
Epoch [2/15], Loss: 2.3017
Epoch [3/15], Loss: 2.3007
Epoch [4/15], Loss: 2.2984
Epoch [5/15], Loss: 2.2804
Epoch [6/15], Loss: 2.0183
Epoch [7/15], Loss: 1.8307
Epoch [8/15], Loss: 1.7653
Epoch [9/15], Loss: 1.7382
Epoch [10/15], Loss: 1.7219
Epoch [11/15], Loss: 1.7104
Epoch [12/15], Loss: 1.7006
Epoch [13/15], Loss: 1.6942
Epoch [14/15], Loss: 1.6900
Epoch [15/15], Loss: 1.6839
Accuracy: 77.60%


In [25]:
# 2. Trained with 10% validation set

# Fix the RNG seed before building the model so the initial weights, the
# random split and the shuffle order are reproducible when this cell is
# rerun (CUDA kernels may still introduce small run-to-run differences).
torch.manual_seed(0)
ten_val_net = FashionMNISTNet()

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
ten_val_net.to(device)

# Define the loss function and the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(ten_val_net.parameters(), lr=0.1)

# 90% of the training data for training, 10% held out for validation
train_loader, val_loader, test_loader = train_val_split(train_ratio=0.9)

train_net(ten_val_net, train_loader, val_loader, device, criterion, optimizer)

evaluate_model(ten_val_net, test_loader, device)

Using device: cuda
Train: 54000 | Val: 6000 | Test: 10000
Epoch [1/15], Loss: 2.3025, Val Acc: 10.17%
Epoch [2/15], Loss: 2.3020, Val Acc: 10.17%
Epoch [3/15], Loss: 2.3015, Val Acc: 10.17%
Epoch [4/15], Loss: 2.3005, Val Acc: 19.15%
Epoch [5/15], Loss: 2.2986, Val Acc: 20.13%
Epoch [6/15], Loss: 2.2902, Val Acc: 27.87%
Epoch [7/15], Loss: 2.1262, Val Acc: 50.37%
Epoch [8/15], Loss: 1.8962, Val Acc: 61.47%
Epoch [9/15], Loss: 1.8245, Val Acc: 67.02%
Epoch [10/15], Loss: 1.7936, Val Acc: 66.40%
Epoch [11/15], Loss: 1.7775, Val Acc: 66.85%
Epoch [12/15], Loss: 1.7660, Val Acc: 66.92%
Epoch [13/15], Loss: 1.7548, Val Acc: 69.65%
Epoch [14/15], Loss: 1.7483, Val Acc: 70.30%
Epoch [15/15], Loss: 1.7395, Val Acc: 71.73%
Accuracy: 70.95%


In [26]:
# 3. Trained with 20% validation set

# Fix the RNG seed before building the model so the initial weights, the
# random split and the shuffle order are reproducible when this cell is
# rerun (CUDA kernels may still introduce small run-to-run differences).
torch.manual_seed(0)
twenty_val_net = FashionMNISTNet()

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
twenty_val_net.to(device)

# Define the loss function and the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(twenty_val_net.parameters(), lr=0.1)

# 80% of the training data for training, 20% held out for validation
train_loader, val_loader, test_loader = train_val_split(train_ratio=0.8)

train_net(twenty_val_net, train_loader, val_loader, device, criterion, optimizer)

evaluate_model(twenty_val_net, test_loader, device)

Using device: cuda
Train: 48000 | Val: 12000 | Test: 10000
Epoch [1/15], Loss: 2.3024, Val Acc: 9.87%
Epoch [2/15], Loss: 2.3020, Val Acc: 16.23%
Epoch [3/15], Loss: 2.3016, Val Acc: 18.34%
Epoch [4/15], Loss: 2.3009, Val Acc: 19.10%
Epoch [5/15], Loss: 2.2999, Val Acc: 24.13%
Epoch [6/15], Loss: 2.2977, Val Acc: 30.57%
Epoch [7/15], Loss: 2.2899, Val Acc: 34.57%
Epoch [8/15], Loss: 2.1495, Val Acc: 51.02%
Epoch [9/15], Loss: 1.9165, Val Acc: 64.88%
Epoch [10/15], Loss: 1.8404, Val Acc: 58.16%
Epoch [11/15], Loss: 1.7732, Val Acc: 63.26%
Epoch [12/15], Loss: 1.7299, Val Acc: 74.27%
Epoch [13/15], Loss: 1.7152, Val Acc: 76.09%
Epoch [14/15], Loss: 1.7083, Val Acc: 76.69%
Epoch [15/15], Loss: 1.6936, Val Acc: 75.33%
Accuracy: 75.10%


In [27]:
# 4. Trained with 30% validation set

# Fix the RNG seed before building the model so the initial weights, the
# random split and the shuffle order are reproducible when this cell is
# rerun (CUDA kernels may still introduce small run-to-run differences).
torch.manual_seed(0)
thirty_val_net = FashionMNISTNet()

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
thirty_val_net.to(device)

# Define the loss function and the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(thirty_val_net.parameters(), lr=0.1)

# 70% of the training data for training, 30% held out for validation
train_loader, val_loader, test_loader = train_val_split(train_ratio=0.7)

train_net(thirty_val_net, train_loader, val_loader, device, criterion, optimizer)

evaluate_model(thirty_val_net, test_loader, device)

Using device: cuda
Train: 42000 | Val: 18000 | Test: 10000
Epoch [1/15], Loss: 2.3024, Val Acc: 11.49%
Epoch [2/15], Loss: 2.3021, Val Acc: 18.81%
Epoch [3/15], Loss: 2.3017, Val Acc: 18.08%
Epoch [4/15], Loss: 2.3011, Val Acc: 17.94%
Epoch [5/15], Loss: 2.3000, Val Acc: 24.49%
Epoch [6/15], Loss: 2.2981, Val Acc: 26.69%
Epoch [7/15], Loss: 2.2918, Val Acc: 25.16%
Epoch [8/15], Loss: 2.2021, Val Acc: 39.59%
Epoch [9/15], Loss: 1.9548, Val Acc: 33.87%
Epoch [10/15], Loss: 1.9022, Val Acc: 50.16%
Epoch [11/15], Loss: 1.7873, Val Acc: 59.20%
Epoch [12/15], Loss: 1.7533, Val Acc: 59.33%
Epoch [13/15], Loss: 1.7317, Val Acc: 44.78%
Epoch [14/15], Loss: 1.7260, Val Acc: 61.32%
Epoch [15/15], Loss: 1.7139, Val Acc: 65.80%
Accuracy: 64.64%


In [28]:
# 5. Trained with 40% validation set

# Fix the RNG seed before building the model so the initial weights, the
# random split and the shuffle order are reproducible when this cell is
# rerun (CUDA kernels may still introduce small run-to-run differences).
torch.manual_seed(0)
# NOTE(review): "fourty" is a misspelling of "forty"; the name is kept in case
# later cells reference it.
fourty_val_net = FashionMNISTNet()

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
fourty_val_net.to(device)

# Define the loss function and the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(fourty_val_net.parameters(), lr=0.1)

# 60% of the training data for training, 40% held out for validation
train_loader, val_loader, test_loader = train_val_split(train_ratio=0.6)

train_net(fourty_val_net, train_loader, val_loader, device, criterion, optimizer)

evaluate_model(fourty_val_net, test_loader, device)

Using device: cuda
Train: 36000 | Val: 24000 | Test: 10000
Epoch [1/15], Loss: 2.3025, Val Acc: 17.18%
Epoch [2/15], Loss: 2.3022, Val Acc: 18.17%
Epoch [3/15], Loss: 2.3019, Val Acc: 20.52%
Epoch [4/15], Loss: 2.3015, Val Acc: 31.80%
Epoch [5/15], Loss: 2.3009, Val Acc: 45.41%
Epoch [6/15], Loss: 2.3000, Val Acc: 43.09%
Epoch [7/15], Loss: 2.2980, Val Acc: 36.30%
Epoch [8/15], Loss: 2.2923, Val Acc: 33.92%
Epoch [9/15], Loss: 2.2212, Val Acc: 40.73%
Epoch [10/15], Loss: 1.9968, Val Acc: 54.31%
Epoch [11/15], Loss: 1.8942, Val Acc: 58.00%
Epoch [12/15], Loss: 1.8252, Val Acc: 58.58%
Epoch [13/15], Loss: 1.7833, Val Acc: 67.76%
Epoch [14/15], Loss: 1.7620, Val Acc: 67.59%
Epoch [15/15], Loss: 1.7428, Val Acc: 72.02%
Accuracy: 71.21%
