Initializing the functions, parameters, and data for training and testing.

In [1]:
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid
from tqdm import tqdm

import models
from models import*

def train_model(model, train_loader, criterion, optimizer, num_epochs):
    """Train ``model`` in place for ``num_epochs`` passes over ``train_loader``.

    After every epoch the mean per-batch loss and the training accuracy
    (arg-max over dim 1 of the model output compared with the labels) are
    printed.

    Args:
        model: Module to optimise; ``model(data)`` must return a 2-D tensor of
            per-class scores with the classes along dim 1.
        train_loader: Iterable yielding ``(data, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(output, labels)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        The same ``model`` object that was passed in.
    """
    model.train()
    # Epochs are numbered 1..num_epochs so the label reads "Epoch 1/10" ... "Epoch 10/10".
    for epoch in range(1, num_epochs + 1):
        total_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0  # counted while iterating, so loaders without __len__ also work

        for data, labels in tqdm(train_loader, desc=f'Epoch {epoch}/{num_epochs}', unit='batch'):
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1

            predicted = output.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # An empty loader yields no batches; report NaN instead of dividing by zero.
        average_loss = total_loss / num_batches if num_batches else float('nan')
        accuracy = correct / total if total else float('nan')

        print(f'Epoch {epoch}/{num_epochs}, Average Loss: {average_loss:.4f}, Accuracy: {accuracy * 100:.2f}%')

    return model

def test_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for data, labels in test_loader:
            output = model(data)
            _, predicted = torch.max(output.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f'Test Accuracy: {accuracy * 100:.2f}%')

def view_data_sample(loader):
    """Display the first batch of ``loader`` as an image grid, 16 images per row.

    Args:
        loader: Iterable yielding ``(image, label)`` batches. ``image`` is
            expected to be a ``(N, C, H, W)`` tensor; a 3-D tensor is assumed
            to be a batch of channel-less ``(N, H, W)`` images, as produced by
            the ``TensorDataset`` loaders built from ``splice_batch`` output.
    """
    image, _ = next(iter(loader))
    if image.dim() == 3:
        # make_grid treats a 3-D tensor as ONE image with N channels, which
        # imshow cannot draw; add the channel axis so each entry is an image.
        image = image.unsqueeze(1)
    plt.figure(figsize=(16, 8))
    plt.axis('off')
    # make_grid returns (C, H, W); imshow wants the channel axis last.
    plt.imshow(make_grid(image, nrow=16).permute((1, 2, 0)))

def count_parameters(model):
    """Return how many scalar values ``model`` holds in parameters that have ``requires_grad`` set."""
    trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable += param.numel()
    return trainable

def splice_batch(X, Y, num_of_labels, prints=True):
    """Keep only the samples whose label is strictly below ``num_of_labels``.

    Args:
        X: Indexable array/tensor of samples, aligned with ``Y`` along the first axis.
        Y: Array/tensor of labels supporting ``Y < num_of_labels`` as a boolean mask.
        num_of_labels: Exclusive upper bound on the labels to keep.
        prints: When true, print the shapes of ``X`` and ``Y`` before and after filtering.

    Returns:
        Tuple ``(X, Y)`` restricted to the kept samples.
    """
    def _report(tag, samples, targets):
        # One line per call: tag, then both shapes separated by tabs.
        print(tag, end="")
        print("\t X shape: ", samples.shape, end='\t')
        print("\t Y shape: ", targets.shape)

    if prints:
        _report('input: ', X, Y)

    keep = Y < num_of_labels
    kept_samples, kept_targets = X[keep], Y[keep]

    if prints:
        _report('output: ', kept_samples, kept_targets)
    return kept_samples, kept_targets



# Parameters
batch_size = 512
lr = 0.001
num_epochs = 10

# Download and load the training data
trainset = datasets.FashionMNIST('F_MNIST_data/', download=True, train=True, transform=ToTensor())
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)

# Download and load the test data
testset = datasets.FashionMNIST('F_MNIST_data/', download=True, train=False, transform=ToTensor())
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)


def _make_subset_loader(dataset, num_of_labels, batch_size, shuffle):
    """Build a loader over the samples of ``dataset`` whose label is below ``num_of_labels``.

    Reads ``dataset.data`` and ``dataset.targets`` directly, filters them with
    ``splice_batch`` (which prints the shapes before and after), converts the
    samples to float and divides them by 255.

    Returns:
        Tuple ``(data, labels, tensor_dataset, loader)``.
    """
    data, labels = splice_batch(dataset.data, dataset.targets, num_of_labels=num_of_labels)
    data = data.float() / 255.0  # Normalization
    tensor_dataset = torch.utils.data.TensorDataset(data, labels)
    loader = torch.utils.data.DataLoader(tensor_dataset, batch_size=batch_size, shuffle=shuffle)
    return data, labels, tensor_dataset, loader


# Data for the first model, only 3 classes (labels 0-2)
train_data_3, train_labels_3, train_dataset_3, train_loader_3 = _make_subset_loader(
    trainset, num_of_labels=3, batch_size=batch_size, shuffle=True)
test_data_3, test_labels_3, test_dataset_3, test_loader_3 = _make_subset_loader(
    testset, num_of_labels=3, batch_size=batch_size, shuffle=False)

# Data for the second, third and fourth models, only 7 classes (labels 0-6)
train_data_7, train_labels_7, train_dataset_7, train_loader_7 = _make_subset_loader(
    trainset, num_of_labels=7, batch_size=batch_size, shuffle=True)
test_data_7, test_labels_7, test_dataset_7, test_loader_7 = _make_subset_loader(
    testset, num_of_labels=7, batch_size=batch_size, shuffle=False)

input: 	 X shape:  torch.Size([60000, 28, 28])		 Y shape:  torch.Size([60000])
output: 	 X shape:  torch.Size([18000, 28, 28])		 Y shape:  torch.Size([18000])
input: 	 X shape:  torch.Size([10000, 28, 28])		 Y shape:  torch.Size([10000])
output: 	 X shape:  torch.Size([3000, 28, 28])		 Y shape:  torch.Size([3000])
input: 	 X shape:  torch.Size([60000, 28, 28])		 Y shape:  torch.Size([60000])
output: 	 X shape:  torch.Size([42000, 28, 28])		 Y shape:  torch.Size([42000])
input: 	 X shape:  torch.Size([10000, 28, 28])		 Y shape:  torch.Size([10000])
output: 	 X shape:  torch.Size([7000, 28, 28])		 Y shape:  torch.Size([7000])


In [2]:
# Train the first model
# The class is reached through the `models` module: the name `model_1` is
# rebound to the instance below, so `model_1()` would fail when this cell is re-run.
model_1 = models.model_1()
criterion_1 = nn.CrossEntropyLoss()
optimizer_1 = optim.Adam(model_1.parameters(), lr=lr)
model_1 = train_model(model_1, train_loader_3, criterion_1, optimizer_1, num_epochs)

# Test the first model
test_model(model_1, test_loader_3)
print(count_parameters(model_1))

Epoch 0/10: 100%|██████████| 36/36 [00:00<00:00, 133.12batch/s]


Epoch 0/10, Average Loss: 0.5874, Accuracy: 79.90%


Epoch 1/10: 100%|██████████| 36/36 [00:00<00:00, 86.23batch/s] 


Epoch 1/10, Average Loss: 0.1653, Accuracy: 95.30%


Epoch 2/10: 100%|██████████| 36/36 [00:00<00:00, 210.86batch/s]


Epoch 2/10, Average Loss: 0.1360, Accuracy: 95.98%


Epoch 3/10: 100%|██████████| 36/36 [00:00<00:00, 219.01batch/s]


Epoch 3/10, Average Loss: 0.1209, Accuracy: 96.36%


Epoch 4/10: 100%|██████████| 36/36 [00:00<00:00, 199.57batch/s]


Epoch 4/10, Average Loss: 0.1175, Accuracy: 96.64%


Epoch 5/10: 100%|██████████| 36/36 [00:00<00:00, 161.71batch/s]


Epoch 5/10, Average Loss: 0.1146, Accuracy: 96.69%


Epoch 6/10: 100%|██████████| 36/36 [00:00<00:00, 214.47batch/s]


Epoch 6/10, Average Loss: 0.1046, Accuracy: 96.89%


Epoch 7/10: 100%|██████████| 36/36 [00:00<00:00, 200.79batch/s]


Epoch 7/10, Average Loss: 0.1015, Accuracy: 97.01%


Epoch 8/10: 100%|██████████| 36/36 [00:00<00:00, 215.48batch/s]


Epoch 8/10, Average Loss: 0.0946, Accuracy: 97.22%


Epoch 9/10: 100%|██████████| 36/36 [00:00<00:00, 216.42batch/s]

Epoch 9/10, Average Loss: 0.0923, Accuracy: 97.14%
Test Accuracy: 96.47%
48383





In [3]:
# Train the second model
# The class is reached through the `models` module: the name `model_2` is
# rebound to the instance below, so `model_2()` would fail when this cell is re-run.
model_2 = models.model_2()
criterion_2 = nn.CrossEntropyLoss()
optimizer_2 = optim.Adam(model_2.parameters(), lr=lr)
model_2 = train_model(model_2, train_loader_7, criterion_2, optimizer_2, num_epochs)
# Test the second model
test_model(model_2, test_loader_7)
print(count_parameters(model_2))

Epoch 0/10: 100%|██████████| 83/83 [00:00<00:00, 171.96batch/s]


Epoch 0/10, Average Loss: 1.0354, Accuracy: 66.51%


Epoch 1/10: 100%|██████████| 83/83 [00:00<00:00, 228.03batch/s]


Epoch 1/10, Average Loss: 0.5972, Accuracy: 78.40%


Epoch 2/10: 100%|██████████| 83/83 [00:00<00:00, 206.80batch/s]


Epoch 2/10, Average Loss: 0.5258, Accuracy: 80.89%


Epoch 3/10: 100%|██████████| 83/83 [00:00<00:00, 209.25batch/s]


Epoch 3/10, Average Loss: 0.4889, Accuracy: 82.31%


Epoch 4/10: 100%|██████████| 83/83 [00:00<00:00, 225.04batch/s]


Epoch 4/10, Average Loss: 0.4701, Accuracy: 82.78%


Epoch 5/10: 100%|██████████| 83/83 [00:00<00:00, 180.46batch/s]


Epoch 5/10, Average Loss: 0.4556, Accuracy: 83.50%


Epoch 6/10: 100%|██████████| 83/83 [00:00<00:00, 205.58batch/s]


Epoch 6/10, Average Loss: 0.4566, Accuracy: 83.38%


Epoch 7/10: 100%|██████████| 83/83 [00:00<00:00, 253.70batch/s]


Epoch 7/10, Average Loss: 0.4338, Accuracy: 83.98%


Epoch 8/10: 100%|██████████| 83/83 [00:00<00:00, 247.77batch/s]


Epoch 8/10, Average Loss: 0.4248, Accuracy: 84.46%


Epoch 9/10: 100%|██████████| 83/83 [00:00<00:00, 248.64batch/s]

Epoch 9/10, Average Loss: 0.4234, Accuracy: 84.58%
Test Accuracy: 82.46%
48467





In [4]:
# Train the third model
# The class is reached through the `models` module: the name `model_3` is
# rebound to the instance below, so `model_3()` would fail when this cell is re-run.
model_3 = models.model_3()
criterion_3 = nn.CrossEntropyLoss()
optimizer_3 = optim.Adam(model_3.parameters(), lr=lr)
model_3 = train_model(model_3, train_loader_7, criterion_3, optimizer_3, num_epochs)
# Test the third model
test_model(model_3, test_loader_7)
print(count_parameters(model_3))

Epoch 0/10: 100%|██████████| 83/83 [00:00<00:00, 164.72batch/s]


Epoch 0/10, Average Loss: 1.4547, Accuracy: 46.35%


Epoch 1/10: 100%|██████████| 83/83 [00:00<00:00, 207.90batch/s]


Epoch 1/10, Average Loss: 0.6915, Accuracy: 73.80%


Epoch 2/10: 100%|██████████| 83/83 [00:00<00:00, 226.90batch/s]


Epoch 2/10, Average Loss: 0.6079, Accuracy: 77.71%


Epoch 3/10: 100%|██████████| 83/83 [00:00<00:00, 217.48batch/s]


Epoch 3/10, Average Loss: 0.5602, Accuracy: 79.68%


Epoch 4/10: 100%|██████████| 83/83 [00:00<00:00, 205.91batch/s]


Epoch 4/10, Average Loss: 0.5279, Accuracy: 80.54%


Epoch 5/10: 100%|██████████| 83/83 [00:00<00:00, 220.07batch/s]


Epoch 5/10, Average Loss: 0.5143, Accuracy: 81.14%


Epoch 6/10: 100%|██████████| 83/83 [00:00<00:00, 211.29batch/s]


Epoch 6/10, Average Loss: 0.5058, Accuracy: 81.32%


Epoch 7/10: 100%|██████████| 83/83 [00:00<00:00, 196.87batch/s]


Epoch 7/10, Average Loss: 0.4871, Accuracy: 81.94%


Epoch 8/10: 100%|██████████| 83/83 [00:00<00:00, 231.67batch/s]


Epoch 8/10, Average Loss: 0.4817, Accuracy: 82.25%


Epoch 9/10: 100%|██████████| 83/83 [00:00<00:00, 218.27batch/s]


Epoch 9/10, Average Loss: 0.4675, Accuracy: 82.67%
Test Accuracy: 81.29%
28663


In [5]:
# Train the fourth model
# These repeat the values set in the parameters cell; kept so this cell can be tuned on its own.
lr = 0.001
num_epochs = 10
# The class is reached through the `models` module: the name `model_4` is
# rebound to the instance below, so `model_4()` would fail when this cell is re-run.
model_4 = models.model_4()
criterion_4 = nn.CrossEntropyLoss()
optimizer_4 = optim.Adam(model_4.parameters(), lr=lr)
model_4 = train_model(model_4, train_loader_7, criterion_4, optimizer_4, num_epochs)
# Test the fourth model
test_model(model_4, test_loader_7)
print(count_parameters(model_4))

Epoch 0/25:   6%|▌         | 5/83 [00:06<01:43,  1.32s/batch]


KeyboardInterrupt: 