Initializing the functions, parameters, and data for training and testing.

In [None]:
import torch
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch.nn as nn
from torch import optim
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid
from tqdm import tqdm
# from models import*

def train_model(model, train_loader, criterion, optimizer, num_epochs):
    """Train ``model`` in place for ``num_epochs`` passes over ``train_loader``.

    After every epoch a summary line is printed with the mean of the
    per-batch losses and the training accuracy. A prediction is the index of
    the largest value along dimension 1 of the model output.

    Args:
        model: Module called as ``model(data)``; it is switched to training
            mode before the first epoch.
        train_loader: Iterable yielding ``(data, labels)`` batches. It does
            not need to support ``len()``; batches are counted while
            iterating.
        criterion: Callable ``criterion(output, labels)`` returning a scalar
            loss tensor that supports ``backward()`` and ``item()``.
        optimizer: Optimizer whose ``zero_grad()`` / ``step()`` are called
            once per batch.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        The same ``model`` object, after training.
    """
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0

        # Label the progress bar with the same 1-based epoch number as the
        # summary line printed below, so both outputs agree.
        for data, labels in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}', unit='batch'):
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1

            _, predicted = torch.max(output, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError;
        # in that case both statistics are reported as 0.
        average_loss = total_loss / max(num_batches, 1)
        accuracy = correct / max(total, 1)

        print(f'Epoch {epoch+1}/{num_epochs}, Average Loss: {average_loss:.4f}, Accuracy: {accuracy * 100:.2f}%')

    return model

def test_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for data, labels in test_loader:
            output = model(data)
            _, predicted = torch.max(output.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f'Test Accuracy: {accuracy * 100:.2f}%')

def view_data_sample(loader):
    """Show the first batch drawn from ``loader`` as a single image grid.

    The labels of the batch are ignored. The grid places 16 images per row
    and is drawn on a new 16x8 figure with the axes hidden.
    """
    images, _ = next(iter(loader))
    plt.figure(figsize=(16, 8))
    plt.axis('off')
    grid = make_grid(images, nrow=16)
    # Reorder axes (1, 2, 0) so the first (channel) axis comes last for imshow.
    plt.imshow(grid.permute((1, 2, 0)))

def count_parameters(model):
    """Return the number of trainable scalar values in ``model``.

    Only parameters with ``requires_grad`` set are counted.
    """
    trainable = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue  # frozen parameters do not contribute
        trainable += param.numel()
    return trainable

def splice_batch(X, Y, num_of_labels, prints=True):
    """Keep only the samples whose label is smaller than ``num_of_labels``.

    Args:
        X: Samples, indexable by a boolean mask along the first axis.
        Y: Labels aligned with ``X``; compared element-wise against
            ``num_of_labels`` to build the mask.
        num_of_labels: Exclusive upper bound on the labels to keep.
        prints: When true, print the shapes of ``X`` and ``Y`` before and
            after filtering.

    Returns:
        The filtered ``(X, Y)`` pair.
    """
    def show_shapes(tag, samples, targets):
        # One report line: tag, then both shapes separated by tabs.
        print(tag, end="")
        print("\t X shape: ", samples.shape, end='\t')
        print("\t Y shape: ", targets.shape)

    if prints:
        show_shapes('input: ', X, Y)

    keep = Y < num_of_labels
    X, Y = X[keep], Y[keep]

    if prints:
        show_shapes('output: ', X, Y)
    return X, Y



# Parameters
batch_size = 256
lr = 0.001
num_epochs = 50

# Download (if not already present) and load the full training data
trainset = datasets.FashionMNIST('F_MNIST_data/', download=True, train=True, transform=ToTensor())
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)

# Download (if not already present) and load the full test data
testset = datasets.FashionMNIST('F_MNIST_data/', download=True, train=False, transform=ToTensor())
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)

# Data for the 3-class experiments: keep only samples with label < 3.
# The subsets are built from the raw `.data` tensors, so the ToTensor()
# transform above is not applied to them; pixels are scaled by hand instead
# (presumably from the 0-255 range to [0, 1] -- confirm against the dataset).
train_data_3, train_labels_3 = splice_batch(trainset.data, trainset.targets, num_of_labels=3)
train_data_3 = train_data_3.float() / 255.0  # Normalization
train_dataset_3 = torch.utils.data.TensorDataset(train_data_3, train_labels_3)
train_loader_3 = torch.utils.data.DataLoader(train_dataset_3, batch_size=batch_size, shuffle=True)

test_data_3, test_labels_3 = splice_batch(testset.data, testset.targets, num_of_labels=3)
test_data_3 = test_data_3.float() / 255.0  # Normalization
test_dataset_3 = torch.utils.data.TensorDataset(test_data_3, test_labels_3)
test_loader_3 = torch.utils.data.DataLoader(test_dataset_3, batch_size=batch_size, shuffle=False)



# Data for the 7-class experiments: keep only samples with label < 7
train_data_7, train_labels_7 = splice_batch(trainset.data, trainset.targets, num_of_labels=7)
train_data_7 = train_data_7.float() / 255.0  # Normalization
train_dataset_7 = torch.utils.data.TensorDataset(train_data_7, train_labels_7)
# Training batches are reshuffled every epoch, matching train_loader_3.
train_loader_7 = torch.utils.data.DataLoader(train_dataset_7, batch_size=batch_size, shuffle=True)

test_data_7, test_labels_7 = splice_batch(testset.data, testset.targets, num_of_labels=7)
test_data_7 = test_data_7.float() / 255.0  # Normalization
test_dataset_7 = torch.utils.data.TensorDataset(test_data_7, test_labels_7)
test_loader_7 = torch.utils.data.DataLoader(test_dataset_7, batch_size=batch_size, shuffle=False)

input: 	 X shape:  torch.Size([60000, 28, 28])		 Y shape:  torch.Size([60000])
output: 	 X shape:  torch.Size([18000, 28, 28])		 Y shape:  torch.Size([18000])
input: 	 X shape:  torch.Size([10000, 28, 28])		 Y shape:  torch.Size([10000])
output: 	 X shape:  torch.Size([3000, 28, 28])		 Y shape:  torch.Size([3000])
input: 	 X shape:  torch.Size([60000, 28, 28])		 Y shape:  torch.Size([60000])
output: 	 X shape:  torch.Size([42000, 28, 28])		 Y shape:  torch.Size([42000])
input: 	 X shape:  torch.Size([10000, 28, 28])		 Y shape:  torch.Size([10000])
output: 	 X shape:  torch.Size([7000, 28, 28])		 Y shape:  torch.Size([7000])


In [None]:
class model_4(nn.Module):
    """Small CNN classifier producing 7 class scores per image.

    As applied in ``forward``: three unpadded 3x3 convolutions
    (1 -> 16 -> 32 -> 32 channels), each followed by ReLU, then one linear
    layer mapping the flattened feature map to 7 outputs.

    ``forward`` returns raw, unnormalized logits. ``nn.CrossEntropyLoss``
    applies log-softmax internally, so a softmax inside the model would be
    applied twice and flatten the gradients. Use ``F.softmax(logits, dim=1)``
    on the output when probabilities are needed; the arg-max class is the
    same either way.
    """

    def __init__(self):
        # Zero-argument super() does not look up the global name `model_4`,
        # which may later be rebound to an instance of this class.
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, 3)
        # The pooling and dropout layers are defined but not applied in
        # forward(); they hold no parameters. Enabling pooling would change
        # the flattened size that fc1 expects.
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.dropout1 = nn.Dropout(0.3)
        self.conv2 = nn.Conv2d(16, 32, 3)
        self.dropout2 = nn.Dropout(0.25)
        self.conv3 = nn.Conv2d(32, 32, 3)
        self.dropout3 = nn.Dropout(0.25)
        # 15488 = 32 channels * 22 * 22: each unpadded 3x3 convolution trims
        # 2 pixels per spatial dimension (28 -> 26 -> 24 -> 22), so the model
        # expects 28x28 inputs.
        self.fc1 = nn.Linear(15488, 7)

    def forward(self, x):
        """Return class logits of shape (N, 7) for a batch ``x`` of shape (N, 28, 28)."""
        x = x.unsqueeze(1)  # insert the channel axis: (N, H, W) -> (N, 1, H, W)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        # Flatten with an explicit batch size so an empty batch does not make
        # the inferred (-1) dimension ambiguous.
        x = x.view(x.size(0), self.num_flat_features(x))
        return self.fc1(x)

    def num_flat_features(self, x):
        """Return the number of elements per sample, i.e. the product of all dims but the first."""
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
# Train the fourth model
# These assignments overwrite the notebook-wide lr / num_epochs, so cells run
# afterwards see these values as well.
lr = 0.001
num_epochs = 10
# NOTE(review): this rebinds the name `model_4` from the class to an instance,
# so the class is no longer reachable by name until its definition is re-run.
model_4 = model_4()
print(count_parameters(model_4))
criterion_4 = nn.CrossEntropyLoss()
optimizer_4 = optim.Adam(model_4.parameters(), lr=lr)
model_4 = train_model(model_4, train_loader_7, criterion_4, optimizer_4, num_epochs)
# Test the fourth model
test_model(model_4, test_loader_7)

122471


Epoch 0/10: 100%|██████████| 165/165 [00:53<00:00,  3.07batch/s]


Epoch 0/10, Average Loss: 1.4624, Accuracy: 70.35%


Epoch 1/10: 100%|██████████| 165/165 [00:52<00:00,  3.15batch/s]


Epoch 1/10, Average Loss: 1.4003, Accuracy: 76.39%


Epoch 2/10: 100%|██████████| 165/165 [00:52<00:00,  3.14batch/s]


Epoch 2/10, Average Loss: 1.3868, Accuracy: 77.77%


Epoch 3/10: 100%|██████████| 165/165 [00:54<00:00,  3.04batch/s]


Epoch 3/10, Average Loss: 1.3786, Accuracy: 78.56%


Epoch 4/10: 100%|██████████| 165/165 [00:54<00:00,  3.04batch/s]


Epoch 4/10, Average Loss: 1.3706, Accuracy: 79.38%


Epoch 5/10: 100%|██████████| 165/165 [00:55<00:00,  2.96batch/s]


Epoch 5/10, Average Loss: 1.3661, Accuracy: 79.85%


Epoch 6/10: 100%|██████████| 165/165 [00:53<00:00,  3.06batch/s]


Epoch 6/10, Average Loss: 1.3631, Accuracy: 80.15%


Epoch 7/10: 100%|██████████| 165/165 [00:52<00:00,  3.15batch/s]


Epoch 7/10, Average Loss: 1.3617, Accuracy: 80.30%


Epoch 8/10: 100%|██████████| 165/165 [00:51<00:00,  3.24batch/s]


Epoch 8/10, Average Loss: 1.3184, Accuracy: 84.66%


Epoch 9/10: 100%|██████████| 165/165 [00:56<00:00,  2.94batch/s]


Epoch 9/10, Average Loss: 1.2974, Accuracy: 86.77%
Test Accuracy: 85.89%


In [None]:
# Train the first model on the 3-class subset.
# NOTE(review): the `model_1` class is not defined in the visible part of this
# notebook -- presumably it comes from the commented-out `models` import or
# from another cell; confirm before running.
# NOTE(review): the next line rebinds `model_1` from the class to an instance,
# so re-running this cell alone calls the instance instead of the class.
# `lr` and `num_epochs` are read from the notebook globals, i.e. whatever the
# most recently executed cell assigned to them.
model_1 = model_1()
criterion_1 = nn.CrossEntropyLoss()
optimizer_1 = optim.Adam(model_1.parameters(), lr=lr)
model_1 = train_model(model_1, train_loader_3, criterion_1, optimizer_1, num_epochs)

# Test the first model on the 3-class test subset, then report its trainable
# parameter count.
test_model(model_1, test_loader_3)
print(count_parameters(model_1))

Epoch 0/10: 100%|██████████| 36/36 [00:00<00:00, 67.52batch/s]


Epoch 0/10, Average Loss: 0.6341, Accuracy: 84.68%


Epoch 1/10: 100%|██████████| 36/36 [00:00<00:00, 100.30batch/s]


Epoch 1/10, Average Loss: 0.1841, Accuracy: 94.87%


Epoch 2/10: 100%|██████████| 36/36 [00:00<00:00, 115.43batch/s]


Epoch 2/10, Average Loss: 0.1485, Accuracy: 95.70%


Epoch 3/10: 100%|██████████| 36/36 [00:00<00:00, 110.52batch/s]


Epoch 3/10, Average Loss: 0.1307, Accuracy: 96.22%


Epoch 4/10: 100%|██████████| 36/36 [00:00<00:00, 92.21batch/s]


Epoch 4/10, Average Loss: 0.1164, Accuracy: 96.51%


Epoch 5/10: 100%|██████████| 36/36 [00:00<00:00, 105.62batch/s]


Epoch 5/10, Average Loss: 0.1069, Accuracy: 96.76%


Epoch 6/10: 100%|██████████| 36/36 [00:00<00:00, 114.19batch/s]


Epoch 6/10, Average Loss: 0.0993, Accuracy: 97.02%


Epoch 7/10: 100%|██████████| 36/36 [00:00<00:00, 61.92batch/s]


Epoch 7/10, Average Loss: 0.0952, Accuracy: 97.12%


Epoch 8/10: 100%|██████████| 36/36 [00:00<00:00, 101.61batch/s]


Epoch 8/10, Average Loss: 0.0922, Accuracy: 97.26%


Epoch 9/10: 100%|██████████| 36/36 [00:00<00:00, 95.78batch/s]


Epoch 9/10, Average Loss: 0.0879, Accuracy: 97.38%
Test Accuracy: 96.63%
48383


In [None]:
# Train the second model on the 7-class subset.
# NOTE(review): the `model_2` class is not defined in the visible part of this
# notebook -- presumably it comes from the commented-out `models` import or
# from another cell; confirm before running.
# NOTE(review): the next line rebinds `model_2` from the class to an instance,
# so re-running this cell alone calls the instance instead of the class.
# `lr` and `num_epochs` are read from the notebook globals, i.e. whatever the
# most recently executed cell assigned to them.
model_2 = model_2()
criterion_2 = nn.CrossEntropyLoss()
optimizer_2 = optim.Adam(model_2.parameters(), lr=lr)
model_2 = train_model(model_2, train_loader_7, criterion_2, optimizer_2, num_epochs)
# Test the second model on the 7-class test subset, then report its trainable
# parameter count.
test_model(model_2, test_loader_7)
print(count_parameters(model_2))

Epoch 0/50: 100%|██████████| 329/329 [00:01<00:00, 195.09batch/s]


Epoch 0/50, Average Loss: 0.7379, Accuracy: 73.89%


Epoch 1/50: 100%|██████████| 329/329 [00:01<00:00, 204.98batch/s]


Epoch 1/50, Average Loss: 0.5073, Accuracy: 81.50%


Epoch 2/50: 100%|██████████| 329/329 [00:01<00:00, 243.19batch/s]


Epoch 2/50, Average Loss: 0.4673, Accuracy: 82.75%


Epoch 3/50: 100%|██████████| 329/329 [00:01<00:00, 251.24batch/s]


Epoch 3/50, Average Loss: 0.4411, Accuracy: 83.75%


Epoch 4/50: 100%|██████████| 329/329 [00:01<00:00, 233.10batch/s]


Epoch 4/50, Average Loss: 0.4213, Accuracy: 84.41%


Epoch 5/50: 100%|██████████| 329/329 [00:01<00:00, 236.21batch/s]


Epoch 5/50, Average Loss: 0.4062, Accuracy: 85.03%


Epoch 6/50: 100%|██████████| 329/329 [00:01<00:00, 273.16batch/s]


Epoch 6/50, Average Loss: 0.3947, Accuracy: 85.44%


Epoch 7/50: 100%|██████████| 329/329 [00:01<00:00, 271.17batch/s]


Epoch 7/50, Average Loss: 0.3846, Accuracy: 85.76%


Epoch 8/50: 100%|██████████| 329/329 [00:01<00:00, 239.38batch/s]


Epoch 8/50, Average Loss: 0.3758, Accuracy: 86.04%


Epoch 9/50: 100%|██████████| 329/329 [00:02<00:00, 154.19batch/s]


Epoch 9/50, Average Loss: 0.3674, Accuracy: 86.40%


Epoch 10/50: 100%|██████████| 329/329 [00:01<00:00, 252.14batch/s]


Epoch 10/50, Average Loss: 0.3597, Accuracy: 86.67%


Epoch 11/50: 100%|██████████| 329/329 [00:01<00:00, 270.49batch/s]


Epoch 11/50, Average Loss: 0.3516, Accuracy: 87.03%


Epoch 12/50: 100%|██████████| 329/329 [00:01<00:00, 241.96batch/s]


Epoch 12/50, Average Loss: 0.3452, Accuracy: 87.20%


Epoch 13/50: 100%|██████████| 329/329 [00:01<00:00, 274.83batch/s]


Epoch 13/50, Average Loss: 0.3386, Accuracy: 87.45%


Epoch 14/50: 100%|██████████| 329/329 [00:01<00:00, 258.77batch/s]


Epoch 14/50, Average Loss: 0.3317, Accuracy: 87.69%


Epoch 15/50: 100%|██████████| 329/329 [00:01<00:00, 235.72batch/s]


Epoch 15/50, Average Loss: 0.3263, Accuracy: 87.96%


Epoch 16/50: 100%|██████████| 329/329 [00:01<00:00, 213.28batch/s]


Epoch 16/50, Average Loss: 0.3207, Accuracy: 88.17%


Epoch 17/50: 100%|██████████| 329/329 [00:02<00:00, 142.48batch/s]


Epoch 17/50, Average Loss: 0.3150, Accuracy: 88.31%


Epoch 18/50: 100%|██████████| 329/329 [00:01<00:00, 199.75batch/s]


Epoch 18/50, Average Loss: 0.3103, Accuracy: 88.47%


Epoch 19/50: 100%|██████████| 329/329 [00:01<00:00, 253.25batch/s]


Epoch 19/50, Average Loss: 0.3050, Accuracy: 88.73%


Epoch 20/50: 100%|██████████| 329/329 [00:01<00:00, 244.32batch/s]


Epoch 20/50, Average Loss: 0.3000, Accuracy: 88.90%


Epoch 21/50: 100%|██████████| 329/329 [00:01<00:00, 243.65batch/s]


Epoch 21/50, Average Loss: 0.2952, Accuracy: 89.09%


Epoch 22/50: 100%|██████████| 329/329 [00:01<00:00, 237.04batch/s]


Epoch 22/50, Average Loss: 0.2914, Accuracy: 89.20%


Epoch 23/50: 100%|██████████| 329/329 [00:01<00:00, 274.21batch/s]


Epoch 23/50, Average Loss: 0.2873, Accuracy: 89.37%


Epoch 24/50: 100%|██████████| 329/329 [00:01<00:00, 256.63batch/s]


Epoch 24/50, Average Loss: 0.2833, Accuracy: 89.53%


Epoch 25/50: 100%|██████████| 329/329 [00:01<00:00, 274.78batch/s]


Epoch 25/50, Average Loss: 0.2795, Accuracy: 89.58%


Epoch 26/50: 100%|██████████| 329/329 [00:01<00:00, 177.15batch/s]


Epoch 26/50, Average Loss: 0.2754, Accuracy: 89.76%


Epoch 27/50: 100%|██████████| 329/329 [00:01<00:00, 225.25batch/s]


Epoch 27/50, Average Loss: 0.2721, Accuracy: 89.88%


Epoch 28/50: 100%|██████████| 329/329 [00:01<00:00, 249.16batch/s]


Epoch 28/50, Average Loss: 0.2689, Accuracy: 90.00%


Epoch 29/50: 100%|██████████| 329/329 [00:01<00:00, 240.48batch/s]


Epoch 29/50, Average Loss: 0.2654, Accuracy: 90.10%


Epoch 30/50: 100%|██████████| 329/329 [00:01<00:00, 262.76batch/s]


Epoch 30/50, Average Loss: 0.2608, Accuracy: 90.37%


Epoch 31/50: 100%|██████████| 329/329 [00:01<00:00, 278.07batch/s]


Epoch 31/50, Average Loss: 0.2580, Accuracy: 90.40%


Epoch 32/50: 100%|██████████| 329/329 [00:01<00:00, 270.80batch/s]


Epoch 32/50, Average Loss: 0.2549, Accuracy: 90.50%


Epoch 33/50: 100%|██████████| 329/329 [00:01<00:00, 276.11batch/s]


Epoch 33/50, Average Loss: 0.2515, Accuracy: 90.67%


Epoch 34/50: 100%|██████████| 329/329 [00:01<00:00, 233.95batch/s]


Epoch 34/50, Average Loss: 0.2485, Accuracy: 90.85%


Epoch 35/50: 100%|██████████| 329/329 [00:01<00:00, 181.42batch/s]


Epoch 35/50, Average Loss: 0.2452, Accuracy: 90.85%


Epoch 36/50: 100%|██████████| 329/329 [00:01<00:00, 246.46batch/s]


Epoch 36/50, Average Loss: 0.2425, Accuracy: 91.03%


Epoch 37/50: 100%|██████████| 329/329 [00:01<00:00, 234.43batch/s]


Epoch 37/50, Average Loss: 0.2393, Accuracy: 91.18%


Epoch 38/50: 100%|██████████| 329/329 [00:01<00:00, 249.66batch/s]


Epoch 38/50, Average Loss: 0.2362, Accuracy: 91.24%


Epoch 39/50: 100%|██████████| 329/329 [00:01<00:00, 266.55batch/s]


Epoch 39/50, Average Loss: 0.2339, Accuracy: 91.31%


Epoch 40/50: 100%|██████████| 329/329 [00:01<00:00, 250.30batch/s]


Epoch 40/50, Average Loss: 0.2308, Accuracy: 91.41%


Epoch 41/50: 100%|██████████| 329/329 [00:01<00:00, 229.52batch/s]


Epoch 41/50, Average Loss: 0.2280, Accuracy: 91.57%


Epoch 42/50: 100%|██████████| 329/329 [00:01<00:00, 248.67batch/s]


Epoch 42/50, Average Loss: 0.2251, Accuracy: 91.64%


Epoch 43/50: 100%|██████████| 329/329 [00:01<00:00, 183.18batch/s]


Epoch 43/50, Average Loss: 0.2226, Accuracy: 91.73%


Epoch 44/50: 100%|██████████| 329/329 [00:01<00:00, 227.34batch/s]


Epoch 44/50, Average Loss: 0.2192, Accuracy: 91.91%


Epoch 45/50: 100%|██████████| 329/329 [00:01<00:00, 247.59batch/s]


Epoch 45/50, Average Loss: 0.2171, Accuracy: 91.95%


Epoch 46/50: 100%|██████████| 329/329 [00:01<00:00, 240.44batch/s]


Epoch 46/50, Average Loss: 0.2144, Accuracy: 92.11%


Epoch 47/50: 100%|██████████| 329/329 [00:01<00:00, 240.47batch/s]


Epoch 47/50, Average Loss: 0.2127, Accuracy: 92.11%


Epoch 48/50: 100%|██████████| 329/329 [00:01<00:00, 253.64batch/s]


Epoch 48/50, Average Loss: 0.2095, Accuracy: 92.27%


Epoch 49/50: 100%|██████████| 329/329 [00:01<00:00, 235.55batch/s]


Epoch 49/50, Average Loss: 0.2074, Accuracy: 92.38%
Test Accuracy: 85.84%
48467


In [None]:
# Train the third model on the 7-class subset.
# NOTE(review): the `model_3` class is not defined in the visible part of this
# notebook -- presumably it comes from the commented-out `models` import or
# from another cell; confirm before running.
# NOTE(review): the next line rebinds `model_3` from the class to an instance,
# so re-running this cell alone calls the instance instead of the class.
# `lr` and `num_epochs` are read from the notebook globals, i.e. whatever the
# most recently executed cell assigned to them.
model_3 = model_3()
criterion_3 = nn.CrossEntropyLoss()
optimizer_3 = optim.Adam(model_3.parameters(), lr=lr)
model_3 = train_model(model_3, train_loader_7, criterion_3, optimizer_3, num_epochs)
# Test the third model on the 7-class test subset, then report its trainable
# parameter count.
test_model(model_3, test_loader_7)
print(count_parameters(model_3))

49267


Epoch 0/50: 100%|██████████| 329/329 [00:01<00:00, 211.69batch/s]


Epoch 0/50, Average Loss: 1.1642, Accuracy: 51.67%


Epoch 1/50:  82%|████████▏ | 271/329 [00:01<00:00, 203.12batch/s]


KeyboardInterrupt: 