In [None]:
import numpy as np
import pandas as pd
import torch
import torchvision
import torchvision.transforms as transforms
import time
from tqdm import tqdm

In [None]:
# Run on the first CUDA GPU when one is available, otherwise on the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Settings shared by both dataset splits and both loaders
DATA_ROOT = './data'
BATCH_SIZE = 64
NUM_WORKERS = 2

# Preprocessing applied to every image: convert to a tensor, then normalise
# the single channel with the constants 0.5 and 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Training split (downloaded into DATA_ROOT if needed), shuffled by its loader
trainset = torchvision.datasets.FashionMNIST(
    root=DATA_ROOT, train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)

# Test split with the same preprocessing, iterated without shuffling
testset = torchvision.datasets.FashionMNIST(
    root=DATA_ROOT, train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)



Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw



In [None]:
# Define the neural network architecture
class Net(torch.nn.Module):
    """Fully connected classifier: 784 -> 256 -> 128 -> 64 -> 10.

    A ReLU follows each of the first three linear layers. The last layer's
    output is returned as-is (no activation is applied to it here).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(784, 256)
        self.fc2 = torch.nn.Linear(256, 128)
        self.fc3 = torch.nn.Linear(128, 64)
        self.fc4 = torch.nn.Linear(64, 10)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Flatten ``x`` into rows of 784 values and return 10 scores per row.

        Args:
            x: Tensor whose total number of elements is a multiple of 784.

        Returns:
            Tensor of shape ``(rows, 10)``.
        """
        # reshape behaves like view for contiguous inputs but also accepts
        # non-contiguous ones (e.g. transposed batches), where view raises.
        x = x.reshape(-1, self.fc1.in_features)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        return self.fc4(x)

# Define the training function
def train(net, optimizer, criterion, epochs, dataloader=None):
    """Train ``net`` for ``epochs`` passes and print the mean loss per epoch.

    Args:
        net: Model to optimise; its parameters are updated in place.
        optimizer: Optimizer used to zero gradients and apply each step.
        criterion: Loss, called as ``criterion(outputs, labels)``.
        epochs: Number of full passes over the data.
        dataloader: Iterable of batches where item 0 is the inputs and item 1
            the labels. Defaults to the notebook-level ``trainloader``.

    Returns:
        list[float]: Mean batch loss of each epoch, in order. An epoch that
        yields no batches is recorded as NaN instead of dividing by zero.
    """
    loader = trainloader if dataloader is None else dataloader

    # Send batches to wherever the model lives; the notebook-level ``device``
    # is only needed for modules that have no parameters.
    first_param = next(net.parameters(), None)
    target = first_param.device if first_param is not None else device

    # Put mode-dependent layers (dropout, batch norm, ...) in training mode.
    net.train()

    epoch_losses = []
    for epoch in range(epochs):
        running_loss = 0.0
        num_batches = 0
        for batch in loader:
            inputs, labels = batch[0].to(target), batch[1].to(target)
            optimizer.zero_grad()
            loss = criterion(net(inputs), labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
        mean_loss = running_loss / num_batches if num_batches else float('nan')
        epoch_losses.append(mean_loss)
        print('[Epoch %d] loss: %.3f' % (epoch + 1, mean_loss))
    return epoch_losses
        
# Define the function to evaluate the accuracy of the network
def evaluate(net, dataloader):
    """Return the accuracy of ``net`` on ``dataloader`` as a percentage.

    The model is switched to evaluation mode for the duration of the call and
    put back into whatever mode it was in afterwards.

    Args:
        net: Model producing one row of class scores per input.
        dataloader: Iterable of batches where item 0 is the inputs and item 1
            the integer class labels.

    Returns:
        float: ``100 * correct / total``, or NaN when no samples were seen.
    """
    # Send batches to wherever the model lives; the notebook-level ``device``
    # is only needed for modules that have no parameters.
    first_param = next(net.parameters(), None)
    target = first_param.device if first_param is not None else device

    was_training = net.training
    net.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for data in dataloader:
                inputs, labels = data[0].to(target), data[1].to(target)
                # Predicted class = index of the highest score in each row
                predicted = net(inputs).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Restore the caller's mode even if a batch raised
        net.train(was_training)

    if total == 0:
        return float('nan')
    return 100 * correct / total

In [None]:
# Shared experiment settings, applied identically to every optimizer
SEED = 42
EPOCHS = 8
LEARNING_RATE = 0.01

# Map each display name to a factory that builds the optimizer for a model's
# parameters. With a lookup table, an unrecognised name fails with a KeyError
# instead of silently reusing the optimizer left over from the previous run.
# NOTE(review): the same learning rate is used for every optimizer; library
# defaults differ per optimizer, so confirm this shared value is intended.
optimizer_factories = {
    'SGD': lambda params: torch.optim.SGD(params, lr=LEARNING_RATE),
    'SGD with momentum': lambda params: torch.optim.SGD(params, lr=LEARNING_RATE, momentum=0.9),
    'Adagrad': lambda params: torch.optim.Adagrad(params, lr=LEARNING_RATE),
    'Adadelta': lambda params: torch.optim.Adadelta(params, lr=LEARNING_RATE),
    'RMSprop': lambda params: torch.optim.RMSprop(params, lr=LEARNING_RATE),
    'Adam': lambda params: torch.optim.Adam(params, lr=LEARNING_RATE),
    'Adamax': lambda params: torch.optim.Adamax(params, lr=LEARNING_RATE),
}

# Define the list of optimizers to compare
optimizers = list(optimizer_factories)

# Train and evaluate the network for each optimizer
results = []
for optimizer_name in tqdm(optimizers):
    print(f"Training with {optimizer_name} optimizer...")

    # Re-seed before building the model so every optimizer starts from the
    # same initial weights and the same global RNG state.
    torch.manual_seed(SEED)

    # Define the neural network
    net = Net()
    net.to(device)

    # Define the loss function and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optimizer_factories[optimizer_name](net.parameters())

    # Train the network and measure the time. perf_counter is the clock meant
    # for measuring intervals; on CUDA, wait for queued kernels to finish so
    # the measurement covers all of the training work.
    start_time = time.perf_counter()
    train(net, optimizer, criterion, epochs=EPOCHS)
    if device.type == 'cuda':
        torch.cuda.synchronize(device)
    training_time = time.perf_counter() - start_time

    # Evaluate the accuracy of the network on the test set
    test_accuracy = evaluate(net, testloader)

    # Store the results
    results.append([optimizer_name, training_time, test_accuracy])
    print('\n')
    print('{:<20s}{:<20s}{:<20s}'.format('Optimizer', 'Training Time (s)', 'Test Accuracy (%)'))
    print('{:<20s}{:<20.4f}{:<20.2f}'.format(optimizer_name, training_time, test_accuracy))

# Build the summary table once from the collected rows
df_results = pd.DataFrame(results, columns=['Optimizer', 'Time(s)', 'Accuracy(%)'])

print('\n')
print(df_results)

  0%|          | 0/7 [00:00<?, ?it/s]

Training with SGD optimizer...
[Epoch 1] loss: 1.316
[Epoch 2] loss: 0.613
[Epoch 3] loss: 0.518
[Epoch 4] loss: 0.471
[Epoch 5] loss: 0.444
[Epoch 6] loss: 0.423
[Epoch 7] loss: 0.406
[Epoch 8] loss: 0.391


 14%|█▍        | 1/7 [02:10<13:04, 130.70s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
SGD                 128.2960            84.60               
Training with SGD with momentum optimizer...
[Epoch 1] loss: 0.645
[Epoch 2] loss: 0.405
[Epoch 3] loss: 0.356
[Epoch 4] loss: 0.331
[Epoch 5] loss: 0.310
[Epoch 6] loss: 0.293
[Epoch 7] loss: 0.281
[Epoch 8] loss: 0.266


 29%|██▊       | 2/7 [04:25<11:04, 132.83s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
SGD with momentum   130.8369            88.21               
Training with Adagrad optimizer...
[Epoch 1] loss: 0.507
[Epoch 2] loss: 0.368
[Epoch 3] loss: 0.335
[Epoch 4] loss: 0.313
[Epoch 5] loss: 0.298
[Epoch 6] loss: 0.285
[Epoch 7] loss: 0.274
[Epoch 8] loss: 0.264


 43%|████▎     | 3/7 [06:39<08:53, 133.45s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
Adagrad             131.8171            87.76               
Training with Adadelta optimizer...
[Epoch 1] loss: 1.580
[Epoch 2] loss: 0.796
[Epoch 3] loss: 0.649
[Epoch 4] loss: 0.588
[Epoch 5] loss: 0.549
[Epoch 6] loss: 0.520
[Epoch 7] loss: 0.497
[Epoch 8] loss: 0.479


 57%|█████▋    | 4/7 [08:56<06:44, 134.99s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
Adadelta            135.0721            81.99               
Training with RMSprop optimizer...
[Epoch 1] loss: 2.202
[Epoch 2] loss: 0.538
[Epoch 3] loss: 0.498
[Epoch 4] loss: 0.481
[Epoch 5] loss: 0.476
[Epoch 6] loss: 0.467
[Epoch 7] loss: 0.464
[Epoch 8] loss: 0.446


 71%|███████▏  | 5/7 [11:09<04:28, 134.41s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
RMSprop             131.1167            83.26               
Training with Adam optimizer...
[Epoch 1] loss: 0.578
[Epoch 2] loss: 0.470
[Epoch 3] loss: 0.426
[Epoch 4] loss: 0.421
[Epoch 5] loss: 0.408
[Epoch 6] loss: 0.389
[Epoch 7] loss: 0.384
[Epoch 8] loss: 0.412


 86%|████████▌ | 6/7 [13:31<02:16, 136.99s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
Adam                138.9742            85.46               
Training with Adamax optimizer...
[Epoch 1] loss: 0.511
[Epoch 2] loss: 0.378
[Epoch 3] loss: 0.340
[Epoch 4] loss: 0.319
[Epoch 5] loss: 0.296
[Epoch 6] loss: 0.285
[Epoch 7] loss: 0.271
[Epoch 8] loss: 0.258


100%|██████████| 7/7 [15:49<00:00, 135.64s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
Adamax              134.7698            87.95               


           Optimizer     Time(s)  Accuracy(%)
0                SGD  128.296008        84.60
1  SGD with momentum  130.836945        88.21
2            Adagrad  131.817068        87.76
3           Adadelta  135.072112        81.99
4            RMSprop  131.116693        83.26
5               Adam  138.974241        85.46
6             Adamax  134.769769        87.95





In [None]:
# Display the per-optimizer results table (columns: Optimizer, Time(s), Accuracy(%))
df_results

Unnamed: 0,Optimizer,Time(s),Accuracy(%)
0,SGD,128.296008,84.6
1,SGD with momentum,130.836945,88.21
2,Adagrad,131.817068,87.76
3,Adadelta,135.072112,81.99
4,RMSprop,131.116693,83.26
5,Adam,138.974241,85.46
6,Adamax,134.769769,87.95
