In [1]:
import numpy as np
import pandas as pd
import torch
import torchvision
import torchvision.transforms as transforms
import time
from tqdm import tqdm

Определяем устройство для использования

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

Определяем преобразование, которое применим к набору данных

In [3]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,))])

Загружаем набор обучающих данных

In [None]:
trainset = torchvision.datasets.FashionMNIST(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64,
                                          shuffle=True, num_workers=2)

Загружаем тестовый набор данных

In [5]:
testset = torchvision.datasets.FashionMNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64,
                                         shuffle=False, num_workers=2)

Определяем архитектуру нейронной сети

In [6]:
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = torch.nn.Linear(784, 256)
        self.fc2 = torch.nn.Linear(256, 128)
        self.fc3 = torch.nn.Linear(128, 64)
        self.fc4 = torch.nn.Linear(64, 10)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        x = x.view(-1, 784)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)
        return x

Определяем функцию обучения

In [7]:
def train(net, optimizer, criterion, epochs):
    for epoch in range(epochs):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data[0].to(device), data[1].to(device)
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print('[Epoch %d] loss: %.3f' %
              (epoch + 1, running_loss / len(trainloader)))

Определяем функцию для оценки точности сети

In [8]:
def evaluate(net, dataloader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data in dataloader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = net(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy

Список оптимизаторов для сравнения

In [9]:
optimizers = ['SGD', 'SGD with momentum', 'Adagrad', 'Adadelta', 'RMSprop', 'Adam', 'Adamax']

Обучаем и оцениванием сеть для каждого оптимизатора

In [10]:
df_results = pd.DataFrame(columns=['Optimizer', 'Time(s)', 'Accuracy(%)'])
for optimizer_name in tqdm(optimizers):
    print(f"Training with {optimizer_name} optimizer...")
    
    # Определяем нейронную сеть
    net = Net()
    net.to(device)

    # Определяем функцию потерь и оптимизатор
    criterion = torch.nn.CrossEntropyLoss()
    if optimizer_name == 'SGD':
      optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
    elif optimizer_name == 'SGD with momentum':
      optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
    elif optimizer_name == 'Adagrad':
      optimizer = torch.optim.Adagrad(net.parameters(), lr=0.01)
    elif optimizer_name == 'Adadelta':
      optimizer = torch.optim.Adadelta(net.parameters(), lr=0.01)
    elif optimizer_name == 'RMSprop':
      optimizer = torch.optim.RMSprop(net.parameters(), lr=0.01)
    elif optimizer_name == 'Adam':
      optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
    elif optimizer_name == 'Adamax':
      optimizer = torch.optim.Adamax(net.parameters(), lr=0.01)

    # Обучаем сеть и измеряем время
    start_time = time.time()
    train(net, optimizer, criterion, epochs=8)
    end_time = time.time()
    training_time = end_time - start_time

    # Оцениваем точность сети на тестовом наборе
    test_accuracy = evaluate(net, testloader)

    # Сохраняем результаты
    res = [optimizer_name, training_time, test_accuracy]
    df_results.loc[len(df_results)] = res
    print('\n')
    print('{:<20s}{:<20s}{:<20s}'.format('Optimizer', 'Training Time (s)', 'Test Accuracy (%)'))
    print('{:<20s}{:<20.4f}{:<20.2f}'.format(optimizer_name, training_time, test_accuracy))
    
print('\n')
print(df_results)

  0%|          | 0/7 [00:00<?, ?it/s]

Training with SGD optimizer...
[Epoch 1] loss: 1.266
[Epoch 2] loss: 0.617
[Epoch 3] loss: 0.524
[Epoch 4] loss: 0.474
[Epoch 5] loss: 0.441
[Epoch 6] loss: 0.419
[Epoch 7] loss: 0.401
[Epoch 8] loss: 0.386


 14%|█▍        | 1/7 [02:11<13:06, 131.14s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
SGD                 123.0685            84.97               
Training with SGD with momentum optimizer...
[Epoch 1] loss: 0.647
[Epoch 2] loss: 0.406
[Epoch 3] loss: 0.359
[Epoch 4] loss: 0.333
[Epoch 5] loss: 0.311
[Epoch 6] loss: 0.293
[Epoch 7] loss: 0.281
[Epoch 8] loss: 0.266


 29%|██▊       | 2/7 [04:12<10:26, 125.23s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
SGD with momentum   118.8189            86.87               
Training with Adagrad optimizer...
[Epoch 1] loss: 0.512
[Epoch 2] loss: 0.370
[Epoch 3] loss: 0.335
[Epoch 4] loss: 0.313
[Epoch 5] loss: 0.297
[Epoch 6] loss: 0.284
[Epoch 7] loss: 0.273
[Epoch 8] loss: 0.264


 43%|████▎     | 3/7 [06:18<08:22, 125.68s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
Adagrad             123.9715            87.44               
Training with Adadelta optimizer...
[Epoch 1] loss: 1.573
[Epoch 2] loss: 0.751
[Epoch 3] loss: 0.631
[Epoch 4] loss: 0.578
[Epoch 5] loss: 0.543
[Epoch 6] loss: 0.517
[Epoch 7] loss: 0.496
[Epoch 8] loss: 0.480


 57%|█████▋    | 4/7 [08:19<06:11, 123.85s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
Adadelta            118.7917            81.59               
Training with RMSprop optimizer...
[Epoch 1] loss: 1.671
[Epoch 2] loss: 0.537
[Epoch 3] loss: 0.503
[Epoch 4] loss: 0.470
[Epoch 5] loss: 0.479
[Epoch 6] loss: 0.459
[Epoch 7] loss: 0.457
[Epoch 8] loss: 0.436


 71%|███████▏  | 5/7 [10:19<04:04, 122.39s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
RMSprop             117.5868            83.77               
Training with Adam optimizer...
[Epoch 1] loss: 0.584
[Epoch 2] loss: 0.466
[Epoch 3] loss: 0.435
[Epoch 4] loss: 0.418
[Epoch 5] loss: 0.409
[Epoch 6] loss: 0.412
[Epoch 7] loss: 0.393
[Epoch 8] loss: 0.396


 86%|████████▌ | 6/7 [12:20<02:01, 121.90s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
Adam                118.6841            84.30               
Training with Adamax optimizer...
[Epoch 1] loss: 0.511
[Epoch 2] loss: 0.376
[Epoch 3] loss: 0.343
[Epoch 4] loss: 0.322
[Epoch 5] loss: 0.302
[Epoch 6] loss: 0.288
[Epoch 7] loss: 0.274
[Epoch 8] loss: 0.261


100%|██████████| 7/7 [14:26<00:00, 123.76s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
Adamax              123.8243            86.96               


           Optimizer     Time(s)  Accuracy(%)
0                SGD  123.068459        84.97
1  SGD with momentum  118.818883        86.87
2            Adagrad  123.971509        87.44
3           Adadelta  118.791675        81.59
4            RMSprop  117.586797        83.77
5               Adam  118.684050        84.30
6             Adamax  123.824324        86.96





In [11]:
df_results

Unnamed: 0,Optimizer,Time(s),Accuracy(%)
0,SGD,123.068459,84.97
1,SGD with momentum,118.818883,86.87
2,Adagrad,123.971509,87.44
3,Adadelta,118.791675,81.59
4,RMSprop,117.586797,83.77
5,Adam,118.68405,84.3
6,Adamax,123.824324,86.96
