In [1]:
import numpy as np
import pandas as pd
import torch
import torchvision
import torchvision.transforms as transforms
import time
from tqdm import tqdm

Определяем устройство для использования

In [2]:
# Run on the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

Определяем преобразование, которое применим к набору данных

In [3]:
# Per-image preprocessing pipeline: convert the image to a tensor, then
# normalize its single channel with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

Загружаем набор обучающих данных

In [4]:
# Training split of Fashion-MNIST stored under ./data (fetched if missing),
# with `transform` applied to every image.
trainset = torchvision.datasets.FashionMNIST(
    root='./data',
    train=True,
    transform=transform,
    download=True,
)
# Batches of 64 samples, shuffled, loaded by two worker processes.
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=64,
    num_workers=2,
    shuffle=True,
)

Загружаем тестовый набор данных

In [5]:
# Held-out test split of Fashion-MNIST stored under ./data (fetched if
# missing), preprocessed with the same `transform` as the training data.
testset = torchvision.datasets.FashionMNIST(
    root='./data',
    train=False,
    transform=transform,
    download=True,
)
# Batches of 64 samples in fixed order (no shuffling), two worker processes.
testloader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=64,
    num_workers=2,
    shuffle=False,
)

Определяем архитектуру нейронной сети

In [6]:
class Net(torch.nn.Module):
    """Fully connected classifier: 784 -> 256 -> 128 -> 64 -> 10.

    ReLU is applied after each of the first three linear layers. The last
    layer returns raw scores (logits); no softmax is applied here.

    ``forward`` flattens its input into rows of 784 features, so any tensor
    whose number of elements is a multiple of 784 is accepted, for example a
    batch shaped ``(N, 1, 28, 28)``.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(784, 256)
        self.fc2 = torch.nn.Linear(256, 128)
        self.fc3 = torch.nn.Linear(128, 64)
        self.fc4 = torch.nn.Linear(64, 10)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Return a ``(rows, 10)`` tensor of logits for the input ``x``."""
        # `reshape` behaves like `view` for contiguous tensors but also works
        # for non-contiguous ones (e.g. transposed or sliced batches), where
        # `view` raises a RuntimeError. The row width is read from fc1 so the
        # input size is defined in exactly one place.
        x = x.reshape(-1, self.fc1.in_features)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        return self.fc4(x)

Определяем функцию обучения

In [7]:
def train(net, optimizer, criterion, epochs, dataloader=None):
    """Train ``net`` for ``epochs`` passes and print the mean loss per epoch.

    Args:
        net: Model to optimize; it is switched to training mode.
        optimizer: Optimizer already bound to ``net``'s parameters.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        epochs: Number of full passes over the data.
        dataloader: Iterable of batches whose first two items are the inputs
            and the labels. Defaults to the notebook-level ``trainloader``.

    Returns:
        None. One line ``[Epoch k] loss: x.xxx`` is printed per epoch, where
        the loss is the mean of the per-batch losses (``nan`` if the loader
        yielded no batches).
    """
    if dataloader is None:
        dataloader = trainloader

    # Make sure mode-dependent layers (dropout, batch norm) are in training
    # mode even if the model was previously put into eval mode.
    net.train()

    for epoch in range(epochs):
        running_loss = 0.0
        num_batches = 0
        for batch in dataloader:
            inputs, labels = batch[0].to(device), batch[1].to(device)
            optimizer.zero_grad()
            loss = criterion(net(inputs), labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
        # Count batches instead of calling len(): works for loaders without
        # __len__ and avoids a ZeroDivisionError on an empty loader.
        mean_loss = running_loss / num_batches if num_batches else float('nan')
        print('[Epoch %d] loss: %.3f' % (epoch + 1, mean_loss))

Определяем функцию для оценки точности сети

In [8]:
def evaluate(net, dataloader):
    """Return the classification accuracy of ``net`` on ``dataloader`` in percent.

    Args:
        net: Model producing one row of class scores per sample.
        dataloader: Iterable of batches whose first two items are the inputs
            and the integer class labels.

    Returns:
        Accuracy as a float in ``[0, 100]``; ``0.0`` if the loader yields no
        samples.

    The model is evaluated in eval mode and its previous training/eval mode
    is restored before returning.
    """
    correct = 0
    total = 0
    # Dropout / batch-norm layers behave differently in training mode, so
    # score the model in eval mode and put it back afterwards.
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            for batch in dataloader:
                inputs, labels = batch[0].to(device), batch[1].to(device)
                # Predicted class = index of the highest score in each row.
                predicted = net(inputs).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        net.train(was_training)

    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    return 100 * correct / total

Список оптимизаторов для сравнения

In [9]:
# Names of the optimizers to benchmark, in the order they will be run.
optimizers = [
    'SGD',
    'SGD with momentum',
    'Adagrad',
    'Adadelta',
    'RMSprop',
    'Adam',
    'Adamax',
]

Обучаем и оцениваем сеть для каждого оптимизатора

In [10]:
# Shared experiment settings: every optimizer gets the same learning rate,
# the same number of epochs and the same random seed.
LEARNING_RATE = 0.01
EPOCHS = 8
SEED = 0

# Maps each benchmark name to a factory that builds the optimizer for a given
# iterable of parameters.
# NOTE(review): one learning rate is used for all optimizers. It differs from
# several library defaults (e.g. Adadelta's default is 1.0, Adam's is 0.001),
# so the ranking reflects this setting rather than tuned optimizers.
OPTIMIZER_FACTORIES = {
    'SGD': lambda params: torch.optim.SGD(params, lr=LEARNING_RATE),
    'SGD with momentum': lambda params: torch.optim.SGD(params, lr=LEARNING_RATE, momentum=0.9),
    'Adagrad': lambda params: torch.optim.Adagrad(params, lr=LEARNING_RATE),
    'Adadelta': lambda params: torch.optim.Adadelta(params, lr=LEARNING_RATE),
    'RMSprop': lambda params: torch.optim.RMSprop(params, lr=LEARNING_RATE),
    'Adam': lambda params: torch.optim.Adam(params, lr=LEARNING_RATE),
    'Adamax': lambda params: torch.optim.Adamax(params, lr=LEARNING_RATE),
}

# One record per optimizer; the DataFrame is built once after the loop
# instead of being grown row by row.
results = []
for optimizer_name in tqdm(optimizers):
    print(f"Training with {optimizer_name} optimizer...")

    # Fail loudly on an unknown name instead of silently reusing the
    # optimizer left over from the previous iteration.
    try:
        make_optimizer = OPTIMIZER_FACTORIES[optimizer_name]
    except KeyError:
        raise ValueError(f"Unknown optimizer: {optimizer_name!r}") from None

    # Re-seed before building the network so every optimizer starts from the
    # same initial weights (presumably the same shuffling order as well,
    # since the DataLoader draws from the global RNG; exact reproducibility
    # on GPU is not guaranteed).
    torch.manual_seed(SEED)

    # Build a fresh network on the selected device.
    net = Net()
    net.to(device)

    # Loss function and the optimizer under test.
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = make_optimizer(net.parameters())

    # Train the network and measure wall-clock training time.
    start_time = time.perf_counter()
    train(net, optimizer, criterion, epochs=EPOCHS)
    training_time = time.perf_counter() - start_time

    # Accuracy on the held-out test set.
    test_accuracy = evaluate(net, testloader)

    # Store and report the result for this optimizer.
    results.append({
        'Optimizer': optimizer_name,
        'Time(s)': training_time,
        'Accuracy(%)': test_accuracy,
    })
    print('\n')
    print('{:<20s}{:<20s}{:<20s}'.format('Optimizer', 'Training Time (s)', 'Test Accuracy (%)'))
    print('{:<20s}{:<20.4f}{:<20.2f}'.format(optimizer_name, training_time, test_accuracy))

df_results = pd.DataFrame(results, columns=['Optimizer', 'Time(s)', 'Accuracy(%)'])

print('\n')
print(df_results)

  0%|          | 0/7 [00:00<?, ?it/s]

Training with SGD optimizer...
[Epoch 1] loss: 1.339
[Epoch 2] loss: 0.619
[Epoch 3] loss: 0.528
[Epoch 4] loss: 0.480
[Epoch 5] loss: 0.448
[Epoch 6] loss: 0.425
[Epoch 7] loss: 0.406
[Epoch 8] loss: 0.390


 14%|█▍        | 1/7 [02:06<12:37, 126.22s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
SGD                 121.2270            84.44               
Training with SGD with momentum optimizer...
[Epoch 1] loss: 0.641
[Epoch 2] loss: 0.399
[Epoch 3] loss: 0.359
[Epoch 4] loss: 0.327
[Epoch 5] loss: 0.309
[Epoch 6] loss: 0.289
[Epoch 7] loss: 0.276
[Epoch 8] loss: 0.265


 29%|██▊       | 2/7 [04:03<10:05, 121.04s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
SGD with momentum   115.1819            87.96               
Training with Adagrad optimizer...
[Epoch 1] loss: 0.506
[Epoch 2] loss: 0.366
[Epoch 3] loss: 0.333
[Epoch 4] loss: 0.311
[Epoch 5] loss: 0.295
[Epoch 6] loss: 0.282
[Epoch 7] loss: 0.272
[Epoch 8] loss: 0.262


 43%|████▎     | 3/7 [06:05<08:05, 121.31s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
Adagrad             118.4965            88.08               
Training with Adadelta optimizer...
[Epoch 1] loss: 1.693
[Epoch 2] loss: 0.824
[Epoch 3] loss: 0.668
[Epoch 4] loss: 0.597
[Epoch 5] loss: 0.552
[Epoch 6] loss: 0.519
[Epoch 7] loss: 0.496
[Epoch 8] loss: 0.478


 57%|█████▋    | 4/7 [08:04<06:01, 120.41s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
Adadelta            116.7665            81.98               
Training with RMSprop optimizer...
[Epoch 1] loss: 1.607
[Epoch 2] loss: 0.548
[Epoch 3] loss: 0.496
[Epoch 4] loss: 0.492
[Epoch 5] loss: 0.463
[Epoch 6] loss: 0.459
[Epoch 7] loss: 0.446
[Epoch 8] loss: 0.446


 71%|███████▏  | 5/7 [10:02<03:59, 119.74s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
RMSprop             115.8884            84.40               
Training with Adam optimizer...
[Epoch 1] loss: 0.582
[Epoch 2] loss: 0.462
[Epoch 3] loss: 0.431
[Epoch 4] loss: 0.421
[Epoch 5] loss: 0.404
[Epoch 6] loss: 0.402
[Epoch 7] loss: 0.396
[Epoch 8] loss: 0.388


 86%|████████▌ | 6/7 [12:02<01:59, 119.77s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
Adam                117.6215            84.72               
Training with Adamax optimizer...
[Epoch 1] loss: 0.510
[Epoch 2] loss: 0.381
[Epoch 3] loss: 0.340
[Epoch 4] loss: 0.320
[Epoch 5] loss: 0.303
[Epoch 6] loss: 0.286
[Epoch 7] loss: 0.272
[Epoch 8] loss: 0.260


100%|██████████| 7/7 [14:08<00:00, 121.16s/it]



Optimizer           Training Time (s)   Test Accuracy (%)   
Adamax              123.2785            87.27               


           Optimizer     Time(s)  Accuracy(%)
0                SGD  121.227005        84.44
1  SGD with momentum  115.181901        87.96
2            Adagrad  118.496519        88.08
3           Adadelta  116.766527        81.98
4            RMSprop  115.888372        84.40
5               Adam  117.621498        84.72
6             Adamax  123.278483        87.27





In [11]:
# Show the optimizer comparison table as this cell's output.
df_results

Unnamed: 0,Optimizer,Time(s),Accuracy(%)
0,SGD,121.227005,84.44
1,SGD with momentum,115.181901,87.96
2,Adagrad,118.496519,88.08
3,Adadelta,116.766527,81.98
4,RMSprop,115.888372,84.4
5,Adam,117.621498,84.72
6,Adamax,123.278483,87.27
