In [0]:
import torch
import random
import numpy as np
import time
import matplotlib.pyplot as plt

# Seed every random number generator the notebook relies on so that
# experiments can be reproduced from run to run.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
# Ask cuDNN to restrict itself to deterministic algorithms.
torch.backends.cudnn.deterministic = True

In [0]:
import torchvision.datasets

In [0]:
# Load the MNIST train and test splits, storing (and downloading if needed)
# the files under the current directory.
MNIST_train = torchvision.datasets.MNIST(root='./', train=True, download=True)
MNIST_test = torchvision.datasets.MNIST(root='./', train=False, download=True)

In [0]:
# Pull the image tensors and the label tensors out of the dataset objects.
# `data` / `targets` are the current attribute names; `train_data`,
# `train_labels`, `test_data` and `test_labels` are deprecated aliases that
# emit a warning before handing back the very same tensors.
X_train = MNIST_train.data
y_train = MNIST_train.targets
X_test = MNIST_test.data
y_test = MNIST_test.targets

In [0]:
# Insert the single grey channel axis, turning each 28x28 image into 1x28x28,
# and cast to float so the tensors can be fed to the convolutional layers.
# The dimensionality guard keeps the cell idempotent: re-running it must not
# stack a second channel axis onto tensors that were already converted.
if X_train.dim() == 3:
    X_train = X_train.unsqueeze(1).float()
if X_test.dim() == 3:
    X_test = X_test.unsqueeze(1).float()

In [0]:
class LeNet5(torch.nn.Module):
    """LeNet-5-like CNN: two stages of paired 3x3 convolutions, then three linear layers.

    Each convolutional stage applies two 3x3 convolutions back-to-back (no
    activation in between), followed by a ReLU and a 2x2 max-pooling with
    stride 2. The first stage pads by 1, the second stage uses no padding.
    ``fc1`` expects 5 * 5 * 16 = 400 features, which is what a 1x28x28 input
    produces after the second pooling. The last layer emits 10 values per sample.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn

        # Stage 1: 1 -> 6 -> 6 channels; padding=1 keeps the spatial size.
        self.conv1_1 = nn.Conv2d(in_channels=1, out_channels=6,
                                 kernel_size=3, padding=1)
        self.conv1_2 = nn.Conv2d(in_channels=6, out_channels=6,
                                 kernel_size=3, padding=1)
        self.act1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: 6 -> 16 -> 16 channels; no padding, each conv trims 2 pixels.
        self.conv2_1 = nn.Conv2d(in_channels=6, out_channels=16,
                                 kernel_size=3, padding=0)
        self.conv2_2 = nn.Conv2d(in_channels=16, out_channels=16,
                                 kernel_size=3, padding=0)
        self.act2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier head: 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(5 * 5 * 16, 120)
        self.act3 = nn.ReLU()

        self.fc2 = nn.Linear(120, 84)
        self.act4 = nn.ReLU()

        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Map a batch of images to a batch of 10 raw output scores per image."""
        stage1 = self.conv1_1(x)
        stage1 = self.conv1_2(stage1)
        stage1 = self.pool1(self.act1(stage1))

        stage2 = self.conv2_1(stage1)
        stage2 = self.conv2_2(stage2)
        stage2 = self.pool2(self.act2(stage2))

        # Collapse channels and both spatial axes into one feature axis per sample.
        feature_count = stage2.size(1) * stage2.size(2) * stage2.size(3)
        flat = stage2.view(stage2.size(0), feature_count)

        hidden = self.act3(self.fc1(flat))
        hidden = self.act4(self.fc2(hidden))
        return self.fc3(hidden)
    
# The model instance that the following cells move to the device, optimise and train.
lenet5 = LeNet5()

In [0]:
# Run on the first CUDA GPU when one is available, otherwise fall back to the CPU.
device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
lenet5 = lenet5.to(device)

In [0]:
# Cross-entropy loss over the class scores produced by the network
# (a multi-class criterion, not a binary one).
loss = torch.nn.CrossEntropyLoss()
# Stochastic gradient descent with momentum over all of the network's parameters.
optimizer = torch.optim.SGD(
    params=lenet5.parameters(),
    lr=0.01,
    momentum=0.7,
)

In [0]:
def train(net, X_train, y_train, X_test, y_test,
          batch_size=100, n_epochs=40, loss_fn=None, opt=None):
    """Train ``net`` with mini-batch gradient steps and evaluate it after every epoch.

    Args:
        net: the ``torch.nn.Module`` to optimise. Batches are moved to the
            device that holds its parameters.
        X_train, y_train: training inputs and targets, indexed along dim 0.
        X_test, y_test: evaluation inputs and targets; the whole set is passed
            through the network in one go after each epoch.
        batch_size: number of samples per optimisation step.
        n_epochs: number of passes over the training set.
        loss_fn: callable ``loss_fn(preds, targets)`` returning a scalar tensor.
            When omitted, the notebook-level ``loss`` is used.
        opt: optimizer that updates ``net``'s parameters. When omitted, the
            notebook-level ``optimizer`` is used.

    Returns:
        Tuple ``(test_accuracy_history, test_loss_history, epoch_time_history)``;
        each list has one entry per epoch. Accuracy and loss entries are 0-dim
        CPU tensors, time entries are floats (seconds, evaluation included).

    Side effects:
        Prints the accuracy and the elapsed time of every epoch, updates the
        network's parameters in place and leaves the network in evaluation mode.
    """
    # Fall back to the notebook-level objects only when the caller gave none,
    # so existing five-argument calls keep working unchanged.
    if loss_fn is None:
        loss_fn = loss
    if opt is None:
        opt = optimizer

    # Data must live where the weights live; a parameter-less module falls
    # back to the notebook-level `device`.
    first_param = next(net.parameters(), None)
    run_device = first_param.device if first_param is not None else device

    test_accuracy_history = []
    test_loss_history = []
    epoch_time_history = []

    X_test = X_test.to(run_device)
    y_test = y_test.to(run_device)

    n_samples = len(X_train)
    for epoch in range(n_epochs):
        start = time.time()
        net.train()  # undo the eval() of the previous epoch's evaluation
        order = np.random.permutation(n_samples)  # fresh shuffle of all sample indices
        for start_index in range(0, n_samples, batch_size):
            opt.zero_grad()  # clear gradients left over from the previous step

            batch_indexes = order[start_index:start_index + batch_size]

            X_batch = X_train[batch_indexes].to(run_device)
            y_batch = y_train[batch_indexes].to(run_device)

            # Call the module itself rather than .forward() so registered hooks run.
            preds = net(X_batch)

            loss_value = loss_fn(preds, y_batch)
            loss_value.backward()

            opt.step()

        # Evaluate on the test set after every epoch. no_grad() avoids building
        # an autograd graph over the whole test set just to read two numbers.
        net.eval()
        with torch.no_grad():
            test_preds = net(X_test)
            test_loss_history.append(loss_fn(test_preds, y_test).cpu())
            accuracy = (test_preds.argmax(dim=1) == y_test).float().mean().cpu()
        test_accuracy_history.append(accuracy)

        finish = time.time()
        epoch_time_history.append(finish - start)

        print(accuracy, "Program time: " + str(finish - start) + " seconds.")
    return test_accuracy_history, test_loss_history, epoch_time_history


In [0]:
# Per-experiment histories, keyed by experiment name; the plotting cells
# iterate over these dictionaries.
accuracies = dict()
losses = dict()
time_history = list()
(accuracies['relu'],
 losses['relu'],
 time_history) = train(lenet5, X_train, y_train, X_test, y_test)

In [0]:
# Draw one accuracy-per-epoch curve for every recorded experiment.
for accuracy_history in accuracies.values():
    plt.plot(accuracy_history, label='Ker 3x3, ReLU, MaxPool')
plt.legend()
plt.title('Validation Accuracy');

In [0]:
# Draw one loss-per-epoch curve for every recorded experiment.
for loss_history in losses.values():
    plt.plot(loss_history, label='Ker 3x3, ReLU, MaxPool')
plt.legend()
plt.title('Validation Loss');

# New Section