In [1]:
import torch
from torch import nn
import torchvision as tv
import time

###  Загрузка данных

In [2]:
BATCH_SIZE = 256

transforms = tv.transforms.Compose([
    tv.transforms.ToTensor()
])

train_dataset = tv.datasets.MNIST('.',
                                  train=True,
                                  transform=transforms,
                                  download=True)

test_dataset = tv.datasets.MNIST('.',
                                 train=False,
                                 transform=transforms,
                                 download=True)

train_iter = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE)
test_iter = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [3]:
train_dataset.data.shape

torch.Size([60000, 28, 28])

In [4]:
test_dataset.data.shape

torch.Size([10000, 28, 28])

In [5]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

## Вспомогательные функции  - обчения и определение тосности классификаации

In [6]:
def evaluate_accuracy(data_iter, net):
    acc_sum, n = torch.Tensor([0]), 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.shape[0]
    return acc_sum.item() / n
def train(net, train_iter, test_iter, optimizer, num_epochs):
    loss = nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
    
        for X, y in train_iter:
            optimizer.zero_grad()
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().item()
            n += y.shape[0]
    
        test_acc = evaluate_accuracy(test_iter, net)
        print(f'epoch {epoch + 1}, loss {train_l_sum / n:.4f}, train acc {train_acc_sum / n:.3f}' \
              f', test acc {test_acc:.3f}, time {time.time() - start:.1f} sec')

### Базовая модель

In [7]:
model = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=0),
    nn.Tanh(),
    nn.AvgPool2d(2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5),
    nn.Tanh(),
    nn.AvgPool2d(2, stride=2),
    nn.Conv2d(16, 120, kernel_size=5),
    nn.Flatten(),
    nn.Linear(120, 84),
    nn.Tanh(),
    nn.Linear(84, 10)
)

In [8]:
from torchsummary import summary
summary(model, input_size=(1, 32, 32), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             156
              Tanh-2            [-1, 6, 28, 28]               0
         AvgPool2d-3            [-1, 6, 14, 14]               0
            Conv2d-4           [-1, 16, 10, 10]           2,416
              Tanh-5           [-1, 16, 10, 10]               0
         AvgPool2d-6             [-1, 16, 5, 5]               0
            Conv2d-7            [-1, 120, 1, 1]          48,120
           Flatten-8                  [-1, 120]               0
            Linear-9                   [-1, 84]          10,164
             Tanh-10                   [-1, 84]               0
           Linear-11                   [-1, 10]             850
Total params: 61,706
Trainable params: 61,706
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/ba

In [10]:
# model = model.to(device)
lr, num_epochs = 0.01, 5
trainer = torch.optim.SGD(model.parameters(), lr=lr)
train(model, train_iter, test_iter, trainer, num_epochs)

epoch 1, loss 0.0089, train acc 0.164, test acc 0.406, time 5.4 sec
epoch 2, loss 0.0077, train acc 0.538, test acc 0.602, time 5.3 sec
epoch 3, loss 0.0048, train acc 0.684, test acc 0.770, time 5.2 sec
epoch 4, loss 0.0031, train acc 0.799, test acc 0.835, time 5.2 sec
epoch 5, loss 0.0024, train acc 0.842, test acc 0.860, time 5.2 sec


### На GPU

In [14]:
def evaluate_accuracy_cuda(data_iter, net, device):
    acc_sum, n = torch.Tensor([0]).to(device), 0
    for X, y in data_iter:
        X, y = X.to(device), y.to(device)
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.shape[0]
    return acc_sum.item() / n
def train_cuda(net, train_iter, test_iter, optimizer, num_epochs, device):
    loss = nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
    
        for X, y in train_iter:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().item()
            n += y.shape[0]
    
        test_acc = evaluate_accuracy_cuda(test_iter, net, device)
        print(f'epoch {epoch + 1}, loss {train_l_sum / n:.4f}, train acc {train_acc_sum / n:.3f}' \
              f', test acc {test_acc:.3f}, time {time.time() - start:.1f} sec')

In [13]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [15]:
model = model.to(device)

lr, num_epochs = 0.01, 5
trainer = torch.optim.SGD(model.parameters(), lr=lr)
train_cuda(model, train_iter, test_iter, trainer, num_epochs, device)

epoch 1, loss 0.0020, train acc 0.862, test acc 0.875, time 3.8 sec
epoch 2, loss 0.0018, train acc 0.876, test acc 0.883, time 3.7 sec
epoch 3, loss 0.0016, train acc 0.884, test acc 0.892, time 4.9 sec
epoch 4, loss 0.0015, train acc 0.889, test acc 0.896, time 5.1 sec
epoch 5, loss 0.0015, train acc 0.894, test acc 0.900, time 5.1 sec


## Оптимизатор Adam

In [16]:
model = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=0),
    nn.Tanh(),
    nn.AvgPool2d(2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5),
    nn.Tanh(),
    nn.AvgPool2d(2, stride=2),
    nn.Conv2d(16, 120, kernel_size=5),
    nn.Flatten(),
    nn.Linear(120, 84),
    nn.Tanh(),
    nn.Linear(84, 10)
)
model = model.to(device)

In [17]:
lr, num_epochs = 0.01, 5
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train_cuda(model, train_iter, test_iter, trainer, num_epochs, device)

epoch 1, loss 0.0014, train acc 0.893, test acc 0.944, time 5.7 sec
epoch 2, loss 0.0007, train acc 0.947, test acc 0.948, time 5.2 sec
epoch 3, loss 0.0006, train acc 0.955, test acc 0.954, time 5.2 sec
epoch 4, loss 0.0006, train acc 0.955, test acc 0.956, time 5.2 sec
epoch 5, loss 0.0006, train acc 0.956, test acc 0.962, time 5.1 sec


## Добавим слой регуляризации

model = nn.Sequential(

    nn.Conv2d(1, 6, kernel_size=5, padding=0),
    nn.Tanh(),
    nn.AvgPool2d(2, stride=2),

    nn.Conv2d(6, 16, kernel_size=5),
    nn.Tanh(),
    nn.BatchNorm2d(16),
    nn.AvgPool2d(2, stride=2),

    nn.Conv2d(16, 120, kernel_size=5),
    nn.Flatten(),
    nn.Linear(120, 84),
    nn.BatchNorm1d(84),
    nn.Tanh(),
    nn.Linear(84, 10)
)
model = model.to(device)

In [25]:
lr, num_epochs = 0.01, 5
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train_cuda(model, train_iter, test_iter, trainer, num_epochs, device)

epoch 1, loss 0.0008, train acc 0.940, test acc 0.972, time 5.5 sec
epoch 2, loss 0.0003, train acc 0.979, test acc 0.978, time 5.3 sec
epoch 3, loss 0.0002, train acc 0.984, test acc 0.979, time 5.2 sec
epoch 4, loss 0.0002, train acc 0.987, test acc 0.979, time 5.2 sec
epoch 5, loss 0.0002, train acc 0.988, test acc 0.980, time 5.1 sec


## Добавим слой регуляризации и Dropout слой

In [28]:
model = nn.Sequential(

    nn.Conv2d(1, 6, kernel_size=5, padding=0),
    nn.Tanh(),
    nn.AvgPool2d(2, stride=2),

    nn.Conv2d(6, 16, kernel_size=5),
    nn.Tanh(),
    nn.BatchNorm2d(16),
    nn.AvgPool2d(2, stride=2),

    nn.Conv2d(16, 120, kernel_size=5),    
    nn.Flatten(),
    
    nn.Linear(120, 84),
    nn.BatchNorm1d(84),
    nn.Tanh(),
    torch.nn.Dropout(0.3),
    nn.Linear(84, 10)
)
model = model.to(device)

In [29]:
lr, num_epochs = 0.01, 5
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train_cuda(model, train_iter, test_iter, trainer, num_epochs, device)

epoch 1, loss 0.0009, train acc 0.933, test acc 0.971, time 5.5 sec
epoch 2, loss 0.0003, train acc 0.977, test acc 0.977, time 5.1 sec
epoch 3, loss 0.0002, train acc 0.982, test acc 0.980, time 5.0 sec
epoch 4, loss 0.0002, train acc 0.983, test acc 0.979, time 5.2 sec
epoch 5, loss 0.0002, train acc 0.986, test acc 0.982, time 5.1 sec


### Изменим функцию активации в выходном полносвязном слое

In [47]:
model = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=0),
    nn.Tanh(),
    nn.AvgPool2d(2, stride=2),

    nn.Conv2d(6, 16, kernel_size=5),
    nn.Tanh(),
    nn.BatchNorm2d(16),
    nn.AvgPool2d(2, stride=2),

    nn.Conv2d(16, 120, kernel_size=5),
    nn.Flatten(),

    nn.Linear(120, 84),
    nn.BatchNorm1d(84),
    nn.ReLU(),    
    torch.nn.Dropout(0.3),
    nn.Linear(84, 10)
)
model = model.to(device)

In [48]:
lr, num_epochs = 0.01, 5
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train_cuda(model, train_iter, test_iter, trainer, num_epochs, device)

epoch 1, loss 0.0010, train acc 0.925, test acc 0.970, time 4.1 sec
epoch 2, loss 0.0003, train acc 0.976, test acc 0.974, time 4.9 sec
epoch 3, loss 0.0003, train acc 0.981, test acc 0.980, time 5.1 sec
epoch 4, loss 0.0002, train acc 0.984, test acc 0.981, time 5.2 sec
epoch 5, loss 0.0002, train acc 0.983, test acc 0.980, time 5.0 sec


In [51]:
model = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=0),
    nn.Tanh(),
    nn.AvgPool2d(2, stride=2),

    nn.Conv2d(6, 16, kernel_size=3),
    nn.Tanh(),
    nn.BatchNorm2d(16),
    nn.AvgPool2d(2, stride=2),

    nn.Conv2d(16, 64, kernel_size=2),
    nn.Tanh(),
    nn.BatchNorm2d(64),
    # nn.AvgPool2d(2, stride=1),

    nn.Conv2d(64, 120, kernel_size=5),
    nn.Flatten(),

    nn.Linear(120, 84),
    nn.BatchNorm1d(84),
    nn.ReLU(),
    torch.nn.Dropout(0.3),
    nn.Linear(84, 10)
)
model = model.to(device)

In [52]:
lr, num_epochs = 0.01, 5
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train_cuda(model, train_iter, test_iter, trainer, num_epochs, device)

epoch 1, loss 0.0010, train acc 0.923, test acc 0.971, time 5.0 sec
epoch 2, loss 0.0003, train acc 0.974, test acc 0.973, time 5.0 sec
epoch 3, loss 0.0002, train acc 0.981, test acc 0.977, time 5.1 sec
epoch 4, loss 0.0002, train acc 0.984, test acc 0.982, time 5.0 sec
epoch 5, loss 0.0002, train acc 0.986, test acc 0.983, time 5.3 sec
