In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import torch
import torchvision as tv
from torchmetrics import Accuracy
import numpy as np
import pandas as pd
import time

## CNN

### Загрузка данных

In [2]:
# Mini-batch size passed to both the training and the test DataLoader.
BATCH_SIZE = 256

In [3]:
# Build the MNIST training split first, then the test split; both are
# downloaded into the current directory if missing and yield tensors.
train_mnist, test_mnist = (
    tv.datasets.MNIST(
        root='.',
        train=is_train_split,
        transform=tv.transforms.ToTensor(),
        download=True,
    )
    for is_train_split in (True, False)
)

In [4]:
# Training batches are reshuffled every epoch so the optimiser does not see
# the samples in the same fixed order (and the same batch composition) each
# time. Evaluation order does not affect the metrics, so it stays sequential.
train = torch.utils.data.DataLoader(
    train_mnist,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test = torch.utils.data.DataLoader(
    test_mnist,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [5]:
# Sanity check: shape of the first training image tensor.
train_mnist[0][0].shape

torch.Size([1, 28, 28])

In [6]:
# Sanity check: shape of the first test image tensor.
test_mnist[0][0].shape

torch.Size([1, 28, 28])

### Построение модели CNN

In [7]:

class CNN(torch.nn.Module):
    """Small convolutional classifier: two conv stages plus a two-layer head.

    Each stage is Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU -> MaxPool2d(2).
    The head takes 64 * 7 * 7 flattened features and produces 10 outputs.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Return the layers of one convolutional stage in application order."""
        return [
            torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            torch.nn.BatchNorm2d(out_channels),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),
        ]

    def __init__(self):
        super().__init__()

        # Both stages are unpacked into one flat Sequential, so the
        # sub-modules keep the indices 0..7.
        stages = self._conv_stage(1, 32) + self._conv_stage(32, 64)
        self.feature_extractor = torch.nn.Sequential(*stages)

        head_layers = (
            torch.nn.Linear(64 * 7 * 7, 128),
            torch.nn.Dropout(0.5),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 10),
        )
        self.classifier = torch.nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the feature extractor, flatten per sample, and apply the head."""
        features = self.feature_extractor(x)
        # Keep the batch dimension and collapse everything else.
        flattened = features.view(features.size(0), -1)
        return self.classifier(flattened)

In [8]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
# To force CPU execution, assign "cpu" to `device` right after this block.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Использую {device}-устройство\n")

# Instantiate the network, move it to the selected device, and show its layout.
model = CNN()
model = model.to(device)
print(model)

Использую cuda-устройство

CNN(
  (feature_extractor): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=3136, out_features=128, bias=True)
    (1): Dropout(p=0.5, inplace=False)
    (2): ReLU()
    (3): Linear(in_features=128, out_features=10, bias=True)
  )
)


In [9]:
# Training hyperparameters.
num_epochs = 10
learning_rate = 1e-3

# Adam over the parameters of `model`, and the cross-entropy objective
# used for both the training and the evaluation loss.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
loss = torch.nn.CrossEntropyLoss()

### Обучение модели

In [10]:
def train_model(model_cnn):
    """Train ``model_cnn`` and print train/test loss and accuracy per epoch.

    The function relies on these notebook-level names: ``train`` and ``test``
    (batch iterables yielding ``(X, y)``), ``loss``, ``optimizer``,
    ``num_epochs`` and ``device``. ``optimizer`` is not created here, so it
    must have been built from the parameters of ``model_cnn``; otherwise the
    optimisation step updates a different model.

    Args:
        model_cnn: module whose output for a batch is fed to ``loss`` together
            with the targets and to a 10-class accuracy metric.

    Returns:
        None. One summary line is printed for every epoch.
    """
    train_accuracy = Accuracy(task='multiclass', num_classes=10).to(device)
    test_accuracy = Accuracy(task='multiclass', num_classes=10).to(device)

    for epoch in range(num_epochs):
        start = time.time()

        # ---- training pass ----
        train_loss = .0
        train_iters = 0
        model_cnn.train()
        for X, y in train:
            X, y = X.to(device), y.to(device)
            y_pred = model_cnn(X)

            optimizer.zero_grad()
            batch_loss = loss(y_pred, y)
            batch_loss.backward()
            optimizer.step()

            train_loss += batch_loss.item()
            train_iters += 1
            train_accuracy.update(y_pred, y)
        train_acc_out = train_accuracy.compute()
        train_accuracy.reset()

        # ---- evaluation pass ----
        test_loss = .0
        test_iters = 0
        model_cnn.eval()
        # No gradients are needed for evaluation: disabling autograd avoids
        # building a graph for every test batch and saves memory and time.
        with torch.no_grad():
            for X, y in test:
                X, y = X.to(device), y.to(device)
                y_pred = model_cnn(X)

                batch_loss = loss(y_pred, y)

                test_loss += batch_loss.item()
                test_iters += 1
                test_accuracy.update(y_pred, y)
        test_acc_out = test_accuracy.compute()
        test_accuracy.reset()

        print(f"ep: {epoch+1} | time: {time.time()-start:.2f} сек | train_loss: {train_loss/train_iters:.2f} | train_acc: {train_acc_out*100:.2f}%" 
              f" | test_loss: {test_loss/test_iters:.2f} | test_acc: {test_acc_out*100:.2f}%")


In [11]:
# Run the training loop on the CNN instance; per-epoch metrics are printed below.
train_model(model)

ep: 1 | time: 4.44 сек | train_loss: 0.28 | train_acc: 91.24% | test_loss: 0.06 | test_acc: 98.16%
ep: 2 | time: 4.33 сек | train_loss: 0.10 | train_acc: 97.07% | test_loss: 0.04 | test_acc: 98.79%
ep: 3 | time: 4.76 сек | train_loss: 0.08 | train_acc: 97.81% | test_loss: 0.04 | test_acc: 98.74%
ep: 4 | time: 4.70 сек | train_loss: 0.07 | train_acc: 97.96% | test_loss: 0.03 | test_acc: 98.79%
ep: 5 | time: 4.89 сек | train_loss: 0.06 | train_acc: 98.25% | test_loss: 0.03 | test_acc: 98.76%
ep: 6 | time: 4.97 сек | train_loss: 0.05 | train_acc: 98.49% | test_loss: 0.03 | test_acc: 98.88%
ep: 7 | time: 4.79 сек | train_loss: 0.05 | train_acc: 98.60% | test_loss: 0.03 | test_acc: 99.12%
ep: 8 | time: 5.01 сек | train_loss: 0.04 | train_acc: 98.73% | test_loss: 0.03 | test_acc: 99.06%
ep: 9 | time: 4.86 сек | train_loss: 0.04 | train_acc: 98.77% | test_loss: 0.03 | test_acc: 99.07%
ep: 10 | time: 4.93 сек | train_loss: 0.04 | train_acc: 98.85% | test_loss: 0.03 | test_acc: 99.07%
