In [7]:
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

Обучим MLP (многослойный перцептрон) из библиотеки scikit-learn на датасете MNIST

In [4]:
from sklearn.datasets import fetch_openml

# Download MNIST as plain NumPy arrays (as_frame=False) rather than a DataFrame.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
# Scale pixel values by 1/255 out of place. The in-place form (`X /= 255.0`)
# raises a casting error whenever the loader hands back an integer array
# (this depends on the scikit-learn parser/version) and would also rescale
# `mnist.data` itself, because `X` is the very same array object.
X, y = mnist.data / 255.0, mnist.target
# Stratified 80/20 split so every digit keeps its share in both parts.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, shuffle=True, test_size=0.2)

  warn(


In [10]:
from sklearn.neural_network import MLPClassifier

# Baseline: scikit-learn MLP with default hyper-parameters.
# `fit` returns the estimator itself, so construction and training can be chained.
model = MLPClassifier().fit(X_train, y_train)
preds = model.predict(X_test)

# Report accuracy first, then F1 under both averaging modes.
print('Accuracy:', accuracy_score(y_test, preds))
for averaging, label in (('micro', 'F1 micro:'), ('macro', 'F1 macro:')):
    print(label, f1_score(y_test, preds, average=averaging))

Accuracy: 0.9763571428571428
F1 micro: 0.9763571428571428
F1 macro: 0.9762537196331811


Построим CNN по типу LeNet и обучим её на датасете MNIST

In [15]:
from torchvision import models
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import torch
import warnings
from PIL import Image

In [43]:
class MNIST(Dataset):
    """Map-style dataset that serves flattened 28x28 images as (image, label) pairs.

    Args:
        X: indexable collection of flattened images; each row is reshaped to
            (28, 28) and cast to uint8, so raw 0-255 intensities are expected.
        y: indexable collection of labels; each one is converted with ``int()``.
        transform: optional callable applied to the (28, 28) uint8 array.
            When omitted, the default pipeline is used:
            ToPILImage -> ToTensor -> Normalize((0.1307,), (0.3081,)).
    """

    def __init__(self, X, y, transform=None):
        self.X = X
        self.y = y
        # Honour a caller-supplied transform; previously the argument was
        # accepted but silently replaced by the hard-coded pipeline.
        if transform is None:
            transform = T.Compose([
                T.ToPILImage(),
                T.ToTensor(),
                # presumably the MNIST mean/std on the [0, 1] scale -- TODO confirm
                T.Normalize((0.1307,), (0.3081,))
            ])
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the transformed image and its integer label for ``idx``."""
        # The uint8 cast truncates fractions: data already scaled to [0, 1]
        # would collapse to (almost) all zeros here, so feed raw intensities.
        image = self.X[idx].reshape(28, 28).astype(np.uint8)
        image = self.transform(image)

        return image, int(self.y[idx])

In [44]:
# Reload MNIST with raw pixel values: MNIST.__getitem__ casts every row to
# uint8, so the data must NOT be pre-scaled to [0, 1] here.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X, y = mnist.data, mnist.target
# Stratified 75/25 split so every digit keeps its share in both parts.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, shuffle=True, test_size=0.25)

# No `transform` argument: that name is never defined in this notebook, so
# passing it raised NameError on a fresh kernel. The dataset sets up its own
# ToPILImage -> ToTensor -> Normalize pipeline when none is supplied.
train_dataset = MNIST(X_train, y_train)
test_dataset = MNIST(X_test, y_test)

# Shuffle only the training data; the test loader keeps dataset order.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

  warn(


In [48]:
import torch.nn.functional as F

class CNN(nn.Module):
    """LeNet-style network: two conv/ReLU/max-pool stages, then three linear layers.

    ``forward`` returns raw (un-normalised) scores for 10 classes.
    """

    def __init__(self):
        super().__init__()
        # padding=2 lets the first 5x5 convolution preserve the spatial size.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # 400 = 16 channels * 5 * 5, the feature-map size for a 28x28 input.
        self.linear1 = nn.Linear(in_features=400, out_features=120)
        self.linear2 = nn.Linear(in_features=120, out_features=84)
        self.linear3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run the network on ``x`` and return the class scores."""
        features = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=(2, 2))
        features = F.max_pool2d(F.relu(self.conv2(features)), kernel_size=(2, 2))
        # Collapse every dimension after the first into one feature axis.
        per_sample = features.shape[1:].numel()
        flat = features.view(-1, per_sample)
        hidden = F.relu(self.linear1(flat))
        hidden = F.relu(self.linear2(hidden))
        return self.linear3(hidden)

In [69]:
def train(epochs):
    """Train the notebook-level ``model`` for ``epochs`` epochs, evaluating after each.

    Relies on these names being defined at notebook level: ``model``,
    ``optimizer``, ``scheduler``, ``criterion``, ``device``, ``train_loader``
    and ``test_loader``.

    Args:
        epochs: number of passes over ``train_loader``.

    Returns:
        dict with the keys ``'train_loss'``, ``'train_accuracy'``,
        ``'test_loss'`` and ``'test_accuracy'``; each maps to a list holding
        one per-sample average for every epoch, in order.
    """
    history = {
        'train_loss': [],
        'train_accuracy': [],
        'test_loss': [],
        'test_accuracy': [],
    }
    n_train = len(train_loader.dataset)
    n_test = len(test_loader.dataset)

    for epoch in range(1, epochs + 1):
        train_loss, train_accuracy = 0.0, 0.0
        model.train()
        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            logits = model(images)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()

            # Weight the batch loss by the batch size so that the epoch
            # average stays correct when the last batch is smaller.
            train_loss += loss.item() * images.shape[0]
            train_accuracy += (logits.argmax(dim=1) == labels).sum().item()

        train_loss /= n_train
        train_accuracy /= n_train

        # The learning-rate schedule advances once per epoch, not per batch.
        scheduler.step()

        test_loss, test_accuracy = 0.0, 0.0
        model.eval()
        # Evaluation needs no gradients; wrap the whole loop once.
        with torch.no_grad():
            for images, labels in test_loader:
                images = images.to(device)
                labels = labels.to(device)
                logits = model(images)
                loss = criterion(logits, labels)

                test_loss += loss.item() * images.shape[0]
                test_accuracy += (logits.argmax(dim=1) == labels).sum().item()

        test_loss /= n_test
        test_accuracy /= n_test

        history['train_loss'].append(train_loss)
        history['train_accuracy'].append(train_accuracy)
        history['test_loss'].append(test_loss)
        history['test_accuracy'].append(test_accuracy)

        print(f'Epoch: {epoch}, train_loss: {train_loss}, test_loss: {test_loss}')

    # The collected metrics used to be dropped when the function ended.
    return history

In [79]:
# Choose the device first: train() moves every batch to `device`, so the
# model's parameters have to live there too, otherwise the forward pass fails
# with a device mismatch on any machine where CUDA is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = CNN().to(device)
# Build the optimizer after the move so it tracks the parameters on `device`.
optimizer = torch.optim.Adam(model.parameters())
# T_max counts scheduler steps; train() calls scheduler.step() once per epoch.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=8)
criterion = nn.CrossEntropyLoss()

In [80]:
# 8 epochs matches the scheduler's T_max=8: the scheduler is stepped once per
# epoch, so the annealing window ends together with training.
train(8)

Epoch: 1, train_loss: 0.3424120650798792, test_loss: 0.11195711160898209
Epoch: 2, train_loss: 0.08913182769275847, test_loss: 0.07002112916026797
Epoch: 3, train_loss: 0.05973386781499499, test_loss: 0.051833647218772344
Epoch: 4, train_loss: 0.04378662882979427, test_loss: 0.05209189014605113
Epoch: 5, train_loss: 0.03378710383034888, test_loss: 0.04729568531853812
Epoch: 6, train_loss: 0.02737112624247869, test_loss: 0.041883145223345075
Epoch: 7, train_loss: 0.021968932999635028, test_loss: 0.04007958917958396
Epoch: 8, train_loss: 0.018976253450750595, test_loss: 0.03676403621860913


In [81]:
# Final evaluation of the CNN on the held-out set.
# Ground-truth labels are taken from the same batches as the predictions, so
# the two lists stay aligned whatever order the loader uses, and `y_test` is
# left untouched instead of being rebound to a list of ints.
y_true, y_pred = [], []
model.eval()
with torch.no_grad():  # inference only, no gradients needed
    for images, labels in test_loader:
        logits = model(images.to(device))
        y_pred += logits.argmax(dim=1).tolist()
        # Labels are only compared on the CPU side, so they are not moved.
        y_true += labels.tolist()
print('Accuracy:', accuracy_score(y_true, y_pred))
print('F1 micro:', f1_score(y_true, y_pred, average='micro'))
print('F1 macro:', f1_score(y_true, y_pred, average='macro'))

Accuracy: 0.9879428571428571
F1 micro: 0.9879428571428571
F1 macro: 0.9878750328689254


Вывод:
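Свёрточные нейронные сети учитывают пространственную структуру изображений и поэтому лучше подходят для их классификации, так что результаты у модели LeNet получились лучше. Это подтверждается метриками: Accuracy и F1-score у LeNet (≈0.988) выше, чем у MLP (≈0.976). Замечание: модели оценивались на разных случайных разбиениях (тестовая доля 0.2 для MLP и 0.25 для LeNet), поэтому сравнение приблизительное.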