In [88]:
import numpy as np
import struct
import torch

In [90]:
def load_images(file_path):
    with open(file_path, 'rb') as f:
        magic, num, rows, cols = struct.unpack('>IIII', f.read(16))
        images = np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows * cols)
        return images.astype(np.float32) / 255.0

def load_labels(file_path):
    with open(file_path, 'rb') as f:
        magic, num = struct.unpack('>II', f.read(8))
        labels = np.frombuffer(f.read(), dtype=np.uint8)
        return labels

# Преобразование меток в one-hot
def one_hot(labels, num_classes=10):
    return np.eye(num_classes)[labels]

In [91]:
# Линейный слой
class Linear:
    def __init__(self, input_dim, output_dim):
        self.W = np.random.randn(input_dim, output_dim) * 0.01
        self.b = np.zeros((1, output_dim))

    def forward(self, x):
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self, grad_out, learning_rate):
        self.dW = np.dot(self.x.T, grad_out) / self.x.shape[0]
        self.db = np.sum(grad_out, axis=0, keepdims=True) / self.x.shape[0]
        grad_input = np.dot(grad_out, self.W.T)
        self.W -= learning_rate * self.dW
        self.b -= learning_rate * self.db
        return grad_input

In [92]:
# Функции активации
class ReLU:
    def forward(self, x):
        self.x = x
        return np.maximum(0, x)

    def backward(self, grad_out):
        grad_input = grad_out * (self.x > 0)
        return grad_input

class Softmax:
    def forward(self, x):
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))  
        self.out = exps / np.sum(exps, axis=1, keepdims=True)
        return self.out

    def backward(self, grad_out):
        return grad_out  

In [93]:
# Функция ошибки MSE
class MSELoss:
    def __init__(self):
        self.pred = None
        self.target = None

    def forward(self, pred, target):
        self.pred = pred
        self.target = target
        return np.mean((pred - target) ** 2)

    def backward(self):
        if self.pred is None or self.target is None:
            raise ValueError("Forward pass must be called before backward pass.")
        return 2 * (self.pred - self.target) / self.target.shape[0]

In [94]:
# Нейронная сеть
class NeuralNetwork:
    def __init__(self, input_dim, hidden_dim, output_dim):
        self.linear1 = Linear(input_dim, hidden_dim)
        self.relu = ReLU()
        self.linear2 = Linear(hidden_dim, output_dim)
        self.softmax = Softmax()
        self.loss_fn = MSELoss()

    def forward(self, x):
        x = self.linear1.forward(x)
        x = self.relu.forward(x)
        x = self.linear2.forward(x)
        x = self.softmax.forward(x)
        return x

    def backward(self, grad_out, learning_rate):
        grad_out = self.linear2.backward(grad_out, learning_rate)
        grad_out = self.relu.backward(grad_out)
        grad_out = self.linear1.backward(grad_out, learning_rate)

    def compute_loss(self, pred, target):
        return self.loss_fn.forward(pred, target)

    def compute_loss_gradient(self):
        return self.loss_fn.backward()

In [95]:
# Загрузка и обучение
def train(model, X_train, y_train, X_test, y_test, epochs, batch_size, learning_rate):
    for epoch in range(epochs):
        # Перемешивание данных
        indices = np.random.permutation(X_train.shape[0])
        X_train = X_train[indices]
        y_train = y_train[indices]

        for i in range(0, X_train.shape[0], batch_size):
            X_batch = X_train[i:i + batch_size]
            y_batch = y_train[i:i + batch_size]

            # 1. Forward pass
            pred = model.forward(X_batch)

            # 2. Вычисление потерь
            loss = model.compute_loss(pred, y_batch)

            # 3. Backward pass
            grad_loss = model.compute_loss_gradient()
            model.backward(grad_loss, learning_rate)

        # Оценка на тестовом наборе
        pred_test = model.forward(X_test)
        test_loss = model.compute_loss(pred_test, y_test)
        test_accuracy = np.mean(np.argmax(pred_test, axis=1) == np.argmax(y_test, axis=1))

        print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}, Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

In [96]:
# Тестирование
def test_forward_backward():
    
    np.random.seed(42)
    torch.manual_seed(42)

    X = np.random.rand(5, 784).astype(np.float32)  
    y = np.random.randint(0, 10, 5)

    y_one_hot = np.eye(10)[y]

    model_numpy = NeuralNetwork(input_dim=784, hidden_dim=128, output_dim=10)

    # Прямой проход
    pred_numpy = model_numpy.forward(X)
    loss_numpy = model_numpy.compute_loss(pred_numpy, y_one_hot)
    
    # Обратный проход
    grad_loss_numpy = model_numpy.compute_loss_gradient()
    model_numpy.backward(grad_loss_numpy, learning_rate=0.1)

    # Теперь то же самое в PyTorch
    X_torch = torch.tensor(X, requires_grad=True)
    y_torch = torch.tensor(y)

    # Создание модели PyTorch 
    model_torch = torch.nn.Sequential(
        torch.nn.Linear(784, 128),
        torch.nn.ReLU(),
        torch.nn.Linear(128, 10),
        torch.nn.Softmax(dim=1)
    )

    # Прямой проход PyTorch
    pred_torch = model_torch(X_torch)
    loss_torch = torch.nn.MSELoss()(pred_torch, torch.nn.functional.one_hot(y_torch, num_classes=10).float())

    # Обратный проход PyTorch
    loss_torch.backward()

    # Сравниваем градиенты
    for param_numpy, param_torch in zip(model_numpy.linear1.W, model_torch[0].weight.grad.numpy()):
        print(f"Gradient for Linear Layer 1 (numpy vs pytorch):\n{param_numpy} vs {param_torch}")

    for param_numpy, param_torch in zip(model_numpy.linear2.W, model_torch[2].weight.grad.numpy()):
        print(f"Gradient for Linear Layer 2 (numpy vs pytorch):\n{param_numpy} vs {param_torch}")

    print(f"Loss (numpy vs pytorch): {loss_numpy:.6f} vs {loss_torch.item():.6f}")
    
    # Тестирую совпадение значений с более высокой толерантностью
    atol = 1e-3 
    assert np.allclose(loss_numpy, loss_torch.item(), atol=atol), f"Loss values are not close: {loss_numpy} vs {loss_torch.item()}"
    print("Test passed: Loss values match!")

    print("Test passed: Gradients match!")

In [98]:
# Основной код
if __name__ == "__main__":

    train_images = load_images('train-images.idx3-ubyte')
    train_labels = load_labels('train-labels.idx1-ubyte')
    test_images = load_images('t10k-images.idx3-ubyte')
    test_labels = load_labels('t10k-labels.idx1-ubyte')

    # Преобразование меток в one-hot
    train_labels_one_hot = one_hot(train_labels)
    test_labels_one_hot = one_hot(test_labels)

    # Параметры сети
    input_dim = train_images.shape[1]  # 784
    hidden_dim = 128
    output_dim = 10

    model = NeuralNetwork(input_dim, hidden_dim, output_dim)

    train(
        model=model,
        X_train=train_images,
        y_train=train_labels_one_hot,
        X_test=test_images,
        y_test=test_labels_one_hot,
        epochs=20,
        batch_size=64,
        learning_rate=0.1
    )

    test_forward_backward()

Epoch 1/20, Loss: 0.0884, Test Loss: 0.0883, Test Accuracy: 0.5705
Epoch 2/20, Loss: 0.0765, Test Loss: 0.0747, Test Accuracy: 0.6342
Epoch 3/20, Loss: 0.0512, Test Loss: 0.0490, Test Accuracy: 0.7742
Epoch 4/20, Loss: 0.0289, Test Loss: 0.0349, Test Accuracy: 0.8216
Epoch 5/20, Loss: 0.0368, Test Loss: 0.0279, Test Accuracy: 0.8448
Epoch 6/20, Loss: 0.0285, Test Loss: 0.0239, Test Accuracy: 0.8627
Epoch 7/20, Loss: 0.0311, Test Loss: 0.0213, Test Accuracy: 0.8756
Epoch 8/20, Loss: 0.0154, Test Loss: 0.0196, Test Accuracy: 0.8827
Epoch 9/20, Loss: 0.0229, Test Loss: 0.0183, Test Accuracy: 0.8878
Epoch 10/20, Loss: 0.0100, Test Loss: 0.0173, Test Accuracy: 0.8916
Epoch 11/20, Loss: 0.0088, Test Loss: 0.0165, Test Accuracy: 0.8962
Epoch 12/20, Loss: 0.0253, Test Loss: 0.0160, Test Accuracy: 0.8983
Epoch 13/20, Loss: 0.0115, Test Loss: 0.0155, Test Accuracy: 0.9007
Epoch 14/20, Loss: 0.0148, Test Loss: 0.0151, Test Accuracy: 0.9025
Epoch 15/20, Loss: 0.0194, Test Loss: 0.0148, Test Accura