In [2]:
import numpy as np
from keras.datasets import mnist
from keras.utils import to_categorical


In [3]:
# 1. Загрузка данных MNIST
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [4]:
# Предобработка данных
x_train = x_train.reshape(-1, 28*28).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28*28).astype('float32') / 255.0
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

In [5]:
print(x_train.shape)

(60000, 784)


In [6]:
# Разделение на тренировочный и валидационный наборы
split = 59000
x_val, y_val = x_train[split:], y_train[split:]
x_train, y_train = x_train[:split], y_train[:split]

In [None]:
# 2. Модифицированная функция обучения с валидацией
def train_model(X, y, X_val, y_val, 
                hidden_size=[512], 
                learning_rate=0.001, epochs=10):
    input_size = X.shape[1]
    output_size = y.shape[1]
    samples = X.shape[0]
    hidden_sizes = [*hidden_size, output_size]
    layers = len(hidden_sizes)
    W = []
    b = []
    np.random.seed(42)
    for i in range(0, layers):
        # Регуляризация Ксавьера
        W_i = np.random.randn(input_size     if i == 0          else hidden_size[i - 1], 
                              hidden_size[i] if i != layers - 1 else output_size) 
        W_i *= np.sqrt(2./(input_size if i == 0 else hidden_size[i - 1]))

        b_i = np.zeros(hidden_size[i] if i != layers - 1 else output_size)

        W.append(W_i)
        b.append(b_i)

    # Функции активации
    softmax = lambda x: np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)

    for epoch in range(epochs):
        indices = np.random.permutation(samples)
        X_shuffled = X[indices]
        y_shuffled = y[indices]

        h = [X_shuffled]
        # Прямое распространение
        for i in range(0, layers):
            z = h[i] @ W[i] + b[i]
            h.append(z)

        y_pred = softmax(h[-1])
        loss = -np.mean(y_shuffled * np.log(y_pred + 1e-8))

        buf = W[-1].T
        # Обратное распостранение
        for i in range(layers - 1, -1, -1):
            dl = (y_pred - y_shuffled) / samples
            dW = (h[i - layers - 1].T if i != 0 else X_shuffled.T) @ dl

            if i != layers - 1:
                dW = dW @ buf
                dl = dl @ buf
                buf = buf @ W[i].T

            W[i] = W[i] - learning_rate * dW
            b[i] = b[i] - learning_rate * np.sum(dl, axis=0, keepdims=True)

        # Вычисление метрик
        train_preds = np.argmax(y_pred, axis=1)
        train_labels = np.argmax(y_shuffled, axis=1)
        train_acc = np.mean(train_preds == train_labels)
        
        # Валидационная точность
        z = X_val
        for i in range(0, layers):
            z = z @ W[i] + b[i]
        z = softmax(z)
        val_preds = np.argmax(z, axis=1)
        val_labels = np.argmax(y_val, axis=1)
        val_acc = np.mean(val_preds == val_labels)

        print(f'Epoch {epoch+1}/{epochs} | Loss: {loss:.4f} | '
              f'Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}')
        
    return W, b, layers



In [228]:
W, b, layers = train_model(
    X=x_train,
    y=y_train,
    X_val=x_val,
    y_val=y_val,
    hidden_size=[512, 512, 512],
    learning_rate=0.05,
    epochs=50
)

Epoch 1/50 | Loss: 0.3008 | Train Acc: 0.0878 | Val Acc: 0.3780
Epoch 2/50 | Loss: 0.3023 | Train Acc: 0.3696 | Val Acc: 0.3140
Epoch 3/50 | Loss: 0.3160 | Train Acc: 0.3129 | Val Acc: 0.5760
Epoch 4/50 | Loss: 0.2586 | Train Acc: 0.5391 | Val Acc: 0.6280
Epoch 5/50 | Loss: 0.2284 | Train Acc: 0.5699 | Val Acc: 0.6820
Epoch 6/50 | Loss: 0.1417 | Train Acc: 0.6425 | Val Acc: 0.7720
Epoch 7/50 | Loss: 0.1033 | Train Acc: 0.6923 | Val Acc: 0.8400
Epoch 8/50 | Loss: 0.0767 | Train Acc: 0.7681 | Val Acc: 0.8660
Epoch 9/50 | Loss: 0.0683 | Train Acc: 0.7933 | Val Acc: 0.8760
Epoch 10/50 | Loss: 0.0650 | Train Acc: 0.8076 | Val Acc: 0.8830
Epoch 11/50 | Loss: 0.0628 | Train Acc: 0.8145 | Val Acc: 0.8920
Epoch 12/50 | Loss: 0.0609 | Train Acc: 0.8197 | Val Acc: 0.8970
Epoch 13/50 | Loss: 0.0593 | Train Acc: 0.8262 | Val Acc: 0.9010
Epoch 14/50 | Loss: 0.0577 | Train Acc: 0.8297 | Val Acc: 0.9050
Epoch 15/50 | Loss: 0.0564 | Train Acc: 0.8367 | Val Acc: 0.9080
Epoch 16/50 | Loss: 0.0550 | Train

In [222]:
np.savez('model_weights.npz', layers, *W, *b)