In [75]:
import numpy as np
from keras.datasets import mnist
from keras.utils import to_categorical


In [76]:
# 1. Загрузка данных MNIST
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [77]:
# Предобработка данных
x_train = x_train.reshape(-1, 28*28).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28*28).astype('float32') / 255.0
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

In [68]:
print(x_train.shape)

(60000, 784)


In [78]:
# Разделение на тренировочный и валидационный наборы
split = 59000
x_val, y_val = x_train[split:], y_train[split:]
x_train, y_train = x_train[:split], y_train[:split]

In [82]:
# 2. Модифицированная функция обучения с валидацией
def train_model(X, y, X_val, y_val, hidden_size=512, 
               learning_rate=0.001, epochs=10):
    input_size = X.shape[1]
    output_size = y.shape[1]
    samples = X.shape[0]

    # Инициализация параметров
    np.random.seed(42)
    W1 = np.random.randn(input_size, hidden_size) * np.sqrt(2./input_size)
    b1 = np.zeros(hidden_size)
    W2 = np.random.randn(hidden_size, output_size) * np.sqrt(2./hidden_size)
    b2 = np.zeros(output_size)

    # Функции активации
    relu = lambda x: x
    relu_deriv = lambda x: 1.0
    softmax = lambda x: np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)

    for epoch in range(epochs):
        # Перемешивание данных
        indices = np.random.permutation(samples)
        X_shuffled = X[indices]
        y_shuffled = y[indices]

        # Прямое распространение
        h1 = X_shuffled @ W1 + b1
        a1 = relu(h1)
        h2 = a1.dot(W2) + b2
        a2 = softmax(h2)

        # Вычисление потерь
        loss = -np.mean(y_shuffled * np.log(a2 + 1e-8))
        
        # Обратное распространение
        dh2 = (a2 - y_shuffled) / samples
        dW2 = a1.T @ dh2
        db2 = dh2.sum(axis=0)
        
        dh1 = (dh2 @ W2.T) * relu_deriv(a1)
        dW1 = X_shuffled.T @ dh1
        db1 = dh1.sum(axis=0)

        # Обновление параметров
        W1 -= learning_rate * dW1
        b1 -= learning_rate * db1
        W2 -= learning_rate * dW2
        b2 -= learning_rate * db2

        # Вычисление метрик
        train_preds = np.argmax(a2, axis=1)
        train_labels = np.argmax(y_shuffled, axis=1)
        train_acc = np.mean(train_preds == train_labels)
        
        # Валидационная точность
        val_h1 = X_val @ W1 + b1
        val_a1 = relu(val_h1)
        val_h2 = val_a1 @ W2 + b2
        val_a2 = softmax(val_h2)
        
        val_preds = np.argmax(val_a2, axis=1)
        val_labels = np.argmax(y_val, axis=1)
        val_acc = np.mean(val_preds == val_labels)

        print(f'Epoch {epoch+1}/{epochs} | Loss: {loss:.4f} | '
              f'Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}')

    return W1, b1, W2, b2


In [83]:
W1, b1, W2, b2 = train_model(
    X=x_train,
    y=y_train,
    X_val=x_val,
    y_val=y_val,
    hidden_size=1024,
    learning_rate=0.3,
    epochs=50
)

Epoch 1/50 | Loss: 0.2476 | Train Acc: 0.1166 | Val Acc: 0.3340
Epoch 2/50 | Loss: 0.2363 | Train Acc: 0.3504 | Val Acc: 0.4690
Epoch 3/50 | Loss: 0.3289 | Train Acc: 0.4285 | Val Acc: 0.4550
Epoch 4/50 | Loss: 0.2788 | Train Acc: 0.3945 | Val Acc: 0.5720
Epoch 5/50 | Loss: 0.2087 | Train Acc: 0.5575 | Val Acc: 0.6470
Epoch 6/50 | Loss: 0.1193 | Train Acc: 0.6185 | Val Acc: 0.8140
Epoch 7/50 | Loss: 0.0791 | Train Acc: 0.7413 | Val Acc: 0.8900
Epoch 8/50 | Loss: 0.0594 | Train Acc: 0.8316 | Val Acc: 0.9020
Epoch 9/50 | Loss: 0.0564 | Train Acc: 0.8385 | Val Acc: 0.9050
Epoch 10/50 | Loss: 0.0544 | Train Acc: 0.8501 | Val Acc: 0.9130
Epoch 11/50 | Loss: 0.0528 | Train Acc: 0.8515 | Val Acc: 0.9110
Epoch 12/50 | Loss: 0.0514 | Train Acc: 0.8596 | Val Acc: 0.9140
Epoch 13/50 | Loss: 0.0502 | Train Acc: 0.8603 | Val Acc: 0.9140
Epoch 14/50 | Loss: 0.0491 | Train Acc: 0.8667 | Val Acc: 0.9210
Epoch 15/50 | Loss: 0.0481 | Train Acc: 0.8673 | Val Acc: 0.9170
Epoch 16/50 | Loss: 0.0472 | Train

In [84]:
np.savez('model_weights.npz', W1=W1, b1=b1, W2=W2, b2=b2)