## Усовершенствование сети MNIST

In [33]:
import sys, numpy as np
from keras.datasets import mnist

np.random.seed(1)

(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[:1000].reshape(1000, 28*28) / 255, y_train[:1000])
one_hot_labels = np.zeros((labels.shape[0], 10))

for i, l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(x_test.shape[0], 28*28) / 255
test_labels = np.zeros((y_test.shape[0], 10))
for i, l in enumerate(y_test):
    test_labels[i][l] = 1
    
# relu = lambda x: (x >= 0) * x
# relu2deriv = lambda x: x >= 0

def tanh(x):
    return np.tanh(x)

def tanh2deriv(output):
    return 1 - (output ** 2)

def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

lr, iterations, hidden_size, pixels_per_image, num_labels = (2, 300, 100, 784, 10)
batch_size = 100

weights_0_1 = 0.02 * np.random.random((pixels_per_image, hidden_size)) - 0.01
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

for j in range(iterations):
    correct_cnt = 0
    
    for i in range(int(images.shape[0] / batch_size)):
        batch_start, batch_end = ((i * batch_size), ((i + 1) * batch_size))
        layer_0 = images[batch_start:batch_end]
        layer_1 = tanh(layer_0.dot(weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = softmax(layer_1.dot(weights_1_2))
        
        for k in range(batch_size):
            correct_cnt += np.argmax(layer_2[k:k + 1]) == np.argmax(labels[batch_start+k: batch_start+k+1].reshape(1, -1))    
        
        layer_2_delta = (labels[batch_start: batch_end] - layer_2) / (batch_size * layer_2.shape[0])
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * tanh2deriv(layer_1)
        layer_1_delta *= dropout_mask
        
        weights_1_2 += lr * layer_1.T.dot(layer_2_delta)
        weights_0_1 += lr * layer_0.T.dot(layer_1_delta)
    
    test_correct_cnt = 0
        
    for i in range(test_images.shape[0]):
        layer_0 = test_images[i: i + 1]
        layer_1 = tanh(layer_0.dot(weights_0_1))
        layer_2 = layer_1.dot(weights_1_2)
        test_correct_cnt += np.argmax(layer_2) == np.argmax(test_labels[i: i+1])
            
    if j % 10 == 0:
        print(f'I: {j} Test-Acc: {test_correct_cnt / test_images.shape[0]} Train-Acc: {correct_cnt / test_images.shape[0]}')

I: 0 Test-Acc: 0.394 Train-Acc: 0.0156
I: 10 Test-Acc: 0.6867 Train-Acc: 0.0723
I: 20 Test-Acc: 0.7025 Train-Acc: 0.0732
I: 30 Test-Acc: 0.734 Train-Acc: 0.0763
I: 40 Test-Acc: 0.7663 Train-Acc: 0.0794
I: 50 Test-Acc: 0.7913 Train-Acc: 0.0819
I: 60 Test-Acc: 0.8102 Train-Acc: 0.0849
I: 70 Test-Acc: 0.8228 Train-Acc: 0.0864
I: 80 Test-Acc: 0.831 Train-Acc: 0.0867
I: 90 Test-Acc: 0.8364 Train-Acc: 0.0885
I: 100 Test-Acc: 0.8407 Train-Acc: 0.0883
I: 110 Test-Acc: 0.845 Train-Acc: 0.0891
I: 120 Test-Acc: 0.8481 Train-Acc: 0.0901
I: 130 Test-Acc: 0.8505 Train-Acc: 0.0901
I: 140 Test-Acc: 0.8526 Train-Acc: 0.0905
I: 150 Test-Acc: 0.8555 Train-Acc: 0.0914
I: 160 Test-Acc: 0.8577 Train-Acc: 0.0925
I: 170 Test-Acc: 0.8596 Train-Acc: 0.0918
I: 180 Test-Acc: 0.8619 Train-Acc: 0.0933
I: 190 Test-Acc: 0.863 Train-Acc: 0.0933
I: 200 Test-Acc: 0.8642 Train-Acc: 0.0926
I: 210 Test-Acc: 0.8653 Train-Acc: 0.0931
I: 220 Test-Acc: 0.8668 Train-Acc: 0.093
I: 230 Test-Acc: 0.8672 Train-Acc: 0.0937
I: 240 Te

In [None]:
216