## Библиотеки

In [80]:
import numpy as np
import sys
from keras.datasets import mnist

## Данные

In [81]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [82]:
# Возьмем 1000 наблюдений и вытянем картинки в вектор и отцентрируем и нормируем. 
images, labels = x_train[0:1000].reshape(1000, 28*28) / 255, y_train[0:1000]
# Закодируем категориальный признак.
one_hot_labels = np.zeros((len(labels), 10))
for i, l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels
test_images = x_test.reshape(len(x_test), 28*28) / 255
test_labels = np.zeros((len(y_test), 10))
for i, l in enumerate(y_test):
    test_labels[i][l] = 1

In [83]:
def tanh(x):
    return np.tanh(x)

def tang2deriv(output):
    return 1 - (output ** 2)

def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

In [84]:
# Зададим параметры.
alpha = 2
iterations = 300
hidden_size = 100
pixels_per_image = 784
num_labels = 10
batch_size = 100

In [85]:
# Создадим веса.
np.random.seed(1)
weights_0_1 = 0.02 * np.random.random((pixels_per_image, hidden_size)) - 0.01
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

In [86]:
for iter in range(iterations):
    correct_cnt = 0
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end = ((i* batch_size, (i+1) * batch_size))
        
        layer_0 = images[batch_start : batch_end]
        layer_1 = tanh(layer_0.dot(weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = softmax(layer_1.dot(weights_1_2))
        
        for k in range(batch_size):
            correct_cnt += int(np.argmax(layer_2[k : k+1]) == np.argmax(labels[batch_start+k : batch_start+k+1]))
            
        layer_2_delta = (layer_2 - labels[batch_start : batch_end]) / (batch_size * layer_2.shape[0])
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * tang2deriv(layer_1)
        layer_1_delta *= dropout_mask
        
        weights_delta_1_2 = layer_1.T.dot(layer_2_delta)
        weights_delta_0_1 = layer_0.T.dot(layer_1_delta)
        
        weights_1_2 -= alpha * weights_delta_1_2
        weights_0_1 -= alpha * weights_delta_0_1
    
        test_correct_cnt = 0
        for j in range(len(test_labels)):
            layer_0 = test_images[j : j+1]
            layer_1 = tanh(layer_0.dot(weights_0_1))
            layer_2 = layer_1.dot(weights_1_2)
            
            test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[j : j+1]))

    if (iter%10 ==0):  
        sys.stdout.write('\n' + 'I : ' + str(iter) + ' Train-Acc : ' + str(correct_cnt/float(len(images/batch_size))) + ' Test-Acc : ' + str(test_correct_cnt/float(len(test_images/batch_size))))   
        


I : 0 Train-Acc : 0.156 Test-Acc : 0.394
I : 10 Train-Acc : 0.723 Test-Acc : 0.6867
I : 20 Train-Acc : 0.732 Test-Acc : 0.7025
I : 30 Train-Acc : 0.763 Test-Acc : 0.734
I : 40 Train-Acc : 0.794 Test-Acc : 0.7663
I : 50 Train-Acc : 0.819 Test-Acc : 0.7913
I : 60 Train-Acc : 0.849 Test-Acc : 0.8102
I : 70 Train-Acc : 0.864 Test-Acc : 0.8228
I : 80 Train-Acc : 0.867 Test-Acc : 0.831
I : 90 Train-Acc : 0.885 Test-Acc : 0.8364
I : 100 Train-Acc : 0.883 Test-Acc : 0.8407
I : 110 Train-Acc : 0.891 Test-Acc : 0.845
I : 120 Train-Acc : 0.901 Test-Acc : 0.8481
I : 130 Train-Acc : 0.901 Test-Acc : 0.8505
I : 140 Train-Acc : 0.905 Test-Acc : 0.8526
I : 150 Train-Acc : 0.914 Test-Acc : 0.8555
I : 160 Train-Acc : 0.925 Test-Acc : 0.8577
I : 170 Train-Acc : 0.918 Test-Acc : 0.8596
I : 180 Train-Acc : 0.933 Test-Acc : 0.8619
I : 190 Train-Acc : 0.933 Test-Acc : 0.863
I : 200 Train-Acc : 0.926 Test-Acc : 0.8642
I : 210 Train-Acc : 0.931 Test-Acc : 0.8653
I : 220 Train-Acc : 0.93 Test-Acc : 0.8668
I : 