## Библиотеки

In [1]:
import numpy as np
import sys
from keras.datasets import mnist

## Данные

In [2]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [3]:
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)
print(y_train[:10])
print(y_test[:10])

(60000, 28, 28)
(10000, 28, 28)
(60000,)
(10000,)
[5 0 4 1 9 2 1 3 1 4]
[7 2 1 0 4 1 4 9 5 9]


In [4]:
# Возьмем 1000 наблюдений и вытянем картинки в вектор и отцентрируем и нормируем. 
images, labels = x_train[0:1000].reshape(1000, 28*28) / 255, y_train[0:1000]
# Закодируем категориальный признак.
one_hot_labels = np.zeros((len(labels), 10))
for i, l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels
test_images = x_test.reshape(len(x_test), 28*28) / 255
test_labels = np.zeros((len(y_test), 10))
for i, l in enumerate(y_test):
    test_labels[i][l] = 1

In [5]:
relu = lambda x: (x >= 0) * x
relu2deriv = lambda x: x >= 0

In [6]:
# Зададим параметры.
alpha = 0.005
iterations = 350
hidden_size = 40
pixels_per_image = 784
num_labels = 10

In [7]:
# Создадим веса.
np.random.seed(1)
weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

## Нейронная сеть

In [None]:
for iter in range(iterations):
    total_error = 0
    correct_cnt = 0
    for i in range(len(images)):
        layer_0 = images[i : i+1]
        layer_1 = relu(layer_0.dot(weights_0_1))
        layer_2 = layer_1.dot(weights_1_2)
        
        error = np.sum((layer_2 - labels[i : i+1]) ** 2)
        total_error += error
        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i : i+1]))
        
        layer_2_delta = layer_2 - labels[i : i+1]
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        
        weights_delta_1_2 = layer_1.T.dot(layer_2_delta)
        weights_delta_0_1 = layer_0.T.dot(layer_1_delta)
        
        weights_0_1 -= alpha * weights_delta_0_1
        weights_1_2 -= alpha * weights_delta_1_2
        
    sys.stdout.write('\r' + 'Iter : ' + str(iter+1) + ' Test error : ' + str(total_error/float(len(images)))[0:5] \
        + ' Correct : ' + str(correct_cnt/float(len(images))))
    

In [9]:
error = 0.0
correct_cnt = 0
for i in range(len(test_labels)):
    layer_0 = test_images[i : i+1]
    layer_1 = relu(layer_0.dot(weights_0_1))
    layer_2 = layer_1.dot(weights_1_2)
    
    error += np.sum((layer_2 - test_labels[i : i+1]) ** 2)
    correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i : i+1]))
    
sys.stdout.write('Test error : ' + str(error/float(len(test_images)))[0:5] + ' Correct : ' + str(correct_cnt/float(len(test_images))))
print()

Test error : 0.653 Correct : 0.7073


## Прореживание (dropout)

In [10]:
# Зададим параметры.
alpha = 0.005
iterations = 300
hidden_size = 100
pixels_per_image = 784
num_labels = 10

In [11]:
# Создадим веса.
np.random.seed(1)
weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

In [12]:
for iter in range(iterations):
    total_error = 0
    correct_cnt = 0
    for i in range(len(images)):
        layer_0 = images[i : i+1]
        layer_1 = relu(layer_0.dot(weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= 2 * dropout_mask
        layer_2 = layer_1.dot(weights_1_2)
        
        error = np.sum((layer_2 - labels[i : i+1]) ** 2)
        total_error += error
        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i : i+1]))
        
        layer_2_delta = layer_2 - labels[i : i+1]
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        layer_1_delta *= dropout_mask
        
        weights_delta_1_2 = layer_1.T.dot(layer_2_delta)
        weights_delta_0_1 = layer_0.T.dot(layer_1_delta)
        
        weights_0_1 -= alpha * weights_delta_0_1
        weights_1_2 -= alpha * weights_delta_1_2
        
    sys.stdout.write('\r' + 'Iter : ' + str(iter+1) + ' Test error : ' + str(total_error/float(len(images)))[0:5] \
        + ' Correct : ' + str(correct_cnt/float(len(images))))

Iter : 300 Test error : 0.301 Correct : 0.916

In [13]:
error = 0.0
correct_cnt = 0
for i in range(len(test_labels)):
    layer_0 = test_images[i : i+1]
    layer_1 = relu(layer_0.dot(weights_0_1))
    layer_2 = layer_1.dot(weights_1_2)
    
    error += np.sum((layer_2 - test_labels[i : i+1]) ** 2)
    correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i : i+1]))
    
sys.stdout.write('Test error : ' + str(error/float(len(test_images)))[0:5] + ' Correct : ' + str(correct_cnt/float(len(test_images))))
print()

Test error : 0.393 Correct : 0.8229


## Пакетный градиентный спуск

In [9]:
batch_size = 100
alpha = 0.001
iterations = 300
pixels_per_image = 784
num_labels = 10
hidden_size = 100

In [10]:
np.random.seed(1)
weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

In [None]:
for iter in range(iterations):
    error = 0.0
    correct_cnt = 0
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end = (i* batch_size, (i+1) * batch_size)
        
        layer_0 = images[batch_start : batch_end]
        layer_1 = relu(layer_0.dot(weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = layer_1.dot(weights_1_2)
        
        error += np.sum((layer_2 - labels[batch_start : batch_end]) ** 2)
        for k in range(batch_size):
            correct_cnt += int(np.argmax(layer_2[k : k+1]) == np.argmax(labels[batch_start+k : batch_start+k+1]))
            
        layer_2_delta = (layer_2 - labels[batch_start : batch_end]) / batch_size
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        layer_1_delta *= dropout_mask
        
        weights_delta_1_2 = layer_1.T.dot(layer_2_delta)
        weights_delta_0_1 = layer_0.T.dot(layer_1_delta)
        
        weights_1_2 -= alpha * weights_delta_1_2
        weights_0_1 -= alpha * weights_delta_0_1
        
    sys.stdout.write('\r' + 'Iter : ' + str(iter+1) + ' Test error : ' + str(error/float(len(images)))[0:5] \
                        + ' Correct : ' + str(correct_cnt/float(len(images))))
        

In [28]:
error = 0.0
correct_cnt = 0
for i in range(len(test_labels)):
    layer_0 = test_images[i : i+1]
    layer_1 = relu(layer_0.dot(weights_0_1))
    layer_2 = layer_1.dot(weights_1_2)
    
    error += np.sum((layer_2 - test_labels[i : i+1]) ** 2)
    correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i : i+1]))
    
sys.stdout.write('Test error : ' + str(error/float(len(test_images)))[0:5] + ' Correct : ' + str(correct_cnt/float(len(test_images))))
print()

Test error : 0.428 Correct : 0.7992


-----