## Трехслойная сеть

In [91]:
import sys, numpy as np
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[:1000].reshape(1000, 28*28) / 255, y_train[:1000])
one_hot_labels = np.zeros((labels.shape[0], 10))

for i, l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(x_test.shape[0], 28*28) / 255
test_labels = np.zeros((y_test.shape[0], 10))
for i, l in enumerate(y_test):
    test_labels[i][l] = 1
    
np.random.seed(1)

relu = lambda x: (x >= 0) * x
relu2deriv = lambda x: x >= 0

lr, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)

weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    
    for i in range(images.shape[0]):
        layer_0 = images[i].reshape(1, -1)
        layer_1 = relu(layer_0.dot(weights_0_1))
        layer_2 = layer_1.dot(weights_1_2)
        error += ((layer_2 - labels[i].reshape(1, -1)) ** 2).sum()
        correct_cnt += np.argmax(layer_2) == np.argmax(labels[i].reshape(1, -1))
        
        layer_2_delta = labels[i].reshape(1, -1) - layer_2
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        
        weights_1_2 += lr * layer_1.T.dot(layer_2_delta)
        weights_0_1 += lr * layer_0.T.dot(layer_1_delta)
    
    print(f'I: {j} Error: {(error / images.shape[0]):.3f} Correct: {correct_cnt / images.shape[0]}')

I: 0 Error: 0.722 Correct: 0.537
I: 1 Error: 0.513 Correct: 0.753
I: 2 Error: 0.449 Correct: 0.804
I: 3 Error: 0.411 Correct: 0.837
I: 4 Error: 0.386 Correct: 0.846
I: 5 Error: 0.367 Correct: 0.863
I: 6 Error: 0.353 Correct: 0.876
I: 7 Error: 0.340 Correct: 0.884
I: 8 Error: 0.330 Correct: 0.892
I: 9 Error: 0.320 Correct: 0.897
I: 10 Error: 0.312 Correct: 0.901
I: 11 Error: 0.304 Correct: 0.904
I: 12 Error: 0.298 Correct: 0.91
I: 13 Error: 0.292 Correct: 0.914
I: 14 Error: 0.286 Correct: 0.918
I: 15 Error: 0.281 Correct: 0.918
I: 16 Error: 0.277 Correct: 0.921
I: 17 Error: 0.272 Correct: 0.922
I: 18 Error: 0.268 Correct: 0.926
I: 19 Error: 0.265 Correct: 0.927
I: 20 Error: 0.261 Correct: 0.93
I: 21 Error: 0.257 Correct: 0.932
I: 22 Error: 0.254 Correct: 0.933
I: 23 Error: 0.251 Correct: 0.935
I: 24 Error: 0.248 Correct: 0.935
I: 25 Error: 0.245 Correct: 0.939
I: 26 Error: 0.242 Correct: 0.941
I: 27 Error: 0.240 Correct: 0.941
I: 28 Error: 0.237 Correct: 0.941
I: 29 Error: 0.235 Correct

I: 238 Error: 0.122 Correct: 0.998
I: 239 Error: 0.122 Correct: 0.998
I: 240 Error: 0.122 Correct: 0.998
I: 241 Error: 0.122 Correct: 0.998
I: 242 Error: 0.122 Correct: 0.998
I: 243 Error: 0.121 Correct: 0.998
I: 244 Error: 0.121 Correct: 0.998
I: 245 Error: 0.121 Correct: 0.998
I: 246 Error: 0.121 Correct: 0.998
I: 247 Error: 0.121 Correct: 0.998
I: 248 Error: 0.121 Correct: 0.999
I: 249 Error: 0.120 Correct: 0.999
I: 250 Error: 0.120 Correct: 0.999
I: 251 Error: 0.120 Correct: 0.999
I: 252 Error: 0.120 Correct: 0.999
I: 253 Error: 0.120 Correct: 0.999
I: 254 Error: 0.120 Correct: 0.999
I: 255 Error: 0.120 Correct: 0.999
I: 256 Error: 0.119 Correct: 0.999
I: 257 Error: 0.119 Correct: 0.999
I: 258 Error: 0.119 Correct: 0.999
I: 259 Error: 0.119 Correct: 0.999
I: 260 Error: 0.119 Correct: 0.999
I: 261 Error: 0.119 Correct: 0.999
I: 262 Error: 0.118 Correct: 0.999
I: 263 Error: 0.118 Correct: 0.999
I: 264 Error: 0.118 Correct: 0.999
I: 265 Error: 0.118 Correct: 0.999
I: 266 Error: 0.118 

In [104]:
error, correct_cnt = (0, 0)

for i in range(test_images.shape[0]):
    layer_0 = test_images[i].reshape(1, -1)
    layer_1 = relu(layer_0.dot(weights_0_1))
    layer_2 = layer_1.dot(weights_1_2)
    error += ((test_labels[i].reshape(1, -1) - layer_2) ** 2).sum()
    correct_cnt += np.argmax(layer_2) == np.argmax(test_labels[i].reshape(1, -1))
print(f'Error: {(error / test_images.shape[0]):.3f} Correct: {correct_cnt / test_images.shape[0]}')

Error: 0.653 Correct: 0.7073


In [224]:
er = 0
for _ in range(1000):
    er += (np.random.randint(2, size=(5, 30))).sum()
print(er / 1000)

75.203


## Трехслойная сеть с прореживанием

In [229]:
import sys, numpy as np
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[:1000].reshape(1000, 28*28) / 255, y_train[:1000])
one_hot_labels = np.zeros((labels.shape[0], 10))

for i, l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(x_test.shape[0], 28*28) / 255
test_labels = np.zeros((y_test.shape[0], 10))
for i, l in enumerate(y_test):
    test_labels[i][l] = 1
    
np.random.seed(1)

relu = lambda x: (x >= 0) * x
relu2deriv = lambda x: x >= 0

lr, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 300, 100, 784, 10)

weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    
    for i in range(images.shape[0]):
        layer_0 = images[i].reshape(1, -1)
        layer_1 = relu(layer_0.dot(weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = layer_1.dot(weights_1_2)
        error += ((layer_2 - labels[i].reshape(1, -1)) ** 2).sum()
        correct_cnt += np.argmax(layer_2) == np.argmax(labels[i].reshape(1, -1))
        
        layer_2_delta = labels[i].reshape(1, -1) - layer_2
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        layer_1_delta *= dropout_mask
        
        weights_1_2 += lr * layer_1.T.dot(layer_2_delta)
        weights_0_1 += lr * layer_0.T.dot(layer_1_delta)
    
    if j % 10 == 0:
        print(f'I: {j} Error: {(error / images.shape[0]):.3f} Correct: {correct_cnt / images.shape[0]}')

I: 0 Error: 0.891 Correct: 0.413
I: 10 Error: 0.472 Correct: 0.764
I: 20 Error: 0.431 Correct: 0.809
I: 30 Error: 0.416 Correct: 0.811
I: 40 Error: 0.413 Correct: 0.827
I: 50 Error: 0.392 Correct: 0.836
I: 60 Error: 0.402 Correct: 0.836
I: 70 Error: 0.383 Correct: 0.857
I: 80 Error: 0.387 Correct: 0.854
I: 90 Error: 0.377 Correct: 0.868
I: 100 Error: 0.370 Correct: 0.864
I: 110 Error: 0.372 Correct: 0.868
I: 120 Error: 0.353 Correct: 0.857
I: 130 Error: 0.352 Correct: 0.867
I: 140 Error: 0.356 Correct: 0.885
I: 150 Error: 0.342 Correct: 0.883
I: 160 Error: 0.361 Correct: 0.876
I: 170 Error: 0.345 Correct: 0.889
I: 180 Error: 0.333 Correct: 0.892
I: 190 Error: 0.335 Correct: 0.898
I: 200 Error: 0.348 Correct: 0.893
I: 210 Error: 0.337 Correct: 0.894
I: 220 Error: 0.326 Correct: 0.896
I: 230 Error: 0.322 Correct: 0.894
I: 240 Error: 0.332 Correct: 0.898
I: 250 Error: 0.320 Correct: 0.899
I: 260 Error: 0.322 Correct: 0.899
I: 270 Error: 0.313 Correct: 0.906
I: 280 Error: 0.317 Correct: 0.

In [230]:
error, correct_cnt = (0, 0)

for i in range(test_images.shape[0]):
    layer_0 = test_images[i].reshape(1, -1)
    layer_1 = relu(layer_0.dot(weights_0_1))
    layer_2 = layer_1.dot(weights_1_2)
    error += ((test_labels[i].reshape(1, -1) - layer_2) ** 2).sum()
    correct_cnt += np.argmax(layer_2) == np.argmax(test_labels[i].reshape(1, -1))
print(f'Error: {(error / test_images.shape[0]):.3f} Correct: {correct_cnt / test_images.shape[0]}')

Error: 0.393 Correct: 0.8229
