### Example on pp. 188-189

In [1]:
import numpy
from NNet import NNet

# One weight matrix => a single layer mapping three inputs onto one output.
weights = [[[0.1],
            [0.2],
            [-0.1]]]
alphas = [0.01]
nn = NNet(weights)
nn.setAlphas(alphas)

# Called `datain` (the name the streetlight example uses) instead of `input`,
# so the built-in input() is not shadowed for the rest of the kernel session.
datain = numpy.array([[8.5, 0.65, 1.2]])
goal = numpy.array([[1]])
for i in range(4):
    # Predict, show the goal and the net's current state, then learn from the
    # same sample; each printed block therefore reflects the weights *before*
    # that round's update.
    output = nn.fire(datain)
    print('Goal:    ' + str(goal))
    print(nn)
    nn.learn(datain, goal)

Goal:    [[1]]
Layer 1: [[0.86]]
Weights: [[0.100000],
          [0.200000],
          [-0.100000]]
Layer 0: [[8.5  0.65 1.2 ]]

Goal:    [[1]]
Layer 1: [[0.9637575]]
Weights: [[0.111900],
          [0.200910],
          [-0.098320]]
Layer 0: [[8.5  0.65 1.2 ]]

Goal:    [[1]]
Layer 1: [[0.99061772]]
Weights: [[0.114981],
          [0.201146],
          [-0.097885]]
Layer 0: [[8.5  0.65 1.2 ]]

Goal:    [[1]]
Layer 1: [[0.99757116]]
Weights: [[0.115778],
          [0.201207],
          [-0.097773]]
Layer 0: [[8.5  0.65 1.2 ]]



### Example on pp. 226-227

In [2]:
    import numpy as np
    from NNet import NNet

    # Single-layer net: one 3x1 weight matrix, learning rate 0.1.
    nn = NNet([[[0.5],
                [0.48],
                [-0.7]]])
    nn.setAlphas([0.1])

    # Six observations with three values each (one row per observation).
    streetlights = np.array([[1, 0, 1],
                             [0, 1, 1],
                             [0, 0, 1],
                             [1, 1, 1],
                             [0, 1, 1],
                             [1, 0, 1]])

    # Transposed so the targets form a 6x1 column: one target per streetlights row.
    walk_vs_stop = np.array([[0, 1, 0, 1, 1, 0]]).T

    # Illustrative first sample only; both names are reassigned on every pass
    # of the training loop below before they are used.
    datain = streetlights[0]  # [1,0,1]
    goal_prediction = walk_vs_stop[0]  # equals 0... i.e. "stop"

    verbose = False
    def vprint(s):
        """Print `s` only while the module-level `verbose` flag is truthy."""
        if not verbose:
            return
        print(s)

    # Forty passes over the data set; after each pass the squared error summed
    # over all rows is printed, and the final state of the net is shown at the end.
    for iteration in range(40):
        vprint('~~~~~~~~~~~ Iteration %d ~~~~~~~~~~~' % iteration)
        error_for_all_lights = 0
        for row_index in range(walk_vs_stop.shape[0]):
            # Slicing (rather than integer indexing) keeps each sample 2-D.
            next_row = row_index + 1
            datain = streetlights[row_index:next_row]
            goal_prediction = walk_vs_stop[row_index:next_row]

            prediction = nn.fire(datain)
            vprint(nn)

            residual = goal_prediction - prediction
            error = residual ** 2
            error_for_all_lights = error_for_all_lights + error

            nn.learn(datain, goal_prediction)

        print("Error:" + str(error_for_all_lights))
        vprint('')
    vprint('~~~~~~~~~~~~~~~ End ~~~~~~~~~~~~~~~~')
    print(nn)

Error:[[2.65612311]]
Error:[[0.96287018]]
Error:[[0.55091659]]
Error:[[0.36445837]]
Error:[[0.25167687]]
Error:[[0.17797575]]
Error:[[0.12864461]]
Error:[[0.09511037]]
Error:[[0.07194564]]
Error:[[0.05564915]]
Error:[[0.04394764]]
Error:[[0.03535797]]
Error:[[0.028907]]
Error:[[0.02395166]]
Error:[[0.02006311]]
Error:[[0.01695209]]
Error:[[0.01442082]]
Error:[[0.01233174]]
Error:[[0.01058739]]
Error:[[0.00911723]]
Error:[[0.00786904]]
Error:[[0.00680327]]
Error:[[0.0058893]]
Error:[[0.00510293]]
Error:[[0.00442464]]
Error:[[0.00383851]]
Error:[[0.00333131]]
Error:[[0.00289194]]
Error:[[0.00251105]]
Error:[[0.00218067]]
Error:[[0.00189397]]
Error:[[0.00164511]]
Error:[[0.00142904]]
Error:[[0.0012414]]
Error:[[0.00107844]]
Error:[[0.00093689]]
Error:[[0.00081394]]
Error:[[0.00070713]]
Error:[[0.00061434]]
Error:[[0.00053374]]
Layer 1: [[-0.00262562]]
Weights: [[0.013892],
          [1.013815],
          [-0.015993]]
Layer 0: [[1 0 1]]



#### ...but what happens if the problem is a little harder?

This is the same neural net with a smaller training set (four examples instead of six), but the error does not drop to zero.

In [3]:
import numpy as np
from NNet import NNet

# Same starting weights and learning rate as the previous example.
nn = NNet([[[0.5],
            [0.48],
            [-0.7]]])
nn.setAlphas([0.1])

# Every sample carries an extra pair of brackets, so plain integer indexing
# (streetlights[k]) already yields a 2-D 1x3 row.
streetlights = np.array([[[1, 0, 1]],
                         [[0, 1, 1]],
                         [[0, 0, 1]],
                         [[1, 1, 1]]])

# Shape (1, 1, 4) transposed to (4, 1, 1): walk_vs_stop[k] is a 1x1 target.
walk_vs_stop = np.array([[[1, 1, 0, 0]]]).T

# Illustrative first sample. Named `datain` rather than `input` so the
# built-in input() is not shadowed in the kernel namespace.
datain = streetlights[0]  # [[1, 0, 1]]
goal_prediction = walk_vs_stop[0]  # [[1]], i.e. "walk"

verbose = False
def vprint(s):
    """Print `s` only while the module-level `verbose` flag is truthy."""
    if not verbose:
        return
    print(s)

# Forty passes over the four samples. After each pass the squared error summed
# over all samples is printed; the net's final state is printed at the end.
for iteration in range(40):
    vprint('~~~~~~~~~~~ Iteration %d ~~~~~~~~~~~' % iteration)
    error_for_all_lights = 0
    for row_index in range(len(walk_vs_stop)):
        # `datain` instead of `input`: assigning to `input` at cell level would
        # shadow the built-in input() for every later cell in the session.
        datain = streetlights[row_index]
        goal_prediction = walk_vs_stop[row_index]
        prediction = nn.fire(datain)
        vprint(nn)

        error = (goal_prediction - prediction) ** 2
        error_for_all_lights += error

        nn.learn(datain, goal_prediction)

    print("Error:" + str(error_for_all_lights))
    vprint('')
vprint('~~~~~~~~~~~~~~~ End ~~~~~~~~~~~~~~~~')
print(nn)

Error:[[3.490269]]
Error:[[2.73135902]]
Error:[[2.40459207]]
Error:[[2.21518015]]
Error:[[2.08027017]]
Error:[[1.97321711]]
Error:[[1.88378038]]
Error:[[1.80717553]]
Error:[[1.74072358]]
Error:[[1.6826847]]
Error:[[1.63179743]]
Error:[[1.58707581]]
Error:[[1.54771178]]
Error:[[1.51302396]]
Error:[[1.4824283]]
Error:[[1.45541947]]
Error:[[1.43155795]]
Error:[[1.41046033]]
Error:[[1.3917915]]
Error:[[1.37525829]]
Error:[[1.36060401]]
Error:[[1.34760377]]
Error:[[1.33606048]]
Error:[[1.32580133]]
Error:[[1.31667473]]
Error:[[1.30854764]]
Error:[[1.30130328]]
Error:[[1.29483906]]
Error:[[1.28906487]]
Error:[[1.28390146]]
Error:[[1.27927913]]
Error:[[1.27513655]]
Error:[[1.2714197]]
Error:[[1.26808101]]
Error:[[1.26507853]]
Error:[[1.26237524]]
Error:[[1.25993847]]
Error:[[1.25773938]]
Error:[[1.25575244]]
Error:[[1.25395507]]
Layer 1: [[0.59955275]]
Weights: [[0.044807],
          [-0.013149],
          [0.388029]]
Layer 0: [[1 1 1]]



### Example on pp. 260-265 (with a verbose option)

In [4]:
verbose = False
def vprint(s):
    """Print `s` only while the module-level `verbose` flag is truthy."""
    if not verbose:
        return
    print(s)

import numpy as np
from numpy import nan

# Seed NumPy's global RNG so the random weight matrices created further down
# in this cell come out the same on every run.
np.random.seed(1)

def relu(x):
    """Rectified linear unit: keep the strictly positive entries of `x`, zero the rest."""
    is_positive = x > 0
    return is_positive * x


def relu2deriv(output):
    """Slope of ReLU given its output: truthy where `output` is > 0, falsy elsewhere.

    The result is a boolean (or boolean array); it acts as 1/0 when multiplied
    into a delta.
    """
    is_active = output > 0
    return is_active

# Four samples of three values each; walk_vs_stop is the matching 4x1 column
# of targets (the row vector is transposed).
streetlights = np.array([[1, 0, 1],
                         [0, 1, 1],
                         [0, 0, 1],
                         [1, 1, 1]])

walk_vs_stop = np.array([[1, 1, 0, 0]]).T

alpha = 0.2  # learning rate used for both weight matrices
hidden_size = 4  # width of the hidden layer

# NaN placeholders for the three layers; the training loop overwrites all of
# them on its first forward pass.
layer_0 = [nan] * 3
layer_1 = [nan] * 4
layer_2 = nan
# np.random.random draws from [0, 1), so 2*r - 1 spreads the initial weights
# over [-1, 1). The order of these two draws determines the values obtained
# from the seeded generator, so it must not be swapped.
weights_0_1 = 2 * np.random.random((3, hidden_size)) - 1
weights_1_2 = 2 * np.random.random((hidden_size, 1)) - 1

def weights2str(weights):
    """Format a 2-D collection of weights as an aligned, bracketed text block.

    Every value is rendered with ``%f``. Each row becomes ``[v, v, ...]``; the
    first row follows the ``Weights: [`` prefix and every further row starts on
    a new line, indented by ten spaces so that the rows line up.

    Args:
        weights: iterable of rows, each row an iterable of numbers.

    Returns:
        The formatted string. An empty `weights` yields ``'Weights: []'``
        (the previous loop-based version returned a lone ``']'`` in that case).
    """
    rows = ['[' + ', '.join('%f' % value for value in row) + ']'
            for row in weights]
    # Ten spaces = len('Weights: ['), which aligns continuation rows.
    row_separator = ',\n' + ' ' * 10
    return 'Weights: [' + row_separator.join(rows) + ']'

def vprintnn():
    """When `verbose` is set, print the network top-down: layer 2, its incoming
    weights, layer 1, its incoming weights, layer 0, then a blank line."""
    if verbose:
        report = (
            'Layer 2: ' + str(layer_2),
            weights2str(weights_1_2),
            'Layer 1: ' + str(layer_1),
            weights2str(weights_0_1),
            'Layer 0: ' + str(layer_0),
            '',
        )
        for line in report:
            print(line)

nIterations = 60
# Uncomment the next two lines for a short, fully traced run.
# verbose = True
# nIterations = 5

for iteration in range(nIterations):
    vprint('~~~~~~~~~~~ Iteration %d ~~~~~~~~~~~' % iteration)
    # Squared error accumulated over the samples of this pass.
    layer_2_error = 0
    for i in range(len(streetlights)):
        # Forward pass on one sample; the i:i+1 slices keep the row and the
        # goal two-dimensional so the dot products and transposes below work.
        layer_0 = streetlights[i:i + 1]
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        layer_2 = np.dot(layer_1, weights_1_2)
        goal = walk_vs_stop[i:i + 1]
        vprint('Goal:   ' + str(goal))
        vprintnn()

        layer_2_error += np.sum((layer_2 - goal) ** 2)

        # Backward pass: the output delta is pushed back through weights_1_2
        # and masked by the ReLU slope of the hidden layer.
        layer_2_delta = (layer_2 - goal)
        derivative = relu2deriv(layer_1)
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * derivative

        # Both deltas are computed above, before either matrix is modified,
        # so layer_1_delta is based on the pre-update weights_1_2.
        weights_1_2 -= alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 -= alpha * layer_0.T.dot(layer_1_delta)

    # Report the accumulated error on every tenth pass only.
    if (iteration % 10 == 9):
        print("Error:" + str(layer_2_error))

Error:0.6342311598444467
Error:0.35838407676317513
Error:0.0830183113303298
Error:0.006467054957103705
Error:0.0003292669000750734
Error:1.5055622665134859e-05


#### The same example using NNet

In [5]:
verbose = False
def vprint(s):
    """Print `s` only while the module-level `verbose` flag is truthy."""
    if not verbose:
        return
    print(s)

import numpy as np
from NNet import NNet

# Same seed as the hand-written version of this example.
# NOTE(review): presumably NNet draws its initial weights from NumPy's global
# RNG, which is why the seed is set before constructing it - confirm in NNet.
np.random.seed(1)

# Four samples of three values each, with a 4x1 column of targets.
streetlights = np.array([[1, 0, 1],
                         [0, 1, 1],
                         [0, 0, 1],
                         [1, 1, 1]])

walk_vs_stop = np.array([[1, 1, 0, 0]]).T

# 3 inputs -> 4 hidden -> 1 output, one learning rate and one activation name
# per weight layer.
nn = NNet(sizes=[3, 4, 1])
nn.setAlphas([0.2, 0.2])
nn.setActivations(['relu', 'linear'])
# Show the untrained network before any sample has been fired through it.
print(nn)

nIterations = 60
# Uncomment the next two lines for a short, fully traced run.
# verbose = True
# nIterations = 5

for iteration in range(nIterations):
    vprint('~~~~~~~~~~~ Iteration %d ~~~~~~~~~~~' % iteration)
    # Squared error accumulated over the samples of this pass.
    layer_2_error = 0
    for i in range(len(streetlights)):
        # `datain` instead of `input`: assigning to `input` at cell level would
        # shadow the built-in input() for every later cell in the session.
        # The i:i+1 slices keep the sample and its goal two-dimensional.
        datain = streetlights[i:i+1]
        prediction = nn.fire(datain)
        goal = walk_vs_stop[i:i+1]
        vprint('Goal:   ' + str(goal))
        vprint(nn)

        error = (goal - prediction) ** 2
        layer_2_error += error

        nn.learn(datain, goal)

    # Report the accumulated error on every tenth pass only.
    if (iteration % 10 == 9):
        print("Error:" + str(layer_2_error))

Layer 2: [nan]
Weights: [[-0.591096],
          [0.756235],
          [-0.945225],
          [0.340935]]
Layer 1: [nan, nan, nan, nan]
Weights: [[-0.165956, 0.440649, -0.999771, -0.395335],
          [-0.706488, -0.815323, -0.627480, -0.308879],
          [-0.206465, 0.077633, -0.161611, 0.370439]]
Layer 0: [nan, nan, nan]

Error:[[0.63423116]]
Error:[[0.35838408]]
Error:[[0.08301831]]
Error:[[0.00646705]]
Error:[[0.00032927]]
Error:[[1.50556227e-05]]


### Loading the MNIST images

In [1]:
import sys, numpy as np
from keras.datasets import mnist
import os

# Calling mnist.load_data() with no argument fails here, so the data set is
# read from a cached copy of mnist.npz located relative to the working directory.
download = os.getcwd() + '/../grokking-data/mnist.npz'
(x_train, y_train), (x_test, y_test) = mnist.load_data(download)

# Training subset: the first 1000 images, flattened to 28*28 values per row
# and divided by 255, together with their labels.
images = x_train[0:1000].reshape(1000, 28*28) / 255
labels = y_train[0:1000]

# One-hot encode: in row k, the column given by label k is set to 1.
one_hot_labels = np.zeros((len(labels), 10))
one_hot_labels[np.arange(len(labels)), labels] = 1
labels = one_hot_labels

# Full test set, prepared the same way.
test_images = x_test.reshape(len(x_test), 28*28) / 255
test_labels = np.zeros((len(y_test), 10))
test_labels[np.arange(len(y_test)), y_test] = 1

Using TensorFlow backend.


### Example, pp. 283-284, output on pp. 288-289

In [3]:
import sys, numpy as np
from keras.datasets import mnist
# Seed NumPy's global RNG so the two random weight matrices below are repeatable.
np.random.seed(1)

# NOTE(review): both lambdas test `>= 0`, not `> 0`. For relu this makes no
# difference to the value (0 * x is still zero). For relu2deriv it does: the
# training loop passes it layer_1, which is already a relu() output and so
# never below zero, which means the mask comes out True for every element.
# Left as is because the recorded output below was produced with this code.
relu = lambda x:(x>=0) * x # returns x where x >= 0, and 0 elsewhere
relu2deriv = lambda x: x>=0 # True (acts as 1) where x >= 0, False (0) elsewhere
alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)

# np.random.random draws from [0, 1), so 0.2*r - 0.1 gives weights in [-0.1, 0.1).
weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1

# One pass over the training images per iteration, updating the weights after
# every single image; the test set is scored on every tenth iteration and on
# the last one.
for j in range(iterations):
    # Running totals for this pass over the training images.
    error, correct_cnt = (0.0, 0)
    
    for i in range(len(images)):
        # Forward pass on a single image; i:i+1 keeps the row two-dimensional.
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        layer_2 = np.dot(layer_1,weights_1_2)

        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        # A prediction counts as correct when its largest output is at the
        # same position as the 1 in the one-hot label.
        correct_cnt += int(np.argmax(layer_2) == \
                                        np.argmax(labels[i:i+1]))

        # Deltas are (label - prediction), hence the weights are updated with
        # `+=` below. Both deltas are computed before either matrix changes.
        layer_2_delta = (labels[i:i+1] - layer_2)
        layer_1_delta = layer_2_delta.dot(weights_1_2.T)\
                                    * relu2deriv(layer_1)
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

    # The leading "\r" returns to the start of the line, so the training
    # statistics overwrite the previous ones until a newline is printed below.
    # The error is cut to its first five characters.
    sys.stdout.write("\r I:"+str(j)+ \
                     " Train-Err:" + str(error/float(len(images)))[0:5] +\
                     " Train-Acc:" + str(correct_cnt/float(len(images))))
    
    if(j % 10 == 0 or j == iterations-1):
        # Reuse the same counters for the test set (forward pass only, no
        # weight updates).
        error, correct_cnt = (0.0, 0)

        for i in range(len(test_images)):

            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1,weights_1_2)

            error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            correct_cnt += int(np.argmax(layer_2) == \
                                            np.argmax(test_labels[i:i+1]))
        sys.stdout.write(" Test-Err:" + str(error/float(len(test_images)))[0:5] +\
                         " Test-Acc:" + str(correct_cnt/float(len(test_images))))
        # End the line so this iteration's statistics stay visible.
        print()

 I:0 Train-Err:0.722 Train-Acc:0.537 Test-Err:0.601 Test-Acc:0.6488
 I:10 Train-Err:0.312 Train-Acc:0.901 Test-Err:0.420 Test-Acc:0.8114
 I:20 Train-Err:0.260 Train-Acc:0.937 Test-Err:0.414 Test-Acc:0.8111
 I:30 Train-Err:0.232 Train-Acc:0.946 Test-Err:0.417 Test-Acc:0.8066
 I:40 Train-Err:0.215 Train-Acc:0.956 Test-Err:0.426 Test-Acc:0.8019
 I:50 Train-Err:0.204 Train-Acc:0.966 Test-Err:0.437 Test-Acc:0.7982
 I:60 Train-Err:0.194 Train-Acc:0.967 Test-Err:0.448 Test-Acc:0.7921
 I:70 Train-Err:0.186 Train-Acc:0.975 Test-Err:0.458 Test-Acc:0.7864
 I:80 Train-Err:0.179 Train-Acc:0.979 Test-Err:0.466 Test-Acc:0.7817
 I:90 Train-Err:0.172 Train-Acc:0.981 Test-Err:0.474 Test-Acc:0.7758
 I:100 Train-Err:0.166 Train-Acc:0.984 Test-Err:0.482 Test-Acc:0.7706
 I:110 Train-Err:0.161 Train-Acc:0.984 Test-Err:0.489 Test-Acc:0.7686
 I:120 Train-Err:0.157 Train-Acc:0.986 Test-Err:0.496 Test-Acc:0.766
 I:130 Train-Err:0.153 Train-Acc:0.999 Test-Err:0.502 Test-Acc:0.7622
 I:140 Train-Err:0.149 Train-Acc