### FULL training run, long enough to observe overfitting.

Coded with the guidance of 'Make your own neural network'.

In [1]:
import numpy as np
import scipy.special

#neural network class definition 

class NeuralNetwork:
    """Three-layer (input -> hidden -> output) network with sigmoid activations.

    Attributes:
        inodes, hnodes, onodes: number of nodes in the input, hidden and
            output layers.
        lr: learning rate used to scale every weight update.
        wih: input->hidden weight matrix, shape (hnodes, inodes).
        who: hidden->output weight matrix, shape (onodes, hnodes).
        activation_function: element-wise logistic sigmoid.
    """

    def __init__(self, ninodes, nhnodes, nonodes, lr):
        """Store the layer sizes and learning rate, then draw random weights.

        Args:
            ninodes: number of input nodes.
            nhnodes: number of hidden nodes.
            nonodes: number of output nodes.
            lr: learning rate.
        """
        self.inodes = ninodes
        self.hnodes = nhnodes
        self.onodes = nonodes
        self.lr = lr

        # logistic sigmoid, applied element-wise to a layer's incoming signals
        self.activation_function = scipy.special.expit

        # Weights are zero-mean normal samples. The standard deviation is
        # (number of nodes in the receiving layer) ** -0.5.
        # wih is drawn before who so the random stream is consumed in a fixed order.
        wih_sigma = float(self.hnodes) ** -0.5
        self.wih = np.random.normal(0.0, wih_sigma, (self.hnodes, self.inodes))

        who_sigma = float(self.onodes) ** -0.5
        self.who = np.random.normal(0.0, who_sigma, (self.onodes, self.hnodes))

    def _forward(self, inputs):
        """Push column-oriented `inputs` through both layers.

        Returns:
            (hidden_outputs, final_outputs), the activations leaving the
            hidden layer and the output layer respectively.
        """
        hidden_outputs = self.activation_function(np.dot(self.wih, inputs))
        final_outputs = self.activation_function(np.dot(self.who, hidden_outputs))
        return hidden_outputs, final_outputs

    def train(self, inputs_list, targets_list):
        """Run one forward pass and apply one weight update to both matrices.

        Args:
            inputs_list: input values for one record. np.array(..., ndmin=2).T
                turns a flat list of n values into an n x 1 column; a list of
                m records of length n would become an n x m matrix.
            targets_list: desired output values, converted the same way.
        """
        inputs = np.array(inputs_list, ndmin=2).T
        targets = np.array(targets_list, ndmin=2).T

        hidden_outputs, final_outputs = self._forward(inputs)

        # Both error terms are computed before any weight is touched.
        # The hidden-layer error is the output error passed backward through
        # the (transposed) hidden->output weights; who.T is hnodes x onodes.
        output_errors = targets - final_outputs
        hidden_errors = np.dot(self.who.T, output_errors)

        # error * sigmoid slope, where the slope of a sigmoid output s is s * (1 - s)
        output_term = output_errors * final_outputs * (1.0 - final_outputs)
        hidden_term = hidden_errors * hidden_outputs * (1.0 - hidden_outputs)

        # Each update is the product of a layer's error term with the
        # transposed activations that fed that layer, scaled by lr.
        self.who += self.lr * np.dot(output_term, hidden_outputs.T)
        self.wih += self.lr * np.dot(hidden_term, inputs.T)

    def query(self, inputs_list):
        """Return the output-layer activations for `inputs_list`.

        The input is converted exactly as in `train`; for a flat list the
        result is an onodes x 1 column of sigmoid outputs.
        """
        inputs = np.array(inputs_list, ndmin=2).T
        _, final_outputs = self._forward(inputs)
        return final_outputs

In [2]:
# Layer sizes for the first network: input, hidden and output node counts.
input_nodes, hidden_nodes, output_nodes = 784, 200, 10

# Step size applied to every weight update.
learning_rate = 0.1

# Build the network from the settings above.
# NOTE(review): `n` is not referenced by any later cell shown in this notebook.
n = NeuralNetwork(
    ninodes=input_nodes,
    nhnodes=hidden_nodes,
    nonodes=output_nodes,
    lr=learning_rate,
)

## Now having gotten the neural net running in nnet1.ipynb, train the neural net on the full MNIST dataset. 

In [3]:
import os
# Directory holding the MNIST CSV files. It defaults to the original location;
# set the MNIST_DIR environment variable to run the notebook on another machine
# without editing this cell.
mnist_dir = os.environ.get(
    "MNIST_DIR",
    "/mnt/xferUbuntu/notebooks/make_your_own_neural_network/mnist",
)

# Show where the kernel started, then move into the data directory: the later
# cells build their file paths from os.getcwd().
print(os.getcwd())
os.chdir(mnist_dir)
os.listdir(".")


/mnt/xferUbuntu/notebooks/make_your_own_neural_network


['mnist_test.csv',
 'mnist_test_10.csv',
 'mnist_train.csv',
 'mnist_train_100.csv']

In [4]:
# Read every line of the full MNIST training CSV into memory.
# The context manager closes the file even if reading raises.
with open(os.path.join(os.getcwd(), "mnist_train.csv"), 'r') as training_file:
    training_data = training_file.readlines()

In [5]:
import matplotlib.pyplot as plt
%matplotlib inline



### Train the neural net using the MNIST data, and 200 hidden nodes.

In [6]:
inodes = 28*28 # number of input nodes = dimensions of input images
hnodes = 200 # 200 hidden nodes here (the original note says the author starts with 100)
onodes = 10 # number of output nodes = 10 (1 per digit)

# Leftovers from the earlier all-at-once training attempt described at the end
# of this cell; the per-sample loop below never fills them.
training_list = []
target_list = []
lrate = 0.01
nnet2 = NeuralNetwork(inodes, hnodes, onodes, lrate)

for line in training_data:
    # each CSV line is: label, then 28*28 pixel values
    all_values = line.split(',')
    assert len(all_values) == 28*28+1

    # rescale the input values into the range [0.01, 1.0]
    # you're avoiding the value 0 because the weights can't affect that value,
    # but the value 1 is okay for input.
    # note you do not need to reshape the array before using it for training...
    # the neural net doesn't care whether it looks like an image to you!
    # (np.asarray(..., dtype=float) is what np.asfarray did; asfarray was
    # removed in NumPy 2.0.)
    inputs = ((np.asarray(all_values[1:], dtype=float)/255.0)*0.99) + 0.01
    assert len(inputs) == inodes
    assert inputs.min() >0

    # all the non-target entries will be .01, the target entry will be .99
    # otherwise you'll get large weights and a saturated network because you'll be trying
    # to hit impossible values
    targets = (np.zeros(onodes) + 0.01)
    targets[int(all_values[0])] = 0.99

    # ===>>> now we train individually with every sample.
    nnet2.train(inputs, targets)

# NOTE: an earlier version of this cell had a bug: it collected every sample
# and trained on all of them in a single call, instead of one sample at a time
# as above. The old call was:
# nnet2.train(training_list, target_list)

### Now test the neural net.

In [7]:
# Read every line of the MNIST test CSV into memory.
# The context manager closes the file even if reading raises.
with open(os.path.join(os.getcwd(), "mnist_test.csv"), 'r') as test_file:
    test_data = test_file.readlines()

In [8]:
# Parse the test CSV once: test_list holds the scaled pixel arrays and
# test_target_indices holds the matching integer labels, in the same order.
test_list = []
test_target_indices = []
for line in test_data:
    # each CSV line is: label, then the pixel values
    all_values = line.split(',')

    # rescale the input values into the range [0.01, 1.0]
    # you're avoiding the value 0 because the weights can't affect that value,
    # but the value 1 is okay for input.
    # note you do not need to reshape the array before querying the network...
    # the neural net doesn't care whether it looks like an image to you!
    # (np.asarray(..., dtype=float) is what np.asfarray did; asfarray was
    # removed in NumPy 2.0.)
    image_array = (np.asarray(all_values[1:], dtype=float)/255.0*0.99) +0.01

    # target vector in the range [.01, .99]: .01 everywhere, .99 at the label.
    # NOTE(review): target_array is built but not stored; scoring in the
    # visible cells only uses target_index.
    target_array = (np.zeros(onodes)+0.01)
    target_index = int(all_values[0])
    target_array[target_index]=0.99

    # append the test sample and its label
    test_list.append(image_array)
    test_target_indices.append(target_index)

In [9]:
# get test statistics
scorecard = []
margins = []
# go through test records
for ii, record in enumerate(test_list):
    
    results = nnet2.query(record)
    label = np.argmax(results)
    print "Network says: {}, Target was {}".format(label, test_target_indices[ii])
    if label == test_target_indices[ii]:
        scorecard.append(1)
    else:
        scorecard.append(0)
    if ii > 10:
        break
        
print "Accuracy with 1 epoch: {}".format(np.mean(scorecard))

Network says: 7, Target was 7
Network says: 2, Target was 2
Network says: 1, Target was 1
Network says: 0, Target was 0
Network says: 4, Target was 4
Network says: 1, Target was 1
Network says: 4, Target was 4
Network says: 9, Target was 9
Network says: 6, Target was 5
Network says: 9, Target was 9
Network says: 0, Target was 0
Network says: 6, Target was 6
Accuracy with 1 epoch: 0.916666666667


### With learning rate .01, accuracy is 91 2/3% (but that figure comes from only the 12 test records shown in the output above: 11 of 12 correct).
#### now what is it like if I increase the number of epochs? I.e., do more than one pass through the training data. 

#### I use the same code, but now I wrap it in an outer loop. 
### Note that even after 10 epochs, I'm not overfitting yet. 

In [13]:
# NOTE(review): training_list / target_list are not used by the loops below.
training_list = []
target_list = []
lrate = 0.01
nnet3 = NeuralNetwork(inodes, hnodes, onodes, lrate)

# test accuracy after each completed epoch; later cells keep appending to it
accuracies = []

for epoch in range(10):
    for line in training_data:
        # each CSV line is: label, then 28*28 pixel values
        all_values = line.split(',')
        assert len(all_values) == 28*28+1

        # rescale the input values into the range [0.01, 1.0]
        # you're avoiding the value 0 because the weights can't affect that value,
        # but the value 1 is okay for input.
        # note you do not need to reshape the array before using it for training...
        # the neural net doesn't care whether it looks like an image to you!
        # (np.asarray(..., dtype=float) is what np.asfarray did; asfarray was
        # removed in NumPy 2.0.)
        inputs = ((np.asarray(all_values[1:], dtype=float)/255.0)*0.99) + 0.01
        assert len(inputs) == inodes
        assert inputs.min() >0

        # all the non-target entries will be .01, the target entry will be .99
        # otherwise you'll get large weights and a saturated network because you'll be trying
        # to hit impossible values
        targets = (np.zeros(onodes) + 0.01)
        targets[int(all_values[0])] = 0.99

        # ===>>> now we train individually with every sample.
        nnet3.train(inputs, targets)

    # tuck the test loop inside the epochs loop,
    # so you get a test rate per epoch.
    scorecard = []
    margins = []  # NOTE(review): never filled in by this cell

    # go through test records
    for ii, record in enumerate(test_list):

        results = nnet3.query(record)
        # the predicted digit is the index of the largest output activation
        label = np.argmax(results)
        if label == test_target_indices[ii]:
            scorecard.append(1)
        else:
            scorecard.append(0)

    accuracy = np.mean(scorecard)
    accuracies.append(accuracy)
    # epoch is 0-based, so epoch+1 is the number of completed passes. This
    # matches the labels printed by the follow-on cells, which continue at 11.
    print("Accuracy with {} epochs: {}".format(epoch+1, accuracy))

Accuracy with 0 epochs: 0.9238
Accuracy with 1 epochs: 0.9405
Accuracy with 2 epochs: 0.9518
Accuracy with 3 epochs: 0.9599
Accuracy with 4 epochs: 0.9639
Accuracy with 5 epochs: 0.9656
Accuracy with 6 epochs: 0.9671
Accuracy with 7 epochs: 0.9686
Accuracy with 8 epochs: 0.9702
Accuracy with 9 epochs: 0.971


In [14]:
# Continue training the same network (nnet3) for epochs 11-30, scoring it on
# the test set after every epoch.
for epoch in range(10, 30):
    for line in training_data:
        # each CSV line is: label, then 28*28 pixel values
        all_values = line.split(',')
        assert len(all_values) == 28*28+1

        # rescale the input values into the range [0.01, 1.0]
        # you're avoiding the value 0 because the weights can't affect that value,
        # but the value 1 is okay for input.
        # note you do not need to reshape the array before using it for training...
        # the neural net doesn't care whether it looks like an image to you!
        # (np.asarray(..., dtype=float) is what np.asfarray did; asfarray was
        # removed in NumPy 2.0.)
        inputs = ((np.asarray(all_values[1:], dtype=float)/255.0)*0.99) + 0.01
        assert len(inputs) == inodes
        assert inputs.min() >0

        # all the non-target entries will be .01, the target entry will be .99
        # otherwise you'll get large weights and a saturated network because you'll be trying
        # to hit impossible values
        targets = (np.zeros(onodes) + 0.01)
        targets[int(all_values[0])] = 0.99

        # ===>>> now we train individually with every sample.
        nnet3.train(inputs, targets)

    # tuck the test loop inside the epochs loop,
    # so you get a test rate per epoch.
    # scorecard is rebuilt every epoch; the per-epoch result is appended to
    # the running `accuracies` list.
    scorecard = []

    # go through test records
    for ii, record in enumerate(test_list):

        results = nnet3.query(record)
        # the predicted digit is the index of the largest output activation
        label = np.argmax(results)
        if label == test_target_indices[ii]:
            scorecard.append(1)
        else:
            scorecard.append(0)

    accuracy = np.mean(scorecard)
    accuracies.append(accuracy)
    # epoch is 0-based, so epoch+1 is the number of completed passes
    print("Accuracy with {} epochs: {}".format(epoch+1, accuracy))

Accuracy with 11 epochs: 0.9717
Accuracy with 12 epochs: 0.9735
Accuracy with 13 epochs: 0.974
Accuracy with 14 epochs: 0.9746
Accuracy with 15 epochs: 0.9747
Accuracy with 16 epochs: 0.9748
Accuracy with 17 epochs: 0.9754
Accuracy with 18 epochs: 0.9757
Accuracy with 19 epochs: 0.976
Accuracy with 20 epochs: 0.9767
Accuracy with 21 epochs: 0.9772
Accuracy with 22 epochs: 0.9774
Accuracy with 23 epochs: 0.978
Accuracy with 24 epochs: 0.9779
Accuracy with 25 epochs: 0.9779
Accuracy with 26 epochs: 0.9778
Accuracy with 27 epochs: 0.978
Accuracy with 28 epochs: 0.9781
Accuracy with 29 epochs: 0.9783
Accuracy with 30 epochs: 0.9784


### after 30 epochs, still not overfitting. The learning rate, .01, is low compared to what is recommended in the text (0.2). That is probably why. But consider this comment in the text:

Intuitively it makes sense that if you plan to explore the gradient descent for much longer (more epochs), you can afford to take shorter steps (learning rate), and overall you’ll find a better path down.

### His tests top out at roughly .97, and I'm approaching .98. Could I be finding a better path because my learning rate is lower? I'll run it for a while longer and see if I top out.


In [15]:
# Continue training the same network (nnet3) for epochs 31-100, scoring it on
# the test set after every epoch.
for epoch in range(30, 100):
    for line in training_data:
        # each CSV line is: label, then 28*28 pixel values
        all_values = line.split(',')
        assert len(all_values) == 28*28+1

        # rescale the input values into the range [0.01, 1.0]
        # you're avoiding the value 0 because the weights can't affect that value,
        # but the value 1 is okay for input.
        # note you do not need to reshape the array before using it for training...
        # the neural net doesn't care whether it looks like an image to you!
        # (np.asarray(..., dtype=float) is what np.asfarray did; asfarray was
        # removed in NumPy 2.0.)
        inputs = ((np.asarray(all_values[1:], dtype=float)/255.0)*0.99) + 0.01
        assert len(inputs) == inodes
        assert inputs.min() >0

        # all the non-target entries will be .01, the target entry will be .99
        # otherwise you'll get large weights and a saturated network because you'll be trying
        # to hit impossible values
        targets = (np.zeros(onodes) + 0.01)
        targets[int(all_values[0])] = 0.99

        # ===>>> now we train individually with every sample.
        nnet3.train(inputs, targets)

    # tuck the test loop inside the epochs loop,
    # so you get a test rate per epoch.
    # scorecard is rebuilt every epoch; the per-epoch result is appended to
    # the running `accuracies` list.
    scorecard = []

    # go through test records
    for ii, record in enumerate(test_list):

        results = nnet3.query(record)
        # the predicted digit is the index of the largest output activation
        label = np.argmax(results)
        if label == test_target_indices[ii]:
            scorecard.append(1)
        else:
            scorecard.append(0)

    accuracy = np.mean(scorecard)
    accuracies.append(accuracy)
    # epoch is 0-based, so epoch+1 is the number of completed passes
    print("Accuracy with {} epochs: {}".format(epoch+1, accuracy))

Accuracy with 31 epochs: 0.9786
Accuracy with 32 epochs: 0.9789
Accuracy with 33 epochs: 0.9788
Accuracy with 34 epochs: 0.9788
Accuracy with 35 epochs: 0.9785
Accuracy with 36 epochs: 0.9786
Accuracy with 37 epochs: 0.9786
Accuracy with 38 epochs: 0.9785
Accuracy with 39 epochs: 0.9787
Accuracy with 40 epochs: 0.9788
Accuracy with 41 epochs: 0.979
Accuracy with 42 epochs: 0.979
Accuracy with 43 epochs: 0.979
Accuracy with 44 epochs: 0.9787
Accuracy with 45 epochs: 0.9787
Accuracy with 46 epochs: 0.9788
Accuracy with 47 epochs: 0.9789
Accuracy with 48 epochs: 0.9788
Accuracy with 49 epochs: 0.9785
Accuracy with 50 epochs: 0.9784
Accuracy with 51 epochs: 0.978
Accuracy with 52 epochs: 0.9779
Accuracy with 53 epochs: 0.9782
Accuracy with 54 epochs: 0.9782
Accuracy with 55 epochs: 0.9783
Accuracy with 56 epochs: 0.9785
Accuracy with 57 epochs: 0.9787
Accuracy with 58 epochs: 0.9784
Accuracy with 59 epochs: 0.9783
Accuracy with 60 epochs: 0.9781
Accuracy with 61 epochs: 0.978
Accuracy with

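### Note that in the output recorded above, accuracy topped out at .979 (epochs 41–43) and then drifted slightly lower through epoch 61, where the saved output ends. Unfortunately I can't plot epoch v. accuracy since Ubuntu crashed at some point, but it appears that after that point, the neural net began overfitting.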