### Basic neural net built with "make your own neural network" guidance. 

I tried to code it myself but got stuck on a subtle bug, which I describe below. It was kind of short-sighted of me:
if my original approach were equivalent to the correct approach, training neural nets wouldn't take so long.

In [2]:
import numpy as np
import scipy.special

#neural network class definition 

class NeuralNetwork:
    """Three-layer (input, hidden, output) feed-forward net with sigmoid activations.

    The weights live in two matrices:

    * ``wih`` -- shape (hnodes, inodes), input -> hidden links
    * ``who`` -- shape (onodes, hnodes), hidden -> output links

    Both are updated in place by :meth:`train`.
    """

    def __init__(self, ninodes, nhnodes, nonodes, lr):
        """Create a network with randomly initialised weights.

        Args:
            ninodes: number of input nodes.
            nhnodes: number of hidden nodes.
            nonodes: number of output nodes.
            lr: learning rate that scales every weight update.
        """
        # Sigmoid (logistic) activation, applied element-wise.
        self.activation_function = scipy.special.expit
        self.inodes = ninodes
        self.hnodes = nhnodes
        self.onodes = nonodes
        self.lr = lr

        # Weights are drawn from a zero-mean normal distribution.
        # NOTE(review): the standard deviation is 1/sqrt(size of the layer the
        # links feed INTO); the usual 1/sqrt(fan-in) heuristic would use the
        # size of the layer the links come FROM -- confirm which is intended.
        self.wih = np.random.normal(
            0.0,
            pow(float(self.hnodes), -0.5),
            (self.hnodes, self.inodes),
        )
        self.who = np.random.normal(
            0.0,
            pow(float(self.onodes), -0.5),
            (self.onodes, self.hnodes),
        )

    def _forward(self, inputs):
        """Propagate column-oriented ``inputs`` through both layers.

        Args:
            inputs: 2-D array with one sample per column (inodes rows).

        Returns:
            Tuple ``(hidden_outputs, final_outputs)`` of activations leaving
            the hidden layer and the output layer respectively.
        """
        # signals into, then out of, the hidden layer
        hidden_outputs = self.activation_function(np.dot(self.wih, inputs))
        # signals into, then out of, the output layer
        final_outputs = self.activation_function(np.dot(self.who, hidden_outputs))
        return hidden_outputs, final_outputs

    def train(self, inputs_list, targets_list):
        """Apply ONE gradient step to both weight matrices.

        Args:
            inputs_list: a single sample of length inodes, or a list of m such
                samples.
            targets_list: the matching target vector(s) of length onodes.

        Returns:
            None. ``self.wih`` and ``self.who`` are modified in place.

        Note:
            If m samples are passed at once, ``np.array`` stacks them row by
            row (m x n) and the transpose makes each sample a column. The
            ``np.dot`` calls below then sum the per-sample updates, so the
            whole batch still produces a single weight update computed from
            the current weights. To update after every sample, call this
            method once per sample.
        """
        inputs = np.array(inputs_list, ndmin=2).T
        targets = np.array(targets_list, ndmin=2).T

        hidden_outputs, final_outputs = self._forward(inputs)

        # Both error terms are computed up front, i.e. before either weight
        # matrix is modified. The hidden-layer error is the output error
        # passed backward through the hidden->output weights (transposed):
        # self.who.T is hnodes x onodes, so for a single sample
        # hidden_errors is an hnodes x 1 column.
        output_errors = targets - final_outputs
        hidden_errors = np.dot(self.who.T, output_errors)

        # Step 1: adjust the hidden->output weights (onodes x hnodes).
        # The update is the outer product of the output-side term
        # (error * sigmoid derivative) and the hidden-layer outputs.
        self.who += self.lr * np.dot(
            output_errors * final_outputs * (1.0 - final_outputs),
            hidden_outputs.T,
        )

        # Step 2: the same rule, shifted one layer back to the
        # input->hidden weights (hnodes x inodes).
        self.wih += self.lr * np.dot(
            hidden_errors * hidden_outputs * (1.0 - hidden_outputs),
            inputs.T,
        )

    def query(self, inputs_list):
        """Run a forward pass without changing any weights.

        Args:
            inputs_list: a single sample of length inodes, or a list of such
                samples.

        Returns:
            Array of output-layer activations with one column per sample
            (onodes x 1 for a single sample).
        """
        inputs = np.array(inputs_list, ndmin=2).T
        _, final_outputs = self._forward(inputs)
        return final_outputs

In [3]:
# layer sizes: input, hidden and output
input_nodes = 784
hidden_nodes = 200
output_nodes = 10

# step size applied to every weight update
learning_rate = 0.1

# build the network used by the reference training/test cells
n = NeuralNetwork(
    ninodes=input_nodes,
    nhnodes=hidden_nodes,
    nonodes=output_nodes,
    lr=learning_rate,
)

### Mine wasn't working -- it was driving me crazy since the training code was virtually identical. 

So in the next 4 cells, I duplicated his training and testing code.
I eventually figured out the difference -- he was calling train() individually on every new sample, while I was calling it only once, on the whole training set.

In [4]:
#HIS TRAINING & TEST CODE 1
# Read the small MNIST training CSV into a list of lines (one record per line).
# The context manager closes the file even if reading raises.
with open("/mnt/xferUbuntu/notebooks/make_your_own_neural_network/mnist/mnist_train_100.csv", 'r') as training_file:
    training_data_list = training_file.readlines()

# Alternative kept for reference: load a different MNIST training CSV instead
#with open("mnist_dataset/mnist_train.csv", 'r') as training_data_file:
#    training_data_list = training_data_file.readlines()

In [5]:
#HIS TRAINING & TEST CODE 2
# NOTE HE IS TRAINING INDIVIDUALLY ON EVERY NEW EXAMPLE -- THIS MUST BE THE REASON IT'S WORKING

for record in training_data_list:
    # split the record by the ',' commas; field 0 is the label, the rest are pixels
    all_values = record.split(',')
    # scale and shift the inputs from 0..255 into 0.01..1.0
    # (np.asfarray was removed in NumPy 2.0; np.asarray with dtype=float is equivalent)
    inputs = (np.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
    # create the target output values (all 0.01, except the desired label which is 0.99)
    targets = np.zeros(output_nodes) + 0.01
    # all_values[0] is the target label for this record
    targets[int(all_values[0])] = 0.99
    # one weight update per record
    n.train(inputs, targets)

In [6]:
#HIS TRAINING & TEST CODE 3
# Read the small MNIST test CSV into a list of lines (one record per line).
# The context manager closes the file even if reading raises.
with open("/mnt/xferUbuntu/notebooks/make_your_own_neural_network/mnist/mnist_test_10.csv", 'r') as test_file:
    test_data = test_file.readlines()

#### The test step below was turning out well for him because he was doing 100 training iterations TO MY ONE.

In [7]:
#HIS TRAINING & TEST CODE 4 -- TEST STEP
# Take the first test record: field 0 is the label, the rest are pixel values.
test1_values = test_data[0].split(",")
print(test1_values[0])
# Same 0..255 -> 0.01..1.0 scaling as used for training.
# (np.asfarray was removed in NumPy 2.0; np.asarray with dtype=float is equivalent)
test1_input = (np.asarray(test1_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
# Last expression of the cell, so the notebook displays the network's outputs.
n.query(test1_input)

7


array([[ 0.03584036],
       [ 0.00353559],
       [ 0.04111049],
       [ 0.02744567],
       [ 0.01407476],
       [ 0.03854381],
       [ 0.00322071],
       [ 0.87844156],
       [ 0.04867697],
       [ 0.01002646]])

## Now, back to my code -- with the bug fixed.

In [8]:
import os
# Show the directory the notebook started in, BEFORE changing it.
print(os.getcwd())
# Move into the MNIST data directory so later cells can build paths from os.getcwd().
os.chdir("/mnt/xferUbuntu/notebooks/make_your_own_neural_network/mnist")
# Last expression of the cell: the notebook displays the directory listing.
os.listdir(".")


/mnt/xferUbuntu/notebooks/make_your_own_neural_network


['mnist_test_10.csv', 'mnist_train_100.csv']

In [9]:
# Read the training CSV from the current working directory, one record per line.
# The context manager closes the file even if reading raises.
with open(os.path.join(os.getcwd(), "mnist_train_100.csv"), 'r') as training_file:
    training_data = training_file.readlines()

In [10]:
import matplotlib.pyplot as plt
%matplotlib inline



### Train the neural net using the MNIST data, and 200 hidden nodes.

In [11]:
inodes = 28*28 # number of input nodes = dimensions of input images
hnodes = 200 # number of hidden nodes
onodes = 10 # number of output nodes = 10 (1 per digit)

# Leftovers from the original one-shot attempt (see the NOTE at the bottom);
# nothing in the loop below fills them.
training_list = []
target_list = []
lrate = 0.1
nnet2 = NeuralNetwork(inodes, hnodes, onodes, lrate)

for line in training_data:
    all_values = line.split(',')
    # one label field followed by one field per pixel
    assert len(all_values) == inodes + 1

    # rescale the input values into the range [0.01, 1.0]
    # you're avoiding the value 0 because the weights can't affect that value,
    # but the value 1 is okay for input.
    # note you do not need to reshape the array before using it for training...
    # the neural net doesn't care whether it looks like an image to you!
    # (np.asfarray was removed in NumPy 2.0; np.asarray with dtype=float is equivalent)
    inputs = ((np.asarray(all_values[1:], dtype=float)/255.0)*0.99) + 0.01
    assert len(inputs) == inodes
    assert inputs.min() > 0

    # all the non-target entries will be .01, the target entry will be .99
    # otherwise you'll get large weights and a saturated network because you'll be trying
    # to hit impossible values
    targets = (np.zeros(onodes) + 0.01)
    targets[int(all_values[0])] = 0.99

    # ===>>> now we train individually with every sample.
    nnet2.train(inputs, targets)

# NOTE: BEFORE I FOUND THIS BUG, I WAS TRAINING WITH EVERYTHING in one pass,
# instead of iteratively as in the loop above:
# nnet2.train(training_list, target_list)

### Now test the neural net.

In [12]:
# Open and read the test data, one record per line.
# The context manager closes the file even if reading raises.
with open(os.path.join(os.getcwd(), "mnist_test_10.csv"), 'r') as test_file:
    test_data = test_file.readlines()

In [13]:
# Parallel lists: test_list[i] is the scaled image, test_target_list[i] its target vector.
test_list = []
test_target_list = []
for line in test_data:
    all_values = line.split(',')

    # rescale the input values into the range [0.01, 1.0]
    # you're avoiding the value 0 because the weights can't affect that value,
    # but the value 1 is okay for input.
    # note you do not need to reshape the array before using it for testing...
    # the neural net doesn't care whether it looks like an image to you!
    # (np.asfarray was removed in NumPy 2.0; np.asarray with dtype=float is equivalent)
    image_array = (np.asarray(all_values[1:], dtype=float)/255.0*0.99) + 0.01

    # rescale the output targets into the range [.01, .99]
    # all the non-target entries will be .01, the target entry will be .99
    # otherwise you'll get large weights and a saturated network because you'll be trying
    # to hit impossible values (0, 1)
    target_array = (np.zeros(onodes)+0.01)
    target_array[int(all_values[0])] = 0.99

    # append the test sample
    test_list.append(image_array)
    test_target_list.append(target_array)

In [14]:
# Query the trained network with test sample 3; compare against test_target_list[3].
nnet2.query(test_list[3])

array([[  8.25289692e-01],
       [  1.61022093e-03],
       [  1.52741171e-02],
       [  4.12553006e-03],
       [  7.30825650e-03],
       [  9.56403536e-04],
       [  3.81431887e-02],
       [  1.95614992e-01],
       [  2.98569361e-03],
       [  7.32189739e-04]])

In [15]:
# Expected target vector for test sample 3 (0.99 marks the labelled digit).
test_target_list[3]

array([ 0.99,  0.01,  0.01,  0.01,  0.01,  0.01,  0.01,  0.01,  0.01,  0.01])

In [16]:
# Expected target vector for test sample 4 (0.99 marks the labelled digit).
test_target_list[4]

array([ 0.01,  0.01,  0.01,  0.01,  0.99,  0.01,  0.01,  0.01,  0.01,  0.01])

In [17]:
# Query the trained network with test sample 4; compare against test_target_list[4].
nnet2.query(test_list[4])

array([[ 0.03772522],
       [ 0.00461422],
       [ 0.06128937],
       [ 0.01487877],
       [ 0.91565653],
       [ 0.00653104],
       [ 0.01563727],
       [ 0.45043642],
       [ 0.0878436 ],
       [ 0.00106009]])