Document with the tasks and further explanation: https://docs.google.com/document/d/1aeidmS9pZ4CSHwLx08E79MT9Ch-h6EO6g2qesSC_wEY/edit#



GitHub: [@colombelli](http://github.com/colombelli/)

In [0]:
import numpy as np


class Neuron:
  def __init__(self, weights, bias):  # class constructor
    self.weights = weights
    self.bias = bias
    self.output = 0

      
  def activation_function(self, input):  # used to strech or contract outputs between some range
    input = np.float64(input)  # avoids the overflow
    self.output = 1.0 / (1.0 + np.exp(-(input)))  # the sigmoid activation function normally used
    return self.output
      

  def z(self, inputs):  # outputs the sum of every input times its respective weight, which are always 1 for this specific Task; and add a bias in the final result (0 in this particular Task)
    
    zOut = 0
    for i in range(len(inputs)):
      zOut += inputs[i] * self.weights[i]
    return zOut + self.bias  

    
  def y(self, inputs):  # final output - neuron value when activated
    return self.activation_function(self.z(inputs))
  

  def dEdz(self, dEdy):
  # assuming that our activation function is a sigmoid one, dEdz = dEdy * dydz   -> dEdz = dEdy * y * (1 - y) 
    return dEdy * self.output * (1 - self.output)

  
  def dEdw(self, dEdy, inputs):  # dEdw = dzdw * dydz * dEdy = dzdw * dEdz     , where dz(i)dw is simply the i input
    dEdz = self.dEdz(dEdy)
    dEdw = []
   
    for i, w in enumerate(self.weights):
      dEdw.append(inputs[i] * dEdz)
    print("no def de dEdw:", type(dEdw))
    return dEdw
    
  
  # we have to update the weights based on the learning rate and the derivatives
  def updateWeights(self, learningRate, dEdw):
    for i in range(len(self.weights)):
      self.weights[i] -= learningRate * dEdw[i]
      print("no def de upadte dedw:", type(dEdw))
      print("no def de upadte weights:", type(self.weights))
          
  
  # the same goes to the bias
  def updateBias(self, learningRate, dEdz):
    self.bias -= learningRate * dEdz  # because dEdb = dzdz * dEdz = 1 * dEdz = dEdz

In [0]:
class DenseLayer:
    def __init__(self, num_of_inputs, num_of_neurons):
      self.num_of_inputs = num_of_inputs
      self.neurons = []
      

      for i in range(num_of_neurons):  # creating the neurons for the layer
        weights = np.random.uniform(-1,1,[num_of_inputs])  # randomizing "num_inputs" weights with a value between 0 and 1
        #weights = np.ones((num_of_inputs,), dtype=int)  # generating a vector of ones to use in the Task 1 of summing up Xi inputs
        bias = np.random.uniform(0,1)  # randomizing a bias
        #bias = 0  # also, in order to use Task 1 and test Task 2 easier, the bias will be always zero
        self.neurons.append(Neuron(weights, bias))  # appending the new neuron to the layer


    def feedForward(self, inputs):  # activates every neuron in the layer, outputting their respestive results
      self.inputs = inputs  # save the inputs as an attribute in order to use it in the dzdw calculus
      lisOut = []
      for neuron in self.neurons:
        lisOut.append(neuron.y(inputs))
      return lisOut

In [0]:
class NeuralNetwork:
  def __init__(self, num_of_inputs, num_of_neurons_at_each_layer):
    self.num_of_inputs = num_of_inputs
    self.layers = []
    
    # creates a dense layer for every int on the list num_of_neurons_at_each_layer  
    num_of_inputs_next_neuron = num_of_inputs  # the first layer will have the given number of inputs for each of its neuron
    for num in num_of_neurons_at_each_layer:   
      self.layers.append(DenseLayer(num_of_inputs_next_neuron, num))
      num_of_inputs_next_neuron = num  # the next layers will have the number of neurons of the previous layer as its number of inputs
  
  
  def feedForward(self, inputs):
    
    for layer in self.layers:  # keeps picking the outputs of every layer and passing them as inputs to the next layer
      #print("TESTE")
      #print(inputs)
      #input("tecla para continuar")
      inputs = layer.feedForward(inputs)     
    
    # the outputs will be the final result of the inputs variable, after the end of the loop above
    outputs = inputs  # for code clarity, this variable is created before the method returns
      
    return outputs
  
  
  def derivative_of_the_error(self, value, result):
    # the derivative of the RSS with respect of each dimension  is: 2 * (dimensionResult - dimensionValue)
    # returns an array with the result of each value
  
    dEdy = []
    for i in range(len(value)):  # calculates each value of that derivative 
      dEdy.append((result[i] - value[i]) * 2)
  
    return dEdy
  
  
  def backpropagation(self, value, result, learningRate):
    
    dEdy = self.derivative_of_the_error(value, result)  # calculates the first dEdy related to the final output
    
    flagFirstLayer = 1  # indicates that the dEdy is already calculated for that layer (the first starting at the end)
    for layer in reversed(self.layers):  # update the weights for each neuron in each layer (starting at the end)     
      
      new_dEdy = np.zeros(len(layer.neurons[0].weights))   
      
      for i, neuron in enumerate(layer.neurons):  # updates the neurons weights and biases in the layer

        dEdw = neuron.dEdw(dEdy[i], layer.inputs)  # computates dEdw for given neuron
        dEdz = neuron.dEdz(dEdy[i])  # necessary to update the bias and for calculating the new dEdy used in the next iterable layer
        neuron.updateWeights(learningRate, dEdw)
        neuron.updateBias(learningRate, dEdz)
        
        # sums up the right weight multiplying by the dEdz in the right place of the array
        for w in range(len(neuron.weights)):
          new_dEdy[w] += neuron.weights[w] * dEdz
        
      # finally, with the new dEdy array constructed, we update it and  propagates it to next iterable layer
      dEdy = new_dEdy
          
       

In [0]:
def error_function(value, result):  # where the value represents what the network should output and result represents what it actually outputted
  
  RSS = 0
  for i in range(len(value)):
    RSS += (result[i]- value[i])**2
  
  return RSS

In [0]:
# Training network

startTime = time.time()
sample_n = 0
matches = 0

for sample, label in zip(x_train, y_train):
  
  sample_n += 1
  
  if sample_n == MAX_TRAINING_SAMPLES:
    time_taken = time.time() - startTime
    break
    
  if (sample_n % 1000) == 0:
    print("%d trained" %sample_n)
    
  sample = np.concatenate(sample)
  
  # convert the output to one hot encoded
  expected_output = np.zeros(10)
  expected_output[label] = 1
  
  network_output = network.feedForward(sample)  # computes network output
  loss = error_function(expected_output, network_output)  # computes loss
  
  nn_guess = np.argmax(network_output)  # gets the index of the highest value in the array
  if nn_guess == label:  # check if the network got a right guess
    matches += 1
  
  """
  
  NOTE: PRINTING THE RESULTS OF EVERY STEP WILL CAUSE THE BROWSER TO EVENTUALLY 
        CRASH, THEN IT'S SAFER TO JUST RUN THE CODE WITH FEW OUTPUTS INFORMING 
        THE PROGRESS OF THE PROCESSING.
  
  # print results
  print("Sample %d | Label = %d | Output = %d | %d matches" % (sample_n, label, nn_guess, matches))
  print("Loss: ", loss)
  print("Time taken:", time.time() - startTime)
  print("\n")
  
  """

  # update weights and biases
  network.backpropagation(expected_output, network_output, learning_rate)

print("Time taken: ", time_taken)  

  del sys.path[0]


no def de dEdw: <class 'list'>
no def de upadte dedw: <class 'list'>
no def de upadte weights: <class 'numpy.ndarray'>
no def de upadte dedw: <class 'list'>
no def de upadte weights: <class 'numpy.ndarray'>
no def de upadte dedw: <class 'list'>
no def de upadte weights: <class 'numpy.ndarray'>
no def de upadte dedw: <class 'list'>
no def de upadte weights: <class 'numpy.ndarray'>
no def de upadte dedw: <class 'list'>
no def de upadte weights: <class 'numpy.ndarray'>
no def de upadte dedw: <class 'list'>
no def de upadte weights: <class 'numpy.ndarray'>
no def de upadte dedw: <class 'list'>
no def de upadte weights: <class 'numpy.ndarray'>
no def de upadte dedw: <class 'list'>
no def de upadte weights: <class 'numpy.ndarray'>
no def de upadte dedw: <class 'list'>
no def de upadte weights: <class 'numpy.ndarray'>
no def de upadte dedw: <class 'list'>
no def de upadte weights: <class 'numpy.ndarray'>
no def de upadte dedw: <class 'list'>
no def de upadte weights: <class 'numpy.ndarray'>
n

KeyboardInterrupt: ignored

In [0]:
import tensorflow as tf
import time
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()


numInputs = 784
neuronsEachLayer = [30, 10]   
network = NeuralNetwork(numInputs, neuronsEachLayer)
learning_rate = 0.001
MAX_TRAINING_SAMPLES = 60000

In [0]:
# Training network

startTime = time.time()
sample_n = 0
matches = 0

for sample, label in zip(x_train, y_train):
  
  sample_n += 1
  
  if sample_n == MAX_TRAINING_SAMPLES:
    time_taken = time.time() - startTime
    break
    
  if (sample_n % 1000) == 0:
    print("%d trained" %sample_n)
    
  sample = np.concatenate(sample)
  
  # convert the output to one hot encoded
  expected_output = np.zeros(10)
  expected_output[label] = 1
  
  network_output = network.feedForward(sample)  # computes network output
  loss = error_function(expected_output, network_output)  # computes loss
  
  nn_guess = np.argmax(network_output)  # gets the index of the highest value in the array
  if nn_guess == label:  # check if the network got a right guess
    matches += 1
  
  """
  
  NOTE: PRINTING THE RESULTS OF EVERY STEP WILL CAUSE THE BROWSER TO EVENTUALLY 
        CRASH, THEN IT'S SAFER TO JUST RUN THE CODE WITH FEW OUTPUTS INFORMING 
        THE PROGRESS OF THE PROCESSING.
  
  # print results
  print("Sample %d | Label = %d | Output = %d | %d matches" % (sample_n, label, nn_guess, matches))
  print("Loss: ", loss)
  print("Time taken:", time.time() - startTime)
  print("\n")
  
  """

  # update weights and biases
  network.backpropagation(expected_output, network_output, learning_rate)

print("Time taken: ", time_taken)

  if sys.path[0] == '':


1000 trained
2000 trained
3000 trained
4000 trained
5000 trained
6000 trained
7000 trained
8000 trained
9000 trained
10000 trained
11000 trained
12000 trained
13000 trained
14000 trained
15000 trained
16000 trained
17000 trained
18000 trained
19000 trained
20000 trained
21000 trained
22000 trained
23000 trained
24000 trained
25000 trained
26000 trained
27000 trained
28000 trained
29000 trained
30000 trained
31000 trained
32000 trained
33000 trained
34000 trained
35000 trained
36000 trained
37000 trained
38000 trained
39000 trained
40000 trained
41000 trained
42000 trained
43000 trained
44000 trained
45000 trained
46000 trained
47000 trained
48000 trained
49000 trained
50000 trained
51000 trained
52000 trained
53000 trained
54000 trained
55000 trained
56000 trained
57000 trained
58000 trained
59000 trained
Time taken:  8175.438211917877


In [0]:
# Testing network accuracy

MAX_TEST_SAMPLES = 1000
sample_num = 0
matches_t = 0

for sample, label in zip(x_test, y_test):
  
  sample_num += 1
  
  if sample_num == MAX_TEST_SAMPLES:
    break
    
  sample = np.concatenate(sample)
  
  # convert the output to one hot encoded
  expected_output = np.zeros(10)
  expected_output[label] = 1
  
  network_output = network.feedForward(sample)  # computes network output
  loss = error_function(expected_output, network_output)  # computes loss
  
  nn_guess = np.argmax(network_output)  # gets the index of the highest value in the array
  if nn_guess == label:  # check if the network got a right guess
    matches_t += 1
  
  if (sample_num % 100) == 0:  # prints parcial results from 100 to 100 tested samples
    # print results
    print("Sample %d | Label = %d | Output = %d | %d matches" % (sample_num, label, nn_guess, matches_t))
    print("Loss: ", loss)
    print("Hit rate: %.2f%%" % (matches_t / sample_num * 100))
    print("\n")
    
print("Final hit rate: %.2f%%" % (matches_t / sample_num * 100))

  if sys.path[0] == '':


Sample 100 | Label = 9 | Output = 9 | 44 matches
Loss:  0.39640316123746433
Hit rate: 44.00%


Sample 200 | Label = 2 | Output = 2 | 91 matches
Loss:  0.6563651219245351
Hit rate: 45.50%


Sample 300 | Label = 8 | Output = 6 | 137 matches
Loss:  1.2587315719002707
Hit rate: 45.67%


Sample 400 | Label = 4 | Output = 2 | 173 matches
Loss:  0.7920719544259793
Hit rate: 43.25%


Sample 500 | Label = 6 | Output = 8 | 211 matches
Loss:  0.9111671371042018
Hit rate: 42.20%


Sample 600 | Label = 9 | Output = 9 | 247 matches
Loss:  0.39640316123746433
Hit rate: 41.17%


Sample 700 | Label = 3 | Output = 6 | 287 matches
Loss:  1.4097698613520893
Hit rate: 41.00%


Sample 800 | Label = 2 | Output = 2 | 327 matches
Loss:  0.4490350750493641
Hit rate: 40.88%


Sample 900 | Label = 8 | Output = 7 | 369 matches
Loss:  1.027200024911953
Hit rate: 41.00%


Final hit rate: 40.80%


In [0]:
from IPython.display import clear_output  # with the clear_output() method we can avoid the browser to crash because of long outputs

# Training a network with 2 hidden layers

numInputs = 784
neuronsEachLayer = [50, 30, 10]   
network = NeuralNetwork(numInputs, neuronsEachLayer)
learning_rate = 0.001
repeatTrainSamples = 10
MAX_TRAINING_SAMPLES = len(y_train) * repeatTrainSamples  # samples: 600.000

startTime = time.time()
sample_n = 0
matches = 0

for i in range(repeatTrainSamples):  # iterates through the train samples more than once 
  
  for sample, label in zip(x_train, y_train):

    sample_n += 1

    if sample_n == MAX_TRAINING_SAMPLES:
      time_taken = time.time() - startTime
      break

    if (sample_n % 100) == 0:
      clear_output()  # clears the output in order to avoid browser crash
      print("%.4f%% trained" %(sample_n/600000))  # prints the percentage of the process to give a preview of how much time is still needed before finish the training
      print("%d samples" %sample_n)
      print("%.2f minutes used" %((time.time() - startTime)/60))  # prints how much minutes were already used to train the nn


    sample = np.concatenate(sample)  # shapes the sample in the format of an array of 784 lenght
    sample = np.array(sample, dtype=np.float64)  # avoids the possible overflow about to come with the exp function

    # convert the output to one hot encoded
    expected_output = np.zeros(10)
    expected_output[label] = 1

    network_output = network.feedForward(sample)  # computes network output
    loss = error_function(expected_output, network_output)  # computes loss

    nn_guess = np.argmax(network_output)  # gets the index of the highest value in the array
    if nn_guess == label:  # check if the network got a right guess
      matches += 1


    # update weights and biases
    network.backpropagation(expected_output, network_output, learning_rate)

print("Time taken: ", time_taken)
print("\n\n")


# Testing network accuracy

MAX_TEST_SAMPLES = 10000
sample_num = 0
matches_t = 0

for sample, label in zip(x_test, y_test):
  
  sample_num += 1
  
  if sample_num == MAX_TEST_SAMPLES:
    break
    
  sample = np.concatenate(sample)
  
  # convert the output to one hot encoded
  expected_output = np.zeros(10)
  expected_output[label] = 1
  
  network_output = network.feedForward(sample)  # computes network output
  loss = error_function(expected_output, network_output)  # computes loss
  
  nn_guess = np.argmax(network_output)  # gets the index of the highest value in the array
  if nn_guess == label:  # check if the network got a right guess
    matches_t += 1
  
  if (sample_num % 100) == 0:  # prints parcial results from 100 to 100 tested samples
    clear_output()
    # print results
    print("Sample %d | Label = %d | Output = %d | %d matches" % (sample_num, label, nn_guess, matches_t))
    print("Loss: ", loss)
    print("Hit rate: %.2f%%" % (matches_t / sample_num * 100))
    print("\n")
    
print("Final hit rate: %.2f%%" % (matches_t / sample_num * 100))

NameError: ignored

**Improved Neural Network**

[https://docs.google.com/document/d/1mjwoSeh8GBn7O0BLuIO4-OxXu5t8W1biKyJW0TSi1DU/edit](https://docs.google.com/document/d/1mjwoSeh8GBn7O0BLuIO4-OxXu5t8W1biKyJW0TSi1DU/edit)

In [0]:
import numpy as np


class Neuron:
  def __init__(self, weights, bias):  # class constructor
    self.weights = weights
    self.bias = bias
    self.output = 0
    
  
  def z(self, inputs):  # outputs the sum of every input times its respective weight, which are always 1 for this specific Task; and add a bias in the final result (0 in this particular Task)    
    zOut = 0
    for i in range(len(inputs)):
      zOut += inputs[i] * self.weights[i]
    return zOut + self.bias  

    
  def y(self, z, maxZ, yExpSum):  # final output: neuron value when activated
    return self.activation_function(z, yExpSum, maxZ)
  

  def dEdz(self, dEdy):     
    # ReLU derivative which respect to z (if z > 0, dydz = 1; else, dydz = 0)
    if (self.output > 0):
      return dEdy  # dEdy * dydz = dEdy * 1
    else:
      return 0  # dEdy * dydz = dEdy * 0
  
  
  def dEdw(self, inputs, dEdz):  # dEdw = dzdw * dydz * dEdy = dzdw * dEdz     , where dz(i)dw is simply the i input
    dEdw = []   
    for i, w in enumerate(self.weights):
      dEdw.append(inputs[i] * dEdz)  
    return dEdw
    
  
  # we have to update the weights and biases based on the learning rate and the derivatives
  def updateWeights(self, learningRate, dEdw):
    for i in range(len(self.weights)):
      self.weights[i] -= learningRate * dEdw[i]
          
  
  def updateBias(self, learningRate, dEdz):
    self.bias -= learningRate * dEdz  # because dEdb = dzdz * dEdz = 1 * dEdz = dEdz

In [0]:
import types  # used for adding the activation function to the neuron class with self parameter

# the softmax function which returns probabilities and has a better gradient stepness for very incorrect
def _softmax(self, inp, maxZ, yExpSum):
  self.output = np.exp(inp - maxZ) / yExpSum  
  return self.output

# the ReLU function
def _ReLU(self, inp, maxZ, yExpSum):
  self.output = max(0,inp)    
  return self.output


class DenseLayer:
    def __init__(self, num_of_inputs, num_of_neurons, lastLayer):
      self.num_of_inputs = num_of_inputs
      self.neurons = []
      self.lastLayer = lastLayer  # a variable indicating if this layer is the last one (true/false)     

      for i in range(num_of_neurons):  # creating the neurons for the layer
        weights = np.random.uniform(-0.3,0.3,[num_of_inputs])  # randomizing "num_inputs" weights with a value between 0 and 1
        bias = np.random.uniform(0,0.3)  # randomizing a bias
        self.neurons.append(Neuron(weights, bias))  # appending the new neuron to the layer      
        # adding the right activation function method depending on the neuron layer:
        if lastLayer: 
          self.neurons[i].activation_function = types.MethodType(_softmax, self.neurons[i])
        else:
          self.neurons[i].activation_function = types.MethodType(_ReLU, self.neurons[i])

          
    def feedForward(self, inputs):  # activates every neuron in the layer, outputting their respestive results
      self.inputs = inputs  # save the inputs as an attribute in order to use it in the dzdw calculus
      lisOut = []
      zArray = []
      yExpSum = 0
      maxZ = 0
      teste = False
      for neuron in self.neurons:  # calculates z values
          zArray.append(neuron.z(inputs))
         
      if (self.lastLayer):  # softmax preparation     
        # extract the maximum value of the array for computing the softmax as "exp(a-max(a)) / sum(exp(a-max(a))"
        maxZ = max(zArray)
        yExpSum = np.sum(np.exp(np.array(zArray) - maxZ))
          
      for i, neuron in enumerate(self.neurons):
        lisOut.append(neuron.y(zArray[i], yExpSum, maxZ))
        
      return lisOut

In [0]:
class NeuralNetwork:
  def __init__(self, num_of_inputs, num_of_neurons_at_each_layer):
    self.num_of_inputs = num_of_inputs
    self.layers = []
    
    # creates a dense layer for every int on the list num_of_neurons_at_each_layer  
    num_of_inputs_next_neuron = num_of_inputs  # the first layer will have the given number of inputs for each of its neuron
    for idx, num in enumerate(num_of_neurons_at_each_layer): 
      # an if test is needed to check if the current layer is the last one
      if (idx+1) == len(num_of_neurons_at_each_layer):  # then, this is the last layer
        self.layers.append(DenseLayer(num_of_inputs_next_neuron, num, True))
      else:
        self.layers.append(DenseLayer(num_of_inputs_next_neuron, num, False))
        num_of_inputs_next_neuron = num  # the next layers will have the number of neurons of the previous layer as its number of inputs
  
  
  def feedForward(self, inputs):
    for layer in self.layers:  # keeps picking the outputs of every layer and passing them as inputs to the next layer
      inputs = layer.feedForward(inputs)         
    return inputs

  
  def cross_entropy_cost_function(self, expected_output, network_output):
    cost = 0;
    for j in range(len(network_output)):
      # note that if we have a nn that only one output value X is different from zero, then we could just return -log(X), but for reusability let's keep it this way
      cost += expected_output[j] * np.log(network_output[j])
    return -cost
  
  
  def backpropagation(self, value, result, learningRate):   
    first_dEdz = np.subtract(np.array(result), np.array(value))  # because of softmax and cross entropy cost function
    
    for layer in reversed(self.layers):  # update the weights for each neuron in each layer (starting at the end)          
      new_dEdy = np.zeros(len(layer.neurons[0].weights))         
      for i, neuron in enumerate(layer.neurons):  # updates the neurons weights and biases in the layer       
        if (layer.lastLayer):
          dEdz = first_dEdz[i]
        else:
          dEdz = neuron.dEdz(dEdy[i])  # necessary to update the bias and for calculating the new dEdy used in the next iterable layer
        
        dEdw = neuron.dEdw(layer.inputs, dEdz)   
        neuron.updateWeights(learningRate, dEdw)
        neuron.updateBias(learningRate, dEdz)
        
        # sums up the right weight multiplying by the dEdz in the right place of the array (the actual backpropagation value)
        for w in range(len(neuron.weights)):
          new_dEdy[w] += neuron.weights[w] * dEdz
      
      # finally, with the new dEdy array constructed, we update it and  propagates it to next iterable layer
      dEdy = new_dEdy
          
       

In [11]:
import tensorflow as tf
import time
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

numInputs = 784
neuronsEachLayer = [30, 10]   
network = NeuralNetwork(numInputs, neuronsEachLayer)
learning_rate = 0.001
MAX_TRAINING_SAMPLES = 10000


# Training network

startTime = time.time()
sample_n = 0
matches = 0

for sample, label in zip(x_train, y_train):
  
  sample_n += 1  
  if sample_n == MAX_TRAINING_SAMPLES:
    time_taken = time.time() - startTime
    break
    
  if (sample_n % 500) == 0:
    print("%d trained    time taken: %f" %(sample_n, time.time() - startTime))    
  sample = np.concatenate(sample)/255
  
  # convert the output to one hot encoded
  expected_output = np.zeros(10)
  expected_output[label] = 1
  
  network_output = network.feedForward(sample)  # computes network output
  loss = network.cross_entropy_cost_function(expected_output, network_output)  # computes loss
  
    
  nn_guess = np.argmax(network_output)  # gets the index of the highest value in the array
  if nn_guess == label:  # check if the network got a right guess
    matches += 1
  
  
  """
  
  NOTE: PRINTING THE RESULTS OF EVERY STEP WILL CAUSE THE BROWSER TO EVENTUALLY 
        CRASH, THEN IT'S SAFER TO JUST RUN THE CODE WITH FEW OUTPUTS INFORMING 
        THE PROGRESS OF THE PROCESSING.
  
  """

  # update weights and biases
  network.backpropagation(expected_output, network_output, learning_rate)

print("Time taken: ", time_taken)




500 trained    time taken: 19.219666
1000 trained    time taken: 38.605394
1500 trained    time taken: 58.289990
2000 trained    time taken: 77.664984
2500 trained    time taken: 96.901880
3000 trained    time taken: 116.100442
3500 trained    time taken: 135.286087
4000 trained    time taken: 154.304366
4500 trained    time taken: 173.307647
5000 trained    time taken: 192.091052
5500 trained    time taken: 210.959166
6000 trained    time taken: 229.919374
6500 trained    time taken: 248.965913
7000 trained    time taken: 268.015718
7500 trained    time taken: 286.834077
8000 trained    time taken: 305.681530
8500 trained    time taken: 324.397405
9000 trained    time taken: 343.175690
9500 trained    time taken: 362.408792
Time taken:  381.18195390701294


In [12]:

# Testing network accuracy

MAX_TEST_SAMPLES = 1000
sample_num = 0
matches_t = 0

for sample, label in zip(x_train, y_train):
  
  sample_num += 1
  
  if sample_num == MAX_TEST_SAMPLES:
    break
    
  sample = np.concatenate(sample)/255
  
  # convert the output to one hot encoded
  expected_output = np.zeros(10)
  expected_output[label] = 1
  
  network_output = network.feedForward(sample)  # computes network output
  loss = network.cross_entropy_cost_function(expected_output, network_output)  # computes loss
  
  nn_guess = np.argmax(network_output)  # gets the index of the highest value in the array
  if nn_guess == label:  # check if the network got a right guess
    matches_t += 1
  #else:
   # print("not hitted. Label: %d, Guess: %d, Loss: %f, Output:" %(label, nn_guess, loss))
    #print(network_output)
  
  if (sample_num % 50) == 0:  # prints parcial results from 100 to 100 tested samples
    # print results
    print("Sample %d | Label = %d | Output = %d | %d matches" % (sample_num, label, nn_guess, matches_t))
    print("Loss: ", loss)
    print("Hit rate: %.4f%%" % (matches_t / sample_num * 100))
    print("NN output: ", network_output)
    print("\n")
    
print("Final hit rate: %f%%" % (matches_t / sample_num * 100))

Sample 50 | Label = 3 | Output = 3 | 40 matches
Loss:  0.02140080418233224
Hit rate: 80.0000%
NN output:  [0.002613519973072909, 2.845549795798008e-05, 0.0005883056440487426, 0.9788265681553561, 2.9472939432146893e-06, 0.012729100658505115, 5.4098549865390616e-06, 9.282034459591397e-05, 0.005031625502233205, 8.124707530020158e-05]


Sample 100 | Label = 1 | Output = 1 | 84 matches
Loss:  0.11921801699607087
Hit rate: 84.0000%
NN output:  [0.00028243810469204993, 0.8876142646699849, 0.013631787149722411, 0.01327654655410511, 0.014014537166661083, 0.002067947766132206, 0.0031011846323826804, 0.012332721228593507, 0.03707479950914568, 0.016603773218580323]


Sample 150 | Label = 3 | Output = 3 | 125 matches
Loss:  0.047402108570360174
Hit rate: 83.3333%
NN output:  [0.0005747140351675165, 0.0006087794897433112, 0.0019419815740613253, 0.9537038279944067, 0.00011973194799363845, 0.007195008990089909, 1.0720566047604054e-05, 6.275203155067762e-05, 0.03555240076282331, 0.00023008260811578442]

In [0]:
value = [0, 0, 5, 3, 1, 6]
target = [1, 1, 1, 1, 1, 1]
print(type(value))
print(type(target))
print(type(np.subtract(np.array(value), np.array(target))))

<class 'list'>
<class 'list'>


NameError: ignored

In [0]:
max(value)

6

In [0]:
# test overriding functions in python

import types

class Foo(object):
    def __init__(self):
      self.a = "yeah"

    def do_foo(self):
        print ('foo! ', self.a)

def _do_foa(self, x):
    print ('bar!', x, self.a)

teste = Foo()
x=5
teste.do_foo()
teste.do_foo = types.MethodType(_do_foa, teste)
teste.do_foo(3)
teste.aaa = types.MethodType(_do_foa, teste)
teste.aaa(x)



foo!  yeah
bar! 3 yeah
bar! 5 yeah


In [0]:
for i,num in enumerate(target):
  print (i)
  print (num)

0
1
1
1
2
1
3
1
4
1
5
1


In [0]:
out = [0.16572981333900777, 0.7452381020567532, 1.3452044635793639, 0.10558120224405236, 0.2518570782086987, 0.12924466092960685, 0.17763911642626815, 0.3293164264662247, 0.00860719495385458, 0.14647038197571616]
inp = [0, 0, 0.495188705746017, 3.682489010414027, 0, 0.06427587063464169, 0, 2.7655762757782876, 0, 1.1194036007401635, 5.193014938847103, 0, 2.876123741104178, 2.5396964050885438, 0.22493791050183432]
inz = [2.565121193841864, 1.6835017864214008, 3.0999042461972484, 1.629073797202314, 2.4726341905683134, 1.9859639763848096, 1.272855386961326, 4.453294125488756, 3.869712456706496, 2.7706580168460633]

inz = [5.077514176366277, 1.832598550836813, 5.109556725936442, 3.3030695332434616, 5.283152636370791, 4.831346263264652, 4.632667842199421, 6.387173278300329, 0.8510185821633214, 5.663141163826562]
yExpSum = np.sum(np.exp(np.array(inz) - max(inz)))
print(yExpSum)

for z in inz:
  Softmax(z, max(inz), yExpSum)

2.8091810536942776
0.09608211362702507 6.387173278300329 2.8091810536942776
0.0037444979551628523 6.387173278300329 2.8091810536942776
0.09921068554883722 6.387173278300329 2.8091810536942776
0.016293374177964087 6.387173278300329 2.8091810536942776
0.1180185288333675 6.387173278300329 2.8091810536942776
0.0751161260220818 6.387173278300329 2.8091810536942776
0.061581213106084144 6.387173278300329 2.8091810536942776
0.3559756316471404 6.387173278300329 2.8091810536942776
0.0014031329837959979 6.387173278300329 2.8091810536942776
0.17257469609854098 6.387173278300329 2.8091810536942776


In [0]:
def Softmax(input, maxZ, yExpSum):
  output = np.exp(input - maxZ) / yExpSum
  print(output, maxZ, yExpSum)
  