Document with the tasks and further explanation: https://docs.google.com/document/d/1aeidmS9pZ4CSHwLx08E79MT9Ch-h6EO6g2qesSC_wEY/edit#



GitHub: [@colombelli](http://github.com/colombelli/)

In [0]:
from numpy import exp


class Neuron:
  def __init__(self, weights, bias):  # class constructor
    self.weights = weights
    self.bias = bias
    self.output = 0

      
  def activation_function(self, input):  # used to strech or contract outputs between some range
    self.output = 1.0 / (1.0 + exp(-(input)))  # the sigmoid activation function normally used
    return self.output
    #return input  # in this case, there's no activation function since we just need the sum of the inputs: always performed in the z method (but summing the inputs with their weights)
      

  def z(self, inputs):  # outputs the sum of every input times its respective weight, which are always 1 for this specific Task; and add a bias in the final result (0 in this particular Task)
    
    zOut = 0
    for i in range(len(inputs)):
      zOut += inputs[i] * self.weights[i]
    return zOut + self.bias  

    
  def y(self, inputs):  # final output - neuron value when activated
    return self.activation_function(self.z(inputs))
  

  def dEdz(self, dEdy):
  # assuming that our activation function is a sigmoid one, dEdz = dEdy * dydz   -> dEdz = dEdy * y * (1 - y) 
    return dEdy * self.output * (1 - self.output)

  
  def dEdw(self, dEdy, inputs):  # dEdw = dzdw * dydz * dEdy = dzdw * dEdz     , where dz(i)dw is simply the i input
    dEdz = self.dEdz(dEdy)
    dEdw = []
    
    for i, w in enumerate(self.weights):
      dEdw.append(inputs[i] * dEdz)
      
    return dEdw
    
  
  # we have to update the weights based on the learning rate and the derivatives
  def updateWeights(self, learningRate, dEdw):
    for i in range(len(self.weights)):
      self.weights[i] -= learningRate * dEdw[i]
          
  
  # the same goes to the bias
  def updateBias(self, learningRate, dEdz):
    self.bias -= learningRate * dEdz  # because dEdb = dzdz * dEdz = 1 * dEdz = dEdz

In [0]:
import numpy as np

class DenseLayer:
    def __init__(self, num_of_inputs, num_of_neurons):
      self.num_of_inputs = num_of_inputs
      self.neurons = []
      

      for i in range(num_of_neurons):  # creating the neurons for the layer
        weights = np.random.uniform(-1,1,[num_of_inputs])  # randomizing "num_inputs" weights with a value between 0 and 1
        #weights = np.ones((num_of_inputs,), dtype=int)  # generating a vector of ones to use in the Task 1 of summing up Xi inputs
        bias = np.random.uniform(0,1)  # randomizing a bias
        #bias = 0  # also, in order to use Task 1 and test Task 2 easier, the bias will be always zero
        self.neurons.append(Neuron(weights, bias))  # appending the new neuron to the layer


    def feedForward(self, inputs):  # activates every neuron in the layer, outputting their respestive results
      self.inputs = inputs  # save the inputs as an attribute in order to use it in the dzdw calculus
      lisOut = []
      for neuron in self.neurons:
        lisOut.append(neuron.y(inputs))
      return lisOut

In [0]:
class NeuralNetwork:
  def __init__(self, num_of_inputs, num_of_neurons_at_each_layer):
    self.num_of_inputs = num_of_inputs
    self.layers = []
    
    # creates a dense layer for every int on the list num_of_neurons_at_each_layer  
    num_of_inputs_next_neuron = num_of_inputs  # the first layer will have the given number of inputs for each of its neuron
    for num in num_of_neurons_at_each_layer:   
      self.layers.append(DenseLayer(num_of_inputs_next_neuron, num))
      num_of_inputs_next_neuron = num  # the next layers will have the number of neurons of the previous layer as its number of inputs
  
  
  def feedForward(self, inputs):
    
    for layer in self.layers:  # keeps picking the outputs of every layer and passing them as inputs to the next layer
      #print("TESTE")
      #print(inputs)
      #input("tecla para continuar")
      inputs = layer.feedForward(inputs)     
    
    # the outputs will be the final result of the inputs variable, after the end of the loop above
    outputs = inputs  # for code clarity, this variable is created before the method returns
      
    return outputs
  
  
  def derivative_of_the_error(self, value, result):
    # the derivative of the RSS with respect of each dimension  is: 2 * (dimensionResult - dimensionValue)
    # returns an array with the result of each value
  
    dEdy = []
    for i in range(len(value)):  # calculates each value of that derivative 
      dEdy.append((result[i] - value[i]) * 2)
  
    return dEdy
  
  
  def backpropagation(self, value, result, learningRate):
    
    dEdy = self.derivative_of_the_error(value, result)  # calculates the first dEdy related to the final output
    
    flagFirstLayer = 1  # indicates that the dEdy is already calculated for that layer (the first starting at the end)
    for layer in reversed(self.layers):  # update the weights for each neuron in each layer (starting at the end)     
      
      new_dEdy = np.zeros(len(layer.neurons[0].weights))   
      
      for i, neuron in enumerate(layer.neurons):  # updates the neurons weights and biases in the layer

        dEdw = neuron.dEdw(dEdy[i], layer.inputs)  # computates dEdw for given neuron
        dEdz = neuron.dEdz(dEdy[i])  # necessary to update the bias and for calculating the new dEdy used in the next iterable layer
        neuron.updateWeights(learningRate, dEdw)
        neuron.updateBias(learningRate, dEdz)
        
        # sums up the right weight multiplying by the dEdz in the right place of the array
        for w in range(len(neuron.weights)):
          new_dEdy[w] += neuron.weights[w] * dEdz
        
      # finally, with the new dEdy array constructed, we update it and  propagates it to next iterable layer
      dEdy = new_dEdy
          
       

In [0]:
def error_function(value, result):  # where the value represents what the network should output and result represents what it actually outputted
  
  RSS = 0
  for i in range(len(value)):
    RSS += (result[i]- value[i])**2
  
  return RSS

In [0]:
def derivative_of_the_error(value, result):
  # the derivative of the RSS with respect of each dimension  is: 2 * (dimensionResult - dimensionValue)
  
  derivatives = []
  for i in range(len(value)):  # calculates each value of that derivative 
    derivatives.append((result[i] - value[i]) * 2)
  
  return derivatives    

In [0]:
import tensorflow as tf
import numpy as np
import time
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()


numInputs = 784
neuronsEachLayer = [30, 10]   
network = NeuralNetwork(numInputs, neuronsEachLayer)
learning_rate = 0.001
MAX_TRAINING_SAMPLES = 60000

In [0]:
# Training network

startTime = time.time()
sample_n = 0
matches = 0

for sample, label in zip(x_train, y_train):
  
  sample_n += 1
  
  if sample_n == MAX_TRAINING_SAMPLES:
    time_taken = time.time() - startTime
    break
    
  if (sample_n % 1000) == 0:
    print("%d trained" %sample_n)
    
  sample = np.concatenate(sample)
  
  # convert the output to one hot encoded
  expected_output = np.zeros(10)
  expected_output[label] = 1
  
  network_output = network.feedForward(sample)  # computes network output
  loss = error_function(expected_output, network_output)  # computes loss
  
  nn_guess = np.argmax(network_output)  # gets the index of the highest value in the array
  if nn_guess == label:  # check if the network got a right guess
    matches += 1
  
  """
  
  NOTE: PRINTING THE RESULTS OF EVERY STEP WILL CAUSE THE BROWSER TO EVENTUALLY 
        CRASH, THEN IT'S SAFER TO JUST RUN THE CODE WITH FEW OUTPUTS INFORMING 
        THE PROGRESS OF THE PROCESSING.
  
  # print results
  print("Sample %d | Label = %d | Output = %d | %d matches" % (sample_n, label, nn_guess, matches))
  print("Loss: ", loss)
  print("Time taken:", time.time() - startTime)
  print("\n")
  
  """

  # update weights and biases
  network.backpropagation(expected_output, network_output, learning_rate)

print("Time taken: ", time_taken)

  if sys.path[0] == '':


1000 trained
2000 trained
3000 trained
4000 trained
5000 trained
6000 trained
7000 trained
8000 trained
9000 trained
10000 trained
11000 trained
12000 trained
13000 trained
14000 trained
15000 trained
16000 trained
17000 trained
18000 trained
19000 trained
20000 trained
21000 trained
22000 trained
23000 trained
24000 trained
25000 trained
26000 trained
27000 trained
28000 trained
29000 trained
30000 trained
31000 trained
32000 trained
33000 trained
34000 trained
35000 trained
36000 trained
37000 trained
38000 trained
39000 trained
40000 trained
41000 trained
42000 trained
43000 trained
44000 trained
45000 trained
46000 trained
47000 trained
48000 trained
49000 trained
50000 trained
51000 trained
52000 trained
53000 trained
54000 trained
55000 trained
56000 trained
57000 trained
58000 trained
59000 trained
Time taken:  8175.438211917877


In [0]:
# Testing network accuracy

MAX_TEST_SAMPLES = 1000
sample_num = 0
matches_t = 0

for sample, label in zip(x_test, y_test):
  
  sample_num += 1
  
  if sample_num == MAX_TEST_SAMPLES:
    break
    
  sample = np.concatenate(sample)
  
  # convert the output to one hot encoded
  expected_output = np.zeros(10)
  expected_output[label] = 1
  
  network_output = network.feedForward(sample)  # computes network output
  loss = error_function(expected_output, network_output)  # computes loss
  
  nn_guess = np.argmax(network_output)  # gets the index of the highest value in the array
  if nn_guess == label:  # check if the network got a right guess
    matches_t += 1
  
  if (sample_num % 100) == 0:  # prints parcial results from 100 to 100 tested samples
    # print results
    print("Sample %d | Label = %d | Output = %d | %d matches" % (sample_num, label, nn_guess, matches_t))
    print("Loss: ", loss)
    print("Hit rate: %.2f%%" % (matches_t / sample_num * 100))
    print("\n")
    
print("Final hit rate: %.2f%%" % (matches_t / sample_num * 100))

  if sys.path[0] == '':


Sample 100 | Label = 9 | Output = 9 | 44 matches
Loss:  0.39640316123746433
Hit rate: 44.00%


Sample 200 | Label = 2 | Output = 2 | 91 matches
Loss:  0.6563651219245351
Hit rate: 45.50%


Sample 300 | Label = 8 | Output = 6 | 137 matches
Loss:  1.2587315719002707
Hit rate: 45.67%


Sample 400 | Label = 4 | Output = 2 | 173 matches
Loss:  0.7920719544259793
Hit rate: 43.25%


Sample 500 | Label = 6 | Output = 8 | 211 matches
Loss:  0.9111671371042018
Hit rate: 42.20%


Sample 600 | Label = 9 | Output = 9 | 247 matches
Loss:  0.39640316123746433
Hit rate: 41.17%


Sample 700 | Label = 3 | Output = 6 | 287 matches
Loss:  1.4097698613520893
Hit rate: 41.00%


Sample 800 | Label = 2 | Output = 2 | 327 matches
Loss:  0.4490350750493641
Hit rate: 40.88%


Sample 900 | Label = 8 | Output = 7 | 369 matches
Loss:  1.027200024911953
Hit rate: 41.00%


Final hit rate: 40.80%
