In [22]:
# this code belongs to this repository:
# https://github.com/ludobouan/pure-numpy-feedfowardNN


%matplotlib inline

import numpy as np
from matplotlib import cm
import matplotlib.pyplot as plt
from progress_bar import log_progress

np.seterr(over='ignore')

{'divide': 'warn', 'over': 'ignore', 'under': 'ignore', 'invalid': 'warn'}

In [76]:
class NeuralNetwork():
    def __init__(self):
        np.random.seed(1)  # Seed the random number generator
        self.weights = {}  # Create dict to hold weights
        self.num_layers = 1  # Set initial number of layer to one (input layer)
        self.adjustments = {}  # Create dict to hold the derivates of W of each layer

    def add_layer(self, shape):
        # Create weights with shape specified + biases
        # the last array has the bias
        ################################################################################################## this part are the bias
        self.weights[self.num_layers] = np.vstack((2 * np.random.random(shape) - 1, 2 * np.random.random((1, shape[1])) - 1))
        # Initialize the adjustements for these weights to zero
        self.adjustments[self.num_layers] = np.zeros(shape)
        self.num_layers += 1

    def __sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def __sigmoid_derivative(self, x):
        return x * (1 - x)

    def predict(self, data):
        # Pass data through pretrained network
        for layer in range(1, self.num_layers+1):
            data = np.dot(data, self.weights[layer-1][:, :-1]) + self.weights[layer-1][:, -1] # + self.biases[layer]
            data = self.__sigmoid(data)
        return data

    def __forward_propagate(self, data):
        # Progapagate through network and hold values for use in back-propagation
        activation_values = {} # acumulates the output of each layer, incluing the output of x that would be the output of layer 1
        activation_values[1] = data
        for layer in range(2, self.num_layers+1):
            # z = (X . W) + B
            data = np.dot(data.T, self.weights[layer-1][:-1, :]) + self.weights[layer-1][-1, :].T # + self.biases[layer]
            # A = g(Z)
            data = self.__sigmoid(data).T
            activation_values[layer] = data
        return activation_values

    def simple_error(self, outputs, targets):
        return targets - outputs

    def sum_squared_error(self, outputs, targets):
        return 0.5 * np.mean(np.sum(np.power(outputs - targets, 2), axis=1))

    def __back_propagate(self, output, target):
        """
        print("Debuging back propagation...")
        print("last layer------------------")
        print("target",target)
        print("output", output)
        print("output[self.num_layers]", output[self.num_layers], self.num_layers)
        #"""
        deltas = {}
        
        # Delta of output Layer-------------------------------------------------------------------------
        # dz3 = a3 - y
        deltas[self.num_layers] = output[self.num_layers] - target # the output less the target | dz
        #print("deltas", deltas)
        #-----------------------------------------------------------------------------------------------
        
        # Delta of hidden Layers------------------------------------------------------------------------
        for layer in reversed(range(2, self.num_layers)):  # All layers except input/output
            #print("hindden layers------------------")
            a_val = output[layer] #output of layer 2 (in this case)
            #print("a_val", a_val, 33)
            #print("self.weights", self.weights)
            weights = self.weights[layer][:-1, :] # weights of layer 2 (in this case)
            #print("weights", weights)
            prev_deltas = deltas[layer+1]#delta ouput of layer 3 (in this case)
            #print("prev_deltas", prev_deltas)
            # dz2 =  (W3.T . dz3) * g'(a2)
            deltas[layer] = np.multiply(np.dot(weights, prev_deltas), self.__sigmoid_derivative(a_val)) # delta of w of layer 2
            #print("deltas", deltas) # delta contains derivates of all outputs
        #----------------------------------------------------------------------------------------------

        # Caclculate total adjustements based on deltas-------------------------------------------------
        """
        print("Caclculate total adjustements based on deltas------------------")
        print("self.adjustments", self.adjustments)
        """
        for layer in range(1, self.num_layers):
            """
            print("layer", layer,"##")
            print("output[layer].T", output[layer].T)
            print("deltas[layer+1]", deltas[layer+1])
            """
            #dw2 = dz3 . a2.T
            #dw1 = dz1 . x.T
            #print("np.dot(deltas[layer+1], output[layer].T).T", np.dot(deltas[layer+1], output[layer].T).T, 666666)
            self.adjustments[layer] += np.dot(deltas[layer+1], output[layer].T).T
            #print("self.adjustments[layer]", self.adjustments[layer])

    def __gradient_descente(self, batch_size, learning_rate):
        # Calculate partial derivative and take a step in that direction
        #"""
        print("Debuging gradient descent...........................")
        print("batch_size:",batch_size)
        print("learning_rate:",learning_rate)
        print("self.adjustments", self.adjustments)
        #"""
        for layer in range(1, self.num_layers):
            print("layer", layer, "###########")
            print("self.adjustments[layer]", self.adjustments[layer])
            partial_d = (1/batch_size) * self.adjustments[layer]
            print("partial_d", partial_d)
            print("partial_d[-1, :]",partial_d[-1, :])
            self.weights[layer][:-1, :] += learning_rate * -partial_d
            self.weights[layer][-1, :] += learning_rate*1e-3 * -partial_d[-1, :]


    def train(self, inputs, targets, num_epochs, learning_rate=1, stop_accuracy=1e-5):
        error = []
        for iteration in range(num_epochs):
            #print("------------------------")
            for i in range(len(inputs)):
                x = inputs[i]
                y = targets[i]
                print("###########")
                print(i)
                print("current data:",x)
                print("###########")
                # Pass the training set through our neural network
                output = self.__forward_propagate(x)

                # Calculate the error
                loss = self.sum_squared_error(output[self.num_layers], y)
                error.append(loss)

                # Calculate Adjustements
                self.__back_propagate(output, y)

            self.__gradient_descente(i, learning_rate)

            # Check if accuarcy criterion is satisfied
            """
            print("error[-(i+1):]", error[-(i+1):])
            print("np.mean(error[-(i+1):])", np.mean(error[-(i+1):]))
            print("if", np.mean(error[-(i+1):]) < stop_accuracy and iteration > 0)
            print(error)
            """
            if np.mean(error[-(i+1):]) < stop_accuracy and iteration > 0:
                break

        return(np.asarray(error), iteration+1)

In [77]:
# Create instance of a neural network
nn = NeuralNetwork()

# Add Layers (Input layer is created by default)

nn.add_layer((2, 9))
nn.add_layer((9, 1))

# XOR function
training_data = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]]).reshape(4, 2, 1)
training_labels = np.asarray([[0], [1], [1], [0]])
error, iteration = nn.train(training_data, training_labels, 5000)
print('Error = ', np.mean(error[-4:]))
print('Epoches needed to train = ', iteration)
# nn.predict(testing_data)

###########
0
current data: [[0]
 [0]]
###########
###########
1
current data: [[0]
 [1]]
###########
###########
2
current data: [[1]
 [0]]
###########
###########
3
current data: [[1]
 [1]]
###########
Debuging gradient descent...........................
batch_size: 3
learning_rate: 1
self.adjustments {1: array([[-0.01080764, -0.00497882,  0.01110212, -0.01473121, -0.00563765,
        -0.0279943 ,  0.00025122,  0.0052058 , -0.00489636],
       [-0.01512931, -0.02370023,  0.03517015, -0.0227764 , -0.00406973,
        -0.01179639,  0.00294506,  0.01217582, -0.00504437]]), 2: array([[0.07509496],
       [0.07990998],
       [0.15280131],
       [0.14210514],
       [0.0958444 ],
       [0.12543398],
       [0.16448763],
       [0.14989301],
       [0.06962061]])}
layer 1 ###########
self.adjustments[layer] [[-0.01080764 -0.00497882  0.01110212 -0.01473121 -0.00563765 -0.0279943
   0.00025122  0.0052058  -0.00489636]
 [-0.01512931 -0.02370023  0.03517015 -0.0227764  -0.00406973 -0.011796

 -0.07535015  0.03046035  0.04114993]
layer 2 ###########
self.adjustments[layer] [[ 0.30456695]
 [-0.1637792 ]
 [-0.07221302]
 [-0.38427011]
 [-0.32443762]
 [-0.03560597]
 [ 0.07501002]
 [-0.24882422]
 [-0.05136005]]
partial_d [[ 0.10152232]
 [-0.05459307]
 [-0.02407101]
 [-0.12809004]
 [-0.10814587]
 [-0.01186866]
 [ 0.02500334]
 [-0.08294141]
 [-0.01712002]]
partial_d[-1, :] [-0.01712002]
###########
0
current data: [[0]
 [0]]
###########
###########
1
current data: [[0]
 [1]]
###########
###########
2
current data: [[1]
 [0]]
###########
###########
3
current data: [[1]
 [1]]
###########
Debuging gradient descent...........................
batch_size: 3
learning_rate: 1
self.adjustments {1: array([[ 0.49539263,  0.06481444, -0.02709765, -0.01601748,  0.14360875,
        -0.21239008,  0.07477484,  0.09097758,  0.05270009],
       [ 0.55613289, -0.06854875,  0.15199949, -0.02736813,  0.02511873,
        -0.07060093, -0.27764453,  0.11179531,  0.1235028 ]]), 2: array([[ 0.4189764 ],
 

  -0.46010264 -0.07964427  0.10902458]]
partial_d[-1, :] [ 0.29123534 -0.05188179  0.07958954 -0.02554797 -0.23505217  0.12505321
 -0.46010264 -0.07964427  0.10902458]
layer 2 ###########
self.adjustments[layer] [[ 2.2375964 ]
 [-0.42007538]
 [ 0.09744817]
 [-0.62002723]
 [-1.32079121]
 [-0.24176975]
 [ 1.33821354]
 [-0.42036656]
 [ 0.3413268 ]]
partial_d [[ 0.74586547]
 [-0.14002513]
 [ 0.03248272]
 [-0.20667574]
 [-0.44026374]
 [-0.08058992]
 [ 0.44607118]
 [-0.14012219]
 [ 0.1137756 ]]
partial_d[-1, :] [0.1137756]
###########
0
current data: [[0]
 [0]]
###########
###########
1
current data: [[0]
 [1]]
###########
###########
2
current data: [[1]
 [0]]
###########
###########
3
current data: [[1]
 [1]]
###########
Debuging gradient descent...........................
batch_size: 3
learning_rate: 1
self.adjustments {1: array([[ 0.87590033,  0.14913161,  0.12429024, -0.05252974,  1.1801462 ,
        -0.16519501,  0.83919515, -0.1454476 ,  0.31474927],
       [ 0.87374926, -0.15176739, 

In [75]:
r = [[1,2,3,4,5],[6,7,8,9,10]]
r[:-1, :]

TypeError: list indices must be integers or slices, not tuple

In [10]:
r = np.array([[1,  0.44064899, -0.99977125, -0.39533485, -0.70648822,
        -0.81532281, -0.62747958, -0.30887855, -0.20646505],
       [ 2, -0.16161097,  0.370439  , -0.5910955 ,  0.75623487,
        -0.94522481,  0.34093502, -0.1653904 ,  0.11737966],
       [3, -0.60379702,  0.60148914,  0.93652315, -0.37315164,
         0.38464523,  0.7527783 ,  0.78921333, -0.82991158]])
r[:-1, :]

array([[ 1.        ,  0.44064899, -0.99977125, -0.39533485, -0.70648822,
        -0.81532281, -0.62747958, -0.30887855, -0.20646505],
       [ 2.        , -0.16161097,  0.370439  , -0.5910955 ,  0.75623487,
        -0.94522481,  0.34093502, -0.1653904 ,  0.11737966]])

In [55]:
r = [1,2,3,4,5,6,7,8,9]

r[-4:]

[6, 7, 8, 9]

In [None]:
r = np.array([[1,  0.44064899, -0.99977125, -0.39533485, -0.70648822,
        -0.81532281, -0.62747958, -0.30887855, -0.20646505]]).T
r[:-1, :]

In [43]:

1e-5

1e-05

In [65]:
print(7.299670911230132e-06<1)

True


In [78]:
-5.04310222e-03 == -0.0050431

False