In [1]:
import numpy as np
import random
import matplotlib.pyplot as plt
from time import time

In [3]:
class Neuron:
    """
    A single unit holding a weight vector, a bias and an activation function.

    The activation function receives the pre-activation value
    ``np.dot(weights, x) + bias``. When a derivative is requested it is
    additionally called with ``deriv = True``.
    """

    def __init__(self, weights, bias, activation_function):
        self.weights, self.bias = weights, bias
        self.activation_function = activation_function

    def calc_output(self, x, deriv = False):
        """
        Evaluate the neuron for input x.
        With deriv set, the activation function is asked for its derivative mode instead.
        """
        pre_activation = np.dot(self.weights, x) + self.bias
        if not deriv:
            # Plain call on purpose: activation functions without a `deriv` parameter still work here
            return self.activation_function(pre_activation)
        return self.activation_function(pre_activation, deriv = True)

    def get_params(self):
        """Return the weights, bias and activation function of the neuron as a dict."""
        return dict(weights = self.weights,
                    bias = self.bias,
                    activation_function = self.activation_function)

In [4]:
class Layer:
    """
    Representation of a layer: an ordered list of neurons that all receive the same input.
    """
    
    def __init__(self, neurons = None):
        """
        neurons argument is a list of neurons.
        When it is omitted the layer starts with its own new, empty list.
        """
        # A `[]` default would be created once and then shared (and mutated) by
        # every Layer constructed without arguments, so build a fresh list instead.
        self.neurons = [] if neurons is None else neurons
        
    def calc_output(self, x, deriv = False):
        """
        Return a list with the output of every neuron for input x, in neuron order.
        With deriv set, every neuron is evaluated with deriv = True.
        """
        if deriv:
            return [neuron.calc_output(x, deriv = True) for neuron in self.neurons]
        return [neuron.calc_output(x) for neuron in self.neurons]
    
    def get_params(self):
        """Return a list with the get_params() result of every neuron, in neuron order."""
        return [neuron.get_params() for neuron in self.neurons]

In [5]:
class RNN:
    """
    Representation of a recurrent neural network
    """
    
    def __init__(self,
                 final_activation_function,
                 layers,
                 initial_hidden_state = None
                ):
        """
        layers argument has a format: [layer_1, layer_2, ...] where layer_i = [activation_function, input_shape, output_shape]
        layer_i must have input_shape equal to output_shape of layer_i-1 (a ValueError is raised otherwise).
        final_activation_function is applied to the outputs of the last layer to make a prediction.
        initial_hidden_state is the sequence of values fed to the first layer, in front of the first input,
        at the first time step. When omitted it is 0.5 for every neuron of the last layer.
        Weights and biases are drawn from np.random.rand.
        """
        
        self.layers = []
        # self.neurons[i][j] is the j-th neuron in the i-th layer
        self.neurons = []
        # self.weights[i][j] are weights for the j-th neuron in the i-th layer
        self.weights = []
        # self.biases[i][j] is the bias for the j-th neuron in the i-th layer
        self.biases = []
        # self.activation_functions[i][j] is the activation function for the j-th neuron in the i-th layer
        self.activation_functions = []
        # final_activation_function is applied to model output to make prediction
        self.final_activation_function = final_activation_function

        # initial hidden state
        # `is None` instead of `== None`: comparing an array-like with `==` is element-wise and cannot be used in `if`.
        if initial_hidden_state is None:
            self.initial_hidden_state = [0.5 for i in range(layers[-1][2])]
        else:
            # Stored as a list because it is later concatenated with the input features using `+`
            self.initial_hidden_state = list(initial_hidden_state)
            
        for layer_number, (activation_function, input_shape, output_shape) in enumerate(layers):
            if layer_number > 0 and input_shape != layers[layer_number - 1][2]:
                raise ValueError('input_shape of layer %d must be equal to output_shape of layer %d'
                                 % (layer_number, layer_number - 1))

            neurons = []
            for i in range(output_shape):
                if layer_number == 0:
                    # The first layer receives the hidden state followed by the input features
                    weights = list(np.random.rand(len(self.initial_hidden_state) + input_shape))
                else:
                    weights = list(np.random.rand(input_shape))
                bias = np.random.rand()
                neurons.append(Neuron(weights, bias, activation_function))

            self.layers.append(Layer(neurons))

            self.weights.append([])
            self.biases.append([])
            self.activation_functions.append([])
            self.neurons.append(neurons)
            for new_neuron in neurons:
                neuron_params = new_neuron.get_params()
                # The weight list is the very object held by the neuron, so in-place updates are seen by both.
                self.weights[layer_number].append(neuron_params['weights'])
                # The bias is a plain number: this is a copy, not a shared reference.
                self.biases[layer_number].append(neuron_params['bias'])
                self.activation_functions[layer_number].append(neuron_params['activation_function'])


    def calc_neurons_outputs(self, x_train):
        """
        Calculating a neurons outputs where input for the model equals to x_train.
        x_train is a list of inputs for the model.
        This function returns variables neurons_outputs, neurons_deriv_outputs.
        neurons_outputs[i][t][j][k] is the output of k-th neuron in the j-th layer for the t-th input in the i-th sample in the x_train
        neurons_deriv_outputs[i][t][j][k] is the output of a derivative of the k-th neuron in the j-th layer for the t-th input in the i-th sample in the x_train
        We need those values in order to calculate derivative of a loss function.
        """
            
        neurons_outputs = []
        neurons_deriv_outputs = []
        for i in range(len(x_train)):
            neurons_outputs.append([])
            neurons_deriv_outputs.append([])
            for t in range(len(x_train[i])):
                neurons_outputs[i].append([])
                neurons_deriv_outputs[i].append([])

                if t == 0:
                    neurons_outputs[i][t].append([])
                    neurons_deriv_outputs[i][t].append([])
                    
                    neurons_outputs[i][t][0] = self.layers[0].calc_output(self.initial_hidden_state + x_train[i][t])
                    neurons_deriv_outputs[i][t][0] = self.layers[0].calc_output(self.initial_hidden_state + x_train[i][t], deriv = True)
                else:
                    neurons_outputs[i][t].append([])
                    neurons_deriv_outputs[i][t].append([])
                    
                    neurons_outputs[i][t][0] = self.layers[0].calc_output(neurons_outputs[i][t - 1][-1] + x_train[i][t])
                    neurons_deriv_outputs[i][t][0] = self.layers[0].calc_output(neurons_outputs[i][t - 1][-1] + x_train[i][t], deriv = True)

                for layer_number, layer in enumerate(self.layers[1:]):
                    neurons_outputs[i][t].append([])
                    neurons_outputs[i][t][layer_number + 1] = layer.calc_output(neurons_outputs[i][t][layer_number])

                for layer_number, layer in enumerate(self.layers[1:]):
                    neurons_deriv_outputs[i][t].append([])
                    neurons_deriv_outputs[i][t][layer_number + 1] = layer.calc_output(neurons_outputs[i][t][layer_number], deriv = True)
            
        return neurons_outputs, neurons_deriv_outputs
    
    
    def predict(self, x):
        neurons_outputs, _ = self.calc_neurons_outputs([x])
        return self.final_activation_function(neurons_outputs[0][-1][-1])


    def dn_dw(self,
              x_train,
              neurons_outputs,
              neurons_deriv_outputs,
              training_sample_number,
              input_number,
              neuron_layer_number,
              neuron_number,
              weight_layer_number,
              weight_neuron_number,
              weight_number
             ):
        """
        Calculating derivative of the neurons_outputs[training_sample_number][0][layer_number][neuron_number]
        w.r.t weight = self.layers[weight_layer_number].neurons[weight_neuron_number].weights[weight_number]
        in point equal to actual value of the weight = self.layers[weight_layer_number].neurons[weight_neuron_number].weights[weight_number]
        neurons_outputs is calculated by the function calc_neurons_outputs.
        """
        
        if input_number > 0:
            deriv = 0
            for previous_input_neuron_number in range(len(self.layers[-1].neurons)):
                deriv += (self.dn_dn(x_train,
                                     neurons_outputs,
                                     neurons_deriv_outputs,
                                     training_sample_number,
                                     input_number,
                                     neuron_layer_number,
                                     neuron_number,
                                     previous_input_neuron_number
                                    )
                        * self.dn_dw(x_train,
                                      neurons_outputs,
                                      neurons_deriv_outputs,
                                      training_sample_number,
                                      input_number - 1,
                                      neuron_layer_number,
                                      neuron_number,
                                      weight_layer_number,
                                      weight_neuron_number,
                                      weight_number
                                     )
                         )
                
            return deriv
        else:
            if neuron_layer_number > weight_layer_number:
                deriv = neurons_deriv_outputs[training_sample_number][0][neuron_layer_number][neuron_number]
                Sum = 0
                for k in range(len(self.layers[neuron_layer_number - 1].neurons)):
                    Sum += (self.dn_dw(x_train, 
                                             neurons_outputs, 
                                             neurons_deriv_outputs,
                                             training_sample_number, 
                                             input_number,
                                             neuron_layer_number - 1, 
                                             k, 
                                             weight_layer_number,
                                             weight_neuron_number,
                                             weight_number
                                            ) 
                            * self.weights[neuron_layer_number][neuron_number][k]
                           )
                    

                deriv *= Sum
                return deriv

            elif neuron_layer_number == weight_layer_number > 0 and neuron_number == weight_neuron_number:
                deriv = neurons_deriv_outputs[training_sample_number][0][neuron_layer_number][neuron_number]
                deriv *= neurons_outputs[training_sample_number][0][neuron_layer_number - 1][weight_number]
                return deriv

            elif neuron_layer_number == weight_layer_number == 0 and neuron_number == weight_neuron_number:
                deriv = neurons_deriv_outputs[training_sample_number][0][neuron_layer_number][neuron_number]
                deriv *= (self.initial_hidden_state + x_train[training_sample_number][0])[weight_number]
                return deriv

            elif ((neuron_layer_number == weight_layer_number and neuron_number != weight_neuron_number) 
                  or (neuron_layer_number < weight_layer_number)
                 ):
                return 0
        
        
    def dn_db(self,
              x_train, 
              neurons_outputs,
              neurons_deriv_outputs,
              training_sample_number,
              input_number,
              neuron_layer_number,                                          
              neuron_number,
              bias_layer_number,
              bias_neuron_number
             ):
        """
        Calculating derivative of the neurons_outputs[training_sample_number][0][layer_number][neuron_number]
        w.r.t bias = self.layers[bias_layer_number].neurons[bias_neuron_number].bias
        in point equal to actual value of the bias = self.layers[bias_layer_number].neurons[bias_neuron_number].bias
        neurons_outputs is calculated by the function calc_neurons_outputs.
        """
        
        if input_number > 0:
            deriv = 0
            for previous_input_neuron_number in range(len(self.layers[-1].neurons)):
                deriv += (self.dn_dn(x_train,
                                         neurons_outputs,
                                         neurons_deriv_outputs,
                                         training_sample_number,
                                         input_number,
                                         neuron_layer_number,
                                         neuron_number,
                                         previous_input_neuron_number
                                        )
                         * self.dn_db(x_train,
                                          neurons_outputs,
                                          neurons_deriv_outputs,
                                          training_sample_number,
                                          input_number - 1,
                                          neuron_layer_number,
                                          neuron_number,
                                          bias_layer_number,
                                          bias_neuron_number
                                         )
                         )
                
            return deriv
        else:
            if neuron_layer_number > bias_layer_number:
                deriv = neurons_deriv_outputs[training_sample_number][0][neuron_layer_number][neuron_number]
                Sum = 0
                for k in range(len(self.layers[neuron_layer_number - 1].neurons)):
                    Sum += (self.dn_db(x_train, 
                                         neurons_outputs, 
                                         neurons_deriv_outputs,
                                         training_sample_number, 
                                         input_number,
                                         neuron_layer_number - 1, 
                                         k, 
                                         bias_layer_number,
                                         bias_neuron_number
                                        ) 
                             * self.weights[neuron_layer_number][neuron_number][k]
                           )

                deriv *= Sum
                return deriv

            elif neuron_layer_number == bias_layer_number and neuron_number == bias_neuron_number:
                deriv = neurons_deriv_outputs[training_sample_number][0][neuron_layer_number][neuron_number]
                return deriv

            elif ((neuron_layer_number == bias_layer_number and neuron_number != bias_neuron_number) or 
                  (neuron_layer_number < bias_layer_number)
                 ):
                return 0
        
    
    def dn_dn(self,
              x_train, 
              neurons_outputs,
              neurons_deriv_outputs,
              training_sample_number, 
              input_number,                
              layer_number,                                          
              neuron_number,
              previous_input_neuron_number
             ):
        """
        Calculating derivative of the neurons_outputs[training_sample_number][input_number][layer_number][neuron_number]
        w.r.t neurons_outputs[training_sample_number][input_number][layer_number - 1][previous_input_neuron_number]
        in the point equal to actual value of the neurons_outputs[training_sample_number][input_number][layer_number - 1][previous_input_neuron_number]
        neurons_outputs is calculated by the function calc_neurons_outputs.
        """
        
        if layer_number > 0 and input_number > 0:
            deriv = neurons_deriv_outputs[training_sample_number][input_number][layer_number][neuron_number]
            Sum = 0
            for k in range(len(self.layers[layer_number - 1].neurons)):
                Sum += (self.dn_dn(x_train, 
                                         neurons_outputs, 
                                         neurons_deriv_outputs, 
                                         training_sample_number, 
                                         input_number, 
                                         layer_number - 1, 
                                         k, 
                                         previous_input_neuron_number) 
                        * self.weights[layer_number][neuron_number][k]
                       )
            
            deriv *= Sum
            return deriv
        
        elif input_number < 1:
            raise Exception('Wrong value for input_number, input_number must be > 0')
        elif layer_number < 0:
            raise Exception('Wrong value for layer_number, layer_number must be > 0')
        elif layer_number == 0 and input_number > 0:
            deriv = neurons_deriv_outputs[training_sample_number][input_number][layer_number][neuron_number]
            deriv *= self.weights[layer_number][neuron_number][previous_input_neuron_number]
            return deriv
        
        
    def model_prediction_deriv_dw(self,
                                   x_train,
                                   neurons_outputs,
                                   neurons_deriv_outputs,
                                   training_sample_number,
                                   weight_layer_number,
                                   weight_neuron_number,
                                   weight_number
                                  ):
        """
        Calculating a derivative of the model's prediction (= self.final_activation_function(neurons_outputs[training_sample_number][-1][-1]) 
        w.r.t weight = self.weights[weight_layer_number][weight_neuron_number][weight_number]
        in point equal to actual value of the weight
        It returns a list deriv where deriv[i] is a derivative of the i-th element od the model's prediction (which is a vector)
        neurons_outputs is calculated by the function calc_neurons_outputs.
        """
        
        deriv = []
        for neuron_number in range(len(neurons_outputs[training_sample_number][-1][-1])):
            deriv_neuron = self.final_activation_function(neurons_outputs[training_sample_number][-1][-1][neuron_number], deriv = True)
            deriv_neuron *= self.dn_dw(x_train = x_train,
                                        neurons_outputs = neurons_outputs,
                                        neurons_deriv_outputs = neurons_deriv_outputs,
                                        training_sample_number = training_sample_number,
                                        input_number = len(x_train[training_sample_number]) - 1,
                                        neuron_layer_number = len(self.layers) - 1,
                                        neuron_number = neuron_number,
                                        weight_layer_number = weight_layer_number,
                                        weight_neuron_number = weight_neuron_number,
                                        weight_number = weight_number
                                       )
            deriv.append(deriv_neuron)

        return deriv
        
        
    def model_prediction_deriv_db(self,
                                   x_train,
                                   neurons_outputs,
                                   neurons_deriv_outputs,
                                   training_sample_number,
                                   bias_layer_number,
                                   bias_neuron_number
                                  ):
        """
        Calculating a derivative of the model's prediction (= self.final_activation_function(neurons_outputs[training_sample_number][-1][-1]) 
        w.r.t bias = self.biases[bias_layer_number][bias_neuron_number]
        in point equal to actual value of the bias
        It returns a list deriv where deriv[i] is a derivative of the i-th element od the model's prediction (which is a vector)
        neurons_outputs is calculated by the function calc_neurons_outputs.
        """
        
        deriv = []
        for neuron_number in range(len(neurons_outputs[training_sample_number][-1][-1])):
            deriv_neuron = self.final_activation_function(neurons_outputs[training_sample_number][-1][-1][neuron_number], deriv = True)
            deriv_neuron *= self.dn_db(x_train = x_train,
                                        neurons_outputs = neurons_outputs,
                                        neurons_deriv_outputs = neurons_deriv_outputs,
                                        training_sample_number = training_sample_number,
                                        input_number = len(x_train[training_sample_number]) - 1,
                                        neuron_layer_number = len(self.layers) - 1,
                                        neuron_number = neuron_number,
                                        bias_layer_number = bias_layer_number,
                                        bias_neuron_number = bias_neuron_number
                                       )

            deriv.append(deriv_neuron)

        return deriv


    def train(self, x_train, y_train, epochs, batch_size, learning_rate, loss_function):
        """
        Training a model.
        x_train must have a shape[sample_1, sample_2, ...], where sample_i = [input_1, input_2, ...], where input_i = [feature_1, feature_2,..] 
        The Loss function is a categorical cross entropy.
        """
        
        neurons_outputs, neurons_deriv_outputs = self.calc_neurons_outputs(x_train[ : batch_size])
        
        # updating weights
        for epoch in range(epochs):
            for weight_layer_number, layer in enumerate(self.layers):
                for weight_neuron_number, neuron in enumerate(layer.neurons):
                    for weight_number, weight in enumerate(neuron.weights):
                        # calculating derivative of a loss function w.r.t given weight
                        # in point equal to that weight value
                        dl_dw = 0
                        training_sample_number = 0
                        i = 0
                        while training_sample_number + i < len(x_train):
                            i = 0
                            while i < batch_size and training_sample_number + i < len(neurons_outputs):
                                prediction = self.final_activation_function(neurons_outputs[training_sample_number + i][-1][-1])
                                prediction_deriv = self.model_prediction_deriv_dw(x_train = x_train,
                                                                                   neurons_outputs = neurons_outputs,
                                                                                   neurons_deriv_outputs = neurons_deriv_outputs,
                                                                                   training_sample_number = training_sample_number + i,
                                                                                   weight_layer_number = weight_layer_number,
                                                                                   weight_neuron_number = weight_neuron_number,
                                                                                   weight_number = weight_number
                                                                                  )
                                dl_dw += loss_function(y_train[training_sample_number], prediction, prediction_deriv, deriv = True)
                                i += 1

                            print('dl_dw: ', "%.100f" % dl_dw)
                            # self.weights[weight_layer_number][weight_neuron_number][weight_number] -= dl_dw * learning_rate
                            self.weights[weight_layer_number][weight_neuron_number][weight_number] -= dl_dw * learning_rate
                            dl_dw = 0
                            training_sample_number = training_sample_number + i + 1

                            # after updating a weight we need to calculate new neurons outputs and their derivatives
                            # for that new weight
                            if training_sample_number + i != len(x_train) - 1:
                                neurons_outputs, neurons_deriv_outputs = self.calc_neurons_outputs(x_train[training_sample_number : training_sample_number + batch_size])
             
        # updating biases
        # we need to calculate new derivatives for the new weights
        neurons_outputs, neurons_deriv_outputs = self.calc_neurons_outputs(x_train[ : batch_size])
        for bias_layer_number, layer in enumerate(self.layers):
            for bias_neuron_number, neuron in enumerate(layer.neurons):
                # calculating derivative of a loss function w.r.t given weight
                # in point equal to that weight value
                dl_db = 0
                training_sample_number = 0
                i = 0
                while training_sample_number < len(x_train):
                    i = 0
                    while i < batch_size and training_sample_number + i < len(neurons_outputs):
                        prediction = self.final_activation_function(neurons_outputs[training_sample_number + i][-1][-1])
                        prediction_deriv = self.model_prediction_deriv_db(x_train = x_train,
                                                                           neurons_outputs = neurons_outputs,
                                                                           neurons_deriv_outputs = neurons_deriv_outputs,
                                                                           training_sample_number = training_sample_number + i,
                                                                           bias_layer_number = bias_layer_number,
                                                                           bias_neuron_number = bias_neuron_number
                                                                          )
                        dl_db += loss_function(y_train[training_sample_number], prediction, prediction_deriv, deriv = True)
                        i += 1
                        
                    self.biases[bias_layer_number][bias_neuron_number] -= dl_db * learning_rate
                    dl_db = 0
                    training_sample_number = training_sample_number + i + 1

                    # after updating a weight we need to calculate new neurons outputs and their derivatives
                    # for that new weight
                    if training_sample_number != len(x_train) - 1:
                        neurons_outputs, neurons_deriv_outputs = self.calc_neurons_outputs(x_train[training_sample_number : training_sample_number + batch_size])

In [6]:
def relu(x, deriv = False):
    """
    Rectified linear unit.
    For a scalar x returns max(0, x), or with deriv set 1 if x > 0 else 0.
    For a list or array the same rule is applied to every element and a numpy array is returned,
    so the function can also be applied to a whole vector of outputs, like sigmoid.
    """
    if np.ndim(x) == 0:
        # Scalar path
        if deriv:
            return 1 if x > 0 else 0
        return max(0, x)

    values = np.asarray(x)
    if deriv:
        return (values > 0).astype(int)
    return np.maximum(values, 0)

In [7]:
def sigmoid(x, deriv = False):
    """
    Logistic function 1 / (1 + exp(-x)), or its derivative when deriv is set.
    x may be a scalar, a list or an array; lists and arrays are handled element-wise.

    Only exp(-|x|) is ever evaluated, which lies in (0, 1], so neither the value nor the
    derivative overflows or becomes nan for inputs of large magnitude.
    """
    z = np.asarray(x, dtype = float)
    decay = np.exp(-np.abs(z))
    if deriv:
        # The derivative is symmetric in x, so exp(-|x|) can replace exp(-x)
        result = decay / (1.0 + decay) ** 2
    else:
        # For x >= 0: 1 / (1 + exp(-x)); for x < 0 the equal form exp(x) / (1 + exp(x))
        result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # [()] turns a zero-dimensional result into a numpy scalar and leaves arrays unchanged
    return np.asarray(result)[()]

In [8]:
def softmax(x):
    """
    Softmax over all elements of x: exp(x) divided by the sum of exp(x).
    The maximum is subtracted before exponentiating; this leaves the result unchanged
    mathematically but prevents exp from overflowing (and the result from becoming nan) for large scores.
    An empty input gives an empty array.
    """
    scores = np.asarray(x, dtype = float)
    if scores.size == 0:
        return scores
    e = np.exp(scores - np.max(scores))
    return e / e.sum()

In [9]:
def cross_entropy(real_value, prediction, prediction_deriv = None, deriv = False):
    """
    Categorical cross entropy -sum(real_value[i] * log(prediction[i])) or, when deriv is set, its derivative.
    prediction_deriv is a derivative of a model's prediction w.r.t variable w.r.t which we want to calculate
    a derivative of a loss function. This derivative is calculated in point of actual value of the variable.
    deriv argument indicates if we want to calculate a derivative of a loss function.

    Predictions are bounded from below by a tiny positive number, so a prediction of exactly 0
    gives a finite result instead of nan or a division by zero.
    Note that every term is multiplied by real_value[i]: entries whose real value is 0 contribute nothing.
    Raises TypeError when deriv is set and prediction_deriv is missing.
    """
    
    smallest_prediction = 1e-12
    targets = list(real_value)
    probabilities = [np.maximum(probability, smallest_prediction) for probability in prediction]
    
    if deriv:
        if prediction_deriv is None:
            raise TypeError('prediction_deriv is required when deriv = True')
        return -np.sum([target * probability ** (-1) * probability_deriv
                        for target, probability, probability_deriv in zip(targets, probabilities, prediction_deriv)])
    return -np.sum([target * np.log(probability) for target, probability in zip(targets, probabilities)])

In [10]:
def abs_diff(real_value, prediction, prediction_deriv = None, deriv = False):
    """
    Sum of absolute differences between real_value and prediction, or its derivative.

    With deriv = True every output contributes +prediction_deriv[i] when the prediction
    is greater than the target and -prediction_deriv[i] otherwise (equality included).
    All sequences are indexed over range(len(real_value)).
    """
    terms = []
    for idx in range(len(real_value)):
        if not deriv:
            terms.append(np.abs(real_value[idx] - prediction[idx]))
        elif prediction[idx] > real_value[idx]:
            terms.append(prediction_deriv[idx])
        else:
            terms.append(-prediction_deriv[idx])
    return np.sum(terms)

In [11]:
# Build the network under test. Each layer spec is [activation_function, input_shape, output_shape]
# and a layer's input_shape must equal the previous layer's output_shape: 1 -> 5 -> 10 -> 5 -> 1.
model = RNN(final_activation_function = sigmoid, layers = [[sigmoid, 1, 5], [sigmoid, 5, 10], [sigmoid, 10, 5], [sigmoid, 5, 1]])
# Alternative single-layer relu configuration (disabled):
# model = RNN(final_activation_function = sigmoid, layers = [[relu, 1, 1]])

In [12]:
# Toy dataset: two training samples, each a list of two 1-element inputs
# (presumably the time steps of one sequence - TODO confirm against RNN.predict).
x_train = [[[1],[1]], [[0],[0]]]
# one 1-element target per training sample
y_train = [[1], [0]]

In [13]:
# Show the model's prediction for each of the two training samples.
print(model.predict(x_train[0]))
print(model.predict(x_train[1]))

[0.72189819]
[0.72188601]


In [51]:
# Train with RNN.train, one sample per batch.
# NOTE(review): the saved output of this cell ends in
# "TypeError: unsupported operand type(s) for *: 'decimal.Decimal' and 'float'",
# and every printed dl_dw after the first is exactly 0 - the cell does not run to completion as saved.
model.train(x_train = x_train, 
            y_train = y_train, 
            epochs = 10, 
            batch_size = 1, 
            learning_rate = 0.05, 
            loss_function = cross_entropy
           )

dl_dw:  -0.0000000002138417086716036933953626663425413856334245110701886005699634552001953125000000000000000000
dl_dw:  0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
dl_dw:  0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
dl_dw:  0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
dl_dw:  0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
dl_dw:  0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
dl_dw:  0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
dl_dw:  0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
dl_dw:  0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000


TypeError: unsupported operand type(s) for *: 'decimal.Decimal' and 'float'

In [122]:
# Same training configuration as the model.train call, but using RNN.train2
# (defined on the class, not visible in this part of the notebook).
# The saved output prints pairs of "loss1" and "loss2 - loss1" values.
model.train2(x_train = x_train, 
            y_train = y_train, 
            epochs = 10, 
            batch_size = 1, 
            learning_rate = 0.05, 
            loss_function = cross_entropy
           )

loss1:  0.32306702194180537
loss2 - loss1:  -1.1915171609921948e-06
loss1:  0.3230658304246444
loss2 - loss1:  -1.1544734361823572e-06
loss1:  0.3230646759512082
loss2 - loss1:  -1.0912018593156603e-06
loss1:  0.3230635847493489
loss2 - loss1:  -1.0561773045592027e-06
loss1:  0.3230625285720443
loss2 - loss1:  -7.543280791111329e-07
loss1:  0.3230617742439652
loss2 - loss1:  -7.2975304099554e-07
loss1:  0.3230610444909242
loss2 - loss1:  -1.3156845792283711e-06
loss1:  0.323059728806345
loss2 - loss1:  -1.269687318117274e-06
loss1:  0.32305845911902686
loss2 - loss1:  -4.3307360725330923e-07
loss1:  0.3230580260454196
loss2 - loss1:  -4.1184386617709734e-07
loss1:  0.32305761420155343
loss2 - loss1:  -6.17826846516234e-07
loss1:  0.3230569963747069
loss2 - loss1:  -5.790818218276428e-07
loss1:  0.3230564172928851
loss2 - loss1:  -5.396101691590438e-07
loss1:  0.32305587768271593
loss2 - loss1:  -5.011609006122519e-07
loss1:  0.3230553765218153
loss2 - loss1:  -5.15662749500656e-07
loss

In [32]:
# Forward pass over the whole training set: neuron outputs and their activation derivatives.
neurons_outputs, neurons_deriv_outputs = model.calc_neurons_outputs(x_train)

# Derivative of neuron 0 in the last layer w.r.t. weight 0 of that same neuron,
# evaluated for training sample 0 at input index 1 (the last of its two inputs).
dn_dw = model.dn_dw(x_train = x_train,
                    neurons_outputs = neurons_outputs,
                    neurons_deriv_outputs = neurons_deriv_outputs,
                    training_sample_number = 0,
                    input_number = 1,
                    neuron_layer_number = len(model.layers) - 1,
                    neuron_number = 0,
                    weight_layer_number = len(model.layers) - 1,
                    weight_neuron_number = 0,
                    weight_number = 0
                   )
print(dn_dw)

0.000003178165106


In [30]:
# Outputs of the second-to-last layer; indexing is presumably [sample][input][layer] - TODO confirm.
neurons_outputs[0][0][len(model.layers) - 2]

[0.9846108839913832,
 0.9957160179978289,
 0.9955978806708422,
 0.9857380266984599,
 0.9753489400536554]

In [28]:
# Activation derivatives of the last layer; indexing is presumably [sample][input][layer] - TODO confirm.
neurons_deriv_outputs[0][0][len(model.layers) - 1]

[0.04401256621199428]

In [31]:
# Manual check: first second-to-last-layer output times the last-layer activation derivative,
# both copied by hand from the saved outputs of the two inspection cells.
0.9846108839913832 * 0.04401256621199428

0.04333525172472097

In [91]:
# Weights of the last layer (model.weights[i][j] are the weights of neuron j in layer i).
model.weights[-1]

[[0.8508321212137604,
  0.8100017991325282,
  0.26856577372888313,
  0.9664316704339672,
  0.8631015300565061]]

## checking how training works

In [53]:
# Toy dataset: two training samples, each a list of two 1-element inputs.
x_train = [[[1],[1]], [[0],[0]]]
# One 1-element target per training sample (a third label had no matching sample).
y_train = [[1], [0]]

In [85]:
# Five-layer sigmoid network used for the gradient inspection below.
# Each layer spec is [activation_function, input_shape, output_shape]: 1 -> 50 -> 20 -> 20 -> 50 -> 1.
model = RNN(final_activation_function = sigmoid, layers = [[sigmoid, 1, 50], [sigmoid, 50, 20], [sigmoid, 20, 20], [sigmoid, 20, 50], [sigmoid, 50, 1]])
# Alternative single-layer relu configuration (disabled):
# model = RNN(final_activation_function = sigmoid, layers = [[relu, 1, 1]])

# settings for the manual gradient computation
batch_size = 1
learning_rate = 0.1
loss_function = cross_entropy

# forward pass over the first batch_size samples: neuron outputs and their activation derivatives
neurons_outputs, neurons_deriv_outputs = model.calc_neurons_outputs(x_train[ : batch_size])

# coordinates (layer, neuron, weight index) of the single weight being examined
weight_layer_number = 0
weight_neuron_number = 0
weight_number = 0


In [86]:
# calculating derivative of a loss function w.r.t given weight
# in point equal to that weight value
#
# Relies on model, x_train, y_train, batch_size, loss_function, neurons_outputs,
# neurons_deriv_outputs and the weight_* coordinates defined in earlier cells.
dl_dw = 0
training_sample_number = 0
i = 0
while i < batch_size and training_sample_number + i < len(neurons_outputs):
    # Sample handled in this iteration. Every per-sample lookup below uses this one index,
    # so prediction, derivative and label always belong to the same sample (they only
    # coincided for batch_size == 1 when some lookups ignored i).
    sample_number = training_sample_number + i

    # model prediction: final activation applied to the last layer's outputs at the last input
    prediction = model.final_activation_function(neurons_outputs[sample_number][-1][-1])
    # prediction_deriv = model.model_prediction_deriv_dw(x_train = x_train,
    #                                                    neurons_outputs = neurons_outputs,
    #                                                    neurons_deriv_outputs = neurons_deriv_outputs,
    #                                                    training_sample_number = training_sample_number + i,
    #                                                    weight_layer_number = weight_layer_number,
    #                                                    weight_neuron_number = weight_neuron_number,
    #                                                    weight_number = weight_number
    #                                                   )

    # chain rule per output neuron: d(final activation)/d(neuron output) * d(neuron output)/d(weight)
    deriv = []
    for neuron_number in range(len(neurons_outputs[sample_number][-1][-1])):
        deriv_neuron = model.final_activation_function(neurons_outputs[sample_number][-1][-1][neuron_number], deriv = True)
        dn_dw = model.dn_dw(x_train = x_train,
                            neurons_outputs = neurons_outputs,
                            neurons_deriv_outputs = neurons_deriv_outputs,
                            training_sample_number = sample_number,
                            input_number = len(x_train[sample_number]) - 1,
                            neuron_layer_number = len(model.layers) - 1,
                            neuron_number = neuron_number,
                            weight_layer_number = weight_layer_number,
                            weight_neuron_number = weight_neuron_number,
                            weight_number = weight_number
                           )
        print('dn_dw: ', "%.100f" % dn_dw)
        deriv_neuron *= dn_dw
        deriv.append(deriv_neuron)
    prediction_deriv = deriv

    # accumulate the loss derivative over the batch
    dl_dw += loss_function(y_train[sample_number], prediction, prediction_deriv, deriv = True)
    i += 1

print('dl_dw: ', "%.100f" % dl_dw)

dn_dw:  0.0000000000000000000000000000000000000000000000000179866512391578804361210439191314092475619647017286
dl_dw:  -0.0000000000000000000000000000000000000000000000000048373555499865276403819483728045610857015053649861


In [60]:
# Inspect the prediction left in the kernel by the most recently executed cell that assigned it.
prediction

array([0.73105858])

In [77]:
# calculating loss value for 2 different weights
#
# Relies on x_train, y_train, batch_size, loss_function, training_sample_number and the
# weight_* coordinates defined in earlier cells.
model = RNN(final_activation_function = sigmoid, layers = [[sigmoid, 1, 5], [sigmoid, 5, 10], [sigmoid, 10, 5], [sigmoid, 5, 1]])
weights_difference = 0.3

stime = time()
# The same probe is repeated 1000 times so that its cost can be timed.
for _repeat in range(1000):
    # loss at the current weight
    weight1 = model.weights[weight_layer_number][weight_neuron_number][weight_number]
    neurons_outputs, _ = model.calc_neurons_outputs(x_train[ : batch_size])
    loss1 = 0
    i = 0
    while i < 1:
        prediction = model.final_activation_function(neurons_outputs[training_sample_number + i][-1][-1])
        loss1 += loss_function(y_train[training_sample_number + i], prediction)
        i += 1

    # loss after shifting the weight by weights_difference
    model.weights[weight_layer_number][weight_neuron_number][weight_number] += weights_difference
    weight2 = model.weights[weight_layer_number][weight_neuron_number][weight_number]
    neurons_outputs, _ = model.calc_neurons_outputs(x_train[ : batch_size])
    loss2 = 0
    i = 0
    while i < 1:
        prediction = model.final_activation_function(neurons_outputs[training_sample_number + i][-1][-1])
        loss2 += loss_function(y_train[training_sample_number + i], prediction)
        i += 1

    # Restore the exact original value: (w + d) - d is not guaranteed to equal w in floating
    # point, so undoing the shift with "-=" would let the weight drift across repeats.
    model.weights[weight_layer_number][weight_neuron_number][weight_number] = weight1
print(time() - stime)

1.2982909679412842


In [78]:
# Weight value and loss before the weight is shifted.
print('weight1: ', weight1)
print('loss1: ', loss1)

weight1:  0.01948042429650565
loss1:  0.318141309024122


In [69]:
# Weight value and loss after the weight is shifted.
# NOTE(review): the saved weight2 differs from the saved weight1 by about 1.24, not by the 0.3
# set as weights_difference - the two outputs appear to come from different runs.
print('weight2: ', weight2)
print('loss2: ', loss2)

weight2:  1.2561883287754874
loss2:  0.3161221206131162


In [19]:
# Change in loss caused by the weight shift.
# NOTE(review): the saved result (~1.17e-06) does not match the loss1 / loss2 values printed by
# the preceding cells (~0.3181 and ~0.3161); the output is stale - re-run the notebook top to bottom.
loss1 - loss2

1.166234805560773e-06