In [15]:
import numpy as np    

In [289]:
class NN:
    def __init__(self, layers):
        self.layers = layers
        self.n_layers = len(layers)
        self.n_weights = len(layers) - 1
        self.weights = self.default_weights_init(layers)
        self.biases = self.default_bias_init(layers)
        self.activation_function = self.sigmoid
        self.activation_der = self.sigmoid_der
        self.loss_function = self.mse
        self.loss_function_der = self.mse_der


    # gettting the weights from the normal distribution
    # TODO - adding more clever methods of setting weights
    def default_weights_init(self, layers):
        weights = []
        for i in range(self.n_weights):
            weights.append(np.random.randn(layers[i], layers[i + 1]))
        return weights

    # setting all biases to zero
    # TODO better method?
    def default_bias_init(self, layers):
        biases = []
        for i in layers[1:]:
            biases.append(np.zeros((1, i)))
        return biases


    # data set - array of tuples, where the first element is as input array and the second element is the desired output array
    # ratios are the ratios of train, validation and test data
    def load_data(self, data, ratios):
        # checking if the data matches the NN architecture - I/O
        assert (len(data[0][0]) == self.layers[0]) and (len(data[0][1]) == self.layers[-1])
        idx1 = int(ratios[0] * len(data))
        idx2 = int(sum(ratios[:2]) * len(data))
        np.random.shuffle(data)
        self.X_train = np.array([t[0] for t in data[:idx1]])
        self.Y_train = np.array([t[1] for t in data[:idx1]])
        self.X_valid = np.array([t[0] for t in data[idx1:idx2]])
        self.Y_valid = np.array([t[1] for t in data[idx1:idx2]])
        self.X_test = np.array([t[0] for t in data[idx2:]])
        self.Y_test = np.array([t[1] for t in data[idx2:]])

    
    def train_NN(self, n_epochs, batch_size, learning_rate):
        for epoch in range(n_epochs):
            nabla_weights = [np.zeros(w.shape) for w in self.weights]
            nabla_biases = [np.zeros(b.shape) for b in self.biases]

            # nahradit loop tensorovým násobením
            for _ in range(int(len(self.X_train) / batch_size)):
                # creating a minibatch from the training data
                minibatch_indexes = np.random.choice(range(0, len(self.X_train)), batch_size, replace=False)
                minibatch_X = self.X_train[minibatch_indexes]
                print(minibatch_X.shape)
                minibatch_Y = self.Y_train[minibatch_indexes]
                # backpropagation algorithm
                delta_nabla_weights, delta_nabla_biases = self.backpropagation(minibatch_X, minibatch_Y)
                # updating the nablas - adding gradients
                nabla_weights = [nw + dnw for (nw, dnw) in zip(nabla_weights, delta_nabla_weights)]
                nabla_biases = [nb + dnb for (nb, dnb) in zip(nabla_biases, delta_nabla_biases)]
                
            # updating the NN parameters, averiging the gradients + multiplying by the learning rate
            self.weights = [w + (-learning_rate / batch_size) * delta_w for (w, delta_w) in zip(self.weights, nabla_weights)]
            self.biases = [b + (-learning_rate / batch_size) * delta_b for (b, delta_b) in zip(self.biases, nabla_biases)]

    
    # expects input as an 1 x n  numpy array (n = neurons in input layer)
    # only for evaluation, custom input to the network
    def forward_pass(self, input):
        for i in range(self.n_weights):
            z = input @ self.weights[i] + self.biases[i]
            activation = self.activation_function(z)
            input = activation
        return input

    # backpropagation algorithm
    def backpropagation(self, x, y):
        # matrices of gradients - all zeros
        nabla_weights = [np.zeros(w.shape) for w in self.weights]
        nabla_biases = [np.zeros(b.shape) for b in self.biases]

        # storing activations and z's for each layer
        # storing the input as the first activation
        activations, weighted_inputs = [x], []

        # feed forward + storing all information
        for (w, b) in zip(self.weights, self.biases):
            z = x @ w + b
            x = self.activation_function(z)
            # storing weighted input + activation for further use
            weighted_inputs.append(z)
            activations.append(x)

        # backward pass
        # error in the output layer
        delta = self.loss_function_der(y, activations[-1]) * self.activation_der(weighted_inputs[-1])
        # gradient of biases = error in the corresponding layer
        nabla_biases[-1] = delta
        # gradient of weights = error in the corresponding layer dotted/weighted with activations of the previous layer
        nabla_weights[-1] = np.dot(activations[-2].T, delta)

        # iterating layers second to last to the second
        for l in range(self.n_layers - 2, 0, -1):
            current_z = weighted_inputs[l]
            current_z_der = self.activation_der(current_z)
            
            delta = np.dot(delta, self.weights[l].T) * current_z_der
            
            nabla_biases[l] = delta
            nabla_weights[l] = np.dot(activations[l - 1].T, delta)
    

        return (nabla_weights, nabla_biases)



    
    # ReLU - 0 if x < 0 else x
    def relu(self, input):
        return np.maximum(0, input)

    
    # sigmoid - 1 / (1 + e^-x)
    def sigmoid(self, input):
        return 1 / (1 + np.exp(-input))

    # derivation of the sigmoid function
    def sigmoid_der(self, z):
        return self.sigmoid(z)*(1-self.sigmoid(z))


    # mean square error - categorization loss function
    # Y - true values that SHOULD be predicted by the model
    # Y_hat - ACTUAL prediction by the model
    def mse(self, y, y_hat):
        return 0.5 * (y - y_hat) ** 2

    # mean square error derivative
    def mse_der(self, y, y_hat):
        return (y - y_hat)
        


        

In [290]:
example = NN([3, 4, 1])

In [291]:
dummy = [
    ([7, 5, -3], [35]),
    ([7, -8, -8], [4]),
    ([6, 3, -4], [26]),
    ([-1, -2, 4], [-4]),
    ([6, 6, -6], [30]),
    ([-5, 9, -9], [-11]),
    ([-9, 4, 6], [-22]),
    ([-8, -10, 1], [-51]),
    ([-7, 8, -4], [-16]),
    ([-1, -5, 6], [-8]),
    ([-7, -10, -10], [-58]),
    ([-10, 7, -4], [-30]),
    ([-7, 7, -10], [-24]),
    ([0, -5, -1], [-11]),
    ([2, 8, -6], [18]),
    ([2, -6, 5], [1]),
    ([-10, -8, -8], [-64]),
    ([3, 7, 8], [34]),
    ([7, -1, -9], [17]),
    ([-6, -10, -6], [-50])
]


In [292]:
example.load_data(dummy, (0.8, 0.1, 0.1))

In [293]:
example.train_NN(1, 5, 0.1)

(5, 3)


ValueError: operands could not be broadcast together with shapes (4,1) (3,4) 

In [215]:
x = np.array([[1, 2, 4]])
y = np.array([[9, 9, 9]])

z = x * y

In [216]:
z

array([[ 9, 18, 36]])

In [217]:
print(list(range(5 - 2, -1, -1)))

[3, 2, 1, 0]
