In [295]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Make some activation functions for the hell of it

In [296]:
# Lets get some activation functions going:

softmax = lambda x : np.exp(x) / np.sum(np.exp(x), axis=0)

sigmoid = lambda x : 1/(1+np.exp(-x))

sigmoid_derivative = lambda x : sigmoid(x) * (1 - sigmoid(x))

relu = lambda x : np.maximum(0,x)

identity = lambda x : x

one = lambda x : 1

# Defining a layer

Note: these were an experiment and not actually used

In [297]:

class Layer():
    def __init__(self, width, activation=identity):
        self.width = width
        self.activation = activation
        self.weights = None
        self.bias = None
        self.input_width = None
        
    def construct(self, input_width):
        # input_width is the width of the previous layer
        if self.input_width != None:
            raise AttributeError("Already initialised")
        else:
            self.input_width = input_width
            self.weights = np.random.rand(self.width, self.input_width)
            self.bias = np.random.rand()
            
    def eval(self,input_values):
        if self.input_width == None:
            raise AttributeError("Not initialised")
        else:
            return self.activation((self.weights @ input_values) + self.bias)


# Defining a Neural Network in terms of layers

In [298]:
square_cost_derivative = lambda x,y : x - y
square_cost = lambda x,y : (x - y) * (x - y) / 2

class NeuralNetwork:
    def __init__(self, sizes, act=sigmoid, act_deriv=sigmoid_derivative, cost=square_cost, cost_derivative=square_cost_derivative):
        """
        sizes:list of the sizes of layers
        """
        self.n_layers = len(sizes)
        self.sizes = sizes
        
        # Don't need biases for inputs so we do sizes[1:]
        self.biases = [np.random.randn(size) for size in sizes[1:]]
        
        # Need weights between the layers.
        # n x m matrix where n is # of perceptrons on output layer
        # and m is # of perceptrons on input layer
        self.weights = [np.random.randn(n_out,n_in) for n_in,n_out in zip(sizes[:-1],sizes[1:])]
        
        self.activation = act
        self.activation_derivative = act_deriv
        self.cost = cost
        self.cost_derivative = cost_derivative
        
    def ff(self, input_values):
        for weight, bias in zip(self.weights,self.biases):
            print("weights: ", weight.shape, " biases: ", bias.shape, " input: ", input_values.shape)
            input_values = self.activation(weight @ input_values + bias)
            print(input_values)
        return input_values
    
    def SGD(self, training_set, epochs=1, mini_batch_size=1, learning_rate=1):
        for i in range(epochs):
            mini_batches = self.split_training_set(training_set,mini_batch_size)
            
            for mb in mini_batches:
                self.update_mini_batch(mb, learning_rate)
                
    def update_mini_batch(self, mini_batch, learning_rate):
        change_biases = [np.zeros(bias.shape) for bias in self.biases]
        change_weights = [np.zeros(weight.shape) for weight in self.weights]
        
        for x,y in mini_batch:
            delta_biases, delta_weights = self.backprop(x,y)
            change_biases = [old + d for old,d in zip(change_biases, delta_biases)]
            change_weights = [old + d for old,d in zip(change_weights, delta_weights)]
        self.weights = [w + (learning_rate/len(mini_batch)) * cw for w,cw in zip(self.weights,change_weights)]
        self.biases = [b + (learning_rate/len(mini_batch)) * cb for b,cb in zip(self.biases,change_biases)]
        
    def backprop(self,x,y):
        # x is vector for the inputs
        # y is also a vector but for the outputs
        
        activation = x
        activations = [activation]
        nets = []
        
        # feed forward and store activations
        
        for w,b in zip(self.weights,self.biases):
            net = w @ activation + b
            nets.append(net)
            
            activation = self.activation(net)
            activations.append(activation)
            
        # backwards pass
        delta = self.cost_derivative(activations[-1],y) * self.activation_derivative(activation)
        
        change_b = [np.zeros(b.shape) for b in self.biases]
        change_w = [np.zeros(w.shape) for w in self.weights]
        
        #print("delta:",delta,delta.reshape(-1,1).shape)
        #print("activations:",activations[-2].shape)
    
                        
        change_b[-1] = delta
        change_w[-1] = delta.reshape(-1,1) @ activations[-2].reshape(1,-1)
        
        for i in range(2,self.n_layers):
            net = nets[-i]
            delta = (self.weights[-i + 1].T @ delta) * self.activation_derivative(net)
            change_b[-i] = delta
            change_w[-i] = delta.reshape(-1,1) @ activations[-i-1].reshape(1,-1)
            
        return change_b,change_w
    
    def split_training_set(self,training_set,mini_batch_size):
        np.random.shuffle(training_set)
        return [training_set[k:k+mini_batch_size] for k in range(0, len(training_set), mini_batch_size)]
            
            
            
    
    def __str__(self):
        s = []
        s.append(" ".join([f"input({n+1})" for n in range(self.sizes[0])]))
        for weight,bias in zip(self.weights,self.biases):
            s.append("   |")
            s.append("   \\/")
            s.append(np.array2string(weight) + "  +  " + np.array2string(bias))
        return "\n".join(s)
            
        

# Testing

In [299]:
nn = NeuralNetwork([2,3,2])
print(nn)


input(1) input(2)
   |
   \/
[[ 0.55081434 -0.97159026]
 [-0.68799406 -0.60930652]
 [-1.39479904 -0.90646672]]  +  [ 1.67108451 -1.1276874  -1.47927503]
   |
   \/
[[ 0.58200173  0.2985557  -0.55920355]
 [ 2.65583507  1.17920684 -1.72684983]]  +  [0.44407812 1.4606961 ]


In [300]:
nn.ff(np.array([0,0]))

weights:  (3, 2)  biases:  (3,)  input:  (2,)
[0.84172036 0.24458813 0.18553695]
weights:  (2, 3)  biases:  (2,)  input:  (3,)
[0.71161304 0.97501536]


array([0.71161304, 0.97501536])

In [301]:
training_set = [(np.array([1,1]),np.array([1,1])),
               (np.array([1,3]),np.array([1,3])),
               (np.array([2,1]),np.array([2,1])),
               (np.array([0,1]),np.array([0,1])),]

In [302]:
nn.SGD(training_set,epochs=100)

In [303]:
nn.ff(np.array([1,5]))

weights:  (3, 2)  biases:  (3,)  input:  (2,)
[1.34647474e-09 2.24228227e-04 1.00000000e+00]
weights:  (2, 3)  biases:  (2,)  input:  (3,)
[1.26479463e-082 1.57287965e-123]


array([1.26479463e-082, 1.57287965e-123])