In [2]:
import numpy as np

In [390]:
class Network():

    @np.vectorize
    @staticmethod
    def activation_function(z):
        return 1 / (1 + np.exp(-z))

    
    @np.vectorize
    @staticmethod
    def activation_derivative(z):
        return np.exp(z) / (np.exp(z) + np.exp(-z))**2

    
    @staticmethod
    def cost(a, y):
        return 0.5 * np.dot(np.transpose(a - y), a - y)[0][0]
    
    
    @staticmethod
    def cost_gradient(a, y):
        # There's an incorrect sign somewhere but I can't find it.
        return a - y
    

    def __init__(self, dim: list):
        self.dim = [dim[0]] + dim[1] + [dim[2]]
        self.dim_input = dim[0]
        self.dim_hidden = dim[1]
        self.dim_output = dim[2]
        self.weights = [np.random.normal(0, 1, (j, i)) for i, j in zip(self.dim[:-1], self.dim[1:])]
        self.biases = [np.random.normal(0, 1, (i, 1)) for i in self.dim[1:]]

    
    def front_propagate(self, x):
        for w, b in zip(self.weights, self.biases):
            x = self.activation_function(np.dot(w, x) + b)
        return x

    
    def calculate_inputs(self, x):
        Z = [x]
        for w, b in zip(self.weights, self.biases):
            x = np.dot(w, x) + b
            Z.append(x)
        return Z

    
    def calculate_neurons(self, x):
        # I think in the end the first layer wasn't required. I'll maybe change it later.
        # If I change it here, I'll need to move the indices in backpropagate.
        A = [x]
        for w, b in zip(self.weights, self.biases):
            x = self.activation_function(np.dot(w, x) + b)
            A.append(x)
        return A

    
    def backpropagate(self, x, y):
        inputs = self.calculate_inputs(x)
        neurons = self.calculate_neurons(x)
        grad_C = self.cost_gradient(neurons[-1], y)
        errors = [np.multiply(grad_C, inputs[-1])]

        for z, W in zip(inputs[-2:0:-1], self.weights[:0:-1]):
            error = np.multiply(np.dot(np.transpose(W), errors[-1]),
                                self.activation_derivative(z))
            errors.append(error)
        errors.reverse()
        weight_partials = []
        for e, a in zip(errors, neurons):
            weight_partials.append(np.dot(e, np.transpose(a)))

        return weight_partials, errors



    def update(self, weight_partials, bias_partials):
        new_weights = []
        new_biases = []
        for w, b, wp, bp in zip(self.weights, self.biases, weight_partials, bias_partials):
            new_weights.append(w - wp)
            new_biases.append(b - bp)

        self.weights = new_weights
        self.biases = new_biases
            
        

    def SGD(self, U, epochs, lr=1E-3, MB_size=50, train_ratio=0.9):
        '''
        Parameters
        ----------
        U: array-like
            list of tuples containing input-output pairs (x, y).
        '''
        j = int(len(U) * train_ratio)
        train_U = U[0:j]
        test_U = U[j:]
        
        for epoch in range(epochs):
            np.random.shuffle(train_U)
            
            for i in range(int(np.ceil(len(train_U) / MB_size))):
                minibatch = train_U[i * MB_size:(i+1) * MB_size]
                weight_partials = len(self.weights) * [0]
                bias_partials = len(self.weights) * [0]
                for x, y in minibatch:
                    wp, bp = self.backpropagate(x, y)
                    weight_partials = [a - lr * b / len(minibatch) for a, b in zip(weight_partials, wp)]
                    bias_partials = [a - lr * b / len(minibatch) for a, b in zip(bias_partials, bp)]            
            self.update(weight_partials, bias_partials)
            
            test_cost = 0
            for x, y in test_U:
                test_cost += self.cost(self.front_propagate(x), y) / len(test_U)
            if epoch % 10 == 0:
                print(f"{epoch = }, {test_cost = }")

    
    def print_parameters(self):
        weights = [w.shape for w in self.weights]
        biases = [b.shape for b in self.biases]
        print(f"dimension = {self.dim}, {weights = }, {biases = }")

In [391]:
np.random.seed(42)
dim = [3, [5, 10, 12], 3]
net = Network(dim)
net.print_parameters()
net.front_propagate(np.random.uniform(0, 1, (3, 1)))

dimension = [3, 5, 10, 12, 3], weights = [(5, 3), (10, 5), (12, 10), (3, 12)], biases = [(5, 1), (10, 1), (12, 1), (3, 1)]


array([[0.65755991],
       [0.67933132],
       [0.87978686]])

In [394]:
n = 500
epochs = 100
data = [(np.random.uniform(0, 1, (3, 1)), np.transpose(np.array([[0.5, 0.5, 0.5]]))) for _ in range(n)]
net.SGD(data, epochs)

epoch = 0, test_cost = np.float64(0.019645264061833943)
epoch = 10, test_cost = np.float64(0.013317415185250476)
epoch = 20, test_cost = np.float64(0.009724061280893499)
epoch = 30, test_cost = np.float64(0.007327044707847312)
epoch = 40, test_cost = np.float64(0.005899436413211777)
epoch = 50, test_cost = np.float64(0.004896492805929875)
epoch = 60, test_cost = np.float64(0.004206439853955422)
epoch = 70, test_cost = np.float64(0.003652995627938199)
epoch = 80, test_cost = np.float64(0.003260807685247638)
epoch = 90, test_cost = np.float64(0.0029649711880085365)


In [393]:
net.front_propagate(np.random.uniform(0, 1, (3, 1)))

array([[0.6046406 ],
       [0.61035728],
       [0.6714795 ]])