In [213]:
import math
import random

import numpy as np

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``.

    Accepts a scalar or a NumPy array; arrays are processed element-wise
    and keep their shape.

    The exponential is only ever taken of a non-positive number, so very
    negative inputs yield 0.0 instead of raising ``OverflowError`` (which
    ``math.exp(-x)`` does once ``-x`` exceeds roughly 709).

    Returns:
        A Python float for scalar input, a float ndarray for array input.
    """
    if isinstance(x, np.ndarray):
        values = x.astype(float)
        decay = np.exp(-np.abs(values))  # lies in (0, 1], cannot overflow
        # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- algebraically equal.
        return np.where(values >= 0, 1 / (1 + decay), decay / (1 + decay))

    decay = math.exp(-abs(x))
    if x >= 0:
        return 1 / (1 + decay)
    return decay / (1 + decay)
def sigmoid_prime(x):
    """Derivative of the logistic function, ``sigmoid(x) * (1 - sigmoid(x))``.

    A scalar is evaluated directly; a NumPy array is walked element by
    element (recursing into nested arrays) and the results are collected
    into a new array.
    """
    if not isinstance(x, np.ndarray):
        s = sigmoid(x)
        return s * (1 - s)
    return np.array(list(map(sigmoid_prime, x)))

def add_arrays(a1, a2):
    """Return a list holding the pairwise sums of the items of ``a1`` and ``a2``.

    Items are paired positionally with ``zip``, so the result is as long
    as the shorter of the two inputs.
    """
    sums = []
    for left, right in zip(a1, a2):
        sums.append(left + right)
    return sums

In [339]:
# NOTE(review): no visible cell reads `learning_rate`; the SGD call in this
# notebook passes its step size (0.1) as a literal argument instead.
learning_rate = 50

In [76]:
# Spot-check: sigmoid derivative evaluated element-wise on a small array.
sigmoid_prime(np.array([1,2,3]))

array([0.19661193, 0.10499359, 0.04517666])

In [166]:
# Spot-check: two lists of arrays are summed position by position.
add_arrays([np.array([1, 2]), np.array([1, 2])], [np.array([2, 3]), np.array([2, 3])])

[array([3, 5]), array([3, 5])]

In [356]:
class Network:
    """Fully connected feed-forward network trained with mini-batch SGD.

    ``layer_sizes`` lists the width of every layer (input, hidden...,
    output).  ``w[i]`` and ``b[i]`` carry layer ``i`` to layer ``i + 1``;
    ``z[i]`` and ``a[i]`` hold the weighted inputs and activations of
    layer ``i`` from the most recent forward pass (``a[0]`` is the input).

    Typical use: construct, optionally ``add_layer`` for each hidden
    layer, call ``init_parameters``, then ``SGD``.
    """

    def __init__(self, input_size, output_size):
        # w is a list of weight matrices, b a list of bias vectors
        self.w = []
        self.b = []
        self.z = []
        self.a = []
        self.input_size = input_size
        self.output_size = output_size

        self.layer_sizes = [input_size, output_size]

    def add_layer(self, layer_size):
        """Insert a hidden layer of ``layer_size`` units just before the output layer.

        Parameters are not resized here; call ``init_parameters`` afterwards.
        """
        self.layer_sizes = self.layer_sizes[:-1] + [layer_size] + [self.layer_sizes[-1]]

    def init_parameters(self):
        """Draw fresh weights and biases uniformly from [0, 1) for the current layout."""
        # input layer --w[0]--> first layer --w[1]--> ... --w[n-1]--> n-th layer
        self.w = [np.random.rand(self.layer_sizes[i+1], self.layer_sizes[i])
                  for i in range(len(self.layer_sizes) - 1)]

        # input layer --b[0]--> first layer --b[1]--> ... --b[n-1]--> n-th layer
        self.b = [np.random.rand(layer_size) for layer_size in self.layer_sizes[1:]]

        # input layer --a[0]--> first layer --a[1]--> ... n-th layer --a[n]-->
        self.init_activities()

    def init_activities(self, inputs=None):
        """Allocate per-layer ``a`` and ``z`` buffers; ``a[0]`` becomes ``inputs`` if given."""
        self.a = [np.empty(layer_size) for layer_size in self.layer_sizes]
        self.z = [np.empty(layer_size) for layer_size in self.layer_sizes]
        if inputs is not None:
            self.a[0] = inputs

    def feed_forward(self, inputs: np.ndarray):
        """Propagate ``inputs`` through the network and return the output activations.

        Every layer's ``z`` and ``a`` are stored on the instance so that
        ``backprop`` can reuse them.
        """
        self.assert_input_shape(inputs)
        self.init_activities(inputs)
        for i in range(1, len(self.layer_sizes)):
            # sum(weights * previous activations) + bias for each neuron
            self.z[i] = np.dot(self.w[i-1], self.a[i-1]) + self.b[i-1]

            # pass through activation function
            self.a[i] = self.activation_function(self.z[i])
        return self.a[-1]

    def train(self, examples, eta):
        """Apply one gradient-descent step using the mean gradient over ``examples``.

        ``examples`` is a sequence of ``(x, y)`` pairs and ``eta`` the
        learning rate.  An empty batch is a no-op (there is no gradient
        to average, and dividing by its length would fail).
        """
        if len(examples) == 0:
            return

        d_w = [np.zeros(w.shape) for w in self.w]
        d_b = [np.zeros(b.shape) for b in self.b]

        for x, y in examples:
            d_d_w, d_d_b = self.backprop(x, y)

            d_w = add_arrays(d_w, d_d_w)
            d_b = add_arrays(d_b, d_d_b)

        step = eta / len(examples)
        self.w = [w - step * dw for w, dw in zip(self.w, d_w)]
        self.b = [b - step * db for b, db in zip(self.b, d_b)]

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            test_data=None):
        """Train the neural network using mini-batch stochastic
        gradient descent.  The "training_data" is an iterable of tuples
        "(x, y)" representing the training inputs and the desired
        outputs.  It is copied once up front and the copy is reshuffled
        every epoch, so the caller's sequence keeps its order.  The other
        non-optional parameters are self-explanatory.  If "test_data" is
        provided then the network will be evaluated against the test data
        after each epoch, and partial progress printed out.  This is
        useful for tracking progress, but slows things down substantially."""
        # Shuffle a private copy: callers that slice their data before and
        # after training must see the same examples both times.
        examples = list(training_data)
        n_test = len(test_data) if test_data else 0
        for j in range(epochs):
            random.shuffle(examples)
            for k in range(0, len(examples), mini_batch_size):
                self.train(examples[k:k+mini_batch_size], eta)
            if test_data:
                print("Epoch {0}: {1} / {2}".format(
                    j, self.evaluate(test_data), n_test))
            else:
                print("Epoch {0} complete".format(j))

    def assert_input_shape(self, x):
        """Fail unless ``x`` is a 1-D array as long as the input layer."""
        assert (x.ndim == 1 and len(x) == self.layer_sizes[0]), "example input doesn't match network input shape"

    def assert_output_shape(self, y):
        """Fail unless ``y`` is a 1-D array as long as the output layer."""
        assert (y.ndim == 1 and len(y) == self.layer_sizes[-1]), "example output doesn't match network output shape"

    def backprop(self, x, y):
        """ Computes gradient of the cost function for one example.
            Returns d_w, d_b (lists shaped like self.w and self.b holding the
            partial derivatives of 0.5 * ||output - y||^2)."""
        d_w = [np.zeros(w.shape) for w in self.w]
        d_b = [np.zeros(b.shape) for b in self.b]

        self.assert_input_shape(x)
        self.assert_output_shape(y)

        # forward pass fills self.z and self.a for every layer
        self.feed_forward(x)

        # error of the output layer (which happens to equal the bias gradient)
        error = (self.a[-1] - y) * self.activation_function_prime(self.z[-1])
        d_b[-1] = error
        d_w[-1] = np.outer(error, self.a[-2])

        # backpropagate the error through the hidden layers, last to first
        for i in range(len(self.layer_sizes) - 2, 0, -1):
            # self.w[i] connects layer i to layer i + 1, whose error we hold
            error = np.dot(self.w[i].transpose(), error) * self.activation_function_prime(self.z[i])

            d_b[i-1] = error
            # gradient w.r.t. the weights feeding layer i uses the previous layer's activations
            d_w[i-1] = np.outer(error, self.a[i-1])

        return d_w, d_b

    def example_error(self, example):
        """Squared Euclidean distance between the target and the network output."""
        x, t = example
        y = self.feed_forward(x)
        return np.linalg.norm(t - y) ** 2

    def network_error(self, examples):
        """Half the mean of ``example_error`` over ``examples``."""
        total = sum(self.example_error(example) for example in examples)
        return total / (2 * len(examples))

    def evaluate(self, test_data, tolerance=0.0):
        """Count the test examples whose every output is within ``tolerance`` of its target.

        With the default ``tolerance`` of 0.0 an example only counts when
        the output equals the target exactly.  Works for any number of
        output units.
        """
        hits = 0
        for x, y in test_data:
            prediction = self.feed_forward(x)
            if np.all(np.abs(prediction - y) <= tolerance):
                hits += 1
        return hits

    def activation_function(self, z):
        """Activation applied to every layer's weighted input (sigmoid)."""
        return sigmoid(z)

    def activation_function_prime(self, z):
        """Derivative of ``activation_function``; used by ``backprop``."""
        return sigmoid_prime(z)

In [157]:
def transpose_mul_vectors(v1, v2):
    """Outer product of two vectors: ``result[i, j] == v1[i] * v2[j]``.

    Equivalent to treating ``v1`` as a column matrix and ``v2`` as a row
    matrix and multiplying them; ``np.outer`` does exactly that without
    manual reshaping.  Accepts NumPy arrays or plain sequences and returns
    an array of shape ``(len(v1), len(v2))``.
    """
    return np.outer(v1, v2)

In [360]:
# Build a network with 2 inputs and 1 output, insert one hidden layer of
# 2 units, then draw random weights and biases for that layout.
mynet = Network(2, 1)
# mynet.add_layer(Layer(1))
mynet.add_layer(2)
mynet.init_parameters()


In [366]:
# Print the error on the first 100 examples, train for 100 epochs with
# mini-batches of 100 and a step size of 0.1, then print the error again.
# NOTE(review): `training_data` is created in a later cell of this file;
# that cell has to be run first.
print(mynet.network_error(training_data[:100]))
mynet.SGD(training_data, 100, 100, 0.1)
print(mynet.network_error(training_data[:100]))

0.00025785438250106485
Epoch 0 complete
Epoch 1 complete
Epoch 2 complete
Epoch 3 complete
Epoch 4 complete
Epoch 5 complete
Epoch 6 complete
Epoch 7 complete
Epoch 8 complete
Epoch 9 complete
Epoch 10 complete
Epoch 11 complete
Epoch 12 complete
Epoch 13 complete
Epoch 14 complete
Epoch 15 complete
Epoch 16 complete
Epoch 17 complete
Epoch 18 complete
Epoch 19 complete
Epoch 20 complete
Epoch 21 complete
Epoch 22 complete
Epoch 23 complete
Epoch 24 complete
Epoch 25 complete
Epoch 26 complete
Epoch 27 complete
Epoch 28 complete
Epoch 29 complete
Epoch 30 complete
Epoch 31 complete
Epoch 32 complete
Epoch 33 complete
Epoch 34 complete
Epoch 35 complete
Epoch 36 complete
Epoch 37 complete
Epoch 38 complete
Epoch 39 complete
Epoch 40 complete
Epoch 41 complete
Epoch 42 complete
Epoch 43 complete
Epoch 44 complete
Epoch 45 complete
Epoch 46 complete
Epoch 47 complete
Epoch 48 complete
Epoch 49 complete
Epoch 50 complete
Epoch 51 complete
Epoch 52 complete
Epoch 53 complete
Epoch 54 comple

In [364]:
# Network output for the input half of the first training pair.
mynet.feed_forward(training_data[0][0])

array([0.46301583])

In [376]:
# Network output for a hand-picked input; the training targets are the sum
# of the two inputs, which would be 0.5 here.
mynet.feed_forward(np.array([.4, .1]))

array([0.50270411])

In [212]:

# Hand-check of the output-layer formulas used in backprop, evaluated on
# hard-coded numbers (presumably copied from an earlier run -- TODO confirm):
# error = (activation - target) * sigmoid_prime(z), and the weight gradient
# is the outer product of that error with the previous layer's activations.
print("output err = d_b[-1] = {}".format((0.75455431 - 1) * sigmoid_prime(1.12305158)))

print("d_w[-1] = {}".format(transpose_mul_vectors(np.array([-0.0454570574756476]), np.array([0.63997489, 0.70351126]).transpose()) ))



output err = d_b[-1] = -0.0454570574756476
d_w[-1] = [[-0.02909138 -0.03197955]]


In [113]:
# Synthetic data set: 10000 pairs whose input is two numbers drawn uniformly
# between 0 and 0.5 and whose target is their sum (so targets lie in [0, 1]).
# NOTE(review): no seed is set, so the data differ on every run.
import random
training_data = []
for i in range(10000):
    x = random.uniform(0, .5)
    y = random.uniform(0, .5)
    training_data.append((np.array([x, y]), np.array([x+y])))
# show the first five pairs as the cell output
training_data[:5]

[(array([0.43085089, 0.3888689 ]), array([0.81971979])),
 (array([0.2234768 , 0.27962075]), array([0.50309756])),
 (array([0.285902  , 0.02903087]), array([0.31493287])),
 (array([0.17001119, 0.19660781]), array([0.366619])),
 (array([0.11986967, 0.05190368]), array([0.17177336]))]

In [207]:
# Outer product of a 1-element vector with a 2-element vector -> 1x2 array.
transpose_mul_vectors(np.array([-0.0454570574756476]), np.array([0.63997489, 0.70351126]).transpose()) 

array([[-0.02909138, -0.03197955]])

In [222]:
# NOTE(review): this displays the whole list; a slice such as
# `training_data[:5]` would show a sample without flooding the output.
training_data

[(array([0.43085089, 0.3888689 ]), array([0.81971979])),
 (array([0.2234768 , 0.27962075]), array([0.50309756])),
 (array([0.285902  , 0.02903087]), array([0.31493287])),
 (array([0.17001119, 0.19660781]), array([0.366619])),
 (array([0.11986967, 0.05190368]), array([0.17177336])),
 (array([0.04277986, 0.06151598]), array([0.10429584])),
 (array([0.13255055, 0.39719683]), array([0.52974738])),
 (array([0.179462  , 0.01759897]), array([0.19706096])),
 (array([0.16849105, 0.35854365]), array([0.5270347])),
 (array([0.44726501, 0.44119295]), array([0.88845797])),
 (array([0.22026388, 0.34960983]), array([0.56987371])),
 (array([0.16802843, 0.13414092]), array([0.30216935])),
 (array([0.17131185, 0.17923461]), array([0.35054645])),
 (array([0.02993948, 0.23464769]), array([0.26458717])),
 (array([0.30437164, 0.40435149]), array([0.70872313])),
 (array([0.1171543, 0.3277295]), array([0.4448838])),
 (array([0.0314946 , 0.34884274]), array([0.38033734])),
 (array([0.46380283, 0.33843311]), ar

In [None]:
# Print the module search path of the running interpreter.
import sys
print(sys.path)


In [198]:
# Second name for the same Network object -- no copy is made, so changes
# made through either name are visible through both.
testnet = mynet