In [1]:
# Python base (following Nielsen's network.py) for R-adoption
# https://github.com/mnielsen/neural-networks-and-deep-learning/blob/master/src/network.py

In [2]:
import random 
import numpy as np

In [3]:
def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))

def sigmoid_prime(z):
    """Derivative of sigmoid function"""
    return sigmoid(z)*(1-sigmoid(z))

In [4]:
class Network(object):
    
    def __init__(self, sizes):
        """Initialize network with number of neurons in respective layers, e.g.
        [2, 3, 1] means a hidden-layer of 3. Initialise biases and weights
        randomly using N(0,1)"""
        self.num_layers = len(sizes)
        self.sizes = sizes
        # Biases only for 2nd to last layers; 3x1 then 1x1
        # Random normal
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]
        
    def feedforward(self, a):
        """Propogate input forward and return output"""
        for b, w in zip(self.biases, self.weights):
            # Sigmoid custom function
            a = sigmoid(np.dot(w, a) + b)
        return a
    
    def SGD(self, training_data, epochs, mini_batch_size, lr,
            validation_data=None):
        """Training data is a list of tuples[(x,y)], lr
        is the learning-rate"""
        n = len(training_data)
        for j in range(epochs):
            # Stochastic mini-batch
            random.shuffle(training_data)
            # Partition set into min-batches
            mini_batches = [training_data[k:k+mini_batch_size] 
                            for k in range(0, n, mini_batch_size)]
            # Feed-forward mini-batch
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, lr)
            # Validiation data
            if validation_data:
                n_valid = len(validation_data)
                print("Epoch {0}: {1}/{2}".format(
                        j, self.evaluate(validation_data), n_valid))
            else:
                print("Epcoh {0} complete".format(j))
                
    def update_mini_batch(self, mini_batch, lr):
        """Update network's weights and biases by applying GD using
        backpropgation to a single mini-batch which is a list of tuples"""
        nmb = len(mini_batch)
        # Initialise updates with zero
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # Got through mini_batch
        for x, y in mini_batch:
            # Back-propogation will return delta
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        # Opposite direction of gradient
        self.weights = [w-(lr/len(mini_batch))*nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(lr/len(mini_batch))*nb
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Return tuple `nabla_b, nabla_w' representing gradient
        for cost function C_x. `nabla_b' and `nabla_w' are lists of
        numpy arrays"""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        
        # Feed-forward
        activation = x
        activations = [x]
        # z = f(w.x + b)
        zs = []  # list to store all z vectors
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
            
        # Backwards 
        # Last layer
        sp = sigmoid_prime(zs[-1])
        delta = self.cost_derivative(activations[-1], y) * sp
        nabla_b[-1] = delta
        nabla_w[-1]= np.dot(delta, activations[-2].transpose())
        # Second to second-to-last-layer
        for k in range(2, self.num_layers):
            sp = sigmoid_prime(zs[-k])
            delta = np.dot(self.weights[-k+1].transpose(), delta) * sp
            nabla_b[-k] = delta
            nabla_w[-k] = np.dot(delta, activations[-k-1].transpose())
        return (nabla_b, nabla_w)
    
    def evaluate(self, test_data):
        """Return accuracy metric"""
        # Argument with highest probability
        #pred = np.argmax(self.feedforward(x), axis=1)
        #return np.sum(pred == y)
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)
    
    def cost_derivative(self, output_activations, y):
        """Return vector of partial derivatives"""
        return(output_activations-y)        

# Iris

In [5]:
from sklearn import datasets

def vectorized_result(j):
    """ Turn int `y' into one-hot encoding"""
    e = np.zeros((3, 1))
    e[j] = 1.0
    return e

# Load iris from sklearn
iris = datasets.load_iris()
X = iris.data/np.max(iris.data)  # Scale to 0-1
Y = iris.target

# Shuffle data
idx = np.random.choice(np.arange(len(Y)), 100, replace=False)

# Create training data
train_inputs = [np.reshape(x, (4, 1)) for x in X[idx]]
train_results = [vectorized_result(y) for y in Y[idx]]
train_iris = list(zip(train_inputs, train_results))

# Create test data
test_inputs = [np.reshape(x, (4, 1)) for x in X[-idx]]
test_results = Y[-idx]
test_iris = list(zip(test_inputs, test_results))

# Sample
test_iris[0]

(array([[ 0.60759494],
        [ 0.39240506],
        [ 0.20253165],
        [ 0.02531646]]), 0)

In [6]:
# Create Network
net = Network([4, 10, 3])

In [7]:
# Train
net.SGD(training_data=train_iris, 
        epochs=30, 
        mini_batch_size=10,
        lr=3.0,
        validation_data=test_iris)

Epoch 0: 35/100
Epoch 1: 33/100
Epoch 2: 67/100
Epoch 3: 67/100
Epoch 4: 67/100
Epoch 5: 93/100
Epoch 6: 67/100
Epoch 7: 72/100
Epoch 8: 67/100
Epoch 9: 81/100
Epoch 10: 67/100
Epoch 11: 86/100
Epoch 12: 91/100
Epoch 13: 67/100
Epoch 14: 94/100
Epoch 15: 79/100
Epoch 16: 85/100
Epoch 17: 67/100
Epoch 18: 93/100
Epoch 19: 95/100
Epoch 20: 77/100
Epoch 21: 85/100
Epoch 22: 85/100
Epoch 23: 73/100
Epoch 24: 86/100
Epoch 25: 70/100
Epoch 26: 94/100
Epoch 27: 88/100
Epoch 28: 79/100
Epoch 29: 95/100


# Load Digits

In [8]:
def vectorized_result(j):
    """ Turn int `y' into one-hot encoding"""
    e = np.zeros((10, 1))
    e[j] = 1.0
    return e

# Load breast_cancer from sklearn
mnist = datasets.load_digits()
X = mnist.data/np.max(mnist.data)  # Scale to 0-1
Y = mnist.target

# Shuffle data
idx = np.random.choice(np.arange(len(Y)), 1500, replace=False)

# Create training data
train_inputs = [np.reshape(x, (64, 1)) for x in X[idx]]
train_results = [vectorized_result(y) for y in Y[idx]]
train_mnist = list(zip(train_inputs, train_results))

# Create test data
test_inputs = [np.reshape(x, (64, 1)) for x in X[-idx]]
test_results = Y[-idx]
test_mnist = list(zip(test_inputs, test_results))

# Sample
test_mnist[0]

(array([[ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.9375],
        [ 0.3125],
        [ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.25  ],
        [ 1.    ],
        [ 0.4375],
        [ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.0625],
        [ 0.8125],
        [ 1.    ],
        [ 0.    ],
        [ 0.5625],
        [ 0.125 ],
        [ 0.    ],
        [ 0.    ],
        [ 0.3125],
        [ 1.    ],
        [ 0.6875],
        [ 0.3125],
        [ 1.    ],
        [ 0.5625],
        [ 0.    ],
        [ 0.    ],
        [ 0.4375],
        [ 1.    ],
        [ 0.875 ],
        [ 1.    ],
        [ 1.    ],
        [ 0.4375],
        [ 0.    ],
        [ 0.    ],
        [ 0.0625],
        [ 0.6875],
        [ 0.9375],
        [ 1.    ],
        [ 0.625 ],
        [ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.8125],
        [ 1.

In [9]:
# Create Network
net = Network([64, 10, 10])

In [10]:
# Train
net.SGD(training_data=train_mnist, 
        epochs=30, 
        mini_batch_size=10,
        lr=3.0,
        validation_data=test_mnist)

Epoch 0: 905/1500
Epoch 1: 1115/1500
Epoch 2: 1160/1500
Epoch 3: 1241/1500
Epoch 4: 1347/1500
Epoch 5: 1405/1500
Epoch 6: 1399/1500
Epoch 7: 1409/1500
Epoch 8: 1418/1500
Epoch 9: 1436/1500
Epoch 10: 1435/1500
Epoch 11: 1435/1500
Epoch 12: 1432/1500
Epoch 13: 1444/1500
Epoch 14: 1451/1500
Epoch 15: 1448/1500
Epoch 16: 1457/1500
Epoch 17: 1451/1500
Epoch 18: 1452/1500
Epoch 19: 1460/1500
Epoch 20: 1468/1500
Epoch 21: 1468/1500
Epoch 22: 1467/1500
Epoch 23: 1459/1500
Epoch 24: 1473/1500
Epoch 25: 1462/1500
Epoch 26: 1471/1500
Epoch 27: 1470/1500
Epoch 28: 1470/1500
Epoch 29: 1472/1500
