In [1]:
# Python base (following Nielsen's network.py) for R-adoption
# https://github.com/mnielsen/neural-networks-and-deep-learning/blob/master/src/network.py

In [2]:
import random 
import numpy as np

In [None]:
from sklearn import datasets

def vectorized_result(j):
    """ Turn int `y' into one-hot encoding"""
    e = np.zeros((3, 1))
    e[j] = 1.0
    return e

# Load iris from sklearn
iris = datasets.load_iris()
X = iris.data/np.max(iris.data)  # Scale to 0-1
Y = iris.target

# Shuffle data
idx = np.random.choice(np.arange(len(Y)), 100, replace=False)

In [20]:
print(len(idx))

# Create training data
train_inputs = [np.reshape(x, (4, 1)) for x in X[idx]]
train_results = [vectorized_result(y) for y in Y[idx]]
train_iris = list(zip(train_inputs, train_results))

# Create test data
test_inputs = [np.reshape(x, (4, 1)) for x in X[-idx]]
test_results = Y[-idx]
test_iris = list(zip(test_inputs, test_results))

100
100


In [8]:
class Network(object):
    
    def __init__(self, sizes):
        """Initialize network with number of neurons in respective layers, e.g.
        [2, 3, 1] means a hidden-layer of 3. Initialise biases and weights
        randomly using N(0,1)"""
        self.num_layers = len(sizes)
        self.sizes = sizes
        # Biases only for 2nd to last layers; 3x1 then 1x1
        # Random normal
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]
        #self.biases = [0.5+np.zeros(b.shape) for b in self.biases]
        #self.weights = [0.5+np.zeros(w.shape) for w in self.weights]
        print("Bias Init")
        print(self.biases)
        print("Weights Init")
        print(self.weights)
        
        
    def feedforward(self, a):
        """Propogate input forward and return output"""
        for b, w in zip(self.biases, self.weights):
            # Sigmoid custom function
            a = sigmoid(np.dot(w, a) + b)
        return a
    
    def SGD(self, training_data, epochs, mini_batch_size, lr,
            validation_data=None):
        """Training data is a list of tuples[(x,y)], lr
        is the learning-rate"""
        n = len(training_data)
        for j in range(epochs):
            # Stochastic mini-batch
            random.shuffle(training_data)
            # Partition set into min-batches
            mini_batches = [training_data[k:k+mini_batch_size] 
                            for k in range(0, n, mini_batch_size)]
            # Feed-forward mini-batch
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, lr)
            # Validiation data
            if validation_data:
                n_valid = len(validation_data)
                print("Epoch {0}: {1}/{2}".format(
                        j, self.evaluate(validation_data), n_valid))
            else:
                print("Epcoh {0} complete".format(j))
                
    def update_mini_batch(self, mini_batch, lr):
        """Update network's weights and biases by applying GD using
        backpropgation to a single mini-batch which is a list of tuples"""
        nmb = len(mini_batch)
        # Initialise updates with zero
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # Got through mini_batch
        for x, y in mini_batch:
            # Back-propogation will return delta
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        # Opposite direction of gradient
        self.weights = [w-(lr/len(mini_batch))*nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(lr/len(mini_batch))*nb
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Return tuple `nabla_b, nabla_w' representing gradient
        for cost function C_x. `nabla_b' and `nabla_w' are lists of
        numpy arrays"""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        
        # Feed-forward
        activation = x
        activations = [x]
        # z = f(w.x + b)
        zs = []  # list to store all z vectors
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # Backwards 
        # Last layer
        sp = sigmoid_prime(zs[-1])
        delta = self.cost_derivative(activations[-1], y) * sp
        nabla_b[-1] = delta
        nabla_w[-1]= np.dot(delta, activations[-2].transpose())
        # Second to second-to-last-layer
        for k in range(2, self.num_layers):
            sp = sigmoid_prime(zs[-k])
            delta = np.dot(self.weights[-k+1].transpose(), delta) * sp
            nabla_b[-k] = delta
            nabla_w[-k] = np.dot(delta, activations[-k-1].transpose())
        return (nabla_b, nabla_w)
    
    def evaluate(self, test_data):
        """Return accuracy metric"""
        # Argument with highest probability
        #pred = np.argmax(self.feedforward(x), axis=1)
        #return np.sum(pred == y)
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)
    
    def cost_derivative(self, output_activations, y):
        """Return vector of partial derivatives"""
        return(output_activations-y)
    
def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))

def sigmoid_prime(z):
    """Derivative of sigmoid function"""
    return sigmoid(z)*(1-sigmoid(z))

# Iris

In [9]:
# Create Network
net = Network([4, 6, 3])

Bias Init
[array([[ 0.5],
       [ 0.5],
       [ 0.5],
       [ 0.5],
       [ 0.5],
       [ 0.5]]), array([[ 0.5],
       [ 0.5],
       [ 0.5]])]
Weights Init
[array([[ 0.5,  0.5,  0.5,  0.5],
       [ 0.5,  0.5,  0.5,  0.5],
       [ 0.5,  0.5,  0.5,  0.5],
       [ 0.5,  0.5,  0.5,  0.5],
       [ 0.5,  0.5,  0.5,  0.5],
       [ 0.5,  0.5,  0.5,  0.5]]), array([[ 0.5,  0.5,  0.5,  0.5,  0.5,  0.5],
       [ 0.5,  0.5,  0.5,  0.5,  0.5,  0.5],
       [ 0.5,  0.5,  0.5,  0.5,  0.5,  0.5]])]


In [14]:
print(len(test_iris))
net.evaluate(test_iris)

100


95

In [10]:
# Train
net.SGD(training_data=train_iris, 
        epochs=1000, 
        mini_batch_size=10,
        lr=0.3)

print(net.biases)
print(net.weights)

Epcoh 0 complete
Epcoh 1 complete
Epcoh 2 complete
Epcoh 3 complete
Epcoh 4 complete
Epcoh 5 complete
Epcoh 6 complete
Epcoh 7 complete
Epcoh 8 complete
Epcoh 9 complete
Epcoh 10 complete
Epcoh 11 complete
Epcoh 12 complete
Epcoh 13 complete
Epcoh 14 complete
Epcoh 15 complete
Epcoh 16 complete
Epcoh 17 complete
Epcoh 18 complete
Epcoh 19 complete
Epcoh 20 complete
Epcoh 21 complete
Epcoh 22 complete
Epcoh 23 complete
Epcoh 24 complete
Epcoh 25 complete
Epcoh 26 complete
Epcoh 27 complete
Epcoh 28 complete
Epcoh 29 complete
Epcoh 30 complete
Epcoh 31 complete
Epcoh 32 complete
Epcoh 33 complete
Epcoh 34 complete
Epcoh 35 complete
Epcoh 36 complete
Epcoh 37 complete
Epcoh 38 complete
Epcoh 39 complete
Epcoh 40 complete
Epcoh 41 complete
Epcoh 42 complete
Epcoh 43 complete
Epcoh 44 complete
Epcoh 45 complete
Epcoh 46 complete
Epcoh 47 complete
Epcoh 48 complete
Epcoh 49 complete
Epcoh 50 complete
Epcoh 51 complete
Epcoh 52 complete
Epcoh 53 complete
Epcoh 54 complete
Epcoh 55 complete
Ep

# Load Digits

In [33]:
def vectorized_result(j):
    """ Turn int `y' into one-hot encoding"""
    e = np.zeros((10, 1))
    e[j] = 1.0
    return e

# Load breast_cancer from sklearn
mnist = datasets.load_digits()
X = mnist.data/np.max(mnist.data)  # Scale to 0-1
Y = mnist.target

# Shuffle data
idx = np.random.choice(np.arange(len(Y)), 1500, replace=False)

# Create training data
train_inputs = [np.reshape(x, (64, 1)) for x in X[idx]]
train_results = [vectorized_result(y) for y in Y[idx]]
train_mnist = list(zip(train_inputs, train_results))

# Create test data
test_inputs = [np.reshape(x, (64, 1)) for x in X[-idx]]
test_results = Y[-idx]
test_mnist = list(zip(test_inputs, test_results))

# Sample
test_mnist[0]

(array([[ 0.    ],
        [ 0.    ],
        [ 0.25  ],
        [ 0.8125],
        [ 1.    ],
        [ 0.6875],
        [ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.5625],
        [ 1.    ],
        [ 0.5625],
        [ 0.625 ],
        [ 0.9375],
        [ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.3125],
        [ 0.25  ],
        [ 0.    ],
        [ 0.75  ],
        [ 0.6875],
        [ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.3125],
        [ 1.    ],
        [ 0.75  ],
        [ 0.0625],
        [ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.0625],
        [ 0.5625],
        [ 0.9375],
        [ 0.5   ],
        [ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.5   ],
        [ 0.75  ],
        [ 0.    ],
        [ 0.    ],
        [ 0.    ],
        [ 0.0625],
        [ 0.375 ],
        [ 0.

In [34]:
# Create Network
net = Network([64, 10, 10])

In [35]:
# Train
net.SGD(training_data=train_mnist, 
        epochs=30, 
        mini_batch_size=10,
        lr=3.0,
        validation_data=test_mnist)

Epoch 0: 853/1500
Epoch 1: 1102/1500
Epoch 2: 1228/1500
Epoch 3: 1268/1500
Epoch 4: 1318/1500
Epoch 5: 1328/1500
Epoch 6: 1389/1500
Epoch 7: 1395/1500
Epoch 8: 1404/1500
Epoch 9: 1437/1500
Epoch 10: 1423/1500
Epoch 11: 1440/1500
Epoch 12: 1450/1500
Epoch 13: 1445/1500
Epoch 14: 1448/1500
Epoch 15: 1436/1500
Epoch 16: 1456/1500
Epoch 17: 1454/1500
Epoch 18: 1463/1500
Epoch 19: 1457/1500
Epoch 20: 1467/1500
Epoch 21: 1466/1500
Epoch 22: 1464/1500
Epoch 23: 1463/1500
Epoch 24: 1463/1500
Epoch 25: 1470/1500
Epoch 26: 1469/1500
Epoch 27: 1467/1500
Epoch 28: 1468/1500
Epoch 29: 1462/1500


## Debug (compare values to R) ... Ignore

In [7]:
sizes = [3, 5, 2]    

In [8]:
test_biases = [
    np.array([[-0.28080838],[ 0.2316751 ],[ 0.87261225],[-0.96765989],[-1.868544  ]]),
    np.array([[ 2.10775394],[ 0.41855275]])
]
test_weights = [
    np.array([[-0.81267026,  0.17627318, -0.60639905],
              [ 0.50974091,  2.34693197,  0.33875867],
              [-1.20632438, -1.25457351, -1.17803266],
              [ 0.06163412,  0.61925722,  0.87939343],
              [-0.41764508, -0.28984466,  0.09663896]]),
    np.array([[ 0.37480004,  0.04123139,  1.5200263 , -2.02504715,  0.2665885 ],
              [ 1.1946554 ,  0.18426967, -0.16337889, -0.91305046,  0.05401374]])
]

In [17]:
def feedforward(a):
    """Propogate input forward and return output"""
    for b, w in zip(test_biases, test_weights):
        a = sigmoid(np.dot(w, a) + b)
    return a

result = feedforward(a=0.5)
print(result.shape)
print(result)

(2, 3)
[[ 0.92956719  0.92438907  0.91830857]
 [ 0.64309165  0.6703279   0.63000043]]
