In [15]:
import sys
# Keep a handle on the current stdout so it can be put back after the imports below.
# NOTE(review): nothing in the visible cells replaces sys.stdout; presumably this is a
# workaround for an import that redirected output - confirm it is still needed.
stdout = sys.stdout
import numpy as np
# NOTE(review): `shuffle` is not referenced by this name in the cells shown here
# (the code calls np.random.shuffle instead); verify before removing.
from numpy.random import shuffle
sys.stdout = stdout
import time
# Marker so the cell produces visible output once the imports have finished.
print("started")

started


In [16]:
class Network(object):
    """Fully connected feed-forward classifier trained by full-batch gradient descent.

    Every layer applies a sigmoid. The training signal is the gradient of a
    softmax cross-entropy evaluated on the final layer's sigmoid outputs.
    """

    def __init__(
        self,
        nodes,
        step_size=1
    ):
        """
        nodes represents the number of nodes per layer.
        eg: [2,3,5] is 2 feature input,
        3 neurons in the first layer,
        5 neurons in the final layer.
        5 also represents the number of classes

        step_size is the factor applied to every gradient step.

        nb_layers includes both the output and input layers
        weights[l] has shape (n + 1, m): n inputs of layer l plus one bias row
            (the last row), m nodes in layer l
        outputs is the output matrix of each layer. An output matrix is of shape (p, n),
            where p is the number of examples given to the feedforward, and n is the nb of nodes in the layer.
            After a call to fforw one extra trailing entry holds the inputs of that call,
            so outputs[l - 1] is the input of layer l for every l (including l == 0).
        derivatives is the sigmoid derivative of each layer's output, one entry per layer
        error_ds is the derivative of the error function with respect to the final outputs
        gradients is the back-propagated error of each layer, one entry per layer

        Default cost function set to cross entropy, therefore, nodes[-1] == nb of classes
        """
        self.step_size = step_size

        self.nb_layers = len(nodes)
        self.nodes = nodes
        # NOTE(review): every weight starts at 1.0, so all units of a hidden layer
        # compute the same value and receive the same update. Kept as-is because
        # changing the initialisation would change results; consider random init.
        self.weights = [np.ones((n + 1, m), dtype=float) for n, m in zip(nodes[:-1], nodes[1:])]
        self.outputs = [0 for n in nodes[1:]]
        self.derivatives = [0 for n in nodes[1:]]
        self.error_ds = np.zeros(nodes[-1])
        self.gradients = [0 for n in nodes[1:]]

    @staticmethod
    def _with_bias(values):
        """Return a float copy of the 2-D `values` with a trailing column of ones."""
        rows, cols = np.shape(values)
        padded = np.ones((rows, cols + 1))
        padded[:, :-1] = values
        return padded

    def fforw(self, inputs, labels=None):
        """
        Runs the examples through every layer and caches the per-layer results.

        inputs is of shape (p, nodes[0]); a list of lists is accepted.
        labels are the integer class labels of the p examples. When given, error_ds
            is refreshed for a following backprop; when None (plain inference)
            error_ds is left untouched.

        Fills outputs[l] and derivatives[l] for every layer l and stores `inputs`
        in the single trailing slot of outputs. Returns None.
        """
        layer_out = np.asarray(inputs, dtype=float)
        for l, w in enumerate(self.weights):
            layer_out = self.sigmoid(np.dot(self._with_bias(layer_out), w))
            self.outputs[l] = layer_out
            self.derivatives[l] = self.sigmoid(layer_out, True)

        # Calculating the derivative of the error function for backprop
        if labels is not None:
            self.error_ds = self.softmaxLoss(layer_out, labels)

        # Keep exactly one trailing slot for the inputs: drop the one left by a
        # previous call before storing the new one, so the list cannot grow.
        del self.outputs[len(self.weights):]
        self.outputs.append(inputs)

    def backprop(self):
        """
        Propagates error_ds from the last layer back to the first.

        Each gradient is of shape p x m,
            where p is the number of examples, m is the number of output nodes from the layer
        Requires a previous fforw call made with labels.
        """
        self.gradients[-1] = self.derivatives[-1] * self.error_ds

        for index in range(len(self.derivatives) - 2, -1, -1):
            # Row j is weights[index + 1][:-1] . gradients[index + 1][j]; the bias
            # row is excluded because no error flows back through the constant 1.
            downstream = np.dot(self.gradients[index + 1], self.weights[index + 1][:-1].T)
            self.gradients[index] = self.derivatives[index] * downstream

    def update_weights(self, inputs, labels):
        """
        Performs one gradient step on all the given examples at once.

        inputs is the given input for the network.
        Shape of inputs should be (n x m)
            Where n is the number of examples,
            m is the number of features
        labels are the correct labels for each example of shape (n,)
        """
        self.fforw(inputs, labels)
        self.backprop()
        for i in range(len(self.weights)):
            # outputs[i - 1] is the input of layer i; for i == 0 the index -1
            # reaches the trailing slot where fforw stored the network inputs.
            hat_o = self._with_bias(self.outputs[i - 1])
            # hat_o.T . gradients[i] is the sum over examples of the per-example
            # outer products, i.e. the accumulated update for the whole batch.
            self.weights[i] -= self.step_size * np.dot(hat_o.T, self.gradients[i])

    def sigmoid(self, x, derivative=False):
        """
        Element-wise logistic function.

        With derivative=True, x must already be a sigmoid output and the
        function returns x * (1 - x).
        """
        if derivative:
            return x * (1 - x)
        else:
            return 1 / (1 + np.exp(-x))

    def softmax(self, x):
        """Row-wise softmax; the row maximum is subtracted before exponentiating."""
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def softmaxLoss(self, X, y):
        """
        Gradient of the mean softmax cross-entropy with respect to X.

        X is of shape (m, nb of classes).
        y holds the m class indices; scalars, lists, column vectors and
            float-valued labels are flattened and cast to int.
        Returns an array of the same shape as X.
        """
        y = np.asarray(y).astype(int).reshape(-1)
        m = y.shape[0]
        dx = self.softmax(X)
        dx[np.arange(m), y] -= 1
        dx /= m
        return dx

    def fit(self, inputs, labels, epochs):
        """
        Trains for `epochs` full-batch updates and prints progress and timing.

        inputs is of shape (n, m) and labels of shape (n,); both are left
        unmodified: each epoch works on a freshly permuted copy.
        """
        inputs = np.asarray(inputs, dtype=float)
        labels = np.asarray(labels)
        # time.clock() was removed in Python 3.8; perf_counter is its replacement.
        t_start = time.perf_counter()
        print("starting fit, time is: ", t_start)
        t_last = t_start
        for i in range(epochs):
            print("epoch: ", i + 1)
            # randomize data: one shared permutation keeps examples and labels aligned
            print(np.shape(inputs), np.shape(labels))
            order = np.random.permutation(len(inputs))
            batch_x = inputs[order]
            batch_y = labels[order]
            print(np.shape(batch_x), np.shape(batch_y))
            self.update_weights(batch_x, batch_y)
            t_epoch = time.perf_counter()
            print("time elapsed is: ", t_epoch - t_last)
            # Advance the reference point so the next line reports that epoch only.
            t_last = t_epoch

    def predict(self, inp, labels=None):
        """
        Returns the predicted class index of every example in inp as a list.

        labels is optional and kept for backward compatibility; when given,
        error_ds is refreshed as a side effect of the forward pass.
        """
        self.fforw(inp, labels)
        final = self.outputs[len(self.weights) - 1]  # output of final layer
        return [np.argmax(row) for row in final]

# General functions

def safe_divide(num, denom):
    """
    Element-wise num / denom that never divides by zero.

    Returns an empty list when the two sequences differ in length. A position
    yields 0.0 unless both its numerator and its denominator are strictly positive.
    """
    if len(num) != len(denom):
        return []
    quotients = []
    for top, bottom in zip(num, denom):
        if top > 0.0 and bottom > 0.0:
            quotients.append(top / bottom)
        else:
            quotients.append(0.0)
    return quotients

def score(preds, targets, nclasses):
    """
    Macro-averaged (unweighted) F-score over `nclasses` classes.

    preds and targets are equally long sequences of integer class indices;
    None is returned when their lengths differ. The per-class counts and the
    per-class precision/recall lists are printed along the way.
    """
    if len(preds) != len(targets):
        return

    # Per-class tallies: correct predictions, predictions made, true occurrences.
    hits = [0.0 for _ in range(nclasses)]
    predicted = [0.0 for _ in range(nclasses)]
    actual = [0.0 for _ in range(nclasses)]
    for guess, truth in zip(preds, targets):
        if guess == truth:
            hits[truth] += 1.0
        predicted[guess] += 1.0
        actual[truth] += 1.0

    hits = np.array(hits)
    predicted = np.array(predicted)
    actual = np.array(actual)
    print(hits, predicted, actual)
    p = safe_divide(hits, predicted)
    r = safe_divide(hits, actual)
    print(p, r)

    # A class with neither precision nor recall contributes an F-score of 0.
    f_scores = []
    for precision, recall in zip(p, r):
        if precision + recall > 0.0:
            f_scores.append(2.0 * precision * recall / (precision + recall))
        else:
            f_scores.append(0.0)

    return np.mean(f_scores)

In [17]:
# Example of 1 hidden layer, 1 output layer NN.
# nodes=[3,3,3]: 3 input features, 3 hidden neurons, 3 output neurons (one per class).
n = Network([3,3,3])

# Illustration of a single weight update on a mini-batch of 2 examples.
# NOTE: the commented call below was written for a 2-feature, 5-class network
# such as Network([2,3,5]); it does not match the [3,3,3] network built above.
# Eg: [1,2] are the features for the first example, [3,4] are the features for the second example
# [0,4] are the labels of those examples: 0 means [1,2] belongs to the first class,
# and 4 means [3,4] belongs to the fifth class.
# n.update_weights([[1,2], [3,4]], np.array([0,4]))

In [18]:
# Smoke test: train the network `n` for one epoch on 8 hand-written 3-feature
# examples (labels 0-2), then score its predictions on 6 other examples.
print("Test Example")
data = [[1, 3, 1], [3, 5, 1], [5, 4, 2], [2, 1, 0],[0, 4, 0],[2, 3, 0], [3, 2, 1], [4, 1, 0]]
targets = [0,1,2,1,2,0,1,2]
print("We train the network on the following: ")
print("data: ", data, "\ntargets: ", targets)
# Labels are wrapped in a numpy array before being handed to fit; the last argument is the epoch count.
n.fit(data, np.array(targets), 1)
print("Results:")
test = [[1, 0, 1], [2,3,1], [3,1,0], [4,1,0], [5,3,0], [0,1,2]]
test_targ = [2, 2, 0, 1, 0, 1]
preds = n.predict(test, np.array(test_targ))
# score() is evaluated while the arguments are built, so the lines it prints
# itself appear before this summary line; 3 is the number of classes.
print("tests: ", test, "predictions: ", preds, "score: ", score(preds, test_targ, 3))


Test Example
We train the network on the following: 
data:  [[1, 3, 1], [3, 5, 1], [5, 4, 2], [2, 1, 0], [0, 4, 0], [2, 3, 0], [3, 2, 1], [4, 1, 0]] 
targets:  [0, 1, 2, 1, 2, 0, 1, 2]
starting fit, time is:  173.62673917191555
epoch:  1
(8, 3) (8,)
(8, 3) (8,)
time elapsed is:  0.007375399875996891
Results:
[0. 2. 0.] [0. 6. 0.] [2. 2. 2.]
[0.0, 0.3333333333333333, 0.0] [0.0, 1.0, 0.0]
tests:  [[1, 0, 1], [2, 3, 1], [3, 1, 0], [4, 1, 0], [5, 3, 0], [0, 1, 2]] predictions:  [1, 1, 1, 1, 1, 1] score:  0.16666666666666666


In [19]:
# x = np.loadtxt("../data/train_x.csv", delimiter=",")
# y = np.loadtxt("../data/train_y.csv", delimiter=",")
# x = x.reshape(-1, 64*64)
# y = y.reshape(-1, 1)

# binary_pics = (x > 250) + 0

# NN = Network([4096, 4096, 10])
# NN.fit(binary_pics, y, 1)

# Candidate hyper-parameter values; presumably meant to be passed to tune()
# as its first three arguments - no such call is visible in this notebook.
# Epoch counts to try.
epochs = [1, 5, 25, 125, 625]
# Numbers of hidden layers to try.
nhidden = [1, 2, 3]
# Numbers of neurons per hidden layer to try.
hiddensize = [5, 50, 500, 4096]

def tune(epochs, nhidden, hiddensize, train_x, train_y, test_x, test_y,
         n_inputs=4096, n_classes=10):
    """
    Grid search over epoch count, number of hidden layers and hidden layer size.

    For every combination a fresh Network with layout
    [n_inputs, size, ..., size, n_classes] is trained on the training data and
    evaluated with score() on the test data.

    epochs, nhidden and hiddensize are iterables of ints: epoch counts,
        numbers of hidden layers, and neurons per hidden layer.
    train_x, train_y are the examples and labels given to Network.fit.
    test_x, test_y are the examples and labels used for evaluation.
    n_inputs is the number of input features (default 4096, the value
        previously hard-coded).
    n_classes is the number of output classes (default 10); it sizes the
        output layer and is the class count handed to score().

    Returns a list of (nepochs, layers, size, score_value) tuples, one per
    combination, in iteration order.
    """
    res = []
    for nepochs in epochs:
        for layers in nhidden:
            for size in hiddensize:
                # `layers` is a count, so repeat the hidden size that many times.
                nodes = [n_inputs] + [size] * layers + [n_classes]
                NN = Network(nodes)
                NN.fit(train_x, train_y, nepochs)
                preds = NN.predict(test_x, test_y)
                # Score over the real number of classes, not the input width.
                s = score(preds, test_y, n_classes)
                # Record the computed value, not the score function itself.
                res.append((nepochs, layers, size, s))
    return res
    

In [20]:
# Marker cell: its output shows that execution reached the end of the notebook.
print("done")

done
