In [None]:
import sys
# Keep a handle on the current stdout so it can be put back after the imports below.
stdout = sys.stdout
import numpy as np
from numpy.random import shuffle
# Put the saved stdout back (presumably guards against an import rebinding
# sys.stdout -- TODO confirm this is still needed).
sys.stdout = stdout
import time
# Visible marker that this setup cell has finished running.
print("started")

In [None]:
class Network(object):
    """Fully connected feed-forward network with ReLU on every layer.

    Training is full-batch gradient descent on a squared-error loss between
    the final layer's activations and one-hot encoded class labels.
    """

    def __init__(
        self,
        nodes,
        step_size=0.01
    ):
        """
        Create the network with randomly initialised parameters.

        nodes: number of nodes per layer, input layer first.
            eg: [2,3,5] is a 2 feature input,
            3 neurons in the first layer,
            5 neurons in the final layer.
            The final entry is also the number of classes.
        step_size: factor applied to every gradient step.

        Attributes:
        nb_layers: len(nodes), i.e. it counts both the input and output layers.
        weights: weights[i] has shape (nodes[i], nodes[i+1]), uniform in [0, 1).
        biases: biases[i] has shape (1, nodes[i+1]), uniform in [0, 1).
        outputs: activation matrix of each non-input layer, filled by fforw.
            An output matrix has shape (p, n), where p is the number of
            examples given to fforw and n is the number of nodes in the layer.
        gradients: activation derivative of each layer, filled by backprop.
        deltas: back-propagated error of each layer, filled by backprop.
        derivatives, error_ds: allocated here but not used by any method of
            this class.

        The cost function is the squared error against a one-hot target, so
        nodes[-1] must equal the number of classes.
        """
        self.step_size = step_size

        self.nb_layers = len(nodes)
        self.nodes = nodes
        self.weights = [np.random.rand(n, m) for n, m in zip(nodes[:-1], nodes[1:])]
        self.biases = [np.random.rand(1, n) for n in self.nodes[1:]]
        self.outputs = [0 for n in nodes[1:]]
        self.derivatives = [0 for n in nodes[1:]]
        self.error_ds = np.zeros(nodes[-1])
        self.gradients = [0 for n in nodes[1:]]
        self.deltas = [0 for n in nodes[1:]]

    def get_bias_matrix(self, index, nsamples):
        """Return the bias row of layer ``index`` repeated ``nsamples`` times.

        The result has shape (nsamples, n) where n is the width of that layer.
        """
        return np.repeat(self.biases[index], nsamples, axis=0)

    def fforw(self, inputs, labels):
        """Run a forward pass and store every layer's activations in self.outputs.

        inputs: matrix of shape (p, nodes[0]).
        labels: accepted for signature symmetry with backprop; not read here.
        """
        curr_input = inputs
        for w in range(self.nb_layers - 1):
            # The (1, n) bias row broadcasts over the p examples.
            next_inp = curr_input.dot(self.weights[w]) + self.biases[w]
            next_act = self.relu(next_inp)
            self.outputs[w] = next_act
            curr_input = next_act

    def backprop(self, inputs, labels):
        """Take one gradient-descent step using the activations stored by fforw.

        inputs: the same matrix that was passed to fforw.
        labels: integer class ids, either shape (p, 1) or (p,); for 2-D input
            only the first column is read.
        """
        n_samples = len(labels)
        class_ids = np.asarray(labels)
        if class_ids.ndim > 1:
            class_ids = class_ids[:, 0]
        class_ids = class_ids.astype(int)

        # One-hot targets: exactly 1.0 in the column of the expected class.
        targets = np.zeros((n_samples, self.nodes[-1]), dtype=float)
        targets[np.arange(n_samples), class_ids] = 1.0

        # Derivative of 0.5 * (output - target)^2 with respect to the output.
        err = self.outputs[-1] - targets

        # Activation derivatives. relu(..., True) returns a fresh array, so the
        # stored activations stay intact for the weight updates below.
        for i in range(self.nb_layers - 1):
            self.gradients[i] = self.relu(self.outputs[i], True)

        # Deltas, from the output layer backwards. All of them are computed
        # before any weight is changed.
        last = self.nb_layers - 2
        self.deltas[last] = err * self.gradients[last]
        for i in reversed(range(0, last)):
            self.deltas[i] = (self.deltas[i + 1].dot(self.weights[i + 1].T)) * self.gradients[i]

        # Update weights: step against the gradient.
        for i in reversed(range(1, self.nb_layers - 1)):
            self.weights[i] -= self.step_size * self.outputs[i - 1].T.dot(self.deltas[i])
        self.weights[0] -= self.step_size * inputs.T.dot(self.deltas[0])

        # Update biases: the bias gradient is the delta summed over examples.
        for i in range(self.nb_layers - 1):
            self.biases[i] = self.biases[i] - self.step_size * np.sum(self.deltas[i], axis=0)

    def train(self, inputs, labels):
        """Run one forward pass followed by one backward pass on the whole batch."""
        self.fforw(inputs, labels)
        self.backprop(inputs, labels)

    def relu(self, x, derivative=False):
        """ReLU of ``x``, or with ``derivative=True`` its 0/1 derivative mask.

        Neither branch modifies ``x``; a new array is returned.
        """
        if derivative:
            return (x > 0.0).astype(float)
        else:
            return x * (x > 0)

    def sigmoid(self, x, derivative=False):
        """Logistic sigmoid of ``x``.

        With ``derivative=True`` this returns x * (1 - x), which is the sigmoid
        derivative only when ``x`` is already a sigmoid output.
        """
        if derivative:
            return x * (1.0 - x)
        else:
            return 1.0 / (1.0 + np.exp(-x))

    def fit(self, inputs, labels, epochs):
        """Train for ``epochs`` full-batch steps, reshuffling before each one.

        NOTE: ``inputs`` and ``labels`` are shuffled in place, so the caller's
        arrays are reordered. Replaying the same RNG state for both shuffles is
        what keeps rows and labels paired; this relies on both arrays having
        the same length.
        """
        # time.clock() was removed in Python 3.8; perf_counter is the replacement.
        t_start = time.perf_counter()
        print("starting fit, time is: ", t_start)
        t_last = t_start
        for i in range(epochs):
            print("epoch: ", i + 1)
            # randomize data
            print(np.shape(inputs), np.shape(labels))
            state = np.random.get_state()
            np.random.shuffle(inputs)
            np.random.set_state(state)
            np.random.shuffle(labels)
            print(np.shape(inputs), np.shape(labels))
            self.train(inputs, labels)
            t_epoch = time.perf_counter()
            print("time elapsed is: ", t_epoch - t_last)
            # Advance the reference so the next line reports that epoch only.
            t_last = t_epoch

    def predict(self, inp, labels):
        """Return the predicted class index (argmax of the final layer) per row.

        ``labels`` is only forwarded to fforw. The final-layer activations of
        the first example are printed as a debugging aid.
        """
        self.fforw(inp, labels)
        final_out = self.outputs[-1]
        print("outputs", final_out[0])
        return [np.argmax(x) for x in final_out]

# General functions

def safe_divide(num, denom):
    """Element-wise num / denom that yields 0.0 instead of dividing by zero.

    Returns an empty list when the two sequences differ in length. A position
    is divided only when both its numerator and denominator are strictly
    positive; every other position becomes 0.0.
    """
    size = len(num)
    if size != len(denom):
        return []

    quotients = []
    for idx in range(size):
        if num[idx] > 0.0 and denom[idx] > 0.0:
            quotients.append(num[idx] / denom[idx])
        else:
            quotients.append(0.0)
    return quotients

def score(preds, targets, nclasses):
    """Macro-averaged (unweighted) F-score over ``nclasses`` classes.

    preds / targets: equal-length sequences of class ids; each entry must be
        convertible with int() to an index below ``nclasses``.
    Returns -1.0 when the lengths differ, otherwise the mean of the per-class
    F-scores. Per-class counts, precision and recall are printed on the way.
    """
    n_items = len(preds)
    if n_items != len(targets):
        return -1.0

    # Per-class tallies: correct predictions, predictions made, true members.
    true_pos = np.zeros(nclasses)
    pos = np.zeros(nclasses)
    rel_pos = np.zeros(nclasses)
    for idx in range(n_items):
        guess, truth = preds[idx], targets[idx]
        if guess == truth:
            true_pos[int(truth)] += 1.0
        pos[int(guess)] += 1.0
        rel_pos[int(truth)] += 1.0

    print("true_pos", true_pos, "\n", "pos", pos, "\n", "rel_pos", rel_pos, "\n")
    p = safe_divide(true_pos, pos)
    r = safe_divide(true_pos, rel_pos)
    print(p, r)

    # Harmonic mean of precision and recall, 0.0 where both are zero.
    f_scores = []
    for prec, rec in zip(p, r):
        if prec + rec > 0.0:
            f_scores.append(2.0 * prec * rec / (prec + rec))
        else:
            f_scores.append(0.0)

    return np.mean(f_scores)

def acc(preds, targets):
    """Fraction of positions where ``preds`` and ``targets`` agree.

    Returns -1.0 when the two sequences differ in length.
    """
    total = len(preds)
    if total != len(targets):
        return -1.0
    hits = sum(1 for idx in range(total) if preds[idx] == targets[idx])
    return float(hits) / float(total)
            

In [None]:
# Layer sizes, and the (rows, cols) shape of the weight matrix between each
# pair of adjacent layers.
nodes = [4096, 2048, 10]
print(list(zip(nodes[:-1], nodes[1:])))
# print([ np.random.rand(n, m) for n,m in zip(nodes[:-1], nodes[1:])])

In [None]:
# time.clock() was removed in Python 3.8. Use it only where it still exists,
# so readings stay comparable with any other cell still calling it; otherwise
# use time.perf_counter().
_now = getattr(time, "clock", time.perf_counter)

start_time = _now()
print("Loading train_x.npy, elapsed time: ", _now() - start_time)
x = np.load("../data/x_centred_train.npy")
print("Loaded train_x.csv. Loading train_y.csv, elapsed time: ", _now() - start_time)
y = np.loadtxt("../data/train_y.csv", delimiter=",", dtype=int)
print(y.shape)
# One flattened 64*64 row per example, one label per row.
x = x.reshape(-1, 64*64)
y = y.reshape(-1, 1)

print(np.shape(x), np.shape(y))
print(x[5000][2053], type(x[5000][2053]))

# The paired shuffle below only keeps rows and labels aligned when both arrays
# have the same number of rows.
assert x.shape[0] == y.shape[0], "x and y must contain the same number of examples"

# Shuffle x and y with the same RNG state so that example i keeps label i.
state = np.random.get_state()
np.random.shuffle(x)
np.random.set_state(state)
np.random.shuffle(y)

# Make the train/valid split.
train_size = 35000
valid_size = 1000
end = train_size + valid_size
train_x = x[0:train_size]
train_y = y[0:train_size]
valid_x = x[train_size: end]
valid_y = y[train_size: end]

print(np.shape(train_x), np.shape(train_y), np.shape(valid_x), np.shape(valid_y))

print(x.shape, type(x[500]), type(x[500][50]))

# Sanity check on the training data, vectorized instead of visiting every
# element in a Python loop. NaN positions are reported as (row, column).
for i, j in np.argwhere(np.isnan(train_x)):
    print(i, j, "NaN found")
# Every element shares the array dtype, so one check covers all of them.
if train_x.dtype != np.float64:
    print(train_x.dtype, "non float")


In [None]:
# One forward pass and one backward pass over the full training set on a
# freshly initialised network.
NN = Network(nodes=[4096, 2048, 10])
NN.train(inputs=train_x, labels=train_y)

In [None]:

# time.clock() was removed in Python 3.8. Use it only where it still exists,
# so readings stay comparable with the clock that produced start_time;
# otherwise use time.perf_counter().
_now = getattr(time, "clock", time.perf_counter)

print("Training NN, elapsed time: ", _now() - start_time)
# Start from a new randomly initialised network and run 3 epochs.
NN = Network([4096, 2048, 10])
NN.fit(train_x, train_y, 3)

print("NN trained. elapsed time: ", _now() - start_time)



def tune(maxepochs, nhidden, hiddensize, train_x, train_y, test_x, test_y,
         input_size=4096, nclasses=10):
    """Grid-search the number and width of hidden layers.

    For every combination a new Network is trained one epoch at a time, and
    it is scored on the test data after each epoch.

    maxepochs: number of epochs to train each configuration (epochs 1..maxepochs).
    nhidden: iterable of hidden-layer counts. An entry may be an integer or a
        sized collection, in which case its length is used.
    hiddensize: iterable of hidden-layer widths.
    train_x, train_y: passed to Network.fit.
    test_x, test_y: passed to Network.predict and score.
    input_size: width of the input layer.
    nclasses: width of the output layer and class count given to score.

    Returns a list of (epoch, nhidden_entry, size, fscore) tuples.
    """
    res = []
    for layers in nhidden:
        # Accept both a plain count and a collection with one item per layer.
        try:
            n_hidden = len(layers)
        except TypeError:
            n_hidden = int(layers)
        for size in hiddensize:
            nodes = [input_size] + [size] * n_hidden + [nclasses]
            net = Network(nodes)
            # Inclusive upper bound so that exactly maxepochs epochs are run.
            for i in range(1, maxepochs + 1):
                net.fit(train_x, train_y, 1)
                preds = net.predict(test_x, test_y)
                s = score(preds, test_y, nclasses)
                # Store the computed value, not the score function itself.
                res.append((i, layers, size, s))
    return res



In [None]:
# Number of weight matrices held by the network (one per pair of adjacent layers).
print(len(NN.weights))
def nonzerocount(e):
    """Return how many entries of the iterable ``e`` are not equal to zero.

    The previous body counted the entries that *were* zero, which contradicted
    the function's name.
    """
    return sum(1 for entry in e if entry != 0)

# print([nonzerocount(e) for e in valid_x])
# Predict on the validation split, then report F-score and accuracy.
print("preds:")
preds = NN.predict(valid_x, valid_y)
print("score:")
valid_fscore = score(preds, valid_y, 10)
print("fscore: ", valid_fscore)
valid_acc = acc(preds, valid_y)
print("acc: ", valid_acc)

# Search-grid settings; nothing in this cell consumes them.
epochs, nhidden = 8, [1, 2]
hiddensize = [1, 5, 50, 500, 2048, 4096]

print("done")

In [None]:
# Print the first 4096 values of training example 982 as a plain list.
sample_row = train_x[982]
print([sample_row[col] for col in range(4096)])