In [1]:
import numpy as np
np.random.seed(seed=1)
import h5py
with h5py.File('../data/Assignment-1-Dataset/train_128.h5','r') as H:
    data = np.copy(H['data'])
with h5py.File('../data/Assignment-1-Dataset/train_label.h5','r') as H:
    label = np.copy(H['label'])

In [2]:
with h5py.File('../data/Assignment-1-Dataset/train_128.h5','r') as H:
    data = np.copy(H['data'])
with h5py.File('../data/Assignment-1-Dataset/train_label.h5','r') as H:
    label = np.copy(H['label'])

data = data.copy()
#data = np.c_[data, np.ones(len(data))]

g = data.copy()

weights = (np.random.rand(g.shape[1]) - 0.5)/100
alphas = (np.random.rand(g.shape[1]) - 0.5)/100

In [63]:
def add_constant(data):
    return np.c_[data, np.ones(len(data))]

def relu(matrix):
    # activation function that gives x if x > 0 else 0
    return np.clip(matrix, 0, None)

def leaky_relu(matrix, alpha=0.005):
    # activation function that draws on ReLU but has a slight gradient for x < 0
    matrix[matrix<0] = matrix[matrix<0] * alpha
    return matrix

def sigmoid(column):
    # activation function that returns a value between 0 and 1, good for probabilities
    return 1/(1+np.exp(-column))

def tanh(column):
    # Activation function that returns a value between -1 and 1
    # problems occurred with large negatives when applying (1 - np.exp(-column)) / (1 + np.exp(-column)) 
    # using numpy equivalent instead
    return np.tanh(column)

def softmax(layer_output):
    j = np.exp(layer_output)
    j /= j.sum(axis=1).reshape(len(j),1)
    return j

#def node_softmax(in_data, weights):
#    # Multiply each feature (including constant) by its weight then sum the result
#    in_data = in_data.copy()
#    for d in range(in_data.shape[1]):
#        in_data[:,d] = np.exp(weights[d] * in_data[:,d])
#    in_data = in_data / in_data.sum(axis=1)
#    return in_data

def derivative_sigmoid(x):
    return np.multiply( x, (1-x))
    
def derivative_tanh(x):
    return 1 - np.multiply(x, x)

def derivative_relu(x):
    return (x > 0) - 0.0

def derivative_leaky_relu(x, alpha=0.005):
    #returns the gradient of a leaky_relu at point x
    to_ret = grad_relu(x)
    to_ret[to_ret<=0] = alpha
    return to_ret

def grad_sigmoid(x):
    #returns the gradient of a sigmoid at point x
    return sigmoid(x) * (1-sigmoid(x))

def grad_tanh(x):
    #returns the gradient of a tanh at point x
    return 1 - np.tanh(x)**2

def grad_relu(x):
    #returns the gradient of a relu at point x
    return (np.array(x) > 0) - 0.0

def grad_leaky_relu(x, alpha=0.005):
    #returns the gradient of a leaky_relu at point x
    to_ret = np.array(grad_relu(x))
    to_ret[to_ret<=0] = alpha
    return to_ret
    
def grad_softmax(prediction_list, iclass):
    prediction_list /= np.max(prediction_list,axis=1, keepdims=True)
    j = np.exp(prediction_list)
    j =  j/j.sum()
    yhat = j[iclass]
    return yhat - 1
    
def get_gradient(activation_function):
    gradient_dic = {
        'relu':grad_relu,
        'leaky_relu':grad_leaky_relu,
        'tanh':grad_tanh,
        'sigmoid':grad_sigmoid,
        'softmax':grad_softmax
    }
    return gradient_dic[activation_function]

get_derivative = {
    'sigmoid':derivative_sigmoid,
    'tanh':derivative_tanh,
    'relu':derivative_relu,
    'leaky_relu':derivative_leaky_relu,
}

def activate(in_data, kind='relu'):
    # Apply an activation function to a node's output
    actionary = {
        'relu':relu,
        'leaky_relu':leaky_relu,
        'sigmoid':sigmoid,
        'tanh':tanh,
        'softmax':softmax
    }
    return actionary[kind](in_data)

def compute_softmax_scores(layer_output):
    j = np.exp(layer_output)
    j /= j.sum(axis=1).reshape(len(j),1)
    return j
    
def compute_cross_entropy_loss(yhat,label):
    return 0 - np.log(yhat[:,label])[0]

def get_cross_entropy_grads(output,label):
    n_instances = len(label)
    gradients_matrix = compute_softmax_scores(output)
    gradients_matrix[range(n_instances),label] -= 1
    gradients_matrix /= n_instances
    return gradients_matrix


def node_mult(in_data, weights, softmax=False):
    # Multiply each feature (including constant) by its weight then sum the result
    in_data = add_constant(in_data)
    if in_data.shape[1] != weights.shape[1]:
        raise ValueError("Input matrix doesn't match weight vector: {} weights and {} features".format(
            len(weights),in_data.shape[1])
        )
    for d in range(in_data.shape[1]):
        in_data[:,d] = weights[d] * in_data[:,d]
    if softmax:
        in_data[:,:-1] += in_data[:,-1:]
        in_data = in_data[:,:-1]
        in_data = np.exp(in_data)
        return in_data/in_data.sum(axis=1).reshape((len(in_data),1))
    else:
        return in_data.sum(axis=1)
    
def layer_mult(in_data, weights, bias, activation_func='sigmoid'):
    # Multiply each feature (including constant) by its weight then sum the result
    #in_data = add_constant(in_data)
    if in_data.shape[1] != weights.shape[0]:
        raise ValueError("Input matrix doesn't match weight vector: {} weights and {} features".format(
            len(weights),in_data.shape[1])
        )
    if activation_func=='softmax':
        layer_out = in_data.dot(weights) + bias
        return softmax(layer_out)
    else:
        layer_out = in_data.dot(weights) + bias
        return activate(layer_out, kind=activation_func).T
    
def matricise_label(label,output):
    T = np.zeros_like(output)
    T[range(len(T)),label] += 1
    return T

def get_cost(label,output):
    label = matricise_label(label,output)
    return -np.multiply(label,np.log(output)).sum()

def error_output(label,output):
    label = matricise_label(label,output)
    return output - label

In [4]:
class Layer:
    
    def __init__(self, n_nodes, activation, n_inputs):
        self.activation = activation
        afunc = self.activation
        self.weights = np.random.random((n_inputs,n_nodes))
        self.bias = np.random.randn(n_nodes)
        
    def get_layer_output(self, df_in):
        self.in_data = df_in
        self.output = layer_mult(df_in, self.weights, self.bias, activation_func=self.activation)
        return self.output
        

In [70]:
class Network:
    
    def __init__(self):
        self.layers = {}
        self.in_data = None
        self.n_layers = 0
        
    def set_indata(self, in_data, label):
        self.in_data = in_data
        self.label = label
        self.in_features = in_data.shape[1]
        self.to_pass = self.in_data
    
    def assign_layer(self, n_nodes, activation, n_inputs):
        self.layers[self.n_layers] = Layer(n_nodes, activation, n_inputs)
        #self.layers[self.n_layers].set_input(self.to_pass)
        self.n_layers += 1
        
    def feed_forward(self):
        self.outputs_by_layer = []
        data_in = self.in_data.copy()
        for ilayer in self.layers.values():
            #print(ilayer.activation)
            self.ilayer = ilayer
            data_in = ilayer.get_layer_output(data_in).T
            self.outputs_by_layer.append(data_in.copy())
        self.output = data_in.reshape(data_in.shape[:2]).T
        return self.outputs_by_layer

    
    def score_network(self):
        self.feed_forward()
        self.prediction = np.argmax(t.output, axis=1).reshape(len(t.label))
        self.error = compute_cross_entropy_loss(self.output, self.label)

        
    def get_loss(self):
        return np.mean([compute_cross_entropy_loss(
                compute_softmax_score(self.output[i],self.label[i])
        ) for i in range(len(self.label))])
    
    def get_batched_loss(self,batch):
        loss = [compute_cross_entropy_loss(compute_softmax_score(self.output[i], self.label[i])) for i in batch]
        return loss
    
    def get_batch(self, frac=0.05):
        return np.random.choice(range(len(self.in_data)), replace=False, size=int(len(self.in_data)*frac))

    
    def backpropagate(self):
        cost = get_cost(self.label, self.output)
        error = error_output(self.label, self.output)
        for i in range(len(self.layers),0,-1):
            i -= 1
            self.layers[i].error = error
            if i != max(self.layers.keys()):
                to_pass = self.outputs_by_layer[i]
                error = np.multiply( get_derivative[self.layers[i].activation](self.outputs_by_layer[i]),
                                    error.dot(self.layers[i+1].weights.T))
            #print(i,self.outputs_by_layer[i-1].T.shape, error.shape,)
            if i == 0:
                self.to_pass = self.in_data.copy()
            else:
                self.to_pass = self.outputs_by_layer[i-1].copy()
            #print(i,self.to_pass.shape,error.shape,self.to_pass.T.dot(error).shape)
            self.layers[i].grads_w = self.to_pass.T.dot(error)
            self.layers[i].grads_b = error.sum(axis=0)
            #print(i,self.layers[i].grads_w.shape, self.layers[i].grads_b.shape)

    def update_weights(self, train_rate=0.05):
        i=0
        for ilayer in self.layers.values():
            i+=1
            #print(i, ilayer.weights.shape, ilayer.grads_w.shape)
            ilayer.weights -= (ilayer.grads_w * train_rate)
            ilayer.bias -= (ilayer.grads_b * train_rate)
        

    def train(self, iters, train_rate = 0.05):
        self.old_error = get_cost(self.label, self.output)
        for i in range(iters):
            self.backpropagate()
            self.update_weights()
            self.score_network()
        self.new_error = get_cost(self.label, self.output)
        print(self.new_error - self.old_error)
        
    def get_batched_network_output(self,batch):
        data_in = self.in_data.copy()[batch]
        for ilayer in self.layers.values():
            data_in = ilayer.get_layer_output(data_in).T
        return data_in

In [123]:
t = Network()
t.set_indata(g[:100], label[:100])

t.assign_layer(25,'sigmoid', 128)
#t.assign_layer(20,'tanh', 25)
#t.assign_layer(15,'leaky_relu', 20)
#t.assign_layer(12,'sigmoid',15)
t.assign_layer(10,'softmax',25)

t.score_network()
print(t.error.sum())

266.037735395


  from ipykernel import kernelapp as app


In [124]:
t.train(1000)

  from ipykernel import kernelapp as app


-338.293675234


In [125]:
(t.prediction == label[:100]).sum()

92

In [126]:
list(zip(t.prediction, label[:100]))

[(9, 9),
 (0, 0),
 (0, 0),
 (3, 3),
 (0, 0),
 (2, 2),
 (7, 7),
 (2, 2),
 (5, 5),
 (5, 5),
 (0, 0),
 (9, 9),
 (5, 5),
 (5, 5),
 (7, 7),
 (9, 9),
 (1, 1),
 (0, 0),
 (6, 6),
 (0, 4),
 (3, 3),
 (1, 1),
 (4, 4),
 (8, 8),
 (4, 4),
 (3, 3),
 (0, 0),
 (2, 2),
 (4, 4),
 (4, 4),
 (5, 5),
 (3, 3),
 (6, 6),
 (0, 6),
 (0, 0),
 (8, 8),
 (5, 5),
 (2, 2),
 (1, 1),
 (0, 6),
 (6, 6),
 (9, 7),
 (9, 9),
 (5, 5),
 (9, 9),
 (2, 2),
 (7, 7),
 (3, 3),
 (0, 0),
 (3, 3),
 (3, 3),
 (3, 3),
 (7, 7),
 (2, 2),
 (2, 2),
 (0, 6),
 (6, 6),
 (8, 8),
 (3, 3),
 (1, 3),
 (5, 5),
 (0, 0),
 (5, 5),
 (5, 5),
 (0, 0),
 (2, 2),
 (0, 0),
 (0, 0),
 (4, 4),
 (1, 1),
 (3, 3),
 (1, 1),
 (6, 6),
 (3, 3),
 (1, 1),
 (4, 4),
 (4, 4),
 (0, 6),
 (1, 1),
 (9, 9),
 (1, 1),
 (3, 3),
 (5, 5),
 (7, 7),
 (9, 9),
 (5, 7),
 (1, 1),
 (7, 7),
 (9, 9),
 (9, 9),
 (9, 9),
 (3, 3),
 (2, 2),
 (9, 9),
 (3, 3),
 (6, 6),
 (4, 4),
 (1, 1),
 (1, 1),
 (8, 8)]