In [1]:
import numpy as np

In [2]:
class Adam:
    """Stateless Adam update rule.

    The optimizer keeps only its hyper-parameters; the first and second
    moment estimates are passed in by the caller and handed back updated.

    Parameters
    ----------
    eta : base learning rate.
    beta1 : decay rate of the first-moment (mean) estimate.
    beta2 : decay rate of the second-moment (uncentred variance) estimate.
    epsilon : constant added to the denominator; note the default
        ``10e-8`` equals ``1e-7``.
    """

    def __init__(self, eta, beta1=0.9, beta2=0.999, epsilon=10e-8):
        self.eta, self.beta1 = eta, beta1
        self.beta2, self.epsilon = beta2, epsilon

    def __call__(self, iteration, last_m, last_v, gradient):
        """Return ``(update, new_m, new_v)`` for one Adam step.

        ``iteration`` is expected to start at 1: at 0 the bias-correction
        denominator ``1 - beta1**iteration`` is zero.
        ``update`` is the increment to *add* to the parameter (it already
        carries the minus sign).
        """
        # Exponential moving averages of the gradient and its square.
        new_m = self.beta1 * last_m + (1 - self.beta1) * gradient
        new_v = self.beta2 * last_v + (1 - self.beta2) * gradient ** 2

        # Learning rate with both bias corrections folded in.
        scaled_eta = self.eta * np.sqrt(1 - self.beta2 ** iteration)
        step_size = scaled_eta / (1 - self.beta1 ** iteration)

        denominator = np.sqrt(new_v) + self.epsilon
        update = -step_size * new_m / denominator
        return update, new_m, new_v

In [3]:
def relu(x,derivative=False):
    """Rectified linear unit, applied element-wise.

    Parameters
    ----------
    x : array of pre-activations.
    derivative : when True, return d relu / dx instead of relu(x).

    Returns
    -------
    ``max(x, 0)`` element-wise, or, for ``derivative=True``, 1.0 where
    ``x > 0`` and 0.0 elsewhere (the sub-gradient at 0 is taken as 0).
    """
    if derivative:
        # The result must be returned; without the return the function fell
        # through and handed back the forward activation instead.
        return np.where(x>0,1.,0.)
    return np.where(x>0,x,0.)

def sigmoid(x, derivative=False):
    """Logistic function 1 / (1 + exp(-x)), applied element-wise.

    With ``derivative=True`` the slope ``s * (1 - s)`` is returned, where
    ``s`` is the logistic value at ``x``.
    """
    s = 1 / (1 + np.exp(-x))
    return s * (1 - s) if derivative else s

def tanh(x,derivative=False):
    """Hyperbolic tangent, applied element-wise.

    Parameters
    ----------
    x : array of pre-activations.
    derivative : when True, return ``1 - tanh(x)**2`` instead of ``tanh(x)``.

    Notes
    -----
    ``np.tanh`` is used rather than ``(exp(2x)-1)/(exp(2x)+1)``: the explicit
    formula overflows to ``inf/inf = nan`` for large positive ``x``.
    """
    f = np.tanh(x)
    if derivative:
        return 1-f**2
    return f

def elu(x,derivative=False):
    """Exponential linear unit (alpha = 1), applied element-wise.

    Parameters
    ----------
    x : array of pre-activations.
    derivative : when True, return d elu / dx instead of elu(x).

    Returns
    -------
    ``x`` where ``x > 0`` and ``exp(x) - 1`` elsewhere; for
    ``derivative=True``, 1 where ``x > 0`` and ``exp(x)`` elsewhere.
    """
    positive = x>0
    # np.where evaluates both branches, so exponentiate a copy in which the
    # positive entries are zeroed; this avoids overflow warnings for large
    # positive inputs whose exp() would be discarded anyway.
    exp_neg = np.exp(np.where(positive,0.,x))
    if derivative:
        # The result must be returned; without the return the function fell
        # through and handed back the forward activation instead.
        return np.where(positive,1.,exp_neg)
    return np.where(positive,x,exp_neg-1)

def linear(x,derivative=False):
    """Identity activation.

    Parameters
    ----------
    x : array of pre-activations.
    derivative : when True, return the derivative of the identity, an array
        of ones with the shape of ``x``, instead of ``x`` itself.
    """
    if derivative:
        # d/dx of x is 1 everywhere. The earlier `sign(x)` referenced an
        # undefined name and would also have been mathematically wrong.
        return np.ones_like(x,dtype=float)
    return x

def softmax(x, axis=-1):
    """Softmax of ``x`` along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating so that
    large inputs do not overflow; the result is unchanged by that shift.
    """
    peak = np.max(x, axis=axis, keepdims=True)
    exponentials = np.exp(x - peak)
    normaliser = np.sum(exponentials, axis=axis, keepdims=True)
    return exponentials / normaliser

def cross_entropy_loss(outputs, ground_truth, epsilon=1e-8):
    """Mean cross-entropy between predictions and targets.

    For each row, ``ground_truth * log(outputs + epsilon)`` is summed over
    axis 1; the negated mean of those row sums is returned. ``epsilon``
    keeps ``log`` away from zero.
    """
    log_probabilities = np.log(outputs + epsilon)
    per_sample = np.sum(ground_truth * log_probabilities, 1)
    return -np.mean(per_sample)

def mse(outputs, ground_truth):
    """Mean over rows of half the squared error summed along axis 1."""
    residual = outputs - ground_truth
    per_sample = 0.5 * np.sum(residual ** 2, 1)
    return np.mean(per_sample)

class NeuralNetwork:
    """Fully connected feed-forward network trained with mini-batch Adam.

    Parameters
    ----------
    num_features : number of input features (columns of the input matrix).
    neurons_per_layer : sequence with the width of every layer, output
        layer last.
    activation_per_layer : one callable per layer. Hidden-layer activations
        are called as ``f(z)`` for the forward pass and ``f(z, True)`` for
        the derivative; the last layer's activation is only ever called as
        ``f(z)``.
    random_seed : seed for the weight initialisation (``None`` for a
        non-deterministic initialisation).

    Attributes
    ----------
    params : list of ``{"weights", "bias"}`` dicts, one per layer.
    moments : list of ``{"weights": [m, v], "bias": [m, v]}`` dicts holding
        the Adam moment estimates for the matching entry of ``params``.
    """

    def __init__(self,num_features,neurons_per_layer,activation_per_layer,random_seed=None):
        self._num_features = num_features
        self._neurons = neurons_per_layer
        self._num_layers = len(self._neurons)
        self.activations = activation_per_layer
        self.params,self.moments = self.initialize_parameters(random_seed)

    def initialize_parameters(self,random_seed):
        """Create the initial weights, biases and zeroed Adam moments.

        Weights are drawn from a normal distribution scaled by 0.1; biases
        start at zero. Returns ``(parameters, moments)``.
        """
        # A private RandomState yields the same draws as seeding the global
        # generator with `random_seed`, but leaves the caller's global
        # np.random state untouched.
        rng = np.random.RandomState(random_seed)
        parameters = []
        moments = []
        input_neurons = self._num_features
        for output_neurons in self._neurons:
            parameters.append({"weights": rng.randn(input_neurons,output_neurons)*0.1,
                               "bias": np.zeros((1,output_neurons))})
            # Build separate arrays for m and v; `[arr]*2` would make both
            # entries refer to the same object.
            moments.append({"weights": [np.zeros((input_neurons,output_neurons)) for _ in range(2)],
                            "bias": [np.zeros((1,output_neurons)) for _ in range(2)]})
            input_neurons = output_neurons
        return parameters,moments

    def predict(self,features):
        """Run a forward pass.

        Returns a list whose first element is ``features`` itself, followed
        by the (at least 2-D) activation of every layer; the last element is
        the network output.
        """
        outputs = [features]
        for layer,activation in zip(self.params,self.activations):
            pre_activation = np.dot(outputs[-1],layer['weights'])+layer['bias']
            outputs.append(np.atleast_2d(activation(pre_activation)))
        return outputs

    def backprop(self,outputs,labels,adam,iteration,cost_function):
        """Back-propagate one batch and apply the resulting updates.

        Parameters
        ----------
        outputs : list returned by ``predict`` for the batch.
        labels : 2-D array of targets, one row per sample.
        adam : callable ``(iteration, m, v, gradient) -> (update, m, v)``;
            ``update`` is added to the parameter.
        iteration : step counter forwarded to ``adam``.
        cost_function : callable ``(network_output, labels) -> loss``; used
            only to compute the returned loss value.

        Returns
        -------
        The loss of the batch, evaluated before the parameters are updated.

        Notes
        -----
        The error at the output layer is taken to be
        ``(output - labels) / batch_size`` regardless of ``cost_function``.
        That matches an identity output with a half-squared-error loss or a
        softmax output with a cross-entropy loss; other pairings would need
        a different output delta.
        """
        loss = cost_function(outputs[self._num_layers],labels)
        bp_error = (outputs[self._num_layers]-labels)/labels.shape[0]
        for i in reversed(range(self._num_layers)):
            weights = self.params[i]['weights']
            bias = self.params[i]['bias']

            # Gradients of this layer's parameters.
            weight_grad = outputs[i].T@bp_error
            bias_grad = np.sum(bp_error,0,keepdims=True)

            if i > 0:
                # The activation derivative must be evaluated at the layer's
                # pre-activation, not at its output. Layer i-1 has not been
                # updated yet at this point, so its pre-activation can be
                # recomputed from the stored inputs.
                previous = self.params[i-1]
                pre_activation = np.dot(outputs[i-1],previous['weights'])+previous['bias']
                bp_error = bp_error@weights.T*self.activations[i-1](pre_activation,True)

            # Update parameters and moment vectors.
            wm,wv = self.moments[i]['weights']
            bm,bv = self.moments[i]['bias']
            wupdate,wm,wv = adam(iteration,wm,wv,weight_grad)
            bupdate,bm,bv = adam(iteration,bm,bv,bias_grad)

            self.moments[i]['weights'] = [wm,wv]
            self.moments[i]['bias'] = [bm,bv]
            self.params[i]['weights'] = weights+wupdate
            self.params[i]['bias'] = bias+bupdate
        return loss

    @staticmethod
    def _batch_indices(num_datum,batch_size):
        """Yield shuffled index batches that cover every sample exactly once.

        Batches hold ``batch_size`` indices, except that once fewer than
        ``2*batch_size`` indices remain they are all emitted as one final
        batch.
        """
        order = np.random.permutation(num_datum)
        start = 0
        while start < num_datum:
            if start + 2*batch_size > num_datum:
                # Remainder folded into the last batch; stop here so the
                # tail is not visited a second time.
                yield order[start:]
                return
            yield order[start:start+batch_size]
            start += batch_size

    def train(self,train_inputs,train_target,batch_size,epochs,lr,cost_function):
        """Train with mini-batch Adam and return the per-batch losses.

        Parameters
        ----------
        train_inputs : 2-D array, one row per sample.
        train_target : 2-D array of targets aligned with ``train_inputs``.
        batch_size : nominal number of samples per batch (must be >= 1).
        epochs : number of passes over the data.
        lr : learning rate handed to ``Adam``.
        cost_function : callable ``(network_output, labels) -> loss``.

        Raises
        ------
        ValueError
            If ``batch_size`` is smaller than 1 (the batching loop could
            otherwise never advance).
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        adam = Adam(lr)

        num_datum = train_inputs.shape[0]
        iteration = 1  # Adam's bias correction needs a counter starting at 1.

        losses = []
        for _ in range(epochs):
            for current_batch in self._batch_indices(num_datum,batch_size):
                x_batch = train_inputs[current_batch,:]
                y_batch = np.atleast_2d(train_target[current_batch,:])

                outputs = self.predict(x_batch)
                losses.append(self.backprop(outputs,y_batch,adam,iteration,cost_function))
                iteration += 1
        return losses
        