# ANN from Scratch

#### **The math in this notebook is not clearly explained; avoid relying on it**

#### refer to: https://towardsdatascience.com/building-neural-network-from-scratch-9c88535bf8e9

In [None]:
import json, numpy as np
# Fix the global NumPy RNG seed so that randomly drawn values are repeatable across runs.
np.random.seed(59)

## define layer template

#### *Forward: compute the network layer by layer. Backward: take the error at the layer output, scale it by the layer's derivative, and return the error at the layer input.*

In [None]:
class Layer:
    """Identity layer that doubles as the template for the other layers.

    A layer exposes ``forward`` (input -> output), ``backward`` (error at the
    output -> error at the input) and ``savelayer`` / ``loadlayer`` for
    persisting whatever state it owns. This base layer owns no state.
    """

    def __init__(self):
        pass

    def savelayer(self):
        """Return the state to persist; the base layer has none, so ``None``."""
        return None

    def loadlayer(self, laydict):
        """Restore persisted state; nothing to restore for the base layer."""
        pass

    def forward(self, input):
        """Return ``input`` unchanged (the very same object)."""
        return input

    def backward(self, input, err_out):
        """Map the error at the layer output back to the layer input.

        The derivative of the identity mapping is the identity matrix sized
        by the number of columns of ``input``, so the values of ``err_out``
        come back unchanged.
        """
        identity_jacobian = np.eye(input.shape[1])
        err_in = np.dot(err_out, identity_jacobian)
        return err_in

## Activation layers, Sigmoid and ReLU

#### Sigmoid function: $g(x) = \frac{1}{1+e^{-x}}$, $g'(x) = \frac{e^{-x}}{(1+e^{-x})^2} = g(x)\,(1-g(x))$. Sigmoid maps $(-\infty, +\infty)$ to $(0, 1)$; its inverse is the logit function (see below)

In [None]:
class sigmoid(Layer):
    """Element-wise logistic activation, g(x) = 1 / (1 + exp(-x))."""

    def __init__(self): pass

    def forward(self, input):
        """Return g(input); the result is also kept on ``self.output``."""
        self.output = 1 / (1 + np.exp(-input))
        return self.output

    def backward(self, input, err_out):
        """Scale the output error by the sigmoid derivative g * (1 - g).

        The activation is recomputed from ``input`` rather than read from the
        value cached by ``forward``: that way ``backward`` works even when
        ``forward`` has not been called on this instance, and it can never
        pick up a stale activation belonging to a different batch.
        """
        activation = 1 / (1 + np.exp(-input))
        sigmoid_deriv = activation * (1 - activation)
        return err_out * sigmoid_deriv


### ReLU function: $R(x) = \max(0, x)$, $R'(x) = 1$ if $x > 0$ else $0$. In Python the boolean mask `x > 0` multiplies as 1 for `True` and 0 for `False`

In [None]:
class ReLU(Layer):
    """Element-wise rectifier, R(x) = max(0, x)."""

    def __init__(self):
        pass

    def forward(self, input):
        """Return ``input`` with every negative entry replaced by zero."""
        return np.maximum(0, input)

    def backward(self, input, err_out):
        """Pass the error through where ``input`` was positive, zero it elsewhere."""
        # Boolean mask: True (acts as 1) where the slope is 1, False (0) otherwise.
        positive_mask = input > 0
        return err_out * positive_mask

## Dense layer

In [None]:
class dense(Layer):
    """Fully connected layer computing ``inputs @ wgs + bias``.

    ``wgs`` has shape (input_units, output_units) and ``bias`` has shape
    (output_units,). The layer updates its own parameters inside
    ``backward`` using ``learning_rate``.
    """

    def __init__(self, input_units, output_units, learning_rate=0.5):
        self.learning_rate = learning_rate
        # Normal initialisation with mean 0 and standard deviation
        # sqrt(2 / (input_units + output_units)).
        self.wgs = np.random.normal(loc=0.0, scale=np.sqrt(2/(input_units+output_units)), size=(input_units, output_units))
        self.bias = np.zeros(output_units)

    def savelayer(self):
        """Return the weights with the bias appended as one extra last row."""
        return np.concatenate((self.wgs, [self.bias]), axis=0)

    def loadlayer(self, layerlist):
        """Restore parameters from the layout produced by ``savelayer``.

        All rows but the last become the weights; the last row becomes the bias.
        """
        self.wgs = np.array(layerlist[:-1])
        self.bias = np.array(layerlist[-1])

    def forward(self, inputs):
        """Return the affine map ``inputs @ wgs + bias``."""
        return np.dot(inputs, self.wgs) + self.bias

    def backward(self, inputs, err_out):
        """Propagate the error to the layer input and update the parameters.

        Returns the error with respect to ``inputs``, computed with the
        weights as they were before this call's update.
        """
        err_in = np.dot(err_out, self.wgs.T)

        # Corrections for weights and bias, accumulated over the batch rows.
        delta_wgs = np.dot(inputs.T, err_out)
        # Plain column sum: the former ``mean(axis=0) * n`` was the same
        # quantity but evaluated to NaN for a zero-row batch, which then
        # contaminated the bias for good.
        delta_bias = err_out.sum(axis=0)

        assert delta_wgs.shape == self.wgs.shape and delta_bias.shape == self.bias.shape

        # The correction is *added*: err_out is assumed to already point in
        # the improving direction (e.g. target - output) -- confirm at the caller.
        self.wgs = self.wgs + self.learning_rate * delta_wgs
        self.bias = self.bias + self.learning_rate * delta_bias

        return err_in

## Cost function -- Softmax_crossEntropy and its gradient

#### $logit(p) = \ln\frac{p}{1-p}$, where $p$ is a probability and $odds = \frac{p}{1-p}$
The logit function is the inverse of the sigmoid: it maps the interval $(0, 1)$ to $(-\infty, +\infty)$

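#### cross entropy: $loss = -\ln p_{correct} = -\left(a_{correct} - \ln \sum_{i} e^{a_i}\right)$; the code below returns $a_{correct} - \ln \sum_{i} e^{a_i}$, i.e. the log-likelihood (the negative of the loss)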

In [None]:
def softmax_crossentropy(logits, reference_answers):
    """Return the log-probability of the correct class for every sample.

    Computes ``a_correct - ln(sum_i exp(a_i))`` per row, which is the log of
    the softmax probability of the reference class (always <= 0). This is
    the negative of the conventional cross-entropy loss.

    Args:
        logits: array of shape [batch, n_classes].
        reference_answers: integer class ids, one per row of ``logits``.

    Returns:
        Array of shape [batch].
    """
    # Subtract the row maximum before exponentiating. The result is
    # mathematically unchanged, but exp() can no longer overflow to inf
    # (which used to turn the output into -inf / nan for large logits).
    shifted = logits - np.max(logits, axis=-1, keepdims=True)
    log_normaliser = np.log(np.sum(np.exp(shifted), axis=-1))

    shifted_for_answers = shifted[np.arange(len(logits)), reference_answers]
    return shifted_for_answers - log_normaliser

In [None]:
def grad_softmax_crossentropy(logits, reference_answers):
    """Return ``(one_hot(reference_answers) - softmax(logits)) / batch``.

    Args:
        logits: array of shape [batch, n_classes].
        reference_answers: integer class ids, one per row of ``logits``.

    Returns:
        Array with the same shape as ``logits``.
    """
    # Shift by the row maximum so exp() cannot overflow; softmax is invariant
    # to a per-row constant shift. Without it, large logits gave inf / inf = nan.
    shifted = logits - np.max(logits, axis=-1, keepdims=True)
    exp_shifted = np.exp(shifted)
    softmax = exp_shifted / exp_shifted.sum(axis=-1, keepdims=True)

    ones_for_answers = np.zeros_like(softmax)
    ones_for_answers[np.arange(len(logits)), reference_answers] = 1

    return (ones_for_answers - softmax) / logits.shape[0]


## Network

In [None]:
class yann:
    """Helpers that train, evaluate, save and load a network.

    A *network* is an ordered, indexable sequence of layer objects, each
    offering ``forward``, ``backward``, ``savelayer`` and ``loadlayer``.
    Every helper is a static method, so ``yann.train_network(net, ...)`` and
    ``yann().train_network(net, ...)`` behave the same.
    """

    def __init__(self):
        pass

    @staticmethod
    def train_network(network, nnInputs, nnOutputs, epoches=1000, batch_size=0):
        """Train ``network`` in place by repeated forward/backward passes.

        Args:
            network: sequence of layers; each layer updates itself in ``backward``.
            nnInputs: 2-D array of samples, one per row.
            nnOutputs: 2-D array of targets, row-aligned with ``nnInputs``.
            epoches: number of full passes over the data.
            batch_size: rows per update step; any value below 2 means
                "use the whole data set as a single batch".
        """
        n_samples = len(nnInputs)
        if n_samples == 0:
            return  # nothing to train on (this used to divide by zero)
        if batch_size < 2:
            batch_size = n_samples

        for _ in range(epoches):
            # Stepping by batch_size also visits the shorter trailing batch.
            # The previous floor division dropped it, and skipped training
            # altogether whenever batch_size exceeded the number of samples.
            for start in range(0, n_samples, batch_size):
                stop = start + batch_size
                layerIn = nnInputs[start:stop, :]
                y_bat = nnOutputs[start:stop, :]

                # Forward pass, remembering what each layer received.
                layerIns = []
                for layer in network:
                    layerIns.append(layerIn)
                    layerIn = layer.forward(layerIn)

                # Backward pass: start from (target - output) and walk the
                # layers in reverse, feeding each its own forward input.
                err_out = y_bat - layerIn
                for layer, layer_input in zip(reversed(network), reversed(layerIns)):
                    err_out = layer.backward(layer_input, err_out)

    @staticmethod
    def evaluate_network(network, nnInputs):
        """Run a forward pass, print the transposed result and return the output."""
        output = nnInputs
        for layer in network:
            output = layer.forward(output)
        print('training result: ')
        print(str(output.T))
        return output

    @staticmethod
    def save_network(network, fname):
        """Write one text line per layer to ``fname``.

        A layer whose ``savelayer()`` returns ``None`` is written as ``None``;
        any other layer is written as a compact nested list of numbers.
        Numbers are written with Python's float ``repr``, so they round-trip
        exactly and large arrays are never abbreviated, unlike the previous
        ``str(ndarray)`` output. The previous ``np.savetxt`` call on a ragged
        list is also rejected by newer NumPy releases.
        """
        with open(fname, 'w') as fp:
            for layer in network:
                layer_data = layer.savelayer()
                if layer_data is None:
                    fp.write('None\n')
                else:
                    rows = np.asarray(layer_data).tolist()
                    fp.write(json.dumps(rows, separators=(',', ':')) + '\n')

    @staticmethod
    def load_network(network, fname):
        """Restore layer state from a file written by ``save_network``.

        Both the current one-line-per-layer format and the older
        ``str(ndarray)`` format (space-separated numbers, rows spread over
        several lines) are accepted. ``None`` entries are skipped, so the
        layer at that position is left untouched.

        Returns:
            The same ``network`` object, for convenience.

        Raises:
            ValueError: if the file content is malformed.
            IndexError: if the file holds more entries than ``network`` has layers.
        """
        with open(fname, 'r') as fp:
            text = fp.read()
        for nl, layer_data in enumerate(yann._parse_layers(text)):
            if layer_data is None:
                continue
            network[nl].loadlayer(layer_data)
        return network

    @staticmethod
    def _parse_layers(text):
        """Split saved text into a list of ``None`` / list-of-rows entries."""
        # Treat commas as whitespace and isolate the brackets, so that both
        # "[[1 2]\n [3 4]]" and "[[1,2],[3,4]]" yield the same token stream.
        spaced = text.replace(',', ' ').replace('[', ' [ ').replace(']', ' ] ')
        layers = []
        matrix = None  # rows of the layer currently being read
        row = None     # numbers of the row currently being read
        for token in spaced.split():
            if token == '[':
                if matrix is None:
                    matrix = []
                elif row is None:
                    row = []
                else:
                    raise ValueError('layer data nested deeper than two levels')
            elif token == ']':
                if row is not None:
                    matrix.append(row)
                    row = None
                elif matrix is not None:
                    layers.append(matrix)
                    matrix = None
                else:
                    raise ValueError("unbalanced ']' in layer data")
            elif token == 'None' and matrix is None:
                layers.append(None)
            elif row is not None:
                # float() accepts NumPy spellings such as "0." that JSON rejects.
                row.append(float(token))
            else:
                raise ValueError('unexpected token %r in layer data' % token)
        if matrix is not None:
            raise ValueError('unterminated layer data')
        return layers