# Imports

In [1]:
import numpy as np
from time import time
import matplotlib.pyplot as plt
import pandas as pd
import math

# https://towardsdatascience.com/math-neural-network-from-scratch-in-python-d6da9f29ce65

# Load the Data

In [2]:
#<Something>

# Let's get started

## Utility functions

In [8]:
def get_folds_idx(N, nFolds, seed=42):
    """
    Randomly permute the indices 0..N-1 and split them into nFolds folds.

    Arguments:
    N -- number of samples; the indices 0, 1, ..., N-1 are partitioned
    nFolds -- number of folds to produce (must be >= 1)
    seed -- value passed to np.random.seed before shuffling

    Returns:
    indices -- list of nFolds numpy index arrays. Every index appears in
               exactly one fold. When N is not a multiple of nFolds the
               first N % nFolds folds hold one extra index, so no sample is
               dropped.

    Raises:
    ValueError -- if nFolds is smaller than 1

    Note: reseeds numpy's global random generator as a side effect.
    """
    if nFolds < 1:
        raise ValueError("nFolds must be at least 1, got {}".format(nFolds))
    np.random.seed(seed)
    rnd_idx = np.random.permutation(N)
    # array_split (unlike fixed-size slicing) also distributes the remainder.
    return np.array_split(rnd_idx, nFolds)


def showImage(img, label):
    """
    Display a flattened square image with its label in the plot title.

    Arguments:
    img -- flat sequence/array of pixel values; its element count must be a
           perfect square (side * side), otherwise the reshape raises
    label -- label of the image; converted with str() for the title

    Returns:
    None -- the figure is shown through plt.show()
    """
    picAr = np.array(img, dtype='float')
    # Side length of the square image, recovered from the pixel count.
    roughSd = int(math.sqrt(picAr.size))
    # Transposed after reshaping -- presumably the pixels are stored
    # column by column; TODO confirm against the data loader.
    pix = picAr.reshape((roughSd, roughSd)).T
    plt.imshow(pix)
    # plt.title takes one text argument; the label is part of that text.
    plt.title('label for this image is ' + str(label))
    plt.show()

    
def act_fn(typ, Z):
    """
    Apply an element-wise activation function.

    Arguments:
    typ -- name of the activation, case-insensitive: 'sigmoid', 'relu' or 'tanh'
    Z -- numpy array of any shape

    Returns:
    A -- output of sigmoid(Z)/relu(Z)/tanh(Z), same shape as Z

    Raises:
    ValueError -- if typ is not one of the supported names
    """
    kind = typ.lower()
    if kind == 'sigmoid':
        # exp(-|Z|) cannot overflow; each sign of Z uses the algebraically
        # equivalent form of 1/(1+exp(-Z)) built from that safe term.
        e = np.exp(-np.abs(Z))
        return np.where(np.asarray(Z) >= 0, 1 / (1 + e), e / (1 + e))
    if kind == 'relu':
        return np.maximum(0, Z)
    if kind == 'tanh':
        return np.tanh(Z)
    # TODO: softmax (exp(Z) / sum(exp(Z))) is not implemented yet.
    raise ValueError("unsupported activation: {!r}".format(typ))

    
def back_fn(typ, Z):
    """
    Element-wise derivative of an activation function, evaluated at Z.

    Arguments:
    typ -- name of the activation, case-insensitive: 'relu', 'sigmoid' or 'tanh'
    Z -- numpy array of any shape (the values the activation was applied to)

    Returns:
    dZ -- derivative of the activation at each entry of Z, same shape as Z

    Raises:
    ValueError -- if typ is not one of the supported names
    """
    kind = typ.lower()
    if kind == 'relu':
        # Derivative is 1 where Z > 0 and 0 elsewhere (not Z itself).
        Z = np.asarray(Z)
        return (Z > 0).astype(Z.dtype)
    if kind == 'sigmoid':
        # sigmoid'(Z) = exp(-Z)/(1+exp(-Z))**2 is even in Z, so it can be
        # evaluated with exp(-|Z|), which never overflows to inf/inf = nan.
        e = np.exp(-np.abs(Z))
        return e / (1 + e) ** 2
    if kind == 'tanh':
        return 1 - np.tanh(Z) ** 2
    # TODO: softmax derivative is not implemented yet.
    raise ValueError("unsupported activation: {!r}".format(typ))

## Class (Cuz I'm fancy)

In [None]:
class Layer:
    """
    Parameters and per-pass buffers of one network layer.

    A layer with out_nodes != 0 owns the weights and bias that map its
    num_nodes inputs to the out_nodes inputs of the next layer. A layer with
    out_nodes == 0 only holds its inputs; every other buffer is None.

    Arguments:
    num_nodes -- number of nodes (input features) of this layer
    out_nodes -- number of nodes of the next layer, 0 if there is none
    act_fn -- activation name stored on the layer as `act_fun`
    """
    def __init__(self, num_nodes, out_nodes, act_fn):
        self.num_nodes = num_nodes
        self.act_fun = act_fn

        # Output of the previous layer. Kept as a row vector so that
        # np.dot(inputs, weights) is defined, like the other placeholders.
        self.inputs = np.zeros((1, num_nodes))
        if out_nodes != 0:
            # Random normal initialisation scaled by 1/sqrt(fan_in + fan_out).
            ssz = np.sqrt(num_nodes + out_nodes)
            self.weights = np.random.randn(num_nodes, out_nodes)/ssz
            self.bias = np.random.randn(1, out_nodes)/ssz
            self.derivs = np.zeros((1, out_nodes))
            self.zsList = np.zeros((1, out_nodes))
            self.asList = np.zeros((1, out_nodes))
        else:
            self.weights = None
            self.bias = None
            self.derivs = None
            self.asList = None
            self.zsList = None

            
            
class NeuralNet:
    """
    Fully connected feed-forward network built from Layer objects.

    network[i] (for i < num_layers-1) holds the weights/bias that map layer i
    to layer i+1 together with the activation applied to that mapping; the
    final entry network[num_layers-1] only stores the network output in its
    `inputs` attribute.

    Arguments:
    input_dim -- number of input features
    output_dim -- number of output nodes
    hidden_dim -- list with the number of nodes of each hidden layer
    act_fns -- list of activation names, one per weight matrix
               (len(hidden_dim) + 1 entries)
    lr_strat -- learning-rate strategy: 0 keeps the rate constant, anything
                else decays it as eta0/sqrt(iteration)
    cost_fn -- name of the cost function; only stored. The gradient used in
               backward() is (y_hat - y)/n_samples.
    seed -- seed for numpy's global random generator, applied before the
            layer weights are drawn
    """
    def __init__(self, input_dim, output_dim, hidden_dim, act_fns, lr_strat, cost_fn='mse', seed=42):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_dim = hidden_dim # list of dimensions
        self.num_layers = len(self.hidden_dim)+2
        self.act_fns = act_fns
        assert(len(self.act_fns) == self.num_layers-1)
        # Number of nodes per layer: input, hidden..., output.
        self.num_nodes = [self.input_dim] + list(self.hidden_dim) + [self.output_dim]
        self.cost_fn = cost_fn
        assert(self.num_layers == len(self.num_nodes))
        self.lr_strat = lr_strat

        # Make the random weight initialisation reproducible.
        np.random.seed(seed)
        self.network = []
        last = self.num_layers-1
        for i in range(self.num_layers):
            if i == last:
                # Output holder: no outgoing weights, hence no activation.
                layer = Layer(self.num_nodes[i], 0, None)
            else:
                layer = Layer(self.num_nodes[i], self.num_nodes[i+1], self.act_fns[i])
            self.network.append(layer)

    def forward(self, xin, target=None):
        """
        Run a forward pass and cache the intermediate values on the layers.

        Arguments:
        xin -- input array whose last dimension is input_dim
               (presumably n_samples x input_dim -- TODO confirm with caller)
        target -- unused; kept so existing calls that pass it still work

        Returns:
        The network output, i.e. the activation of the last weight layer.
        """
        self.network[0].inputs = xin # first layer's input is the data itself
        for i in range(self.num_layers-1): # the output holder has nothing to compute
            layer = self.network[i]
            # asList caches the pre-activation, zsList the activation output.
            layer.asList = layer.bias + np.dot(layer.inputs, layer.weights)
            layer.zsList = act_fn(layer.act_fun, layer.asList)
            self.network[i+1].inputs = layer.zsList
        return self.network[self.num_layers-1].inputs

    def backward(self, y):
        """
        Back-propagate the error of the latest forward() pass.

        Stores in network[i].derivs the gradient of the cost with respect to
        the pre-activation (asList) of layer i; the output holder's derivs
        receives the gradient with respect to the network output.

        Arguments:
        y -- numpy array of targets with the same shape as the network output
        """
        L = self.num_layers
        out_layer = self.network[L-1]
        y_hat = out_layer.inputs # the prediction lives in the output holder's inputs
        out_layer.derivs = (y_hat - y) / y.shape[0] # If softmax, then change this
        # Gradient w.r.t. the output of the layer currently being processed.
        upstream = out_layer.derivs
        for i in range(L-2, -1, -1): # i = L-2, L-3, ..., 0
            layer = self.network[i]
            layer.derivs = np.multiply(upstream, back_fn(layer.act_fun, layer.asList))
            # Push the gradient through this layer's weights to its inputs.
            upstream = layer.derivs.dot(layer.weights.T)
        return

    def update_weights(self, y, epoch, ilr):
        """
        Run backward(y) and take one gradient-descent step on every layer.

        Arguments:
        y -- targets, passed on to backward()
        epoch -- zero-based epoch counter used for the learning-rate schedule
        ilr -- initial learning rate
        """
        self.backward(y)
        # One learning rate per step, shared by all layers.
        eta = self.update_lr(ilr, epoch+1)
        for layer in self.network[:-1]:
            # dCost/dW = (inputs of the layer)^T . (gradient at its pre-activation)
            grad_wts = np.dot(layer.inputs.T, layer.derivs)
            grad_b = np.sum(layer.derivs, axis=0, keepdims=True)
            layer.weights -= eta*grad_wts
            layer.bias -= eta*grad_b

    def update_lr(self, eta0, iteration):
        """
        Return the learning rate for the given iteration.

        Arguments:
        eta0 -- initial learning rate
        iteration -- one-based iteration counter; values below 1 are treated as 1

        Returns:
        eta0 when lr_strat == 0, otherwise eta0/sqrt(iteration)
        """
        if (self.lr_strat == 0):
            return eta0
        # Clamp so iteration 0 cannot divide by zero.
        return eta0/(max(iteration, 1)**0.5)
        
    
            
        
        
    
    
# class NeuralNet:
    
#     def __init__(self, input_dim=None, output_dim=None, hidden_dim=None, num_hidden_layers=None, typs=None, seed=42):
#         # Can add error, no need
#         self.input_dim = input_dim
#         self.output_dim = output_dim
#         self.hidden_dim = hidden_dim
#         self.num_hid_layers = num_hidden_layers
#         assert(len(self.hidden_dim)==self.num_hid_layers)
#         self.typs = typs
#         self.layers = self.hidden_dim.copy()
#         self.layers.insert(0, self.input_dim) #Not sure if replacement is inplace
#         self.layers.append(self.output_dim)
#         assert(len(self.typs) == self.num_hid_layers+1)
#         self.params, self.outs, self.derivs = init_params(self, seed)
        
#     def init_params(self, seed):
#         np.random.seed(seed)
#         parameters = {}
#         output = {}
#         deriv = {}
#         # 
#         L = len(self.layers)
#         for l in range(1, L):
#             ssz = np.sqrt(self.layers[l]+self.layers[l-1])
#             parameters['W' + str(l)] = np.random.randn(self.layers[l],self.layers[l-1])/ssz 
#             #W(^l)_{i,j} = Weight from lth to l+1th layer. i=destn, j=src idx
#             parameters['b' + str(l)] = np.random.randn(self.layers[l],1)/ssz
#             output[str(l)] = np.zeros((self.layers[l],1)) # Change to none somehow
#             deriv[str(l)] = np.zeros((self.layers[l],1)) # Change to none somehow
            
#             assert(parameters['W' + str(l)].shape == (self.layers[l], self.layers[l-1]))
#             assert(parameters['b' + str(l)].shape == (self.layers[l], 1))
#             assert(output[str(l)].shape == (self.layers[l], 1))
#             assert(deriv[str(l)].shape == (self.layers[l], 1))

#         return parameters, output, deriv
    
#     def forward(self, xin):
#         L = len(self.layers)
#         for i in range(1,L):
#             xout = []
#             self.outs[str(i)] = act_fn(self.typs[i-1], np.dot(self.params['W'+str(i)], xin))
#             x_out = self.outs[str(i)]
#             xin = x_out # curr output = next input
#         yout = cin
        
#         return yout
    
#     def backward(self, yout):
#         L = len(self.layers)
        
#         for i in reversed(range(L)): # backwards
#             if i == L - 1:
#                 # logits/pred - target
#                 err = self.outs[str(i)] - yout
#                 self.derivs[str(i)] = np.multiply(err, back_fn(self.typs[i-1], self.outs[str(i)]))
                
#             else:
#                 # Weighted sum of derivs
#                 err = 
# #                 for j, node in enumerate(self.network[i]):
# #                     err = sum([node_['weights'][j] * node_['delta'] for node_ in self.network[i+1]])
# #                     node['delta'] = err * transfer_derivative(node['output'])
                    
#     def update(self, x, eta):
#         for i, layer in enumerate(self.network):
#             # Grab input values
#             if i == 0: inputs = x
#             else: inputs = [node_['output'] for node_ in self.network[i-1]]
#             # Update weights
#             for node in layer:
#                 for j, input in enumerate(inputs):
#                     # dw = - learning_rate * (error * transfer') * input
#                     node['weights'][j] += - eta * node['delta'] * input
    
    
#     def train(self, X, y, eta=0.5, n_epochs=200):
#         for epoch in range(n_epochs):
#             for (x_, y_) in zip(X, y):
#                 self._forward_pass(x_) # forward pass (update node["output"])
#                 yhot_ = self._one_hot_encoding(y_, self.output_dim) # one-hot target
#                 self._backward_pass(yhot_) # backward pass error (update node["delta"])
#                 self._update_weights(x_, eta) # update weights (update node["weight"])

#     # Predict using argmax of logits
#     def predict(self, X):
#         ypred = np.array([np.argmax(self._forward_pass(x_)) for x_ in X], dtype=np.int)
#         return ypred
    
#     def _one_hot_encoding(self, idx, output_dim):
#         x = np.zeros(output_dim, dtype=np.int)
#         x[idx] = 1
#         return x
        
        
        
        
### TODO (by tomorrow):
### Select the activation function per layer, not once for the whole model; take it as an input. ## PARTLY DONE
### TODO (by 7 Nov):
### Implement backprop for an individual layer and loop over the layers in the training function. See neural_network.ipynb for help.
### Make the last layer a softmax: softmax just takes the outputs of the last layer and converts them to a probability distribution.
### act_fns already has length (total layers - 1), so softmax can be added internally at the end.
### Fallback: copy Vaibhav's implementation.