In [1]:
#from __future__ import print_function
import numpy as np 


import matplotlib.pyplot as plt

%matplotlib inline
# Seed NumPy's global RNG so the random weight initialisation in Dense is repeatable.
np.random.seed(42)

# Module-level accumulators: Dense.forward1 appends the layer's current
# weights / biases to these lists every time it is called.
weightList=[]
biasList = []





class ReLU:
    """Parameter-free layer applying an elementwise rectified linear unit."""

    def __init__(self):
        # Nothing to initialise: the layer holds no state.
        pass

    def forward(self, input):
        """Return ``input`` with every negative entry replaced by zero."""
        return np.maximum(0, input)

    def forward1(self, input):
        """Alternate forward pass; computes the same elementwise ReLU as ``forward``."""
        return np.maximum(0, input)

    def backward(self, input, grad_output):
        """Return the loss gradient w.r.t. the layer input.

        The incoming gradient passes through where ``input`` was strictly
        positive and is zeroed everywhere else.
        """
        positive_mask = input > 0
        return grad_output * positive_mask

class Dense:
    """Fully connected layer computing ``np.dot(input, weights) + biases``.

    Weights are sampled from a normal distribution with mean 0 and standard
    deviation 0.1; biases start at zero. ``backward`` both returns the
    gradient for the previous layer and applies a gradient-descent update to
    this layer's parameters.
    """

    def __init__(self, input_units, output_units, learning_rate=0.1):
        self.learning_rate = learning_rate
        self.weights = np.random.normal(0, 0.1, size=(input_units, output_units))
        self.biases = np.zeros(output_units)

    def forward(self, input):
        """Return the affine transform of ``input``."""
        return np.dot(input, self.weights) + self.biases

    def forward1(self, input):
        """Like ``forward``, but first record the current parameters.

        The weight and bias arrays are appended to the module-level
        ``weightList`` / ``biasList`` before the output is computed.
        """
        weightList.append(self.weights)
        biasList.append(self.biases)
        return np.dot(input, self.weights) + self.biases

    def backward(self, input, grad_output):
        """Backpropagate through the layer and update its parameters.

        ``input`` is what the layer received in the forward pass and
        ``grad_output`` is the loss gradient w.r.t. the layer output.
        Returns the loss gradient w.r.t. ``input``, computed with the
        weights as they were before the update.
        """
        # d loss / d input uses the transposed weights.
        upstream = np.dot(grad_output, self.weights.T)

        # Parameter gradients; the bias gradient is the per-column mean
        # scaled back up by the number of rows in the batch.
        d_weights = np.dot(input.T, grad_output)
        d_biases = grad_output.mean(axis=0) * input.shape[0]

        assert d_weights.shape == self.weights.shape
        assert d_biases.shape == self.biases.shape

        # Rebind (rather than update in place) so arrays already stored by
        # forward1 are left untouched.
        rate = self.learning_rate
        self.weights = self.weights - rate * d_weights
        self.biases = self.biases - rate * d_biases

        return upstream


def softmax_crossentropy_with_logits(logits,reference_answers):
    """Return the per-sample softmax cross-entropy loss.

    Args:
        logits: array indexed as [batch, n_classes] holding unnormalised scores.
        reference_answers: integer class ids, one per row of ``logits``.

    Returns:
        1-D array with one loss value per row:
        ``log(sum(exp(logits))) - logits[correct_class]``.
    """
    row_ids = np.arange(len(logits))
    logits_for_answers = logits[row_ids, reference_answers]

    # Log-sum-exp with the row maximum factored out: exp() then only sees
    # values <= 0, so large logits no longer overflow to inf.
    row_max = np.max(logits, axis=-1, keepdims=True)
    log_partition = row_max[..., 0] + np.log(np.sum(np.exp(logits - row_max), axis=-1))

    return log_partition - logits_for_answers
def grad_softmax_crossentropy_with_logits(logits,reference_answers):
    """Return the gradient of the mean softmax cross-entropy w.r.t. ``logits``.

    Args:
        logits: array indexed as [batch, n_classes] holding unnormalised scores.
        reference_answers: integer class ids, one per row of ``logits``.

    Returns:
        Array shaped like ``logits``: ``(softmax - one_hot) / batch_size``.
    """
    one_hot = np.zeros_like(logits)
    one_hot[np.arange(len(logits)), reference_answers] = 1

    # Subtracting the row maximum leaves the softmax unchanged but keeps
    # exp() from overflowing (which previously produced inf / inf = nan).
    shifted = logits - np.max(logits, axis=-1, keepdims=True)
    exp_shifted = np.exp(shifted)
    softmax = exp_shifted / exp_shifted.sum(axis=-1, keepdims=True)

    return (softmax - one_hot) / logits.shape[0]
##2000, 1500, 1000, 500, 10 

# Layer stack: 9 inputs -> four hidden Dense(50) layers, each followed by a
# ReLU -> 2 output logits. Layers are constructed top to bottom, so the
# random weight draws happen in this order.
network = [
    Dense(9, 50),
    ReLU(),
    Dense(50, 50),
    ReLU(),
    Dense(50, 50),
    ReLU(),
    Dense(50, 50),
    ReLU(),
    Dense(50, 2),
]

def forward(network, X, k = 0):
    """Run ``X`` through every layer in order and collect each layer's output.

    With ``k == 0`` each layer's ``forward`` method is used; for any other
    ``k`` the layer's ``forward1`` method is used instead.

    Returns a list with one entry per layer; the last entry is the output
    of the final layer.
    """
    outputs = []
    current = X
    for layer in network:
        step = layer.forward if k == 0 else layer.forward1
        current = step(current)
        outputs.append(current)
    return outputs
def predict(network,X,k=0):
    """Return, for each row of ``X``, the index of the largest final-layer output.

    ``k`` is passed straight through to ``forward``.
    """
    final_logits = forward(network, X, k)[-1]
    return final_logits.argmax(axis=-1)
def train(network,X,y):
    """Run one forward/backward pass on a batch and return its mean loss.

    Every layer's ``backward`` is called from last to first; per the Dense
    layer above, that call also updates the layer's parameters.
    """
    activations = forward(network, X)

    # inputs_per_layer[i] is what network[i] received during the forward pass.
    inputs_per_layer = [X] + activations
    logits = activations[-1]

    # Loss and its gradient w.r.t. the logits, both computed before any update.
    loss = softmax_crossentropy_with_logits(logits, y)
    grad = grad_softmax_crossentropy_with_logits(logits, y)

    # Backpropagation: walk the layers in reverse, feeding each one its own
    # forward-pass input and the gradient coming from the layer after it.
    for idx in reversed(range(len(network))):
        grad = network[idx].backward(inputs_per_layer[idx], grad)

    return np.mean(loss)





print("\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\")



from tqdm import trange
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield ``(inputs_batch, targets_batch)`` pairs of exactly ``batchsize`` rows.

    Args:
        inputs: indexable collection of samples (supports slice / index-array
            indexing, e.g. a NumPy array).
        targets: labels indexed the same way as ``inputs``.
        batchsize: number of rows per batch.
        shuffle: when true, rows are visited in a random permutation;
            otherwise batches are contiguous slices in the original order.

    A trailing group smaller than ``batchsize`` is not yielded. Iteration is
    driven by tqdm's ``trange``.
    """
    #assert len(inputs) == len(targets)
    if shuffle:
        indices = np.random.permutation(len(inputs))
    for start_idx in trange(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            # Previously `excerpt` was left unassigned on this path, so the
            # default shuffle=False raised UnboundLocalError on first use.
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]

# Training script. X_train / y_train / X_val / y_val / X_test / y_test are not
# defined in the visible part of this file and must exist before this runs.
train_log = []
val_log = []
for epoch in range(5):
    for x_batch, y_batch in iterate_minibatches(X_train, y_train, batchsize=32, shuffle=True):
        train(network, x_batch, y_batch)

    # Record and report accuracy once per epoch. These statements used to sit
    # outside the loop, so only the state after the final epoch was logged.
    train_log.append(np.mean(predict(network, X_train) == y_train))
    val_log.append(np.mean(predict(network, X_val) == y_val))

    print("Epoch", epoch)
    print("Train accuracy:", train_log[-1])
    print("Val accuracy:", val_log[-1])

# Test accuracy in percent. k=1 routes through forward1, which also appends
# every Dense layer's weights and biases to weightList / biasList.
print(100*np.sum(predict(network,X_test,1)==y_test)/len(y_test))



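# \\\\\\\\\\\\\\\\\\\\\\\\\\\


# Notebook output: NameError: ignored
# (presumably raised because X_train, y_train, X_val, y_val, X_test and y_test
# are used above but not defined in the visible part of this file)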