In [256]:
import numpy as np

def softmax(x):
    """Return row-wise softmax probabilities for a 2-D array of scores.

    x: array of shape (N, C); each row holds the class scores of one example.
    Returns an array of the same shape whose rows sum to 1.
    """
    # Shift every row by its OWN maximum.  Softmax is invariant to a per-row
    # constant, and using the row maximum guarantees the largest exponent in
    # each row is exp(0) == 1, so the denominator can never underflow to 0.
    # (Shifting by the global maximum turns rows far below it into 0/0 = NaN.)
    shifted = x - np.max(x, axis=1, keepdims=True)
    numerator = np.exp(shifted)
    probs = numerator / np.sum(numerator, axis=1, keepdims=True)
    return probs  # Normalized probabilities of each class

class NeuralNet:
    """Small fully connected classifier: affine -> ReLU -> affine -> softmax.

    ``__init__`` creates one weight matrix and one bias vector per consecutive
    pair of entries in ``dimensions``, but ``fprop`` and ``bprop`` only use
    layers 0 and 1, i.e. the network is effectively two layers deep.

    ``fprop`` relies on a module-level ``softmax`` function being in scope.
    All ``print`` calls take a single argument so they produce the same output
    whether ``print`` is a statement (Python 2) or a function (Python 3).
    """

    def __init__(self, dimensions):
        """Create the layer parameters.

        dimensions: sequence of layer widths, e.g. ``[10, 20, 2]``.

        Layer ``i`` gets a ``(dimensions[i], dimensions[i+1])`` weight matrix
        drawn uniformly from ``[-1/sqrt(dimensions[i]), 1/sqrt(dimensions[i]))``
        and a zero bias vector of length ``dimensions[i+1]``.
        """
        self.weights = []
        self.biases = []
        for fan_in, fan_out in zip(dimensions[:-1], dimensions[1:]):
            bound = 1 / float(np.sqrt(fan_in))
            self.weights.append(
                np.random.uniform(low=-bound, high=bound, size=(fan_in, fan_out))
            )
            self.biases.append(np.zeros((fan_out,)))
        # Width of the last layer (number of output classes).
        self.D = dimensions[len(dimensions) - 1]

    def affine_backward(self, output_grad):
        """Unfinished backward pass for an affine layer.

        NOTE(review): this method is a stub.  It reads ``self.affine_cache``
        and an index ``i``; neither is assigned anywhere in this class, it
        ignores ``output_grad`` and it returns nothing.  No other method of
        this class calls it.  Complete or remove it before relying on it.
        """
        x = self.affine_cache[i]
        N = x.shape[0]
        D = np.prod(x.shape[1:])
        inp = x.reshape((N, D))

    def loss(self, probs, labels):
        """Return the mean negative log-probability of the true classes.

        probs: (N, C) array; row n holds the class probabilities of example n.
        labels: length-N integer array of true class indices.
        """
        N = probs.shape[0]
        true_class_probs = probs[np.arange(N), labels]
        return -np.sum(np.log(true_class_probs)) / N

    def fprop(self, inp, labels):
        """Run the forward pass and evaluate the loss.

        inp: array whose first axis indexes examples; all remaining axes are
            flattened into one feature axis.
        labels: length-N integer array of true class indices.

        Returns ``(loss, cache)`` where ``cache`` is the tuple
        ``(inp, activations1, hidden1, activations2, probs, labels)`` that
        ``bprop`` expects.
        """
        N = inp.shape[0]
        # Flatten each example to a vector: (N, product of remaining dims).
        D = np.prod(inp.shape[1:])
        data = inp.reshape((N, D))

        activations1 = np.dot(data, self.weights[0]) + self.biases[0]
        hidden1 = np.maximum(0, activations1)  # ReLU

        activations2 = np.dot(hidden1, self.weights[1]) + self.biases[1]
        probs = softmax(activations2)

        loss = self.loss(probs, labels)
        cache = (inp, activations1, hidden1, activations2, probs, labels)
        return loss, cache

    def bprop(self, cache):
        """Back-propagate the mean cross-entropy loss through the network.

        cache: the tuple returned as the second value of ``fprop``.

        Returns ``(grad_inp, grad_W1, grad_b1, grad_ha, grad_hs, grad_W2,
        grad_b2, grad_oa)``.  The parameter gradients and ``grad_inp`` are
        divided by the batch size N; ``grad_ha``, ``grad_hs`` and ``grad_oa``
        are the unscaled per-example gradients.  ``grad_inp`` has the same
        shape as the cached input.

        Also prints the row sums of the probabilities and the output-layer
        gradient, as a debugging aid.
        """
        inp, activations1, hidden1, activations2, probs, labels = cache
        N = inp.shape[0]
        # Flatten the input exactly as fprop does so that multi-axis inputs
        # produce a (D, H) weight gradient.
        data = inp.reshape((N, np.prod(inp.shape[1:])))

        # Gradient of softmax + cross-entropy w.r.t. the scores: probs - onehot.
        # Work on a copy: subtracting in place from ``probs`` itself would
        # silently corrupt the probabilities held in the caller's cache.
        grad_oa = probs.copy()

        print(np.sum(grad_oa, axis=1))
        grad_oa[np.arange(labels.shape[0]), labels] -= 1
        print(grad_oa)

        # Gradients of layer 2
        grad_W2 = np.dot(np.transpose(hidden1), grad_oa) / N
        grad_b2 = np.sum(grad_oa, axis=0) / N
        grad_hs = np.dot(grad_oa, np.transpose(self.weights[1]))

        # Gradient through the ReLU nonlinearity: pass only where it was active.
        grad_ha = grad_hs * (np.where(activations1 > 0, 1, 0))

        # Gradients of the input layer
        grad_W1 = np.dot(np.transpose(data), grad_ha) / N
        grad_b1 = np.sum(grad_ha, axis=0) / N
        grad_inp = np.dot(grad_ha, np.transpose(self.weights[0])) / N
        grad_inp = grad_inp.reshape(inp.shape)

        return (grad_inp, grad_W1, grad_b1, grad_ha, grad_hs, grad_W2, grad_b2, grad_oa)

    def grad_check(self, inp, labels, epsilon, cache):
        """Print numerical/analytic gradient ratios for every parameter entry.

        inp, labels: the batch to evaluate the loss on (as for ``fprop``).
        epsilon: forward-difference step size.
        cache: despite the name, the tuple of gradients returned by ``bprop``.

        One ratio is printed per line, in the order b2, W2, b1, W1 (matrices
        in row-major order).  Values close to 1.0 mean the analytic gradient
        agrees with the finite-difference estimate.  Returns nothing.
        """
        (_, grad_W1, grad_b1, _, _, grad_W2, grad_b2, _) = cache
        actual_loss, _ = self.fprop(inp, labels)
        checks = (
            (self.biases[1], grad_b2),
            (self.weights[1], grad_W2),
            (self.biases[0], grad_b1),
            (self.weights[0], grad_W1),
        )
        for param, grad in checks:
            self._print_gradient_ratios(inp, labels, epsilon, actual_loss, param, grad)

    def _print_gradient_ratios(self, inp, labels, epsilon, base_loss, param, grad):
        """Print the finite-difference / analytic gradient ratio per entry of param.

        ``param`` must be one of the arrays stored on this network, because it
        is perturbed in place (and restored) around each ``fprop`` call.
        """
        for idx in np.ndindex(*param.shape):
            param[idx] += epsilon
            perturbed_loss, _ = self.fprop(inp, labels)
            param[idx] -= epsilon
            check_grad = (perturbed_loss - base_loss) / epsilon
            if grad[idx] == 0:
                # Avoid dividing by zero: offset both sides by epsilon so an
                # exactly-zero gradient pair still reports a ratio of 1.0.
                print((check_grad + epsilon) / (grad[idx] + epsilon))
            else:
                print(check_grad / grad[idx])
        

# Driver: build a 10-20-2 network, run one forward/backward pass on a random
# batch of 4 examples and print the gradient-check ratios.
np.random.seed(123)  # fixed seed so the printed ratios are reproducible
NN = NeuralNet([10, 20, 2])
example = np.random.uniform(size=(4, 10))
# Builtin ``int`` is what the ``np.int`` alias pointed to; the alias was
# deprecated in NumPy 1.20 and removed in 1.24.
labels = np.ones((4,), dtype=int)
loss, (inp, activations1, hidden1, activations2, probs, labels) = NN.fprop(example, labels)

grads = NN.bprop((inp, activations1, hidden1, activations2, probs, labels))
NN.grad_check(example, labels, 0.00001, grads)


[ 1.  1.  1.  1.]
[[ 0.5735648  -0.5735648 ]
 [ 0.53267377 -0.53267377]
 [ 0.50343878 -0.50343878]
 [ 0.55478725 -0.55478725]]
1.00000228813
0.999997711877
1.00000046263
0.999999537281
1.00000020877
0.999999791194
1.00000006994
0.999999931349
1.0
1.0
1.00000114454
0.999998855496
1.0000007361
0.999999263785
1.00000053027
0.999999469577
1.00000067481
0.999999325194
1.00000090064
0.999999099291
1.00000033379
0.999999666165
1.0
1.0
1.00000088741
0.999999112493
1.00000019072
0.999999809623
1.00000096961
0.999999030446
1.0000004847
0.999999515437
1.0
1.0
1.0
1.0
1.000000401
0.99999959881
1.00000017878
0.999999821473
1.00000057208
0.99999942778
1.0000004869
0.999999658949
1.00000019528
1.0
1.0000006263
1.0000002801
0.999999621154
0.999999928147
0.999999857136
1.00000067398
1.0
0.999999303928
1.00000080575
0.999999959154
1.00000025911
1.0
1.0
1.00000091719
0.999999303039
0.999999911176
1.00000019026
0.999999905324
1.00000005437
1.0
1.00000024463
1.00000010914
0.99999992044
0.999999961094
0.999