In [1]:
import numpy as np
import random

In [2]:
N = 20
dimensions = [10,5, 10]
data = np.random.randn(N, dimensions[0])
labels = np.zeros((N, dimensions[2]))
for i in xrange(N):
    labels[i, random.randint(0, dimensions[2] -1)] = 1
    
params = np.random.randn((dimensions[0] + 1) * dimensions[1] + (dimensions[1] + 1) * dimensions[2], )

In [3]:
def sigmoid(x):
    """Sigmoid function"""
    ###################################################################
    # Compute the sigmoid function for the input here.                #
    ###################################################################
    sig_f = 1 / (1 + np.exp( - x))
    return sig_f

In [4]:
def sigmoid_grad(f):
    g = f * (1- f)
    return g

In [5]:
def softmax(x):
    if x.ndim == 1:
        x -= np.min(x)
        x = np.exp(x)
        x /= np.sum(x)
    else:
        x -= np.min(x, axis = 1, keepdims = True)
        x = np.exp(x)
        x /= np.sum(x, axis = 1, keepdims = True)
    return x

In [6]:
# First implement a gradient checker by filling in the following functions
def gradcheck_naive(f, x):
    """ 
    Gradient check for a function f 
    - f should be a function that takes a single argument and outputs the cost and its gradients
    - x is the point (numpy array) to check the gradient at
    """ 

    rndstate = random.getstate()
    random.setstate(rndstate)  
    fx, grad = f(x) # Evaluate function value at original point
    h = 1e-4

    # Iterate over all indexes in x
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
    
        ### YOUR CODE HERE: try modifying x[ix] with h defined above to compute numerical gradients
        ### make sure you call random.setstate(rndstate) before calling f(x) each time, this will make it 
        ### possible to test cost functions with built in randomness later
        x[ix] += h 
        print x
        random.setstate(rndstate)
        fxph = f(x)[0]
        print fxph
        x[ix] -= 2 * h
        random.setstate(rndstate)
        fxmh = f(x)[0]
        print fxmh
        x[ix] += h
        numgrad = (fxph - fxmh) / (2 * h)  
        print numgrad
        # Compare gradients
        reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))
        if reldiff > 1e-5:
            print "Gradient check failed."
            print "First gradient error found at index %s" % str(ix)
            print "Your gradient: %f \t Numerical gradient: %f" % (grad[ix], numgrad)
            return
    
        it.iternext() # Step to next dimension

    print "Gradient check passed!"

In [7]:
def forward_backward_prop(data, labels, params):
    """ Forward and backward propagation for a two-layer sigmoidal network """
    ###################################################################
    # Compute the forward propagation and for the cross entropy cost, #
    # and backward propagation for the gradients for all parameters.  #
    ###################################################################
    
    ### Unpack network parameters (do not modify)
    t = 0
    W1 = np.reshape(params[t: t+dimensions[0]*dimensions[1]], (dimensions[0], dimensions[1]))
    t += dimensions[0]*dimensions[1]
    b1 = np.reshape(params[t: t+dimensions[1]], (1, dimensions[1]))
    t += dimensions[1]
    W2 = np.reshape(params[t: t+dimensions[1]*dimensions[2]], (dimensions[1], dimensions[2]))
    t += dimensions[1]*dimensions[2]
    b2 = np.reshape(params[t: t+dimensions[2]], (1, dimensions[2]))
    
    ### YOUR CODE HERE: forward propagation
    h = sigmoid(data.dot(W1) + b1)
    scores = softmax(h.dot(W2) + b2)
    cost = np.sum(-np.log(scores[labels ==1])) / N
    ### END YOUR CODE
    
    ## Compare softmax output with original output
    #print scores[1]
    #print labels[1]
    
    dscores = scores - labels
    #print dscores[1]
    dscores = dscores / N
    #print dscores[1]
    gradb2 = np.sum(dscores, axis=0)
    gradW2 = np.dot(h.T, dscores)
    
    grad_h = np.dot(dscores, W2.T)
    grad_h = sigmoid_grad(h) * grad_h
    
    gradb1 = np.sum(grad_h, axis=0)
    gradW1 = np.dot(data.T, grad_h)
    g1 = gradW1.flatten()
    ### Stack gradients (do not modify)
    grad = np.concatenate((g1, gradb1.flatten(), gradW2.flatten(), gradb2.flatten()))
    print grad.shape
    
    return cost, grad
    
    

In [8]:
forward_backward_prop(data, labels, params)
# Perform gradcheck on your neural network
print "=== For autograder ==="
gradcheck_naive(lambda params: forward_backward_prop(data, labels, params), params)

(115,)
=== For autograder ===
(115,)
[ -5.17527141e-01   1.17197391e+00   1.75367101e+00  -2.99915778e-01
  -4.71191307e-01   1.48274884e+00   1.67627931e+00  -1.06104203e+00
  -1.39948158e+00   3.03219257e-01   4.80946559e-01  -4.10935640e-01
  -9.95581226e-02   2.17805888e+00   1.38263508e+00  -2.88741738e-01
  -2.44844980e-01   7.51048720e-01  -1.08003095e+00  -6.51728843e-01
  -1.70030921e+00  -2.65938152e-01   9.08516532e-01   1.03832286e+00
   6.13358278e-01  -4.27413235e-01  -9.00722203e-01   1.03450216e+00
   1.07990251e-01   5.49597634e-01  -8.03000414e-01  -1.15823251e+00
  -4.85715954e-01   1.53098314e+00  -2.44007050e-01   9.18876435e-01
  -3.86520985e-01  -1.72353293e-04  -1.18699235e+00   1.42413921e-01
   5.53113469e-02   1.65152531e-02   1.98612026e+00   8.52999077e-01
  -4.00606892e-01   8.14499295e-02  -2.35950450e-01  -3.39788497e-01
  -1.65082951e-01   4.46210308e-01  -1.28194110e+00   1.52773916e+00
  -9.44284866e-01  -7.51471131e-01   7.85661263e-01  -1.59521669e+