In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize

NUM_INPUT = 784  # Number of input neurons
NUM_HIDDEN = 40  # Number of hidden neurons
NUM_OUTPUT = 10  # Number of output neurons
NUM_CHECK = 5  # Number of examples on which to check the gradient

### Helper Functions

In [23]:
def reluprime(x):
    relux = np.zeros(x.shape)
    relux[:, np.where(x > 0)] = 1
    return relux

In [29]:
def relu(x):
    relux = np.zeros(x.shape)
    relux[np.where(x > 0), :] = x[np.where(x > 0), :]
    return relux

In [22]:
def hotten(y):
    new_Y = np.zeros((y.shape[0],10))
    for i,val in enumerate(y):
        new_Y[i][val] = 1
    return new_Y

In [28]:
def softmax(x):
    z = np.exp(x) / np.sum(np.exp(x))
    return z

In [64]:
def get_yHat(x,w):
    W1, b1, W2, b2 = unpack(w)
    z1 = np.dot(W1,x.T) + b1[:,None]
    h = relu(z1)
    z2 = np.dot(W2,h) + b2[:,None]
    return softmax(z2)

### Given Functions

In [2]:
# Given a vector w containing all the weights and biased vectors, extract
# and return the individual weights and biases W1, b1, W2, b2.
# This is useful for performing a gradient check with check_grad.
def unpack (w):
    W1 = w[0:NUM_INPUT * NUM_HIDDEN]
    W1 = np.reshape(W1, (NUM_HIDDEN, NUM_INPUT))
    w = np.delete(w, np.arange(NUM_INPUT * NUM_HIDDEN))
    b1 = w[0:NUM_HIDDEN]
    w = np.delete(w, np.arange(NUM_HIDDEN))
    W2 = w[0:NUM_HIDDEN * NUM_OUTPUT]
    W2 = np.reshape(W2, (NUM_OUTPUT, NUM_HIDDEN))
    w = np.delete(w, np.arange(NUM_HIDDEN * NUM_OUTPUT))
    b2 = w[0:NUM_OUTPUT]
    return W1, b1, W2, b2

In [34]:
# Given individual weights and biases W1, b1, W2, b2, concatenate them and
# return a vector w containing all of them.
# This is useful for performing a gradient check with check_grad.
def pack (W1, b1, W2, b2):
    W = W1.flatten()
    W = np.append(W, b1)
    W = np.append(W, W2.flatten())
    W = np.append(W, b2)
    return W

In [35]:
# Load the images and labels from a specified dataset (train or test).
def loadData (which):
    images = np.load("fashion_mnist_{}_images.npy".format(which))
    labels = np.load("fashion_mnist_{}_labels.npy".format(which))
    return images, labels

In [106]:
# Given training images X, associated labels Y, and a vector of combined weights
# and bias terms w, compute and return the cross-entropy (CE) loss. You might
# want to extend this function to return multiple arguments (in which case you
# will also need to modify slightly the gradient check code below).
def fCE (X, Y, w):
    yHat = get_yHat(X,w)
    cost = np.mean(np.dot(Y,np.log(yHat.T)))
    return cost

In [46]:
# Given training images X, associated labels Y, and a vector of combined weights
# and bias terms w, compute and return the gradient of fCE. You might
# want to extend this function to return multiple arguments (in which case you
# will also need to modify slightly the gradient check code below).
def gradCE (X, Y, w):
    W1, b1, W2, b2 = unpack(w)
    return grad

In [47]:
# Given training and testing datasets and an initial set of weights/biases b,
# train the NN.
def train (trainX, trainY, testX, testY, w):
    pass

## Main

In [78]:
trainX, trainY = loadData("train")
testX, testY = loadData("test")

# Initialize weights randomly
W1 = 2*(np.random.random(size=(NUM_HIDDEN, NUM_INPUT))/NUM_INPUT**0.5) - 1./NUM_INPUT**0.5
b1 = 0.01 * np.ones(NUM_HIDDEN)
W2 = 2*(np.random.random(size=(NUM_OUTPUT, NUM_HIDDEN))/NUM_HIDDEN**0.5) - 1./NUM_HIDDEN**0.5
b2 = 0.01 * np.ones(NUM_OUTPUT)

In [80]:
# Concatenate all the weights and biases into one vector; this is necessary for check_grad
w = pack(W1, b1, W2, b2)
w.shape

(31810,)

In [92]:
idxs

array([32006, 26457,  4714, 26828,   811])

In [100]:
trainY[idxs]

array([3, 5, 8, 8, 2], dtype=int32)

In [113]:
idxs = np.random.permutation(trainX.shape[0])[0:NUM_CHECK]
np.atleast_2d(trainX[idxs, :])
fCE((trainX[idxs, :]),(trainY[idxs]), w)

-1774.4592347265639

In [111]:
# Check that the gradient is correct on just a few examples (randomly drawn).
# idxs = np.random.permutation(trainX.shape[0])[0:NUM_CHECK]
# print(scipy.optimize.check_grad(lambda w_: fCE(np.atleast_2d(trainX[idxs, :]), np.atleast_2d(trainY[idxs, :]), w_), \
#                                     lambda w_: gradCE(np.atleast_2d(trainX[idxs, :]), np.atleast_2d(trainY[idxs, :]),
#                                                       w_), \
#                                     w))

In [9]:
W1.shape

(40, 784)

In [10]:
W2.shape

(10, 40)

In [11]:
b1.shape

(40,)

In [12]:
b2.shape

(10,)