# example of gradient checking

In [None]:
# Gradient checking: compare the analytical gradient with a numerical estimate,
# taking the affine layer as an example.
from gradient_check import eval_numerical_gradient_array
import numpy as np
from layers import *

# Small sizes keep the numerical gradient cheap to evaluate.
N = 2
D = 3
M = 4
x = np.random.normal(size=(N, D))
w = np.random.normal(size=(D, M))
b = np.random.normal(size=(M, ))
# Upstream gradient handed to the backward pass and to the numerical check.
dout = np.random.normal(size=(N, M))

# Do a forward pass first; the cache is what the backward pass consumes.
out, cache = affine_forward(x, w, b)


def f(w):
    """Return the affine output as a function of w only.

    The [0] picks the output out of the (output, cache) pair returned by
    affine_forward; x and b are taken from the enclosing cell.
    """
    return affine_forward(x, w, b)[0]


# Analytical gradient you wrote; [1] picks dw out of the (dx, dw, db) result.
grad = affine_backward(dout, cache)[1]
# Numerical gradient computed with the provided utility function.
ngrad = eval_numerical_gradient_array(f, w, dout)
print(grad)
print(ngrad)

# They should be similar enough within some small error tolerance. Report the
# maximum relative error so the comparison does not rely on eyeballing the two
# matrices; the 1e-8 floor avoids dividing by zero when both entries are zero.
# As a rule of thumb, a value around 1e-7 or smaller indicates agreement.
rel_error = np.max(
    np.abs(grad - ngrad) / np.maximum(1e-8, np.abs(grad) + np.abs(ngrad))
)
print('max relative error for dw:', rel_error)

# example of training a network

In [1]:
# Read the MNIST arrays out of the MATLAB file.
import scipy.io
import numpy as np

data = scipy.io.loadmat("mnist_data.mat")
X = data['training_data']
# Labels are flattened to a 1-D vector.
y = data['training_labels'].ravel()
X_test = data['test_data']

# Randomly partition the labelled samples: the first 50000 shuffled indices
# form the training set, the remainder the validation set.
num_train = len(X)
indices = np.arange(num_train)
np.random.shuffle(indices)
train_indices = indices[:50000]
val_indices = indices[50000:]
X_train, y_train = X[train_indices], y[train_indices]
X_val, y_val = X[val_indices], y[val_indices]

from solver import Solver
from classifiers.fc_net import FullyConnectedNet

In [None]:
# Bundle the training and validation splits into the dict handed to Solver.
data = {
      'X_train': X_train,
      'y_train': y_train,
      'X_val': X_val,
      'y_val': y_val}

# TODO: tune the hyperparameters.
# The values below are only runnable starting points (the cell was a
# SyntaxError while they were left blank); they are NOT tuned choices.
hyperparams = {'lr_decay': 0.95,
               'num_epochs': 10,
               'batch_size': 100,
               'learning_rate': 1e-3,
              }

# TODO: tune the number of units in your hidden layers.
# This should be a list with one entry (number of units) per hidden layer;
# a single 100-unit layer is just a placeholder to start from.
hidden_dim = [100]

# input_dim=784 is the per-sample input size passed to the network
# (presumably flattened 28x28 MNIST images -- confirm against the data file).
model = FullyConnectedNet(input_dim=784,
                          hidden_dim=hidden_dim)
# Train with plain SGD, forwarding the hyperparameters chosen above.
solver = Solver(model, data,
                update_rule='sgd',
                optim_config={
                  'learning_rate': hyperparams['learning_rate'],
                },
                lr_decay=hyperparams['lr_decay'],
                num_epochs=hyperparams['num_epochs'],
                batch_size=hyperparams['batch_size'],
                print_every=100)
solver.train()