In [1]:
import numpy as np
import matplotlib.pyplot as plt
import argparse

In [2]:
def softmax(x):
    e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    e_s = np.sum(e_x, axis=1, keepdims=True)
    e = e_x / e_s
    return e

In [3]:
def sigmoid(x):
    sigm = 1 / (1 + np.exp(-x))
    return sigm

In [4]:
def get_initial_params(input_size, num_hidden, num_output):
    parameters = {}
    parameters["W1"] = np.random.standard_normal((input_size, num_hidden))
    parameters["b1"] = np.zeros((1, num_hidden), dtype=float)
    parameters["W2"] = np.random.standard_normal((num_hidden, num_output))
    parameters["b2"] = np.zeros((1, num_output), dtype=float)
    return parameters

In [5]:
def forward_prop(data, labels, params):
    W1 = params["W1"]
    b1 = params["b1"]
    W2 = params["W2"]
    b2 = params["b2"]

    z1 = data @ W1 + b1
    a = sigmoid(z1)
    z2 = a @ W2 + b2
    y = softmax(z2)
    cross_entropy = np.multiply(labels, -np.log(y)).sum()
    cross_entropy /= y.shape[0]

    return a, y, cross_entropy

In [6]:
def backward_prop(data, labels, params, forward_prop_func):
    W1 = params["W1"]
    b1 = params["b1"]
    W2 = params["W2"]
    b2 = params["b2"]
    weight = data.shape[0]

    a, y, cross_entropy = forward_prop_func
    diff_1 = (y - labels)
    gradient_W2 = a.T @ diff_1
    gradient_b2 = np.sum(diff_1, axis=0, keepdims=True)

    diff_2 =  np.multiply((diff_1 @ W2.T), a * (1 - a))
    gradient_W1 = data.T @ diff_2
    gradient_b1 = np.sum(diff_2, axis=0, keepdims=True)

    gradient = {}
    gradient["W1"] = gradient_W1 / weight
    gradient["b1"] = gradient_b1 / weight
    gradient["W2"] = gradient_W2 / weight
    gradient["b2"] = gradient_b2 / weight
    return gradient

In [7]:
def backward_prop_regularized(data, labels, params, forward_prop_func, reg):

SyntaxError: unexpected EOF while parsing (<ipython-input-7-64b82c62908b>, line 1)

In [9]:
def gradient_descent_epoch(train_data, train_labels, learning_rate, batch_size, params, forward_prop_func, backward_prop_func):
    params['W1'] -= learning_rate * backward_prop_func['W1']
    params['b1'] -= learning_rate * backward_prop_func['b1']
    params['W2'] -= learning_rate * backward_prop_func['W2']
    params['b2'] -= learning_rate * backward_prop_func['b2']
    return

In [10]:
def nn_train(
    train_data, train_labels, dev_data, dev_labels,
    get_initial_params_func, forward_prop_func, backward_prop_func,
    num_hidden=300, learning_rate=5, num_epochs=30, batch_size=1000):

    (nexp, dim) = train_data.shape

    params = get_initial_params_func(dim, num_hidden, 10)

    cost_train = []
    cost_dev = []
    accuracy_train = []
    accuracy_dev = []
    for epoch in range(num_epochs):
        gradient_descent_epoch(train_data, train_labels,
            learning_rate, batch_size, params, forward_prop_func, backward_prop_func)

        h, output, cost = forward_prop_func(train_data, train_labels, params)
        cost_train.append(cost)
        accuracy_train.append(compute_accuracy(output,train_labels))
        h, output, cost = forward_prop_func(dev_data, dev_labels, params)
        cost_dev.append(cost)
        accuracy_dev.append(compute_accuracy(output, dev_labels))

    return params, cost_train, cost_dev, accuracy_train, accuracy_dev

def nn_test(data, labels, params):
    h, output, cost = forward_prop(data, labels, params)
    accuracy = compute_accuracy(output, labels)
    return accuracy

def compute_accuracy(output, labels):
    accuracy = (np.argmax(output,axis=1) ==
        np.argmax(labels,axis=1)).sum() * 1. / labels.shape[0]
    return accuracy

def one_hot_labels(labels):
    one_hot_labels = np.zeros((labels.size, 10))
    one_hot_labels[np.arange(labels.size),labels.astype(int)] = 1
    return one_hot_labels

def read_data(images_file, labels_file):
    x = np.loadtxt(images_file, delimiter=',')
    y = np.loadtxt(labels_file, delimiter=',')
    return x, y

def run_train_test(name, all_data, all_labels, backward_prop_func, num_epochs):
    params, cost_train, cost_dev, accuracy_train, accuracy_dev = nn_train(
        all_data['train'], all_labels['train'],
        all_data['dev'], all_labels['dev'],
        get_initial_params, forward_prop, backward_prop_func,
        num_hidden=300, learning_rate=5, num_epochs=num_epochs, batch_size=1000
    )

    t = np.arange(num_epochs)

    fig, (ax1, ax2) = plt.subplots(2, 1)

    ax1.plot(t, cost_train,'r', label='train')
    ax1.plot(t, cost_dev, 'b', label='dev')
    ax1.set_xlabel('epochs')
    ax1.set_ylabel('loss')
    ax1.set_title('With Regularization')
    ax1.legend()

    ax2.plot(t, accuracy_train,'r', label='train')
    ax2.plot(t, accuracy_dev, 'b', label='dev')
    ax2.set_xlabel('epochs')
    ax2.set_ylabel('accuracy')
    ax2.legend()

    fig.savefig('./' + name + '.pdf')

    accuracy = nn_test(all_data['test'], all_labels['test'], params)
    print('For model %s, got accuracy: %f' % (name, accuracy))

In [34]:
parser = argparse.ArgumentParser(description='Train a nn model.')
parser.add_argument('--num_epochs', type=int, default=30)





_StoreAction(option_strings=['--num_epochs'], dest='num_epochs', nargs=None, const=None, default=30, type=<class 'int'>, choices=None, help=None, metavar=None)

In [36]:
np.random.seed(100)
train_data, train_labels = read_data('./images_train.csv', './labels_train.csv')
train_labels = one_hot_labels(train_labels)
p = np.random.permutation(60000)
train_data = train_data[p,:]
train_labels = train_labels[p,:]

dev_data = train_data[0:10000,:]
dev_labels = train_labels[0:10000,:]
train_data = train_data[10000:,:]
train_labels = train_labels[10000:,:]

mean = np.mean(train_data)
std = np.std(train_data)
train_data = (train_data - mean) / std
dev_data = (dev_data - mean) / std

test_data, test_labels = read_data('./images_test.csv', './labels_test.csv')
test_labels = one_hot_labels(test_labels)
test_data = (test_data - mean) / std

all_data = {
    'train': train_data,
    'dev': dev_data,
    'test': test_data
}

all_labels = {
    'train': train_labels,
    'dev': dev_labels,
    'test': test_labels
}

In [37]:
run_train_test('baseline', all_data, all_labels, backward_prop, args.num_epochs)

NameError: name 'args' is not defined

In [None]:
run_train_test('baseline', all_data, all_labels, backward_prop, args.num_epochs)
run_train_test('regularized', all_data, all_labels,
    lambda a, b, c, d: backward_prop_regularized(a, b, c, d, reg=0.0001),
    args.num_epochs)