In [None]:
import importlib
from network import network
import layers
import utils
import losses
import nonlinearities

Here, we train a fully connected "deep" neural net on the MNIST dataset. The exact choice of hyperparameters (including the learning rate) doesn't matter much, because almost any reasonable setting works on MNIST. 

In [None]:
import numpy as np
import scipy.io
import scipy.special
import math

# ---- Dataset locations ----
# MATLAB .mat files holding the train and test splits.
TRAIN_DATA_LOC = "./digit-dataset/train.mat"
TEST_DATA_LOC = "./digit-dataset/test.mat"

# ---- Network dimensions ----
# INPUT_SIZE: number of features per flattened image.
# OUTPUT_SIZE: number of classes (width of a one-hot label vector).
INPUT_SIZE = 784
OUTPUT_SIZE = 10

# Widths of the hidden layers, in order; one list entry per hidden layer.
HIDDEN_LAYERS = [128, 64]

# ---- Training hyperparameters ----
# These are defaults only; values can also be passed to train() directly.
NUM_EPOCHS = 50
BATCH_SIZE = 500
LEARNING_RATE = 0.3

# Load the raw train/test dictionaries from the .mat files.
train_dataset = scipy.io.loadmat(TRAIN_DATA_LOC)
test_dataset = scipy.io.loadmat(TEST_DATA_LOC)

train_data = train_dataset['train_images']
train_labels = train_dataset['train_labels']
test_data = test_dataset['test_images']

# Labels to one-hot: row k of the identity matrix is the one-hot vector for
# class k. Index with a flat 1-D integer array -- a nested Python list is
# interpreted as an array index by current NumPy, which would add a spurious
# leading axis and yield (1, n, OUTPUT_SIZE) instead of (n, OUTPUT_SIZE).
# NOTE(review): presumably train_labels is stored as an (n, 1) column -- confirm.
train_labels_onehot = np.eye(OUTPUT_SIZE)[train_labels.ravel().astype(int)]

# Simple normalization. Both splits are divided by the *training* maximum so
# that train and test inputs share exactly the same scale.
pixel_scale = np.amax(train_data)
train_data = train_data / pixel_scale
test_data = test_data / pixel_scale

# Reshape into matrices of shape (n, feature_size): the sample index is the
# last axis of the loaded image arrays, so flatten the leading axes into
# INPUT_SIZE features and transpose to put samples first.
train_data = train_data.reshape(INPUT_SIZE, -1).T
test_data = test_data.reshape(INPUT_SIZE, -1).T

# Sanity check: exactly one one-hot row per training sample.
assert train_labels_onehot.shape == (train_data.shape[0], OUTPUT_SIZE), \
    "one-hot labels do not line up with the training samples"


Now create a feedforward neural network with a softmax cross-entropy loss and train it. 

In [None]:
# The loss object is handed to the network when it is constructed.
loss = losses.SoftmaxCrossEntropyLoss()
nn = network(loss)

# Layer widths from input to output; each consecutive pair defines one layer.
units = [INPUT_SIZE] + HIDDEN_LAYERS + [OUTPUT_SIZE]
final_layer_idx = len(units) - 2

for layer_idx, (n_in, n_out) in enumerate(zip(units[:-1], units[1:])):
    # Every layer gets a sigmoid except the final one, which is given None.
    if layer_idx == final_layer_idx:
        activation = None
    else:
        activation = nonlinearities.Sigmoid()
    # NOTE(review): the meaning of the 1e-1 argument is defined by
    # layers.FullyConnected, which is not visible here -- confirm.
    nn.add_layer(layers.FullyConnected(n_in, n_out, 1e-1, activation))

# NOTE(review): the positional values 64, 200, 0.3 are passed as literals; the
# NUM_EPOCHS / BATCH_SIZE / LEARNING_RATE constants are not used here. Confirm
# the parameter order against network.train() before swapping them in.
nn.train((train_data, train_labels_onehot), 64, 200, 0.3)