In [1]:
import numpy as np
import matplotlib.pyplot as plt
from utils import load_data, load_config, write_to_file, one_hot_encoding

# Load configuration
config = load_config('./config.yaml')

# Load the data and reshape from (32 x 32) to (1024 x 1)
x_train, y_train, x_test, y_test = load_data()

# One-hot encoding
y_train = np.eye(len(y_train), 10)[y_train]
y_test = np.eye(len(y_test), 10)[y_test]

x_train = np.array([image.reshape((1024)) for image in x_train], dtype='float')
x_test = np.array([image.reshape((1024)) for image in x_test], dtype='float')

# Create validation set out of training data.
num = int(len(x_train) * 0.8)
[x_train, x_val]= np.split(x_train, [num])
[y_train, y_val] = np.split(y_train, [num])

In [2]:
# Calculate feature mean and standard deviation for x_train, and use them to
# Z score x_train, X_val and X_test
def z_score_train_test(train, val, test):
    train_T = train.T
    val_T = val.T
    test_T = test.T
    for i in range(len(train_T)):
        mean = np.mean(train_T[i])
        SD = np.std(train_T[i])
        train_T[i] = (train_T[i] - mean) / SD
        val_T[i] = (val_T[i] - mean) / SD
        test_T[i] = (test_T[i] - mean) / SD
    return train_T.T, val_T.T, test_T.T

# Z-scoring
x_train, x_val, x_test = z_score_train_test(x_train, x_val, x_test)

In [3]:
train_acc = []
valid_acc = []
train_loss = []
valid_loss = []
best_model = None

In [4]:
from neuralnet import *

model = NeuralNetwork(config=config)

In [5]:
def accuracy(y, t):
    y = np.argmax(y, axis=1)
    t = np.argmax(t, axis=1)
    res = [y_hat == t_hat for y_hat, t_hat in zip(y, t)]
    return np.sum(res) / len(res)

In [8]:
# SGD
for i in range(config['epochs']): 
    # Randomize the order of the indices into the training set
    shuffled_indices = np.random.permutation(len(x_train))
    x_train = x_train[shuffled_indices]
    y_train = y_train[shuffled_indices]
    for j in range(0, len(x_train), config['batch_size']):
        if (j + config['batch_size'] < len(x_train)):
            batch_x = x_train[[j, j + config['batch_size']]]
            batch_y = y_train[[j, j + config['batch_size']]]
        else:
            batch_x = x_train[[j, len(x_train) - 1]]
            batch_y = y_train[[j, len(x_train) - 1]]
        y, loss = model(x=batch_x, targets=batch_y)
        model.backward()
        
    y, loss = model.forward(x_val, y_val)
    acc = accuracy(y, y_val)
    print('Epoch', i, 'Loss', loss, 'Accuracy', acc)

0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.5
0.5
0.0
0.0
0.0
0.5
0.5
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.0
0.5
0.0
0.5
0.5
0.0
0.0
0.5
0.5
0.0
0.5
0.0
0.5
0.0
0.0
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.0
0.0
0.0
0.5
0.5
0.0
0.0
0.5
0.5
0.5
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.0
0.5
0.5
0.0
0.0
0.5
0.0
0.0
0.5
0.5
0.0
0.5
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.5
0.0
0.0
0.5
0.5
0.0
0.0
0.5
0.0
0.0
0.0
0.5
0.5
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.5
0.5
0.0
0.5
0.0
0.0
0.0
0.0
0.5
0.5
0.5
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.5
0.0
0.5
0.5
0.0
0.5
0.5
0.5
0.0
0.0
0.0
0.0
0.0
0.5
0.5
0.0
1.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.5
0.0
0.0
0.5
0.5
0.0
0.0
0.5


0.5
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.5
0.5
0.0
0.5
0.5
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.5
0.5
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.5
0.0
0.5
0.0
0.5
0.5
0.0
0.0
0.0
0.5
0.5
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.5
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.5
0.0
0.0
0.0
0.5
0.5
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.5
0.0
0.5
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.5
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.5
0.0
0.5
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.5
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0


0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.5
0.5
0.5
0.0
0.0
0.5
0.0
0.5
0.0
0.0
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.0
0.5
0.0
0.5
0.5
0.0
0.5
0.0
0.5
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.0
0.0
0.5
0.5
0.5
1.0
0.5
0.0
0.0
0.0
1.0
0.5
0.0
0.0
0.5
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.5
0.5
0.0
0.0
0.0
0.0
1.0
0.5
0.5
0.5
0.0
0.5
0.5
0.0
0.0
0.5
0.5
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.5
0.5
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.0
0.0
0.5
0.0
0.5
0.5
0.5
0.0
0.0
0.5
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.5
0.5
0.5
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.5
0.5
0.0
0.5
0.5
0.5
0.0
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.0
0.0
0.0
0.0


0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.5
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.5
0.5
0.0
0.5
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.5
0.5
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.5
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.5
0.5
0.0
0.0
0.5
0.5
0.0
0.0
0.0
1.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.5
0.0
0.5
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.5
0.0
0.0
0.5
0.0
0.5
0.0
0.5
0.5
0.0
0.0
0.0
0.0
0.0


0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.5
0.5
0.0
0.5
0.0
0.5
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.5
0.5
0.5
0.0
0.0
0.0
0.0
0.0
0.5
0.5
0.0
0.0
0.0
0.0
0.5
0.5
0.5
0.0
0.5
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.5
0.5
0.0
0.5
0.0
0.5
0.5
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.5
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.5
0.0
0.5
0.5
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.5
0.5
0.0
0.0
0.0
0.0
0.0
0.5
0.5
0.5
0.5
0.5
0.0
0.5
0.5
0.5
0.0
0.0
0.0
0.5
0.0
0.0
0.5
0.0
0.5
0.5
0.0
0.0
0.0
0.5
0.5
0.5
0.0
0.0
0.5
0.0
0.5
0.0
0.5
0.5
0.0
0.0
0.0
0.0
0.5
0.5
0.0
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.5
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.0
0.0
0.0
0.0
0.5
0.0
0.5
0.0
0.5
0.5
0.5
0.0
0.0
0.5
0.5
0.0
0.5
0.0
0.0
0.5
0.5
0.0
0.5
0.0
0.5
0.5
0.0
0.0
0.5
0.5
0.0
0.0
0.0
0.0
0.0
0.0
0.5
0.5
0.5
0.5
0.5
0.5
0.0
0.0
0.0


KeyboardInterrupt: 

In [None]:
model = NeuralNetwork(config=config)

# One output bias weight
epsilon = 1e-2
model.layers[2].b[0][8] += epsilon
y = model.forward(x_val)
e_w_plus = model.loss(y, y_val)
model.layers[2].b[0][8] -= epsilon
y = model.forward(x_val)
e_w_minus = model.loss(y, y_val)
e_w_diff = e_w_plus - e_w_minus
expected_d_w = (e_w_diff / 2 * epsilon)
expected_d_w

In [None]:
model = NeuralNetwork(config=config)

# One hidden bias weight
epsilon = 1e-2
model.layers[0].b[0][8] += epsilon
y = model.forward(x_val)
e_w_plus = model.loss(y, y_val)
model.layers[0].b[0][8] -= epsilon
y = model.forward(x_val)
e_w_minus = model.loss(y, y_val)
e_w_diff = e_w_plus - e_w_minus
expected_d_w = (e_w_diff / 2 * epsilon)
expected_d_w

In [None]:
model = NeuralNetwork(config=config)

# Choose two (1/2) hidden to output bias weight
epsilon = 1e-2
model.layers[2].w[0][8] += epsilon
y = model.forward(x_val)
e_w_plus = model.loss(y, y_val)
model.layers[2].w[0][8] -= epsilon
y = model.forward(x_val)
e_w_minus = model.loss(y, y_val)
e_w_diff = e_w_plus - e_w_minus
expected_d_w = (e_w_diff / 2 * epsilon)
expected_d_w

In [None]:
model = NeuralNetwork(config=config)

# Choose two (2/2) output bias weight
epsilon = 1e-2
model.layers[2].w[8][0] += epsilon
y = model.forward(x_val)
e_w_plus = model.loss(y, y_val)
model.layers[2].w[8][0] -= epsilon
y = model.forward(x_val)
e_w_minus = model.loss(y, y_val)
e_w_diff = e_w_plus - e_w_minus
expected_d_w = (e_w_diff / 2 * epsilon)
expected_d_w