In [3]:
import numpy as np

from dataset import get_2D_normalised

In [7]:
class TwoLayerNN:
    """Two-layer fully connected classifier: affine -> ReLU -> affine.

    The network is trained by mini-batch SGD on softmax cross-entropy plus an
    L2 penalty on both weight matrices. All learnable parameters live in
    ``self.params`` under the keys ``'W1'``, ``'b1'``, ``'W2'`` and ``'b2'``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the parameter dictionary.

        Weight matrices are drawn from a standard normal and divided by the
        square root of their fan-in; biases start at zero.

        Args:
            input_size: number of input features (columns of ``X``).
            hidden_size: number of hidden ReLU units.
            output_size: number of output scores per sample.
        """
        self.params = {}
        self.params['W1'] = np.random.randn(input_size, hidden_size) / np.sqrt(input_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = np.random.randn(hidden_size, output_size) / np.sqrt(hidden_size)
        self.params['b2'] = np.zeros(output_size)

    def forward(self, X):
        """Return the raw (unnormalised) class scores for ``X``.

        The hidden activations are cached on ``self.h1`` because ``train``
        reads them during back-propagation.
        """
        self.h1 = np.maximum(0, np.dot(X, self.params['W1']) + self.params['b1'])
        scores = np.dot(self.h1, self.params['W2']) + self.params['b2']
        return scores

    @staticmethod
    def _log_softmax(scores):
        """Row-wise log-softmax that cannot overflow.

        Subtracting each row's maximum before exponentiating leaves the
        softmax unchanged mathematically but keeps every exponent <= 0, so
        ``np.exp`` never produces ``inf`` (and the result never becomes nan).
        """
        shifted = scores - np.max(scores, axis=1, keepdims=True)
        return shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))

    def loss(self, X, y, reg):
        """Mean softmax cross-entropy on ``(X, y)`` plus the L2 penalty.

        Args:
            X: 2-D array of inputs, one row per sample.
            y: integer class index for each row of ``X``.
            reg: L2 regularisation strength.

        Returns:
            ``data_loss + 0.5 * reg * (||W1||^2 + ||W2||^2)`` as a scalar.
        """
        num_samples = X.shape[0]
        log_probs = self._log_softmax(self.forward(X))

        data_loss = -np.sum(log_probs[np.arange(num_samples), y]) / num_samples
        reg_loss = 0.5 * reg * (
            np.sum(self.params['W1'] * self.params['W1'])
            + np.sum(self.params['W2'] * self.params['W2'])
        )
        return data_loss + reg_loss

    def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_epochs=100, batch_size=200, verbose=True, early_stop=False, patience=5, tol=1e-3):
        """Fit the network with mini-batch SGD, updating ``self.params`` in place.

        Args:
            X: 2-D array of training inputs, one row per sample.
            y: integer class index for each row of ``X``.
            learning_rate: SGD step size.
            reg: L2 regularisation strength.
            num_epochs: maximum number of epochs.
            batch_size: rows sampled (without replacement) per step; clamped
                to the number of training rows when larger.
            verbose: print the full-dataset loss every 10th epoch.
            early_stop: stop once the full-dataset loss has failed to improve
                by more than ``tol`` for ``patience`` consecutive epochs.
            patience: number of non-improving epochs tolerated.
            tol: minimum decrease in loss that counts as an improvement.

        Raises:
            ValueError: if ``X`` has no rows or ``batch_size`` is below 1.
        """
        num_train = X.shape[0]
        if num_train == 0 or batch_size < 1:
            raise ValueError("train() needs at least one sample and batch_size >= 1")

        # Sampling without replacement cannot draw more rows than exist, so a
        # dataset smaller than batch_size is trained on as one full batch.
        batch_size = min(batch_size, num_train)
        iterations_per_epoch = max(num_train // batch_size, 1)
        rows = np.arange(batch_size)
        best_loss = float('inf')
        count = 0

        for epoch in range(num_epochs):
            for _ in range(iterations_per_epoch):
                batch_indices = np.random.choice(num_train, batch_size, replace=False)
                X_batch = X[batch_indices]
                y_batch = y[batch_indices]

                scores = self.forward(X_batch)

                # Gradient of mean cross-entropy w.r.t. scores: (probs - onehot) / N
                dscores = np.exp(self._log_softmax(scores))
                dscores[rows, y_batch] -= 1
                dscores /= batch_size

                dW2 = np.dot(self.h1.T, dscores) + reg * self.params['W2']
                db2 = np.sum(dscores, axis=0)

                dh1 = np.dot(dscores, self.params['W2'].T)
                dh1[self.h1 <= 0] = 0  # ReLU passes no gradient where it was inactive

                dW1 = np.dot(X_batch.T, dh1) + reg * self.params['W1']
                db1 = np.sum(dh1, axis=0)

                self.params['W1'] -= learning_rate * dW1
                self.params['b1'] -= learning_rate * db1
                self.params['W2'] -= learning_rate * dW2
                self.params['b2'] -= learning_rate * db2

            # Evaluate the full-dataset loss at most once per epoch, and only
            # when something actually consumes it.
            report = verbose and epoch % 10 == 0
            if not (report or early_stop):
                continue
            epoch_loss = self.loss(X, y, reg)

            if report:
                print("Epoch {}, loss = {:.4f}".format(epoch, epoch_loss))

            if early_stop:
                if epoch_loss < best_loss - tol:
                    best_loss = epoch_loss
                    count = 0
                else:
                    count += 1

                if count == patience:
                    print("Early stopping: no improvement for {} epochs".format(patience))
                    return

In [5]:
# Load the train/test splits from the project-local dataset helper.
(x_train, y_train), (x_test, y_test) = get_2D_normalised()

# Fail fast if features and labels are misaligned, rather than hitting an
# indexing error deep inside training or a silently wrong accuracy later.
assert len(x_train) == len(y_train), "train features and labels differ in length"
assert len(x_test) == len(y_test), "test features and labels differ in length"

In [8]:
# Seed NumPy's global RNG: TwoLayerNN draws both its initial weights and its
# mini-batch indices from np.random, so without a seed every run of this cell
# produces a different model and a different loss curve.
SEED = 42
np.random.seed(SEED)

input_size = x_train.shape[1]
hidden_size = 50
output_size = 10  # presumably the number of classes in the dataset — TODO confirm

net = TwoLayerNN(input_size, hidden_size, output_size)

net.train(x_train, y_train, learning_rate=1e-2, reg=1e-4, num_epochs=100, batch_size=200, verbose=True, early_stop=True, patience=5, tol=1e-3)

Epoch 0, loss = 1.9829
Epoch 10, loss = 1.6903
Epoch 20, loss = 1.6011
Epoch 30, loss = 1.5633
Epoch 40, loss = 1.4857
Epoch 50, loss = 1.4577
Epoch 60, loss = 1.4247
Epoch 70, loss = 1.3948
Epoch 80, loss = 1.3732
Early stopping: no improvement for 5 epochs
Test accuracy: 0.4754


In [9]:
# Score the held-out split: the predicted class for each row is the arg-max
# of the network's raw scores; accuracy is the fraction matching y_test.
test_scores = net.forward(x_test)
test_pred = np.argmax(test_scores, axis=1)
test_hits = test_pred == y_test
test_acc = test_hits.mean()
print("Test accuracy: {:.4f}".format(test_acc))

Test accuracy: 0.4754
