In [1]:
import numpy as np

In [2]:
class Layer:
  def __init__(self, num_inputs, num_nodes):
    # init weights Glorot and Bengio 2010
    self.weights = (2*np.random.random((num_inputs, num_nodes)) - 1) * np.sqrt(6.0 / (num_nodes + num_nodes))
    self.bias = np.zeros((1, num_nodes))

    # Kaiming weights
    self.dweights = np.zeros((num_inputs, num_nodes))
    self.dbias = np.zeros((1, num_nodes))

In [4]:
class NeuralNetwork:
    def __init__(self, X, y, num_hidden_units = 25, l1_lambda = 1e-3, learning_rate = 0.1):
        # m for #training examples and n for #features
        self.n_obs, self.n_features = X.shape

        # regularization term lambda_ (lambda is reserved keyword)
        self.lambda_ = l1_lambda
        self.learning_rate = learning_rate

        self.X = X
        self.y = y

        self.l1 = Layer(self.n_features, num_hidden_units)
        self.l2 = Layer(num_hidden_units, len(np.unique(y)))

        self.a0 = X
        self.a1 = np.zeros((self.n_obs, num_hidden_units))
        self.a2 = np.zeros((self.n_obs, len(np.unique(y))))

    @staticmethod
    def stable_softmax(z):
        exps = np.exp(z - np.max(z))
        return exps / np.sum(exps, axis=1, keepdims=True)

    def forward_prop(self, X):
        W2 = self.l2.weights
        W1 = self.l1.weights
        b2 = self.l2.bias
        b1 = self.l1.bias

        # forward prop
        a0 = X
        z1 = np.dot(a0, W1) + b1

        # apply nonlinearity (relu)
        a1 = np.maximum(0, z1)
        z2 = np.dot(a1, W2) + b2

        probs = NeuralNetwork.stable_softmax(z2)

        self.a0 = X
        self.probs = probs
        self.a1 = a1
        self.a2 = probs
        return probs

    def back_prop(self):
        W2 = self.l2.weights
        W1 = self.l1.weights
        b2 = self.l2.bias
        b1 = self.l1.bias

        dz2 = self.probs
        dz2[np.arange(self.n_obs), self.y] -= 1
        dz2 /= self.n_obs

        self.l2.dweights = np.dot(self.a1.T, dz2) + self.lambda_ * W2
        self.l1.dbias = np.sum(dz2, axis=0, keepdims=True)

        dz1 = np.dot(dz2, W2.T)
        dz1 = dz1 * (self.a1 > 0)

        self.l1.dweights = np.dot(self.a0.T, dz1) + self.lambda_ * W1
        self.l1.dbias = np.sum(dz1, axis=0, keepdims=True)

        return

    def update_weights(self):
        lr = self.learning_rate

        # take a step along negative gradient
        self.l2.weights -= lr * self.l2.dweights
        self.l1.weights -= lr * self.l1.dweights
        self.l2.bias -= lr * self.l2.dbias
        self.l1.bias -= lr * self.l1.dbias

        return 

    def compute_loss(self):
        W2 = self.l2.weights
        W1 = self.l1.weights

        y = self.y.astype(int)
        data_loss = np.sum(-np.log(self.probs[np.arange(self.n_obs), self.y]) / self.n_obs)
        l2_reg_loss = 0.5 * self.lambda_ * np.sum(W1 * W1) + 0.5 * self.lambda_ * np.sum(W2 * W2)
        loss = data_loss + l2_reg_loss
        return loss

    def train(self, X, y, epochs=10000):
        for i in range(epochs):
            self.forward_prop(X)

            # log loss along the way
            if i % 2000 == 0:
              loss = self.compute_loss()
              print(f"iteration: {i} loss: {loss}")

            self.back_prop()
            self.update_weights()

        return

## Test using Generated Data (same data as what we used in Logistic Regression implementation)

In [5]:
from sklearn import datasets
(X, y) = datasets.make_classification(n_samples=10000, n_features=7, n_informative=5, n_redundant=2)

#Split in test train split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [6]:
nn = NeuralNetwork(X_train, y_train)
nn.train(X_train, y_train)

iteration: 0 loss: 1.2408950696948744
iteration: 2000 loss: 0.13631904988868987
iteration: 4000 loss: 0.1309320965348822
iteration: 6000 loss: 0.1277427847090521
iteration: 8000 loss: 0.12600546267699875


In [7]:
y_pred = nn.forward_prop(X_test)
y_pred = np.argmax(y_pred, axis=1)

In [8]:
print(f"Accuracy: {np.sum(y_test==y_pred)/X_test.shape[0]}")

Accuracy: 0.969
