In [1]:
import numpy as np

In [43]:
class Layer:
  def __init__(self, num_inputs, num_nodes):
    # init weights Glorot and Bengio 2010
    self.weights = (2*np.random.random((num_inputs, num_nodes)) - 1) * np.sqrt(6.0 / (num_nodes + num_nodes))
    self.bias = np.zeros((1, num_nodes))

    self.dweights = np.zeros((num_inputs, num_nodes))
    self.dbias = np.zeros((1, num_nodes))

In [41]:
class DenseNeuralNetwork:
    def __init__(self, input_features, hidden_unit_list, learning_rate = 0.1):
        self.n_features = input_features

        self.learning_rate = learning_rate

        self.num_layers = len(hidden_unit_list)

        self.layers = {}
        self.a = {}
        self.layers[0] = Layer(self.n_features, hidden_unit_list[0])

        for i in range(1, self.num_layers):
          self.layers[i] = Layer(hidden_unit_list[i-1], hidden_unit_list[i])


    @staticmethod
    def stable_softmax(z):
        exps = np.exp(z - np.max(z))
        return exps / np.sum(exps, axis=1, keepdims=True)


    @staticmethod
    def sigmoid(z):
      return np.exp(z)/(1.0+np.exp(z))

    def forward_prop(self, X):
        # forward prop
        self.a[0] = X
        for i in range(1, self.num_layers):
          z1 = np.dot(self.a[i-1], self.layers[i-1].weights) + self.layers[i-1].bias
          # apply nonlinearity (relu)
          self.a[i] = np.maximum(0, z1)

        z1 = np.dot(self.a[self.num_layers-1], self.layers[self.num_layers-1].weights) + self.layers[self.num_layers-1].bias 

        # TODO: make below more generalized for different activation functions.         
        self.prob = DenseNeuralNetwork.sigmoid(z1)
        #self.prob = DenseNeuralNetwork.stable_softmax(z1)
        return self.prob

    def back_prop(self, X, y):
        m = y.shape[0]
        dz2 = self.prob
        dz2[np.arange(m), y] -= 1
        dz2 /= m

        for i in reversed(range(self.num_layers)):
          self.layers[i].dweights = np.dot(self.a[i].T, dz2)
          self.layers[i].dbias = np.sum(dz2, axis=0, keepdims=True)
          dz1 = np.dot(dz2, self.layers[i].weights.T)
          dz2 = dz1 * (self.a[i] > 0)

        return

    def update_weights(self):
        lr = self.learning_rate
        for i in range(1, self.num_layers):
          self.layers[i].weights -= lr * self.layers[i].dweights
          self.layers[i].bias -= lr * self.layers[i].dbias
        return 

    def compute_loss(self, y):
        y = y.astype(int)
        m = y.shape[0]
        loss = np.sum(-np.log(self.prob[np.arange(m), y]) / m)
        return loss

    def train(self, X, y, epochs=10000):
        for i in range(epochs):
            self.forward_prop(X)

            # log loss along the way
            if i % 2000 == 0:
              loss = self.compute_loss(y)
              print(f"iteration: {i} loss: {loss}")

            self.back_prop(X, y)
            self.update_weights()

        return

## Test using Generated Data (same data as what we used in Logistic Regression implementation)

In [16]:
from sklearn import datasets
(X, y) = datasets.make_classification(n_samples=10000, n_features=7, n_informative=5, n_redundant=2)

#Split in test train split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [42]:
nn = DenseNeuralNetwork(7, [25, len(np.unique(y))])
nn.train(X_train, y_train)

7 25
25 2
iteration: 0 loss: 1.1548861210939627
iteration: 2000 loss: 0.3349812553188528
iteration: 4000 loss: 0.3070030530101862
iteration: 6000 loss: 0.2953476722118449
iteration: 8000 loss: 0.28892488545762374


In [44]:
y_pred = nn.forward_prop(X_test)
y_pred = np.argmax(y_pred, axis=1)

In [45]:
print(f"Accuracy: {np.sum(y_test==y_pred)/X_test.shape[0]}")

Accuracy: 0.8845
