In [0]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [0]:
class NeuralNet :

    def __init__(self, layers=[200, 200, 10], learning_rate = 0.001, activation=['elu','elu','softmax'],
                 epochs=100, elu_alpha=1.2, batch_size=250, l2_lambda = 1e-4, epsilon=1e-8,
                 beta1=0.9, beta2=0.999):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.num_layers = len(layers)
        self.layers = layers
        self.activation = activation
        self.elu_alpha = elu_alpha
        self.activate = {
            'elu': self.elu_activation,
            'softmax': self.softmax_activation
        }
        self.weights = []
        self.bias = []
        self.batch_size = batch_size
        self.differentiate = {
            'elu': self.d_elu_activation,
            'softmax': self.d_softmax_activation
        }
        self.optimizer_cache = {}
        self.l2_lambda = l2_lambda
        self.epsilon = epsilon
        self.beta1 = beta1
        self.beta2 = beta2

    def forward_pass(self, train, save_cache=False):
        cache = {
            'scores': [],
            'inputs': []
        }
        for i,n in enumerate(self.layers):
            if i == 0:
                Z = np.dot(train,self.weights[i]) + self.bias[i]
            else:
                Z = np.dot(A,self.weights[i]) + self.bias[i]
            if save_cache:
                cache['scores'].append(Z)
                if i!=0:
                    cache['inputs'].append(A)
            A = self.activate[self.activation[i]](Z)
        return (A, cache) if save_cache else (A, None)

    def backpropogate_update(self, X_train, Y_train, prediction, cache, iter):
        batch_size = X_train.shape[0]
        d_output = self.d_categorical_cross_entropy_loss(Y_train,prediction)


        for layer in range(len(self.layers)-1,0,-1):
            d_score = d_output*self.differentiate[self.activation[layer]](cache['scores'][layer])
            if layer==0:
                d_weights = np.dot(d_score, X_train.T)/batch_size
            else:
                d_weights = np.dot(cache['inputs'][layer-1].T, d_score)/batch_size
            d_bias = np.sum(d_score, axis=0, keepdims=True)
            d_output = np.dot(d_score,self.weights[layer].T)


            self.weights[layer] -= 0.001 * (self.l2_lambda*self.weights[layer])# l2_regularization
            self.bias[layer] -= 0.001 * (self.l2_lambda*self.bias[layer])  # l2_regularization

    def softmax_activation(self, Z):
        Z_dash = Z - Z.max()  # for numerical stability
        e = np.exp(Z_dash)
        return e / (np.sum(e, axis=1, keepdims=True))

    def d_softmax_activation(self, y):
        return y * (1 - y)

    def elu_activation(self, Z):
        return np.where(Z >= 0, Z, self.elu_alpha*(np.exp(Z) - 1))

    def d_elu_activation(self, Z):
        return (Z >= 0).astype('float32') + (Z < 0).astype('float32') * (self.elu_activation(Z) + self.elu_alpha)

    def categorical_cross_entropy_loss(self, actual, prediction):
        prediction /= np.sum(prediction, axis=-1, keepdims=True)
        prediction = np.clip(prediction, 10e-8, 1. - 10e-8)  # for numerical stability
        return -np.sum(actual * np.log(prediction))

    def d_categorical_cross_entropy_loss(self, actual, prediction):
        return actual - prediction

    def init_weights(self,M):
        # using He normal initialization
        weights = []
        bias = []
        for n,m in enumerate(self.layers):
            if n==0:
                weights.append(np.random.normal(0, np.sqrt(2/M),size=[M,m]))
            else:
                weights.append(np.random.normal(0,np.sqrt(2/self.layers[n-1]),size=[self.layers[n-1],m]))
            bias.append(np.random.uniform(-0.2,0.2,size=[1,m]))
        return weights, bias

    def get_batch(self, X_train, Y_train):
        n_batches = X_train.shape[0]//self.batch_size
        if n_batches == 0:
            yield X_train, Y_train
        for i in range(n_batches):
            if i==n_batches-1:
                yield X_train[i*self.batch_size:, :], Y_train[i*self.batch_size:, :]
            else:
                yield X_train[i*self.batch_size:(i+1)*self.batch_size, :], Y_train[i*self.batch_size:(i+1)*self.batch_size, :]

    def train(self, X_train, Y_train):
        (N, M) = X_train.shape
        self.weights, self.bias = self.init_weights(M)

        iter = self.batch_size

        for epoch in range(self.epochs):
            shuffle_indices = np.random.permutation(X_train.shape[0])
            X_train_shuffled = X_train[shuffle_indices]
            Y_train_shuffled = Y_train[shuffle_indices]


            for X_batch, Y_batch in self.get_batch(X_train_shuffled, Y_train_shuffled):
                prediction, cache = self.forward_pass(X_batch, save_cache=True)

                self.backpropogate_update(X_batch, Y_batch, prediction, cache, iter)

                iter += self.batch_size

            print("epoch {}: Training accuracy = {}".format(epoch+1, accuracy(self.predict(X_train), Y_train)))

    def predict(self,X):
        n_batches = X.shape[0] // self.batch_size
        output_size = self.layers[len(self.layers)-1]
        if n_batches == 0:
            predictions,cache = self.forward_pass(X,save_cache=False)
        else:
            predictions = np.zeros([X.shape[0],output_size])
            for i in range(n_batches):
                if i==n_batches-1:
                    predictions[i*self.batch_size:], cache = self.forward_pass(X[i*self.batch_size:])
                else:
                    predictions[i * self.batch_size:(i+1)*self.batch_size], \
                    cache = self.forward_pass(X[i * self.batch_size: (i+1)*self.batch_size])
        return predictions



In [3]:
import pandas as pd

df = pd.read_csv('ex2data2.csv', sep = ',')
df.head()

Unnamed: 0,A,B,C
0,-0.092742,0.68494,1
1,-0.21371,0.69225,1
2,-0.375,0.50219,1
3,-0.51325,0.46564,1
4,-0.52477,0.2098,1


In [4]:
X_train = df.iloc[:,:-1]
Y_train = df.iloc[:, -1]

X_train = np.array(X_train)
Y_train = np.array(Y_train)
Y_train = Y_train.reshape(Y_train.shape[0], 1)

print(X_train.shape)
print(Y_train.shape)

(117, 2)
(117, 1)


In [5]:
print(X_train[0])

[-0.092742  0.68494 ]


In [6]:
# Split the data into test and train sets
from sklearn.utils import shuffle

X_train, Y_train = shuffle(X_train, Y_train)

X_test = X_train[85:,:]
Y_test = Y_train[85:,:]

X_train_ = X_train[:250,:]
Y_train_ = Y_train[:250,:]

print(X_train_.shape)
print(Y_train_.shape)
print(X_test.shape)
print(Y_test.shape)

(117, 2)
(117, 1)
(32, 2)
(32, 1)


In [0]:
def accuracy(actual, prediction):
    return np.mean(np.argmax(actual,axis=1)==np.argmax(prediction,axis=1))

In [8]:
def main():
  
    print("training data shape: {}".format(X_train.shape))
    print("training labels shape: {}".format(Y_train.shape))

    nn = NeuralNet()
    nn.train(X_train, Y_train)
    test_pred = nn.predict(X_test)

    print("Final Testing Accuracy = {}".format(accuracy(Y_test, test_pred)))


if __name__ == '__main__':
    main()


training data shape: (117, 2)
training labels shape: (117, 1)
epoch 1: Training accuracy = 0.3247863247863248
epoch 2: Training accuracy = 0.3247863247863248
epoch 3: Training accuracy = 0.3247863247863248
epoch 4: Training accuracy = 0.3247863247863248
epoch 5: Training accuracy = 0.3247863247863248
epoch 6: Training accuracy = 0.3247863247863248
epoch 7: Training accuracy = 0.3247863247863248
epoch 8: Training accuracy = 0.3247863247863248
epoch 9: Training accuracy = 0.3247863247863248
epoch 10: Training accuracy = 0.3247863247863248
epoch 11: Training accuracy = 0.3247863247863248
epoch 12: Training accuracy = 0.3247863247863248
epoch 13: Training accuracy = 0.3247863247863248
epoch 14: Training accuracy = 0.3247863247863248
epoch 15: Training accuracy = 0.3247863247863248
epoch 16: Training accuracy = 0.3247863247863248
epoch 17: Training accuracy = 0.3247863247863248
epoch 18: Training accuracy = 0.3247863247863248
epoch 19: Training accuracy = 0.3247863247863248
epoch 20: Traini