In [511]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [512]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

def sigmoid_derivative(x):
    return sigmoid(x) * (1.0 - sigmoid(x))

def relu(x):
    return np.maximum(0,x)

def relu_derivative(x):
    return 0 if x <= 0 else 1

In [513]:
gain = {
    "tanh": 5/3,
    "relu": 2**0.5,
    "sigmoid": 1
}

activation_map = {
    "relu": relu,
    "sigmoid": sigmoid
    
}

In [514]:
class NeuralNetwork:
    
    def __init__(self, arch, seed=99):
        np.random.seed(99)
        self.num_layers = len(arch)
        self.params = {}
        
        for idx, layer in enumerate(arch):
            layer_idx = idx + 1
            
            layer_input_size = layer["input_dim"]
            layer_output_size = layer["output_dim"]
            activation = layer["activation"]
            # kaiming initialization
            kaiming_weight_init = gain[activation] / np.sqrt(layer_input_size)
            self.params["w" + str(layer_idx)] = np.full((layer_output_size, layer_input_size), kaiming_weight_init)
            
            # random weight initialization
            # self.params["w" + str(layer_idx)] = np.random.randn(layer_output_size, layer_input_size) * 0.01
            self.params["b" + str(layer_idx)] = np.ones((layer_output_size, 1))
            self.params["z" + str(layer_idx)] = np.ones((layer_output_size, 1))
            self.params["a" + str(layer_idx)] = np.ones((layer_output_size, 1))
            print("w" + str(layer_idx), self.params["w" + str(layer_idx)].shape)
            print("b" + str(layer_idx), self.params["b" + str(layer_idx)].shape)
            print("z" + str(layer_idx), self.params["z" + str(layer_idx)].shape)
            print("a" + str(layer_idx), self.params["a" + str(layer_idx)].shape)
        
        self.params["loss"] = 1
        self.arch = arch
        self.derivatives = {}
        self.lr = 0.01
        
        
    def forward_propogation(self, X):
        self.params["a0"] = X
        for l in range(1, self.num_layers + 1):
            # z = wa + b
            self.params["z" + str(l)] = np.add(np.dot(self.params["w" + str(l)], self.params["a" + str(l-1)]), self.params["b" + str(l)])
            self.params["a" + str(l)] = activation_map[self.arch[l-1]["activation"]](self.params["z" + str(l)])
            
    def predict(self, x):
        self.forward_propogation(x)
        return self.params["a" + str(self.num_layers)]
            
    def compute_loss(self, y):
        self.params["loss"] = -(y * np.log(self.params["a" + str(self.num_layers)]) + (1-y) * np.log(1 - self.params["a" + str(self.num_layers)]))
        
    def compute_derivatives(self, y):
        # Partial derivatives of cost function wrt. z[l], w[l], b[l]
        # dL/dz[l] = dL/da[l] * da[l]/dz[l]
#         print("y: ", y)
#         print('a' + str(self.num_layers), self.params['a' + str(self.num_layers)].shape)
        self.derivatives['dz' + str(self.num_layers)] = self.params['a' + str(self.num_layers)] - y
#         print('dz' + str(self.num_layers))
#         print(self.derivatives['dz' + str(self.num_layers)].shape)
        #dL/dw[l] = dL/dz[l] * dz[l]/dw[l]
        #dz[l]/dw[l] = a[l-1]
        self.derivatives['dw' + str(self.num_layers)] = np.dot(self.derivatives['dz' + str(self.num_layers)], np.transpose(self.params['a' + str(self.num_layers-1)]))
#         print('dw' + str(self.num_layers))
#         print(self.derivatives['dw' + str(self.num_layers)].shape)
        # dL/db[l] = dL/dz[l] * dz[l]/db[l] = dL/dz[l]
        self.derivatives['db' + str(self.num_layers)] = self.derivatives['dz' + str(self.num_layers)]
#         print('db' + str(self.num_layers))
#         print(self.derivatives['db' + str(self.num_layers)].shape)
        
        for l in range(self.num_layers-1, 0, -1):
            # dz[l+1]/da[l] = w[l+1]
            # da[l]/dz[l] = sigmoid_derivative(z[l])
            # dL/dz[l] = dL/dz[l+1] * dz[l+1]/da[l] * da[l]/dz[l]
#             print('dz' + str(l))
#             print('w' + str(l+1), ' . ', 'dz' + str(l+1), ' * ', 'z' + str(l))
            
#             print(self.params['w' + str(l+1)].shape, self.derivatives['dz' + str(l+1)].shape, 
#                   sigmoid_derivative(self.params['z' + str(l)]).shape)
            
#             print(np.transpose(self.params['w' + str(l+1)]).shape, self.derivatives['dz' + str(l+1)].shape, 
#                   sigmoid_derivative(self.params['z' + str(l)]).shape)
            
            # broadcase operation *
    
            self.derivatives['dz' + str(l)] = np.dot(np.transpose(self.params['w' + str(l+1)]), 
                                                     self.derivatives['dz' + str(l+1)]) * \
                                                     sigmoid_derivative(self.params['z' + str(l)])
            
            # dL/dw[l] = dL/dz[l] * dz[l]/dw[l] = dL/dz[l] * a[l-1]
            self.derivatives['dw' + str(l)] = np.dot(self.derivatives['dz' + str(l)], np.transpose(self.params['a' + str(l-1)]))
            
            # dL/db[l] = dL/z[l] * dz[l]/db = dL/z[l]
            self.derivatives['db' + str(l)] = self.derivatives['dz' + str(l)]
            
        
    def update_network_parameters(self):
        for l in range(1, self.num_layers+1):
            self.params["w" + str(l)] -= self.lr * self.derivatives['dw' + str(l)]
            self.params["b" + str(l)] -= self.lr * self.derivatives['db' + str(l)]
    
    def backward_propogation(self, y):
        
        self.compute_derivatives(y)
        self.update_network_parameters()
        
    def fit(self, X, Y, num_iter):
        for iter in range(num_iter):
            acc = 0
            loss = 0
            correct_predictions = 0
            for i in range(X.shape[0]):
#                print("x before: ", X[i])
                x = X[i].reshape((X[i].size, 1))
#                print("x after: ", X[i])
                y = Y[i]
                self.forward_propogation(x)
                self.compute_loss(y)
                loss += self.params["loss"]
                y_pred = self.predict(x)
                y_pred = (y_pred > 0.5)
                if y_pred.all() == y:
                    correct_predictions += 1
                self.backward_propogation(y)
                
            print('Iteration: ', iter)
            print('Loss: ', loss)
            print('Accuracy: ', (correct_predictions / X.shape[0]) * 100)
            
            
    def matrix_fit(self, X, Y, num_iter):
        acc = 0
        loss = 0
        correct_predictions = 0
        for iter in range(num_iter):
            # x = X[i].reshape((X[i].size, 1))
            x = X[i].reshape((X[i].size, 1))
            y = Y[i]
            self.forward_propogation(x)
            self.compute_loss(y)
            loss = self.params["loss"]
            y_pred = self.predict(x)
            y_pred = (y_pred > 0.5)
            if y_pred.all() == y:
                correct_predictions += 1
            self.backward_propogation(y)
            
            print('Iteration: ', iter)
            print('Loss: ', loss)
        
        print('Accuracy: ', (correct_predictions / num_iter) * 100)
        

In [515]:
dataset = pd.read_csv('wheat-seeds-binary.csv')

In [516]:
shuffled_dataset = dataset.sample(frac=1).reset_index(drop=True)
shuffled_dataset['Class'] = shuffled_dataset['Class'] - 1

In [517]:
X = shuffled_dataset.iloc[:, 0:-1].values
y = shuffled_dataset.iloc[:, -1].values

In [518]:
sc_X = StandardScaler()
X = sc_X.fit_transform(X)

In [519]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(98, 7) (42, 7) (98,) (42,)


In [520]:
# ARCH = [
#     {"input_dim": 7, "output_dim": 25, "activation": "sigmoid"},
#     {"input_dim": 25, "output_dim": 50, "activation": "sigmoid"},
# #    {"input_dim": 50, "output_dim": 50, "activation": "relu"},
#     {"input_dim": 50, "output_dim": 25, "activation": "sigmoid"},
#     {"input_dim": 25, "output_dim": 1, "activation": "sigmoid"},
# ]

In [521]:
ARCH = [
    {"input_dim": 7, "output_dim": 2, "activation": "sigmoid"},
    {"input_dim": 2, "output_dim": 1, "activation": "sigmoid"},
]

In [522]:
classifier = NeuralNetwork(ARCH)
# classifier.fit(X_train, y_train, 1)
classifier.matrix_fit(X_train, y_train, 1)

w1 (2, 7)
b1 (2, 1)
z1 (2, 1)
a1 (2, 1)
w2 (1, 2)
b2 (1, 1)
z2 (1, 1)
a2 (1, 1)
Iteration:  0
Loss:  [[1.5593111]]
Accuracy:  0.0


In [523]:
acc = 0
n_c = 0
for i in range(0, X_test.shape[0]):
  x = X_test[i].reshape((X_test[i].size, 1))
  y = y_test[i]
  y_pred = classifier.predict(x)
  y_pred = (y_pred > 0.5)
  #print('Expected: %d Got: %d' %(y, y_pred))
  if y_pred == y:
      n_c += 1

acc = (n_c/X_test.shape[0])*100
print("Test Accuracy", acc)

Test Accuracy 45.23809523809524


In [524]:

import os
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
sns.set_style("whitegrid")

#import keras
# from keras.models import Sequential
# from keras.layers import Dense
# from keras.utils import np_utils
# from keras import regularizers

from sklearn.metrics import accuracy_score

In [525]:
# number of samples in the data set
N_SAMPLES = 1000
# ratio between training and test sets
TEST_SIZE = 0.1

X, y = make_moons(n_samples = N_SAMPLES, noise=0.2, random_state=100)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=42)

print(X.shape)
print(y.shape)

(1000, 2)
(1000,)


In [526]:
ARCH = [
    {"input_dim": 2, "output_dim": 25, "activation": "sigmoid"},
    {"input_dim": 25, "output_dim": 50, "activation": "sigmoid"},
    {"input_dim": 50, "output_dim": 50, "activation": "relu"},
    {"input_dim": 50, "output_dim": 25, "activation": "sigmoid"},
    {"input_dim": 25, "output_dim": 1, "activation": "sigmoid"},
]

In [527]:
classifier = NeuralNetwork(ARCH)
# classifier.fit(X_train, y_train, 1)
classifier.matrix_fit(X_train, y_train, 1)

w1 (25, 2)
b1 (25, 1)
z1 (25, 1)
a1 (25, 1)
w2 (50, 25)
b2 (50, 1)
z2 (50, 1)
a2 (50, 1)
w3 (50, 50)
b3 (50, 1)
z3 (50, 1)
a3 (50, 1)
w4 (25, 50)
b4 (25, 1)
z4 (25, 1)
a4 (25, 1)
w5 (1, 25)
b5 (1, 1)
z5 (1, 1)
a5 (1, 1)
Iteration:  0
Loss:  [[6.00247569]]
Accuracy:  0.0


In [528]:
'''
References:

https://medium.com/binaryandmore/beginners-guide-to-deriving-and-implementing-backpropagation-e3c1a5a1e536#68b5
https://github.com/pranavbudhwant/backpropagation-in-numpy/blob/master/Implementation_Notebook.ipynb

https://towardsdatascience.com/lets-code-a-neural-network-in-plain-numpy-ae7e74410795
https://github.com/SkalskiP/ILearnDeepLearning.py/blob/master/01_mysteries_of_neural_networks/03_numpy_neural_net/Numpy%20deep%20neural%20network.ipynb

'''

'\nReferences:\n\nhttps://medium.com/binaryandmore/beginners-guide-to-deriving-and-implementing-backpropagation-e3c1a5a1e536#68b5\nhttps://github.com/pranavbudhwant/backpropagation-in-numpy/blob/master/Implementation_Notebook.ipynb\n\nhttps://towardsdatascience.com/lets-code-a-neural-network-in-plain-numpy-ae7e74410795\nhttps://github.com/SkalskiP/ILearnDeepLearning.py/blob/master/01_mysteries_of_neural_networks/03_numpy_neural_net/Numpy%20deep%20neural%20network.ipynb\n\n'