# Simple Neural Network from scratch

Building a feed-forward neural network with two hidden layers from scratch in NumPy, using forward propagation and backpropagation.


In [1]:
import numpy as np
import pandas as pd

from keras.backend import epsilon
from keras.utils import np_utils
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
# Lift pandas' column-width limit so long cell values are displayed in full.
pd.set_option('max_colwidth', None)

## Defining the Neural Network


In [2]:
class Layer:
    """Common interface shared by every layer of the network.

    The base class only stores placeholders for the most recent input and
    output; both propagation methods must be overridden by subclasses.
    """

    def __init__(self):
        """Create a layer with no cached input or output yet."""
        self.input = self.output = None

    def forward_propagation(self, input):
        """Compute the layer output Y for the given input X.

        Args:
            input: input to the layer.

        Raises:
            NotImplementedError: always; subclasses provide the behaviour.
        """
        raise NotImplementedError()

    def backward_propagation(self, output_error, learning_rate):
        """Compute dE/dX from dE/dY (updating parameters, if there are any).

        Args:
            output_error: error with respect to the layer output (dE/dY).
            learning_rate: step size used for parameter updates.

        Raises:
            NotImplementedError: always; subclasses provide the behaviour.
        """
        raise NotImplementedError()


In [3]:
class FCLayer(Layer):
    """Fully connected layer computing ``Y = X . W + B``."""

    def __init__(self, input_size, output_size):
        """Fully Connected Layer.

        Args:
            input_size: number of input neurons.
            output_size: number of output neurons.
        """
        # Let the base class create the `input` / `output` attributes so they
        # exist even before the first forward pass.
        super().__init__()
        # np.random.rand draws from [0, 1); shifting by 0.5 centres the
        # initial parameters around zero, i.e. in [-0.5, 0.5).
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    def forward_propagation(self, input_data):
        """Function to apply forward propagation.

        The input is cached on the layer because the backward pass needs it
        to compute the weight gradient.

        Args:
            input_data: input to the layer.

        Returns:
            self.output: output for given input.
        """
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Function to apply backward propagation.

        Updates the weights and bias in place with one gradient-descent step.

        Args:
            output_error: output error (dE/dY), one row per sample.
            learning_rate: learning rate.

        Returns:
            input_error: dE/dX for a given output_error=dE/dY
        """
        # The input error must be computed with the weights as they were
        # during the forward pass, i.e. before they are updated below.
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # The bias is shared by every row, so its gradient is the sum of the
        # output error over the rows. For a single row this is the row itself.
        bias_error = np.sum(np.atleast_2d(output_error), axis=0, keepdims=True)

        # update parameters
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * bias_error
        return input_error


In [4]:
class ActivationLayer(Layer):
    """Layer that applies an element-wise activation function."""

    def __init__(self, activation, activation_prime):
        """Activation Layer.

        Args:
            activation: activation function.
            activation_prime: derivative of activation function.
        """
        # Let the base class create the `input` / `output` attributes so they
        # exist even before the first forward pass.
        super().__init__()
        self.activation = activation
        self.activation_prime = activation_prime

    def forward_propagation(self, input_data):
        """Function to apply forward propagation.

        The input is cached on the layer because the backward pass evaluates
        the activation derivative at that same input.

        Args:
            input_data: input to the layer.

        Returns:
            self.output: activated input.
        """
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Function to apply backward propagation.

        Args:
            output_error: output error (dE/dY).
            learning_rate: learning rate; unused here because this layer has
                no parameters, kept for a uniform layer interface.

        Returns:
            input_error: dE/dX for a given output_error=dE/dY
        """
        # Chain rule: dE/dX = f'(X) * dE/dY, element-wise.
        return self.activation_prime(self.input) * output_error


### Activation function - Sigmoid


In [5]:
def sigmoid(x):
    """Implementing the sigmoid function for x.
    sig(x) = 1/(1+e^-x)

    Evaluated in a numerically stable form: the exponential is only ever
    taken of a non-positive number, so it cannot overflow for inputs of
    large magnitude.

    Args:
        x: input for which sigmoid function needs to be calculated
            (scalar or NumPy array).

    Returns:
        the sigmoid function, with the same shape as x.
    """
    # z = e^-|x| lies in (0, 1] for every x.
    z = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x) = 1 / (1 + z)
    # x <  0: 1 / (1 + e^-x) = e^x / (1 + e^x) = z / (1 + z)
    numerator = np.where(x >= 0, 1.0, z)
    return numerator / (1 + z)


def sigmoid_prime(x):
    """Derivative of the sigmoid function.
    sig'(x) = sig(x) * (1-sig(x))

    Args:
        x: input for which derivative of sigmoid function needs to be calculated.

    Returns:
        the derivative of sigmoid function.
    """
    # Evaluate the sigmoid once and reuse it for both factors.
    s = sigmoid(x)
    return s * (1 - s)


### Loss function - Cross Entropy

<img src="http://androidkt.com/wp-content/uploads/2021/05/Selection_099-1024x200.png" width=600>


In [6]:
def bce(y_true, y_pred, eps=1e-7):
    """Implementing binary cross entropy loss.

    The loss is -log(y_pred) where the label equals 1 and -log(1 - y_pred)
    for every other label. Predictions are clipped to [eps, 1 - eps] first so
    that a saturated prediction of exactly 0 or 1 yields a large finite loss
    instead of infinity.

    Args:
        y_true: true value of input (scalar or array of 0/1 labels).
        y_pred: predicted value of input (scalar or array of probabilities).
        eps: clipping margin that keeps the logarithm finite.

    Returns:
        the cross entropy loss, element-wise.
    """
    y_pred = np.clip(y_pred, eps, 1 - eps)
    # Probability the model assigned to the true class of each element.
    likelihood = np.where(np.asarray(y_true) == 1, y_pred, 1 - y_pred)
    return -np.log(likelihood)


def bce_prime(y_true, y_pred, eps=1e-7):
    """Implementing derivative of binary cross entropy loss.

    The derivative with respect to y_pred is -1 / y_pred where the label
    equals 1 and 1 / (1 - y_pred) for every other label. Predictions are
    clipped to [eps, 1 - eps] first so that a saturated prediction of exactly
    0 or 1 cannot cause a division by zero.

    Args:
        y_true: true value of input (scalar or array of 0/1 labels).
        y_pred: predicted value of input (scalar or array of probabilities).
        eps: clipping margin that keeps the denominator non-zero.

    Returns:
        the derivative of the cross entropy loss, element-wise.
    """
    y_pred = np.clip(y_pred, eps, 1 - eps)
    positive = np.asarray(y_true) == 1
    sign = np.where(positive, -1.0, 1.0)
    denominator = np.where(positive, y_pred, 1 - y_pred)
    return sign / denominator


### Combining all the functions


In [7]:
class Network:
    """Sequential stack of layers trained one sample at a time."""

    def __init__(self):
        """Neural Network.
        """
        self.layers = []
        self.loss = None
        self.loss_prime = None

    def add(self, layer):
        """Add layer to network.

        Args:
            layer: object providing forward_propagation and
                backward_propagation; layers run in the order they are added.
        """
        self.layers.append(layer)

    def use(self, loss, loss_prime):
        """Set loss to use.

        Args:
            loss: callable (y_true, y_pred) returning the loss.
            loss_prime: callable (y_true, y_pred) returning the derivative of
                the loss with respect to y_pred.
        """
        self.loss = loss
        self.loss_prime = loss_prime

    def _forward(self, sample):
        """Push one sample through every layer and return the final output."""
        output = sample
        for layer in self.layers:
            output = layer.forward_propagation(output)
        return output

    def predict(self, input_data):
        """Predict output for given input.

        Args:
            input_data: iterable of samples (sample dimension first).

        Returns:
            list with the network output for each sample, in input order.
        """
        return [self._forward(sample) for sample in input_data]

    def fit(self, x_train, y_train, epochs, learning_rate):
        """Train the network.

        Runs one forward and one backward pass per sample and prints the
        average loss after every epoch.

        Args:
            x_train: training samples (sample dimension first).
            y_train: targets, paired with x_train by position.
            epochs: number of passes over the training data.
            learning_rate: step size forwarded to every layer.

        Raises:
            ValueError: if x_train contains no samples.
        """
        # sample dimension first
        samples = len(x_train)
        if samples == 0:
            raise ValueError("x_train must contain at least one sample")

        # training loop
        for epoch in range(epochs):
            err = 0.0
            for sample, target in zip(x_train, y_train):
                output = self._forward(sample)

                # compute loss (for display purpose only); the loss may come
                # back as an array, so reduce it to a plain float before
                # accumulating it for the '%f' format below
                err += float(np.mean(self.loss(target, output)))

                # backward propagation, last layer first
                error = self.loss_prime(target, output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            # calculate average error on all samples
            err /= samples
            print('epoch %d/%d   error=%f' % (epoch + 1, epochs, err))

## Loading Dataset
[Dataset on kaggle](https://www.kaggle.com/omnamahshivai/surgical-dataset-binary-classification)

Attribute Information:

In [8]:
# Read the surgical dataset from the working directory.
original_df = pd.read_csv('Surgical-deepnet.csv')

# Target: the 'complication' column. Features: every column except the last.
# NOTE(review): this assumes 'complication' is the last column -- confirm.
y = original_df['complication'].values
x = original_df.iloc[:, :-1].values

In [9]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=0,
)

# Insert a length-1 axis so each training sample is a (1, n_features) row.
x_train = x_train[:, np.newaxis]

for label, data in (('Training samples: ', x_train), ('Testing samples: ', x_test)):
    print(label, data.shape)

Training samples:  (10244, 1, 24)
Testing samples:  (4391, 24)


## Define the network

In [10]:
# Network: three fully connected layers, each followed by a sigmoid activation.
# Per-sample shapes: (1, 24) -> (1, 100) -> (1, 50) -> (1, 1)
net = Network()
for n_inputs, n_outputs in ((24, 100), (100, 50), (50, 1)):
    net.add(FCLayer(n_inputs, n_outputs))
    net.add(ActivationLayer(sigmoid, sigmoid_prime))

In [27]:
# Attach the cross entropy loss and its derivative, then train on every
# training sample for 200 epochs.
net.use(loss=bce, loss_prime=bce_prime)
net.fit(x_train, y_train, epochs=200, learning_rate=0.001)

epoch 1/50   error=0.555069
epoch 2/50   error=0.547742
epoch 3/50   error=0.561009
epoch 4/50   error=0.559113
epoch 5/50   error=0.551968
epoch 6/50   error=0.555554
epoch 7/50   error=0.553901
epoch 8/50   error=0.548685


KeyboardInterrupt: 

## Evaluation

In [12]:
# Compare the network output with the true label for the first 3 test samples.
out = net.predict(x_test[:3])
print(
    "\n",
    "predicted values : ",
    out,
    "true values : ",
    y_test[:3],
    sep="\n",
)



predicted values : 
[array([[0.14884123]]), array([[0.27856554]]), array([[0.04212324]])]
true values : 
[1 1 0]


## Testing


In [24]:
# Unwrap each nested network output into a scalar and tabulate it next to
# the corresponding true label.
preds = [sample_output[0][0] for sample_output in net.predict(x_test)]
pd.DataFrame({'Predictions': preds, 'True Values': y_test})

Unnamed: 0,Predictions,True Values
0,0.148841,1
1,0.278566,1
2,0.042123,0
3,0.065332,0
4,0.306132,1
...,...,...
4386,0.188757,0
4387,0.360816,1
4388,0.042123,0
4389,0.125844,0


## Time Complexity

Time complexity is the amount of time taken by an algorithm to run, as a function of the length of the input. It measures the time taken to execute each statement of code in an algorithm.

The network illustrated below consists of 2 input nodes feeding a hidden layer with 3 nodes, which in turn feeds an output layer with 1 node. The weighted sum computed at each layer is passed through a sigmoid activation function.

<img src="NN.png" width=600>


In [None]:
def convert_time(time):
    """Convert a time-of-day value into a duration since midnight.

    Args:
        time: object exposing hour, minute, second and microsecond
            attributes (e.g. datetime.time or datetime.datetime).

    Returns:
        datetime.timedelta built from those four fields.
    """
    # Imported here because the file's top-level imports do not include it.
    import datetime

    return datetime.timedelta(
        hours=time.hour,
        minutes=time.minute,
        seconds=time.second,
        microseconds=time.microsecond,
    )


In [None]:
# Print the elapsed time of every recorded run next to its epoch count.
for start, end, epochs in zip(all_initial_times, all_final_times, all_epochs):
    started = convert_time(start)
    elapsed = convert_time(end) - started

    # total time taken by program
    print('Total time taken for %7d epochs: %18s' % (epochs, elapsed))
