In [None]:
import numpy as np
import pandas as pd
from sklearn import datasets
# Load scikit-learn's Iris dataset; a later cell reads its 'data', 'target'
# and 'target_names' entries.
iris = datasets.load_iris()

  from pandas.core import (


In [None]:
def train_test_split(X, Y, split = 0.25):
  """Randomly partition X and Y into train and test subsets.

  Parameters:
    X: array indexable by an integer index array; X.shape[0] is the sample count.
    Y: array indexable by the same integer index array as X.
    split: fraction of the samples assigned to the test subset
           (the test size is int(sample_count * split)).

  Returns:
    (X_train, X_test, Y_train, Y_test)
  """
  sample_count = X.shape[0]
  test_size = int(sample_count * split)

  # One shuffled ordering of 0..sample_count-1: the head becomes the test set,
  # the remainder the training set.
  shuffled = np.random.permutation(sample_count)
  test_idx = shuffled[:test_size]
  train_idx = shuffled[test_size:]

  return X[train_idx], X[test_idx], Y[train_idx], Y[test_idx]

In [None]:
def calculate_accuracy(X, Y):
  """Return the percentage (0-100) of positions where X equals Y.

  Parameters:
    X: predicted labels (any array-like, e.g. list or NumPy array).
    Y: actual labels, same shape as X.

  Raises:
    ValueError: if X and Y do not have the same shape.
  """
  predicted = np.asarray(X)
  actual = np.asarray(Y)
  # Coercing to arrays keeps `==` element-wise for plain lists; the shape
  # check stops broadcasting from silently producing a meaningless score.
  if predicted.shape != actual.shape:
    raise ValueError(
      f"Shape mismatch: predictions {predicted.shape} vs labels {actual.shape}"
    )
  return (predicted == actual).sum() / len(actual) * 100

In [None]:
class NeuralNetwork:
    """Fully connected network with ReLU hidden layers, trained by full-batch
    gradient descent on a margin-free multiclass hinge loss.

    Attributes:
        weights: list of per-layer weight matrices, each (out_neurons, in_neurons).
        biases:  list of per-layer bias rows, each (1, out_neurons).
        lr:      learning rate; set by Train().
    """

    def __init__(self, X, Y, layer_count, neurons_list):
        """Create randomly initialised weights and biases.

        Parameters:
            X: array whose X.shape[1] gives the number of input features.
            Y: array whose Y.shape[0] gives the number of output neurons
               (the notebook passes the array of class names).
            layer_count: number of hidden layers; must equal len(neurons_list).
            neurons_list: sequence with the neuron count of each hidden layer.

        Raises:
            ValueError: if layer_count does not match len(neurons_list).
        """
        hidden_sizes = list(neurons_list)
        if layer_count != len(hidden_sizes):
            # A mismatch would otherwise either raise an IndexError or silently
            # build a network whose last layer is not the output layer.
            raise ValueError(
                f"layer_count ({layer_count}) must equal len(neurons_list) ({len(hidden_sizes)})"
            )

        self.weights = []
        self.biases = []
        self.lr = None
        neurons = [X.shape[1]] + hidden_sizes + [Y.shape[0]]
        for i in range(layer_count + 1):
            weight_layer = np.random.random(size=(neurons[i + 1], neurons[i]))
            bias_layer = np.random.random(size=(1, neurons[i + 1]))
            self.weights.append(weight_layer)
            self.biases.append(bias_layer)

    def __relu(self, X):
        """Return (max(X, 0), derivative mask) where the mask is 1 for X > 0 else 0."""
        result = np.maximum(X, 0)
        derr = np.where(X > 0, 1, 0)
        return result, derr

    def __svm_loss_derrivative(self, X, y):
        """Compute the hinge-style loss and its derivative w.r.t. the scores.

        The loss is sum over samples and classes of max(0, score - target_score)
        (no margin term is added).

        Parameters:
            X: scores, shape (samples, classes).
            y: integer target class per sample, shape (samples,).

        Returns:
            (loss, derr) with derr of the same shape as X.
        """
        rows = np.arange(y.shape[0])
        derr = X - X[rows, y].reshape(-1, 1)        # (samples, classes) - (samples, 1)
        derr[derr < 0] = 0                          # Keep only classes scoring above the target
        loss = np.sum(derr)                         # Summed (not averaged) over the batch
        derr[derr > 0] = 1                          # Each violating class contributes +1
        derr[rows, y] = np.sum(derr, axis=1) * -1   # Target class gets -(number of violations)
        return loss, derr

    def forward_propagation(self, prev_features):
        """Run the network on a batch of feature rows.

        Returns:
            (normal_out, cache): normal_out is the last layer's output *before*
            ReLU; cache holds, per layer, the tuple
            (weight, layer_input, ones_like(bias), normal_out, relu_out, relu_derr).
        """
        cache = []
        for weight, bias in zip(self.weights, self.biases):
            normal_out = np.dot(prev_features, weight.T) + bias            # Affine output
            ldx, ldw, ldb = weight, prev_features, np.ones_like(bias)      # Local derivatives
            relu_out, relu_derr = self.__relu(normal_out)                  # Activation and its mask
            cache.append((ldx, ldw, ldb, normal_out, relu_out, relu_derr))
            prev_features = relu_out                                       # Input of the next layer
        return normal_out, cache

    def backward_propagation(self, Y, ud, cache):
        """Back-propagate the loss derivative through every layer.

        Parameters:
            Y: targets; only Y.shape[0] is used, to average over the batch.
            ud: derivative of the loss w.r.t. the network output
                (the pre-ReLU output returned by forward_propagation).
            cache: per-layer cache from forward_propagation.

        Returns:
            List of (dw, db) per layer in forward order; dw has shape
            (in_neurons, out_neurons), db has shape (out_neurons,).
        """
        gradients = []
        last_layer = len(self.weights) - 1
        for i in reversed(range(len(self.weights))):
            ldx, ldw, ldb, normal_out, relu_out, relu_derr = cache[i]
            if i != last_layer:
                # For hidden layers `ud` arrives w.r.t. the ReLU output, so it
                # must be gated by the ReLU mask before it is used for dw/db/dx.
                # The last layer's output is taken before ReLU, so no gating there.
                ud = ud * relu_derr
            dx = np.dot(ud, ldx)                       # Derivative w.r.t. this layer's input
            dw = np.dot(ldw.T, ud) / Y.shape[0]        # Weight gradient, averaged over the batch
            db = np.sum(ud, axis=0) / Y.shape[0]       # Bias gradient, averaged over the batch
            ud = dx                                    # Upstream derivative for the previous layer
            gradients.insert(0, (dw, db))
        return gradients

    def __gradient_descent(self, gradients):
        """Apply one in-place gradient-descent step to all weights and biases."""
        for i in range(len(self.weights)):
            dw, db = gradients[i]
            self.weights[i] -= self.lr * dw.T          # dw is (in, out); weights are (out, in)
            self.biases[i] -= self.lr * db.reshape(1, -1)

    def Predict_Confidence(self, X):
        """Return the raw output scores (logits) for the rows of X."""
        predictions, cache = self.forward_propagation(X)
        return predictions

    def Predict_Class(self, X):
        """Return the index of the highest-scoring class for each row of X."""
        X = self.Predict_Confidence(X)
        return np.argmax(X, axis=1)

    def Train(self, X, Y, alpha = 0.0001, loss_at_iter = 50, max_iter = None):
        """Train with full-batch gradient descent.

        Parameters:
            X: features, shape (samples, features).
            Y: integer class labels; flattened to shape (samples,).
            alpha: learning rate.
            loss_at_iter: print the loss every this many iterations
                          (0 or None disables the periodic print).
            max_iter: number of iterations to run. When None, training stops
                      once the relative loss change drops below 0.01 %, the
                      loss reaches exactly 0, or the loss becomes non-finite.
        """
        convergence_check = False
        self.lr = alpha
        X = np.array(X)                  # (samples, features)
        Y = np.array(Y).reshape(-1,)     # (samples,)

        # Without max_iter, termination is driven by the change in loss
        if max_iter is None:
            convergence_check = True
            print("Convergence Criteria will be used")

        old_loss = 0
        counter = 0

        while True:

            predictions, cache = self.forward_propagation(X)
            loss, derrivative = self.__svm_loss_derrivative(predictions, Y)

            if loss_at_iter and counter % loss_at_iter == 0:
                print(f"Loss at iteration {counter} = {loss}")

            gradients = self.backward_propagation(Y, derrivative, cache)
            self.__gradient_descent(gradients)

            if convergence_check:
                if not np.isfinite(loss):
                    # nan/inf never satisfies the relative test below, so the
                    # loop would otherwise run forever.
                    print("Loss is no longer finite; stopping training")
                    break
                # loss == 0 is handled first: dividing by it yields nan, which
                # would also keep the loop running forever.
                if loss == 0 or abs(old_loss - loss) / loss < 0.0001:
                    print("Convergence Reached")
                    break
            else:
                if counter >= max_iter - 1:
                    print("Maximum Iterations Reached")
                    break

            old_loss = loss
            counter += 1

        print(f"Final Loss at iteration {counter} = {loss}\n")

In [None]:
# Both the train/test shuffle and the weight initialisation draw from NumPy's
# global random state, so seed it to make this cell reproducible on re-run.
np.random.seed(42)

X, Y, Y_names = iris['data'], iris['target'], iris['target_names']

# Hold out 10 % of the samples for testing.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, split=0.1)

# Three hidden layers; the output size is taken from the number of class names.
hidden_layers = [5, 10, 5]
nn = NeuralNetwork(X, Y_names, len(hidden_layers), hidden_layers)

# No max_iter is given, so training stops on the loss-change convergence criterion.
nn.Train(X_train, Y_train, alpha=0.01, loss_at_iter=100)

Convergence Criteria will be used
Loss at iteration 0 = 10090.113230649671
Loss at iteration 100 = 0.039118162766396125
Loss at iteration 200 = 0.03603578697226606
Loss at iteration 300 = 0.03475219889201231
Loss at iteration 400 = 0.032574120962116204
Loss at iteration 500 = 0.030397629992584108
Loss at iteration 600 = 0.028299575139761224
Loss at iteration 700 = 0.026735201299303846
Loss at iteration 800 = 0.03598337402539986
Loss at iteration 900 = 0.036757588502976724
Loss at iteration 1000 = 0.031770415847623745
Loss at iteration 1100 = 0.023317191003888738
Loss at iteration 1200 = 0.0536286100168466
Loss at iteration 1300 = 0.026956564534642347
Loss at iteration 1400 = 0.10135095469315747
Loss at iteration 1500 = 0.024999709619085486
Loss at iteration 1600 = 0.0617588063887462
Loss at iteration 1700 = 0.03780824845969466
Loss at iteration 1800 = 0.09895731785349504
Loss at iteration 1900 = 0.02211799816796134
Loss at iteration 2000 = 0.0364930577969238
Loss at iteration 2100 = 0.

In [None]:
# Classify the held-out samples and score them against the true labels.
predicted = nn.Predict_Class(X_test)
accuracy = calculate_accuracy(predicted, Y_test)

print(f"Predicted: {predicted}")
print(f"Actual: {Y_test}")
print(f"Accuracy: {accuracy}")

Predicted: [0 1 1 1 0 0 2 2 0 2 0 0 1 0 1]
Actual: [0 1 1 1 0 0 2 2 0 2 0 0 1 0 1]
Accuracy: 100.0
