<a href="https://colab.research.google.com/github/VaRuN-2509/AGV-git-task-Varun-/blob/main/ANN_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Import the required libraries and load the data
import numpy as np
import pandas as pd
import yaml
from matplotlib import pyplot as plt

# Read the data set (path as used on Colab).
data = pd.read_csv('/content/train.csv')
config_file = None  # path to an optional YAML config file; None selects the defaults below

# Later we will need element-wise exponentials on data of mixed type, hence a vectorized exp function.
exp_func = np.vectorize(np.exp)


m,n = data.shape  # m rows, n columns of the loaded frame

# Defaults used when no config file is supplied. Previously `lr` was only bound
# inside the config branch, so any later use of it raised NameError on the default path.
# 0.01 matches the learning rate passed to the training call further down.
ratio = 0.8
lr = 0.01

if config_file is not None:
  # safe_load only builds plain Python objects, which is what we want for a config file
  with open(config_file,'r') as f:
    config = yaml.safe_load(f)

  model = config['model']
  lr = model['learning_rate']
  ratio = model['train_test_ratio']

data.head()


In [None]:
# Separate the data set into training and testing partitions, each split into features (X) and labels (Y)
data = np.array(data)
np.random.shuffle(data)  # shuffles the rows of the array in place

# The first `ratio` fraction of the rows is used for training,
# the remaining rows are held out for testing.
split_at = int(ratio*m)
data_train, data_test = data[0:split_at], data[split_at:,:]

# Column 0 holds the label; every remaining column is a feature.
X_train, Y_train = data_train[:,1:], data_train[:,0].reshape(-1).astype(int)
X_test, Y_test = data_test[:,1:], data_test[:,0].reshape(-1).astype(int)

print("Shape of train set x:",X_train.shape,"y:",Y_train.shape)
print("Shape of test set x:",X_test.shape,"y:",Y_test.shape)

# M training samples, N features per sample (N is read by the parameter initialiser)
M,N = X_train.shape
print(Y_train)



Shape of train set x: (33600, 784) y: (33600,)
Shape of test set x: (8400, 784) y: (8400,)
[4 8 7 ... 8 6 3]


In [None]:
# Initialize the parameters for the neural network
def _init_():
    """Create the initial parameters of the N -> 100 -> 100 -> 10 network.

    Weights are drawn from a standard normal and scaled by sqrt(2 / fan_in);
    biases start at zero with shape (1, fan_out). The input width is read
    from the notebook-level variable ``N``.

    Returns
    -------
    tuple
        (W1, b1, W2, b2, W3, b3)
    """
    layer_shapes = [(N, 100), (100, 100), (100, 10)]

    params = []
    # Layers are built in order so the random draws happen as W1, W2, W3.
    for fan_in, fan_out in layer_shapes:
        weights = np.random.randn(fan_in, fan_out) * np.sqrt(2. / fan_in)
        biases = np.zeros((1, fan_out))
        params.extend((weights, biases))

    return tuple(params)

def relu(Z):
    """Rectified linear unit: element-wise max(0, z)."""
    floor = 0
    return np.maximum(floor, Z)


def deriv_relu(Z):
    """Derivative of ReLU: 1.0 where z > 0, otherwise 0.0 (including at z == 0)."""
    is_active = Z > 0
    return np.where(is_active, 1.0, 0.0)

# Softmax function for output layer
def softmax(Z):
    """Row-wise softmax: turn each row of logits into a probability distribution.

    Parameters
    ----------
    Z : 2-D array
        Logits with one row per sample and one column per class.

    Returns
    -------
    2-D array of the same shape as ``Z`` whose rows each sum to 1.

    Notes
    -----
    The previous version transposed the exponentials before summing over
    axis=1, which normalised every class over the whole batch rather than
    every sample over its classes. Rows then did not sum to 1, and a class
    whose exponentials all underflowed produced 0/0 = NaN.
    """
    # Subtracting the row-wise max keeps exp() from overflowing; it does not change the result.
    Z_shift = Z - np.max(Z, axis=1, keepdims=True)
    exp_vals = np.exp(Z_shift)
    # Each row contains exp(0) == 1 for its maximum, so the denominator is never zero.
    return exp_vals / np.sum(exp_vals, axis=1, keepdims=True)

# One-hot encode the output labels
def one_hot_vector(Y):
    """One-hot encode a 1-D array of class labels into a (Y.size, 10) float matrix.

    Row i is all zeros except for a 1 in the column given by int(Y[i]).
    """
    row_idx = np.arange(Y.size)
    col_idx = np.asarray(Y).reshape(-1).astype(int)  # truncates like int() does

    Y_one_hot = np.zeros((Y.size, 10))
    Y_one_hot[row_idx, col_idx] = 1
    return Y_one_hot

# Forward propagation loop
def forwardloop(W1, b1, W2, b2, W3, b3, X):
    """Run one forward pass through the three-layer network.

    Two ReLU layers are followed by a softmax output layer. The input X is
    multiplied on the right by each weight matrix and the bias is added.

    Returns
    -------
    tuple
        (Z1, A1, Z2, A2, Z3, A3): the pre-activation and activation of each
        layer, in layer order.
    """
    # First hidden layer
    pre_1 = np.dot(X, W1) + b1
    act_1 = relu(pre_1)

    # Second hidden layer
    pre_2 = np.dot(act_1, W2) + b2
    act_2 = relu(pre_2)

    # Output layer
    pre_3 = np.dot(act_2, W3) + b3
    act_3 = softmax(pre_3)

    return pre_1, act_1, pre_2, act_2, pre_3, act_3


def backwardloop(Z1, Z2, Z3, A1, A2, A3, W1, W2, W3, X, Y):
    """Back-propagate through the three-layer network.

    The output-layer error is taken as (A3 - Y) and pushed back through the
    two ReLU layers. Every gradient is scaled by 1 / (number of rows in X).
    Z3 and W1 are accepted for signature symmetry but are not read.

    Returns
    -------
    tuple
        (dW1, db1, dW2, db2, dW3, db3)
    """
    scale = 1 / X.shape[0]

    # Output layer
    delta_3 = A3 - Y
    grad_W3 = scale * np.dot(A2.T, delta_3)
    grad_b3 = scale * np.sum(delta_3, axis=0, keepdims=True)

    # Second hidden layer
    delta_2 = np.dot(delta_3, W3.T) * deriv_relu(Z2)
    grad_W2 = scale * np.dot(A1.T, delta_2)
    grad_b2 = scale * np.sum(delta_2, axis=0, keepdims=True)

    # First hidden layer
    delta_1 = np.dot(delta_2, W2.T) * deriv_relu(Z1)
    grad_W1 = scale * np.dot(X.T, delta_1)
    grad_b1 = scale * np.sum(delta_1, axis=0, keepdims=True)

    return grad_W1, grad_b1, grad_W2, grad_b2, grad_W3, grad_b3

def update(W1, W2, W3, b1, b2, b3, dW1, dW2, dW3, db1, db2, db3, alpha):
    """Apply one gradient-descent step, param - alpha * grad, to every parameter.

    The inputs are not modified; new values are returned.

    Returns
    -------
    tuple
        (W1, W2, W3, b1, b2, b3) after the step.
    """
    current = (W1, W2, W3, b1, b2, b3)
    gradients = (dW1, dW2, dW3, db1, db2, db3)
    return tuple(param - alpha*grad for param, grad in zip(current, gradients))

# Predict output by taking the max probability
def predicted_output(A3):
    """Return, for each row of A3, the column index holding the largest value.

    With one row per sample and one column per class, this is the predicted
    class label of every sample.
    """
    class_ids = np.argmax(A3, axis=1)  # axis=1: search across the columns of each row
    return class_ids

# Calculate accuracy
def get_accuracy(output, Y):
    """Print predictions and targets, then return the fraction of matching entries.

    Accuracy is (number of positions where output == Y) / Y.size.
    """
    n_correct = np.sum(output == Y)
    accuracy = n_correct / Y.size
    print(f"Predicted result: {output}")
    print(f"Actual result: {Y}")
    return accuracy

def compute_loss(A3, Y_one_hot):
    """Mean cross-entropy between predicted probabilities and one-hot targets.

    A small constant (1e-9) is added inside the log so that a probability of
    exactly zero does not produce -inf.
    """
    log_probs = np.log(A3 + 1e-9)
    per_sample = np.sum(Y_one_hot * log_probs, axis=1)
    return -np.mean(per_sample)

# Full training process
def iterate_process(X, Y, alpha, iterations, log_every=2):
    """Train the three-layer network with full-batch gradient descent.

    Parameters
    ----------
    X : 2-D array
        Training features, one row per sample.
    Y : 1-D array
        Integer class labels, one per row of X.
    alpha : float
        Learning rate used for every parameter update.
    iterations : int
        Number of full-batch gradient steps.
    log_every : int, optional
        Print predictions, loss and accuracy every this many iterations
        (default 2, the previous fixed interval). 0 or None disables logging.

    Returns
    -------
    tuple
        (W1, W2, W3, b1, b2, b3) after the final update.
    """
    W1, b1, W2, b2, W3, b3 = _init_()

    # The labels never change during training, so encode them once instead of on every iteration.
    Y_encoded = one_hot_vector(Y)

    for i in range(iterations):
        Z1, A1, Z2, A2, Z3, A3 = forwardloop(W1, b1, W2, b2, W3, b3, X)
        dW1,db1,dW2,db2,dW3,db3 = backwardloop(Z1, Z2, Z3, A1, A2, A3, W1, W2, W3, X, Y_encoded)
        W1, W2, W3, b1, b2, b3 = update(W1, W2, W3, b1, b2, b3, dW1, dW2, dW3, db1, db2, db3, alpha)

        if log_every and i % log_every == 0:
            # Loss and accuracy describe the forward pass made before this iteration's update.
            loss = compute_loss(A3, Y_encoded)
            output = predicted_output(A3)
            accuracy = get_accuracy(output, Y)
            # These are statistics of the output activations A3 (the label used to say A2).
            print(f"Iteration {i}: A3 mean = {A3.mean()}, std = {A3.std()}")
            print("A3 min/max/mean:", A3.min(), A3.max(), A3.mean())
            print(f"Iteration {i}: Loss = {loss:.4f}, Accuracy = {accuracy:.2f}")

    return W1, W2, W3, b1, b2, b3
W1,W2,W3,b1,b2,b3 = iterate_process(X_train,Y_train,0.01,50)


Predicted result: [0 0 0 ... 6 1 8]
Actual result: [4 8 7 ... 8 6 3]
Iteration 0: A2 mean = 2.9761904761904836e-05, std = 0.000904115915997074
A3 min/max/mean: 3.319690037664925e-294 0.27787975223633316 2.9761904761904836e-05
Iteration 0: Loss = 19.6635, Accuracy = 0.08


  return (exp_vals / np.sum(exp_vals, axis=1, keepdims=True)).T


Predicted result: [1 1 1 ... 1 1 1]
Actual result: [4 8 7 ... 8 6 3]
Iteration 2: A2 mean = nan, std = nan
A3 min/max/mean: nan nan nan
Iteration 2: Loss = nan, Accuracy = 0.11
Predicted result: [0 0 0 ... 0 0 0]
Actual result: [4 8 7 ... 8 6 3]
Iteration 4: A2 mean = nan, std = nan
A3 min/max/mean: nan nan nan
Iteration 4: Loss = nan, Accuracy = 0.10


KeyboardInterrupt: 

In [None]:
def test(X_test,Y_test):
  """Evaluate the trained network on a held-out set and print its accuracy.

  Uses the notebook-level trained parameters W1, b1, W2, b2, W3, b3.

  Parameters
  ----------
  X_test : 2-D array
      Features, one row per sample.
  Y_test : 1-D array
      Integer labels, one per row of X_test.
  """
  # Forward the argument X_test. The previous code passed an unrelated global `X`,
  # which is undefined on a fresh kernel and otherwise evaluates the wrong data.
  *_, A3 = forwardloop(W1, b1, W2, b2, W3, b3, X_test)
  out = np.argmax(A3, axis=1)  # most probable class for each row
  print(out.shape)
  accuracy = np.sum(out == Y_test ) / Y_test.size  # correct predictions / total predictions
  print(f"Predicted result: {out}")
  print(f"Actual result: {Y_test}")
  print(f"accuracy : {accuracy}")

test(X_test,Y_test)

(8400,)
Predicted result: [2 9 4 ... 5 0 8]
Actual result: [2 9 4 ... 5 0 8]
accuracy : 0.7354761904761905
