# Pre-processing

In [None]:
# importing libraries

import numpy as np
import pandas as pd
import plotly.graph_objects as go

In [None]:
# importing dataset

!gdown --id -q 1N_iNvJ8zXfCiSb5t3IqjH1JMDcHUEYnj
data = pd.read_csv("Iris.csv")
data.head()

In [None]:
# checking for null values and 0 values

# Per-column count of missing entries.
missing_per_column = data.isnull().sum()
# Per-column count of entries that compare equal to 0.
zeros_per_column = (data == 0).sum()

print(missing_per_column)
print(zeros_per_column)

In [None]:
# convert output column

# Re-read the CSV so this cell can be re-run safely: it overwrites "Species".
data = pd.read_csv("Iris.csv")
# Ask for an integer dtype explicitly: newer pandas versions return booleans
# from get_dummies by default, older ones return small unsigned integers.
dummies = pd.get_dummies(data["Species"], dtype=int)
# Store each row's one-hot vector as a Python list in the "Species" column.
data["Species"] = dummies.values.tolist()
print(data["Species"])

In [None]:
# shuffling data

# Sampling with frac=1 draws every row exactly once, i.e. a permutation;
# the fixed random_state makes the permutation repeatable.
permuted = data.sample(frac=1, random_state=1)
# Discard the old index so rows are numbered 0..n-1 in their new order.
shuffled_data = permuted.reset_index(drop=True)
print(shuffled_data)

In [None]:
# splitting input and output

# Inputs: every column except the row identifier and the label.
feature_frame = shuffled_data.drop(columns=["Id", "Species"])
X = feature_frame.values
# Outputs: stack the per-row one-hot lists into a 2-D array (one row per sample).
one_hot_rows = shuffled_data["Species"].tolist()
Y = np.vstack(one_hot_rows)

In [None]:
# normalizing input

# Min-max scaling per column: each feature is mapped onto [0, 1].
# NOTE(review): the minimum and range are taken over all rows, including the
# ones later sliced off as validation and test data.
col_min = X.min(axis = 0)
col_range = X.max(axis = 0) - col_min
X = (X - col_min) / col_range

In [None]:
# creating training, validation and test sets

# Row boundaries of the three consecutive slices: [0, 90), [90, 120), [120, 150).
train_end, val_end, test_end = 90, 120, 150

# Each slice is transposed so that samples run along the columns.
X_train, Y_train = X[0:train_end].T, Y[0:train_end].T
X_val, Y_val = X[train_end:val_end].T, Y[train_end:val_end].T
X_test, Y_test = X[val_end:test_end].T, Y[val_end:test_end].T

# Model

In [None]:
def weight_init(X, Y):
  """Create the initial parameters of the three-layer network.

  Layer widths come from the notebook-level names `h1` and `h2`; the input
  and output widths are read from the first dimension of `X` and `Y`.
  Weights are drawn from a standard normal scaled by 0.01, biases start at 0.

  Returns a dict with keys "W1", "W2", "W3", "b1", "b2", "b3".
  """
  n_in = X.shape[0]
  n_out = Y.shape[0]
  small = 0.01

  # The weight matrices are drawn in the order W1, W2, W3.
  return {
    "W1": np.random.randn(h1, n_in) * small,
    "W2": np.random.randn(h2, h1) * small,
    "W3": np.random.randn(n_out, h2) * small,
    "b1": np.zeros((h1, 1)),
    "b2": np.zeros((h2, 1)),
    "b3": np.zeros((n_out, 1)),
  }

In [None]:
def relu(X):
  """Rectified linear unit: element-wise maximum of 0 and X."""
  rectified = np.maximum(0, X)
  return rectified

In [None]:
def relu_derivative(X):
  """Return the ReLU derivative of X: 1 where X > 0, 0 elsewhere.

  The result is a new array with the same dtype as X. The argument itself is
  left untouched, so callers can keep using the values they passed in
  (for example activations held in a cache).
  """
  # Build the 0/1 mask from a comparison instead of overwriting X in place.
  return (X > 0).astype(X.dtype)

In [None]:
def sigmoid(X):
  """Element-wise logistic function 1 / (1 + e^(-X)).

  X is converted to float before exponentiation.
  """
  negated = -X.astype(float)
  denominator = 1 + np.exp(negated)
  return 1 / denominator

In [None]:
def sigmoid_derivative(X):
  """Return X * (1 - X), element-wise.

  NOTE(review): this equals the sigmoid's derivative only if X already holds
  sigmoid outputs rather than pre-activations - confirm against callers.
  """
  complement = 1 - X
  return X * complement

In [None]:
def forward_prop(X, params):
  """Run one forward pass through the three-layer network.

  Layers 1 and 2 apply `relu`, the output layer applies `sigmoid`.

  Returns a dict holding every pre-activation ("Z1".."Z3") and activation
  ("A1".."A3"); "A3" is the network output.
  """
  W1, W2, W3 = params["W1"], params["W2"], params["W3"]
  b1, b2, b3 = params["b1"], params["b2"], params["b3"]

  # hidden layer 1
  Z1 = np.dot(W1, X) + b1
  A1 = relu(Z1)

  # hidden layer 2
  Z2 = np.dot(W2, A1) + b2
  A2 = relu(Z2)

  # output layer
  Z3 = np.dot(W3, A2) + b3
  A3 = sigmoid(Z3)

  return {"Z1": Z1, "Z2": Z2, "Z3": Z3, "A1": A1, "A2": A2, "A3": A3}

In [None]:
def back_prop(X, Y, params, cache):
  """Compute the gradients of all weights and biases for one batch.

  Args:
    X: inputs with samples along axis 1 (the batch size is X.shape[1]).
    Y: targets, same layout as the network output cache["A3"].
    params: dict with the weight matrices "W2" and "W3" (read only).
    cache: dict with the activations "A1", "A2", "A3" from the forward pass.
      It is not modified.

  Returns:
    Dict with keys "dW1", "dW2", "dW3", "db1", "db2", "db3", each averaged
    over the batch.
  """
  m = X.shape[1]

  # Output layer: the error signal is taken as (prediction - target).
  dZ3 = cache["A3"] - Y
  dW3 = np.dot(dZ3, cache["A2"].T) / m
  db3 = np.sum(dZ3, axis=1, keepdims=True) / m

  # relu_derivative receives copies of the cached activations, so the cache
  # stays intact even if the helper overwrites its argument in place.
  dZ2 = np.dot(params["W3"].T, dZ3) * relu_derivative(cache["A2"].copy())
  dW2 = np.dot(dZ2, cache["A1"].T) / m
  db2 = np.sum(dZ2, axis=1, keepdims=True) / m

  dZ1 = np.dot(params["W2"].T, dZ2) * relu_derivative(cache["A1"].copy())
  dW1 = np.dot(dZ1, X.T) / m
  db1 = np.sum(dZ1, axis=1, keepdims=True) / m

  grads = {"dW1": dW1, "dW2": dW2, "dW3": dW3, "db1": db1, "db2": db2, "db3": db3}
  return grads

In [None]:
def weight_update(params, grads):
  """Apply one gradient-descent step and return the new parameter dict.

  Every parameter P becomes P - learning_rate * dP, where `learning_rate` is
  the notebook-level setting and dP is looked up in `grads` under "d" + name.
  The input dicts are not modified.
  """
  names = ("W1", "W2", "W3", "b1", "b2", "b3")
  return {name: params[name] - learning_rate * grads["d" + name] for name in names}

In [None]:
def calculate_cost(Y_hat, Y):
  """Return the summed cross-entropy term -sum(Y * log(Y_hat)).

  Args:
    Y_hat: predicted values, same shape as Y.
    Y: target values (0/1 labels, numeric or boolean).

  Returns:
    The cost summed over all entries (it is not divided by the batch size).

  Entries whose label is 0 contribute exactly 0, even when the prediction is
  0: evaluating 0 * log(0) directly would give NaN and poison the whole sum.
  """
  Y_hat = np.asarray(Y_hat, dtype=float)
  labelled = np.asarray(Y) != 0

  # Take the logarithm only where the label is non-zero; the remaining
  # positions keep their initial 0 and so drop out of the sum.
  log_probs = np.zeros_like(Y_hat)
  np.log(Y_hat, out=log_probs, where=labelled)

  L = -np.sum(Y * log_probs)
  return L

In [None]:
def calculate_accuracy(params, X, Y):
  cache = forward_prop(X, params)
  
  A3 = cache["A3"].T
  max_vals = A3.max(axis = 1).reshape(-1, 1)
  A3 = np.where(A3 == max_vals, 1, 0).T
  
  errors = np.sum(np.abs(A3 - Y)) // 2
  total = Y.shape[1]
  accuracy = (total - errors) / total
  return accuracy

In [None]:
def train_model(X_train, Y_train, X_val, Y_val, verbose=1):
  """Train the network with full-batch gradient descent and record metrics.

  The number of iterations is the notebook-level `epochs` setting. Each
  iteration runs a forward pass, back-propagation and a parameter update on
  the whole training set, then appends one value to every metric list.

  Args:
    X_train, Y_train: training inputs and targets.
    X_val, Y_val: validation inputs and targets.
    verbose: when truthy, print the metrics every 100th iteration.

  Returns:
    (history, params): `history` maps "train_err", "train_acc", "val_err" and
    "val_acc" to per-iteration lists; `params` holds the final parameters.
    With `epochs == 0` the lists are empty and `params` is the initial set.
  """
  train_err = []
  train_acc = []
  val_err = []
  val_acc = []
  # Assembled once, up front: the dict refers to the very lists filled below,
  # and it exists even if the loop body never runs.
  history = {"train_err": train_err, "train_acc": train_acc, "val_err": val_err, "val_acc": val_acc}

  params = weight_init(X_train, Y_train)
  for i in range(epochs):
    cache = forward_prop(X_train, params)
    grads = back_prop(X_train, Y_train, params, cache)
    params = weight_update(params, grads)

    preds = forward_prop(X_val, params)
    # The training loss comes from the forward pass made before this
    # iteration's update; the other three metrics use the updated parameters.
    train_err.append(calculate_cost(cache["A3"], Y_train))
    val_err.append(calculate_cost(preds["A3"], Y_val))
    train_acc.append(calculate_accuracy(params, X_train, Y_train))
    val_acc.append(calculate_accuracy(params, X_val, Y_val))

    if verbose and i % 100 == 0:
      print(f"Epoch {i + 1}/{epochs}")
      print(f"Training Loss: {round(train_err[i], 2)}, Training Accuracy: {round(train_acc[i], 2)}")
      print(f"Validation Loss: {round(val_err[i], 2)}, Validation Accuracy: {round(val_acc[i], 2)}")

  return history, params

# Evaluation

In [None]:
# hyperparameters

# Widths of the two hidden layers (read by weight_init).
h1 = h2 = 100
# Number of full-batch iterations performed by train_model.
epochs = 1000
# Step size used by weight_update.
learning_rate = 0.1

In [None]:
# Seed NumPy's global random generator so that the random weight
# initialisation, and with it every reported metric, is repeatable.
np.random.seed(1)
history, params = train_model(X_train, Y_train, X_val, Y_val, verbose=0)

In [None]:
# Accuracy of the trained parameters on each split, rounded to two decimals.
train_score = calculate_accuracy(params, X_train, Y_train)
acc = round(train_score, 2)
print(f"Training Accuracy: {acc}")

val_score = calculate_accuracy(params, X_val, Y_val)
acc = round(val_score, 2)
print(f"Validation Accuracy: {acc}")

test_score = calculate_accuracy(params, X_test, Y_test)
acc = round(test_score, 2)
print(f"Test Accuracy: {acc}")

In [None]:
# Training and validation error per iteration, drawn as two traces of one figure.
train_err = history["train_err"]
val_err = history["val_err"]

fig_err = go.Figure()
for series, label in ((train_err, "Training Error"), (val_err, "Validation Error")):
  # x is simply the iteration index 0..len-1
  steps = list(range(len(series)))
  fig_err.add_trace(go.Scatter(x=steps, y=series, name=label, mode='lines+markers'))
fig_err.update_layout(title = f'Error vs Iterations',title_x=0.5, xaxis_title= "Iterations", yaxis_title="Error")

fig_err.show(renderer="svg")

In [None]:
# Training and validation accuracy per iteration, drawn as two traces of one figure.
train_acc = history["train_acc"]
val_acc = history["val_acc"]

fig_acc = go.Figure()
for series, label in ((train_acc, "Training Accuracy"), (val_acc, "Validation Accuracy")):
  # x is simply the iteration index 0..len-1
  steps = list(range(len(series)))
  fig_acc.add_trace(go.Scatter(x=steps, y=series, name=label, mode='lines+markers'))
fig_acc.update_layout(title = f'Accuracy vs Iterations',title_x=0.5, xaxis_title= "Iterations", yaxis_title="Accuracy")

fig_acc.show(renderer="svg")