In [2]:
# FEEDFORWARD NEURAL NETWORK WITH MLP ARCHITECTURE FOR XOR PROBLEM

import numpy as np
from sklearn.metrics import accuracy_score
def sigmoid(x):
  """Logistic sigmoid, 1 / (1 + e^(-x)), evaluated element-wise with NumPy."""
  denominator = 1 + np.exp(-x)
  return 1/denominator
def dersigmoid(x):
  """Return x * (1 - x).

  This is the sigmoid derivative written in terms of the sigmoid's *output*:
  the callers in this notebook pass values that have already been through
  sigmoid(), not raw pre-activations.
  """
  complement = 1 - x
  return complement * x
# XOR truth table: every 2-bit input combination, one row per sample ...
ip = np.array([[a, b] for a in (0, 1) for b in (0, 1)])
# ... and the expected output, a column vector holding a XOR b for each row.
op = ip[:, :1] ^ ip[:, 1:]

# Layer widths of the 2-2-1 network.
ipsize, hiddensize, opsize = 2, 2, 1

# Seed first so the random initial weights/biases are reproducible.
# The draw order below determines the values and must not be rearranged.
np.random.seed(42)
ip_hid_weight = np.random.rand(ipsize, hiddensize)
hid_op_weight = np.random.rand(hiddensize, opsize)
ip_hid_bias = np.random.rand(1, hiddensize)
hid_op_bias = np.random.rand(1, opsize)

# Training hyperparameters and the cost accumulator.
lr = 0.1
epochs = 10000
cost = 0

for epoch in range(epochs):
  # FORWARD PASS: input -> hidden -> output, sigmoid activation at both layers
  hidlayerip = ip.dot(ip_hid_weight) + ip_hid_bias
  hidlayerop = sigmoid(hidlayerip)
  oplayerip = hidlayerop.dot(hid_op_weight) + hid_op_bias
  predop = sigmoid(oplayerip)
  error = op - predop

  # BACKPROPAGATION
  # Output delta, then the error pushed back through the (not yet updated)
  # hidden->output weights to obtain the hidden delta.
  der_predop = dersigmoid(predop) * error
  errorhiddenlayer = der_predop.dot(hid_op_weight.T)
  der_hiddenlayer = dersigmoid(hidlayerop) * errorhiddenlayer

  # PARAMETER UPDATE: both deltas are already computed, so the order of these
  # four in-place updates does not affect the result.
  hid_op_weight += lr * hidlayerop.T.dot(der_predop)
  hid_op_bias += lr * der_predop.sum(axis=0, keepdims=True)
  ip_hid_weight += lr * ip.T.dot(der_hiddenlayer)
  ip_hid_bias += lr * der_hiddenlayer.sum(axis=0, keepdims=True)
# EVALUATION
# Run one forward pass over all inputs with the *trained* parameters. The pass
# does not depend on the row being printed, so it is done once, outside the
# print loop. The hidden activations are recomputed here rather than reused
# from the last training iteration, which predates the final weight update.
hidlayerip = np.dot(ip, ip_hid_weight) + ip_hid_bias
hidlayerop = sigmoid(hidlayerip)
oplayerip = np.dot(hidlayerop, hid_op_weight) + hid_op_bias
rawpredop = sigmoid(oplayerip)
# Threshold the sigmoid outputs to hard 0/1 predictions for reporting.
predop = rawpredop.round()

print("INPUT     PREDICTED OUTPUT     ACTUAL OUTPUT")
for i in range(len(ip)):
  print(ip[i],"\t     ",predop[i],"\t\t  ",op[i])

# Sum-of-squares cost of the final model, measured on the un-rounded outputs
# of the forward pass above (not on the residual left over from training).
error = op - rawpredop
cost = np.sum((error)**2)
print(f"Final Cost Function: {cost:.6f}")
acc = accuracy_score(op, predop)
print("Accuracy:", acc)

INPUT     PREDICTED OUTPUT     ACTUAL OUTPUT
[0 0] 	      [0.] 		   [0]
[0 1] 	      [1.] 		   [1]
[1 0] 	      [1.] 		   [1]
[1 1] 	      [0.] 		   [0]
Final Cost Function: 0.013410
Accuracy: 1.0


In [12]:
# MULTICLASS CLASSIFICATION TASKS USING SOFTMAX ACTIVATION FUNCTION

import numpy as np
import pandas as pd
def softmax(z):
    """Row-wise softmax of a 2-D array of scores.

    Args:
        z: 2-D array; normalization is performed along axis 1, so each row is
            treated as one set of scores.

    Returns:
        Array of the same shape as ``z`` whose rows are non-negative and sum
        to 1.
    """
    # Shift every row by its OWN maximum. Softmax is invariant to a per-row
    # constant, and this guarantees each row contains exp(0) == 1, so the row
    # sum can never underflow to 0. Shifting by the global maximum instead
    # turns any row far below that maximum into 0/0 = NaN.
    shifted = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / exp_z.sum(axis=1, keepdims=True)
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and x."""
    rectified = np.maximum(0, x)
    return rectified
    
# --- Load the dataset -------------------------------------------------------
# header=0 together with names=... replaces the file's own header row with
# the labels below.
colnames = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
data = pd.read_csv("Iris.csv", names=colnames, header=0)

# Replace the species column with integer category codes.
data = data.assign(species=data['species'].astype('category').cat.codes)

# Feature matrix: every column but the last, as floats. Labels: last column.
X = np.array(data.iloc[:, :-1].values, dtype=float)
y = data.iloc[:, -1].values

# --- Network dimensions and hyperparameters ---------------------------------
input_size = X.shape[1]
hidden_size = 5
output_size = np.unique(y).size   # one output unit per distinct label
learning_rate = 0.001
epochs = 1000

# --- Parameter initialisation -----------------------------------------------
# Seeded uniform [0, 1) draws; the draw order (W1, b1, W2, b2) fixes the
# values, so it must stay as is.
np.random.seed(0)
W1 = np.random.rand(input_size, hidden_size)
b1 = np.random.rand(hidden_size)
W2 = np.random.rand(hidden_size, output_size)
b2 = np.random.rand(output_size)
def forward(X):
    """Forward pass through the two-layer network.

    Uses the module-level parameters W1, b1, W2 and b2: an affine map followed
    by ReLU, then a second affine map followed by softmax.

    Returns:
        (hidden activations after ReLU, softmax output) as a pair.
    """
    hidden_pre = np.dot(X, W1) + b1
    hidden_act = relu(hidden_pre)
    logits = np.dot(hidden_act, W2) + b2
    return hidden_act, softmax(logits)
def one_hot_encode(y, num_classes):
    """Map integer labels to one-hot rows.

    Row k of a num_classes x num_classes identity matrix is the one-hot vector
    for label k, so indexing the matrix with ``y`` selects one row per label.
    """
    identity_rows = np.eye(num_classes)
    return identity_rows[y]
y_encoded = one_hot_encode(y, output_size)
n_samples = X.shape[0]

# Full-batch gradient descent on the mean cross-entropy loss.
for epoch in range(epochs):
    a1, a2 = forward(X)

    # Mean cross-entropy; the 1e-15 offset keeps log() away from zero.
    per_sample_loglik = np.sum(y_encoded * np.log(a2 + 1e-15), axis=1)
    loss = -np.mean(per_sample_loglik)

    # Error terms. The hidden error must use W2 as it was during this forward
    # pass, so it is computed before any parameter is modified. The (a1 > 0)
    # mask zeroes the error wherever the ReLU output is zero.
    output_error = a2 - y_encoded
    hidden_error = output_error.dot(W2.T) * (a1 > 0)

    # Batch-averaged gradients.
    dW2 = a1.T.dot(output_error) / n_samples
    db2 = output_error.sum(axis=0) / n_samples
    dW1 = X.T.dot(hidden_error) / n_samples
    db1 = hidden_error.sum(axis=0) / n_samples

    # In-place updates so forward() sees the new values on the next epoch.
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2

    # Report the loss (measured before this epoch's update) every 100 epochs.
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.3f}")
# Compare predicted and actual classes on the first 10 rows of X.
sample_data = X[:10]
# forward() returns (hidden activations, class probabilities); only the
# probabilities are needed. Index the tuple instead of unpacking into `_`,
# because binding `_` in a notebook shadows IPython's last-output variable.
probabilities = forward(sample_data)[1]
# The predicted class is the index of the largest probability in each row.
predictions = np.argmax(probabilities, axis=1)
print("\nPredicted classes for first 10 samples:\n", predictions)
print("Actual expected classes for first 10 samples:\n", y[:10])

Epoch 0, Loss: 3.594
Epoch 100, Loss: 1.020
Epoch 200, Loss: 0.998
Epoch 300, Loss: 0.975
Epoch 400, Loss: 0.950
Epoch 500, Loss: 0.925
Epoch 600, Loss: 0.899
Epoch 700, Loss: 0.872
Epoch 800, Loss: 0.844
Epoch 900, Loss: 0.817

Predicted classes for first 10 samples:
 [0 0 0 0 0 0 0 0 0 0]
Actual expected classes for first 10 samples:
 [0 0 0 0 0 0 0 0 0 0]
