Practical 6: Create a neural network architecture from scratch in Python and use it to perform multi-class classification on any dataset

In [1]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Activation Functions
def softmax(z):
    """Return the softmax of ``z`` taken along axis 1 (one distribution per row).

    Each row is shifted by its own maximum before exponentiating so that
    large scores cannot overflow; the shift cancels out in the ratio.
    """
    row_peak = np.max(z, axis=1, keepdims=True)
    unnormalised = np.exp(z - row_peak)
    row_total = np.sum(unnormalised, axis=1, keepdims=True)
    return unnormalised / row_total

def relu(z):
    """Rectified linear unit: element-wise maximum of 0 and ``z``."""
    floor = 0
    rectified = np.maximum(floor, z)
    return rectified

def relu_derivative(z):
    """Derivative of ReLU: 1.0 where ``z`` is strictly positive, otherwise 0.0."""
    is_positive = z > 0
    return is_positive.astype(float)

# Loss Function
def cross_entropy(y_true, y_pred):
    """Mean categorical cross-entropy over the rows of ``y_true``.

    ``y_true`` is weighted element-wise against ``log(y_pred + 1e-9)``; the
    small offset keeps the logarithm finite when a predicted value is 0.
    The summed result is divided by the number of rows in ``y_true``.
    """
    n_rows = y_true.shape[0]
    log_pred = np.log(y_pred + 1e-9)
    weighted_total = np.sum(y_true * log_pred)
    return -weighted_total / n_rows

# Neural Network Class
class NeuralNetwork:
    """Fully connected network with one ReLU hidden layer and a softmax output.

    Weights are updated with full-batch gradient descent. ``forward`` caches
    its intermediate values on the instance, and ``backward`` reads them, so
    ``forward`` must be called on the same batch before ``backward``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create weights for an ``input -> hidden -> output`` network.

        Weights are standard-normal draws from the global NumPy RNG scaled by
        ``sqrt(2 / fan_in)`` (He-style scaling); biases start at zero.
        """
        # W1 is drawn before W2 so that a seeded global RNG reproduces the
        # same weights from run to run.
        hidden_scale = np.sqrt(2. / input_size)
        self.W1 = np.random.randn(input_size, hidden_size) * hidden_scale
        self.b1 = np.zeros((1, hidden_size))

        output_scale = np.sqrt(2. / hidden_size)
        self.W2 = np.random.randn(hidden_size, output_size) * output_scale
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        """Run a forward pass and return the softmax output for each row of ``X``.

        The pre-activations (``Z1``, ``Z2``) and activations (``A1``, ``A2``)
        are stored on the instance for use by ``backward``.
        """
        hidden_pre = X @ self.W1 + self.b1
        hidden_act = relu(hidden_pre)
        logits = hidden_act @ self.W2 + self.b2
        probabilities = softmax(logits)

        self.Z1, self.A1 = hidden_pre, hidden_act
        self.Z2, self.A2 = logits, probabilities
        return probabilities

    def backward(self, X, y_true, learning_rate=0.01):
        """Apply one gradient-descent step using the values cached by ``forward``.

        ``y_true`` is subtracted element-wise from the cached softmax output,
        so it must have the same shape as that output.
        """
        batch = X.shape[0]

        # Output layer: softmax output minus target is the gradient w.r.t. Z2.
        delta_out = self.A2 - y_true
        grad_W2 = (self.A1.T @ delta_out) / batch
        grad_b2 = np.sum(delta_out, axis=0, keepdims=True) / batch

        # Hidden layer: push the error back through W2 (before it is updated)
        # and gate it by the ReLU derivative.
        delta_hidden = (delta_out @ self.W2.T) * relu_derivative(self.Z1)
        grad_W1 = (X.T @ delta_hidden) / batch
        grad_b1 = np.sum(delta_hidden, axis=0, keepdims=True) / batch

        # All gradients are computed above; now update each array in place.
        steps = (
            (self.W1, grad_W1),
            (self.b1, grad_b1),
            (self.W2, grad_W2),
            (self.b2, grad_b2),
        )
        for parameter, gradient in steps:
            parameter -= learning_rate * gradient

    def train(self, X, y, epochs=1000, learning_rate=0.01):
        """Run ``epochs`` full-batch updates, printing the loss every 100 epochs.

        The printed loss is measured before that epoch's weight update.
        """
        for i in range(epochs):
            loss = cross_entropy(y, self.forward(X))
            self.backward(X, y, learning_rate)

            if i % 100:
                continue
            print(f"Epoch {i}, Loss: {loss:.4f}")

    def predict(self, X):
        """Return, for each row of ``X``, the index of the largest softmax output."""
        return np.argmax(self.forward(X), axis=1)

# Load Dataset
# Features come straight from scikit-learn's bundled Iris data. The targets are
# reshaped to a single column because OneHotEncoder works on 2-D input.
iris = datasets.load_iris()
X = iris.data
y = iris.target.reshape(-1, 1)

# One hot encoding the target
# sparse_output=False asks for a dense array, which the NumPy-only network needs.
# NOTE(review): the `sparse_output` keyword exists only in newer scikit-learn
# releases — confirm the installed version supports it.
encoder = OneHotEncoder(sparse_output=False)
y_encoded = encoder.fit_transform(y)

# Train/Test Split
# 80/20 split; random_state fixes which rows land in each part.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Build and Train Model
# Layer sizes are derived from the data: one input per feature column and one
# output per one-hot column; the hidden width (10) is a fixed choice.
input_size = X_train.shape[1]
hidden_size = 10
output_size = y_train.shape[1]

# NOTE: NeuralNetwork draws its weights from the global NumPy RNG and no seed is
# set in this cell, so the printed losses and accuracy can differ between runs
# even though the split itself is fixed.
nn = NeuralNetwork(input_size, hidden_size, output_size)
nn.train(X_train, y_train, epochs=1000, learning_rate=0.01)

# Evaluate
# predict() returns class indices, so the one-hot test labels are converted
# back to indices with argmax before comparing.
y_pred_test = nn.predict(X_test)
y_true_test = np.argmax(y_test, axis=1)

# Fraction of matching predictions, printed as a percentage.
accuracy = np.mean(y_pred_test == y_true_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


Epoch 0, Loss: 1.6441
Epoch 100, Loss: 0.6327
Epoch 200, Loss: 0.4530
Epoch 300, Loss: 0.3753
Epoch 400, Loss: 0.3233
Epoch 500, Loss: 0.2827
Epoch 600, Loss: 0.2498
Epoch 700, Loss: 0.2226
Epoch 800, Loss: 0.2003
Epoch 900, Loss: 0.1820
Test Accuracy: 96.67%


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Add header names
# 13 feature columns followed by the target column 'heart_disease'.
headers = ['age', 'sex', 'chest_pain', 'resting_blood_pressure', 'serum_cholestoral',
'fasting_blood_sugar', 'resting_ecg_results', 'max_heart_rate_achieved',
'exercise_induced_angina', 'oldpeak', 'slope_of_the_peak', 'num_of_major_vessels',
'thal', 'heart_disease']
# Load the dataset
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
# NOTE(review): names=headers supplies the column names, so the file is presumably
# header-less; if it does contain a header row, that row is read as data and
# turned into NaN by the coercion below — verify against the actual file.
heart_df = pd.read_csv('/home/student/Downloads/heart.csv', sep=',', names=headers)
# Convert input to numeric values (if any non-numeric values exist)
# errors='coerce' replaces anything unparseable with NaN instead of raising.
heart_df = heart_df.apply(pd.to_numeric, errors='coerce')
# Handle missing values (if any)
# NaNs are filled with the column mean computed over the whole file, i.e. before
# the train/test split.
heart_df.fillna(heart_df.mean(), inplace=True)
# Feature matrix: every column except the target. X is still a DataFrame here;
# the StandardScaler step below is what produces the arrays used for training.
X = heart_df.drop(columns=['heart_disease'])
# Remap the target labels: 1 -> 0 and 2 -> 1
heart_df['heart_disease'] = heart_df['heart_disease'].replace({1: 0, 2: 1})
# Labels as a column vector of shape (n_samples, 1)
y_label = heart_df['heart_disease'].values.reshape(-1, 1)
# Split data into train and test sets
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y_label, test_size=0.2, random_state=2)
# Standardize the dataset
# The scaler is fitted on the training split only and then applied to the test
# split, so test statistics do not influence the scaling.
sc = StandardScaler()
Xtrain = sc.fit_transform(Xtrain)
Xtest = sc.transform(Xtest)
# Report column dtypes and the shapes of the four splits.
print(heart_df.dtypes)
print(f"\nShape of train set is {Xtrain.shape}")
print(f"\nShape of test set is {Xtest.shape}")
print(f"\nShape of train label is {ytrain.shape}")
print(f"\nShape of test labels is {ytest.shape}")
# Neural Network Class
class NeuralNet:
    """Two-layer binary classifier: ReLU hidden layer, sigmoid output.

    Trained with full-batch gradient descent on the binary cross-entropy
    loss. ``layers`` holds the unit counts ``[input, hidden, output]``.
    Weights, biases and the cached forward-pass values all live in the
    ``params`` dictionary.
    """

    def __init__(self, layers=None, learning_rate=0.001, iterations=1000):
        """Store the hyper-parameters; weights are created later by ``fit``.

        Args:
            layers: Unit counts ``[input, hidden, output]``. Defaults to
                ``[13, 8, 1]`` when omitted.
            learning_rate: Initial gradient-descent step size.
            iterations: Number of full-batch updates performed by ``fit``.
        """
        self.params = {}
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.loss = []
        # A None sentinel avoids sharing one mutable default list between
        # instances.
        self.layers = [13, 8, 1] if layers is None else layers
        self.X = None
        self.y = None

    def init_weights(self):
        """Initialise weights with small random values and biases with zeros.

        Reseeds the global NumPy RNG with 1, so the starting weights are the
        same on every call.
        """
        np.random.seed(1)
        self.params["W1"] = np.random.randn(self.layers[0], self.layers[1]) * 0.01
        self.params['b1'] = np.zeros((1, self.layers[1]))
        self.params['W2'] = np.random.randn(self.layers[1], self.layers[2]) * 0.01
        self.params['b2'] = np.zeros((1, self.layers[2]))

    def sigmoid(self, Z):
        """Return the logistic function of ``Z``, element-wise."""
        # Clipping keeps np.exp from overflowing for large negative inputs;
        # the sigmoid is already saturated far inside this range.
        return 1 / (1 + np.exp(-np.clip(Z, -500, 500)))

    def dSigmoid(self, Z):
        """Return the derivative of the sigmoid evaluated at ``Z``."""
        sig = self.sigmoid(Z)
        return sig * (1 - sig)

    def relu(self, Z):
        """Return the element-wise maximum of 0 and ``Z``."""
        return np.maximum(0, Z)

    def dRelu(self, Z):
        """Return 1.0 where ``Z`` is strictly positive and 0.0 elsewhere."""
        return (Z > 0).astype(float)

    def eta(self, x):
        """Floor ``x`` at 1e-10 (kept for callers; unused inside this class)."""
        return np.maximum(x, 1e-10)

    def entropy_loss(self, y, yhat):
        """Return the mean binary cross-entropy between ``y`` and ``yhat``.

        ``yhat`` is clipped away from 0 and 1 so both logarithms stay finite.
        """
        epsilon = 1e-10
        yhat = np.clip(yhat, epsilon, 1 - epsilon)
        loss = -np.mean(y * np.log(yhat) + (1 - y) * np.log(1 - yhat))
        return loss

    def forward_propagation(self):
        """Run a forward pass on the stored training data.

        Caches ``Z1``, ``A1`` and ``Z2`` in ``params`` for back-propagation.

        Returns:
            Tuple ``(yhat, loss)``: the sigmoid outputs and their loss
            against the stored labels.
        """
        Z1 = self.X.dot(self.params['W1']) + self.params['b1']
        A1 = self.relu(Z1)
        Z2 = A1.dot(self.params['W2']) + self.params['b2']
        yhat = self.sigmoid(Z2)
        loss = self.entropy_loss(self.y, yhat)
        self.params['Z1'] = Z1
        self.params['Z2'] = Z2
        self.params['A1'] = A1
        return yhat, loss

    def back_propagation(self, yhat):
        """Update weights and biases with one gradient-descent step.

        Args:
            yhat: Sigmoid outputs returned by the preceding
                ``forward_propagation`` call.
        """
        m = self.X.shape[0]
        # For a sigmoid output trained with binary cross-entropy the chain
        # rule collapses to dL/dZ2 = (yhat - y) / m. Multiplying by
        # sigmoid'(Z2) as well would give the gradient of a squared-error
        # loss, not of the loss reported by entropy_loss.
        dl_wrt_z2 = (yhat - self.y) / m
        # Uses W2 before it is updated below.
        dl_wrt_A1 = dl_wrt_z2.dot(self.params['W2'].T)
        dl_wrt_w2 = self.params['A1'].T.dot(dl_wrt_z2)
        dl_wrt_b2 = np.sum(dl_wrt_z2, axis=0, keepdims=True)
        dl_wrt_z1 = dl_wrt_A1 * self.dRelu(self.params['Z1'])
        dl_wrt_w1 = self.X.T.dot(dl_wrt_z1)
        dl_wrt_b1 = np.sum(dl_wrt_z1, axis=0, keepdims=True)
        self.params['W1'] -= self.learning_rate * dl_wrt_w1
        self.params['W2'] -= self.learning_rate * dl_wrt_w2
        self.params['b1'] -= self.learning_rate * dl_wrt_b1
        self.params['b2'] -= self.learning_rate * dl_wrt_b2

    def fit(self, X, y):
        """Train on ``X`` / ``y`` for ``self.iterations`` full-batch updates.

        The loss of every iteration is appended to ``self.loss`` and printed
        every 100 iterations. ``self.learning_rate`` is multiplied by 0.9
        every 100 iterations (except iteration 0); the decayed value is kept
        on the instance after training.

        Args:
            X: Feature matrix, one row per sample.
            y: Labels; a 1-D vector is treated as a single column.
        """
        self.X = np.asarray(X, dtype=float)
        labels = np.asarray(y, dtype=float)
        if labels.ndim == 1:
            # A flat label vector would broadcast against the (m, 1) outputs
            # into an (m, m) matrix, so make it a column.
            labels = labels.reshape(-1, 1)
        self.y = labels
        self.init_weights()
        for i in range(self.iterations):
            yhat, loss = self.forward_propagation()
            self.back_propagation(yhat)
            self.loss.append(loss)
            if i % 100 == 0:
                # Learning rate decay (optional)
                if i != 0:
                    self.learning_rate *= 0.9  # Decay learning rate
                print(f"Iteration {i}, Loss: {loss}")

    def predict(self, X):
        """Return rounded sigmoid outputs (0.0 or 1.0) for each row of ``X``."""
        X = np.asarray(X, dtype=float)
        Z1 = X.dot(self.params['W1']) + self.params['b1']
        A1 = self.relu(Z1)
        Z2 = A1.dot(self.params['W2']) + self.params['b2']
        yhat = self.sigmoid(Z2)
        return np.round(yhat)

    def acc(self, y, yhat):
        """Return the percentage of entries where ``y`` equals ``yhat``."""
        y = np.asarray(y)
        yhat = np.asarray(yhat)
        if y.shape != yhat.shape and y.size == yhat.size:
            # Align e.g. (m,) with (m, 1) so the comparison stays element-wise
            # instead of broadcasting to (m, m).
            yhat = yhat.reshape(y.shape)
        return np.mean(y == yhat) * 100

    def plot_loss(self):
        """Plot the per-iteration training loss recorded in ``self.loss``."""
        plt.plot(self.loss)
        plt.xlabel("Iterations")
        plt.ylabel("Loss")
        plt.title("Loss curve during training")
        plt.show()
# Train the model
# layers = [inputs, hidden units, outputs]; 13 matches the number of feature
# columns left after dropping 'heart_disease'.
# NOTE: this rebinds the names `nn` and `accuracy` used by the earlier Iris cell.
nn = NeuralNet(layers=[13, 8, 1], learning_rate=0.001, iterations=1000)
nn.fit(Xtrain, ytrain)
# Evaluate the model
# predict() returns rounded sigmoid outputs; acc() reports the percentage of
# entries equal to the test labels.
y_pred = nn.predict(Xtest)
accuracy = nn.acc(ytest, y_pred)
print(f"Test Accuracy: {accuracy}%")
# Plot the loss curve
nn.plot_loss()