In [4]:
import pandas as pd

# Load data
df = pd.read_csv('D:/Varsity Lab Works/AI lab/Iris.csv')

# Remove Id column if exists
if 'Id' in df.columns:
    df = df.drop('Id', axis=1)

print("Dataset:")
df.head()

Dataset:


Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [9]:
from sklearn.model_selection import train_test_split
import numpy as np

# Prepare features and target
X = df.drop("Species", axis=1)
y = df["Species"]

# Train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ONE-HOT ENCODE THE LABELS
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

def one_hot_encode(y, num_classes):
    one_hot = np.zeros((len(y), num_classes))
    one_hot[np.arange(len(y)), y] = 1
    return one_hot

y_train_onehot = one_hot_encode(y_train_encoded, 3)
y_test_onehot = one_hot_encode(y_test_encoded, 3)


In [10]:
# Network architecture
input_size = X_train_scaled.shape[1]
hidden_size = 6
output_size = 3
learning_rate = 0.01

In [11]:
# Initialize weights and biases
np.random.seed(42)
W1 = np.random.randn(input_size, hidden_size) * 0.01
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size) * 0.01
b2 = np.zeros((1, output_size))

In [14]:
# ===== ENHANCEMENT 1: Activation Functions =====
def sigmoid(x):
    return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

def sigmoid_derivative(x):
    return x * (1 - x)

activation_function = sigmoid 
activation_derivative = sigmoid_derivative


In [19]:

# ===== ENHANCEMENT 2: Momentum Optimizer =====
velocity_W1 = np.zeros_like(W1)
velocity_b1 = np.zeros_like(b1)
velocity_W2 = np.zeros_like(W2)
velocity_b2 = np.zeros_like(b2)
momentum = 0.9

# Forward propagation
def forward_propagation(X):
    z1 = np.dot(X, W1) + b1
    a1 = activation_function(z1)
    z2 = np.dot(a1, W2) + b2
    a2 = sigmoid(z2)
    return z1, a1, z2, a2


In [20]:
# ===== ENHANCEMENT 3: Backward Propagation with Momentum =====
def backward_propagation(X, y, z1, a1, z2, a2):
    global W1, b1, W2, b2, velocity_W1, velocity_b1, velocity_W2, velocity_b2
    m = X.shape[0]
    
    dz2 = a2 - y
    dW2 = np.dot(a1.T, dz2) / m
    db2 = np.sum(dz2, axis=0, keepdims=True) / m
    
    dz1 = np.dot(dz2, W2.T) * activation_derivative(a1)
    dW1 = np.dot(X.T, dz1) / m
    db1 = np.sum(dz1, axis=0, keepdims=True) / m
    
    # Update with momentum
    velocity_W1 = momentum * velocity_W1 - learning_rate * dW1
    velocity_b1 = momentum * velocity_b1 - learning_rate * db1
    velocity_W2 = momentum * velocity_W2 - learning_rate * dW2
    velocity_b2 = momentum * velocity_b2 - learning_rate * db2
    
    W1 += velocity_W1
    b1 += velocity_b1
    W2 += velocity_W2
    b2 += velocity_b2

# Loss function
def compute_loss(y_true, y_pred):
    m = y_true.shape[0]
    return -np.sum(y_true * np.log(y_pred + 1e-9)) / m


In [17]:
# ===== ENHANCEMENT 4: Compute Accuracy Function =====
def compute_accuracy(X, y_true):
    _, _, _, y_pred = forward_propagation(X)
    predictions = np.argmax(y_pred, axis=1)
    actual = np.argmax(y_true, axis=1)
    return np.mean(predictions == actual)


In [21]:
# ===== ENHANCEMENT 5: Training with History Tracking =====
epochs = 1000
train_losses = []
test_losses = []
train_accuracies = []
test_accuracies = []

print("\n" + "="*60)
print("Training started...")
print("="*60)

for epoch in range(epochs):
    # Forward and backward pass
    z1, a1, z2, a2 = forward_propagation(X_train_scaled)
    train_loss = compute_loss(y_train_onehot, a2)
    backward_propagation(X_train_scaled, y_train_onehot, z1, a1, z2, a2)
    
    # Calculate metrics
    train_acc = compute_accuracy(X_train_scaled, y_train_onehot)
    
    # Test metrics
    _, _, _, y_test_pred = forward_propagation(X_test_scaled)
    test_loss = compute_loss(y_test_onehot, y_test_pred)
    test_acc = compute_accuracy(X_test_scaled, y_test_onehot)
    
    # Store history
    train_losses.append(train_loss)
    test_losses.append(test_loss)
    train_accuracies.append(train_acc)
    test_accuracies.append(test_acc)
    
    if epoch % 100 == 0:
        print(f"Epoch {epoch:4d} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | Test Loss: {test_loss:.4f} | Test Acc: {test_acc:.4f}")

print("="*60)



Training started...
Epoch    0 | Train Loss: 0.6966 | Train Acc: 0.3417 | Test Loss: 0.6992 | Test Acc: 0.3000
Epoch  100 | Train Loss: 1.0954 | Train Acc: 0.3417 | Test Loss: 1.0969 | Test Acc: 0.3000
Epoch  200 | Train Loss: 1.0236 | Train Acc: 0.6167 | Test Loss: 1.0174 | Test Acc: 0.6333
Epoch  300 | Train Loss: 0.6733 | Train Acc: 0.7833 | Test Loss: 0.6426 | Test Acc: 0.8000
Epoch  400 | Train Loss: 0.5385 | Train Acc: 0.8250 | Test Loss: 0.5006 | Test Acc: 0.8333
Epoch  500 | Train Loss: 0.4963 | Train Acc: 0.8750 | Test Loss: 0.4563 | Test Acc: 0.9000
Epoch  600 | Train Loss: 0.4719 | Train Acc: 0.9083 | Test Loss: 0.4309 | Test Acc: 0.9333
Epoch  700 | Train Loss: 0.4507 | Train Acc: 0.9167 | Test Loss: 0.4086 | Test Acc: 0.9333
Epoch  800 | Train Loss: 0.4272 | Train Acc: 0.9167 | Test Loss: 0.3843 | Test Acc: 0.9333
Epoch  900 | Train Loss: 0.4002 | Train Acc: 0.9250 | Test Loss: 0.3564 | Test Acc: 0.9333


In [23]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
_, _, _, y_pred = forward_propagation(X_test_scaled)
predictions = np.argmax(y_pred, axis=1)
actual = np.argmax(y_test_onehot, axis=1)

final_accuracy = accuracy_score(actual, predictions)
print(f"\nTest Accuracy: {final_accuracy:.4f} ({final_accuracy*100:.2f}%)")



Test Accuracy: 0.9667 (96.67%)
