In [2]:
import pandas as pd
import numpy as np

# One-hot encode the labels
def one_hot_encode(labels, num_classes):
    return np.eye(num_classes)[labels]

# Map string labels to integers
def label_to_int(labels):
    label_map = {"iris-setosa": 0, "iris-versicolor": 1, "iris-virginica": 2}
    # Strip any leading/trailing spaces and convert to lowercase for consistency
    return np.array([label_map[label.strip().lower()] for label in labels])

# Load the Iris dataset from a CSV file
data = pd.read_csv('iris.csv')

# Assuming the last column is the label and the rest are features
X = data.iloc[:, :-1].values  # All columns except the last one (features)
y = data.iloc[:, -1].values   # Only the last column (label)

# Convert string labels to integers
y = label_to_int(y)

# One-hot encode the labels
y_encoded = one_hot_encode(y, 3)

# Normalize the features (Standardization)
def normalize(X):
    mean = np.mean(X, axis=0)
    std = np.std(X, axis=0)
    return (X - mean) / std

# Split the dataset into training and testing sets
def train_test_split(X, y, test_size=0.2):
    m = X.shape[0]
    split_idx = int(m * (1 - test_size))
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]
    return X_train, X_test, y_train, y_test

X_train, X_test, y_train, y_test = train_test_split(X, y_encoded)

# Normalize the data
X_train = normalize(X_train)
X_test = normalize(X_test)

# Neural Network Hyperparameters
input_size = X_train.shape[1]  # Number of features (4 for Iris)
hidden_size = 4  # Number of neurons in the hidden layer
output_size = y_train.shape[1]  # Number of classes (3 for Iris)

# Initialize weights and biases
np.random.seed(42)
W1 = np.random.randn(input_size, hidden_size)  # Weights from input to hidden layer
b1 = np.zeros((1, hidden_size))  # Bias for hidden layer
W2 = np.random.randn(hidden_size, output_size)  # Weights from hidden to output layer
b2 = np.zeros((1, output_size))  # Bias for output layer

# Activation functions
def relu(x):
    return np.maximum(0, x)

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

# Loss function (Cross-entropy)
def compute_loss(y_true, y_pred):
    m = y_true.shape[0]
    log_likelihood = -np.log(y_pred[range(m), np.argmax(y_true, axis=1)])
    loss = np.sum(log_likelihood) / m
    return loss

# Forward pass
def forward(X):
    Z1 = np.dot(X, W1) + b1
    A1 = relu(Z1)
    Z2 = np.dot(A1, W2) + b2
    A2 = softmax(Z2)
    return A1, A2

# Backward pass (Gradient descent)
def backward(X, y, A1, A2, learning_rate=0.01):
    global W1, b1, W2, b2
    m = X.shape[0]
    
    # Compute the gradients
    dZ2 = A2 - y  # Derivative of loss with respect to Z2
    dW2 = np.dot(A1.T, dZ2) / m
    db2 = np.sum(dZ2, axis=0, keepdims=True) / m
    
    dA1 = np.dot(dZ2, W2.T)
    dZ1 = dA1 * (A1 > 0)  # Derivative of ReLU
    dW1 = np.dot(X.T, dZ1) / m
    db1 = np.sum(dZ1, axis=0, keepdims=True) / m
    
    # Update weights and biases
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2
    
    return W1, b1, W2, b2

# Train the model
def train(X_train, y_train, epochs=1000, learning_rate=0.01):
    for epoch in range(epochs):
        A1, A2 = forward(X_train)
        loss = compute_loss(y_train, A2)
        global W1, b1, W2, b2
        W1, b1, W2, b2 = backward(X_train, y_train, A1, A2, learning_rate)
        
        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {loss}")

    return W1, b1, W2, b2

# Prediction function
def predict(X):
    _, A2 = forward(X)
    return np.argmax(A2, axis=1)

# Train the model
W1, b1, W2, b2 = train(X_train, y_train, epochs=1000, learning_rate=0.01)

# Evaluate the model
y_pred_train = predict(X_train)
y_pred_test = predict(X_test)

# Calculate accuracy
train_accuracy = np.mean(y_pred_train == np.argmax(y_train, axis=1)) * 100
test_accuracy = np.mean(y_pred_test == np.argmax(y_test, axis=1)) * 100

print(f"Training Accuracy: {train_accuracy}%")
print(f"Test Accuracy: {test_accuracy}%")


Epoch 0, Loss: 2.6778987589627032
Epoch 100, Loss: 0.6065460953116352
Epoch 200, Loss: 0.41685083611232226
Epoch 300, Loss: 0.3277902586435314
Epoch 400, Loss: 0.2769445842628548
Epoch 500, Loss: 0.23808678474759143
Epoch 600, Loss: 0.2083962225320746
Epoch 700, Loss: 0.1854733485380891
Epoch 800, Loss: 0.16529961514058714
Epoch 900, Loss: 0.14894189846420378
Training Accuracy: 98.33333333333333%
Test Accuracy: 6.666666666666667%
