In [50]:
for name in ['X_batch', 'y_pred_batch', 'y_true_batch']:
    if name in globals():
        del globals()[name]
        
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Confirm shapes
print("Full X shape:", X.shape)
print("Full y shape:", y.shape)

# Train/Validation split
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# One-hot encode the labels
def one_hot(y, num_classes=None):
    if num_classes is None:
        num_classes = np.max(y) + 1
    return np.eye(num_classes)[y]

y_train_onehot = one_hot(y_train)
y_val_onehot = one_hot(y_val)

# Confirm matching shapes
print("X_train shape:", X_train.shape)
print("y_train_onehot shape:", y_train_onehot.shape)

Full X shape: (150, 4)
Full y shape: (150,)
X_train shape: (120, 4)
y_train_onehot shape: (120, 3)


In [51]:
import numpy as np

def softmax_2d(logits):
    max_vals = np.max(logits, axis = 1, keepdims=True)
    e_x = np.exp(logits - max_vals) # stability
    return e_x / np.sum(e_x, axis = 1, keepdims=True)

# Test case
logits = np.array([[1.0, 3.0, 2.0],
                   [2.0, 2.0, 2.0]])
print("Softmax Output:")
print(softmax_2d(logits))
 

Softmax Output:
[[0.09003057 0.66524096 0.24472847]
 [0.33333333 0.33333333 0.33333333]]


In [52]:
import numpy as np
def cross_entropy_loss(Y, P):
    epsilon = 1e-10
    loss = -np.mean(np.sum(Y * np.log(P + epsilon), axis=1))
    return loss

# Test case
y_true = np.array([[1, 0, 0], [0, 1, 0]])
y_pred = np.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]])
    
print("Cross Entropy Loss: ", cross_entropy_loss(y_true, y_pred))

Cross Entropy Loss:  0.2899092474925425


In [53]:
import numpy as np

def softmax_2d(logits):
    max_vals = np.max(logits, axis = 1, keepdims=True)
    e_x = np.exp(logits - max_vals) # stability
    return e_x / np.sum(e_x, axis = 1, keepdims=True)

def forward_pass(X, W, b):
    logits = np.dot(X, W) + b
    probs = softmax_2d(logits)
    return probs

# Test case
X = np.array([[1.0, 3.0, 2.0],
              [2.0, 2.0, 2.0]]) # Matrix multiply
W = np.random.randn(3, 3) * 0.01 # Weight Matrix.
b = np.zeros((1, 3)) # Bias Vector.

# Run the forward pass
probs = forward_pass(X, W, b)
print(probs)

[[0.33303334 0.34750787 0.31945879]
 [0.33052956 0.3409587  0.32851174]]


In [54]:
def compute_gradients(X, y_true, y_pred):
    dlogits = y_pred - y_true
    dW = X.T @ dlogits 
    db = np.sum(dlogits, axis=0, keepdims=True)
    return dW, db

In [55]:
# Example test input
X_batch = np.array([[1.0, 2.0, 3.0],
                    [4.0, 5.0, 6.0]])

y_true_batch = np.array([[0, 1, 0],
                         [1, 0, 0]])

# Fake predictions (softmax output)
y_pred_batch = np.array([[0.2, 0.7, 0.1],
                         [0.6, 0.3, 0.1]])

# Call function
dW, db = compute_gradients(X_batch, y_true_batch, y_pred_batch)

# Output
print("Gradient of W:\n", dW)
print("Gradient of b:\n", db)


Gradient of W:
 [[-1.4  0.9  0.5]
 [-1.6  0.9  0.7]
 [-1.8  0.9  0.9]]
Gradient of b:
 [[-2.00000000e-01 -5.55111512e-17  2.00000000e-01]]


In [61]:
# Initialize parameters
n_features = X_train.shape[1]
n_classes = y_train_onehot.shape[1]

W = np.random.randn(n_features, n_classes) * 0.01 # small random weights
b = np.zeros((1, n_classes))

# set hyperparameters
learning_rate = 0.1
n_epochs = 1000
train_losses = []

for epoch in range(n_epochs):
    # 1. Forward pass
    y_pred = forward_pass(X_train, W, b)

    # 2. Compute training loss
    loss = cross_entropy_loss(y_train_onehot, y_pred)
    train_losses.append(loss)

    # Compute gradients
    dW, db = compute_gradients(X_train, y_train_onehot, y_pred)

    # 4. Update weights and biases
    W -= learning_rate * dW
    b -= learning_rate * db

    # show progress every 100 epochs 
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")



Epoch 0, Loss: 1.0950
Epoch 100, Loss: 7.8672
Epoch 200, Loss: 0.7024
Epoch 300, Loss: 0.7651
Epoch 400, Loss: 0.6865
Epoch 500, Loss: 0.6631
Epoch 600, Loss: 0.6217
Epoch 700, Loss: 0.5797
Epoch 800, Loss: 0.5745
Epoch 900, Loss: 0.5755


In [62]:
# Get predicted probabilities for validation set
y_val_pred_probs = forward_pass(X_val, W, b)

# Convert probabilities to class predictions
y_val_pred = np.argmax(y_val_pred_probs, axis=1)

In [63]:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_val, y_val_pred)
print(f"Validation Accuracy: {accuracy:.4f}")

Validation Accuracy: 0.9667
