<h1 style="color:white;">
    Iris Multiclass Logistic Regression
</h1>
<h3 style="color:white;">
    Single Layer Perceptron with Softmax Output Head
</h3>

<h3 style="color:white;">
    Load and split data into training and testing sets
</h3>

In [4]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder  # consider StandardScaler too

# Load the Iris dataset and pull out the feature matrix and integer class labels.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=14,
)

print(
    f"Loaded {X.shape[0]} samples with {X.shape[1]} features each."
    f"\nTraining set: {X_train.shape[0]} samples"
    f"\nTesting set: {X_test.shape[0]} samples"
)

Loaded 150 samples with 4 features each.
Training set: 120 samples
Testing set: 30 samples


In [5]:
# --- One-hot encode targets ---
# The encoder expects a 2-D column of labels, so a trailing axis is added.
# Categories are learned from the training labels only and then reused,
# unchanged, to encode the test labels.
encoder = OneHotEncoder(sparse_output=False)
y_train_oh = encoder.fit_transform(y_train[:, None])
y_test_oh = encoder.transform(y_test[:, None])

In [6]:
import numpy as np


def linear_logits(X, weights, bias):
    """
    Affine map from inputs to raw class scores: ``X . weights + bias``.

    `bias` is added after the product and relies on NumPy broadcasting, so a
    per-class bias vector is applied to every sample row.
    """
    weighted_sum = np.dot(X, weights)
    return weighted_sum + bias


def softmax(z):
    """
    Numerically stabilized softmax over the class axis.

    Parameters
    ----------
    z : array_like
        Logits. Normally shaped (N, K): one row per sample, one column per
        class. A single 1-D vector of K logits is also accepted and treated
        as one sample.

    Returns
    -------
    numpy.ndarray
        Probabilities with the same shape as `z`. For (N, K) input each row
        sums to 1; for 1-D input the whole vector sums to 1.
    """
    z = np.asarray(z)
    # Axis 1 is the class axis for batched input (unchanged behaviour); a lone
    # 1-D logit vector has no axis 1, so normalize over its only axis instead.
    class_axis = 1 if z.ndim >= 2 else -1

    # Stability trick: subtracting the maximum leaves the result unchanged
    # mathematically but keeps np.exp from overflowing on large logits.
    shifted = z - np.max(z, axis=class_axis, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z, axis=class_axis, keepdims=True)


def cross_entropy(y_true, y_pred):
    """
    Mean cross-entropy between one-hot targets and predicted probabilities.

    For each sample the loss is the negative log of the probability assigned
    to the true class; the function returns the average over all samples.
    Smaller values mean the predictions sit closer to the targets.

    Parameters
    ----------
    y_true : array of shape (n_samples, n_classes)
        One-hot encoded labels.
    y_pred : array of shape (n_samples, n_classes)
        Predicted class probabilities (each row is expected to sum to 1).

    Returns
    -------
    float
        Average per-sample cross-entropy.

    Notes
    -----
    Requires NumPy to be available as `np`. Intended for multi-class
    classification.
    """
    eps = 1e-15
    # Keep probabilities strictly inside (0, 1) so the logarithm stays finite.
    safe_probs = np.clip(y_pred, eps, 1 - eps)

    # Only the true-class column survives the multiplication by the one-hot row.
    per_sample_nll = -(y_true * np.log(safe_probs)).sum(axis=1)

    return per_sample_nll.mean()

In [7]:
np.random.seed(14)
n_features, n_classes = X.shape[1], y_train_oh.shape[1]

# Weight matrix is (n_features, n_classes): one weight per feature/class pair,
# scaled down so the initial logits are small.
weights = np.random.randn(n_features, n_classes) * 0.01
# One bias per class (drawn at unit scale, unlike the weights).
bias = np.random.randn(n_classes)
learning_rate = 0.01

# Loss of the untrained model on the training set, as a baseline.
y_pred = linear_logits(X_train, weights, bias)
y_pred_prob = softmax(y_pred)
loss = cross_entropy(y_train_oh, y_pred_prob)

print(f"Weights:\n{weights}\n")
print(f"Biases:\n{bias}\n")
print(f"Initial loss: {float(loss)}")

Weights:
[[ 0.01551339  0.00079186  0.00173977]
 [-0.00072337 -0.02004329  0.00144678]
 [-0.01501169  0.00211109 -0.00558205]
 [ 0.01084529 -0.00186289  0.00014661]]

Biases:
[-1.07556947  0.64225207 -0.18033671]

Initial loss: 1.267801484586637


In [8]:
# Full-batch gradient descent on the softmax-regression parameters.
n_epochs = 10000
log_interval = 1000
inv_n_train = 1 / X_train.shape[0]  # averaging factor; constant across epochs

for epoch in range(n_epochs):
    # Forward pass: logits -> class probabilities -> mean cross-entropy
    y_pred = linear_logits(X_train, weights, bias)
    y_pred_prob = softmax(y_pred)
    loss = cross_entropy(y_train_oh, y_pred_prob)

    if epoch % log_interval == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

    # Backward pass: for softmax + cross-entropy the gradient with respect to
    # the logits is simply (probabilities - one-hot targets).
    error = y_pred_prob - y_train_oh
    weights_grad = inv_n_train * np.dot(X_train.T, error)
    bias_grad = inv_n_train * np.sum(error, axis=0)

    # Gradient-descent step (updates the parameter arrays in place)
    weights -= learning_rate * weights_grad
    bias -= learning_rate * bias_grad

Epoch 0, Loss: 1.2678
Epoch 1000, Loss: 0.3648
Epoch 2000, Loss: 0.2776
Epoch 3000, Loss: 0.2317
Epoch 4000, Loss: 0.2031
Epoch 5000, Loss: 0.1834
Epoch 6000, Loss: 0.1690
Epoch 7000, Loss: 0.1580
Epoch 8000, Loss: 0.1492
Epoch 9000, Loss: 0.1421


In [9]:
# Evaluate the trained parameters on the held-out test split.
y_test_pred = linear_logits(X_test, weights, bias)
model = softmax(y_test_pred)  # NOTE: despite the name, this holds class probabilities

test_loss = cross_entropy(y_test_oh, model)
print(f"Test Loss: {test_loss:.4f}")

# Predicted class = column with the highest probability; accuracy is the
# fraction of predictions matching the integer test labels.
model_labels = model.argmax(axis=1)
acc = (model_labels == y_test).mean()
print(f"Test Accuracy: {acc:.4f}")

Test Loss: 0.0798
Test Accuracy: 1.0000
