In [23]:
import numpy as np


In [24]:
def softmax(scores, axis=1):
    """Convert raw class scores into probabilities with a stable softmax.

    Parameters
    ----------
    scores : array_like
        Raw scores (logits). The usual input is a 2-D array with one row per
        sample and one column per class; a 1-D vector is treated as the
        scores of a single sample.
    axis : int, optional
        Axis along which the scores are normalised. Defaults to 1, i.e. each
        row of a 2-D array is normalised independently. For 1-D input the
        only available axis is used instead.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``scores`` whose entries along ``axis``
        sum to 1.
    """
    scores = np.asarray(scores)
    if scores.ndim == 1:
        # A lone score vector has no axis 1; normalise over its single axis.
        axis = 0

    # Subtracting the per-sample maximum does not change the result
    # mathematically but keeps np.exp from overflowing on large scores.
    shifted = scores - np.max(scores, axis=axis, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=axis, keepdims=True)

In [25]:
def compute_loss(y_true, y_pred):
    """Mean categorical cross-entropy between one-hot targets and predictions.

    Parameters
    ----------
    y_true : array_like
        One-hot (or soft) target matrix with one row per sample.
    y_pred : array_like
        Predicted class probabilities, same shape as ``y_true``.

    Returns
    -------
    float
        ``-sum(y_true * log(y_pred)) / n_samples``; never negative for
        valid probability inputs.

    Raises
    ------
    ValueError
        If ``y_true`` contains no samples (the mean would be 0/0).
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    n_samples = y_true.shape[0]
    if n_samples == 0:
        raise ValueError("compute_loss requires at least one sample")

    # Clip instead of adding an epsilon: log(1 + eps) is slightly positive,
    # which would make a perfect prediction yield a negative loss. The lower
    # bound still protects against log(0).
    eps = 1e-15
    clipped = np.clip(y_pred, eps, 1.0)
    loss = -np.sum(y_true * np.log(clipped)) / n_samples
    return loss

In [26]:
def one_hot_encode(y, num_classes):
    """Build a one-hot matrix from integer class labels.

    Parameters
    ----------
    y : array_like of int
        Class labels in ``[0, num_classes)``. Any shape is accepted (list,
        1-D array, column vector); it is flattened before encoding.
    num_classes : int
        Number of columns in the result.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_labels, num_classes)`` with a single 1 per
        row, in the column given by that row's label.

    Raises
    ------
    IndexError
        If any label lies outside ``[0, num_classes)``.
    """
    # Flatten so that an (n, 1) column of labels does not broadcast against
    # the row indices and set every entry of the matrix.
    labels = np.asarray(y).ravel()
    one_hot = np.zeros((labels.size, num_classes))
    if labels.size == 0:
        return one_hot

    # Negative indices would otherwise silently wrap around to the last
    # classes, so reject them along with labels that are too large.
    if labels.min() < 0 or labels.max() >= num_classes:
        raise IndexError(
            "labels must lie in [0, %d); got values in [%s, %s]"
            % (num_classes, labels.min(), labels.max())
        )

    one_hot[np.arange(labels.size), labels] = 1
    return one_hot

In [27]:
def batch_GD_softmax(X, y, num_classes, learning_rate=0.01, max_epochs=1000):
    """Fit a softmax (multinomial logistic) classifier with batch gradient descent.

    The first 80% of the rows are used for training and the remaining 20%
    for validation. The split is positional: rows are NOT shuffled here, so
    shuffle beforehand if the data is ordered. Training always runs for
    ``max_epochs`` epochs; the parameters with the lowest validation loss
    seen along the way are returned.

    Parameters
    ----------
    X : array_like, shape (n_samples, n_features)
        Feature matrix. No bias column is added by this function.
    y : array_like of int, length n_samples
        Integer class labels; flattened before use.
    num_classes : int
        Number of classes (columns of ``theta``).
    learning_rate : float, optional
        Step size of each gradient update.
    max_epochs : int, optional
        Number of full-batch updates to perform.

    Returns
    -------
    best_theta : numpy.ndarray or None
        Parameters of shape (n_features, num_classes) with the lowest
        validation loss; None if no epoch produced a finite improvement
        (e.g. ``max_epochs == 0``).
    best_epoch : int or None
        Epoch index at which ``best_theta`` was recorded.
    minimum_val_error : float
        Validation loss of ``best_theta`` (``inf`` if none was recorded).
    training_losses : list of float
        Training loss measured at the start of every epoch.
    validation_losses : list of float
        Validation loss measured at the start of every epoch.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` disagree on the number of samples, or if there
        are too few samples for both splits to be non-empty.
    """
    X = np.asarray(X)
    y = np.asarray(y).ravel()

    n_samples, n_features = X.shape
    if y.size != n_samples:
        # Without this check a longer y would be silently truncated and
        # misaligned by the slicing below.
        raise ValueError(
            "X has %d samples but y has %d labels" % (n_samples, y.size)
        )

    train_ratio = 0.8
    n_train = int(train_ratio * n_samples)
    n_val = n_samples - n_train
    if n_train == 0 or n_val == 0:
        # An empty split would turn every loss and gradient into NaN.
        raise ValueError(
            "need enough samples for non-empty train and validation splits; "
            "got %d" % n_samples
        )

    # Small random initialisation; drawn from NumPy's global RNG, so seed it
    # with np.random.seed for reproducible runs.
    theta = np.random.randn(n_features, num_classes) * 0.01

    y_one_hot = one_hot_encode(y, num_classes)

    X_train, X_val = X[:n_train], X[n_train:]
    y_train, y_val = y_one_hot[:n_train], y_one_hot[n_train:]

    training_losses = []
    validation_losses = []

    minimum_val_error = float('inf')
    best_epoch = None
    best_theta = None

    for epoch in range(max_epochs):
        # Forward pass on the training split.
        scores = np.dot(X_train, theta)
        y_pred = softmax(scores)

        train_loss = compute_loss(y_train, y_pred)
        training_losses.append(train_loss)

        # Evaluate the current (pre-update) parameters on the held-out split.
        val_scores = np.dot(X_val, theta)
        y_val_pred = softmax(val_scores)
        val_loss = compute_loss(y_val, y_val_pred)
        validation_losses.append(val_loss)

        if val_loss < minimum_val_error:
            minimum_val_error = val_loss
            best_epoch = epoch
            # Copy: theta is updated in place below.
            best_theta = theta.copy()

        # Gradient of the mean cross-entropy w.r.t. theta for softmax outputs.
        gradient = np.dot(X_train.T, (y_pred - y_train)) / n_train

        theta -= learning_rate * gradient

    return best_theta, best_epoch, minimum_val_error, training_losses, validation_losses

In [28]:
def main():
    """Train the softmax classifier on synthetic data and print a summary.

    The features are standard-normal noise and the labels are drawn
    uniformly at random, independently of the features, so this only
    demonstrates the training API.
    """
    # Fixed seed so the generated data and the initial weights are repeatable.
    np.random.seed(42)

    num_classes = 3
    n_rows, n_cols = 500, 5
    X = np.random.randn(n_rows, n_cols)            # 500 samples, 5 features
    y = np.random.randint(0, num_classes, n_rows)  # labels in {0, 1, 2}

    result = batch_GD_softmax(
        X,
        y,
        num_classes,
        learning_rate=0.1,
        max_epochs=10000,
    )
    # The per-epoch loss histories (last two items) are not reported here.
    best_theta, best_epoch, min_val_error = result[:3]

    print("Best epoch:", best_epoch)
    print("Minimum validation error:", min_val_error)
    print("Best model parameters (theta):")
    print(best_theta)

In [29]:
# Run the synthetic-data demo defined in main() and print its summary.
main()

Best epoch: 1176
Minimum validation error: 1.0922675360469773
Best model parameters (theta):
[[-0.00349244  0.10028011 -0.1061367 ]
 [ 0.06081777 -0.00417335 -0.06969195]
 [-0.10013998  0.04980121  0.06799869]
 [ 0.01186224  0.02900514 -0.05324665]
 [-0.0359313   0.00248115  0.03860303]]
