In [2]:
#Batch Gradient Descent with early stopping for Softmax Regression
from sklearn import datasets
iris = datasets.load_iris()
list(iris.keys())

['data',
 'target',
 'frame',
 'target_names',
 'DESCR',
 'feature_names',
 'filename']

In [4]:
import numpy as np

In [5]:
X = iris["data"][:, (2,3)]  # petal width
y = (iris["target"] == 2).astype(np.int)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y = (iris["target"] == 2).astype(np.int)


In [12]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Step 2: Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 4: Add a bias term to the feature matrix
X_train = np.hstack((np.ones((X_train.shape[0], 1)), X_train))
X_test = np.hstack((np.ones((X_test.shape[0], 1)), X_test))

# Step 5: Define the softmax function
def softmax(z):
    e_z = np.exp(z - np.max(z, axis=1, keepdims=True))
    return e_z / e_z.sum(axis=1, keepdims=True)

# Step 6: Define the cross-entropy loss function
def cross_entropy_loss(y_true, y_pred):
    m = y_true.shape[0]
    return -np.sum(y_true * np.log(y_pred)) / m

# Step 7: Initialize model parameters
num_classes = 3
num_features = X_train.shape[1]
learning_rate = 0.1
num_epochs = 1000

weights = np.random.randn(num_features, num_classes)

# Step 8: Train the model
for epoch in range(num_epochs):
    scores = np.dot(X_train, weights)
    predicted_probs = softmax(scores)
    loss = cross_entropy_loss(np.eye(num_classes)[y_train], predicted_probs)

    # Calculate gradients
    gradients = np.dot(X_train.T, (predicted_probs - np.eye(num_classes)[y_train])) / X_train.shape[0]

    # Update weights
    weights -= learning_rate * gradients

    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss:.4f}")

# Step 9: Use the trained model for inference on the test set
test_scores = np.dot(X_test, weights)
test_predicted_probs = softmax(test_scores)
test_predicted_labels = np.argmax(test_predicted_probs, axis=1)

# Step 10: Calculate and print accuracy
correct_predictions = (test_predicted_labels == y_test)
accuracy = np.mean(correct_predictions)

print(f"Test Accuracy: {accuracy * 100:.2f}%")


Epoch 100/1000, Loss: 0.2590
Epoch 200/1000, Loss: 0.1981
Epoch 300/1000, Loss: 0.1702
Epoch 400/1000, Loss: 0.1536
Epoch 500/1000, Loss: 0.1424
Epoch 600/1000, Loss: 0.1343
Epoch 700/1000, Loss: 0.1281
Epoch 800/1000, Loss: 0.1232
Epoch 900/1000, Loss: 0.1192
Epoch 1000/1000, Loss: 0.1159
Test Accuracy: 100.00%
