In [2]:
import numpy as np
import math
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Learning rate.
alpha = 0.1

# Load the dataset
iris = load_iris ()
X = iris.data [:, [2, 3]] # Using petal length and petal width
y = iris.target

# --- Case 1: Not Linearly Separable ( Versicolor vs. Virginica ) ---
X_nls_full = X [ y != 0]
y_nls_full = y [ y != 0]
y_nls_full [ y_nls_full == 1] = 0 # Versicolor
y_nls_full [ y_nls_full == 2] = 1 # Virginica
X_train_nls, X_test_nls, y_train_nls, y_test_nls = train_test_split (X_nls_full, y_nls_full, test_size =0.3, random_state =42)

# --- Case 2: Linearly Separable ( Setosa vs. The Rest ) ---
X_ls_full = X
y_ls_full = np.copy ( y )
y_ls_full [ y_ls_full != 0] = 1 # Versicolor and Virginica are class 1
X_train_ls, X_test_ls, y_train_ls, y_test_ls = train_test_split (X_ls_full, y_ls_full, test_size =0.3, random_state =42)

# Helper function to add the bias term (x0 =1)
def add_bias ( X ) :
    return np.c_ [ np.ones ( X.shape [0]), X ]

# Take a scalar or numpy array ‘a‘ and return the sigmoid activation
def sigmoid(a):
    return 1 / (1 + np.exp(-a))

# Compute the Negative Log Likelihood loss
def nll_loss(y_true, y_pred):
    # NLL is defined as NLL = −[y⋅log(pred)+(1−y)⋅log(1−pred)]
    return -np.mean((y_true * np.log(y_pred)) + ((1 - y_true) * np.log(1 - y_pred)))

# Calculate gradient of NLL loss with respect to weights W
def calculate_gradient(X, y_true, weights):
    n = X.shape[0]
    # Gradient is deriviative i.e. 1/n . xt . (y^ - y_true) where X is feature matrix, y^ is sigmoid(X . w) and y_true is vector
    y_pred = sigmoid(X @ weights)
    return (X.T @ (y_pred - y_true)) / n

# Perform Gradient Descent. Initialize weights, and then iteratively update them. 
# The function returns the final weights and a history of the loss and gradient norm.
def train_logistic_regression(X, y_true, alpha, iterations):
    # Initialise weights.
    weights = np.zeros(X.shape[1])
    losses = np.zeros(iterations)
    gradients = np.zeros(iterations)
    X_bias = add_bias(X)
    # Iterate
    for i in range(iterations):
        gradients[i] = calculate_gradient(X_bias, y_true, weights)
        losses[i] = nll_loss(y_true, sigmoid(X_bias @ weights))
        weights -= alpha * gradients[i]
    return weights, gradients, losses


Task 1: Non-Linearly Separable Case Train your model on the X train nls, y train nls dataset. Plot the loss and gradient norm over iterations. You should see them both converge.

In [4]:
weights_nls, gradients_nls, losses_nls = train_logistic_regression(X_train_nls, y_train_nls, 0.1, 2000)

plt.plot(weights_nls)
plt.show

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 2 is different from 3)