In [1]:
import numpy as np

# Sigmoid function to predict probability
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), evaluated element-wise.

    The naive formula overflows in ``exp(-z)`` for large negative ``z``.
    This version only ever exponentiates a non-positive number, so it cannot
    overflow.

    Args:
        z: Scalar or array-like of logits.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``z``,
        with values in [0, 1].
    """
    z = np.asarray(z)
    # exp(-|z|) lies in [0, 1], so this exponential can never overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same value.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d array to a scalar and leaves n-d arrays as-is.
    return result[()]

# Cost function with regularization
def cost_function_reg(theta, X, y, lambda_):
    """Return the L2-regularized logistic-regression (cross-entropy) cost.

    The log-probabilities are computed directly from the logits with
    ``np.logaddexp`` rather than as ``log(sigmoid(z))``. When a prediction
    saturates to exactly 0 or 1, the latter yields ``log(0) = -inf`` and the
    cost becomes ``nan``/``inf``.

    Args:
        theta: Parameter vector; ``theta[0]`` is excluded from the penalty.
        X: Feature matrix whose product ``np.dot(X, theta)`` gives the logits.
        y: NumPy array of 0/1 labels, one per row of ``X``.
        lambda_: Regularization strength.

    Returns:
        The mean cross-entropy plus ``lambda_ / (2 * m)`` times the sum of
        squared ``theta[1:]``, where ``m = len(y)``.
    """
    m = len(y)  # number of training examples
    logits = np.dot(X, theta)

    # With h = sigmoid(z):  log(h) = -log(1 + exp(-z)),  log(1 - h) = -log(1 + exp(z)).
    # logaddexp(0, t) evaluates log(1 + exp(t)) without overflow.
    log_h = -np.logaddexp(0, -logits)
    log_one_minus_h = -np.logaddexp(0, logits)

    data_cost = (1 / m) * (-y.T.dot(log_h) - (1 - y).T.dot(log_one_minus_h))
    # The first parameter (intercept) is not penalized.
    penalty = (lambda_ / (2 * m)) * np.sum(np.square(theta[1:]))

    return data_cost + penalty

# Gradient with regularization
def gradient_reg(theta, X, y, lambda_):
    """Return the gradient of the L2-regularized logistic cost w.r.t. ``theta``.

    Args:
        theta: Current parameter vector.
        X: Feature matrix.
        y: Labels, one per row of ``X``.
        lambda_: Regularization strength.

    Returns:
        The gradient array. The regularization term is added to every
        component except the first.
    """
    n_samples = len(y)
    inv_n = 1 / n_samples

    # Difference between predicted probabilities and the labels.
    residual = sigmoid(np.dot(X, theta)) - y

    grad = inv_n * np.dot(X.T, residual)

    # The penalty applies to theta[1:] only; the first parameter is skipped.
    shrinkage = (lambda_ / n_samples) * theta[1:]
    grad[1:] += shrinkage

    return grad

# Training function using gradient descent
def train_logistic_regression(X, y, lambda_, alpha=0.01, iterations=400):
    """Fit logistic-regression parameters with fixed-step gradient descent.

    Args:
        X: Feature matrix; ``X.shape[1]`` sets the number of parameters.
        y: Labels, one per row of ``X``.
        lambda_: Regularization strength passed on to ``gradient_reg``.
        alpha: Step size applied to each gradient.
        iterations: Number of update steps to perform.

    Returns:
        The parameter vector after ``iterations`` updates, starting from zeros.
    """
    n_params = X.shape[1]
    theta = np.zeros(n_params)

    # Each pass takes one step against the regularized gradient.
    for _step in range(iterations):
        update = alpha * gradient_reg(theta, X, y, lambda_)
        theta = theta - update

    return theta

# Example usage
#   X       -- feature matrix; the leading column of 1s is the intercept term
#              and has to be added by the caller
#   y       -- 0/1 labels, one per row of X
#   lambda_ -- regularization parameter

X = np.array(
    [
        [1, 2, 3],
        [1, 3, 4],
        [1, 4, 5],
    ]
)
y = np.array([0, 1, 0])
lambda_ = 0.1

# Fit with the default step size and iteration count, then show the result.
theta = train_logistic_regression(X, y, lambda_)
print("Optimized parameters (theta):", theta)


Optimized parameters (theta): [-0.05522946 -0.05800936 -0.10833008]
