In [None]:
 def _compute_z(theta_0, theta_weights, x_sample):
    """
    Computes the linear part: z = theta_0 + (theta_weights . x_sample)
    p.s. We have used theta_1 and X as the parameter names during the class.
    the purpose of which is to make you understand that whichever names a variable might be of,
    the concepts remain the same.
    """
    # First, check for compatible lengths |
    # i.e., "Cardinality" as discussed during the class.
    if len(theta_weights) != len(x_sample):
        raise ValueError("""Mismatch in length of weights and features\n
                            Or, Mismatch in the length of theta_1 and X""")
    
    z = theta_0
    for i in range(len(theta_weights)):
        z += theta_weights[i] * x_sample[i]
    return z

In [None]:
import math
 
def _sigmoid(z):
 """
 The Sigmoid activation function.
 Includes guards for numerical stability to prevent overflow.
 """
 if z > 700:  # e^(-700) is effectively 0
 return 1.0
 elif z < -700: # e^(700) is effectively infinity
 return 0.0
 else:
 return 1.0 / (1.0 + math.exp(-z))

In [None]:
 def _predict_probability(theta_0, theta_weights, x_sample):
     """
     Our full hypothesis: h(x) = sigmoid(z)

     First computes the linear part z with _compute_z, then passes it through
     _sigmoid.

     :param theta_0: The bias term (intercept).
     :param theta_weights: Sequence of feature weights.
     :param x_sample: Sequence of feature values for a single sample.
     :return: The value returned by _sigmoid for this sample.
     """
     z = _compute_z(theta_0, theta_weights, x_sample)
     return _sigmoid(z)

In [1]:
import math
import sys

# Set higher recursion limit for potential deep calls, though not strictly necessary for this class
# sys.setrecursionlimit(2000)

class CoreLogisticRegression:
    """
    A basic implementation of Logistic Regression using Batch Gradient Descent.
    It handles binary classification problems (labels 0 and 1) and operates on
    plain Python lists.
    """

    def __init__(self, learning_rate=0.01, n_iterations=1000):
        """
        Initializes the model's hyperparameters and internal variables.

        :param learning_rate: The step size for updating weights during optimization.
        :param n_iterations: The number of times the model will update the weights.
        """
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.theta_0 = 0.0      # Bias term (intercept)
        self.theta_weights = []  # Feature weights (sized by fit)
        self.cost_history = []  # Cost recorded at the start of each iteration

    def _sigmoid(self, z):
        """
        Computes the sigmoid function: h(z) = 1 / (1 + e^-z).

        math.exp raises OverflowError for very large arguments, so inputs
        beyond +/-700 are short-circuited to the saturated value instead of
        being passed to math.exp.

        :param z: The linear combination result (z = theta_0 + X * theta_weights).
        :return: Probability value between 0.0 and 1.0.
        """
        # For large positive z, math.exp(-z) approaches 0, so sigmoid approaches 1.0
        if z > 700:
            return 1.0
        # For large negative z, math.exp(-z) would overflow; sigmoid approaches 0.0
        if z < -700:
            return 0.0
        # Standard sigmoid calculation
        return 1.0 / (1.0 + math.exp(-z))

    def _compute_z(self, x_sample):
        """
        Computes the linear combination of inputs and weights for a single sample (z).
        This is the input to the sigmoid function.

        Only the first len(self.theta_weights) entries of x_sample are read;
        a shorter x_sample raises IndexError.

        :param x_sample: A single feature vector (list of features).
        :return: The scalar value z.
        """
        # Start with the bias term, then add each weighted feature in order
        z = self.theta_0
        for j, weight in enumerate(self.theta_weights):
            z += weight * x_sample[j]
        return z

    def _predict_probability(self, x_sample):
        """
        Makes a probability prediction for a single sample.

        :param x_sample: A single feature vector.
        :return: The predicted probability (h(x)).
        """
        return self._sigmoid(self._compute_z(x_sample))

    def _compute_cost(self, y_true, y_pred_probs):
        """
        Computes the binary cross-entropy (Log Loss) for the entire dataset.

        Cost = (-1/m) * sum(y * log(h) + (1-y) * log(1-h))

        :param y_true: List of true target labels (0 or 1).
        :param y_pred_probs: List of predicted probabilities (0.0 to 1.0).
        :return: The average cost (loss) over all samples; 0.0 when y_true is empty.
        """
        m = len(y_true)
        if m == 0:
            return 0.0

        epsilon = 1e-9
        total_cost = 0.0

        for i, label in enumerate(y_true):
            # Clip the predicted probability (h) away from 0 and 1 so that
            # neither log(h) nor log(1 - h) is evaluated at 0.
            h = max(epsilon, min(1.0 - epsilon, y_pred_probs[i]))

            # Binary Cross-Entropy formula for one sample
            total_cost += -label * math.log(h) - (1 - label) * math.log(1 - h)

        # Return the average cost
        return total_cost / m

    def _compute_gradients(self, X_data, y_true, y_pred_probs):
        """
        Computes the gradients (partial derivatives) of the cost function
        with respect to the bias (theta_0) and weights (theta_weights).

        Gradient = (1/m) * sum((h(x) - y) * x_j)

        :param X_data: The feature matrix.
        :param y_true: List of true target labels.
        :param y_pred_probs: List of predicted probabilities.
        :return: A tuple containing the bias gradient and the list of weight
                 gradients. All gradients are 0.0 when y_true is empty.
        """
        m = len(y_true)
        # Number of features
        n_features = len(self.theta_weights)

        grad_theta_0 = 0.0
        # Initialize gradients for all weights to zero
        grad_theta_weights = [0.0] * n_features

        # No samples: nothing to average (avoids dividing by zero below),
        # mirroring the empty-input guard in _compute_cost.
        if m == 0:
            return grad_theta_0, grad_theta_weights

        for i, label in enumerate(y_true):
            # The core error term for Logistic Regression: (Prediction - True Label)
            error = y_pred_probs[i] - label

            # Gradient for the bias (theta_0) - corresponding feature is always 1
            grad_theta_0 += error

            # Gradients for the weights (theta_weights)
            for j in range(n_features):
                grad_theta_weights[j] += error * X_data[i][j]

        # Average the gradients over all training examples (m)
        grad_theta_0 /= m
        grad_theta_weights = [total / m for total in grad_theta_weights]

        return grad_theta_0, grad_theta_weights

    def fit(self, X_data, y_data, verbose=True):
        """
        Trains the model using batch gradient descent.

        Resets theta_0, theta_weights and cost_history before training. If
        X_data is empty, an error message is printed and the model is left
        untouched.

        :param X_data: The feature matrix (list of lists).
        :param y_data: The target vector (list of 0s and 1s).
        :param verbose: If True, prints the cost every 10% of iterations
                        (only when n_iterations >= 10) and the last recorded
                        cost at the end.
        :raises ValueError: If X_data and y_data have different lengths.
        """
        if not X_data:
            print("Error: Input data X_data is empty.")
            return

        # A length mismatch would otherwise train on a prefix of the data or
        # fail deep inside the loop with an IndexError.
        if len(X_data) != len(y_data):
            raise ValueError(
                f"X_data has {len(X_data)} samples but y_data has "
                f"{len(y_data)} labels."
            )

        # Initialize weights based on the number of features in the input data
        n_features = len(X_data[0])
        self.theta_0 = 0.0
        self.theta_weights = [0.0] * n_features
        self.cost_history = []

        # Reporting interval; 0 disables periodic reporting for short runs
        report_every = self.n_iterations // 10 if self.n_iterations >= 10 else 0

        # Start Batch Gradient Descent training loop
        for i in range(self.n_iterations):
            # 1. Calculate the predicted probabilities for the current weights
            y_pred_probs = [self._predict_probability(x) for x in X_data]

            # 2. Calculate the cost (Loss) and store it
            cost = self._compute_cost(y_data, y_pred_probs)
            self.cost_history.append(cost)

            # 3. Calculate the gradients (direction of steepest ascent)
            grad_theta_0, grad_theta_weights = self._compute_gradients(
                X_data, y_data, y_pred_probs
            )

            # 4. Update the weights and bias using the gradient descent update rule
            #   (New Parameter) = (Old Parameter) - (Learning Rate) * (Gradient)
            self.theta_0 -= self.learning_rate * grad_theta_0
            for j, gradient in enumerate(grad_theta_weights):
                self.theta_weights[j] -= self.learning_rate * gradient

            # Print cost periodically for monitoring
            if verbose and report_every and i % report_every == 0:
                print(f"Iteration {i}/{self.n_iterations}: Cost = {cost:.4f}")

        # Final cost display. cost_history is empty when n_iterations <= 0,
        # in which case there is nothing to report.
        if verbose and self.cost_history:
            print(f"Iteration {self.n_iterations}: Final Cost = {self.cost_history[-1]:.4f}")

    def predict_proba(self, X_data):
        """
        Predicts probabilities for new data points.

        :param X_data: The feature matrix for prediction.
        :return: A list of predicted probabilities (0.0 to 1.0).
        """
        return [self._predict_probability(x) for x in X_data]

    def predict(self, X_data, threshold=0.5):
        """
        Predicts class labels (0 or 1) based on a threshold.

        :param X_data: The feature matrix for prediction.
        :param threshold: The probability cut-off for assigning class 1
                          (a probability equal to the threshold yields 1).
        :return: A list of predicted class labels (0 or 1).
        """
        probabilities = self.predict_proba(X_data)
        return [1 if prob >= threshold else 0 for prob in probabilities]

if __name__ == "__main__":

    print("--- Testing CoreLogisticRegression ---")

    # Step 1: toy one-feature dataset (hours studied -> exam outcome).
    # Labels: 0 = Fail, 1 = Pass. The classes are separated by the gap
    # between 2.5 and 4.5 hours.
    X_train = [
        [1.0], [1.5], [2.0], [2.5],
        [4.5], [5.0], [5.5], [6.0],
    ]
    y_train = [0, 0, 0, 0, 1, 1, 1, 1]

    # Step 2: build and train the model. A learning rate of 0.1 with 5000
    # iterations converges quickly on a dataset this small.
    model = CoreLogisticRegression(
        learning_rate=0.1,
        n_iterations=5000,
    )
    print("Starting training...")
    model.fit(X_train, y_train, verbose=True)
    print("Training complete.")

    # Step 3: report the learned parameters. The data has a single feature,
    # so only the first weight is shown.
    print(f"\nFinal Bias (theta_0): {model.theta_0:.4f}")
    if model.theta_weights:
        print(f"Final Weights (theta_1): {model.theta_weights[0]:.4f}")

    # Step 4: score unseen inputs.
    #   0.5 and 3.0 hours -> expected class 0 (Fail)
    #   3.5 hours         -> sits at the transition between the classes
    #   7.0 hours         -> expected class 1 (Pass)
    X_test = [[0.5], [3.0], [3.5], [7.0]]
    probs = model.predict_proba(X_test)
    labels = model.predict(X_test)

    print("\n--- Test Results ---")
    for i in range(len(X_test)):
        report_line = (
            f"Input: {X_test[i][0]} hours | "
            f"Prob(Pass): {probs[i]:.4f} | "
            f"Prediction: {labels[i]}"
        )
        print(report_line)

    # What the run should show:
    #   0.5 -> low probability           -> 0
    #   3.0 -> low-to-mid probability    -> 0
    #   3.5 -> mid probability (~0.5)    -> 0 or 1, depending on convergence
    #   7.0 -> high probability          -> 1

--- Testing CoreLogisticRegression ---
Starting training...
Iteration 0/5000: Cost = 0.6931
Iteration 500/5000: Cost = 0.1241
Iteration 1000/5000: Cost = 0.0713
Iteration 1500/5000: Cost = 0.0510
Iteration 2000/5000: Cost = 0.0401
Iteration 2500/5000: Cost = 0.0332
Iteration 3000/5000: Cost = 0.0284
Iteration 3500/5000: Cost = 0.0249
Iteration 4000/5000: Cost = 0.0222
Iteration 4500/5000: Cost = 0.0200
Iteration 5000: Final Cost = 0.0183
Training complete.

Final Bias (theta_0): -9.8485
Final Weights (theta_1): 2.9078

--- Test Results ---
Input: 0.5 hours | Prob(Pass): 0.0002 | Prediction: 0
Input: 3.0 hours | Prob(Pass): 0.2450 | Prediction: 0
Input: 3.5 hours | Prob(Pass): 0.5814 | Prediction: 1
Input: 7.0 hours | Prob(Pass): 1.0000 | Prediction: 1
