# In [None]:
import math

class CoreLogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    A dependency-free implementation that works on plain Python lists.
    The hypothesis is ``h(x) = sigmoid(theta_0 + theta_weights . x)`` and the
    parameters are fitted by minimising the mean binary cross-entropy.

    Attributes:
        learning_rate (float): Step size used for each parameter update.
        n_iterations (int): Number of full passes over the training data.
        theta_0 (float): Bias term.
        theta_weights (list of float): One weight per feature; empty until
            ``fit`` has been called.
        cost_history (list of float): Cost recorded at the start of every
            iteration of the most recent ``fit`` call.
    """

    def __init__(self, learning_rate=0.01, n_iterations=1000):
        """
        Initialize the model.

        Args:
            learning_rate (float): The step size for gradient descent.
            n_iterations (int): Number of times to loop over the training data.
        """
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.theta_0 = 0.0  # Bias term
        self.theta_weights = []  # Weights for each feature
        self.cost_history = []  # To track learning progress

    # --- 1. Sigmoid Function ---
    def _sigmoid(self, z):
        """Return the logistic function ``1 / (1 + e^-z)`` of a scalar ``z``.

        Inputs beyond +/-700 are saturated to exactly 1.0 / 0.0 so that
        ``math.exp`` is never called with an argument large enough to
        overflow a double.
        """
        if z > 700:
            return 1.0
        if z < -700:
            return 0.0
        return 1.0 / (1.0 + math.exp(-z))

    # --- 2. Hypothesis and Helpers ---
    def _compute_z(self, x_sample):
        """Computes z = theta_0 + (theta_weights . x_sample)"""
        z = self.theta_0
        # Index into x_sample (rather than zip) so that a sample with too few
        # features fails loudly with IndexError instead of being truncated.
        for j, weight in enumerate(self.theta_weights):
            z += weight * x_sample[j]
        return z

    def _predict_probability(self, x_sample):
        """Our full hypothesis: h(x) = sigmoid(z)"""
        return self._sigmoid(self._compute_z(x_sample))

    # --- 3. Cost Function ---
    def _compute_cost(self, y_true, y_pred_probs):
        """Return the mean binary cross-entropy of the predictions.

        Args:
            y_true (list): Target labels (0 or 1).
            y_pred_probs (list of float): Predicted probabilities, aligned
                with ``y_true``.

        Returns:
            float: Average cost per sample, or 0.0 when ``y_true`` is empty.
        """
        m = len(y_true)
        if m == 0:
            return 0.0

        # Probabilities are clipped into [epsilon, 1 - epsilon] so that
        # math.log never receives 0.
        epsilon = 1e-9
        total_cost = 0.0
        for y, h in zip(y_true, y_pred_probs):
            h = max(epsilon, min(1.0 - epsilon, h))
            total_cost += -y * math.log(h) - (1 - y) * math.log(1 - h)

        return total_cost / m

    # --- 4. Gradient Descent ---
    def _compute_gradients(self, X_data, y_true, y_pred_probs):
        """Return the gradient of the mean cost w.r.t. bias and weights.

        Args:
            X_data (list of lists): Feature rows.
            y_true (list): Target labels, aligned with ``X_data``.
            y_pred_probs (list of float): Current predicted probabilities,
                aligned with ``X_data``.

        Returns:
            tuple: ``(grad_theta_0, grad_theta_weights)`` where the second
            item is a list with one entry per weight. All entries are 0.0
            when ``y_true`` is empty.
        """
        m = len(y_true)
        n_features = len(self.theta_weights)

        grad_theta_0 = 0.0
        grad_theta_weights = [0.0] * n_features

        # Mirror _compute_cost: no samples means nothing to average, so
        # return zero gradients instead of dividing by zero.
        if m == 0:
            return grad_theta_0, grad_theta_weights

        for x_sample, y, h in zip(X_data, y_true, y_pred_probs):
            error = h - y
            grad_theta_0 += error
            for j in range(n_features):
                grad_theta_weights[j] += error * x_sample[j]

        grad_theta_0 /= m
        grad_theta_weights = [g / m for g in grad_theta_weights]

        return grad_theta_0, grad_theta_weights

    # --- 5. Main Training Function ---
    def fit(self, X_data, y_data, verbose=True):
        """
        Train the model using (Batch) Gradient Descent.

        Parameters and ``cost_history`` are reset at the start of every call.
        If ``X_data`` is empty an error message is printed and the model is
        left untouched.

        Args:
            X_data (list of lists): Training features. e.g., [[1, 2], [3, 4]]
            y_data (list): Target labels. e.g., [0, 1]
            verbose (bool): Whether to print cost updates.

        Raises:
            ValueError: If ``X_data`` and ``y_data`` differ in length.
        """
        if not X_data:
            print("Error: X_data is empty.")
            return

        if len(X_data) != len(y_data):
            raise ValueError(
                f"X_data and y_data must have the same length "
                f"(got {len(X_data)} and {len(y_data)})."
            )

        # Assume all samples have the same number of features as the first one
        n_features = len(X_data[0])

        # Initialize parameters
        self.theta_0 = 0.0
        self.theta_weights = [0.0] * n_features
        self.cost_history = []

        # Report roughly ten times per run. Clamp to 1 so that runs with
        # fewer than 10 iterations do not evaluate ``i % 0``.
        log_every = max(1, self.n_iterations // 10)

        # --- The Gradient Descent Loop ---
        for i in range(self.n_iterations):

            # 1. Get predictions (probabilities) for ALL samples
            y_pred_probs = [self._predict_probability(x) for x in X_data]

            # 2. Calculate the cost (for logging)
            cost = self._compute_cost(y_data, y_pred_probs)
            self.cost_history.append(cost)

            # 3. Calculate the gradients
            grad_theta_0, grad_theta_weights = self._compute_gradients(
                X_data, y_data, y_pred_probs
            )

            # 4. Update the parameters
            self.theta_0 -= self.learning_rate * grad_theta_0
            for j in range(n_features):
                self.theta_weights[j] -= self.learning_rate * grad_theta_weights[j]

            # Optional: Print progress
            if verbose and i % log_every == 0:
                print(f"Iteration {i}: Cost = {cost:.4f}")

    # --- 6. Prediction Functions ---
    def predict_proba(self, X_data):
        """
        Predict probabilities for new data.

        Args:
            X_data (list of lists): Feature rows to score.

        Returns:
            list of float: One probability per row of ``X_data``.
        """
        return [self._predict_probability(x_sample) for x_sample in X_data]

    def predict(self, X_data, threshold=0.5):
        """
        Predict class labels (0 or 1) based on a threshold.

        Args:
            X_data (list of lists): Feature rows to classify.
            threshold (float): Probabilities greater than or equal to this
                value are labelled 1, all others 0.

        Returns:
            list of int: One label per row of ``X_data``.
        """
        return [1 if prob >= threshold else 0 for prob in self.predict_proba(X_data)]

# In [None]:
# --- Main execution ---
# Demo: train the model on a tiny one-feature dataset and print its
# parameters and predictions.
if __name__ == "__main__":

    print("--- Testing CoreLogisticRegression ---")

    # 1. Toy dataset: one feature per sample ("hours studied") and a binary
    #    label ("passed"). Each sample is its own list because the model
    #    expects a list of feature lists.
    X_train = [[1.0], [1.5], [2.0], [2.5], [4.5], [5.0], [5.5], [6.0]]
    y_train = [0] * 4 + [1] * 4

    # 2. Build and train the model. The feature is not scaled, so a larger
    #    learning rate and more iterations than the defaults are used.
    model = CoreLogisticRegression(learning_rate=0.1, n_iterations=5000)

    print("Starting training...")
    model.fit(X_train, y_train)
    print("Training complete.")

    # 3. Report the learned parameters
    print(f"\nFinal Bias (theta_0): {model.theta_0:.4f}")
    print(f"Final Weights (theta_1): {model.theta_weights[0]:.4f}")

    # 4. Score unseen inputs: probabilities first, then thresholded labels
    X_test = [[0.5], [3.0], [3.5], [7.0]]
    probs = model.predict_proba(X_test)
    labels = model.predict(X_test)

    print("\n--- Test Results ---")
    for sample, prob, label in zip(X_test, probs, labels):
        print(f"Input: {sample[0]} hours | "
              f"Prob(Pass): {prob:.4f} | "
              f"Prediction: {label}")

    # Expected outcome (not asserted here):
    # The training labels switch between 2.5 and 4.5 hours, so the decision
    # boundary should land at roughly 3.5.
    # [0.5] -> low probability, predicted 0
    # [3.0] -> close to the boundary, could be 0 or 1
    # [3.5] -> close to the boundary, could be 0 or 1
    # [7.0] -> high probability, predicted 1

    