In [None]:
# # Logistic Regression from Scratch 
#
# **Goal**: Implement binary logistic regression *from first principles*, mirroring the structure of your linear regression notebook.
#
# **What you'll learn**
# - Model formulation: \( z = w^\top x + b \), \( \hat y = \sigma(z) \)
# - Loss: Binary cross-entropy (negative log-likelihood)
# - Gradients & training with batch gradient descent
# - Evaluation: accuracy, confusion matrix, and decision boundary (2 features)
#
# ---

# 1. Imports & Setup


import numpy as np
import math
import matplotlib.pyplot as plt
# Print arrays with 4 digits of precision and suppress scientific notation for small values.
np.set_printoptions(precision=4, suppress=True)
# Generator seeded with 42 for reproducible randomness.
# NOTE(review): RNG is not referenced by any of the cells visible here — confirm it is still needed.
RNG = np.random.default_rng(42)

In [None]:
# 2. Synthetic Dataset
# Use the same structure you started with: two features and a binary label.
# Data (your original values)
# Raw columns: two numeric features plus a 0/1 label, all stored as float arrays.
data = {
    "feature1": np.asarray([1.2, 2.4, 3.1, 4.5, 5.0, 6.7, 7.2, 8.9, 9.5, 10.0], dtype=float),
    "feature2": np.asarray([0.5, 1.0, 1.2, 2.3, 2.8, 3.5, 4.1, 4.9, 5.5, 6.0], dtype=float),
    "label": np.asarray([0, 0, 0, 0, 1, 0, 1, 1, 1, 1], dtype=float),
}

# Design matrix: one row per sample, one column per feature.
X = np.stack((data["feature1"], data["feature2"]), axis=1)
# Target vector (same array object as data["label"]).
y = data["label"]
print("X shape:", X.shape, " y shape:", y.shape)

In [None]:
# Quick Visualization

# Scatter the two classes with distinct markers so the class overlap is visible.
plt.figure()
for class_value, class_label, class_marker in ((0, "Class 0", "o"), (1, "Class 1", "x")):
    in_class = y == class_value
    plt.scatter(X[in_class, 0], X[in_class, 1], label=class_label, marker=class_marker)
plt.title("Synthetic Data")
plt.xlabel("feature1")
plt.ylabel("feature2")
plt.legend()
plt.show()

In [None]:

def train_test_split_np(X_mat: np.ndarray, y_vec: np.ndarray, train_ratio: float = 0.8,
                        shuffle: bool = False, rng=None):
    """Split features and labels into a training part and a test part.

    By default the split is positional: the first ``int(train_ratio * m)`` rows
    form the training set and the remaining rows form the test set.

    Parameters
    ----------
    X_mat : feature matrix with one row per sample.
    y_vec : labels, one per row of ``X_mat``.
    train_ratio : fraction of samples placed in the training set; must be in [0, 1].
    shuffle : if True, apply the same random permutation to rows and labels
        before splitting. Defaults to False (original positional behavior).
    rng : optional ``numpy.random.Generator`` used when ``shuffle`` is True;
        a fresh unseeded generator is created if omitted.

    Returns
    -------
    (X_train, y_train, X_test, y_test)

    Raises
    ------
    ValueError
        If ``X_mat`` and ``y_vec`` have different lengths, or ``train_ratio``
        is outside [0, 1].
    """
    m = len(y_vec)
    # Mismatched lengths would otherwise yield silently misaligned rows/labels.
    if len(X_mat) != m:
        raise ValueError(
            f"X_mat has {len(X_mat)} rows but y_vec has {m} labels"
        )
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be in [0, 1], got {train_ratio}")

    if shuffle:
        generator = rng if rng is not None else np.random.default_rng()
        order = generator.permutation(m)
        # Index both with the same permutation so each row keeps its label.
        X_mat = np.asarray(X_mat)[order]
        y_vec = np.asarray(y_vec)[order]

    nth = int(train_ratio * m)
    return X_mat[:nth], y_vec[:nth], X_mat[nth:], y_vec[nth:]


# Split once with the NumPy helper. The previous call to `train_test_split_lists`
# referenced a function that is not defined in this notebook and raised NameError
# on a fresh kernel, so the list-style views are derived from the NumPy split instead.
X_train, y_train_np, X_test, y_test_np = train_test_split_np(X, y, 0.8)

# Column-wise Python lists (one list per feature) for the loop-based helpers
# (y_hat, dj_dw, dj_db), which index their inputs as X_cols[feature][sample].
X_train_cols = [X_train[:, j].tolist() for j in range(X_train.shape[1])]
X_test_cols = [X_test[:, j].tolist() for j in range(X_test.shape[1])]
y_train, y_test = y_train_np.tolist(), y_test_np.tolist()

In [None]:
# ## 4. Model: Sigmoid, Linear Score, Probabilities, and Loss

def sigmoid(z: float) -> float:
    """Numerically stable logistic function for a scalar ``z``.

    Returns ``1 / (1 + exp(-z))``. The computation is split on the sign of ``z``
    so that ``math.exp`` is only ever called with a non-positive argument;
    the naive form raises ``OverflowError`` for very negative ``z``
    (e.g. ``z = -1000``).

    For vector inputs use the NumPy-based ``predict_proba`` instead.
    """
    if z >= 0:
        # exp(-z) <= 1 here, so it cannot overflow.
        return 1.0 / (1.0 + math.exp(-z))
    # Algebraically identical form; exp(z) < 1 for negative z.
    exp_z = math.exp(z)
    return exp_z / (1.0 + exp_z)

# Row-wise probabilities 

def y_hat(b: float, W: list, X_cols: list):
    """Compute per-sample probabilities with plain Python loops.

    Parameters
    ----------
    b : bias term.
    W : weights, one per feature column.
    X_cols : feature columns; ``X_cols[j][i]`` is feature ``j`` of sample ``i``.

    Returns
    -------
    list of ``sigmoid(sum_j W[j] * X_cols[j][i] + b)``, one entry per sample.

    Raises
    ------
    ValueError
        If the number of weights differs from the number of feature columns.

    Works for any number of feature columns (previously hard-coded to two).
    """
    if len(W) != len(X_cols):
        raise ValueError(
            f"expected one weight per feature column, got {len(W)} weights "
            f"for {len(X_cols)} columns"
        )
    probs = []
    # zip(*X_cols) walks the columns in lockstep, yielding one sample (row) at a time.
    for row in zip(*X_cols):
        z = 0.0
        for w_j, x_j in zip(W, row):
            z += w_j * x_j
        probs.append(sigmoid(z + b))
    return probs

# Vectorized versions (for loss/grad plots etc.)

def predict_proba(X_mat: np.ndarray, W_vec: np.ndarray, b: float) -> np.ndarray:
    """Vectorized logistic-regression probabilities ``sigmoid(X_mat @ W_vec + b)``.

    Parameters
    ----------
    X_mat : feature matrix, one row per sample.
    W_vec : weight vector, one entry per feature column.
    b : bias term.

    Returns
    -------
    Array of probabilities, one per row of ``X_mat``.

    The sigmoid is evaluated in a sign-split form so ``np.exp`` only receives
    non-positive arguments. The naive ``1 / (1 + exp(-z))`` overflows (emitting
    a RuntimeWarning) when ``z`` is a large negative number.
    """
    z = np.asarray(X_mat @ W_vec + b, dtype=float)
    # exp(-|z|) lies in (0, 1], so it never overflows.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^{-z});  z < 0: e^{z} / (1 + e^{z}) -- the same function.
    return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))


def predict_labels(X_mat: np.ndarray, W_vec: np.ndarray, b: float, threshold: float = 0.5) -> np.ndarray:
    """Turn predicted probabilities into hard 0/1 labels.

    A sample is labelled 1 when its probability from ``predict_proba`` is
    greater than or equal to ``threshold``, otherwise 0.
    """
    is_positive = predict_proba(X_mat, W_vec, b) >= threshold
    return is_positive.astype(int)


def binary_cross_entropy(y_true: np.ndarray, y_prob: np.ndarray) -> float:
    """Mean binary cross-entropy between labels and predicted probabilities.

    Probabilities are clipped to ``[1e-12, 1 - 1e-12]`` before taking logs so
    that exact 0 or 1 predictions do not produce ``log(0)``.
    """
    tiny = 1e-12
    p = np.clip(y_prob, tiny, 1 - tiny)
    # Per-sample log-likelihood of the observed label under the prediction.
    log_likelihood = y_true * np.log(p) + (1 - y_true) * np.log(1 - p)
    return -np.mean(log_likelihood)


In [None]:
# ## 5. Gradients 
# Using your per-parameter gradient helpers and also a vectorized variant.

def dj_dw(y_list: list, yhat_list: list, X_cols: list, n: int):
    """Partial derivative of the mean loss with respect to weight ``n``.

    Averages ``(yhat - y) * x`` over the samples, where ``x`` is taken from
    feature column ``X_cols[n]``.
    """
    column = X_cols[n]
    accumulated = 0.0
    for idx, target in enumerate(y_list):
        residual = yhat_list[idx] - target
        accumulated += residual * column[idx]
    return accumulated / len(y_list)


def dj_db(y_list: list, yhat_list: list):
    """Partial derivative of the mean loss with respect to the bias.

    This is the average of the residuals ``yhat - y`` over the samples.
    """
    residual_sum = 0.0
    for idx, target in enumerate(y_list):
        residual_sum += yhat_list[idx] - target
    return residual_sum / len(y_list)

In [None]:
# Vectorized gradients 

def gradients(X_mat: np.ndarray, y_vec: np.ndarray, W_vec: np.ndarray, b: float):
    """Vectorized loss gradients for the current parameters.

    Returns
    -------
    (grad_W, grad_b, probs), where ``probs`` are the probabilities from
    ``predict_proba``, ``grad_W`` is ``X_mat.T @ (probs - y_vec) / m`` and
    ``grad_b`` is the mean of ``probs - y_vec``, with ``m = len(y_vec)``.
    """
    probs = predict_proba(X_mat, W_vec, b)
    residual = probs - y_vec
    n_samples = len(y_vec)
    weight_grad = (X_mat.T @ residual) / n_samples
    bias_grad = np.sum(residual) / n_samples
    return weight_grad, bias_grad, probs

In [None]:
# (B) Vectorized with history

def train_vectorized(X_mat, y_vec, epochs=500, learn_rate=0.05):
    """Fit weights and bias with batch gradient descent, recording the trajectory.

    Parameters start at zero. On every epoch the loss and the parameters
    *before* that epoch's update are recorded, then one gradient step of size
    ``learn_rate`` is taken.

    Returns
    -------
    (W, b, history), where ``history`` is a dict with keys ``"loss"``, ``"W"``
    and ``"b"``, each holding one entry per epoch.
    """
    W = np.zeros(X_mat.shape[1])
    b = 0.0
    loss_trace, W_trace, b_trace = [], [], []
    for _ in range(epochs):
        # Snapshot the parameters first; W is copied because it is updated in place below.
        W_trace.append(W.copy())
        b_trace.append(b)

        grad_W, grad_b, probs = gradients(X_mat, y_vec, W, b)
        loss_trace.append(binary_cross_entropy(y_vec, probs))

        # Gradient-descent step
        W -= learn_rate * grad_W
        b -= learn_rate * grad_b
    return W, b, {"loss": loss_trace, "W": W_trace, "b": b_trace}

In [None]:
# B) vectorized trainer
# Fit on the training split with the vectorized trainer.
W_vec, b_vec, hist = train_vectorized(X_train, y_train_np, epochs=800, learn_rate=0.05)
print("[Vectorized] W:", W_vec, " b:", b_vec)

# Held-out evaluation: probabilities -> hard labels at 0.5 -> accuracy.
probs_test = predict_proba(X_test, W_vec, b_vec)
preds_test = (probs_test >= 0.5).astype(int)
y_test_int = y_test_np.astype(int)
acc_test = (preds_test == y_test_int).mean()
print("Test accuracy:", round(acc_test, 4))
print("Test predictions:", preds_test.tolist())
print("Test true labels:", y_test_int.tolist())

In [None]:

# Loss recorded once per epoch by the trainer; a falling curve indicates convergence.
loss_curve = hist["loss"]
plt.figure()
plt.plot(loss_curve)
plt.title("Training Loss")
plt.xlabel("Epoch")
plt.ylabel("Binary Cross-Entropy")
plt.show()

In [None]:
from collections import Counter

def confusion_matrix(true, pred):
    """Count binary classification outcomes.

    Parameters
    ----------
    true : array-like of ground-truth labels (0 or 1).
    pred : array-like of predicted labels (0 or 1), same shape as ``true``.

    Returns
    -------
    (TP, FP, FN, TN) as Python ints.

    Raises
    ------
    ValueError
        If ``true`` and ``pred`` have different shapes.
    """
    # returns (TP, FP, FN, TN)
    # Coerce to arrays: with plain Python lists, `true == 1` is a single False
    # rather than an element-wise mask, and every count would silently be 0.
    true = np.asarray(true)
    pred = np.asarray(pred)
    # Guard against silent broadcasting (e.g. shape (n,) against (n, 1)).
    if true.shape != pred.shape:
        raise ValueError(
            f"true and pred must have the same shape, got {true.shape} and {pred.shape}"
        )
    tp = int(np.sum((true == 1) & (pred == 1)))
    tn = int(np.sum((true == 0) & (pred == 0)))
    fp = int(np.sum((true == 0) & (pred == 1)))
    fn = int(np.sum((true == 1) & (pred == 0)))
    return tp, fp, fn, tn

# Confusion-matrix counts on the held-out rows, printed as a labelled dict of plain ints.
TP, FP, FN, TN = confusion_matrix(y_test_np.astype(int), preds_test)
confusion_counts = {"TP": int(TP), "FP": int(FP), "FN": int(FN), "TN": int(TN)}
print(confusion_counts)