# In [16]:  (notebook cell prompt kept from the export)
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D           # noqa: F401
from ipywidgets import (Button, FloatSlider, IntSlider, VBox, HBox,
                        Output, Layout, HTML, ToggleButton)

# ------------------------------------------------------------------ #
# 1.  Helpers
# ------------------------------------------------------------------ #
def _sigmoid(z: np.ndarray) -> np.ndarray:
    z = np.clip(z, -60, 60)       # keep numerically stable
    return 1. / (1. + np.exp(-z))

def compute_loss(th_: np.ndarray | None = None) -> float:
    """Return the mean binary cross-entropy over the full dataset.

    Parameters
    ----------
    th_ : optional parameter array to evaluate; when omitted (``None``)
        the module-level ``theta`` is used instead.

    Returns
    -------
    The log-loss as a plain Python ``float``, computed from the
    module-level design matrix ``X_b`` and labels ``y``.
    """
    params = th_ if th_ is not None else theta
    probs = _sigmoid(X_b @ params)
    tiny = 1e-15  # keeps log() away from log(0) when a probability saturates
    log_likelihood = y * np.log(probs + tiny) + (1 - y) * np.log(1 - probs + tiny)
    return float(-np.mean(log_likelihood))

# ------------------------------------------------------------------ #
# 2.  Data
# ------------------------------------------------------------------ #
def generate_data(n_points: int = 300) -> None:
    """Create a fresh toy dataset and publish it through module globals.

    Sets three globals:

    * ``X``   – ``(n_points, 1)`` samples drawn uniformly from [-10, 10)
    * ``y``   – ``(n_points, 1)`` float labels, 1.0 where ``X >= 0`` else 0.0
    * ``X_b`` – ``(n_points, 2)`` design matrix: a column of ones, then ``X``
    """
    global X, y, X_b
    X = np.random.rand(n_points, 1) * 20 - 10
    y = np.where(X >= 0, 1.0, 0.0)
    bias_column = np.ones((n_points, 1))
    X_b = np.hstack((bias_column, X))

# Build the initial dataset (populates the X, y and X_b globals).
generate_data()

# Horizontal extent used by the 2-D panels; it matches the [-10, 10)
# interval that generate_data() samples X from.
x_min = -10
x_max = 10

# ------------------------------------------------------------------ #
# 3.  Globals & caches
# ------------------------------------------------------------------ #
theta = np.zeros((2, 1))                # β0 , β1
step_counter = 0
loss_history, intercept_history, slope_history = [], [], []
last_gradients: np.ndarray | None = None
_theta0_vals = _theta1_vals = _loss_surface = None

# ------------------------------------------------------------------ #
# 4.  Loss-surface pre-calc (for speed)
# ------------------------------------------------------------------ #
def compute_loss_surface() -> None:
    """Evaluate the log-loss on a 60x60 (β0, β1) grid and cache the result.

    Writes three module globals: ``_theta0_vals`` (intercept axis, -10..10),
    ``_theta1_vals`` (slope axis, -5..5) and ``_loss_surface`` (loss at every
    grid node, shaped like the meshgrid of the two axes). All grid nodes are
    scored in a single matrix product against the current ``X_b`` / ``y``.
    """
    global _theta0_vals, _theta1_vals, _loss_surface
    _theta0_vals = np.linspace(-10, 10, 60)
    _theta1_vals = np.linspace(-5, 5, 60)
    grid_b0, grid_b1 = np.meshgrid(_theta0_vals, _theta1_vals)

    # One row per candidate (β0, β1); transposing makes each column of the
    # product hold the scores of every sample under one candidate.
    candidates = np.column_stack((grid_b0.ravel(), grid_b1.ravel()))
    probs = _sigmoid(X_b @ candidates.T)

    tiny = 1e-15  # guards log() against saturated probabilities
    log_likelihood = y * np.log(probs + tiny) + (1 - y) * np.log(1 - probs + tiny)
    per_candidate_loss = -np.mean(log_likelihood, axis=0)
    _loss_surface = per_candidate_loss.reshape(grid_b0.shape)

# ------------------------------------------------------------------ #
# 5.  Plotting
# ------------------------------------------------------------------ #
# Output widget that plot_current_state() clears and redraws the figure into.
output = Output()

def _draw_gradient_arrows(ax_3d):
    """Overlay the latest descent direction as three arrows on ``ax_3d``.

    The arrows start at the previous point of the descent path (second-to-last
    entry of the history lists) and lie in the parameter plane (no z
    component): green is the intercept component, blue the slope component
    and red their sum. Nothing is drawn before the first step or when the
    last gradient is exactly zero.
    """
    if last_gradients is None:
        return
    if len(intercept_history) < 2:
        return

    grad_b0 = float(last_gradients[0, 0])
    grad_b1 = float(last_gradients[1, 0])
    if grad_b0 == 0 and grad_b1 == 0:
        return

    origin = (intercept_history[-2], slope_history[-2], loss_history[-2])
    scale = .4
    # Negated because descent moves against the gradient.
    step_b0 = -grad_b0 * scale
    step_b1 = -grad_b1 * scale

    arrows = (
        (step_b0, 0, 'tab:green'),
        (0, step_b1, 'tab:blue'),
        (step_b0, step_b1, 'tab:red'),
    )
    for dx, dy, colour in arrows:
        ax_3d.quiver(*origin, dx, dy, 0,
                     color=colour, linewidth=2, arrow_length_ratio=.15)

def _render_linear_panel(ax, x_line, z_line):
    """Draw the raw linear score z = β0 + β1·x on ``ax``."""
    ax.plot(x_line, z_line, color='black', label=r'$z=\beta_0+\beta_1x$')
    ax.axhline(0, linestyle=':', color='k', lw=.8)
    ax.axvline(0, linestyle=':', color='k', lw=.8)
    ax.set(
        xlabel='x value',
        ylabel='Linear score z',
        title='Linear score',
        xlim=(x_min, x_max),
        ylim=(-5, 5),  # fixed range so the line visibly tilts between steps
    )
    ax.legend(loc='upper left')


def _render_sigmoid_panel(ax, x_line, z_line):
    """Draw the sigmoid of the linear score together with the labelled data."""
    ax.plot(x_line, _sigmoid(z_line), color='black', label=r'$\sigma(z)$')
    labels = y.ravel()
    point_colors = np.where(labels == 1, 'crimson', 'royalblue')
    ax.scatter(X.ravel(), labels, s=18, color=point_colors, alpha=.8,
               label='data')
    ax.axhline(.5, linestyle=':', color='k', lw=.8)
    ax.axvline(0, linestyle=':', color='k', lw=.8)
    ax.set(
        xlabel='x value',
        ylabel='Logistic regression score p',
        title='Sigmoid probability',
        xlim=(x_min, x_max),
        ylim=(-.02, 1.02),
    )
    ax.legend(loc='upper left')


def _render_loss_panel(ax):
    """Draw the cached loss surface, the descent path and the gradient arrows."""
    grid_b0, grid_b1 = np.meshgrid(_theta0_vals, _theta1_vals)
    ax.plot_surface(grid_b0, grid_b1, _loss_surface,
                    cmap='viridis', alpha=.6, linewidth=0)
    if loss_history:
        ax.plot(intercept_history, slope_history, loss_history,
                color='gold', marker='o', markersize=4, lw=2, zorder=10)
    _draw_gradient_arrows(ax)
    ax.set(
        xlabel='Intercept β₀',
        ylabel='Slope β₁',
        zlabel='Log-loss (BCE)',
        title='Loss surface & descent path',
    )
    ax.view_init(35, 45)  # elevation / azimuth of the camera
    ax.set_zlim(0, np.max(_loss_surface))


def plot_current_state():
    """Redraw the whole figure for the current ``theta`` inside ``output``.

    Layout: linear score (top-left), sigmoid with data (bottom-left) and the
    3-D loss surface with the descent path (right, spanning both rows). The
    previous rendering in the Output widget is replaced.
    """
    with output:
        output.clear_output(wait=True)
        fig = plt.figure(figsize=(13, 7))
        grid = fig.add_gridspec(2, 2, width_ratios=[1, 1.3])

        x_line = np.linspace(x_min, x_max, 400)
        z_line = theta[0, 0] + theta[1, 0] * x_line

        _render_linear_panel(fig.add_subplot(grid[0, 0]), x_line, z_line)
        _render_sigmoid_panel(fig.add_subplot(grid[1, 0]), x_line, z_line)
        _render_loss_panel(fig.add_subplot(grid[:, 1], projection='3d'))

        plt.tight_layout()
        plt.show()

# ------------------------------------------------------------------ #
# 6.  UI widgets
# ------------------------------------------------------------------ #
# Heading shown above the controls.
title_html = HTML(
    "<h2>Logistic Regression: Is the number positive or negative?</h2>"
)

# Action buttons: take one gradient step, restart training, resample the data.
step_btn = Button(
    description='Step',
    button_style='success',
    layout=Layout(width='80px'),
)
reset_btn = Button(
    description='Reset',
    layout=Layout(width='80px'),
)
data_btn = Button(
    description='New Data',
    layout=Layout(width='90px'),
)

# Hyper-parameter sliders read by on_step() at every click.
lr_slider = FloatSlider(
    value=0.1,
    min=0.001,
    max=1.0,
    step=0.001,
    description='Learning rate',
    readout_format='.3f',
    layout=Layout(width='240px'),
)
bs_slider = IntSlider(
    value=300,
    min=10,
    max=300,
    step=10,
    description='Batch size',
    layout=Layout(width='240px'),
)

# When ON, reset_model() draws a random starting theta instead of zeros.
rand_init_toggle = ToggleButton(
    value=False,
    description='Random Init: OFF',
    layout=Layout(width='140px'),
)

# HTML read-out of the latest step (parameters, gradients, loss).
step_info = HTML()
step_info.layout = Layout(min_width='260px')

# ------------------------------------------------------------------ #
# 7.  Reset / refresh helpers
# ------------------------------------------------------------------ #
def reset_model():
    """Restart training from scratch and refresh every view.

    Re-initialises ``theta`` (uniformly at random within the plotted surface
    range when the "Random Init" toggle is on, otherwise zeros), clears the
    step counter, gradient and histories, recomputes the loss surface for the
    current data, redraws the figure and resets the status read-out.
    """
    global theta, step_counter, loss_history, intercept_history
    global slope_history, last_gradients

    if rand_init_toggle.value:
        theta = np.random.uniform([-10, -5], [10, 5]).reshape(2, 1)
    else:
        theta = np.zeros((2, 1))

    step_counter = 0
    # Histories start with the initial point so the path has an origin.
    loss_history = [compute_loss()]
    intercept_history = [theta[0, 0]]
    slope_history = [theta[1, 0]]
    last_gradients = None

    compute_loss_surface()
    plot_current_state()

    status_parts = [
        f"<b>Step:</b> {step_counter}<br>",
        f"<b>Old Intercept:</b> — &nbsp;&nbsp;<b>Old Slope:</b> —<br>",
        f"<b>Gradient Intercept:</b> — &nbsp;&nbsp;<b>Gradient Slope:</b> —<br>",
        f"<b>New Intercept:</b> {theta[0,0]:+.4f}&nbsp;&nbsp;",
        f"<b>New Slope:</b> {theta[1,0]:+.4f}<br>",
        f"<b>Log-loss (BCE):</b> {loss_history[-1]:.4f}",
    ]
    step_info.value = "".join(status_parts)

def refresh_data(_):
    """Button callback: sample a new dataset and restart training on it.

    The positional argument supplied by the click event is ignored.
    """
    generate_data()  # replaces the X, y and X_b globals
    reset_model()    # re-initialises theta, rebuilds the surface and redraws

# ------------------------------------------------------------------ #
# 8.  Training step
# ------------------------------------------------------------------ #
def on_step(_):
    """Button callback: perform one mini-batch gradient-descent step.

    Samples a batch without replacement, computes the mean log-loss gradient
    on it, updates ``theta`` in place, appends the new point to the history
    lists, then redraws the figure and the status read-out. The positional
    argument supplied by the click event is ignored.
    """
    global theta, step_counter, last_gradients
    old = theta.copy()

    # Sampling without replacement cannot draw more rows than exist, so the
    # slider value is capped at the dataset size instead of letting
    # np.random.choice raise ValueError when the two get out of sync.
    batch_size = min(bs_slider.value, len(X_b))
    idx = np.random.choice(len(X_b), batch_size, replace=False)
    Xb, yb = X_b[idx], y[idx]
    preds = _sigmoid(Xb @ theta)
    # Average over the rows actually used in this batch.
    grads = (Xb.T @ (preds - yb)) / batch_size

    theta -= lr_slider.value * grads
    last_gradients = grads.copy()

    step_counter += 1
    # The recorded loss is evaluated on the full dataset, not just the batch.
    loss_history.append(compute_loss())
    intercept_history.append(theta[0, 0])
    slope_history.append(theta[1, 0])

    plot_current_state()
    step_info.value = (
        f"<b>Step:</b> {step_counter}<br>"
        f"<b>Old Intercept:</b> {old[0,0]:+.4f}&nbsp;&nbsp;"
        f"<b>Old Slope:</b> {old[1,0]:+.4f}<br>"
        f"<b>Gradient Intercept:</b> {grads[0,0]:+.4f}&nbsp;&nbsp;"
        f"<b>Gradient Slope:</b> {grads[1,0]:+.4f}<br>"
        f"<b>New Intercept:</b> {theta[0,0]:+.4f}&nbsp;&nbsp;"
        f"<b>New Slope:</b> {theta[1,0]:+.4f}<br>"
        f"<b>Log-loss (BCE):</b> {loss_history[-1]:.4f}"
    )

# ------------------------------------------------------------------ #
# 9.  Widget wiring
# ------------------------------------------------------------------ #
def _toggle(change):
    """Keep the toggle caption in sync with its state, then restart training.

    ``change`` is the traitlets change dict; only its ``'new'`` value is read.
    """
    if change['new']:
        rand_init_toggle.description = "Random Init: ON"
    else:
        rand_init_toggle.description = "Random Init: OFF"
    reset_model()


# Button clicks.
step_btn.on_click(on_step)
reset_btn.on_click(lambda _: reset_model())
data_btn.on_click(refresh_data)

# React only to changes of the toggle's boolean value.
rand_init_toggle.observe(_toggle, names='value')

# Left column: the three buttons in a row, then sliders and toggle stacked.
_button_row = HBox(
    [step_btn, reset_btn, data_btn],
    layout=Layout(gap='8px'),
)
controls = VBox(
    [_button_row, lr_slider, bs_slider, rand_init_toggle],
    layout=Layout(gap='10px'),
)

# Controls on the left, step read-out on the right.
top_row = HBox(
    [controls, step_info],
    layout=Layout(gap='40px', align_items='flex-start'),
)

# Complete app: heading, control row, then the plot output area.
ui = VBox(
    [title_html, top_row, output],
    layout=Layout(padding='10px', border='1px solid lightgray'),
)

# ------------------------------------------------------------------ #
# 10.  Show!
# ------------------------------------------------------------------ #
# reset_model() rebuilds the loss surface itself before it plots, so calling
# compute_loss_surface() separately here would only evaluate the same grid
# twice at start-up.
reset_model()
ui  # display inside Jupyter / Voilà


# Cell output (truncated widget repr from the notebook export):
# VBox(children=(HTML(value='<h2>Logistic Regression: Is the number positive or negative?</h2>'), HBox(children=…