# Lab 3 – Module 1: Activation Functions — Bending Space

**Time:** ~5 minutes

---

> **KEY IDEA**  
> In Module 0, some dot patterns couldn’t be separated by any straight line.
> One solution would be to invent a more complicated boundary — but that gets messy fast.  
>
> Activation functions take a completely different approach: **they rearrange the dots first, so that a straight line can work afterward.**
> Think of it like untangling a knot before you measure it.  
>
> Keep that idea in mind as you answer these questions.

## 1. Setup

Run this cell to load two activation functions we’ll compare: **Sigmoid** and **ReLU**.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import Dropdown, FloatSlider, interact

# --- Activation functions ---
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + e^-x), applied element-wise.

    The input is clipped to [-500, 500] before exponentiation so that
    ``np.exp`` is never asked to evaluate an argument of larger magnitude.
    """
    bounded = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-bounded))

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    rectified = np.maximum(0, x)
    return rectified

# --- Grid helpers ---
def make_grid(n_lines=13, span=3):
    """Build a square, axis-aligned grid as two lists of polylines.

    ``n_lines`` evenly spaced offsets in [-span, span] are used for both
    orientations. Every polyline is a (120, 2) array of (x, y) samples
    running from -span to span.

    Returns:
        (h_lines, v_lines): the horizontal lines (constant y) and the
        vertical lines (constant x), each as a list of arrays.
    """
    offsets = np.linspace(-span, span, n_lines)
    samples = np.linspace(-span, span, 120)

    h_lines = []
    v_lines = []
    for offset in offsets:
        # One constant column per offset, reused for both orientations;
        # column_stack copies, so the resulting arrays are independent.
        constant = np.full_like(samples, offset)
        h_lines.append(np.column_stack([samples, constant]))
        v_lines.append(np.column_stack([constant, samples]))
    return h_lines, v_lines

def warp(lines, func):
    """Apply ``func`` to both coordinates of every polyline in ``lines``.

    Each element of ``lines`` is indexed as a 2-column array; ``func`` is
    called once on the first column and once on the second. Returns a new
    list of 2-column arrays, one per input polyline.
    """
    warped = []
    for line in lines:
        xs, ys = line[:, 0], line[:, 1]
        warped.append(np.column_stack([func(xs), func(ys)]))
    return warped

# Visible confirmation that every definition in this cell was executed.
print('Setup complete!')

## 2. Watch the Grid Warp

Imagine a square sheet of graph paper (the **left** plot).  
An activation function stretches and squishes that sheet (the **right** plot).

Switch between **Sigmoid** and **ReLU** and notice:
- Do the straight grid lines stay straight, or do they curve?
- Where does most of the space get compressed?
- Which function changes the grid more dramatically?

In [None]:
def show_warp(activation_name):
    """Plot a square grid before and after an element-wise activation.

    The left panel shows the untouched grid from ``make_grid()``; the right
    panel shows the same grid after ``warp`` applies the chosen activation to
    both coordinates. A short plain-language summary is printed below the
    figure.

    Args:
        activation_name: ``'Sigmoid'`` or ``'ReLU'``. Any other value raises
            ``KeyError`` from the lookup table.
    """
    func = {'Sigmoid': sigmoid, 'ReLU': relu}[activation_name]
    h, v = make_grid()

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6), dpi=100)

    # Original grid (horizontal lines first, then vertical)
    for L in h + v:
        ax1.plot(L[:, 0], L[:, 1], 'steelblue', alpha=0.5, lw=1)
    ax1.set_title('BEFORE  (original grid)', fontsize=13, fontweight='bold')
    ax1.set_xlabel('x\u2081'); ax1.set_ylabel('x\u2082')
    ax1.set_xlim(-3.5, 3.5); ax1.set_ylim(-3.5, 3.5)
    ax1.set_aspect('equal'); ax1.grid(True, alpha=0.2)

    # Warped grid
    for L in warp(h, func) + warp(v, func):
        ax2.plot(L[:, 0], L[:, 1], 'crimson', alpha=0.6, lw=1.5)
    ax2.set_title(f'AFTER  ({activation_name} applied)', fontsize=13, fontweight='bold')
    ax2.set_xlabel(f'{activation_name}(x\u2081)')
    ax2.set_ylabel(f'{activation_name}(x\u2082)')
    # Axis limits follow the output range of each activation on [-3, 3].
    if activation_name == 'Sigmoid':
        ax2.set_xlim(-0.1, 1.1); ax2.set_ylim(-0.1, 1.1)
    else:
        ax2.set_xlim(-0.5, 3.5); ax2.set_ylim(-0.5, 3.5)
    ax2.set_aspect('equal'); ax2.grid(True, alpha=0.2)

    plt.tight_layout(); plt.show()

    if activation_name == 'Sigmoid':
        # The warp acts on each coordinate separately, so every grid line
        # keeps one coordinate constant and is still drawn as a straight
        # line; what changes is the spacing between lines.
        print('Sigmoid squishes the entire grid into a small [0,1] x [0,1] box.')
        print('The grid lines stay straight, but their spacing becomes uneven:')
        print('lines far from the center all pile up near the edges.')
    else:
        print('ReLU keeps the top-right quadrant (positive values) unchanged')
        print('but flattens everything negative onto the axes.')
        print('It creates a sharp fold rather than a smooth curve.')

# Wire a dropdown to show_warp; the selected option is passed as
# activation_name. The trailing semicolon keeps the notebook from echoing
# the value returned by interact.
interact(
    show_warp,
    activation_name=Dropdown(options=['Sigmoid', 'ReLU'], value='Sigmoid',
                             description='Activation:')
);

## 3. Straight Rule → Curved Boundary

Here’s the payoff. After warping, we draw a perfectly **straight line** in the warped space (right plot, green line). Now look at what that same rule looks like back in the original space (left plot).

**It’s curved!**

This is the trick: **rearrange the dots so a straight line works, and the boundary in the original world bends to fit the data.**

Adjust the **Threshold** slider to slide the green line and watch the curve move in the original space.

In [None]:
def show_curved_boundary(activation_name, threshold):
    """Show one linear rule in warped space and its shape in original space.

    An 80 x 80 cloud of points on [-3, 3]^2 is pushed through the chosen
    activation (applied to each coordinate). Points are coloured by whether
    the sum of their warped coordinates exceeds ``threshold``. The left panel
    draws that colouring at the original positions, the right panel at the
    warped positions together with the line y1 + y2 = threshold.

    Args:
        activation_name: ``'Sigmoid'`` or ``'ReLU'``; other values raise
            ``KeyError``.
        threshold: right-hand side of the linear rule in warped space.
    """
    activations = {'Sigmoid': sigmoid, 'ReLU': relu}
    func = activations[activation_name]

    # Dense point cloud, flattened to an (N, 2) array of (x1, x2) pairs.
    axis = np.linspace(-3, 3, 80)
    grid_x1, grid_x2 = np.meshgrid(axis, axis)
    orig = np.column_stack([grid_x1.ravel(), grid_x2.ravel()])
    warped = np.column_stack([func(orig[:, 0]), func(orig[:, 1])])

    # Linear rule evaluated in warped space; the mask is shared by both panels.
    above = warped[:, 0] + warped[:, 1] > threshold
    below = ~above

    fig, (ax_orig, ax_warp) = plt.subplots(1, 2, figsize=(14, 6), dpi=100)

    # Same two-colour scatter on each panel, differing only in coordinates.
    for ax, pts in ((ax_orig, orig), (ax_warp, warped)):
        ax.scatter(pts[above, 0], pts[above, 1], c='salmon',    s=4, alpha=0.3)
        ax.scatter(pts[below, 0], pts[below, 1], c='steelblue', s=4, alpha=0.3)

    # Original space
    ax_orig.set_title('ORIGINAL space \u2014 boundary looks CURVED', fontsize=12, fontweight='bold')
    ax_orig.set_xlabel('x\u2081')
    ax_orig.set_ylabel('x\u2082')
    ax_orig.set_xlim(-3, 3)
    ax_orig.set_ylim(-3, 3)
    ax_orig.set_aspect('equal')
    ax_orig.grid(True, alpha=0.2)

    # Warped space: extent of the decision line and of the axes depends on
    # the activation chosen.
    if activation_name == 'Sigmoid':
        upper, lim = 1, (-0.05, 1.05)
    else:
        upper, lim = 3, (-0.3, 3.3)
    t = np.linspace(0, upper, 100)
    ax_warp.plot(t, threshold - t, 'green', lw=3, label=f'y\u2081 + y\u2082 = {threshold:.1f}')
    ax_warp.set_title('WARPED space \u2014 boundary is STRAIGHT', fontsize=12, fontweight='bold')
    ax_warp.set_xlabel(f'{activation_name}(x\u2081)')
    ax_warp.set_ylabel(f'{activation_name}(x\u2082)')
    ax_warp.set_xlim(lim)
    ax_warp.set_ylim(lim)
    ax_warp.set_aspect('equal')
    ax_warp.grid(True, alpha=0.2)
    ax_warp.legend(fontsize=10)

    plt.tight_layout()
    plt.show()

# Wire a dropdown and a slider to show_curved_boundary. The slider is created
# with continuous_update=False. The trailing semicolon keeps the notebook
# from echoing the value returned by interact.
interact(
    show_curved_boundary,
    activation_name=Dropdown(options=['Sigmoid', 'ReLU'], value='Sigmoid',
                             description='Activation:'),
    threshold=FloatSlider(min=0.1, max=2.0, step=0.1, value=0.8,
                          description='Threshold:', continuous_update=False)
);

## Answer‑Sheet Questions (Q4 – Q6)

**Q4.** In your own words, what did the activation function do to the grid of points? Use the before/after comparison in your answer.

**Q5.** After the warping, the rule was a simple straight line in the warped space — but it created a **curved** boundary in the original space. How does this solve the problem you identified in Q3?

**Q6.** Compare how Sigmoid and ReLU each warped the grid. Which changed the space more dramatically? What might be a tradeoff between a dramatic warp and a gentler one?

---

**Next:** Continue to **Module 2** to look at how these activation functions handle extreme inputs and why that matters for learning.