## 📝 To-Do List: Exploring Projections and PCA Intuition

### Step 1: Explore the data orientation
- Use the **“Data angle θ”** slider to rotate the data cloud.  
- Observe how the cloud aligns or misaligns with the x- and y-axes.  
- **Question:** When θ = 0°, which axis (x or y) captures most of the variance?  
- **Question:** What about when θ = 90°?  

---

### Step 2: Adjust variances
- Use the **“Var ∥”** slider (variance along the main data axis).  
- Use the **“Var ⟂”** slider (variance perpendicular to the main data axis).  
- Try:  
  - Set **Var ⟂ = 0**. What happens to the projection error?  
  - Make **Var ⟂** very large. How does the data look?  

---

### Step 3: Project onto different α-axes
- Use the **“Axis α”** slider to change the projection axis.  
- Watch the **red (α-axis projection)** and **green (⊥ axis projection)** points.  
- Look at the **dashed lines**: they connect a sample of the original points to their projections onto the α-axis (red) and onto the perpendicular axis (green).  

---

### Step 4: Compare errors
- Look at the **error curve plot** (right subplot).  
- Move α and see how the vertical red line moves across the error curve.  
- **Question:** At which α is the projection error the smallest?  
- **Verify:** Does this α match the true data orientation θ?  

---

### Step 5: Connect to PCA
- Notice: PCA **chooses the α-axis with minimum projection error — which is the same axis that keeps the maximum variance**.  
- Compare your observed α from Step 4 to the given θ (data orientation).  
- **Reflect:** Why does PCA always find the “best” direction automatically?  

---

✅ **By the end of this exploration, you should be able to:**  
1. Explain why **keeping the axis with maximum variance** minimizes information loss.  
2. See how projection error increases when you pick the “wrong” α.  
3. Understand how PCA finds this optimal α without us telling it the data orientation.  


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import FloatSlider, VBox, HBox, interactive_output

# Number of points in each generated data cloud.
n = 300

# Seed NumPy's global random state so the notebook's random draws are reproducible.
np.random.seed(seed=42)

def generate_rotated_data(angle=0.0, var_parallel=5.0, var_perp=0.5, n_samples=None, seed=42):
    """Generate a 2D Gaussian cloud whose main axis is rotated by ``angle``.

    The samples are drawn from a private, seeded generator instead of NumPy's
    global random state. Calling the function again with the same arguments
    therefore returns exactly the same points, so an interactive caller that
    re-invokes it on every widget change sees a stable cloud that only
    rotates or stretches, rather than a freshly re-sampled one.

    Parameters
    ----------
    angle : float
        Counter-clockwise rotation of the main data axis from the x-axis,
        in degrees.
    var_parallel : float
        Variance of the samples along the main data axis.
    var_perp : float
        Variance of the samples perpendicular to the main data axis.
    n_samples : int, optional
        Number of points to generate. When ``None`` the module-level ``n``
        is used.
    seed : int or None, optional
        Seed for the private generator. Pass ``None`` to get a different
        sample on every call.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_samples, 2)``; column 0 is x, column 1 is y.
    """
    if n_samples is None:
        n_samples = n

    # A local RandomState keeps the draw independent of how many random
    # numbers were consumed elsewhere through the global np.random state.
    rng = np.random.RandomState(seed)

    # Axis-aligned samples: u along the main axis, v perpendicular to it.
    u = np.sqrt(var_parallel) * rng.randn(n_samples)
    v = np.sqrt(var_perp) * rng.randn(n_samples)
    data = np.vstack([u, v])

    # Standard 2D rotation matrix for a counter-clockwise turn by theta.
    theta = np.deg2rad(angle)
    R = np.array([[np.cos(theta), -np.sin(theta)],
                  [np.sin(theta),  np.cos(theta)]])

    # Rotate the (2, n_samples) stack, then return one point per row.
    return (R @ data).T

def plot_and_project_alpha(angle=0.0, var_parallel=5.0, var_perp=0.5, alpha=0.0):
    """Plot a rotated Gaussian cloud, its projections, and the error curve.

    Generates data with ``generate_rotated_data(angle, var_parallel, var_perp)``,
    projects it onto the axis at ``alpha`` degrees and onto the axis
    perpendicular to it, and draws two panels: the points with both
    projections (left) and the mean squared projection error for every
    integer axis angle from 0° to 180° (right). A short text summary of the
    settings and the error at ``alpha`` is printed after the figure.

    Parameters
    ----------
    angle : float
        Data rotation angle θ in degrees, passed to ``generate_rotated_data``.
    var_parallel : float
        Variance along the main data axis, passed to ``generate_rotated_data``.
    var_perp : float
        Variance perpendicular to the main data axis, passed to
        ``generate_rotated_data``.
    alpha : float
        Angle of the projection axis in degrees.
    """
    points = generate_rotated_data(angle, var_parallel, var_perp)
    xs = points[:, 0]
    ys = points[:, 1]

    # Unit vectors of the kept (α) axis and of the discarded perpendicular axis.
    phi = np.deg2rad(alpha)
    keep_dir = np.array([np.cos(phi), np.sin(phi)])
    drop_dir = np.array([-np.sin(phi), np.cos(phi)])

    # Scalar coordinate of every point along each axis ...
    kept_coords = points @ keep_dir
    dropped_coords = points @ drop_dir
    # ... and the matching 2D positions of the projected points.
    on_alpha = np.outer(kept_coords, keep_dir)
    on_perp = np.outer(dropped_coords, drop_dir)

    # What is lost by keeping only the α-axis is the perpendicular component.
    proj_error = np.mean(dropped_coords**2)

    def mse_for(deg):
        """Mean squared perpendicular coordinate for an axis at ``deg`` degrees."""
        rad = np.deg2rad(deg)
        normal = np.array([-np.sin(rad), np.cos(rad)])
        return np.mean((points @ normal) ** 2)

    alphas = np.linspace(0, 180, 181)
    errors = [mse_for(deg) for deg in alphas]

    # Wide panel for the scatter, narrow panel for the error curve.
    fig, (ax_data, ax_err) = plt.subplots(
        1, 2, figsize=(14, 6), gridspec_kw={"width_ratios": [2.5, 1]}
    )

    # --- Left panel: original points and both projections ---
    ax_data.scatter(xs, ys, alpha=0.5, c="blue", s=40, label="Original points")
    ax_data.scatter(on_alpha[:, 0], on_alpha[:, 1],
                    alpha=0.8, c="red", s=40, label=f"Proj on α={alpha:.1f}°")
    ax_data.scatter(on_perp[:, 0], on_perp[:, 1],
                    alpha=0.8, c="green", s=40, label="Proj on ⟂ axis")

    # Connect only 20 evenly spaced points to their projections to limit clutter.
    for k in np.linspace(0, len(xs) - 1, 20, dtype=int):
        for target, fmt in ((on_alpha, "r--"), (on_perp, "g--")):
            ax_data.plot([xs[k], target[k, 0]], [ys[k], target[k, 1]], fmt, alpha=0.3)

    # Draw both axes through the origin, long enough to extend past the cloud.
    half_len = 1.7 * max(np.abs(xs).max(), np.abs(ys).max())
    for direction, colour, tag in (
        (keep_dir, "red", "α-axis (kept)"),
        (drop_dir, "green", "⊥ α-axis (discarded)"),
    ):
        tip = half_len * direction
        ax_data.plot([-tip[0], tip[0]], [-tip[1], tip[1]],
                     color=colour, linestyle="--", linewidth=2, label=tag)

    ax_data.axhline(0, color="k", linewidth=1, alpha=0.5)
    ax_data.axvline(0, color="k", linewidth=1, alpha=0.5)
    ax_data.set_aspect("equal", adjustable="box")
    ax_data.set_title(f"Projection at α={alpha:.1f}°\nError={proj_error:.2f}", fontsize=12)
    ax_data.legend(loc="upper right")

    # --- Right panel: error as a function of the axis angle ---
    ax_err.plot(alphas, errors, label="Projection error vs α")
    ax_err.axvline(alpha, color="r", linestyle="--", label=f"α={alpha:.1f}°")
    ax_err.set_xlabel("α (degrees)")
    ax_err.set_ylabel("Projection error (MSE)")
    ax_err.set_title("Projection error curve", fontsize=12)
    ax_err.legend()

    plt.show()

    # Text summary printed below the figure.
    print(f"Data rotation angle θ: {angle:.1f}°")
    print(f"Projection axis angle α: {alpha:.1f}°")
    print(f"Variance along data axis: {var_parallel:.2f}")
    print(f"Variance perpendicular: {var_perp:.2f}")
    print(f"Projection error (MSE at α={alpha:.1f}°): {proj_error:.2f}")

# Interactive controls: one slider per argument of plot_and_project_alpha.
angle_slider = FloatSlider(
    value=30, min=0, max=180, step=5, description="Data angle θ"
)
var_parallel_slider = FloatSlider(
    value=5, min=0.1, max=10, step=0.1, description="Var ∥"
)
var_perp_slider = FloatSlider(
    value=0.5, min=0.0, max=5, step=0.1, description="Var ⟂"
)
alpha_slider = FloatSlider(
    value=0, min=0, max=180, step=5, description="Axis α"
)

# Bind each slider to the keyword argument of the plotting function it drives.
out = interactive_output(
    plot_and_project_alpha,
    dict(
        angle=angle_slider,
        var_parallel=var_parallel_slider,
        var_perp=var_perp_slider,
        alpha=alpha_slider,
    ),
)

# Layout: plot output on the left, the slider column on the right.
ui = HBox(children=[
    out,
    VBox(children=[angle_slider, var_parallel_slider, var_perp_slider, alpha_slider]),
])
ui


HBox(children=(Output(), VBox(children=(FloatSlider(value=30.0, description='Data angle θ', max=180.0, step=5.…