In [None]:
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import FloatSlider, VBox, HBox, interactive_output
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display
import time

# --- Shared setup ---
np.random.seed(42)
n = 300

def generate_rotated_data(angle=30.0, var_parallel=5.0, var_perp=0.5, n_samples=n):
    """Generate 2D Gaussian data rotated by 'angle'."""
    u = np.sqrt(var_parallel) * np.random.randn(n_samples)
    v = np.sqrt(var_perp) * np.random.randn(n_samples)
    data = np.vstack([u, v])
    theta = np.deg2rad(angle)
    R = np.array([[np.cos(theta), -np.sin(theta)],
                  [np.sin(theta),  np.cos(theta)]])
    return (R @ data).T

def projection_error(data, alpha):
    """Mean squared error when projecting data onto α-axis."""
    a_rad = np.deg2rad(alpha)
    perp_vec = np.array([-np.sin(a_rad), np.cos(a_rad)])
    coords = data @ perp_vec
    return np.mean(coords**2)

def iterative_search(data, step_size=5):
    """Greedy search for axis minimizing projection error."""
    current_alpha = 0
    trajectory = []
    while True:
        err = projection_error(data, current_alpha)
        left = projection_error(data, (current_alpha-step_size) % 180)
        right = projection_error(data, (current_alpha+step_size) % 180)
        trajectory.append((current_alpha, err))
        if err <= left and err <= right:
            break
        current_alpha = (current_alpha - step_size) % 180 if left < right else (current_alpha + step_size) % 180
    return trajectory

# =====================================================
# 1. Matplotlib PCA Illustration (sliders)
# =====================================================
def plot_and_project_alpha(angle=0.0, var_parallel=5.0, var_perp=0.5, alpha=0.0):
    data = generate_rotated_data(angle, var_parallel, var_perp)
    x, y = data[:,0], data[:,1]

    # Projection vectors
    alpha_rad = np.deg2rad(alpha)
    axis_vec = np.array([np.cos(alpha_rad), np.sin(alpha_rad)])
    perp_vec = np.array([-np.sin(alpha_rad), np.cos(alpha_rad)])

    # Projections
    coords_on_axis = data @ axis_vec
    coords_on_perp = data @ perp_vec
    proj_points_alpha = np.outer(coords_on_axis, axis_vec)
    proj_points_perp = np.outer(coords_on_perp, perp_vec)
    proj_error = np.mean(coords_on_perp**2)

    # Error curve
    alphas = np.linspace(0, 180, 181)
    errors = [projection_error(data, a) for a in alphas]

    # --- Plot ---
    fig, axes = plt.subplots(1, 2, figsize=(14,6), gridspec_kw={"width_ratios":[2.5,1]})
    axes[0].scatter(x, y, alpha=0.5, c="blue", s=40, label="Original points")
    axes[0].scatter(proj_points_alpha[:,0], proj_points_alpha[:,1], alpha=0.8, c="red", s=40, label=f"Proj α={alpha:.1f}°")
    axes[0].scatter(proj_points_perp[:,0], proj_points_perp[:,1], alpha=0.8, c="green", s=40, label="Proj ⟂ axis")

    # Sample error lines
    for i in np.linspace(0, len(x)-1, 20, dtype=int):
        axes[0].plot([x[i], proj_points_alpha[i,0]], [y[i], proj_points_alpha[i,1]], "r--", alpha=0.3)
        axes[0].plot([x[i], proj_points_perp[i,0]], [y[i], proj_points_perp[i,1]], "g--", alpha=0.3)

    # Projection axes
    line_len = max(np.max(np.abs(x)), np.max(np.abs(y))) * 1.7
    axes[0].plot([-line_len*axis_vec[0], line_len*axis_vec[0]], [-line_len*axis_vec[1], line_len*axis_vec[1]], "r--", lw=2, label="α-axis")
    axes[0].plot([-line_len*perp_vec[0], line_len*perp_vec[0]], [-line_len*perp_vec[1], line_len*perp_vec[1]], "g--", lw=2, label="⊥ α-axis")
    axes[0].axhline(0, color="k", lw=1, alpha=0.5); axes[0].axvline(0, color="k", lw=1, alpha=0.5)
    axes[0].set_aspect("equal"); axes[0].set_title(f"Projection at α={alpha:.1f}°\nError={proj_error:.2f}"); axes[0].legend()

    axes[1].plot(alphas, errors, label="Error vs α")
    axes[1].axvline(alpha, color="r", linestyle="--", label=f"α={alpha:.1f}°")
    axes[1].set_xlabel("α (degrees)"); axes[1].set_ylabel("Projection error (MSE)")
    axes[1].set_title("Projection error curve"); axes[1].legend()
    plt.show()

# Sliders
angle_slider = FloatSlider(min=0, max=180, step=5, value=30, description="Data θ")
var_parallel_slider = FloatSlider(min=0.1, max=10, step=0.1, value=5, description="Var ∥")
var_perp_slider = FloatSlider(min=0.0, max=5, step=0.1, value=0.5, description="Var ⟂")
alpha_slider = FloatSlider(min=0, max=180, step=5, value=0, description="Axis α")

out = interactive_output(plot_and_project_alpha, {
    'angle': angle_slider,
    'var_parallel': var_parallel_slider,
    'var_perp': var_perp_slider,
    'alpha': alpha_slider
})
ui_matplotlib = HBox([out, VBox([angle_slider, var_parallel_slider, var_perp_slider, alpha_slider])])

# =====================================================
# 2. Plotly PCA Illustration (add/reset points, animate search)
# =====================================================
data0 = generate_rotated_data(40, 5, 0.5)
data = data0.copy()
alphas = np.linspace(0, 180, 181)
errors = [projection_error(data, a) for a in alphas]

fig = go.FigureWidget()

# Left panel
fig.add_scatter(x=data[:,0].tolist(), y=data[:,1].tolist(), mode="markers", marker=dict(color="blue", size=6), name="Data")
fig.add_scatter(x=[], y=[], mode="lines", line=dict(color="red", dash="dash"), name="α-axis")
fig.add_scatter(x=[], y=[], mode="lines", line=dict(color="green", dash="dash"), name="⊥ axis")

# Right panel
fig.add_scatter(x=alphas.tolist(), y=[float(v) for v in errors], mode="lines", line=dict(color="gray"), name="Error curve", xaxis="x2", yaxis="y2")
fig.add_scatter(x=[], y=[], mode="lines", line=dict(color="red", dash="dash"), name="Current α", xaxis="x2", yaxis="y2")
fig.add_scatter(x=[], y=[], mode="markers", marker=dict(color="red", size=10), name="Current error", xaxis="x2", yaxis="y2")

fig.update_layout(
    title="Add points with widgets, then Recompute",
    width=1200, height=600,
    xaxis=dict(domain=[0,0.45], title="x", scaleanchor="y", scaleratio=1),
    yaxis=dict(title="y"),
    xaxis2=dict(domain=[0.55,1], title="α (degrees)"),
    yaxis2=dict(title="Projection error (MSE)")
)

# Widgets
x_input = widgets.FloatText(description="x:")
y_input = widgets.FloatText(description="y:")
add_btn = widgets.Button(description="Add point", button_style="info")
recompute_btn = widgets.Button(description="Recompute", button_style="success")
reset_btn = widgets.Button(description="Reset", button_style="danger")

def add_point(b):
    global data
    new_point = np.array([[x_input.value, y_input.value]])
    data = np.vstack([data, new_point])
    with fig.batch_update():
        fig.data[0].x = data[:,0].tolist()
        fig.data[0].y = data[:,1].tolist()
    fig.layout.title = f"Added point ({x_input.value:.2f},{y_input.value:.2f}). Press Recompute."

def recompute(b):
    global data
    all_errors = [projection_error(data, a) for a in alphas]
    trajectory = iterative_search(data)
    L = float(np.max(np.abs(data)) * 1.5)
    for step, (a, e) in enumerate(trajectory):
        theta = np.deg2rad(a)
        axis_vec = np.array([np.cos(theta), np.sin(theta)])
        perp_vec = np.array([-np.sin(theta), np.cos(theta)])
        with fig.batch_update():
            fig.data[1].x, fig.data[1].y = [-L*axis_vec[0], L*axis_vec[0]], [-L*axis_vec[1], L*axis_vec[1]]
            fig.data[2].x, fig.data[2].y = [-L*perp_vec[0], L*perp_vec[0]], [-L*perp_vec[1], L*perp_vec[1]]
            fig.data[3].x, fig.data[3].y = alphas.tolist(), [float(v) for v in all_errors]
            fig.data[4].x, fig.data[4].y = [a, a], [0, float(max(all_errors))*1.05]
            fig.data[5].x, fig.data[5].y = [a], [float(e)]
            fig.layout.title = f"Step {step+1}: α={a:.1f}°, Error={e:.2f}"
        time.sleep(0.4)

def reset_plot(b):
    global data
    data = data0.copy()
    all_errors = [projection_error(data, a) for a in alphas]
    with fig.batch_update():
        fig.data[0].x, fig.data[0].y = data[:,0].tolist(), data[:,1].tolist()
        fig.data[1].x, fig.data[1].y = [], []
        fig.data[2].x, fig.data[2].y = [], []
        fig.data[3].x, fig.data[3].y = alphas.tolist(), [float(v) for v in all_errors]
        fig.data[4].x, fig.data[4].y = [], []
        fig.data[5].x, fig.data[5].y = [], []
        fig.layout.title = "Reset to original data."

add_btn.on_click(add_point)
recompute_btn.on_click(recompute)
reset_btn.on_click(reset_plot)

ui_plotly = widgets.HBox([x_input, y_input, add_btn, recompute_btn, reset_btn])

# =====================================================
# Expose both UIs
# =====================================================





## 📝 To-Do List: Exploring Projections and PCA Intuition

### Step 1: Explore the data orientation
- Use the **“Data angle θ”** slider to rotate the data cloud.  
- Observe how the cloud aligns or misaligns with the x- and y-axes.  
- **Question:** When θ = 0°, which axis (x or y) captures most of the variance?  
- **Question:** What about when θ = 90°?  

---

### Step 2: Adjust variances
- Use the **“Var ∥”** slider (variance along the main data axis).  
- Use the **“Var ⟂”** slider (variance perpendicular to the main data axis).  
- Try:  
  - Set **Var ⟂ = 0**. What happens to the projection error?  
  - Make **Var ⟂** very large. How does the data look?  

---

### Step 3: Project onto different α-axes
- Use the **“Axis α”** slider to change the projection axis.  
- Watch the **red (α-axis projection)** and **green (⊥ axis projection)** points.  
- Look at the **dashed lines**: they show how each original point is projected.  

---

### Step 4: Compare errors
- Look at the **error curve plot** (right subplot).  
- Move α and see how the vertical red line moves across the error curve.  
- **Question:** At which α is the projection error the smallest?  
- **Verify:** Does this α match the true data orientation θ?  

---

### Step 5: Connect to PCA
- Notice: PCA **chooses the α-axis with minimum error (maximum variance)**.  
- Compare your observed α from Step 4 to the given θ (data orientation).  
- **Reflect:** Why does PCA always find the “best” direction automatically?  

---

✅ **By the end of this exploration, you should be able to:**  
1. Explain why **keeping the axis with maximum variance** minimizes information loss.  
2. See how projection error increases when you pick the “wrong” α.  
3. Understand how PCA finds this optimal α without us telling it the data orientation.  


In [2]:
print("Matplotlib interactive PCA demo:")
display(ui_matplotlib)

Matplotlib interactive PCA demo:


HBox(children=(Output(), VBox(children=(FloatSlider(value=30.0, description='Data θ', max=180.0, step=5.0), Fl…

# PCA and Sensitivity to Outliers

In this exercise, we will use the interactive plot to explore how **Principal Component Analysis (PCA)** behaves when we add new points to the dataset.

---

## 1. Warm-up: No Outliers
- Start with the default dataset.  
- Click **Recompute Directions**.  
- Observe the **red dashed α-axis**.

**Question:**  
Does the α-axis align with the main spread of the data cloud?  
_Write your observation here:_

---

## 2. Single Outlier Experiment
- Add **one point far along the x-axis**, for example `(x=50, y=0)`.  
- Click **Recompute Directions**.

**Question:**  
How does this single point change the α-axis?  
_Write your observation here:_

---

## 3. Vertical Outlier
- Press **Reset** to return to the original dataset.  
- Add a point high on the y-axis, e.g. `(x=0, y=50)`.  
- Click **Recompute Directions**.

**Question:**  
Does PCA now prefer the vertical direction? Why?  
_Write your explanation here:_

---

## 4. Cluster of Outliers
- Reset the dataset.  
- Add 3–4 points far in the same region, e.g. around `(x=40, y=40)`.  
- Recompute.

**Question:**  
Does PCA follow the majority of points, or do the few outliers dominate?  
_Write your answer here:_

---

## 5. Balanced vs. Unbalanced Extremes
- Reset the dataset.  
- Add one extreme point at `(50, 0)` and another at `(-50, 0)`.  
- Recompute.

**Question:**  
Is PCA still pulled strongly in one direction? Why or why not?  
_Write your reasoning here:_

---

## 6. Reflection
**Final Question:**  
Based on your experiments, is PCA robust against outliers?  
If not, what strategies could we use to make PCA more robust?  
(Hint: Think about removing outliers, using robust PCA methods, or applying preprocessing steps.)

_Write your reflection here:_

---


In [3]:
print("Plotly interactive PCA demo:")
display(ui_plotly, fig)

Plotly interactive PCA demo:


HBox(children=(FloatText(value=0.0, description='x:'), FloatText(value=0.0, description='y:'), Button(button_s…

FigureWidget({
    'data': [{'marker': {'color': 'blue', 'size': 6},
              'mode': 'markers',
              'name': 'Data',
              'type': 'scatter',
              'uid': '718875fc-ef8f-4f26-88c4-621526cb7c00',
              'x': [1.1290973799490354, -1.4008221444038031, 1.4765067681079405,
                    ..., 0.0001924550761997687, 1.964888990455732,
                    -2.019379930277529],
              'y': [1.2877336446575771, -1.5385055917895145, 1.2654695482579899,
                    ..., 0.510144162723883, 1.8554243331993183,
                    -0.43527428845931493]},
             {'line': {'color': 'red', 'dash': 'dash'},
              'mode': 'lines',
              'name': 'α-axis',
              'type': 'scatter',
              'uid': '59bf7cbc-fc89-4e6b-91be-4cdc65a863e4',
              'x': [],
              'y': []},
             {'line': {'color': 'green', 'dash': 'dash'},
              'mode': 'lines',
              'name': '⊥ axis',
              '