# Math Lab — FAANG-Level Mixed Problems

This lab is a *problem set + mini-verification* notebook.

---

In [None]:
import numpy as np

def check(name: str, cond: bool):
    """Assert a named condition and report success.

    Prints ``OK: <name>`` when *cond* is truthy; otherwise raises
    ``AssertionError`` with the message ``Failed: <name>``.
    """
    if cond:
        print(f'OK: {name}')
        return
    raise AssertionError(f'Failed: {name}')

# Shared generator with a fixed seed so every cell below is reproducible.
rng = np.random.default_rng(seed=0)

## Problem 1 — Projection Matrix Properties

Let P be a projection matrix onto a subspace.
1) Show that P^2 = P (idempotent).
2) For orthogonal projection, show P = P^T.

### TODO (code): Construct projection onto span(u) and verify properties
**Hint:** $P = \dfrac{u u^\top}{u^\top u}$


In [None]:
u = rng.standard_normal(5)
# Orthogonal projector onto span(u): P = u u^T / (u^T u).
# np.outer builds the rank-1 matrix u u^T; u @ u is the scalar u^T u.
P = np.outer(u, u) / (u @ u)
# P P = u (u^T u) u^T / (u^T u)^2 = P, and u u^T is symmetric by construction.
check('idempotent', np.allclose(P @ P, P, atol=1e-8))
check('symmetric', np.allclose(P, P.T, atol=1e-8))

## Problem 2 — PSD Matrix Check

Show that for any matrix X, the matrix A = X^T X is positive semidefinite (PSD).

### TODO (code): sample random X, build A, verify v^T A v >= 0 for random v
**Hint:** $v^\top X^\top X v = \lVert Xv \rVert^2 \ge 0$


In [None]:
# Build the Gram matrix A = X^T X from a random 10x4 matrix X.
X = rng.standard_normal(size=(10, 4))
A = X.T @ X
# Evaluate the quadratic form v^T A v for 100 random vectors v. A small
# negative slack is tolerated to absorb floating-point round-off.
for _ in range(100):
    v = rng.standard_normal(size=4)
    val = float(v @ A @ v)
    if val < -1e-8:
        raise AssertionError('Not PSD?')
print('PSD check passed')

## Problem 3 — Least Squares Derivation

Derive the normal equations for minimizing ||Xw - y||^2.

### TODO (code): compare w_hat from solve vs np.linalg.lstsq
**Hint:** $w = (X^\top X)^{-1} X^\top y$


In [None]:
# Synthetic regression problem: y = X w_true + small Gaussian noise.
n, d = 200, 5
X = rng.standard_normal((n, d))
w_true = rng.standard_normal(d)
y = X @ w_true + 0.1 * rng.standard_normal(n)

# Normal equations: (X^T X) w = X^T y. Solving the linear system directly
# avoids forming the explicit inverse (X^T X)^{-1}.
w_hat = np.linalg.solve(X.T @ X, X.T @ y)

# Reference solution from NumPy's least-squares routine.
w_lstsq, *_ = np.linalg.lstsq(X, y, rcond=None)
check('close', np.allclose(w_hat, w_lstsq, atol=1e-6))

## Problem 4 — Bayes + Base Rate (Derivation)

Re-derive P(D|+) for the disease test scenario and explain the base-rate fallacy in 2-3 sentences.

### TODO (code): simulate and compare to analytic


In [None]:
# Scenario parameters: prevalence P(D), P(+|D), and P(+|not D).
P_D = 0.01
P_pos_given_D = 0.99
P_pos_given_notD = 0.05

# Bayes' rule: P(D|+) = P(+|D) P(D) / [P(+|D) P(D) + P(+|not D) P(not D)].
joint_pos_D = P_pos_given_D*P_D
joint_pos_notD = P_pos_given_notD*(1-P_D)
P_D_given_pos = joint_pos_D / (joint_pos_D + joint_pos_notD)

# Monte Carlo: draw disease status first, then the test outcome conditional
# on that status (diseased group first, then the healthy group).
N = 200_000
disease = rng.random(N) < P_D
healthy = ~disease
test_pos = np.empty(N, dtype=bool)
test_pos[disease] = rng.random(np.count_nonzero(disease)) < P_pos_given_D
test_pos[healthy] = rng.random(np.count_nonzero(healthy)) < P_pos_given_notD

# Fraction of positive tests that belong to truly diseased samples.
est = np.mean(disease[test_pos])
print('analytic', P_D_given_pos, 'sim', est)
check('close', abs(est - P_D_given_pos) < 0.01)

## Problem 5 — PCA Link

Explain why PCA components are eigenvectors of the covariance matrix.

### TODO (code): compute covariance eigenvectors and compare with SVD directions
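**Hint:**

- Center $X$.
- $\mathrm{Cov} = X^\top X / (n-1)$, using the centered $X$.
- The eigenvectors of $\mathrm{Cov}$ align (up to sign) with the columns of $V$ from the SVD $X = U S V^\top$.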


In [None]:
X = rng.standard_normal((500, 20))
# Center the columns so that Xc^T Xc / (n-1) is the sample covariance.
Xc = X - X.mean(axis=0, keepdims=True)
Cov = (Xc.T @ Xc) / (Xc.shape[0] - 1)

# eigh returns eigenvalues in ascending order; reorder BOTH eigenvalues and
# eigenvectors to descending so eigvals[i] still pairs with eigvecs[:, i].
eigvals, eigvecs = np.linalg.eigh(Cov)
idx = np.argsort(eigvals)[::-1]
eigvals = eigvals[idx]
eigvecs = eigvecs[:, idx]

# SVD of the centered data: Xc = U diag(S) Vt, singular values descending.
U, S, Vt = np.linalg.svd(Xc, full_matrices=False)
V = Vt.T

# Covariance eigenvalues equal the squared singular values divided by (n-1).
check('eigenvalues', np.allclose(eigvals, S**2 / (Xc.shape[0] - 1), atol=1e-8))

# Compare the top-k directions via absolute inner products of unit vectors;
# the absolute value removes the sign ambiguity of eigenvectors.
k = 5
C = np.abs(eigvecs[:, :k].T @ V[:, :k])
print('abs alignment matrix (should be near diagonal)', C)
check('alignment', np.all(np.max(C, axis=1) > 0.9))

---
## Submission Checklist
- Derivations written
- TODO code complete
- Checks pass
