# Notebook 11: Dimensionality Reduction

Master PCA, t-SNE, and the core concepts of dimensionality reduction.

In [None]:
import sys
# Add the directory two levels above the working directory to the import
# path; presumably this is the project root holding the `koans` package
# imported below -- TODO confirm against the repository layout.
sys.path.append('../..')
import numpy as np
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.datasets import load_digits, load_iris
from koans.core.validator import KoanValidator
from koans.core.progress import ProgressTracker
# Validator for this notebook; its `koan(...)` decorator is applied to every
# `validate` function defined in the cells below.
validator = KoanValidator('11_dimensionality_reduction')
# Tracker queried in the final cell for this notebook's progress.
tracker = ProgressTracker()
print('Setup complete!')

## KOAN 11.1: PCA Basics
Apply PCA to reduce Iris to 2D.

In [None]:
def koan_1():
    """Project the Iris feature matrix onto two principal components.

    Returns:
        The array produced by fitting a 2-component PCA on the Iris
        features and transforming those same features with it.
    """
    features = load_iris().data
    reducer = PCA(n_components=2)
    return reducer.fit_transform(features)

@validator.koan(1, 'PCA Basics', difficulty='Intermediate-Advanced')
def validate():
    """Check that koan_1 returns an array of shape (150, 2)."""
    projected = koan_1()
    expected_shape = (150, 2)
    assert projected.shape == expected_shape
    print('✓ Reduced to 2D')
validate()

## KOAN 11.2: Explained Variance
Get cumulative explained variance.

In [None]:
def koan_2():
    """Return the cumulative explained-variance ratio of a 10-component PCA.

    The PCA is fitted on the digits feature matrix; entry ``i`` of the
    result is the sum of the explained-variance ratios of components
    ``0..i``.
    """
    pixels = load_digits().data
    reducer = PCA(n_components=10)
    reducer.fit(pixels)
    ratios = reducer.explained_variance_ratio_
    return np.cumsum(ratios)

@validator.koan(2, 'Explained Variance', difficulty='Intermediate-Advanced')
def validate():
    """Check that koan_2 returns ten values and report the last one as a percentage."""
    result = koan_2()
    n_values = len(result)
    assert n_values == 10
    print(f'✓ 10 components: {result[-1]*100:.1f}% variance')
validate()

## KOAN 11.3: Component Selection
Find components for 95% variance.

In [None]:
def koan_3():
    """Count the leading principal components needed to reach 95% variance.

    A 50-component PCA is fitted on the digits features and the cumulative
    explained-variance ratio is scanned for the first entry >= 0.95.

    Returns:
        The 1-based number of components at which the threshold is first
        met. Note that ``argmax`` of an all-False mask is 0, so the result
        would be 1 if the threshold were never reached.
    """
    pixels = load_digits().data
    reducer = PCA(n_components=50)
    reducer.fit(pixels)
    running_total = np.cumsum(reducer.explained_variance_ratio_)
    # argmax over a boolean mask yields the index of the first True entry.
    first_hit = np.argmax(running_total >= 0.95)
    # Convert the zero-based index into a component count.
    return first_hit + 1

@validator.koan(3, 'Component Selection', difficulty='Intermediate-Advanced')
def validate():
    """Check that koan_3's component count lies between 10 and 40 inclusive."""
    result = koan_3()
    lower, upper = 10, 40
    assert lower <= result <= upper
    print(f'✓ Need {result} components')
validate()

## KOAN 11.4: PCA Visualization
Reduce digits to 2D.

In [None]:
def koan_4():
    """Reduce the digits features to two principal components.

    Returns:
        A pair ``(embedding, labels)``: the 2-component PCA projection of
        the digits features and the dataset's ``target`` array.
    """
    dataset = load_digits()
    embedding = PCA(n_components=2).fit_transform(dataset.data)
    labels = dataset.target
    return embedding, labels

@validator.koan(4, 'PCA Visualization', difficulty='Intermediate-Advanced')
def validate():
    """Check that koan_4's projection has shape (1797, 2)."""
    # The labels are returned alongside the projection but are not checked.
    embedding, _labels = koan_4()
    expected_shape = (1797, 2)
    assert embedding.shape == expected_shape
    print('✓ Reduced digits to 2D')
validate()

## KOAN 11.5: Feature Loadings
Get component loadings.

In [None]:
def koan_5():
    """Return the ``components_`` matrix of a 2-component PCA fitted on Iris.

    Each row holds one component's weights over the original features.
    """
    measurements = load_iris().data
    fitted = PCA(n_components=2).fit(measurements)
    loadings = fitted.components_
    return loadings

@validator.koan(5, 'Feature Loadings', difficulty='Intermediate-Advanced')
def validate():
    """Check that koan_5 returns a matrix of shape (2, 4)."""
    loadings = koan_5()
    expected_shape = (2, 4)
    assert loadings.shape == expected_shape
    print('✓ Got loadings')
validate()

## KOAN 11.6: t-SNE
Apply t-SNE for visualization.

In [None]:
def koan_6():
    """Embed the first 500 digit samples in two dimensions with t-SNE.

    The t-SNE estimator is created with ``random_state=42``.

    Returns:
        The array returned by ``TSNE.fit_transform`` on the 500-row subset.
    """
    # Only the first 500 rows are used (presumably to keep t-SNE fast).
    subset = load_digits().data[:500]
    embedder = TSNE(n_components=2, random_state=42)
    embedding = embedder.fit_transform(subset)
    return embedding

@validator.koan(6, 't-SNE', difficulty='Intermediate-Advanced')
def validate():
    """Check that koan_6's embedding has exactly two columns."""
    embedding = koan_6()
    n_dims = embedding.shape[1]
    assert n_dims == 2
    print('✓ Applied t-SNE')
validate()

## KOAN 11.7: Standardization
Scale before PCA.

In [None]:
def koan_7():
    """Standardize the Iris features with ``StandardScaler``.

    Returns:
        The result of ``StandardScaler().fit_transform`` applied to the
        Iris feature matrix.
    """
    from sklearn.preprocessing import StandardScaler

    measurements = load_iris().data
    standardized = StandardScaler().fit_transform(measurements)
    return standardized

@validator.koan(7, 'Standardization', difficulty='Intermediate-Advanced')
def validate():
    """Check that koan_7 returns data standardized feature by feature.

    Every column must have a mean of (numerically) zero and a standard
    deviation of about one. Checking only the global mean would accept
    data that is centred but not scaled, or whose per-feature offsets
    happen to cancel out.

    Raises:
        AssertionError: if any column's mean is not ~0 or its standard
            deviation is not ~1.
    """
    result = np.asarray(koan_7())
    col_means = result.mean(axis=0)
    col_stds = result.std(axis=0)
    assert np.allclose(col_means, 0.0, atol=1e-6), \
        f'per-feature means should be ~0, got {col_means}'
    # Looser tolerance so that scaling by the sample (ddof=1) standard
    # deviation is accepted as well as the population (ddof=0) one.
    assert np.allclose(col_stds, 1.0, atol=1e-2), \
        f'per-feature standard deviations should be ~1, got {col_stds}'
    print('✓ Standardized data')
validate()

## KOAN 11.8: Curse of Dimensionality
Compare how spread out pairwise distances are in high- versus low-dimensional random data.

In [None]:
def koan_8():
    """Compare the relative spread of pairwise distances in 100-D and 2-D.

    Two sets of 100 points are drawn uniformly from the unit hypercube, one
    in 100 dimensions and one in 2. For each set the standard deviation of
    all pairwise Euclidean distances is divided by their mean.

    The ratio is used rather than the raw standard deviation because the
    curse of dimensionality is about distances becoming indistinguishable
    *relative to their size*: the mean distance grows with dimension while
    the absolute standard deviation stays roughly constant, so comparing
    raw standard deviations does not show the effect reliably.

    Returns:
        tuple: ``(spread_high, spread_low)`` -- std/mean of the pairwise
        distances for the 100-D and the 2-D point sets, in that order.
    """
    from scipy.spatial.distance import pdist

    # A local generator keeps the result reproducible without reseeding
    # NumPy's global random state as a side effect.
    rng = np.random.default_rng(42)
    pts_high = rng.random((100, 100))
    pts_low = rng.random((100, 2))

    def _relative_spread(points):
        # Coefficient of variation of the pairwise Euclidean distances.
        dists = pdist(points)
        return np.std(dists) / np.mean(dists)

    return _relative_spread(pts_high), _relative_spread(pts_low)

@validator.koan(8, 'Curse of Dimensionality', difficulty='Intermediate-Advanced')
def validate():
    """Check that koan_8's high-dimensional value is below its low-dimensional one."""
    spreads = koan_8()
    std_high, std_low = spreads
    assert std_low > std_high
    print(f'✓ High-D std: {std_high:.3f}, Low-D std: {std_low:.3f}')
validate()

## Congratulations!
You have completed the Dimensionality Reduction notebook!

In [None]:
# Ask the tracker created in the setup cell for this notebook's progress
# (presumably a completion percentage, given the '%' printed below).
progress = tracker.get_notebook_progress('11_dimensionality_reduction')
print(f'Progress: {progress}%')