# Principal Component Analysis

In [13]:
import os

# Opt in to PyTorch's CPU fallback for ops the MPS backend does not implement.
# NOTE(review): this is set ahead of `import torch` — presumably so the flag is
# already in place when torch loads; confirm before reordering these lines.
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

import numpy as np
import torch

In [46]:
# Prefer Apple's MPS backend when it is available; otherwise run on the CPU.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
dtype = torch.float32

n_samples = 100
n_features = 50
seed = 0

# Seeded generator so a fresh "Restart & Run All" reproduces the same matrix.
rng = np.random.default_rng(seed)

# Uniform [0, 1) toy data, sized from the constants above instead of repeating
# the literals (the original hard-coded 100 and 50 and left the constants unused).
data = torch.tensor(rng.random((n_samples, n_features)), dtype=dtype, device=device)

## LL1 

In [57]:
def pca_ll1(data: torch.Tensor, n_components: int) -> torch.Tensor:
    """Project ``data`` onto its leading principal components.

    Each feature (column) is standardized to zero mean and unit sample
    standard deviation, the covariance matrix of the standardized data is
    eigendecomposed, and the standardized data is projected onto the
    eigenvectors with the largest eigenvalues.

    Args:
        data: 2-D tensor of shape ``(n_samples, n_features)``.
        n_components: Number of principal components to keep. Values larger
            than ``n_features`` keep every component.

    Returns:
        Tensor of shape ``(n_samples, min(n_components, n_features))`` whose
        columns are the component scores, ordered by decreasing explained
        variance. The sign of each column is arbitrary, as with any
        eigenvector.
    """
    mean = torch.mean(data, 0)
    std_dev = torch.std(data, 0)
    # A constant feature has zero spread; dividing by 1 instead keeps it at 0
    # after centering rather than turning the whole column into NaN.
    std_dev = torch.where(std_dev == 0, torch.ones_like(std_dev), std_dev)
    standardized = (data - mean) / std_dev

    # Sample covariance of the standardized features.
    covariance_matrix = (standardized.T @ standardized) / (standardized.size(0) - 1)

    # The covariance matrix is symmetric, so use eigh: it returns real
    # eigenvalues and orthonormal eigenvectors stored as COLUMNS.
    eigen_values, eigen_vectors = torch.linalg.eigh(covariance_matrix)

    # Reorder by decreasing eigenvalue and keep the leading columns. Selecting
    # columns (not rows) is what pairs each eigenvalue with its eigenvector.
    order = torch.argsort(eigen_values, descending=True)
    components = eigen_vectors[:, order[:n_components]]

    return standardized @ components


# Reduce the sample matrix to two principal components and display the
# shape of the resulting score matrix.
res = pca_ll1(data, n_components=2)
res.shape

torch.Size([100, 2])