In [12]:
import torch
from sklearn.decomposition import PCA

torch.manual_seed(42)

<torch._C.Generator at 0x11b4c28d0>

In [13]:
n_samples = 1000
d = 10

data = torch.rand(n_samples, d)
data.shape

torch.Size([1000, 10])

In [14]:
n_components = 2

In [15]:
u, s, vt = torch.linalg.svd((data - data.mean(dim=0)), full_matrices=False)
w = vt.T[:, :n_components]  # First two columns of V
torch_compressed = (data-data.mean(dim=0)) @ w

In [16]:
pca = PCA(n_components=n_components)
pca.fit(data)
pca_compressed = pca.transform(data)


In [17]:
# match positive and negative signs
for i in range(n_components):
    if torch_compressed[0, i] > 0 and pca_compressed[0, i] < 0 or torch_compressed[0, i] < 0 and pca_compressed[0, i] > 0:
        print(f"Flipping sign of component {i} to match PCA")
        pca_compressed[:, i] *= -1
    


Flipping sign of component 0 to match PCA


In [18]:
torch.allclose(
    torch.tensor(pca_compressed, dtype=torch_compressed.dtype),
    torch_compressed,
    rtol=1e-5,
    atol=1e-5
), "PCA compression does not match torch SVD compression"

(True, 'PCA compression does not match torch SVD compression')

# Torch PCA Class


In [19]:
class TorchPCA:
    def __init__(self, n_components=None):
        self.n_components = n_components
        self.components_ = None

    def center_data(self, X):
        return X - X.mean(dim=0)

    def fit(self, X):
        # Center the data
        X_centered = self.center_data(X)

        # Perform SVD
        U, S, Vt = torch.linalg.svd(X_centered, full_matrices=False)

        # Store components and explained variance
        self.components_ = Vt[:self.n_components]
        return self

    def transform(self, X):
        # Center the data
        X_centered = self.center_data(X)

        # Project onto the components
        return X_centered @ self.components_.T



In [None]:
torch_pca = TorchPCA(n_components=n_components)
torch_pca.fit(data)
torch_pca_compressed = torch_pca.transform(data)

sklearn_pca = PCA(n_components=n_components)
sklearn_pca.fit(data)
sklearn_pca_compressed = sklearn_pca.transform(data)


assert torch.allclose(
    torch.tensor(sklearn_pca_compressed, dtype=torch_pca_compressed.dtype),
    torch_pca_compressed,
    rtol=1e-5,
    atol=1e-4
), "Torch PCA compression does not match sklearn PCA compression"


Flipping sign of component 0 to match PCA


In [21]:
%%timeit
torch_pca = TorchPCA(n_components=n_components)
torch_pca.fit(data)
torch_pca.transform(data)

91.3 μs ± 321 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [23]:
%%timeit
sklearn_pca = PCA(n_components=n_components)
sklearn_pca.fit(data)
sklearn_pca.transform(data)

146 μs ± 1.24 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
