Skip to content
This repository has been archived by the owner on Feb 22, 2020. It is now read-only.


feat(encoder): add PCAEncoder for incremental pca training
Browse files Browse the repository at this point in the history
  • Loading branch information
raccoonliukai committed Sep 2, 2019
1 parent 7101450 commit da56544
Showing 1 changed file with 36 additions and 0 deletions.
36 changes: 36 additions & 0 deletions gnes/encoder/numeric/
Expand Up @@ -20,6 +20,42 @@
from ...helper import get_perm, batching, get_optimal_sample_size, train_required

class PCAEncoder(BaseNumericEncoder):
    """Numeric encoder that projects vectors onto principal components.

    The projection is learned with scikit-learn's ``IncrementalPCA``, which
    is created in :meth:`post_init` and updated batch by batch in
    :meth:`train`. :meth:`encode` then applies the learned mean and
    components with plain NumPy operations.
    """

    # NOTE(review): presumably consumed by the ``batching`` helper imported
    # at the top of the file; no use of it is visible in this class -- confirm.
    batch_size = 2048

    def __init__(self, output_dim: int, *args, **kwargs):
        """Record the target dimensionality.

        :param output_dim: number of principal components to keep, i.e. the
            width of the vectors produced by :meth:`encode`
        :param args: forwarded unchanged to the base-class constructor
        :param kwargs: forwarded unchanged to the base-class constructor
        """
        super().__init__(*args, **kwargs)
        self.output_dim = output_dim
        # Both remain ``None`` until the first successful call to ``train``.
        self.pca_components = None
        self.mean = None

    def post_init(self):
        """Create the underlying ``IncrementalPCA`` estimator as ``self.pca``."""
        # Imported locally rather than at module level.
        from sklearn.decomposition import IncrementalPCA
        self.pca = IncrementalPCA(n_components=self.output_dim)

    def train(self, vecs: np.ndarray, *args, **kwargs) -> None:
        """Update the PCA model with one batch and refresh the projection.

        :param vecs: 2-D array of shape ``(num_samples, num_dim)``
        :raises ValueError: if the batch holds fewer than ``output_dim``
            samples
        :raises AssertionError: if ``output_dim`` is not strictly smaller
            than ``num_dim``
        """
        num_samples, num_dim = vecs.shape
        # Raise regardless of whether the model has been trained before. The
        # previous nested ``self.mean.size`` check dereferenced ``None`` on an
        # untrained encoder and was always true on a trained one.
        if self.output_dim > num_samples:
            raise ValueError('training PCA requires at least %d points, but %d was given' % (self.output_dim, num_samples))

        assert self.output_dim < num_dim, 'PCA output dimension should < data dimension, received (%d, %d)' % (
            self.output_dim, num_dim)

        # Fit incrementally on this batch; ``components_`` and ``mean_`` only
        # exist on the estimator once it has been fitted.
        self.pca.partial_fit(vecs)

        # Stored transposed so that ``encode`` can right-multiply by it.
        self.pca_components = np.transpose(self.pca.components_)
        self.mean = self.pca.mean_

    def encode(self, vecs: np.ndarray, *args, **kwargs) -> np.ndarray:
        """Centre ``vecs`` with the learned mean and project them.

        ``train`` must have succeeded at least once beforehand; until then
        ``self.mean`` and ``self.pca_components`` are ``None``.

        :param vecs: array of vectors to project; its last dimension must
            match the dimensionality of the training data
        :return: ``(vecs - self.mean)`` multiplied by ``self.pca_components``
        """
        return np.matmul(vecs - self.mean, self.pca_components)

class PCALocalEncoder(BaseNumericEncoder):
batch_size = 2048

Expand Down

0 comments on commit da56544

Please sign in to comment.