Skip to content
This repository has been archived by the owner. It is now read-only.
Browse files
feat(encoder): add PCAEncoder for incremental pca training
  • Loading branch information
raccoonliukai committed Sep 2, 2019
1 parent 7101450 commit da56544f7299d87eaa464d86d8c9ee75b8c206e7
Showing with 36 additions and 0 deletions.
  1. +36 −0 gnes/encoder/numeric/
@@ -20,6 +20,42 @@
from ...helper import get_perm, batching, get_optimal_sample_size, train_required

class PCAEncoder(BaseNumericEncoder):
    """Project vectors onto ``output_dim`` principal components.

    The projection is learned with scikit-learn's ``IncrementalPCA``, so
    :meth:`train` may be called repeatedly with successive batches; each call
    updates the fitted model and refreshes the cached projection matrix and
    mean that :meth:`encode` uses.
    """

    # NOTE(review): presumably the chunk size used by the project's batching
    # helper -- confirm against the base class / decorators.
    batch_size = 2048

    def __init__(self, output_dim: int, *args, **kwargs):
        """Store the target dimensionality; the model itself is built in ``post_init``.

        :param output_dim: number of principal components to keep.
        :param args: forwarded unchanged to the base-class constructor.
        :param kwargs: forwarded unchanged to the base-class constructor.
        """
        super().__init__(*args, **kwargs)
        self.output_dim = output_dim
        # Both stay ``None`` until the first successful ``train`` call.
        self.pca_components = None
        self.mean = None

    def post_init(self):
        """Create the underlying ``IncrementalPCA`` estimator."""
        # Imported lazily so scikit-learn is only required once the estimator
        # is actually created.
        from sklearn.decomposition import IncrementalPCA
        self.pca = IncrementalPCA(n_components=self.output_dim)

    def train(self, vecs: np.ndarray, *args, **kwargs) -> None:
        """Update the PCA model with one batch of training vectors.

        :param vecs: 2-D array of shape ``(num_samples, num_dim)``.
        :raises ValueError: if the batch holds fewer than ``output_dim`` rows.
        :raises AssertionError: if ``output_dim`` is not strictly smaller than
            ``num_dim``.
        """
        num_samples, num_dim = vecs.shape
        # Raise for every undersized batch; the previous guard read
        # ``self.mean.size`` while ``self.mean`` was still ``None``.
        if self.output_dim > num_samples:
            raise ValueError('training PCA requires at least %d points, but %d was given' % (
                self.output_dim, num_samples))

        assert self.output_dim < num_dim, 'PCA output dimension should < data dimension, received (%d, %d)' % (
            self.output_dim, num_dim)

        # The estimator must be fitted before ``components_`` / ``mean_`` exist.
        self.pca.partial_fit(vecs)

        # ``components_`` is (n_components, n_features); transpose it so that
        # ``encode`` can right-multiply row vectors by it.
        self.pca_components = np.transpose(self.pca.components_)
        self.mean = self.pca.mean_

    def encode(self, vecs: np.ndarray, *args, **kwargs) -> np.ndarray:
        """Centre ``vecs`` with the learned mean and project onto the components.

        ``train`` must have succeeded at least once beforehand; otherwise
        ``self.mean`` and ``self.pca_components`` are still ``None``.

        :param vecs: array whose last dimension matches the training data.
        :return: ``(vecs - mean) @ pca_components``.
        """
        return np.matmul(vecs - self.mean, self.pca_components)

class PCALocalEncoder(BaseNumericEncoder):
batch_size = 2048

0 comments on commit da56544

Please sign in to comment.