In [1]:
import numpy as np
from sklearn.decomposition import IncrementalPCA
from sklearn.metrics import mean_squared_error
import time

In [2]:
def reconstruct_incremental_pca(X, pca, variance_explained=0.9025, profiling=False):
    """Update ``pca`` with the batch ``X`` and reconstruct ``X`` from its leading components.

    ``X`` is passed to ``pca.partial_fit``, projected with ``pca.transform``
    and mapped back to feature space using only the first ``k`` principal
    components, where ``k`` is the smallest number of components whose
    cumulative explained-variance ratio reaches ``variance_explained``.

    Parameters
    ----------
    X : 2-D array, one sample per row
        Batch to fit on and to reconstruct.
    pca : estimator
        Object providing ``partial_fit``, ``transform``, ``components_``,
        ``explained_variance_ratio_`` and ``mean_`` (e.g. sklearn's
        ``IncrementalPCA``). It is updated in place by ``partial_fit``.
        NOTE(review): the reconstruction assumes ``transform`` is a plain
        centred projection (no whitening) -- confirm for the estimator used.
    variance_explained : float, default 0.9025
        Target fraction of variance to retain. Values ``>= 1`` (or a target
        the fitted components never reach) keep every component.
    profiling : bool, default False
        When true, also return the elapsed wall-clock time in seconds.

    Returns
    -------
    X_estimate : ndarray with the same shape as ``X``
        Reconstruction of ``X`` from the retained components.
    (X_estimate, duration) : tuple
        Returned instead when ``profiling`` is true.
    """
    if profiling:
        start = time.perf_counter()

    pca.partial_fit(X)
    # Y is the projection of X onto the fitted principal components.
    Y = pca.transform(X)

    n_components = len(pca.explained_variance_ratio_)
    k = n_components
    if variance_explained < 1:
        # The cumulative ratio is non-decreasing, so searchsorted yields the
        # index of the first component at which the target is reached; the
        # number of components to keep is that index + 1.
        cumulative = np.cumsum(pca.explained_variance_ratio_)
        first_hit = int(np.searchsorted(cumulative, variance_explained))
        k = min(first_hit + 1, n_components)

    # Reconstruct with X ~= Y_k W_k + mean. The mean added back must be the one
    # transform() subtracted (pca.mean_); after several partial_fit calls it is
    # not the mean of the current batch.
    X_estimate = np.dot(Y[:, :k], pca.components_[:k, :]) + pca.mean_

    if profiling:
        duration = time.perf_counter() - start
        return (X_estimate, duration)

    return X_estimate

In [3]:
## Build a reproducible data set of 10 points in 3 dimensions and reconstruct it
# Seeds NumPy's global generator; the sample below comes from the dedicated `rng`.
np.random.seed(1)
pca = IncrementalPCA()
rng = np.random.RandomState(1999)
# Standard-normal draws shifted by 2.
A = 2 + rng.randn(10, 3)
A_estimate = reconstruct_incremental_pca(A, pca, variance_explained=0.9025)

In [4]:
# Overall mean squared error between the original data A and its reconstruction
# A_estimate; as the last expression of the cell, the value is displayed as its output.
mean_squared_error(A, A_estimate)

0.05134517953069396

In [5]:
# Mean squared reconstruction error of each row (axis=1 averages across columns).
mse = np.mean(np.square(A - A_estimate), axis=1)