Determine why different PCA methods return different results 

In [32]:
import numpy as np 
import scipy as sp
from sklearn.decomposition import PCA
# -- local -- 
import env
import data as Data
# -- plotting
import matplotlib.pyplot as plt 
%matplotlib inline

Construct data matrix ${\bf X}$

In [3]:
# Build the data matrix X with one row per mock and one column per rebinned k-bin.
# Data.Pk comes from the project-local `data` module imported above.
pkay = Data.Pk()
# presumably the number of available 'qpm' mocks — verify against data.Pk._n_mock
n_mock = pkay._n_mock('qpm')
for i in range(1, n_mock+1): 
    # Mock indices are 1-based here. The Read -> krange -> rebin order is kept
    # as-is; each call appears to act on pkay's current state (TODO confirm).
    pkay.Read('qpm', i, ell=0)  # presumably loads the ell=0 multipole of mock i
    pkay.krange([0.01, 0.15])  # presumably restricts k to [0.01, 0.15] — verify units
    k, pk, _ = pkay.rebin('beutler')
    
    if i == 1:
        # Allocate X on the first pass, once the number of rebinned k-bins is known.
        X = np.zeros((n_mock, len(k)))
    X[i-1,:] = pk  # row i-1 stores the rebinned values of mock i

${\bf X}$ is an $N_{mock} \times N_{k}$ matrix. Now let's subtract off the mean

In [4]:
# Column-wise mean of X over the mocks (one value per column of X).
# The builtin float replaces the np.float alias, which NumPy deprecated in 1.20
# and removed in 1.24; the computed value is unchanged.
mu_X = np.sum(X, axis=0)/float(n_mock)

(14,)


In [5]:
# Subtract the column means from X in place (mu_X broadcasts across the rows).
# NOTE(review): this cell is not idempotent — re-running it without rebuilding X
# subtracts mu_X a second time.
X -= mu_X

Now let's manually implement PCA with whitening, using the SVD of ${\bf X}$

In [57]:
def PCAwhite(Xin): 
    """PCA-whiten `Xin` using its thin singular value decomposition.

    `Xin` is decomposed as U S V^T with `full_matrices=False`. The function
    does not center `Xin` itself; the caller is expected to pass data whose
    column means have already been removed.

    Parameters
    ----------
    Xin : 2-D array with samples along the rows.

    Returns
    -------
    scores : U scaled by sqrt(n_rows - 1), so that scores^T scores / (n_rows - 1)
        is the identity whenever the columns of U are orthonormal.
    axes : the transpose of the V^T factor returned by the SVD, i.e. the
        right singular vectors stored as columns.

    Note: the overall sign of each singular-vector pair is not fixed by the
    SVD, so columns may differ in sign from another PCA implementation.
    """
    left, _, right_t = sp.linalg.svd(Xin, full_matrices=False)
    # Rescale the orthonormal left singular vectors by sqrt(n_rows - 1).
    scale = np.sqrt(Xin.shape[0] - 1)
    scores = left * scale
    axes = right_t.T
    return scores, axes

In [55]:
# Whitened scores and principal axes from the manual SVD-based PCAwhite.
# NOTE(review): the array recorded as this cell's output is not printed by the
# current PCAwhite definition; it likely came from an earlier version — re-run to refresh.
X_pca, W_pca = PCAwhite(X)

[ 162086.16267392   84476.78232975   52738.12050123   31403.94205646
   22481.72846206   17760.75329591   13814.5538258    10639.55889188
    8135.25506649    6562.66438656    5528.86183037    4872.73502952
    3930.08845293    3287.83945793]


In [56]:
# Show the whitened scores of the first row; the parenthesised form of print
# is valid on both Python 2 and Python 3 (the bare statement is a SyntaxError on 3).
print(X_pca[0,:])

[-1.16475388 -0.69478916 -1.30720776  1.836608    1.56729737 -2.95857983
  1.04277207  2.12142586 -3.837459   -3.29238689  0.89975684 -0.1721782
 -0.49196657  0.55540551]


In [58]:
def PCAsklearn(Xin): 
    """PCA-whiten `Xin` with scikit-learn's `PCA` estimator.

    The estimator is built with `whiten=True` and keeps as many components
    as `Xin` has columns.

    Parameters
    ----------
    Xin : 2-D array with samples along the rows.

    Returns
    -------
    scores : the output of `fit_transform` on `Xin`.
    axes : the fitted `components_` attribute, transposed.
    """
    whitener = PCA(n_components=Xin.shape[1], whiten=True)
    scores = whitener.fit_transform(Xin)
    axes = whitener.components_.T
    return scores, axes

In [59]:
# Run the scikit-learn based whitening on the same matrix X, for comparison
# with the output of the manual implementation.
X_pca_sk, W_pca_sk = PCAsklearn(X)

In [60]:
# Show the scikit-learn whitened scores of the first row; the parenthesised form
# of print is valid on both Python 2 and Python 3 (the bare statement is a SyntaxError on 3).
print(X_pca_sk[0,:])

[ 1.1653367  -0.69513681  1.30786186 -1.837527    1.5680816   2.96006023
  1.04329385  2.12248737  3.83937917  3.29403432 -0.90020706 -0.17226435
  0.49221274 -0.55568342]
