# Chap 8 Dimension Reduction

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Projection methods

In [5]:
# Build a small synthetic 3-D dataset: m points placed along an arc, with the
# third coordinate derived from a linear combination of the first two.
# The global seed makes every draw below reproducible; the draws must stay in
# this order, otherwise X changes.
np.random.seed(42)
m = 60
w_1, w_2 = 0.1,0.3
noise = 0.1

# Uniformly drawn angles in [-0.5, 3*pi/2 - 0.5).
angles = np.random.rand(m) * 3 * np.pi/2 - 0.5
X = np.empty((m,3))

# NOTE(review): column 0 adds uniform noise (rand) while column 1 adds Gaussian
# noise (randn) -- confirm whether the mix is intentional.
X[:,0] = np.cos(angles) + np.sin(angles)/2 + noise*np.random.rand(m)/2
X[:,1] = np.sin(angles)*0.7 + noise*np.random.randn(m)/2
# NOTE(review): the random term here is not scaled by `noise`, unlike the two
# columns above, so column 2 carries a uniform [0, 1) offset -- confirm intent.
X[:,2] = X[:,0]*w_1 + X[:,1]*w_2 + np.random.rand(m)

In [6]:
# Sanity check: one row per sample (m) and three feature columns.
X.shape

(60, 3)

## PCA by SVD

In [7]:
# PCA assumes zero-mean data, so subtract the per-feature mean first.
X_centered = X - np.mean(X, axis=0)
# Full SVD: X_centered = U @ diag(s) @ V_T (with s padded to the shape of X).
U, s, V_T = np.linalg.svd(X_centered)
# The rows of V_T are the principal directions; keep the first two.
c_1, c_2 = V_T[0], V_T[1]

In [8]:
# Centering subtracts a per-column mean, so the shape matches X.
X_centered.shape

(60, 3)

In [9]:
# np.linalg.svd defaults to full_matrices=True, so U is square (m x m).
U.shape

(60, 60)

In [10]:
# Singular values of X_centered; np.linalg.svd returns them in descending order.
s

array([7.05506099, 2.98798215, 2.04209023])

In [11]:
# Each row of V_T is a right singular vector, i.e. one principal direction.
V_T 

array([[-0.94360818, -0.26476013, -0.19876034],
       [ 0.33084931, -0.73250113, -0.59496288],
       [ 0.01193027, -0.62717156,  0.77878977]])

In [12]:
# Each principal direction is a 1-D vector with one entry per feature.
c_1.shape,c_2.shape

((3,), (3,))

In [13]:
# Rebuild the rectangular (m x n) singular-value matrix so that the product
# U @ S @ V_T is well defined for the full (m x m) U.
m, n = X.shape

S = np.zeros_like(X_centered)
# Write the singular values onto the main diagonal of the top n x n block;
# every other entry stays zero.
np.fill_diagonal(S, s)

In [14]:
# NOTE(review): this displays every row of S, nearly all zeros; `S[:5]` would
# show the same structure with far less output.
S

array([[7.05506099, 0.        , 0.        ],
       [0.        , 2.98798215, 0.        ],
       [0.        , 0.        , 2.04209023],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.

In [15]:
# Verify the factorization: multiplying the three factors back together should
# reproduce X_centered up to floating-point round-off.
np.allclose(X_centered, U @ S @ V_T)

True

In [16]:
# Projection matrix: the first two principal directions laid out as columns.
W_2 = V_T[:2].T
# Project the centered data onto the plane spanned by those two directions.
X2D = X_centered @ W_2

In [17]:
# Show the projection matrix, the full V_T and the projected data, separated by
# a comma and a blank line.
print(W_2, V_T, X2D, sep=",\n\n")


[[-0.94360818  0.33084931]
 [-0.26476013 -0.73250113]
 [-0.19876034 -0.59496288]],

[[-0.94360818 -0.26476013 -0.19876034]
 [ 0.33084931 -0.73250113 -0.59496288]
 [ 0.01193027 -0.62717156  0.77878977]],

[[-0.76507398 -0.33168158]
 [ 1.48265532  0.57054673]
 [ 1.0805771  -0.01921057]
 [ 0.30232421 -0.60526476]
 [-0.90513931  0.18648104]
 [-0.79842186  0.54702328]
 [-0.47105278  0.7284537 ]
 [ 1.34282445  0.00530485]
 [ 0.50647291 -0.07798973]
 [ 0.96947746 -0.06258005]
 [-0.41314956  0.55618453]
 [ 1.34634769  0.27735024]
 [ 1.33098835 -0.09177379]
 [-0.9399952   0.2664047 ]
 [-1.02070551 -0.0110634 ]
 [-0.90202069  0.3455568 ]
 [-0.91303326 -0.02690949]
 [-0.09011892 -0.59782944]
 [-0.52176961 -0.3762926 ]
 [-1.00044287 -0.3023946 ]
 [ 0.37466292 -0.51444565]
 [-0.90971111  0.15754732]
 [-0.85671069  0.15531324]
 [-0.73762689 -0.20251007]
 [-0.36244576 -0.26362968]
 [ 1.29392605  0.08100081]
 [-1.08504393 -0.17984033]
 [-0.11778305 -0.41423049]
 [ 0.20648304 -0.70088149]
 [-0.52659285

In [18]:
# Keep a handle on the SVD-based projection: the name X2D is rebound to the
# Scikit-Learn result in a later cell.
X2D_using_svd = X2D

## PCA using Scikit-Learn

In [19]:
from sklearn.decomposition import PCA

In [20]:
# Fit a 2-component PCA and project the data in one step. PCA centers the data
# itself, so the raw X is passed rather than X_centered.
# NOTE: this rebinds X2D, which previously held the SVD-based projection.
pca = PCA(n_components=2)
X2D = pca.fit_transform(X)

In [21]:
# First five rows of the Scikit-Learn projection.
X2D[:5]

array([[-0.76507398,  0.33168158],
       [ 1.48265532, -0.57054673],
       [ 1.0805771 ,  0.01921057],
       [ 0.30232421,  0.60526476],
       [-0.90513931, -0.18648104]])

In [22]:
# Same five rows from the SVD-based projection, for a side-by-side comparison.
X2D_using_svd[:5]

array([[-0.76507398, -0.33168158],
       [ 1.48265532,  0.57054673],
       [ 1.0805771 , -0.01921057],
       [ 0.30232421, -0.60526476],
       [-0.90513931,  0.18648104]])

Notice that running PCA multiple times on slightly different datasets may give different results. In general, the only difference is that some axes may be flipped, because the sign of each principal component is arbitrary. In this example, PCA using Scikit-Learn gives the same projection as the SVD approach, except that the second axis is flipped (the first axis keeps its sign), so the two projections match only up to the sign of each axis:

In [23]:
# The sign of each principal axis is arbitrary, so align the signs axis by axis
# before comparing. Negating the whole matrix (-X2D_using_svd) only matches when
# every axis happens to be flipped; in the rows displayed above only the second
# axis differs in sign, so that stricter check fails.
axis_signs = np.sign(np.sum(X2D * X2D_using_svd, axis=0))
np.allclose(X2D, X2D_using_svd * axis_signs)

True

In [24]:
# Map the 2-D projection back into the original 3-D feature space.
X3D_inv = pca.inverse_transform(X2D)

In [25]:
# Expected to be False: only two of the three components were kept, so whatever
# lay along the dropped third component is lost and cannot be reconstructed.
np.allclose(X3D_inv,X)

False

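TODO: needs more reading. The reconstruction is not exact because projecting onto 2 components discards the variation along the third principal axis (the third singular value above is not zero), so `inverse_transform` cannot recover it.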