In [1]:
import numpy as np

In [2]:
X = np.array(
    [
        [1.0, 20.0, 300.0],
        [3.0, 40.0, 500.0],
        [10.0, 2.0, 4000.0],
        [0.5, 200.0, 20.0 ]
    ]
)

In [3]:
# 1. Standardize the range of continuous initial variables
# Zero-center the data (subtract the mean)
X -= np.mean(X, axis = 0)
# Normalization (divide by the std. deviation)
X /= np.std(X, axis = 0)
X


array([[-0.69122342, -0.57735027, -0.55772096],
       [-0.164577  , -0.32356993, -0.43446771],
       [ 1.67868545, -0.80575257,  1.72246417],
       [-0.82288502,  1.70667277, -0.73027551]])

In [4]:
# 2. Compute the covariance matrix to identify correlations
# if positive then : the two variables increase or decrease together (correlated)
# if negative then : One increases when the other decreases (Inversely correlated)
cov = np.dot(X.T, X)
cov

array([[ 4.        , -2.30467039,  3.9494215 ],
       [-2.30467039,  4.        , -2.17164024],
       [ 3.9494215 , -2.17164024,  4.        ]])

In [5]:
# 3. Compute the eigenvectors and eigenvalues of the covariance matrix to identify the principal components
eig_vals, eig_vecs = np.linalg.eig(cov)
# convert to percentages
eig_vals /= eig_vals.sum()

print(eig_vecs)
print(eig_vals)

[[-0.62164455  0.71651996  0.31647623]
 [ 0.48516229  0.03501081  0.87372295]
 [-0.61495984 -0.69668744  0.36939275]]
[0.80880263 0.00394023 0.18725713]


In [6]:
# 4. Create a feature vector to decide which principal components to keep
num_dim_sel = 2
threshold = np.sort(eig_vals)[-num_dim_sel]

eig_vecs_selected = eig_vecs[:, eig_vals>=threshold]
eig_vals_selected = eig_vals[eig_vals>=threshold]

print(eig_vecs_selected)
print(eig_vals_selected)

[[-0.62164455  0.31647623]
 [ 0.48516229  0.87372295]
 [-0.61495984  0.36939275]]
[0.80880263 0.18725713]


In [7]:
# 5. Recast the data along the principal components axes
X_reduced = X.dot(eig_vecs_selected)
X_reduced

array([[ 0.49256268, -0.92921804],
       [ 0.21250466, -0.4952844 ],
       [-2.49371272,  0.4635253 ],
       [ 1.78864537,  0.96097714]])

another way

In [58]:
X = np.array(
    [
        [1.0, 20.0, 300.0],
        [3.0, 40.0, 500.0],
        [10.0, 2.0, 4000.0],
        [0.5, 200.0, 20.0 ]
    ]
)

In [59]:
# Assume input data matrix X of size [N x D]
X -= np.mean(X, axis = 0) # zero-center the data (important)
cov = np.dot(X.T, X) / X.shape[0] # get the data covariance matrix

# SVD
U,S,V = np.linalg.svd(cov)

Xrot = np.dot(X, U) # decorrelate the data

# PCA
Xrot_reduced = np.dot(X, U[:,:2]) # Xrot_reduced becomes [N x 100]
Xrot_reduced


array([[  903.48687339,    69.37640916],
       [  704.08054828,    44.10716631],
       [-2795.70947342,   -10.3163823 ],
       [ 1188.14205175,  -103.16719317]])

In [60]:
# whiten the data:
# divide by the eigenvalues (which are square roots of the singular values)
Xwhite = Xrot / np.sqrt(S + 1e-5)
Xwhite


array([[ 0.55659321,  1.04862031,  1.26117056],
       [ 0.43374892,  0.6666772 , -1.53861351],
       [-1.7222972 , -0.15593151,  0.09683715],
       [ 0.73195507, -1.559366  ,  0.1806058 ]])

# Important:

In practice, PCA/Whitening are not used with Convolutional Networks. However, it is very important to zero-center the data, and it is common to see normalization of every pixel as well.

The mean must be computed only over the training data and then subtracted equally from all splits (train/val/test).