# PCA and SVD

Mathematical Coding

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

Set up some data and look at it a little.

In [None]:
# Seed NumPy's global generator so that "Restart & Run All" reproduces the same
# sample, and therefore the same numbers and figures in every later cell.
np.random.seed(42)

# x1 ~ Normal(mean=40, sd=8); x2 is x1 shifted down by 10 plus Normal(0, 3) noise,
# which yields two strongly correlated features.
x1 = np.random.normal(loc=40, scale=8, size=100)
x2 = x1 - 10 + np.random.normal(scale=3, size=100)

plt.scatter(x1, x2)
plt.xlabel("x1")
plt.ylabel("x2");

In [None]:
# Assemble the data matrix: one row per observation, one column per feature.
X = np.column_stack([x1, x2])
np.shape(X)

In [None]:
# Per-feature (column-wise) means of the raw data.
np.mean(X, axis=0)

In [None]:
# Centre every feature on zero by removing its column mean (broadcast over rows).
X_centered = np.subtract(X, np.mean(X, axis=0))

In [None]:
# Sample covariance matrix of the raw data; rowvar=False marks columns as the variables.
np.cov(X, rowvar=False)

In [None]:
# Covariance of the centred data; np.cov removes the mean itself, so this should
# agree with the covariance of the raw data up to floating-point rounding.
np.cov(X_centered, rowvar=False)

In [None]:
# Cross-product matrix X^T X of the raw (uncentred) data.
X.T.dot(X)

In [None]:
# Cross-product of the centred data, X_c^T X_c (the unscaled scatter matrix).
X_centered.T.dot(X_centered)

In [None]:
# Dividing the scatter matrix by (n - 1) gives the sample covariance matrix.
# The divisor is taken from the data instead of the literal 99; the scaling changes
# the eigenvalues by a constant factor but not the eigenvectors.
np.dot(X_centered.T, X_centered) / (X_centered.shape[0] - 1)

### PCA

We have data in $X$.

$C$ is the covariance matrix of $X$.

The eigenvalue decomposition of $C$ is:

$Q \Lambda Q^T = C$

where the columns of $Q$ are the eigenvectors $v$ and the diagonal elements of $\Lambda$ are the corresponding eigenvalues $\lambda$, so that each pair satisfies

$C v = \lambda v$

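Each eigenvalue $\lambda$ measures the variance of the data along the direction of its eigenvector $v$. (The code below decomposes the covariance matrix scaled by $n - 1$, i.e. $X_c^T X_c$ for the centered data $X_c$; this multiplies every eigenvalue by $n - 1$ but leaves the eigenvectors unchanged.)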

In [None]:
# Eigendecomposition of the scatter matrix (n - 1) * C, with n taken from the data.
# The matrix is symmetric, so eigh is the appropriate routine: it returns real
# eigenvalues (in ascending order) and orthonormal eigenvectors as the columns of Q.
eig_vals, Q = np.linalg.eigh(
    np.cov(X_centered, rowvar=False) * (X_centered.shape[0] - 1)
)

In [None]:
# Rebuild the decomposed matrix as Q . diag(eigenvalues) . Q^T.
Q.dot(np.diag(eig_vals).dot(Q.T))

In [None]:
# The matrix that was decomposed, shown for comparison with its reconstruction:
# the covariance rescaled by (n - 1), with n derived from the data rather than a literal 99.
np.cov(X_centered, rowvar=False) * (X_centered.shape[0] - 1)

In [None]:
# Reorder the eigenpairs so the largest eigenvalue (first principal component) is first.
# argsort compares only the scalar eigenvalues, so tied eigenvalues can never fall
# through to an ambiguous array-vs-array comparison, and fancy indexing avoids handing
# a generator to np.column_stack, which current NumPy rejects.
order = np.argsort(eig_vals)[::-1]
eig_vals = eig_vals[order]
Q = Q[:, order]

In [None]:
# Project the centred data onto the first principal axis (first column of Q, used as
# a row vector); the result is a single row of scores, one per observation.
X_transformed = np.dot(Q[:, 0][np.newaxis, :], X_centered.T)

In [None]:
# Scores of the first five observations on the first principal axis.
X_transformed[:, 0:5]

In [None]:
# Map the 1-D scores back into the 2-D feature space along the first principal axis
# (outer product of the score column with the axis row vector).
X_reconstituted = X_transformed.reshape(100, 1).dot(Q[:, 0].reshape(1, 2))

# First scatter: the centred data. Second scatter (red): its rank-1 reconstruction.
plt.scatter(*X_centered.T)
plt.scatter(*X_reconstituted.T, c='r')

### SVD

We have data in $X$.

The singular value decomposition of $X$ is:

$U \Sigma V^T = X$

where the columns of $U$ and $V$ are the left and right singular vectors of $X$, and the diagonal elements of $\Sigma$ are the singular values of $X$, which are also the square roots of the eigenvalues of $X^TX$.

In [None]:
# Full singular value decomposition of the centred data.
U, singular_vals, V_T = np.linalg.svd(X_centered)

# np.linalg.svd returns the singular values as a 1-D array. Embed them on the
# diagonal of a zero matrix shaped like X_centered so that U . Sigma . V_T is defined.
# Sizes are derived from the data rather than hard-coded as (100, 2).
Sigma = np.zeros(X_centered.shape)
Sigma[:singular_vals.size, :singular_vals.size] = np.diag(singular_vals)

In [None]:
# First five rows of U . Sigma . V_T, which should reproduce the centred data.
U.dot(Sigma.dot(V_T))[:5]

In [None]:
# First five rows of the centred data, for comparison with the SVD product.
X_centered[:5]

In [None]:
# Squared singular values of the centred data.
np.square(singular_vals)

In [None]:
# Eigenvalues from the PCA section, displayed for comparison with the squared
# singular values of the centred data.
eig_vals

In [None]:
# First left singular vector (as a column) scaled by the first singular value:
# the first five first-component scores obtained via SVD.
(U[:, :1] * Sigma[0, 0])[:5]

In [None]:
# The same five scores from the eigendecomposition route, as a column for easy
# comparison (singular/eigen vectors are only defined up to sign, so signs may flip).
X_transformed[0, :5].reshape(-1, 1)

In [None]:
# Rank-1 reconstruction from the leading singular triplet: (u_1 * sigma_1) . v_1^T,
# showing the first five rows.
(U[:, :1] * Sigma[0, 0]).dot(V_T[:1])[:5]

In [None]:
# First five rows of the PCA-based rank-1 reconstruction, for comparison with the SVD one.
X_reconstituted[:5]

And there's also value in U and V themselves.