In [1]:
from matplotlib import pyplot as plt

In [2]:
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import proj3d

In [3]:
from matplotlib.patches import FancyArrowPatch

In [1]:
# import mpld3

In [4]:
%matplotlib widget

# Make inline plots vector graphics instead of raster graphics
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('retina', 'png')

## Create some artificial data

In [5]:
import numpy as np

np.random.seed(7) # random seed for consistency

d1, d2 = 3, 4

mu_vec1 = np.zeros(d1)
cov_mat1 = np.identity(d1)
class1_sample = np.random.multivariate_normal(mu_vec1, cov_mat1, d2).T
assert class1_sample.shape == (d1,d2), "The matrix has not the dimensions {}x{}".format(d1,d2)

mu_vec2 = np.ones(d1)
cov_mat2 = np.identity(d1)
class2_sample = np.random.multivariate_normal(mu_vec2, cov_mat2, d2).T
assert class2_sample.shape == (d1,d2), "The matrix has not the dimensions {}x{}".format(d1,d2)

all_samples = np.concatenate((class1_sample, class2_sample), axis=1)
assert all_samples.shape == (d1,2*d2), "The matrix has not the dimensions 3x40"
all_samples

array([[ 1.69052570e+00,  4.07516283e-01, -8.90385858e-04,
         6.00498516e-01,  1.50529937e+00, -4.53241412e-01,
         1.27445992e+00,  1.15433554e+00],
       [-4.65937371e-01, -7.88923029e-01, -1.75472431e+00,
        -6.25428974e-01,  7.38643585e-01,  1.55458031e+00,
        -5.26524532e-01,  6.12860057e-01],
       [ 3.28201637e-02,  2.06557291e-03,  1.01765801e+00,
        -1.71548261e-01,  7.57250921e-01,  1.12388091e+00,
         2.65069969e+00,  3.02907222e+00]])

## PCA using scikit-learn

In [6]:
from sklearn.decomposition import PCA as sklearnPCA

sklearn_pca = sklearnPCA(n_components=2)
sklearn_transf = sklearn_pca.fit_transform(all_samples.T)

plt.plot(sklearn_transf[0:4,0],sklearn_transf[0:4,1], 'o', markersize=7, color='blue', alpha=0.5, label='class1')
plt.plot(sklearn_transf[4:8,0], sklearn_transf[4:8,1], '^', markersize=7, color='red', alpha=0.5, label='class2')

plt.xlabel('x_values')
plt.ylabel('y_values')
plt.xlim([-4,4])
plt.ylim([-4,4])
plt.legend()
plt.title('Transformed samples with class labels from matplotlib.mlab.PCA()')

sklearnPCA_fig = plt.gcf()
#sklearnPCA_figplt.get_figure()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## PCA manually

Scale to mean 0 and unit var:

In [7]:
col_means = all_samples.mean(axis=0)
col_std = all_samples.std(axis=0)
# all_samples = (all_samples - col_means) / col_std
# assert np.allclose(all_samples.mean(axis=0), 0)
# assert np.allclose(all_samples.std(axis=0), 1)

In [8]:
mean_vector = np.mean(all_samples, axis=1)[:, np.newaxis]
#print('Mean Vector:\n')
#print(mean_vector)

Covariance matrix:

In [9]:
cov_mat = np.cov(all_samples)
cov_mat

array([[ 0.57727147, -0.00998429,  0.13398467],
       [-0.00998429,  1.10397645,  0.34238617],
       [ 0.13398467,  0.34238617,  1.45656498]])

Eigenvectors and eigenvalues for the from the covariance matrix:

In [10]:
eig_val_cov, eig_vec_cov = np.linalg.eig(cov_mat)

In [11]:
eig_vec_cov

array([[ 0.09956385, -0.97097426, -0.21747648],
       [ 0.50955031, -0.1379762 ,  0.84930622],
       [ 0.85466106,  0.1953754 , -0.48102279]])

In [12]:
for i in range(len(eig_val_cov)):
    eigvec_cov = eig_vec_cov[:,i].reshape(1,d1).T
    print('Eigenvector {}: \n{}'.format(i+1, eigvec_cov))
    print('Eigenvalue {} from covariance matrix: {}'.format(i+1, eig_val_cov[i]))
    print(40 * '-')

Eigenvector 1: 
[[0.09956385]
 [0.50955031]
 [0.85466106]]
Eigenvalue 1 from covariance matrix: 1.6763047324950375
----------------------------------------
Eigenvector 2: 
[[-0.97097426]
 [-0.1379762 ]
 [ 0.1953754 ]]
Eigenvalue 2 from covariance matrix: 0.5488928585075267
----------------------------------------
Eigenvector 3: 
[[-0.21747648]
 [ 0.84930622]
 [-0.48102279]]
Eigenvalue 3 from covariance matrix: 0.9126153071385501
----------------------------------------


Test that rule for eigenvectors and eigenvalues apply:

In [13]:
for i in range(len(eig_val_cov)):
    eigv = eig_vec_cov[:,i].reshape(1,d1).T
    np.testing.assert_array_almost_equal(cov_mat.dot(eigv), eig_val_cov[i] * eigv,
                                         decimal=6, err_msg='', verbose=True)

Test that all eigenvalues have unit length:

In [14]:
for ev in eig_vec_cov:
    np.testing.assert_array_almost_equal(1.0, np.linalg.norm(ev))

Plot eigenvectors:

In [15]:
class Arrow3D(FancyArrowPatch):
    def __init__(self, xs, ys, zs, *args, **kwargs):
        FancyArrowPatch.__init__(self, (0,0), (0,0), *args, **kwargs)
        self._verts3d = xs, ys, zs

    def draw(self, renderer):
        xs3d, ys3d, zs3d = self._verts3d
        xs, ys, zs = proj3d.proj_transform(xs3d, ys3d, zs3d, renderer.M)
        self.set_positions((xs[0],ys[0]),(xs[1],ys[1]))
        FancyArrowPatch.draw(self, renderer)

fig = plt.figure(figsize=(7,7))
ax = fig.add_subplot(111, projection='3d')

ax.plot(all_samples[0,:], all_samples[1,:], all_samples[2,:], 'o', markersize=8, color='green', alpha=0.2)
ax.plot([col_means[0]], [col_means[1]], [col_means[2]], 'o', markersize=10, color='red', alpha=0.5)
for v in eig_vec_cov.T:
    a = Arrow3D([col_means[0], v[0]], [col_means[1], v[1]], [col_means[2], v[2]], mutation_scale=20, lw=3, arrowstyle="-|>", color="r")
    ax.add_artist(a)
ax.set_xlabel('x_values')
ax.set_ylabel('y_values')
ax.set_zlabel('z_values')

plt.title('Eigenvectors')

plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Make a list of (eigenvalue, eigenvector) tuples:

In [16]:
eig_pairs = [(np.abs(eig_val_cov[i]), eig_vec_cov[:,i]) for i in range(len(eig_val_cov))]

Sort the (eigenvalue, eigenvector) tuples from high to low:

In [17]:
eig_pairs.sort()
eig_pairs.reverse()

Visually confirm that the list is correctly sorted by decreasing eigenvalues:

In [18]:
for i in eig_pairs:
    print(i[0])

1.6763047324950375
0.9126153071385501
0.5488928585075267


Data transformation:

In [19]:
matrix_w = np.hstack((eig_pairs[0][1].reshape(d1,1), eig_pairs[1][1].reshape(d1,1)))
print('Matrix W:\n', matrix_w)

Matrix W:
 [[ 0.09956385 -0.21747648]
 [ 0.50955031  0.84930622]
 [ 0.85466106 -0.48102279]]


In [20]:
transformed = matrix_w.T.dot(all_samples)
assert transformed.shape == (2,2*d2), "The matrix is not 2x40 dimensional."

In [21]:
transformed

array([[-0.04105317, -0.35965672, -0.02445629, -0.4055152 ,  1.17344234,
         1.70754766,  2.1240492 ,  3.01604319],
       [-0.77916034, -0.75965603, -1.97962132, -0.5792564 , -0.06428758,
         0.87827174, -1.99939259, -1.18758775]])

In [22]:
plt.plot(transformed[0,0:4], transformed[1,0:4], 'o', markersize=7, color='blue', alpha=0.5, label='class1')
plt.plot(transformed[0,4:8], transformed[1,4:8], '^', markersize=7, color='red', alpha=0.5, label='class2')
plt.xlim([-4,4])
plt.ylim([-4,4])
plt.xlabel('x_values')
plt.ylabel('y_values')
plt.legend()
plt.title('Transformed samples with class labels')

plt.show()

Compare to plot from scikit-learn. Note that the orientation is arbitrary, so the plots may be a mirrors of each other. 

In [23]:
#sklearnPCA_fig
from sklearn.decomposition import PCA as sklearnPCA

sklearn_pca = sklearnPCA(n_components=2)
sklearn_transf = sklearn_pca.fit_transform(all_samples.T)

plt.plot(sklearn_transf[0:4,0],sklearn_transf[0:4,1], 'o', markersize=7, color='blue', alpha=0.5, label='class1')
plt.plot(sklearn_transf[4:8,0], sklearn_transf[4:8,1], '^', markersize=7, color='red', alpha=0.5, label='class2')

plt.xlabel('x_values')
plt.ylabel('y_values')
plt.xlim([-4,4])
plt.ylim([-4,4])
plt.legend()
plt.title('Transformed samples with class labels from matplotlib.mlab.PCA()')

sklearnPCA_fig = plt.gcf()
#sklearnPCA_figplt.get_figure()
plt.show()

In [24]:
#%matplotlib notebook

fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(111, projection='3d')
plt.rcParams['legend.fontsize'] = 10   
ax.plot(class1_sample[0,:], class1_sample[1,:], class1_sample[2,:], 'o', markersize=8, color='blue', alpha=0.5, label='class1')
ax.plot(class2_sample[0,:], class2_sample[1,:], class2_sample[2,:], '^', markersize=8, alpha=0.5, color='red', label='class2')

plt.title('Samples for class 1 and class 2')
ax.legend(loc='upper right')

plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …