PCA Comparison among Shogun, Scikit-learn and Matplotlib 
---------------------

The following compares Principal Component Analysis (PCA) as implemented in Shogun with a naive NumPy implementation and with other toolboxes, namely scikit-learn and matplotlib. Two datasets are used for the comparison: one is randomly generated and the other is the standard Iris dataset.

In [15]:
import numpy as np
import time
%matplotlib notebook
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import proj3d
from matplotlib.patches import FancyArrowPatch

In [16]:
# Fix NumPy's global random state so the generated samples are repeatable
RANDOM_SEED = 1
np.random.seed(RANDOM_SEED)

### Randomly Generated 3D Dataset

In [17]:
# Two classes of 20 three-dimensional Gaussian points each.
# multivariate_normal returns one point per row, so each draw is
# transposed to a 3 x 20 array; together the classes form the 3 x 40 dataset.
mu_vec1 = np.zeros(3, dtype=int)
cov_mat1 = np.identity(3, dtype=int)
class1_sample = np.transpose(np.random.multivariate_normal(mu_vec1, cov_mat1, 20))

mu_vec2 = np.ones(3, dtype=int)
cov_mat2 = np.identity(3, dtype=int)
class2_sample = np.transpose(np.random.multivariate_normal(mu_vec2, cov_mat2, 20))

In [18]:
# Stack both classes side by side (class 1 columns first, then class 2)
all_samples = np.hstack((class1_sample, class2_sample))

# 3D scatter of the two classes
plt.rcParams['legend.fontsize'] = 10
fig1 = plt.figure(1)
fig1.set_size_inches(8, 8)
ax1 = fig1.add_subplot(111, projection='3d')
for class_points, class_marker, class_colour, class_label in (
		(class1_sample, 'o', 'blue', 'class1'),
		(class2_sample, '^', 'red', 'class2')):
	ax1.plot(class_points[0,:], class_points[1,:], class_points[2,:], class_marker,
		markersize=8, color=class_colour, alpha=0.5, label=class_label)
plt.title('Samples for class 1 and class 2')
ax1.legend(loc='upper right')
plt.show()

<IPython.core.display.Javascript object>

**Naive Python Implementation**

In [52]:
# Naive PCA: eigendecomposition of the sample covariance matrix, then
# projection onto the two eigenvectors with the largest eigenvalues.
start = time.time()
# Rows of all_samples are the variables, which is np.cov's default layout.
cov_mat = np.cov(all_samples)
eig_val_cov, eig_vec_cov = np.linalg.eig(cov_mat)
# Pair each eigenvalue magnitude with its eigenvector (a column of eig_vec_cov).
eig_pairs = [(np.abs(eig_val_cov[i]), eig_vec_cov[:,i]) for i in range(len(eig_val_cov))]
# Sort on the eigenvalue only: comparing whole tuples would fall through to
# comparing the eigenvector arrays whenever two eigenvalues tie, which raises.
eig_pairs.sort(key=lambda pair: pair[0], reverse=True)
# Projection matrix: the two leading eigenvectors as columns (works for any input dimension).
matrix_w = np.column_stack((eig_pairs[0][1], eig_pairs[1][1]))
# NOTE: the samples are projected as they are, without subtracting their mean.
transformed = matrix_w.T.dot(all_samples)
end = time.time()
print('Naive Python implementation time:')
print(end - start)

Naive Python implementation time:
0.000977993011475


In [64]:
# Plotting eigen vectors
# Mean of the samples along each of the three coordinate axes
mean_x, mean_y, mean_z = [np.mean(all_samples[axis,:]) for axis in range(3)]
class Arrow3D(FancyArrowPatch):
	"""Arrow patch whose two end points are supplied as 3D coordinates."""

	def __init__(self, xs, ys, zs, *args, **kwargs):
		"""Keep the 3D end points; all other arguments go to FancyArrowPatch.

		xs, ys, zs each hold the (start, end) coordinate along one axis.
		"""
		# The 2D positions are placeholders; draw() replaces them on every render.
		placeholder = (0,0)
		FancyArrowPatch.__init__(self, placeholder, placeholder, *args, **kwargs)
		self._verts3d = (xs, ys, zs)

	def draw(self, renderer):
		"""Project the stored end points with renderer.M and draw the arrow."""
		x_pair, y_pair, z_pair = self._verts3d
		proj_x, proj_y, _proj_z = proj3d.proj_transform(x_pair, y_pair, z_pair, renderer.M)
		tail = (proj_x[0], proj_y[0])
		head = (proj_x[1], proj_y[1])
		self.set_positions(tail, head)
		FancyArrowPatch.draw(self, renderer)
# Show the samples, their mean, and the covariance eigenvectors anchored at the mean
fig2 = plt.figure(figsize=(7,7))
ax2 = fig2.add_subplot(111, projection='3d')
ax2.plot(all_samples[0,:], all_samples[1,:], all_samples[2,:], 'o', markersize=8, color='green', alpha=0.2)
ax2.plot([mean_x], [mean_y], [mean_z], 'o', markersize=10, color='red', alpha=0.5)
# The eigenvectors are the columns of eig_vec_cov, hence the transpose.
for v in eig_vec_cov.T:
	# An eigenvector is a direction, not a position: the arrow must end at
	# mean + v. Ending it at v itself would only be right for a zero mean.
	a = Arrow3D([mean_x, mean_x + v[0]], [mean_y, mean_y + v[1]], [mean_z, mean_z + v[2]], mutation_scale=20, lw=3, arrowstyle="-|>", color="r")
	ax2.add_artist(a)
ax2.set_xlabel('x_values')
ax2.set_ylabel('y_values')
ax2.set_zlabel('z_values')
plt.title('Eigenvectors')
plt.show()

<IPython.core.display.Javascript object>

In [54]:
# Scatter the projected samples: columns 0-19 are class 1, columns 20-39 are class 2
for class_cols, class_marker, class_colour, class_label in (
		(slice(0, 20), 'o', 'blue', 'class1'),
		(slice(20, 40), '^', 'red', 'class2')):
	plt.plot(transformed[0, class_cols], transformed[1, class_cols], class_marker,
		markersize=7, color=class_colour, alpha=0.5, label=class_label)
plt.xlabel('x_values')
plt.ylabel('y_values')
plt.xlim([-4,4])
plt.ylim([-4,4])
plt.legend()
plt.title('Transformed samples with Naive implementation')
plt.show()

<IPython.core.display.Javascript object>

**Shogun**

In [58]:
# PCA with Shogun: fit on the samples, then project them onto two components.
# The star import is kept because cells outside this view may depend on it.
from modshogun import *
start = time.time()
# Shogun feature matrices hold one sample per column, which is already the
# layout of all_samples (one row per dimension), so it is passed untransposed.
train_features = RealFeatures(all_samples)
preprocessor = PCA(EVD)
preprocessor.set_target_dim(2)
preprocessor.init(train_features)
# E is the learned transformation matrix, not the projected data.
E = preprocessor.get_transformation_matrix()
# Apply the fitted transformation to obtain the projected samples that the
# next cell plots (one column per sample).
transformed = preprocessor.apply_to_feature_matrix(train_features)
end = time.time()
print('Shogun Time:')
print(end - start)

Shogun Time:
0.00084400177002


In [57]:
# Scatter the projected samples: columns 0-19 are class 1, columns 20-39 are class 2
for class_cols, class_marker, class_colour, class_label in (
		(slice(0, 20), 'o', 'blue', 'class1'),
		(slice(20, 40), '^', 'red', 'class2')):
	plt.plot(transformed[0, class_cols], transformed[1, class_cols], class_marker,
		markersize=7, color=class_colour, alpha=0.5, label=class_label)
plt.xlabel('x_values')
plt.ylabel('y_values')
plt.xlim([-4,4])
plt.ylim([-4,4])
plt.legend()
plt.title('Transformed samples via Shogun')
plt.show()

<IPython.core.display.Javascript object>

**Scikit-learn**

In [59]:
# PCA with scikit-learn, timed from estimator construction to the projected output
from sklearn.decomposition import PCA as sklearnPCA
start = time.time()
sklearn_pca = sklearnPCA(n_components=2)
# The transpose of all_samples is passed so that each row is one sample.
sklearn_transf = sklearn_pca.fit_transform(np.transpose(all_samples))
end = time.time()
print('scikit-learn time:')
print(end - start)

scikit-learn time:
0.000843048095703


In [60]:
# Scatter the scikit-learn projection: rows 0-19 are class 1, rows 20-39 are class 2
for class_rows, class_marker, class_colour, class_label in (
		(slice(0, 20), 'o', 'blue', 'class1'),
		(slice(20, 40), '^', 'red', 'class2')):
	plt.plot(sklearn_transf[class_rows, 0], sklearn_transf[class_rows, 1], class_marker,
		markersize=7, color=class_colour, alpha=0.5, label=class_label)
plt.xlim([-4,4])
plt.ylim([-4,4])
plt.xlabel('x_values')
plt.ylabel('y_values')
plt.legend()
plt.title('Transformed samples via sklearn.decomposition.PCA')
plt.show()

<IPython.core.display.Javascript object>

**Matplotlib**

In [61]:
# PCA with matplotlib.mlab; only the constructor call is timed
# NOTE(review): mlab.PCA may be missing from recent Matplotlib releases - confirm the installed version
from matplotlib.mlab import PCA as mlabPCA
start = time.time()
# The transpose of all_samples is passed so that each row is one sample.
mlab_pca = mlabPCA(np.transpose(all_samples))
end = time.time()
print('matplotlib time:')
print(end - start)

matplotlib time:
0.000514984130859


In [62]:
# Scatter the first two columns of mlab_pca.Y: rows 0-19 are class 1, rows 20-39 are class 2
for class_rows, class_marker, class_colour, class_label in (
		(slice(0, 20), 'o', 'blue', 'class1'),
		(slice(20, 40), '^', 'red', 'class2')):
	plt.plot(mlab_pca.Y[class_rows, 0], mlab_pca.Y[class_rows, 1], class_marker,
		markersize=7, color=class_colour, alpha=0.5, label=class_label)
plt.xlim([-4,4])
plt.ylim([-4,4])
plt.xlabel('x_values')
plt.ylabel('y_values')
plt.legend()
plt.title('Transformed samples with class labels from matplotlib.mlab.PCA()')
plt.show()

<IPython.core.display.Javascript object>