In [1]:
#!/usr/bin/env python

This example shows how we can approximate the eigenvectors of a kernel matrix by combining randomized SVD with the Nystrom method.

**Method**<br>
1. We first subsample p columns; within these p columns we pick a smaller set of q columns (p >> q) and use those q columns as L for Nystrom<br>
2. We compute the eigenvectors from the q columns and use them to approximate the eigenvectors of the p x p matrix; call this approximation V1<br>
3. We next use V1 as the projection matrix for random SVD, which refines V1 into a better approximation V2<br>
4. We then use V2 (the better approximation) in a second Nystrom step to approximate the eigenvectors of the entire kernel matrix K<br>
<br>
Nystrom eigenvector as Q 	-> 	random svd refine the eigenvectors 	-> expand it to the complete Kernel matrix <br>
nystrom expansion			-> 	svd refinement  					-> nystrom expansion

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import sklearn 
from sklearn.utils import shuffle
from sklearn.kernel_approximation import Nystroem
from tools import *

Initialize all the settings

In [3]:
# Load the wine data set; csv_load is asked to shuffle the rows on load.
X = csv_load('../dataset/wine.csv', shuffle_samples=True)
n = X.shape[0]		# number of total samples
p, q = 145, 30		# p: rows/columns of the intermediate subsample, q: smaller Nystrom subsample taken from it
γ = get_rbf_γ(X)	# γ used for the gaussian kernel

Initialize subsamples

In [4]:
# Both subsamples are prefixes of X, so Xa (q rows) is contained in Xb (p rows).
Xa, Xb = X[:q, :], X[:p, :]

# n x p slice of the kernel: every sample against the p subsampled ones.
sampledK = sklearn.metrics.pairwise.rbf_kernel(
	X,
	Y=Xb,
	gamma=γ,
)

Compute the true kernel from p samples

In [5]:
# Full n x n RBF kernel over all samples.
K = sklearn.metrics.pairwise.rbf_kernel(X, gamma=γ)

# p x p kernel over the first p samples (top-left block of sampledK).
Kp = sampledK[0:p, 0:p]

# compute the "actual" eigenvectors
# Kp is a symmetric kernel block, so use eigh rather than eig: eig makes no
# promise about eigenvalue order and can return complex output. eigh returns
# real eigenvalues in ascending order, so reverse both outputs to put the
# dominant eigenpair in column 0, which is what slices such as U[:, 0:10]
# and Λ[0:10] assume.
Λ, U = np.linalg.eigh(Kp)
Λ, U = Λ[::-1], U[:, ::-1]

**Step 1**<br>
Use Nystrom to approximate the initial V1

In [6]:
L = sampledK[0:p, 0:q]		# p x q: the p subsamples against the first q of them
A = L[0:q,:]				# q x q block of L

# A is a symmetric kernel block, so use eigh and sort explicitly.
# np.linalg.eig does not guarantee any eigenvalue order, so slicing the first
# 10 columns of its output is not guaranteed to select the largest eigenpairs.
# eigh returns ascending order; reverse so the largest come first.
σs, V = np.linalg.eigh(A)
σs, V = σs[::-1], V[:, ::-1]

V = V[:,0:10] # only keeping the largest eigenvectors
Σ = np.diag(1/(σs[0:10]))

# Nystrom extension: lift the q-dimensional eigenvectors to p rows.
V1 = L.dot(V).dot(Σ)

The result of step 1 gives us a bad approximation

In [7]:
# Report the actual subsample sizes instead of hard-coded numbers, so the message stays correct when p or q change.
jupyter_print('We used %d samples to approximate eigenvector of %d samples (Note: This approximation is not supposed to be good)' % (q, p))
# Show the first 15 rows of the leading 4 eigenvectors side by side.
print_two_matrices_side_by_side(U[0:15, 0:4], V1[0:15, 0:4], title1='Actual eigenvectors', title2='Approximated Eigenvectors')
# p*10 is the number of elements in the compared p x 10 blocks.
avg_error = mean_absolute_error(U[:,0:10], V1, (p*10))
jupyter_print('The average absolute initial error with Nystrom of each element is %f\n\n'% avg_error)

       Actual eigenvectors       	Approximated Eigenvectors    
[ 0.0487  0.1401  0.0067  0.1099]	[-0.0646  0.3456  0.0331  0.2763]
[ 0.0995 -0.0578  0.0508  0.0022]	[-0.2286 -0.0594 -0.0606 -0.0178]
[ 0.0921 -0.0664  0.086   0.0473]	[-0.2193 -0.084  -0.1542  0.0397]
[ 0.0404  0.145   0.0478  0.0852]	[-0.0499  0.3633 -0.0501  0.2463]
[ 0.0857 -0.0689  0.104   0.0765]	[-0.2086 -0.0948 -0.2056  0.0786]
[ 0.0977 -0.0602  0.0598  0.0129]	[-0.2264 -0.066  -0.0845 -0.0043]
[ 0.0216  0.1288  0.1347 -0.082 ]	[-0.0226  0.3481 -0.2229 -0.0702]
[ 0.1031 -0.0494  0.0252 -0.0239]	[-0.2313 -0.0389  0.0053 -0.0501]
[ 0.1022 -0.0518  0.0322 -0.0174]	[-0.231  -0.0446 -0.012  -0.0424]
[ 0.0995 -0.0581  0.0517  0.0029]	[-0.2291 -0.06   -0.0621 -0.0173]
[ 0.0924  0.0504 -0.1053 -0.0176]	[-0.1687  0.1576  0.294   0.0082]
[ 0.0912 -0.0669  0.0886  0.0511]	[-0.218  -0.0856 -0.1613  0.0448]
[ 0.0112  0.0913  0.1458 -0.2253]	[-0.011   0.2669 -0.2558 -0.4536]
[ 0.104  -0.0021 -0.0656 -0.0647]	[-0.2111  0.0587  

**Step 2**<br>
Use QR to orthogonalize V1 into Q, shrink the p x p kernel to the small matrix M = Qᵀ·Kp, and refine the eigenvectors with an SVD of M

In [8]:
# p x p kernel block that the refined eigenvectors should diagonalize.
A2 = sampledK[:p, :p]

# Orthonormal basis for the span of the step-1 approximation.
# note that qr here ran on a small matrix
Q, R = np.linalg.qr(V1)

# Project the kernel onto that basis, then decompose the projection.
# note that the svd here also ran on a small matrix
M = Q.T @ A2
Ư, Σ2, Vᵀ = np.linalg.svd(M)

# Map the left singular vectors back to p rows.
V2 = Q @ Ư

In [9]:
jupyter_print('We used random SVD to refine the original approximate, this should be better')

# First 15 rows of the leading 4 eigenvectors: reference vs. refined.
print_two_matrices_side_by_side(
	U[0:15, 0:4],
	V2[0:15, 0:4],
	title1='Actual eigenvectors',
	title2='Approximated Eigenvectors',
)

# Error over the p x 10 blocks being compared.
avg_error = mean_absolute_error(U[:, 0:10], V2, p * 10)
jupyter_print('Notice that the average absolute error after random svd of each element is %f' % avg_error)

       Actual eigenvectors       	Approximated Eigenvectors    
[ 0.0487  0.1401  0.0067  0.1099]	[ 0.0487 -0.1402  0.0067  0.1099]
[ 0.0995 -0.0578  0.0508  0.0022]	[ 0.0995  0.0577  0.0507  0.0021]
[ 0.0921 -0.0664  0.086   0.0473]	[ 0.0921  0.0664  0.086   0.0474]
[ 0.0404  0.145   0.0478  0.0852]	[ 0.0404 -0.145   0.0478  0.0852]
[ 0.0857 -0.0689  0.104   0.0765]	[ 0.0857  0.0689  0.104   0.0766]
[ 0.0977 -0.0602  0.0598  0.0129]	[ 0.0977  0.0602  0.0597  0.0127]
[ 0.0216  0.1288  0.1347 -0.082 ]	[ 0.0216 -0.1288  0.1347 -0.0822]
[ 0.1031 -0.0494  0.0252 -0.0239]	[ 0.1031  0.0494  0.0252 -0.0239]
[ 0.1022 -0.0518  0.0322 -0.0174]	[ 0.1022  0.0518  0.0322 -0.0174]
[ 0.0995 -0.0581  0.0517  0.0029]	[ 0.0995  0.0581  0.0518  0.003 ]
[ 0.0924  0.0504 -0.1053 -0.0176]	[ 0.0924 -0.0504 -0.1054 -0.0177]
[ 0.0912 -0.0669  0.0886  0.0511]	[ 0.0912  0.0669  0.0886  0.0513]
[ 0.0112  0.0913  0.1458 -0.2253]	[ 0.0112 -0.0914  0.1458 -0.2254]
[ 0.104  -0.0021 -0.0656 -0.0647]	[ 0.1041  0.0021 -

In [10]:
jupyter_print('Next, notice that the eigenvalues from random svd and the true eigenvalues are the same')
print('Actual eigenvalues 1st row / Approximated eigenvalues 2nd row')
# Leading 10 eigenvalues of Kp, then the leading 10 singular values from the step-2 SVD.
print(Λ[:10])
print(Σ2[:10], '\n\n')

Actual eigenvalues 1st row / Approximated eigenvalues 2nd row
[83.2462 36.7099 15.7864  6.021   2.0448  0.6671  0.1831  0.1565  0.0691  0.0461]
[83.2462 36.7099 15.7864  6.021   2.0448  0.6671  0.1813  0.1562  0.0682  0.0457] 




**Step 3**<br>
Use the result from random SVD as basis of nystrom for the full kernel matrix

In [11]:
# Inverse singular values from step 2 on the diagonal.
Σ3 = np.diag(1 / Σ2)

# Nystrom extension to all n samples via the n x p kernel slice.
Ꮭ = sampledK
Ū = Ꮭ @ V2 @ Σ3

In [12]:
# compute the "actual" eigenvectors
# K is a symmetric kernel matrix, so use eigh rather than eig: eig does not
# guarantee eigenvalue order, but the slices below assume the leading columns
# are the dominant eigenvectors. eigh returns ascending order, so reverse it.
Λᴋ, Uᴋ = np.linalg.eigh(K)
Λᴋ, Uᴋ = Λᴋ[::-1], Uᴋ[:, ::-1]

jupyter_print('We now obtain the eigenvector approximation and compare it to the true eigenvectors')
print_two_matrices_side_by_side(Uᴋ[0:15, 0:4], Ū[0:15, 0:4], title1='Actual eigenvectors', title2='Approximated Eigenvectors')
# NOTE(review): Ū is compared here as produced by the Nystrom expansion; confirm
# whether mean_absolute_error accounts for column sign/scale differences.
avg_error = mean_absolute_error(Uᴋ[:,0:10], Ū, (n*10))
# This stage is the final Nystrom expansion, not the random svd refinement.
jupyter_print('Notice that the average absolute error after the final Nystrom expansion of each element is %f'% avg_error)

       Actual eigenvectors       	Approximated Eigenvectors    
[-0.0424 -0.1298 -0.026  -0.0964]	[ 0.0487 -0.1401  0.0067  0.1099]
[-0.0879  0.0552 -0.0449 -0.0034]	[ 0.0995  0.0578  0.0508  0.0022]
[-0.081   0.0644 -0.0766 -0.0464]	[ 0.0921  0.0664  0.086   0.0473]
[-0.0348 -0.1319 -0.064  -0.0688]	[ 0.0404 -0.145   0.0478  0.0852]
[-0.0752  0.0672 -0.0929 -0.0745]	[ 0.0857  0.0689  0.104   0.0765]
[-0.0862  0.0578 -0.0531 -0.0137]	[ 0.0977  0.0602  0.0598  0.0129]
[-0.0178 -0.1113 -0.139   0.0927]	[ 0.0216 -0.1288  0.1347 -0.082 ]
[-0.0912  0.0465 -0.0219  0.0214]	[ 0.1031  0.0494  0.0252 -0.0239]
[-0.0904  0.049  -0.0281  0.0154]	[ 0.1022  0.0518  0.0322 -0.0174]
[-0.0879  0.0555 -0.0457 -0.004 ]	[ 0.0995  0.0581  0.0517  0.0029]
[-0.0823 -0.0522  0.0888  0.012 ]	[ 0.0924 -0.0504 -0.1053 -0.0176]
[-0.0802  0.0648 -0.0789 -0.0501]	[ 0.0912  0.0669  0.0886  0.0511]
[-0.0088 -0.0759 -0.1412  0.2184]	[ 0.0112 -0.0913  0.1458 -0.2253]
[-0.0926 -0.0013  0.0577  0.0584]	[ 0.104   0.0021 -