In [1]:
#!/usr/bin/env python
import numpy as np
from tools import *
from sklearn.metrics.pairwise import *

This code tests Mercer's Theorem.<br>
Given<br>
1. $K$ as the kernel matrix for $n$ samples.<br>
2. $\{e_1, e_2, ..., e_m \}$ as its eigenfunctions <br>
3. $\{v_1, v_2, ..., v_m \}$ as its eigenvectors<br>
4. $\{\sigma_1, \sigma_2, ..., \sigma_m \}$ as its eigenvalues <br>
5. $\mathcal{X} \subset \mathbb{R}^d$ as the input space<br>
6. $k: \mathcal{X} \times \mathcal{X} \to \mathbb{R}$ as a symmetric continuous kernel function.<br>
then Mercer's Thm states that<br>
<br>
$$k(x, y) = \sum_{i=1}^\infty e_i(x) e_i(y)$$<br>
      $$= \begin{bmatrix} e_1(x) & e_2(x) & ... & e_m(x) \end{bmatrix} \begin{bmatrix} e_1(y)  \\ e_2(y) \\ ... \\ e_m(y) \\ \end{bmatrix}      $$<br>
<br>
This implies that using the eigenfunctions as the basis gives a much smaller set of basis functions for the feature map.<br>
Here, the $i$-th eigenfunction is defined as <br>
$$e_i = \frac{1}{\sqrt{\sigma_i}} \Phi^{\top} v_i$$<br>
<br>
Therefore, using the eigenfunction as feature maps, we have<br>
$$\Phi = \begin{bmatrix} e_1(x) & e_2(x) & ... \end{bmatrix} = \begin{bmatrix} \Phi e_1 & \Phi e_2 & ... \end{bmatrix} = \begin{bmatrix} \frac{1}{\sqrt{\sigma_1}} \Phi \Phi^{\top} v_1 &  \frac{1}{\sqrt{\sigma_2}} \Phi \Phi^{\top} v_2 & ... \end{bmatrix}  = \begin{bmatrix} \frac{1}{\sqrt{\sigma_1}} K v_1 &  \frac{1}{\sqrt{\sigma_2}} K v_2 & ... \end{bmatrix} = K \begin{bmatrix} v_1 & v_2 & ... \end{bmatrix} \Sigma = K V \Sigma.$$<br>
<br>
Where <br>
$$ \Sigma = \begin{bmatrix} \frac{1}{\sqrt{\sigma_1}} & 0 & \cdots \\  0 & \frac{1}{\sqrt{\sigma_2}} & \cdots  \\  \vdots & \vdots & \ddots \end{bmatrix}$$<br>
<br>
<div class="alert alert-block alert-info"><br>
<b>Tip:</b> <br>
In this experiment, we are going to <br>
a. Generate 10 random samples. <br>
b. Compute the kernel matrix $K$ directly from these samples. <br>
c. Use Mercer's theorem to generate $\Phi$ from $K$ via the eigenfunctions. <br>
d. If Mercer's theorem holds, the feature map $\Phi$ generated from the eigenfunctions should satisfy $\Phi \Phi^{\top} \approx K$.<br>
</div>

In [2]:
# Experiment parameters (named so they can be tuned in one place).
SEED = 0            # fixed seed so Restart & Run All reproduces the same samples
N_SAMPLES = 10      # number of random 2-D samples
N_COMPONENTS = 9    # number of eigenpairs kept when building the feature map
γ = 0.5             # RBF kernel width parameter passed to rbf_kernel

# Generate kernel matrix
rng = np.random.default_rng(SEED)
X = rng.standard_normal((N_SAMPLES, 2))
K = rbf_kernel(X, gamma=γ)

# Generate feature maps via the eigenfunctions
# NOTE(review): eigh_sort comes from the project-local `tools` module; this
# presumably returns eigenvalues in descending order with matching eigenvector
# columns, so the slice keeps the largest N_COMPONENTS eigenpairs — verify.
D, V = eigh_sort(K)
Σ = np.diag(1/np.sqrt(D[:N_COMPONENTS]))
V = V[:, :N_COMPONENTS]

# Φ = K V Σ, and K2 = Φ Φᵀ is the kernel matrix rebuilt from that feature map.
Φ = K.dot(V).dot(Σ)
K2 = Φ.dot(Φ.T)

#	Remember that since this is a Gaussian Kernel, the feature map should be 
#	$\Phi \in \mathbb{R}^{n \times \infty}$, but through Mercer's theorem, 
#	we are able to obtain $\Phi \in \mathbb{R}^{n \times 9}$. This is much
#	easier to deal with. 
#
#	Lastly, when we print out the kernel matrices $K$ and $K_2$, notice
#	that they are approximately the same. 

print(K, '\n') 
print(K2)

# Quantify "approximately the same" instead of relying on eyeballing the matrices.
print('\nmax |K - K2| =', np.abs(K - K2).max())

[[1.     0.1988 0.8349 0.3274 0.8804 0.3341 0.827  0.9866 0.827  0.6242]
 [0.1988 1.     0.057  0.0051 0.0766 0.0064 0.4869 0.1761 0.4525 0.0251]
 [0.8349 0.057  1.     0.6078 0.9832 0.5707 0.4771 0.8647 0.5043 0.8567]
 [0.3274 0.0051 0.6078 1.     0.498  0.9713 0.1226 0.3263 0.1082 0.8681]
 [0.8804 0.0766 0.9832 0.498  1.     0.462  0.5376 0.9219 0.5922 0.7629]
 [0.3341 0.0064 0.5707 0.9713 0.462  1.     0.1372 0.32   0.1112 0.8735]
 [0.827  0.4869 0.4771 0.1226 0.5376 0.1372 1.     0.7721 0.9319 0.3163]
 [0.9866 0.1761 0.8647 0.3263 0.9219 0.32   0.7721 1.     0.8195 0.6123]
 [0.827  0.4525 0.5043 0.1082 0.5922 0.1112 0.9319 0.8195 1.     0.2838]
 [0.6242 0.0251 0.8567 0.8681 0.7629 0.8735 0.3163 0.6123 0.2838 1.    ]] 

[[1.     0.1988 0.8349 0.3274 0.8804 0.3341 0.827  0.9866 0.827  0.6242]
 [0.1988 1.     0.057  0.0051 0.0766 0.0064 0.4869 0.1761 0.4525 0.0251]
 [0.8349 0.057  1.     0.6078 0.9832 0.5707 0.4771 0.8646 0.5043 0.8567]
 [0.3274 0.0051 0.6078 1.     0.498  0.9713 0.12