In [1]:
# Install pykeops with its "full" extra; pip's standard output is redirected to log.log.
!pip install pykeops[full] > log.log


[31mERROR: breathe 4.26.1 has requirement Sphinx<3.5,>=3.0, but you'll have sphinx 1.8.5 which is incompatible.[0m


# To showcase and share test results

Please don't keep all the dirty work here, just clean results that may be useful to share.

In [2]:
import numpy as np
import torch
from sklearn.utils import check_random_state, as_float_array
from scipy.linalg import svd
from pykeops.torch import LazyTensor
from sklearn.kernel_approximation import Nystroem
import scipy

# Basic Nystrom code

In [3]:
##############################################################################

'''
The two classes below implement the Nystrom algorithm. One can transform
the data into the approximated feature-space and/or obtain the approximated 
kernel.

Example of usage:

Let X_i be a LazyTensor of shape =  (1, length, features), then 

LN = LazyNystrom_T(n_components=100 ,kernel='rbf', gamma=1.) # creates an instance
LN.fit(X_i)  # fits to data         
X_new_i = LN.transform(X_i)  # transform data to approximated features
K_approx = LN.K_approx(X_i)  # obtain approximated kernel

'''

class LazyNystrom_N:
    '''
        Class to implement Nystrom on torch LazyTensors.
        This class works as an interface between lazy tensors and 
        the Nystrom algorithm in NumPy.

        Notation: K_q is the (n_components, n_components) kernel matrix of the
        sampled basis points and K_nq the (n_samples, n_components) kernel
        between the data and the basis points.

        * The fit method samples the basis points and computes K_q^{-1/2}
        (stored in normalization_).

        * The transform method maps the data into the feature space underlying
        the Nystrom-approximated kernel, i.e. returns K_nq @ K_q^{-1/2}.

        * The method K_approx directly computes the Nystrom approximation
        K_nq @ K_q^{-1} @ K_nq.T.

        Parameters:

        n_components [int] = how many samples to select from data.
        kernel [str] = type of kernel to use. Current options = {linear, rbf}.
        gamma [float] = exponential constant for the RBF kernel. 
        random_state=[None, int, RandomState] = to set a random seed for the
                                     random sampling of the samples. To be used
                                     when reproducibility is needed.

        Attributes set by fit:

        normalization_ [np.ndarray] = K_q^{-1/2}, shape (n_components, n_components).
        components_ [np.ndarray] = sampled basis points, shape (n_components, n_features).
        component_indices_ [np.ndarray] = row indices of components_ in the fitted data.

    '''
  
    def __init__(self, n_components=100, kernel='linear', gamma:float = 1., 
                 random_state=None): 

        self.n_components = n_components
        self.kernel = kernel
        self.random_state = random_state
        self.gamma = gamma


    def fit(self, X:LazyTensor):
        ''' 
        Args:   X = lazy tensor with features of shape 
                (1, n_samples, n_features)

        Returns: Fitted instance of the class
        '''

        # Basic checks
        assert type(X) == LazyTensor, 'Input to fit(.) must be a LazyTensor.'
        assert X.shape[1] >= self.n_components, f'The application needs X.shape[1] >= n_components.'

        # Reducing over the singleton first axis materialises the data as a
        # dense (n_samples, n_features) array.
        X = X.sum(dim=0).numpy()
        # Number of samples
        n_samples = X.shape[0]
        # Define basis: the first n_components entries of a random permutation
        rnd = check_random_state(self.random_state)
        inds = rnd.permutation(n_samples)
        basis_inds = inds[:self.n_components]
        basis = X[basis_inds]
        # Build smaller kernel (the instance's gamma must be forwarded,
        # otherwise the helper's default of 1. is silently used)
        basis_kernel = self._pairwise_kernels(basis, kernel = self.kernel,
                                              gamma = self.gamma)
        # Get SVD; scipy returns (U, S, Vh), so U / sqrt(S) @ Vh = K_q^{-1/2}
        U, S, V = svd(basis_kernel)
        # Floor the singular values to avoid dividing by (near-)zero
        S = np.maximum(S, 1e-12)
        self.normalization_ = np.dot(U / np.sqrt(S), V)
        self.components_ = basis
        # Store only the indices that were actually sampled, so that
        # X[component_indices_] == components_.
        self.component_indices_ = basis_inds
        return self


    def _pairwise_kernels(self, x:np.ndarray, y:np.ndarray = None, kernel='linear',
                          gamma = 1.):
        '''Helper function to build kernel
        
        Args:   x = array of shape (n, n_features).
                y = array of shape (m, n_features); defaults to x.
                kernel = type of kernel to return, 'linear' or 'rbf'.
                gamma = exponential constant, only used by the 'rbf' kernel.

        Returns: kernel matrix of shape (n, m).

        Raises: ValueError if kernel is not one of the supported names.
        '''
        
        if y is None:
            y = x
        if kernel == 'linear':
            K = x @ y.T 
        elif kernel == 'rbf':
            # Squared Euclidean distances through broadcasting: (n, m, features) -> (n, m)
            K =  ( (x[:,None,:] - y[None,:,:])**2 ).sum(-1)
            K = np.exp(- gamma* K)
        else:
            raise ValueError(f"Unsupported kernel '{kernel}'; expected 'linear' or 'rbf'.")
  
        return K

    def transform(self, X:LazyTensor) -> LazyTensor:
        ''' Applies transform on the data.
        
        Args:
            X [LazyTensor] = data to transform, shape (1, n_samples, n_features)
        Returns
            X [LazyTensor] = data after transformation,
                             shape (1, n_samples, n_components)
        '''
        
        # Work in NumPy like fit(.) and K_approx(.) do; components_ and
        # normalization_ are NumPy arrays.
        X = X.sum(dim=0).numpy()
        K_nq = self._pairwise_kernels(X, self.components_, self.kernel,
                                      self.gamma)
        # Back to torch before wrapping, mirroring K_approx(.)
        X_new = torch.tensor(K_nq @ self.normalization_.T)

        return LazyTensor(X_new[None,:,:])

    
    def K_approx(self, X:LazyTensor) -> LazyTensor:
        ''' Function to return Nystrom approximation to the kernel.
        
        Args:
            X[LazyTensor] = data used in fit(.) function.
        Returns
            K[LazyTensor] = Nystrom approximation to kernel,
                            shape (1, n_samples, n_samples)'''
        
        X = X.sum(dim=0).numpy()
        K_nq = self._pairwise_kernels(X, self.components_, self.kernel,
                                      self.gamma)
        # normalization_ holds K_q^{-1/2}, so the approximation
        # K_nq K_q^{-1} K_nq^T is the Gram matrix of the transformed features.
        features = K_nq @ self.normalization_.T
        K_approx = torch.tensor(features @ features.T)
        return LazyTensor(K_approx[None,:,:])




##########################################################################

# Same as LazyNystrom_N but written with Pytorch

class LazyNystrom_T:
    '''
        Class to implement Nystrom on torch LazyTensors.
        This class works as an interface between lazy tensors and 
        the Nystrom algorithm in PyTorch.

        Notation: K_q is the (n_components, n_components) kernel matrix of the
        sampled basis points and K_nq the (n_samples, n_components) kernel
        between the data and the basis points.

        * The fit method samples the basis points and computes K_q^{-1/2}
        (stored in normalization_).

        * The transform method maps the data into the feature space underlying
        the Nystrom-approximated kernel, i.e. returns K_nq @ K_q^{-1/2}.

        * The method K_approx directly computes the Nystrom approximation
        K_nq @ K_q^{-1} @ K_nq.T.

        Parameters:

        n_components [int] = how many samples to select from data.
        kernel [str] = type of kernel to use. Current options = {linear, rbf}.
        gamma [float] = exponential constant for the RBF kernel. 
        random_state=[None, int, RandomState] = to set a random seed for the
                                     random sampling of the samples. To be used
                                     when reproducibility is needed.

        Attributes set by fit:

        normalization_ [torch.Tensor] = K_q^{-1/2}, shape (n_components, n_components).
        components_ [torch.Tensor] = sampled basis points, shape (n_components, n_features).
        component_indices_ [np.ndarray] = row indices of components_ in the fitted data.

    '''
  
    def __init__(self, n_components=100, kernel='linear',  gamma:float = 1., 
                 random_state=None ):
        
        self.n_components = n_components
        self.kernel = kernel
        self.random_state = random_state
        self.gamma = gamma


    def fit(self, X:LazyTensor):
        ''' 
        Args:   X = torch lazy tensor with features of shape 
                (1, n_samples, n_features)

        Returns: Fitted instance of the class
        '''

        # Basic checks: we have a lazy tensor and n_components isn't too large
        assert type(X) == LazyTensor, 'Input to fit(.) must be a LazyTensor.'
        assert X.shape[1] >= self.n_components, f'The application needs X.shape[1] >= n_components.'

        # Reducing over the singleton first axis materialises the data as a
        # dense (n_samples, n_features) tensor.
        X = X.sum(dim=0) 
        # Number of samples
        n_samples = X.size(0)
        # Define basis: the first n_components entries of a random permutation
        rnd = check_random_state(self.random_state)
        inds = rnd.permutation(n_samples)
        basis_inds = inds[:self.n_components]
        basis = X[basis_inds]
        # Build smaller kernel (the instance's gamma must be forwarded,
        # otherwise the helper's default of 1. is silently used)
        basis_kernel = self._pairwise_kernels(basis, kernel = self.kernel,
                                              gamma = self.gamma)
        # Get SVD; torch.svd returns (U, S, V) with V not transposed, so
        # U / sqrt(S) @ V^T = K_q^{-1/2}
        U, S, V = torch.svd(basis_kernel)
        # Floor the singular values to avoid dividing by (near-)zero; clamp
        # keeps the dtype and device of S.
        S = torch.clamp(S, min=1e-12)
        self.normalization_ = torch.mm(U / torch.sqrt(S), V.t())
        self.components_ = basis
        # Store only the indices that were actually sampled, so that
        # X[component_indices_] == components_.
        self.component_indices_ = basis_inds
        
        return self


    def _pairwise_kernels(self, x:torch.Tensor, y:torch.Tensor = None, kernel='linear',
                          gamma = 1.) -> torch.Tensor:
        '''Helper function to build kernel
        
        Args:   x = torch tensor of shape (n, n_features).
                y = torch tensor of shape (m, n_features); defaults to x.
                kernel = type of kernel to return, 'linear' or 'rbf'.
                gamma = exponential constant, only used by the 'rbf' kernel.

        Returns: kernel matrix of shape (n, m).

        Raises: ValueError if kernel is not one of the supported names.
        '''
        
        if y is None:
            y = x
        if kernel == 'linear':
            K = x @ y.T
        elif kernel == 'rbf':
            # Squared Euclidean distances through broadcasting: (n, m, features) -> (n, m)
            K =  ( (x[:,None,:] - y[None,:,:])**2 ).sum(-1)
            K = torch.exp(- gamma * K )
        else:
            raise ValueError(f"Unsupported kernel '{kernel}'; expected 'linear' or 'rbf'.")

        return K

    def transform(self, X:LazyTensor) -> LazyTensor:
        ''' Applies transform on the data.
        
        Args:
            X [LazyTensor] = data to transform, shape (1, n_samples, n_features)
        Returns
            X [LazyTensor] = data after transformation,
                             shape (1, n_samples, n_components)
        '''
        
        X = X.sum(dim=0)
        K_nq = self._pairwise_kernels(X, self.components_, self.kernel,
                                      self.gamma)
        return LazyTensor((K_nq @ self.normalization_.t())[None,:,:])

    
    def K_approx(self, X:LazyTensor) -> LazyTensor:
        ''' Function to return Nystrom approximation to the kernel.
        
        Args:
            X[LazyTensor] = data used in fit(.) function.
        Returns
            K[LazyTensor] = Nystrom approximation to kernel,
                            shape (1, n_samples, n_samples)'''
        
        X = X.sum(dim=0)
        K_nq = self._pairwise_kernels(X, self.components_, self.kernel,
                                      self.gamma)
        # normalization_ holds K_q^{-1/2}, so the approximation
        # K_nq K_q^{-1} K_nq^T is the Gram matrix of the transformed features.
        features = K_nq @ self.normalization_.t()
        K_approx = features @ features.t()
        return LazyTensor(K_approx[None,:,:])


## Testing the fit and transform methods - numpy version

Note: Given two vectors $a$ and $b$ (matrices are flattened into vectors), I compute the error as $$e = \frac{\| a - b\|_2}{L}$$ where $L = \mathrm{len}(a)$. My reasoning for the $1/L$ factor is that, without it, the error would grow with the size of the vectors.

In [13]:
# We test the LazyNystrom_N fit/transform methods using a Linear kernel

length = 1000
num_sampling = 100

# Integer-valued test data drawn from a locally seeded generator, so the cell
# gives the same result on every run without touching the global torch RNG.
x = torch.randint(10,(1,length,3),generator=torch.Generator().manual_seed(0),dtype=torch.float32)
X_i = LazyTensor(x)

# Instantiate & fit Nystroem for comparison
sk_N = Nystroem(kernel='linear', n_components=num_sampling, random_state=0).fit(x[0].numpy())  # input: (length, features) array
x_new = sk_N.transform(x[0].numpy())                                                           # output: (length, num_sampling) array

# Instantiate & fit on lazy tensor version
LN_test = LazyNystrom_N(num_sampling, random_state=0).fit(X_i)   # input: (1, length, features) lazy tensor
X_new_i = LN_test.transform(X_i)                                 # output: (1,length,num_sampling) lazy tensor

# Print the L2 error, normalised by the total number of entries compared
err = np.linalg.norm(x_new - X_new_i.sum(dim=0).numpy()) / x_new.size
print(f'Error when compared to sklearn = {err}')

Error when compared to sklearn = 0.0


In [14]:
# We test the LazyNystrom_N fit/transform methods using a Gaussian kernel

length = 1000
num_sampling = 100

# Integer-valued test data drawn from a locally seeded generator, so the cell
# gives the same result on every run without touching the global torch RNG.
x = torch.randint(10,(1,length,3),generator=torch.Generator().manual_seed(0),dtype=torch.float32)
X_i = LazyTensor(x)

# Instantiate & fit Nystroem for comparison
sk_N = Nystroem(kernel='rbf', gamma=1., n_components=num_sampling, random_state=0).fit(x[0].numpy())
x_new = sk_N.transform(x[0].numpy())      # (length, num_sampling) array

# Instantiate & fit on lazy tensor version
LN_test = LazyNystrom_N(num_sampling,kernel='rbf', gamma=1., random_state=0).fit(X_i) # input: (1, length, features) lazy tensor
X_new_i = LN_test.transform(X_i)                                                      # output: (1,length,num_sampling) lazy tensor

# Print the L2 error, normalised by the total number of entries compared
err = np.linalg.norm(x_new - X_new_i.sum(dim=0).numpy()) / x_new.size
print(f'Error when compared to sklearn =  {err}')

Error when compared to sklearn =  0.0


## Testing the fit and transform methods - torch version

In [15]:
# We test the LazyNystrom_T fit/transform methods using a Linear kernel

length = 1000
num_sampling = 100

# Integer-valued test data drawn from a locally seeded generator, so the cell
# gives the same result on every run without touching the global torch RNG.
x = torch.randint(10,(1,length,3),generator=torch.Generator().manual_seed(0),dtype=torch.float32)
X_i = LazyTensor(x)

# Instantiate & fit Nystroem for comparison
sk_N = Nystroem(kernel='linear', n_components=num_sampling, random_state=0).fit(x[0].numpy())
x_new = sk_N.transform(x[0].numpy())      # (length, num_sampling) array

# Instantiate & fit on lazy tensor version
LN_test = LazyNystrom_T(num_sampling, random_state=0).fit(X_i)                   # input: (1, length, features) lazy tensor
X_new_i = LN_test.transform(X_i)                                                 # output: (1,length,num_sampling) lazy tensor

# Print the L2 error, normalised by the total number of entries compared
err = np.linalg.norm(x_new - X_new_i.sum(dim=0).numpy()) / x_new.size
print(f'Error when compared to sklearn = {err}')

Error when compared to sklearn = 7.349115610122681e-06


In [16]:
# We test the LazyNystrom_T fit/transform methods using a Gaussian kernel

length = 1000
num_sampling = 100

x = torch.randint(10,(1,length,3),dtype=torch.float32)
X_i = LazyTensor(x)

# Instatiate & fit Nystroem for comparison
sk_N = Nystroem(kernel='rbf', n_components=num_sampling, random_state=0).fit(x[0].numpy())
x_new = sk_N.transform(x[0].numpy())      # (length, num_sampling) array

# Instatiate & fit on lazy tensor version
LN_test = LazyNystrom_T(num_sampling, kernel='rbf', random_state=0).fit(X_i)
X_new_i = LN_test.transform(X_i)          # (1,length,num_sampling) lazy tensor

# Print the L2 error
err = np.linalg.norm(x_new - X_new_i.sum(dim=0).numpy()) / x_new.size
print(f'Error when compared to sklearn = {err}')

Error when compared to sklearn = 0.0001354495906829834
