In [2]:
!pip install pykeops[full] > log.log


[31mERROR: breathe 4.26.1 has requirement Sphinx<3.5,>=3.0, but you'll have sphinx 1.8.5 which is incompatible.[0m


# To showcase and share test results

Please don't keep all the dirty work here, just clean results that may be useful to share.

In [40]:
import numpy as np
import torch
from sklearn.utils import check_random_state, as_float_array
from scipy.linalg import svd
from pykeops.torch import LazyTensor
from sklearn.kernel_approximation import Nystroem
import scipy

from pykeops.numpy import LazyTensor as LazyTensor_n
from pykeops.numpy.cluster import grid_cluster
from pykeops.numpy.cluster import cluster_ranges_centroids
from pykeops.numpy.cluster import sort_clusters
from pykeops.numpy.cluster import from_matrix
from scipy.sparse.linalg import aslinearoperator, eigsh

# Basic Nystrom code

In [49]:
##############################################################################

'''
The two classes below implement the Nystrom algorithm. One can transform
the data into the approximated feature-space and/or obtain the approximated 
kernel.

Example of usage:

Let X_i be a LazyTensor of shape =  (1, length, features), then 

LN = LazyNystrom_T(n_components=100 ,kernel='rbf', gamma=1.) # creates an instance
LN.fit(X_i)  # fits to data         
X_new_i = LN.transform(X_i)  # transform data to approximated features
K_approx = LN.K_approx(X_i)  # obtain approximated kernel

'''

class LazyNystrom_N:
    '''
        Class to implement Nystrom on torch LazyTensors.
        This class works as an interface between lazy tensors and 
        the Nystrom algorithm in NumPy.

        * The fit method computes K^{-1}_q.

        * The transform method maps the data into the feature space underlying
        the Nystrom-approximated kernel.

        * The method K_approx directly computes the Nystrom approximation.

        Parameters:

        n_components [int] = how many samples to select from data.
        kernel [str] = type of kernel to use. Current options = {linear, rbf}.
        gamma [float] = exponential constant for the RBF kernel. 
        random_state=[None, float] = to set a random seed for the random
                                     sampling of the samples. To be used when 
                                     reproducibility is needed.

    '''
  
    def __init__(self, n_components=100, kernel='linear', gamma:float = 1., 
                 random_state=None): 

        self.n_components = n_components
        self.kernel = kernel
        self.random_state = random_state
        self.gamma = gamma


    def fit(self, X:LazyTensor):
        ''' 
        Args:   X = lazy tensor with features of shape 
                (1, n_samples, n_features)

        Returns: Fitted instance of the class
        '''

        # Basic checks
        assert type(X) == LazyTensor, 'Input to fit(.) must be a LazyTensor.'
        assert X.shape[1] >= self.n_components, f'The application needs X.shape[1] >= n_components.'

        X = X.sum(dim=0).numpy()
        # Number of samples
        n_samples = X.shape[0]
        # Define basis
        rnd = check_random_state(self.random_state)
        inds = rnd.permutation(n_samples)
        basis_inds = inds[:self.n_components]
        basis = X[basis_inds]
        # Build smaller kernel
        basis_kernel = self._pairwise_kernels(basis, kernel = self.kernel)  
        # Get SVD
        U, S, V = svd(basis_kernel)
        S = np.maximum(S, 1e-12)
        self.normalization_ = np.dot(U / np.sqrt(S), V)
        self.components_ = basis
        self.component_indices_ = inds
        return self


    def _pairwise_kernels(self, x:np.array, y:np.array = None, kernel='linear',
                          gamma = 1.):
        '''Helper function to build kernel
        
        Args:   X = torch tensor of dimension 2.
                K_type = type of Kernel to return
        '''
        
        if y is None:
            y = x
        if kernel == 'linear':
            K = x @ y.T 
        elif kernel == 'rbf':
            K =  ( (x[:,None,:] - y[None,:,:])**2 ).sum(-1)
            K = np.exp(- gamma* K)
  
        return K

    def transform(self, X:LazyTensor) -> LazyTensor:
        ''' Applies transform on the data.
        
        Args:
            X [LazyTensor] = data to transform
        Returns
            X [LazyTensor] = data after transformation
        '''
        
        X = X.sum(dim=0)
        K_nq = self._pairwise_kernels(X, self.components_, self.kernel)

        return LazyTensor((K_nq @ self.normalization_.T)[None,:,:])

    
    def K_approx(self, X:LazyTensor) -> LazyTensor:
        ''' Function to return Nystrom approximation to the kernel.
        
        Args:
            X[LazyTensor] = data used in fit(.) function.
        Returns
            K[LazyTensor] = Nystrom approximation to kernel'''
        
        X = X.sum(dim=0).numpy()
        K_nq = self._pairwise_kernels(X, self.components_, self.kernel)
        K_approx = K_nq @ self.normalization_ @ K_nq.T
        K_approx = torch.tensor(K_approx)
        return LazyTensor(K_approx[None,:,:])




##########################################################################

# Same as LazyNystrom_N but written with Pytorch

class LazyNystrom_T:
    '''
        Class to implement Nystrom on torch LazyTensors.
        This class works as an interface between lazy tensors and 
        the Nystrom algorithm in NumPy.

        * The fit method computes K^{-1}_q.

        * The transform method maps the data into the feature space underlying
        the Nystrom-approximated kernel.

        * The method K_approx directly computes the Nystrom approximation.

        Parameters:

        n_components [int] = how many samples to select from data.
        kernel [str] = type of kernel to use. Current options = {linear, rbf}.
        gamma [float] = exponential constant for the RBF kernel. 
        random_state=[None, float] = to set a random seed for the random
                                     sampling of the samples. To be used when 
                                     reproducibility is needed.

    '''
  
    def __init__(self, n_components=100, kernel='linear',  gamma:float = 1., 
                 random_state=None ):
        
        self.n_components = n_components
        self.kernel = kernel
        self.random_state = random_state
        self.gamma = gamma


    def fit(self, X:LazyTensor):
        ''' 
        Args:   X = torch lazy tensor with features of shape 
                (1, n_samples, n_features)

        Returns: Fitted instance of the class
        '''

        # Basic checks: we have a lazy tensor and n_components isn't too large
        assert type(X) == LazyTensor, 'Input to fit(.) must be a LazyTensor.'
        assert X.shape[1] >= self.n_components, f'The application needs X.shape[1] >= n_components.'

        X = X.sum(dim=0) 
        # Number of samples
        n_samples = X.size(0)
        # Define basis
        rnd = check_random_state(self.random_state)
        inds = rnd.permutation(n_samples)
        basis_inds = inds[:self.n_components]
        basis = X[basis_inds]
        # Build smaller kernel
        basis_kernel = self._pairwise_kernels(basis, kernel = self.kernel)  
        # Get SVD
        U, S, V = torch.svd(basis_kernel)
        S = torch.maximum(S, torch.ones(S.size()) * 1e-12)
        self.normalization_ = torch.mm(U / np.sqrt(S), V.t())
        self.components_ = basis
        self.component_indices_ = inds
        
        return self


    def _pairwise_kernels(self, x:torch.tensor, y:torch.tensor = None, kernel='linear',
                          gamma = 1.) -> torch.tensor:
        '''Helper function to build kernel
        
        Args:   X = torch tensor of dimension 2.
                K_type = type of Kernel to return
        '''
        
        if y is None:
            y = x
        if kernel == 'linear':
            K = x @ y.T
        elif kernel == 'rbf':
            K =  ( (x[:,None,:] - y[None,:,:])**2 ).sum(-1)
            K = torch.exp(- gamma * K )

        return K

    def transform(self, X:LazyTensor) -> LazyTensor:
        ''' Applies transform on the data.
        
        Args:
            X [LazyTensor] = data to transform
        Returns
            X [LazyTensor] = data after transformation
        '''
        
        X = X.sum(dim=0)
        K_nq = self._pairwise_kernels(X, self.components_, self.kernel)
        return LazyTensor((K_nq @ self.normalization_.t())[None,:,:])

    
    def K_approx(self, X:LazyTensor) -> LazyTensor:
        ''' Function to return Nystrom approximation to the kernel.
        
        Args:
            X[LazyTensor] = data used in fit(.) function.
        Returns
            K[LazyTensor] = Nystrom approximation to kernel'''
        
        X = X.sum(dim=0)
        K_nq = self._pairwise_kernels(X, self.components_, self.kernel)
        K_approx = K_nq @ self.normalization_ @ K_nq.t()
        return LazyTensor(K_approx[None,:,:])

  
##########################################################################

# Similar to the above but utilizing KeOps

class Nystrom_NK:
    '''
        Class to implement Nystrom using numpy and PyKeops.
        * The fit method computes K^{-1}_q.
        * The transform method maps the data into the feature space underlying
        the Nystrom-approximated kernel.
        * The method K_approx directly computes the Nystrom approximation.
        Parameters:
        n_components [int] = how many samples to select from data.
        kernel [str] = type of kernel to use. Current options = {rbf}.
        sigma [float] = exponential constant for the RBF kernel. 
        eps[float] = size for square bins
        random_state=[None, float] = to set a random seed for the random
                                     sampling of the samples. To be used when 
                                     reproducibility is needed.
    '''
  
    def __init__(self, n_components=100, kernel='rbf', sigma:float = 1., 
                 eps:float = 0.05, random_state=None): 

        self.n_components = n_components
        self.kernel = kernel
        self.random_state = random_state
        self.sigma = sigma
        self.eps = eps


    def fit(self, x:np.ndarray):
        ''' 
        Args:   x = numpy array of shape (n_samples, n_features)
        Returns: Fitted instance of the class
        '''

        # Basic checks
        assert type(x) == np.ndarray, 'Input to fit(.) must be an array.'
        assert x.shape[0] >= self.n_components, f'The application needs X.shape[0] >= n_components.'

        # Number of samples
        n_samples = x.shape[0]
        # Define basis
        rnd = check_random_state(self.random_state)
        inds = rnd.permutation(n_samples) 
        basis_inds = inds[:self.n_components] 
        basis = x[basis_inds]
        # Build smaller kernel
        basis_kernel = self._pairwise_kernels(basis, kernel = self.kernel)  
        # Spectral decomposition
        S, U = self._spectral(basis_kernel)
        S = np.maximum(S, 1e-12)
        self.normalization_ = np.dot(U / np.sqrt(S), U.T)
        self.components_ = basis
        self.component_indices_ = inds
        return self


    def _pairwise_kernels(self, x:np.array, y:np.array = None, kernel='rbf',
                          sigma = 1.) -> LazyTensor:
        '''Helper function to build kernel
        
        Args:   X = torch tensor of dimension 2,
                K_type = type of Kernel to return.
        Returns:
                K_ij[LazyTensor]
        '''
        if y is None:
            y = x
        if kernel == 'linear': 
            K_ij = x @ y.T 
        elif kernel == 'rbf':
            x /= sigma
            y /= sigma
            x_i, x_j = LazyTensor_n(x[:, None, :]), LazyTensor_n(y[None, :, :])
            K_ij = (-1*((x_i - x_j)**2).sum(2)).exp()
            # block-sparse reduction preprocess
            K_ij = self._Gauss_block_sparse_pre(x, y, K_ij, self.sigma, self.eps)
        elif kernel == 'exp':
            x_i, x_j = LazyTensor_n(x[:, None, :]), LazyTensor_n(y[None, :, :])
            K_ij = (-1 * (abs(x_i - x_j)).sum(2)).exp()
            # block-sparse reduction preprocess
            K_ij = self._Gauss_block_sparse_pre(x, y, K_ij, self.sigma, self.eps)
        return K_ij


    def _spectral(self, X_i:LazyTensor):
        '''
        Helper function to compute eigendecomposition of K_q.
        Args: X_i[numpy LazyTensor]
        Returns S[np.array] eigenvalues,
                U[np.array] eigenvectors
        '''
        K_linear = aslinearoperator(X_i)
        k = K_linear.shape[0] - 1
        S, U = eigsh(K_linear, k=k, which='LM')
        return S, U
        

    def transform(self, x:np.ndarray) -> np.array:
        ''' Applies transform on the data.
        
        Args:
            X [np.array] = data to transform
        Returns
            X [np.array] = data after transformation
        '''
        
        K_nq = self._pairwise_kernels(x, self.components_, self.kernel)
        x_new = K_nq @ self.normalization_.T
        return x_new

    
    def K_approx(self, x:np.array) -> np.array:
        ''' Function to return Nystrom approximation to the kernel.
        
        Args:
            X[np.array] = data used in fit(.) function.
        Returns
            K[np.array] = Nystrom approximation to kernel'''
       
        K_nq = self._pairwise_kernels(x, self.components_, self.kernel)
        # For arrays: K_approx = K_nq @ self.normalization_ @ K_nq.T
        # But to use @ with lazy tensors we have:
        K_approx = K_nq @ (K_nq @ self.normalization_ ).T
        
        return K_approx.T 


    def _Gauss_block_sparse_pre(self, x:np.array, y:np.array, K_ij:LazyTensor, 
                               sigma:float = 1., eps:float = 0.05):
        ''' 
        Helper function to preprocess data for block-sparse reduction
        of the Gaussian kernel
    
        Args: 
            x[np.array], y[np.array] = arrays giving rise to Gaussian kernel K(x,y)
            K_ij[LazyTensor_n] = symbolic representation of K(x,y)
            eps[float] = size for square bins
        Returns:
            K_ij[LazyTensor_n] = symbolic representation of K(x,y) with 
                                set sparse ranges
        '''

        # class labels
        x_labels = grid_cluster(x, eps) 
        y_labels = grid_cluster(y, eps) 
        # compute one range and centroid per class
        x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels)
        y_ranges, y_centroids, _ = cluster_ranges_centroids(y, y_labels)
        # sort points
        x, x_labels = sort_clusters(x, x_labels)
        y, y_labels = sort_clusters(y, y_labels) 
        # Compute a coarse Boolean mask:
        D = np.sum((x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2)
        keep = D < (4 * sigma) ** 2  # self.sigma 
        # mask -> set of integer tensors
        ranges_ij = from_matrix(x_ranges, y_ranges, keep)
        K_ij.ranges = ranges_ij  # block-sparsity pattern

        return K_ij


## Testing the fit and transform methods - numpy version

Note: Given $a$ and $b$ two vectors, I am computing the error as $$e = \frac{\| a- b\|_2}{L} $$ where $L = len(a)$. My reasoning for the $1/L$ factor is that the error will grow linearly with the size of the vectors. 

In [5]:
# We test the LazyNystrom_N fit/transform methods using a Linear kernel

length = 1000
num_sampling = 100

x = torch.randint(10,(1,length,3),dtype=torch.float32)
X_i = LazyTensor(x)

# Instatiate & fit Nystroem for comparison
sk_N = Nystroem(kernel='linear', n_components=num_sampling, random_state=0).fit(x[0].numpy())  # input: (length, features) array
x_new = sk_N.transform(x[0].numpy())                                                           # output: (length, num_sampling) array

# Instatiate & fit on lazy tensor version
LN_test = LazyNystrom_N(num_sampling, random_state=0).fit(X_i)   # input: (1, length, features) lazy tensor
X_new_i = LN_test.transform(X_i)                                 # output: (1,length,num_sampling) lazy tensor

# Print the L2 error
err = np.linalg.norm(x_new - X_new_i.sum(dim=0).numpy()) / x_new.size
print(f'Error when compared to sklearn = {err}')

Compiling libKeOpstorch31970816d1 in /root/.cache/pykeops-1.4.2-cpython-36:
       formula: Sum_Reduction(Var(0,3,1),1)
       aliases: Var(0,3,1); 
       dtype  : float32
... Done.
Compiling libKeOpstorcha99f5d65b6 in /root/.cache/pykeops-1.4.2-cpython-36:
       formula: Sum_Reduction(Var(0,100,1),1)
       aliases: Var(0,100,1); 
       dtype  : float32
... Done.
Error when compared to sklearn = 0.0


In [6]:
# We test the LazyNystrom_N fit/transform methods using a Gaussian kernel

length = 1000
num_sampling = 100

x = torch.randint(10,(1,length,3),dtype=torch.float32)
X_i = LazyTensor(x)

# Instatiate & fit Nystroem for comparison
sk_N = Nystroem(kernel='rbf', gamma=1., n_components=num_sampling, random_state=0).fit(x[0].numpy())
x_new = sk_N.transform(x[0].numpy())      # (length, num_sampling) array

# Instatiate & fit on lazy tensor version
LN_test = LazyNystrom_N(num_sampling,kernel='rbf', gamma=1., random_state=0).fit(X_i) # input: (1, length, features) lazy tensor
X_new_i = LN_test.transform(X_i)                                                      # output: (1,length,num_sampling) lazy tensor

# Print the L2 error
err = np.linalg.norm(x_new - X_new_i.sum(dim=0).numpy()) / x_new.size
print(f'Error when compared to sklearn =  {err}')

Error when compared to sklearn =  0.0


## Testing the fit and transform methods - torch version

In [7]:
# We test the LazyNystrom_T fit/transform methods using a Linear kernel

length = 1000
num_sampling = 100

x = torch.randint(10,(1,length,3),dtype=torch.float32)
X_i = LazyTensor(x)

# Instatiate & fit Nystroem for comparison
sk_N = Nystroem(kernel='linear', n_components=num_sampling, random_state=0).fit(x[0].numpy())
x_new = sk_N.transform(x[0].numpy())      # (length, num_sampling) array

# Instatiate & fit on lazy tensor version
LN_test = LazyNystrom_T(num_sampling, random_state=0).fit(X_i)                   # input: (1, length, features) lazy tensor
X_new_i = LN_test.transform(X_i)                                                 # output: (1,length,num_sampling) lazy tensor

# Print the L2 error
err = np.linalg.norm(x_new - X_new_i.sum(dim=0).numpy()) / x_new.size
print(f'Error when compared to sklearn = {err}')

Error when compared to sklearn = 6.183534264564514e-06


In [53]:
# We test the LazyNystrom_T fit/transform methods using a Gaussian kernel

length = 1000
num_sampling = 100

x = torch.randint(10,(1,length,3),dtype=torch.float32)
print(x)
X_i = LazyTensor(x)

# Instatiate & fit Nystroem for comparison
sk_N = Nystroem(kernel='rbf', n_components=num_sampling, random_state=0).fit(x[0].numpy())
x_new = sk_N.transform(x[0].numpy())      # (length, num_sampling) array

# Instatiate & fit on lazy tensor version
LN_test = LazyNystrom_T(num_sampling, kernel='rbf', random_state=0).fit(X_i)
X_new_i = LN_test.transform(X_i)          # (1,length,num_sampling) lazy tensor

# Print the L2 error
err = np.linalg.norm(x_new - X_new_i.sum(dim=0).numpy()) / x_new.size
print(f'Error when compared to sklearn = {err}')

tensor([[[7., 6., 3.],
         [2., 5., 4.],
         [6., 0., 2.],
         ...,
         [2., 8., 4.],
         [1., 9., 2.],
         [2., 1., 7.]]])
Error when compared to sklearn = 0.00013295495986938476


#Testing the methods of the LazyNystrom_NK class

In [54]:
# Testing the fit/transform method with linear kernel

length = 1000
num_sampling = 100

X = np.random.uniform(0,100,size = (length,3))

# ours
nystrom = Nystrom_NK(kernel = 'linear', n_components = num_sampling, random_state = 0)
n_fit = nystrom.fit(X)
x_new = nystrom.transform(X)


# sklearn

sk_n = Nystroem(kernel='linear', gamma=1., n_components=num_sampling, random_state=0).fit(X)
X_new_sk = sk_N.transform(X)      # (length, num_sampling) array

# Print the L2 error
err = np.linalg.norm(x_new - X_new_sk) / X_new_sk.size
print(f'Error when compared to sklearn =  {err}')

Error when compared to sklearn =  0.03129212019556455


There was a typo in the code! (in pairwise kernels -> linear kernel) Fix in master

In [55]:
# Testing the fit/transform method with linear kernel

length = 1000
num_sampling = 100

X = np.random.uniform(0,100,size = (length,3))

# ours
nystrom = Nystrom_NK(random_state = 0)
n_fit = nystrom.fit(X)
x_new = nystrom.transform(X)


# sklearn

sk_n = Nystroem(kernel='rbf', gamma=1., n_components=num_sampling, random_state=0).fit(X)
X_new_sk = sk_N.transform(X)      # (length, num_sampling) array

# Print the L2 error
err = np.linalg.norm(x_new - X_new_sk) / X_new_sk.size
print(f'Error when compared to sklearn =  {err}')

Error when compared to sklearn =  9.045198995833238e-06


In [56]:
# Testing the fit/transform method with linear kernel

length = 1000
num_sampling = 100

X = np.random.uniform(0,100,size = (length,3))

# ours
nystrom = Nystrom_NK(kernel = 'exp', random_state = 0)
n_fit = nystrom.fit(X)
x_new = nystrom.transform(X)


# sklearn

# the resulting difference is low because exp and rbf are so similar but need to
# change the sklearn kernel to exp lol
sk_n = Nystroem(kernel='rbf', gamma=1., n_components=num_sampling, random_state=0).fit(X)
X_new_sk = sk_N.transform(X)      # (length, num_sampling) array

# Print the L2 error
err = np.linalg.norm(x_new - X_new_sk) / X_new_sk.size
print(f'Error when compared to sklearn =  {err}')

Error when compared to sklearn =  8.344281579492499e-06
