# Spectral decomposition with kernels

In [1]:
import time
import sys

import matplotlib.pyplot as plt
import numpy as np
from scipy.linalg import eigh

sys.path.append('..')
from weights import NearestNeighbors, KernelComputer, KernelRegressor

np.random.seed(0)

The following commands use the LaTeX backend so that matplotlib produces Type 1 fonts in PDF output (it produces Type 3 fonts otherwise). They require a working LaTeX installation; you can comment those lines out if you do not have one.

In [2]:
%matplotlib inline
plt.rcParams['text.latex.preamble'] = [r'\usepackage{newtxmath}'] 
plt.rcParams['text.usetex'] = True
plt.rcParams["font.family"] = "serif"
plt.rcParams["font.serif"] = "Times"
plt.rc('text', usetex=True)

In [3]:
def get_data(nb, d, nl=None, train=False):
    """Sample points on four concentric spheres of radius 1, 2, 3 and 4.

    Point ``i`` lies on the sphere of radius ``(i % 4) + 1``; its class is
    ``i % 4``. Samples are drawn from the global NumPy random state.

    Parameters
    ----------
    nb: int
        number of points to draw
    d: int
        dimension of the ambient space
    nl: int, optional
        number of labelled points returned when ``train`` is True. It is
        capped at ``nb``; ``None`` means that every point is labelled.
        Ignored when ``train`` is False.
    train: bool, optional
        if True return one-hot labels for the first ``nl`` points,
        otherwise return the integer class of every point.

    Returns
    -------
    x: ndarray of shape (nb, d)
        the sampled points
    y: ndarray
        one-hot labels of shape (nl, 4) if ``train``, else integer classes
        of shape (nb,)

    Raises
    ------
    ValueError
        if ``train`` is True and ``nl`` is negative
    """
    x = np.random.randn(nb, d)
    # project the Gaussian samples onto the unit sphere
    x /= np.sqrt((x ** 2).sum(axis=1))[:, np.newaxis]
    # radii cycle through 1, 2, 3, 4, 1, 2, ...
    r = np.arange(nb) % 4 + 1
    x *= r[:, np.newaxis]
    if not train:
        return x, r - 1
    # the default nl=None used to crash in min(nb, None)
    nl = nb if nl is None else min(nb, nl)
    if nl < 0:
        raise ValueError("nl must be non-negative")
    y = np.eye(4)[r[:nl] - 1]
    return x, y


In [4]:
def get_K(x1, x2, sigma):
    """Gaussian kernel matrix between two sets of points.

    Parameters
    ----------
    x1: array_like of shape (n1, d)
        first set of points (rows of the result)
    x2: array_like of shape (n2, d)
        second set of points (columns of the result)
    sigma: float
        bandwidth of the Gaussian kernel

    Returns
    -------
    K: ndarray of shape (n1, n2)
        K[i, j] = exp(-||x1[i] - x2[j]||^2 / (2 * sigma^2))
    """
    x1 = np.asarray(x1)
    x2 = np.asarray(x2)
    K = x1 @ x2.T
    # The in-place division below cannot write floats into an integer
    # buffer, so promote integer inputs once here.
    if not np.issubdtype(K.dtype, np.inexact):
        K = K.astype(float)
    # -||a - b||^2 = 2 <a, b> - ||a||^2 - ||b||^2, built in place
    K *= 2
    K -= np.sum(x1 ** 2, axis=1)[:, np.newaxis]
    K -= np.sum(x2 ** 2, axis=1)
    K /= 2 * sigma ** 2
    np.exp(K, out=K)
    return K


def get_SZ(x1, x2, sigma, SS=None, reshape=True):
    """Kernel matrix weighted by the scaled coordinate differences.

    With ``SS`` the Gaussian kernel matrix of ``get_K``, the entries are the
    partial derivatives of the kernel with respect to the coordinates of its
    second argument.

    Parameters
    ----------
    x1: array_like of shape (n1, d)
    x2: array_like of shape (n2, d)
    sigma: float
        bandwidth of the Gaussian kernel
    SS: ndarray of shape (n1, n2), optional
        precomputed kernel matrix; computed with ``get_K`` if omitted
    reshape: bool, optional
        if True return the 2D layout, otherwise the 3D array

    Returns
    -------
    SZ: ndarray
        if ``reshape`` is False, shape (n1, n2, d) with
        SZ[i, j, k] = SS[i, j] * (x1[i, k] - x2[j, k]) / sigma^2;
        if ``reshape`` is True, shape (n1, n2 * d) with
        SZ[i, k * n2 + j] equal to the entry [i, j, k] above.
    """
    if SS is None:
        SS = get_K(x1, x2, sigma)
    x1 = np.asarray(x1)
    x2 = np.asarray(x2)
    sigma2 = sigma ** 2

    if not reshape:
        # diff[i, j, k] = (x1[i, k] - x2[j, k]) / sigma^2
        diff = (x1[:, np.newaxis, :] - x2[np.newaxis, :, :]) / sigma2
        return SS[..., np.newaxis] * diff

    # Build directly in (n1, d, n2) layout so that flattening the last two
    # axes yields the column order k * n2 + j without an extra copy loop.
    n1, n2 = SS.shape
    d = x1.shape[1]
    # diff[i, k, j] = (x1[i, k] - x2[j, k]) / sigma^2
    diff = (x1[:, :, np.newaxis] - x2.T[np.newaxis, :, :]) / sigma2
    SZ = SS[:, np.newaxis, :] * diff
    return SZ.reshape(n1, d * n2)


def get_ZZ(x1, x2, sigma, SS=None, reshape=True):
    """Kernel matrix weighted by second-order difference terms.

    Parameters
    ----------
    x1: ndarray of shape (n1, d)
    x2: ndarray of shape (n2, d)
    sigma: float
        bandwidth of the Gaussian kernel
    SS: ndarray of shape (n1, n2), optional
        precomputed kernel matrix; computed with ``get_K`` if omitted
    reshape: bool, optional
        if True return the 2D block layout, otherwise the 4D array

    Returns
    -------
    ZZ: ndarray
        if ``reshape`` is False, shape (n1, n2, d, d) with
        ZZ[i, j, k, l] = SS[i, j] * ((k == l) / sigma^2
                         - (x1[i, k] - x2[j, k]) * (x1[i, l] - x2[j, l]) / sigma^4);
        if ``reshape`` is True, shape (n1 * d, n2 * d) with
        ZZ[k * n1 + i, l * n2 + j] equal to the entry [i, j, k, l] above.
    """
    if SS is None:
        SS = get_K(x1, x2, sigma)
    d = x1.shape[1]

    # delta[i, j, k] = x1[i, k] - x2[j, k]
    delta = x1[:, np.newaxis, :] - x2[np.newaxis, ...]
    # weight[i, j, k, l] = -delta[i, j, k] * delta[i, j, l] / sigma^4
    weight = delta[:, :, :, np.newaxis] * delta[:, :, np.newaxis, :]
    weight /= sigma**4
    weight *= -1
    # add 1 / sigma^2 on the k == l diagonal of every (i, j) slice
    diag = np.arange(d)
    weight[:, :, diag, diag] += 1 / (sigma**2)

    ZZ = np.tile(SS[..., np.newaxis, np.newaxis], (1, 1, d, d))
    ZZ *= weight
    if not reshape:
        return ZZ

    # copy each (k, l) slice into its own (n1, n2) block of the 2D layout
    n1, n2 = SS.shape
    blocks = np.empty((n1 * d, n2 * d), ZZ.dtype)
    for k in range(d):
        rows = slice(n1 * k, n1 * (k + 1))
        for l in range(d):
            blocks[rows, n2 * l:n2 * (l + 1)] = ZZ[..., k, l]
    return blocks


def get_ST(x1, x2, sigma, SS=None, SZ=None):
    """Stack the kernel matrix and its ``get_SZ`` companion side by side.

    Parameters
    ----------
    x1: ndarray of shape (n1, d)
    x2: ndarray of shape (n2, d)
    sigma: float
        bandwidth of the Gaussian kernel
    SS: ndarray of shape (n1, n2), optional
        precomputed ``get_K(x1, x2, sigma)``
    SZ: ndarray of shape (n1, n2 * d), optional
        precomputed ``get_SZ(x1, x2, sigma)`` in its 2D layout

    Returns
    -------
    ST: ndarray of shape (n1, n2 * (d + 1))
        ``[SS | SZ]`` as a float64 array
    """
    if SS is None:
        SS = get_K(x1, x2, sigma)
    if SZ is None:
        SZ = get_SZ(x1, x2, sigma, SS)
    n1, n2 = SS.shape
    d = x1.shape[1]
    # np.float was removed in NumPy 1.24; the builtin float is the same dtype.
    # Every entry is overwritten below, so no zero-fill is needed.
    ST = np.empty((n1, n2 * (d + 1)), dtype=float)
    ST[:, :n2] = SS
    ST[:, n2:] = SZ
    return ST


def get_TZ(x1, x2, sigma, SZ=None, ZZ=None):
    """Stack the ``get_SZ`` and ``get_ZZ`` matrices on top of each other.

    Parameters
    ----------
    x1: ndarray of shape (n1, d)
    x2: ndarray of shape (n2, d)
    sigma: float
        bandwidth of the Gaussian kernel
    SZ: ndarray of shape (n1, n2 * d), optional
        precomputed ``get_SZ(x1, x2, sigma)`` in its 2D layout
    ZZ: ndarray of shape (n1 * d, n2 * d), optional
        precomputed ``get_ZZ(x1, x2, sigma)`` in its 2D layout

    Returns
    -------
    TZ: ndarray of shape (n1 * (d + 1), n2 * d)
        ``SZ`` in the first n1 rows, ``ZZ`` below, as a float64 array
    """
    if SZ is None or ZZ is None:
        # compute the kernel matrix once and share it between both helpers
        SS = get_K(x1, x2, sigma)
        if SZ is None:
            SZ = get_SZ(x1, x2, sigma, SS)
        if ZZ is None:
            ZZ = get_ZZ(x1, x2, sigma, SS)
    n1, d = x1.shape
    n2 = x2.shape[0]
    # np.float was removed in NumPy 1.24; the builtin float is the same dtype.
    TZ = np.empty((n1 * (d + 1), n2 * d), dtype=float)
    TZ[:n1, :] = SZ
    TZ[n1:, :] = ZZ
    return TZ


def get_TT(x1, sigma, SS=None, SZ=None, ZZ=None):
    """Assemble the symmetric block matrix ``[[SS, SZ], [SZ^T, ZZ]]``.

    All blocks are built from ``x1`` paired with itself.

    Parameters
    ----------
    x1: ndarray of shape (n, d)
    sigma: float
        bandwidth of the Gaussian kernel
    SS: ndarray of shape (n, n), optional
        precomputed ``get_K(x1, x1, sigma)``
    SZ: ndarray of shape (n, n * d), optional
        precomputed ``get_SZ(x1, x1, sigma)`` in its 2D layout
    ZZ: ndarray of shape (n * d, n * d), optional
        precomputed ``get_ZZ(x1, x1, sigma)`` in its 2D layout

    Returns
    -------
    TT: ndarray of shape (n * (d + 1), n * (d + 1))
        the assembled float64 block matrix
    """
    # The helpers take two point sets; the original default calls omitted
    # the second one and raised TypeError.
    if SS is None:
        SS = get_K(x1, x1, sigma)
    if SZ is None:
        SZ = get_SZ(x1, x1, sigma, SS)
    if ZZ is None:
        ZZ = get_ZZ(x1, x1, sigma, SS)
    n, d = x1.shape
    # np.float was removed in NumPy 1.24; the builtin float is the same dtype.
    TT = np.empty((n * (d + 1), n * (d + 1)), dtype=float)
    TT[:n, :n] = SS
    TT[:n, n:] = SZ
    TT[n:, :n] = SZ.transpose()
    TT[n:, n:] = ZZ
    return TT

In [5]:
def get_ypred_small(x_train, y_train, x_test, sigma=.2, mu=1, 
                    Tikhonov=True, lambd=1, full_matrix=False,
                    k=4, psi=lambda x: (x+1)**(-1),
                   ):
    """Predict test classes using one coefficient vector entry per training point.

    The predictor is ``get_K(x_test, x_train) @ c``. The labelled points are
    taken to be the first ``len(y_train)`` rows of ``x_train``.

    Parameters
    ----------
    x_train, x_test: array_like
        design matrices of the data of shape (number_data, data_dimension)
    y_train: ndarray
        label matrix with one-hot encoding of shape (number_labels, nb_classes)
    sigma: float
        bandwidth of the Gaussian kernel
    mu: float
        weight of the kernel matrix added to the regularisation matrix ``B``
    Tikhonov: boolean, optional
        if True solve ``(A + lambd * B) c = b`` with np.linalg.solve,
        otherwise filter the ``k`` largest generalized eigenpairs of
        ``(A, B)`` obtained with scipy.linalg.eigh
    lambd: float, optional
        value of Tikhonov regularization
    full_matrix: boolean, optional
        if True build ``A`` from all rows of the kernel matrix (normalised
        by the number of training points), otherwise from the labelled rows
        only (normalised by the number of labels)
    k: int, optional
        number of eigenvectors to consider in spectral clustering
    psi: function, optional
        filtering function applied to the eigenvalues in spectral clustering

    Returns
    -------
    y_pred: ndarray of int
        array of predictions of shape (number_test_data,)
    """
    n_points = len(x_train)
    n_labelled = len(y_train)
    gram = get_K(x_train, x_train, sigma)
    # kernel columns attached to the labelled points
    gram_labelled = gram[:, :n_labelled]

    # data-fitting matrix
    if full_matrix:
        A = gram @ gram
        A /= n_points
    else:
        A = gram_labelled @ gram[:n_labelled]
        A /= n_labelled

    # regularisation matrix, with a small ridge to keep it well conditioned
    grad = get_SZ(x_train, x_train, sigma, gram)
    B = grad @ grad.T
    B /= n_points
    B += mu * gram
    B += 10**(-7) * np.identity(B.shape[0])

    # right-hand side built from the labelled points
    b = gram_labelled @ y_train[:n_labelled]
    b /= n_labelled

    if Tikhonov:
        coef = np.linalg.solve(A + lambd * B, b)
    else:
        size = A.shape[0]
        # eigh sorts eigenvalues in ascending order: keep the last k
        v, e = eigh(A, B, subset_by_index=[size - k, size - 1])
        coef = (e * psi(v)) @ (e.T @ b)

    scores = get_K(x_test, x_train, sigma) @ coef
    return scores.argmax(axis=1)

In [6]:
def get_ypred_full(x_train, y_train, x_test, sigma=.2, mu=1, 
                   Tikhonov=True, lambd=1, full_matrix=False,
                   k=4, psi=lambda x: (x+1)**(-1),
                  ):
    """Predict test classes using the stacked ``get_ST`` representation.

    The predictor is ``get_ST(x_test, x_train) @ c``, so the coefficient
    vector has ``number_data * (data_dimension + 1)`` rows. The labelled
    points are taken to be the first ``len(y_train)`` rows of ``x_train``.

    Parameters
    ----------
    x_train, x_test: array_like
        design matrices of the data of shape (number_data, data_dimension)
    y_train: ndarray
        label matrix with one-hot encoding of shape (number_labels, nb_classes)
    sigma: float
        bandwidth of the Gaussian kernel
    mu: float
        weight of the ``get_TT`` matrix added to the regularisation matrix ``B``
    Tikhonov: boolean, optional
        if True solve ``(A + lambd * B) c = b`` with np.linalg.solve,
        otherwise filter the ``k`` largest generalized eigenpairs of
        ``(A, B)`` obtained with scipy.linalg.eigh
    lambd: float, optional
        value of Tikhonov regularization
    full_matrix: boolean, optional
        if True build ``A`` from all rows of the stacked matrix (normalised
        by the number of training points), otherwise from the labelled rows
        only (normalised by the number of labels)
    k: int, optional
        number of eigenvectors to consider in spectral clustering
    psi: function, optional
        filtering function applied to the eigenvalues in spectral clustering

    Returns
    -------
    y_pred: ndarray of int
        array of predictions of shape (number_test_data,)
    """
    n_points = len(x_train)
    n_labelled = len(y_train)
    gram = get_K(x_train, x_train, sigma)
    grad = get_SZ(x_train, x_train, sigma, gram)
    stacked = get_ST(x_train, x_train, sigma, gram, grad)
    # rows of the stacked matrix attached to the labelled points
    stacked_labelled = stacked[:n_labelled]

    # data-fitting matrix
    if full_matrix:
        A = stacked.T @ stacked
        A /= n_points
    else:
        A = stacked_labelled.T @ stacked_labelled
        A /= n_labelled

    # regularisation matrix, with a small ridge to keep it well conditioned
    hess = get_ZZ(x_train, x_train, sigma, gram)
    tz = get_TZ(x_train, x_train, sigma, grad, hess)
    B = tz @ tz.T
    B /= n_points
    B += mu * get_TT(x_train, sigma, gram, grad, hess)
    B += 10**(-7) * np.identity(B.shape[0])

    # right-hand side built from the labelled points
    b = stacked_labelled.T @ y_train[:n_labelled]
    b /= n_labelled

    if Tikhonov:
        coef = np.linalg.solve(A + lambd * B, b)
    else:
        size = A.shape[0]
        # eigh sorts eigenvalues in ascending order: keep the last k
        v, e = eigh(A, B, subset_by_index=[size - k, size - 1])
        coef = (e * psi(v)) @ (e.T @ b)

    scores = get_ST(x_test, x_train, sigma) @ coef
    return scores.argmax(axis=1)

In [7]:
# Experiment sizes: ambient dimension, number of training and test points,
# and number of labelled training points (ten per dimension).
dim = 5
n_train = 2000
n_test = 10000
n_label = 10 * dim

# Both calls draw from the global NumPy random state, so the order of these
# two lines determines which samples end up in each set.
# y_train is one-hot with n_label rows; y_test holds integer classes.
x_train, y_train = get_data(n_train, dim, nl=n_label, train=True)
x_test, y_test = get_data(n_test, dim)

Small solution

In [8]:
# Time the small-representation solver and report its test error.
# perf_counter is the clock intended for measuring elapsed intervals.
t_start = time.perf_counter()
y_pred = get_ypred_small(x_train, y_train, x_test, mu=1/n_train, sigma=2e-1, Tikhonov=True, lambd=1,)
# Alternative arguments for the spectral (non-Tikhonov) variant:
#                          k=500, psi=lambda x: (x+1e-3)**(-1), full_matrix=True)
t = time.perf_counter() - t_start
# fraction of misclassified test points
error = 1 - (y_pred == y_test).mean()
print(error, t)

0.5698 2.0897278785705566


Full solution

In [9]:
# Time the full-representation solver and report its test error.
# perf_counter is the clock intended for measuring elapsed intervals.
t_start = time.perf_counter()
y_pred = get_ypred_full(x_train, y_train, x_test, mu=1/n_train, sigma=1e-2, Tikhonov=True, lambd=1,)
# Alternative arguments for the spectral (non-Tikhonov) variant:
#                        k=500, psi=lambda x: (x+1e-3)**(-1), full_matrix=True)
t = time.perf_counter() - t_start
# fraction of misclassified test points
error = 1 - (y_pred == y_test).mean()
print(error, t)

0.7492 48.268500328063965
