In [1]:
from derivatives import rbf_derivative, rbf_derivative_memory, rbf_derivative_slow
import numpy as np
from sklearn.datasets import load_boston, make_regression
from sklearn.metrics import mean_squared_error
from sklearn.metrics.pairwise import rbf_kernel

from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import scale
import time

In [2]:
boston = load_boston()
x, y = scale(boston.data), scale(boston.target)
# x, y = make_regression(n_samples=1000, n_features=50, n_informative=50)

y_pred = {}
mse = {}
derivative = {}

random_state = 123
train_percent = 0.3
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=train_percent)



### KRR 

In [3]:
from sklearn.kernel_ridge import KernelRidge

In [4]:
param_grid = {
    'alpha': np.array([0.0001, 0.001, 0.01, 0.1, 1.]) / x_train.shape[0],
    'gamma': x_train.shape[1] / 2*np.logspace(-6, 6, num=10)
}

grid_search = GridSearchCV(
    KernelRidge(),
    param_grid=param_grid,
    n_jobs=2
)

grid_search.fit(x_train, y_train)

krr_model = grid_search.best_estimator_

y_pred['krr'] = krr_model.predict(x_test)


mse['krr'] = mean_squared_error(y_pred['krr'], y_test)

print('MSE, KRR: {:.4f}'.format(mse['krr']))

weights = krr_model.dual_coef_
gamma = krr_model.gamma
lam = krr_model.alpha

MSE, KRR: 0.3573


In [6]:
from derivatives import (rbf_derivative,
                         rbf_derivative_slow,
                         rbf_derivative_memory)

In [7]:
kernel_mat = rbf_kernel(x_train, x_test, gamma=gamma)
%timeit rbf_derivative(x_train, x_test, weights, gamma)
%timeit rbf_derivative_slow(x_train, x_test, weights, gamma=gamma)
%timeit rbf_derivative_memory(x_train, x_test, weights, gamma=gamma)

3.31 ms ± 317 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
669 ms ± 7.21 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
15.1 ms ± 604 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


### Derivative - Slow Way

In [8]:
t0 = time.time()

derivative['slow'] = np.zeros(shape=x_test.shape)

derivative['slow'] = rbf_derivative_slow(x_train, x_test, weights, gamma=gamma)          

t1_slow = time.time() - t0



print('Shape of x_test: ', format(x_test.shape))
print('Derivative (Slow): {:.4f} secs'.format(t1_slow))
print('Shape: ', derivative['slow'].shape)

Shape of x_test:  (355, 13)
Derivative (Slow): 0.6928 secs
Shape:  (355, 13)


### Derivative - Paper

In [9]:
num_test = x_test.shape[0]
derivative['paper'] = np.zeros(shape=x_test.shape)
kernel_mat = rbf_kernel(x_test, x_train, gamma=gamma)
constant = 2 * gamma
t0 = time.time()
# for itest in range(num_test):
#     derivative['paper'][itest, :] = \
#     np.dot((x_test[itest, :] - x_train).T, (kernel_mat[itest, :] * weights).T)
# derivative['paper'] *= - 2 * gamma

derivative['paper'] = rbf_derivative(x_train, x_test, weights, gamma)

t1_paper = time.time() - t0
print('Derivative (Paper): {:.4f} secs'.format(t1_paper))
print('Derivative shape:', derivative['paper'].shape)
print(gamma)
print(x_train.shape, x_test.shape)

Derivative (Paper): 0.0043 secs
Derivative shape: (355, 13)
6.5e-06
(151, 13) (355, 13)


In [10]:
np.testing.assert_array_almost_equal(derivative['paper'],
                                     derivative['slow'])

print('Speedup: x{:.1f}'.format(t1_slow/t1_paper))

Speedup: x159.9


### Memory Hog Way

In [11]:
print('Kernel Matrix:', format(kernel_mat.shape))
print('Weights:', format(weights.shape))
print('X train:', format(x_train.shape))
print('X test:', format(x_test.shape))

t0 = time.time()

derivative['memory'] = rbf_derivative_memory(x_train, x_test, weights, gamma)
t1_memory = time.time() - t0

print('Derivative (Memory): {:.4f} secs'.format(t1_memory))
print('Derivative shape: {}'.format(derivative['memory'].shape))

Kernel Matrix: (355, 151)
Weights: (151,)
X train: (151, 13)
X test: (355, 13)
Derivative (Memory): 0.0193 secs
Derivative shape: (355, 13)


In [12]:
np.testing.assert_array_almost_equal(derivative['paper'],
                                     derivative['memory'])
print('Speedup (slow): x{:.1f}'.format(t1_slow/t1_memory))
print('Speedup (paper): x{:.1f}'.format(t1_memory/t1_paper))

%timeit rbf_derivative_memory(x_train, x_test, weights, gamma)

Speedup (slow): x35.9
Speedup (paper): x4.5
16.1 ms ± 1.13 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


### GPs (ARD Kernel)

In [13]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel, RBF, WhiteKernel

In [14]:
kernel = RBF(length_scale=np.repeat(1.0,x_train.shape[1]),
             length_scale_bounds=(1e-2, 1e2))    
gpr_model = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=4)

gpr_model.fit(x_train, y_train)

y_pred['gpr'] = gpr_model.predict(x_test)


mse['gpr'] = mean_squared_error(y_pred['gpr'], y_test)

print('MSE, GPR: {:.4f}'.format(mse['gpr']))

MSE, GPR: 0.2699


In [15]:
print(gpr_model.kernel_.get_params())
weights = gpr_model.alpha_
length_scale = gpr_model.kernel_.get_params()['length_scale']
gp_kernel = gpr_model.kernel_

{'length_scale': array([   1.83453438,   32.97706877,  100.        ,    2.94829116,
          1.49420516,    1.51240221,    1.59413373,  100.        ,
          0.94157534,    0.70380556,    2.58995062,    2.79771452,
          0.82886236]), 'length_scale_bounds': (0.01, 100.0)}


In [16]:
from derivatives import ard_kernel

kernel_mat_ard = ard_kernel(x_test, x_train, length_scale)

kernel_mat_built = gp_kernel(x_test, x_train)

print(kernel_mat_ard.shape, kernel_mat_built.shape)

np.testing.assert_array_almost_equal(kernel_mat_ard, kernel_mat_built)

(355, 151) (355, 151)


In [17]:
kernel_mat = rbf_kernel(x_train, x_test, gamma=gamma)
print('Kernel Matrix:', kernel_mat.shape)
print('Weights:', weights.shape)

Kernel Matrix: (151, 355)
Weights: (151,)


In [18]:
def ard_paper(x_train, x_test, weights, gamma):
    
    kernel_mat = ard_kernel(x_test, x_train, gamma)
    
    num_test = x_test.shape[0]
    
    derivative = np.zeros(shape=x_test.shape)
    
    for itest in range(num_test):
        
        derivative= \
        np.dot((x_test[itest, :] - x_train).T,
               (kernel_mat[itest, :] * weights).T)
        
    derivative *= - 1 / gamma**2
    return derivative

In [19]:
from derivatives import ard_derivative

num_test = x_test.shape[0]
derivative['ard'] = np.zeros(shape=x_test.shape)
kernel_mat = rbf_kernel(x_test, x_train, gamma=gamma)
constant = 2 * gamma
t0 = time.time()
# for itest in range(num_test):
#     derivative['paper'][itest, :] = \
#     np.dot((x_test[itest, :] - x_train).T, (kernel_mat[itest, :] * weights).T)
# derivative['paper'] *= - 2 * gamma

derivative['ard'] = ard_derivative(x_train, x_test, weights, gamma)

t1_paper = time.time() - t0
print('Derivative (ard): {:.4f} secs'.format(t1_paper))
print('Derivative ard:', derivative['ard'].shape)

Derivative (ard): 0.0044 secs
Derivative ard: (355, 13)


### Cython

In [20]:
%load_ext cython

In [21]:
%%cython

cimport cython
import numpy as np
cimport numpy as np

@cython.boundscheck(False)
@cython.wraparound(False)
def rbf_cython(np.float64_t[:, :] x_train, 
                   np.float64_t[:, :] x_function,
                   np.float64_t[:] weights,
                   np.float64_t[:, :] kernel_mat,
                   np.float64_t gamma):
    """This function calculates the rbf derivative using
    Cython. It has been fairly optimized and provides x100
    speedup over the original python function.
    
    Parameters
    ----------
    x_train : array, [N x D], float64
        The training data used to find the kernel model.

    x_function  : array, [M x D], float
        The test points (or vector) to use.

    weights   : array, [N x D], float64
        The weights found from the kernel model
            y = K * weights

    kernel_mat: array, [N x M], float64
        The rbf kernel matrix with the similarities between the test
        points and the training points.

    n_derivative : int, (default = 1) {1, 2}, int
        chooses which nth derivative to calculate

    gamma : float, default: None, float64
        the parameter for the rbf_kernel matrix function

    Returns
    -------

    derivative : array, [M x D], float64
        returns the derivative with respect to training points used in
        the kernel model and the test points.

    Information
    -----------
    Author: Juan Emmanuel Johnson
    Email : jej2744@rit.edu
            juan.johnson@uv.es
    """
    cdef int d_dimensions = x_function.shape[1]
    cdef int n_test = x_function.shape[0]
    cdef int n_train = x_train.shape[0]
    cdef int idim, iTest, iTrain
    
    # initialize the derivative
    cdef np.float64_t[:,:] derivative = np.zeros((n_test, d_dimensions))

    # consolidate the parameters
    cdef np.float64_t theta = 2.0 * gamma

        
    # loop through dimensions
    for idim in range(d_dimensions):

        # loop through the number of test points
        for iTest in range(n_test):

            # loop through the number of test points
            for iTrain in range(n_train):

                # calculate the derivative for the test points
                derivative[iTest, idim] += theta * weights[iTrain] * \
                                          (x_train[iTrain, idim] -
                                           x_function[iTest, idim]) * \
                                          kernel_mat[iTrain, iTest]
                            
    return np.asarray(derivative)

In [22]:
t0 = time.time()

kernel_mat = rbf_kernel(x_train, x_test, gamma)

derivative['cython'] = rbf_cython(x_train,
                                  x_test,
                                  weights,
                                  kernel_mat,
                                  gamma)
t1cython = time.time() - t0
print('Derivative (cython): {:.4f} secs'.format(t1cython))
print('Derivative shape: {}'.format(derivative['cython'].shape))

Derivative (cython): 0.0094 secs
Derivative shape: (355, 13)


In [23]:
time_cy = %timeit -o rbf_cython(x_train,x_test,weights,kernel_mat,gamma)

776 µs ± 5.24 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [24]:
print(time_cy)

776 µs ± 5.24 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


#### Cython (Parallel)

In [25]:
%%cython

cimport cython
import numpy as np
cimport numpy as np
from cython.parallel import prange

@cython.boundscheck(False)
@cython.wraparound(False)
def rbf_cython_parallel(np.float64_t[:, :] x_train, 
                   np.float64_t[:, :] x_function,
                   np.float64_t[:] weights,
                   np.float64_t[:, :] kernel_mat,
                   np.float64_t gamma):
    """This function calculates the rbf derivative using
    Cython. It has been fairly optimized and provides x100
    speedup over the original python function.
    
    Parameters
    ----------
    x_train : array, [N x D], float64
        The training data used to find the kernel model.

    x_function  : array, [M x D], float
        The test points (or vector) to use.

    weights   : array, [N x D], float64
        The weights found from the kernel model
            y = K * weights

    kernel_mat: array, [N x M], float64
        The rbf kernel matrix with the similarities between the test
        points and the training points.

    n_derivative : int, (default = 1) {1, 2}, int
        chooses which nth derivative to calculate

    gamma : float, default: None, float64
        the parameter for the rbf_kernel matrix function

    Returns
    -------

    derivative : array, [M x D], float64
        returns the derivative with respect to training points used in
        the kernel model and the test points.

    Information
    -----------
    Author: Juan Emmanuel Johnson
    Email : jej2744@rit.edu
            juan.johnson@uv.es
    """
    cdef int d_dimensions = x_function.shape[1]
    cdef int n_test = x_function.shape[0]
    cdef int n_train = x_train.shape[0]
    cdef int idim, iTest, iTrain
    
    # initialize the derivative
    cdef np.float64_t[:,:] derivative = np.zeros((n_test, d_dimensions))

    # consolidate the parameters
    cdef np.float64_t theta = 2.0 * gamma

        
    # loop through dimensions
    for idim in prange(d_dimensions, nogil=True):

        # loop through the number of test points
        for iTest in prange(n_test):

            # loop through the number of test points
            for iTrain in prange(n_train):

                # calculate the derivative for the test points
                derivative[iTest, idim] += theta * weights[iTrain] * \
                                          (x_train[iTrain, idim] -
                                           x_function[iTest, idim]) * \
                                          kernel_mat[iTrain, iTest]
                            
    return np.asarray(derivative)

In [27]:
%timeit rbf_cython_parallel(x_train,x_test,weights,kernel_mat,gamma)

835 µs ± 58.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
