# Gaussian process kernels

In [1]:
%matplotlib notebook

import sys
import numpy as np
import scipy
import tensorflow as tf
import tensorflow_probability as tfp
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import cm # Colormaps
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.gridspec as gridspec
import seaborn as sns

# Short aliases for TensorFlow Probability sub-modules
tfd = tfp.distributions
psd_kernels = tfp.positive_semidefinite_kernels

# Plot styling, and a fixed seed for NumPy's global random generator so that
# the realizations sampled with np.random below are reproducible
sns.set_style('darkgrid')
np.random.seed(42)
#

  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)


In [2]:
# Attach the RationalQuadratic kernel from the local `rational_quadratic`
# module to the `psd_kernels` namespace, so it can be constructed as
# `psd_kernels.RationalQuadratic` in the same way as the other kernels.
# NOTE(review): presumably the installed tensorflow_probability version does
# not ship this kernel — confirm, and drop this patch if it does.
from rational_quadratic import RationalQuadratic
psd_kernels.RationalQuadratic = RationalQuadratic

## Kernel function

The [covariance](https://en.wikipedia.org/wiki/Covariance_function) can be modelled by a [kernel](https://en.wikipedia.org/wiki/Positive-definite_kernel) function $K(X,Y)$ that models how much the samples from $X$ and $Y$ change together. $K(X,Y)$ is positive if there is some similarity between the samples and will be zero if the samples are completely independent.

### Radial basis function kernel
A popular kernel is the [radial basis function](https://en.wikipedia.org/wiki/Radial_basis_function_kernel) (RBF) kernel: 

$$K(X,X') = \exp \left( -\frac{1}{2\sigma^2} \lVert X-X' \rVert^2 \right) $$

TODO: Multidimensional?

This kernel is illustrated in the next figure which shows the similarity with respect to $0$: $K(0,y)$. Note that the similarity returned by the kernel decreases exponentially towards $0$ the farther we move away from the center, and that the similarity is maximal at the center $\mathbf{x}=\mathbf{y}$. Also note that the parameter $\sigma$ (width) defines how fast the similarity decreases. This means that the output of the RBF kernel function $K(\mathbf{x},\mathbf{y})$ is a similarity based on how close $\mathbf{x}$ lies to $\mathbf{y}$; the greater $\sigma$ is, the more slowly this similarity decays with distance.

aka
- Gaussian kernel
- Squared exponential

Smooth?
- https://en.wikipedia.org/wiki/Gaussian_blur#Low-pass_filter
- https://en.wikipedia.org/wiki/Gaussian_filter
- https://calculatedcontent.com/2012/02/06/kernels_part_1/
- http://crsouza.com/2010/03/17/kernel-functions-for-machine-learning-applications/
- https://www.quora.com/Support-Vector-Machines/What-is-the-intuition-behind-Gaussian-kernel-in-SVM-How-can-I-visualize-the-transformation-function-%CF%95-that-corresponds-to-the-Gaussian-kernel-Why-is-the-Gaussian-kernel-popular
- https://stats.stackexchange.com/questions/131138/what-makes-the-gaussian-kernel-so-magical-for-pca-and-also-in-general

Using the RBF kernel will result in [smooth](https://en.wikipedia.org/wiki/Smoothness) functions sampled from the Gaussian process.

### Prior
The specification of this covariance function implies a distribution over functions $f(X)$. By choosing a specific kernel function $K$ it is possible to set some [prior](https://en.wikipedia.org/wiki/Prior_probability) information on this distribution. For example the RBF kernel captures the prior information that the data lies in a [smooth](https://en.wikipedia.org/wiki/Smoothness) space. This is because it is infinitely differentiable over its full range. I will illustrate this more later in this tutorial.

### Valid kernels
In order to be a valid kernel function the resulting kernel matrix should be [positive semi-definite](https://en.wikipedia.org/wiki/Positive-definite_matrix). This means that the matrix should be [symmetric](https://en.wikipedia.org/wiki/Symmetric_matrix), since $K(\mathbf{x},\mathbf{y}) = K(\mathbf{y},\mathbf{x})$, and that $\mathbf{z}^\top K \mathbf{z} \geq 0$ for every vector $\mathbf{z}$ (equivalently, all eigenvalues of the kernel matrix are non-negative). Note that this is a property of the matrix as a whole: individual entries $K(\mathbf{x},\mathbf{y})$ are allowed to be negative. If the kernel matrix is strictly positive definite it is also [invertible](https://en.wikipedia.org/wiki/Invertible_matrix).

In [3]:
# Plotting function to be used below

def plot_kernel(X, y, Σ, description, fig, subplot_spec, xlim,
                scatter=False, rotate_x_labels=False):
    """Plot sampled realizations next to their covariance matrix.

    The given `subplot_spec` is split into a wide left panel showing the
    columns of `y` against `X`, and a narrower right panel showing `Σ` as
    a heatmap with a colorbar.

    Args:
        X: Input points, used as x-coordinates of the sample panel.
            The heatmap tick placement assumes that these points are
            evenly spaced over `xlim` (e.g. `np.linspace(*xlim, n)`).
        y: Array holding one realization per column (`y[:, i]`).
        Σ: Covariance matrix to show as a heatmap.
        description: Text appended to the title of both panels.
        fig: Figure to which the two axes are added.
        subplot_spec: Subplot spec (e.g. `gs[0]`) that is subdivided.
        xlim: `(low, high)` x-limits of the sample panel. Both values
            need to be integers since the heatmap tick labels are
            picked from `range(low, high+1)`.
        scatter: If True draw the samples as points instead of lines.
        rotate_x_labels: If True rotate the x tick labels of both
            panels by 30 degrees.
    """
    grid_spec = gridspec.GridSpecFromSubplotSpec(
        1, 2, width_ratios=[2,1], height_ratios=[1],
        wspace=0.18, hspace=0.0,
        subplot_spec=subplot_spec)
    ax1 = fig.add_subplot(grid_spec[0])
    ax2 = fig.add_subplot(grid_spec[1])
    # Plot samples
    for i in range(y.shape[1]):
        if scatter:
            ax1.scatter(X, y[:,i], alpha=0.8, s=3)
        else:
            ax1.plot(X, y[:,i], alpha=0.8)
    ax1.set_ylabel('$y$', fontsize=13, labelpad=0)
    ax1.set_xlabel('$x$', fontsize=13, labelpad=0)
    ax1.set_xlim(xlim)
    if rotate_x_labels:
        for label in ax1.get_xticklabels():
            label.set_rotation(30)
    ax1.set_title(f'Samples from {description}')
    # Plot covariance matrix
    im = ax2.imshow(Σ, cmap=cm.YlGnBu)
    divider = make_axes_locatable(ax2)
    cax = divider.append_axes('right', size='5%', pad=0.02)
    # Attach the colorbar through `fig` so that it always ends up on the
    # figure that was passed in, not on whatever figure is current.
    cbar = fig.colorbar(im, cax=cax)
    cbar.ax.set_ylabel('$K(X,X)$', fontsize=8)
    ax2.set_title(f'Covariance matrix\n{description}')
    ax2.set_xlabel('X', fontsize=10, labelpad=0)
    ax2.set_ylabel('X', fontsize=10, labelpad=0)
    # Show at most 5 integer-valued ticks on x and y axis of covariance plot
    nb_ticks = 5
    ticks = np.array(range(xlim[0], xlim[1]+1))
    ticks_idx = np.rint(np.linspace(
        1, len(ticks), num=min(nb_ticks,len(ticks)))-1).astype(int)
    ticks = ticks[ticks_idx]
    # Map every tick value onto the pixel index it belongs to. The pixel
    # centres of the heatmap run from 0 to len(X)-1, so placing the last
    # tick at len(X) would fall outside of the image, and equally spaced
    # positions would not match the rounded integer labels.
    span = xlim[1] - xlim[0]
    if span > 0:
        tick_positions = (ticks - xlim[0]) / span * (len(X) - 1)
    else:
        tick_positions = np.zeros(len(ticks))
    ax2.set_xticks(tick_positions)
    ax2.set_yticks(tick_positions)
    ax2.set_xticklabels(list(ticks))
    ax2.set_yticklabels(list(ticks))
    if rotate_x_labels:
        for label in ax2.get_xticklabels():
            label.set_rotation(30)
    ax2.grid(False)
#

# White noise kernel

In [4]:
# White noise kernel: covariance matrix and a few sampled realizations

xlim = (-2, 2)
nb_of_samples = 150  # How many input points are evaluated
nb_of_realizations = 3  # How many functions are drawn
# Evenly spaced input points as a column vector
X = np.linspace(xlim[0], xlim[1], nb_of_samples)[:, np.newaxis]

# An identity covariance matrix makes all points independent of each other
Σ = np.eye(nb_of_samples)
y = np.random.multivariate_normal(
    np.zeros(nb_of_samples), Σ, size=nb_of_realizations).T

# Draw everything on a single-row figure
fig = plt.figure(figsize=(7, 2.7))
gs = gridspec.GridSpec(
    nrows=1, ncols=1, figure=fig, wspace=0.0, hspace=0.0)
plot_kernel(
    X, y, Σ, 'white noise kernel',
    fig, gs[0], xlim, scatter=True)
fig.tight_layout()
plt.show()
#

<IPython.core.display.Javascript object>

## Exponentiated quadratic

$$k(x_a, x_b) = \sigma^2\exp\left(-\frac{(x_a - x_b)^2}{2\ell^2}\right)$$

With:
* $\ell$ the length scale
* $\sigma^2$ the output variance ($\sigma$ is amplitude)
```
k(x, y) = amplitude**2 * exp(-||x - y||**2 / (2 * length_scale**2))
```

https://www.tensorflow.org/probability/api_docs/python/tfp/positive_semidefinite_kernels/ExponentiatedQuadratic

In [5]:
def exponentiated_quadratic_tf(amplitude, length_scale):
    """Build an exponentiated quadratic kernel object.

    Both parameters are wrapped as float64 TensorFlow constants before
    they are handed to `psd_kernels.ExponentiatedQuadratic`.
    """
    kernel_params = {
        'amplitude': tf.constant(amplitude, dtype=tf.float64),
        'length_scale': tf.constant(length_scale, dtype=tf.float64),
    }
    return psd_kernels.ExponentiatedQuadratic(**kernel_params)
                                  

def exponentiated_quadratic(xa, xb, amplitude, length_scale):
    """Evaluate the exponentiated quadratic kernel matrix of xa and xb."""
    matrix_op = exponentiated_quadratic_tf(
        amplitude, length_scale).matrix(xa, xb)
    # Evaluate the op in a session that is closed again right after
    with tf.Session() as session:
        evaluated = session.run(matrix_op)
    return evaluated

In [6]:
# Exponentiated quadratic: covariance as a function of the distance to 0

xlim = (-4, 4)
X = np.linspace(*xlim, num=100)[:, np.newaxis]
zero = np.array([[0.]])
# (length_scale, amplitude, legend label) of each curve, in drawing order
curve_settings = [
    (1, 1, '$\\ell = 1$, $\\sigma = 1$'),
    (0.5, 1, '$\\ell = 0.5$, $\\sigma = 1$'),
    (1., 0.5, '$\\ell = 1$, $\\sigma = 0.5$'),
]

fig, ax = plt.subplots(figsize=(5.4, 3))
for length_scale, amplitude, label in curve_settings:
    Σ = exponentiated_quadratic(
        zero, X, length_scale=length_scale, amplitude=amplitude)
    ax.plot(X[:,0], Σ[0,:], label=label)
ax.set_xlabel('$X_a - X_b$', fontsize=11)
ax.set_ylabel('$K(X_a,X_b)$', fontsize=11)
ax.set_title('Exponentiated quadratic distance plot')
ax.set_ylim([0, 1.1])
ax.set_xlim(*xlim)
ax.legend(loc=1)
plt.tight_layout()
plt.show()
#

<IPython.core.display.Javascript object>

In [7]:
# Exponentiated quadratic: covariance matrices and sampled realizations

nb_of_samples = 250  # Number of test points.
nb_of_realizations = 3  # Number of function realizations
# Generate input points
xlim = (-4, 4)
X = np.linspace(*xlim, nb_of_samples)[:, np.newaxis]

# (length_scale, amplitude, description) of each figure row, top to bottom
kernel_settings = [
    (1, 1, '$\\ell = 1$, $\\sigma = 1$'),
    (0.3, 1, '$\\ell = 0.3$, $\\sigma = 1$'),
    (2, 1, '$\\ell = 2$, $\\sigma = 1$'),
    (1, 10, '$\\ell = 1$, $\\sigma = 10$'),
]

# Start plotting
fig = plt.figure(figsize=(7, 10))
gs = gridspec.GridSpec(
    len(kernel_settings), 1, figure=fig, wspace=0.2, hspace=0.4)

for row, (length_scale, amplitude, description) in enumerate(
        kernel_settings):
    Σ = exponentiated_quadratic(
        X, X, length_scale=length_scale, amplitude=amplitude)
    # Draw the realizations from the zero-mean prior with this covariance
    y = np.random.multivariate_normal(
        mean=np.zeros(nb_of_samples), cov=Σ,
        size=nb_of_realizations).T
    plot_kernel(X, y, Σ, description, fig, gs[row], xlim)

plt.suptitle('Exponentiated quadratic', y=0.99)
fig.subplots_adjust(
    left=0.07, bottom=0.04, right=0.93, top=0.95)
plt.show()
#

<IPython.core.display.Javascript object>

### RBF kernel as smooth prior

Note that the identity matrix $I$ contains ones on the diagonal and zeros everywhere else. This means that all samples taken from $\mathcal{N}(0,I)$ are independent of each other and we can just sample each sample separately. These independent samples are then transformed with the help of the linear transformation $L$ that maps the independent samples to a space where they are more correlated. This correlation is defined by the kernel matrix $K=LL^\top$.  
The next figures illustrate how increasing the width $\sigma$ of the kernel increases its smoothness.

## Rational Quadratic

$$k(x_a, x_b) = \sigma^2 \left( 1 + \frac{(x_a - x_b)^2}{2 \alpha \ell^2} \right)^{-\alpha}$$

With:
* $\sigma$ the amplitude
* $\ell$ the length scale
* $\alpha$ the scale mixture


```
k(x, y) = amplitude**2 * (1. + ||x - y|| ** 2 / (2 * scale_mixture_rate * length_scale**2)) ** -scale_mixture_rate
```

In [8]:
def rational_quadratic_tf(length_scale, scale_mixture, amplitude):
    """Build a rational quadratic kernel object.

    All parameters are wrapped as float64 TensorFlow constants;
    `scale_mixture` is passed on as the kernel's `scale_mixture_rate`.
    """
    kernel_params = {
        'amplitude': tf.constant(amplitude, dtype=tf.float64),
        'length_scale': tf.constant(length_scale, dtype=tf.float64),
        'scale_mixture_rate': tf.constant(scale_mixture, dtype=tf.float64),
    }
    return psd_kernels.RationalQuadratic(**kernel_params)


def rational_quadratic(xa, xb, length_scale, scale_mixture, amplitude=1):
    """Evaluate the rational quadratic kernel matrix of xa and xb.

    Args:
        xa: First set of input points.
        xb: Second set of input points.
        length_scale: Length scale of the kernel.
        scale_mixture: Scale mixture rate of the kernel.
        amplitude: Amplitude of the kernel. Defaults to 1, which is the
            value that used to be hard-coded here.

    Returns:
        The kernel matrix as evaluated by a TensorFlow session.
    """
    kernel = rational_quadratic_tf(length_scale, scale_mixture, amplitude)
    kernel_matrix = kernel.matrix(xa, xb)
    # Evaluate the op in a session that is closed again right after
    with tf.Session() as sess:
        return sess.run(kernel_matrix)

In [9]:
# Rational quadratic: covariance as a function of the distance to 0

xlim = (-7, 7)
X = np.linspace(*xlim, num=200)[:, np.newaxis]
zero = np.array([[0.]])

fig, ax = plt.subplots(figsize=(5.4, 3))
# One curve per (length_scale, scale_mixture) pair, in drawing order
for length_scale, scale_mixture in [
        (1, 1), (5, 1), (0.2, 1), (1, 0.2), (1, 1000)]:
    Σ = rational_quadratic(
        zero, X, length_scale=length_scale, scale_mixture=scale_mixture)
    ax.plot(
        X[:,0], Σ[0,:],
        label=f'$\\ell={length_scale}$, $\\alpha={scale_mixture}$')

ax.set_xlabel('$X_a - X_b$', fontsize=11)
ax.set_ylabel('$K(X_a,X_b)$', fontsize=11)
ax.set_title('Rational quadratic distance plot ($\\sigma=1$)')
ax.set_ylim((0, 1.1))
ax.set_xlim(*xlim)
ax.legend(loc=1)
plt.tight_layout()
plt.show()
#

<IPython.core.display.Javascript object>

In [10]:
# Rational quadratic: covariance matrices and sampled realizations

nb_of_samples = 250  # Number of test points.
nb_of_realizations = 3  # Number of function realizations
# Generate input points
xlim = (-5, 5)
X = np.linspace(*xlim, nb_of_samples)[:, np.newaxis]

# (length_scale, scale_mixture) of each figure row, top to bottom
kernel_settings = [(1, 1), (5, 1), (0.2, 1), (1, 0.2), (1, 1000)]

# Start plotting
fig = plt.figure(figsize=(7, 12))
gs = gridspec.GridSpec(
    len(kernel_settings), 1, figure=fig, wspace=0.2, hspace=0.5)

for row, (length_scale, scale_mixture) in enumerate(kernel_settings):
    Σ = rational_quadratic(
        X, X, length_scale=length_scale, scale_mixture=scale_mixture)
    # Draw the realizations from the zero-mean prior with this covariance
    y = np.random.multivariate_normal(
        mean=np.zeros(nb_of_samples), cov=Σ,
        size=nb_of_realizations).T
    plot_kernel(
        X, y, Σ, f'$\\ell={length_scale}$, $\\alpha={scale_mixture}$',
        fig, gs[row], xlim)

fig.suptitle('Rational quadratic ($\\sigma=1$)', y=0.99)
fig.subplots_adjust(
    left=0.06, bottom=0.04, right=0.94, top=0.95)
plt.show()
#

<IPython.core.display.Javascript object>

## Periodic

$$k(x_a, x_b) = \sigma^2\exp\left(-\frac{2}{\ell^2}\sin^2 \left( \pi \frac{\lvert x_a - x_b \rvert}{p}\right) \right)$$

With:
* $\sigma$ amplitude
* $\ell$ length scale
* $p$ period


```
k(x, y) = amplitude**2 * exp( -2  / length_scale ** 2 * sum_k sin(pi * |x_k - y_k| / period) ** 2)
```

https://www.tensorflow.org/probability/api_docs/python/tfp/positive_semidefinite_kernels/ExpSinSquared

In [11]:
def periodic_tf(length_scale, period, amplitude=1):
    """Build a periodic (exp-sine-squared) kernel object.

    Args:
        length_scale: Length scale of the kernel.
        period: Period of the kernel.
        amplitude: Amplitude of the kernel. Defaults to 1, which is the
            value that used to be hard-coded here.

    Returns:
        The `psd_kernels.ExpSinSquared` kernel, with all parameters
        wrapped as float64 TensorFlow constants.
    """
    amplitude_tf = tf.constant(amplitude, dtype=tf.float64)
    length_scale_tf = tf.constant(length_scale, dtype=tf.float64)
    period_tf = tf.constant(period, dtype=tf.float64)
    kernel = psd_kernels.ExpSinSquared(
        amplitude=amplitude_tf, 
        length_scale=length_scale_tf,
        period=period_tf)
    return kernel

def periodic(xa, xb, length_scale, period, amplitude=1):
    """Evaluate the periodic kernel matrix of xa and xb.

    Args:
        xa: First set of input points.
        xb: Second set of input points.
        length_scale: Length scale of the kernel.
        period: Period of the kernel.
        amplitude: Amplitude of the kernel (defaults to 1).

    Returns:
        The kernel matrix as evaluated by a TensorFlow session.
    """
    kernel = periodic_tf(length_scale, period, amplitude)
    kernel_matrix = kernel.matrix(xa, xb)
    # Evaluate the op in a session that is closed again right after
    with tf.Session() as sess:
        return sess.run(kernel_matrix)

In [12]:
# Periodic kernel: covariance as a function of the distance to 0

xlim = (-2, 2)
X = np.expand_dims(np.linspace(*xlim, num=200), 1)
zero = np.array([[0.]])

# One figure per list of (length_scale, period) curves: the first figure
# varies the length scale, the second one varies the period.
figure_settings = [
    [(1, 1), (2, 1), (0.5, 1)],
    [(1, 1), (1, 0.5), (1, 2)],
]

for curve_settings in figure_settings:
    fig, ax = plt.subplots(figsize=(5.4, 3))
    for length_scale, period in curve_settings:
        Σ = periodic(
            zero, X, length_scale=length_scale, period=period)
        ax.plot(
            X[:,0], Σ[0,:],
            label=f'$\\ell={length_scale}$, $p={period}$')
    ax.set_xlabel('$X_a - X_b$', fontsize=11)
    ax.set_ylabel('$K(X_a,X_b)$', fontsize=11)
    ax.set_title('Periodic distance plot ($\\sigma=1$)')
    ax.set_ylim((0, 1.1))
    ax.set_xlim(*xlim)
    ax.legend(loc=1)
    fig.tight_layout()
    # Use plt.show() for both figures, like the rest of the notebook:
    # fig.show() warns and shows nothing on non-GUI backends.
    plt.show()
#

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [13]:
# Periodic kernel: covariance matrices and sampled realizations

nb_of_samples = 250  # Number of test points.
nb_of_realizations = 3  # Number of function realizations
# Generate input points
xlim = (-2, 2)
X = np.linspace(*xlim, nb_of_samples)[:, np.newaxis]

# (length_scale, period) of each figure row, top to bottom
kernel_settings = [(1, 1), (2, 1), (0.5, 1), (1, 0.5), (1, 2)]

# Start plotting
fig = plt.figure(figsize=(7, 12))
gs = gridspec.GridSpec(
    len(kernel_settings), 1, figure=fig, wspace=0.2, hspace=0.5)

for row, (length_scale, period) in enumerate(kernel_settings):
    Σ = periodic(
        X, X, length_scale=length_scale, period=period)
    # Draw the realizations from the zero-mean prior with this covariance
    y = np.random.multivariate_normal(
        mean=np.zeros(nb_of_samples), cov=Σ,
        size=nb_of_realizations).T
    plot_kernel(
        X, y, Σ, f'$\\ell={length_scale}$, $p={period}$',
        fig, gs[row], xlim)

fig.suptitle('Periodic ($\\sigma=1$)', y=0.99)
fig.subplots_adjust(
    left=0.06, bottom=0.04, right=0.94, top=0.95)
plt.show()
#

<IPython.core.display.Javascript object>

## Local periodic

$$k(x_a, x_b) = \sigma^2 \exp \left(-\frac{2}{\ell_p^2}\sin^2 \left( \pi \frac{\lvert x_a - x_b \rvert}{p}\right) \right) \exp\left(-\frac{(x_a - x_b)^2}{2\ell_{eq}^2}\right)$$

With:
* $\sigma$ amplitude
* $\ell_p$ length scale periodic
* $p$ period
* $\ell_{eq}$ length scale exponentiated quadratic

In [14]:
def local_periodic_tf(periodic_length_scale, period, 
                      amplitude, local_length_scale):
    """Build a locally periodic kernel object.

    The kernel is the product of a periodic (exp-sine-squared) kernel,
    which carries the amplitude, and an exponentiated quadratic kernel
    that only gets `local_length_scale`. All parameters are wrapped as
    float64 TensorFlow constants.
    """
    float64_params = [
        tf.constant(value, dtype=tf.float64)
        for value in (periodic_length_scale, period,
                      amplitude, local_length_scale)]
    (periodic_length_scale_tf, period_tf,
     amplitude_tf, local_length_scale_tf) = float64_params

    periodic_part = psd_kernels.ExpSinSquared(
        amplitude=amplitude_tf,
        length_scale=periodic_length_scale_tf,
        period=period_tf)
    local_part = psd_kernels.ExponentiatedQuadratic(
        length_scale=local_length_scale_tf)
    return periodic_part * local_part


def local_periodic(xa, xb, periodic_length_scale, period, 
                   amplitude, local_length_scale):
    """Evaluate the locally periodic kernel matrix of xa and xb."""
    matrix_op = local_periodic_tf(
        periodic_length_scale, period,
        amplitude, local_length_scale).matrix(xa, xb)
    # Evaluate the op in a session that is closed again right after
    with tf.Session() as session:
        evaluated = session.run(matrix_op)
    return evaluated

In [15]:
# Locally periodic kernel: covariance as a function of the distance to 0

xlim = (-2, 2)
X = np.linspace(*xlim, num=200)[:, np.newaxis]
zero = np.array([[0.]])

fig, ax = plt.subplots(figsize=(5.4, 3))
# Only the exponentiated quadratic length scale differs between the curves
for local_length_scale in (1, 0.5, 2):
    Σ = local_periodic(
        zero, X,
        periodic_length_scale=1, period=1, amplitude=1,
        local_length_scale=local_length_scale)
    ax.plot(
        X[:,0], Σ[0,:], label=f'$\\ell_{{eq}}={local_length_scale}$')
ax.set_xlabel('$X_a - X_b$', fontsize=11)
ax.set_ylabel('$K(X_a,X_b)$', fontsize=11)
ax.set_title('Local periodic distance plot ($\\sigma=1$, $\\ell_p=1$, $p=1$)')
ax.set_ylim((0, 1.1))
ax.set_xlim(*xlim)
ax.legend(loc=1)
fig.tight_layout()
plt.show()
#

<IPython.core.display.Javascript object>

In [16]:
# Locally periodic kernel: covariance matrices and sampled realizations

nb_of_samples = 250  # Number of test points.
nb_of_realizations = 3  # Number of function realizations
# Generate input points
xlim = (-3, 3)
X = np.linspace(*xlim, nb_of_samples)[:, np.newaxis]

# Exponentiated quadratic length scale of each figure row, top to bottom
local_length_scales = (1.5, 0.5, 3)

# Start plotting
fig = plt.figure(figsize=(7, 8))
gs = gridspec.GridSpec(
    len(local_length_scales), 1, figure=fig, wspace=0.2, hspace=0.4)

for row, local_length_scale in enumerate(local_length_scales):
    Σ = local_periodic(
        X, X,
        periodic_length_scale=1, period=1, amplitude=1,
        local_length_scale=local_length_scale)
    # Draw the realizations from the zero-mean prior with this covariance
    y = np.random.multivariate_normal(
        mean=np.zeros(nb_of_samples), cov=Σ,
        size=nb_of_realizations).T
    plot_kernel(
        X, y, Σ, f'$\\ell_{{eq}}={local_length_scale}$',
        fig, gs[row], xlim)

fig.suptitle('Local periodic ($\\sigma=1$, $\\ell_p=1$, $p=1$)', y=0.99)
fig.subplots_adjust(
    left=0.07, bottom=0.05, right=0.93, top=0.93)
plt.show()
#

<IPython.core.display.Javascript object>

## Combined kernel

In [17]:
def combined_kernel_tf():
    """Build the sum of a smooth, a locally periodic and an irregular kernel.

    The three terms are an exponentiated quadratic, a locally periodic and
    a rational quadratic kernel, each with fixed parameters.
    NOTE(review): the parameter values look like the outcome of a fit done
    elsewhere (the plots call this the 'fitted kernel') — confirm origin.
    """
    kernel_terms = [
        # Smooth term
        exponentiated_quadratic_tf(
            amplitude=252.5150,
            length_scale=151.8924),
        # Locally periodic term
        local_periodic_tf(
            periodic_length_scale=1.5774,
            period=0.9997,
            amplitude=3.1232,
            local_length_scale=139.4979),
        # Irregular term
        rational_quadratic_tf(
            length_scale=1.6982,
            scale_mixture=0.0600,
            amplitude=1.2975),
    ]
    smooth_kernel, local_periodic_kernel, irregular_kernel = kernel_terms
    return smooth_kernel + local_periodic_kernel + irregular_kernel


def combined_kernel(xa, xb):
    """Evaluate the combined kernel matrix of xa and xb."""
    matrix_op = combined_kernel_tf().matrix(xa, xb)
    # Evaluate the op in a session that is closed again right after
    with tf.Session() as session:
        evaluated = session.run(matrix_op)
    return evaluated

In [18]:
# Combined kernel: covariance matrices and sampled realizations

nb_of_samples = 250  # Number of test points.

# (nb_of_realizations, xlim) of each figure row, top to bottom: the same
# kernel is shown over increasingly wide input ranges.
row_settings = [
    (1, (0, 5)),
    (1, (0, 10)),
    (5, (0, 50)),
]

# Start plotting
fig = plt.figure(figsize=(7, 8))
gs = gridspec.GridSpec(
    len(row_settings), 1, figure=fig, wspace=0.2, hspace=0.5)

for row, (nb_of_realizations, xlim) in enumerate(row_settings):
    X = np.linspace(*xlim, nb_of_samples)[:, np.newaxis]
    Σ = combined_kernel(X, X)
    # Draw the realizations from the zero-mean prior with this covariance
    y = np.random.multivariate_normal(
        mean=np.zeros(nb_of_samples), cov=Σ,
        size=nb_of_realizations).T
    plot_kernel(
        X, y, Σ, 'fitted kernel', fig, gs[row], xlim)

fig.suptitle('Combined kernel', y=0.97)
plt.show()
#

<IPython.core.display.Javascript object>

In [19]:
# Print versions used
print('Python: {}.{}.{}'.format(*sys.version_info[:3]))
# TensorFlow and TensorFlow Probability are listed as well, since every
# kernel in this notebook is built and evaluated with them.
for package in (np, scipy, matplotlib, sns, tf, tfp):
    print('{}: {}'.format(package.__name__, package.__version__))
#

Python: 3.6.6
numpy: 1.15.1
scipy: 1.1.0
matplotlib: 2.2.3
seaborn: 0.9.0


- https://www.cs.toronto.edu/~duvenaud/cookbook/
- http://www.gaussianprocess.org/gpml/chapters/RW4.pdf
- file:///home/peter/Downloads/holiday/kernels.pdf (TODO: local file path, not reachable for readers; replace with a public link to this document)
- https://mlkernels.readthedocs.io/en/latest/kernels.html
- http://ml.dcs.shef.ac.uk/gpss/gpws14/KernelDesign.pdf