# Conformalized Kernel Ridge Regression

In [None]:
import gc
import os
import time
from math import sqrt

import numpy as np
import scipy.stats as st
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

In [None]:
from utils import _save, _load

from scipy.linalg import cholesky, solve_triangular
from sklearn.metrics.pairwise import pairwise_kernels as kernel
from sklearn.utils import check_random_state, resample
from sklearn.cross_validation import train_test_split, ShuffleSplit
from sklearn.gaussian_process import GaussianProcess

from utils.mpl_mid_point_norm import MidPointNorm



In [None]:
import time

In [None]:
def mkdirifnot(path):
    """Make sure the directory `path` exists and return `path` unchanged.

    Missing intermediate directories are created as well, and a directory
    that appears between the existence check and the creation attempt
    (e.g. made by another notebook running at the same time) is not an error.

    Parameters
    ----------
    path : str
        Directory to create if nothing exists at that location yet.

    Returns
    -------
    str
        The same `path`, so that the call can be used inside an assignment.

    Raises
    ------
    OSError
        If the directory could not be created.
    """
    if not os.path.exists(path):
        try:
            # Unlike `os.mkdir`, `os.makedirs` also builds the missing parents.
            os.makedirs(path)
        except OSError:
            # Losing a creation race is fine; anything else is a real failure.
            if not os.path.isdir(path):
                raise
    return path

# Every run of the notebook writes into its own timestamped tree ./new-<stamp>/.
BASE_PATH = mkdirifnot(os.path.join(".", "new-%s"%(time.strftime("%Y%m%d_%H%M%S"),)))

# Figures live under <base>/plots, split by purpose; result dumps go to <base>/dumps.
PLOT_PATH = mkdirifnot(os.path.join(BASE_PATH, "plots"))
SAMPLE_PLOT_PATH, VISUAL_PLOT_PATH, OUTPUT_PLOT_PATH = (
    mkdirifnot(os.path.join(PLOT_PATH, leaf_)) for leaf_ in ("sample", "visual", "output"))
OUTPUT_PATH = mkdirifnot(os.path.join(BASE_PATH, "dumps"))

## Preamble

Compute RBF kernel:
$$ K(x, x') = \mathop{\text{exp}}\bigl\{-\gamma\|x-x'\|^2\bigr\} \,, $$
for arbitrary datasets $X\in\mathcal{X}^{n\times 1}$ and $Z\in\mathcal{X}^{m\times 1}$,
$\mathcal{X} \subseteq \mathbb{R}^{d\times 1}$.

Obtain a matrix
$$ Q
    = \begin{pmatrix}
        aI_{n+m}
        + \begin{pmatrix}
            K_{XX} & K_{XX^*}\\
            K_{X^*X} & K_{X^*X^*}
          \end{pmatrix}
       \end{pmatrix}^{-1}
    \,, $$
and return its block structure according to:
$$ Q
    = \begin{pmatrix}
        Q_X + Q_XK_{XX^*} M^{-1} K_{X^*X} Q_X & - Q_X K_{XX^*} M^{-1} \\
        - M^{-1} K_{X^*X} Q_X & M^{-1}
    \end{pmatrix}
    \,. $$

Since only the $A$ and $B$ vectors are required to construct the Conformal Confidence
Region for regression, we prepare a special routine to compute them in bulk:
$$ B'e_i
    = \begin{pmatrix}
        - Q_X K_{Xz_i} \\
        1
    \end{pmatrix} m_i^{-1} a \,, $$
and
$$ A'e_i
    = \begin{pmatrix}
        Q_X y + Q_X K_{Xz_i} m_i^{-1} K_{z_iX} Q_X y \\
        - m_i^{-1} K_{z_iX} Q_X y
    \end{pmatrix} a
    = \begin{pmatrix} Q_X y \\ 0 \end{pmatrix} a
    - \begin{pmatrix}
        - Q_X K_{Xz_i} \\
        1
    \end{pmatrix} m_i^{-1} K_{z_iX} Q_X y a
    = \begin{pmatrix} a Q_X y \\ 0 \end{pmatrix}
    - B'e_i K_{z_iX} Q_X y \,, $$
with
$$ m_i = a + K(z_i, z_i) - K_{z_iX} Q_X K_{Xz_i} \,. $$

Note that $a Q_X = I_n - K_{XX} Q_X$.

LOO residuals are computed using the following result: for all $i=1,\ldots, n$ it is
true that $\hat{r}_i = e_i' a Q_X y$, which is given by 
$$ \hat{r}_i
    = a m_i^{-1} \bigl(y_i - k_{-i}(x_i)Q_{-i}y_{-i} \bigr)
     = a m_i^{-1} \hat{r}_{i\vert -i}
    \,, $$
using the block inversion of a row- and column-permuted matrix $Q_X$. In a more compact
matrix form this is given by
$$ \hat{r} = a \mathop{\text{diag}}(Q_X) \hat{r}_{\text{loo}} \,, $$
which, when all $m_i$ are non-zero, is equivalent to:
$$ \hat{r}_{\text{loo}}
    = a^{-1} \mathop{\text{diag}}(Q_X)^{-1} \hat{r}
    = a^{-1} \mathop{\text{diag}}(Q_X)^{-1} a Q_X y
    = \mathop{\text{diag}}(Q_X)^{-1} Q_X y
    \,. $$

In [None]:
from utils.KRR import KRR_AB

A separate fast LOO computation procedure

In [None]:
from utils.KRR import KRR_loo

A confidence region builder for measure
$$ \alpha_i^y = - \hat{r}^y_i = - (a_i + b_i y) \,. $$
The p-value is computed as
$$ p^y = n^{-1} \bigl\lvert\{i=1,\ldots, n\,:\, y\in S_i \}\bigr\rvert \,, $$
where
$$ S_i = \{y\in \mathbb{R}\,:\, \alpha_i^y \leq \alpha_n^y \} \,. $$


A confidence region builder for measures
$$ \alpha_i^y = \hat{r}^y_i \text{ and } \alpha_i^y = -\hat{r}^y_i \,, $$
used in Vovk, Burnaev (2014) for Ridge Regression.

In [None]:
from utils.conformal import CCR

Now a confidence measure builder for measure
$$ \alpha^y_i = - \lvert \hat{r}^y_i\rvert\,. $$

A confidence region builder for Vovk's original Ridge Regression Confidence Machine.

In [None]:
from utils.conformal import RRCM

## Exploration

A gaussian process generator

In [None]:
from utils.functions import get_functions
# Name -> sample-generator lookup: entries are fetched by name (e.g. DGP["gaussian"])
# and called on an array of input points in the cells below.
DGP = get_functions()

Fix the random state.

In [None]:
# One seeded generator shared by the sample generator and the train/test split below.
random_state = np.random.RandomState(0x12345678)

Generate mesh of $X$ samples for the Gaussian Process.

In [None]:
# Regular 51 x 51 grid over [-1, 1]^2; XX holds one row per grid node.
dim_ = 2
mesh_ = np.meshgrid(*([np.linspace(-1, 1, num=51)] * dim_))
XX = np.column_stack([ax_.ravel() for ax_ in mesh_])

Set the width of the RBF kernel and the regularization parameter.

In [None]:
# `gamma` is the RBF coefficient in K(x, x') = exp(-gamma * ||x - x'||^2);
# it is handed to the "gaussian" sample generator below.
gamma = 50.0

Set the levels

In [None]:
# Significance levels in decreasing order: 25%, 10%, 5%, 1%.
levels = np.flipud(np.asanyarray([0.01, 0.05, 0.10, 0.25]))
# Matching percent labels, used as table headers and in plot titles.
lvl_cols_ = ["%4.1f%%"%(pct_,) for pct_ in 100 * levels]

Generate a GP with the specified kernel

In [None]:
name_ = "gaussian"
# Draw one sample on the grid XX with the RBF coefficient set above.
yy = DGP[name_](XX, random_state=random_state, nugget=1e-9, gamma=gamma)
# Keep the targets as a column so that later arithmetic with 2-D arrays lines up.
if yy.ndim == 1:
    yy = yy.reshape((-1, 1))

Make a nice 3D plot

In [None]:
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111, projection='3d')
# The flat sample is folded back onto the mesh shape for surface plotting.
ax.plot_surface(mesh_[0], mesh_[1], yy.reshape(mesh_[0].shape),
                cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0, antialiased=False)
ax.view_init(60, 30)
ax.set_title("A sample trajectory of a 2D Gaussian process")
plt.show()

Train/test split

In [None]:
# Hold out 25% of the grid nodes; the split draws from the shared `random_state`.
X_train, X_test, y_train, y_test = train_test_split(XX, yy, test_size=0.25, random_state=random_state)
print X_train.shape, X_test.shape

Fit a Gaussian process regression to the train dataset $(X, y)$

In [None]:
# GP regression on the raw (unnormalised) train sample with beta0=0.
# presumably theta is tuned within [thetaL, thetaU] during the fit — see the
# legacy sklearn GaussianProcess documentation.
gp = GaussianProcess(thetaL=1e-4, thetaU=1e2, beta0=0,
                     normalize=False, nugget=1e-6).fit(X_train, y_train)
# The fitted kernel parameter; its first entry is reused as the RBF gamma later on.
print gp.theta_

Get a prediction on the whole dataset.

In [None]:
# Predict on the full grid (train and test nodes alike) and request the MSE as well.
y_pred, sigma2_pred = gp.predict(XX, eval_MSE=True)
# Turn a flat MSE vector into a column.
if sigma2_pred.ndim == 1:
    sigma2_pred = sigma2_pred.reshape((-1, 1))

Predicted levels

In [None]:
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111, projection='3d')
# GP point forecast over the whole grid, folded back onto the mesh shape.
ax.plot_surface(mesh_[0], mesh_[1], y_pred.reshape(mesh_[0].shape),
                cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0, antialiased=False)
ax.view_init(60, 30)
plt.show()

Absolute error

In [None]:
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111, projection='3d')
# |actual - predicted| at every grid node (train nodes included).
ax.plot_surface(mesh_[0], mesh_[1], np.abs(yy - y_pred).reshape(mesh_[0].shape),
                cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0, antialiased=False)
ax.view_init(60, 30)
plt.show()

Prediction variance:
$$ \mathtt{var}(\hat{y}^*_{|(X, y), x^*}) = K(x^*, x^*) - k_X(x^*)'Q_X k_X(x^*) \,. $$

In [None]:
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111, projection='3d')
# The surface shows the square root of the predictive MSE, i.e. a standard deviation.
ax.plot_surface(mesh_[0], mesh_[1], np.sqrt(sigma2_pred).reshape(mesh_[0].shape),
                cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0, antialiased=False)
ax.view_init(60, 30)
plt.show()

Now the estimate of the forecast error.

In [None]:
# Absolute error relative to a Gaussian error bar built from the predictive MSE.
# NOTE(review): the noise term added to the MSE elsewhere in this notebook is
# gp.sigma2 * gp.nugget (see the assertion on MM); here the whole gp.sigma2 is
# added, and the quantile is the one-sided 95% one — confirm both are intended.
ratio_ = np.abs(yy-y_pred) / (np.sqrt(sigma2_pred + gp.sigma2) * st.norm.ppf(0.95))
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(mesh_[0], mesh_[1], ratio_.reshape(mesh_[0].shape),
                cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0, antialiased=False)
ax.view_init(60, 30)
plt.show()

The key parameters are: $\sigma^2$, $\gamma$ and $\mathtt{nugget}$.

The function to analyse the conformal procedure.

In [None]:
def _pccia(key, levels, y, y_hat, m, A, B):
    """Build and score the confidence intervals of a single test point.

    Three families of regions are produced, one region per significance level:
    the conformal regions returned by `RRCM` and by `CCR` (both built from
    `A` and `B`), and the Gaussian ("Bayesian") interval
    ``y_hat -/+ sqrt(m) * z`` with ``z`` the ``1 - level / 2`` normal quantile.

    Parameters
    ----------
    key : object
        Identifier of the test point; handed back unchanged as the first item.
    levels : numpy array
        Significance levels.
    y : scalar or one-element array
        Observed target at the test point.
    y_hat : scalar or one-element array
        Point forecast at the test point.
    m : scalar or one-element array
        Predictive variance at the test point.
    A, B : arrays
        Forwarded verbatim to `RRCM` and `CCR`.

    Returns
    -------
    tuple
        ``(key, rrcm, ccr, bayes)``; each of the last three is a triple
        ``(hit, width, hull)`` of arrays with one entry per region: whether `y`
        falls in any interval of the region, the total length of its intervals,
        and the ``[min, max]`` over its endpoints.
    """
    # Half-widths of the two-sided Gaussian intervals, one per level.  `st` is the
    # notebook's top-level `scipy.stats` import, so the function does not depend
    # on a `norm` name that is only imported in a later cell.
    z_levels = sqrt(m) * st.norm.ppf(1 - .5 * levels)
    bayes_ = [np.array([[a, b]]) for a, b in zip(y_hat - z_levels, y_hat + z_levels)]
    rrcm_ = RRCM(A, B, levels=levels)
    ccr_ = CCR(A, B, levels=levels)

    def _helper(y, regions):
        # Every region is a 2-D array whose rows are [lower, upper] intervals.
        hit_ = np.asarray([np.any((int_[:, 0] <= y) & (y <= int_[:, 1])) for int_ in regions])
        width_ = np.asarray([np.sum(int_[:, 1] - int_[:, 0]) for int_ in regions])
        hull_ = np.asarray([[int_.min(), int_.max()] for int_ in regions])
        return hit_, width_, hull_

    return key, _helper(y, rrcm_), _helper(y, ccr_), _helper(y, bayes_)

Re-compute the necessary vectors.

In [None]:
# Compute the A/B vectors, the point forecast and MM at the test points, reusing the
# hyper-parameters of the fitted GP (its nugget, and the first theta as the RBF gamma).
A, B, y_hat_, MM, loo_ = KRR_AB(X_train, y_train, X_test,
                                nugget=gp.nugget, loo=False, metric="rbf",
                                gamma=gp.theta_[0])
# Rescale by the GP process variance; MM is compared with the GP's MSE in the next cell.
MM *= gp.sigma2

GP prediction

In [None]:
y_hat_gp, mse_gp = gp.predict(X_test, eval_MSE=True)
# Sanity checks: the KRR point forecast must agree with the GP forecast, and the
# rescaled MM must equal the GP's predictive MSE plus the term sigma2 * nugget.
assert np.allclose(y_hat_, y_hat_gp, rtol=1e-3)
assert np.allclose(MM[:, 0], mse_gp + gp.sigma2 * gp.nugget)

Initialize the parallel backend

In [None]:
from scipy.stats import norm
from itertools import chain

from joblib import Parallel, delayed
# Job runner configured with n_jobs=-1 and verbose=1; reused by the cells below.
parallel_ = Parallel(n_jobs=-1, verbose=1)

Create a generator for jobs and run them in parallel.

In [None]:
# One lazy job per test point: its target, point forecast, MM entry and the k-th
# slices of the A and B arrays returned by KRR_AB.
jobs_ = (delayed(_pccia)(k, levels, y_test[k], y_hat_[k], MM[k], A[0, k], B[k])
         for k in xrange(y_test.shape[0]))

Execute

In [None]:
# Consume the job generator; one (key, rrcm, ccr, bayes) tuple comes back per test point.
results_ = parallel_(jobs_)

Combine

In [None]:
# Re-pack the per-point tuples into arrays.  Interval types are ordered
# (rrcm, ccr, bayes) along the last axis:
#   width_, hits_ : (test point, level, type)
#   bounds_       : (test point, level, [lower, upper], type)
width_ = np.stack([np.stack((rrcm_[1], ccr_[1], bayes_[1]), axis=1)
                   for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
hits_ = np.stack([np.stack((rrcm_[0], ccr_[0], bayes_[0]), axis=1)
                  for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
bounds_ = np.stack([np.stack((rrcm_[2], ccr_[2], bayes_[2]), axis=2)
                    for key_, rrcm_, ccr_, bayes_ in results_], axis=0)

Show the coverage.

In [None]:
# Average the hit indicators over the test points: one empirical coverage per
# (level, interval type) pair.
hits_.mean(axis=0)

In [None]:
# Median interval width over the test points, per (level, interval type) pair.
np.median(width_, axis=0)

Excess

In [None]:
# Signed overshoot of the target beyond the interval hull, in units of half the
# interval width: positive above the upper bound, negative below the lower bound,
# zero inside.
for i, type_ in enumerate(["rrcm", "crr", "bayes"]):
    for j, sign_ in enumerate(lvl_cols_):
        # `y_test` is a column here, so BOTH bounds need a trailing axis: without
        # it the subtraction broadcasts to an (n, n) matrix that pairs every
        # target with the bounds of every other test point.
        excess_u_ = y_test - bounds_[:, j, 1, i, np.newaxis]
        excess_d_ = bounds_[:, j, 0, i, np.newaxis] - y_test
        excess_u_[excess_u_ < 0] = 0
        excess_d_[excess_d_ < 0] = 0
        excess_ = 2 * (excess_u_ - excess_d_) / width_[:, j, i, np.newaxis]

        fig = plt.figure(figsize=(16, 9))
        ax = fig.add_subplot(111, projection='3d')

        ax.set_title("""Excess of `%s`-type %s-interval for `%s`"""
                     %(type_, sign_, name_,))
        ax.plot_trisurf(X_test[:, 0], X_test[:, 1], excess_[..., 0], cmap=plt.cm.coolwarm,
                        lw=0, alpha=.95, norm=MidPointNorm())
        ax.view_init(60, 60)
        plt.show()
        # Only the first (type, level) pair is drawn; drop both `break`s to plot all.
        break
    break

## Simple experiments

## 1D case

In [None]:
# NOTE: this import rebinds the name `get_functions` used earlier for the
# generators in `utils.functions`.
from utils.functions_1d import get_functions
# Name -> 1D test-function lookup; each entry is called on a column of inputs below.
func1d_ = get_functions()

from scipy.stats import norm
from itertools import chain
from joblib import Parallel, delayed
parallel_ = Parallel(n_jobs=-1, verbose=1)

Now add some noise

In [None]:
# random_state = np.random.RandomState(0x0ABACABA)
# random_state = np.random.RandomState(0x0DABACAB)
random_state = np.random.RandomState(0x0EABACAB)
use_loo = True

# Multiplier of the standard normal noise added to the targets below.
noise_ = 1e-6
# Two-sided normal quantiles, one per level; used to rescale interval widths.
z_a = norm.ppf(1 - .5 * levels)
for size_ in [400,]:
## train
    X_train = random_state.uniform(size=(size_, 1))
## test
    X_test = np.linspace(0, 1, num=1001).reshape((-1, 1))
    # Train rows come first, so a leading slice picks them out of the stacked sample.
    X = np.concatenate([X_train, X_test], axis=0)
    train_ = np.s_[:X_train.shape[0]]
    for nugget in np.logspace(-4, 0, num=3):
        for func_ in func1d_.iterkeys():
            print size_, nugget, func_,
            y = func1d_[func_](X)
            y += random_state.normal(size=y.shape) * noise_

            y_train = y[train_]
            y_test = np.delete(y, train_, axis=0)

            gp = GaussianProcess(thetaL=1e-4, thetaU=1e4, beta0=0,
                                 normalize=False, nugget=nugget).fit(X_train, y_train)

            # Reuse the GP's nugget and first theta for the KRR routine, then rescale
            # MM by the GP process variance.
            A, B, y_hat_, MM, loo_ = KRR_AB(X_train, y_train, X_test, forecast=True, loo=use_loo,
                                            nugget=gp.nugget, metric="rbf", gamma=gp.theta_[0])
            MM *= gp.sigma2

            jobs_ = (delayed(_pccia)(k, levels, y_test[k], y_hat_[k], MM[k], A[0, k], B[k])
                     for k in xrange(y_test.shape[0]))
            results_ = parallel_(jobs_)

            # Stack per-point results; the last axis is ordered (rrcm, ccr, bayes).
            width_ = np.stack([np.stack((rrcm_[1], ccr_[1], bayes_[1]), axis=1)
                               for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
            hits_ = np.stack([np.stack((rrcm_[0], ccr_[0], bayes_[0]), axis=1)
                              for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
            bounds_ = np.stack([np.stack((rrcm_[2], ccr_[2], bayes_[2]), axis=2)
                                for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
            hit_prob_ = hits_.mean(axis=0)
            hit_prob_std_ = hits_.std(axis=0)

            # One figure per conformal interval type.
            for j, name_ in enumerate(["rrcm", "crr"]):
                fig = plt.figure(figsize=(12, 6))
                # Subplot slots of the 2 x 3 grid listed column by column.
                order_ = [1, 4, 2, 5, 3, 6]

                ax = fig.add_subplot(2, 3, order_[0])
                ax.plot(X_test, y_test, color="blue")
                ax.plot(X_test, y_hat_, color="red")
                ax.set_title("Actual/Prediction: %s"%(func_,))

                ax = fig.add_subplot(2, 3, order_[1])
                ax.plot(X_test, np.abs(y_hat_ - y_test))
                ax.set_title("Absolute error")

                # One panel per level: absolute error, sqrt(MM) and the interval
                # half-width divided by the normal quantile of that level.
                for i in xrange(4):
                    ax = fig.add_subplot(2, 3, order_[i + 2])

                    ax.plot(X_test, np.abs(y_hat_ - y_test), alpha=0.25, label="$|y-\\hat{y}|$")
                    ax.plot(X_test, np.sqrt(MM), label="bayes")
            #         ax.plot(X_test, (bounds_[:, i, 1, 2] - bounds_[:, i, 0, 2]) / (2 * z_a[i]), label="bayes")
                    ax.plot(X_test, (bounds_[:, i, 1, j] - bounds_[:, i, 0, j]) / (2 * z_a[i]), label=name_)
                    ax.set_title("Absolute error/accuracy %s %s-CI"%(name_, lvl_cols_[i],))
                    ax.legend(loc="best")

                fig.tight_layout()
                fig_name_ = os.path.join(PLOT_PATH, "%s 1k-%d %.1e %.1e %s%s.png"
                                         %(func_, X_train.shape[0], nugget, noise_, name_,
                                           " loo" if use_loo else ""))
                print fig_name_

                fig.savefig(fig_name_)
                plt.close()
#                 plt.show()
#                 break

Produce the 1D test plots for a $1k$ grid.

In [None]:
random_state = np.random.RandomState(0x0BADC0DE)

# Two-sided normal quantiles, one per level; used to rescale interval widths.
z_a = norm.ppf(1 - .5 * levels)
for size_ in [10, 20,]:
## train
    X_train = random_state.uniform(size=(size_, 1))
## test
    X_test = np.linspace(0, 1, num=1001).reshape((-1, 1))
    # Train rows come first, so a leading slice picks them out of the stacked sample.
    X = np.concatenate([X_train, X_test], axis=0)
    train_ = np.s_[:X_train.shape[0]]
    for nugget in np.logspace(-4, 0, num=3):
        for func_ in func1d_.iterkeys():
            print size_, nugget, func_,
            # No noise is added to the targets in this cell.
            y = func1d_[func_](X)

            y_train = y[train_]
            y_test = np.delete(y, train_, axis=0)

            gp = GaussianProcess(thetaL=1e-4, thetaU=1e4, beta0=0,
                                 normalize=False, nugget=nugget).fit(X_train, y_train)

            # Reuse the GP's nugget and first theta for the KRR routine, then rescale
            # MM by the GP process variance.
            A, B, y_hat_, MM, loo_ = KRR_AB(X_train, y_train, X_test, forecast=True,
                                            nugget=gp.nugget, metric="rbf", gamma=gp.theta_[0])
            MM *= gp.sigma2

            jobs_ = (delayed(_pccia)(k, levels, y_test[k], y_hat_[k], MM[k], A[0, k], B[k])
                     for k in xrange(y_test.shape[0]))
            results_ = parallel_(jobs_)

            # Stack per-point results; the last axis is ordered (rrcm, ccr, bayes).
            width_ = np.stack([np.stack((rrcm_[1], ccr_[1], bayes_[1]), axis=1)
                               for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
            hits_ = np.stack([np.stack((rrcm_[0], ccr_[0], bayes_[0]), axis=1)
                              for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
            bounds_ = np.stack([np.stack((rrcm_[2], ccr_[2], bayes_[2]), axis=2)
                                for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
            hit_prob_ = hits_.mean(axis=0)
            hit_prob_std_ = hits_.std(axis=0)

            # One figure per conformal interval type.
            for j, name_ in enumerate(["rrcm", "crr"]):
                fig = plt.figure(figsize=(12, 6))
                # Subplot slots of the 2 x 3 grid listed column by column.
                order_ = [1, 4, 2, 5, 3, 6]

                ax = fig.add_subplot(2, 3, order_[0])
                ax.plot(X_test, y_test, color="blue")
                ax.plot(X_test, y_hat_, color="red")
                ax.set_title("Actual/Prediction: %s"%(func_,))

                ax = fig.add_subplot(2, 3, order_[1])
                ax.plot(X_test, np.abs(y_hat_ - y_test))
                ax.set_title("Absolute error")

                # One panel per level: absolute error, sqrt(MM) and the interval
                # half-width divided by the normal quantile of that level.
                for i in xrange(4):
                    ax = fig.add_subplot(2, 3, order_[i + 2])

                    ax.plot(X_test, np.abs(y_hat_ - y_test), alpha=0.25, label="$|y-\\hat{y}|$")
                    ax.plot(X_test, np.sqrt(MM), label="bayes")
            #         ax.plot(X_test, (bounds_[:, i, 1, 2] - bounds_[:, i, 0, 2]) / (2 * z_a[i]), label="bayes")
                    ax.plot(X_test, (bounds_[:, i, 1, j] - bounds_[:, i, 0, j]) / (2 * z_a[i]), label=name_)
                    ax.set_title("Absolute error/accuracy %s %s-CI"%(name_, lvl_cols_[i],))
                    ax.legend(loc="best")

                fig.tight_layout()
                fig_name_ = os.path.join(PLOT_PATH, "%s 1k-%d %.1e %s.png"%(func_, X_train.shape[0],
                                                                            nugget, name_,))
                print fig_name_

                fig.savefig(fig_name_)
                plt.close()
            #     plt.show()
            #     break

Previous

In [None]:
from scipy.stats import norm
from itertools import chain
from joblib import Parallel, delayed
parallel_ = Parallel(n_jobs=-1, verbose=1)

step_ = 7
for func_ in ["heaviside",]:#func1d_.iterkeys():
    print func_,
    X = np.linspace(0, 1, num=1001).reshape((-1, 1))
    y = func1d_[func_](X)

    train_ = np.s_[::5]
    X_train, y_train = X[train_], y[train_]
    X_test, y_test = X, y# np.delete(X, train_, axis=0), np.delete(y, train_, axis=0)
    
    gp = GaussianProcess(thetaL=1e-4, thetaU=1e4, beta0=0,
                         normalize=False, nugget=1e-6).fit(X_train, y_train)
    
    A, B, y_hat_, MM, loo_ = KRR_AB(X_train, y_train, X_test, forecast=True,
                                    nugget=gp.nugget, metric="rbf", gamma=gp.theta_[0])
    MM *= gp.sigma2

    jobs_ = (delayed(_pccia)(k, levels, y_test[k], y_hat_[k], MM[k], A[0, k], B[k])
             for k in xrange(y_test.shape[0]))
    results_ = parallel_(jobs_)

    width_ = np.stack([np.stack((rrcm_[1], ccr_[1], bayes_[1]), axis=1)
                       for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
    hits_ = np.stack([np.stack((rrcm_[0], ccr_[0], bayes_[0]), axis=1)
                      for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
    bounds_ = np.stack([np.stack((rrcm_[2], ccr_[2], bayes_[2]), axis=2)
                        for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
    hit_prob_ = hits_.mean(axis=0)
    hit_prob_std_ = hits_.std(axis=0)

    fig = plt.figure(figsize=(12, 6))
    order_ = [1, 4, 2, 5, 3, 6]

    ax = fig.add_subplot(2, 3, order_[0])
    ax.plot(X_test, y_test, color="blue")
    ax.plot(X_test, y_hat_, color="red")
    ax.set_title("Actual/Prediction: %s"%(func_,))

    ax = fig.add_subplot(2, 3, order_[1])
    ax.plot(X_test, np.abs(y_hat_ - y_test))
    ax.set_title("Absolute error")

    for j, name_ in enumerate(["rrcm", "bayes"]):
        for i, i_ in enumerate([2, 3]):
            ax = fig.add_subplot(2, 3, order_[i + 3 + j*2 - 1])
            ax.plot(X_test, bounds_[:, i_, 1, j] - bounds_[:, i_, 0, j], color="red")
            ax.plot(X_test, y_test[:, 0] - bounds_[:, i_, 0, j], color="blue")
            ax.axhline(y=0, color="red")
            ax.set_title("the %s-CI(%0.1f%%$\pm$%0.1f%%)\n of %s"
                         %(lvl_cols_[i_], 100*hit_prob_[i_, j], 100*hit_prob_std_[i_,j], name_.upper(),))
    fig.tight_layout()
#     fig.savefig(os.path.join(PLOT_PATH, "1k-%d %s.png"%(step_, func_,)))
#     plt.close()
    plt.show()
    break

## 2D case

In [None]:
from utils.functions_2d import func2D
# Name -> 2D test-function lookup; entries "f1" .. "f5" are referenced below.
DGP_ = func2D()

In [None]:
# Fresh seed for the 2D experiment; the generator draws the train inputs below.
random_state = np.random.RandomState(0xDEADC0DE)

In [None]:
from joblib import Parallel, delayed
parallel_ = Parallel(n_jobs=-1, verbose=1)

# Train inputs: 1500 points drawn uniformly from [-1, 1]^2.
# X_train = random_state.uniform(size=(100, 2)) * 2 - 1
X_train = random_state.uniform(size=(1500, 2)) * 2 - 1

# Test inputs: the nodes of a regular 51 x 51 grid over the same square.
mesh_ = np.meshgrid(*2*[np.linspace(-1, 1, num=51)])
X_test = np.concatenate([ax_.reshape((-1, 1)) for ax_ in mesh_], axis=1)

# Train rows come first, so a leading slice picks them out of the stacked sample.
X = np.concatenate([X_train, X_test], axis=0)
train_ = np.s_[:X_train.shape[0]]

# Two-sided normal quantiles, one per level; used to rescale interval widths.
z_a = norm.ppf(1 - .5 * levels)

# Contour levels (in percent) for the relative-width plots.
levels_ = np.linspace(-.75, .75, num=16) * 100

# Interval type compared against the Bayesian one: (index in the stacked results, label).
i, int_name_ = [(0, "rrcm"), (1, "crr"), (2, "bayes")][0]

# nugget = 1e-2 ## 20160504
nugget = 1e-6

use_loo = True

# Sweep the kernel parameter theta0 over five decades; only the last test function
# ("f5") is selected by the [-1:] slice.
for theta0_ in np.logspace(-2, 2, num=5):
    for name_ in ["f1", "f2", "f3", "f4", "f5"][-1:]:
        y = DGP_[name_](X)

        y_train = y[train_]
        y_test = np.delete(y, train_, axis=0)

        # No thetaL/thetaU are given here, unlike in the earlier cells.
        # presumably theta then stays fixed at theta0 — TODO confirm.
    #     gp = GaussianProcess(thetaL=1e-4, thetaU=1e4, beta0=0, theta0=0.1,
        gp = GaussianProcess(beta0=0, theta0=theta0_, normalize=False,
                             nugget=nugget).fit(X_train, y_train)

        A, B, y_hat_, MM, loo_ = KRR_AB(X_train, y_train, X_test, forecast=True, loo=use_loo,
                                        nugget=gp.nugget, metric="rbf", gamma=gp.theta_[0])
        MM *= gp.sigma2

        jobs_ = (delayed(_pccia)(k, levels, y_test[k], y_hat_[k], MM[k], A[0, k], B[k])
                 for k in xrange(y_test.shape[0]))
        results_ = parallel_(jobs_)

        # Stack per-point results; the last axis is ordered (rrcm, ccr, bayes).
        width_ = np.stack([np.stack((rrcm_[1], ccr_[1], bayes_[1]), axis=1)
                           for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
        hits_ = np.stack([np.stack((rrcm_[0], ccr_[0], bayes_[0]), axis=1)
                          for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
        bounds_ = np.stack([np.stack((rrcm_[2], ccr_[2], bayes_[2]), axis=2)
                            for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
        hit_prob_ = hits_.mean(axis=0)
        hit_prob_std_ = hits_.std(axis=0)

        # Half-width of every interval hull divided by the normal quantile of its
        # level, laid out as (test point, level, type).
        delta_ = (bounds_[:, :, 1] - bounds_[:, :, 0]) / (2 * z_a[np.newaxis, :, np.newaxis])
        # Percentage by which the selected type's scaled half-width exceeds the Bayesian one.
        rel_ = (delta_[..., i] / delta_[..., 2] - 1) * 100
        ae_ = np.abs(y_test - y_hat_[:, 0])

    ## Actual surface: 3D
        fig = plt.figure(figsize=(10, 6))
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_surface(mesh_[0], mesh_[1], y_test.reshape(mesh_[0].shape),
                        cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                        antialiased=False, alpha=0.75)
        ax.view_init(60, 30)
        ax.set_title("Actual")
        fig_name_ = os.path.join(OUTPUT_PLOT_PATH, "%s (tht %.1e).png"
                                 %(name_, theta0_,))
        print fig_name_
        fig.savefig(fig_name_)
#         plt.show()
        plt.close()

    ## Absolute prediction error: 3D
        fig = plt.figure(figsize=(10, 6))
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_surface(mesh_[0], mesh_[1], ae_.reshape(mesh_[0].shape),
                        cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                        antialiased=False, alpha=0.9)
        ax.view_init(60, 30)
        ax.set_title("Absolute error")
        fig_name_ = os.path.join(OUTPUT_PLOT_PATH, "%s abs_error (tht %.1e).png"
                                 %(name_, theta0_,))
        print fig_name_,
        fig.savefig(fig_name_)
#         plt.show()
        plt.close()

    ## \hat{y}: 3D
        fig = plt.figure(figsize=(10, 6))
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_surface(mesh_[0], mesh_[1], y_hat_.reshape(mesh_[0].shape),
                        cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                        antialiased=False, alpha=0.9)
        ax.view_init(60, 30)
        ax.set_title("Predicted")
        fig_name_ = os.path.join(OUTPUT_PLOT_PATH, "%s predicted (tht %.1e).png"
                                 %(name_, theta0_,))
        print fig_name_,
        fig.savefig(fig_name_)
#         plt.show()
        plt.close()

    ## \hat{\sigma}: 3D
        # Remove the sigma2 * nugget term from the rescaled MM before taking the root.
        sigma_hat_ = np.sqrt(MM[:, 0] - gp.sigma2 * gp.nugget)
        fig = plt.figure(figsize=(10, 6))
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_surface(mesh_[0], mesh_[1], sigma_hat_.reshape(mesh_[0].shape),
                        cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                        antialiased=False, alpha=0.9)
        ax.view_init(60, 30)
        ax.set_title("$\\hat{\\sigma}$")
        fig_name_ = os.path.join(OUTPUT_PLOT_PATH, "%s sigma (tht %.1e).png"
                                 %(name_, theta0_,))
        print fig_name_,
        fig.savefig(fig_name_)
#         plt.show()
        plt.close()

    ## \delta
        # NOTE(review): `sigma_hat_` is recomputed here but not used in this section.
        # NOTE(review): the surface is delta_[:, -1, 0], i.e. the LAST entry of `levels`
        # (1% with the levels defined above) for type 0 (rrcm), while the title says
        # 5% and the file name says "rrcm25" — confirm which level is meant.
        sigma_hat_ = np.sqrt(MM[:, 0] - gp.sigma2 * gp.nugget)
        fig = plt.figure(figsize=(10, 6))
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_surface(mesh_[0], mesh_[1], delta_[:, -1, 0].reshape(mesh_[0].shape),
                        cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                        antialiased=False, alpha=0.9)
        ax.view_init(60, 30)
        ax.set_title("RRCM 5%-CI scaled half-width")
        fig_name_ = os.path.join(OUTPUT_PLOT_PATH, "%s rrcm25-shw (tht %.1e%s).png"
                                 %(name_, theta0_, ", loo" if use_loo else "",))
        print fig_name_,
        fig.savefig(fig_name_)
#         plt.show()
        plt.close()

    ## Actual values: top-down
        fig = plt.figure(figsize=(12, 15))
        ax = fig.add_subplot(321)
        cont_ = ax.contourf(mesh_[0], mesh_[1],
                            y_test.reshape(mesh_[0].shape), 20,
                            cmap=plt.cm.coolwarm, lw=0, alpha=0.9)
        ax.set_title("Actual value %s"%(name_,))
        plt.colorbar(cont_)

    ## Absolute error: top-down
        ax = fig.add_subplot(322)
        cont_ = ax.contourf(mesh_[0], mesh_[1],
                            ae_.reshape(mesh_[0].shape), 20,
                            cmap=plt.cm.coolwarm, lw=0, alpha=0.9)
        ax.set_title("Absolute prediction error")
        plt.colorbar(cont_)

        ## CCI relative precision wrt. bayesian CI.
        # One panel per level, placed in slots 3..6 of the 3 x 2 grid.
        for j, pct_ in enumerate(lvl_cols_):
            ax = fig.add_subplot(3, 2, j + 3)

            cont_ = ax.contour(mesh_[0], mesh_[1], rel_[:, j].reshape(mesh_[0].shape),
                               levels=levels_, colors="k", linestyles="solid", extend="both")
            ax.clabel(cont_, inline=1, fontsize=8, fmt='%.0f')

            CS3 = ax.contourf(mesh_[0], mesh_[1], rel_[:, j].reshape(mesh_[0].shape),
                        cmap=plt.cm.coolwarm, lw=1, levels=levels_,
                        antialiased=False, alpha=1.0, extend='both',)
            # Values above the top contour level are drawn in white.
            CS3.cmap.set_over('white')

            # The title reports the empirical miss rates (100% minus coverage) of the
            # selected type and of the Bayesian intervals at this level.
            ax.set_title("""rel. %s(%.1f%%) / bayes(%.1f%%) %s-CI%s"""
                         %(int_name_, 100-hit_prob_[j, i]*100,
                           100-hit_prob_[j, -1]*100, pct_,
                           "(loo)" if use_loo else "",))
        fig.tight_layout()

        fig_name_ = os.path.join(OUTPUT_PLOT_PATH, "%s efficiency (tht %.1e%s).png"
                                 %(name_, theta0_, ", loo" if use_loo else "",))
        print fig_name_,
        fig.savefig(fig_name_)
#         plt.show()
        plt.close()
        #     break

## Advanced Experiments

In [None]:
# Fresh seed for the advanced experiments: the generator feeds the sample
# generators, the train/validation split and the resampling below.
random_state = np.random.RandomState(0x0ABACABA)

In [None]:
from math import sqrt

In [None]:
# RBF coefficient passed to the "gaussian" generator via `dgp_opts_`.
gamma = 10.0
# Input dimension and the number of grid nodes per axis of the validation mesh.
dim_ = 2
resolution=101

In [None]:
# Keyword arguments per generator: every one gets scale=1.0, and the "gaussian"
# one additionally receives the RBF kernel settings.
dgp_opts_ = {name_: dict(scale=1.0) for name_ in DGP}
dgp_opts_["gaussian"].update(dict(metric="rbf", gamma=gamma))

In [None]:
# def surface(ax, mesh, yy, name, **kwargs):
#     ax.plot_surface(mesh[0], mesh[1], yy.reshape(mesh[0].shape),
#                     cstride=1, rstride=1, cmap=plt.cm.coolwarm,
#                     lw=0, antialiased=False, **kwargs)
#     ax.set_title("A sample surface $y\\sim \\mathtt{%s}$"%(name,))
#     ax.set_ylabel("y")
#     return ax

In [None]:
def run_experiment(X, y, X_test, y_test):
    """Fit a GP on the train sample and build all interval types on the test sample.

    Parameters
    ----------
    X, y : arrays
        Train inputs and targets.
    X_test, y_test : arrays
        Test inputs and the targets the intervals are scored against.

    Returns
    -------
    tuple
        ``(width_, hits_, bounds_, y_hat, mm, gp)``: interval widths and hit
        indicators stacked as (test point, level, type), interval hulls stacked
        as (test point, level, bound, type) with the types ordered rrcm, ccr,
        bayes, the first column of the point forecast and of `MM` returned by
        `KRR_AB`, and the fitted GP.
    """
    # The GP regression supplies the kernel hyper-parameters.
    model_ = GaussianProcess(thetaL=1e-4, thetaU=1e2, beta0=0,
                             normalize=False, nugget=1e-6,
                             storage_mode='light')
    gp = model_.fit(X, y)

    # A/B vectors, point forecast and MM from the KRR routine.
    # NOTE(review): the other cells call KRR_AB without `sigma2` and multiply MM by
    # gp.sigma2 afterwards; presumably passing `sigma2` does the same scaling — confirm.
    A, B, y_hat_, MM, loo_ = KRR_AB(X, y, X_test, nugget=gp.nugget,
                                    sigma2=gp.sigma2, metric="rbf", gamma=gp.theta_[0])

    # One job per test point.
    n_test_ = y_test.shape[0]
    pool_ = Parallel(n_jobs=-1, verbose=1)
    results_ = pool_(delayed(_pccia)(k, levels, y_test[k],
                                     y_hat_[k], MM[k], A[0, k], B[k])
                     for k in xrange(n_test_))

    def _collect(field_, axis_):
        # Stack item `field_` of the (rrcm, ccr, bayes) triples of every result,
        # then stack the outcome over the test points.
        return np.stack([np.stack([part_[field_] for part_ in row_[1:]], axis=axis_)
                         for row_ in results_], axis=0)

    width_ = _collect(1, 1)
    hits_ = _collect(0, 1)
    bounds_ = _collect(2, 2)
    return width_, hits_, bounds_, y_hat_[:, 0], MM[:, 0], gp

In [None]:
# Results keyed by (generator name, train size, noise level).
experiment_ = dict()
for name_, dgp_ in DGP.iteritems():
    print "%s:"%(name_,),
## Create a dedicated validation sample
    mesh_ = np.meshgrid(*dim_*[np.linspace(-1, 1, num=resolution)])
    XX = np.concatenate([ax_.reshape((-1,1)) for ax_ in mesh_], axis=1)
    # `yy` is only used by the commented-out plot below.
    yy = dgp_(XX, random_state=random_state, **dgp_opts_[name_])
# ## A typical realisation
#     fig = plt.figure(figsize=(8, 6))
#     ax = fig.add_subplot(111, projection='3d')
#     surface(ax, mesh_, yy, name_).view_init(60, 30)
#     fig.savefig(os.path.join(SAMPLE_PLOT_PATH, "%s.png"%(name_)), )
#     plt.close()
## Now do the train/validation split
    XX0, X_validate = train_test_split(XX, test_size=0.25, random_state=random_state)
    for N in [100, 400, 1600]:
        print "N = %d,"%(N,),
        # Train inputs are N grid nodes drawn without replacement from the non-validation part.
        X_train = resample(XX0, replace=False, n_samples=N, random_state=random_state)
        X_full = np.concatenate([X_train, X_validate], axis=0)
## the dgp: add some independent gaussian noise.
        for noise_level_ in [1e-6, 1e-1]:
            print "noise = %2.2e"%(noise_level_)
            # Targets are generated jointly for train and validation inputs.
            y_full = dgp_(X_full, random_state=random_state,
                          nugget=noise_level_, **dgp_opts_[name_])
            # Every generator except "gaussian" gets explicit noise with variance noise_level_.
            if name_ != "gaussian":
                y_full += random_state.normal(size=y_full.shape) * sqrt(noise_level_)
            y_train, y_validate = y_full[:N], y_full[N:]
## The experiment
            result_ = run_experiment(X_train, y_train, X_validate, y_validate)
# ## Save
            experiment_[name_, N, noise_level_] = (result_, X_full, y_full, N)
#             break
#         break
#     break

In [None]:
# Persist the raw results under the run's dump directory.
# presumably `gz=9` selects the gzip compression level — confirm in utils._save.
_save(experiment_, os.path.join(OUTPUT_PATH, "experiment_02_"), gz=9)

Box plots.

In [None]:
# One figure per experiment: box plots of the interval widths over the test points,
# one panel per interval type and one box per level.
for tuple_, result_ in experiment_.iteritems():
    (width_, hits_, bounds_, y_hat_, y_hat_sigma_, gp), X_full, y_full, N = result_
    name_, size_, noise_ = tuple_
    # The validation rows follow the N train rows in the stacked sample.
    X_test, y_test = X_full[N:], y_full[N:]

    fig, ax_ = plt.subplots(nrows=1, ncols=3, sharex=True,
                            sharey=True, figsize=(16, 9))
    for i, type_ in enumerate(["rrcm", "crr", "bayes"]):
        ax_[i].boxplot(width_[..., i])
        ax_[i].set_title("""`%s` `%s`(N=%d, noise=%.1E)"""
                         %(type_, name_, size_, noise_,))
        ax_[i].set_ylabel("width")
        ax_[i].set_xticklabels(lvl_cols_)
        ax_[i].grid()
    fig.savefig(os.path.join(OUTPUT_PLOT_PATH, "width_box - %s %.1E %d.png"
                             %(name_, noise_, size_, )), )
    plt.close()

In [None]:
def process(result):
    """Summarise one experiment record as a table of interval-quality measures.

    Parameters
    ----------
    result : tuple
        A value of `experiment_`: `(run_output, X_full, y_full, N)`, where
        `run_output` unpacks into `(width_, hits_, bounds_, y_hat_,
        y_hat_sigma_, gp)` and rows `N:` of `y_full` are the validation
        targets. `width_` / `hits_` are assumed to be indexed as
        (sample, level, type) and `bounds_` as (sample, level, lower/upper,
        type), matching how the plotting cells index them -- confirm against
        `run_experiment`.

    Returns
    -------
    pandas.DataFrame
        Rows are indexed by (measure, interval type); the columns are the
        confidence levels taken from the global `levels`.

    Notes
    -----
    * The "avg. abs-width ratio" entry is computed with `np.median`, not a mean.
    * The "mse/var" entry is the variance of the residuals over the variance
      of the targets; it is one scalar repeated in every cell of its frame.
    """
    (width_, hits_, bounds_, y_hat_, y_hat_sigma_, gp), X_full, y_full, N = result
    y_test = y_full[N:]

    lvl_cols_ = ["%4.1f%%"%(100*lv_,) for lv_ in levels]
    type_cols_ = ["rrcm", "crr", "bayes"]

    def frame_(values):
        # Every measure is a (level x type) table with identical labels.
        return pd.DataFrame(values, index=lvl_cols_, columns=type_cols_)

    # Absolute error relative to the interval width (upper minus lower bound).
    abs_error_ = np.abs(y_test - y_hat_).reshape((-1, 1, 1))
    ratio_ = abs_error_ / (bounds_[:, :, 1] - bounds_[:, :, 0])

    # The shape follows the labels instead of a hard-coded (4, 3), so the
    # summary keeps working when `levels` has a different number of entries.
    rel_var_ = (y_test - y_hat_).var() / y_test.var()
    rel_var_table_ = np.full((len(lvl_cols_), len(type_cols_)), rel_var_)

    return pd.concat({
        "median width": frame_(np.median(width_, axis=0)),
        "mean width": frame_(np.mean(width_, axis=0)),
        "95% width": frame_(np.percentile(width_, 95, axis=0)),
        "max width": frame_(np.max(width_, axis=0)),
        "coverage": frame_(np.mean(hits_, axis=0)),
        "avg. abs-width ratio": frame_(np.median(ratio_, axis=0)),
        "mse/var": frame_(rel_var_table_),
    }, axis=0, names=["measure"]).unstack().stack(level=0)


Collect the per-experiment summaries produced by `process` into a single data frame.

In [None]:
# Summarise every experiment with `process`, keyed by its (fun, N, noise)
# tuple, and stack the summaries into one frame indexed by those keys.
summaries_ = dict((key_, process(record_))
                  for key_, record_ in experiment_.iteritems())
df_ = pd.concat(summaries_, axis=0, names=["fun", "N", "noise"])

Coverage table.

In [None]:
# Select the "coverage" rows (the mean of `hits_` computed in `process`) from
# the second-to-last index level, then pivot the innermost remaining index
# level (the interval type, given how `process` stacks its output) into columns.
df_.xs("coverage", level=-2, axis=0).unstack()

Max width table

In [None]:
# Select the "max width" rows (the largest interval width computed in
# `process`) from the second-to-last index level, then pivot the innermost
# remaining index level into columns.
df_.xs("max width", level=-2, axis=0).unstack()

Actual, predicted and abs-accuracy

In [None]:
def _save_trisurf(x_, y_, z_, title_, filename_):
    """Draw `z_` over the points (`x_`, `y_`) as a 3d triangulated surface
    and save the figure as `filename_` inside OUTPUT_PLOT_PATH."""
    fig = plt.figure(figsize=(16, 9))
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_trisurf(x_, y_, z_, cmap=plt.cm.coolwarm, lw=0) #, norm=MidPointNorm())
    ax.set_title(title_)
    ax.view_init(60, 60)
    fig.savefig(os.path.join(OUTPUT_PLOT_PATH, filename_), )
    # close this figure explicitly so the loop does not accumulate open figures
    plt.close(fig)


for tuple_, result_ in experiment_.iteritems():
    (width_, hits_, bounds_, y_hat_, y_hat_sigma_, gp), X_full, y_full, N = result_
    name_, size_, noise_ = tuple_
## Rows `N:` hold the validation sample
    X_test, y_test = X_full[N:], y_full[N:]

## (title template, file name template, surface height), drawn in this order
    surfaces_ = [
        ("""abs/std ratio of `y` for `%s`(N=%d, noise=%.1E)""",
         "abs_std_ratio - %s %.1E %d.png",
         np.abs(y_test-y_hat_) / y_test.std()),
        ("""Actual value of `y` for `%s`(N=%d, noise=%.1E)""",
         "actual - %s %.1E %d.png",
         y_test),
        ("""Predicted value of `y` for `%s`(N=%d, noise=%.1E)""",
         "predicted - %s %.1E %d.png",
         y_hat_),
    ]
    for title_fmt_, file_fmt_, z_ in surfaces_:
        _save_trisurf(X_test[:, 0], X_test[:, 1], z_,
                      title_fmt_%(name_, size_, noise_,),
                      file_fmt_%(name_, noise_, size_,))

Abs-width ratio for the intervals

In [None]:
# For every experiment, interval type and confidence level: plot the absolute
# prediction error on the validation points divided by the interval width.
for key_, record_ in experiment_.iteritems():
    (width_, hits_, bounds_, y_hat_, y_hat_sigma_, gp), X_full, y_full, N = record_
    name_, size_, noise_ = key_
## Rows `N:` hold the validation sample
    X_test, y_test = X_full[N:], y_full[N:]
    abs_error_ = np.abs(y_test-y_hat_)

    for typ_, type_ in enumerate(["rrcm", "crr", "bayes"]):
        for lvl_, sign_ in enumerate(lvl_cols_):
            title_ = ("""abs/width ratio for `%s`-type %s-interval for `%s`(N=%d, noise=%.1E)"""
                      %(type_, sign_, name_, size_, noise_,))
            target_ = os.path.join(OUTPUT_PLOT_PATH, "abs_width_ratio - %s %.1E %d %s %s.png"
                                   %(name_, noise_, size_, type_, sign_, ))

            fig = plt.figure(figsize=(16, 9))
            ax = fig.add_subplot(111, projection='3d')
            ax.plot_trisurf(X_test[:, 0], X_test[:, 1], abs_error_ / width_[:, lvl_, typ_],
                            cmap=plt.cm.coolwarm, lw=0) #, norm=MidPointNorm())
            ax.set_title(title_)
            ax.view_init(60, 60)
            fig.savefig(target_, )
            plt.close()

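Excess plot: the signed amount by which the actual `y` falls outside the interval (positive above the upper bound, negative below the lower bound, zero inside), expressed in units of half the interval width.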


In [None]:
# For every experiment, interval type and confidence level: plot how far the
# validation targets fall outside the interval, in units of half its width.
for key_, record_ in experiment_.iteritems():
    (width_, hits_, bounds_, y_hat_, y_hat_sigma_, gp), X_full, y_full, N = record_
    name_, size_, noise_ = key_
## Rows `N:` hold the validation sample
    X_test, y_test = X_full[N:], y_full[N:]
    for typ_, type_ in enumerate(["rrcm", "crr", "bayes"]):
        for lvl_, sign_ in enumerate(lvl_cols_):
            # index 0 / 1 of the third axis are taken as the lower / upper bound
            lower_ = bounds_[:, lvl_, 0, typ_]
            upper_ = bounds_[:, lvl_, 1, typ_]
            # overshoot above the upper bound and below the lower one, floored at zero
            above_ = np.maximum(y_test - upper_, 0)
            below_ = np.maximum(lower_ - y_test, 0)
            excess_ = 2 * (above_ - below_) / width_[:, lvl_, typ_]

            title_ = ("""Excess of `%s`-type %s-interval for `%s`(N=%d, noise=%.1E)"""
                      %(type_, sign_, name_, size_, noise_,))
            target_ = os.path.join(OUTPUT_PLOT_PATH, "excess - %s %.1E %d %s %s.png"
                                   %(name_, noise_, size_, type_, sign_, ))

            fig = plt.figure(figsize=(16, 9))
            ax = fig.add_subplot(111, projection='3d')
            ax.set_title(title_)
            ax.plot_trisurf(X_test[:, 0], X_test[:, 1], excess_, cmap=plt.cm.coolwarm,
                            lw=0, alpha=.95, norm=MidPointNorm())
            ax.view_init(60, 60)

            fig.savefig(target_, )
            plt.close()

<hr/>
<hr/>

<hr/>
<hr/>