# CKRR -- plots

In [None]:
import os, time
import numpy as np
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from utils.mpl_mid_point_norm import MidPointNorm

from utils.state import _load
from scipy.stats import binom_test
from scipy.stats import ttest_1samp

In [None]:
def mkdirifnot(path):
    """Make sure that `path` exists and return it unchanged.

    Missing parent directories are created as well, so nested output
    folders can be requested in a single call.

    Parameters
    ----------
    path : str
        The directory to create if nothing exists at that location yet.

    Returns
    -------
    str
        The very same `path`, which allows wrapping path expressions inline.

    Raises
    ------
    OSError
        If the directory could not be created and is still absent.
    """
    if not os.path.exists(path):
        try:
            # `makedirs` builds the whole chain of folders, unlike `os.mkdir`.
            os.makedirs(path)
        except OSError:
            # Somebody else may have created the folder between the check and
            # the call: this is fine as long as the directory is there now.
            if not os.path.isdir(path):
                raise
    return path

In [None]:
# Input locations, relative to the working directory of the notebook.
BASE_PATH = "."
DATA_PATH = os.path.join(BASE_PATH, "..", "thesis_exp")

# Root folder of the generated figures and one sub-folder per figure family;
# every folder is created on the spot if it does not exist yet.
OUTPUT_PATH = mkdirifnot(os.path.join(BASE_PATH, "output_pdf-2"))
PROFILE_PATH, EXP1D_PATH, EXP2D_PATH = (
    mkdirifnot(os.path.join(OUTPUT_PATH, subdir_))
    for subdir_ in ("profile", "exp_1d", "exp_2d"))

In [None]:
from joblib import Parallel, delayed

In [None]:
def load_dumps(path, n_jobs=-1, verbose=1, include_target=False):
    """Load the experiment dumps found in `path` and aggregate them by setting.

    Every ``*.gz`` file in `path` is read with `_load` through joblib. A dump
    is iterated as a sequence of records: ``record[0]`` is a tuple whose last
    element is the sample size and whose leading elements form the key of the
    setting, and ``record[1:]`` holds the arrays -- the targets, their
    predictions, and then six consecutive (width, coverage) pairs.

    Parameters
    ----------
    path : str
        Folder with the ``*.gz`` dumps.
    n_jobs : int
        Number of joblib workers used for reading the files.
    verbose : int
        Verbosity level passed to `joblib.Parallel`.
    include_target : bool
        Whether to append the stacked targets and predictions to each result.

    Returns
    -------
    dict
        Maps the setting key to ``(ratio_, sizes_, coverage_, width_)``, with
        ``target_, target_hat_`` appended when `include_target` is set. All
        arrays are stacked over the records sorted by size; `coverage_` and
        `width_` additionally have the six procedures on the leading axis.
        `ratio_` is the mean squared error divided by the target variance.
    """
    n_procs_ = 6  # number of (width, coverage) pairs stored in every record

    parallel_ = Parallel(n_jobs=n_jobs, verbose=verbose)
    # `os.listdir` returns names in arbitrary order: sort them so that records
    # of equal size are always stacked in the same order.
    jobs_ = (delayed(_load)(os.path.join(path, fname_))
             for fname_ in sorted(os.listdir(path))
             if fname_.endswith(".gz"))
    dumps_ = parallel_(jobs_)

    # Group the records by setting, keeping (size, arrays) pairs.
    grouped_ = dict()
    for dump_ in dumps_:
        for exp_ in dump_:
            grouped_.setdefault(exp_[0][:-1], list()).append((exp_[0][-1], exp_[1:]))

    results_ = dict()
    for key_, runs_ in grouped_.items():
        # Stable sort: records of the same size keep their loading order.
        runs_ = sorted(runs_, key=lambda run_: run_[0])
        sizes_ = np.array([run_[0] for run_ in runs_])
        arrays_ = [run_[1] for run_ in runs_]

        ratio_ = np.stack([np.mean((arr_[0] - arr_[1])**2, axis=0, keepdims=True) /
                           np.std(arr_[0], axis=0, keepdims=True)**2
                           for arr_ in arrays_], axis=0)
        coverage_ = np.stack([np.stack([arr_[3 + 2 * j] for arr_ in arrays_], axis=0)
                              for j in range(n_procs_)], axis=0)
        width_ = np.stack([np.stack([arr_[2 + 2 * j] for arr_ in arrays_], axis=0)
                           for j in range(n_procs_)], axis=0)

        if include_target:
            target_ = np.stack([arr_[0] for arr_ in arrays_], axis=0)
            target_hat_ = np.stack([arr_[1] for arr_ in arrays_], axis=0)
            results_[key_] = ratio_, sizes_, coverage_, width_, target_, target_hat_
        else:
            results_[key_] = ratio_, sizes_, coverage_, width_

    return results_

In [None]:
def load_profiles(path, n_jobs=-1, verbose=1):
    """Load the profile dumps found in `path` and aggregate them by setting.

    Every ``*.gz`` file in `path` is read with `_load` through joblib. A dump
    is iterated as a sequence of records: ``record[0]`` is a tuple whose last
    element is the sample size and whose leading elements form the key of the
    setting, and ``record[1:]`` holds the arrays -- the targets, their
    predictions, and then six arrays of interval bounds.

    Parameters
    ----------
    path : str
        Folder with the ``*.gz`` dumps.
    n_jobs : int
        Number of joblib workers used for reading the files.
    verbose : int
        Verbosity level passed to `joblib.Parallel`.

    Returns
    -------
    dict
        Maps the setting key to ``(ratio_, sizes_, bounds_, target_,
        target_hat_)``. All arrays are stacked over the records sorted by
        size; `bounds_` additionally has the six procedures on the leading
        axis. `ratio_` is the mean squared error divided by the target
        variance.
    """
    n_procs_ = 6  # number of bound arrays stored in every record

    parallel_ = Parallel(n_jobs=n_jobs, verbose=verbose)
    # `os.listdir` returns names in arbitrary order: sort them so that records
    # of equal size are always stacked in the same order.
    jobs_ = (delayed(_load)(os.path.join(path, fname_))
             for fname_ in sorted(os.listdir(path))
             if fname_.endswith(".gz"))
    dumps_ = parallel_(jobs_)

    # Group the records by setting, keeping (size, arrays) pairs.
    grouped_ = dict()
    for dump_ in dumps_:
        for exp_ in dump_:
            grouped_.setdefault(exp_[0][:-1], list()).append((exp_[0][-1], exp_[1:]))

    results_ = dict()
    for key_, runs_ in grouped_.items():
        # Stable sort: records of the same size keep their loading order.
        runs_ = sorted(runs_, key=lambda run_: run_[0])
        sizes_ = np.array([run_[0] for run_ in runs_])
        arrays_ = [run_[1] for run_ in runs_]

        ratio_ = np.stack([np.mean((arr_[0] - arr_[1])**2, axis=0, keepdims=True) /
                           np.std(arr_[0], axis=0, keepdims=True)**2
                           for arr_ in arrays_], axis=0)
        bounds_ = np.stack([np.stack([arr_[2 + j] for arr_ in arrays_], axis=0)
                            for j in range(n_procs_)], axis=0)
        target_ = np.stack([arr_[0] for arr_ in arrays_], axis=0)
        target_hat_ = np.stack([arr_[1] for arr_ in arrays_], axis=0)
        results_[key_] = ratio_, sizes_, bounds_, target_, target_hat_

    return results_

In [None]:
def coverage_plot(ax, sizes, cov, levels):
    """Draw the coverage of a procedure against the sample size on `ax`.

    For every level the median over the last axis of `cov` is drawn as a solid
    line, the 25% and 75% percentiles as fainter lines of the same colour, and
    the nominal coverage ``1 - level`` as a grey horizontal line.

    Parameters
    ----------
    ax : matplotlib axes
        The axes to draw on.
    sizes : array-like
        The x coordinates, one per row of `cov`.
    cov : array
        Coverage values indexed as ``[size, level, sample]``.
    levels : array-like
        Miscoverage levels, at least one per level slice of `cov`.

    Returns
    -------
    The same `ax`.
    """
    # Accept plain lists and tuples too: `1 - levels` needs an array.
    levels = np.asarray(levels, dtype=float)

    cov_med_ = np.median(cov, axis=-1)
    cov_lo_, cov_hi_ = np.percentile(cov, [25, 75], axis=-1)

    ax.set_ylim(0.65, 1.025)
    ax.set_xlim(25, 1600)
    ax.locator_params(axis="x", nbins=5)
    ax.set_yticks(1-levels)
    for i in range(cov_med_.shape[1]):
        color_ = "bgrm"[i%4]  # the four colours are recycled
        ax.plot(sizes, cov_med_[:, i], color=color_)
        ax.plot(sizes, cov_hi_[:, i], color=color_, alpha=0.5)
        ax.plot(sizes, cov_lo_[:, i], color=color_, alpha=0.5)
        ax.axhline(y=1 - levels[i], color='black', alpha=0.25)
    return ax

def nomorethan(x, bound=0):
    """Return a float copy of `x` with every value above `bound` set to NaN."""
    values_ = np.array(x, dtype=float)
    return np.where(values_ > bound, np.nan, values_)

def width_plot(ax, sizes, width):
    """Draw the width of a procedure's intervals against the sample size.

    `width` is first averaged over its last axis. The median, the 5th
    percentile and the maximum are then taken over the second-to-last axis of
    the result, which leaves arrays indexed as ``[size, level]``. Values
    above 2 are masked with NaN and therefore not drawn.

    Parameters
    ----------
    ax : matplotlib axes
        The axes to draw on.
    sizes : array-like
        The x coordinates, one per leading slice of `width`.
    width : array
        Interval widths; the leading axis must match `sizes`.

    Returns
    -------
    The same `ax`.
    """
    avg_width_ = width.mean(axis=-1)
    aw_med_ = np.median(avg_width_, axis=-2)
    aw_p05_ = np.percentile(avg_width_, 5, axis=-2)
    aw_max_ = np.max(avg_width_, axis=-2)

    ax.set_xlim(25, 1600)
    ax.locator_params(axis="x", nbins=5)
    for i in range(aw_med_.shape[1]):
        color_ = "bgrm"[i%4]  # the four colours are recycled
        ax.plot(sizes, nomorethan(aw_med_[:, i], 2), color=color_)
        # upper envelope: the maximum; lower envelope: the 5th percentile
        ax.plot(sizes, nomorethan(aw_max_[:, i], 2), color=color_, alpha=0.5, marker='v')
        ax.plot(sizes, nomorethan(aw_p05_[:, i], 2), color=color_, alpha=0.5, marker='^')
    return ax


In [None]:
# Miscoverage levels, ordered from the loosest (0.25) to the tightest (0.01).
levels = np.flipud(np.asanyarray([0.01, 0.05, 0.10, 0.25]))

# Display names of the six interval procedures, in the order of the dumps.
titles_ = pd.Index(["GPR-p", "GPR-f", "RRCM", "CRR", "RRCM-loo", "CRR-loo"]).rename("type")
# Group label of every procedure: procedures sharing a label share a figure.
ncms_ = pd.Index(["GPR", "GPR", "RRCM", "CRR", "RRCM", "CRR"]).rename("type")
# Two-decimal text labels of the levels, used as table axes.
levels_ = pd.Index(list(map(lambda lvl: "%0.2f"%(lvl,), levels)), name="alpha")

## Experiment 1D

In [None]:
# Test inputs of the 1D experiments: 1001 equispaced points on [0, 1], as a column.
XX_test = np.linspace(0, 1, num=1001)[:, np.newaxis]

# Read the Gaussian and the non-Gaussian 1D experiments with all available cores.
exp_gauss_1d, exp_nongauss_1d = (
    load_dumps(os.path.join(DATA_PATH, folder_), verbose=1, n_jobs=-1)
    for folder_ in ('exp_gauss_1d_25', 'exp_nongauss_1d_25'))

# Pool both families in one dictionary (this mutates `exp_gauss_1d` in place).
exp_gauss_1d.update(exp_nongauss_1d)

Make the width, coverage and MSE-to-variance ratio plots for the 1D experiments

In [None]:
from IPython.display import HTML, display
import matplotlib.gridspec as gridspec

# For every 1D setting save: the width and the coverage figure of each of the
# six procedures, and one figure with the MSE-to-variance ratio.
# The sort key puts string-valued theta0 after the numeric ones explicitly,
# because floats and strings cannot be compared directly on Python 3.
for key_ in sorted(exp_gauss_1d.keys(),
                   key=lambda x: (x[0], x[1], x[3], isinstance(x[2], str), x[2])):
    name_, noise_, theta0_, nugget_ = key_
    ratio_, sizes_, coverage_, width_ = exp_gauss_1d[key_]

    # <EXP1D_PATH>/<name>/<noise>_<nugget>/
    output_path_ = mkdirifnot(os.path.join(EXP1D_PATH, name_))
    output_path_ = mkdirifnot(os.path.join(output_path_, "%g_%g"%(noise_, nugget_,)))

    # theta0 is either a float or a string: the title typesets the latter in quotes.
    theta_= ("%g" if isinstance(theta0_, float) else "$'%s'$")%(theta0_,)
    title_template_ = "%%s ($\\theta=%s, \\lambda=%g, \\gamma=%g$)"%(theta_, nugget_, noise_)

    theta_= ("%g" if isinstance(theta0_, float) else "%s")%(theta0_,)
    filename_template_ = "%%s%s %g %g %s %%s"%(name_, noise_, nugget_, theta_)

    ## width dynamics first, coverage asymptotics second
    for kind_ in ("width", "coverage"):
        output_path_current_ = mkdirifnot(os.path.join(output_path_, kind_))
        for j in range(6):
            output_path_local_ = mkdirifnot(os.path.join(output_path_current_, titles_[j]))
            fig = plt.figure(figsize=(4, 3))
            ax = fig.add_subplot(111)
            if kind_ == "width":
                width_plot(ax, sizes_, width_[j])
            else:
                coverage_plot(ax, sizes_, coverage_[j], levels)
            ax.set_title(title_template_%(titles_[j],))

            # File names carry neither spaces nor dots.
            filename_ = (filename_template_%(kind_ + " ", titles_[j],)).replace(" ", "_").replace(".", ",")
            fig_file_name_ = os.path.join(output_path_local_, filename_ + ".pdf")
            fig.savefig(fig_file_name_, dpi=120)
            # Close this very figure to keep the memory flat across the loop.
            plt.close(fig)

    ## rmse/var dynamics: ratios above 1 are masked
    fig = plt.figure(figsize=(4, 3))
    ax = fig.add_subplot(111)
    ratio_ = nomorethan(ratio_.mean(axis=-1), 1)
    ax.plot(sizes_, ratio_)
    ax.set_title(title_template_%('MSE - var',))

    filename_ = (filename_template_%("", "ratio",)).replace(" ", "_").replace(".", ",")
    fig_file_name_ = os.path.join(output_path_, filename_ + ".pdf")
    fig.savefig(fig_file_name_, dpi=120)
    plt.close(fig)

## Profile plots

In [None]:
# Test inputs of the profiles: 501 equispaced points on [0, 1], as a column.
XX_test = np.linspace(0, 1, num=501)[:, np.newaxis]

# Read the Gaussian and the non-Gaussian profiles sequentially.
prof_gauss, prof_nongauss = (
    load_profiles(os.path.join(DATA_PATH, folder_), verbose=1, n_jobs=1)
    for folder_ in ('prof_gauss', 'prof_nongauss'))

# Pool both families in one dictionary (this mutates `prof_gauss` in place).
prof_gauss.update(prof_nongauss)

In [None]:
# For every setting, sample size, level and procedure group save one figure
# with the test targets, the prediction and the bounds of the group's procedures.
ncm_names_ = pd.unique(ncms_)

# The sort key puts string-valued theta0 after the numeric ones explicitly,
# because floats and strings cannot be compared directly on Python 3.
for key_ in sorted(prof_gauss.keys(),
                   key=lambda x: (x[0], x[1], x[3], isinstance(x[2], str), x[2])):
    name_, noise_, theta0_, nugget_ = key_
    ratio_, sizes_, bounds_, y_test_, y_hat_ = prof_gauss[key_]

    # <PROFILE_PATH>/<name>/<noise>_<nugget>/
    output_path_ = mkdirifnot(os.path.join(PROFILE_PATH, name_))
    output_path_ = mkdirifnot(os.path.join(output_path_, "%g_%g"%(noise_, nugget_,)))

    # theta0 is either a float or a string: the title typesets the latter in quotes.
    theta_= ("%g" if isinstance(theta0_, float) else "$'%s'$")%(theta0_,)
    title_template_ = "%%s: %%s ($\\theta=%s, \\lambda=%g, \\gamma=%g$)"%(theta_, nugget_, noise_)

    theta_= ("%g" if isinstance(theta0_, float) else "%s")%(theta0_,)
    filename_template_ = "%%s%s %g %g %s %%s"%(name_, noise_, nugget_, theta_)

    ## Profile
    for s_ in range(len(sizes_)):
        output_path_current_ = mkdirifnot(os.path.join(output_path_, "%d"%(sizes_[s_],)))

        # The y-limits depend on the sample size only, so compute them once.
        # NOTE(review): scaling the extremes by 1.5 widens the view only when
        # min < 0 < max -- confirm that the targets always straddle zero.
        max_, min_ = y_test_[s_].max()*1.5, y_test_[s_].min()*1.5
        if name_=="heaviside": min_, max_ = -0.95, 1.95
        has_limits_ = np.isfinite(min_) and np.isfinite(max_)

        for i_ in range(4):
            for ncm_ in ncm_names_:
                fig = plt.figure(figsize=(5, 4))
                ax = fig.add_subplot(111)
                if has_limits_:
                    ax.set_ylim(min_, max_)
                ax.plot(XX_test, y_test_[s_], c="#c0c0c0", lw=2, alpha=.5, label="$y_x$")
                ax.plot(XX_test, y_hat_[s_], c='k', label="$\\hat{y}_x$")
                # Both bounds of a procedure share a colour; only one gets the legend entry.
                for j, b in enumerate(np.flatnonzero(ncms_==ncm_)):
                    ax.plot(XX_test, bounds_[b, s_, :, i_, 0], color="rb"[j], label=titles_[b])
                    ax.plot(XX_test, bounds_[b, s_, :, i_, 1], color="rb"[j])
                ax.set_title(title_template_%("%.1f%%-%s"%(levels[i_]*100, ncm_,), name_,))
                ax.legend(loc="best", ncol=2)

                # File names carry neither spaces nor dots.
                filename_ = (filename_template_%("profile ", "%dp-%s %d"%(levels[i_]*100, ncm_, sizes_[s_],),))
                fig_file_name_ = os.path.join(output_path_current_,
                                              filename_.replace(" ", "_").replace(".", ",") + ".pdf")
                fig.savefig(fig_file_name_, dpi=120)
                # Close this very figure to keep the memory flat across the loop.
                plt.close(fig)

## Experiment 2D

In [None]:
# Test inputs of the 2D experiments: a 51 x 51 grid on the unit square, with
# one row per grid node and one column per coordinate.
nd = 2
mesh_ = np.meshgrid(*nd*[np.linspace(0, 1, num=51)])
XX_test = np.stack([ax_.ravel() for ax_ in mesh_], axis=1)

# Read the Gaussian and the non-Gaussian 2D experiments sequentially, keeping
# the targets and the predictions for the surface plots.
exp_gauss_2d, exp_nongauss_2d = (
    load_dumps(os.path.join(DATA_PATH, folder_),
               verbose=1, n_jobs=1, include_target=True)
    for folder_ in ('exp_gauss_2d_25', 'exp_nongauss_2d_25'))

# Pool both families in one dictionary (this mutates `exp_gauss_2d` in place).
exp_gauss_2d.update(exp_nongauss_2d)

In [None]:
from IPython.display import display, HTML

# EXP2D_PATH
# For the 2D settings, taken in reverse sorted order: build the summary tables
# of coverage, width and t-test p-values, then show the target and the
# predicted surfaces.
# NOTE(review): the `break` at the bottom stops after the first setting, so
# only one key is ever processed -- confirm this is intended.
# NOTE(review): `pd.Panel` is not available in recent pandas releases; check
# the pandas version this notebook is expected to run with.
for key_ in sorted(exp_gauss_2d.keys(), key=lambda x: (x[0], x[1], x[3], x[2]))[::-1]:
    name_, noise_, theta0_, nugget_ = key_
    ratio_, sizes_, coverage_, width_, y_test, y_hat = exp_gauss_2d[key_]

    # Coverage averaged over the last axis; the first size is left out of all tables.
    df_coverage_ = pd.Panel(np.mean(coverage_[:, 1:], axis=-1), items=titles_, minor_axis=levels_,
                            major_axis=pd.Index(sizes_[1:], name="size"))

    # Width averaged over the last axis, then summarised over the second-to-last one.
    avg_width_ = np.mean(width_, axis=-1)[:, 1:]
    aw_med_ = np.median(avg_width_, axis=-2)
    aw_q95_ = np.percentile(avg_width_, [95,], axis=-2)[0]
    aw_min_ = np.percentile(avg_width_, [ 5,], axis=-2)[0]
    aw_max_ = np.max(avg_width_, axis=-2)
    pn_med_ = pd.Panel(aw_med_, items=titles_, minor_axis=levels_, major_axis=pd.Index(sizes_[1:], name="size"))
    pn_q95_ = pd.Panel(aw_q95_, items=titles_, minor_axis=levels_, major_axis=pd.Index(sizes_[1:], name="size"))

    # p-values of the one-sample t-test of the coverage against the nominal 1 - level.
    pv_ = np.stack([ttest_1samp(coverage_[:, 1:, j], (1 - levels[j]), axis=-1)[1] for j in xrange(4)], axis=-1)
    pn_pv_ = pd.Panel(pv_, items=titles_, minor_axis=levels_, major_axis=pd.Index(sizes_[1:], name="size"))

    # One long table with all the statistics; it is only used by the
    # commented-out display lines below.
    df_output_ = pd.concat({"width. med": pn_med_.to_frame(),
                            "width 95%": pn_q95_.to_frame(),
                            "coverage": df_coverage_.to_frame(),
                            "t-test": pn_pv_.to_frame()},
                           axis=0, names=["statistic"])\
                   .swaplevel(0, 1, axis=0).sort_index(axis=0)
    print key_
#     display(HTML(df_output_.to_html(float_format=lambda f: "%0.3f"%(f,))))
#     print df_output_.to_latex(float_format=lambda f: "%0.3f"%(f,))
    
    ## Show the true surface (second size, averaged over the last axis)
    y_test_ = y_test[1].mean(axis=-1)
    y_hat_ = y_hat[1].mean(axis=-1)
    fig = plt.figure(figsize=(6, 3))
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(mesh_[0], mesh_[1], y_test_.reshape(mesh_[0].shape),
                    cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                    antialiased=False, alpha=0.9)
    ax.view_init(60, -60)
    plt.show()

    ## The approximated surface
    fig = plt.figure(figsize=(6, 3))
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(mesh_[0], mesh_[1], y_hat_.reshape(mesh_[0].shape),
                    cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                    antialiased=False, alpha=0.9)
    ax.view_init(60, -60)
    plt.show()
    
#     ## The GPR-p
#     fig = plt.figure(figsize=(6, 3))
#     ax = fig.add_subplot(111, projection='3d')
#     ax.plot_surface(mesh_[0], mesh_[1], avg_width_[0, 0, :, 2].reshape(mesh_[0].shape),
#                     cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
#                     antialiased=False, alpha=0.9)
#     ax.view_init(60, -60)
#     ax.set_title(titles_[0])
#     plt.show()
    
#     ## The RRCM / GPR-p : j=0..3, i=2..5
#     i, j = 3, 2
#     awr_ = avg_width_[i, 0, :, j] / avg_width_[0, 0, :, j] - 1
#     fig = plt.figure(figsize=(6, 3))
#     ax = fig.add_subplot(111, projection='3d')
#     ax.plot_surface(mesh_[0], mesh_[1], awr_.reshape(mesh_[0].shape),
#                     cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
#                     antialiased=False, alpha=0.9)
#     ax.view_init(60, -60)
#     ax.set_title(titles_[2])
#     plt.show()
    
    # MSE-to-variance ratio of the second size, averaged over the last axis.
    print ratio_.mean(axis=-1)[1, 0]
    break

In [None]:
# Histogram of the log of `awr_`.
# NOTE(review): in the visible notebook `awr_` is assigned only inside the
# commented-out part of the 2D loop, so this cell needs that code re-enabled.
plt.hist(np.log(awr_), bins=100)

In [None]:
# Show the t-test p-values left over from the last iteration of the 2D loop.
pn_pv_.to_frame()

In [None]:
# For every 1D setting keep: the coverage averaged over the last axis, the
# p-values of the one-sample t-test of the coverage against the nominal
# 1 - level, and the raw coverage array.
# NOTE(review): `pd.Panel` is not available in recent pandas releases; check
# the pandas version this notebook is expected to run with.
coverage_ = dict()
for key_, item_ in exp_gauss_1d.items():
    sizes_ = pd.Index(item_[1], name="size")
    noise_, theta0_, nugget_ = key_[1:]
    df_cov_ = pd.Panel(item_[2].mean(axis=-1), items=titles_, major_axis=sizes_, minor_axis=levels_).to_frame()
    pv_ = np.stack([ttest_1samp(item_[2][:,:,j], (1-levels[j]), axis=-1)[1] for j in range(4)], axis=-1)
    df_cov_pv_ = pd.Panel(pv_, items=titles_, major_axis=sizes_, minor_axis=levels_).to_frame()
    coverage_[key_] = df_cov_, df_cov_pv_, item_[2]

In [None]:
from scipy.stats import ttest_1samp

In [None]:
# Shape of the coverage array of `item_`, the leftover loop variable of the
# summary cell above.
item_[2].shape

In [None]:
# t-test p-values of the coverage against the nominal 1 - level, recomputed
# for the leftover `item_`.
np.stack([ttest_1samp(item_[2][:,:,j], (1-levels[j]), axis=-1)[1] for j in xrange(4)], axis=-1)

In [None]:
# Absolute deviation of the mean coverage from the nominal 1 - level, in
# units of the standard deviation over the last axis.
np.abs(item_[2].mean(axis=-1)-(1-levels)[np.newaxis, np.newaxis]) / item_[2].std(axis=-1)

In [None]:
# Binomial test of the number of hits against the nominal 1 - level;
# p-values below 0.001 are zeroed out.
# NOTE(review): 2601 presumably is the size of the 51 x 51 test grid of the 2D
# experiments, whereas `item_` here comes from the 1D results -- confirm.
hits_ = np.round(item_[2].mean(axis=-1)*2601)
pv_ = np.stack([np.vectorize(lambda x: binom_test(x, n=2601, p=1 - levels[j]))(hits_[..., j]) for j in range(4)], axis=-1)
pv_[pv_ < 0.001] = 0

In [None]:
# Show the p-values computed in the previous cell.
pv_

In [None]:
# Shape of the coverage array of the leftover `item_` (same check as above).
item_[2].shape

In [None]:
# NOTE(review): `j` is not assigned in this cell; it is whatever an earlier
# loop or (on Python 2) list comprehension left behind.
item_[2][..., j, :].shape

In [None]:
# NOTE(review): `df_` is not assigned anywhere in the visible notebook, so
# this cell depends on a cell that is no longer shown.
df_.T

In [None]:
# LaTeX source of the transposed table (`df_` is not assigned in the visible
# notebook, see the cell above).
print df_.T.to_latex()

In [None]:
# Surface plot of one slice of `m_w_` over the 2D test grid.
# NOTE(review): `m_w_` is not assigned anywhere in the visible notebook.
fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(mesh_[0], mesh_[1], m_w_[1, 1, :, 0].reshape(mesh_[0].shape),
                cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                antialiased=False, alpha=0.9)
ax.view_init(60, -60)

## Sample pictures

In [None]:
# Test inputs of the sample pictures: 501 equispaced points on [-1, 1], as a
# column. Both names below rebind variables set by the profile cells above.
XX_test = np.linspace(-1, 1, num=501)[:, np.newaxis]
prof_nongauss = load_profiles(os.path.join(".", 'prof_nongauss_lap'),
                              n_jobs=1, verbose=1)

In [None]:
# For every setting, sample size, level and procedure group save one figure
# with the test targets, the prediction and the bounds of the group's
# procedures. The y-limits are left to matplotlib here.
ncm_names_ = pd.unique(ncms_)

# The sort key puts string-valued theta0 after the numeric ones explicitly,
# because floats and strings cannot be compared directly on Python 3.
for key_ in sorted(prof_nongauss.keys(),
                   key=lambda x: (x[0], x[1], x[3], isinstance(x[2], str), x[2])):
    name_, noise_, theta0_, nugget_ = key_
    ratio_, sizes_, bounds_, y_test_, y_hat_ = prof_nongauss[key_]

    # <PROFILE_PATH>/<name>lap/<noise> <nugget>/
    output_path_ = mkdirifnot(os.path.join(PROFILE_PATH, name_ + "lap"))
    output_path_ = mkdirifnot(os.path.join(output_path_, "%g %g"%(noise_, nugget_,)))

    # theta0 is either a float or a string: the title typesets the latter in quotes.
    theta_= ("%g" if isinstance(theta0_, float) else "$'%s'$")%(theta0_,)
    title_template_ = "%%s: %%s ($\\theta=%s, \\lambda=%g, \\gamma=%g$)"%(theta_, nugget_, noise_)

    theta_= ("%g" if isinstance(theta0_, float) else "%s")%(theta0_,)
    filename_template_ = "%%s%s %g %g %s %%s"%(name_, noise_, nugget_, theta_)

    ## Profile
    for s_ in range(len(sizes_)):
        output_path_current_ = mkdirifnot(os.path.join(output_path_, "%d"%(sizes_[s_],)))
        for i_ in range(4):
            for ncm_ in ncm_names_:
                fig = plt.figure(figsize=(5, 4))
                ax = fig.add_subplot(111)
                ax.plot(XX_test, y_test_[s_], c="#c0c0c0", lw=2, alpha=.5, label="$y_x$")
                ax.plot(XX_test, y_hat_[s_], c='k', label="$\\hat{y}_x$")
                # Both bounds of a procedure share a colour; only one gets the legend entry.
                for j, b in enumerate(np.flatnonzero(ncms_==ncm_)):
                    ax.plot(XX_test, bounds_[b, s_, :, i_, 0], color="rb"[j], label=titles_[b])
                    ax.plot(XX_test, bounds_[b, s_, :, i_, 1], color="rb"[j])
                ax.set_title(title_template_%("%.1f%%-%s"%(levels[i_]*100, ncm_,), name_,))
                ax.legend(loc="best", ncol=2)

                # File names carry neither spaces nor dots.
                filename_ = (filename_template_%("profile ", "%dp-%s %d"%(levels[i_]*100, ncm_, sizes_[s_],),))
                fig_file_name_ = os.path.join(output_path_current_,
                                              filename_.replace(" ", "_").replace(".", ",") + ".pdf")
                fig.savefig(fig_file_name_, dpi=120)
                # Close this very figure to keep the memory flat across the loop.
                plt.close(fig)

In [None]:
# Settings of the legacy plots. Beware: `titles_` is rebound here to a plain
# list of five names, replacing the six-name index defined earlier.
colors_ = "bgrm"
titles_ = ["GPR", "RRCM" ,"CRR", "RRCM-loo", "CRR-loo"]
levels = np.flipud(np.asanyarray([0.01, 0.05, 0.10, 0.25]))

import numpy as np
from sklearn.grid_search import ParameterGrid
from sklearn.preprocessing import StandardScaler
from sklearn.gaussian_process import GaussianProcess
from sklearn.base import clone

from scipy.stats import norm
from joblib import Parallel, delayed

from utils.functions_1d import f6, pressure2, heaviside

from utils.conformal import RRCM, CRR
from utils.KRR import KRR_AB

def _helper(y, A, B, proc=RRCM, levels=levels, parallel=None, n_jobs=1, verbose=0):
    """Build the conformal regions of every test point and summarise them.

    `proc` is called as ``proc(A[k], B[k], levels=levels)`` for each test
    point ``k``. Its result is iterated over; every entry is a 2D array of
    intervals with the lower ends in column 0 and the upper ends in column 1
    (presumably one entry per level -- confirm against `utils.conformal`).

    Parameters
    ----------
    y : array
        Test targets; the leading axis enumerates the test points.
    A, B : indexable
        Inputs of `proc`, indexed by the test point.
    proc : callable
        The conformal procedure, `RRCM` by default.
    levels : array
        Miscoverage levels forwarded to `proc`.
    parallel : joblib.Parallel or None
        A pool to reuse; a new one is made from `n_jobs` and `verbose`
        when anything else is passed.
    n_jobs, verbose : int
        Settings of the pool created when `parallel` is not supplied.

    Returns
    -------
    hits_ : array
        1.0 where the target falls inside at least one interval, else 0.0.
    width_ : array
        Total length of the intervals of every region entry.
    bounds_ : array
        The smallest lower end and the largest upper end of every entry.
    """
    if not isinstance(parallel, Parallel):
        parallel = Parallel(n_jobs=n_jobs, verbose=verbose)

    ## Construct the conformal regions, one job per test point
    regions = parallel(delayed(proc)(A[k], B[k], levels=levels)
                       for k in range(y.shape[0]))

    ## See if the transformed test target values are within the conformal region
    hits_ = np.asarray(
        [[np.any(((int_[:, 0] <= target) & (target <= int_[:, 1]))).astype(float)
          for int_ in region]
         for target, region in zip(y, regions)])

    width_ = np.asarray(
        [[np.sum(int_[:, 1] - int_[:, 0]) for int_ in region] for region in regions])

    bounds_ = np.asarray(
        [[[int_[:, 0].min(), int_[:, 1].max()] for int_ in region] for region in regions])
    return hits_, width_, bounds_

# Generate and save sample profiles: for every test function, noise level and
# (size, nugget, theta0) combination fit a GP on a random train sample, build
# the Bayesian and the four conformal intervals, and save one figure.
# The statement order matters: all random draws come from one `random_state`.
n_jobs, verbose = -1, 0
parallel_ = Parallel(n_jobs=n_jobs, verbose=verbose)

# Silence all numpy floating point warnings from here on.
np.seterr(all="ignore")

random_state = np.random.RandomState(0x6AE89C43)
levels = np.asanyarray([0.01, 0.05, 0.10, 0.25])[::-1]

## Initialize
scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
kernel = 'rbf' # 'laplacian'
gp = GaussianProcess(beta0=0, normalize=False, corr='squared_exponential')

funcs_ = [f6, pressure2, heaviside]

grid_ = ParameterGrid(dict(size=[150, 500,],
                           nugget=[1e-6, 1e-2],
                           theta0=[1e-1, 1.0, 1e+1,]))

## Get a sample realisation
XX_test = np.linspace(0, 1, num=1001).reshape((-1, 1))
XX_train = random_state.uniform(size=(10000, 1))

# The test points come first, so `test_` slices them off the joint sample.
XX = np.concatenate([XX_test, XX_train], axis=0)
test_ = np.s_[:XX_test.shape[0]]

# NOTE(review): `PLOT_PATH` is not assigned anywhere in the visible notebook.
output_path_ = mkdirifnot(os.path.join(PLOT_PATH, "profiles"))
for dgp_ in funcs_:
    for noise_ in [1e-6, 1e-1,]:
        yy = dgp_(XX)
        if yy.ndim == 1:
            yy = yy.reshape((-1, 1))
        if noise_ > 0:
            yy += random_state.normal(size=yy.shape) * noise_
        yy_train, yy_test = np.delete(yy, test_, axis=0), yy[test_].copy()
        del yy

        for i_, par_ in enumerate(grid_):
            size_, nugget_, theta0_ = par_['size'], par_['nugget'], par_['theta0']

            
            theta_= ("%g" if isinstance(theta0_, float) else "$'%s'$")%(theta0_,)
            title_template_ = "%%s %%g ($n=%d, \\theta=%s, \\lambda=%g$)"%(size_, theta_, nugget_)

            # Draw random train sample
            train_ = random_state.choice(range(XX_train.shape[0]),
                                         size=size_, replace=False)
            X, y = XX_train[train_], yy_train[train_]

            # Standardise the inputs and the targets with the train statistics.
            Xscl_, yscl_ = clone(scaler).fit(X), clone(scaler).fit(y)
            X_, XX_test_ = Xscl_.transform(X), Xscl_.transform(XX_test)
            y_, yy_test_ = yscl_.transform(y), yscl_.transform(yy_test)
        
            # Fit a GPR
            gp_ = clone(gp)
            gp_.nugget = nugget_
            if isinstance(theta0_, float):
                gp_.theta0 = theta0_
            elif theta0_ == "auto":
                gp_.thetaL, gp_.thetaU, gp_.theta0 = 1e-4, 1e4, float(size_)
            gp_.fit(X_, y_)

            # Compute the A, B matrices
            A, B, y_hat_, MM, loo_, A_loo, B_loo = KRR_AB(
                X_, y_, XX_test_, forecast=True, nugget=gp_.nugget,
                metric=kernel, gamma=gp_.theta_[0])
            del loo_

            # Inflate by the estimated magnitude
            MM *= gp_.sigma2


            ## Construct the Bayesian interval
            z_a = norm.ppf(1 - .5 * levels)
            half_width_ = np.sqrt(MM) * z_a[np.newaxis]
            b_bounds_ = yscl_.inverse_transform(
                np.stack([y_hat_ - half_width_, y_hat_ + half_width_], axis=-1))
            b_width_ = b_bounds_[..., 1] - b_bounds_[..., 0]
            b_hits_ = ((b_bounds_[..., 0] <= yy_test) & (yy_test <= b_bounds_[..., 1])).astype(float)

            ## Construct the CKRR confidence interval: RRCM
            rrcm_hits_, rrcm_width_, rrcm_bounds_ = _helper(yy_test_, A[0], B, proc=RRCM,
                                              levels=levels, parallel=parallel_)
            if yscl_.scale_ is not None:
                rrcm_width_ *= yscl_.scale_

            ## Construct the CKRR confidence interval: CRR
            crr_hits_, crr_width_, crr_bounds_ = _helper(yy_test_, A[0], B, proc=CRR,
                                            levels=levels, parallel=parallel_)
            if yscl_.scale_ is not None:
                crr_width_ *= yscl_.scale_

            ## Construct the CKRR confidence interval: RRCM on the `_loo` matrices
            loo_rrcm_hits_, loo_rrcm_width_, loo_rrcm_bounds_ = _helper(yy_test_, A_loo[0], B_loo, proc=RRCM,
                                                      levels=levels, parallel=parallel_)
            if yscl_.scale_ is not None:
                loo_rrcm_width_ *= yscl_.scale_

            ## Construct the CKRR confidence interval: CRR on the `_loo` matrices
            loo_crr_hits_, loo_crr_width_, loo_crr_bounds_ = _helper(yy_test_, A_loo[0], B_loo, proc=CRR,
                                                    levels=levels, parallel=parallel_)
            if yscl_.scale_ is not None:
                loo_crr_width_ *= yscl_.scale_

            # Map the bounds and the prediction back to the original target scale.
            rrcm_bounds = yscl_.inverse_transform(rrcm_bounds_)
            crr_bounds = yscl_.inverse_transform(crr_bounds_)
            loo_rrcm_bounds = yscl_.inverse_transform(loo_rrcm_bounds_)
            loo_crr_bounds = yscl_.inverse_transform(loo_crr_bounds_)
            y_hat = yscl_.inverse_transform(y_hat_)

            # The last axis enumerates the four conformal procedures.
            bounds = np.stack([rrcm_bounds, crr_bounds,
                               loo_rrcm_bounds, loo_crr_bounds],
                              axis=-1)

            ## Profile: index -2 picks the second-to-last entry of `levels`
            col_ = list("rbgk")
            fig = plt.figure(figsize=(5, 3))
            ax = fig.add_subplot(111)
            ax.plot(XX_test, yy_test, c="k", alpha=0.5)
            ax.plot(XX_test, y_hat, c='y')
            for j in range(4):
                ax.plot(XX_test, bounds[:, -2, 0, j], color=col_[j], alpha=.5)
                ax.plot(XX_test, bounds[:, -2, 1, j], color=col_[j], alpha=.5)
            ax.plot(XX_test, b_bounds_[:, -2, 0], color="m")
            ax.plot(XX_test, b_bounds_[:, -2, 1], color="m")
            ax.set_title(title_template_%(dgp_.__name__, noise_))

#             plt.show()
            
            fig_file_name_ = os.path.join(output_path_, "%s %g %d %s %g.png"
                                          %(dgp_.__name__, noise_, size_, theta_, nugget_,))
            fig.savefig(fig_file_name_)
            plt.close()
            print fig_file_name_

## Experiment #1

In [None]:
# Read the dumps of the first experiment from `./exp1` (all cores, no targets).
experiment_1 = load_dumps('./exp1')

Create the plots for the first experiment

In [None]:
import matplotlib.gridspec as gridspec
# A 3 x 2 layout with a thin bottom row; the next cell rebinds `gs` to a
# 2 x 2 layout before any figure uses it.
gs = gridspec.GridSpec(3, 2, height_ratios=[6, 6, 1])
# ax = figure.add_subplot(gs[0, 0])

In [None]:
# Coverage figures of the first experiment: a 2 x 2 grid for the procedures
# 1..4 and a separate figure for procedure 0, both drawn by `coverage_plot`.
# NOTE(review): `PLOT_PATH` is not assigned anywhere in the visible notebook.
# NOTE(review): `titles_` holds five names here whereas `load_dumps` stacks six
# procedures -- confirm that the labels match the layout of the `./exp1` dumps.
levels = np.asanyarray([0.01, 0.05, 0.10, 0.25])[::-1]
titles_ = ["GPR", "RRCM" ,"CRR", "RRCM-loo", "CRR-loo"]
colors_ = "bgrm"
gs = gridspec.GridSpec(2, 2, height_ratios=[6, 6])

for key_, (rmse_, sizes_, coverage_, width_) in experiment_1.items():
    output_path_ = mkdirifnot(os.path.join(PLOT_PATH, key_[0]))

    # key_[2] is either a float or a string: the title typesets the latter in quotes.
    theta_= ("%g" if isinstance(key_[2], float) else "$'%s'$")%(key_[2],)
    title_template_ = "%%s ($\\theta=%s, \\lambda=%g$)"%(theta_, key_[3])
    theta_= ("%g" if isinstance(key_[2], float) else "%s")%(key_[2],)

    ## Conformal procedures
    fig = plt.figure(figsize=(7, 6))
    for j in range(1, 5):
        ax = coverage_plot(fig.add_subplot(gs[j-1]), sizes_, coverage_[j], levels)
        ax.set_title(title_template_%(titles_[j],))

    fig_file_name_ = os.path.join(output_path_, "%s %g %s %g conf_coverage.png"
                                  %(key_[0], key_[1], theta_, key_[3],))
    fig.savefig(fig_file_name_)
    plt.close(fig)
    print(fig_file_name_)

    ## Bayes
    fig = plt.figure(figsize=(7, 3))
    ax = coverage_plot(fig.add_subplot(111), sizes_, coverage_[0], levels)
    ax.set_title(title_template_%(titles_[0],))

    fig_file_name_ = os.path.join(output_path_, "%s %g %s %g gpr_coverage.png"
                                  %(key_[0], key_[1], theta_, key_[3],))
    fig.savefig(fig_file_name_)
    plt.close(fig)

In [None]:
# Shape of `MM`, the leftover of the last iteration of the profile-generation cell.
np.stack(MM.shape, axis=-1)

In [None]:
import numpy as np

from utils.functions import gaussian
from utils.functions_2d import f2, f5

# Draw a noisy 2D sample of `f5`: a 51 x 51 test grid on [-1, 1]^2 plus 1500
# uniformly scattered train points.
# NOTE(review): `random_state` is reused from the profile-generation cell, so
# the sample depends on how many draws were made before this cell ran.
func_ = f5


# random_state = np.random.RandomState(0xCAFFE14E)
nd = 2
mesh_ = np.meshgrid(*nd*[np.linspace(-1, 1, num=51)])
XX_test = np.concatenate([ax_.reshape((-1, 1)) for ax_ in mesh_], axis=1)
XX_train = 2*random_state.uniform(size=(1500, nd))-1

# XX_test = np.linspace(0, 1, num=1001).reshape((-1, 1))
# XX_train = random_state.uniform(size=(1000, 1))



# The test points come first, so `test_` slices them off the joint sample.
XX = np.concatenate([XX_test, XX_train], axis=0)
test_ = np.s_[:XX_test.shape[0]]
# yy = gaussian(XX, scale=1.0, nugget=1e-6, metric=kernel,
#               gamma=100, random_state=random_state)
yy = func_(XX)
if yy.ndim == 1:
    yy = yy.reshape((-1, 1))
# Additive Gaussian noise with standard deviation 0.1.
yy += random_state.normal(size=yy.shape) * 1e-1


yy_train, yy_test = np.delete(yy, test_, axis=0), yy[test_].copy()

In [None]:
# Surface of the noisy test targets over the grid.
# NOTE(review): the subplot position `j` is not assigned in this cell; it is
# whatever an earlier loop left behind.
fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot(2, 2, j, projection='3d')
ax.plot_surface(mesh_[0], mesh_[1], yy_test.reshape(mesh_[0].shape),
                cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                antialiased=False, alpha=0.9)
ax.view_init(60, -60)

In [None]:
# Triangulated surface of the scattered train sample.
# NOTE(review): the subplot position `j` is not assigned in this cell; it is
# whatever an earlier loop left behind.
fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot(2, 2, j, projection='3d')
ax.plot_trisurf(XX_train[:, 0], XX_train[:, 1], yy_train[:,0],
                cmap=plt.cm.coolwarm, lw=0,
                antialiased=False, alpha=0.9, shade=True)
ax.view_init(60, -60)

In [None]:
# Fit a squared-exponential GP with a fixed theta0 to the raw (unscaled) train
# sample; this rebinds the `gp` template of the profile-generation cell.
gp = GaussianProcess(beta0=0, normalize=False, corr='squared_exponential', nugget=1e-1,
                     theta0=10)#, thetaL=1, thetaU=1e+4)
gp.fit(XX_train, yy_train)

In [None]:
# The `theta_` attribute of the fitted GP.
gp.theta_

In [None]:
# Prediction on the test grid, together with the MSE reported by the GP.
hat_, mse_ = gp.predict(XX_test, eval_MSE=True)

In [None]:
# Test mean squared error relative to the variance of the test targets.
np.mean((yy_test-hat_)**2) / np.var(yy_test)

In [None]:
# Surface of the absolute prediction error over the grid.
# NOTE(review): the subplot position `j` is not assigned in this cell; it is
# whatever an earlier loop left behind.
fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot(2, 2, j, projection='3d')
ax.plot_surface(mesh_[0], mesh_[1], np.abs(yy_test-hat_).reshape(mesh_[0].shape),
                cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                antialiased=False, alpha=0.9)
ax.view_init(60, -60)

# plt.plot(np.sqrt(mse_))

In [None]:
# Surface of the GP prediction over the grid.
# NOTE(review): the subplot position `j` is not assigned in this cell; it is
# whatever an earlier loop left behind.
fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot(2, 2, j, projection='3d')
ax.plot_surface(mesh_[0], mesh_[1], hat_.reshape(mesh_[0].shape),
                cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                antialiased=False, alpha=0.9)
ax.view_init(60, -60)

In [None]:
# Targets and prediction against the test inputs.
# NOTE(review): this looks like a leftover of the 1D variant of the sample
# (see the commented-out lines in the sampling cell); `XX_test` has two
# columns at this point -- confirm the cell is still wanted.
plt.plot(XX_test, yy_test)
plt.plot(XX_test, hat_, c='r', lw=2)


In [None]:
# Absolute prediction error against the test inputs (same caveat as the cell
# above: `XX_test` has two columns at this point).
plt.plot(XX_test, np.abs(yy_test-hat_), c='r', lw=2)

## Experiment #3

In [None]:
# Read the dumps of the third experiment and reset the plot settings.
experiment_3 = load_dumps('./exp3')
import matplotlib.gridspec as gridspec
# This 3 x 2 layout is replaced by the 2 x 2 one a few lines below.
gs = gridspec.GridSpec(3, 2, height_ratios=[6, 6, 1])
levels = np.asanyarray([0.01, 0.05, 0.10, 0.25])[::-1]
# Five plain names: this replaces the six-name index defined near the top.
titles_ = ["GPR", "RRCM" ,"CRR", "RRCM-loo", "CRR-loo"]
colors_ = "bgrm"
# gs = gridspec.GridSpec(3, 2, height_ratios=[6, 6, 1])
gs = gridspec.GridSpec(2, 2, height_ratios=[6, 6])

for key_, (rmse_, sizes_, coverage_, width_) in experiment_3.iteritems():
    output_path_ = mkdirifnot(os.path.join(PLOT_PATH, key_[0]))
    theta_= ("%g" if isinstance(key_[2], float) else "$'%s'$")%(key_[2],)
    title_template_ = "%%s ($\\theta=%s, \\lambda=%g$)"%(theta_, key_[3])
    cov_ = np.median(coverage_, axis=-1)
    cov_lo_, cov_hi_ = np.percentile(coverage_, [25, 75], axis=-1)
    fig = plt.figure(figsize=(7, 6))
#     ax = fig.add_subplot(gs[-1:, :])
#     ax.plot(sizes_, rmse_)
#     ax.set_yticks([])
    for j in xrange(1, 5):
#         ax = fig.add_subplot(2, 2, j+1-1)
        ax = fig.add_subplot(gs[j-1])
        ax.set_ylim(0.65, 1.025)
        ax.set_xlim(25, 1600)
        ax.locator_params(axis="x", nbins=5)
        ax.set_yticks(1-levels)
        ax.set_title(title_template_%(titles_[j],))
        for i in xrange(4):
            ax.plot(sizes_, cov_[j, :, i], color=colors_[i])
            ax.plot(sizes_, cov_hi_[j, :, i], color=colors_[i], alpha=0.5)
            ax.plot(sizes_, cov_lo_[j, :, i], color=colors_[i], alpha=0.5)
            ax.axhline(y=1 - levels[i], color='black', alpha=0.25)
#     fig.tight_layout()
    theta_= ("%g" if isinstance(key_[2], float) else "%s")%(key_[2],)
    fig_file_name_ = os.path.join(output_path_, "%s %g %s %g conf_coverage.png"
                                  %(key_[0], key_[1], theta_, key_[3],))
    fig.savefig(fig_file_name_)
    plt.close()
#     plt.show()
    print fig_file_name_
    
## Bayes
    fig = plt.figure(figsize=(7, 3))
    ax = fig.add_subplot(111)
    ax.set_ylim(0.65, 1.025)
    ax.set_xlim(25, 1600)
    ax.locator_params(axis="x", nbins=5)
    ax.set_yticks(1-levels)
    ax.set_title(title_template_%(titles_[0],))
    for i in xrange(4):
        ax.plot(sizes_, cov_[0, :, i], color=colors_[i])
        ax.plot(sizes_, cov_hi_[0, :, i], color=colors_[i], alpha=0.5)
        ax.plot(sizes_, cov_lo_[0, :, i], color=colors_[i], alpha=0.5)
        ax.axhline(y=1 - levels[i], color='black', alpha=0.25)
    
    fig_file_name_ = os.path.join(output_path_, "%s %g %s %g gpr_coverage.png"
                                  %(key_[0], key_[1], theta_, key_[3],))
    fig.savefig(fig_file_name_)
    plt.close()
#     plt.show()
#     break

## Experiment #5

In [None]:
experiment_5 = load_dumps('./exp5')
import matplotlib.gridspec as gridspec

# Miscoverage levels in decreasing order (0.25, 0.10, 0.05, 0.01); the
# reference lines are drawn at 1 - level.
levels = np.asanyarray([0.01, 0.05, 0.10, 0.25])[::-1]
titles_ = ["GPR", "RRCM" ,"CRR", "RRCM-loo", "CRR-loo"]
colors_ = "bgrm"
# One panel per conformal procedure (titles_[1:]) on a 2x2 grid.
gs = gridspec.GridSpec(2, 2, height_ratios=[6, 6])

for key_, (rmse_, sizes_, coverage_, width_) in experiment_5.iteritems():
    output_path_ = mkdirifnot(os.path.join(PLOT_PATH, key_[0]))

    # key_[2] is theta: either a number or a keyword string. Integer values
    # (e.g. 1) are numbers too and must not be rendered as a quoted keyword.
    theta_is_number_ = isinstance(key_[2], (int, float))
    theta_ = ("%g" if theta_is_number_ else "$'%s'$")%(key_[2],)
    title_template_ = "%%s ($\\theta=%s, \\lambda=%g$)"%(theta_, key_[3])

    # Median and inter-quartile band of the coverage over the last axis
    # (presumably the replications -- confirm against load_dumps).
    cov_ = np.median(coverage_, axis=-1)
    cov_lo_, cov_hi_ = np.percentile(coverage_, [25, 75], axis=-1)

    ## Conformal procedures
    fig = plt.figure(figsize=(7, 6))
    for j in xrange(1, 5):
        ax = fig.add_subplot(gs[j-1])
        ax.set_ylim(0.65, 1.025)
        ax.set_xlim(25, 1600)
        ax.locator_params(axis="x", nbins=5)
        ax.set_yticks(1-levels)
        ax.set_title(title_template_%(titles_[j],))
        for i in xrange(4):
            ax.plot(sizes_, cov_[j, :, i], color=colors_[i])
            ax.plot(sizes_, cov_hi_[j, :, i], color=colors_[i], alpha=0.5)
            ax.plot(sizes_, cov_lo_[j, :, i], color=colors_[i], alpha=0.5)
            ax.axhline(y=1 - levels[i], color='black', alpha=0.25)

    # Plain (non-LaTeX) rendering of theta for the file names.
    theta_ = ("%g" if theta_is_number_ else "%s")%(key_[2],)
    fig_file_name_ = os.path.join(output_path_, "%s %g %s %g conf_coverage.png"
                                  %(key_[0], key_[1], theta_, key_[3],))
    fig.savefig(fig_file_name_)
    plt.close(fig)
    print(fig_file_name_)

    ## Bayes
    fig = plt.figure(figsize=(7, 3))
    ax = fig.add_subplot(111)
    ax.set_ylim(0.65, 1.025)
    ax.set_xlim(25, 1600)
    ax.locator_params(axis="x", nbins=5)
    ax.set_yticks(1-levels)
    ax.set_title(title_template_%(titles_[0],))
    for i in xrange(4):
        ax.plot(sizes_, cov_[0, :, i], color=colors_[i])
        ax.plot(sizes_, cov_hi_[0, :, i], color=colors_[i], alpha=0.5)
        ax.plot(sizes_, cov_lo_[0, :, i], color=colors_[i], alpha=0.5)
        ax.axhline(y=1 - levels[i], color='black', alpha=0.25)

    fig_file_name_ = os.path.join(output_path_, "%s %g %s %g gpr_coverage.png"
                                  %(key_[0], key_[1], theta_, key_[3],))
    fig.savefig(fig_file_name_)
    plt.close(fig)

In [None]:
import numpy as np
from sklearn.grid_search import ParameterGrid
from sklearn.preprocessing import StandardScaler
from sklearn.gaussian_process import GaussianProcess
from sklearn.base import clone

from scipy.stats import norm
from joblib import Parallel, delayed

from utils.functions_1d import f6, pressure2, heaviside

from utils.conformal import RRCM, CRR
from utils.KRR import KRR_AB

def _helper(y, A, B, proc=RRCM, levels=levels, parallel=None, n_jobs=1, verbose=0):
    """Build a conformal region for every test point and summarise it.

    Parameters
    ----------
    y : array
        Test targets; iterated along the first axis, one entry per test point.
    A, B : indexable
        `A[k]` and `B[k]` are handed to `proc` for the k-th test point.
    proc : callable
        Region constructor, called as `proc(A[k], B[k], levels=levels)`. Its
        result is iterated, and every item is indexed as a two-column array
        of (lower, upper) interval endpoints.
    levels : array
        Forwarded to `proc` unchanged.
    parallel : joblib.Parallel or None
        Executor to reuse; anything else makes the function create its own.
    n_jobs, verbose : int
        Only used when a new `Parallel` executor is created.

    Returns
    -------
    hits_ : ndarray
        1.0 where the target falls inside at least one interval, else 0.0.
    width_ : ndarray
        Sum of the interval lengths of every region.
    bounds_ : ndarray
        Smallest lower and largest upper endpoint of every region.
    """
    if isinstance(parallel, Parallel):
        runner_ = parallel
    else:
        runner_ = Parallel(n_jobs=n_jobs, verbose=verbose)

    # One job per test point.
    job_ = delayed(proc)
    regions = runner_(job_(A[k], B[k], levels=levels)
                      for k in xrange(y.shape[0]))

    hits_, width_, bounds_ = list(), list(), list()
    for target, region in zip(y, regions):
        hit_row_, width_row_, bound_row_ = list(), list(), list()
        for int_ in region:
            lower_, upper_ = int_[:, 0], int_[:, 1]
            # The target is covered if it lies within any of the intervals.
            inside_ = (lower_ <= target) & (target <= upper_)
            hit_row_.append(np.any(inside_).astype(float))
            width_row_.append(np.sum(upper_ - lower_))
            bound_row_.append([lower_.min(), upper_.max()])
        hits_.append(hit_row_)
        width_.append(width_row_)
        bounds_.append(bound_row_)

    return np.asarray(hits_), np.asarray(width_), np.asarray(bounds_)

# Shared joblib executor, reused for every conformal-region computation below.
n_jobs, verbose = -1, 0
parallel_ = Parallel(n_jobs=n_jobs, verbose=verbose)

# Silences all numpy floating-point warnings for the rest of the session.
np.seterr(all="ignore")

random_state = np.random.RandomState(0x6AE89C43)
# Levels in decreasing order: 0.25, 0.10, 0.05, 0.01.
levels = np.asanyarray([0.01, 0.05, 0.10, 0.25])[::-1]

## Initialize
scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
kernel = 'rbf' # 'laplacian'
gp = GaussianProcess(beta0=0, normalize=False, corr='squared_exponential')

funcs_ = [f6, pressure2, heaviside]

grid_ = ParameterGrid(dict(size=[150, 500,],
                           nugget=[1e-6, 1e-2],
                           theta0=[1e-1, 1.0, 1e+1,]))

## Get a sample realisation
XX_test = np.linspace(0, 1, num=1001).reshape((-1, 1))
XX_train = random_state.uniform(size=(10000, 1))

# Test inputs come first in the pooled sample, so `test_` slices them out.
XX = np.concatenate([XX_test, XX_train], axis=0)
test_ = np.s_[:XX_test.shape[0]]

# NOTE(review): PLOT_PATH is not defined in this cell -- confirm it is set
# in an earlier cell before running on a fresh kernel.
output_path_ = mkdirifnot(os.path.join(PLOT_PATH, "profiles"))
for dgp_ in funcs_:
    for noise_ in [1e-6, 1e-1,]:
        # Evaluate the test function on the pooled inputs, as a column.
        yy = dgp_(XX)
        if yy.ndim == 1:
            yy = yy.reshape((-1, 1))
        if noise_ > 0:
            # Gaussian noise scaled by `noise_`, added in place.
            yy += random_state.normal(size=yy.shape) * noise_
        yy_train, yy_test = np.delete(yy, test_, axis=0), yy[test_].copy()
        del yy

        for i_, par_ in enumerate(grid_):
            size_, nugget_, theta0_ = par_['size'], par_['nugget'], par_['theta0']

            
            # LaTeX rendering of theta for the title; non-float values are quoted.
            theta_= ("%g" if isinstance(theta0_, float) else "$'%s'$")%(theta0_,)
            title_template_ = "%%s %%g ($n=%d, \\theta=%s, \\lambda=%g$)"%(size_, theta_, nugget_)

            # Draw random train sample
            train_ = random_state.choice(range(XX_train.shape[0]),
                                         size=size_, replace=False)
            X, y = XX_train[train_], yy_train[train_]

            # Scalers are fitted on the train subsample only and then applied
            # to the test inputs and targets as well.
            Xscl_, yscl_ = clone(scaler).fit(X), clone(scaler).fit(y)
            X_, XX_test_ = Xscl_.transform(X), Xscl_.transform(XX_test)
            y_, yy_test_ = yscl_.transform(y), yscl_.transform(yy_test)
        
            # Fit a GPR
            gp_ = clone(gp)
            gp_.nugget = nugget_
            # NOTE(review): a theta0 that is neither a float nor "auto" (for
            # example the int 1) leaves the estimator's default theta0 in place.
            if isinstance(theta0_, float):
                gp_.theta0 = theta0_
            elif theta0_ == "auto":
                gp_.thetaL, gp_.thetaU, gp_.theta0 = 1e-4, 1e4, float(size_)
            gp_.fit(X_, y_)

            # Compute the A, B matrices
            A, B, y_hat_, MM, loo_, A_loo, B_loo = KRR_AB(
                X_, y_, XX_test_, forecast=True, nugget=gp_.nugget,
                metric=kernel, gamma=gp_.theta_[0])
            del loo_

            # Inflate by the estimated magnitude
            MM *= gp_.sigma2


        ## Construct the Bayesian interval
            # Two-sided normal quantiles, one per level; the bounds are mapped
            # back to the original target scale.
            z_a = norm.ppf(1 - .5 * levels)
            half_width_ = np.sqrt(MM) * z_a[np.newaxis]
            b_bounds_ = yscl_.inverse_transform(
                np.stack([y_hat_ - half_width_, y_hat_ + half_width_], axis=-1))
            b_width_ = b_bounds_[..., 1] - b_bounds_[..., 0]
            b_hits_ = ((b_bounds_[..., 0] <= yy_test) & (yy_test <= b_bounds_[..., 1])).astype(float)

        ## Construct the CKRR confidence interval: RRCM
            rrcm_hits_, rrcm_width_, rrcm_bounds_ = _helper(yy_test_, A[0], B, proc=RRCM,
                                              levels=levels, parallel=parallel_)
            # Widths are computed on the standardized scale; rescale in place.
            if yscl_.scale_ is not None:
                rrcm_width_ *= yscl_.scale_

        ## Construct the CKRR confidence interval: CRR
            crr_hits_, crr_width_, crr_bounds_ = _helper(yy_test_, A[0], B, proc=CRR,
                                            levels=levels, parallel=parallel_)
            if yscl_.scale_ is not None:
                crr_width_ *= yscl_.scale_

        ## Construct the CKRR confidence interval: RRCM on A_loo / B_loo
            loo_rrcm_hits_, loo_rrcm_width_, loo_rrcm_bounds_ = _helper(yy_test_, A_loo[0], B_loo, proc=RRCM,
                                                      levels=levels, parallel=parallel_)
            if yscl_.scale_ is not None:
                loo_rrcm_width_ *= yscl_.scale_

        ## Construct the CKRR confidence interval: CRR on A_loo / B_loo
            loo_crr_hits_, loo_crr_width_, loo_crr_bounds_ = _helper(yy_test_, A_loo[0], B_loo, proc=CRR,
                                                    levels=levels, parallel=parallel_)
            if yscl_.scale_ is not None:
                loo_crr_width_ *= yscl_.scale_

            # Map the conformal bounds and the prediction back to the
            # original target scale.
            rrcm_bounds = yscl_.inverse_transform(rrcm_bounds_)
            crr_bounds = yscl_.inverse_transform(crr_bounds_)
            loo_rrcm_bounds = yscl_.inverse_transform(loo_rrcm_bounds_)
            loo_crr_bounds = yscl_.inverse_transform(loo_crr_bounds_)
            y_hat = yscl_.inverse_transform(y_hat_)

            # The last axis indexes the procedure: RRCM, CRR, RRCM-loo, CRR-loo.
            bounds = np.stack([rrcm_bounds, crr_bounds,
                               loo_rrcm_bounds, loo_crr_bounds],
                              axis=-1)

            ## Profile
            col_ = list("rbgk")
            fig = plt.figure(figsize=(5, 5))
            ax = fig.add_subplot(111)
            ax.plot(XX_test, yy_test, c="k")
            ax.plot(XX_test, y_hat, linestyle=':')
            # Index -2 on the level axis picks levels[-2], i.e. 0.05.
            for j in range(4):
                ax.plot(XX_test, bounds[:, -2, 0, j], color=col_[j], alpha=.5)
                ax.plot(XX_test, bounds[:, -2, 1, j], color=col_[j], alpha=.5)
            # Bayesian interval at the same level, in magenta.
            ax.plot(XX_test, b_bounds_[:, -2, 0], color="m")
            ax.plot(XX_test, b_bounds_[:, -2, 1], color="m")
            ax.set_title(title_template_%(dgp_.__name__, noise_))

#             plt.show()
            
            fig_file_name_ = os.path.join(output_path_, "%s %g %d %s %g.png"
                                          %(dgp_.__name__, noise_, size_, theta_, nugget_,))
            fig.savefig(fig_file_name_)
            plt.close()

In [None]:
plt.plot(sizes_, rmse_)

In [None]:
results_.keys()

In [None]:
rmse_, sizes_, coverage_, width_ = results_[('heaviside', 0.01, 1, 1e-06)]

In [None]:
X_test = np.linspace(0, 1, num=1001).reshape((-1, 1))
w_ = np.mean(width_, axis=-1)
# w_std_ = np.std(width_, axis=-1)

In [None]:
# One figure per index j along the second axis of `w_`, one curve per
# method along the first axis.
for j in xrange(11):
    for i in xrange(5):
        plt.plot(X_test, w_[i, j, ..., -1].T, label=titles_[i])
    # Build the legend once, after all labelled curves have been drawn.
    plt.legend(loc="best", ncol=2)
    plt.show()

In [None]:
np.concatenate([exp_[1] for exp_ in experiment], axis=1)

In [None]:
X_test = np.linspace(0, 1, num=1001).reshape((-1, 1))
plt.plot(X_test, np.concatenate([exp_[1] for exp_ in experiment[:88]], axis=1));

Experiment #2

In [None]:
base_ = "./exp2"

# Load every ".gz" dump found in the folder.
dumps_ = list()
for fname_ in os.listdir(base_):
    if fname_.endswith(".gz"):
        dumps_.append(_load(os.path.join(base_, fname_)))

# Pool the records of all dumps into one flat list.
experiment = list()
for dump_ in dumps_:
    experiment.extend(dump_)

In [None]:
nd = 2
# 51 equispaced knots on [-1, 1], shared by each of the `nd` axes.
knots_ = np.linspace(-1, 1, num=51)
mesh_ = np.meshgrid(*([knots_] * nd))
# One row per mesh node, one column per coordinate.
X_test = np.column_stack([ax_.ravel() for ax_ in mesh_])

In [None]:
# Levels in decreasing order: 0.25, 0.10, 0.05, 0.01.
levels = np.flipud(np.asanyarray([0.01, 0.05, 0.10, 0.25]))
# Method names and the line colours used in the plots.
titles_ = "GPR RRCM CRR RRCM-loo CRR-loo".split()
colors_ = "bgrm"

In [None]:
# Group the records by their key without its last entry; the last entry is
# kept next to the payload so the group can be ordered by it.
temp_ = dict()
for exp_ in experiment:
    key_ = exp_[0][:-1]
    temp_.setdefault(key_, list()).append((exp_[0][-1], exp_[1:]))

# Sort every group by the leading element of its pairs.
temp_ = dict((key_, sorted(res_, key=lambda x: x[0]))
             for key_, res_ in temp_.iteritems())

In [None]:
results_ = dict()
for key_, result_ in temp_.iteritems():
    # Mean squared difference of the first two payload entries over the last axis.
    rmse_ = np.stack([np.mean((res_[1][0]-res_[1][1])**2, axis=-1) for res_ in result_], axis=0)
    sizes_ = np.array([res_[0] for res_ in result_])
    # Payload entries 3, 5, ... hold coverage and 2, 4, ... hold width, one
    # pair per method; the leading axis of the result indexes the method.
    coverage_ = np.stack([np.stack([res_[1][3::2][j] for res_ in result_], axis=0) for j in xrange(5)], axis=0)
    width_ = np.stack([np.stack([res_[1][2::2][j] for res_ in result_], axis=0) for j in xrange(5)], axis=0)
    results_[key_] = rmse_, sizes_, coverage_, width_

    print(key_)
    # The median over the last axis does not depend on the panel: compute once.
    med_width_ = np.median(width_, axis=-1)
    # Indices picked along the second and the last axis of the median widths.
    s_, pv_ = 0, 0
    fig = plt.figure(figsize=(12, 6))
    for j in xrange(1, len(titles_)):
        ax = fig.add_subplot(2, 2, j, projection='3d')
        # Width of method j relative to method 0, minus one.
        rel_ = med_width_[j, s_, :, pv_] / med_width_[0, s_, :, pv_] - 1
        ax.plot_surface(mesh_[0], mesh_[1], rel_.reshape(mesh_[0].shape),
                        cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                        antialiased=False, alpha=0.9)
        ax.view_init(60, -60)
        ax.set_title("%s / %s - 1"%(titles_[j], titles_[0]))
    plt.show()

In [None]:
# NOTE(review): this cell relies on names left over from other cells
# (`key_`, `rmse_`, `title_template_`, `mesh_`); none is defined here.
print key_
fig = plt.figure(figsize=(5, 3))
ax = fig.add_subplot(111, projection='3d')
# Surface of the first row of `rmse_` over the evaluation mesh.
ax.plot_surface(mesh_[0], mesh_[1], rmse_[0].reshape(mesh_[0].shape),
                cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                antialiased=False, alpha=0.9)
ax.view_init(60, -60)
# The title combines the first two key entries with the theta/lambda template.
ax.set_title(title_template_%("%s %g"%(key_[:2]),))
plt.show()

In [None]:
# One RMSE surface per experiment key.
for key_, (rmse_, sizes_, coverage_, width_) in results_.iteritems():
    # Creates the per-function output folder as a side effect; nothing is
    # saved into it by this cell.
    output_path_ = mkdirifnot(os.path.join(PLOT_PATH, key_[0]))
    # LaTeX rendering of theta (key_[2]); non-float values are quoted.
    theta_= ("%g" if isinstance(key_[2], float) else "$'%s'$")%(key_[2],)
    title_template_ = "%%s ($\\theta=%s, \\lambda=%g$)"%(theta_, key_[3])
    # Coverage summaries are computed but not drawn in this cell.
    cov_ = np.median(coverage_, axis=-1)
    cov_lo_, cov_hi_ = np.percentile(coverage_, [25, 75], axis=-1)
#     break

    fig = plt.figure(figsize=(5, 3))
    ax = fig.add_subplot(111, projection='3d')
    # Surface of the first row of `rmse_` over the evaluation mesh.
    ax.plot_surface(mesh_[0], mesh_[1], rmse_[0].reshape(mesh_[0].shape),
                    cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                    antialiased=False, alpha=0.9)
    ax.view_init(60, -60)
    ax.set_title(title_template_%("%s %g"%(key_[:2]),))
    plt.show()
#     break


In [None]:
# One surface per record: the difference between the second and the third
# record entries, averaged over the last axis.
for exp_ in experiment:
    y_test_ = (exp_[1]-exp_[2]).mean(axis=-1, keepdims=True)

    fig = plt.figure(figsize=(5, 3))
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(mesh_[0], mesh_[1], y_test_.reshape(mesh_[0].shape),
                    cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                    antialiased=False, alpha=0.9)
    ax.view_init(60, -60)
    # The first key entry is used as the title.
    ax.set_title(exp_[0][0])
    plt.show()


Experiment #3

In [None]:
base_ = "./exp3"

# Load every ".pic.gz" dump of the folder and pool the records.
dumps_ = list()
for fname_ in os.listdir(base_):
    if fname_.endswith(".pic.gz"):
        dumps_.append(_load(os.path.join(base_, fname_)))
experiment = [exp_ for dump_ in dumps_ for exp_ in dump_]

# Group the records by their key without its last entry, then order every
# group by that last entry.
temp_ = dict()
for exp_ in experiment:
    key_ = exp_[0][:-1]
    temp_.setdefault(key_, list()).append((exp_[0][-1], exp_[1:]))

temp_ = dict((key_, sorted(res_, key=lambda x: x[0]))
             for key_, res_ in temp_.iteritems())

results_ = dict()
for key_, result_ in temp_.iteritems():
    # Mean squared difference between the first two payload entries.
    rmse_ = np.stack([np.mean((res_[1][0] - res_[1][1])**2)
                      for res_ in result_], axis=0)
    sizes_ = np.array([res_[0] for res_ in result_])
    # Payload entries 3, 5, ... hold coverage and 2, 4, ... hold width; the
    # leading axis of the stacked arrays indexes the method.
    coverage_ = np.stack([np.stack([res_[1][3 + 2*j] for res_ in result_], axis=0)
                          for j in xrange(5)], axis=0)
    width_ = np.stack([np.stack([res_[1][2 + 2*j] for res_ in result_], axis=0)
                       for j in xrange(5)], axis=0)
    results_[key_] = rmse_, sizes_, coverage_, width_



In [None]:

# Miscoverage levels in decreasing order (0.25, 0.10, 0.05, 0.01); the
# reference lines are drawn at 1 - level.
levels = np.asanyarray([0.01, 0.05, 0.10, 0.25])[::-1]
titles_ = ["GPR", "RRCM" ,"CRR", "RRCM-loo", "CRR-loo"]
colors_ = "bgrm"
# One panel per conformal procedure (titles_[1:]) on a 2x2 grid.
gs = gridspec.GridSpec(2, 2, height_ratios=[6, 6])

for key_, (rmse_, sizes_, coverage_, width_) in results_.iteritems():
    output_path_ = mkdirifnot(os.path.join(PLOT_PATH, key_[0]))

    # key_[2] is theta: either a number or a keyword string. Integer values
    # (e.g. 1) are numbers too and must not be rendered as a quoted keyword.
    theta_is_number_ = isinstance(key_[2], (int, float))
    theta_ = ("%g" if theta_is_number_ else "$'%s'$")%(key_[2],)
    title_template_ = "%%s ($\\theta=%s, \\lambda=%g$)"%(theta_, key_[3])

    # Median and inter-quartile band of the coverage over the last axis.
    cov_ = np.median(coverage_, axis=-1)
    cov_lo_, cov_hi_ = np.percentile(coverage_, [25, 75], axis=-1)

    ## Conformal procedures
    fig = plt.figure(figsize=(7, 6))
    for j in xrange(1, 5):
        ax = fig.add_subplot(gs[j-1])
        ax.set_ylim(0.65, 1.025)
        ax.set_xlim(25, 1600)
        ax.locator_params(axis="x", nbins=5)
        ax.set_yticks(1-levels)
        ax.set_title(title_template_%(titles_[j],))
        for i in xrange(4):
            ax.plot(sizes_, cov_[j, :, i], color=colors_[i])
            ax.plot(sizes_, cov_hi_[j, :, i], color=colors_[i], alpha=0.5)
            ax.plot(sizes_, cov_lo_[j, :, i], color=colors_[i], alpha=0.5)
            ax.axhline(y=1 - levels[i], color='black', alpha=0.25)

    # Plain (non-LaTeX) rendering of theta for the file names.
    theta_ = ("%g" if theta_is_number_ else "%s")%(key_[2],)
    fig_file_name_ = os.path.join(output_path_, "%s %g %s %g conf_coverage.png"
                                  %(key_[0], key_[1], theta_, key_[3],))
    fig.savefig(fig_file_name_)
    plt.close(fig)
    print(fig_file_name_)

    ## Bayes
    fig = plt.figure(figsize=(7, 3))
    ax = fig.add_subplot(111)
    ax.set_ylim(0.65, 1.025)
    ax.set_xlim(25, 1600)
    ax.locator_params(axis="x", nbins=5)
    ax.set_yticks(1-levels)
    ax.set_title(title_template_%(titles_[0],))
    for i in xrange(4):
        ax.plot(sizes_, cov_[0, :, i], color=colors_[i])
        ax.plot(sizes_, cov_hi_[0, :, i], color=colors_[i], alpha=0.5)
        ax.plot(sizes_, cov_lo_[0, :, i], color=colors_[i], alpha=0.5)
        ax.axhline(y=1 - levels[i], color='black', alpha=0.25)

    fig_file_name_ = os.path.join(output_path_, "%s %g %s %g gpr_coverage.png"
                                  %(key_[0], key_[1], theta_, key_[3],))
    fig.savefig(fig_file_name_)
    plt.close(fig)

In [None]:
X_test = np.linspace(0, 1, num=1001)

In [None]:
exp_[2]

In [None]:
# One profile per record: the difference between the second and the third
# record entries, averaged over the last axis.
for exp_ in experiment:
    # LaTeX rendering of theta (third key entry); non-float values are quoted.
    theta_= ("%g" if isinstance(exp_[0][2], float) else "$'%s'$")%(exp_[0][2],)
    title_template_ = "%%s ($\\theta=%s, \\lambda=%g$)"%(theta_, exp_[0][3])

    y_test_ = (exp_[1] - exp_[2]).mean(axis=-1, keepdims=True)

    fig = plt.figure(figsize=(5, 3))
    ax = fig.add_subplot(111)
    ax.plot(X_test, y_test_)
    ax.set_title(title_template_%("%s %g"%(exp_[0][:2]),))
    plt.show()

In [None]:

from utils.functions import gaussian
random_state = np.random.RandomState(0xCAFFE14E)

# Levels in decreasing order: 0.25, 0.10, 0.05, 0.01.
levels = np.asanyarray([0.01, 0.05, 0.10, 0.25])[::-1]


## Kernel used both to draw the sample and, later, by KRR_AB
kernel = 'rbf' # 'laplacian'

## Run: experiment #3
# nd = 2
# mesh_ = np.meshgrid(*nd*[np.linspace(-1, 1, num=51)])
# X_test = np.concatenate([ax_.reshape((-1, 1)) for ax_ in mesh_], axis=1)
X_test = np.linspace(0, 1, num=1001).reshape((-1, 1))
# Test inputs come first in the pooled sample, so `test_` slices them out.
test_ = np.s_[:X_test.shape[0]]
## Draw f(x)
XX_train = random_state.uniform(size=(10000, 1))
XX = np.concatenate([X_test, XX_train], axis=0)
# A single draw over the pooled train and test inputs.
yy = gaussian(XX, scale=1.0, nugget=1e-2, metric=kernel,
              gamma=100.0, random_state=random_state)
if yy.ndim == 1:
    yy = yy.reshape((-1, 1))

## Split the pooled sample
yy_train, y_test = np.delete(yy, test_, axis=0), yy[test_].copy()
# del XX, yy


In [None]:
plt.scatter(XX, yy)

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.gaussian_process import GaussianProcess

from utils.conformal import RRCM, CRR
from utils.KRR import KRR_AB

from sklearn.grid_search import ParameterGrid
from sklearn.base import clone

from scipy.stats import norm
from joblib import Parallel, delayed

## Define the grid
grid_ = ParameterGrid({
    "dgp": [gaussian,],
    "size": [25, 50, 100, 200, 400, 600, 800, 1000, 1200, 1400, 1600,],
    # "size": [150, 1500,],
    "nugget": [1e-6, 1e-2,],
    "theta0": [1e-1, 1, 1e+1, "auto"],
    "noise": [1e-6,],
})

## Initialize
kernel = 'rbf'  # alternative: 'laplacian'
scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
gp = GaussianProcess(beta0=0, normalize=False, corr='squared_exponential')

# Shared joblib executor.
n_jobs = -1
verbose = 0
parallel_ = Parallel(n_jobs=n_jobs, verbose=verbose)

def _helper(y, A, B, proc=RRCM, levels=levels, parallel=None, n_jobs=1, verbose=0):
    """Build a conformal region for every test point and summarise it.

    Parameters
    ----------
    y : array
        Test targets; iterated along the first axis, one entry per test point.
    A, B : indexable
        `A[k]` and `B[k]` are handed to `proc` for the k-th test point.
    proc : callable
        Region constructor, called as `proc(A[k], B[k], levels=levels)`. Its
        result is iterated, and every item is indexed as a two-column array
        of (lower, upper) interval endpoints.
    levels : array
        Forwarded to `proc` unchanged.
    parallel : joblib.Parallel or None
        Executor to reuse; anything else makes the function create its own.
    n_jobs, verbose : int
        Only used when a new `Parallel` executor is created.

    Returns
    -------
    hits_ : ndarray
        1.0 where the target falls inside at least one interval, else 0.0.
    width_ : ndarray
        Sum of the interval lengths of every region.
    bounds_ : ndarray
        Smallest lower and largest upper endpoint of every region.
    """
    if isinstance(parallel, Parallel):
        runner_ = parallel
    else:
        runner_ = Parallel(n_jobs=n_jobs, verbose=verbose)

    # One job per test point.
    job_ = delayed(proc)
    regions = runner_(job_(A[k], B[k], levels=levels)
                      for k in xrange(y.shape[0]))

    hits_, width_, bounds_ = list(), list(), list()
    for target, region in zip(y, regions):
        hit_row_, width_row_, bound_row_ = list(), list(), list()
        for int_ in region:
            lower_, upper_ = int_[:, 0], int_[:, 1]
            # The target is covered if it lies within any of the intervals.
            inside_ = (lower_ <= target) & (target <= upper_)
            hit_row_.append(np.any(inside_).astype(float))
            width_row_.append(np.sum(upper_ - lower_))
            bound_row_.append([lower_.min(), upper_.max()])
        hits_.append(hit_row_)
        width_.append(width_row_)
        bounds_.append(bound_row_)

    return np.asarray(hits_), np.asarray(width_), np.asarray(bounds_)


## Run: experiment #3
# nd = 2
# mesh_ = np.meshgrid(*nd*[np.linspace(-1, 1, num=51)])


In [None]:
for i_, par_ in enumerate(grid_):
    break

In [None]:
    n_replications, replications = 20, list()

    dgp_, size_, noise_ = par_['dgp'], par_['size'], par_['noise']
    nugget_, theta0_ = par_['nugget'], par_['theta0']

    tick_ = time.time()


In [None]:
## START: one replication
## Draw random train sample
# `size_` distinct row indices of the training pool, drawn without replacement.
train_ = random_state.choice(range(XX_train.shape[0]),
                             size=size_, replace=False)
X_train, y_train = XX_train[train_], yy_train[train_]

## Standardize the sample
# Scalers are fitted on the train subsample only and then applied to the
# test inputs and targets as well.
Xscl_, yscl_ = clone(scaler).fit(X_train), clone(scaler).fit(y_train)
X_train_, X_test_ = Xscl_.transform(X_train), Xscl_.transform(X_test)
y_train_, y_test_ = yscl_.transform(y_train), yscl_.transform(y_test)


In [None]:
## Fit a GPR
gp_ = clone(gp)
gp_.nugget = nugget_
if theta0_ == "auto":
    # Let the estimator search theta within [1e-4, 1e4], starting from the
    # sample size.
    gp_.thetaL, gp_.thetaU, gp_.theta0 = 1e-4, 1e4, float(size_)
elif isinstance(theta0_, (int, float)):
    # Integer grid values (e.g. 1) are valid too; without this branch they
    # would be skipped and the estimator's default theta0 used instead.
    gp_.theta0 = float(theta0_)
else:
    raise ValueError("Unrecognized theta0 setting: %r" % (theta0_,))
gp_.fit(X_train_, y_train_)


In [None]:
## Compute the A, B matrices
# The nugget and the fitted theta of the GP are reused as the ridge and the
# kernel parameters of KRR_AB.
A, B, y_hat_, MM, loo_, A_loo, B_loo = KRR_AB(
    X_train_, y_train_, X_test_, forecast=True,
    nugget=gp_.nugget, metric=kernel, gamma=gp_.theta_[0])
del loo_
## Inflate by the estimated magnitude
MM *= gp_.sigma2

## Construct the Bayesian interval
# Two-sided normal quantiles, one per level; the bounds are mapped back to
# the original target scale.
z_a = norm.ppf(1 - .5 * levels)
half_width_ = np.sqrt(MM) * z_a[np.newaxis]
b_bounds_ = yscl_.inverse_transform(
    np.stack([y_hat_ - half_width_, y_hat_ + half_width_], axis=-1))
b_width_ = b_bounds_[..., 1] - b_bounds_[..., 0]
b_hits_ = ((b_bounds_[..., 0] <= y_test) & (y_test <= b_bounds_[..., 1])).astype(float)

## Construct the CKRR confidence interval: RRCM
rrcm_hits_, rrcm_width_, rrcm_bounds_ = _helper(y_test_, A[0], B, proc=RRCM,
                                  levels=levels, parallel=parallel_)
# Bounds are mapped back to the original target scale; widths are rescaled
# in place.
rrcm_bounds_ = yscl_.inverse_transform(rrcm_bounds_)
if yscl_.scale_ is not None:
    rrcm_width_ *= yscl_.scale_

## Construct the CKRR confidence interval: CRR
crr_hits_, crr_width_, crr_bounds_ = _helper(y_test_, A[0], B, proc=CRR,
                                levels=levels, parallel=parallel_)
crr_bounds_ = yscl_.inverse_transform(crr_bounds_)
if yscl_.scale_ is not None:
    crr_width_ *= yscl_.scale_

## Construct the CKRR confidence interval: RRCM on A_loo / B_loo
loo_rrcm_hits_, loo_rrcm_width_, loo_rrcm_bounds_ = _helper(y_test_, A_loo[0], B_loo, proc=RRCM,
                                          levels=levels, parallel=parallel_)
loo_rrcm_bounds_ = yscl_.inverse_transform(loo_rrcm_bounds_)
if yscl_.scale_ is not None:
    loo_rrcm_width_ *= yscl_.scale_

## Construct the CKRR confidence interval: CRR on A_loo / B_loo
loo_crr_hits_, loo_crr_width_, loo_crr_bounds_ = _helper(y_test_, A_loo[0], B_loo, proc=CRR,
                                        levels=levels, parallel=parallel_)
loo_crr_bounds_ = yscl_.inverse_transform(loo_crr_bounds_)
if yscl_.scale_ is not None:
    loo_crr_width_ *= yscl_.scale_


In [None]:
# Lower / upper bounds at index 0 of the level axis (levels[0] == 0.25).
# `aa, bb` hold the RRCM-loo bounds and `aa_, bb_` the RRCM bounds; to look
# at the Bayesian interval instead, unpack `b_bounds_[:, 0].T`.
aa, bb = loo_rrcm_bounds_[:, 0].T
aa_, bb_ = rrcm_bounds_[:, 0].T

In [None]:
# Conformal bounds (RRCM-loo and RRCM), the test targets and the prediction.
plt.plot(X_test, aa)
plt.plot(X_test, bb)
plt.plot(X_test, aa_)
plt.plot(X_test, bb_)
plt.plot(X_test, y_test)
# `y_hat_` is on the standardized target scale; map it back so that it is
# comparable with the raw targets and the inverse-transformed bounds.
plt.plot(X_test, yscl_.inverse_transform(y_hat_))

In [None]:
plt.plot(X_test, aa)
plt.plot(X_test, bb)
plt.plot(X_test, y_test)

## Main experiment

In [None]:
import time
from utils.monte_carlo import run_ckrr_mc_experiment
from utils.state import _save
from sklearn.grid_search import ParameterGrid
from utils.functions_1d import get_functions

Select the functions to experiment with.

In [None]:
# Names of the 1D test functions to keep.
funcs_ = ["f6", "pressure2", "heaviside"]
# Restrict the function registry to the selected names.
func1d_ = dict((fname_, fn_)
               for fname_, fn_ in get_functions().iteritems()
               if fname_ in funcs_)

Plot typical profiles for the 1D test functions.

In [None]:
# Save the profile of every selected test function on [0, 1].
for fname_, fn_ in func1d_.iteritems():
    fig, ax = plt.subplots(figsize=(5, 4))
    X = np.linspace(0, 1, num=1001)
    y = fn_(X)
    ax.plot(X, y)
    # Every function except 'pressure2' is drawn on a fixed vertical range.
    if not fname_ == 'pressure2':
        ax.set_ylim(-0.1, 1.1)

    fig_name_ = os.path.join(PLOT_PATH, "1d_func_%s.png"%(fname_,))
    fig.savefig(fig_name_)
    plt.close()
    print(fig_name_)

## experiment #1: validity

## experiment #2: gaussian case

## experiment #3: 2d case

In [None]:
print np.percentile((np.abs(exp_[1] - exp_[2])**2), [25, 75], axis=0)

In [None]:
2

In [None]:
import time
from sklearn.base import clone
from sklearn.utils import check_random_state
from sklearn.preprocessing import StandardScaler
from scipy.stats import norm
from joblib import Parallel, delayed
from sklearn.grid_search import ParameterGrid

Define the search grid

In [None]:
# Full search grid over test functions, sample sizes and GP settings.
grid_ = ParameterGrid({
    "dgp": func1d_.values(),
    "size": [25, 50, 100, 200, 400, 600, 800, 1000, 1200, 1400, 1600,],
    "nugget": [1e-6, 1e-2],
    "theta0": [1e-1, 1, 1e+1],
    "use_loo": [True, False],
    "noise": [0.0, 1e-1],
})

In [None]:
# Reduced grid: the first test function only, a single GP setting and no
# noise. Running this cell rebinds `grid_`.
grid_ = ParameterGrid({
    "dgp": func1d_.values()[:1],
    "size": [25, 50, 100, 200, 400, 600, 800],  # , 1000, 1200, 1400, 1600,],
    "nugget": [1e-6,],
    "theta0": [1e+1,],
    "use_loo": [False,],
    "noise": [0.0,],
})

The experiment

In [None]:
random_state = np.random.RandomState(0x0BADA550)

levels = np.asanyarray([0.01, 0.05, 0.10, 0.25])[::-1]

Initialize

In [None]:
# Shared joblib executor and the delayed conformal procedure.
n_jobs = -1
verbose = 0
parallel_ = Parallel(n_jobs=n_jobs, verbose=verbose)
c_proc = delayed(RRCM)

# Templates that are cloned for every replication.
kernel = 'rbf'  # alternative: 'laplacian'
scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
gp = GaussianProcess(beta0=0, normalize=False, corr='squared_exponential')

# Two-sided standard normal quantiles, one per level.
z_a = norm.ppf(1 - .5 * levels)

Run (maybe play with bad samples)

In [None]:
X_test = np.linspace(0, 1, num=1001).reshape((-1, 1))
# Test inputs come first in the pooled sample, so `test_` slices them out.
test_ = np.s_[:X_test.shape[0]]

experiment = list()
for i_, par_ in enumerate(grid_):
    print(par_)
    n_replications, replications = 20, list()

    dgp_, size_, noise_ = par_['dgp'], par_['size'], par_['noise']
    nugget_, theta0_, use_loo_ =  par_['nugget'], par_['theta0'], par_['use_loo']
    tick_ = time.time()
    while n_replications > 0:
        ## START: one replication
        ## Draw random train sample
        X_train = random_state.uniform(size=(size_, 1))

        ## Draw f(x)
        XX = np.concatenate([X_test, X_train], axis=0)
        yy = dgp_(XX)
        if yy.ndim == 1:
            yy = yy.reshape((-1, 1))
        if noise_ > 0:
            # Scale the standard normal draw by the requested noise level;
            # `noise_` is a magnitude, not just an on/off switch.
            yy += random_state.normal(size=yy.shape) * noise_

        ## Split the pooled sample
        y_train, y_test = np.delete(yy, test_, axis=0), yy[test_]

        ## Standardize the sample
        # Scalers are fitted on the train sample only.
        Xscl_, yscl_ = clone(scaler).fit(X_train), clone(scaler).fit(y_train)
        X_train_, X_test_ = Xscl_.transform(X_train), Xscl_.transform(X_test)
        y_train_, y_test_ = yscl_.transform(y_train), yscl_.transform(y_test)

        ## Fit a GPR
        gp_ = clone(gp)
        gp_.theta0, gp_.nugget = theta0_, nugget_
        gp_.fit(X_train_, y_train_)

        ## Compute the A, B matrices
        A, B, y_hat_, MM, loo_ = KRR_AB(X_train_, y_train_, X_test_, loo=use_loo_,
                                        forecast=True, nugget=gp_.nugget,
                                        metric=kernel, gamma=gp_.theta_[0])
        del loo_
        ## Inflate by the estimated magnitude
        MM *= gp_.sigma2

        ## Construct the Bayesian interval
        # The bounds are mapped back to the original target scale.
        half_width_ = np.sqrt(MM) * z_a[np.newaxis]
        b_bounds_ = yscl_.inverse_transform(
            np.stack([y_hat_ - half_width_, y_hat_ + half_width_], axis=-1))
        b_width_ = b_bounds_[..., 1] - b_bounds_[..., 0]
        b_hits_ = ((b_bounds_[..., 0] <= y_test) & (y_test <= b_bounds_[..., 1])).astype(float)

        ## Construct the CKRR confidence interval
        regions = parallel_(c_proc(A[0, k], B[k], levels=levels)
                            for k in xrange(y_test.shape[0]))
        ## See if the transformed test target values are within the conformal region
        c_hits_ = np.asarray(
            [[np.any(((int_[:, 0] <= y) & (y <= int_[:, 1]))).astype(float)
              for int_ in region]
             for y, region in zip(y_test_, regions)])

        # Total length of the intervals of every region, rescaled in place to
        # the original target scale.
        c_width_ = np.asarray(
            [[np.sum(int_[:, 1] - int_[:, 0]) for int_ in region] for region in regions])
        if yscl_.scale_ is not None:
            c_width_ *= yscl_.scale_

        ## END: one replication
        n_replications -= 1

        # Keep only the empirical coverage of each replication, one row each.
        replications.append((b_hits_.mean(axis=0, keepdims=True),
                             c_hits_.mean(axis=0, keepdims=True)))
    tock_ = time.time()
    ## Consolidate the simulations
    b_coverage_ = np.concatenate([rep_[0] for rep_ in replications], axis=0)
    c_coverage_ = np.concatenate([rep_[1] for rep_ in replications], axis=0)

    key_ = dgp_.__name__, noise_, use_loo_, theta0_, nugget_, size_
    experiment.append((key_, b_coverage_, c_coverage_))

In [None]:
test_ = _load('./exp1/exp1 -20160511_221424.gz')

In [None]:
test_

In [None]:
[(key_, np.median(b_, axis=0), np.median(c_, axis=0)) for key_, b_, c_ in experiment]

In [None]:
from utils import _save, _load
basename_ = os.path.join(OUTPUT_PATH, "..", "partial2 10.0 0.0 1e-06 noloo")
_save(experiment, "%s-%04d "%(basename_, i_), gz=9)

In [None]:
experiment = _load('./new-20160511_194105/partial 10.0 0.0 1e-06 noloo-0032 -20160511_202641.gz')

In [None]:
import pandas as pd

In [None]:
# Gather fields 4 and 5 of every replication record: the replications of one
# parameter setting are stacked along a new trailing axis, and the settings are
# then concatenated along the leading axis.
b_coverage_, c_coverage_ = [
    np.concatenate([np.stack([replication_[field_] for replication_ in replications_],
                             axis=-1)
                    for par_, replications_ in experiment], axis=0)
    for field_ in (4, 5)]

In [None]:
# Mean / median / standard deviation of the coverage over the trailing axis,
# each folded into a (3, 11, 4) array.
# NOTE(review): the (3, 11, 4) shape is hard-coded; presumably it mirrors the
# layout of the parameter grid and the number of levels -- confirm.
b_cov_mean_, b_cov_median_, b_cov_std_ = [
    stat_(b_coverage_, axis=-1).reshape((3, 11, 4))
    for stat_ in (np.mean, np.median, np.std)]

c_cov_mean_, c_cov_median_, c_cov_std_ = [
    stat_(c_coverage_, axis=-1).reshape((3, 11, 4))
    for stat_ in (np.mean, np.median, np.std)]

In [None]:
# plt.plot(grid_.param_grid[0]["size"], c_dyn_[0]+c_cov_std_[0])
plt.plot(grid_.param_grid[0]["size"], c_cov_mean_[0])
plt.plot(grid_.param_grid[0]["size"], c_cov_median_[0])
# plt.plot(grid_.param_grid[0]["size"], c_dyn_[0]-c_cov_std_[0])

In [None]:
plt.plot(grid_.param_grid[0]["size"], b_cov_mean_[0])
plt.plot(grid_.param_grid[0]["size"], b_cov_median_[0])

In [None]:
plt.plot(b_coverage_.std(axis=-1))

In [None]:
b_coverage_, c_coverage_ = \
                    [np.stack([rep_[j] for rep_ in replications], axis=-1) for j in [4, 5]]
b_perf_ = b_coverage_.mean(axis=-1), b_coverage_.std(axis=-1)
c_perf_ = c_coverage_.mean(axis=-1), c_coverage_.std(axis=-1)

In [None]:
c_perf_

In [None]:
b_perf_

In [None]:
plt.plot(y_test)
plt.plot(y_test_hat_)
plt.show()

plt.plot(b_width_)
plt.show()

plt.plot(c_width_)
plt.show()

In [None]:
# random_state = np.random.RandomState(0xC01DF00D)
# random_state = np.random.RandomState(0xA1157AFF)
# random_state = np.random.RandomState(0x0BADA550)

In [None]:
b_hits_.mean(axis=0)

In [None]:
c_hits_.mean(axis=0)

Sandbox

In [None]:
# random_state = np.random.RandomState(0x0ABACABA)
# random_state = np.random.RandomState(0x0DABACAB)

In [None]:
# Run the 1-d Monte Carlo experiment for every parameter setting in `grid_` and
# dump each result into its own numbered file next to `basename_`.
# NOTE: `grid_`, `_unpack`, `run_ckrr_mc_experiment`, `random_state` and `levels`
# are not defined in this cell and must already exist in the kernel.
basename_ = os.path.join(OUTPUT_PATH, "func_1d")
for i_, par_ in enumerate(grid_):
    # Header describing the current parameter setting; stored with the result.
    head_ = _unpack(**par_)
    tick_ = time.time()
    result_ = run_ckrr_mc_experiment(random_state=random_state, n_jobs=-1, verbose=0,
                                     levels=levels, nd=1, ng=1001,
                                     n_replications=20, **par_)
    _save((head_, result_), "%s-%04d "%(basename_, i_), gz=9)
    # The reported time covers both the simulation and the save.
    tock_ = time.time()
    print head_, "%0.3fsec."%(tock_ - tick_,)

Dependence on the sample size: 1D

In [None]:
# Coverage as a function of the sample size for a 1-d test function.
# NOTE: `func1d_`, `theta0_list`, `nugget_list`, `size_list`, `levels`, `CCR`,
# `run_ckrr_mc_experiment` and `random_state` are not defined in this cell.
results_ = list()
n_replications = 20
use_loo = False

# NOTE(review): `funcs_` is not used below -- the loop iterates over a literal
# one-element list instead.
funcs_ = [("f1", func1d_["f1"]),
          ("pressure2", func1d_["pressure2"])]

for fname_, fun_ in [("f1", func1d_["f1"])]: #func1d_.iteritems():
    # Domain dependent rbf precision
    for theta0_ in theta0_list:
        for nugget_ in nugget_list:
            for size_ in size_list:
                print fname_, nugget_, size_
                tick_ = time.time()
                X_test_, reps_ = run_ckrr_mc_experiment(fun_, levels, ccr_proc=CCR, nd=1, ng=1001, theta0=theta0_,
                                                        n_replications=n_replications, size=size_, nugget=nugget_,
                                                        use_loo=use_loo, random_state=random_state)
                tock_ = time.time()
                # Fields 4 and 5 of every replication record, stacked along a
                # new trailing axis (one slice per replication).
                b_coverage_, c_coverage_ = \
                    [np.stack([rep_[j] for rep_ in reps_], axis=-1) for j in [4, 5]]
                # (mean, std) over the replications.
                b_perf_ = b_coverage_.mean(axis=-1), b_coverage_.std(axis=-1)
                c_perf_ = c_coverage_.mean(axis=-1), c_coverage_.std(axis=-1)
                results_.append((fname_, theta0_, nugget_, size_, c_perf_, b_perf_))
                print "%0.3fsec."%(tock_ - tick_,)
            # Leaves the nugget loop after its first value: only
            # `nugget_list[0]` is ever evaluated.
            break
        # Likewise only the first entry of `theta0_list` is evaluated.
        break

In [None]:
results_

Typical experiment

In [None]:
res_ = run_ckrr_mc_experiment(fun, levels, ccr_proc=RRCM, nd=1, ng=1001,
                              n_replications=20, size=200, nugget=1e-6,
                              use_loo=False, random_state=random_state)

In [None]:
X_test_, reps_ = res_
y_test_, y_hat_, b_width_, c_width_, b_coverage_, c_coverage_ = \
    [np.stack([rep_[j] for rep_ in reps_], axis=-1) for j in xrange(6)]
abs_err_ = np.abs(y_test_ - y_hat_)

b_perf_ = b_coverage_.mean(axis=-1), b_coverage_.std(axis=-1)
c_perf_ = c_coverage_.mean(axis=-1), c_coverage_.std(axis=-1)

In [None]:
b_coverage_.std(axis=-1), b_coverage_.mean(axis=-1)

In [None]:
c_coverage_.std(axis=-1), c_coverage_.mean(axis=-1)

Typical plot

In [None]:
# Six-panel summary of the "typical experiment": actual vs. predicted values,
# the root mean squared error, and one panel per confidence level comparing the
# interval half-widths with the absolute error.
# NOTE: `func_`, `name_`, `lvl_cols_`, `PLOT_PATH`, `X_train`, `nugget`, `noise_`
# and `use_loo` are not defined in this cell; they are taken from whatever the
# kernel currently holds.
fig = plt.figure(figsize=(12, 6))
# Subplot numbers run row by row on the 2x3 grid; this order fills the grid
# column by column instead.
order_ = [1, 4, 2, 5, 3, 6]

ax = fig.add_subplot(2, 3, order_[0])
ax.plot(X_test_, np.mean(y_test_[:, 0], axis=-1), color="blue")
ax.plot(X_test_, np.mean(y_hat_[:, 0], axis=-1), color="red")
ax.set_title("Actual/Prediction: %s"%(func_,))

ax = fig.add_subplot(2, 3, order_[1])
# RMSE over the trailing axis of the absolute errors.
ae_rmse_ = np.sqrt(np.mean(abs_err_[:, 0] ** 2, axis=-1))

ax.plot(X_test_, ae_rmse_)
ax.set_title("Root mean squared error")

# One panel per level; the number of levels (4) is hard-coded here.
for i in xrange(4):
    ax = fig.add_subplot(2, 3, order_[i + 2])

    ax.plot(X_test_, np.median(abs_err_[:, 0], axis=-1), alpha=0.25, label="$|y-\\hat{y}|$")
    # Half of the median interval width; the legend shows mean +/- std of the
    # coverage in percent.
    ax.plot(X_test_, np.median(b_width_[:, i], axis=-1) / 2,
            label="bayes ($%0.1f\\pm%0.1f$)"%(100*b_perf_[0][0, i], 100*b_perf_[1][0, i]))
    ax.plot(X_test_, np.median(c_width_[:, i], axis=-1) / 2,
            label="%s ($%.1f\\pm%.1f$)"%(name_, 100*c_perf_[0][0, i], 100*c_perf_[1][0, i]))
    ax.set_title("Accuracy %s %s-CI"%(name_, lvl_cols_[i],))
    ax.legend(loc="best", ncol=1)

fig.tight_layout()
# The file name is only built and printed: saving is commented out below.
fig_name_ = os.path.join(PLOT_PATH, "%s 1k-%d %.1e %.1e %s%s.png"
                         %(func_, X_train.shape[0], nugget, noise_, name_,
                           " loo" if use_loo else ""))
print fig_name_

# fig.savefig(fig_name_)
# plt.close()
plt.show()

Function on the test set.

In [None]:
plt.plot(X_test_, np.median(y_test_[:, 0], axis=-1))

Root mean squared error

In [None]:
ae_ = abs_err_[:, 0].copy()
ae_rmse_ = np.sqrt(np.mean(ae_ ** 2, axis=-1))
plt.plot(X_test_, ae_rmse_, "-")

The ends of the whiskers of a box plot can represent one of several alternatives:

* the minimum and maximum of all of the data[1] (as in figure 2)
* the lowest datum still within 1.5 IQR of the lower quartile, and the highest datum still within 1.5 IQR of the upper quartile (often called the Tukey boxplot)[2][3] (as in figure 3)
* one standard deviation above and below the mean of the data
* the 9th percentile and the 91st percentile
* the 2nd percentile and the 98th percentile.

In [None]:
# Per-test-point statistics of the absolute error over the trailing axis.
# The copy keeps `abs_err_` intact: `ae_` is overwritten in place further down.
ae_ = abs_err_[:, 0].copy()
ae_max = np.max(ae_, axis=-1)
# Lower and upper quartiles (first axis of `ae_qnt`) and their difference.
ae_qnt = np.percentile(ae_, [25, 75], axis=-1)
ae_iqr = np.diff(ae_qnt, axis=0)
# NOTE: despite its name this is the root mean squared error, not the mean.
ae_mean = np.sqrt(np.mean(ae_**2, axis=-1))
ae_median = np.median(ae_, axis=-1)
ae_std = np.std(ae_, axis=-1)
# Upper Tukey whisker: values above Q3 + 1.5 * IQR are replaced by -inf so that
# the maximum below is taken over the remaining values only. All statistics
# above were computed before this masking.
ae_[(ae_ > (ae_qnt[1] + ae_iqr * 1.5).T)] = -np.inf
ae_hiqr = ae_.max(axis=-1)

# plt.plot(X_test_, ae_max, "-")
# plt.plot(X_test_, ae_hiqr, "-")
# plt.plot(X_test_, ae_median, "-")
plt.plot(X_test_, ae_mean, "-")
# plt.plot(X_test_, ae_hiqr - ae_mean, "-")
# plt.plot(X_test_, ae_mean + ae_std - ae_mean, "-")


In [None]:
plt.plot(X_test_, (np.median(c_width_, axis=-1) / 2))
plt.plot(X_test_, ae_mean)

In [None]:
plt.plot(X_test_, (np.median(b_width_, axis=-1) / 2))
plt.plot(X_test_, ae_mean)

In [None]:
# plt.plot(X_test_, b_width_[..., 0, 0], "r")
# plt.plot(X_test_, c_width_[..., 0, 0], "b")
plt.plot(X_test_, np.median(c_width_, axis=-1) / np.median(b_width_, axis=-1) - 1 )
# plt.plot(abs_err_[..., 0])

## Simple experiments

## 1D case

In [None]:
from utils.functions_1d import get_functions
func1d_ = get_functions()

from scipy.stats import norm
from itertools import chain
from joblib import Parallel, delayed
parallel_ = Parallel(n_jobs=-1, verbose=1)

Now add some noise

In [None]:
# 1-d experiment with noisy targets: for every nugget and test function fit a
# GP on a random training sample, build the intervals on a 1001-point grid and
# save one six-panel figure per conformal procedure.
# NOTE: `levels`, `lvl_cols_`, `PLOT_PATH`, `GaussianProcess`, `KRR_AB` and
# `_pccia` are not defined in this cell and must already exist in the kernel.
# random_state = np.random.RandomState(0x0ABACABA)
# random_state = np.random.RandomState(0x0DABACAB)
random_state = np.random.RandomState(0x0EABACAB)
use_loo = True

# Scale of the Gaussian noise added to the targets below.
noise_ = 1e-6
# Standard normal quantiles at 1 - levels / 2, one per entry of `levels`.
z_a = norm.ppf(1 - .5 * levels)
for size_ in [400,]:
## train
    X_train = random_state.uniform(size=(size_, 1))
## test
    X_test = np.linspace(0, 1, num=1001).reshape((-1, 1))
    X = np.concatenate([X_train, X_test], axis=0)
    # The training rows come first in `X`, so this slice selects exactly them.
    train_ = np.s_[:X_train.shape[0]]
    for nugget in np.logspace(-4, 0, num=3):
        for func_ in func1d_.iterkeys():
            print size_, nugget, func_,
            y = func1d_[func_](X)
            y += random_state.normal(size=y.shape) * noise_

            y_train = y[train_]
            # Everything that is not a training row is a test row.
            y_test = np.delete(y, train_, axis=0)

            # The GP fit supplies the nugget, the kernel parameter and sigma2
            # used below.
            gp = GaussianProcess(thetaL=1e-4, thetaU=1e4, beta0=0,
                                 normalize=False, nugget=nugget).fit(X_train, y_train)

            A, B, y_hat_, MM, loo_ = KRR_AB(X_train, y_train, X_test, forecast=True, loo=use_loo,
                                            nugget=gp.nugget, metric="rbf", gamma=gp.theta_[0])
            # `np.sqrt(MM)` is plotted as the "bayes" curve further down.
            MM *= gp.sigma2

            # One job per test point.
            jobs_ = (delayed(_pccia)(k, levels, y_test[k], y_hat_[k], MM[k], A[0, k], B[k])
                     for k in xrange(y_test.shape[0]))
            results_ = parallel_(jobs_)

            # Each result unpacks as (key, rrcm, ccr, bayes); judging by the
            # names, element 0 of a procedure record holds the hits, element 1
            # the widths and element 2 the bounds -- TODO confirm against `_pccia`.
            # The procedures end up on the last axis, ordered (rrcm, ccr, bayes).
            width_ = np.stack([np.stack((rrcm_[1], ccr_[1], bayes_[1]), axis=1)
                               for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
            hits_ = np.stack([np.stack((rrcm_[0], ccr_[0], bayes_[0]), axis=1)
                              for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
            bounds_ = np.stack([np.stack((rrcm_[2], ccr_[2], bayes_[2]), axis=2)
                                for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
            # Averages over the test points.
            hit_prob_ = hits_.mean(axis=0)
            hit_prob_std_ = hits_.std(axis=0)

            # `j` indexes the last axis of `bounds_`: 0 and 1 are the first two
            # stacked procedures.
            for j, name_ in enumerate(["rrcm", "crr"]):
                fig = plt.figure(figsize=(12, 6))
                # Fill the 2x3 grid column by column.
                order_ = [1, 4, 2, 5, 3, 6]

                ax = fig.add_subplot(2, 3, order_[0])
                ax.plot(X_test, y_test, color="blue")
                ax.plot(X_test, y_hat_, color="red")
                ax.set_title("Actual/Prediction: %s"%(func_,))

                ax = fig.add_subplot(2, 3, order_[1])
                ax.plot(X_test, np.abs(y_hat_ - y_test))
                ax.set_title("Absolute error")

                # One panel per level; the number of levels (4) is hard-coded.
                for i in xrange(4):
                    ax = fig.add_subplot(2, 3, order_[i + 2])

                    ax.plot(X_test, np.abs(y_hat_ - y_test), alpha=0.25, label="$|y-\\hat{y}|$")
                    ax.plot(X_test, np.sqrt(MM), label="bayes")
            #         ax.plot(X_test, (bounds_[:, i, 1, 2] - bounds_[:, i, 0, 2]) / (2 * z_a[i]), label="bayes")
                    # Interval half-width divided by the normal quantile, which
                    # puts it on the same scale as `np.sqrt(MM)`.
                    ax.plot(X_test, (bounds_[:, i, 1, j] - bounds_[:, i, 0, j]) / (2 * z_a[i]), label=name_)
                    ax.set_title("Absolute error/accuracy %s %s-CI"%(name_, lvl_cols_[i],))
                    ax.legend(loc="best")

                fig.tight_layout()
                fig_name_ = os.path.join(PLOT_PATH, "%s 1k-%d %.1e %.1e %s%s.png"
                                         %(func_, X_train.shape[0], nugget, noise_, name_,
                                           " loo" if use_loo else ""))
                print fig_name_

                fig.savefig(fig_name_)
                plt.close()
#                 plt.show()
#                 break
#                 break

Produce the 1D test plots for a $1k$ grid.

In [None]:
# Noise-free 1-d experiment on small training samples: same pipeline and
# figures as a noisy run would use, but no noise is added to the targets and
# `KRR_AB` is called without a `loo` argument.
# NOTE: `levels`, `lvl_cols_`, `PLOT_PATH`, `func1d_`, `parallel_`, `norm`,
# `GaussianProcess`, `KRR_AB` and `_pccia` are not defined in this cell.
random_state = np.random.RandomState(0x0BADC0DE)

# Standard normal quantiles at 1 - levels / 2, one per entry of `levels`.
z_a = norm.ppf(1 - .5 * levels)
for size_ in [10, 20,]:
## train
    X_train = random_state.uniform(size=(size_, 1))
## test
    X_test = np.linspace(0, 1, num=1001).reshape((-1, 1))
    X = np.concatenate([X_train, X_test], axis=0)
    # The training rows come first in `X`, so this slice selects exactly them.
    train_ = np.s_[:X_train.shape[0]]
    for nugget in np.logspace(-4, 0, num=3):
        for func_ in func1d_.iterkeys():
            print size_, nugget, func_,
            y = func1d_[func_](X)

            y_train = y[train_]
            # Everything that is not a training row is a test row.
            y_test = np.delete(y, train_, axis=0)

            gp = GaussianProcess(thetaL=1e-4, thetaU=1e4, beta0=0,
                                 normalize=False, nugget=nugget).fit(X_train, y_train)

            A, B, y_hat_, MM, loo_ = KRR_AB(X_train, y_train, X_test, forecast=True,
                                            nugget=gp.nugget, metric="rbf", gamma=gp.theta_[0])
            # `np.sqrt(MM)` is plotted as the "bayes" curve further down.
            MM *= gp.sigma2

            # One job per test point.
            jobs_ = (delayed(_pccia)(k, levels, y_test[k], y_hat_[k], MM[k], A[0, k], B[k])
                     for k in xrange(y_test.shape[0]))
            results_ = parallel_(jobs_)

            # Each result unpacks as (key, rrcm, ccr, bayes); the procedures end
            # up on the last axis of every stacked array, in that order.
            width_ = np.stack([np.stack((rrcm_[1], ccr_[1], bayes_[1]), axis=1)
                               for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
            hits_ = np.stack([np.stack((rrcm_[0], ccr_[0], bayes_[0]), axis=1)
                              for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
            bounds_ = np.stack([np.stack((rrcm_[2], ccr_[2], bayes_[2]), axis=2)
                                for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
            # Averages over the test points.
            hit_prob_ = hits_.mean(axis=0)
            hit_prob_std_ = hits_.std(axis=0)

            # `j` indexes the last axis of `bounds_`: 0 and 1 are the first two
            # stacked procedures.
            for j, name_ in enumerate(["rrcm", "crr"]):
                fig = plt.figure(figsize=(12, 6))
                # Fill the 2x3 grid column by column.
                order_ = [1, 4, 2, 5, 3, 6]

                ax = fig.add_subplot(2, 3, order_[0])
                ax.plot(X_test, y_test, color="blue")
                ax.plot(X_test, y_hat_, color="red")
                ax.set_title("Actual/Prediction: %s"%(func_,))

                ax = fig.add_subplot(2, 3, order_[1])
                ax.plot(X_test, np.abs(y_hat_ - y_test))
                ax.set_title("Absolute error")

                # One panel per level; the number of levels (4) is hard-coded.
                for i in xrange(4):
                    ax = fig.add_subplot(2, 3, order_[i + 2])

                    ax.plot(X_test, np.abs(y_hat_ - y_test), alpha=0.25, label="$|y-\\hat{y}|$")
                    ax.plot(X_test, np.sqrt(MM), label="bayes")
            #         ax.plot(X_test, (bounds_[:, i, 1, 2] - bounds_[:, i, 0, 2]) / (2 * z_a[i]), label="bayes")
                    # Interval half-width divided by the normal quantile, which
                    # puts it on the same scale as `np.sqrt(MM)`.
                    ax.plot(X_test, (bounds_[:, i, 1, j] - bounds_[:, i, 0, j]) / (2 * z_a[i]), label=name_)
                    ax.set_title("Absolute error/accuracy %s %s-CI"%(name_, lvl_cols_[i],))
                    ax.legend(loc="best")

                fig.tight_layout()
                fig_name_ = os.path.join(PLOT_PATH, "%s 1k-%d %.1e %s.png"%(func_, X_train.shape[0],
                                                                            nugget, name_,))
                print fig_name_

                fig.savefig(fig_name_)
                plt.close()
            #     plt.show()
            #     break

Previous

In [None]:
from scipy.stats import norm
from itertools import chain
from joblib import Parallel, delayed
parallel_ = Parallel(n_jobs=-1, verbose=1)

step_ = 7
for func_ in ["heaviside",]:#func1d_.iterkeys():
    print func_,
    X = np.linspace(0, 1, num=1001).reshape((-1, 1))
    y = func1d_[func_](X)

    train_ = np.s_[::5]
    X_train, y_train = X[train_], y[train_]
    X_test, y_test = X, y# np.delete(X, train_, axis=0), np.delete(y, train_, axis=0)
    
    gp = GaussianProcess(thetaL=1e-4, thetaU=1e4, beta0=0,
                         normalize=False, nugget=1e-6).fit(X_train, y_train)
    
    A, B, y_hat_, MM, loo_ = KRR_AB(X_train, y_train, X_test, forecast=True,
                                    nugget=gp.nugget, metric="rbf", gamma=gp.theta_[0])
    MM *= gp.sigma2

    jobs_ = (delayed(_pccia)(k, levels, y_test[k], y_hat_[k], MM[k], A[0, k], B[k])
             for k in xrange(y_test.shape[0]))
    results_ = parallel_(jobs_)

    width_ = np.stack([np.stack((rrcm_[1], ccr_[1], bayes_[1]), axis=1)
                       for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
    hits_ = np.stack([np.stack((rrcm_[0], ccr_[0], bayes_[0]), axis=1)
                      for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
    bounds_ = np.stack([np.stack((rrcm_[2], ccr_[2], bayes_[2]), axis=2)
                        for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
    hit_prob_ = hits_.mean(axis=0)
    hit_prob_std_ = hits_.std(axis=0)

    fig = plt.figure(figsize=(12, 6))
    order_ = [1, 4, 2, 5, 3, 6]

    ax = fig.add_subplot(2, 3, order_[0])
    ax.plot(X_test, y_test, color="blue")
    ax.plot(X_test, y_hat_, color="red")
    ax.set_title("Actual/Prediction: %s"%(func_,))

    ax = fig.add_subplot(2, 3, order_[1])
    ax.plot(X_test, np.abs(y_hat_ - y_test))
    ax.set_title("Absolute error")

    for j, name_ in enumerate(["rrcm", "bayes"]):
        for i, i_ in enumerate([2, 3]):
            ax = fig.add_subplot(2, 3, order_[i + 3 + j*2 - 1])
            ax.plot(X_test, bounds_[:, i_, 1, j] - bounds_[:, i_, 0, j], color="red")
            ax.plot(X_test, y_test[:, 0] - bounds_[:, i_, 0, j], color="blue")
            ax.axhline(y=0, color="red")
            ax.set_title("the %s-CI(%0.1f%%$\pm$%0.1f%%)\n of %s"
                         %(lvl_cols_[i_], 100*hit_prob_[i_, j], 100*hit_prob_std_[i_,j], name_.upper(),))
    fig.tight_layout()
#     fig.savefig(os.path.join(PLOT_PATH, "1k-%d %s.png"%(step_, func_,)))
#     plt.close()
    plt.show()
    break

## 2D case

In [None]:
from utils.functions_2d import func2D
DGP_ = func2D()

In [None]:
random_state = np.random.RandomState(0xDEADC0DE)

In [None]:
from joblib import Parallel, delayed
parallel_ = Parallel(n_jobs=-1, verbose=1)

# X_train = random_state.uniform(size=(100, 2)) * 2 - 1
X_train = random_state.uniform(size=(1500, 2)) * 2 - 1

mesh_ = np.meshgrid(*2*[np.linspace(-1, 1, num=51)])
X_test = np.concatenate([ax_.reshape((-1, 1)) for ax_ in mesh_], axis=1)

X = np.concatenate([X_train, X_test], axis=0)
train_ = np.s_[:X_train.shape[0]]

z_a = norm.ppf(1 - .5 * levels)

levels_ = np.linspace(-.75, .75, num=16) * 100

i, int_name_ = [(0, "rrcm"), (1, "crr"), (2, "bayes")][0]

# nugget = 1e-2 ## 20160504
nugget = 1e-6

use_loo = True

for theta0_ in np.logspace(-2, 2, num=5):
    for name_ in ["f1", "f2", "f3", "f4", "f5"][-1:]:
        y = DGP_[name_](X)

        y_train = y[train_]
        y_test = np.delete(y, train_, axis=0)

    #     gp = GaussianProcess(thetaL=1e-4, thetaU=1e4, beta0=0, theta0=0.1,
        gp = GaussianProcess(beta0=0, theta0=theta0_, normalize=False,
                             nugget=nugget).fit(X_train, y_train)

        A, B, y_hat_, MM, loo_ = KRR_AB(X_train, y_train, X_test, forecast=True, loo=use_loo,
                                        nugget=gp.nugget, metric="rbf", gamma=gp.theta_[0])
        MM *= gp.sigma2

        jobs_ = (delayed(_pccia)(k, levels, y_test[k], y_hat_[k], MM[k], A[0, k], B[k])
                 for k in xrange(y_test.shape[0]))
        results_ = parallel_(jobs_)

        width_ = np.stack([np.stack((rrcm_[1], ccr_[1], bayes_[1]), axis=1)
                           for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
        hits_ = np.stack([np.stack((rrcm_[0], ccr_[0], bayes_[0]), axis=1)
                          for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
        bounds_ = np.stack([np.stack((rrcm_[2], ccr_[2], bayes_[2]), axis=2)
                            for key_, rrcm_, ccr_, bayes_ in results_], axis=0)
        hit_prob_ = hits_.mean(axis=0)
        hit_prob_std_ = hits_.std(axis=0)

        delta_ = (bounds_[:, :, 1] - bounds_[:, :, 0]) / (2 * z_a[np.newaxis, :, np.newaxis])
        rel_ = (delta_[..., i] / delta_[..., 2] - 1) * 100
        ae_ = np.abs(y_test - y_hat_[:, 0])

    ## Actual surface: 3D
        fig = plt.figure(figsize=(10, 6))
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_surface(mesh_[0], mesh_[1], y_test.reshape(mesh_[0].shape),
                        cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                        antialiased=False, alpha=0.75)
        ax.view_init(60, 30)
        ax.set_title("Actual")
        fig_name_ = os.path.join(OUTPUT_PLOT_PATH, "%s (tht %.1e).png"
                                 %(name_, theta0_,))
        print fig_name_
        fig.savefig(fig_name_)
#         plt.show()
        plt.close()

    ## Absolute prediction error: 3D
        fig = plt.figure(figsize=(10, 6))
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_surface(mesh_[0], mesh_[1], ae_.reshape(mesh_[0].shape),
                        cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                        antialiased=False, alpha=0.9)
        ax.view_init(60, 30)
        ax.set_title("Absolute error")
        fig_name_ = os.path.join(OUTPUT_PLOT_PATH, "%s abs_error (tht %.1e).png"
                                 %(name_, theta0_,))
        print fig_name_,
        fig.savefig(fig_name_)
#         plt.show()
        plt.close()

    ## \hat{y}: 3D
        fig = plt.figure(figsize=(10, 6))
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_surface(mesh_[0], mesh_[1], y_hat_.reshape(mesh_[0].shape),
                        cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                        antialiased=False, alpha=0.9)
        ax.view_init(60, 30)
        ax.set_title("Predicted")
        fig_name_ = os.path.join(OUTPUT_PLOT_PATH, "%s predicted (tht %.1e).png"
                                 %(name_, theta0_,))
        print fig_name_,
        fig.savefig(fig_name_)
#         plt.show()
        plt.close()

    ## \hat{\sigma}: 3D
        sigma_hat_ = np.sqrt(MM[:, 0] - gp.sigma2 * gp.nugget)
        fig = plt.figure(figsize=(10, 6))
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_surface(mesh_[0], mesh_[1], sigma_hat_.reshape(mesh_[0].shape),
                        cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                        antialiased=False, alpha=0.9)
        ax.view_init(60, 30)
        ax.set_title("$\\hat{\\sigma}$")
        fig_name_ = os.path.join(OUTPUT_PLOT_PATH, "%s sigma (tht %.1e).png"
                                 %(name_, theta0_,))
        print fig_name_,
        fig.savefig(fig_name_)
#         plt.show()
        plt.close()

    ## \delta
        sigma_hat_ = np.sqrt(MM[:, 0] - gp.sigma2 * gp.nugget)
        fig = plt.figure(figsize=(10, 6))
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_surface(mesh_[0], mesh_[1], delta_[:, -1, 0].reshape(mesh_[0].shape),
                        cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                        antialiased=False, alpha=0.9)
        ax.view_init(60, 30)
        ax.set_title("RRCM 5%-CI scaled half-width")
        fig_name_ = os.path.join(OUTPUT_PLOT_PATH, "%s rrcm25-shw (tht %.1e%s).png"
                                 %(name_, theta0_, ", loo" if use_loo else "",))
        print fig_name_,
        fig.savefig(fig_name_)
#         plt.show()
        plt.close()

    ## Actual values: top-down
        fig = plt.figure(figsize=(12, 15))
        ax = fig.add_subplot(321)
        cont_ = ax.contourf(mesh_[0], mesh_[1],
                            y_test.reshape(mesh_[0].shape), 20,
                            cmap=plt.cm.coolwarm, lw=0, alpha=0.9)
        ax.set_title("Actual value %s"%(name_,))
        plt.colorbar(cont_)

    ## Absolute arror: top-down
        ax = fig.add_subplot(322)
        cont_ = ax.contourf(mesh_[0], mesh_[1],
                            ae_.reshape(mesh_[0].shape), 20,
                            cmap=plt.cm.coolwarm, lw=0, alpha=0.9)
        ax.set_title("Absolute prediction error")
        plt.colorbar(cont_)

        ## CCI relative precision wrt. bayesian CI.
        # Work on a private copy of the colormap. Calling `set_over` on the cmap
        # of the finished contour set modifies the shared `plt.cm.coolwarm`
        # object itself, and does so only after the first panel has been built.
        # Configuring a copy up front treats all panels alike and leaves the
        # shared colormap untouched for the other figures.
        import copy
        rel_cmap_ = copy.copy(plt.cm.coolwarm)
        rel_cmap_.set_over('white')
        for j, pct_ in enumerate(lvl_cols_):
            ax = fig.add_subplot(3, 2, j + 3)

            # Labelled contour lines on top of the filled contours.
            cont_ = ax.contour(mesh_[0], mesh_[1], rel_[:, j].reshape(mesh_[0].shape),
                               levels=levels_, colors="k", linestyles="solid", extend="both")
            ax.clabel(cont_, inline=1, fontsize=8, fmt='%.0f')

            CS3 = ax.contourf(mesh_[0], mesh_[1], rel_[:, j].reshape(mesh_[0].shape),
                        cmap=rel_cmap_, lw=1, levels=levels_,
                        antialiased=False, alpha=1.0, extend='both',)

            # The title reports 100% minus the average hit rate of the selected
            # procedure (column `i`) and of the last column (bayes).
            ax.set_title("""rel. %s(%.1f%%) / bayes(%.1f%%) %s-CI%s"""
                         %(int_name_, 100-hit_prob_[j, i]*100,
                           100-hit_prob_[j, -1]*100, pct_,
                           "(loo)" if use_loo else "",))
        fig.tight_layout()

        fig_name_ = os.path.join(OUTPUT_PLOT_PATH, "%s efficiency (tht %.1e%s).png"
                                 %(name_, theta0_, ", loo" if use_loo else "",))
        print fig_name_,
        fig.savefig(fig_name_)
#         plt.show()
        plt.close()
        #     break

## Advanced Experiments

In [None]:
random_state = np.random.RandomState(0x0ABACABA)

In [None]:
from math import sqrt

In [None]:
gamma = 10.0
dim_ = 2
resolution=101

In [None]:
dgp_opts_ = {name_: dict(scale=1.0) for name_ in DGP}
dgp_opts_["gaussian"].update(dict(metric="rbf", gamma=gamma))

In [None]:
# def surface(ax, mesh, yy, name, **kwargs):
#     ax.plot_surface(mesh[0], mesh[1], yy.reshape(mesh[0].shape),
#                     cstride=1, rstride=1, cmap=plt.cm.coolwarm,
#                     lw=0, antialiased=False, **kwargs)
#     ax.set_title("A sample surface $y\\sim \\mathtt{%s}$"%(name,))
#     ax.set_ylabel("y")
#     return ax

In [None]:
def run_experiment(X, y, X_test, y_test):
    """Fit a GP on the training sample and build the intervals at every test point.

    The GP fit supplies the nugget, ``sigma2`` and the RBF parameter that are
    passed on to ``KRR_AB``; ``_pccia`` is then run once per test point in
    parallel.

    Returns the tuple ``(width_, hits_, bounds_, y_hat, mse, gp)``. The first
    three are the per-point results stacked along a new leading axis, with the
    three procedures (rrcm, ccr, bayes -- in this order) on the last axis.
    ``y_hat`` and ``mse`` are the first columns of the forecast and of ``MM`` as
    returned by ``KRR_AB``, and ``gp`` is the fitted ``GaussianProcess``.

    NOTE(review): judging by the variable names, element 0 of each procedure
    record holds the hits, element 1 the widths and element 2 the bounds --
    confirm against ``_pccia``. ``levels`` is read from the enclosing scope.
    """
    ## GP regression: estimates the hyperparameters reused below
    gp = GaussianProcess(thetaL=1e-4, thetaU=1e2, beta0=0,
                         normalize=False, nugget=1e-6,
                         storage_mode='light')
    gp = gp.fit(X, y)

    ## Matrices required by the interval procedures
    A, B, y_hat_, MM, loo_ = KRR_AB(X, y, X_test, nugget=gp.nugget, sigma2=gp.sigma2,
                                    metric="rbf", gamma=gp.theta_[0])

    ## One job per test point
    runner_ = Parallel(n_jobs=-1, verbose=1)
    per_point_ = runner_(delayed(_pccia)(k, levels, y_test[k], y_hat_[k],
                                         MM[k], A[0, k], B[k])
                         for k in xrange(y_test.shape[0]))

    def _gather(field, axis):
        # Stack element `field` of the three procedure records of every test
        # point: the procedures go on `axis`, the test points on axis 0.
        return np.stack([np.stack((rrcm_[field], ccr_[field], bayes_[field]), axis=axis)
                         for _, rrcm_, ccr_, bayes_ in per_point_], axis=0)

    width_, hits_, bounds_ = _gather(1, 1), _gather(0, 1), _gather(2, 2)
    return width_, hits_, bounds_, y_hat_[:, 0], MM[:, 0], gp

In [None]:
# For every data generating process, training size and noise level: draw a
# sample, run `run_experiment` and keep the result under the key
# (name, N, noise level).
# NOTE: `DGP`, `dgp_opts_`, `dim_`, `resolution`, `random_state`,
# `train_test_split`, `resample`, `sqrt` and `run_experiment` are not defined
# in this cell. The shared `random_state` is consumed as the loops go, so the
# results depend on the order of iteration.
experiment_ = dict()
for name_, dgp_ in DGP.iteritems():
    print "%s:"%(name_,),
## Create a dedicated validation sample
    mesh_ = np.meshgrid(*dim_*[np.linspace(-1, 1, num=resolution)])
    XX = np.concatenate([ax_.reshape((-1,1)) for ax_ in mesh_], axis=1)
    # `yy` is only referenced by the commented-out sample plot below.
    yy = dgp_(XX, random_state=random_state, **dgp_opts_[name_])
# ## A typical realisation
#     fig = plt.figure(figsize=(8, 6))
#     ax = fig.add_subplot(111, projection='3d')
#     surface(ax, mesh_, yy, name_).view_init(60, 30)
#     fig.savefig(os.path.join(SAMPLE_PLOT_PATH, "%s.png"%(name_)), )
#     plt.close()
## Now do the train/validation split
    XX0, X_validate = train_test_split(XX, test_size=0.25, random_state=random_state)
    for N in [100, 400, 1600]:
        print "N = %d,"%(N,),
        # Subsample N training inputs without replacement; the validation
        # inputs are the same for every N.
        X_train = resample(XX0, replace=False, n_samples=N, random_state=random_state)
        X_full = np.concatenate([X_train, X_validate], axis=0)
## the dgp: add some independent gaussian noise.
        for noise_level_ in [1e-6, 1e-1]:
            print "noise = %2.2e"%(noise_level_)
            # Targets are generated jointly for the training and validation inputs.
            y_full = dgp_(X_full, random_state=random_state,
                          nugget=noise_level_, **dgp_opts_[name_])
            # `noise_level_` acts as a variance here: the added noise has
            # standard deviation sqrt(noise_level_). The "gaussian" DGP is
            # skipped -- presumably it applies the noise itself through its
            # `nugget` argument; confirm.
            if name_ != "gaussian":
                y_full += random_state.normal(size=y_full.shape) * sqrt(noise_level_)
            y_train, y_validate = y_full[:N], y_full[N:]
## The experiment
            result_ = run_experiment(X_train, y_train, X_validate, y_validate)
# ## Save
            # N is stored too, so the train/validation split can be recovered later.
            experiment_[name_, N, noise_level_] = (result_, X_full, y_full, N)
#             break
#         break
#     break

In [None]:
_save(experiment_, os.path.join(OUTPUT_PATH, "experiment_02_"), gz=9)

Box plots.

In [None]:
# One figure per experiment: box plots of the interval widths, one panel per
# procedure and one box per column of the width array (labelled with `lvl_cols_`).
# NOTE: `lvl_cols_` and `OUTPUT_PLOT_PATH` are not defined in this cell. The
# unpacking below rebinds kernel-level names such as `X_test`, `y_test` and `gp`.
for tuple_, result_ in experiment_.iteritems():
    (width_, hits_, bounds_, y_hat_, y_hat_sigma_, gp), X_full, y_full, N = result_
    name_, size_, noise_ = tuple_
    # The first N rows are the training sample; the rest is the test part.
    X_test, y_test = X_full[N:], y_full[N:]

    fig, ax_ = plt.subplots(nrows=1, ncols=3, sharex=True,
                            sharey=True, figsize=(16, 9))
    # The last axis of `width_` is indexed by procedure.
    for i, type_ in enumerate(["rrcm", "crr", "bayes"]):
        ax_[i].boxplot(width_[..., i])
        ax_[i].set_title("""`%s` `%s`(N=%d, noise=%.1E)"""
                         %(type_, name_, size_, noise_,))
        ax_[i].set_ylabel("width")
        ax_[i].set_xticklabels(lvl_cols_)
        ax_[i].grid()
    fig.savefig(os.path.join(OUTPUT_PLOT_PATH, "width_box - %s %.1E %d.png"
                             %(name_, noise_, size_, )), )
    # Close the figure so that the loop does not accumulate open figures.
    plt.close()

In [None]:
def process(result):
    """Summarise one experiment as a table of interval-quality measures.

    Parameters
    ----------
    result : tuple
        `((width_, hits_, bounds_, y_hat_, y_hat_sigma_, gp), X_full, y_full, N)`.
        `width_` and `hits_` are reduced along axis 0 and must leave a
        `(len(levels), 3)` array; `bounds_` is indexed as
        `[sample, level, lower/upper, type]`; `y_hat_` holds the predictions
        for `y_full[N:]`. `y_hat_sigma_`, `gp` and `X_full` are not used here.

    Returns
    -------
    pandas.DataFrame
        Rows are indexed by `(measure, interval type)` and there is one column
        per confidence level, labelled from the module-level `levels`.
    """
    (width_, hits_, bounds_, y_hat_, y_hat_sigma_, gp), X_full, y_full, N = result
    y_test = y_full[N:]

    types_ = ["rrcm", "crr", "bayes"]
    lvl_cols_ = ["%4.1f%%"%(100*lv_,) for lv_ in levels]

    residual_ = y_test - y_hat_
    # Absolute error relative to the width (upper minus lower bound) of each interval.
    ratio_ = np.abs(residual_).reshape((-1, 1, 1)) / (bounds_[:, :, 1] - bounds_[:, :, 0])
    # Mean squared error over the variance of the target. The residual variance
    # would silently discard the squared bias of the predictions.
    mse_var_ = np.mean(residual_**2) / y_test.var()

    def frame_(values_):
        # Every measure shares the same (level x interval type) layout.
        return pd.DataFrame(values_, index=lvl_cols_, columns=types_)

    return pd.concat({
        "median width": frame_(np.median(width_, axis=0)),
        "mean width": frame_(np.mean(width_, axis=0)),
        "95% width": frame_(np.percentile(width_, 95, axis=0)),
        "max width": frame_(np.max(width_, axis=0)),
        "coverage": frame_(np.mean(hits_, axis=0)),
        # NOTE: despite the "avg." label this is the median of the ratio.
        "avg. abs-width ratio": frame_(np.median(ratio_, axis=0)),
        # A single number per experiment, replicated to fit the common layout;
        # the shape follows `levels` instead of assuming four levels.
        "mse/var": frame_(np.full((len(lvl_cols_), len(types_)), mse_var_)),
    }, axis=0, names=["measure"]).unstack().stack(level=0)


Collect the summary statistics of every experiment into a single data frame.

In [None]:
## Stack the per-experiment summaries; the experiment key `(fun, N, noise)`
## becomes the three outermost index levels.
df_ = pd.concat(
    dict((key_, process(res_)) for key_, res_ in experiment_.iteritems()),
    axis=0, names=["fun", "N", "noise"])

Coverage table.

In [None]:
## Keep the "coverage" rows only (the measure is the second-to-last index level)
## and move the innermost index level into the columns.
df_.xs(key="coverage", axis=0, level=-2).unstack(level=-1)

Max width table

In [None]:
## Keep the "max width" rows only (the measure is the second-to-last index level)
## and move the innermost index level into the columns.
df_.xs(key="max width", axis=0, level=-2).unstack(level=-1)

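Surface plots over the test sample: the absolute error (scaled by the standard deviation of `y`), the actual values and the predicted values.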

In [None]:
## Three 3D surfaces per experiment over the first two input coordinates of the
## validation sample: scaled absolute error, actual target, and prediction.
for tuple_, result_ in experiment_.iteritems():
    name_, size_, noise_ = tuple_
    (width_, hits_, bounds_, y_hat_, y_hat_sigma_, gp), X_full, y_full, N = result_
    ## Everything past the first N rows is the validation part of the sample.
    X_test, y_test = X_full[N:], y_full[N:]

    ## (surface values, title template, file name template), in plotting order.
    surfaces_ = [
        (np.abs(y_test - y_hat_) / y_test.std(),
         "abs/std ratio of `y` for `%s`(N=%d, noise=%.1E)",
         "abs_std_ratio - %s %.1E %d.png"),
        (y_test,
         "Actual value of `y` for `%s`(N=%d, noise=%.1E)",
         "actual - %s %.1E %d.png"),
        (y_hat_,
         "Predicted value of `y` for `%s`(N=%d, noise=%.1E)",
         "predicted - %s %.1E %d.png"),
    ]
    for values_, title_tpl_, file_tpl_ in surfaces_:
        fig = plt.figure(figsize=(16, 9))
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_trisurf(X_test[:, 0], X_test[:, 1], values_,
                        cmap=plt.cm.coolwarm, lw=0)
        ax.set_title(title_tpl_ % (name_, size_, noise_))
        ax.view_init(60, 60)
        ## Note the argument order: the file name puts the noise before the size.
        fig.savefig(os.path.join(OUTPUT_PLOT_PATH,
                                 file_tpl_ % (name_, noise_, size_)))
        plt.close(fig)

Abs-width ratio for the intervals

In [None]:
## For every experiment, interval type and confidence level: a 3D surface of the
## absolute prediction error divided by the interval width.
for tuple_, result_ in experiment_.iteritems():
    name_, size_, noise_ = tuple_
    (width_, hits_, bounds_, y_hat_, y_hat_sigma_, gp), X_full, y_full, N = result_
    ## Everything past the first N rows is the validation part of the sample.
    X_test, y_test = X_full[N:], y_full[N:]

    ## The numerator does not depend on the interval type or level.
    abs_err_ = np.abs(y_test - y_hat_)

    for i, type_ in enumerate(["rrcm", "crr", "bayes"]):
        for j, sign_ in enumerate(lvl_cols_):
            fig = plt.figure(figsize=(16, 9))
            ax = fig.add_subplot(111, projection='3d')
            ax.plot_trisurf(X_test[:, 0], X_test[:, 1],
                            abs_err_ / width_[:, j, i],
                            cmap=plt.cm.coolwarm, lw=0)
            ax.set_title(
                "abs/width ratio for `%s`-type %s-interval for `%s`(N=%d, noise=%.1E)"
                % (type_, sign_, name_, size_, noise_))
            ax.view_init(60, 60)
            fig.savefig(os.path.join(
                OUTPUT_PLOT_PATH,
                "abs_width_ratio - %s %.1E %d %s %s.png"
                % (name_, noise_, size_, type_, sign_)))
            plt.close(fig)

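Excess plot: the signed amount by which the actual value falls outside the interval (positive above the upper bound, negative below the lower bound, zero inside), measured in units of half the interval width.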


In [None]:
## For every experiment, interval type and confidence level: a 3D surface of the
## signed excess of the target over the interval, in units of the half-width.
for tuple_, result_ in experiment_.iteritems():
    name_, size_, noise_ = tuple_
    (width_, hits_, bounds_, y_hat_, y_hat_sigma_, gp), X_full, y_full, N = result_
    ## Everything past the first N rows is the validation part of the sample.
    X_test, y_test = X_full[N:], y_full[N:]

    for i, type_ in enumerate(["rrcm", "crr", "bayes"]):
        for j, sign_ in enumerate(lvl_cols_):
            lower_, upper_ = bounds_[:, j, 0, i], bounds_[:, j, 1, i]

            ## Overshoot above the upper bound, zero when not above it.
            excess_u_ = y_test - upper_
            excess_u_ = np.where(excess_u_ < 0, 0, excess_u_)
            ## Undershoot below the lower bound, zero when not below it.
            excess_d_ = lower_ - y_test
            excess_d_ = np.where(excess_d_ < 0, 0, excess_d_)
            excess_ = 2 * (excess_u_ - excess_d_) / width_[:, j, i]

            fig = plt.figure(figsize=(16, 9))
            ax = fig.add_subplot(111, projection='3d')
            ax.plot_trisurf(X_test[:, 0], X_test[:, 1], excess_,
                            cmap=plt.cm.coolwarm, lw=0, alpha=.95,
                            norm=MidPointNorm())
            ax.set_title(
                "Excess of `%s`-type %s-interval for `%s`(N=%d, noise=%.1E)"
                % (type_, sign_, name_, size_, noise_))
            ax.view_init(60, 60)

            fig.savefig(os.path.join(
                OUTPUT_PLOT_PATH,
                "excess - %s %.1E %d %s %s.png"
                % (name_, noise_, size_, type_, sign_)))
            plt.close(fig)

<hr/>
<hr/>

<hr/>
<hr/>