# Kernel ridge regression for functions in the RKHS

Use this notebook to run one of three types of experiments:
1. **Kernel experiment** (```varying_variable='kernel'```): 
    
    Vary the kernel, keeping the distrubution of X and the function f fixed

2. **Dimension experiment** (```varying_variable='d'```): 

    Vary the dimension of X, keeping the function f and kernel fixed (X='gauss')

3. **Mixtures experiment** (```varying_variable='M'```): 

    Vary the number of mixtures in X, keeping the function f and kernel fixed (X='mog', d=2)

In [1]:
# install using `conda install -c conda-forge line_profiler`
%load_ext line_profiler
%load_ext autoreload
%autoreload 2

In [2]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
import numpy as np
# set global seed
# np.random.seed(123)

import pandas as pd
from datetime import datetime
from copy import deepcopy
# utils for plotting
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px

# utils for timing
from goodpoints.tictoc import tic, toc, TicToc
# utils for kernel ridge regression
from goodpoints.krr.util_estimators import get_estimator, get_sigma_heuristic
# utils for evaluating kernels
from goodpoints.krr.util_k_mmd import kernel_eval, to_regression_kernel
# utils for generate samples from the data distribution
from goodpoints.krr.util_sample import get_Xy, ToyData , get_toy_dataset, logistic
# utils for dataset thinning
from goodpoints.krr.util_thin import sd_thin, kt_thin2

## Set hyperparameters

In [3]:
### Toy dataset parameters

n = 2**10

X_name = 'unif'  # ['gauss', 'mog']
X_var = 1
f_name = 'logistic'
noise = None

d = 1
M = 2
k = 2

### Regression parameters

kernel = 'gauss'  # ['gauss', 'laplace']
sigma =  0.125
alpha = 0.001

### Experiment parameters

varying_variable = 'kernel' # ['kernel', 'd', 'M']
n_repeats = 100

### Thinning parameters

m = None # Thinned dataset will have size n/2**m

In [4]:
# parameter checks

assert X_name in ['unif', 'gauss', 'mog']
if X_name == 'mog': assert d <= 2

assert f_name in ['logistic']

assert kernel in ['gauss', 'laplace']

assert varying_variable in ['kernel', 'd', 'M']
if X_name == 'mog': assert varying_variable != 'd', \
        ValueError('cannot running d-varying experiment when X is mog')
if varying_variable in ['d', 'M']: assert kernel == 'laplacian', 'laplacian is not set as default kernel'

In [5]:
# Determine auxiliary parameters
task = 'regression'
refit = 'accuracy'
postprocess = None

# filename = '_'.join([X_name, f_name, f'noise={noise}', f'alpha={alpha}', f'sigma={sigma}'])
filename = '_'.join([X_name, f_name])
print(filename)

unif_logistic


Kernels:
- RBF:
$$\mathbf{k}(x, y) = \exp(-\gamma ||x-y||_2^2)$$
- Laplacian:
$$\mathbf{k}(x, y) = \exp(-\gamma ||x-y||_2)$$

Median heuristic to choose the bandwidth parameter, i.e., median of squared pairwise distances:
- For Gaussian data, we can compute this exactly. Assume $X\sim \mathcal{N}(0,\sigma^2 I_d)$. For the RBF kernel, $X_1-X_2\sim \mathcal{N}(0,2\sigma^2 I_d)$. Then $(X_1-X_2)^2$ follows a chi-squared distribution with $d$ degrees of freedom, mean $d\cdot \sqrt{2}\sigma$ and median roughly $d(1-\frac{2}{9d})^3 \cdot \sqrt{2}\sigma$. For the Laplacian kernel, $||x-y||_1$ follows a folded normal distribution (https://en.wikipedia.org/wiki/Folded_normal_distribution) with median roughly $\sqrt{2}\sigma$.

Available kernels in sklearn: 
https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics.pairwise

## Get dataset

<!-- Go simulate the step function, we construct a probability based on the logistic function with "noise" parameter $\sigma$:
$$p_{\sigma}(x)=\frac{1}{1+e^{-x/\sigma}}$$

Given $x_i$ sampled i.i.d. from $X_i\sim p_{data}(x)$, generate $y_i\sim \text{Bernoulli}(p(x_i))$.

The baseline loss is the integral under (and above) the curve for negative (and positive) values of $X_i$:
$$\text{baseline loss} = 2\int_{-\sqrt 3}^0 \frac{1}{1+e^{-x/\sigma}} \frac{1}{2\sqrt 3}dx = \frac{1}{\sqrt 3}\sigma\cdot \ln \left( \frac{2}{1+e^{-\sqrt 3 / \sigma}} \right)$$

If we plot the baseline loss as a function of $\sigma$: -->

In [6]:
# compute the Bayes optimal risk wrt 0-1 loss
labels, f, prob = logistic(np.linspace(-np.sqrt(3), np.sqrt(3), 10000)[:, np.newaxis], k)
# NOTE: the Bayes optimal classifier simply rounds the probability
baseline_loss = np.mean(np.abs(np.round(prob) - prob))
print(baseline_loss)

0.1668435108432453


In [7]:
def classification_accuracy(labels, pred):
    # implement classification rule
    pred[pred > 0.5] = 1
    pred[pred <= 0.5] = 0
    return accuracy_score(labels, pred)

In [8]:
# toy_data_noise = ToyData(X_name, f_name, X_var=X_var, noise=noise, 
#                     d=d, M=M)
# toy_data_no_noise = ToyData(X_name, f_name, X_var=X_var, noise=noise, 
#                     d=d, M=M)
# X_train, y_train = toy_data_noise.sample(n)
# X_test, y_test = toy_data_no_noise.sample(n)

X_train, y_train = get_toy_dataset(X_name, f_name, n, X_var, d, noise, M, k)
X_test, y_test = get_toy_dataset(X_name, f_name, 10000, X_var, d, noise, M, k)

print(X_train.shape, y_train.shape)

sampling dataset with params ToyData(X_name=unif, f_name=logistic, X_var=1, d=1, noise=None, M=2, k=2)
sampling dataset with params ToyData(X_name=unif, f_name=logistic, X_var=1, d=1, noise=None, M=2, k=2)
(1024, 1) (1024,)


In [9]:
np.std(X_train), np.std(y_train)

(1.0009770398031608, 0.4363431622296944)

In [10]:
def trace_X(X):
    d = X.shape[-1]

    if d== 1:
        return go.Histogram(
            x=X.squeeze(),
            histnorm='probability density',
            name='histogram (normalized)',
            nbinsx=100,
        )

    elif d==2:
        x,y = X[:,0], X[:,1]

        return go.Scatter(
            x=x,
            y=y,
            mode='markers',
            marker=dict(
                symbol='x',
                opacity=0.5,
                color='white',
                size=8,
                line=dict(width=1),
            ),
            name='X data points'
        )

    print(f"cannot plot X with dimension {d}")

def contour(X, pdf_eval):
    d = X.shape[-1]

    if d== 1:
        ticks = 100
        X_coor = np.linspace(X.min(), X.max(), ticks)
        pdf = pdf_eval(X_coor)
        return go.Scatter(
            x=X_coor, 
            y=pdf.squeeze(), 
            mode='lines',
            name='pdf'
        )

    elif d==2:
        x,y = X[:,0], X[:,1]

        ticks = 100
        X_coor, Y_coor = np.linspace(x.min(), x.max(), ticks), np.linspace(y.min(), y.max(), ticks)
        XY_grid = np.stack(np.meshgrid(X_coor, Y_coor), axis=-1)

        pdf = pdf_eval(XY_grid)
        # print(pdf)

        return go.Contour(
            x=X_coor, 
            y=Y_coor, 
            z=pdf.squeeze(), 
            showscale=False, 
            contours=dict(coloring='none'),
            name='pdf'
        )
    
    print(f'cannot plot contour in {d} dimensions')

In [11]:
fig = go.Figure(
    data=[
        trace_X(X_train), 
        # contour(X_train, toy_data_noise.pdf)
    ], 
    # layout=dict(showlegend=True)
)
fig.show()

In [12]:
def trace_Xy(X, y, name=None, color=None, alpha=0.5, mode='markers'):
    d = X.shape[-1]

    if d==1:
        Xy_sort = np.array( sorted(list(zip(X.squeeze(), y)), key=lambda x: x[0]) )
        X_sort, y_sort = Xy_sort[:,0], Xy_sort[:,1]

        return go.Scatter(
            x=X_sort.squeeze(), 
            y=y_sort,
            mode=mode,
            name=name,
            opacity=alpha,
            marker=dict(
                color=color
            )
        )

    elif d==2:
        x1,x2 = X[:,0], X[:,1]
        return go.Scatter3d(x=x1, y=x2, z=y, mode='markers', name=name, marker=dict(
            symbol='circle',
            opacity=alpha,
            color=color,
            size=2,
            # line=dict(width=1),
        ))

    else:
        print(f"cannot plot data with dimension {d}")

In [13]:
labels, f, prob = logistic(X_train, k)

fig = go.Figure(data=[
    trace_Xy(X_train, y_train, name='train'),
    # trace_Xy(X_test, y_test, name='test'),
    trace_Xy(X_train, f, name='f', mode='lines', alpha=1),
    trace_Xy(X_train, prob, name='prob', mode='lines', alpha=1),
])
fig.update_layout(
    title='Logistic function',
    height=400,
    width=800,
)
fig.show()
# save fig
fig.write_image(f"figures/{filename}_function.png")

### Thinning functions

In [14]:
%%time
sd_coreset = sd_thin(X_train, m=m)
print('sd coreset:', len(sd_coreset))
X_train_sd_thin, y_train_sd_thin = X_train[sd_coreset], y_train[sd_coreset]

sd coreset: 32
CPU times: user 465 µs, sys: 111 µs, total: 576 µs
Wall time: 583 µs


In [15]:
from functools import partial

# KERNEL THINNING

# Define kernel params
params_k_swap = {"name": kernel, "var": sigma**2, "d": int(d)}
params_k_split = {"name": kernel, "var": sigma**2, "d": int(d)}

split_kernel = partial(kernel_eval, params_k=params_k_split)
swap_kernel = partial(kernel_eval, params_k=params_k_swap)

regression_split_kernel = to_regression_kernel(split_kernel)
regression_swap_kernel = to_regression_kernel(swap_kernel)

In [16]:
# from goodpoints import compress
# %lprun -f kt_thin2 
kt_coreset = kt_thin2(X_train, split_kernel, swap_kernel, m=m)

In [17]:

print('kt coreset:', len(kt_coreset))
X_train_kt_thin, y_train_kt_thin = X_train[kt_coreset], y_train[kt_coreset]

kt coreset: 32


In [18]:
Xy_train = get_Xy(X_train, y_train)
print(Xy_train.shape)

(1024, 2)


In [19]:
# %lprun -f kt_thin2 
ktr_coreset = kt_thin2(Xy_train, regression_split_kernel, regression_swap_kernel, m=m)

In [20]:

X_train_ktr_thin, y_train_ktr_thin = X_train[ktr_coreset], y_train[ktr_coreset]

In [21]:
# fig = make_subplots(
#     rows=4, cols=1, 
#     subplot_titles=['full', 'st' ,'kt (non-regression)', 'kt (regression)'],
#     shared_xaxes=True,
# )

# fig.add_trace(
#     trace_X(X_train),
#     row=1, col=1,
# )
# fig.add_trace(
#     contour(X_train, toy_data_noise.pdf),
#     row=1, col=1,
# )
# fig.add_trace(
#     trace_X(X_train_sd_thin),
#     row=2, col=1
# )
# fig.add_trace(
#     contour(X_train_sd_thin, toy_data_noise.pdf),
#     row=2, col=1
# )
# fig.add_trace(
#     trace_X(X_train_kt_thin),
#     row=3, col=1
# )
# fig.add_trace(
#     contour(X_train_kt_thin, toy_data_noise.pdf),
#     row=3, col=1
# )
# fig.add_trace(
#     trace_X(X_train_ktr_thin),
#     row=4, col=1
# )
# fig.add_trace(
#     contour(X_train_ktr_thin, toy_data_noise.pdf),
#     row=4, col=1
# )
# fig.update_layout(height=900, showlegend=False)
# fig.show()

## KRR (Full)

In [22]:
krr_full = get_estimator(
    task, 
    'full', 
    alpha=alpha, 
    kernel=kernel, 
    sigma=sigma, 
    postprocess=postprocess
)

In [23]:
krr_full.get_params()

{'alpha': 0.001, 'kernel': 'gauss', 'postprocess': None, 'sigma': 0.125}

In [24]:
%%time
krr_full.fit(X_train, y_train)

CPU times: user 93.2 ms, sys: 12.3 ms, total: 105 ms
Wall time: 126 ms


In [25]:
%%time
print('Score:', krr_full.score(X_test, y_test))

Score: 0.2856698950021448
CPU times: user 201 ms, sys: 124 ms, total: 325 ms
Wall time: 325 ms


In [26]:
print('RMSE:', mean_squared_error(y_test, krr_full.predict(X_test)))

RMSE: 0.13594952690123038


In [27]:
print('acc:', classification_accuracy(y_test, krr_full.predict(X_test)))

acc: 0.8293


In [28]:
random_idx = np.random.choice(np.arange(len(X_train)), 100)
fig = go.Figure(data=[
    # trace_Xy(X_train, y_train, name='train', alpha=0.1),
    trace_Xy(X_test[random_idx], y_test[random_idx], name='test', alpha=0.5, color='red'),
    trace_Xy(X_test, krr_full.predict(X_test), name='pred', alpha=1, mode='lines', color='#00cc96'),
])
fig.update_layout(
    title='full',
    height=400,
    width=800,
)
fig.show()
# save fig
# fig.write_image(f"figures/{filename}_full.png")

## Standard Thinning (ST)

In [29]:
krr_sd_thin = get_estimator(
    task, 
    'st', 
    alpha=alpha / np.power(n, 1/4), 
    kernel=kernel, 
    sigma=sigma, 
    m=m, 
    postprocess=postprocess
)

In [30]:
krr_sd_thin.get_params()

{'alpha': 0.00017677669529663688,
 'kernel': 'gauss',
 'm': None,
 'postprocess': None,
 'sigma': 0.125}

In [31]:
%%time
krr_sd_thin.fit(X_train, y_train)

CPU times: user 716 µs, sys: 205 µs, total: 921 µs
Wall time: 927 µs


In [32]:
%%time
print('Score:', krr_sd_thin.score(X_test, y_test))

Score: -30.20024225564901
CPU times: user 9.75 ms, sys: 0 ns, total: 9.75 ms
Wall time: 9.76 ms


In [34]:
print('RMSE:', mean_squared_error(y_test, krr_sd_thin.predict(X_test)))

RMSE: 5.937952417491904


In [33]:
print('acc:', classification_accuracy(y_test, krr_sd_thin.predict(X_test)))

acc: 0.5673


In [35]:
random_idx = np.random.choice(np.arange(len(X_train)), 100)
fig = go.Figure(data=[
    trace_Xy(krr_sd_thin.X_fit_, krr_sd_thin.y_fit_, name='coreset', alpha=1),
    trace_Xy(X_test[random_idx], y_test[random_idx], name='test', alpha=0.5),
    trace_Xy(X_test, krr_sd_thin.predict(X_test), name='pred', alpha=1, mode='lines'),
])
fig.update_layout(
    title='st',
    height=400,
    width=800,
)
fig.show()
# save fig
# fig.write_image(f"figures/{filename}_st.png")

## Kernel Thinning (KT)

In [36]:
# krr_kt_thin = get_regressor('kt', kernel=kernel, alpha=alpha, sigma=sigma, use_regression_kernel=use_regression_kernel, m=m)
krr_kt_thin = get_estimator(
    task,
    'kt', 
    kernel=kernel, 
    alpha=alpha / np.power(n, 1/4),
    sigma=sigma, 
    m=m, 
    postprocess=postprocess
)

In [37]:
krr_kt_thin

In [38]:
krr_kt_thin.get_params()

{'alpha': 0.00017677669529663688,
 'kernel': 'gauss',
 'm': None,
 'postprocess': None,
 'sigma': 0.125,
 'store_K': True,
 'ydim': 1}

In [39]:
# %%time
# To run line profiler, uncomment the next line
# %lprun -f krr_kt_thin.fit krr_kt_thin.fit(X_train, y_train)
krr_kt_thin.fit(X_train, y_train)

In [40]:
%%time
print('Score:', krr_kt_thin.score(X_test, y_test))

Score: -1.577737342353374
CPU times: user 9.23 ms, sys: 339 µs, total: 9.57 ms
Wall time: 9.58 ms


In [41]:
print('RMSE:', mean_squared_error(y_test, krr_kt_thin.predict(X_test)))

RMSE: 0.49058855243071176


In [42]:
print('acc:', classification_accuracy(y_test, krr_kt_thin.predict(X_test)))

acc: 0.715


In [43]:
random_idx = np.random.choice(np.arange(len(X_train)), 100)
fig = go.Figure(data=[
    trace_Xy(krr_kt_thin.X_fit_, krr_kt_thin.y_fit_, name='coreset',alpha=1),
    trace_Xy(X_test[random_idx], y_test[random_idx], name='test', alpha=0.5),
    trace_Xy(X_test, krr_kt_thin.predict(X_test), name='pred', alpha=1, mode='lines'),
])
fig.update_layout(
    title='kt',
    height=400,
    width=800,
)
fig.show()
# save fig
# fig.write_image(f"figures/{filename}_kt.png")

## FALKON

In [44]:
krr_falkon = get_estimator(
    task,
    'falkon',
    kernel=kernel,
    sigma=sigma,
    alpha=alpha,
    m=m,
    postprocess=postprocess,
)



In [45]:
krr_falkon

In [46]:
if krr_falkon:
    # %lprun -f krr_falkon.fit 
    krr_falkon.fit(X_train, y_train)

In [47]:
%%time
if krr_falkon:
    print('Score:', krr_falkon.score(X_test, y_test))
    print('RMSE:', mean_squared_error(y_test, krr_falkon.predict(X_test)))
    print('acc:', classification_accuracy(y_test, krr_falkon.predict(X_test)))

Score: 0.2876493640940845
RMSE: 0.13557279927253046
acc: 0.8256
CPU times: user 36 ms, sys: 31 µs, total: 36 ms
Wall time: 36 ms


In [48]:
if krr_falkon:
    random_idx = np.random.choice(np.arange(len(X_train)), 100)
    fig = go.Figure(data=[
        # trace_Xy(krr_kt_thin.X_fit_, krr_kt_thin.y_fit_, name='train',alpha=1),
        trace_Xy(X_test[random_idx], y_test[random_idx], name='test', alpha=0.5, color='red'),
        trace_Xy(X_test, krr_falkon.predict(X_test).squeeze(), name='pred', alpha=1, mode='lines', color='#00cc96'),
    ])
    fig.update_layout(
        title='falkon',
        height=400,
        width=800,
    )
    fig.show()
    # save fig
    # fig.write_image(f"figures/{filename}_falkon.png")

## FALKON + KT

In [49]:
# krr_falkon_kt = get_estimator(
#     task,
#     'falkon+kt',
#     kernel=kernel,
#     sigma=sigma,
#     alpha=alpha,
#     m=m,
# )

In [50]:
# %lprun -f krr_falkon_kt.fit krr_falkon_kt.fit(X_train, y_train)

In [51]:
# %%time
# if krr_falkon_kt:
#     print('Score:', krr_falkon_kt.score(X_test, y_test))
#     print('RMSE:', np.sqrt(mean_squared_error(y_test, krr_falkon_kt.predict(X_test))))

## Grid Search

We now run a full grid search with cross validation across different-size datasets.

Reference: https://scikit-learn.org/stable/auto_examples/ensemble/plot_forest_hist_grad_boosting_comparison.html#sphx-glr-auto-examples-ensemble-plot-forest-hist-grad-boosting-comparison-py

In [52]:
from sklearn.model_selection import GridSearchCV, RepeatedKFold
from itertools import product

In [53]:
"""
Varying variables (during grid search, these are NOT parallelized)
"""

if varying_variable == 'kernel':
    varying_variable_values = ['gauss', 'laplace',]
elif varying_variable == 'd':
    varying_variable_values = [2,4, 10, 100]
elif varying_variable == 'M':
    varying_variable_values = [3, 4, 6, 8]
else:
    raise ValueError(f"invalid experiment: cannot vary '{varying_variable}'")

In [54]:
print('Running experiment with varying variable:', varying_variable)
print('taking values:', varying_variable_values)

Running experiment with varying variable: kernel
taking values: ['gauss', 'laplace']


In [55]:
# # Default param grid to search for each model
# default_param_grid = {
#     "sigma" :   [1,],
#     "alpha" :   [0.0001,],

#     # "sigma" :   1/np.sqrt(2*np.array([0.5, 1., 2, 5.])),
#     # "alpha" :   [0.01, 0.02]
# }

# Model constructors and data size for each model
# We allow for different data sizes to avoid running Full KR on large datasets
model_configs = {
    'full' : {
        'logn' : np.arange(8, 13, 2),
        'kwargs': {
            'postprocess' : postprocess,
            'alpha' : alpha,
            'sigma' : sigma,
        },
    },
    'st' : {
        # 'logn' : np.arange(8, 17, 2),
        'logn' : np.arange(8, 13, 2),
        'kwargs' : {
            'm' : m,
            'postprocess' : postprocess,
            'alpha' : alpha,
            'sigma' : sigma,
        },
    },
    'kt' : {
        # 'logn' : np.arange(8, 17, 2),
        'logn' : np.arange(8, 13, 2),
        'kwargs' : {
            'm' : m,
            'postprocess' : postprocess,
            'alpha' : alpha,
            'sigma' : sigma,
        },
    },
    'falkon' : {
        'logn' : np.arange(8, 13, 2),
        'kwargs' : {
            'm' : m,
            'postprocess' : postprocess,
            'alpha' : alpha, # https://falkonml.github.io/falkon/examples/falkon_regression_tutorial.html
            'sigma' : sigma,
        },
    },
}

# cv = RepeatedKFold(n_repeats=n_repeats, n_splits=k_fold)

In [56]:
model_configs

{'full': {'logn': array([ 8, 10, 12]),
  'kwargs': {'postprocess': None, 'alpha': 0.001, 'sigma': 0.125}},
 'st': {'logn': array([ 8, 10, 12]),
  'kwargs': {'m': None, 'postprocess': None, 'alpha': 0.001, 'sigma': 0.125}},
 'kt': {'logn': array([ 8, 10, 12]),
  'kwargs': {'m': None, 'postprocess': None, 'alpha': 0.001, 'sigma': 0.125}},
 'falkon': {'logn': array([ 8, 10, 12]),
  'kwargs': {'m': None, 'postprocess': None, 'alpha': 0.001, 'sigma': 0.125}}}

In [57]:
# Run experiment (depending on experiment_type)

results = []

i = 0
for name, config in model_configs.items():
    for logn in config['logn']:

        for v in varying_variable_values:
            kwargs = deepcopy(config['kwargs'])
            kwargs[varying_variable] = v
            model_name = f"{name}_{v}"
            
            X, y = get_toy_dataset(
                X_name=X_name,
                f_name=f_name,
                n=2**logn,
                X_var=X_var,
                d=kwargs['d'] if 'd' in kwargs else d,
                noise=noise,
                M=kwargs['M'] if 'M' in kwargs else M,
                k=k,
            )
            
            # Set kernel, alpha, sigma params
            if 'kernel' not in kwargs:
                kwargs['kernel'] = kernel

            if name in ['st', 'kt']:
                # NOTE: I think you need to set alpha to be proportional to sqrt(n)
                kwargs['alpha'] /= np.power(2**logn, 1/4)
                
            model = get_estimator(task, name=name, **kwargs)
            if model is None: continue
            
            print(X_name, f_name)
            print(f'i={i+1}: logn={logn}, model={model}')
            
            mean_scores = []
            # std_scores = []

            for _ in range(1 if name == 'full' else n_repeats):
                model.fit(X, y)

                # compute test score
                test_pred = model.predict(X_test).squeeze()
                mean_scores.append( 1- classification_accuracy(y_test, test_pred) )
                # std_scores.append( np.std(test_scores) / np.sqrt(len(test_scores)-1) ) # biased estimator of std

            results.append({
                "logn": logn, 
                "model": model_name, 
                "cv_results": None,
                "best_index_" : None,
                "mean_scores" : mean_scores,
                # "std_score" : np.std(mean_scores),
            })

            i += 1

sampling dataset with params

 ToyData(X_name=unif, f_name=logistic, X_var=1, d=1, noise=None, M=2, k=2)
unif logistic
i=1: logn=8, model=KernelRidgeRegressor(alpha=0.001, kernel='gauss', sigma=0.125)
unif logistic
i=2: logn=8, model=KernelRidgeRegressor(alpha=0.001, sigma=0.125)
sampling dataset with params ToyData(X_name=unif, f_name=logistic, X_var=1, d=1, noise=None, M=2, k=2)
unif logistic
i=3: logn=10, model=KernelRidgeRegressor(alpha=0.001, kernel='gauss', sigma=0.125)
unif logistic
i=4: logn=10, model=KernelRidgeRegressor(alpha=0.001, sigma=0.125)
sampling dataset with params ToyData(X_name=unif, f_name=logistic, X_var=1, d=1, noise=None, M=2, k=2)
unif logistic
i=5: logn=12, model=KernelRidgeRegressor(alpha=0.001, kernel='gauss', sigma=0.125)
unif logistic
i=6: logn=12, model=KernelRidgeRegressor(alpha=0.001, sigma=0.125)
unif logistic
i=7: logn=8, model=KernelRidgeSTRegressor(alpha=0.00025, kernel='gauss', sigma=0.125)
unif logistic
i=8: logn=8, model=KernelRidgeSTRegressor(alpha=0.00025, sigma=0.125)
uni

In [58]:
results

[{'logn': 8,
  'model': 'full_gauss',
  'cv_results': None,
  'best_index_': None,
  'mean_scores': [0.17930000000000001]},
 {'logn': 8,
  'model': 'full_laplace',
  'cv_results': None,
  'best_index_': None,
  'mean_scores': [0.24]},
 {'logn': 10,
  'model': 'full_gauss',
  'cv_results': None,
  'best_index_': None,
  'mean_scores': [0.17689999999999995]},
 {'logn': 10,
  'model': 'full_laplace',
  'cv_results': None,
  'best_index_': None,
  'mean_scores': [0.28690000000000004]},
 {'logn': 12,
  'model': 'full_gauss',
  'cv_results': None,
  'best_index_': None,
  'mean_scores': [0.1723]},
 {'logn': 12,
  'model': 'full_laplace',
  'cv_results': None,
  'best_index_': None,
  'mean_scores': [0.25860000000000005]},
 {'logn': 8,
  'model': 'st_gauss',
  'cv_results': None,
  'best_index_': None,
  'mean_scores': [0.27070000000000005,
   0.2845,
   0.33089999999999997,
   0.20820000000000005,
   0.28890000000000005,
   0.30889999999999995,
   0.23199999999999998,
   0.2349,
   0.3268999

In [59]:
# # Save results with pickle
# import pickle
# dataset_name = '_'.join(['toy', X_name, f_name, f'd={d}'])

# filename = '-'.join(['kernel_ridge_regression', dataset_name, datetime.now().strftime('%H_%M'), 
#                     str(use_regression_kernel)])
# pickle_file = filename + '.p'
# print(pickle_file)

# with open(pickle_file, 'wb') as f:
#     pickle.dump(results, f)

## Plot Results

In [60]:
import plotly.colors as colors
import seaborn as sns

from functools import reduce
from operator import concat

In [61]:
row_subplot_titles = ["Test Score vs n", "Log2 Test Score vs n"] #, "Train time vs n", "Predict time vs n"]

fig = make_subplots(
    rows=len(row_subplot_titles),
    cols=len(varying_variable_values),
    shared_yaxes=True,
    subplot_titles=reduce(concat, [[f'{varying_variable}={v}' for v in varying_variable_values] for _ in row_subplot_titles]),
    vertical_spacing=0.1,
)

model_names = list(model_configs.keys())
colors_list = colors.qualitative.Plotly * (
    len(model_names) // len(colors.qualitative.Plotly) + 1
)
colors_used = set()

In [62]:
def plot_vs_n(print_name, attr_name, vvv, r, c, is_better='higher', scale='log2'):
    """
    Args:
    - vvv: varying variable value
    """
    
    for result in results:
        model_name = result["model"]
        model_name_prefix, vv_name = model_name.split('_') # E.g., kt_rbf -> (kt, rbf)

        # only select results with the correct varying variable value
        if vv_name != vvv:
            continue

        color = colors_list[model_names.index(model_name_prefix)]

        if scale == 'log2':
            y = np.log2(np.abs(result[f"mean_scores"]))
        elif scale == 'linear':
            y = np.abs(result[f"mean_scores"])

        trace = go.Box(
            x=[result['logn']]*len(result[f"mean_scores"]),
            y=y,
            name=model_name_prefix,
            # opacity=0.5,
            legendgroup=model_name_prefix,
            line_color=color,
            offsetgroup=model_name_prefix,
            showlegend=color not in colors_used,
        )

        fig.add_trace(trace, row=r, col=c)
        colors_used.add(color)

    # add line for baseline loss
    # if baseline_loss is not None:
    if scale == 'log2':
        hline = np.log2(np.abs(baseline_loss))
    elif scale == 'linear':
        hline = np.abs(baseline_loss)

    fig.add_hline(
        y=hline,
        row=r, col=c, line_dash="dash",
    )

    if c == 1: fig.update_yaxes(title_text=f"{scale}({print_name}) - {is_better} is better", row=r, col=c)
    fig.update_xaxes(title_text="log2(n)", type='linear', row=r, col=c)
    fig.update_yaxes(type='linear', row=r, col=c)
    
    # fig.update_traces(
    #     box_visible=True, 
    #     meanline_visible=True
    # )
    fig.update_layout(boxmode='group')

def plot_test_score_vs_n(vvv, r, c, scale):
    plot_vs_n(f"Test Class. Err.", "score", vvv, r, c, is_better='lower', scale=scale)

In [63]:
for c, vvv in enumerate(varying_variable_values):
    plot_test_score_vs_n(str(vvv), 1, c+1, scale='linear')
    plot_test_score_vs_n(str(vvv), 2, c+1, scale='log2')


In [64]:
fig.update_layout(
    # legend=dict(traceorder="normal", borderwidth=1),
    title=f"Experiment: {X_name}, {f_name}, noise={noise}, alpha={alpha}, sigma={sigma}, baseline={baseline_loss:.04f}",
    width=800,
    height=800,
    showlegend=True,
)

In [65]:
fig_file = 'figures/' + filename + '_results.png'
print(fig_file)
fig.write_image(fig_file)

figures/unif_logistic_results.png
