# Convex Regression

Convex regression experiments on synthetic problems.<br/>
See the [Notebook parameters](#__cvxreg_synt-settings__) cell for the settings.

In [None]:
%autosave 120
%pylab inline

In [None]:
import os
import sys
import time
import logging
import traceback

# The project root is the current directory when it holds requirements.txt,
# otherwise its parent directory.
if 'requirements.txt' in os.listdir():
    project_path = os.path.abspath('.')
else:
    project_path = os.path.abspath('..')
# Register the root for imports exactly once.
if not any(entry == project_path for entry in sys.path):
    sys.path.append(project_path)
print('project_path: {}'.format(project_path))

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from joblib import Parallel, delayed, Memory
from collections import OrderedDict
from IPython.display import display

from common.util import set_random_seed

In [None]:
# Handlers receiving the log records; enable the FileHandler line to also
# write them to a file.
log_handlers = (
    # logging.FileHandler('.../file.log'),
    logging.StreamHandler(sys.stdout),
)
logging.basicConfig(
    format='%(asctime)s|%(levelname)s|%(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=logging.INFO,
    handlers=log_handlers,
)

def info(*args):
    """Log an INFO record prefixed with the process id.

    args[0] is the message template; the remaining arguments are forwarded to
    logging.info as its %-style formatting arguments. A newline is appended to
    the template.
    """
    template = args[0]
    format_args = args[1:]
    prefix = 'PID:{}|'.format(os.getpid())
    logging.info(prefix + template + '\n', *format_args)

## Notebook parameters <a class="anchor" id="__cvxreg_synt-settings__"></a>
The next cell is tagged with <code>parameters</code> for [papermill](https://papermill.readthedocs.io).

In [None]:
# Notebook parameters. The string-valued settings are parsed by the cells below;
# optional ':'-separated values follow the option name in the order shown.
experiment_id = '_MISSING_ID'  # Name your experiment here!
loss = 'l2'  # 'l1', 'l2'
target_func = 'l1_quad'
#    'linear': linear function
#    'symm_l1': symmetric L1 norm (even, convex)
#    'trunc_l1': truncated L1 norm (convex)
#    'symm_quad': symmetric quadratic function (even, convex)
#    'trunc_quad': truncated quadratic function (convex)
#    'l1_quad': sum over the coordinates of max(1-x, 0) + max(x-1, 0)**2 (convex)
covariate_distr = 'full_dim_normal'
#    'full_dim_normal[:std=1.0]': full dimensional normal distribution
#    'full_dim_uniform[:max=2.0][:min=-2.0]': full dimensional uniform distribution
#    'embed_uniform[:low_d=3][:meas_noise_std=0.1][:max=3.0][:min=-3.0]':
#        uniform random variable linearly embedded into a larger space with Gaussian measurement noise
#    'poly_uniform[:meas_noise_std=0.1][:max=1.0][:min=-1.0]':
#        uniform random variable with polynomial expansion and Gaussian measurement noise
observation_noise = 'normal'
#    'normal[:std=0.3]': Gaussian distribution
#    'rademacher': Rademacher distribution
global_random_seed = None  # nonnegative integer, setting under 10000 turns on caching
domain_dims = '3'  # domain dimensions
nsamples = '100,250'  # number of samples
nruns = 3  # number of experiment runs
ntestsamples = int(1e6)  # number of test samples to generate
parallel_nworkers = 1  # maximum number of parallel workers (make sure you have enough RAM too)
parallel_backend = 'multiprocessing'

In [None]:
def get_int_tuple(param):
    """Normalize a parameter to a tuple of ints where possible.

    A comma-separated string such as '3,5' becomes (3, 5), a single int becomes
    a one-element tuple, and any other value is returned unchanged.
    """
    if isinstance(param, int):
        return (param,)
    if not isinstance(param, str):
        return param
    return tuple(int(piece) for piece in param.split(','))

# Parameters may arrive as strings (e.g. when injected), so coerce them to the
# types used by the rest of the notebook.
global_random_seed = None if global_random_seed is None else int(global_random_seed)
domain_dims, nsamples = get_int_tuple(domain_dims), get_int_tuple(nsamples)
nruns, ntestsamples, parallel_nworkers = int(nruns), int(ntestsamples), int(parallel_nworkers)

In [None]:
# Exclusive upper bound of the derived seeds.
seed_limit = 1e6
if global_random_seed is None:
    # No manual seed: derive one from the sub-second part of the clock. The
    # 10000 offset keeps it out of the range that enables caching.
    clock_fraction = time.time() % 1
    global_random_seed = 10000 + int(np.round(clock_fraction * seed_limit))
set_random_seed(global_random_seed)
# One seed per stage, drawn in this fixed order: setup, data, training, testing.
setup_random_seed, data_random_seed, training_random_seed, testing_random_seed = (
    np.random.randint(seed_limit) for _ in range(4)
)
seed_report = 'random seeds, global:{}, setup:{}, data:{}, training:{}, testing:{}'.format(
    global_random_seed, setup_random_seed, data_random_seed,
    training_random_seed, testing_random_seed,
)
info(seed_report)

## Estimators

In [None]:
# Seed before any estimator object is constructed.
set_random_seed(setup_random_seed)
# Registry: estimator name -> estimator object, in registration order.
estimators = OrderedDict()


def get_estimator(estimator_name):
    """Return the estimator registered under estimator_name (KeyError if absent)."""
    estimator = estimators[estimator_name]
    return estimator

In [None]:
# Ordinary Least-Squares estimator
from common.ols import OLSEstimator
estimators.update({'OLS': OLSEstimator()})

In [None]:
# # LSPA
# from algorithm.lspa.lspa import LSPAEstimator
# estimators['LSPA'] = LSPAEstimator(train_args={'ncenters': 'n**(d/(d+4))', 'nrestarts': 'd', 'nfinalsteps': 'n'})

In [None]:
# # CNLS
# from algorithm.cnls.cnls import CNLSEstimator
# estimators['CNLS_star'] = CNLSEstimator(train_args={'use_L': True})
# estimators['CNLS_ln'] = CNLSEstimator(train_args={'use_L': True, 'ln_L': True})

In [None]:
# Convex Adaptive Partitioning (CAP)
from algorithm.cap.cap import CAPEstimator
estimators.update({'CAP': CAPEstimator()})
# estimators['FastCAP'] = CAPEstimator(train_args={'nranddirs': 5})

In [None]:
# # PCNLS with random Voronoi partition
# from algorithm.pcnls.pcnls_voronoi import PCNLSVoronoiEstimator
# estimators['PCNLS-Voronoi'] = PCNLSVoronoiEstimator()

In [None]:
# Adaptive Max-Affine Partitioning (AMAP)
from algorithm.amap.amap import AMAPEstimator
estimators.update({'AMAP': AMAPEstimator()})

In [None]:
# APCNLS
from algorithm.apcnls.apcnls import APCNLSEstimator
# (registry name, train_args) pairs, registered in this order.
for apcnls_name, apcnls_train_args in (
    ('APCNLS_star', {'use_L': True}),
    ('APCNLS_ln', {'use_L': True, 'L': 'np.log(n)'}),
):
    estimators[apcnls_name] = APCNLSEstimator(train_args=apcnls_train_args)
# estimators['APCNLS_reg'] = APCNLSEstimator(train_args={'use_L': False, 'L_regularizer': 'AUTO'})

In [None]:
# DCF (convex)
from algorithm.dcf.dcf import DCFEstimator
# Both variants share the same settings; the dict literal inside the loop gives
# every estimator its own train_args object.
for dcf_name, dcf_variant in (('DCF2c', 2), ('DCF+c', '+')):
    estimators[dcf_name] = DCFEstimator(
        variant=dcf_variant,
        is_convex=True,
        loss=loss,
        train_args={
            'v_regularizer': 'd**(loss_p/2)',
            'L_regularizer': 'max(1.0, x_radius)**loss_p * (d*K/n)**(loss_p/2)',
            'L_regularizer_offset': 'np.log(n)',
            'L_sum_regularizer': '(x_radius*np.sqrt(d/n))**loss_p',
        },
    )

### Non-convex regressors

In [None]:
# !pip install xgboost
from algorithm.external.xgboost import XgbEstimator
# The L1 loss needs the absolute-error objective; otherwise use the defaults.
xgb_kwargs = {'objective': 'reg:absoluteerror'} if loss == 'l1' else {}
estimators['XGB'] = XgbEstimator(**xgb_kwargs)

In [None]:
# !pip install scikit-learn
from algorithm.external.random_forest import RandomForestEstimator
estimators.update({'RF': RandomForestEstimator()})

In [None]:
# !pip install scikit-learn scikit-fda
from algorithm.external.kernel_regression import KernelRegEstimator
# (registry name, kernel name) pairs, registered in this order.
for kreg_name, kreg_kernel in (
    ('kreg_nor', 'normal'),
    ('kreg_epa', 'epanechnikov'),
    ('kreg_tri', 'tri_weight'),
):
    estimators[kreg_name] = KernelRegEstimator(kreg_kernel)

In [None]:
# DCF (non-convex)
from algorithm.dcf.dcf import DCFEstimator
# Both variants share the same settings; the dict literal inside the loop gives
# every estimator its own train_args object.
for dcf_name, dcf_variant in (('DCF2', 2), ('DCF+', '+')):
    estimators[dcf_name] = DCFEstimator(
        variant=dcf_variant,
        is_convex=False,
        loss=loss,
        train_args={
            'v_regularizer': 'd**(loss_p/2)',
            'L_regularizer': 'max(1.0, x_radius)**loss_p * (d*K/n)**(loss_p/2)',
            'L_regularizer_offset': 'np.log(n)',
            'L_sum_regularizer': '(x_radius*np.sqrt(d/n))**loss_p',
        },
    )

## Caching

In [None]:
# Caching is tied to a manually chosen seed below 10000.
is_caching_enabled = (global_random_seed < 10000)  # caching is pointless without manual random seed setting
if is_caching_enabled:
    cache_dir = os.path.join(project_path, '_result_cache', experiment_id)
    print(f'cache_dir: {cache_dir}')
    # One joblib Memory per estimator, each in its own sub-directory.
    persister_dict = {}
    for estimator_name in estimators:
        estimator_cache_dir = os.path.join(cache_dir, estimator_name)
        os.makedirs(estimator_cache_dir, exist_ok=True)
        persister_dict.update({estimator_name: Memory(estimator_cache_dir, verbose=2)})

def cached_func(func, estimator_name):
    """Wrap func with the joblib cache of the given estimator when caching is on.

    When is_caching_enabled is false, func is returned unchanged. Otherwise func
    is temporarily relabelled (module 'jupyter_notebook', qualified name equal
    to its plain name) while persister_dict[estimator_name].cache(func) builds
    the wrapper. Afterwards the original module name is restored on func and
    copied onto the wrapper. The __qualname__ assignment is not undone.
    """
    if is_caching_enabled:
        old_module = func.__module__
        # presumably gives joblib a function identity that does not depend on
        # the notebook session - TODO confirm against the joblib version in use
        func.__module__ = 'jupyter_notebook'
        func.__qualname__ = func.__name__
        _cached_func = persister_dict[estimator_name].cache(func)
        # Put the real module name back on both objects.
        func.__module__ = old_module
        _cached_func.__module__ = old_module
        return _cached_func
    return func

print(f'is_caching_enabled: {is_caching_enabled}')

## Problem setting

In [None]:
# Default only: the target-function cell below assigns L again (a constant or a
# function of X) for every supported target_func.
L = np.inf  # Lipschitz limit (can be set as a function to measure L on the union of the training and test sets)
# May also be a string, in which case run_experiment evaluates it with eval().
L_scaler = 1.0  # multiplying L (makes sense when L is measured on the data)
# Offset added to every covariate matrix produced by the sample_X functions.
X_mean = 0.0

In [None]:
def loss_l1(yhat, y):  # L1-error
    """Return the mean absolute difference between yhat and y."""
    residual = yhat - y
    absolute_residual = np.abs(residual)
    return np.mean(absolute_residual)

def loss_l2(yhat, y):  # L2-error
    """Return the mean squared difference between yhat and y."""
    residual = yhat - y
    squared_residual = np.square(residual)
    return np.mean(squared_residual)

def loss_inf(yhat, y):  # Linf-error
    """Return the largest absolute difference between yhat and y."""
    residual = yhat - y
    absolute_residual = np.abs(residual)
    return np.max(absolute_residual)

# Losses computed for every experiment, keyed by the name used in the result fields.
stat_losses = dict(l1=loss_l1, l2=loss_l2, inf=loss_inf)
# The loss shown in the reports and plots is the training loss.
report_loss_name = loss

#### Target function

In [None]:
# Every branch defines fstar(X), the row-wise target values, and L, the
# Lipschitz limit: either a constant or a function returning the largest L2
# norm of the (sub)gradient of fstar over the rows of X.
if target_func == 'linear':
    def fstar(X):
        return np.sum(X, axis=1)
    # NOTE(review): the gradient of sum(X) is the all-ones vector, whose L2 norm
    # is sqrt(d); 1.0 matches that only for d == 1 - confirm the intended value.
    L = 1.0
elif target_func == 'symm_l1':
    def fstar(X):
        return np.sum(np.abs(X), axis=1)
    def L_func(X):
        return max(np.linalg.norm(np.sign(X), ord=2, axis=1))
    L = L_func
elif target_func == 'trunc_l1':
    def fstar(X):
        return np.sum(np.abs(np.maximum(X, 0.0)), axis=1)
    def L_func(X):
        return max(np.linalg.norm(np.sign(np.maximum(X, 0.0)), ord=2, axis=1))
    L = L_func
elif target_func == 'symm_quad':
    def fstar(X):
        return 0.5 * np.sum(np.square(X), axis=1)
    def L_func(X):
        return max(np.linalg.norm(X, ord=2, axis=1))
    L = L_func
elif target_func == 'trunc_quad':
    def fstar(X):
        return 0.5 * np.sum(np.square(np.maximum(X, 0.0)), axis=1)
    def L_func(X):
        return max(np.linalg.norm(np.maximum(X, 0.0), ord=2, axis=1))
    L = L_func
elif target_func == 'l1_quad':
    def fstar(X):
        return (
            np.sum(np.abs(np.maximum(1.0-X, 0.0)), axis=1)
            + np.sum(np.square(np.maximum(X-1.0, 0.0)), axis=1)
        )
    def L_func(X):
        # Per coordinate the (sub)gradient of fstar is -1 below the kink at 1.0
        # and 2*(x-1) above it, so the two parts never overlap and can be added.
        # The kink and the factor 2 must match fstar above.
        grad = 2.0 * np.maximum(X-1.0, 0.0) - np.sign(np.maximum(1.0-X, 0.0))
        return max(np.linalg.norm(grad, ord=2, axis=1))
    L = L_func
else:
    raise Exception(f'Not supported target_func: {target_func}!')

#### Covariate distribution

In [None]:
# covariate_distr has the form 'name[:value1][:value2]...'; missing trailing
# values fall back to their defaults.
covariate_distr_parts = covariate_distr.split(':')
covariate_distr_name = covariate_distr_parts[0]


def _covariate_param(position, default, convert=float):
    """Return the converted option at position, or default when it is absent."""
    if len(covariate_distr_parts) > position:
        return convert(covariate_distr_parts[position])
    return default


if covariate_distr_name == 'full_dim_normal':
    covariate_std = _covariate_param(1, 1.0)
    assert covariate_std >= 0.0

    def sample_X(n, d):
        """Draw n rows of d independent normal coordinates."""
        return X_mean + np.random.randn(n, d) * covariate_std
elif covariate_distr_name == 'full_dim_uniform':
    covariate_max = _covariate_param(1, 2.0)
    covariate_min = _covariate_param(2, -covariate_max)
    assert covariate_min < covariate_max

    def sample_X(n, d):
        """Draw n rows of d independent uniform coordinates."""
        width = covariate_max - covariate_min
        return X_mean + np.random.rand(n, d) * width + covariate_min
elif covariate_distr_name == 'embed_uniform':
    low_d = _covariate_param(1, 3, convert=int)
    measurement_noise_std = _covariate_param(2, 0.1)
    covariate_max = _covariate_param(3, 3.0)
    covariate_min = _covariate_param(4, -covariate_max)
    assert low_d >= 1
    assert measurement_noise_std >= 0.0
    assert covariate_min < covariate_max

    def sample_X(n, d):
        """Draw Gaussian noise in d dimensions with the first low_d coordinates uniform."""
        X = np.random.randn(n, d) * measurement_noise_std
        width = covariate_max - covariate_min
        # The first low_d columns are overwritten, not perturbed.
        X[:, :low_d] = np.random.rand(n, low_d) * width + covariate_min
        return X + X_mean
elif covariate_distr_name == 'poly_uniform':
    measurement_noise_std = _covariate_param(1, 0.1)
    covariate_max = _covariate_param(2, 1.0)
    covariate_min = _covariate_param(3, -covariate_max)
    assert measurement_noise_std >= 0.0
    assert covariate_min < covariate_max

    def sample_X(n, d):
        """Draw the powers 0..d-1 of a uniform variable plus Gaussian noise."""
        X = np.random.randn(n, d) * measurement_noise_std
        Z = np.random.rand(n) * (covariate_max - covariate_min) + covariate_min
        for column in range(d):
            X[:, column] += Z**column
        return X + X_mean
else:
    raise Exception(f'Not supported covariate_distr: {covariate_distr}!')

#### Observation noise distribution

In [None]:
# observation_noise has the form 'name[:value]'.
observation_noise_parts = observation_noise.split(':')
observation_noise_name = observation_noise_parts[0]
if observation_noise_name == 'normal':
    if len(observation_noise_parts) > 1:
        observation_noise_std = float(observation_noise_parts[1])
    else:
        observation_noise_std = 0.3

    def sample_noise(n):
        """Draw n zero-mean Gaussian noise values."""
        return np.random.randn(n) * observation_noise_std
elif observation_noise_name == 'rademacher':
    def sample_noise(n):
        """Draw n values that are -1.0 or 1.0."""
        coin_flips = np.random.randint(0, 2, n)
        return 2.0 * (coin_flips - 0.5)
else:
    raise Exception(f'Not supported observation_noise: {observation_noise}!')

## Data

In [None]:
from common.estimator import EstimatorModel


def get_random_seed_offset(d, n, run):
    """Return the seed offset d * n + run of one experiment.

    Different (d, n, run) triples can map to the same offset, e.g. when their
    d * n products are equal.
    """
    size_offset = d * n
    return size_offset + run


def get_data(d, n, run, data_random_seed):
    """Generate the training and test data of one experiment.

    The random seed is data_random_seed plus the (d, n, run) offset. Returns
    (X, y, y_true, X_test, y_test): n noisy training observations together with
    their noise-free targets, and ntestsamples noise-free test points.
    """
    seed = data_random_seed + get_random_seed_offset(d, n, run)
    print(f'seed: {seed}, d:{d}, n:{n}, run:{run}, data_random_seed:{data_random_seed}')
    set_random_seed(seed)

    # Training sample: covariates first, then the observation noise.
    X = sample_X(n, d)
    y_true = fstar(X)
    noise = sample_noise(n)
    y = y_true + noise

    # Test sample: drawn after the training sample, without noise.
    X_test = sample_X(ntestsamples, d)
    return X, y, y_true, X_test, fstar(X_test)

In [None]:
from common.partition import cell_radiuses
from algorithm.apcnls.fpc import adaptive_farthest_point_clustering, get_data_radius
# Partition statistics of adaptive farthest point clustering on the training
# covariates. The four *_means / *_stds lists get one entry per (d, n) pair, in
# loop order (d outer, n inner).
K_means = []
K_stds = []
eps_means = []
eps_stds = []
# One (training covariate radius, largest deviation of y_test above its mean)
# tuple per (d, n, run).
data_radiuses = []
# Test loss of the constant predictor mean(y_test), keyed by (d, n, run).
ybar_test_errors = {}
report_loss = stat_losses[report_loss_name]
for d in domain_dims:
    for n in nsamples:
        K_vals = []
        eps_vals = []
        for run in range(nruns):
            X_train, y_train, y_true, X_test, y_test = get_data(d, n, run, data_random_seed)
            data_radiuses.append((get_data_radius(X_train), np.max(y_test - np.mean(y_test))))
            ybar_test_errors[(d, n, run)] = report_loss(np.mean(y_test), y_test)
            # center_idxs is not used afterwards.
            partition, center_idxs = adaptive_farthest_point_clustering(data=X_train, q=1,
                                                                        return_center_idxs=True)
            K_vals.append(partition.ncells)
            eps_vals.append(max(cell_radiuses(X_train, partition)))
        # Aggregate over the runs of this (d, n) pair.
        K_means.append(np.mean(K_vals))
        K_stds.append(np.std(K_vals))
        eps_means.append(np.mean(eps_vals))
        eps_stds.append(np.std(eps_vals))

In [None]:
data_radiuses

In [None]:
ybar_test_errors

In [None]:
# Plot the AFPC partition size and the largest cell radius against n for the
# last domain dimension.
d = domain_dims[-1]
# The statistics lists hold one entry per (d, n) pair, appended with d as the
# outer loop. Only the trailing len(nsamples) entries belong to the last
# dimension; plotting the full lists would fail with several dimensions.
last_d = slice(len(K_means) - len(nsamples), None)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 4))
ax1.set_title('d: {}, nruns: {}'.format(d, nruns))
ax1.set_xlabel('n')
ax1.set_ylabel('K')
# Reference growth rates as functions of the sample size n.
ax1.plot(nsamples, np.array(nsamples)**(d/(2.+d)), 'k-', label='n^(d/(d+2))')
ax1.plot(nsamples, np.array(nsamples)**(d/(4.+d)), 'r-', label='n^(d/(d+4))')
ax1.errorbar(
    x=nsamples,
    y=K_means[last_d],
    yerr=K_stds[last_d],
    label='AFPC partition size',
)
ax1.legend(loc='upper left')

ax2.set_title('d: {}, nruns: {}'.format(d, nruns))
ax2.set_xlabel('n')
ax2.set_ylabel('epsilon')
ax2.errorbar(
    x=nsamples,
    y=eps_means[last_d],
    yerr=eps_stds[last_d],
    label='max AFPC cell radius',
)
ax2.legend(loc='upper right')
None

## Training

In [None]:
def run_experiment(d, n, L, estimator_name, run, data_random_seed, training_random_seed):
    """Train and evaluate one estimator on one generated data set.

    Parameters
    ----------
    d, n : int
        Domain dimension and number of training samples.
    L : float or callable
        Lipschitz limit. A callable is evaluated on the training and on the
        test covariates and the larger value is used.
    estimator_name : str
        Name of the estimator, resolved with get_estimator.
    run : int
        Index of the repetition.
    data_random_seed, training_random_seed : int
        Base seeds; the (d, n, run) offset is added to each of them.

    Returns
    -------
    tuple
        ((d, n, estimator_name, run), result), where result is an OrderedDict
        with the trained model, the losses in stat_losses and the timings.
    """
    X, y, y_true, X_test, y_test = get_data(d, n, run, data_random_seed)
    L_true = max(L(X), L(X_test)) if callable(L) else L
    # NOTE(review): a string L_scaler is evaluated with eval(); only set it
    # from trusted notebook parameters.
    Lscaler = eval(L_scaler) if isinstance(L_scaler, str) else L_scaler
    L_est = (L_true * Lscaler) if np.isfinite(L_true) else np.inf

    X_norms = np.linalg.norm(X, axis=1)
    X_test_norms = np.linalg.norm(X_test, axis=1)
    info(('\nExperiment, d: {}, n: {}, estimator: {}, L_true: {:.1f}, run: {},\n'
          'train data, minX: {:.2f}, maxX: {:.2f}, minXnorm: {:.4f}, maxXnorm: {:.2f},\n'
          '            miny: {:.2f}, meany: {:.4f}, stdy: {:.4f}, maxy: {:.2f},\n'
          ' test data, minX: {:.2f}, maxX: {:.2f}, minXnorm: {:.4f}, maxXnorm: {:.2f},\n'
          '            miny: {:.2f}, meany: {:.4f}, stdy: {:.4f}, maxy: {:.2f},\n').format(
        d, n, estimator_name, L_true, run,
        np.min(X), np.max(X), np.min(X_norms), np.max(X_norms),
        np.min(y), np.mean(y), np.std(y), np.max(y),
        np.min(X_test), np.max(X_test), np.min(X_test_norms), np.max(X_test_norms),
        np.min(y_test), np.mean(y_test), np.std(y_test), np.max(y_test),
    ))
    set_random_seed(training_random_seed + get_random_seed_offset(d, n, run))
    result = OrderedDict()
    estimator = get_estimator(estimator_name)

    # The Lipschitz limit is passed to the estimator only when it is finite.
    train_args = OrderedDict()
    if np.isfinite(L_est):
        train_args['L'] = L_est
    result['L_est'] = L_est
    result['L_true'] = L_true

    # NOTE: time.perf_counter() measures elapsed time including sleep, so the
    # *_cpu_time fields are elapsed-time measurements as well.
    real_time, cpu_time = time.time(), time.perf_counter()
    model = estimator.train(X, y, **train_args)
    result['model'] = model
    # The weight statistics describe the trained model. The check has to look
    # at model, because result is always an OrderedDict.
    if isinstance(model, EstimatorModel):
        result['nweights'] = model.weights.shape[0]
        result['max_weight_norm'] = max(np.linalg.norm(model.weights, axis=1))
    yhat = estimator.predict(model, X)
    for loss_name, loss_func in stat_losses.items():
        # risk: against the noisy observations; err: against the noise-free targets
        result[f'train_{loss_name}-risk'] = loss_func(yhat, y)
        result[f'train_{loss_name}-err'] = loss_func(yhat, y_true)
    result['train_diff_mean'] = np.mean(yhat - y)
    result['train_diff_median'] = np.median(yhat - y)
    # The training timings also cover the prediction and the losses on the training set.
    result['train_cpu_time'] = time.perf_counter() - cpu_time
    result['train_real_time'] = time.time() - real_time

    real_time, cpu_time = time.time(), time.perf_counter()
    yhat_test = estimator.predict(model, X_test)
    for loss_name, loss_func in stat_losses.items():
        result[f'test_{loss_name}-err'] = loss_func(yhat_test, y_test)
    result['test_cpu_time'] = time.perf_counter() - cpu_time
    result['test_real_time'] = time.time() - real_time

    info(('\nResult, d: {}, n: {}, estimator: {}, run: {}, loss:{}\n'
          ' train, err: {:.4f}, risk: {:.4f}, real_time: {}s,\n'
          '  test, err: {:.4f}, real_time: {}s').format(
        d, n, estimator_name, run, report_loss_name,
        result[f'train_{report_loss_name}-err'],
        result[f'train_{report_loss_name}-risk'],
        int(np.ceil(result['train_real_time'])),
        result[f'test_{report_loss_name}-err'],
        int(np.ceil(result['test_real_time'])),
    ))
    return ((d, n, estimator_name, run), result)

In [None]:
results = []
# One delayed call per (d, n, estimator, run) combination.
delayed_funcs = []
for d in domain_dims:
    for n in nsamples:
        for estimator_name in estimators.keys():
            for run in range(nruns):
                experiment = cached_func(run_experiment, estimator_name)
                delayed_call = delayed(experiment)(
                    d, n, L, estimator_name, run,
                    data_random_seed, training_random_seed,
                )
                delayed_funcs.append(delayed_call)
worker_pool = Parallel(n_jobs=parallel_nworkers, backend=parallel_backend)
# Sorting orders the (key, result) pairs by their (d, n, estimator_name, run) key.
results = OrderedDict(sorted(worker_pool(delayed_funcs)))
info('All results have been calculated.')

## Evaluation

In [None]:
# Show every row and column of the displayed tables.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

def collect_stat_keys_and_values(results, estimator_name):
    """Group the results of one estimator by the leading part of their keys.

    The estimator name is expected as the second to last key element. Returns
    (stat_keys, stat_values): the set of field names seen in the matching
    results, and an OrderedDict mapping each key without its last two elements
    to the list of matching results in iteration order.
    """
    stat_keys = set()
    stat_values = OrderedDict()
    for key, result in results.items():
        if key[-2] == estimator_name:
            stat_values.setdefault(key[:-2], []).append(result)
            stat_keys.update(result.keys())
    return stat_keys, stat_values

In [None]:
# Printing common statistics.

# Result fields that are not numeric statistics.
skipped_stats = ('model',)
# Aggregations applied over the runs of each (d, n) pair.
stat_funcs = OrderedDict((
    ('mean', np.mean),
    ('std', np.std),
    ('min', np.min),
    ('median', np.median),
    ('max', np.max),
))

ds = set()
stats = OrderedDict()
for estimator_name in estimators.keys():
    stat_keys, stat_values = collect_stat_keys_and_values(results, estimator_name)
    # stat_keys is a set; sort it so the row order of the tables (and of the
    # files written from them) is the same on every execution.
    reported_keys = sorted(sk for sk in stat_keys if sk not in skipped_stats)
    stat = {}
    for (d, n), s in stat_values.items():
        ds.add(d)
        ss = OrderedDict()
        for sk in reported_keys:
            for sf_name, sf in stat_funcs.items():
                ss[sk + '__' + sf_name] = sf([v[sk] for v in s])
        stat[(d, n)] = ss
    # Rows: '<field>__<aggregation>', columns: (d, n).
    stat = pd.DataFrame(stat)
    stat.columns.names = ('d', 'n')
    print('\nestimator: {}'.format(estimator_name))
    stats[estimator_name] = stat
    display(stat)

In [None]:
# Plotting common statistics.

skipped_estimators = []  #['OLS']
# Filled per estimator; with several dimensions the entries of the last plotted
# (largest) dimension remain.
test_error_means = {}
test_error_stds = {}


def _iter_dim_stats(d):
    """Yield (estimator_name, stat) for the non-skipped estimators with rows for d.

    Each yielded stat is the transposed statistics table restricted to the rows
    whose first index level equals d.
    """
    for estimator_name, stat in stats.items():
        if estimator_name in skipped_estimators:
            continue
        stat = stat.T
        stat = stat[stat.index.get_level_values(0) == d]
        if not stat.empty:
            yield estimator_name, stat


# Sorted, so that the figure order does not depend on set iteration order.
for d in sorted(ds):
    # First figure: test error and training risk.
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 4))
    for estimator_name, stat in _iter_dim_stats(d):
        ax1.set_title('d: {}, nruns: {}'.format(d, nruns))
        ax1.set_xlabel('n')
        ax1.set_ylabel(f'test {report_loss_name}-error')
        test_error_mean = stat[f'test_{report_loss_name}-err__mean']
        test_error_std = stat[f'test_{report_loss_name}-err__std']
        test_error_means[estimator_name] = test_error_mean
        test_error_stds[estimator_name] = test_error_std
        ax1.errorbar(
            x=stat.index.get_level_values(1),
            y=test_error_mean,
            yerr=test_error_std,
            label=estimator_name,
        )
        ax1.legend(loc='upper right')

        ax2.set_title('d: {}, nruns: {}'.format(d, nruns))
        ax2.set_xlabel('n')
        ax2.set_ylabel(f'training {report_loss_name}-risk')
        ax2.errorbar(
            x=stat.index.get_level_values(1),
            y=stat[f'train_{report_loss_name}-risk__mean'],
            yerr=stat[f'train_{report_loss_name}-risk__std'],
            label=estimator_name,
        )
        ax2.legend(loc='upper right')

    # Second figure: model size (when recorded) and training time.
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 4))
    for estimator_name, stat in _iter_dim_stats(d):
        if 'nweights__mean' in stat:
            ax1.set_title('d: {}, nruns: {}'.format(d, nruns))
            ax1.set_xlabel('n')
            ax1.set_ylabel('number of weight vectors')
            ax1.errorbar(
                x=stat.index.get_level_values(1),
                y=stat['nweights__mean'],
                yerr=stat['nweights__std'],
                label=estimator_name,
            )
            ax1.legend(loc='upper right')

        ax2.set_title('d: {}, nruns: {}'.format(d, nruns))
        ax2.set_xlabel('n')
        ax2.set_ylabel('training time (sec)')
        ax2.errorbar(
            x=stat.index.get_level_values(1),
            y=stat['train_cpu_time__mean'],
            yerr=stat['train_cpu_time__std'],
            label=estimator_name,
        )
        ax2.legend(loc='upper left')

In [None]:
# One row per estimator.
test_error_means = pd.DataFrame(test_error_means).transpose()
test_error_means

In [None]:
# One row per estimator.
test_error_stds = pd.DataFrame(test_error_stds).transpose()
test_error_stds

In [None]:
# Mean test errors relative to the OLS estimator.
ols_test_error_means = test_error_means.loc['OLS']
test_error_means / ols_test_error_means

In [None]:
# Test error deviations relative to the mean OLS test error.
test_error_stds / ols_test_error_means

In [None]:
# The directory name encodes the seed, the number of runs, the dimensions and
# the sample sizes of this execution.
dims_tag = ','.join(map(str, domain_dims))
nsamples_tag = ','.join(map(str, nsamples))
output_dir = os.path.join(
    project_path,
    '_result_cache',
    experiment_id,
    f'stats-seed{global_random_seed}-r{nruns}' + '-d' + dims_tag + '-n' + nsamples_tag,
)
os.makedirs(output_dir, exist_ok=True)
# One CSV file per estimator.
for estimator_label, estimator_stat in stats.items():
    estimator_stat.to_csv(os.path.join(output_dir, f'stats-{estimator_label}.csv'))

In [None]:
# Printing estimator model specific statistics.

# Estimators to report and the model attributes to aggregate; both are empty by
# default, so nothing is printed until they are filled in.
estimator_names = []
model_fields = []

stats = OrderedDict()
for estimator_name in estimators.keys():
    if estimator_name in estimator_names:
        stat_keys, stat_values = collect_stat_keys_and_values(results, estimator_name)
        stat = {}
        for (d, n), s in stat_values.items():
            ss = OrderedDict()
            # Only the stored models are inspected here.
            if 'model' in stat_keys:
                for field in model_fields:
                    for sf_name, sf in stat_funcs.items():
                        field_values = [getattr(run_result['model'], field) for run_result in s]
                        vals = [value for value in field_values if value is not None]
                        ss[field + '__' + sf_name] = sf(vals) if len(vals) != 0 else None
            stat[(d, n)] = ss
        stat = pd.DataFrame(stat)
        stat.columns.names = ('d', 'n')
        print('\nestimator: {}'.format(estimator_name))
        stats[estimator_name] = stat
        display(stat)