# Dataset regression experiments <a class="anchor" id="__dataset_top__"></a>

Convex and Lipschitz regression experiments on public datasets.<br/>
See the [Notebook parameters](#__dataset_notebook_params__) cell for the settings.
Select and configure the [estimators](#__dataset_estimators__).

Go to the [performance results](#__dataset_notebook_results__).

In [None]:
!date

In [None]:
import os
import sys
import time
import traceback

# Walk upwards from the working directory until a directory containing
# 'requirements.txt' is found; that directory is used as the project root.
project_path = os.path.abspath('.')
while True:
    parent_path = os.path.dirname(project_path)
    # The filesystem root is its own parent ('/' on POSIX, a drive root such as
    # 'C:\\' on Windows). Testing for that instead of the literal '/' lets the
    # search terminate on every platform when no project root exists.
    reached_root = parent_path == project_path
    if reached_root or 'requirements.txt' in os.listdir(project_path):
        break
    project_path = parent_path
assert not reached_root, 'Could not find project_path!'
# Make the project packages importable from the notebook.
if project_path not in sys.path:
    sys.path.append(project_path)
print('project_path: {}'.format(project_path))
# Directory used for downloaded/cached raw datasets.
cache_data_dir = os.path.join(project_path, '_cache_data')

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt

from functools import partial
from joblib import Parallel, delayed, Memory
from collections import OrderedDict
from IPython.display import display

from ai.gandg.common.util import set_random_seed, eprint
from ai.gandg.common.logging_helper import info, start_main_logging
log_queue = start_main_logging()

## Notebook parameters <a id="__dataset_notebook_params__"></a>
[Go to the top.](#__dataset_top__)

The next cell is tagged with <code>parameters</code> for [papermill](https://papermill.readthedocs.io).

In [None]:
# This cell is tagged 'parameters' for papermill.
# These default parameter settings may be overwritten in the next cell.
experiment_id = '_MISSING_ID'  # Name your experiment here!
loss = 'l2'  # 'l1', 'l2' or 'linf'
data_name = 'cpusmall'
#    'concrete', 'combined_cycle_power_plant', 'cpusmall',
#    'red_wine', 'white_wine', 'parkinsons_telemonitoring',
#    'nberces5818v1_n2012', 'nberces5818v1_n1997', 'nberces5818v1_s1987',
#    'pumadyn-8nm', 'pumadyn-8nh', 'pumadyn-8fm', 'pumadyn-8fh',
#    'pumadyn-32nm', 'pumadyn-32nh', 'pumadyn-32fm', 'pumadyn-32fh',
nruns = 2  # number of evaluations of each experiment
std_scaling = False
min_max_scaling = False  # min/max scaling of the entire data
normalize_data = True  # center X and y, then scale X to unit root-mean-square row norm and y to unit std
shuffle_data = True  # whether or not to do an initial shuffle of the samples
negate_target = False
nsamples = 128,256,512  # number of training samples
global_random_seed = None
parallel_nworkers = 2  # maximum number of parallel workers (make sure you have enough RAM too)

In [None]:
def get_int_tuple(param):
    """Normalize a parameter into a tuple of ints where possible.

    A comma-separated string such as '128,256' is split and every item is
    converted with int(); a single int is wrapped into a one-element tuple;
    any other value is returned unchanged.
    """
    if isinstance(param, int):
        return (param,)
    if not isinstance(param, str):
        return param
    return tuple(int(item) for item in param.split(','))

def get_bool(param):
    """Interpret a notebook parameter as a boolean.

    Strings are parsed by their text, because bool('False') is True and
    would silently flip a flag that was passed in as a string. The int() and
    get_int_tuple() conversions in this cell suggest that parameters may
    arrive as strings (presumably from papermill; verify against the caller).

    Raises:
        ValueError: if a string is not a recognized boolean spelling.
    """
    if isinstance(param, str):
        text = param.strip().lower()
        if text in ('true', '1', 'yes', 'on'):
            return True
        if text in ('false', '0', 'no', 'off', ''):
            return False
        raise ValueError(f'Cannot interpret {param!r} as a boolean parameter!')
    return bool(param)


# Coerce the (possibly string-valued) notebook parameters to their real types.
if global_random_seed is not None:
    global_random_seed = int(global_random_seed)
nruns = int(nruns)
nsamples = get_int_tuple(nsamples)
negate_target = get_bool(negate_target)
std_scaling = get_bool(std_scaling)
min_max_scaling = get_bool(min_max_scaling)
normalize_data = get_bool(normalize_data)
shuffle_data = get_bool(shuffle_data)
parallel_nworkers = int(parallel_nworkers)
# At most one of the two column scalings may be requested.
assert (not std_scaling) or (not min_max_scaling), \
    'std_scaling and min_max_scaling are mutually exclusive!'

In [None]:
# Upper bound passed to np.random.randint when drawing the per-stage seeds.
seed_limit = 1e6
if global_random_seed is None:
    # No manual seed was given: derive one from the sub-second part of the
    # current time. The 10000 offset keeps such automatic seeds at or above
    # 10000.
    global_random_seed = 10000 + int(np.round((time.time() % 1) * seed_limit))
set_random_seed(global_random_seed)
# Draw one seed per stage right after seeding. The order of these draws
# determines which seed each stage gets, so it must not be changed.
# NOTE(review): presumably set_random_seed seeds NumPy's global RNG - confirm.
setup_random_seed = np.random.randint(seed_limit)
data_random_seed = np.random.randint(seed_limit)
experiment_random_seed = np.random.randint(seed_limit)
training_random_seed = np.random.randint(seed_limit)
testing_random_seed = np.random.randint(seed_limit)
info('random seeds, global:{}, setup:{}, data:{}, exp:{}, training:{}, testing:{}'.format(
    global_random_seed, setup_random_seed, data_random_seed,
    experiment_random_seed, training_random_seed, testing_random_seed,
))

## Problem setting

In [None]:
is_convex = False
if data_name == 'concrete':
    dataset = 'UCI:165:concrete_data.csv'  # https://doi.org/10.24432/C5PK67
    data_reader_fn = pd.read_csv

    def prepare_data(df):  # X.shape == (1030, 8)
        """Split the concrete data frame into features X and target y.

        The target is the 'Concrete compressive strength' column; the features
        are all other columns except 'Unnamed: 0'.
        """
        target_label = 'Concrete compressive strength'
        target = df.loc[:, target_label]
        non_feature_labels = ['Unnamed: 0', target_label]
        features = df.drop(non_feature_labels, axis=1)
        return features, target

elif data_name == 'combined_cycle_power_plant':
    dataset = 'UCI:294:combined_cycle_power_plant.csv'  # https://doi.org/10.24432/C5002N
    data_reader_fn = pd.read_csv
    
    def prepare_data(df):
        """Split the power plant data frame into features X and target y.

        The target is the 'PE' column; the features are all other columns
        except 'Unnamed: 0'.
        """
        target_label = 'PE'
        target = df[target_label]
        features = df.drop(['Unnamed: 0', target_label], axis=1)
        return features, target

elif data_name in ('red_wine', 'white_wine'):
    wine_color = data_name.split('_')[0]
    dataset = f'UCI:186:{wine_color}_wine_data.csv'  # https://doi.org/10.24432/C56S3T
    data_reader_fn = pd.read_csv
    
    def prepare_data(df):
        """Select the rows of the requested wine color and split them into (X, y).

        Rows whose 'Unnamed: 0' value is below 1599 are taken as red wines,
        all other rows as white wines. The target is the 'quality' column.

        Raises:
            ValueError: if wine_color is neither 'red' nor 'white'.
        """
        index_label = 'Unnamed: 0'
        # NOTE(review): this relies on 'Unnamed: 0' still holding the original
        # row positions. get_raw_data rescales every column when min_max_scaling
        # or std_scaling is enabled, which would invalidate the 1599 threshold -
        # confirm that column scaling is not combined with the wine datasets.
        index = df.loc[:, index_label]
        df = df.drop(index_label, axis=1)
        if wine_color == 'red':
            df = df[index < 1599]
        elif wine_color == 'white':
            df = df[index >= 1599]
        else:
            # Fail loudly instead of continuing with the unfiltered data.
            raise ValueError(f'Invalid wine_color: {wine_color}')
        y = df['quality']
        X = df.drop('quality', axis=1)
        return X, y

elif data_name == 'parkinsons_telemonitoring':
    dataset = 'UCI:189:parkinsons_telemonitoring.csv'
    target_label = 'total_UPDRS'
    # target_label = 'motor_UPDRS'
    data_reader_fn = pd.read_csv
    
    def prepare_data(df):  # X.shape == (5875, 17)
        """Split the Parkinson's telemonitoring frame into features X and target y.

        The target is the column selected by target_label. Both UPDRS score
        columns and 'Unnamed: 0' are removed from the features.
        """
        excluded_labels = ['Unnamed: 0', 'total_UPDRS', 'motor_UPDRS']
        target = df[target_label]
        features = df.drop(excluded_labels, axis=1)
        return features, target

elif data_name.startswith('nberces5818v1_'):
    dataset = f"https://data.nber.org//nberces/nberces5818v1/{data_name}.csv"
    data_reader_fn = pd.read_csv
    
    def prepare_data(df, filter_year=(2000, 2015)):
        """Build log-transformed features X and target y from the NBER-CES frame.

        Args:
            df: data frame with at least the columns 'year', 'vship', 'cap',
                'prodh', 'emp' and 'prode'.
            filter_year: None to keep every row (the 'year' column then stays
                among the features), an int to keep a single year, or a
                (first, last) pair to keep an inclusive range of years.

        Returns:
            (X, y), where all values are log(max(0.1, value)), y is the
            transformed 'vship' column and X holds 'cap', 'prodh', 'prode'
            and the derived 'emp-prode' column.
        """
        used_labels = ['year', 'vship', 'cap', 'prodh', 'emp', 'prode']
        table = df[used_labels]
        if filter_year is not None:
            years = table['year']
            if isinstance(filter_year, int):
                keep = years == filter_year
            else:
                keep = years.between(*filter_year)
            table = table[keep].drop('year', axis=1).reset_index(drop=True)
        table['emp-prode'] = table['emp'] - table['prode']
        # Clip at 0.1 before taking the logarithm to avoid log of values <= 0.
        logged = np.log(np.maximum(0.1, table))
        target = logged['vship']
        features = logged.drop(['vship', 'emp'], axis=1)
        return features, target

elif data_name == 'cpusmall':
    dataset = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/cpusmall"
    
    def data_reader_fn(dataset):
        """Read a space-separated file whose feature cells look like 'index:value'.

        Numeric columns are kept as they are; in every other column only the
        text after the last ':' of each cell is kept.
        (Presumably the dense LIBSVM format, given the dataset URL.)
        """
        df = pd.read_csv(dataset, sep=' ', header=None)

        def strip_feature_index(ser):
            # Test for "numeric" rather than for the 'object' dtype: pandas may
            # store text columns with a dedicated string dtype, and those cells
            # still need their 'index:' prefix removed.
            if pd.api.types.is_numeric_dtype(ser):
                return ser
            return ser.map(lambda s: s.split(':')[-1])

        return df.apply(strip_feature_index)
    
    def prepare_data(df):
        """Use the first column as the target y and the remaining columns as X."""
        target = df.iloc[:, 0]
        features = df.iloc[:, 1:]
        return features, target

elif data_name.startswith('pumadyn-'):
    dataset = os.path.join(cache_data_dir, data_name)
    if not os.path.isdir(dataset):
        raise Exception(f'Dataset {data_name} has to be downloaded manually'
                        f' and unpacked into directory: {cache_data_dir}!')

    def data_reader_fn(dataset):
        """Read 'accel/Prototask.data.gz' below the dataset directory.

        The file is parsed as whitespace-separated values without a header row.
        """
        data_fpath = os.path.join(dataset, 'accel/Prototask.data.gz')
        # Raw string: '\s' inside a plain string literal is an invalid escape
        # sequence that newer Python versions warn about.
        return pd.read_csv(data_fpath, header=None, sep=r'\s+')
        # return pd.read_csv(os.path.join(dataset, 'Dataset.data.gz'), header=None, sep=r'\s+')

    def prepare_data(df):
        """Use the last column as the target y and the other columns as X.

        When data_name contains '32', the feature columns at positions 12..16
        are divided by 10000.0; all other columns are left unchanged.
        """
        target = df.iloc[:, -1]
        features = df.iloc[:, :-1]
        if '32' not in data_name:
            return features, target
        rescaled = features.iloc[:, 12:17] / 10000.0
        pieces = [features.iloc[:, :12], rescaled, features.iloc[:, 17:]]
        return pd.concat(pieces, axis=1), target

else:
    raise Exception(f'Unknown data_name: {data_name}!')

In [None]:
from ai.gandg.common.experiment import loss_l1, loss_l2, loss_linf

report_loss_name = loss
stat_losses = {'l1': loss_l1, 'l2': loss_l2, 'linf': loss_linf}

## Estimators <a class="anchor" id="__dataset_estimators__"></a>
[Go to the top.](#__dataset_top__)

In [None]:
from ai.gandg.common.estimator import Estimator, EstimatorModel, ConstEstimator

set_random_seed(setup_random_seed)
estimators = OrderedDict()

def get_estimator(estimator_name):
    """Return the estimator registered in `estimators` under estimator_name.

    Raises KeyError when no estimator has been registered under that name.
    """
    estimator = estimators[estimator_name]
    return estimator

In [None]:
# Ordinary Least-Squares estimator
from ai.gandg.common.ols import OLSEstimator
estimators['OLS'] = OLSEstimator()

In [None]:
# !pip install xgboost
from ai.gandg.algorithm.external.xgboost import XgbEstimator
estimators['XGB'] = XgbEstimator(objective='reg:absoluteerror') if loss == 'l1' else XgbEstimator()

In [None]:
# !pip install scikit-learn
from ai.gandg.algorithm.external.random_forest import RandomForestEstimator
estimators['RF'] = RandomForestEstimator()

In [None]:
# # !pip install scikit-learn
from ai.gandg.algorithm.external.nearest_neighbors import NearestNeighborsEstimator
# # estimators['KNN1'] = NearestNeighborsEstimator(n_neighbors=1)
# # estimators['KNN5'] = NearestNeighborsEstimator(n_neighbors=5)
# # estimators['KNN+'] = NearestNeighborsEstimator(n_neighbors='AFPC', cv=5, afpc_ntrials=10)
# estimators['KNN*'] = NearestNeighborsEstimator(n_neighbors='n**(d/(2+d))', cv=5)
estimators['KNN'] = NearestNeighborsEstimator(n_neighbors='np.log(n)*n**(2/(2+d))', cv=5)

In [None]:
# # # !pip install scikit-learn scikit-fda
from ai.gandg.algorithm.external.kernel_regression import KernelRegEstimator
estimators['kreg_nor'] = KernelRegEstimator('normal')
# estimators['kreg_epa'] = KernelRegEstimator('epanechnikov')
# estimators['kreg_tri'] = KernelRegEstimator('tri_weight')

In [None]:
# Delta-Convex Fitting (DCF)
from ai.gandg.algorithm.dcf.dcf import DCFEstimator
from ai.gandg.optim.socprog import SOCP_BACKEND__CLARABEL, SOCP_BACKEND__LBFGS

dcf_train_args = {
    # 'verbose': 0,
    # 'local_opt_type': 'smooth',
    # 'normalize': True,
    # 'L_sum_regularizer': '(x_radius/n)**2',
    # 'L_regularizer': 'max(1.0, x_radius)**2 * (d*K/n)',
    # 'L_regularizer_offset': '(y_radius/x_radius)*np.log(n)',
    # 'local_opt_L_regularizer_offset': 'np.log(n)',
    # 'backend': SOCP_BACKEND__LBFGS,  # SOCP_BACKEND__LBFGS or SOCP_BACKEND__CLARABEL
}
# estimators['DCF1'] = DCFEstimator(variant=1, train_args=dcf_train_args)
# estimators['DCF1-'] = DCFEstimator(variant=1, negate_y=True, train_args=dcf_train_args)
estimators['DCFi'] = DCFEstimator(variant=np.inf, train_args=dcf_train_args)
estimators['DCFi-'] = DCFEstimator(variant=np.inf, negate_y=True, train_args=dcf_train_args)
# estimators['DCF2'] = DCFEstimator(variant=2, train_args=dcf_train_args)
# estimators['DCF2-'] = DCFEstimator(variant=2, negate_y=True, train_args=dcf_train_args)
estimators['DCF+'] = DCFEstimator(variant='+', train_args=dcf_train_args)
estimators['DCF+-'] = DCFEstimator(variant='+', negate_y=True, train_args=dcf_train_args)
# # # Symmetrized DCF variants:
# estimators['DCF1s'] = DCFEstimator(variant=1, is_symmetrized=True, train_args=dcf_train_args)
# estimators['DCFis'] = DCFEstimator(variant=np.inf, is_symmetrized=True, train_args=dcf_train_args)
estimators['DCF2s'] = DCFEstimator(variant=2, is_symmetrized=True, train_args=dcf_train_args)
estimators['DCF+s'] = DCFEstimator(variant='+', is_symmetrized=True, train_args=dcf_train_args)

In [None]:
estimators['MMA'] = DCFEstimator(variant='mma', is_symmetrized=False, train_args=dcf_train_args)
# estimators['MMAs'] = DCFEstimator(variant='mma', is_symmetrized=True, train_args=dcf_train_args)

In [None]:
# # Delta-Convex Fitting (initial solutions, iDCF)

# dcf_train_args_i = dict(dcf_train_args)
# dcf_train_args_i['local_opt_maxiter'] = 0

# estimators['iDCFi'] = DCFEstimator(variant=np.inf, train_args=dcf_train_args_i)
# estimators['iDCFi-'] = DCFEstimator(variant=np.inf, negate_y=True, train_args=dcf_train_args_i)
# estimators['iDCFis'] = DCFEstimator(variant=np.inf, is_symmetrized=True, train_args=dcf_train_args_i)
# estimators['iDCF+'] = DCFEstimator(variant='+', train_args=dcf_train_args_i)
# estimators['iDCF+-'] = DCFEstimator(variant='+', negate_y=True, train_args=dcf_train_args_i)
# estimators['iDCF+s'] = DCFEstimator(variant='+', is_symmetrized=True, train_args=dcf_train_args_i)

## Preparing the raw data

In [None]:
def get_raw_data(
    dataset, reader_fn=pd.read_csv,
    shuffle_data=False, data_dir=cache_data_dir,
    min_max_scaling=False, std_scaling=False,
):
    """Load a raw dataset as a float data frame, downloading it on first use.

    Args:
        dataset: either 'UCI:<id>:<file name>' (fetched through ucimlrepo) or
            a URL/path whose base name is used as the cache file name.
        reader_fn: callable turning the cached file path into a data frame.
        shuffle_data: if True, the rows are permuted with df.sample(frac=1).
        data_dir: directory that holds the cached raw files.
        min_max_scaling: if True, shift each column by its minimum and divide
            by the maximum of the shifted column.
        std_scaling: if True (and min_max_scaling is False), center each
            column and divide it by its standard deviation.

    Returns:
        The loaded data frame, converted to float.

    Raises:
        requests.HTTPError: if downloading a non-UCI dataset fails with an
            HTTP error status.

    NOTE(review): both scalings are applied to every column of the frame,
    including identifier-like columns such as 'Unnamed: 0', and a constant
    column leads to a division by zero - confirm this is intended.
    """
    is_uci = dataset.startswith('UCI:')
    fname = dataset.split(':')[-1] if is_uci else os.path.basename(dataset)
    raw_data_fpath = os.path.join(data_dir, fname)
    if not os.path.exists(raw_data_fpath):
        print('Downloading raw data ...', end='')
        os.makedirs(data_dir, exist_ok=True)
        if is_uci:
            from ucimlrepo import fetch_ucirepo
            data = fetch_ucirepo(id=int(dataset.split(':')[1]))
            df = pd.concat([data.data.features, data.data.targets], axis=1)
            df.to_csv(raw_data_fpath)
        else:
            import requests
            # Download first and fail on HTTP errors before touching the cache
            # file, so a failed request cannot leave an empty or error-page
            # file behind that later runs would mistake for cached data.
            response = requests.get(dataset)
            response.raise_for_status()
            with open(raw_data_fpath, 'wb') as f:
                f.write(response.content)
        print(' done')
        print(f'Raw data is saved to: {raw_data_fpath}')
    print(f'Loading raw data from: {raw_data_fpath}')
    df = reader_fn(raw_data_fpath)
    df = df.astype(float)
    if shuffle_data:
        print('Data is shuffled.')
        df = df.sample(frac=1).reset_index(drop=True)
    if min_max_scaling:
        df_min = df.min(axis=0)
        df -= df_min
        print(f'\nmin_max_scaling, df_min:\n{df_min}')
        # The maximum is taken after the shift, so it equals the column range.
        df_max = df.max(axis=0)
        df /= df_max
        print(f'\nmin_max_scaling, df_max:\n{df_max}')
    elif std_scaling:
        df -= df.mean(axis=0)
        df_std = df.std(axis=0)
        df /= df_std
        print(f'std_scaling, df_std:\n{df_std}')
    return df


set_random_seed(data_random_seed)
X, y = prepare_data(get_raw_data(dataset,
                                 reader_fn=data_reader_fn,
                                 shuffle_data=shuffle_data,
                                 std_scaling=std_scaling,
                                 min_max_scaling=min_max_scaling))

def data_normalizer(X, y):
    """Center and rescale the features and the target.

    X is centered column-wise and divided by sx, the Frobenius norm of the
    centered X divided by sqrt(number of rows). y is centered and divided by
    its standard deviation sy. Both scales are printed.

    The inputs are not modified: new objects are returned. The previous
    in-place arithmetic altered the caller's data and failed for integer
    NumPy arrays.

    Returns:
        The normalized (X, y) pair.
    """
    X = X - np.mean(X, axis=0)
    sx = np.linalg.norm(X, ord='fro') / np.sqrt(X.shape[0])
    X = X / sx
    y = y - np.mean(y)
    sy = np.std(y)
    y = y / sy
    print(f"data normalization, sx: {sx:.4f}, sy: {sy:.4f}")
    return X, y

if normalize_data:
    X, y = data_normalizer(X, y)
print(f'\nX.shape: {X.shape}')

In [None]:
print('Feature statistics:')
X.describe().T

In [None]:
print('Response statistics:')
pd.DataFrame(y).describe().T

## Caching

In [None]:
from ai.gandg.common.cache import ResultCache
result_cache = ResultCache(
    is_enabled=(global_random_seed < 10000), # caching is pointless without manual random seed setting
    project_path=project_path,
    experiment_id=experiment_id,
)
print(f'is_caching_enabled: {result_cache.is_enabled()}')
output_dir = None
if result_cache.is_enabled():
    output_dir = os.path.join(result_cache.cache_dir,
                              f'stats-seed{global_random_seed}-r{nruns}'
                              + '-n' + ','.join([str(n) for n in nsamples]))
    os.makedirs(output_dir, exist_ok=True)
    print(f'output_dir: {output_dir}')

## Data sampling

In [None]:
from ai.gandg.common.experiment import get_random_seed_offset

def get_data(n, run, data_random_seed):
    """Draw a random train/test split of the prepared data.

    n rows are sampled without replacement as the training set; all remaining
    rows form the test set. The split seed is data_random_seed plus the offset
    returned by get_random_seed_offset(d, n, run). When negate_target is set,
    both target vectors are negated.

    Returns:
        X_train, y_train, X_test, y_test as copied NumPy arrays.
    """
    d = X.shape[1]
    seed = data_random_seed + get_random_seed_offset(d, n, run)
    print(f'seed: {seed}, d:{d}, n:{n}, run:{run}, data_random_seed:{data_random_seed}')
    set_random_seed(seed)

    # At least one row has to remain for testing.
    assert n < len(y), f'Too few data, n:{n}, len(y):{len(y)}'
    all_rows = np.arange(len(y))
    train_rows = np.random.choice(all_rows, size=n, replace=False)
    test_rows = np.setdiff1d(all_rows, train_rows, assume_unique=True)

    def take(rows):
        # Copy the selected values so the returned arrays are separate objects.
        return X.iloc[rows, :].values.copy(), y.iloc[rows].values.copy()

    X_train, y_train = take(train_rows)
    X_test, y_test = take(test_rows)
    if negate_target:
        y_train, y_test = -y_train, -y_test
    return X_train, y_train, X_test, y_test

### AFPC statistics

In [None]:
def get_clustering(data):
    """Cluster data with adaptive_farthest_point_clustering (q=1).

    Returns the partition together with the rows of data selected by the
    returned center indices.
    """
    from ai.gandg.algorithm.apcnls.fpc import adaptive_farthest_point_clustering

    clustering = adaptive_farthest_point_clustering(
        data=data, q=1, return_center_idxs=True,
    )
    partition, center_idx = clustering
    centers = data[center_idx, :]
    return partition, centers
    # from algorithm.dcf.dcf import get_dcf_partition
    # n, d = data.shape
    # def get_dcf_param(param, default, n, d):
    #     value = dcf_train_args.get(param, default)
    #     if isinstance(value, str):
    #         value = eval(value)
    #     return value
    # return get_dcf_partition(
    #     data,
    #     ntrials=get_dcf_param('afpc_ntrials', 1, n, d),
    #     min_cell_size=get_dcf_param('afpc_min_cell_size', 0, n, d),
    #     kmeans_objval=get_dcf_param('kmeans_objval', False, n, d),
    #     kmeans_kwargs=get_dcf_param('kmeans_kwargs', None, n, d),
    # )[:2]

In [None]:
from ai.gandg.notebooks.clustering_stats import (
    get_clustering_stats, plot_partition_size, plot_partition_epsilon,
)

afpc_stats = get_clustering_stats(
    nsamples=nsamples,
    nruns=nruns,
    data_random_seed=data_random_seed,
    get_data_func=get_data,
    get_cluster_func=get_clustering,
    report_loss=stat_losses[report_loss_name],
)
print('\nData statistics:')
with pd.option_context('display.max_rows', None):
    display(afpc_stats)
if output_dir is not None:
    afpc_stats.to_csv(os.path.join(output_dir, 'data_stats.csv'))

d = X.shape[1]
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 4))
plot_partition_size(ax1, d, nsamples, afpc_stats)
plot_partition_epsilon(ax2, d, nsamples, afpc_stats)
None

In [None]:
from ai.gandg.notebooks.clustering_stats import (
    get_clustering_cell_size_distribution,
    plot_partition_cell_size_distribution,
)
max_n = max(nsamples)
afpc_cs_stats = get_clustering_cell_size_distribution(
    max_n, nruns, data_random_seed,
    get_data_func=get_data, get_cluster_func=get_clustering,
)
fig, ax1 = plt.subplots(1, 1, figsize=(6, 4))
plot_partition_cell_size_distribution(ax1, d, afpc_cs_stats, n=max_n)
None

## Training

In [None]:
from ai.gandg.common.experiment import (
    calc_experiment_result,
    prepare_experiment_calc_funcs,
)

def run_experiment(n, estimator_name, run, data_random_seed, training_random_seed):
    """Run one experiment and return it keyed by (n, estimator_name, run).

    The work is delegated to calc_experiment_result, which is given the
    notebook's data/estimator getters, the loss table and the log queue.
    """
    experiment_key = (n, estimator_name, run)
    experiment_result = calc_experiment_result(
        n=n,
        estimator_name=estimator_name,
        run=run,
        get_data_func=get_data,
        get_estimator_func=get_estimator,
        stat_losses=stat_losses,
        report_loss_name=report_loss_name,
        log_queue=log_queue,
        data_random_seed=data_random_seed,
        training_random_seed=training_random_seed,
    )
    return (experiment_key, experiment_result)

delayed_funcs = prepare_experiment_calc_funcs(
    nsamples=nsamples, nruns=nruns, estimators=estimators,
    data_random_seed=data_random_seed, training_random_seed=training_random_seed,
    result_cache=result_cache, run_experiment_func=run_experiment,
)
try:
    results = OrderedDict(sorted(Parallel(n_jobs=parallel_nworkers)(delayed_funcs)))
except Exception:
    eprint(traceback.format_exc())
    time.sleep(3)
    raise
info('All results have been calculated.')

## Evaluation

In [None]:
skipped_estimators = ()

In [None]:
from ai.gandg.common.experiment import collect_estimator_stats

all_stats = OrderedDict()
for estimator_name in list(estimators.keys()):
    stats = collect_estimator_stats(estimator_name, results)
    print('\nestimator: {}'.format(estimator_name))
    all_stats[estimator_name] = stats
    with pd.option_context('display.max_rows', None):
        display(stats)

if output_dir is not None:
    for k, v in all_stats.items():
        v.to_csv(os.path.join(output_dir, f'stats-{k}.csv'))

In [None]:
from ai.gandg.notebooks.evaluation import plot_standard_stats

plot_standard_stats(
    all_stats=all_stats, report_loss_name=report_loss_name,
    skipped_estimators=skipped_estimators,
)

### Test L2-risk

In [None]:
from ai.gandg.common.experiment import collect_stats_by_name

test_risk = pd.concat([
    collect_stats_by_name(all_stats, 'test_l2-risk__mean'),
    collect_stats_by_name(all_stats, 'test_l2-risk__std'),
], axis=1, keys=['mean', 'std']).swaplevel(0, 1, axis=1).sort_index(axis=1)

print('Test risk:')
display(np.round(test_risk, decimals=4).T)

## Performance <a id="__dataset_notebook_results__"></a>
[Go to the top.](#__dataset_top__)

In [None]:
from ai.gandg.notebooks.evaluation import plot_bar_perf

estimator_names = OrderedDict([
    ('KNN', 'k-NN'),
    ('kreg_nor', 'NW-G'),
    ('kreg_tri', 'NW-T'),
    ('DCFi', r'$\textrm{DCF}_{\hspace{-1mm}\infty}$'),
    ('DCFi-', r'$\textrm{DCF}_{\hspace{-1mm}\infty}^-$'),
    ('DCF2s', r'$\textrm{DCF}_{\hspace{-1mm}2}^\Delta$'),
    ('DCFis', r'$\textrm{DCF}_{\hspace{-1mm}\infty}^\Delta$'),
    ('MMA', r'$\textrm{MMA}$'),
    ('MMAs', r'$\textrm{MMA}^\Delta$'),
    ('DCF+', r'$\textrm{DCF}_{\hspace{-1mm}+}$'),
    ('DCF+-', r'$\textrm{DCF}_{\hspace{-1mm}+}^-$'),
    ('DCF+s', r'$\textrm{DCF}_{\hspace{-1mm}+}^\Delta$'),
    ('RF', 'RF'),
    ('XGB', 'XGB'),
    ('OLS', 'OLS'),
    ('iDCF+s', r'$\textrm{i-DCF}_{\hspace{-1mm}+}^\Delta$'),
])
estimator_names = {k: v for k, v in estimator_names.items() if k in estimators}

In [None]:
scaling = 'noFS'
if min_max_scaling:
    scaling = 'MM'
elif std_scaling:
    scaling = 'STD'

title=f'{data_name} ({scaling})'

In [None]:
plot_bar_perf(results, 'train_l2-risk', estimator_names, ylabel='Train MSE', title=title)

In [None]:
plot_bar_perf(results, 'test_l2-risk', estimator_names, ylabel='Test MSE', title=title)

In [None]:
plot_bar_perf(results, 'train_real_time', estimator_names, yscale='log', ylabel='train.(s, log scale)', title=title)

In [None]:
plot_bar_perf(results, 'test_real_time', estimator_names, yscale='log', ylabel='pred.(ms, log scale)',
              title=title, nscale={n: 1e6/(len(y)-n) for n in nsamples})

## Extra analysis for DCF estimators

In [None]:
dcf_estimator_names = {k: v for k, v in estimator_names.items()
                       if k.startswith('DCF') or k.startswith('MMA')}

In [None]:
from ai.gandg.notebooks.evaluation import (
    plot_dcf_nparams,
    plot_dcf_training_times,
    plot_dcf_niterations,
    print_dcf_lipschitz_constants,
)
plot_dcf_nparams(results, dcf_estimator_names, max_n)
plot_dcf_training_times(results, dcf_estimator_names, max_n)
plot_dcf_niterations(results, dcf_estimator_names, max_n)
print('DCF Lipschitz constants:')
print_dcf_lipschitz_constants(results)

In [None]:
!date