In [10]:
%%javascript
IPython.OutputArea.auto_scroll_threshold = 9999;

<IPython.core.display.Javascript object>

In [11]:
import sys
sys.path.append('../../')

from matplotlib import pyplot
import numpy as np
import pandas
from matplotlib import pyplot as plt
from scipy.stats import rankdata
from IPython.display import display, HTML

from bayesian_benchmarks.database_utils import Database
from bayesian_benchmarks.data import classification_datasets, _ALL_REGRESSION_DATATSETS, _ALL_CLASSIFICATION_DATATSETS
ALL_DATATSETS = {}
ALL_DATATSETS.update(_ALL_REGRESSION_DATATSETS)
ALL_DATATSETS.update(_ALL_CLASSIFICATION_DATATSETS)
from bayesian_benchmarks.data import regression_datasets


In [15]:
def rankarray(A):
    ranks = []
    for a in A:
        ranks.append(rankdata(a))
    return np.array(ranks)


def read_regression_classification(fs, models_names, datasets, task):
    if task == 'classification':
        fields = ['dataset', 'N', 'D', 'K'] + [m[1] for m in models_names]
    else:
        fields = ['dataset', 'N', 'D'] + [m[1] for m in models_names]

    results = {}
    for f in fs:
        results[f] = {'table':{f:[] for f in fields}, 'vals':[]}

    with Database('../tasks/elevators_temp.db') as db:

        for dataset in datasets:
            for f in fs:
                results[f]['table']['dataset'].append(dataset[:10])
                results[f]['table']['N'].append(ALL_DATATSETS[dataset].N)
                results[f]['table']['D'].append(ALL_DATATSETS[dataset].D)
                if task == 'classification':
                    results[f]['table']['K'].append(ALL_DATATSETS[dataset].K)

            row = {f:[] for f in fs}
            for model, name in models_names:
                res = db.read(task, fs, {'model':model, 
                                         'dataset':dataset})
                    
                if len(res) == 0:
                    for f in fs:
                        results[f]['table'][name].append('')
                        row[f].append(np.nan)
                else:
                    print('{} {} {}'.format(model, dataset, len(res)))
                    for i, f in enumerate(fs):
                        L = [float(l[i]) for l in res]
                        m = np.average(L)
                        std = np.std(L) if len(L) > 1 else np.nan
                        if m < 1000 and m > -1000:
                            r = '{:.3f}({:.3f})'.format(m, std)
                            row[f].append(m)
                        else:
                            r = 'nan'
                            row[f].append(np.nan)

                        results[f]['table'][name].append(r)

            #             stderr = np.std(L)/float(len(L))**0.5
            #             r = '{:.3f} ({:.3f})'.format(m, stderr)
            for f in fs:   
                results[f]['vals'].append(row[f])


    for f in fs:
        if 'unnormalized' not in f:
            vals = np.array(results[f]['vals'])

            avgs = np.nanmean(vals, 0)
            meds = np.nanmedian(vals, 0)
            rks = np.nanmean(rankarray(vals), 0)

            for s, n in [[avgs, 'avg'], [meds, 'median'], [rks, 'avg rank']]:
                results[f]['table']['dataset'].append(n)
                results[f]['table']['N'].append('')
                results[f]['table']['D'].append('')
                if task == 'classification':
                    results[f]['table']['K'].append('')
                for ss, name in zip(s, [m[1] for m in models_names]):
                    results[f]['table'][name].append('{:.3f}'.format(ss))
    
    return results, fields


In [19]:
models_names = [['RegNet', 'SGD'], ['RegNetpcaess', 'PCA+ESS'], ['RegNetpcavi', 'PCA+VI'], 
               ['RegNetpcalow_rank_gaussian', 'PCA+SWAG']]
regression_datasets = ['wilson_elevators', 'wilson_keggdirected', 'wilson_keggundirected', 'wilson_protein']
fs = 'test_loglik', 'test_rmse', 'test_loglik_unnormalized', 'test_rmse_unnormalized', 'test_calibration'

results, fields = read_regression_classification(fs, models_names, regression_datasets, 'regression')


RegNetpcaess wilson_elevators 20
RegNetpcavi wilson_elevators 20




In [22]:
print('normalised test loglikelihood')
display(HTML(pandas.DataFrame(results['test_loglik']['table'], columns=fields).to_html(index=False)))
# print(pandas.DataFrame(results['test_loglik']['table'], columns=fields).to_latex())

# print('unnormalized test loglikelihood')
# display(HTML(pandas.DataFrame(results['test_loglik_unnormalized']['table'], columns=fields).to_html(index=False)))


# print('normalised test rmse')
# display(HTML(pandas.DataFrame(results['test_rmse']['table'], columns=fields).to_html(index=False)))

print('normalised test rmse')
display(HTML(pandas.DataFrame(results['test_rmse_unnormalized']['table'], columns=fields).to_html(index=False)))

print('test_calibration')
display(HTML(pandas.DataFrame(results['test_calibration']['table'], columns=fields).to_html(index=False)))

normalised test loglikelihood


dataset,N,D,SGD,PCA+ESS,PCA+VI,PCA+SWAG
wilson_ele,16599.0,18.0,,-0.397(0.050),-0.380(0.041),
wilson_keg,48827.0,20.0,,,,
wilson_keg,63608.0,27.0,,,,
wilson_pro,45730.0,9.0,,,,
avg,,,,-0.397,-0.380,
median,,,,-0.397,-0.380,
avg rank,,,1.5,1.750,2.750,4.0


normalised test rmse


dataset,N,D,SGD,PCA+ESS,PCA+VI,PCA+SWAG
wilson_ele,16599,18,,0.093(0.004),0.090(0.002),
wilson_keg,48827,20,,,,
wilson_keg,63608,27,,,,
wilson_pro,45730,9,,,,


test_calibration


dataset,N,D,SGD,PCA+ESS,PCA+VI,PCA+SWAG
wilson_ele,16599.0,18.0,,0.947(0.024),0.968(0.012),
wilson_keg,48827.0,20.0,,,,
wilson_keg,63608.0,27.0,,,,
wilson_pro,45730.0,9.0,,,,
avg,,,,0.947,0.968,
median,,,,0.947,0.968,
avg rank,,,1.5,1.750,2.750,4.0


In [6]:
fs = ['test_loglik', 'test_acc']
results, fields = read_regression_classification(fs, models_names, classification_datasets, 'classification')


linear abalone 20
variationally_sparse_gp abalone 10
variationally_sparse_gp_minibatch abalone 10
deep_gp_doubly_stochastic abalone 10
svm abalone 10
knn abalone 10
gradient_boosting_machine abalone 10
adaboost abalone 10
mlp abalone 10
linear acute-inflammation 20
variationally_sparse_gp acute-inflammation 10
variationally_sparse_gp_minibatch acute-inflammation 10
deep_gp_doubly_stochastic acute-inflammation 10
svm acute-inflammation 10
knn acute-inflammation 10
gradient_boosting_machine acute-inflammation 10
adaboost acute-inflammation 10
mlp acute-inflammation 10
linear acute-nephritis 20
variationally_sparse_gp acute-nephritis 10
variationally_sparse_gp_minibatch acute-nephritis 10
deep_gp_doubly_stochastic acute-nephritis 10
svm acute-nephritis 10
knn acute-nephritis 10
gradient_boosting_machine acute-nephritis 10
adaboost acute-nephritis 10
mlp acute-nephritis 10
linear adult 20
variationally_sparse_gp_minibatch adult 10
svm adult 10
knn adult 10
gradient_boosting_machine adult 1

adaboost dermatology 10
mlp dermatology 10
linear echocardiogram 20
variationally_sparse_gp echocardiogram 10
variationally_sparse_gp_minibatch echocardiogram 1
deep_gp_doubly_stochastic echocardiogram 10
svm echocardiogram 10
knn echocardiogram 10
gradient_boosting_machine echocardiogram 10
adaboost echocardiogram 10
mlp echocardiogram 10
linear ecoli 20
variationally_sparse_gp ecoli 10
variationally_sparse_gp_minibatch ecoli 2
deep_gp_doubly_stochastic ecoli 10
svm ecoli 10
knn ecoli 10
gradient_boosting_machine ecoli 10
adaboost ecoli 10
mlp ecoli 10
linear energy-y1 20
variationally_sparse_gp energy-y1 10
variationally_sparse_gp_minibatch energy-y1 5
deep_gp_doubly_stochastic energy-y1 10
svm energy-y1 10
knn energy-y1 10
gradient_boosting_machine energy-y1 10
adaboost energy-y1 10
mlp energy-y1 10
linear energy-y2 20
variationally_sparse_gp energy-y2 10
variationally_sparse_gp_minibatch energy-y2 4
deep_gp_doubly_stochastic energy-y2 10
svm energy-y2 10
knn energy-y2 10
gradient_b

gradient_boosting_machine monks-1 10
adaboost monks-1 10
mlp monks-1 10
linear monks-2 20
variationally_sparse_gp monks-2 10
variationally_sparse_gp_minibatch monks-2 10
deep_gp_doubly_stochastic monks-2 10
svm monks-2 10
knn monks-2 10
gradient_boosting_machine monks-2 10
adaboost monks-2 10
mlp monks-2 10
linear monks-3 20
variationally_sparse_gp monks-3 10
variationally_sparse_gp_minibatch monks-3 10
deep_gp_doubly_stochastic monks-3 10
svm monks-3 10
knn monks-3 10
gradient_boosting_machine monks-3 10
adaboost monks-3 10
mlp monks-3 10
linear mushroom 20
variationally_sparse_gp_minibatch mushroom 1
deep_gp_doubly_stochastic mushroom 2
svm mushroom 10
knn mushroom 10
gradient_boosting_machine mushroom 10
adaboost mushroom 10
mlp mushroom 10
linear musk-1 20
variationally_sparse_gp musk-1 10
variationally_sparse_gp_minibatch musk-1 8
deep_gp_doubly_stochastic musk-1 10
svm musk-1 10
knn musk-1 10
gradient_boosting_machine musk-1 10
adaboost musk-1 10
mlp musk-1 10
linear musk-2 20
va

deep_gp_doubly_stochastic spectf 10
svm spectf 10
knn spectf 10
gradient_boosting_machine spectf 10
adaboost spectf 10
mlp spectf 10
linear statlog-australian-credit 20
variationally_sparse_gp statlog-australian-credit 10
variationally_sparse_gp_minibatch statlog-australian-credit 10
deep_gp_doubly_stochastic statlog-australian-credit 10
svm statlog-australian-credit 10
knn statlog-australian-credit 10
gradient_boosting_machine statlog-australian-credit 10
adaboost statlog-australian-credit 10
mlp statlog-australian-credit 10
linear statlog-german-credit 20
variationally_sparse_gp statlog-german-credit 10
variationally_sparse_gp_minibatch statlog-german-credit 10
deep_gp_doubly_stochastic statlog-german-credit 10
svm statlog-german-credit 10
knn statlog-german-credit 10
gradient_boosting_machine statlog-german-credit 10
adaboost statlog-german-credit 10
mlp statlog-german-credit 10
linear statlog-heart 20
variationally_sparse_gp statlog-heart 10
variationally_sparse_gp_minibatch statlo

In [7]:
print('test loglikelihood')
display(HTML(pandas.DataFrame(results['test_loglik']['table'], columns=fields).to_html(index=False)))
# print(pandas.DataFrame(results['test_loglik']['table'], columns=fields).to_latex())

print('test accuracy')
display(HTML(pandas.DataFrame(results['test_acc']['table'], columns=fields).to_html(index=False)))



test loglikelihood


dataset,N,D,K,lin,SVGP,SVGP_mb,DGP,svm,knn,gbm,ab,mlp
abalone,4177.0,9.0,3.0,-0.760(0.039),-2.150(0.181),-2.074(0.165),-1.825(0.170),-0.746(0.040),-2.564(0.324),-0.654(0.030),-1.053(0.007),-0.708(0.038)
acute-infl,120.0,7.0,2.0,-0.049(0.015),-0.008(0.002),-0.055(0.007),-0.063(0.008),-0.018(0.001),-0.000(0.000),-0.000(0.000),-0.025(0.063),-0.030(0.008)
acute-neph,120.0,7.0,2.0,-0.031(0.008),-0.007(0.002),-0.045(0.005),-0.051(0.006),-0.019(0.001),-0.000(0.000),-0.000(0.000),-0.085(0.256),-0.017(0.004)
adult,48842.0,15.0,2.0,-0.342(0.005),,-0.321(0.004),,-0.359(0.005),-1.146(0.048),-0.290(0.004),-0.665(0.000),-0.315(0.005)
annealing,898.0,32.0,5.0,-0.365(0.061),-0.743(0.358),-0.597(0.169),-1.051(0.333),-0.342(0.086),-0.769(0.394),-0.104(0.062),-1.219(0.021),-0.279(0.052)
arrhythmia,452.0,263.0,13.0,-1.343(0.298),-1.162(0.285),-1.179(0.301),-1.289(0.254),-1.071(0.143),-5.885(1.417),-1.331(0.344),-2.568(0.360),-1.333(0.287)
audiology-,196.0,60.0,18.0,-1.072(0.229),-0.914(0.338),-1.148(0.310),-1.140(0.311),-1.453(0.216),-4.130(1.691),-0.979(0.378),-2.904(0.326),-0.867(0.215)
balance-sc,625.0,5.0,3.0,-0.363(0.107),-0.039(0.046),-0.052(0.040),-0.018(0.014),-0.220(0.074),-2.087(0.818),-0.397(0.119),-0.994(0.009),-0.139(0.050)
balloons,16.0,5.0,2.0,-0.708(0.355),,,-0.615(0.119),-0.617(0.139),-0.576(0.161),-1.630(2.162),-2.676(2.902),-0.652(0.463)
bank,4521.0,17.0,2.0,-0.271(0.028),-0.252(0.024),-0.254(0.024),-0.253(0.024),-0.286(0.029),-1.143(0.224),-0.235(0.020),-0.646(0.002),-0.281(0.036)


test accuracy


dataset,N,D,K,lin,SVGP,SVGP_mb,DGP,svm,knn,gbm,ab,mlp
abalone,4177.0,9.0,3.0,0.636(0.029),0.664(0.024),0.672(0.025),0.669(0.026),0.661(0.024),0.633(0.029),0.696(0.022),0.688(0.024),0.668(0.028)
acute-infl,120.0,7.0,2.0,1.000(0.000),1.000(0.000),1.000(0.000),1.000(0.000),1.000(0.000),1.000(0.000),1.000(0.000),0.983(0.050),1.000(0.000)
acute-neph,120.0,7.0,2.0,1.000(0.000),1.000(0.000),1.000(0.000),1.000(0.000),1.000(0.000),1.000(0.000),1.000(0.000),0.992(0.025),1.000(0.000)
adult,48842.0,15.0,2.0,0.843(0.005),,0.849(0.005),,0.849(0.004),0.830(0.003),0.868(0.004),0.859(0.005),0.854(0.004)
annealing,898.0,32.0,5.0,0.848(0.041),0.892(0.046),0.898(0.034),0.862(0.043),0.876(0.044),0.880(0.041),0.967(0.017),0.870(0.036),0.890(0.047)
arrhythmia,452.0,263.0,13.0,0.707(0.092),0.761(0.056),0.761(0.053),0.754(0.043),0.674(0.051),0.615(0.048),0.754(0.061),0.613(0.070),0.678(0.056)
audiology-,196.0,60.0,18.0,0.795(0.096),0.755(0.069),0.745(0.065),0.730(0.081),0.630(0.105),0.535(0.081),0.810(0.073),0.265(0.125),0.705(0.072)
balance-sc,625.0,5.0,3.0,0.863(0.048),0.986(0.019),0.976(0.024),0.997(0.006),0.916(0.036),0.835(0.062),0.870(0.049),0.908(0.042),0.970(0.026)
balloons,16.0,5.0,2.0,0.500(0.316),,,0.600(0.300),0.600(0.300),0.600(0.300),0.650(0.450),0.400(0.300),0.550(0.350)
bank,4521.0,17.0,2.0,0.892(0.018),0.892(0.015),0.891(0.016),0.891(0.017),0.892(0.013),0.890(0.014),0.900(0.011),0.892(0.015),0.894(0.013)


In [8]:
# fields = ['dataset', 'N', 'D']

                
# colours = ['C{}'.format(i) for i in range(10)]

# fields = fields + [m[1] for m in models_names]
# results = {f:[] for f in fields}


# for dataset in regression_datasets:
    
#     fig, axs = plt.subplots(1, 2, figsize=(10, 5))

#     results['dataset'].append(dataset)
#     results['N'].append(ALL_REGRESSION_DATATSETS[dataset].N)
#     results['D'].append(ALL_REGRESSION_DATATSETS[dataset].D)

#     for (model, name), c in zip(models_names, colours):
#         with Database('../results/results.db') as db:
#             d = {'model':model, 'dataset':dataset}

#             res = db.read('active_learning_continuous', ['total_loglik', 'total_rmse'], d) 
#         if len(res)>0:
#             test_ll = res[0][0]
#             test_acc = res[0][1]

#             axs[0].plot(test_ll, label=model, color=c)r
#             axs[1].plot(test_acc, label=model, color=c)
#     axs[0].set_ylim(-10, 10)
#     plt.title('{} {} {}'.format(dataset,
#                                    ALL_REGRESSION_DATATSETS[dataset].N,
#                                    ALL_REGRESSION_DATATSETS[dataset].D))
#     plt.legend()
#     plt.show()


In [9]:

# fields = ['dataset', 'N', 'D', 'K']

# models_names = [['linear', 'lin'],
#                 ['variationally_sparse_gp', 'SVGP'],
#                 ['deep_gp_doubly_stochastic','DGP'],
#                 ['svm', 'svm'],
#                 ['knn', 'knn'],
#                 ['naive_bayes', 'nb'],
#                 ['decision_tree', 'dt'],
#                 ['random_forest', 'rf'],
#                 ['gradient_boosting_machine', 'gbm'],
#                 ['adaboost', 'ab'],
#                 ['mlp', 'mlp'],
#                 ]
                
# colours = ['C{}'.format(i) for i in range(10)]

# fields = fields + [m[1] for m in models_names]
# results = {f:[] for f in fields}


# for dataset in classification_datasets[:4]:  # don't show them all...
    
#     fig, axs = plt.subplots(1, 2, figsize=(10, 5))

#     results['dataset'].append(dataset)
#     results['N'].append(ALL_CLASSIFICATION_DATATSETS[dataset].N)
#     results['D'].append(ALL_CLASSIFICATION_DATATSETS[dataset].D)
#     results['K'].append(ALL_CLASSIFICATION_DATATSETS[dataset].K)

#     for (model, name), c in zip(models_names, colours):
#         with Database('../results/results.db') as db:
#             d = {'model':model, 'dataset':dataset}

#             res = db.read('active_learning_discrete', ['test_loglik', 'total_acc'], d) 
#         if len(res)>0:
#             test_ll = res[0][0]
#             test_acc = res[0][1]

#             axs[0].plot(test_ll, label=model, color=c)
#             axs[1].plot(test_acc, label=model, color=c)

#     plt.title('{} {} {} {}'.format(dataset,
#                                    ALL_CLASSIFICATION_DATATSETS[dataset].N,
#                                    ALL_CLASSIFICATION_DATATSETS[dataset].D,
#                                    ALL_CLASSIFICATION_DATATSETS[dataset].K))
#     plt.legend()
#     plt.show()