In [1]:
# Configure Bokeh so that its plots are displayed inside the notebook.
from bokeh.io import output_notebook
output_notebook()
# Show matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
# READ DATA

import pandas
import numpy as np
import time
import report

# Limit the displayed width of a column to 30 characters.
pandas.set_option('display.max_colwidth', 30)

# Load the 'results' table from results.hdf and report how long the read took.
print("Reading experiment DB...")
start = time.time()
results = pandas.read_hdf('results.hdf', key='results')
elapsed = time.time() - start
print("Read all results in " + str(elapsed) + "s")

# Validation sample to use for each test sample (looked up by the last token
# of a setup name when the per-row columns are derived).
validation_sets = dict([
    ('pro01', 'pro05'),
    ('pro02', 'pro01'),
    ('pro03', 'pro02'),
    ('pro04', 'pro03'),
    ('pro05', 'pro04'),

    ('per01', 'per03'),
    ('per02', 'per01'),
    ('per03', 'per02'),
])

# Maps every sample name onto itself.
test_sets = dict(
    (sample, sample)
    for sample in [
        'pro01', 'pro02', 'pro03', 'pro04', 'pro05',
        'per01', 'per02', 'per03',
    ]
)

# Setup names grouped under the keys 'pro', 'per' and 'both'.
experiments = {}
experiments['pro'] = [
    'setup01_pro01',
    'setup01_pro02',
    'setup01_pro03',
    'setup01_pro04',
    'setup01_pro05',
]
experiments['per'] = [
    'setup01_per01',
    'setup01_per02',
    'setup01_per03',
]
experiments['both'] = [
    'setup01_both_pro01',
    'setup01_both_pro02',
    'setup01_both_pro03',
    'setup01_both_pro04',
    'setup01_both_pro05',
    'setup01_both_per01',
    'setup01_both_per02',
    'setup01_both_per03',
]

# Split every raw 'setup' name (e.g. 'setup01_both_pro03') into its parts and
# store them as columns of `results`:
#   original_name     - the unmodified 'setup' value
#   setup             - first '_'-separated token of the name
#   experiment        - 'both' if the name contains 'both', else 'pro' if it
#                       contains 'pro', else 'per'
#   test_sample       - last '_'-separated token of the name
#   validation_sample - validation_sets[test_sample]
#
# The values are collected in plain lists and written back with assign().
# Writing to the row objects yielded by DataFrame.iterrows() is not guaranteed
# to modify the frame (a row may be a copy), in which case 'setup' and
# 'experiment' would silently keep their old values.
original_names = results['setup'].tolist()
setup_names = []
experiment_labels = []
test_samples = []
validation_samples = []
for name in original_names:
    if 'both' in name:
        experiment_labels.append('both')
    elif 'pro' in name:
        experiment_labels.append('pro')
    else:
        experiment_labels.append('per')
    tokens = name.split('_')
    setup_names.append(tokens[0])
    test_samples.append(tokens[-1])
    # raises KeyError for a sample that has no entry in validation_sets
    validation_samples.append(validation_sets[tokens[-1]])

# one assign() per column keeps the order in which new columns are appended
results = results.assign(setup=setup_names)
results = results.assign(experiment=experiment_labels)
results = results.assign(original_name=original_names)
results = results.assign(test_sample=test_samples)
results = results.assign(validation_sample=validation_samples)

Reading experiment DB...
Read all results in 0.141917943954s


In [3]:
# EVALUATE VALIDATION DATA
import evaluation
reload(evaluation)
reload(evaluation.report)
import json

# keep only the rows whose evaluated sample is the validation sample of their setup
# (alternative, currently disabled: evaluation.average_validation(results))
on_validation_sample = results['sample'] == results['validation_sample']
validation_results = results[on_validation_sample]

# select the best row per original setup name, scored by 'tra_score' with
# lower_is_better=False (find_best lives in the evaluation module, not shown here)
best_validation = evaluation.find_best(
    validation_results,
    per='original_name',
    score='tra_score',
    k=1,
    lower_is_better=False)
report.render_table(best_validation.sort_values(by='original_name'))

# one entry per original setup name: iteration, threshold (wrapped in a list)
# and merge function of the selected row
testing_configurations = {
    best['original_name']: {
        'iteration': best['iteration'],
        'thresholds': [best['threshold']],
        'merge_function': best['merge_function'],
    }
    for _, best in best_validation.iterrows()
}

# use this output to run the testing set evaluation
print(json.dumps(testing_configurations, indent=4))

Unnamed: 0,keep_segmentation,histogram_quantiles,dilate_mask,original_name,setup,validation_sample,iteration,test_run,discrete_queue,sample,merge_function,test_sample,experiment,init_with_max,mask_fragments,custom_fragments,seg_score,threshold,tra_score
3,True,False,0,setup01_both_per01,setup01,per03,100000,,True,per03,mean_aff,per01,both,False,True,True,0.902546,0.9,0.998616
11,True,False,0,setup01_both_per02,setup01,per01,150000,,True,per01,mean_aff,per02,both,False,True,True,0.934453,0.9,0.993019
17,True,False,0,setup01_both_per03,setup01,per02,150000,,True,per02,mean_aff,per03,both,False,True,True,0.87997,0.9,0.994717
23,True,False,0,setup01_both_pro01,setup01,pro05,150000,,True,pro05,mean_aff,pro01,both,False,True,True,0.822787,0.8,0.99564
27,True,False,0,setup01_both_pro02,setup01,pro01,100000,,True,pro01,mean_aff,pro02,both,False,True,True,0.816807,0.8,0.997055
35,True,False,0,setup01_both_pro03,setup01,pro02,150000,,True,pro02,mean_aff,pro03,both,False,True,True,0.826364,0.8,0.993675
41,True,False,0,setup01_both_pro04,setup01,pro03,150000,,True,pro03,mean_aff,pro04,both,False,True,True,0.803444,0.8,0.993867
43,True,False,0,setup01_both_pro05,setup01,pro04,50000,,True,pro04,mean_aff,pro05,both,False,True,True,0.819105,0.8,0.996444
53,True,False,0,setup01_per01,setup01,per03,150000,,True,per03,mean_aff,per01,per,False,True,True,0.901889,0.9,0.997483
55,True,False,0,setup01_per02,setup01,per01,50000,,True,per01,mean_aff,per02,per,False,True,True,0.93733,0.8,0.995636


{
    "setup01_both_pro05": {
        "merge_function": "mean_aff", 
        "thresholds": [
            0.8
        ], 
        "iteration": 50000
    }, 
    "setup01_both_per03": {
        "merge_function": "mean_aff", 
        "thresholds": [
            0.9
        ], 
        "iteration": 150000
    }, 
    "setup01_both_pro04": {
        "merge_function": "mean_aff", 
        "thresholds": [
            0.8
        ], 
        "iteration": 150000
    }, 
    "setup01_both_per01": {
        "merge_function": "mean_aff", 
        "thresholds": [
            0.9
        ], 
        "iteration": 100000
    }, 
    "setup01_per03": {
        "merge_function": "mean_aff", 
        "thresholds": [
            0.7
        ], 
        "iteration": 100000
    }, 
    "setup01_per02": {
        "merge_function": "mean_aff", 
        "thresholds": [
            0.8
        ], 
        "iteration": 50000
    }, 
    "setup01_per01": {
        "merge_function": "mean_aff", 
        "threshold

In [12]:
# EVALUATE TESTING DATA
import evaluation
reload(evaluation)
reload(evaluation.report)

# One boolean row mask per entry of testing_configurations (built by the
# validation cell): rows evaluated on their own test sample whose setup name,
# threshold, iteration and merge function match that entry.
testing_select = tuple(
    (results['sample']==results['test_sample'])
    &
    (results['original_name']==name)
    &
    (results['threshold']==config['thresholds'][0])
    &
    (results['iteration']==config['iteration'])
    &
    (results['merge_function']==config['merge_function'])
    for name, config in testing_configurations.items()
)

# OR all masks together. Starting from an all-False mask means an empty
# testing_configurations selects no rows instead of raising IndexError on
# testing_select[0].
f = pandas.Series(False, index=results.index)
for selector in testing_select:
    f = f | selector
testing_results = results[f]
# report.render_table(testing_results)

# average the selected testing-set results per experiment
evaluation.average(testing_results, 'experiment')

Unnamed: 0,experiment,keep_segmentation,histogram_quantiles,validation_sample,dilate_mask,original_name,setup,init_with_max,iteration,test_run,discrete_queue,sample,merge_function,test_sample,seg_score,threshold,tra_score,mask_fragments,custom_fragments
0,both,True,False,,0,,setup01,False,,,True,average over 8,mean_aff,,0.851076,,0.995555,True,True
1,per,True,False,,0,,setup01,False,,1.0,True,average over 3,mean_aff,,0.896517,,0.985367,True,True
2,pro,True,False,,0,,setup01,False,,,True,average over 5,mean_aff,,0.817954,,0.995176,True,True


In [4]:
# plot validation performance over iterations
# NOTE(review): `results` is used unfiltered here, i.e. it is not restricted
# to rows with sample == validation_sample - confirm that this is intended.

import report
import evaluation

# Best row (by 'tra_score', lower_is_better=False) for every
# (iteration, setup) combination that has at least one result. The
# per-combination frames are collected in a list and concatenated once;
# calling concat() inside the loop re-copies all earlier rows on every pass.
best_per_combination = []
for iteration in np.unique(results['iteration']):
    for setup in np.unique(results['setup']):
        relevant_results = results[(results.iteration==iteration)&(results.setup==setup)]
        if len(relevant_results) == 0:
            continue
        best_per_combination.append(
            evaluation.find_best(relevant_results, k=1, score='tra_score', lower_is_better=False))

# concat() rejects an empty list, so fall back to an empty frame in that case
if best_per_combination:
    iteration_best = pandas.concat(best_per_combination)
else:
    iteration_best = pandas.DataFrame()

# a single, unconstrained group; one figure per score, both plotted against
# the iteration; one line per setup
groups = [
    {},
]
figures = [
    {'x_axis':'iteration', 'y_axis':'tra_score', 'hide_legend':False},
    {'x_axis':'iteration', 'y_axis':'seg_score'},
]
configurations = [
    {'setup':s, 'style':'line'} for s in np.unique(results['setup'])
]
report.plot(groups, figures, configurations, iteration_best)

Verbose set to: False
Preparing plot data
Prepared data in 0.00165200233459s


Plotted in 0.092267036438s


{"{} {'y_axis': 'seg_score', 'x_axis': 'iteration'}": <bokeh.plotting.figure.Figure at 0x7fa9b3545210>,
 "{} {'y_axis': 'tra_score', 'x_axis': 'iteration', 'hide_legend': False}": <bokeh.plotting.figure.Figure at 0x7faa50b03710>}

In [9]:
# plot validation performance over thresholds
# NOTE(review): `validation` is not assigned in any cell visible in this
# notebook, and the setup/score names used below ('setup130', 'cremi_score',
# 'voi_sum', ...) differ from the columns shown for the data loaded above -
# confirm that this cell still belongs to this analysis.

import report
import evaluation

# rows of the selected setup on the validation sample, ordered by threshold
selection = {'sample':validation, 'setup':'setup130', 'custom_fragments':True, 'discrete_queue':True}
setup_results = report.filter(results, [selection]).sort_values(by='threshold')
print(len(setup_results))

# one group per iteration value present in the selection
groups = []
for iteration_value in np.unique(setup_results['iteration']):
    groups.append({'sample':validation, 'iteration':iteration_value})

# one figure per (x_axis, y_axis) pair
figures = [
    {'x_axis':'threshold', 'y_axis':'cremi_score'},
    {'x_axis':'threshold', 'y_axis':'voi_sum'},
    {'x_axis':'threshold', 'y_axis':'arand'},
    {'x_axis':'voi_split', 'y_axis':'voi_merge'},
]

# one line per (setup, merge_function, init_with_max) combination
configurations = []
for setup_name in np.unique(setup_results['setup']):
    for merge_function in np.unique(setup_results['merge_function']):
        for init_with_max in [True, False]:
            configurations.append({
                'experiment':'cremi_gunpowder',
                'setup':setup_name,
                'merge_function':merge_function,
                'init_with_max':init_with_max,
                'style':'line',
            })

report.plot(groups, figures, configurations, setup_results)

1800
Verbose set to: False
Preparing plot data
Prepared data in 0.0991580486298s


Plotted in 4.40738511086s


{"{'sample': 'sample_C_padded_20160501.aligned.filled.cropped.62:153.truncated', 'iteration': 400000} {'y_axis': 'arand', 'x_axis': 'threshold'}": <bokeh.plotting.figure.Figure at 0x7fa4ab8e8bd0>,
 "{'sample': 'sample_C_padded_20160501.aligned.filled.cropped.62:153.truncated', 'iteration': 400000} {'y_axis': 'cremi_score', 'x_axis': 'threshold'}": <bokeh.plotting.figure.Figure at 0x7fa4ab8df4d0>,
 "{'sample': 'sample_C_padded_20160501.aligned.filled.cropped.62:153.truncated', 'iteration': 400000} {'y_axis': 'voi_merge', 'x_axis': 'voi_split'}": <bokeh.plotting.figure.Figure at 0x7fa4ab8e8650>,
 "{'sample': 'sample_C_padded_20160501.aligned.filled.cropped.62:153.truncated', 'iteration': 400000} {'y_axis': 'voi_sum', 'x_axis': 'threshold'}": <bokeh.plotting.figure.Figure at 0x7fa4ab8dfd90>,
 "{'sample': 'sample_C_padded_20160501.aligned.filled.cropped.62:153.truncated', 'iteration': 450000} {'y_axis': 'arand', 'x_axis': 'threshold'}": <bokeh.plotting.figure.Figure at 0x7fa4abd1b290>,
 "{