# Train / Test - Split

The volume is split into a training and a test subvolume using cutouts. The multicut is then run on both subvolumes.

In [None]:
import numpy as np
import vigra
import os

# rand index for evaluating the multicut score
from sklearn.metrics import adjusted_rand_score as rand_index

# imports from Neurocut
from multicut_src import MetaSet
from multicut_src import DataSet
from multicut_src import multicut_workflow
from multicut_src import ExperimentSettings
from multicut_src import merge_small_segments

# Initialize the MetaSet, the container that holds all datasets used in the
# experiments. It is constructed with the folder in which the cache is saved.
# Replace the placeholder below with a real, writable directory; the same
# `cache_folder` is reused later to derive the random forest cache path.
cache_folder = "/path/to/cache"
# example of a concrete cache location:
#cache_folder = "/home/consti/Work/data_master/cache_neurocut/cache_examples"
meta = MetaSet(cache_folder)

In [None]:
# Create the train / test split.
# NOTE: run this cell only once!
dataset_key = "dataset"

meta.load()
ds = meta.get_dataset(dataset_key)

# The volume is cut in half along the third axis:
# the lower half is used for training, the upper half for testing.
shape = ds.shape
z_split = int(shape[2] / 2)

# Cutouts inherit from DataSet, so they can be used in the same manner.
# Each bounding box is [x_begin, x_end, y_begin, y_end, z_begin, z_end];
# both cutouts span the full extent of the first two axes.
xy_extent = [0, shape[0], 0, shape[1]]
for z_begin, z_end in ((0, z_split), (z_split, shape[2])):
    ds.make_cutout(xy_extent + [z_begin, z_end])

# Persist the changes made to the dataset.
meta.update_dataset(dataset_key, ds)
meta.save()

In [None]:
# Now we can repeat the experiments from above with the train / test split.
# Reload the dataset and fetch the two cutouts made in the split cell:
# cutout 0 is the training subvolume, cutout 1 the test subvolume.
meta.load()
ds = meta.get_dataset("dataset")
ds_train = ds.get_cutout(0)
ds_test  = ds.get_cutout(1)
# id of the segmentation that is used for both training and testing
seg_id = 0

# ExperimentSettings holds all relevant options for the experiments.
# They are initialised to sensible defaults and
# we only have to change a few.
mc_params = ExperimentSettings()

# cache folder for the RF
mc_params.set_rfcache(os.path.join(cache_folder, "rf_cache"))
# train RF with 500 trees
mc_params.set_ntrees(500)
# degree of anisotropy for the filter calculation
# (values bigger than 20 lead to calculation in 2d)
# set to 1. for isotropic data (default value)
mc_params.set_anisotropy(25.)
# flag to indicate whether special z - edge features are computed
# set to false for isotropic data (default value)
mc_params.set_use2d(True)

# otherwise, the default parameters should be ok

# list of features taken into account
# "raw" -> filters on raw data accumulated over the edges
# "prob" -> filters on probability maps accumulated over the edges
# "reg" -> region statistics, mapped to the edges
# "topo" -> topology features for the edges
feat_list = ("raw", "prob", "reg", "topo")

# We run multicuts for train -> train
# and train -> test.
# Arguments: (training dataset, test dataset, training seg id, test seg id,
# feature list, settings).
mc_nodes_train, mc_edges_train, mc_energy_train, t_inf_train = multicut_workflow(
    ds_train, ds_train,
    seg_id, seg_id,
    feat_list, mc_params)
# FIX: the original call was missing the comma after `ds_test`, which is a
# SyntaxError and prevented the whole cell from running.
mc_nodes_test, mc_edges_test, mc_energy_test, t_inf_test = multicut_workflow(
    ds_train, ds_test,
    seg_id, seg_id,
    feat_list, mc_params)

# project the result back to the volume and merge small segments
mc_seg_train = ds_train.project_mc_result(seg_id, mc_nodes_train)
mc_seg_test = ds_test.project_mc_result(seg_id, mc_nodes_test)

# NOTE(review): 100 is presumably the minimal segment size — confirm against
# merge_small_segments.
mc_seg_train = merge_small_segments(mc_seg_train, 100)
mc_seg_test = merge_small_segments(mc_seg_test, 100)

In [None]:
# evaluate the result
print "Train-RI:", rand_index(mc_seg_train.ravel(), ds_train.gt().ravel())
print "Test-RI:", rand_index(mc_seg_test.ravel(), ds_test.gt().ravel())

# proper skneuro randindex
"""
from skneuro.learning import randIndex
print randIndex(ds_train.gt().ravel(), mc_seg_train.ravel(), True)
print randIndex(ds_test.gt().ravel(), mc_seg_test.ravel(), True)
"""

Again, these numbers don't look good. But I think this is due to the sklearn metric...

With the skneuro RandIndex, I obtain:

RI-train: 0.9997

RI-test: 0.9988