## Configuration

Enter the Neuroglancer state ID below. It is the number at the end of a link such as:
https://spelunker.cave-explorer.org/#!middleauth+https://global.daf-apis.com/nglstate/api/v1/6407833740378112

Also enter the names of the annotation layers that mark merge errors and split errors (both drawn as line annotations), and the name of the segmentation layer.

In [None]:
# Neuroglancer state that holds the error annotations.  The ID is the trailing
# number of the state link:
# https://spelunker.cave-explorer.org/#!middleauth+https://global.daf-apis.com/nglstate/api/v1/5984476767191040
state_id = 5984476767191040
# Names of the line-annotation layers marking merge errors and split errors.
merge_err_layer_name, split_err_layer_name = "merge", "split"

# The segmentation may come from the same Neuroglancer state as the annotations
# or from a different one, so it gets its own state ID and layer name:
# https://spelunker.cave-explorer.org/#!middleauth+https://global.daf-apis.com/nglstate/api/v1/6064644210819072
seg_state_id = 6064644210819072
seg_layer_name = "seg"

In [None]:
from caveclient import CAVEclient
import nglui
from nglui.statebuilder import *
import pandas as pd
import numpy as np
from zetta_utils.layer.volumetric.cloudvol import build_cv_layer;
from zetta_utils.geometry import Vec3D;

In [None]:
# Fetch the Neuroglancer state that holds the error annotations.
client = CAVEclient()
state = client.state.get_state_json(state_id)

# Read both endpoints of every line annotation in the merge-error layer.
# NOTE(review): linked_segmentations=True adds a third return value; in the data
# used so far it is just a list of empty entries, so its usefulness is unconfirmed.
ptA, ptB, segs = nglui.parser.line_annotations(
    state, merge_err_layer_name, linked_segmentations=True
)
# Replace each non-empty linked-segmentation entry with its first element, in place.
for row, linked in enumerate(segs):
    if linked:
        segs[row] = linked[0]
# One row per annotation; each cell holds one endpoint of the line.
df = pd.DataFrame({"ptA": ptA, "ptB": ptB})
df

In [None]:
# Look up the segmentation layer's source in its Neuroglancer state and open it
# as a layer that can be sliced for cutouts.
seg_state = client.state.get_state_json(seg_state_id)
seg_layer = nglui.parser.get_layer(seg_state, seg_layer_name)
seg_path = seg_layer['source']
precomputed_prefix = 'precomputed://'
if seg_path.startswith(precomputed_prefix):
    # Drop the scheme prefix; only the remainder is passed to build_cv_layer.
    seg_path = seg_path[len(precomputed_prefix):]
print(f'Loading segmentation data from {seg_path}')
# index_resolution is the resolution given when slicing the layer (see find_segments);
# data_resolution is presumably the resolution the stored data is read at -- TODO confirm.
index_resolution = Vec3D(24, 24, 45)
data_resolution = Vec3D(96, 96, 45)
cvl = build_cv_layer(
    path=seg_path,
    allow_slice_rounding=True,
    index_resolution=index_resolution,
    data_resolution=data_resolution,
    interpolation_mode='nearest',
)

In [None]:
# Look up the segment ID stored in a cutout at a given point.
def seg_at_point(pos, cutout, cutout_base):
    """Return the value of ``cutout`` at the voxel containing ``pos``.

    ``pos`` and ``cutout_base`` are coordinates in the same space;
    ``cutout_base`` is the coordinate of ``cutout``'s [0, 0, 0] element.
    The offset between them is floored to whole voxels before indexing.
    """
    offset = np.floor(pos - cutout_base).astype(int)
    ix, iy, iz = offset[0], offset[1], offset[2]
    return cutout[ix, iy, iz]

In [None]:
def _in_block(pt, block_min, block_max):
    """Return True if ``pt`` lies in the half-open box [block_min, block_max) on every axis."""
    return all(lo <= pt[axis] < hi
               for axis, (lo, hi) in enumerate(zip(block_min, block_max)))


def find_segments(df, label='Dataset'):
    """Fill in the segment ID found at each endpoint of every annotation in ``df``.

    Parameters:
        df: DataFrame with 'ptA' and 'ptB' columns, each cell an indexable
            (x, y, z) point.  It is modified in place: 'segA' and 'segB'
            columns are (re)created and filled with the segmentation value
            found at the corresponding point.
        label: name used as a prefix in the progress and summary printouts.

    Returns None.  Reads the module-level ``cvl`` layer and ``index_resolution``
    and uses ``seg_at_point`` for the per-point lookup.

    The bounding box of all points is walked in fixed-size blocks; a cutout is
    downloaded only for blocks that contain at least one endpoint.
    """
    df['segA'] = None
    df['segB'] = None

    # With no rows there is no bounding box (min/max would be NaN), so stop here.
    if df.empty:
        print(f'{label} has no annotations; nothing to look up.')
        return

    # Determine the range of X, Y, and Z over both endpoints.
    bounds = []
    for axis in range(3):
        coords_a = df['ptA'].apply(lambda p, axis=axis: p[axis])
        coords_b = df['ptB'].apply(lambda p, axis=axis: p[axis])
        bounds.append((min(coords_a.min(), coords_b.min()),
                       max(coords_a.max(), coords_b.max())))
    (min_x, max_x), (min_y, max_y), (min_z, max_z) = bounds
    print(f'{label} X ranges from {min_x} to {max_x}')
    print(f'{label} Y ranges from {min_y} to {max_y}')
    print(f'{label} Z ranges from {min_z} to {max_z}')

    # Iterate over that volume in blocks small enough to download, finding the segments
    # associated with any points in that block.
    x_stride = 512
    y_stride = 512
    z_stride = 128

    # NOTE(review): block origins start at the raw minimum coordinates, which may be
    # fractional; the layer is built with allow_slice_rounding=True, so the cutout's
    # true origin may differ slightly from cutout_base -- confirm this is acceptable.
    x_span = max_x - min_x

    print('Finding segments...')
    for x in np.arange(min_x, max_x + 1, x_stride):
        # Avoid dividing by zero when every point shares the same X coordinate.
        progress = 100 * (x - min_x) / x_span if x_span else 0.0
        print(f'x={x} ({progress:.0f}%)')
        for y in np.arange(min_y, max_y + 1, y_stride):
            for z in np.arange(min_z, max_z + 1, z_stride):
                block_min = (x, y, z)
                block_max = (x + x_stride, y + y_stride, z + z_stride)
                ptA_in_range = df['ptA'].apply(lambda p: _in_block(p, block_min, block_max))
                ptB_in_range = df['ptB'].apply(lambda p: _in_block(p, block_min, block_max))

                # If neither endpoint of any row falls in this block, skip the download.
                if not (ptA_in_range.any() or ptB_in_range.any()):
                    continue

                # Load a block (cutout) of segmentation data
                cutout = cvl[index_resolution, x:x+x_stride, y:y+y_stride, z:z+z_stride]
                cutout = cutout[0]  # (use only channel 0)
                cutout_base = Vec3D(x, y, z)

                # Reuse the masks computed above so that selecting a point and
                # assigning its segment always apply the same (inclusive lower
                # bound) test; points on a block's lower Y/Z face are not missed.
                for pt_col, seg_col, in_range in (('ptA', 'segA', ptA_in_range),
                                                  ('ptB', 'segB', ptB_in_range)):
                    for index in df.index[in_range.to_numpy(dtype=bool)]:
                        pt = df.loc[index, pt_col]
                        df.at[index, seg_col] = seg_at_point(Vec3D(*pt), cutout, cutout_base)
    same_seg = (df['segA'] == df['segB']).sum()
    diff_seg = (df['segA'] != df['segB']).sum()
    print(f'{label} has the same segment on {same_seg} rows, and different segments on {diff_seg} rows.')

In [None]:
# Look up the segment at both endpoints of every merge-error annotation;
# the layer name doubles as the label in the printed progress and summary.
find_segments(df, label=merge_err_layer_name)

In [None]:
# Tally the rows whose two endpoints fall in the same segment versus different ones.
# For a merge error, endpoints that now lie in different segments count as fixed.
merge_seg_a, merge_seg_b = df['segA'], df['segB']
same_seg = (merge_seg_a == merge_seg_b).sum()
diff_seg = (merge_seg_a != merge_seg_b).sum()
print(f'This dataset has the same segment on {same_seg} rows, and different segments on {diff_seg} rows.')
print(f'So, {diff_seg/len(df)} ={diff_seg}/{len(df)} merge errors fixed.')

In [None]:
# Repeat the same steps for the split-error annotation layer.
ptA, ptB, segs = nglui.parser.line_annotations(
    state, split_err_layer_name, linked_segmentations=True
)
# Replace each non-empty linked-segmentation entry with its first element, in place.
for row, linked in enumerate(segs):
    if linked:
        segs[row] = linked[0]
split_df = pd.DataFrame({"ptA": ptA, "ptB": ptB})
find_segments(split_df, label=split_err_layer_name)

In [None]:
# Tally the rows whose two endpoints fall in the same segment versus different ones.
# For a split error, endpoints that now lie in the same segment count as fixed.
split_seg_a, split_seg_b = split_df['segA'], split_df['segB']
same_seg = (split_seg_a == split_seg_b).sum()
diff_seg = (split_seg_a != split_seg_b).sum()
print(f'This dataset has the same segment on {same_seg} rows, and different segments on {diff_seg} rows.')
print(f'So, {same_seg/len(split_df)} ={same_seg}/{len(split_df)} split errors fixed.')