In [1]:
# Enable IPython's autoreload extension so edits to imported modules are picked up
# without restarting the kernel (mode 2: reload all modules before running each cell).
%load_ext autoreload
%autoreload 2

In [10]:
import sys
import logging

import numpy as np
import pandas as pd

from neuclease.dvid import DvidInstanceInfo, fetch_split_supervoxel_sizes, fetch_complete_mappings
from neuclease.merge_table import load_supervoxel_sizes, compute_body_sizes

In [3]:
# Route every log record to stdout so messages show up in the cell output.
root_logger = logging.getLogger()
handler = logging.StreamHandler(sys.stdout)

# Replace (rather than extend) the root handler list, so that re-running
# this cell leaves exactly this one handler installed.
root_logger.handlers = [handler]
root_logger.setLevel(logging.INFO)

# Restrict the 'kafka' logger to warnings and above.
logging.getLogger('kafka').setLevel(logging.WARNING)

### Load original supervoxel sizes

In [4]:
# HDF5 file holding the supervoxel sizes of the original (pre-split) segmentation.
orig_sizes_file = '/groups/flyem/data/scratchspace/copyseg-configs/labelmaps/hemibrain/8nm/compute-8nm-extended-fixed-STATS-ONLY-20180402.192015/supervoxel-sizes.h5'
# Load the sizes, timing the call with %time (the loader also logs a volume summary).
%time orig_sv_sizes = load_supervoxel_sizes(orig_sizes_file)
# Sum of all supervoxel sizes, i.e. the total voxel count of the volume.
# NOTE(review): not referenced by any later cell visible in this notebook.
total_voxel_count = orig_sv_sizes.sum()

Volume contains 188243164 supervoxels and 22.5 Teravoxels in total
CPU times: user 726 ms, sys: 1 s, total: 1.73 s
Wall time: 2.51 s


### Fetch supervoxel split-fragment sizes

In [18]:
# UUIDs of the two nodes of interest on emdata3:8900 ('segmentation' instance).
INITIAL_AGGLO_UUID = 'ac90'       # Initial agglo node
LATEST_PRODUCTION_UUID = '662e'   # Latest production node

# Select exactly one node for the rest of the notebook.
# (Previously this cell assigned `instance_info` twice in a row, so the first
# assignment was dead code and only the latest production node took effect.)
instance_info = DvidInstanceInfo('emdata3:8900', LATEST_PRODUCTION_UUID, 'segmentation')

In [20]:
# Sizes of the supervoxel fragments produced by splits, for the node in `instance_info`.
# Presumably indexed and typed like `orig_sv_sizes`, since the two are concatenated
# in the next section -- TODO confirm against neuclease.dvid.
split_fragment_sizes = fetch_split_supervoxel_sizes(instance_info)

### Combine original and split-fragment sizes

In [21]:
# Stack the two size tables end to end: original sizes first, split fragments second.
size_tables = [orig_sv_sizes, split_fragment_sizes]
sv_sizes = pd.concat(size_tables)

In [22]:
# Sanity-check that the combined series still has the expected dtypes.
# The messages report the actual dtype so a failure is diagnosable at a glance.
assert sv_sizes.index.dtype == np.uint64, \
    f"sv_sizes index dtype should be uint64, got {sv_sizes.index.dtype}"
assert sv_sizes.dtype == np.uint32, \
    f"sv_sizes value dtype should be uint32, got {sv_sizes.dtype}"

### Fetch mapping

In [23]:
# Fetch the complete mapping for the node in `instance_info`. Per the logged output,
# the helper reads kafka messages, fetches the node's /mappings endpoint, parses the
# result and appends missing identity-mappings.
# NOTE(review): the output recorded for this cell names node ac90, whereas
# `instance_info` is last assigned uuid 662e earlier in the notebook -- the saved
# output looks stale; re-run on a fresh kernel to confirm.
mapping = fetch_complete_mappings(instance_info, 'kafka')

Reading kafka messages from kafka.int.janelia.org:9092 for emdata3:8900 / ac90 / segmentation
Reading 2622 kafka messages took 2.2186641693115234 seconds
Fetching http://emdata3:8900/api/node/ac90/segmentation/mappings...
Fetching http://emdata3:8900/api/node/ac90/segmentation/mappings took 0:00:28.998321
Parsing mapping...
Parsing mapping took 0:00:07.946167
Appending missing identity-mappings...
Appending missing identity-mappings took 0:00:20.539866


### Compute body sizes

In [24]:
# Derive body sizes from the supervoxel sizes and the mapping.
# Per the logged output, the helper drops unknown supervoxels, applies the sizes
# to the mapping, and then aggregates the sizes by body.
body_sizes = compute_body_sizes(sv_sizes, mapping)

Dropping unknown supervoxels
Applying sizes to mapping
Aggregating sizes by body


In [25]:
# Write the body sizes to CSV; the file name carries the first four
# characters of the node UUID so exports from different nodes don't collide.
body_sizes_csv = f'/nrs/flyem/bergs/finish-02/body-sizes-{instance_info.uuid[:4]}.csv'
body_sizes.to_csv(body_sizes_csv)

In [28]:
# Keep only the bodies at or above the size threshold.
# Note: 10e6 equals 1e7 (ten million), not one million.
min_large_body_size = 10e6
is_large = (body_sizes >= min_large_body_size)
large_body_sizes = body_sizes[is_large]
large_body_sizes.shape

(174010,)