In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell is executed, so edits to library code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import logging

import numpy as np
import pandas as pd

from neuclease.dvid import DvidInstanceInfo, fetch_split_supervoxel_sizes, fetch_complete_mappings
from neuclease.merge_table import load_supervoxel_sizes, compute_body_sizes

In [3]:
# Configure the root logger: discard any handlers already attached to it,
# accept INFO and above, and emit everything through a single stdout handler.
root_logger = logging.getLogger()
root_logger.handlers = []
root_logger.setLevel(logging.INFO)

handler = logging.StreamHandler(sys.stdout)
root_logger.addHandler(handler)

# The 'kafka' logger is restricted to warnings and above.
logging.getLogger('kafka').setLevel(logging.WARNING)

### Load original supervoxel sizes

In [4]:
# Path to the file holding the original supervoxel sizes (.h5, presumably HDF5).
orig_sizes_file = '/groups/flyem/data/scratchspace/copyseg-configs/labelmaps/hemibrain/8nm/compute-8nm-extended-fixed-STATS-ONLY-20180402.192015/supervoxel-sizes.h5'
# Load the sizes; %time reports how long the load takes.
%time orig_sv_sizes = load_supervoxel_sizes(orig_sizes_file)
# Sum over all loaded supervoxel sizes (presumably the volume's total voxel count).
total_voxel_count = orig_sv_sizes.sum()

Volume contains 188243164 supervoxels and 22.5 Teravoxels in total
CPU times: user 619 ms, sys: 822 ms, total: 1.44 s
Wall time: 1.52 s


### Fetch supervoxel split fragment sizes

In [5]:
# Candidate DVID nodes. Each one gets its own name so that neither is silently
# overwritten; the cells below only read `instance_info`.

# Initial agglo node
initial_agglo_info = DvidInstanceInfo('emdata3:8900', 'ac90', 'segmentation')

# Latest production node
latest_production_info = DvidInstanceInfo('emdata3:8900', 'd5852c27b5c04687bb1be414f6dc2336', 'segmentation')

# Node analyzed by the rest of the notebook -- change this line to switch nodes.
instance_info = latest_production_info

In [7]:
# Fetch the sizes of split supervoxels for the selected node. Per the recorded
# output, the call reads the node's kafka messages and then fetches sizes for
# the split supervoxels. `*instance_info` unpacks the node description into
# positional arguments.
split_fragment_sizes = fetch_split_supervoxel_sizes(*instance_info)

Reading kafka messages from kafka.int.janelia.org:9092 for emdata3:8900 / d5852c27b5c04687bb1be414f6dc2336 / segmentation
Reading 380487 kafka messages took 17.42137622833252 seconds
Fetching sizes for 5910 split supervoxels...
Fetching sizes for 5910 split supervoxels took 0:00:11.259472


### Combine orig/split sizes

In [8]:
# Append the split-fragment sizes to the original supervoxel sizes,
# producing one combined size table.
sv_sizes = pd.concat([orig_sv_sizes, split_fragment_sizes])

In [9]:
# Check that the combined table kept the expected dtypes. On failure, report
# the dtype actually found so the offending input is easier to track down.
assert sv_sizes.index.dtype == np.uint64, \
    f"sv_sizes index dtype is {sv_sizes.index.dtype}, expected uint64"
assert sv_sizes.dtype == np.uint32, \
    f"sv_sizes dtype is {sv_sizes.dtype}, expected uint32"

### Fetch mapping

In [10]:
# Fetch the complete mapping for the selected node (consumed by
# compute_body_sizes below). Per the recorded output, the call reads the node's
# kafka messages, downloads the node's `mappings` endpoint, parses it, and
# constructs the identity-mappings that were missing.
mapping = fetch_complete_mappings(*instance_info)

Reading kafka messages from kafka.int.janelia.org:9092 for emdata3:8900 / d5852c27b5c04687bb1be414f6dc2336 / segmentation
Reading 380487 kafka messages took 17.014442443847656 seconds
Fetching http://emdata3:8900/api/node/d5852c27b5c04687bb1be414f6dc2336/segmentation/mappings...
Fetching http://emdata3:8900/api/node/d5852c27b5c04687bb1be414f6dc2336/segmentation/mappings took 0:00:31.407138
Parsing mapping...
Parsing mapping took 0:00:08.209481
Constructing missing identity-mappings...
Constructing missing identity-mappings took 0:00:20.960173


### Compute body sizes

In [11]:
# Combine the supervoxel sizes with the mapping to obtain per-body sizes.
# Per the recorded output, the call drops unknown supervoxels, applies the
# sizes to the mapping, and aggregates the sizes by body.
body_sizes = compute_body_sizes(sv_sizes, mapping)

Dropping unknown supervoxels
Applying sizes to mapping
Aggregating sizes by body


In [12]:
# Write all body sizes to a CSV named after the first four characters of the
# node UUID. `header` and `index` are passed explicitly so the file layout
# matches the focused-body export further down and does not depend on the
# pandas-version-specific defaults of Series.to_csv.
body_sizes.to_csv(
    f'/nrs/flyem/bergs/tmp/body-sizes-{instance_info.uuid[:4]}.csv',
    header=True,
    index=True,
)

In [13]:
# Keep only the bodies whose size is at least 10e6 (note: 10e6 == 1e7,
# i.e. ten million), then show how many bodies remain.
large_body_sizes = body_sizes.loc[body_sizes >= 10e6]
large_body_sizes.shape

(193876,)

### Optional: Filter by 0.5 bodies

In [16]:
# Read the list of focused body IDs: a headerless CSV whose first column is
# loaded under the name 'body'.
focused_bodies_path = '/nrs/flyem/bergs/complete-ffn-agglo/bodies-0.5-including-psds-from-neuprint-52f9.csv'
focused_table = pd.read_csv(focused_bodies_path, header=None, names=['body'])
focused_bodies = set(focused_table['body'])

# Keep only the entries of body_sizes whose index value is a focused body.
is_focused = body_sizes.index.isin(focused_bodies)
filtered_body_sizes = body_sizes[is_focused]

# Export the filtered sizes, tagging the file name with the first four
# characters of the node UUID.
path = f'/nrs/flyem/bergs/tmp/focused-body-sizes-{instance_info.uuid[:4]}.csv'
filtered_body_sizes.to_csv(path, header=True, index=True)
print(f"Wrote to {path}")

Wrote to /nrs/flyem/bergs/tmp/focused-body-sizes-d585.csv
