In [1]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [551]:
import os
import sys
import copy
import json
import pickle
import urllib
import urllib.parse
import logging
from datetime import datetime

import requests
from tqdm import tqdm_notebook
tqdm = tqdm_notebook

from IPython.core.display import display, HTML

import numpy as np
import pandas as pd

import holoviews as hv
import hvplot.pandas

from bokeh.plotting import save, output_notebook
output_notebook()

import graph_tool.all as gt

from dvidutils import LabelMapper
from libdvid import DVIDNodeService

from neuclease.dvid import *
from neuclease.misc.vnc_statuses import fetch_vnc_statuses
from neuclease.clio.api import fetch_json_annotations_all

In [16]:
from neuclease.clustering.blockmodel import construct_layered_graph

In [3]:
# Send root-logger output to stdout through exactly one handler, at INFO level.
root_logger = logging.getLogger()
root_logger.handlers = []  # drop any handlers that were attached earlier
handler = logging.StreamHandler(stream=sys.stdout)
root_logger.addHandler(handler)
root_logger.setLevel(logging.INFO)

# The 'kafka' logger only emits WARNING and above.
logging.getLogger('kafka').setLevel(logging.WARNING)

In [4]:
pwd

'/Users/bergs/workspace/neuclease/notebooks'

In [5]:
# NOTE(review): neither `vnc_master` nor `vnc_seg` is assigned in any cell visible in this
# notebook. Unless the star import from neuclease.dvid provides them, these statements
# raise NameError on a fresh kernel — confirm where the names were defined.
del vnc_master
del vnc_seg

In [6]:
vnc_locked_seg

('emdata5.janelia.org:8400',
 '75181d8f44874fcea7bdb620125fc50a',
 'segmentation')

In [7]:
SYNAPSES_DIR = '/Users/bergs/data/vnc/synapses'

In [356]:
CLIO_ANNOTATIONS_DIR = '/Users/bergs/data/vnc'

In [513]:
VNC_LINK_STATE = '/Users/bergs/workspace/flyem-private-links/vnc/dvid-seg.json'

In [8]:
with open(f'{SYNAPSES_DIR}/full_partner_df.pkl', 'rb') as f:
    partner_df = pickle.load(f)

In [9]:
with open(f'{SYNAPSES_DIR}/point_df-with-bodies-75181d.pkl', 'rb') as f:
    point_df = pickle.load(f)

In [19]:
# Look up the body under each synapse point, first via pre_id and then via post_id.
# Both lookups contribute a column called 'body'; the suffixes on the second merge
# turn the resulting pair into 'body_pre' and 'body_post'.
partner_df = (
    partner_df
    .merge(point_df['body'], how='left', left_on='pre_id', right_index=True)
    .merge(point_df['body'], how='left', left_on='post_id', right_index=True, suffixes=['_pre', '_post'])
)

In [None]:
# Cache the partner table (now carrying body_pre / body_post) on disk.
with open(f'{SYNAPSES_DIR}/full_partner_df-with-bodies-75181d.pkl', 'wb') as f:
    # pickle.dump() returns None, so its result must not be assigned back to
    # partner_df; doing so would discard the in-memory table.
    pickle.dump(partner_df, f)

In [20]:
partner_df

Unnamed: 0,pre_id,z_pre,y_pre,x_pre,kind_pre,conf_pre,user_pre,post_id,z_post,y_post,x_post,kind_post,conf_post,user_post,body_pre,body_post
0,240406077070379755,54662,27981,2795,PreSyn,0.836,$fpl,240366494676945628,54653,27993,2780,PostSyn,0.990273,$fpl,16698,20381
1,240406077070379755,54662,27981,2795,PreSyn,0.836,$fpl,240445659509951205,54671,27991,2789,PostSyn,1.000000,$fpl,16698,57807
2,240406077070379755,54662,27981,2795,PreSyn,0.836,$fpl,240581998918241028,54702,27975,2820,PostSyn,0.206855,$fpl,16698,43688825802
3,241531976046086898,54918,27537,2802,PreSyn,0.785,$fpl,241575956527975151,54928,27545,2799,PostSyn,0.997869,$fpl,22450,18817
4,241531976046086898,54918,27537,2802,PreSyn,0.785,$fpl,241369248371313377,54881,27559,2785,PostSyn,0.221628,$fpl,22450,44095597090
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86228828,224568732184258417,51061,37804,42865,PreSyn,0.759,$fpl,224524751675107214,51051,37783,42894,PostSyn,0.263888,$fpl,75444,40886614105
86228829,224568732184258417,51061,37804,42865,PreSyn,0.759,$fpl,224599518530807665,51068,37814,42865,PostSyn,0.999885,$fpl,75444,40886615625
86228830,224568732184258417,51061,37804,42865,PreSyn,0.759,$fpl,224524751696078691,51051,37793,42851,PostSyn,0.558182,$fpl,75444,40886616571
86228831,224568732184258417,51061,37804,42865,PreSyn,0.759,$fpl,224432392704665444,51030,37786,42852,PostSyn,0.839957,$fpl,75444,40886616854


In [22]:
ann = fetch_body_annotations(*vnc_locked)

In [241]:
vstats = fetch_vnc_statuses(*vnc_locked)

Pre-sorting 15781 coordinates by block index...
Pre-sorting 15781 coordinates by block index took 0:00:00.008698
Fetching labels from DVID...


  0%|          | 0/4 [00:00<?, ?it/s]

Fetching labels from DVID took 0:00:01.872234
Pre-sorting 3771 coordinates by block index...
Pre-sorting 3771 coordinates by block index took 0:00:00.005148
Fetching labels from DVID...


  0%|          | 0/4 [00:00<?, ?it/s]

Fetching labels from DVID took 0:00:00.344802
There are 82 duplicate bodies in the results, due to multi-soma and/or multi-cervical bodies!


In [None]:
clio_ann = fetch_json_annotations_all('VNC')

with open('f{CLIO_ANNOTATIONS_DIR}/clio-neurons.pkl', 'wb') as f:
    pickle.dump(clio_ann, f)

In [324]:
with open('f{CLIO_ANNOTATIONS_DIR}/clio-neurons.pkl', 'rb') as f:
    clio_ann = pickle.load(f)

In [325]:
clio_ann = clio_ann.rename(columns={'bodyid': 'body'})
clio_ann['body'] = clio_ann['body'].astype(np.uint64)

In [326]:
clio_ann = clio_ann.merge(vstats.reset_index().drop_duplicates('body')[['body', 'has_soma', 'is_cervical']], 'left', on='body')

In [327]:
# Replace missing values with explicit defaults.
# The filled column is assigned back instead of calling fillna(inplace=True) on a column
# selection: that form mutates an intermediate object, and with pandas copy-on-write it
# leaves clio_ann itself unchanged.
clio_ann['has_soma'] = clio_ann['has_soma'].fillna(False)
clio_ann['is_cervical'] = clio_ann['is_cervical'].fillna(False)
clio_ann['hemilineage'] = clio_ann['hemilineage'].fillna("")
clio_ann['soma_neuromere'] = clio_ann['soma_neuromere'].fillna("")

In [328]:
# Reduce the 'soma_neuromere' annotation to one of T1 / T2 / T3 / ANm:
# any string starting with "anm" (case-insensitive) becomes 'ANm', and every value
# outside the four accepted labels becomes "".
soma_neuromere = (
    clio_ann.set_index('body')['soma_neuromere']
    .map(lambda s: 'ANm' if isinstance(s, str) and s.lower().startswith('anm') else s)
    .map(lambda s: s if s in ('T1', 'T2', 'T3', 'ANm') else "")
)
clio_ann['soma_nm'] = soma_neuromere.values

# Rows flagged is_cervical without a soma get 'brain' for both soma_nm and hemilineage.
descending = clio_ann.query('is_cervical and not has_soma')
clio_ann.loc[descending.index, ['soma_nm', 'hemilineage']] = 'brain'

In [329]:
# Combined label "<soma_nm>-<hemilineage>" for every annotated body.
clio_ann['hemi_nm'] = clio_ann['soma_nm'] + '-' + clio_ann['hemilineage']

# Unit-weight (body, hemi_nm) pairs, restricted to rows where both parts are known:
# hemilineage is neither "" nor "TBD", and soma_nm is not "".
hemi_nm = (
    clio_ann
    .query('hemilineage != "" and hemilineage != "TBD" and soma_nm != ""')
    [['body', 'hemi_nm']]
    .assign(weight=1)
    .set_index(['body', 'hemi_nm'])
    ['weight']
)

In [774]:
len(['0A', '0B',  '1A', '1B','2A', '3A', '3B', '4B', '5B', '6A', '6B', '7B', '8A', '8B', '9A', '9B',
    '10B', '11A', '11B', '12A', '12B', '13A', '13B', '14A', '15B', '16B', '17A', '18B', '19A', '19B','20A', '21A', '22A', '23B']) * 6

204

In [790]:
#clio_ann['soma_neuromere'].value_counts()

In [788]:
clio_ann.columns

Index(['user', 'group', 'root_side', 'body', 'last_modified_by', 'hemilineage',
       'class', 'exit_nerve', 'entry_nerve', 'status', 'typing_notes',
       'confidence', 'to_review', 'soma_neuromere', 'position', 'soma_side',
       'avg_location', 'long_tract', 'naming_user', 'description',
       'neuropils_dendritic', 'neuropils_axonal', 'old_bodyids', 'has_soma',
       'is_cervical', 'soma_nm', 'hemi_nm'],
      dtype='object')

In [783]:
#[f'{i}{k}' for i in range(24) for k in 'AB']

In [784]:
#[*starmap(lambda a,b: a+b, product(map(str, range(24)), 'AB'))]

In [785]:
#print(sorted(clio_ann['hemilineage'].unique()))

In [791]:
#clio_ann.head()

In [None]:
# Statuses that qualify a body for the analysis.
big_statuses = {
    'Prelim Roughly traced', 'Soma Anchor', 'Cervical Anchor', 'Sensory Anchor',
    'Primary Anchor', 'PRT Orphan', 'Leaves',
}
big_bodies = ann.query('status in @big_statuses').index
print(len(big_bodies))

# Keep only synapse pairs whose two ends both belong to a qualifying body, then count
# the pairs per (body_pre, body_post) to obtain the edge strength.
big_partner_df = partner_df.loc[
    partner_df['body_pre'].isin(big_bodies) & partner_df['body_post'].isin(big_bodies)
]
strengths = (
    big_partner_df
    .groupby(['body_pre', 'body_post'])
    .size()
    .rename('strength')
)

In [377]:
strengths.sort_values(ascending=False).reset_index(drop=True).iloc[::1000].hvplot(title='strength per edge')

In [380]:
# Edges ordered from strongest to weakest, with the running total of strength, that total
# as a fraction of all strength, and the 1-based number of edges included so far.
_df = (
    strengths
    .sort_values(ascending=False)
    .to_frame()
    .reset_index(drop=True)
    .assign(
        strength_sum=lambda t: t['strength'].cumsum(),
        strength_frac=lambda t: t['strength_sum'] / t['strength'].sum(),
        edges=lambda t: np.arange(1, len(t)+1),
    )
)

# Plot every 100th row.
_df.iloc[::100].hvplot(
    'edges', 'strength_sum',
    flip_xaxis=False,
    hover_cols=['strength', 'strength_sum', 'strength_frac'],
    title='Connectivity capture as edges are added, sorted by edge strength',
    height=500, width=1000,
)

In [734]:
roi_counts

Unnamed: 0_level_0,roi,PostSyn,PreSyn
body,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10000,CV,188.0,138.0
10000,IntTct,190.0,73.0
10000,LTct,1517.0,752.0
10001,IntNp(T1)(R),54.0,65.0
10001,ProLN(R),1.0,1.0
...,...,...,...
53609617775,AMNp(L),2.0,1.0
53609617775,IntNp(T2)(L),6.0,
53609617775,IntNp(T3)(L),,1.0
53613193093,CV,5.0,2.0


In [761]:
def _counts_for_kind(table, roi_col, kind, level_name):
    """
    Pull the positive counts of one synapse kind out of a wide count table.

    Args:
        table: DataFrame with a 'body' column, the column named by ``roi_col``
            and a numeric column named by ``kind``.
        roi_col: Name of the column that holds the ROI label.
        kind: Count column to extract ('PreSyn' or 'PostSyn').
        level_name: Name given to the ROI level of the returned index.

    Returns:
        Integer Series named 'count', indexed by ('body', level_name), containing
        only the entries that are non-NaN and greater than zero.
    """
    counts = (
        table
        .rename(columns={roi_col: level_name})
        .set_index(['body', level_name])[kind]
        .rename('count')
        .dropna()
        .astype(int)
    )
    return counts.loc[counts > 0]


def extract_roi_counts(point_df, big_bodies):
    """
    Count PreSyn and PostSyn points per (body, roi), with and without merging
    the left/right halves of each ROI.

    Args:
        point_df: DataFrame with 'body', 'roi' and 'kind' columns, one row per
            synapse point. 'kind' values 'PreSyn' and 'PostSyn' are counted.
        big_bodies: Collection of body IDs to keep; rows of other bodies are ignored.

    Returns:
        Tuple ``(roi_pre, roi_post, biroi_pre, biroi_post)``. Each item is an integer
        Series named 'count' whose index is ('body', <name>), where <name> is
        'roi_pre', 'roi_post', 'biroi_pre' or 'biroi_post' respectively.
        The 'biroi' variants strip a trailing '(L)' or '(R)' from the ROI name and
        sum the counts of the merged ROIs. Points whose roi is "<unspecified>" are
        excluded, and only strictly positive counts are returned.
    """
    big_point_df = point_df.query('body in @big_bodies')

    roi_counts = big_point_df.groupby(['body', 'roi', 'kind']).size().rename('count')
    # groupby can emit zero-size groups (e.g. unobserved categories of a categorical key).
    roi_counts = roi_counts[roi_counts > 0]
    roi_counts = roi_counts[roi_counts.index.get_level_values(1) != "<unspecified>"]

    # One row per (body, roi), one column per kind.
    # pivot() arguments are passed by keyword: they are keyword-only from pandas 2.0 on.
    roi_counts = roi_counts.reset_index().pivot(index=['body', 'roi'], columns='kind', values='count')

    # Use plain (non-categorical) column labels so further columns can be added freely.
    roi_counts.columns = roi_counts.columns.astype(object)
    roi_counts.columns.name = ""
    roi_counts = roi_counts.reset_index()

    # If the input contains no points of one kind, pivot() produces no column for it.
    # Add it as all-NaN so that kind simply yields an empty result instead of a KeyError.
    for kind in ('PreSyn', 'PostSyn'):
        if kind not in roi_counts.columns:
            roi_counts[kind] = float('nan')

    roi_pre = _counts_for_kind(roi_counts, 'roi', 'PreSyn', 'roi_pre')
    roi_post = _counts_for_kind(roi_counts, 'roi', 'PostSyn', 'roi_post')

    # Bilateralize: drop a trailing '(L)' / '(R)' and add up the merged ROIs per body.
    roi_counts['biroi'] = roi_counts['roi'].map(lambda s: s[:-3] if s[-3:] in ('(L)', '(R)') else s)
    biroi_counts = roi_counts.groupby(['body', 'biroi'])[['PreSyn', 'PostSyn']].sum().reset_index()

    biroi_pre = _counts_for_kind(biroi_counts, 'biroi', 'PreSyn', 'biroi_pre')
    biroi_post = _counts_for_kind(biroi_counts, 'biroi', 'PostSyn', 'biroi_post')

    return roi_pre, roi_post, biroi_pre, biroi_post

In [762]:
%time roi_pre, roi_post, biroi_pre, biroi_post = extract_roi_counts(point_df, big_bodies)

CPU times: user 12.9 s, sys: 3.03 s, total: 15.9 s
Wall time: 16.1 s


In [763]:
roi_pre.head()

body   roi_pre     
10000  CV              138
       IntTct           73
       LTct            752
10001  IntNp(T1)(R)     65
       ProLN(R)          1
Name: count, dtype: int64

In [764]:
roi_post.head()

body   roi_post    
10000  CV               188
       IntTct           190
       LTct            1517
10001  IntNp(T1)(R)      54
       ProLN(R)           1
Name: count, dtype: int64

In [765]:
biroi_pre.head()

body   biroi_pre
10000  CV           138
       IntTct        73
       LTct         752
10001  IntNp(T1)     65
       ProLN          1
Name: count, dtype: int64

In [766]:
biroi_post.head()

body   biroi_post
10000  CV             188
       IntTct         190
       LTct          1517
10001  IntNp(T1)       54
       ProLN            1
Name: count, dtype: int64

In [768]:
roi_pre.shape, biroi_pre.shape

((74804,), (63773,))

In [769]:
roi_post.shape, biroi_post.shape

((90254,), (75567,))

In [441]:
# NOTE(review): `big_roi_counts` is not assigned in any cell visible in this notebook, so
# this cell depends on leftover kernel state — confirm its origin before re-running.
# The code below expects it to provide 'body', 'roi', 'kind' and 'count' after reset_index().
#
# Strip a trailing '(L)' / '(R)' from each ROI name, then sum the counts of the merged
# ROIs per (body, bilateralized_roi, kind).
bilateralized_roi_counts = big_roi_counts.reset_index()
bilateralized_roi_counts['bilateralized_roi'] = bilateralized_roi_counts['roi'].map(lambda s: s[:-3] if s[-3:] in ('(L)', '(R)') else s)
bilateralized_roi_counts = bilateralized_roi_counts.groupby(['body', 'bilateralized_roi', 'kind'])['count'].sum()

In [725]:
# roi_pivot = big_roi_counts.reset_index().pivot(['body'], ['roi', 'kind'], 'count').fillna(0).astype(np.int16)

# rois = foo.columns.get_level_values(0)
# kinds = foo.columns.get_level_values(1)
# cols = []
# for roi, kind in zip(rois, kinds):
#     kind = kind[:-3].lower()
#     col = roi + '_' + kind
#     col = col.replace('(', '_').replace(')', '_').replace('__', '_')
#     cols.append(col)
# foo.columns = cols
# foo

In [717]:
foo.columns

Index(['roi', 'PostSyn', 'PreSyn'], dtype='object', name='')

body         roi_post         
10000        CV_post               188
             IntTct_post           190
             LTct_post            1517
10001        IntNp(T1)(R)_post      54
             ProLN(R)_post           1
                                  ... 
53609617775  IntNp(T1)(L)_post      12
             AMNp(L)_post            2
             IntNp(T2)(L)_post       6
53613193093  CV_post                 5
             IntNp(T1)(R)_post      86
Name: count, Length: 90254, dtype: int64

In [643]:
foo.columns.names

FrozenList([None, 'kind'])

Index(['', '', 'PostSyn', 'PreSyn'], dtype='object', name='kind')

In [795]:
bilateralized_roi_counts.index.names

FrozenList(['body', 'bilateralized_roi', 'kind'])

In [446]:
clio_ann['class'].value_counts()

Local Interneuron        9753
Ascending Interneuron    2147
Interneuron TBD          1769
Descending               1341
Motor neuron              479
Local Sensory              12
                            9
Interneuron (TBD)           7
Interneuron (local)         5
TBD                         4
Unknown                     3
Ascending Efferent          2
Interneuron_TBD             2
Efferent                    1
Name: class, dtype: int64

In [None]:
# NOTE(review): unfinished cell. DataFrame.merge() requires a `right` argument, so this
# call raises TypeError as written. TODO: decide which table to merge, or remove the cell.
metadata_df = clio_ann.merge()

In [497]:
%%time

# Minimum edge strength: only (body_pre, body_post) pairs whose strength is strictly
# greater than this value are kept.
THRESHOLD = 5

strong_conn = strengths[strengths > THRESHOLD]
# Sorted unique body IDs appearing on either end of a retained edge.
bodies = sorted(pd.unique(strong_conn.reset_index()[['body_pre', 'body_post']].values.reshape(-1)))

# We model PreSyn as body -> roi and PostSyn as roi -> body
biroi_presyn = bilateralized_roi_counts.reset_index().query('body in @bodies and kind == "PreSyn" and count > 0').set_index(['body', 'bilateralized_roi'])['count']
biroi_postsyn = bilateralized_roi_counts.reset_index().query('body in @bodies and kind == "PostSyn" and count > 0').set_index(['bilateralized_roi', 'body'])['count']

# Unit-weight (body, hemi_nm) pairs, restricted to the retained bodies.
strong_hemi_nm = hemi_nm.reset_index().query('body in @bodies').set_index(['body', 'hemi_nm'])['weight']

# Unit-weight (body, group) pairs, restricted to groups that have more than one member
# among the retained bodies. The group label is converted to str.
group_counts = clio_ann.query('body in @bodies')['group'].value_counts()
groups = group_counts[group_counts > 1].index
strong_groups = clio_ann.query('body in @bodies and group in @groups').copy()
strong_groups['weight'] = 1
strong_groups['group'] = strong_groups['group'].astype('str')
strong_groups = strong_groups.set_index(['body', 'group'])['weight']

# NOTE(review): `layers` is built here but never used in this cell; the
# construct_layered_graph() call below ends with a dangling comma. Confirm whether it was
# meant to be passed as an argument.
layers = {
    ('body', 'body'): 0,
    ('body', 'bilateralized_roi'): 1,
    ('bilateralized_roi', 'body'): 1,
    ('body', 'hemi_nm'): 2,
    ('body', 'group'): 3
}

# NOTE(review): `Timer` and `compute_parallel` are not imported explicitly in the visible
# cells; presumably they arrive via `from neuclease.dvid import *` — confirm.
with Timer(f"Inferring with cutoff {THRESHOLD}") as timer:
    g, node_to_vertex = construct_layered_graph([strong_conn, biroi_presyn, biroi_postsyn, strong_hemi_nm, strong_groups], )

    # Options handed to graph-tool's nested blockmodel minimisation: a layered block state
    # keyed on the graph's 'layer' edge property, with the 'weight' edge property as a
    # discrete-geometric edge covariate.
    state_args = {
        "base_type": gt.LayeredBlockState,
        "ec": g.ep.layer,
        "recs": [g.ep.weight],
        "rec_types": ["discrete-geometric"],
        "deg_corr": True,
        "layers": True
    }

    def run(x):
        # `x` is ignored; the function only exists so the work can be handed to compute_parallel().
        nbs = gt.minimize_nested_blockmodel_dl(g, state_args=state_args, multilevel_mcmc_args={"verbose": False})
        return nbs

    # A single work item with processes=1; the first (only) result is kept.
    nbs = compute_parallel(run, [0], processes=1)[0]


Inferring with cutoff 5...
Inferring with cutoff 5 took 2:48:27.862224
CPU times: user 42 s, sys: 10.2 s, total: 52.3 s
Wall time: 2h 48min 28s


In [498]:
# Store the inference results in a new directory named after the threshold and the
# current time. os.makedirs() raises if the directory already exists.
timestamp = datetime.now().strftime("%Y%m%d.%H%M%S")
d = f'test-t{THRESHOLD}-{timestamp}'
os.makedirs(d)

with open(f'{d}/graph.pkl', 'wb') as f:
    pickle.dump(g, f)

with open(f'{d}/node_to_vertex.pkl', 'wb') as f:
    # This file must hold the node -> vertex mapping. Dumping `g` here (as the cell did
    # before) wrote a second copy of the graph and never saved the mapping.
    pickle.dump(node_to_vertex, f)

with open(f'{d}/state_args.pkl', 'wb') as f:
    pickle.dump(state_args, f)

with open(f'{d}/state.pkl', 'wb') as f:
    pickle.dump(nbs, f)

In [550]:
# Body IDs and their graph vertices, taken from the 'body' entry of node_to_vertex.
bodies = node_to_vertex['body'].index
body_vertexes = node_to_vertex['body'].values

# For every body, the list of bilateralized ROIs in which it has PreSyn / PostSyn counts.
presyn_biroi_lists = (
    biroi_presyn.reset_index()
    .groupby('body')['bilateralized_roi']
    .agg(list)
    .rename('presyn_rois')
    .reset_index()
)
postsyn_biroi_lists = (
    biroi_postsyn.reset_index()
    .groupby('body')['bilateralized_roi']
    .agg(list)
    .rename('postsyn_rois')
    .reset_index()
)

# One row per body: its block at index 0 of nbs.get_bs(), plus annotation columns
# attached with left merges on 'body'.
df = (
    pd.DataFrame({'body': bodies, 'block': nbs.get_bs()[0][body_vertexes]})
    .merge(clio_ann[['body', 'group']], how='left', on='body')
    .merge(presyn_biroi_lists, how='left', on='body')
    .merge(postsyn_biroi_lists, how='left', on='body')
    .merge(strong_hemi_nm.reset_index()[['body', 'hemi_nm']], how='left', on='body')
)

df

Unnamed: 0,body,block,group,presyn_rois,postsyn_rois,hemi_nm
0,10000,5422,10000,"[CV, IntTct, LTct]","[CV, IntTct, LTct]",brain-brain
1,10001,17551,,"[IntNp(T1), ProLN, mVAC(T1)]","[IntNp(T1), ProLN, mVAC(T1)]",
2,10002,5422,10000,"[CV, IntTct, LTct]","[CV, IntTct, LTct]",brain-brain
3,10003,22923,,[IntNp(T1)],[IntNp(T1)],
4,10004,24298,,[IntNp(T2)],[IntNp(T2)],T2-13B
...,...,...,...,...,...,...
22103,53591964492,17740,28538,"[AMNp, ANm, CV, HTct(UTct-T3), IntNp(T1), IntN...","[AMNp, CV, HTct(UTct-T3), IntNp(T1), IntNp(T2)...",brain-brain
22104,53595462076,9128,,"[ANm, IntTct, LTct]","[ANm, CV, IntNp(T1), IntTct, LTct]",brain-brain
22105,53609615822,23919,,"[CV, IntNp(T1)]","[CV, IntNp(T1)]",brain-brain
22106,53609617775,23919,,"[AMNp, IntNp(T1), IntNp(T3)]","[AMNp, CV, IntNp(T1), IntNp(T2)]",brain-brain


In [566]:
vc = df['block'].value_counts()
vc.hvplot.bar(width=15*len(vc)).opts(xrotation=45)

In [598]:
# Template viewer state that every generated link starts from.
# The file is read inside a `with` block so the handle is closed right away.
with open(VNC_LINK_STATE, 'r') as f:
    VNC_BASE_LINK_STATE = json.load(f)
def vnc_link_for_bodies(bodies, selected=None, title=None, base_state=None):
    """
    Build a clio-ng viewer link that lists the given bodies in the segmentation layer.

    Args:
        bodies: Iterable of body IDs; all of them go into the layer's "segmentQuery".
        selected: Iterable of body IDs to put into the layer's "segments" list.
            Defaults to ``bodies``.
        title: Optional value for the state's "title" entry (skipped when falsy).
        base_state: Viewer state (dict) used as the template.
            Defaults to the module-level ``VNC_BASE_LINK_STATE``. It is never modified.

    Returns:
        The link as a str: "https://clio-ng.janelia.org/#!" followed by the
        URL-quoted JSON of the resulting state.

    Raises:
        RuntimeError: If the template has no layer whose source URL starts with "dvid"
            and ends with "segmentation".
    """
    if selected is None:
        selected = bodies
    if base_state is None:
        base_state = VNC_BASE_LINK_STATE

    # Work on a private copy so the shared template stays untouched.
    link = copy.deepcopy(base_state)
    if title:
        link["title"] = title

    link["layout"] = "3d"

    seg_layer = None
    for layer in link['layers']:
        # A layer's 'source' may be a dict with a 'url' entry or a bare URL string.
        source = layer.get('source', {})
        if isinstance(source, dict):
            url = source.get('url', "")
        elif isinstance(source, str):
            url = source
        else:
            url = ""

        if url.startswith("dvid") and url.endswith("segmentation"):
            seg_layer = layer
            break

    if seg_layer is None:
        # Without this check the assignments below would fail with an UnboundLocalError.
        raise RuntimeError("The link state has no DVID segmentation layer to put the bodies in.")

    # All bodies in the segment query
    seg_layer["segmentQuery"] = ', '.join(map(str, bodies))

    # But only select a subset
    seg_layer["segments"] = [str(body) for body in selected]

    return "https://clio-ng.janelia.org/#!" + urllib.parse.quote(json.dumps(link))

In [599]:
df

Unnamed: 0,body,block,group,presyn_rois,postsyn_rois,hemi_nm
0,10000,5422,10000,"[CV, IntTct, LTct]","[CV, IntTct, LTct]",brain-brain
1,10001,17551,,"[IntNp(T1), ProLN, mVAC(T1)]","[IntNp(T1), ProLN, mVAC(T1)]",
2,10002,5422,10000,"[CV, IntTct, LTct]","[CV, IntTct, LTct]",brain-brain
3,10003,22923,,[IntNp(T1)],[IntNp(T1)],
4,10004,24298,,[IntNp(T2)],[IntNp(T2)],T2-13B
...,...,...,...,...,...,...
22103,53591964492,17740,28538,"[AMNp, ANm, CV, HTct(UTct-T3), IntNp(T1), IntN...","[AMNp, CV, HTct(UTct-T3), IntNp(T1), IntNp(T2)...",brain-brain
22104,53595462076,9128,,"[ANm, IntTct, LTct]","[ANm, CV, IntNp(T1), IntTct, LTct]",brain-brain
22105,53609615822,23919,,"[CV, IntNp(T1)]","[CV, IntNp(T1)]",brain-brain
22106,53609617775,23919,,"[AMNp, IntNp(T1), IntNp(T3)]","[AMNp, CV, IntNp(T1), IntNp(T2)]",brain-brain


In [602]:
def _common_and_all_rois(roi_lists):
    """
    Summarise a collection of ROI lists.

    Returns a pair ``(common, everything)``: the sorted ROIs present in every list and
    the sorted ROIs present in at least one list. An empty result (including the case
    of no lists at all) is represented by "" rather than [].
    """
    roi_sets = [set(rois) for rois in roi_lists]
    if not roi_sets:
        return "", ""
    common = sorted(set.intersection(*roi_sets)) or ""
    everything = sorted(set.union(*roi_sets)) or ""
    return common, everything


def generate_link_table(df, link_fn=None):
    """
    Summarise each block of ``df`` and render the summary as an HTML table of viewer links.

    Args:
        df: DataFrame with columns 'body', 'block', 'group', 'presyn_rois',
            'postsyn_rois' and 'hemi_nm'. The two ROI columns hold lists (or NaN).
        link_fn: Callable ``(bodies, selected, title) -> str`` that produces the link
            for a block. Defaults to ``vnc_link_for_bodies``.

    Returns:
        Tuple ``(table_df, table_html)``.
        ``table_df`` has one row per block with the columns block, num_bodies, ungrouped,
        link, hemi_nm, common_presyn_rois, common_postsyn_rois, all_presyn_rois and
        all_postsyn_rois. List-valued cells are "" when there is nothing to list.
        ``table_html`` is an HTML ``<table>`` of the same rows ordered by num_bodies.
    """
    if link_fn is None:
        link_fn = vnc_link_for_bodies

    table_data = []
    for block, block_df in df.groupby('block'):
        # A block whose bodies have no ROI lists at all yields "" instead of an IndexError.
        common_presyn_rois, all_presyn_rois = _common_and_all_rois(block_df['presyn_rois'].dropna())
        common_postsyn_rois, all_postsyn_rois = _common_and_all_rois(block_df['postsyn_rois'].dropna())
        hemi_nm = sorted(set(block_df['hemi_nm'].dropna())) or ""

        # By default, select the bodies without groups so far.
        # A boolean mask is used rather than query(): a method call inside a query
        # string is not supported by every query engine.
        is_ungrouped = block_df['group'].isnull() | (block_df['group'] == "")
        ungrouped_bodies = block_df.loc[is_ungrouped, 'body']
        title = f'block-{block} ({len(ungrouped_bodies)}/{len(block_df)} ungrouped/total)'
        link = link_fn(block_df['body'], ungrouped_bodies, title)

        table_data.append((block, len(block_df), len(ungrouped_bodies), link, hemi_nm,
                           common_presyn_rois, common_postsyn_rois, all_presyn_rois, all_postsyn_rois))

    cols = ["block", "num_bodies", "ungrouped", "link", "hemi_nm",
            "common_presyn_rois", "common_postsyn_rois", "all_presyn_rois", "all_postsyn_rois"]
    table_df = pd.DataFrame(table_data, columns=cols)

    # Assemble the markup from parts and join once at the end.
    parts = ["<table>"]
    # Header cells belong inside a row element.
    parts.append("<tr>" + ''.join(f"<th>{col}</th>" for col in cols) + "</tr>")
    for row in table_df.sort_values('num_bodies').itertuples():
        parts.append("<tr>")
        parts.append(f"<td>{row.block}</td>")
        parts.append(f"<td>{row.num_bodies}</td>")
        parts.append(f"<td>{row.ungrouped}</td>")
        parts.append(f'<td><a href="{row.link}">block-{row.block}</a></td>')
        parts.append(f"<td>{row.hemi_nm}</td>")
        parts.append(f"<td>{row.common_presyn_rois}</td>")
        parts.append(f"<td>{row.common_postsyn_rois}</td>")
        parts.append(f"<td>{row.all_presyn_rois}</td>")
        parts.append(f"<td>{row.all_postsyn_rois}</td>")
        # Trailing empty cell (it has no matching header).
        parts.append("<td></td>")
        parts.append("</tr>")
    parts.append("</table>")
    table_html = ''.join(parts)

    return table_df, table_html

In [603]:
table_df, table_html = generate_link_table(df)
HTML(table_html)

0,1,2,3,4,5,6,7,8,9
23948,8,2,block-23948,['T2-5B'],"['AMNp', 'IntNp(T1)', 'IntNp(T2)', 'IntNp(T3)', 'LTct']","['AMNp', 'IntNp(T1)', 'IntNp(T2)', 'IntNp(T3)', 'LTct']","['AMNp', 'ANm', 'CV', 'IntNp(T1)', 'IntNp(T2)', 'IntNp(T3)', 'LTct', 'mVAC(T1)']","['AMNp', 'ANm', 'CV', 'IntNp(T1)', 'IntNp(T2)', 'IntNp(T3)', 'IntTct', 'LTct', 'mVAC(T1)']",
13777,10,4,block-13777,['brain-brain'],"['AMNp', 'ANm', 'IntNp(T3)']","['AMNp', 'ANm', 'IntNp(T2)', 'IntNp(T3)']","['AMNp', 'ANm', 'CV', 'HTct(UTct-T3)', 'IntNp(T1)', 'IntNp(T2)', 'IntNp(T3)', 'WTct(UTct-T2)', 'mVAC(T1)', 'mVAC(T3)']","['AMNp', 'ANm', 'CV', 'HTct(UTct-T3)', 'IntNp(T1)', 'IntNp(T2)', 'IntNp(T3)', 'IntTct', 'LTct', 'WTct(UTct-T2)', 'mVAC(T1)', 'mVAC(T2)', 'mVAC(T3)']",
6167,10,5,block-6167,"['T2-10B', 'T2-1B', 'T2-9A', 'T2-9B', 'T3-9B']","['IntNp(T2)', 'mVAC(T2)']","['IntNp(T2)', 'mVAC(T2)']","['AMNp', 'ANm', 'IntNp(T1)', 'IntNp(T2)', 'IntNp(T3)', 'LTct', 'mVAC(T1)', 'mVAC(T2)', 'mVAC(T3)']","['AMNp', 'ANm', 'CV', 'IntNp(T1)', 'IntNp(T2)', 'IntNp(T3)', 'LTct', 'mVAC(T1)', 'mVAC(T2)', 'mVAC(T3)']",
859,12,4,block-859,['T2-17A'],,,"['AMNp', 'ANm', 'CV', 'HTct(UTct-T3)', 'IntNp(T1)', 'IntNp(T2)', 'IntNp(T3)', 'IntTct', 'LTct', 'NTct(UTct-T1)', 'WTct(UTct-T2)', 'mVAC(T1)', 'mVAC(T2)', 'mVAC(T3)']","['AMNp', 'ANm', 'CV', 'HTct(UTct-T3)', 'IntNp(T1)', 'IntNp(T2)', 'IntNp(T3)', 'IntTct', 'LTct', 'NTct(UTct-T1)', 'WTct(UTct-T2)', 'mVAC(T1)', 'mVAC(T2)', 'mVAC(T3)']",
9304,15,4,block-9304,,"['ANm', 'IntNp(T3)']",['ANm'],"['AMNp', 'ANm', 'CV', 'HTct(UTct-T3)', 'IntNp(T1)', 'IntNp(T2)', 'IntNp(T3)', 'IntTct', 'LTct']","['AMNp', 'ANm', 'CV', 'HTct(UTct-T3)', 'IntNp(T1)', 'IntNp(T2)', 'IntNp(T3)', 'IntTct', 'LTct', 'WTct(UTct-T2)', 'mVAC(T1)']",
7343,17,13,block-7343,"['T2-1A', 'T2-21A']",['IntNp(T2)'],"['IntNp(T2)', 'IntTct', 'LTct']","['AMNp', 'IntNp(T1)', 'IntNp(T2)', 'IntTct', 'LTct', 'mVAC(T1)']","['AMNp', 'IntNp(T1)', 'IntNp(T2)', 'IntTct', 'LTct', 'mVAC(T1)']",
8328,17,12,block-8328,"['T1-3B', 'T1-6B']",['WTct(UTct-T2)'],['WTct(UTct-T2)'],"['ADMN', 'AMNp', 'ANm', 'DMetaN', 'HTct(UTct-T3)', 'IntNp(T1)', 'IntNp(T2)', 'IntNp(T3)', 'IntTct', 'LTct', 'MesoAN', 'NTct(UTct-T1)', 'PDMN', 'WTct(UTct-T2)']","['ADMN', 'AMNp', 'ANm', 'DMetaN', 'HTct(UTct-T3)', 'IntNp(T1)', 'IntNp(T2)', 'IntNp(T3)', 'IntTct', 'LTct', 'MesoAN', 'NTct(UTct-T1)', 'PDMN', 'WTct(UTct-T2)']",
19487,18,11,block-19487,"['T1-6B', 'T2-6B', 'T3-6B', 'brain-brain']","['HTct(UTct-T3)', 'IntTct']","['HTct(UTct-T3)', 'IntTct']","['ANm', 'CV', 'DMetaN', 'HTct(UTct-T3)', 'IntNp(T2)', 'IntNp(T3)', 'IntTct', 'LTct', 'NTct(UTct-T1)', 'WTct(UTct-T2)']","['AMNp', 'ANm', 'CV', 'HTct(UTct-T3)', 'IntNp(T1)', 'IntNp(T2)', 'IntNp(T3)', 'IntTct', 'LTct', 'NTct(UTct-T1)', 'WTct(UTct-T2)', 'mVAC(T1)']",
13644,21,16,block-13644,"['T1-12A', 'T1-19B', 'T2-0A']","['HTct(UTct-T3)', 'IntTct', 'WTct(UTct-T2)']","['HTct(UTct-T3)', 'IntTct', 'LTct', 'WTct(UTct-T2)']","['AMNp', 'ANm', 'CV', 'HTct(UTct-T3)', 'IntNp(T1)', 'IntNp(T2)', 'IntNp(T3)', 'IntTct', 'LTct', 'NTct(UTct-T1)', 'PDMN', 'WTct(UTct-T2)', 'mVAC(T1)']","['AMNp', 'ANm', 'CV', 'HTct(UTct-T3)', 'IntNp(T1)', 'IntNp(T2)', 'IntNp(T3)', 'IntTct', 'LTct', 'NTct(UTct-T1)', 'PDMN', 'WTct(UTct-T2)', 'mVAC(T1)']",
9019,23,6,block-9019,"['T2-17A', 'T3-7B', 'brain-brain']",['IntNp(T3)'],['IntNp(T3)'],"['AMNp', 'ANm', 'CV', 'HTct(UTct-T3)', 'IntNp(T1)', 'IntNp(T2)', 'IntNp(T3)', 'IntTct', 'LTct', 'WTct(UTct-T2)', 'mVAC(T1)', 'mVAC(T2)', 'mVAC(T3)']","['AMNp', 'ANm', 'AbN4', 'AbNT', 'CV', 'HTct(UTct-T3)', 'IntNp(T1)', 'IntNp(T2)', 'IntNp(T3)', 'IntTct', 'LTct', 'MetaLN', 'WTct(UTct-T2)', 'mVAC(T1)', 'mVAC(T2)', 'mVAC(T3)']",


In [607]:
df.query('group == 11285')

Unnamed: 0,body,block,group,presyn_rois,postsyn_rois,hemi_nm
1152,11285,11232,11285,"[AMNp, CV, IntNp(T1), IntNp(T2), IntTct, LTct,...","[AMNp, CV, IntNp(T1), IntNp(T2), IntTct, LTct,...",


In [608]:
clio_ann.query('group == 11285')

Unnamed: 0,user,group,root_side,body,last_modified_by,hemilineage,class,exit_nerve,entry_nerve,status,...,long_tract,naming_user,description,neuropils_dendritic,neuropils_axonal,old_bodyids,has_soma,is_cervical,soma_nm,hemi_nm
3214,jefferis@gmail.com,11285,,11285,jefferis@gmail.com,TBD,Ascending Interneuron,CvC,,Prelim Roughly traced,...,,,,,,,True,True,T1,T1-TBD


In [540]:
cluster = vc.index[-1]
link = vnc_link_for_bodies(df.query('block == @cluster')['body'])
HTML(f'<a href="{link}">link</a>')

In [501]:
vc

20261    549
22091    437
16762    380
24298    340
12550    337
        ... 
9304      15
859       12
13777     10
6167      10
23948      8
Name: block, Length: 175, dtype: int64

In [611]:
clio_ann.query('hemilineage != ""')['hemilineage'].value_counts().sum()

15198

In [797]:
#vnc_link_for_bodies(clio_ann.query('hemilineage == "9B"')['body'])

In [796]:
#clio_ann.query('hemilineage != ""').groupby(['hemilineage', 'soma_side']).size().rename('count').hvplot.barh(height=3000)

In [502]:
#print(' '.join(map(str, df.query('block == 111')['body'].tolist())))

In [503]:
#print(' '.join(map(str, df.query('block == ')['body'].sample(100).tolist())))

In [504]:
nbs

<NestedBlockState object, with base <LayeredBlockState object with 24299 blocks, 5 layers, degree-corrected, with 2 edge covariates, for graph <Graph object, directed, with 24335 vertices and 1391002 edges, 2 internal edge properties, at 0x7ffcb1f62490>, at 0x7ffcf2c52450>, and 16 levels of sizes [(24335, 197), (197, 67), (67, 34), (34, 14), (14, 6), (6, 2), (2, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1)] at 0x7ffcb1f62e90>

In [434]:
sensories = vstats.query('status == "Sensory Anchor"').index
df.query('body in @sensories')['block'].value_counts()

20776    312
376      269
49       260
4705     252
2065     207
7075     191
11845    172
1346     103
2325      98
20719     97
19356     92
1817      48
7662      36
44        36
14522     35
594       32
8208      24
9012      19
7527      18
2585      14
715       12
5193      12
515       10
3076      10
4636      10
11554      8
5741       8
93         7
1232       7
6690       7
20953      5
3209       4
15322      4
3497       3
3634       2
18896      2
7727       1
886        1
2702       1
6108       1
7562       1
3014       1
Name: block, dtype: int64

In [437]:
print(' '.join(map(str, df.query('body in @sensories and block == 3634')['body'].tolist())))

22169 26075


In [626]:
clio_ann['group'].value_counts().hvplot.hist(bins=np.arange(0.5, 50.5, 1), width=1000, height=500)

In [804]:
# The `from neuclease... import ...` statements used in this notebook do not bind the
# top-level name `neuclease`, so import the submodule explicitly before assigning to it.
import neuclease.clio._clio

# Reset the module-level default session.
# NOTE(review): presumably this makes the next Clio call create a new session — confirm.
neuclease.clio._clio.DEFAULT_CLIO_SESSION = None

In [827]:
from neuclease.clustering.blockmodel import fetch_sanitized_body_annotations, extract_roi_counts, minimize_layered_nested_blockmodel

In [805]:
ca = fetch_json_annotations_all('VNC')

In [810]:
vstats['has_soma'].value_counts(dropna=False)

True     15844
False    11916
Name: has_soma, dtype: int64

In [818]:
#ca.dtypes

In [None]:
vstats = fetch_vnc_statuses(*vnc_locked)

In [None]:
big_statuses = {'Prelim Roughly traced', 'Soma Anchor', 'Cervical Anchor', 'Sensory Anchor', 'Primary Anchor', 'PRT Orphan', 'Leaves'}
big_bodies = vstats.query('status in @big_statuses').index.drop_duplicates()
roi_pre, roi_post, biroi_pre, biroi_post = extract_roi_counts(point_df, big_bodies)

In [894]:
body_ann = fetch_sanitized_body_annotations(*vnc_locked, ca, vstats)

In [877]:
def _unit_weight_pairs(annotations, column):
    """Return a Series of 1s indexed by ('body', column) for every row whose `column` is not ""."""
    pairs = annotations.query(f'{column} != ""')[column].reset_index()
    pairs['weight'] = 1
    return pairs.set_index(['body', column])['weight']


# Unit-weight membership pairs taken from the body annotations.
roihemi = _unit_weight_pairs(body_ann, 'roihemi')
biroihemi = _unit_weight_pairs(body_ann, 'biroihemi')

# Remove underscores from the name of each count series (in place).
for s in roi_pre, roi_post, biroi_pre, biroi_post:
    s.rename(s.name.replace('_', ''), inplace=True)

group = _unit_weight_pairs(body_ann, 'group')

# Fit the layered nested blockmodel on the connection strengths plus the four extra
# series, using 300 as the third positional argument.
g, node_to_vertex, nbs, df = minimize_layered_nested_blockmodel(
    strengths,
    [biroihemi, group, biroi_pre, biroi_post],
    300,
)

Inferring with strength cutoff 300...
Inferring with strength cutoff 300 took 0:01:00.941393


In [878]:
df['block'].value_counts()

1463    305
955     265
1008    233
1474    155
936      54
119      37
1147     29
101      16
1449     11
Name: block, dtype: int64

In [900]:
print(df.query('block == 101')['body'].tolist())

[10051, 10065, 10237, 10310, 10411, 10471, 10582, 10747, 10750, 10999, 11129, 11319, 11957, 12137, 101414, 162962]


In [903]:
d = body_ann.loc[[10051, 10065, 10237, 10310, 10411, 10471, 10582, 10747, 10750, 10999, 11129, 11319, 11957, 12137, 101414, 162962], 'group']

In [904]:
d

body
10051          
10065     10059
10237     10237
10310          
10411     10411
10471     10471
10582     10237
10747          
10750     10411
10999     10471
11129          
11319          
11957          
12137          
101414         
162962         
Name: group, dtype: object

In [906]:
d.loc[d == ""].index.tolist()

[10051, 10310, 10747, 11129, 11319, 11957, 12137, 101414, 162962]