In [1]:
from caveclient import CAVEclient
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from standard_transform import minnie_ds
import numpy as np
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

datastack_name = 'minnie65_public'
client = CAVEclient(datastack_name)

# Show the description of the datastack. A bare expression is only rendered
# when it is the last statement of a cell, so print it explicitly.
print(client.info.get_datastack_info()['description'])

# List the available materialization versions in chronological order.
# Each timestamp is fetched once and reused for both sorting and printing.
version_timestamps = {
    version: client.materialize.get_timestamp(version)
    for version in client.materialize.get_versions()
}
versions = sorted(version_timestamps, key=version_timestamps.get)

for version in versions:
    print(f"Version {version}: {version_timestamps[version]}")

# Pin one materialization version so that every query below runs against
# the same snapshot of the data.
client.version = 1621

Version 117: 2021-06-11 08:10:00.215114+00:00
Version 343: 2022-02-24 08:10:00.184668+00:00
Version 661: 2023-04-06 20:17:09.199182+00:00
Version 795: 2023-08-23 08:10:01.404268+00:00
Version 943: 2024-01-22 08:10:01.497934+00:00
Version 1078: 2024-06-05 10:10:01.203215+00:00
Version 1181: 2024-09-16 10:10:01.121167+00:00
Version 1300: 2025-01-13 10:10:01.286229+00:00
Version 1412: 2025-04-29 10:10:01.200893+00:00
Version 1507: 2025-07-31 08:10:01.117494+00:00
Version 1621: 2025-11-25 08:10:01.094430+00:00


In [8]:
#BC <-> PN23
# Reciprocity between two cell types inside one bounding box, restricted to
# cells that have an entry in the proofreading table.

from functions import bounding_box

# bounding_box is a project helper; presumably it converts the six numbers
# into the bbox format expected by `pt_position_bbox` -- TODO confirm units.
bounding_box_twotypes = bounding_box(600, 140, 800, 700, 240, 900)
cell_type_one = 'BC'
cell_type_two = '23P'

# Population one: cells of the requested type inside the box, then the subset
# that appears in the proofreading table.
# NOTE(review): the root IDs used downstream come from `valid_id`; confirm
# that this equals the current `pt_root_id` at the pinned version and whether
# the table's axon/dendrite status columns should be filtered as well.
cell_type_one_df = client.materialize.tables.aibs_metamodel_celltypes_v661(
    cell_type=cell_type_one, pt_position_bbox=bounding_box_twotypes
).query(split_positions=True)
cell_type_one_list = cell_type_one_df['pt_root_id'].tolist()
proofread_one_df = client.materialize.tables.proofreading_status_and_strategy(
    pt_root_id=cell_type_one_list
).query(split_positions=True)
proofread_one_list = proofread_one_df['valid_id'].tolist()
number_cell_type_one = len(proofread_one_list)
print(f'number {cell_type_one}: {number_cell_type_one}')

# Population two: same procedure for the second cell type.
cell_type_two_df = client.materialize.tables.aibs_metamodel_celltypes_v661(
    cell_type=cell_type_two, pt_position_bbox=bounding_box_twotypes
).query(split_positions=True)
cell_type_two_list = cell_type_two_df['pt_root_id'].tolist()
proofread_two_df = client.materialize.tables.proofreading_status_and_strategy(
    pt_root_id=cell_type_two_list
).query(split_positions=True)
proofread_two_list = proofread_two_df['valid_id'].tolist()
number_cell_type_two = len(proofread_two_list)
print(f'number {cell_type_two}: {number_cell_type_two}')
combined_df = pd.concat([proofread_one_df, proofread_two_df], ignore_index=True)

# Every rate below divides by (cells of type one) * (cells of type two), so
# fail early with a clear message instead of a ZeroDivisionError further down.
if number_cell_type_one == 0 or number_cell_type_two == 0:
    raise ValueError(
        f'Need at least one cell of each type, got {number_cell_type_one} '
        f'{cell_type_one} and {number_cell_type_two} {cell_type_two}'
    )

# output connections (BC -> 23P): one row per (pre, post) pair with its synapse count
output_syn_df = client.materialize.synapse_query(pre_ids=proofread_one_list, post_ids=proofread_two_list)
output_df = (
    output_syn_df
    .groupby(['pre_pt_root_id', 'post_pt_root_id'])
    .count()[['id']]
    .rename(columns={'id': 'syn_count'})
    .sort_values(by='syn_count', ascending=False)
    .reset_index()
)
unique_connections_bcpn_df = output_df[['pre_pt_root_id', 'post_pt_root_id']].drop_duplicates()
number_connections_found_bcpn = len(unique_connections_bcpn_df)
print('number_connections_found BC -> 23PN (unique pairs):', number_connections_found_bcpn)

# input connections (23P -> BC)
input_syn_df = client.materialize.synapse_query(pre_ids=proofread_two_list, post_ids=proofread_one_list)
input_df = (
    input_syn_df
    .groupby(['pre_pt_root_id', 'post_pt_root_id'])
    .count()[['id']]
    .rename(columns={'id': 'syn_count'})
    .sort_values(by='syn_count', ascending=False)
    .reset_index()
)
unique_connections_pnbc_df = input_df[['pre_pt_root_id', 'post_pt_root_id']].drop_duplicates()
number_connections_found_pnbc = len(unique_connections_pnbc_df)
print('number_connections_found 23PN -> BC (unique pairs):', number_connections_found_pnbc)

# reciprocal connections: a BC -> 23P row matches a 23P -> BC row whose
# pre/post IDs are swapped, so each result row is one reciprocally connected pair.
reciprocal_df = pd.merge(
    output_df,
    input_df,
    left_on=['pre_pt_root_id', 'post_pt_root_id'],
    right_on=['post_pt_root_id', 'pre_pt_root_id'],
    suffixes=['_BC_to_23P', '_23P_to_BC'],
)
number_reciprocal_found = len(reciprocal_df)

# connectivity calculations
number_cells_total = number_cell_type_one + number_cell_type_two
number_connections_total = number_connections_found_bcpn + number_connections_found_pnbc
print('number_cells_total:', number_cells_total)
# Number of (type one, type two) cell pairs; the denominator for both directions.
total_possible_connections = number_cell_type_one * number_cell_type_two
print('total_possible_connections:', total_possible_connections)

# P(A->B)
connectivity_bcpn = number_connections_found_bcpn / total_possible_connections
# P(B->A)
connectivity_pnbc = number_connections_found_pnbc / total_possible_connections

print(f'Connectivity BC -> 23P: {connectivity_bcpn * 100:.4f}%')
print(f'Connectivity 23P -> BC: {connectivity_pnbc * 100:.4f}%')

# If the two directions were independent, a pair would be reciprocal with
# probability P(A->B) * P(B->A).
reciprocity_prob_chance = connectivity_bcpn * connectivity_pnbc
expected_reciprocal_count = reciprocity_prob_chance * total_possible_connections

print(f'Probability of reciprocity by chance: {reciprocity_prob_chance * 100:.4f}%')
print(f'Expected reciprocal pairs (by chance): {expected_reciprocal_count:.2f}')

reciprocity_prob_observed = number_reciprocal_found / total_possible_connections

print(f'Probability of reciprocity observed: {reciprocity_prob_observed * 100:.4f}%')
print(f'Observed reciprocal pairs: {number_reciprocal_found}')

# Compare with a tolerance: exact equality between two floating-point ratios
# is practically never true, which made the "as high as chance" branch dead.
# This is a descriptive comparison only, not a significance test.
if np.isclose(reciprocity_prob_observed, reciprocity_prob_chance):
    print('reciprocity is as high as chance')
elif reciprocity_prob_observed > reciprocity_prob_chance:
    print('reciprocity is overrepresented')
else:
    print('reciprocity is underrepresented')

voxels: x=[161123], y=[120584], z=[20000]
voxels: x=[188206], y=[143310], z=[22500]
number BC: 7
number 23P: 142
number_connections_found BC -> 23PN (unique pairs): 461
number_connections_found 23PN -> BC (unique pairs): 318
number_cells_total: 149
total_possible_connections: 994
Connectivity BC -> 23P: 46.3783%
Connectivity 23P -> BC: 31.9920%
Probability of reciprocity by chance: 14.8373%
Expected reciprocal pairs (by chance): 147.48
Probability of reciprocity observed: 21.6298%
Observed reciprocal pairs: 215
reciprocity is overrepresented


In [12]:
#BC <-> BC
# Reciprocity within a single cell type inside one bounding box, restricted
# to cells that have an entry in the proofreading table.

from functions import bounding_box

# bounding_box is a project helper; presumably it converts the six numbers
# into the bbox format expected by `pt_position_bbox` -- TODO confirm units.
bounding_box_onetype = bounding_box(600, 140, 800, 700, 240, 900)
cell_type = 'BC'

# NOTE(review): the root IDs used downstream come from `valid_id`; confirm
# that this equals the current `pt_root_id` at the pinned version.
cell_type_df = client.materialize.tables.aibs_metamodel_celltypes_v661(
    cell_type=cell_type, pt_position_bbox=bounding_box_onetype
).query(split_positions=True)
cell_type_list = cell_type_df['pt_root_id'].tolist()
proofread_df = client.materialize.tables.proofreading_status_and_strategy(
    pt_root_id=cell_type_list
).query(split_positions=True)
proofread_list = proofread_df['valid_id'].tolist()
number_cells = len(proofread_list)
print(f'number {cell_type}: {number_cells}')

# With fewer than two cells there are no ordered pairs, and every rate below
# would divide by zero; fail early with a clear message instead.
if number_cells < 2:
    raise ValueError(f'Need at least two {cell_type} cells to compute connectivity, got {number_cells}')

# connections (BC -> BC): one row per (pre, post) pair with its synapse count
syn_df = client.materialize.synapse_query(pre_ids=proofread_list, post_ids=proofread_list)
connections_df_raw = (
    syn_df
    .groupby(['pre_pt_root_id', 'post_pt_root_id'])
    .count()[['id']]
    .rename(columns={'id': 'syn_count'})
    .sort_values(by='syn_count', ascending=False)
    .reset_index()
)

# exclude self-connections (autapses)
connections_df = connections_df_raw[connections_df_raw['pre_pt_root_id'] != connections_df_raw['post_pt_root_id']]
print(connections_df)

unique_connections_df = connections_df[['pre_pt_root_id', 'post_pt_root_id']].drop_duplicates()
number_connections_found = len(unique_connections_df)

# reciprocal connections: self-join where A -> B on the left matches B -> A on the right
reciprocal_df = pd.merge(
    unique_connections_df,
    unique_connections_df,
    left_on=['pre_pt_root_id', 'post_pt_root_id'],
    right_on=['post_pt_root_id', 'pre_pt_root_id'],
    suffixes=['_forward', '_backward'],
)

# remove duplicates (A->B and B->A count as one reciprocal pair)
reciprocal_df = reciprocal_df[reciprocal_df['pre_pt_root_id_forward'] < reciprocal_df['post_pt_root_id_forward']]
number_reciprocal_found = len(reciprocal_df)

print(reciprocal_df)

# connectivity calculations
# Ordered pairs of distinct cells: n * (n - 1).
total_possible_connections = number_cells * (number_cells - 1)
print('total_possible_connections:', total_possible_connections)
connectivity = (number_connections_found / total_possible_connections)
connectivity_percent = connectivity * 100
print(f'connectivity: {connectivity_percent:.4}%')

# reciprocity calculations
# Chance level: both directions of an unordered pair are present independently.
reciprocity_rate_chance = (connectivity * connectivity) * 100
print(f'reciprocity by chance: {reciprocity_rate_chance:.4}%')
# Observed level: reciprocal pairs as a fraction of all unordered pairs.
reciprocity_rate_observed = (number_reciprocal_found / (total_possible_connections/2)) * 100
print(f'reciprocity observed: {reciprocity_rate_observed:.4}%')
print(f'reciprocal connections found: {number_reciprocal_found} / {total_possible_connections/2}')

# Compare with a tolerance: exact equality between two floating-point ratios
# is practically never true, which made the "as high as chance" branch dead.
# This is a descriptive comparison only, not a significance test.
if np.isclose(reciprocity_rate_observed, reciprocity_rate_chance):
    print('reciprocity is as high as chance')
elif reciprocity_rate_observed > reciprocity_rate_chance:
    print('reciprocity is overrepresented')
else:
    print('reciprocity is underrepresented')

voxels: x=[161123], y=[120584], z=[20000]
voxels: x=[188206], y=[143310], z=[22500]
number BC: 7
        pre_pt_root_id     post_pt_root_id  syn_count
0   864691135579134981  864691136619344859         16
1   864691135939380228  864691136010404012         12
2   864691136010404012  864691135448017554          7
3   864691136138803837  864691136010404012          7
4   864691136010404012  864691135579134981          6
5   864691135448017554  864691136010404012          6
6   864691136619344859  864691136010404012          6
7   864691135939380228  864691135448017554          4
8   864691135579134981  864691136010404012          4
9   864691136005322698  864691135448017554          4
10  864691135448017554  864691136005322698          4
11  864691136619344859  864691136138803837          3
12  864691136138803837  864691135448017554          3
13  864691135939380228  864691136619344859          3
14  864691136010404012  864691135939380228          2
15  864691135448017554  864691135579134

In [4]:
#23P <-> 23P
# Reciprocity within a single cell type inside one bounding box, restricted
# to cells that have an entry in the proofreading table.

from functions import bounding_box

# bounding_box is a project helper; presumably it converts the six numbers
# into the bbox format expected by `pt_position_bbox` -- TODO confirm units.
bounding_box_onetype = bounding_box(600, 140, 800, 700, 240, 900)
cell_type = '23P'

# NOTE(review): the root IDs used downstream come from `valid_id`; confirm
# that this equals the current `pt_root_id` at the pinned version.
cell_type_df = client.materialize.tables.aibs_metamodel_celltypes_v661(
    cell_type=cell_type, pt_position_bbox=bounding_box_onetype
).query(split_positions=True)
cell_type_list = cell_type_df['pt_root_id'].tolist()
proofread_df = client.materialize.tables.proofreading_status_and_strategy(
    pt_root_id=cell_type_list
).query(split_positions=True)
proofread_list = proofread_df['valid_id'].tolist()
number_cells = len(proofread_list)
print(f'number {cell_type}: {number_cells}')

# With fewer than two cells there are no ordered pairs, and every rate below
# would divide by zero; fail early with a clear message instead.
if number_cells < 2:
    raise ValueError(f'Need at least two {cell_type} cells to compute connectivity, got {number_cells}')

# connections (23P -> 23P): one row per (pre, post) pair with its synapse count
syn_df = client.materialize.synapse_query(pre_ids=proofread_list, post_ids=proofread_list)
connections_df_raw = (
    syn_df
    .groupby(['pre_pt_root_id', 'post_pt_root_id'])
    .count()[['id']]
    .rename(columns={'id': 'syn_count'})
    .sort_values(by='syn_count', ascending=False)
    .reset_index()
)

# exclude self-connections (autapses)
connections_df = connections_df_raw[connections_df_raw['pre_pt_root_id'] != connections_df_raw['post_pt_root_id']]
print(connections_df)

unique_connections_df = connections_df[['pre_pt_root_id', 'post_pt_root_id']].drop_duplicates()
number_connections_found = len(unique_connections_df)

# reciprocal connections: self-join where A -> B on the left matches B -> A on the right
reciprocal_df = pd.merge(
    unique_connections_df,
    unique_connections_df,
    left_on=['pre_pt_root_id', 'post_pt_root_id'],
    right_on=['post_pt_root_id', 'pre_pt_root_id'],
    suffixes=['_forward', '_backward'],
)

# remove duplicates (A->B and B->A count as one reciprocal pair)
reciprocal_df = reciprocal_df[reciprocal_df['pre_pt_root_id_forward'] < reciprocal_df['post_pt_root_id_forward']]
number_reciprocal_found = len(reciprocal_df)

print(reciprocal_df)

# connectivity calculations
# Ordered pairs of distinct cells: n * (n - 1).
total_possible_connections = number_cells * (number_cells - 1)
print('total_possible_connections:', total_possible_connections)
connectivity = (number_connections_found / total_possible_connections)
connectivity_percent = connectivity * 100
print(f'connectivity: {connectivity_percent:.4}%')

# reciprocity calculations
# Chance level: both directions of an unordered pair are present independently,
# i.e. connectivity squared, expressed per unordered pair.
reciprocity_rate_chance = (connectivity * connectivity) * 100
print(f'reciprocity by chance: {reciprocity_rate_chance:.4}%')
# Observed level must use the same denominator as the chance level: reciprocal
# pairs as a fraction of all unordered pairs. The earlier formula
# (2 * reciprocal / connections found) is the fraction of connections that are
# reciprocated, whose chance level is `connectivity`, not `connectivity ** 2`,
# so comparing it with the value above overstated reciprocity.
reciprocity_rate_observed = (number_reciprocal_found / (total_possible_connections/2)) * 100
print(f'reciprocity observed: {reciprocity_rate_observed:.4}%')
print(f'reciprocal connections found: {number_reciprocal_found} / {total_possible_connections/2}')

# Compare with a tolerance: exact equality between two floating-point ratios
# is practically never true, which made the "as high as chance" branch dead.
# This is a descriptive comparison only, not a significance test.
if np.isclose(reciprocity_rate_observed, reciprocity_rate_chance):
    print('reciprocity is as high as chance')
elif reciprocity_rate_observed > reciprocity_rate_chance:
    print('reciprocity is overrepresented')
else:
    print('reciprocity is underrepresented')

voxels: x=[161123], y=[120584], z=[20000]
voxels: x=[188206], y=[143310], z=[22500]
number 23P: 142
          pre_pt_root_id     post_pt_root_id  syn_count
0     864691136674327431  864691136108332249          4
1     864691135349839831  864691135801170018          4
2     864691135867009413  864691136330407914          4
3     864691135361314119  864691135779587517          4
4     864691135562842337  864691135885921648          4
...                  ...                 ...        ...
1196  864691134966014495  864691135584504429          1
1197  864691134966014495  864691135756145106          1
1198  864691134966014495  864691136311114330          1
1199  864691134966014495  864691136330407914          1
1200  864691134966495519  864691135277186789          1

[1201 rows x 3 columns]
    pre_pt_root_id_forward  post_pt_root_id_forward  pre_pt_root_id_backward  \
2       864691135773073147       864691135938604804       864691135938604804   
3       864691135763431478       8646911358

In [15]:
#not proofread BC <-> PN23
# Reciprocity between two cell types inside one bounding box, using every
# cell of each type in the box (no proofreading filter is applied here).

from functions import bounding_box

# bounding_box is a project helper; presumably it converts the six numbers
# into the bbox format expected by `pt_position_bbox` -- TODO confirm units.
bounding_box_twotypes = bounding_box(600, 140, 800, 700, 240, 900)
cell_type_one = 'BC'
cell_type_two = '23P'

cell_type_one_df = client.materialize.tables.aibs_metamodel_celltypes_v661(
    cell_type=cell_type_one, pt_position_bbox=bounding_box_twotypes
).query(split_positions=True)
cell_type_one_list = cell_type_one_df['pt_root_id'].tolist()
number_cell_type_one = len(cell_type_one_list)
print(f'number {cell_type_one}: {number_cell_type_one}')

cell_type_two_df = client.materialize.tables.aibs_metamodel_celltypes_v661(
    cell_type=cell_type_two, pt_position_bbox=bounding_box_twotypes
).query(split_positions=True)
cell_type_two_list = cell_type_two_df['pt_root_id'].tolist()
number_cell_type_two = len(cell_type_two_list)
print(f'number {cell_type_two}: {number_cell_type_two}')
combined_df = pd.concat([cell_type_one_df, cell_type_two_df], ignore_index=True)

# Every rate below divides by (cells of type one) * (cells of type two), so
# fail early with a clear message instead of a ZeroDivisionError further down.
if number_cell_type_one == 0 or number_cell_type_two == 0:
    raise ValueError(
        f'Need at least one cell of each type, got {number_cell_type_one} '
        f'{cell_type_one} and {number_cell_type_two} {cell_type_two}'
    )

# output connections (BC -> 23P): one row per (pre, post) pair with its synapse count
output_syn_df = client.materialize.synapse_query(pre_ids=cell_type_one_list, post_ids=cell_type_two_list)
output_df = (
    output_syn_df
    .groupby(['pre_pt_root_id', 'post_pt_root_id'])
    .count()[['id']]
    .rename(columns={'id': 'syn_count'})
    .sort_values(by='syn_count', ascending=False)
    .reset_index()
)
unique_connections_bcpn_df = output_df[['pre_pt_root_id', 'post_pt_root_id']].drop_duplicates()
number_connections_found_bcpn = len(unique_connections_bcpn_df)
print('number_connections_found BC -> 23PN (unique pairs):', number_connections_found_bcpn)

# input connections (23P -> BC)
input_syn_df = client.materialize.synapse_query(pre_ids=cell_type_two_list, post_ids=cell_type_one_list)
input_df = (
    input_syn_df
    .groupby(['pre_pt_root_id', 'post_pt_root_id'])
    .count()[['id']]
    .rename(columns={'id': 'syn_count'})
    .sort_values(by='syn_count', ascending=False)
    .reset_index()
)
unique_connections_pnbc_df = input_df[['pre_pt_root_id', 'post_pt_root_id']].drop_duplicates()
number_connections_found_pnbc = len(unique_connections_pnbc_df)
print('number_connections_found 23PN -> BC (unique pairs):', number_connections_found_pnbc)

# reciprocal connections: a BC -> 23P row matches a 23P -> BC row whose
# pre/post IDs are swapped, so each result row is one reciprocally connected pair.
reciprocal_df = pd.merge(
    output_df,
    input_df,
    left_on=['pre_pt_root_id', 'post_pt_root_id'],
    right_on=['post_pt_root_id', 'pre_pt_root_id'],
    suffixes=['_BC_to_23P', '_23P_to_BC'],
)
number_reciprocal_found = len(reciprocal_df)

# connectivity calculations
number_cells_total = number_cell_type_one + number_cell_type_two
number_connections_total = number_connections_found_bcpn + number_connections_found_pnbc
print('number_cells_total:', number_cells_total)
# Number of (type one, type two) cell pairs; the denominator for both directions.
total_possible_connections = number_cell_type_one * number_cell_type_two
print('total_possible_connections:', total_possible_connections)

# P(A->B)
connectivity_bcpn = number_connections_found_bcpn / total_possible_connections
# P(B->A)
connectivity_pnbc = number_connections_found_pnbc / total_possible_connections

print(f'Connectivity BC -> 23P: {connectivity_bcpn * 100:.4f}%')
print(f'Connectivity 23P -> BC: {connectivity_pnbc * 100:.4f}%')

# If the two directions were independent, a pair would be reciprocal with
# probability P(A->B) * P(B->A).
reciprocity_prob_chance = connectivity_bcpn * connectivity_pnbc
expected_reciprocal_count = reciprocity_prob_chance * total_possible_connections

print(f'Probability of reciprocity by chance: {reciprocity_prob_chance * 100:.4f}%')
print(f'Expected reciprocal pairs (by chance): {expected_reciprocal_count:.2f}')

reciprocity_prob_observed = number_reciprocal_found / (total_possible_connections)

print(f'Probability of reciprocity observed: {reciprocity_prob_observed * 100:.4f}%')
print(f'Observed reciprocal pairs: {number_reciprocal_found}')

# Compare with a tolerance: exact equality between two floating-point ratios
# is practically never true, which made the "as high as chance" branch dead.
# This is a descriptive comparison only, not a significance test.
if np.isclose(reciprocity_prob_observed, reciprocity_prob_chance):
    print('reciprocity is as high as chance')
elif reciprocity_prob_observed > reciprocity_prob_chance:
    print('reciprocity is overrepresented')
else:
    print('reciprocity is underrepresented')

voxels: x=[161123], y=[120584], z=[20000]
voxels: x=[188206], y=[143310], z=[22500]
number BC: 7
number 23P: 198
number_connections_found BC -> 23PN (unique pairs): 661
number_connections_found 23PN -> BC (unique pairs): 368
number_cells_total: 205
total_possible_connections: 1386
Connectivity BC -> 23P: 47.6912%
Connectivity 23P -> BC: 26.5512%
Probability of reciprocity by chance: 12.6626%
Expected reciprocal pairs (by chance): 175.50
Probability of reciprocity observed: 17.8932%
Observed reciprocal pairs: 248
reciprocity is overrepresented


In [17]:
#not proofread 23P <-> 23P
# Reciprocity within a single cell type inside one bounding box, using every
# cell of the type in the box (no proofreading filter is applied here).

from functions import bounding_box

# bounding_box is a project helper; presumably it converts the six numbers
# into the bbox format expected by `pt_position_bbox` -- TODO confirm units.
bounding_box_onetype = bounding_box(600, 140, 800, 700, 240, 900)
cell_type = '23P'

cell_type_df = client.materialize.tables.aibs_metamodel_celltypes_v661(
    cell_type=cell_type, pt_position_bbox=bounding_box_onetype
).query(split_positions=True)
cell_type_list = cell_type_df['pt_root_id'].tolist()
number_cells = len(cell_type_list)
print(f'number {cell_type}: {number_cells}')

# With fewer than two cells there are no ordered pairs, and every rate below
# would divide by zero; fail early with a clear message instead.
if number_cells < 2:
    raise ValueError(f'Need at least two {cell_type} cells to compute connectivity, got {number_cells}')

# connections (23P -> 23P): one row per (pre, post) pair with its synapse count
syn_df = client.materialize.synapse_query(pre_ids=cell_type_list, post_ids=cell_type_list)
connections_df_raw = (
    syn_df
    .groupby(['pre_pt_root_id', 'post_pt_root_id'])
    .count()[['id']]
    .rename(columns={'id': 'syn_count'})
    .sort_values(by='syn_count', ascending=False)
    .reset_index()
)

# exclude self-connections (autapses)
connections_df = connections_df_raw[connections_df_raw['pre_pt_root_id'] != connections_df_raw['post_pt_root_id']]
print(connections_df)

unique_connections_df = connections_df[['pre_pt_root_id', 'post_pt_root_id']].drop_duplicates()
number_connections_found = len(unique_connections_df)

# reciprocal connections: self-join where A -> B on the left matches B -> A on the right
reciprocal_df = pd.merge(
    unique_connections_df,
    unique_connections_df,
    left_on=['pre_pt_root_id', 'post_pt_root_id'],
    right_on=['post_pt_root_id', 'pre_pt_root_id'],
    suffixes=['_forward', '_backward'],
)

# remove duplicates (A->B and B->A count as one reciprocal pair)
reciprocal_df = reciprocal_df[reciprocal_df['pre_pt_root_id_forward'] < reciprocal_df['post_pt_root_id_forward']]
number_reciprocal_found = len(reciprocal_df)

print(reciprocal_df)

# connectivity calculations
# Ordered pairs of distinct cells: n * (n - 1).
total_possible_connections = number_cells * (number_cells - 1)
print('total_possible_connections:', total_possible_connections)
connectivity = (number_connections_found / total_possible_connections)
connectivity_percent = connectivity * 100
print(f'connectivity: {connectivity_percent:.4}%')
print('number_connections_found', number_connections_found)

# reciprocity calculations
# Chance level: both directions of an unordered pair are present independently.
reciprocity_rate_chance = (connectivity * connectivity) * 100
print(f'reciprocity by chance: {reciprocity_rate_chance:.4}%')
# Observed level: reciprocal pairs as a fraction of all unordered pairs.
reciprocity_rate_observed = (number_reciprocal_found / (total_possible_connections/2)) * 100
print(f'reciprocity observed: {reciprocity_rate_observed:.4}%')
print(f'reciprocal connections found: {number_reciprocal_found} / {total_possible_connections/2}')

# Compare with a tolerance: exact equality between two floating-point ratios
# is practically never true, which made the "as high as chance" branch dead.
# This is a descriptive comparison only, not a significance test.
if np.isclose(reciprocity_rate_observed, reciprocity_rate_chance):
    print('reciprocity is as high as chance')
elif reciprocity_rate_observed > reciprocity_rate_chance:
    print('reciprocity is overrepresented')
else:
    print('reciprocity is underrepresented')

voxels: x=[161123], y=[120584], z=[20000]
voxels: x=[188206], y=[143310], z=[22500]
number 23P: 198
          pre_pt_root_id     post_pt_root_id  syn_count
0     864691135349839831  864691135801170018          4
1     864691135361314119  864691135779587517          4
2     864691135562842337  864691135885921648          4
3     864691135867009413  864691136330407914          4
4     864691135164707629  864691135562052065          4
...                  ...                 ...        ...
1887  864691136991328917  864691135763431478          1
1888  864691136991328917  864691135774202363          1
1889  864691135164707629  864691135347633055          1
1890  864691136991328917  864691135782542416          1
1891  864691137021996142  864691136604001745          1

[1892 rows x 3 columns]
     pre_pt_root_id_forward  post_pt_root_id_forward  pre_pt_root_id_backward  \
4        864691135763431478       864691135874782350       864691135874782350   
5        864691135350981847       8646911