In [2]:
import os
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import gcsfs
import pyarrow.feather as feather
from motif_utils import *
import netsci.metrics.motifs as nsm
import json

## Loading the graph

In [None]:
DATASET = 'fafb_783'
REGION = 'central_complex'
NETWORK = f'{DATASET}_{REGION}'

SYN_LOCAL_PATH = f'data/{NETWORK}/synapses.feather'
GCS_PATH = f'gs://sjcabs_2025_data/fafb/{REGION}/{DATASET}_{REGION}_synapses.feather'
META_PATH = f'gs://sjcabs_2025_data/fafb/{REGION}/{DATASET}_{REGION}_meta.feather'
FLY_VIS_PATH = f'data/flyvis_data/flyvis_synapse_df.csv'

LOAD_FLYVIS = True

if not LOAD_FLYVIS:
    print('Network Name:', NETWORK)

    if os.path.exists(SYN_LOCAL_PATH):
        print(f'Loading from local: {SYN_LOCAL_PATH}')
        synapses_df = pd.read_feather(SYN_LOCAL_PATH)
    else:
        print(f'Loading from GCS: {GCS_PATH}')
        import gcsfs
        gcs = gcsfs.GCSFileSystem(token='google_default')
        import pyarrow.feather as feather
        with gcs.open(GCS_PATH.replace('gs://', ''), 'rb') as f:
            synapses_df = feather.read_feather(f)
        with gcs.open(META_PATH.replace('gs://', ''), 'rb') as f:
            meta_df = feather.read_feather(f)

    # Add edges (multiple synapses between same neurons become edge weight)
    edge_counts = synapses_df.groupby(['pre', 'post']).size().reset_index(name='weight')

else:
    print(f'Loading from FlyVis data: {FLY_VIS_PATH}')
    synapses_df = pd.read_csv(FLY_VIS_PATH)
    print(f'  Columns: {list(synapses_df.columns)}')
    assert not synapses_df.duplicated(subset=['source_index', 'target_index']).any(), "Duplicates found! You actually DO need groupby."
    edge_counts = synapses_df.rename(columns={
        'source_index': 'pre',
        'target_index': 'post',
        'n_syn': 'weight'
    })


print(f'✓ Loaded {len(synapses_df):,} synapses')
synapses_df.head()

Loading from FlyVis data: data/flyvis_data/flyvis_synapse_df.csv
  Columns: ['du', 'dv', 'n_syn', 'n_syn_certainty', 'sign', 'source_index', 'source_type', 'source_u', 'source_v', 'target_index', 'target_type', 'target_u', 'target_v']
✓ Loaded 1,513,231 synapses


Unnamed: 0,du,dv,n_syn,n_syn_certainty,sign,source_index,source_type,source_u,source_v,target_index,target_type,target_u,target_v
0,0,0,40.0,5.859477,-1.0,0,R1,-15,0,5768,L1,-15,0
1,0,0,40.0,5.859477,-1.0,1,R1,-15,1,5769,L1,-15,1
2,0,0,40.0,5.859477,-1.0,2,R1,-15,2,5770,L1,-15,2
3,0,0,40.0,5.859477,-1.0,3,R1,-15,3,5771,L1,-15,3
4,0,0,40.0,5.859477,-1.0,4,R1,-15,4,5772,L1,-15,4


In [25]:
print(f'✓ Loaded {len(synapses_df):,} synapses')
G_connectome = nx.DiGraph()

for _, row in tqdm(edge_counts.iterrows()):
    G_connectome.add_edge(row['pre'], row['post'], weight=row['weight'])

print(f'✓ Built connectome graph')
print(f'  Nodes: {G_connectome.number_of_nodes():,}')
print(f'  Edges: {G_connectome.number_of_edges():,}')
print(f'  Density: {nx.density(G_connectome):.4f}')

mat_sparse = nx.to_scipy_sparse_array(G_connectome)
bin_mat_sparse = mat_sparse.copy()
bin_mat_sparse[bin_mat_sparse > 1] = 1

# Create mapping between node IDs and matrix indices
node_list = list(G_connectome.nodes())
node_id_to_index = {node_id: idx for idx, node_id in enumerate(node_list)}
index_to_node_id = {idx: node_id for idx, node_id in enumerate(node_list)}

✓ Loaded 1,820,042 synapses


1513231it [00:35, 43135.45it/s]


✓ Built connectome graph
  Nodes: 45,607
  Edges: 1,513,231
  Density: 0.0007


## Basic validation

In [7]:
# out_degree = G_connectome.out_degree(index_to_node_id[1290])
# out_degree_mat = bin_mat_sparse[1290, :].sum()
# print(out_degree, out_degree_mat)

for node in tqdm(G_connectome.nodes):
    assert G_connectome.out_degree(node) == bin_mat_sparse[node_id_to_index[node], :].sum()
    assert G_connectome.in_degree(node) == bin_mat_sparse[:, node_id_to_index[node]].sum()
    break

  0%|          | 0/45607 [00:00<?, ?it/s]


In [8]:
def stats(conn_mat: np.ndarray):
    print(f'total connections (synapses): {np.sum(conn_mat)}')
    print('total number of elements in the connectivity matrix (N^2):', conn_mat.size)
    print(f"Non-zero elements: {np.count_nonzero(conn_mat)}")
    print(f"Percentage of non-zero elements: {np.count_nonzero(conn_mat) / conn_mat.size * 100:.2f}%")

    max_post_idx, max_pre_idx = np.unravel_index(conn_mat.argmax(), conn_mat.shape)
    print(f'Max synapses between a single pair of neurons: {conn_mat[max_post_idx, max_pre_idx]} (from {max_pre_idx} to {max_post_idx})')


stats(mat_sparse.toarray())
print()
stats(bin_mat_sparse.toarray())

total connections (synapses): 1513231
total number of elements in the connectivity matrix (N^2): 2079998449
Non-zero elements: 1513231
Percentage of non-zero elements: 0.07%
Max synapses between a single pair of neurons: 1 (from 1 to 0)

total connections (synapses): 1513231
total number of elements in the connectivity matrix (N^2): 2079998449
Non-zero elements: 1513231
Percentage of non-zero elements: 0.07%
Max synapses between a single pair of neurons: 1 (from 1 to 0)


## Extraction

In [None]:
netsci_motif_keys = [12, 36, 6, 38, 14, 74, 98, 78, 102, 46, 108, 110, 238]
if LOAD_FLYVIS:
    output_dir = f'data/flyvis_data'
else:
    output_dir = f'data/{NETWORK}'
os.makedirs(output_dir, exist_ok=True)

if os.path.exists(f'{output_dir}/binary_fsl.json'):
    print('Loading pre-calculated motifs...')
    network_fsl = json.load(open(f'{output_dir}/binary_fsl.json'))
    network_fsl = {int(k): v for k, v in network_fsl.items()}
else:
    print('Calculating motifs...')
    n_reals, participating_nodes = nsm.motifs(bin_mat_sparse, algorithm='louzoun', participation=True)
    n_reals = n_reals[3:]
    participating_nodes = participating_nodes[3:]

    network_fsl = {netsci_motif_keys[i]: amount for (i, amount) in enumerate(n_reals)}
    fsl_fully_mapped = {netsci_motif_keys[i]: nodes for (i, nodes) in enumerate(participating_nodes)}

    with open(f'{output_dir}/binary_fsl.json', 'w') as f:
        json.dump({k: int(v) for k, v in network_fsl.items()}, f)

    save_motif_participation_nodes_h5(fsl_fully_mapped, f'{output_dir}/participation_nodes.h5')

In [None]:
network_fsl