## This notebook is generated entirely by Claude Code (Opus)

In [None]:
import sys
sys.path.insert(0, '/storage/fs/store1/shared/pub_utils/src')

from pub_utils.assemble import assemble_npp_connectome, get_npp_release_vector, get_npp_receptor_matrix

# Smoke-test configuration: FLP-1, using only the RipollSanchez2023 assets.
# These names are reused by the cells below, so they are defined once here.
neuropeptide = 'flp-1'
pairing_source = 'RipollSanchez2023'
release_sources = ['sequencing:RipollSanchez2023']
receptor_sources = ['sequencing:RipollSanchez2023']

# Step 1: fetch the FLP-1 release vector and print a short summary of it.
release = get_npp_release_vector(neuropeptide, release_sources)
print(
    "FLP-1 Release Vector (neurons expressing FLP-1):",
    f"  Shape: {release.shape}",
    f"  Sum (releasing neurons): {release.sum()}",
    f"  Non-NaN count: {release.notna().sum()}",
    sep="\n",
)

# Entries equal to 1 are the ones reported as releasing neurons.
releasing_neurons = release.loc[release.eq(1)].index.tolist()
print(f"  Neurons releasing FLP-1: {releasing_neurons}")
# Step 2: receptor matrix for FLP-1. The pairing source decides which
# receptors appear as columns (DMSR-7 is expected to be among them).
receptor = get_npp_receptor_matrix(neuropeptide, receptor_sources, pairing_source)
print(
    "FLP-1 Receptor Matrix (receptors responding to FLP-1):",
    f"  Shape: {receptor.shape}",
    f"  Receptors found: {receptor.columns.tolist()}",
    sep="\n",
)

# Zoom in on DMSR-7, tolerating its absence from the matrix.
if 'dmsr-7' not in receptor.columns:
    print("\nDMSR-7 not found in receptor matrix columns")
else:
    dmsr7_col = receptor['dmsr-7']
    # Entries equal to 1 are the ones reported as expressing neurons.
    expressing = dmsr7_col.loc[dmsr7_col.eq(1)].index.tolist()
    print("\nDMSR-7 expression:")
    print(f"  Sum (expressing neurons): {dmsr7_col.sum()}")
    print(f"  Neurons expressing DMSR-7: {expressing}")
    
# Assemble the full NPP connectome for FLP-1.
# The configuration variables defined at the top of the notebook are reused
# (instead of repeating the literals) so that changing the neuropeptide or
# the sources there cannot silently leave this cell testing something else.
connectome = assemble_npp_connectome(
    neuropeptide=neuropeptide,
    release_sources=release_sources,
    receptor_sources=receptor_sources,
    pairing_source=pairing_source,
    receptor_gate='or',
    output_format='binary'
)

print("FLP-1 Connectome (binary, all receptors):")
print(f"  Shape: {connectome.shape}")
print(f"  Total connections: {int(connectome.sum().sum())}")
print(f"  Source neurons: {(connectome.sum(axis=1) > 0).sum()}")
print(f"  Target neurons: {(connectome.sum(axis=0) > 0).sum()}")

# Collect the actual connections as (source, target) label pairs.
# The positions of all entries equal to 1 are located in one vectorised pass
# (row-major order, i.e. the same order a nested index/column loop visits)
# rather than doing one scalar `.loc` lookup per matrix cell.
src_pos, tgt_pos = connectome.eq(1).to_numpy().nonzero()
connections = list(zip(connectome.index[src_pos], connectome.columns[tgt_pos]))

print(f"\nConnections (source -> target) [{len(connections)} total]:")
for src, tgt in sorted(connections):
    print(f"  {src} -> {tgt}")
# Now test specifically the FLP-1 + DMSR-7 pair using the per_pair output.
# The configuration variables defined at the top of the notebook are reused
# so this cell always tests the same neuropeptide/sources as the other cells.
per_pair = assemble_npp_connectome(
    neuropeptide=neuropeptide,
    release_sources=release_sources,
    receptor_sources=receptor_sources,
    pairing_source=pairing_source,
    receptor_gate='or',
    output_format='per_pair'
)

# `per_pair` is iterated as a mapping of receptor name -> connectivity matrix.
# The loop variable is deliberately NOT called `receptor`: that name already
# holds the receptor matrix built earlier in the notebook and must not be
# overwritten by a plain string.
print("Per-receptor connectomes available:")
for receptor_name, matrix in per_pair.items():
    total_conn = int(matrix.sum().sum())
    print(f"  {receptor_name}: {total_conn} connections")

# Show FLP-1 -> DMSR-7 specific connections
print("\n" + "="*60)
print("FLP-1 -> DMSR-7 Connectome:")
print("="*60)
dmsr7_matrix = per_pair['dmsr-7']
print(f"Shape: {dmsr7_matrix.shape}")
print(f"Total connections: {int(dmsr7_matrix.sum().sum())}")

# Collect the connections as (source, target) label pairs. Entries equal to 1
# are located in one vectorised pass (row-major order) instead of one scalar
# `.loc` lookup per matrix cell.
src_pos, tgt_pos = dmsr7_matrix.eq(1).to_numpy().nonzero()
dmsr7_connections = list(
    zip(dmsr7_matrix.index[src_pos], dmsr7_matrix.columns[tgt_pos])
)

print(f"\nFLP-1/DMSR-7 connections ({len(dmsr7_connections)} total):")
for src, tgt in sorted(dmsr7_connections):
    print(f"  {src} -> {tgt}")
import pandas as pd
import numpy as np

def _describe_connectome(title, matrix):
    """Print the shape, total edge count and active source/target counts.

    A row (source) or column (target) counts as active when its sum is > 0.
    """
    per_target = matrix.sum()          # column sums (axis=0 is the default)
    per_source = matrix.sum(axis=1)    # row sums
    print(title)
    print(f"  Shape: {matrix.shape}")
    print(f"  Total connections: {int(per_target.sum())}")
    print(f"  Source neurons with outgoing: {(per_source > 0).sum()}")
    print(f"  Target neurons with incoming: {(per_target > 0).sum()}")


# Reference matrix shipped with the repository; the first CSV column holds
# the row labels.
preassembled_path = '/storage/fs/store1/shared/pub_utils/connectomes/preassembled/molecular/FLP-1_DMSR-7_RipollSanchez2023_longRange.csv'
preassembled = pd.read_csv(preassembled_path, index_col=0)
_describe_connectome("Preassembled FLP-1/DMSR-7 connectome:", preassembled)

# The freshly assembled counterpart: the dmsr-7 entry of the per-pair output.
assembled = per_pair['dmsr-7']
_describe_connectome("\nAssembled FLP-1/DMSR-7 connectome:", assembled)
# Detailed comparison of the preassembled and the freshly assembled matrices.
# Element-wise comparison is only possible on labels present in BOTH
# matrices, so both frames are aligned on the shared row/column labels.
common_idx = preassembled.index.intersection(assembled.index)
common_col = preassembled.columns.intersection(assembled.columns)

print(f"Common neurons in index: {len(common_idx)}")
print(f"Common neurons in columns: {len(common_col)}")

# Labels that exist in only one matrix are dropped by the alignment above.
# Report them explicitly; otherwise "Exact match: True" could be printed even
# though the two matrices do not cover the same neurons.
unshared_idx = preassembled.index.symmetric_difference(assembled.index, sort=False)
unshared_col = preassembled.columns.symmetric_difference(assembled.columns, sort=False)
if len(unshared_idx) > 0 or len(unshared_col) > 0:
    print(
        f"WARNING: {len(unshared_idx)} row label(s) and "
        f"{len(unshared_col)} column label(s) are not shared by both "
        "matrices and are excluded from the comparison"
    )

# Compare on common neurons
pre_aligned = preassembled.loc[common_idx, common_col]
asm_aligned = assembled.loc[common_idx, common_col]

# Boolean mask of mismatching cells; an exact match means the mask is empty.
diff = pre_aligned.ne(asm_aligned)
diff_count = int(diff.to_numpy().sum())
exact_match = diff_count == 0
print(f"\nExact match: {exact_match}")

if not exact_match:
    print(f"Number of differences: {diff_count}")

    # Show specific differences. Mismatch positions are taken straight from
    # the mask (row-major order) instead of re-testing every cell with `.loc`.
    row_pos, col_pos = diff.to_numpy().nonzero()
    for r, c in zip(row_pos, col_pos):
        src = pre_aligned.index[r]
        tgt = pre_aligned.columns[c]
        print(f"  {src} -> {tgt}: preassembled={pre_aligned.iat[r, c]}, assembled={asm_aligned.iat[r, c]}")
else:
    # Report the number of entries that were actually compared rather than a
    # hard-coded matrix size.
    n_rows, n_cols = pre_aligned.shape
    print(f"All {n_rows}x{n_cols} = {n_rows * n_cols:,} entries match exactly!")

# Additional validation: check source neurons (rows with at least one edge).
pre_sources = preassembled.index[(preassembled.sum(axis=1) > 0)].tolist()
asm_sources = assembled.index[(assembled.sum(axis=1) > 0)].tolist()
print(f"\nSource neurons (preassembled): {pre_sources}")
print(f"Source neurons (assembled): {asm_sources}")
# Compared as sets, like the targets below: the two matrices need not list
# their neurons in the same order for the sources to agree.
print(f"Sources match: {set(pre_sources) == set(asm_sources)}")

# Check target neurons (columns with at least one edge).
pre_targets = preassembled.columns[(preassembled.sum(axis=0) > 0)].tolist()
asm_targets = assembled.columns[(assembled.sum(axis=0) > 0)].tolist()
print(f"\nTarget neurons match: {set(pre_targets) == set(asm_targets)}")
print(f"Number of targets: {len(pre_targets)}")
# Additional validation: verify the intermediate data that feeds the
# FLP-1/DMSR-7 connectome (pairing table, release data, receptor data).
print("="*60)
print("Intermediate Data Validation")
print("="*60)

# 1. Pairing info validation: is there a row with ligand flp-1 and
#    receptor dmsr-7 in the pairing table?
from pub_utils.assemble import _load_npp_pairing_info
pairing = _load_npp_pairing_info('RipollSanchez2023')
flp1_dmsr7_pair = pairing[(pairing['ligand'] == 'flp-1') & (pairing['receptor'] == 'dmsr-7')]
pair_found = len(flp1_dmsr7_pair) > 0
print("\n1. Pairing Info (RipollSanchez2023):")
print(f"   FLP-1 / DMSR-7 pair found: {pair_found}")
if pair_found:
    print(f"   EC50: {flp1_dmsr7_pair['EC50'].values[0]}")

# 2. Release data validation
from pub_utils.assemble import _load_npp_release_data
# Work on a copy so that lower-casing the column names below does not modify
# the object returned by the loader.
release_data = _load_npp_release_data('sequencing', 'RipollSanchez2023').copy()
release_data.columns = release_data.columns.str.lower()
# Test for the column BEFORE indexing it; otherwise a missing column raises
# KeyError and the "exists" line below could never report False.
has_flp1_release = 'flp-1' in release_data.columns
if has_flp1_release:
    flp1_release = release_data['flp-1']
    releasing_neurons = flp1_release[flp1_release == 1].index.tolist()
else:
    releasing_neurons = []
print("\n2. Release Data (sequencing:RipollSanchez2023):")
print(f"   FLP-1 column exists: {has_flp1_release}")
print(f"   Neurons releasing FLP-1: {releasing_neurons}")

# 3. Receptor data validation
# NOTE(review): unlike the release data, the receptor column names are used
# as loaded (not lower-cased) — confirm the loader already returns lower-case
# receptor names.
from pub_utils.assemble import _load_npp_receptor_data
receptor_data = _load_npp_receptor_data('sequencing', 'RipollSanchez2023')
has_dmsr7_receptor = 'dmsr-7' in receptor_data.columns
if has_dmsr7_receptor:
    dmsr7_expr = receptor_data['dmsr-7']
    expressing_neurons = dmsr7_expr[dmsr7_expr == 1].index.tolist()
else:
    expressing_neurons = []
print("\n3. Receptor Data (sequencing:RipollSanchez2023):")
print(f"   DMSR-7 column exists: {has_dmsr7_receptor}")
print(f"   Neurons expressing DMSR-7: {len(expressing_neurons)}")

# 4. Verify connection count = sources × targets
expected_connections = len(releasing_neurons) * len(expressing_neurons)
actual_connections = int(assembled.sum().sum())
print("\n4. Connection Count Validation:")
print(f"   Expected: {len(releasing_neurons)} sources × {len(expressing_neurons)} targets = {expected_connections}")
print(f"   Actual: {actual_connections}")
print(f"   Match: {expected_connections == actual_connections}")

FLP-1 Release Vector (neurons expressing FLP-1):
  Shape: (302,)
  Sum (releasing neurons): 2
  Non-NaN count: 302
  Neurons releasing FLP-1: ['AVKL', 'AVKR']
FLP-1 Receptor Matrix (receptors responding to FLP-1):
  Shape: (302, 4)
  Receptors found: ['dmsr-5', 'dmsr-6', 'dmsr-7', 'frpr-7']

DMSR-7 expression:
  Sum (expressing neurons): 102
  Neurons expressing DMSR-7: ['ALML', 'ALMR', 'AVM', 'SDQL', 'SDQR', 'URBL', 'URBR', 'DVA', 'ALA', 'RIAL', 'RIAR', 'RIBL', 'RIBR', 'RICL', 'RICR', 'RIH', 'AVFL', 'AVFR', 'AVKL', 'AVKR', 'PVPL', 'PVPR', 'PVR', 'PVT', 'RID', 'PVCL', 'PVCR', 'AVAL', 'AVAR', 'AVBL', 'AVBR', 'AVDL', 'AVDR', 'SABD', 'SABVL', 'SABVR', 'RIML', 'RIMR', 'RMDDL', 'RMDDR', 'RMDVL', 'RMDVR', 'RMED', 'RMEV', 'RMHL', 'RMHR', 'SMDDL', 'SMDDR', 'SMDVL', 'SMDVR', 'DA01', 'DA02', 'DA03', 'DA04', 'DA05', 'DA06', 'DA07', 'DA08', 'DA09', 'DB01', 'DB02', 'DB03', 'DB04', 'DB05', 'DB06', 'DB07', 'AS01', 'AS02', 'AS03', 'AS04', 'AS05', 'AS06', 'AS07', 'AS08', 'AS09', 'AS10', 'AS11', 'PDB', 