## This notebook was generated entirely by Claude Code (Opus)
Candy read through each line of markdown and code as a sanity check.

In [None]:
import pub_utils as pu
import pandas as pd
import numpy as np

## Molecular Connectome Assembly

This notebook demonstrates how to assemble molecular connectomes by combining:
- **Release data**: Which neurons release a neurotransmitter/neuropeptide
- **Receptor data**: Which neurons express receptors for that molecule
- **Pairing info**: Which receptor-ligand pairs are valid

### Assembly Logic
```
Connectome[source, target] = Release[source, molecule] AND Receptor[target, receptor]
where (molecule, receptor) is a valid pair from pairing_info
```

## 1. Neurotransmitter (NT) Connectome Assembly

### Available Data Sources

**Release sources** (format: `method:dataset`):
- `literature:Bentley2016` - Monoamine markers (cat-2, tdc-1, tbh-1, tph-1, etc.)
- `reporter:Wang2024` - Fluorescent reporter data
- `staining:Wang2024` - Antibody staining data

**Receptor sources**:
- `sequencing:Fenyves2020` - ACh, GABA, Glutamate ionotropic receptors
- `reporter:HobertLab` - ACh metabotropic, GABA receptors
- `reporter:Muralidhara2025` - Dopamine receptors
- `literature:Bentley2016` - All monoamine receptors
- `literature:Dag2023` - Serotonin receptors

In [None]:
# Example 1: binary dopamine connectome.
# Release side: Bentley2016 literature data, 'synthesis' marker (cat-2).
# Receptor side: Muralidhara2025 reporter data.
dopamine_conn = pu.assemble_nt_connectome(
    'dopamine',
    output_format='binary',
    receptor_sources=['reporter:Muralidhara2025'],
    release_sources=['literature:Bentley2016'],
    release_markers=['synthesis'],
)

# Sanity check: list the rows whose row sum is positive, then the grand total.
dopamine_row_totals = dopamine_conn.sum(axis=1)
dopaminergic_neurons = dopamine_row_totals[dopamine_row_totals > 0].index.tolist()
print(f"Dopaminergic neurons: {dopaminergic_neurons}")

dopamine_total = int(dopamine_conn.sum().sum())
print(f"Total connections: {dopamine_total}")

In [None]:
# Example 2: serotonin connectome in 'count' format, i.e. each entry is the
# number of receptor types behind that connection.
serotonin_conn = pu.assemble_nt_connectome(
    'serotonin',
    output_format='count',
    receptor_sources=['literature:Dag2023'],
    release_sources=['literature:Bentley2016'],
    release_markers=['synthesis'],  # tph-1 marker
)

# Rows with a positive row sum.
serotonin_has_output = serotonin_conn.sum(axis=1).gt(0)
serotonergic = serotonin_conn.index[serotonin_has_output].tolist()
print(f"Serotonergic neurons: {serotonergic}")

# Largest single entry of the count matrix.
serotonin_max_count = int(serotonin_conn.max().max())
print(f"Max receptor types per connection: {serotonin_max_count}")

In [None]:
# Example 3: one dopamine connectome per receptor.
# With output_format='per_pair' the result is a dict of matrices keyed by receptor.
dopamine_per_receptor = pu.assemble_nt_connectome(
    'dopamine',
    output_format='per_pair',
    receptor_sources=['reporter:Muralidhara2025'],
    release_sources=['literature:Bentley2016'],
    release_markers=['synthesis'],
)

dopamine_receptor_names = list(dopamine_per_receptor.keys())
print(f"Receptors: {dopamine_receptor_names}")

# Report the total of each receptor-specific matrix.
for receptor, matrix in dopamine_per_receptor.items():
    n_connections = int(matrix.sum().sum())
    print(f"  {receptor}: {n_connections} connections")

In [None]:
# Example 4: plot the binary dopamine connectome assembled in Example 1.
# The figure handle is kept in `fig` so it can be reused or saved later.
fig = pu.plot_connectome_matrix(
    dopamine_conn,
    show_blocks=True,
    colorbar_label='Connection',
    title='Dopamine Connectome (Muralidhara2025 receptors)',
)

In [None]:
# Persist the Example 1 dopamine connectome together with a metadata record
# describing how it was assembled.
dopamine_conn_metadata = {
    'molecule': 'dopamine',
    'molecule_type': 'neurotransmitter',
    'release_markers': ['synthesis'],
    'release_sources': ['literature:Bentley2016'],
    'receptor_sources': ['reporter:Muralidhara2025'],
    # NOTE(review): 'receptor_gate' and 'receptor_type' were not passed explicitly
    # in Example 1; presumably these are the assembler's defaults - confirm.
    'receptor_gate': 'or',
    'receptor_type': 'all',
    'output_format': 'binary',
    'description': 'Dopamine connectome using cat-2 synthesis marker and Muralidhara2025 receptor data',
}

pu.save_connectome(
    dopamine_conn,
    '../connectomes/dk_assembly/dopamine_conn_01.csv',
    metadata=dopamine_conn_metadata,
)
print("Saved connectome and metadata to connectomes/dk_assembly/dopamine_conn_01.csv[.json]")

## 2. Neuropeptide (NPP) Connectome Assembly

### Available Data Sources

**Release sources**:
- `literature:Bentley2016` - 31 neuropeptides
- `sequencing:RipollSanchez2023` - 108 neuropeptides

**Receptor sources**:
- `literature:Bentley2016` - 13 GPCRs
- `sequencing:RipollSanchez2023` - 138 GPCRs

**Pairing info sources**:
- `Altun2013` - 42 pairs
- `Bentley2016` - 22 pairs
- `RipollSanchez2023` - 92 pairs

In [None]:
# Example 5: binary connectome for a single neuropeptide (FLP-1).
# Release, receptor and pairing information all come from RipollSanchez2023.
flp1_conn = pu.assemble_npp_connectome(
    'flp-1',
    output_format='binary',
    pairing_source='RipollSanchez2023',
    receptor_sources=['sequencing:RipollSanchez2023'],
    release_sources=['sequencing:RipollSanchez2023'],
)

# Rows with a positive row sum.
flp1_row_totals = flp1_conn.sum(axis=1)
flp1_sources = flp1_row_totals[flp1_row_totals > 0].index.tolist()
print(f"FLP-1 releasing neurons: {flp1_sources}")

flp1_total = int(flp1_conn.sum().sum())
print(f"Total connections: {flp1_total}")

In [None]:
# Example 6: FLP-1 broken down per receptor (output_format='per_pair').
flp1_per_receptor = pu.assemble_npp_connectome(
    'flp-1',
    output_format='per_pair',
    pairing_source='RipollSanchez2023',
    receptor_sources=['sequencing:RipollSanchez2023'],
    release_sources=['sequencing:RipollSanchez2023'],
)

flp1_receptor_names = list(flp1_per_receptor.keys())
print(f"FLP-1 receptors: {flp1_receptor_names}")

# Report the total of each receptor-specific matrix.
for receptor, matrix in flp1_per_receptor.items():
    n_connections = int(matrix.sum().sum())
    print(f"  {receptor}: {n_connections} connections")

## 3. Mixing and Matching Data Sources

You can combine different release and receptor sources to create custom connectomes.

In [None]:
# Example 7: FLP-18 connectome built from two different datasets:
# literature-validated release (Bentley2016) combined with
# sequencing-based receptor expression (RipollSanchez2023).
flp18_mixed = pu.assemble_npp_connectome(
    'flp-18',
    output_format='binary',
    pairing_source='RipollSanchez2023',
    receptor_sources=['sequencing:RipollSanchez2023'],  # Sequencing-based
    release_sources=['literature:Bentley2016'],  # Literature-validated
)

# Rows with a positive row sum.
flp18_has_output = flp18_mixed.sum(axis=1).gt(0)
flp18_sources = flp18_mixed.index[flp18_has_output].tolist()
print(f"FLP-18 sources (Bentley2016 literature): {flp18_sources}")

flp18_total = int(flp18_mixed.sum().sum())
print(f"Connections with RipollSanchez2023 receptors: {flp18_total}")

## 4. Lower-Level Functions

Use `get_release_vector` and `get_receptor_matrix` to inspect intermediate data before assembly.

In [None]:
# Example 8: look at the two assembly inputs on their own.

# Release side: per-neuron dopamine release vector from the Bentley2016
# 'synthesis' marker (cat-2).
dop_release = pu.get_release_vector(
    'dopamine',
    sources=['literature:Bentley2016'],
    markers=['synthesis'],
)
dop_releasing_neurons = dop_release.index[dop_release == 1].tolist()
print("Dopamine-releasing neurons:")
print(dop_releasing_neurons)

# Receptor side: neuron-by-receptor expression matrix from Muralidhara2025.
dop_receptors = pu.get_receptor_matrix(
    'dopamine',
    sources=['reporter:Muralidhara2025'],
)
print(f"\nReceptor matrix shape: {dop_receptors.shape}")
print(f"Receptors: {dop_receptors.columns.tolist()}")

# Count the neurons whose 'dop-1' entry equals 1.
n_dop1_neurons = int(dop_receptors['dop-1'].eq(1).sum())
print(f"Neurons expressing dop-1: {n_dop1_neurons}")

## 5. Aggregating Multiple Connectomes

Combine multiple single-molecule connectomes into an aggregate.

In [None]:
# Example 9: Aggregate all monoamine connectomes
#
# Builds one 'count' connectome per monoamine (Bentley2016 for both the release
# and the receptor side) and sums them on the AllHermNeurons x AllHermNeurons grid.

# NOTE(review): `_load_pairing_info` is a private helper that nothing in this
# cell uses. The import is kept only in case other cells rely on the name;
# prefer moving it to the import cell at the top of the notebook, or dropping it.
from pub_utils.assemble import _load_pairing_info

monoamines = ['dopamine', 'serotonin', 'tyramine', 'octopamine']
aggregate = pd.DataFrame(0.0, index=pu.AllHermNeurons, columns=pu.AllHermNeurons)

for nt in monoamines:
    try:
        conn = pu.assemble_nt_connectome(
            nt,
            release_markers=['synthesis'],
            release_sources=['literature:Bentley2016'],
            receptor_sources=['literature:Bentley2016'],
            output_format='count'
        )
        # Put the matrix on the aggregate's exact grid before adding.
        # DataFrame.add aligns on the *union* of labels, so any neuron missing
        # from (or extra in) `conn` would otherwise inject NaN into the running
        # total and change its shape.
        conn_aligned = conn.reindex(
            index=aggregate.index, columns=aggregate.columns
        ).fillna(0)
        aggregate = aggregate.add(conn_aligned)
        # In 'count' format an entry is the number of receptor types for that
        # connection, so summing entries over-counts. A connection is any
        # non-zero entry.
        n_connections = int((conn_aligned > 0).sum().sum())
        print(f"{nt}: {n_connections} connections")
    except Exception as e:
        # Deliberate best-effort: a molecule that cannot be assembled from these
        # sources is reported and skipped rather than aborting the aggregation.
        print(f"{nt}: skipped ({e})")

# Number of neuron pairs connected by at least one monoamine.
n_aggregate_connections = int((aggregate > 0).sum().sum())
print(f"\nAggregate monoamine connectome: {n_aggregate_connections} total connections")