## Setup

In [1]:
cd ../../..

/pasteur/helix/projects/ml4ig_hot/Users/rtrimbou/ReCoN


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
#pip install .[grn-lite]

In [3]:
import numpy as np
import scanpy as sc  # single cell data
import pandas as pd  # data manipulation
import liana as li  # cell communication
import recon  # multilayer and perturbation prediction
import recon.data
import os
# set up chdir environment variable
os.chdir("/pasteur/helix/projects/ml4ig_hot/Users/rtrimbou/ReCoN/")

In [4]:
rna = sc.read_h5ad("./data/perturbation_tuto/rna.h5ad")

Let's check what cell types are present in this dataset

In [5]:
rna.obs["celltype"].unique().tolist()[:5]

['B_cell', 'ILC', 'Macrophage', 'MigDC', 'Monocyte']

## Create ReCoN's multilayer network

### Importing GRNs

You can either generate GRNs direclty with ReCoN or import a previously generated one.<br>

```{tip}
If you wish to generate it directly with ReCoN, please follow the turorial [________]_______.
```

```{warning}
You'll then require a python=3.10 conda environment, cf [Installation]_____.
```




In [6]:
grn_path = "./data/perturbation_tuto/grn.csv"
grn = pd.read_csv(grn_path)
grn = grn.sort_values(by="weight", ascending=False)[:500_000]
grn["source"] = grn["source"].str.capitalize()
grn["source"] = grn["source"] + '_TF'
grn["target"] = grn["target"].str.capitalize()
grn.head(3)

Unnamed: 0.1,Unnamed: 0,target,source,weight
0,0,Pax5,Mbd1_TF,9.5e-05
1,1,Pax5,Smad1_TF,9.2e-05
2,2,Pax5,Smad5_TF,9.2e-05


### Computing cell communication

The cell-cell communication is inferred through LIANA+, an external pakage dedicatedto this task

```{tip}
For information, you can check LIANA+ documentation: https://liana-py.readthedocs.io/en/latest/
```


In [7]:
li.method.cellphonedb(rna, 
            # NOTE by default the resource uses HUMAN gene symbols
            resource_name="mouseconsensus",
            expr_prop=0.00,
            use_raw=False,
            groupby="celltype",
            verbose=True, key_added='cpdb_res')


Using resource `mouseconsensus`.
Using `.X`!
15364 features of mat are empty, they will be removed.
Make sure that normalized counts are passed!
0.36 of entities in the resource are missing from the data.


Generating ligand-receptor stats for 1296 samples and 937 features


100%|██████████| 1000/1000 [00:01<00:00, 623.42it/s]


```{warning}
ReCoN simply requires to rename the columns of the output dataframe of LIANA.
```

We rename ligand and receptors as 'source' and 'ligands', connected cell types as 'celltype_source' and 'celltype_target', and the scores as 'weight'.


In [8]:
ccc_network = rna.uns["cpdb_res"].copy()
ccc_network = ccc_network[["ligand", "receptor", "lr_means", "source", "target"]]
ccc_network = ccc_network.rename(columns={
    "lr_means": "weight",
    "source": "celltype_source",
    "target": "celltype_target",
    "ligand": "source",
    "receptor": "target"
})
ccc_network = ccc_network[ccc_network['weight'] != 0]

In [9]:

ccc_network.head(3)

Unnamed: 0,source,target,weight,celltype_source,celltype_target
406685,App,Cd74,102.485008,cDC2,cDC1
405645,Copa,Cd74,102.370003,cDC1,cDC1
410237,Copa,Cd74,102.366211,eTAC,cDC1


### Add receptor & receptor - target genes informations

In [10]:
receptor_genes = recon.data.load_data.load_receptor_genes("mouse_receptor_gene_from_NichenetPKN")

genes = np.unique(grn['source'].tolist() + grn['target'].tolist())
receptor_genes = receptor_genes[receptor_genes['target'].isin(genes)]
receptor_genes.head()

Unnamed: 0,source,target,weight
2,A1bg,Abca1,0.005156
3,A1bg,Abcb1a,0.005877
4,A1bg,Abcb1b,0.005877
7,A1bg,Acsl1,0.005915
8,A1bg,Adk,0.005092


In [11]:
receptor_genes

Unnamed: 0,source,target,weight
2,A1bg,Abca1,0.005156
3,A1bg,Abcb1a,0.005877
4,A1bg,Abcb1b,0.005877
7,A1bg,Acsl1,0.005915
8,A1bg,Adk,0.005092
...,...,...,...
706631,Xcr1,Tmem14c,0.009901
706632,Xcr1,Tmem63a,0.009302
706633,Xcr1,Tnfrsf22,0.005791
706634,Xcr1,Tnfrsf23,0.005791


In [12]:
grn.source.nunique()

360

In [13]:
# Define seed genes (example: TNF-alpha signaling genes)
seed_genes = ["Nfkb1", "Tnf", "Il6", "Ccl2", "Cxcl10"]

# Add celltype suffix for seeds - use a cell type that exists in the data
focal_celltype = "Macrophage"  # Changed from Fibroblast to match available data
seeds_with_suffix = {f"{g}::{focal_celltype}": 1.0 for g in seed_genes}

## Assemble the multicellular network

There is many modifiable parameters:

In [14]:
cell_communication_graph_directed = False
cell_communication_graph_weighted = True
restart_proba = 0.6
ccc_proba = 0.5
grn_graph_weighted = True
grn_graph_directed = False

In [15]:
import recon.explore

In [16]:
celltypes=["B_cell", "pDC", "Macrophage", "NK_cell", "T_cell_CD4", "T_cell_CD8"]    # list of cell types to include in the analysis

multicell = recon.explore.Multicell(
    celltypes = {celltype: recon.explore.Celltype(
#        receptor_graph = receptor_layer,
        grn_graph = grn,
        receptor_grn_bipartite = receptor_genes,
        celltype_name = celltype,
        receptor_graph_directed=False,
        receptor_graph_weighted=False,
        grn_graph_directed=grn_graph_directed,
        grn_graph_weighted=grn_graph_weighted,
        receptor_grn_bipartite_graph_directed=False,
        receptor_grn_bipartite_graph_weighted=True,
        seeds = [])  # we can either pass a dictionary of Celltype objects, or build them on the fly
        for celltype in celltypes},
    cell_communication_graph = ccc_network.iloc[ccc_network["celltype_source"].isin(celltypes).values & ccc_network["celltype_target"].isin(celltypes).values, :],
    cell_communication_graph_directed=cell_communication_graph_directed,
    cell_communication_graph_weighted=cell_communication_graph_weighted,
    # bipartite parameters can be -1, 0, 1 here
    bipartite_grn_cell_communication_directed=False,
    bipartite_grn_cell_communication_weighted=False,
    bipartite_cell_communication_receptor_directed=False,
    bipartite_cell_communication_receptor_weighted=False,
    seeds = seeds_with_suffix,
)

                No receptor_graph provided,
                an empty receptor graph will be created.
                
The keys of the dictionary will be the celltype names.


Now, we need to precise the kind of exploration we want to do : upstream/downstream of gene set activation, and intracellular only or extracellular.

In [17]:
multicell.lamb = recon.explore.set_lambda(
    multicell,
    direction="upstream",
    strategy="intercell",
)

```{tip}
Alternatively, you can modify lambda transition probabilities freely, to modulate GRN and CCC exploration
```

### We now run the analysis through random walks with restart

In [18]:
# Create multiXrank object
multilayer = multicell.Multixrank(
    restart_proba=restart_proba
)

# Run random walk with restart
results = multilayer.random_walk_rank()

# Format results as gene profiles per cell type
cell_type_profiles = recon.explore.format_multicell_results(results, celltypes=celltypes)

Seeds are provided as a dictionary with weights per seed.
Creating a multixrank object with seeds as a dictionary.
cell_communication
receptor
gene
receptor
gene
receptor
gene
receptor
gene
receptor
gene
receptor
gene
Identifying produced ligands in response to the perturbation.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


## Debug: Copy of sankey_paths functions to debug empty results

Let's copy all the functions used to build partial networks here to see what's happening:

In [19]:
# Helper functions copied from sankey_paths.py for debugging

def get_top_tfs(results_df, cell_type, n=5):
    """Get top n TFs for a cell type from results."""
    results = results_df.copy()
    if results.empty:
        return results
    results.loc[:, "celltype"] = results["multiplex"].str.split("_", expand=True)[0]
    results = results[results["celltype"]==cell_type]
    results = results[results["node"].str.endswith(f"_TF::{cell_type}")]
    print(f"Top TFs found: {len(results)} rows")
    return results.iloc[:n, :]

def get_top_receptors(results_df, cell_type, n=5):
    """Get top n receptors for a cell type from results."""
    results = results_df.copy()
    if results.empty:
        return results
    results.loc[:, "celltype"] = results.loc[:, "multiplex"].str.split("_", expand=True)[0]
    results = results[results["celltype"]==cell_type]
    # remove fake node
    results = results[results["node"]!=f"fake_receptor::{cell_type}"]
    results = results[results["node"].str.endswith(f"_receptor::{cell_type}")]
    print(f"Top receptors found: {len(results)} rows")
    return results.iloc[:n, :]

def get_top_ligands(results_df, receptor_ligand_df, n=5, per_celltype=False):
    """Get top n ligands from results."""
    cc_scores = results_df.loc[results_df["multiplex"] == "cell_communication", :].copy()
    if cc_scores.empty:
        print("WARNING: No cell_communication rows in results!")
        return cc_scores
    
    # Standardize node names: "GENE-CellType" → "GENE::CellType"
    def _hyphen_to_doublecolon(x):
        if "-" in x and "::" not in x:
            gene_part, cell_part = x.rsplit("-", 1)
            return f"{gene_part}::{cell_part}"
        else:
            return x
    
    cc_scores.loc[:, "node_std"] = cc_scores.loc[:, "node"].astype(str).apply(_hyphen_to_doublecolon)
    
    # Find ligands that appear in receptor_ligand_df
    ligands_in_pairs = set(receptor_ligand_df.loc[:, "ligand"].astype(str).unique())
    print(f"Ligands in receptor_ligand_df: {len(ligands_in_pairs)}")
    
    cc_scores = cc_scores.loc[cc_scores.loc[:,"node_std"].isin(ligands_in_pairs), :].copy()
    print(f"Top ligands after filtering: {len(cc_scores)} rows")
    
    if cc_scores.empty:
        return cc_scores
    
    cc_scores.loc[:, "ligand_celltype"] = cc_scores.loc[:, "node_std"].str.split("::", n=1).str[-1]
    
    if per_celltype:
        top_ligand_names = (
            cc_scores
            .sort_values(["ligand_celltype", "score"], ascending=[True, False])
            .groupby("ligand_celltype", group_keys=False)["node_std"]
            .head(n)
            .tolist()
        )
    else:
        top_ligand_names = (
            cc_scores
            .sort_values("score", ascending=False)
            .head(n)["node_std"]
            .tolist()
        )
    
    filtered = cc_scores.loc[cc_scores.loc[:, "node_std"].isin(top_ligand_names), :].copy()
    return filtered.drop(columns=["node_std"]).reset_index(drop=True)

def extract_gene_tf_pairs(tf_gene_layer, top_tfs, seeds):
    """Extract TF-gene pairs from the GRN layer."""
    sources_list = seeds.tolist()
    targets_list = list(top_tfs["node"].values)
    
    print(f"Seeds (genes): {len(sources_list)} - {sources_list[:3]}")
    print(f"Top TFs: {len(targets_list)} - {targets_list[:3]}")
    print(f"TF-gene layer shape: {tf_gene_layer.shape}")
    print(f"TF-gene layer columns: {tf_gene_layer.columns.tolist()}")
    
    # Check what's in the layer
    print(f"Sample source values: {tf_gene_layer['source'].head(3).tolist()}")
    print(f"Sample target values: {tf_gene_layer['target'].head(3).tolist()}")
    
    filtered_df = tf_gene_layer[
        tf_gene_layer["source"].isin(targets_list) &
        tf_gene_layer["target"].isin(sources_list)
    ].copy()
    
    print(f"Filtered TF-gene pairs: {len(filtered_df)} rows")
    
    filtered_df = filtered_df.rename(columns={"source": "gene", "target": "tf"})
    filtered_df.loc[:, 'tf_clean'] = filtered_df['tf'].str.replace('_TF', '', regex=False)
    
    return filtered_df

In [20]:
# Now test step by step

print("=== STEP 1: Get cell communication layer ===")
cc_df = recon.plot.sankey_paths.get_cell_communication_layer(
    multicell,
    as_dataframe=True,
    ligand_cells=["pDC", "Macrophage"],
    receptor_cells=["Macrophage"]
)
print(f"Cell communication layer shape: {cc_df.shape}")
print(cc_df.head(3))

print("\n=== STEP 2: Get top ligands ===")
top_ligands = get_top_ligands(results, cc_df, n=500, per_celltype=True)
print(f"Top ligands shape: {top_ligands.shape}")
print(top_ligands.head(3) if not top_ligands.empty else "EMPTY!")

print("\n=== STEP 3: Get top receptors ===")
top_receptors = get_top_receptors(results, cell_type="Macrophage", n=500)
print(f"Top receptors shape: {top_receptors.shape}")
print(top_receptors.head(3) if not top_receptors.empty else "EMPTY!")

print("\n=== STEP 4: Get top TFs ===")
top_tfs = get_top_tfs(results, cell_type="Macrophage", n=500)
print(f"Top TFs shape: {top_tfs.shape}")
print(top_tfs.head(3) if not top_tfs.empty else "EMPTY!")

=== STEP 1: Get cell communication layer ===
Cell communication layer shape: (1231, 7)
       ligand          receptor             receptor_clean     weight  \
0   Cd74::pDC   App::Macrophage   App_receptor::Macrophage  28.699999   
1   Cd74::pDC  Copa::Macrophage  Copa_receptor::Macrophage  28.615000   
2  Itgal::pDC  Lyz2::Macrophage  Lyz2_receptor::Macrophage  17.995003   

  celltype_source celltype_target         network_key  
0      Macrophage             pDC  cell_communication  
1      Macrophage             pDC  cell_communication  
2      Macrophage             pDC  cell_communication  

=== STEP 2: Get top ligands ===
Ligands in receptor_ligand_df: 480
Top ligands after filtering: 480 rows
Top ligands shape: (480, 5)
            multiplex              node               layer         score  \
0  cell_communication  Abca1-Macrophage  cell_communication  7.494544e-06   
1  cell_communication         Abca1-pDC  cell_communication  6.173352e-07   
2  cell_communication         A

In [21]:
print("\n=== STEP 5: Get TF-gene layer (GRN) ===")
tf_gene_df = recon.plot.sankey_paths.get_celltype_gene_layer(
    multicell_obj=multicell,
    cell_type="Macrophage",
    layer_name="gene",
    as_dataframe=True
)
print(f"TF-gene layer shape: {tf_gene_df.shape}")
print(f"Columns: {tf_gene_df.columns.tolist()}")
print("First 3 rows:")
print(tf_gene_df.head(3))

print("\n=== STEP 6: Extract gene-TF pairs ===")
seeds_prefixed = pd.Series([f"{gene}::Macrophage" for gene in seed_genes])
print(f"Seeds with prefix: {seeds_prefixed.tolist()}")

gene_tf_pairs = extract_gene_tf_pairs(tf_gene_df, top_tfs, seeds_prefixed)
print(f"\nGene-TF pairs found: {len(gene_tf_pairs)}")
print(gene_tf_pairs if not gene_tf_pairs.empty else "EMPTY!")


=== STEP 5: Get TF-gene layer (GRN) ===
TF-gene layer shape: (500000, 5)
Columns: ['Unnamed: 0', 'target', 'source', 'weight', 'network_key']
First 3 rows:
   Unnamed: 0            target                source    weight  \
0           0  Pax5::Macrophage   Mbd1_TF::Macrophage  0.000095   
1           1  Pax5::Macrophage  Smad1_TF::Macrophage  0.000092   
2           2  Pax5::Macrophage  Smad5_TF::Macrophage  0.000092   

      network_key  
0  Macrophage_grn  
1  Macrophage_grn  
2  Macrophage_grn  

=== STEP 6: Extract gene-TF pairs ===
Seeds with prefix: ['Nfkb1::Macrophage', 'Tnf::Macrophage', 'Il6::Macrophage', 'Ccl2::Macrophage', 'Cxcl10::Macrophage']
Seeds (genes): 5 - ['Nfkb1::Macrophage', 'Tnf::Macrophage', 'Il6::Macrophage']
Top TFs: 360 - ['Ahctf1_TF::Macrophage', 'Ahr_TF::Macrophage', 'Ar_TF::Macrophage']
TF-gene layer shape: (500000, 5)
TF-gene layer columns: ['Unnamed: 0', 'target', 'source', 'weight', 'network_key']
Sample source values: ['Mbd1_TF::Macrophage', 'Smad1_TF

In [22]:
print("\n=== STEP 7: Get receptor-TF bipartite layer ===")
receptor_tf_layer = recon.plot.sankey_paths.get_celltype_grn_receptor_bipartite(
    multicell_obj=multicell,
    cell_type="Macrophage",
    as_dataframe=True
)
print(f"Receptor-TF layer shape: {receptor_tf_layer.shape}")
print(f"Columns: {receptor_tf_layer.columns.tolist()}")
print("First 3 rows:")
print(receptor_tf_layer.head(3))

print("\n=== STEP 8: Extract receptor-TF pairs ===")

def extract_receptor_tf_pairs_debug(receptor_gene_layer, top_tfs, top_receptors):
    """Extract receptor-TF pairs from the bipartite layer."""
    # Remove _TF suffix from TFs for matching
    sources_list = list(top_tfs["node"].values)
    sources_list_clean = ["".join(e.split("_TF")) for e in sources_list]
    
    targets_list = list(top_receptors["node"].values)
    
    print(f"Top TFs (cleaned): {len(sources_list_clean)} - {sources_list_clean[:3]}")
    print(f"Top receptors: {len(targets_list)} - {targets_list[:3]}")
    print(f"Receptor-gene layer shape: {receptor_gene_layer.shape}")
    print(f"Receptor-gene layer columns: {receptor_gene_layer.columns.tolist()}")
    
    # Check what's in the layer
    print(f"Sample col1 values: {receptor_gene_layer['col1'].head(3).tolist()}")
    print(f"Sample col2 values: {receptor_gene_layer['col2'].head(3).tolist()}")
    
    # Filter: col1 should match TFs (sources_list_clean), col2 should match receptors (targets_list)
    filtered_df = receptor_gene_layer[
        receptor_gene_layer.loc[:, "col1"].isin(targets_list) &
        receptor_gene_layer.loc[:, "col2"].isin(sources_list_clean)
    ].copy()
    
    print(f"Filtered receptor-TF pairs: {len(filtered_df)} rows")
    
    filtered_df = filtered_df.rename(columns={"col1": "tf", "col2": "receptor"})
    return filtered_df

receptor_tf_pairs = extract_receptor_tf_pairs_debug(receptor_tf_layer, top_tfs, top_receptors)
print(f"\nReceptor-TF pairs found: {len(receptor_tf_pairs)}")
print(receptor_tf_pairs.head(5) if not receptor_tf_pairs.empty else "EMPTY!")


=== STEP 7: Get receptor-TF bipartite layer ===
Receptor-TF layer shape: (434183, 4)
Columns: ['col1', 'col2', 'weight', 'network_key']
First 3 rows:
                        col1                col2    weight  \
2  A1bg_receptor::Macrophage   Abca1::Macrophage  0.005156   
3  A1bg_receptor::Macrophage  Abcb1a::Macrophage  0.005877   
4  A1bg_receptor::Macrophage  Abcb1b::Macrophage  0.005877   

                          network_key  
2  Macrophage_grn-Macrophage_receptor  
3  Macrophage_grn-Macrophage_receptor  
4  Macrophage_grn-Macrophage_receptor  

=== STEP 8: Extract receptor-TF pairs ===
Top TFs (cleaned): 360 - ['Ahctf1::Macrophage', 'Ahr::Macrophage', 'Ar::Macrophage']
Top receptors: 500 - ['A1bg_receptor::Macrophage', 'Abca1_receptor::Macrophage', 'Ackr3_receptor::Macrophage']
Receptor-gene layer shape: (434183, 4)
Receptor-gene layer columns: ['col1', 'col2', 'weight', 'network_key']
Sample col1 values: ['A1bg_receptor::Macrophage', 'A1bg_receptor::Macrophage', 'A1bg_recept

In [23]:
print("\n=== STEP 9: Check if top TFs are in the bipartite ===")

# Get all unique genes from col2 (these are the genes connected to receptors)
all_genes_in_bipartite = set(receptor_tf_layer['col2'].unique())
print(f"Total unique genes in bipartite col2: {len(all_genes_in_bipartite)}")
print(f"Sample genes in bipartite: {list(all_genes_in_bipartite)[:5]}")

# Get cleaned top TFs
top_tfs_cleaned = set(["".join(e.split("_TF")) for e in top_tfs["node"].values])
print(f"\nTotal top TFs (cleaned): {len(top_tfs_cleaned)}")
print(f"Sample top TFs (cleaned): {list(top_tfs_cleaned)[:5]}")

# Find intersection
matching_tfs = all_genes_in_bipartite.intersection(top_tfs_cleaned)
print(f"\nMatching TFs found in bipartite: {len(matching_tfs)}")
print(f"Matching TFs: {list(matching_tfs)[:10]}")

# Get all unique receptors from col1
all_receptors_in_bipartite = set(receptor_tf_layer['col1'].unique())
print(f"\nTotal unique receptors in bipartite col1: {len(all_receptors_in_bipartite)}")

# Get top receptors
top_receptors_set = set(top_receptors["node"].values)
print(f"Total top receptors: {len(top_receptors_set)}")

# Find intersection
matching_receptors = all_receptors_in_bipartite.intersection(top_receptors_set)
print(f"\nMatching receptors found in bipartite: {len(matching_receptors)}")
print(f"Matching receptors: {list(matching_receptors)[:10]}")

print("\n=== Summary ===")
print(f"We should find: {len(matching_receptors)} receptors × {len(matching_tfs)} TFs combinations")
print("(But only where actual edges exist in the bipartite)")

# Now let's manually filter to see what we get
manual_filter = receptor_tf_layer[
    receptor_tf_layer['col1'].isin(top_receptors_set) &
    receptor_tf_layer['col2'].isin(top_tfs_cleaned)
]
print(f"\nManual filtering result: {len(manual_filter)} receptor-TF pairs")
print(manual_filter.head(10) if not manual_filter.empty else "EMPTY!")


=== STEP 9: Check if top TFs are in the bipartite ===
Total unique genes in bipartite col2: 9892
Sample genes in bipartite: ['Ppp4c::Macrophage', 'Mboat1::Macrophage', 'Rab30::Macrophage', 'St8sia6::Macrophage', 'Ap2m1::Macrophage']

Total top TFs (cleaned): 360
Sample top TFs (cleaned): ['Klf6::Macrophage', 'Nfe2::Macrophage', 'Rbpj::Macrophage', 'Smad3::Macrophage', 'Yy1::Macrophage']

Matching TFs found in bipartite: 359
Matching TFs: ['Klf6::Macrophage', 'Nfe2::Macrophage', 'Rbpj::Macrophage', 'Smad3::Macrophage', 'Yy1::Macrophage', 'Homez::Macrophage', 'Sox13::Macrophage', 'Mecp2::Macrophage', 'Ikzf1::Macrophage', 'Hoxa3::Macrophage']

Total unique receptors in bipartite col1: 705
Total top receptors: 500

Matching receptors found in bipartite: 500
Matching receptors: ['Cd80_receptor::Macrophage', 'Lmbr1l_receptor::Macrophage', 'Mmp9_receptor::Macrophage', 'Il22ra2_receptor::Macrophage', 'Cd7_receptor::Macrophage', 'Il23r_receptor::Macrophage', 'Camk2a_receptor::Macrophage', 'Cdh

## Higher-level functions: build_partial_networks and plot functions

Now let's add the full pipeline functions with our FIXED extract functions:

In [24]:
import plotly.graph_objects as go
import hashlib
from typing import Tuple, Union, List

# FIXED extract functions with corrected filtering logic

def extract_gene_tf_pairs_fixed(tf_gene_layer, top_tfs, seeds):
    """Extract TF-gene pairs from the GRN layer (FIXED VERSION)."""
    # FIXED: TFs are in source, genes are in target
    tfs_list = list(top_tfs["node"].values)
    genes_list = seeds.tolist()
    
    filtered_df = tf_gene_layer[
        tf_gene_layer["source"].isin(tfs_list) &  # source = TFs
        tf_gene_layer["target"].isin(genes_list)  # target = genes
    ].copy()
    
    # Rename for downstream processing
    filtered_df = filtered_df.rename(columns={"source": "tf", "target": "gene"})
    filtered_df.loc[:, 'tf_clean'] = filtered_df['tf'].str.replace('_TF', '', regex=False)
    
    return filtered_df

def extract_receptor_tf_pairs_fixed(receptor_gene_layer, top_tfs, top_receptors):
    """Extract receptor-TF pairs from the bipartite layer (FIXED VERSION)."""
    # FIXED: col1 = receptors, col2 = genes (not TFs with _TF suffix)
    # Remove _TF suffix from TFs for matching
    tfs_list = list(top_tfs["node"].values)
    tfs_list_clean = ["".join(e.split("_TF")) for e in tfs_list]
    
    receptors_list = list(top_receptors["node"].values)
    
    filtered_df = receptor_gene_layer[
        receptor_gene_layer.loc[:, "col1"].isin(receptors_list) &      # col1 = receptors
        receptor_gene_layer.loc[:, "col2"].isin(tfs_list_clean)         # col2 = genes/TFs
    ].copy()
    
    # Rename for downstream processing
    filtered_df = filtered_df.rename(columns={"col1": "receptor", "col2": "tf"})
    return filtered_df

def build_partial_networks_fixed(
    multicell_obj,
    results,
    cell_type: str,
    seeds: Union[List[str], pd.Series],
    ligand_cells: List[str] = None,
    top_ligand_n: int = 100,
    top_receptor_n: int = 30,
    top_tf_n: int = 10,
    before_top_n: int = 5,
    per_celltype: bool = True,
    include_before_cells: bool = False
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Construct partial or full network layers needed for Sankey plots (FIXED VERSION).
    
    Returns tuple of 5 DataFrames:
      (before_receptor_tf_df, before_tf_ligand_df,
       receptor_ligand_df, receptor_tf_df, gene_tf_df)
    """
    # Get cell communication layer
    cc_df = recon.plot.sankey_paths.get_cell_communication_layer(
        multicell_obj,
        as_dataframe=True,
        ligand_cells=ligand_cells,
        receptor_cells=[cell_type])

    # Get top nodes
    top_ligands = get_top_ligands(results, cc_df, n=top_ligand_n, per_celltype=per_celltype)
    top_receptors = get_top_receptors(results, cell_type=cell_type, n=top_receptor_n)
    top_tfs = get_top_tfs(results, cell_type=cell_type, n=top_tf_n)

    # Extract receptor-ligand pairs
    receptor_ligand_top = recon.plot.sankey_paths.extract_receptor_ligand_pairs(
        receptor_ligand_df=cc_df,
        top_ligands_df=top_ligands,
        top_receptors_df=top_receptors
    )

    # Get layers
    tf_gene_df = recon.plot.sankey_paths.get_celltype_gene_layer(
        multicell_obj=multicell_obj,
        cell_type=cell_type,
        layer_name="gene",
        as_dataframe=True
    )

    receptor_tf_df = recon.plot.sankey_paths.get_celltype_grn_receptor_bipartite(
        multicell_obj=multicell_obj,
        cell_type=cell_type,
        as_dataframe=True
    )

    # Extract pairs using FIXED functions
    seeds_prefixed = pd.Series([f"{gene}::{cell_type}" for gene in seeds])
    gene_tf_pairs = extract_gene_tf_pairs_fixed(tf_gene_df, top_tfs, seeds_prefixed)
    receptor_tf_pairs = extract_receptor_tf_pairs_fixed(receptor_tf_df, top_tfs, top_receptors)

    print(f"Gene-TF pairs extracted: {len(gene_tf_pairs)}")
    print(f"Receptor-TF pairs extracted: {len(receptor_tf_pairs)}")

    if not include_before_cells:
        # Return empty frames for before-layers
        return (
            pd.DataFrame(columns=["receptor", "tf", "weight"]),
            pd.DataFrame(columns=["tf_clean", "gene", "weight"]),
            receptor_ligand_top,
            receptor_tf_pairs,
            gene_tf_pairs
        )

    # Otherwise build full before-cell layers
    before_cell_types = top_ligands["ligand_celltype"].unique()
    all_before_receptor_tf_pairs = []
    all_before_gene_tf_pairs = []

    for before_cell_type in before_cell_types:
        before_top_receptors = get_top_receptors(results, cell_type=before_cell_type, n=before_top_n)
        before_top_tfs = get_top_tfs(results, cell_type=before_cell_type, n=before_top_n)

        before_tf_gene_df = recon.plot.sankey_paths.get_celltype_gene_layer(
            multicell_obj, before_cell_type, "gene", as_dataframe=True)
        before_receptor_tf_df = recon.plot.sankey_paths.get_celltype_grn_receptor_bipartite(
            multicell_obj, before_cell_type, as_dataframe=True)

        before_gene_tf_pairs = extract_gene_tf_pairs_fixed(
            tf_gene_layer=before_tf_gene_df,
            top_tfs=before_top_tfs,
            seeds=receptor_ligand_top[
                receptor_ligand_top["celltype_source"] == before_cell_type
            ]["ligand"].values
        )

        before_receptor_tf_pairs = extract_receptor_tf_pairs_fixed(
            receptor_gene_layer=before_receptor_tf_df,
            top_tfs=before_top_tfs,
            top_receptors=before_top_receptors
        )

        all_before_receptor_tf_pairs.append(before_receptor_tf_pairs)
        all_before_gene_tf_pairs.append(before_gene_tf_pairs)

    all_before_receptor_tf_df = pd.concat(all_before_receptor_tf_pairs, ignore_index=True)
    all_before_gene_tf_df = pd.concat(all_before_gene_tf_pairs, ignore_index=True)

    return (
        all_before_receptor_tf_df,
        all_before_gene_tf_df,
        receptor_ligand_top,
        receptor_tf_pairs,
        gene_tf_pairs
    )

print("Fixed build_partial_networks function loaded!")

Fixed build_partial_networks function loaded!


In [25]:
print("\n=== TEST: Run build_partial_networks with FIXED functions ===")

# Test without before-cells first (simpler)
networks = build_partial_networks_fixed(
    multicell_obj=multicell,
    results=results,
    cell_type="Macrophage",
    seeds=seed_genes,
    ligand_cells=["pDC", "Macrophage"],
    top_ligand_n=10,
    top_receptor_n=10,
    top_tf_n=10,
    per_celltype=True,
    include_before_cells=False
)

print("\n=== Network sizes ===")
print(f"before_receptor_tf: {len(networks[0])} rows (should be 0 - not included)")
print(f"before_tf_ligand: {len(networks[1])} rows (should be 0 - not included)")
print(f"receptor_ligand: {len(networks[2])} rows")
print(f"receptor_tf: {len(networks[3])} rows")
print(f"gene_tf: {len(networks[4])} rows")

print("\n=== Sample data ===")
if len(networks[2]) > 0:
    print("receptor_ligand (first 3):")
    print(networks[2].head(3))
if len(networks[3]) > 0:
    print("\nreceptor_tf (first 3):")
    print(networks[3].head(3))
if len(networks[4]) > 0:
    print("\ngene_tf (first 3):")
    print(networks[4].head(3))


=== TEST: Run build_partial_networks with FIXED functions ===
Ligands in receptor_ligand_df: 480
Top ligands after filtering: 480 rows
Top receptors found: 705 rows
Top TFs found: 360 rows
Gene-TF pairs extracted: 11
Receptor-TF pairs extracted: 11

=== Network sizes ===
before_receptor_tf: 0 rows (should be 0 - not included)
before_tf_ligand: 0 rows (should be 0 - not included)
receptor_ligand: 0 rows
receptor_tf: 11 rows
gene_tf: 11 rows

=== Sample data ===

receptor_tf (first 3):
                     receptor                  tf    weight  \
13  A1bg_receptor::Macrophage     Ahr::Macrophage  0.005332   
32  A1bg_receptor::Macrophage      Ar::Macrophage  0.006002   
34  A1bg_receptor::Macrophage  Arid5b::Macrophage  0.005737   

                           network_key  
13  Macrophage_grn-Macrophage_receptor  
32  Macrophage_grn-Macrophage_receptor  
34  Macrophage_grn-Macrophage_receptor  

gene_tf (first 3):
       Unnamed: 0               gene                     tf    weight  \


## Sankey Plotting Functions

Now let's add the actual plotting functions that use the fixed network building functions:

In [26]:
def plot_3layer_sankey_fixed(
    receptor_tf_df: pd.DataFrame,
    gene_tf_df: pd.DataFrame,
    cell_type: Union[str, None] = None,
    flow: str = "upstream",
    color: str = "rgba(160, 160, 160, 0.4)",
    save_path=None
):
    """Plot 3-layer Sankey: Receptor → TF → Gene"""
    def format_links(df, source_col, target_col):
        return df.loc[:, [source_col, target_col, "weight"]].rename(columns={
            source_col: "source",
            target_col: "target",
            "weight": "value"
        })

    r_t = format_links(receptor_tf_df, "receptor", "tf")
    t_g = format_links(gene_tf_df, "tf_clean", "gene")

    # Filter to connected components
    r_t = r_t[r_t["target"].isin(t_g["source"])]
    t_g = t_g[t_g["source"].isin(r_t["target"])]

    if flow.lower() == "downstream":
        for df in [r_t, t_g]:
            df[["source", "target"]] = df[["target", "source"]]

    r_t["color"] = color
    t_g["color"] = color

    # Normalize weights
    for df in [r_t, t_g]:
        total = df["value"].sum()
        if total > 0:
            df["value"] /= total

    links = pd.concat([r_t, t_g], ignore_index=True)

    all_nodes = pd.unique(links[["source", "target"]].values.ravel())
    node_idx = {name: i for i, name in enumerate(all_nodes)}
    links["source_idx"] = links["source"].map(node_idx)
    links["target_idx"] = links["target"].map(node_idx)

    def _format_label(x: str) -> str:
        parts = x.split("::", 1)
        return parts[0] if len(parts) == 2 else x

    labels = [_format_label(n) for n in all_nodes]

    sankey_data = go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=labels
        ),
        link=dict(
            source=links["source_idx"],
            target=links["target_idx"],
            value=links["value"],
            color=links["color"]
        ),
        orientation="h"
    )

    title_text = (
        f"Top regulators in {cell_type}: Receptor → TF → Gene"
        if flow.lower() == "upstream"
        else f"Top regulators in {cell_type}: Gene → TF → Receptor"
    )
    layer_names = (
        ["Receptors", "TFs", "Genes"]
        if flow.lower() == "upstream"
        else ["Genes", "TFs", "Receptors"]
    )
    x_positions = [0.0, 0.5, 1.0]

    fig = go.Figure(data=[sankey_data])
    fig.update_layout(title_text=title_text, font_size=14, font_color="black")

    for x, name in zip(x_positions, layer_names):
        fig.add_annotation(
            x=x, y=-0.15,
            text=f"<b>{name}</b>",
            showarrow=False,
            font=dict(size=16)
        )

    if save_path:
        fig.write_html(save_path)

    fig.show()


def plot_4layer_sankey_fixed(
    ligand_receptor_df: pd.DataFrame,
    receptor_tf_df: pd.DataFrame,
    gene_tf_df: pd.DataFrame,
    flow: str = "upstream",
    save_path: Union[str, None] = None
):
    """Plot 4-layer Sankey: Ligand → Receptor → TF → Gene"""
    def format_links(df, source_col, target_col):
        return df.loc[:, [source_col, target_col, "weight"]].rename(columns={
            source_col: "source",
            target_col: "target",
            "weight": "value"
        })

    def hex_to_rgba(hex_color, alpha=0.6):
        h = hex_color.lstrip("#")
        return f"rgba({int(h[0:2], 16)}, {int(h[2:4], 16)}, {int(h[4:6], 16)}, {alpha})"

    def string_to_color(string):
        h = hashlib.md5(string.encode()).hexdigest()
        return "#" + h[:6]

    # Format links
    l_r = format_links(ligand_receptor_df, "ligand", "receptor_clean")
    r_t = format_links(receptor_tf_df, "receptor", "tf")
    t_g = format_links(gene_tf_df, "tf_clean", "gene")

    # Filter to connected components
    l_r = l_r[l_r["target"].isin(r_t["source"])]
    r_t = r_t[r_t["source"].isin(l_r["target"])]
    r_t = r_t[r_t["target"].isin(t_g["source"])]
    t_g = t_g[t_g["source"].isin(r_t["target"])]
    l_r = l_r[l_r["target"].isin(r_t["source"])]

    if flow.lower() == "downstream":
        for df in [l_r, r_t, t_g]:
            df[["source", "target"]] = df[["target", "source"]]

    # Assign colors by celltype
    def assign_group_colors(df, column):
        unique_types = df[column].str.extract(r"::(.+)$")[0].fillna("Unknown")
        return unique_types.apply(lambda ct: hex_to_rgba(string_to_color(ct)))

    l_r["color"] = assign_group_colors(l_r, "source")
    r_t["color"] = "rgba(100, 200, 100, 1)"
    t_g["color"] = "rgba(100, 200, 100, 1)"

    # Normalize weights
    for df in [l_r, r_t, t_g]:
        total = df["value"].sum()
        if total > 0:
            df["value"] /= total

    links = pd.concat([l_r, r_t, t_g], ignore_index=True)

    all_nodes = pd.unique(links[["source", "target"]].values.ravel())
    node_idx = {name: i for i, name in enumerate(all_nodes)}
    links["source_idx"] = links["source"].map(node_idx)
    links["target_idx"] = links["target"].map(node_idx)

    def _format_label(x: str) -> str:
        parts = x.split("::", 1)
        return parts[0].split("_")[0] if len(parts) == 2 else x

    labels = [_format_label(n) for n in all_nodes]

    sankey_data = go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=labels
        ),
        link=dict(
            source=links["source_idx"],
            target=links["target_idx"],
            value=links["value"],
            color=links["color"]
        ),
        orientation="h"
    )

    title_text = (
        "Ligand → Receptor → TF → Gene"
        if flow.lower() == "upstream"
        else "Gene → TF → Receptor → Ligand"
    )
    layer_names = (
        ["Ligands", "Receptors", "TFs", "Genes"]
        if flow.lower() == "upstream"
        else ["Genes", "TFs", "Receptors", "Ligands"]
    )
    x_positions = [0.0, 0.33, 0.66, 1.0]

    fig = go.Figure(data=[sankey_data])
    fig.update_layout(title_text=title_text, font_size=14, font_color="black")

    # Add color legend
    color_map = (
        pd.concat([l_r, r_t, t_g])[["source", "color"]]
        .dropna()
        .drop_duplicates()
    )
    color_map["celltype"] = color_map["source"].str.extract(r"::(.+)$")[0]
    color_map = color_map.dropna(subset=["celltype"])
    color_map = dict(zip(color_map["celltype"], color_map["color"]))

    for i, (ct, color) in enumerate(sorted(color_map.items())):
        fig.add_annotation(
            x=1.02, y=1.0 - i * 0.05,
            text=f"<b>{ct}</b>",
            showarrow=False,
            font=dict(size=12),
            bgcolor=color,
            bordercolor="black",
            borderwidth=0.5,
            align="left",
            xanchor="left"
        )

    for x, name in zip(x_positions, layer_names):
        fig.add_annotation(
            x=x, y=-0.15,
            text=f"<b>{name}</b>",
            showarrow=False,
            font=dict(size=16)
        )

    if save_path:
        fig.write_html(save_path)

    fig.show()


print("Sankey plotting functions loaded!")

Sankey plotting functions loaded!


In [27]:
def plot_intracell_sankey_fixed(
    multicell_obj,
    results,
    cell_type,
    seeds,
    top_receptor_n: int = 30,
    top_tf_n: int = 10,
    flow="upstream",
    save_path=None
):
    """
    Plot intracellular Sankey: Receptor → TF → Gene (FIXED VERSION)
    """
    networks = build_partial_networks_fixed(
        multicell_obj=multicell_obj,
        results=results,
        cell_type=cell_type,
        seeds=seeds,
        ligand_cells=[],
        top_receptor_n=top_receptor_n,
        top_tf_n=top_tf_n,
        include_before_cells=False
    )

    receptor_tf_df = networks[3]
    gene_tf_df = networks[4]

    plot_3layer_sankey_fixed(
        receptor_tf_df=receptor_tf_df,
        gene_tf_df=gene_tf_df,
        cell_type=cell_type,
        flow=flow,
        save_path=save_path
    )


def plot_ligand_sankey_fixed(
    multicell_obj,
    results,
    cell_type,
    seeds,
    ligand_cells,
    top_ligand_n: int = 100,
    top_receptor_n: int = 30,
    top_tf_n: int = 10,
    per_celltype: bool = True,
    flow="upstream",
    save_path=None
):
    """
    Plot ligand Sankey: Ligand → Receptor → TF → Gene (FIXED VERSION)
    """
    networks = build_partial_networks_fixed(
        multicell_obj=multicell_obj,
        results=results,
        cell_type=cell_type,
        seeds=seeds,
        ligand_cells=ligand_cells,
        top_ligand_n=top_ligand_n,
        top_receptor_n=top_receptor_n,
        top_tf_n=top_tf_n,
        per_celltype=per_celltype,
        include_before_cells=False
    )

    ligand_receptor_df = networks[2]
    receptor_tf_df = networks[3]
    gene_tf_df = networks[4]

    plot_4layer_sankey_fixed(
        ligand_receptor_df=ligand_receptor_df,
        receptor_tf_df=receptor_tf_df,
        gene_tf_df=gene_tf_df,
        flow=flow,
        save_path=save_path
    )


print("Wrapper functions loaded!")

Wrapper functions loaded!


## Test the FIXED Sankey Plots

Now let's test the fixed plotting functions with your data:

In [28]:
print("=== TEST 1: Intracellular Sankey (Receptor → TF → Gene) ===")

plot_intracell_sankey_fixed(
    multicell_obj=multicell,
    results=results,
    cell_type="Macrophage",
    seeds=seed_genes,
    top_receptor_n=20,
    top_tf_n=15,
    flow="upstream",
    save_path=None
)

=== TEST 1: Intracellular Sankey (Receptor → TF → Gene) ===
Ligands in receptor_ligand_df: 1202
Top ligands after filtering: 1202 rows
Top receptors found: 705 rows
Top TFs found: 360 rows
Gene-TF pairs extracted: 17
Receptor-TF pairs extracted: 32


In [29]:
print("=== TEST 2: Ligand Sankey (Ligand → Receptor → TF → Gene) ===")

plot_ligand_sankey_fixed(
    multicell_obj=multicell,
    results=results,
    cell_type="Macrophage",
    seeds=seed_genes,
    ligand_cells=["pDC", "Macrophage"],
    top_ligand_n=20,
    top_receptor_n=15,
    top_tf_n=10,
    per_celltype=True,
    flow="upstream",
    save_path=None
)

=== TEST 2: Ligand Sankey (Ligand → Receptor → TF → Gene) ===
Ligands in receptor_ligand_df: 480
Top ligands after filtering: 480 rows
Top receptors found: 705 rows
Top TFs found: 360 rows
Gene-TF pairs extracted: 11
Receptor-TF pairs extracted: 19


In [30]:
cell_type_profiles

celltype,B_cell,Macrophage,NK_cell,T_cell_CD4,T_cell_CD8,pDC
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A3galt2,4.444732e-12,1.259955e-08,2.683902e-11,1.362864e-11,1.299453e-12,2.404800e-12
A4galt,2.796036e-11,9.280250e-08,2.598037e-11,1.335376e-11,1.360601e-11,1.961332e-11
Aa467197,1.923007e-11,1.116939e-06,8.586074e-11,6.277994e-11,4.800427e-11,3.569001e-11
Aaas,4.714790e-11,2.208568e-06,1.004029e-10,8.759264e-11,6.005010e-11,1.038141e-10
Aacs,1.672407e-10,4.275225e-06,4.130415e-10,2.698474e-10,2.074770e-10,2.555063e-10
...,...,...,...,...,...,...
Zxdc,5.308747e-11,1.502681e-06,9.761166e-11,6.349777e-11,6.040384e-11,1.108970e-10
Zyg11b,4.446161e-11,9.693849e-07,1.260306e-10,8.263008e-11,5.804473e-11,7.198467e-11
Zyx,1.190913e-10,3.668198e-06,3.175372e-10,2.072795e-10,1.458688e-10,2.200037e-10
Zzef1,3.747627e-10,1.941463e-05,1.015000e-09,6.801624e-10,5.545905e-10,6.594296e-10


## Visualize Molecular Cascades

### 1. Intracellular Cascade (Receptor → TF → Gene)

Shows regulation within a single cell type.

In [31]:
results

Unnamed: 0,multiplex,node,layer,score
0,cell_communication,Abca1-B_cell,cell_communication,1.065214e-06
1,cell_communication,Abca1-Macrophage,cell_communication,7.494544e-06
2,cell_communication,Abca1-NK_cell,cell_communication,9.169113e-07
3,cell_communication,Abca1-T_cell_CD4,cell_communication,6.551031e-07
4,cell_communication,Abca1-T_cell_CD8,cell_communication,8.785732e-07
...,...,...,...,...
10523,T_cell_CD8_grn,Zxdc::T_cell_CD8,gene,6.040384e-11
10524,T_cell_CD8_grn,Zyg11b::T_cell_CD8,gene,5.804473e-11
10525,T_cell_CD8_grn,Zyx::T_cell_CD8,gene,1.458688e-10
10526,T_cell_CD8_grn,Zzef1::T_cell_CD8,gene,5.545905e-10


In [32]:
multicell.multiplexes["B_cell_grn"]

{'names': ['gene'],
 'graph_type': ['01'],
 'layers': [        Unnamed: 0          target             source    weight network_key
  0                0    Pax5::B_cell    Mbd1_TF::B_cell  0.000095  B_cell_grn
  1                1    Pax5::B_cell   Smad1_TF::B_cell  0.000092  B_cell_grn
  2                2    Pax5::B_cell   Smad5_TF::B_cell  0.000092  B_cell_grn
  3                3    Pax5::B_cell    Mbd2_TF::B_cell  0.000089  B_cell_grn
  4                4    Pax5::B_cell  Zfp128_TF::B_cell  0.000084  B_cell_grn
  ...            ...             ...                ...       ...         ...
  499995      499995   Gata1::B_cell  Zfp637_TF::B_cell  0.000001  B_cell_grn
  499996      499996  Ptpn18::B_cell  Zfp369_TF::B_cell  0.000001  B_cell_grn
  499997      499997  Ptpn18::B_cell  Zfp110_TF::B_cell  0.000001  B_cell_grn
  499998      499998   Cpne2::B_cell    Mafk_TF::B_cell  0.000001  B_cell_grn
  499999      499999   Lyrm7::B_cell  Tcf7l2_TF::B_cell  0.000001  B_cell_grn
  
  [50000

In [33]:
tf_gene_df = recon.plot.sankey_paths.get_celltype_gene_layer(
        multicell_obj=multicell,
        cell_type="Macrophage",
        layer_name="gene",
        as_dataframe=True
    )

tf_gene_df

Unnamed: 0.1,Unnamed: 0,target,source,weight,network_key
0,0,Pax5::Macrophage,Mbd1_TF::Macrophage,0.000095,Macrophage_grn
1,1,Pax5::Macrophage,Smad1_TF::Macrophage,0.000092,Macrophage_grn
2,2,Pax5::Macrophage,Smad5_TF::Macrophage,0.000092,Macrophage_grn
3,3,Pax5::Macrophage,Mbd2_TF::Macrophage,0.000089,Macrophage_grn
4,4,Pax5::Macrophage,Zfp128_TF::Macrophage,0.000084,Macrophage_grn
...,...,...,...,...,...
499995,499995,Gata1::Macrophage,Zfp637_TF::Macrophage,0.000001,Macrophage_grn
499996,499996,Ptpn18::Macrophage,Zfp369_TF::Macrophage,0.000001,Macrophage_grn
499997,499997,Ptpn18::Macrophage,Zfp110_TF::Macrophage,0.000001,Macrophage_grn
499998,499998,Cpne2::Macrophage,Mafk_TF::Macrophage,0.000001,Macrophage_grn


In [34]:
results

Unnamed: 0,multiplex,node,layer,score
0,cell_communication,Abca1-B_cell,cell_communication,1.065214e-06
1,cell_communication,Abca1-Macrophage,cell_communication,7.494544e-06
2,cell_communication,Abca1-NK_cell,cell_communication,9.169113e-07
3,cell_communication,Abca1-T_cell_CD4,cell_communication,6.551031e-07
4,cell_communication,Abca1-T_cell_CD8,cell_communication,8.785732e-07
...,...,...,...,...
10523,T_cell_CD8_grn,Zxdc::T_cell_CD8,gene,6.040384e-11
10524,T_cell_CD8_grn,Zyg11b::T_cell_CD8,gene,5.804473e-11
10525,T_cell_CD8_grn,Zyx::T_cell_CD8,gene,1.458688e-10
10526,T_cell_CD8_grn,Zzef1::T_cell_CD8,gene,5.545905e-10


In [35]:
receptor_tf_df = recon.plot.sankey_paths.get_celltype_grn_receptor_bipartite(

        multicell_obj=multicell,
        cell_type="Macrophage",
        as_dataframe=True
    )

receptor_tf_df

Unnamed: 0,col1,col2,weight,network_key
2,A1bg_receptor::Macrophage,Abca1::Macrophage,0.005156,Macrophage_grn-Macrophage_receptor
3,A1bg_receptor::Macrophage,Abcb1a::Macrophage,0.005877,Macrophage_grn-Macrophage_receptor
4,A1bg_receptor::Macrophage,Abcb1b::Macrophage,0.005877,Macrophage_grn-Macrophage_receptor
7,A1bg_receptor::Macrophage,Acsl1::Macrophage,0.005915,Macrophage_grn-Macrophage_receptor
8,A1bg_receptor::Macrophage,Adk::Macrophage,0.005092,Macrophage_grn-Macrophage_receptor
...,...,...,...,...
706631,Xcr1_receptor::Macrophage,Tmem14c::Macrophage,0.009901,Macrophage_grn-Macrophage_receptor
706632,Xcr1_receptor::Macrophage,Tmem63a::Macrophage,0.009302,Macrophage_grn-Macrophage_receptor
706633,Xcr1_receptor::Macrophage,Tnfrsf22::Macrophage,0.005791,Macrophage_grn-Macrophage_receptor
706634,Xcr1_receptor::Macrophage,Tnfrsf23::Macrophage,0.005791,Macrophage_grn-Macrophage_receptor


In [36]:
recon.plot.sankey_paths.build_partial_networks(
    multicell_obj=multicell,
    results=results,
    cell_type="Macrophage",
    seeds=seed_genes,
    ligand_cells=["pDC", "Macrophage"],
    top_ligand_n=50,
    top_receptor_n=50,
    top_tf_n=50,
    per_celltype=True,
    include_before_cells=False
    )

['Ahctf1_TF::Macrophage', 'Ahr_TF::Macrophage', 'Ar_TF::Macrophage', 'Arid2_TF::Macrophage', 'Arid3a_TF::Macrophage', 'Arid3b_TF::Macrophage', 'Arid5a_TF::Macrophage', 'Arid5b_TF::Macrophage', 'Arnt2_TF::Macrophage', 'Arnt_TF::Macrophage', 'Arntl_TF::Macrophage', 'Atf1_TF::Macrophage', 'Atf2_TF::Macrophage', 'Atf3_TF::Macrophage', 'Atf4_TF::Macrophage', 'Atf5_TF::Macrophage', 'Atf6_TF::Macrophage', 'Atf6b_TF::Macrophage', 'Atf7_TF::Macrophage', 'Bach1_TF::Macrophage', 'Bach2_TF::Macrophage', 'Batf3_TF::Macrophage', 'Batf_TF::Macrophage', 'Bbx_TF::Macrophage', 'Bcl11a_TF::Macrophage', 'Bcl11b_TF::Macrophage', 'Bcl6_TF::Macrophage', 'Bhlha15_TF::Macrophage', 'Bhlhe40_TF::Macrophage', 'Cdc5l_TF::Macrophage', 'Cebpa_TF::Macrophage', 'Cebpb_TF::Macrophage', 'Cebpd_TF::Macrophage', 'Cebpg_TF::Macrophage', 'Cenpb_TF::Macrophage', 'Cic_TF::Macrophage', 'Clock_TF::Macrophage', 'Creb1_TF::Macrophage', 'Creb3_TF::Macrophage', 'Creb3l2_TF::Macrophage', 'Crebzf_TF::Macrophage', 'Crem_TF::Macrophage

(Empty DataFrame
 Columns: [receptor, tf, weight]
 Index: [],
 Empty DataFrame
 Columns: [tf_clean, gene, weight]
 Index: [],
                   ligand            receptor               receptor_clean  \
 0              Cd74::pDC     App::Macrophage     App_receptor::Macrophage   
 1              Rpsa::pDC     App::Macrophage     App_receptor::Macrophage   
 2      Il6ra::Macrophage  Adam17::Macrophage  Adam17_receptor::Macrophage   
 3             Itgb1::pDC  Adam17::Macrophage  Adam17_receptor::Macrophage   
 4            Notch1::pDC  Adam17::Macrophage  Adam17_receptor::Macrophage   
 5             Itga4::pDC  Adam23::Macrophage  Adam23_receptor::Macrophage   
 6   Tnfrsf21::Macrophage     App::Macrophage     App_receptor::Macrophage   
 7     Notch1::Macrophage  Adam17::Macrophage  Adam17_receptor::Macrophage   
 8        Met::Macrophage  Adam17::Macrophage  Adam17_receptor::Macrophage   
 9             Itgb1::pDC   Adam9::Macrophage   Adam9_receptor::Macrophage   
 10            I

In [37]:
# Define which cell types can produce ligands (exclude the focal cell type)
ligand_source_cells = ["Macrophage", "pDC"]

recon.plot.sankey_paths.plot_intercell_sankey(
    multicell_obj=multicell,
    results=results,
    cell_type="Macrophage",
    ligand_cells=ligand_source_cells,  # Fixed: only other cell types
    seeds=[k.split("::")[0] for k,v in seeds_with_suffix.items()],
    top_receptor_n=100,
    top_tf_n=50,
    top_ligand_n=500,
    flow="upstream",  # Show: receptor → TF → gene
    save_path=None
)

['Ahctf1_TF::Macrophage', 'Ahr_TF::Macrophage', 'Ar_TF::Macrophage', 'Arid2_TF::Macrophage', 'Arid3a_TF::Macrophage', 'Arid3b_TF::Macrophage', 'Arid5a_TF::Macrophage', 'Arid5b_TF::Macrophage', 'Arnt2_TF::Macrophage', 'Arnt_TF::Macrophage', 'Arntl_TF::Macrophage', 'Atf1_TF::Macrophage', 'Atf2_TF::Macrophage', 'Atf3_TF::Macrophage', 'Atf4_TF::Macrophage', 'Atf5_TF::Macrophage', 'Atf6_TF::Macrophage', 'Atf6b_TF::Macrophage', 'Atf7_TF::Macrophage', 'Bach1_TF::Macrophage', 'Bach2_TF::Macrophage', 'Batf3_TF::Macrophage', 'Batf_TF::Macrophage', 'Bbx_TF::Macrophage', 'Bcl11a_TF::Macrophage', 'Bcl11b_TF::Macrophage', 'Bcl6_TF::Macrophage', 'Bhlha15_TF::Macrophage', 'Bhlhe40_TF::Macrophage', 'Cdc5l_TF::Macrophage', 'Cebpa_TF::Macrophage', 'Cebpb_TF::Macrophage', 'Cebpd_TF::Macrophage', 'Cebpg_TF::Macrophage', 'Cenpb_TF::Macrophage', 'Cic_TF::Macrophage', 'Clock_TF::Macrophage', 'Creb1_TF::Macrophage', 'Creb3_TF::Macrophage', 'Creb3l2_TF::Macrophage', 'Crebzf_TF::Macrophage', 'Crem_TF::Macrophage

### 2. Ligand-Receptor Cascade (Ligand → Receptor → TF → Gene)

Shows how ligands from other cells regulate the focal cell type.

In [38]:
import importlib
import recon.plot.sankey_paths
importlib.reload(recon.plot.sankey_paths)

<module 'recon.plot.sankey_paths' from '/pasteur/appa/homes/rtrimbou/miniconda3/envs/snakemake/envs/recon2/lib/python3.10/site-packages/recon/plot/sankey_paths.py'>

In [39]:
# Define which cell types can produce ligands (all other cell types)
ligand_source_cells = [ct for ct in celltypes if ct != focal_celltype]

recon.plot.sankey_paths.plot_ligand_sankey(
    multicell_obj=multicell,
    results=results,
    cell_type=focal_celltype,
    seeds=seeds_with_suffix,
    ligand_cells=ligand_source_cells,
    top_ligand_n=20,
    top_receptor_n=10,
    top_tf_n=5,
    per_celltype=True,  # Select top ligands per source cell type
    flow="upstream",
    save_path=None
)

['Ahctf1_TF::Macrophage', 'Ahr_TF::Macrophage', 'Ar_TF::Macrophage', 'Arid2_TF::Macrophage', 'Arid3a_TF::Macrophage']
['Ahctf1::Macrophage', 'Ahr::Macrophage', 'Ar::Macrophage', 'Arid2::Macrophage', 'Arid3a::Macrophage']
['A1bg_receptor::Macrophage', 'Abca1_receptor::Macrophage', 'Ackr3_receptor::Macrophage', 'Ackr4_receptor::Macrophage', 'Acvr1_receptor::Macrophage', 'Acvr1b_receptor::Macrophage', 'Acvr1c_receptor::Macrophage', 'Acvr2a_receptor::Macrophage', 'Acvr2b_receptor::Macrophage', 'Acvrl1_receptor::Macrophage']


### 3. Full Intercellular Cascade

Shows the complete regulatory cascade including upstream regulators in ligand-producing cells.

In [40]:
recon.plot.sankey_paths.plot_intercell_sankey(
    multicell_obj=multicell,
    results=results,
    cell_type=focal_celltype,
    seeds=seeds_with_suffix,
    ligand_cells=ligand_source_cells,
    top_ligand_n=20,
    top_receptor_n=10,
    top_tf_n=5,
    before_top_n=3,  # Top regulators in upstream cells
    per_celltype=True,
    flow="upstream",
    save_path=None
)

['Ahctf1_TF::Macrophage', 'Ahr_TF::Macrophage', 'Ar_TF::Macrophage', 'Arid2_TF::Macrophage', 'Arid3a_TF::Macrophage']
['Ahctf1::Macrophage', 'Ahr::Macrophage', 'Ar::Macrophage', 'Arid2::Macrophage', 'Arid3a::Macrophage']
['A1bg_receptor::Macrophage', 'Abca1_receptor::Macrophage', 'Ackr3_receptor::Macrophage', 'Ackr4_receptor::Macrophage', 'Acvr1_receptor::Macrophage', 'Acvr1b_receptor::Macrophage', 'Acvr1c_receptor::Macrophage', 'Acvr2a_receptor::Macrophage', 'Acvr2b_receptor::Macrophage', 'Acvrl1_receptor::Macrophage']
[]
[]
[]


[]
[]
[]
[]
[]
[]
[]
[]
[]
['Ahctf1_TF::pDC', 'Ahr_TF::pDC', 'Ar_TF::pDC']
['Ahctf1::pDC', 'Ahr::pDC', 'Ar::pDC']
['A1bg_receptor::pDC', 'Abca1_receptor::pDC', 'Ackr3_receptor::pDC']


## Interpreting Sankey Diagrams

**Node layers** (from left to right in upstream flow):
1. **Upstream receptors** (in ligand-producing cells)
2. **Upstream TFs** (regulate ligand production)
3. **Ligands** (from other cell types)
4. **Receptors** (in focal cell type)
5. **TFs** (in focal cell type)
6. **Target genes** (seeds of interest)

**Link thickness**: Proportional to edge weights in the network

**Color**: Different colors distinguish between layers

**Interactive**: Hover over nodes and links to see details

## Saving Plots

To save plots as HTML files for sharing:

In [41]:
# Save intercellular cascade
recon.plot.sankey_paths.plot_intercell_sankey(
    multicell_obj=multicell,
    results=results,
    top_ligand_n = 1000,
    cell_type=focal_celltype,
    seeds=seeds_with_suffix,
    ligand_cells=ligand_source_cells,
    save_path="macrophage_cascade.html"  # Updated filename
)

['Ahctf1_TF::Macrophage', 'Ahr_TF::Macrophage', 'Ar_TF::Macrophage', 'Arid2_TF::Macrophage', 'Arid3a_TF::Macrophage', 'Arid3b_TF::Macrophage', 'Arid5a_TF::Macrophage', 'Arid5b_TF::Macrophage', 'Arnt2_TF::Macrophage', 'Arnt_TF::Macrophage']
['Ahctf1::Macrophage', 'Ahr::Macrophage', 'Ar::Macrophage', 'Arid2::Macrophage', 'Arid3a::Macrophage', 'Arid3b::Macrophage', 'Arid5a::Macrophage', 'Arid5b::Macrophage', 'Arnt2::Macrophage', 'Arnt::Macrophage']
['A1bg_receptor::Macrophage', 'Abca1_receptor::Macrophage', 'Ackr3_receptor::Macrophage', 'Ackr4_receptor::Macrophage', 'Acvr1_receptor::Macrophage', 'Acvr1b_receptor::Macrophage', 'Acvr1c_receptor::Macrophage', 'Acvr2a_receptor::Macrophage', 'Acvr2b_receptor::Macrophage', 'Acvrl1_receptor::Macrophage', 'Adam12_receptor::Macrophage', 'Adam15_receptor::Macrophage', 'Adam17_receptor::Macrophage', 'Adam19_receptor::Macrophage', 'Adam23_receptor::Macrophage', 'Adam2_receptor::Macrophage', 'Adam9_receptor::Macrophage', 'Adamts13_receptor::Macrophage

## Key Parameters

- **`cell_type`**: Focal cell type receiving signals
- **`seeds`**: Target genes of interest (must include `::celltype` suffix)
- **`ligand_cells`**: Cell types that can produce ligands
- **`top_ligand_n`**: Number of top-scoring ligands to include
- **`top_receptor_n`**: Number of top-scoring receptors to include
- **`top_tf_n`**: Number of top-scoring TFs to include
- **`before_top_n`**: Number of upstream regulators in ligand-producing cells
- **`per_celltype`**: If True, select top ligands per source cell type (more balanced)
- **`flow`**: `"upstream"` (ligand→gene) or `"downstream"` (gene→ligand)
- **`save_path`**: Path to save HTML file (None = display only)