In [None]:
import commot as ct
import scanpy as sc
import pandas as pd
import numpy as np
from tqdm import tqdm

### Run Commot

In [2]:
adata = sc.read_h5ad('/Users/allywang/Desktop/work/S2025/SpaceOracle/data/training_data_2025/mouse_kidney_13.h5ad')
adata

AnnData object with n_obs × n_vars = 8509 × 3058
    obs: 'ct1', 'ct2', 'ct3', 'ct4', 'cond', 'medulla_cortex', 'domain', 'cell_type', 'cell_type_int'
    uns: 'cell_type_colors', 'neighbors', 'pca', 'umap'
    obsm: 'X_pca', 'X_umap', 'spatial', 'spatial_unscaled'
    varm: 'PCs'
    layers: 'imputed_count', 'normalized_count', 'raw_count'
    obsp: 'connectivities', 'distances'

In [3]:
# adata.X = adata.layers['imputed_count']
adata.X = adata.layers['normalized_count']


In [4]:
df_ligrec = ct.pp.ligand_receptor_database(
    database='CellChat', 
    species='mouse', 
    signaling_type=None
)
    
df_ligrec.columns = ['ligand', 'receptor', 'pathway', 'signaling']  

df_ligrec['name'] = df_ligrec['ligand'] + '-' + df_ligrec['receptor']
len(df_ligrec['name'].unique())

2017

In [5]:
import sys
sys.path.append('../../src')
from spaceoracle.tools.network import expand_paired_interactions

expanded = expand_paired_interactions(df_ligrec)
genes = set(expanded.ligand) | set(expanded.receptor)
genes = list(genes)

expanded

Unnamed: 0,ligand,receptor,pathway,signaling,name
0,Tgfb1,Tgfbr1,TGFb,Secreted Signaling,Tgfb1-Tgfbr1_Tgfbr2
0,Tgfb1,Tgfbr2,TGFb,Secreted Signaling,Tgfb1-Tgfbr1_Tgfbr2
1,Tgfb2,Tgfbr1,TGFb,Secreted Signaling,Tgfb2-Tgfbr1_Tgfbr2
1,Tgfb2,Tgfbr2,TGFb,Secreted Signaling,Tgfb2-Tgfbr1_Tgfbr2
2,Tgfb3,Tgfbr1,TGFb,Secreted Signaling,Tgfb3-Tgfbr1_Tgfbr2
...,...,...,...,...,...
2016,Itga9,Vcam1,VCAM,Cell-Cell Contact,Itga9_Itgb1-Vcam1
2016,Itgb1,Vcam1,VCAM,Cell-Cell Contact,Itga9_Itgb1-Vcam1
2017,Itga4,Vcam1,VCAM,Cell-Cell Contact,Itga4_Itgb7-Vcam1
2017,Itgb7,Vcam1,VCAM,Cell-Cell Contact,Itga4_Itgb7-Vcam1


In [6]:
expanded = expanded[expanded.ligand.isin(adata.var_names) & expanded.receptor.isin(adata.var_names)]
expanded

Unnamed: 0,ligand,receptor,pathway,signaling,name
1,Tgfb2,Tgfbr1,TGFb,Secreted Signaling,Tgfb2-Tgfbr1_Tgfbr2
2,Tgfb3,Tgfbr1,TGFb,Secreted Signaling,Tgfb3-Tgfbr1_Tgfbr2
10,Tgfb2,Tgfbr1,TGFb,Secreted Signaling,Tgfb2-Acvr1_Tgfbr1_Tgfbr2
11,Tgfb3,Tgfbr1,TGFb,Secreted Signaling,Tgfb3-Acvr1_Tgfbr1_Tgfbr2
15,Bmp2,Bmpr1b,BMP,Secreted Signaling,Bmp2-Bmpr1b_Acvr2a
...,...,...,...,...,...
2012,Thy1,Itgav,THY1,Cell-Cell Contact,Thy1-Itgav_Itgb3
2012,Thy1,Itgb3,THY1,Cell-Cell Contact,Thy1-Itgav_Itgb3
2015,Itga4,Vcam1,VCAM,Cell-Cell Contact,Itga4_Itgb1-Vcam1
2017,Itga4,Vcam1,VCAM,Cell-Cell Contact,Itga4_Itgb7-Vcam1


In [7]:
ct.tl.spatial_communication(adata,
    database_name='user_database', 
    # df_ligrec=df_ligrec, 
    df_ligrec=expanded, 
    dis_thr=200, 
    # heteromeric=True
    heteromeric=False
)

In [8]:
adata

AnnData object with n_obs × n_vars = 8509 × 3058
    obs: 'ct1', 'ct2', 'ct3', 'ct4', 'cond', 'medulla_cortex', 'domain', 'cell_type', 'cell_type_int'
    uns: 'cell_type_colors', 'neighbors', 'pca', 'umap', 'commot-user_database-info'
    obsm: 'X_pca', 'X_umap', 'spatial', 'spatial_unscaled', 'commot-user_database-sum-sender', 'commot-user_database-sum-receiver'
    varm: 'PCs'
    layers: 'imputed_count', 'normalized_count', 'raw_count'
    obsp: 'connectivities', 'distances', 'commot-user_database-H2-Q6-Cd8b1', 'commot-user_database-H2-Q6-Cd8a', 'commot-user_database-Cadm1-Cadm1', 'commot-user_database-Il27-Il27ra', 'commot-user_database-Egf-Egfr', 'commot-user_database-Tnfsf9-Tnfrsf9', 'commot-user_database-Pdgfc-Pdgfra', 'commot-user_database-H2-Q10-Cd8b1', 'commot-user_database-H2-Q10-Cd8a', 'commot-user_database-Tnf-Tnfrsf1b', 'commot-user_database-Fasl-Fas', 'commot-user_database-Efna5-Epha4', 'commot-user_database-Efna5-Epha3', 'commot-user_database-Efna5-Epha1', 'commot-user

In [9]:
adata.write_h5ad('commot.h5ad')
adata = sc.read_h5ad('commot.h5ad')

In [10]:
# lr_info = {k.replace('commot-user_database-', ''): v for k, v in adata.obsp.items() if 'commot-user_database-' in k}
# len(lr_info)

In [11]:
# df_ligrec = df_ligrec[df_ligrec['name'].isin(lr_info.keys())]
# df_ligrec['signaling'].value_counts()

### Get cluster communication scores

In [12]:
expanded['rename'] = expanded['ligand'] + '-' + expanded['receptor']

In [13]:
from tqdm import tqdm
import commot as ct

for name in tqdm(expanded['rename'].unique()):

    ct.tl.cluster_communication(adata, database_name='user_database', pathway_name=name, clustering='cell_type',
        random_seed=12, n_permutations=100)

100%|██████████| 326/326 [05:37<00:00,  1.03s/it]


In [14]:
adata.write_h5ad('commot_cluster.h5ad')
adata = sc.read_h5ad('commot_cluster.h5ad')

In [15]:
from collections import defaultdict
data_dict = defaultdict(dict)

for name in expanded['rename']:
    data_dict[name]['communication_matrix'] = adata.uns[f'commot_cluster-cell_type-user_database-{name}']['communication_matrix']
    data_dict[name]['communication_pvalue'] = adata.uns[f'commot_cluster-cell_type-user_database-{name}']['communication_pvalue']

import pickle
with open('/Users/allywang/Desktop/work/S2025/SpaceOracle/data/training_data_2025/commot_kidney_communication.pkl', 'wb') as f:
    pickle.dump(data_dict, f)

In [16]:
# check outputs

import pickle
with open('/Users/allywang/Desktop/work/S2025/SpaceOracle/data/training_data_2025/commot_kidney_communication.pkl', 'rb') as f:
    info = pickle.load(f)

len(info.keys())

326

In [17]:
def get_sig_interactions(value_matrix, p_matrix, pval=0.3):
    p_matrix = np.where(p_matrix < pval, 1, 0)
    return value_matrix * p_matrix

interactions = {}
for lig, rec in tqdm(zip(expanded['ligand'], expanded['receptor'])):
    name = lig + '-' + rec

    if name in info.keys():

        value_matrix = info[name]['communication_matrix']
        p_matrix = info[name]['communication_pvalue']

        sig_matrix = get_sig_interactions(value_matrix, p_matrix)
        
        if sig_matrix.sum().sum() > 0:
            interactions[name] = sig_matrix
    
len(interactions)

396it [00:00, 6577.53it/s]


326

### Get expanded LR masks

In [18]:
interactions.keys()

dict_keys(['Tgfb2-Tgfbr1', 'Tgfb3-Tgfbr1', 'Bmp2-Bmpr1b', 'Bmp6-Bmpr1b', 'Wnt4-Fzd5', 'Wnt11-Fzd5', 'Egf-Egfr', 'Areg-Egfr', 'Btc-Egfr', 'Btc-Erbb4', 'Hbegf-Egfr', 'Hbegf-Erbb4', 'Nrg1-Erbb4', 'Nrg2-Erbb4', 'Nrg4-Erbb4', 'Nrg1-Itgav', 'Nrg1-Itgb3', 'Pdgfc-Pdgfra', 'Pdgfd-Pdgfrb', 'Vegfa-Flt1', 'Vegfa-Kdr', 'Vegfc-Kdr', 'Igf1-Itgav', 'Igf1-Itgb3', 'Apln-Aplnr', 'Ccl5-Ccr1', 'Ccl3-Ccr1', 'Ccl9-Ccr1', 'Ccl6-Ccr1', 'Ccl6-Ccr2', 'Ccl2-Ccr2', 'Ccl12-Ccr2', 'Ccl27a-Ccr2', 'Ccl4-Ccr5', 'Ccl5-Ccr5', 'Ccl3-Ccr5', 'Cxcl1-Cxcr2', 'Cxcl2-Cxcr2', 'Pf4-Cxcr3', 'Cxcl9-Cxcr3', 'Cxcl10-Cxcr3', 'Cxcl12-Cxcr4', 'Cxcl12-Ackr3', 'Mif-Cd74', 'Mif-Cxcr4', 'Mif-Cd44', 'Mif-Cxcr2', 'Mif-Ackr3', 'Xcl1-Xcr1', 'Il27-Il27ra', 'Il1b-Il1r1', 'Il1b-Il1rap', 'Il17c-Il17ra', 'Tnf-Tnfrsf1b', 'Fasl-Fas', 'Tnfsf9-Tnfrsf9', 'Cd40lg-Itgb3', 'Cd40lg-Itgam', 'Spp1-Cd44', 'Spp1-Itgav', 'Spp1-Itgb3', 'Spp1-Itgb6', 'Spp1-Itga4', 'Spp1-Itga8', 'Angpt1-Tek', 'Angpt2-Tek', 'Mdk-Sdc1', 'Mdk-Sdc4', 'Mdk-Itga4', 'Mdk-Lrp1', 'Ptn-Sdc1',

In [19]:
# create cell x gene matrix
ct_masks = {ct: adata.obs['cell_type'] == ct for ct in adata.obs['cell_type'].unique()}

df = pd.DataFrame(index=adata.obs_names, columns=genes)
df = df.fillna(0)

for name in tqdm(interactions.keys(), total=len(interactions)):
    lig, rec = name.rsplit('-', 1)
    
    tmp = interactions[name].sum(axis=1)
    for ct, val in zip(interactions[name].index, tmp):
        df.loc[ct_masks[ct], lig] += tmp[ct]
    
    tmp = interactions[name].sum(axis=0)
    for ct, val in zip(interactions[name].columns, tmp):
        df.loc[ct_masks[ct], rec] += tmp[ct]

df.shape

100%|██████████| 326/326 [00:03<00:00, 91.40it/s]


(8509, 993)

In [20]:
print('Number of LR filtered using celltype specificity:')
np.where(df > 0, 1, 0).sum().sum() / (df.shape[0] * df.shape[1])

Number of LR filtered using celltype specificity:


0.15084555337829017

In [None]:
# df.to_parquet('/ix/djishnu/shared/djishnu_kor11/miscellaneous/kidney_commot_LRs.parquet')
df.to_parquet('/Users/allywang/Desktop/work/S2025/SpaceOracle/notebooks/mouse_kidney_survey/kidney_LRs.parquet')

In [35]:
df.columns[np.where(df.sum() > 0)]

Index(['Podxl', 'Cadm1', 'Il27', 'H2-Q10', 'Tnf', 'Efna5', 'Wnt11', 'Cd209a',
       'Sema6d', 'Cxcl10',
       ...
       'Cd274', 'Calca', 'Klrk1', 'Il1r1', 'Sema3d', 'Fzd5', 'Tnfrsf1b',
       'Sema3c', 'Cxcl12', 'Pdgfra'],
      dtype='object', length=216)

### Save params in adata

In [None]:
adata = sc.read_h5ad('/ix/djishnu/shared/djishnu_kor11/training_data_2025/mouse_kidney_13.h5ad')
adata

In [None]:
from spaceoracle.models.parallel_estimators import *

cell_threshes = pd.read_parquet('/ix/djishnu/shared/djishnu_kor11/commot_outputs/kidney_LRs.parquet')
adata.uns['cell_thresholds'] = cell_threshes
adata

In [None]:
adata = init_received_ligands(
    adata, 
    radius=200, 
    contact_distance=30, 
    cell_threshes=cell_threshes
)
adata

In [None]:
adata.write_h5ad('/ix/djishnu/shared/djishnu_kor11/training_data_2025/mouse_kidney_13.h5ad')
