In [1]:
import commot as ct
import scanpy as sc
import pandas as pd
import numpy as np
from tqdm import tqdm

### Run Commot

In [2]:
# Load the human tonsil AnnData object from shared storage.
# NOTE(review): absolute cluster path - this cell only runs where that mount exists.
adata = sc.read_h5ad('/ix/djishnu/shared/djishnu_kor11/training_data_2025/snrna_human_tonsil.h5ad')
# Bare expression so the notebook renders the AnnData summary.
adata

AnnData object with n_obs × n_vars = 5778 × 3549
    obs: 'cell_type', 'author_cell_type', 'cell_type_int', 'leiden', 'leiden_R', 'cell_type_2'
    uns: 'author_cell_type_colors', 'cell_thresholds', 'cell_type_2_colors', 'cell_type_colors', 'dendrogram_leiden', 'leiden', 'leiden_R', 'leiden_colors', 'ligand_receivers', 'neighbors', 'pca', 'received_ligands', 'received_ligands_tfl', 'umap'
    obsm: 'X_pca', 'X_umap', 'ora_estimate', 'ora_pvals', 'spatial', 'spatial_unscaled'
    varm: 'PCs'
    layers: 'imputed_count', 'normalized_count'
    obsp: 'connectivities', 'distances'

In [3]:
# adata.X = adata.layers['imputed_count']
# Use the 'normalized_count' layer as the main expression matrix for the steps
# below; the 'imputed_count' alternative above is left disabled.
adata.X = adata.layers['normalized_count']


In [4]:
# Fetch the CellChat human ligand-receptor table through COMMOT, without
# restricting the signaling type.
df_ligrec = ct.pp.ligand_receptor_database(species='human', database='CellChat', signaling_type=None)

# Replace the column labels with descriptive names.
df_ligrec.columns = ['ligand', 'receptor', 'pathway', 'signaling']

# Pair identifier of the form "<ligand>-<receptor>".
df_ligrec['name'] = df_ligrec['ligand'].add('-').add(df_ligrec['receptor'])

# Number of distinct pair identifiers (missing values, if any, count as one).
df_ligrec['name'].nunique(dropna=False)

1938

In [5]:
import sys
sys.path.append('../../src')  # make the local `spaceoracle` package importable
from spaceoracle.tools.network import expand_paired_interactions

# Expand the ligand-receptor table with the project helper; judging by the
# displayed result, multi-subunit entries become one row per gene pair.
expanded = expand_paired_interactions(df_ligrec)

# Every gene that appears as a ligand or as a receptor after expansion.
genes = list(set(expanded.ligand) | set(expanded.receptor))

expanded

Unnamed: 0,ligand,receptor,pathway,signaling,name
0,TGFB1,TGFBR1,TGFb,Secreted Signaling,TGFB1-TGFBR1_TGFBR2
0,TGFB1,TGFBR2,TGFb,Secreted Signaling,TGFB1-TGFBR1_TGFBR2
1,TGFB2,TGFBR1,TGFb,Secreted Signaling,TGFB2-TGFBR1_TGFBR2
1,TGFB2,TGFBR2,TGFb,Secreted Signaling,TGFB2-TGFBR1_TGFBR2
2,TGFB3,TGFBR1,TGFb,Secreted Signaling,TGFB3-TGFBR1_TGFBR2
...,...,...,...,...,...
1936,ITGA9,VCAM1,VCAM,Cell-Cell Contact,ITGA9_ITGB1-VCAM1
1936,ITGB1,VCAM1,VCAM,Cell-Cell Contact,ITGA9_ITGB1-VCAM1
1937,ITGA4,VCAM1,VCAM,Cell-Cell Contact,ITGA4_ITGB7-VCAM1
1937,ITGB7,VCAM1,VCAM,Cell-Cell Contact,ITGA4_ITGB7-VCAM1


In [6]:
# Keep only the pairs whose ligand AND receptor are both present in
# `adata.var_names`.
# `.copy()` makes the result an independent frame: a later cell assigns a new
# column (`expanded['rename'] = ...`), and doing that on a boolean-mask slice
# triggers pandas' SettingWithCopyWarning.
in_data = expanded.ligand.isin(adata.var_names) & expanded.receptor.isin(adata.var_names)
expanded = expanded[in_data].copy()

# Spot-check: which IL7 pairs survived the filter.
expanded[expanded.ligand == 'IL7']

Unnamed: 0,ligand,receptor,pathway,signaling,name
726,IL7,IL7R,IL2,Secreted Signaling,IL7-IL7R_IL2RG


In [7]:
# Run COMMOT spatial communication on the filtered, expanded pair table.
# Results are stored on `adata` under the 'user_database' name.
ct.tl.spatial_communication(
    adata,
    df_ligrec=expanded,             # alternative tried: df_ligrec (unexpanded table)
    database_name='user_database',
    heteromeric=False,              # alternative tried: True
    dis_thr=200,
)

In [8]:
# Display the AnnData summary to inspect the keys added by the COMMOT run.
adata

AnnData object with n_obs × n_vars = 5778 × 3549
    obs: 'cell_type', 'author_cell_type', 'cell_type_int', 'leiden', 'leiden_R', 'cell_type_2'
    uns: 'author_cell_type_colors', 'cell_type_2_colors', 'cell_type_colors', 'dendrogram_leiden', 'leiden', 'leiden_R', 'leiden_colors', 'neighbors', 'pca', 'umap', 'commot-user_database-info'
    obsm: 'X_pca', 'X_umap', 'ora_estimate', 'ora_pvals', 'spatial', 'spatial_unscaled', 'commot-user_database-sum-sender', 'commot-user_database-sum-receiver'
    varm: 'PCs'
    layers: 'imputed_count', 'normalized_count'
    obsp: 'connectivities', 'distances', 'commot-user_database-COL4A4-SDC4', 'commot-user_database-COL4A4-ITGA9', 'commot-user_database-COL4A4-ITGB1', 'commot-user_database-COL4A4-ITGA1', 'commot-user_database-COL4A4-GP6', 'commot-user_database-COL4A4-ITGB8', 'commot-user_database-COL4A4-CD44', 'commot-user_database-COL4A4-ITGA10', 'commot-user_database-NRXN3-NLGN1', 'commot-user_database-NRXN3-NLGN3', 'commot-user_database-CCL2-CCR2'

In [9]:
# Write the AnnData object to disk and immediately read it back.
# NOTE(review): presumably a checkpoint so later cells can restart from the
# file; the path is relative to the notebook's working directory.
adata.write_h5ad('commot.h5ad')
adata = sc.read_h5ad('commot.h5ad')

In [57]:
# Sanity check: list the pairwise-matrix keys that mention IL7.
[key for key in adata.obsp.keys() if 'IL7' in key]

['commot-user_database-IL7-IL7R']

In [None]:
# lr_info = {k.replace('commot-user_database-', ''): v for k, v in adata.obsp.items() if 'commot-user_database-' in k}
# len(lr_info)

311

In [None]:
# df_ligrec = df_ligrec[df_ligrec['name'].isin(lr_info.keys())]
# df_ligrec['signaling'].value_counts()

signaling
Cell-Cell Contact     69
Secreted Signaling    46
ECM-Receptor          42
Name: count, dtype: int64

### Get cluster communication scores

In [7]:
# Short pair label "<ligand>-<receptor>" for every expanded row; the loops
# below use it as the pathway name / lookup key.
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours, so it was run in a different pass - confirm it works on Run All.
expanded['rename'] = expanded['ligand'] + '-' + expanded['receptor']

In [62]:
from tqdm import tqdm
import commot as ct

# Cluster-level communication scores for every distinct ligand-receptor label,
# grouped by `cell_type`, with 100 permutations and a fixed seed.
cluster_kwargs = dict(
    database_name='user_database',
    clustering='cell_type',
    n_permutations=100,
    random_seed=12,
)

for name in tqdm(expanded['rename'].unique()):
    ct.tl.cluster_communication(adata, pathway_name=name, **cluster_kwargs)

100%|██████████| 310/310 [24:22<00:00,  4.72s/it]


In [13]:
# adata.write_h5ad('commot_cluster.h5ad')
# adata = sc.read_h5ad('commot_cluster.h5ad')

In [63]:
# Sanity check: list the cluster-communication entries that mention IL7.
[key for key in adata.uns.keys() if 'IL7' in key]

['commot_cluster-cell_type-user_database-IL7-IL7R']

In [64]:
from collections import defaultdict
import pickle

# For each pair label, copy the communication matrix and its p-value matrix out
# of `adata.uns` into a plain nested mapping.
data_dict = defaultdict(dict)

for name in expanded['rename']:
    cluster_result = adata.uns[f'commot_cluster-cell_type-user_database-{name}']
    for field in ('communication_matrix', 'communication_pvalue'):
        data_dict[name][field] = cluster_result[field]

# Persist the mapping so the downstream cells can run without recomputing it.
with open('/ix/djishnu/shared/djishnu_kor11/commot_outputs/tonsil_communication.pkl', 'wb') as f:
    pickle.dump(data_dict, f)

In [8]:
# check outputs
import pickle

# Reload the pickled communication results written by the previous step.
# NOTE: unpickling executes code embedded in the file - only load trusted files.
with open('/ix/djishnu/shared/djishnu_kor11/commot_outputs/tonsil_communication.pkl', 'rb') as f:
    info = pickle.load(f)

# Number of ligand-receptor labels stored.
len(info)

310

In [9]:
# Sanity check: IL7 labels present in the reloaded results.
[key for key in info.keys() if 'IL7' in key]

['IL7-IL7R']

In [10]:
def get_sig_interactions(value_matrix, p_matrix, pval=0.3):
    """Zero out the entries of `value_matrix` whose p-value is not below `pval`.

    Args:
        value_matrix: communication scores (array-like or DataFrame).
        p_matrix: p-values laid out like `value_matrix`.
        pval: strict upper bound; entries with ``p < pval`` are kept.

    Returns:
        `value_matrix` multiplied elementwise by a 0/1 significance mask.
    """
    is_significant = np.asarray(p_matrix < pval)
    return value_matrix * is_significant.astype(int)

# Per ligand-receptor label, keep the thresholded communication matrix when at
# least some score survives the p-value cut.
interactions = {}
for lig, rec in tqdm(zip(expanded['ligand'], expanded['receptor'])):
    name = '-'.join((lig, rec))

    # Labels without stored results are skipped.
    if name not in info:
        continue

    value_matrix = info[name]['communication_matrix']
    p_matrix = info[name]['communication_pvalue']
    sig_matrix = get_sig_interactions(value_matrix, p_matrix)

    if sig_matrix.sum().sum() > 0:
        interactions[name] = sig_matrix

len(interactions)

473it [00:00, 643.95it/s]


310

### Get expanded LR masks

In [11]:
# Inspect which ligand-receptor labels were kept.
interactions.keys()

dict_keys(['GDF7-BMPR2', 'BMP6-BMPR2', 'BMP7-BMPR2', 'BMP8A-BMPR2', 'WNT10A-LRP5', 'WNT10A-FZD7', 'WNT10A-FZD8', 'WNT10B-LRP5', 'WNT10B-FZD7', 'WNT10B-FZD8', 'WNT3-LRP5', 'WNT3-FZD7', 'WNT3-FZD8', 'WNT4-LRP5', 'WNT4-FZD7', 'WNT4-FZD8', 'WNT5B-FZD7', 'WNT5B-FZD8', 'NRG1-ERBB4', 'NRG2-ERBB4', 'NRG3-ERBB4', 'FGF7-FGFR2', 'PDGFD-PDGFRB', 'VEGFC-FLT4', 'VEGFC-KDR', 'IGF1-IGF1R', 'IGF1-ITGA6', 'CCL2-CCR2', 'CCL4-CCR5', 'CCL5-CCR5', 'CCL3-CCR5', 'CCL20-CCR6', 'CCL19-CCR7', 'CCL21-CCR7', 'CXCL9-CXCR3', 'CXCL10-CXCR3', 'CXCL13-CXCR3', 'CXCL12-CXCR4', 'CXCL13-CXCR5', 'CXCL16-CXCR6', 'IL4-IL4R', 'IL7-IL7R', 'IL15-IL2RB', 'IL4-IL13RA1', 'IL6-IL6R', 'IL6-IL6ST', 'EBI3-IL27RA', 'EBI3-IL6ST', 'IL18-IL18R1', 'IL18-IL18RAP', 'IL33-IL1RAP', 'IL34-CSF1R', 'CSF1-CSF1R', 'TNF-TNFRSF1A', 'TNF-TNFRSF1B', 'EDA-EDAR', 'TNFSF11-TNFRSF11A', 'TNFSF8-TNFRSF8', 'TNFSF9-TNFRSF9', 'TNFSF13B-TNFRSF13B', 'CD40LG-ITGA2B', 'CD40LG-ITGA5', 'CD40LG-ITGB1', 'CD40LG-ITGAM', 'CD40LG-ITGB2', 'ANGPTL1-ITGA1', 'ANGPTL1-ITGB1', '

In [12]:
# create cell x gene matrix
# One boolean per-cell mask for every cell type. The loop name is `cell_type`
# rather than `ct`: a module-level `for ct, ...` loop would rebind `ct` and
# clobber the `import commot as ct` alias for every later cell.
ct_masks = {
    cell_type: (adata.obs['cell_type'] == cell_type)
    for cell_type in adata.obs['cell_type'].unique()
}

# Start from float zeros directly instead of an empty object-dtype frame
# followed by fillna(0), which relies on deprecated silent downcasting.
df = pd.DataFrame(0.0, index=adata.obs_names, columns=genes)

for name in tqdm(interactions.keys(), total=len(interactions)):
    lig, rec = name.rsplit('-', 1)
    comm = interactions[name]

    # Row sums, keyed by the matrix's row labels, are added to the ligand
    # column of the cells belonging to that cell type.
    for cell_type, val in comm.sum(axis=1).items():
        df.loc[ct_masks[cell_type], lig] += val

    # Column sums, keyed by the matrix's column labels, are added to the
    # receptor column of the cells belonging to that cell type.
    for cell_type, val in comm.sum(axis=0).items():
        df.loc[ct_masks[cell_type], rec] += val

df.shape

100%|██████████| 310/310 [00:14<00:00, 22.12it/s]


(5778, 958)

In [13]:
print('Number of LR filtered using celltype specificity:')
# Fraction of (cell, gene) entries with a strictly positive score.
n_positive = np.where(df > 0, 1, 0).sum()
n_positive / df.size

Number of LR filtered using celltype specificity:


0.1520613788822479

In [14]:
# df.to_parquet('/ix/djishnu/shared/djishnu_kor11/miscellaneous/tonsil_commot_LRs.parquet')
# Save the cell x gene score matrix; the line above is an earlier output
# location left disabled.
df.to_parquet('/ix/djishnu/shared/djishnu_kor11/commot_outputs/tonsil_LRs.parquet')

In [15]:
# Spot-check the total score accumulated for a couple of ligands.
ligands = ['IL2', 'IL7']
df.loc[:, ligands].sum()

IL2    0.000000
IL7    2.603589
dtype: float64

### Get ligand receivers mask for tissue density weighting

In [16]:
# Reload the cell x gene score matrix from disk, replacing the in-memory `df`.
df = pd.read_parquet('/ix/djishnu/shared/djishnu_kor11/commot_outputs/tonsil_LRs.parquet')
df.head(3)

Unnamed: 0_level_0,BTC,PENK,CLDN11,FGFR1,CHRNA5,IL11,PRSS1,SEMA5A,PDGFC,PTPRF,...,ANGPTL7,OPRK1,COL2A1,IFNE,CCL4,PRSS2,AMH,PRLR,EPHA2,POMC
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACCCAAGCGCCTTG-1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,5.4e-05,0,0,0,0.0,0
AAACCCAAGTGGACGT-1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1.1e-05,0,0,0,9e-06,0
AAACCCACAGAAGTGC-1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0.0,0,0,0,0.0,0


In [17]:
# Preview the filtered pair table, including the 'rename' label column.
expanded.head(3)

Unnamed: 0,ligand,receptor,pathway,signaling,name,rename
38,GDF7,BMPR2,BMP,Secreted Signaling,GDF7-BMPR1A_BMPR2,GDF7-BMPR2
41,GDF7,BMPR2,BMP,Secreted Signaling,GDF7-BMPR1B_BMPR2,GDF7-BMPR2
54,BMP6,BMPR2,BMP,Secreted Signaling,BMP6-ACVR1_BMPR2,BMP6-BMPR2


In [18]:
# Labels available in the reloaded communication results.
info.keys()

dict_keys(['GDF7-BMPR2', 'BMP6-BMPR2', 'BMP7-BMPR2', 'BMP8A-BMPR2', 'WNT10A-LRP5', 'WNT10A-FZD7', 'WNT10A-FZD8', 'WNT10B-LRP5', 'WNT10B-FZD7', 'WNT10B-FZD8', 'WNT3-LRP5', 'WNT3-FZD7', 'WNT3-FZD8', 'WNT4-LRP5', 'WNT4-FZD7', 'WNT4-FZD8', 'WNT5B-FZD7', 'WNT5B-FZD8', 'NRG1-ERBB4', 'NRG2-ERBB4', 'NRG3-ERBB4', 'FGF7-FGFR2', 'PDGFD-PDGFRB', 'VEGFC-FLT4', 'VEGFC-KDR', 'IGF1-IGF1R', 'IGF1-ITGA6', 'CCL2-CCR2', 'CCL4-CCR5', 'CCL5-CCR5', 'CCL3-CCR5', 'CCL20-CCR6', 'CCL19-CCR7', 'CCL21-CCR7', 'CXCL9-CXCR3', 'CXCL10-CXCR3', 'CXCL13-CXCR3', 'CXCL12-CXCR4', 'CXCL13-CXCR5', 'CXCL16-CXCR6', 'IL4-IL4R', 'IL7-IL7R', 'IL15-IL2RB', 'IL4-IL13RA1', 'IL6-IL6R', 'IL6-IL6ST', 'EBI3-IL27RA', 'EBI3-IL6ST', 'IL18-IL18R1', 'IL18-IL18RAP', 'IL33-IL1RAP', 'IL34-CSF1R', 'CSF1-CSF1R', 'TNF-TNFRSF1A', 'TNF-TNFRSF1B', 'EDA-EDAR', 'TNFSF11-TNFRSF11A', 'TNFSF8-TNFRSF8', 'TNFSF9-TNFRSF9', 'TNFSF13B-TNFRSF13B', 'CD40LG-ITGA2B', 'CD40LG-ITGA5', 'CD40LG-ITGB1', 'CD40LG-ITGAM', 'CD40LG-ITGB2', 'ANGPTL1-ITGA1', 'ANGPTL1-ITGB1', '

In [19]:
# Confirm that the IL7 label is present in the stored results.
[label for label in info.keys() if 'IL7' in label]

['IL7-IL7R']

In [20]:
# Cells x ligands matrix. Start from float zeros directly instead of an empty
# object-dtype frame followed by fillna(0), which relies on deprecated silent
# downcasting and leaves the columns with an unintended dtype.
ligand_receivers_mask = pd.DataFrame(0.0, index=df.index, columns=np.unique(expanded.ligand))

# For every (ligand, receptor) row, add the per-cell receptor score from `df`
# to that ligand's column.
# NOTE(review): `expanded` can hold the same ligand/receptor pair on several
# rows (one per originating complex), so such pairs are added more than once -
# confirm this weighting is intended.
for ligand, receptor in tqdm(zip(expanded.ligand, expanded.receptor), total=len(expanded)):
    receivers = df[receptor].values
    ligand_receivers_mask.loc[:, ligand] += receivers

ligand_receivers_mask.head(3)

100%|██████████| 473/473 [00:00<00:00, 1122.55it/s]


Unnamed: 0_level_0,ADGRE5,ALCAM,ANGPTL1,ANXA1,BMP6,BMP7,BMP8A,C3,CADM1,CCL19,...,TNFSF8,TNFSF9,TNR,VEGFC,VSIR,WNT10A,WNT10B,WNT3,WNT4,WNT5B
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACCCAAGCGCCTTG-1,0.000224,0.002991,0.002544,0.0,0.000351,0.000351,0.000351,0.002458,1.4e-05,0.000485,...,7e-06,1.1e-05,0.002058,3.1e-05,4.7e-05,0.000122,0.000122,0.000122,0.000122,0.0
AAACCCAAGTGGACGT-1,0.000251,0.000528,0.001493,2e-06,0.000554,0.000554,0.000554,0.001567,0.001478,1.5e-05,...,0.0,2e-06,0.003399,0.0,6.8e-05,0.000524,0.000524,0.000524,0.000524,5.1e-05
AAACCCACAGAAGTGC-1,5.9e-05,0.0,0.001402,0.0,0.000254,0.000254,0.000254,0.001281,4.4e-05,0.0,...,3.2e-05,0.0,0.002826,0.0,1.3e-05,0.000319,0.000319,0.000319,0.000319,1e-06


In [21]:
# Save the cells x ligands receiver matrix.
ligand_receivers_mask.to_parquet('/ix/djishnu/shared/djishnu_kor11/commot_outputs/tonsil_ligreceivers.parquet')

### Get true LR pairs

In [None]:
# NOTE(review): `stop` is not defined anywhere in the visible notebook, so this
# line raises NameError - presumably a deliberate guard that halts "Run All"
# before the cells below; confirm intent.
stop

In [None]:
# Gene universe taken from the `df_ligrec` table: every ligand or receptor entry.
ligand_genes = set(df_ligrec.ligand)
receptor_genes = set(df_ligrec.receptor)
genes = list(ligand_genes | receptor_genes)
len(genes)

199

In [None]:
def get_sig_interactions(value_matrix, p_matrix, pval=0.05):
    """Keep the scores whose p-value is strictly below `pval`; zero the rest.

    Args:
        value_matrix: communication scores (array-like or DataFrame).
        p_matrix: p-values laid out like `value_matrix`.
        pval: significance threshold.

    Returns:
        The elementwise product of `value_matrix` and a 0/1 mask.
    """
    keep = np.asarray(p_matrix < pval).astype(int)
    masked = value_matrix * keep
    return masked

# Same selection as before, driven by the rows of `df_ligrec`: keep the
# thresholded matrix for every label that still has a positive total.
interactions = {}
for lig, rec in tqdm(zip(df_ligrec['ligand'], df_ligrec['receptor'])):
    name = '-'.join((lig, rec))

    # Labels without stored results are skipped.
    if name not in info.keys():
        continue

    value_matrix = info[name]['communication_matrix']
    p_matrix = info[name]['communication_pvalue']
    sig_matrix = get_sig_interactions(value_matrix, p_matrix)

    if sig_matrix.sum().sum() > 0:
        interactions[name] = sig_matrix

len(interactions)

238it [00:00, 1703.08it/s]


238

In [None]:
# create cell x LR unit matrix
# One boolean per-cell mask for every cell type. The loop name is `cell_type`
# rather than `ct`, so the `import commot as ct` alias is not overwritten.
ct_masks = {
    cell_type: (adata.obs['cell_type'] == cell_type)
    for cell_type in adata.obs['cell_type'].unique()
}

# Float zeros directly, instead of an empty object-dtype frame + fillna(0).
df = pd.DataFrame(0.0, index=adata.obs_names, columns=genes)

for lig, rec in tqdm(zip(df_ligrec.ligand, df_ligrec.receptor), total=len(df_ligrec)):
    # Build the lookup key for THIS pair. Without this line, `name` would be
    # whatever value an earlier cell left behind, and every iteration would
    # add the same matrix.
    name = lig + '-' + rec

    # Only pairs that passed the significance filter have an entry.
    if name not in interactions:
        continue
    comm = interactions[name]

    # Row sums go to the ligand column of the cells of each cell type.
    for cell_type, val in comm.sum(axis=1).items():
        df.loc[ct_masks[cell_type], lig] += val

    # Column sums go to the receptor column of the cells of each cell type.
    for cell_type, val in comm.sum(axis=0).items():
        df.loc[ct_masks[cell_type], rec] += val

df.shape

100%|██████████| 238/238 [00:06<00:00, 39.04it/s]


(5778, 199)

In [91]:
print('Number of LR filtered using celltype specificity:')
# Share of entries in `df` that are strictly positive.
(df > 0).to_numpy().mean()

Number of LR filtered using celltype specificity:


0.1520613788822479

In [90]:
# df.to_parquet('/ix/djishnu/shared/djishnu_kor11/miscellaneous/tonsil_commot_LRs_units.parquet')
# Save the cell x LR unit matrix; the line above is an earlier output location
# left disabled.
df.to_parquet('/ix/djishnu/shared/djishnu_kor11/commot_outputs/tonsil_LRs_units.parquet')


### Scratch

In [None]:
# def count_interactions(matrix):
#     mask = matrix.astype(bool).toarray()
#     mask = np.maximum(mask, mask.T)
#     mask = np.triu(mask, k=1)
#     return mask.sum()

def count_interactions(matrix):
    """Return the sum of all entries of `matrix`, using its own ``.sum()``."""
    total = matrix.sum()
    return total

# `lr_info` is otherwise only built in a commented-out cell earlier in the
# notebook, so on a fresh kernel this cell would fail with NameError. Rebuild
# it here the same way: one entry per COMMOT matrix in `adata.obsp`, with the
# 'commot-user_database-' prefix stripped from the key.
lr_info = {
    key.replace('commot-user_database-', ''): matrix
    for key, matrix in adata.obsp.items()
    if 'commot-user_database-' in key
}

# Total signal per ligand-receptor matrix.
num_values = {k: count_interactions(v) for k, v in lr_info.items()}

In [None]:
# import matplotlib.pyplot as plt

# plt.hist(list(num_values.values()), bins=1000)
# plt.semilogy()

# # threshold = round(adata.n_obs * 0.05)
# threshold = np.percentile(list(num_values.values()), 10)

# plt.text(threshold, plt.ylim()[1] * 0.9, f'Threshold: {threshold}', color='red', ha='center')
# plt.axvline(threshold, color='red', linestyle='dashed', linewidth=1)
# plt.axvspan(0, threshold, color='red', alpha=0.3)
# plt.xlim(0, 20000)
# plt.xlabel('Number of interactions')

# plt.show()

In [None]:
from collections import defaultdict

celltypes = adata.obs['cell_type'].unique()

# Build each cell type's boolean mask once instead of inside the nested loops.
celltype_masks = {c: adata.obs['cell_type'] == c for c in celltypes}

# interactions[sender_type][receiver_type][pair] -> number of nonzero entries
# in the sender-rows x receiver-columns sub-block of that pair's matrix.
# NOTE(review): this rebinds `interactions`, which earlier cells use for a
# different structure (label -> thresholded matrix).
interactions = defaultdict(lambda: defaultdict(dict))

for a in celltypes:
    a_mask = celltype_masks[a]

    for b in celltypes:
        b_mask = celltype_masks[b]

        for k, v in lr_info.items():
            # Skip COMMOT's aggregate entry. Its key is 'total-total'; the
            # previous 'total=total' spelling could never match, so the
            # aggregate was counted as if it were a ligand-receptor pair.
            if k in ('total-total', 'total=total'):
                continue

            interactions[a][b][k] = np.sum(v[a_mask, :][:, b_mask].astype(bool))

len(interactions)

9

In [None]:
# Display the nested per-cell-type-pair counts.
interactions

defaultdict(<function __main__.<lambda>()>,
            {'T cells': defaultdict(dict,
                         {'T cells': {'ADGRE5-CD55': 887,
                           'ALCAM-CD6': 1694,
                           'ANGPTL1-ITGA1_ITGB1': 41,
                           'ANXA1-FPR1': 4,
                           'C3-CR2': 7,
                           'C3-ITGAM_ITGB2': 1,
                           'C3-ITGAX_ITGB2': 0,
                           'CADM1-CADM1': 40,
                           'CCL19-CCR7': 63,
                           'CCL2-CCR2': 0,
                           'CCL20-CCR6': 26,
                           'CCL21-CCR7': 206,
                           'CCL3-CCR5': 0,
                           'CCL4-CCR5': 27,
                           'CCL5-CCR5': 175,
                           'CD226-NECTIN2': 0,
                           'CD226-PVR': 61,
                           'CD274-PDCD1': 27,
                           'CD40LG-CD40': 12,
                           'CD40LG-I