In [1]:
import commot as ct
import scanpy as sc
import pandas as pd
import numpy as np
from tqdm import tqdm

### Run Commot

In [2]:
adata = sc.read_h5ad('/Users/koush/Desktop/training_data/slideseq_mouse_lymphnode.h5ad')
adata

AnnData object with n_obs × n_vars = 24617 × 3604
    obs: 'cell_type', 'cell_type_int'
    obsm: 'X_spatial', 'spatial', 'tangram_ct_pred'
    layers: 'imputed_count', 'imputed_count_v0', 'normalized_count', 'raw_count'

In [3]:
# adata.X = adata.layers['imputed_count']
adata.X = adata.layers['normalized_count']


In [4]:
df_ligrec = ct.pp.ligand_receptor_database(
    database='CellChat', 
    species='mouse', 
    signaling_type=None
)
    
df_ligrec.columns = ['ligand', 'receptor', 'pathway', 'signaling']  

df_ligrec['name'] = df_ligrec['ligand'] + '-' + df_ligrec['receptor']
len(df_ligrec['name'].unique())

2017

In [5]:
import sys
sys.path.append('../../src')
from spaceoracle.tools.network import expand_paired_interactions

expanded = expand_paired_interactions(df_ligrec)
genes = set(expanded.ligand) | set(expanded.receptor)
genes = list(genes)

expanded

Unnamed: 0,ligand,receptor,pathway,signaling,name
0,Tgfb1,Tgfbr1,TGFb,Secreted Signaling,Tgfb1-Tgfbr1_Tgfbr2
0,Tgfb1,Tgfbr2,TGFb,Secreted Signaling,Tgfb1-Tgfbr1_Tgfbr2
1,Tgfb2,Tgfbr1,TGFb,Secreted Signaling,Tgfb2-Tgfbr1_Tgfbr2
1,Tgfb2,Tgfbr2,TGFb,Secreted Signaling,Tgfb2-Tgfbr1_Tgfbr2
2,Tgfb3,Tgfbr1,TGFb,Secreted Signaling,Tgfb3-Tgfbr1_Tgfbr2
...,...,...,...,...,...
2016,Itga9,Vcam1,VCAM,Cell-Cell Contact,Itga9_Itgb1-Vcam1
2016,Itgb1,Vcam1,VCAM,Cell-Cell Contact,Itga9_Itgb1-Vcam1
2017,Itga4,Vcam1,VCAM,Cell-Cell Contact,Itga4_Itgb7-Vcam1
2017,Itgb7,Vcam1,VCAM,Cell-Cell Contact,Itga4_Itgb7-Vcam1


In [6]:
expanded = expanded[expanded.ligand.isin(adata.var_names) & expanded.receptor.isin(adata.var_names)]
expanded

Unnamed: 0,ligand,receptor,pathway,signaling,name
0,Tgfb1,Tgfbr1,TGFb,Secreted Signaling,Tgfb1-Tgfbr1_Tgfbr2
0,Tgfb1,Tgfbr2,TGFb,Secreted Signaling,Tgfb1-Tgfbr1_Tgfbr2
1,Tgfb2,Tgfbr1,TGFb,Secreted Signaling,Tgfb2-Tgfbr1_Tgfbr2
1,Tgfb2,Tgfbr2,TGFb,Secreted Signaling,Tgfb2-Tgfbr1_Tgfbr2
3,Tgfb1,Tgfbr2,TGFb,Secreted Signaling,Tgfb1-Acvr1b_Tgfbr2
...,...,...,...,...,...
2015,Itga4,Vcam1,VCAM,Cell-Cell Contact,Itga4_Itgb1-Vcam1
2015,Itgb1,Vcam1,VCAM,Cell-Cell Contact,Itga4_Itgb1-Vcam1
2016,Itgb1,Vcam1,VCAM,Cell-Cell Contact,Itga9_Itgb1-Vcam1
2017,Itga4,Vcam1,VCAM,Cell-Cell Contact,Itga4_Itgb7-Vcam1


In [8]:
from spaceoracle.tools.utils import scale_adata

scale_adata(adata)

AnnData object with n_obs × n_vars = 24617 × 3604
    obs: 'cell_type', 'cell_type_int'
    obsm: 'X_spatial', 'spatial', 'tangram_ct_pred', 'spatial_unscaled'
    layers: 'imputed_count', 'imputed_count_v0', 'normalized_count', 'raw_count'

In [9]:
adata

AnnData object with n_obs × n_vars = 24617 × 3604
    obs: 'cell_type', 'cell_type_int'
    obsm: 'X_spatial', 'spatial', 'tangram_ct_pred', 'spatial_unscaled'
    layers: 'imputed_count', 'imputed_count_v0', 'normalized_count', 'raw_count'

In [10]:
ct.tl.spatial_communication(adata,
    database_name='user_database', 
    # df_ligrec=df_ligrec, 
    df_ligrec=expanded, 
    dis_thr=200, 
    # heteromeric=True
    heteromeric=False
)

In [20]:
expanded[expanded.ligand == 'Il2']

Unnamed: 0,ligand,receptor,pathway,signaling,name
719,Il2,Il2ra,IL2,Secreted Signaling,Il2-Il2ra_Il2rb_Il2rg
719,Il2,Il2rb,IL2,Secreted Signaling,Il2-Il2ra_Il2rb_Il2rg
719,Il2,Il2rg,IL2,Secreted Signaling,Il2-Il2ra_Il2rb_Il2rg
720,Il2,Il2rb,IL2,Secreted Signaling,Il2-Il2rb_Il2rg
720,Il2,Il2rg,IL2,Secreted Signaling,Il2-Il2rb_Il2rg


In [23]:
[i for i in adata.obsp.keys() if 'Il2' in i]

['commot-user_database-Il2-Il2rg',
 'commot-user_database-Il2-Il2ra',
 'commot-user_database-Il2-Il2rb',
 'commot-user_database-Il27-Il6st',
 'commot-user_database-Il27-Il27ra',
 'commot-user_database-Il15-Il2rg',
 'commot-user_database-Il15-Il2rb',
 'commot-user_database-Il4-Il2rg',
 'commot-user_database-Il24-Il20rb']

In [51]:
adata.write_h5ad('/Users/koush/Desktop/training_data/slideseq_mouse_lymphnode.h5ad')

In [10]:
# lr_info = {k.replace('commot-user_database-', ''): v for k, v in adata.obsp.items() if 'commot-user_database-' in k}
# len(lr_info)

In [11]:
# df_ligrec = df_ligrec[df_ligrec['name'].isin(lr_info.keys())]
# df_ligrec['signaling'].value_counts()

### Get cluster communication scores

In [52]:
expanded['rename'] = expanded['ligand'] + '-' + expanded['receptor']

In [53]:
from tqdm import tqdm
import commot as ct

for name in tqdm(expanded['rename'].unique()):

    ct.tl.cluster_communication(adata, database_name='user_database', pathway_name=name, clustering='cell_type',
        random_seed=12, n_permutations=100)

100%|██████████| 300/300 [45:04<00:00,  9.01s/it]


In [54]:
adata.write_h5ad('/Users/koush/Desktop/training_data/slideseq_mouse_lymphnode.h5ad')

In [None]:
from collections import defaultdict
data_dict = defaultdict(dict)

for name in expanded['rename']:
    data_dict[name]['communication_matrix'] = adata.uns[
        f'commot_cluster-cell_type-user_database-{name}']['communication_matrix']
    data_dict[name]['communication_pvalue'] = adata.uns[
        f'commot_cluster-cell_type-user_database-{name}']['communication_pvalue']

import pickle
with open('/Users/koush/Desktop/training_data/commot_slideseq_mouse_lymphnode_communication.pkl', 'wb') as f:
    pickle.dump(data_dict, f)

In [56]:
# check outputs

import pickle
with open('/Users/koush/Desktop/training_data/commot_slideseq_mouse_lymphnode_communication.pkl', 'rb') as f:
    info = pickle.load(f)

len(info.keys())

300

In [57]:
def get_sig_interactions(value_matrix, p_matrix, pval=0.3):
    p_matrix = np.where(p_matrix < pval, 1, 0)
    return value_matrix * p_matrix

interactions = {}

for lig, rec in tqdm(zip(expanded['ligand'], expanded['receptor']), total=len(expanded)):
    name = lig + '-' + rec

    if name in info.keys():

        value_matrix = info[name]['communication_matrix']
        p_matrix = info[name]['communication_pvalue']

        sig_matrix = get_sig_interactions(value_matrix, p_matrix)
        
        if sig_matrix.sum().sum() > 0:
            interactions[name] = sig_matrix
    
len(interactions)

100%|██████████| 413/413 [00:00<00:00, 5220.90it/s]


294

In [61]:
[i for i in interactions.keys() if 'Il2' in i]

['Il2-Il2ra',
 'Il2-Il2rb',
 'Il2-Il2rg',
 'Il4-Il2rg',
 'Il15-Il2rb',
 'Il15-Il2rg',
 'Il27-Il27ra',
 'Il27-Il6st',
 'Il24-Il20rb']

### Get expanded LR masks

In [63]:
interactions.keys()

dict_keys(['Tgfb1-Tgfbr1', 'Tgfb1-Tgfbr2', 'Tgfb2-Tgfbr1', 'Tgfb2-Tgfbr2', 'Gdf15-Tgfbr2', 'Wnt7a-Fzd2', 'Nrg1-Itgav', 'Nrg1-Itgb3', 'Igf1-Igf1r', 'Igf1-Itgav', 'Igf1-Itgb3', 'Igf1-Itga6', 'Ccl8-Ccr2', 'Ccl6-Ccr2', 'Ccl2-Ccr2', 'Ccl12-Ccr2', 'Ccl27a-Ccr2', 'Ccl17-Ccr4', 'Ccl22-Ccr4', 'Ccl5-Ccr4', 'Ccl20-Ccr6', 'Ppbp-Cxcr2', 'Pf4-Cxcr3', 'Cxcl9-Cxcr3', 'Cxcl13-Cxcr3', 'Cxcl12-Cxcr4', 'Cxcl13-Cxcr5', 'Cxcl16-Cxcr6', 'Mif-Cd74', 'Mif-Cxcr4', 'Mif-Cd44', 'Mif-Cxcr2', 'Il2-Il2ra', 'Il2-Il2rb', 'Il2-Il2rg', 'Il4-Il4ra', 'Il4-Il2rg', 'Il15-Il15ra', 'Il15-Il2rb', 'Il15-Il2rg', 'Il6-Il6ra', 'Il6-Il6st', 'Il27-Il27ra', 'Il27-Il6st', 'Clcf1-Lifr', 'Clcf1-Il6st', 'Il24-Il20rb', 'Il1b-Il1r1', 'Il1b-Il1rap', 'Il33-Il1rap', 'Csf1-Csf1r', 'Il16-Cd4', 'Gh-Ghr', 'Ifng-Ifngr1', 'Ifng-Ifngr2', 'Lta-Tnfrsf1a', 'Lta-Tnfrsf1b', 'Lta-Ltbr', 'Ltb-Ltbr', 'Fasl-Fas', 'Tnfsf13b-Tnfrsf13b', 'Tnfsf13b-Tnfrsf13c', 'Adipoq-Adipor1', 'Retn-Cap1', 'Nampt-Itga5', 'Nampt-Itgb1', 'Angptl2-Itga5', 'Angptl2-Itgb1', 'Angptl2

In [64]:
# create cell x gene matrix
ct_masks = {ct: adata.obs['cell_type'] == ct for ct in adata.obs['cell_type'].unique()}

df = pd.DataFrame(index=adata.obs_names, columns=genes)
df = df.fillna(0)

for name in tqdm(interactions.keys(), total=len(interactions)):
    lig, rec = name.rsplit('-', 1)
    
    tmp = interactions[name].sum(axis=1)
    for ct, val in zip(interactions[name].index, tmp):
        df.loc[ct_masks[ct], lig] += tmp[ct]
    
    tmp = interactions[name].sum(axis=0)
    for ct, val in zip(interactions[name].columns, tmp):
        df.loc[ct_masks[ct], rec] += tmp[ct]

df.shape

100%|██████████| 294/294 [00:09<00:00, 32.41it/s]


(24617, 993)

In [65]:
print('Number of LR filtered using celltype specificity:')
np.where(df > 0, 1, 0).sum().sum() / (df.shape[0] * df.shape[1])

Number of LR filtered using celltype specificity:


0.17491809363353933

In [98]:
df.loc[:, df.mean() > 0].shape

(24617, 199)

In [104]:
# np.array(df.loc[:, df.mean() == 0].columns)

In [105]:
df[['Il2', 'Il2ra', 'Il2rb', 'Cxcr4']]

Unnamed: 0,Il2,Il2ra,Il2rb,Cxcr4
AAAAAAAAAAAATC,0.000012,0.000002,4.661060e-06,0.000015
AAAAAAAACCAACA,0.000006,0.000000,3.890292e-07,0.000005
AAAAAAAGCCAACA,0.000006,0.000000,3.890292e-07,0.000005
AAAAAAATTTAGGG,0.000015,0.000001,1.732573e-06,0.000009
AAAAAACGGTAGAT,0.000015,0.000001,1.732573e-06,0.000009
...,...,...,...,...
TTTTTTGTTTTCTA,0.000012,0.000002,4.661060e-06,0.000015
TTTTTTTATGGTAC,0.000015,0.000001,1.732573e-06,0.000009
TTTTTTTGTCGCCT,0.000012,0.000002,4.661060e-06,0.000015
TTTTTTTTGCTTTA,0.000003,0.000004,9.721187e-06,0.000036


In [106]:
# df.to_parquet('/ix/djishnu/shared/djishnu_kor11/miscellaneous/kidney_commot_LRs.parquet')
df.to_parquet('/Users/koush/Desktop/training_data/slideseq_mouse_lymphnode_LRs.parquet')

In [107]:
df.columns[np.where(df.sum() > 0)]

Index(['Pf4', 'Thy1', 'Notch2', 'Nrp2', 'Cr2', 'Lifr', 'Cd72', 'Cap1', 'Cd274',
       'Spn',
       ...
       'Ccr2', 'Tgfb1', 'Ccl2', 'Angptl2', 'Selplg', 'Prss2', 'Nrp1', 'Il15ra',
       'Csf1', 'Cdh1'],
      dtype='object', length=199)

### Save params in adata

In [108]:
adata

AnnData object with n_obs × n_vars = 24617 × 3604
    obs: 'cell_type', 'cell_type_int'
    uns: 'commot-user_database-info', 'commot_cluster-cell_type-user_database-Tgfb1-Tgfbr1', 'commot_cluster-cell_type-user_database-Tgfb1-Tgfbr2', 'commot_cluster-cell_type-user_database-Tgfb2-Tgfbr1', 'commot_cluster-cell_type-user_database-Tgfb2-Tgfbr2', 'commot_cluster-cell_type-user_database-Gdf15-Tgfbr2', 'commot_cluster-cell_type-user_database-Wnt7a-Fzd2', 'commot_cluster-cell_type-user_database-Wnt7a-Fzd5', 'commot_cluster-cell_type-user_database-Wnt7a-Fzd7', 'commot_cluster-cell_type-user_database-Wnt7a-Fzd8', 'commot_cluster-cell_type-user_database-Nrg1-Itgav', 'commot_cluster-cell_type-user_database-Nrg1-Itgb3', 'commot_cluster-cell_type-user_database-Igf1-Igf1r', 'commot_cluster-cell_type-user_database-Igf1-Itgav', 'commot_cluster-cell_type-user_database-Igf1-Itgb3', 'commot_cluster-cell_type-user_database-Igf1-Itga6', 'commot_cluster-cell_type-user_database-Ccl8-Ccr2', 'commot_cluster-c

In [109]:
from spaceoracle.models.parallel_estimators import *

cell_threshes = pd.read_parquet('/Users/koush/Desktop/training_data/slideseq_mouse_lymphnode_LRs.parquet')
adata.uns['cell_thresholds'] = cell_threshes
adata

AnnData object with n_obs × n_vars = 24617 × 3604
    obs: 'cell_type', 'cell_type_int'
    uns: 'commot-user_database-info', 'commot_cluster-cell_type-user_database-Tgfb1-Tgfbr1', 'commot_cluster-cell_type-user_database-Tgfb1-Tgfbr2', 'commot_cluster-cell_type-user_database-Tgfb2-Tgfbr1', 'commot_cluster-cell_type-user_database-Tgfb2-Tgfbr2', 'commot_cluster-cell_type-user_database-Gdf15-Tgfbr2', 'commot_cluster-cell_type-user_database-Wnt7a-Fzd2', 'commot_cluster-cell_type-user_database-Wnt7a-Fzd5', 'commot_cluster-cell_type-user_database-Wnt7a-Fzd7', 'commot_cluster-cell_type-user_database-Wnt7a-Fzd8', 'commot_cluster-cell_type-user_database-Nrg1-Itgav', 'commot_cluster-cell_type-user_database-Nrg1-Itgb3', 'commot_cluster-cell_type-user_database-Igf1-Igf1r', 'commot_cluster-cell_type-user_database-Igf1-Itgav', 'commot_cluster-cell_type-user_database-Igf1-Itgb3', 'commot_cluster-cell_type-user_database-Igf1-Itga6', 'commot_cluster-cell_type-user_database-Ccl8-Ccr2', 'commot_cluster-c

In [110]:
adata = init_received_ligands(
    adata, 
    radius=200, 
    contact_distance=30, 
    cell_threshes=cell_threshes
)
adata

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


AnnData object with n_obs × n_vars = 24617 × 3604
    obs: 'cell_type', 'cell_type_int'
    uns: 'commot-user_database-info', 'commot_cluster-cell_type-user_database-Tgfb1-Tgfbr1', 'commot_cluster-cell_type-user_database-Tgfb1-Tgfbr2', 'commot_cluster-cell_type-user_database-Tgfb2-Tgfbr1', 'commot_cluster-cell_type-user_database-Tgfb2-Tgfbr2', 'commot_cluster-cell_type-user_database-Gdf15-Tgfbr2', 'commot_cluster-cell_type-user_database-Wnt7a-Fzd2', 'commot_cluster-cell_type-user_database-Wnt7a-Fzd5', 'commot_cluster-cell_type-user_database-Wnt7a-Fzd7', 'commot_cluster-cell_type-user_database-Wnt7a-Fzd8', 'commot_cluster-cell_type-user_database-Nrg1-Itgav', 'commot_cluster-cell_type-user_database-Nrg1-Itgb3', 'commot_cluster-cell_type-user_database-Igf1-Igf1r', 'commot_cluster-cell_type-user_database-Igf1-Itgav', 'commot_cluster-cell_type-user_database-Igf1-Itgb3', 'commot_cluster-cell_type-user_database-Igf1-Itga6', 'commot_cluster-cell_type-user_database-Ccl8-Ccr2', 'commot_cluster-c

In [115]:
# del adata.layers['imputed_count_v0']
# del adata.obsp
keys = list(adata.obsm.keys())
for key in keys:
    if 'commot' in key:
        del adata.obsm[key]

In [118]:
keys = list(adata.uns.keys())
for key in keys:
    if 'commot' in key:
        del adata.uns[key]

In [119]:
adata

AnnData object with n_obs × n_vars = 24617 × 3604
    obs: 'cell_type', 'cell_type_int'
    uns: 'cell_thresholds', 'received_ligands', 'received_ligands_tfl'
    obsm: 'X_spatial', 'spatial', 'tangram_ct_pred', 'spatial_unscaled'
    layers: 'imputed_count', 'normalized_count', 'raw_count'

In [122]:
adata.write_h5ad('/Users/koush/Desktop/training_data/slideseq_mouse_lymphnode.h5ad')