In [1]:
import commot as ct
import scanpy as sc
import pandas as pd
import numpy as np
from tqdm import tqdm

### Run Commot

In [2]:
# Load the AnnData object that the rest of the notebook annotates with COMMOT results.
input_h5ad = '/Users/koush/Desktop/training_data/mouse_brain_wt_slideseq.h5ad'
adata = sc.read_h5ad(input_h5ad)
adata

AnnData object with n_obs × n_vars = 23409 × 3311
    obs: 'cell_type', 'cell_type_int', 'seurat_clusters', 'umap_1', 'umap_2', 'author_cell_type'
    obsm: 'X_umap', 'spatial'
    layers: 'imputed_count', 'normalized_count', 'raw_count'

In [3]:
# Use the 'normalized_count' layer as the expression matrix for everything below.
# Alternative kept for reference: adata.X = adata.layers['imputed_count']
adata.X = adata.layers['normalized_count']


In [4]:
# Pull the mouse CellChat ligand-receptor table.
# signaling_type=None presumably applies no restriction on signaling category -- TODO confirm
# against the COMMOT documentation.
df_ligrec = ct.pp.ligand_receptor_database(
    species='mouse',
    database='CellChat',
    signaling_type=None,
)

# Rename the four returned columns, then label every row as '<ligand>-<receptor>'.
df_ligrec.columns = ['ligand', 'receptor', 'pathway', 'signaling']
pair_label = df_ligrec['ligand'] + '-' + df_ligrec['receptor']
df_ligrec['name'] = pair_label

# Number of distinct ligand-receptor labels in the table.
len(pd.unique(df_ligrec['name']))

2017

In [5]:
import sys
sys.path.append('../../src')
from spaceoracle.tools.network import expand_paired_interactions

# Expand multi-subunit entries (e.g. 'Tgfb1-Tgfbr1_Tgfbr2') into one row per
# single ligand gene / single receptor gene combination.
expanded = expand_paired_interactions(df_ligrec)

# Every gene that appears as a ligand or a receptor. Sorted so the order (and therefore the
# column order of any frame built from it) is the same on every run; a bare set has no stable order.
# Note: this is collected before `expanded` is filtered against adata.var_names, so it can
# contain genes that are absent from the dataset.
genes = sorted(set(expanded.ligand) | set(expanded.receptor))

expanded

Unnamed: 0,ligand,receptor,pathway,signaling,name
0,Tgfb1,Tgfbr1,TGFb,Secreted Signaling,Tgfb1-Tgfbr1_Tgfbr2
0,Tgfb1,Tgfbr2,TGFb,Secreted Signaling,Tgfb1-Tgfbr1_Tgfbr2
1,Tgfb2,Tgfbr1,TGFb,Secreted Signaling,Tgfb2-Tgfbr1_Tgfbr2
1,Tgfb2,Tgfbr2,TGFb,Secreted Signaling,Tgfb2-Tgfbr1_Tgfbr2
2,Tgfb3,Tgfbr1,TGFb,Secreted Signaling,Tgfb3-Tgfbr1_Tgfbr2
...,...,...,...,...,...
2016,Itga9,Vcam1,VCAM,Cell-Cell Contact,Itga9_Itgb1-Vcam1
2016,Itgb1,Vcam1,VCAM,Cell-Cell Contact,Itga9_Itgb1-Vcam1
2017,Itga4,Vcam1,VCAM,Cell-Cell Contact,Itga4_Itgb7-Vcam1
2017,Itgb7,Vcam1,VCAM,Cell-Cell Contact,Itga4_Itgb7-Vcam1


In [6]:
# Keep only pairs whose ligand AND receptor genes are both measured in this dataset.
in_dataset = expanded.ligand.isin(adata.var_names) & expanded.receptor.isin(adata.var_names)

# .copy() makes the result an independent frame, so adding columns to it later does not
# write into a slice of the unfiltered table (SettingWithCopyWarning).
expanded = expanded[in_dataset].copy()
expanded

Unnamed: 0,ligand,receptor,pathway,signaling,name
0,Tgfb1,Tgfbr1,TGFb,Secreted Signaling,Tgfb1-Tgfbr1_Tgfbr2
0,Tgfb1,Tgfbr2,TGFb,Secreted Signaling,Tgfb1-Tgfbr1_Tgfbr2
1,Tgfb2,Tgfbr1,TGFb,Secreted Signaling,Tgfb2-Tgfbr1_Tgfbr2
1,Tgfb2,Tgfbr2,TGFb,Secreted Signaling,Tgfb2-Tgfbr1_Tgfbr2
2,Tgfb3,Tgfbr1,TGFb,Secreted Signaling,Tgfb3-Tgfbr1_Tgfbr2
...,...,...,...,...,...
2015,Itgb1,Vcam1,VCAM,Cell-Cell Contact,Itga4_Itgb1-Vcam1
2016,Itga9,Vcam1,VCAM,Cell-Cell Contact,Itga9_Itgb1-Vcam1
2016,Itgb1,Vcam1,VCAM,Cell-Cell Contact,Itga9_Itgb1-Vcam1
2017,Itga4,Vcam1,VCAM,Cell-Cell Contact,Itga4_Itgb7-Vcam1


In [7]:
from spaceoracle.tools.utils import scale_adata

# After this call adata.obsm lists an extra 'spatial_unscaled' entry.
# NOTE(review): presumably this rescales obsm['spatial'] in place and keeps the original
# coordinates under 'spatial_unscaled' -- confirm, and confirm that re-running the cell is safe.
scale_adata(adata)

AnnData object with n_obs × n_vars = 23409 × 3311
    obs: 'cell_type', 'cell_type_int', 'seurat_clusters', 'umap_1', 'umap_2', 'author_cell_type'
    obsm: 'X_umap', 'spatial', 'spatial_unscaled'
    layers: 'imputed_count', 'normalized_count', 'raw_count'

In [8]:
# The notebook-level `adata` now lists obsm['spatial_unscaled'], i.e. scale_adata changed it in place.
adata

AnnData object with n_obs × n_vars = 23409 × 3311
    obs: 'cell_type', 'cell_type_int', 'seurat_clusters', 'umap_1', 'umap_2', 'author_cell_type'
    obsm: 'X_umap', 'spatial', 'spatial_unscaled'
    layers: 'imputed_count', 'normalized_count', 'raw_count'

In [9]:
# Run COMMOT on the expanded single-gene pairs and store the results in `adata`
# under the 'user_database' name.
# heteromeric=False is used with `expanded` (one ligand gene and one receptor gene per row);
# the alternative is df_ligrec=df_ligrec with heteromeric=True.
# dis_thr=200 is the same value later passed as `radius` to init_received_ligands.
ct.tl.spatial_communication(
    adata,
    database_name='user_database',
    df_ligrec=expanded,
    heteromeric=False,
    dis_thr=200,
)

In [10]:
# Inspect what COMMOT added: a uns info entry, obsm sender/receiver sums and per-pair obsp matrices.
adata

AnnData object with n_obs × n_vars = 23409 × 3311
    obs: 'cell_type', 'cell_type_int', 'seurat_clusters', 'umap_1', 'umap_2', 'author_cell_type'
    uns: 'commot-user_database-info'
    obsm: 'X_umap', 'spatial', 'spatial_unscaled', 'commot-user_database-sum-sender', 'commot-user_database-sum-receiver'
    layers: 'imputed_count', 'normalized_count', 'raw_count'
    obsp: 'commot-user_database-Nrtn-Gfra2', 'commot-user_database-Nrtn-Ret', 'commot-user_database-Tnfsf12-Tnfrsf12a', 'commot-user_database-Pros1-Tyro3', 'commot-user_database-Pros1-Axl', 'commot-user_database-Tnfsf8-Tnfrsf8', 'commot-user_database-Cntn2-Cntn2', 'commot-user_database-Cntn2-Cntnap2', 'commot-user_database-Cntn2-L1cam', 'commot-user_database-Col9a2-Itgb1', 'commot-user_database-Col9a2-Itga11', 'commot-user_database-Col9a2-Itga3', 'commot-user_database-Col9a2-Itgav', 'commot-user_database-Col9a2-Itga9', 'commot-user_database-Col9a2-Cd44', 'commot-user_database-Col9a2-Itgb8', 'commot-user_database-Col9a2-Itga1'

In [11]:
# Checkpoint the object with the spatial-communication results to a separate *_commot.h5ad file.
adata.write_h5ad('/Users/koush/Desktop/training_data/mouse_brain_wt_slideseq_commot.h5ad')

In [10]:
# lr_info = {k.replace('commot-user_database-', ''): v for k, v in adata.obsp.items() if 'commot-user_database-' in k}
# len(lr_info)

In [11]:
# df_ligrec = df_ligrec[df_ligrec['name'].isin(lr_info.keys())]
# df_ligrec['signaling'].value_counts()

### Get cluster communication scores

In [12]:
# Single-gene pair label ('<ligand>-<receptor>'); the existing `name` column still holds the
# original, possibly multi-subunit, label.
# assign() returns a new frame, so nothing is written into a filtered slice and the cell is
# safe to re-run.
expanded = expanded.assign(rename=expanded['ligand'] + '-' + expanded['receptor'])

In [13]:
# Re-imported so this long-running cell still works if `ct` was rebound by another cell
# (a later cell uses `ct` as a loop variable).
from tqdm import tqdm
import commot as ct

# Summarise communication between 'cell_type' clusters for every distinct ligand-receptor pair,
# with 100 permutations and a fixed random seed.
unique_pairs = expanded['rename'].unique()

for pair_name in tqdm(unique_pairs):
    ct.tl.cluster_communication(
        adata,
        database_name='user_database',
        pathway_name=pair_name,
        clustering='cell_type',
        n_permutations=100,
        random_seed=12,
    )

100%|██████████| 1015/1015 [1:13:47<00:00,  4.36s/it]


In [24]:
# Overwrite the *_commot.h5ad checkpoint; adata.uns now also holds the per-pair
# 'commot_cluster-cell_type-user_database-*' results.
adata.write_h5ad('/Users/koush/Desktop/training_data/mouse_brain_wt_slideseq_commot.h5ad')

In [79]:
from collections import defaultdict
import pickle

# Collect, per ligand-receptor pair, the cluster-level communication matrix and its p-values.
data_dict = defaultdict(dict)

# Iterate over distinct labels only: `expanded` holds repeated 'rename' values, and the
# cluster-communication results were computed once per unique label.
for name in expanded['rename'].unique():
    result = adata.uns[f'commot_cluster-cell_type-user_database-{name}']
    data_dict[name]['communication_matrix'] = result['communication_matrix']
    data_dict[name]['communication_pvalue'] = result['communication_pvalue']

# Persist the collected matrices so later cells do not need the full COMMOT output.
with open('/Users/koush/Desktop/training_data/mouse_brain_wt_slideseq_communication.pkl', 'wb') as f:
    pickle.dump(data_dict, f)

In [81]:
# Check outputs: reload the pickle written by this notebook and count the stored pairs.
# (pickle.load executes arbitrary code -- only use it on files you produced yourself.)
import pickle

communication_pkl = '/Users/koush/Desktop/training_data/mouse_brain_wt_slideseq_communication.pkl'
with open(communication_pkl, 'rb') as handle:
    info = pickle.load(handle)

len(info)

1015

In [82]:
def get_sig_interactions(value_matrix, p_matrix, pval=0.3):
    """Zero out communication scores whose p-value is not below `pval`.

    Parameters
    ----------
    value_matrix : array-like or DataFrame
        Communication scores.
    p_matrix : array-like or DataFrame
        P-values with the same shape as `value_matrix`.
    pval : float, default 0.3
        Entries with p-value strictly less than this are kept.

    Returns
    -------
    `value_matrix` multiplied element-wise by a 0/1 significance mask.
    """
    # 1 where the p-value passes the threshold, 0 elsewhere (plain ndarray, no labels).
    significant = np.asarray(p_matrix < pval).astype(int)
    return value_matrix * significant

# Pair label -> thresholded communication matrix, kept only when some score survives the threshold.
interactions = {}

pair_iter = zip(expanded['ligand'], expanded['receptor'])
for lig, rec in tqdm(pair_iter, total=len(expanded)):
    name = f'{lig}-{rec}'

    # Skip pairs for which no cluster-level result was stored.
    if name not in info:
        continue

    entry = info[name]
    sig_matrix = get_sig_interactions(
        entry['communication_matrix'],
        entry['communication_pvalue'],
    )

    # Drop pairs whose thresholded matrix sums to zero or less.
    if sig_matrix.sum().sum() > 0:
        interactions[name] = sig_matrix

len(interactions)

100%|██████████| 1536/1536 [00:00<00:00, 5503.35it/s]


1009

### Get expanded LR masks

In [83]:
# List the ligand-receptor labels that kept at least one significant cluster-level score.
interactions.keys()

dict_keys(['Tgfb1-Tgfbr1', 'Tgfb1-Tgfbr2', 'Tgfb2-Tgfbr1', 'Tgfb2-Tgfbr2', 'Tgfb3-Tgfbr1', 'Tgfb3-Tgfbr2', 'Bmp2-Bmpr1a', 'Bmp2-Bmpr2', 'Bmp2-Bmpr1b', 'Bmp4-Bmpr1a', 'Bmp4-Bmpr2', 'Bmp4-Bmpr1b', 'Gdf5-Bmpr1a', 'Gdf5-Bmpr2', 'Gdf5-Bmpr1b', 'Gdf6-Bmpr1a', 'Gdf6-Bmpr2', 'Gdf6-Bmpr1b', 'Bmp5-Bmpr2', 'Bmp5-Bmpr1a', 'Bmp5-Bmpr1b', 'Bmp7-Bmpr2', 'Bmp7-Bmpr1a', 'Bmp7-Bmpr1b', 'Bmp8a-Bmpr2', 'Bmp8a-Bmpr1a', 'Bmp8a-Bmpr1b', 'Bmp8b-Bmpr2', 'Bmp8b-Bmpr1a', 'Bmp8b-Bmpr1b', 'Bmp10-Bmpr2', 'Gdf2-Bmpr2', 'Gdf11-Tgfbr1', 'Mstn-Tgfbr1', 'Gdnf-Gfra1', 'Gdnf-Ret', 'Nrtn-Gfra2', 'Nrtn-Ret', 'Artn-Gfra3', 'Artn-Ret', 'Artn-Gfra1', 'Wnt1-Fzd1', 'Wnt1-Lrp5', 'Wnt1-Fzd10', 'Wnt1-Fzd2', 'Wnt1-Fzd3', 'Wnt1-Fzd4', 'Wnt1-Fzd5', 'Wnt1-Fzd7', 'Wnt1-Fzd8', 'Wnt1-Fzd9', 'Wnt10a-Fzd1', 'Wnt10a-Lrp5', 'Wnt10a-Fzd10', 'Wnt10a-Fzd2', 'Wnt10a-Fzd3', 'Wnt10a-Fzd4', 'Wnt10a-Fzd5', 'Wnt10a-Fzd7', 'Wnt10a-Fzd8', 'Wnt10a-Fzd9', 'Wnt16-Fzd1', 'Wnt16-Lrp5', 'Wnt16-Fzd10', 'Wnt16-Fzd2', 'Wnt16-Fzd3', 'Wnt16-Fzd4', 'Wnt16-Fzd5', 'W

In [84]:
# create cell x gene matrix

# One boolean mask over cells per cell type. The loop variables are called `cell_type`, not `ct`,
# so the `import commot as ct` module alias is not overwritten by this cell.
ct_masks = {
    cell_type: adata.obs['cell_type'] == cell_type
    for cell_type in adata.obs['cell_type'].unique()
}

# Start from float zeros directly instead of an empty object-dtype frame followed by fillna(0).
df = pd.DataFrame(0.0, index=adata.obs_names, columns=genes)

# Map each '<ligand>-<receptor>' label back to its two genes explicitly, so gene symbols that
# themselves contain '-' cannot be split in the wrong place.
pair_genes = {
    f'{lig}-{rec}': (lig, rec)
    for lig, rec in zip(expanded['ligand'], expanded['receptor'])
}

for name in tqdm(interactions.keys(), total=len(interactions)):
    lig, rec = pair_genes[name]
    sig_matrix = interactions[name]

    # Row sums per cluster are added to the ligand gene for all cells of that cluster
    # (rows are presumably the sending clusters -- confirm against the COMMOT docs).
    row_totals = sig_matrix.sum(axis=1)
    for cell_type, score in row_totals.items():
        df.loc[ct_masks[cell_type], lig] += score

    # Column sums per cluster are added to the receptor gene for all cells of that cluster.
    col_totals = sig_matrix.sum(axis=0)
    for cell_type, score in col_totals.items():
        df.loc[ct_masks[cell_type], rec] += score

df.shape

100%|██████████| 1009/1009 [00:23<00:00, 42.71it/s]


(23409, 993)

In [85]:
print('Number of LR filtered using celltype specificity:')
# Despite the label, the value displayed is the fraction of (cell, gene) entries in df that are > 0.
(df > 0).to_numpy().sum() / df.size

Number of LR filtered using celltype specificity:


0.36856263742390505

In [86]:
# Shape of df restricted to gene columns whose mean over cells is greater than zero.
df.loc[:, df.mean() > 0].shape

(23409, 414)

In [104]:
# np.array(df.loc[:, df.mean() == 0].columns)

In [87]:
# Compare shapes: df and adata have the same number of rows (cells); columns differ.
df.shape, adata.shape

((23409, 993), (23409, 3311))

In [88]:
# Save the cell x gene score matrix; it is read back below as `cell_threshes`.
df.to_parquet('/Users/koush/Desktop/training_data/mouse_brain_wt_slideseq_LRs.parquet')

In [42]:
# Inspect adata; uns now holds one 'commot_cluster-cell_type-user_database-*' entry per pair.
adata

AnnData object with n_obs × n_vars = 23409 × 3311
    obs: 'cell_type', 'cell_type_int', 'seurat_clusters', 'umap_1', 'umap_2', 'author_cell_type'
    uns: 'commot-user_database-info', 'commot_cluster-cell_type-user_database-Tgfb1-Tgfbr1', 'commot_cluster-cell_type-user_database-Tgfb1-Tgfbr2', 'commot_cluster-cell_type-user_database-Tgfb2-Tgfbr1', 'commot_cluster-cell_type-user_database-Tgfb2-Tgfbr2', 'commot_cluster-cell_type-user_database-Tgfb3-Tgfbr1', 'commot_cluster-cell_type-user_database-Tgfb3-Tgfbr2', 'commot_cluster-cell_type-user_database-Bmp2-Bmpr1a', 'commot_cluster-cell_type-user_database-Bmp2-Bmpr2', 'commot_cluster-cell_type-user_database-Bmp2-Bmpr1b', 'commot_cluster-cell_type-user_database-Bmp4-Bmpr1a', 'commot_cluster-cell_type-user_database-Bmp4-Bmpr2', 'commot_cluster-cell_type-user_database-Bmp4-Bmpr1b', 'commot_cluster-cell_type-user_database-Gdf5-Bmpr1a', 'commot_cluster-cell_type-user_database-Gdf5-Bmpr2', 'commot_cluster-cell_type-user_database-Gdf5-Bmpr1b', 'c

### Save params in adata

In [89]:
# Import only the helper this notebook uses; a wildcard import hides where names come from
# and can silently shadow existing notebook variables.
from spaceoracle.models.parallel_estimators import init_received_ligands

# Reload the cell x gene score matrix saved above and attach it to adata.
cell_threshes = pd.read_parquet('/Users/koush/Desktop/training_data/mouse_brain_wt_slideseq_LRs.parquet')
adata.uns['cell_thresholds'] = cell_threshes
adata

AnnData object with n_obs × n_vars = 23409 × 3311
    obs: 'cell_type', 'cell_type_int', 'seurat_clusters', 'umap_1', 'umap_2', 'author_cell_type'
    uns: 'commot-user_database-info', 'commot_cluster-cell_type-user_database-Tgfb1-Tgfbr1', 'commot_cluster-cell_type-user_database-Tgfb1-Tgfbr2', 'commot_cluster-cell_type-user_database-Tgfb2-Tgfbr1', 'commot_cluster-cell_type-user_database-Tgfb2-Tgfbr2', 'commot_cluster-cell_type-user_database-Tgfb3-Tgfbr1', 'commot_cluster-cell_type-user_database-Tgfb3-Tgfbr2', 'commot_cluster-cell_type-user_database-Bmp2-Bmpr1a', 'commot_cluster-cell_type-user_database-Bmp2-Bmpr2', 'commot_cluster-cell_type-user_database-Bmp2-Bmpr1b', 'commot_cluster-cell_type-user_database-Bmp4-Bmpr1a', 'commot_cluster-cell_type-user_database-Bmp4-Bmpr2', 'commot_cluster-cell_type-user_database-Bmp4-Bmpr1b', 'commot_cluster-cell_type-user_database-Gdf5-Bmpr1a', 'commot_cluster-cell_type-user_database-Gdf5-Bmpr2', 'commot_cluster-cell_type-user_database-Gdf5-Bmpr1b', 'c

In [90]:
%%time

# Compute received-ligand values and rebind `adata` to the returned object.
# radius=200 is the same value passed as dis_thr to ct.tl.spatial_communication earlier.
# NOTE(review): presumably this call is what adds uns['received_ligands'] and
# uns['received_ligands_tfl'] (both appear in adata.uns afterwards) -- confirm in
# spaceoracle.models.parallel_estimators. The meaning of contact_distance=30 is not visible here.
adata = init_received_ligands(
    adata, 
    radius=200, 
    contact_distance=30, 
    cell_threshes=cell_threshes
)
adata

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


CPU times: user 8min 4s, sys: 11.8 s, total: 8min 15s
Wall time: 1min 8s


AnnData object with n_obs × n_vars = 23409 × 3311
    obs: 'cell_type', 'cell_type_int', 'seurat_clusters', 'umap_1', 'umap_2', 'author_cell_type'
    uns: 'commot-user_database-info', 'commot_cluster-cell_type-user_database-Tgfb1-Tgfbr1', 'commot_cluster-cell_type-user_database-Tgfb1-Tgfbr2', 'commot_cluster-cell_type-user_database-Tgfb2-Tgfbr1', 'commot_cluster-cell_type-user_database-Tgfb2-Tgfbr2', 'commot_cluster-cell_type-user_database-Tgfb3-Tgfbr1', 'commot_cluster-cell_type-user_database-Tgfb3-Tgfbr2', 'commot_cluster-cell_type-user_database-Bmp2-Bmpr1a', 'commot_cluster-cell_type-user_database-Bmp2-Bmpr2', 'commot_cluster-cell_type-user_database-Bmp2-Bmpr1b', 'commot_cluster-cell_type-user_database-Bmp4-Bmpr1a', 'commot_cluster-cell_type-user_database-Bmp4-Bmpr2', 'commot_cluster-cell_type-user_database-Bmp4-Bmpr1b', 'commot_cluster-cell_type-user_database-Gdf5-Bmpr1a', 'commot_cluster-cell_type-user_database-Gdf5-Bmpr2', 'commot_cluster-cell_type-user_database-Gdf5-Bmpr1b', 'c

In [92]:
# del adata.layers['imputed_count_v0']

# Drop the pairwise COMMOT matrices and the COMMOT sender/receiver summaries before saving.
del adata.obsp

# Collect the matching keys first so obsm is not modified while it is being iterated.
commot_obsm_keys = [key for key in adata.obsm.keys() if 'commot' in key]
for commot_key in commot_obsm_keys:
    del adata.obsm[commot_key]

In [93]:
# Remove every COMMOT entry from adata.uns; the key list is materialised first so uns is
# not modified while it is being iterated.
commot_uns_keys = [key for key in adata.uns.keys() if 'commot' in key]
for commot_key in commot_uns_keys:
    del adata.uns[commot_key]

In [94]:
# Confirm the cleanup: no 'commot' keys remain in uns or obsm, and obsp is no longer listed.
adata

AnnData object with n_obs × n_vars = 23409 × 3311
    obs: 'cell_type', 'cell_type_int', 'seurat_clusters', 'umap_1', 'umap_2', 'author_cell_type'
    uns: 'cell_thresholds', 'received_ligands', 'received_ligands_tfl'
    obsm: 'X_umap', 'spatial', 'spatial_unscaled'
    layers: 'imputed_count', 'normalized_count', 'raw_count'

In [95]:
# NOTE(review): this is the same path `adata` was read from at the top of the notebook, so the
# input file is overwritten with the modified object (X reassigned, obsm['spatial_unscaled'] and
# new uns entries added). A re-run will therefore start from this output rather than the original
# data -- confirm this is intended, or write to a new file.
adata.write_h5ad('/Users/koush/Desktop/training_data/mouse_brain_wt_slideseq.h5ad')