Run tensor cell2cell on outputs of various communication scoring tools

In [1]:
import os
from collections import OrderedDict
import itertools
import numpy as np
import pandas as pd

# Seed NumPy's global random generator so the notebook is repeatable.
seed = 888
np.random.seed(seed)

# Absolute data locations. `rev_path` keeps its trailing '/' because file
# names are appended to it by plain string concatenation.
rev_path = '/data3/hratch/tc2c_analyses_1/natcomm_revisions/'
expression_data_path = '/data2/hratch/immune_CCI/covid/expression_data/covid_data/'

# Delimiters used in the communication matrices:
#   cp_delim separates sender and receiver in a column label,
#   lr_delim separates ligand and receptor in a row label.
cp_delim, lr_delim = '-', '&'

In [3]:
import sys
sys.path.insert(1, '/home/hratch/Projects/CCC/tc2c_analyses_1/notebooks/natcomm_revisions/')
from utility import matrix_to_interaction_tensor

In [None]:
# Expected input: a communication matrix
#   columns: sender<DELIM>receiver cell pairs (directionality matters), DELIM = '-'
#   rows:    ligand<DELIM>receptor pairs, DELIM = '&'

Load output of the different communication scoring pipelines

In [5]:
# load cellchat
# The CSV holds the scores of every sample stacked together, with a `Sample`
# column identifying which sample each row belongs to.
cellchat_cm = pd.read_csv(rev_path + 'interim/tc2c_external_inputs/cellchat/cellchat_balf.csv', index_col = 0)

# Split the stacked table into one communication matrix per sample.
cellchat = dict()
for sample in cellchat_cm.Sample.unique():
    # Non-inplace drop returns a new frame, so the filtered slice of
    # `cellchat_cm` is never mutated (this is what used to trigger
    # pandas' SettingWithCopyWarning).
    df = cellchat_cm[cellchat_cm.Sample == sample].drop(columns = ['Sample'])
    # Keep only the second '.'-separated piece of each row label.
    # NOTE(review): presumably labels look like '<prefix>.<ligand>&<receptor>';
    # an LR name that itself contains '.' would be truncated -- confirm.
    df.index = [label.split('.')[1] for label in df.index]
    cellchat[sample] = df

# cellchat_tensor = matrix_to_interaction_tensor(scores = cellchat, 
#                                               how='outer', cp_delim='-', lr_delim='&')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


Transform into 4D Interaction Tensor
Make all communication matrices consistent: each one must contain the same ligand-receptor (LR) pairs and the same cell-cell (CC) pairs, in the same order.

In [26]:
# Gather every sender-receiver (CC) column label and every ligand-receptor (LR)
# row label seen in any sample of any scoring method. Duplicates are kept here.

# This step is only needed because several pipelines may be combined; with a
# single pipeline the matrices can go straight into
# "matrix_to_interaction_tensor" with how = 'outer'.
score_methods = [cellchat, ]
method_names = ['cellchat', ]


cell_pairs = []
lr_pairs = []
for score_method in score_methods:
    sample_matrices = list(score_method.values())

    # column labels of every sample matrix, flattened into one list
    cell_pairs.extend(
        itertools.chain.from_iterable(m.columns.tolist() for m in sample_matrices)
    )

    # row labels of every sample matrix, flattened into one list
    lr_pairs.extend(
        itertools.chain.from_iterable(m.index.tolist() for m in sample_matrices)
    )

In [27]:
# Reduce the collected labels to their unique values, in sorted order, so that
# every matrix can later be laid out on identical axes.
cell_pairs = list(set(cell_pairs))
cell_pairs.sort()
lr_pairs = list(set(lr_pairs))
lr_pairs.sort()

# Samples to include, in the order they will be processed.
samples = [
    'C100', 'C141', 'C142', 'C143', 'C144', 'C145',
    'C146', 'C148', 'C149', 'C152', 'C51', 'C52',
]

In [32]:
def create_nan_vals_all(df, lr_pairs, cell_pairs):
    """Align one communication matrix to a shared set of rows and columns.

    Rows listed in ``lr_pairs`` and columns listed in ``cell_pairs`` that are
    missing from ``df`` are added and filled with NaN. The result has exactly
    the rows ``lr_pairs`` and the columns ``cell_pairs``, in the order given;
    labels of ``df`` that are not listed are dropped. ``df`` is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Communication scores, ligand-receptor pairs as the index and
        sender-receiver cell pairs as the columns.
    lr_pairs : list
        Row labels the output must have, in the desired order.
    cell_pairs : list
        Column labels the output must have, in the desired order.

    Returns
    -------
    pandas.DataFrame
        New frame indexed by ``lr_pairs`` with columns ``cell_pairs``.

    Raises
    ------
    ValueError
        If ``df`` has duplicate row or column labels (pandas cannot reindex
        an axis with duplicates).
    """
    # A single reindex adds the missing labels as NaN and fixes the order.
    # It avoids building a DataFrame from a set index and avoids concatenating
    # an all-NaN object-dtype frame, so numeric columns keep their dtype.
    return df.reindex(index=list(lr_pairs), columns=list(cell_pairs))

# Build one interaction tensor per scoring method, after aligning every
# sample's matrix to the shared LR rows and cell-pair columns.
tensors = {}
for idx, (method_name, score_method) in enumerate(zip(method_names, score_methods)):
    print(method_name)
    # Ordered by `samples`; the dict is passed to the tensor builder as-is.
    scores = OrderedDict(
        (sample, create_nan_vals_all(df=score_method[sample], lr_pairs=lr_pairs, cell_pairs=cell_pairs))
        for sample in samples
    )
    # Replace the raw per-sample matrices with the aligned ones.
    score_methods[idx] = scores
    # Use the delimiters from the configuration cell rather than repeating
    # the literals, so both places always agree.
    tensors[method_name] = matrix_to_interaction_tensor(scores, how = 'outer',
                                                        cp_delim=cp_delim, lr_delim=lr_delim)

  0%|          | 0/12 [00:00<?, ?it/s]

cellchat


100%|██████████| 12/12 [00:24<00:00,  2.05s/it]
