In [1]:
import cell2cell as c2c

import pandas as pd
import numpy as np

%matplotlib inline

In [2]:
# Show the installed cell2cell version used to run this notebook
c2c.__version__

'0.5.0'

# Load Data

Directories

In [3]:
import os

# Folder that holds the input files, relative to where the notebook is run
data_folder = './'
# Filesystem-encoded (bytes) form of the same path; not used in the cells shown here
directory = os.fsencode(data_folder)

In [6]:
# Load the precomputed InteractionTensor that the rest of the notebook borrows from.
# NOTE(review): the file is unpickled, which can execute arbitrary code -- only load
# pickle files that come from a trusted source.
# os.path.join builds a correct path whether or not data_folder ends with a separator.
precomputed = c2c.io.read_data.load_variable_with_pickle(
    os.path.join(data_folder, 'PreBuiltTensor-PBMC.pkl')
)

**Tensor**

To generate a prebuilt tensor, we only need the tensor itself (a list of lists or any other array, obtained from cell2cell or from another tool such as CellChat or CellPhoneDB). Here we will use a numpy array borrowed from a precomputed InteractionTensor.

In this case, the tensor is a numpy array:

In [7]:
# Borrow the raw array stored in the precomputed InteractionTensor
tensor = precomputed.tensor

In [9]:
# Check which container type holds the tensor
type(tensor)

numpy.ndarray

In [8]:
# Size of each dimension: (contexts, ligand-receptor pairs, sender cells, receiver cells)
tensor.shape

(60, 1639, 6, 6)

The tensor here contains the communication scores and looks like:

In [14]:
# Display the raw array of communication scores
tensor

array([[[[6.95970696e-02, 8.66377371e-02, 1.08539970e-01,
          1.15750916e-01, 1.07436184e-01, 1.00861580e-01],
         [1.50725202e-01, 1.67765870e-01, 1.89668102e-01,
          1.96879048e-01, 1.88564316e-01, 1.81989712e-01],
         [1.92332633e-01, 2.09373301e-01, 2.31275533e-01,
          2.38486479e-01, 2.30171747e-01, 2.23597143e-01],
         [3.25274725e-01, 3.42315393e-01, 3.64217625e-01,
          3.71428571e-01, 3.63113839e-01, 3.56539235e-01],
         [3.32766355e-01, 3.49807022e-01, 3.71709255e-01,
          3.78920201e-01, 3.70605469e-01, 3.64030865e-01],
         [1.87045349e-01, 2.04086017e-01, 2.25988249e-01,
          2.33199195e-01, 2.24884463e-01, 2.18309859e-01]],

        [[1.46520147e-02, 3.16926821e-02, 5.35949147e-02,
          6.08058608e-02, 5.24911287e-02, 4.59165248e-02],
         [1.14012121e-02, 2.84418796e-02, 5.03441122e-02,
          5.75550583e-02, 4.92403262e-02, 4.26657223e-02],
         [1.12159742e-02, 2.82566417e-02, 5.01588743e-02,
    

**Names for each dimension element**

Now we need a name for each coordinate in each dimension of the tensor. Again, we will borrow that from our precomputed InteractionTensor.

In [23]:
# First dimension: names of the contexts (samples)
dim1_names = precomputed.order_names[0]

In [24]:
# Second dimension: names of the ligand-receptor pairs
dim2_names = precomputed.order_names[1]

In [25]:
# Third dimension: names of the sender cells
dim3_names = precomputed.order_names[2]

In [26]:
# Fourth dimension: names of the receiver cells
dim4_names = precomputed.order_names[3]

What they look like:

In [27]:
# First five context (sample) names
dim1_names[:5]

['S-HC003', 'S-HC004', 'S-HC005', 'S-HC006', 'S-HC007']

In [28]:
# First five ligand-receptor pair names
dim2_names[:5]

['TGFB1^TGFBR1&TGFBR2',
 'TGFB2^TGFBR1&TGFBR2',
 'TGFB3^TGFBR1&TGFBR2',
 'TGFB1^ACVR1B&TGFBR2',
 'TGFB1^ACVR1C&TGFBR2']

In [29]:
# First five sender cell names
dim3_names[:5]

['B', 'CD4', 'CD8', 'DC', 'Mono']

In [30]:
# First five receiver cell names
dim4_names[:5]

['B', 'CD4', 'CD8', 'DC', 'Mono']

# Generate a prebuilt Interaction Tensor

With this information, we can now generate an InteractionTensor without needing either a gene expression matrix or a list of LR pairs; we can directly reuse the communication scores, even those computed with other tools.

In [32]:
# Assemble the InteractionTensor directly from the borrowed scores and names.
# order_names and order_labels are listed in the same order as the tensor dimensions.
new = c2c.tensor.PreBuiltTensor(
    tensor=tensor,
    order_names=[
        dim1_names,
        dim2_names,
        dim3_names,
        dim4_names,
    ],
    order_labels=[
        'Samples/Contexts',
        'Ligand-Receptor Pairs',
        'Sender Cells',
        'Receiver Cells',
    ],
    mask=None,    # Change if you want to omit values in the decomposition
    device=None,  # Change if you are using pytorch as backend for using GPU
)

# Ready to perform analyses

In [34]:
# Decompose into 5 factors (rank=5), starting from a random initialization;
# random_state is fixed so the run can be repeated
new.compute_tensor_factorization(rank=5,
                                 init='random',
                                 random_state=888)

Top-10 LR pairs in the first three factors:

In [37]:
# Print the top 10 ligand-receptor pairs of Factor 1 through Factor 3,
# leaving a blank line after each factor's listing
for factor_number in range(1, 4):
    factor_label = 'Factor {}'.format(factor_number)
    top_pairs = new.get_top_factor_elements('Ligand-Receptor Pairs', factor_label, 10)
    print(top_pairs, end='\n\n')

CLDN11^CLDN11         25.369671
CD99^CD99             14.829649
MIF^CD74&CXCR4        13.629020
CD22^PTPRC            12.904281
TNFSF13B^TNFRSF13B    12.831876
TNFSF13B^TNFRSF17     12.733968
MIF^CD74&CD44         12.687034
TNFSF13B^TNFRSF13C    12.416768
MIF^CD74&CXCR2        12.007872
APP^CD74              11.697411
Name: Factor 1, dtype: float64

GRN^SORT1            15.260803
ANXA1^FPR1           12.771221
ANXA1^FPR2           12.748963
VSIR^IGSF11          12.211230
LGALS9^CD44          12.178061
ITGB2^CD226          11.845652
ITGB2^ICAM2          11.383251
NAMPT^ITGA5&ITGB1    11.329927
PSAP^GPR37L1         11.254942
NAMPT^INSR           11.195418
Name: Factor 2, dtype: float64

MIF^CD74&CXCR4    14.845685
MIF^CD74&CD44     14.041137
LGALS9^CD44       10.631402
CD22^PTPRC        10.380199
CXCL12^CXCR4       9.839697
COL4A4^CD44        9.620613
LAMA4^CD44         9.547697
COL9A3^CD44        9.537544
COL4A3^CD44        9.497780
COL9A2^CD44        9.458782
Name: Factor 3, dtype: flo