# Running MEBOCOST to generate outputs for Tensor-cell2cell v2

In [2]:
import cell2cell as c2c
import scanpy as sc

import numpy as np
import pandas as pd

from mebocost import mebocost
from tqdm.auto import tqdm

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [3]:
import warnings
warnings.filterwarnings('ignore')

## Load Data

In [4]:
import os, sys

# Folder holding the input data, plus its bytes-encoded form.
data_folder = './data/'
directory = os.fsencode(data_folder)

# Folder where the per-time-point MEBOCOST tables are written.
# `makedirs(..., exist_ok=True)` also creates missing parent folders and does
# not fail when the folder already exists, so the cell is safe to re-run.
output_folder = './data/mebocost-results/'
os.makedirs(output_folder, exist_ok=True)

**RNA-seq data**

In [5]:
# Build the path to the annotated single-cell dataset and load it as AnnData.
h5ad_path = data_folder + '/annotated_seurat_norm_harmony_2022.h5ad'
rnaseq = sc.read_h5ad(h5ad_path)

In [6]:
# Replace the object with the AnnData built from its `.raw` slot.
# The guard keeps this cell safe to re-run: once the swap has happened the new
# object is expected to have no `.raw` (it would be None), and calling
# `.to_adata()` on it again would raise an AttributeError.
if rnaseq.raw is not None:
    rnaseq = rnaseq.raw.to_adata()

In [7]:
# Display the AnnData summary (dimensions and available annotation slots).
rnaseq

AnnData object with n_obs × n_vars = 15973 × 21042
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'percent.mt', 'RNA_snn_res.0.5', 'seurat_clusters', 'celltype', 'old.ident'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable'
    uns: 'neighbors'
    obsm: 'X_harmony', 'X_pca', 'X_umap'
    obsp: 'distances'

In [8]:
# Preview the first two rows of the per-cell metadata table.
rnaseq.obs.iloc[:2]

Unnamed: 0,orig.ident,nCount_RNA,nFeature_RNA,percent.mt,RNA_snn_res.0.5,seurat_clusters,celltype,old.ident
Month-01_AAACCTGAGAATTGTG-1,Month-01,6417.0,2431,2.898551,11,11,Other,11
Month-01_AAACCTGAGCAACGGT-1,Month-01,8522.0,2464,2.663694,6,6,Progenitor,6


## Analysis

Here, we use the full MEBOCOST database of metabolite-sensor interactions. The analysis is run by time point (month) in our dataset.

We do not filter by minimum sensor expression or metabolite level, nor by the fraction of cells in a cell type presenting them. Similarly, we do not rely on the permutation analysis (only a minimal number of shuffles is run and no P-value cutoff is applied), as we are not interested in identifying cell-type-specific communication; instead, we keep all communication scores because we are interested in finding patterns across contexts and sender-receiver cell types.

In [16]:
# Run MEBOCOST separately for every time point (value of `orig.ident`) and write
# one CSV of communication scores per time point into `output_folder`.

# Minimum number of single cells a cell type needs within a time point to be kept.
min_sc_number = 20

for month in rnaseq.obs['orig.ident'].unique():

    # Cells of this time point, leaving out the 'Other' and 'MC' cell types.
    in_context = (rnaseq.obs['orig.ident'] == month) \
                 & ~rnaseq.obs['celltype'].isin(['Other', 'MC'])
    meta_context = rnaseq.obs.loc[in_context]

    # Remove celltype per context that has few single cells
    celltype_counts = meta_context['celltype'].value_counts()
    excluded_sc = list(celltype_counts.index[celltype_counts < min_sc_number])

    meta_context = meta_context.loc[~meta_context['celltype'].isin(excluded_sc)]
    cells = list(meta_context.index)

    meta_context.index.name = 'barcode'
    # Take an explicit copy: the preprocessing steps below modify the object
    # in place, and a plain subset of `rnaseq` is only a view of it.
    tmp_data = rnaseq[cells].copy()

    # Keep genes in each sample with at least 4 single cells expressing it
    sc.pp.filter_genes(tmp_data, min_cells=4)

    # Normalize and Log1p
    sc.pp.normalize_total(tmp_data, target_sum=1e6)
    sc.pp.log1p(tmp_data)


    # MEBOCOST
    mebo_obj = mebocost.create_obj(
                        adata=tmp_data,
                        group_col='celltype',
                        condition_col=None,
                        met_est='mebocost',
                        # NOTE(review): absolute, machine-specific path; adjust to the local MEBOCOST install
                        config_path='/code/MEBOCOST-main/mebocost.conf',
                        exp_mat=None,
                        cell_ann=None,
                        ## make sure you set the right species
                        species='human',
                        met_pred=None,
                        met_enzyme=None,
                        met_sensor=None,
                        met_ann=None,
                        scFEA_ann=None,
                        compass_met_ann=None,
                        compass_rxn_ann=None,
                        cutoff_exp=0., # We avoid excluding cases to find smooth patterns with Tensor-cell2cell
                        cutoff_met=0., # We avoid excluding cases to find smooth patterns with Tensor-cell2cell
                        cutoff_prop=0., # We avoid excluding cases to find smooth patterns with Tensor-cell2cell
                        sensor_type='All',
                        thread=1
                        )

    commu_res = mebo_obj.infer_commu(n_shuffle=2, # We are interested in the comm score, not in the P-value
                                     seed=12345,
                                     Return=True,
                                     thread=None,
                                     save_permuation=False,
                                     min_cell_number=1,
                                     pval_method='permutation_test_fdr',
                                     pval_cutoff=1.0 # To avoid filtering and return all comm scores.
                                 )

    # Prepare data for Tensor-cell2cell and export.
    # Each interaction is labelled '<metabolite name>^<sensor>'; the column-wise
    # concatenation also works when no communication rows were returned.
    commu_res['Met-Sensor'] = commu_res['Metabolite_Name'].astype(str) + '^' + commu_res['Sensor'].astype(str)
    commu_res.to_csv(output_folder + 'MEBOCOST-{}.csv'.format(month))

[July 29, 2025 01:47:01]: We get expression data with 18632 genes and 3646 cells.
[July 29, 2025 01:47:01]: Data Preparation Done in 0.0026 seconds
[July 29, 2025 01:47:01]: Load config and read data based on given species [human].
[July 29, 2025 01:47:01]: Estimtate metabolite enzyme expression using mebocost
[July 29, 2025 01:47:14]: Infer communications
[July 29, 2025 01:47:14]: Sensor type used ['Transporter', 'Receptor; Transporter', 'Receptor', 'Receptor; Channel', 'Nuclear Receptor', 'Channel', 'Enzyme']
[July 29, 2025 01:47:14]: Parameters: {shuffling: 2 times, random seed: 12345, thread: 1}
[July 29, 2025 01:47:14]: met_sensor: (369, 8)
[July 29, 2025 01:47:14]: avg_exp: (1944, 4) for (gene, cell) of needed
[July 29, 2025 01:47:14]: avg_met: (546, 4) for (metabolite, cell) of needed
[July 29, 2025 01:47:14]: shuffling 2 times for generating backgroud
[July 29, 2025 01:47:14]: take exp and met avg for shuffling
[July 29, 2025 01:47:14]: thread: 1
[July 29, 2025 01:47:14]: SLC7A

[October 20, 2022 01:41:47]: Normalizing Cluster Mean and Calculating Communication Score
[October 20, 2022 01:41:47]: RXRB ~ HMDB0006219

[October 20, 2022 01:41:47]: Normalizing Cluster Mean and Calculating Communication Score[October 20, 2022 01:41:47]: NR3C1 ~ HMDB0000063
[October 20, 2022 01:41:47]: Calculating P-value
[October 20, 2022 01:41:47]: Calculating P-value[October 20, 2022 01:41:47]: RXRG ~ HMDB0002369
[October 20, 2022 01:41:47]: Normalizing Cluster Mean and Calculating Communication Score


[October 20, 2022 01:41:47]: Normalizing Cluster Mean and Calculating Communication Score[October 20, 2022 01:41:47]: Calculating P-value
[October 20, 2022 01:41:47]: Calculating P-value
[October 20, 2022 01:41:47]: RORB ~ HMDB0000067[October 20, 2022 01:41:48]: NR3C1 ~ HMDB0001547


[October 20, 2022 01:41:48]: Normalizing Cluster Mean and Calculating Communication Score[October 20, 2022 01:41:48]: ADRA2C ~ HMDB0000068[October 20, 2022 01:41:48]: Normalizing Cluster Mean and Calcu

[October 20, 2022 01:42:05]: Normalizing Cluster Mean and Calculating Communication Score
[October 20, 2022 01:42:05]: Normalizing Cluster Mean and Calculating Communication Score[October 20, 2022 01:42:05]: Calculating P-value

[October 20, 2022 01:42:05]: GRIN1 ~ HMDB0000123
[October 20, 2022 01:42:05]: Calculating P-value[October 20, 2022 01:42:05]: Normalizing Cluster Mean and Calculating Communication Score

[October 20, 2022 01:42:05]: Calculating P-value
[October 20, 2022 01:42:05]: TFR2 ~ HMDB0015531
[October 20, 2022 01:42:05]: Normalizing Cluster Mean and Calculating Communication Score[October 20, 2022 01:42:05]: S1PR1 ~ HMDB0000277

[October 20, 2022 01:42:05]: Normalizing Cluster Mean and Calculating Communication Score[October 20, 2022 01:42:05]: Calculating P-value

[October 20, 2022 01:42:05]: Calculating P-value[October 20, 2022 01:42:05]: TBXA2R ~ HMDB0001452

[October 20, 2022 01:42:05]: GABRA1 ~ HMDB0000112
[October 20, 2022 01:42:05]: Normalizing Cluster Mean and C

[October 20, 2022 01:42:08]: RXRG ~ HMDB0001358
[October 20, 2022 01:42:08]: Normalizing Cluster Mean and Calculating Communication Score
[October 20, 2022 01:42:08]: Calculating P-value
[October 20, 2022 01:42:08]: AR ~ HMDB0000053
[October 20, 2022 01:42:08]: Normalizing Cluster Mean and Calculating Communication Score
[October 20, 2022 01:42:08]: Calculating P-value
[October 20, 2022 01:42:08]: AR ~ HMDB0000077
[October 20, 2022 01:42:08]: Normalizing Cluster Mean and Calculating Communication Score
[October 20, 2022 01:42:08]: Calculating P-value
[October 20, 2022 01:42:08]: ADRB2 ~ HMDB0000068
[October 20, 2022 01:42:08]: Normalizing Cluster Mean and Calculating Communication Score
[October 20, 2022 01:42:08]: Calculating P-value
[October 20, 2022 01:42:09]: Calculating aboundance of metabolite and sensor expression in cell groups
[October 20, 2022 01:42:09]: provided cutoff for sensor expression, cutoff=0
[October 20, 2022 01:42:09]: provided cutoff for metabolite presence, cutof

[October 20, 2022 01:42:27]: Normalizing Cluster Mean and Calculating Communication Score
[October 20, 2022 01:42:27]: Calculating P-value
[October 20, 2022 01:42:28]: TFR2 ~ HMDB0015531
[October 20, 2022 01:42:28]: Normalizing Cluster Mean and Calculating Communication Score[October 20, 2022 01:42:28]: TRPV4 ~ HMDB0000673

[October 20, 2022 01:42:28]: Normalizing Cluster Mean and Calculating Communication Score
[October 20, 2022 01:42:28]: Calculating P-value[October 20, 2022 01:42:28]: Calculating P-value

[October 20, 2022 01:42:28]: S1PR3 ~ HMDB0000277[October 20, 2022 01:42:28]: GABBR2 ~ HMDB0000112
[October 20, 2022 01:42:28]: Normalizing Cluster Mean and Calculating Communication Score

[October 20, 2022 01:42:28]: Calculating P-value[October 20, 2022 01:42:28]: Normalizing Cluster Mean and Calculating Communication Score
[October 20, 2022 01:42:28]: GABRB2 ~ HMDB0000112

[October 20, 2022 01:42:28]: Calculating P-value[October 20, 2022 01:42:28]: Normalizing Cluster Mean and Ca

[October 20, 2022 01:42:46]: met_sensor: (59, 8)
[October 20, 2022 01:42:46]: avg_exp: (1635, 5) for (gene, cell) of needed
[October 20, 2022 01:42:46]: avg_met: (493, 5) for (metabolite, cell) of needed
[October 20, 2022 01:42:46]: shuffling 1 times for generating backgroud
[October 20, 2022 01:42:46]: take exp and met avg for shuffling
[October 20, 2022 01:42:46]: thread: 8
[October 20, 2022 01:42:46]: ADORA1 ~ HMDB0000045
[October 20, 2022 01:42:46]: Normalizing Cluster Mean and Calculating Communication Score
[October 20, 2022 01:42:46]: ADORA2B ~ HMDB0000050
[October 20, 2022 01:42:46]: Normalizing Cluster Mean and Calculating Communication Score
[October 20, 2022 01:42:46]: Calculating P-value[October 20, 2022 01:42:46]: Calculating P-value
[October 20, 2022 01:42:46]: SCARB1 ~ HMDB0000067

[October 20, 2022 01:42:46]: Normalizing Cluster Mean and Calculating Communication Score
[October 20, 2022 01:42:46]: Calculating P-value[October 20, 2022 01:42:46]: GABBR1 ~ HMDB0000112

[Oc