Use LIANA to compute cell-cell communication (CCC) scores on the BALF COVID-19 dataset with multiple CCC tools

env_name: liana

In [1]:
suppressPackageStartupMessages({
    library(CellChat, quietly = T)
    library(liana, quietly = T)
    library(Seurat, quietly = T)
    library(data.table, quietly = T)
    library(dplyr, quietly = T)
    library(magrittr, quietly = T)
})
options(stringsAsFactors = FALSE)

In [3]:
# Root directory of the revision analyses; the communication scores are written beneath it.
rev_path <- '/data3/hratch/tc2c_analyses_1/natcomm_revisions/'
# Expression file (h5ad; log1p-transformed judging by its name).
# NOTE(review): not referenced by any code visible in this notebook.
expr_files <- '/data2/eric/Tensor-Revisions/COVID-19-BALF-log1p.h5ad'

# Directory with the per-sample DGE_* (counts) and Meta_* (metadata) CSV files.
raw_counts_path <- '/data2/hratch/immune_CCI/covid/expression_data/covid_data/'
# normalized_counts_path<-paste0(rev_path, 'interim/tc2c_external_inputs/liana/liana_inputs2/')

In [4]:
n.cores <- 20   # number of workers handed to LIANA
seed <- 888     # random seed used for every scoring method
n_perm <- 100   # forwarded as `nboot` when scoring each sample

# expr_prop param in LIANA
ep <- 0
# prop_filt param in LIANA: enabled only when ep is strictly positive
pf <- ep > 0

In [5]:
# Re-implements CellChat::subsetCommunicationProbability without removing 0 scores:
# every source/target/interaction combination is returned, regardless of its
# p-value and even when its probability is 0.
#
# cellchat.obj: object whose @net slot holds the `prob` and `pval` arrays and
#               whose @LR$LRsig holds the ligand-receptor annotation table.
# thresh:       p-value cutoff; probabilities with pval >= thresh are set to 0
#               (the rows are kept). NULL (default) leaves all probabilities as is.
#
# Returns a long-format data.frame with columns source, target, interaction_name,
# prob, pval, followed by the annotation columns selected from LRsig.
format.cellchat.communication<-function(cellchat.obj, thresh = NULL){
    prob.arr <- cellchat.obj@net$prob
    pval.arr <- cellchat.obj@net$pval
    # with thresh = NULL the comparison yields a zero-length mask, i.e. no change
    if (!is.null(thresh)){
        prob.arr[pval.arr >= thresh] <- 0
    }

    # one row per array cell; the three dimension columns are renamed
    long <- reshape2::melt(prob.arr, value.name = "prob")
    colnames(long)[1:3] <- c("source","target","interaction_name")
    long$pval <- reshape2::melt(pval.arr, value.name = "pval")$pval

    # attach the ligand-receptor annotation by interaction name
    lr.annot <- dplyr::select(cellchat.obj@LR$LRsig, c("interaction_name_2", "pathway_name", "ligand",  "receptor" ,"annotation","evidence"))
    annot.rows <- match(long$interaction_name, rownames(lr.annot))
    cbind(long, lr.annot[annot.rows,])
}

# Adapts liana::call_cellchat so that the scores are formatted with
# format.cellchat.communication instead of CellChat::subsetCommunicationProbability,
# which excludes 0 scores.
# Default params from Daniel Dimitrov for consistency with LIANA.
#
# so:        Seurat object; its identities define the cell groups and the `data`
#            slot of its 'RNA' assay supplies the expression matrix.
# seed:      forwarded to computeCommunProb as seed.use.
# expr_prop: forwarded to identifyOverExpressedGenes as thresh.pc.
# nboot:     forwarded to computeCommunProb.
# de_thresh: forwarded to identifyOverExpressedGenes as thresh.p.
# thresh:    forwarded to format.cellchat.communication.
#
# Returns a tibble with columns source, target, ligand, receptor, prob, pval.
run_cellchat<-function(so, seed, expr_prop=0.1, nboot=1000, de_thresh=1, thresh=NULL){
    idents <- Seurat::Idents(so)
    group.meta <- data.frame(group = idents, row.names = names(idents))
    expr <- GetAssayData(so, assay = 'RNA', slot = "data")

    cc <- createCellChat(object = expr, meta = group.meta, group.by = "group")
    cc <- CellChat::addMeta(cc, meta = group.meta)
    cc <- CellChat::setIdent(cc, ident.use = "group")

    # human CellChatDB, reformatted with LIANA's CellChatDB resource
    op.resource <- select_resource(c('CellChatDB'))
    cc@DB <- liana::cellchat_formatDB(CellChatDB.human,
                                      op_resource = op.resource$CellChatDB,
                                      exclude_anns = c())

    # subset the expression data of signaling genes, assign to @data.signalling
    cc <- subsetData(cc)
    cc <- identifyOverExpressedGenes(cc, thresh.pc = expr_prop, thresh.p = de_thresh)
    cc <- identifyOverExpressedInteractions(cc)
    cc <- projectData(cc, PPI.human)
    cc <- computeCommunProb(cc, raw.use = FALSE, type = 'triMean', trim = NULL,
                            seed.use = seed, population.size = TRUE, do.fast = TRUE,
                            nboot = nboot)
    cc <- filterCommunication(cc, min.cells = 1) # as in LIANA

    # replaces CellChat::subsetCommunicationProbability
    comm <- format.cellchat.communication(cc, thresh = thresh)
    comm <- dplyr::select(comm, source, target, ligand, receptor, prob, pval)
    as_tibble(comm)
}

In [13]:
# Scores one sample with LIANA (natmi, sca, cellphonedb) and appends the custom
# CellChat scores under the name 'cellchat'.
# Reads the globals raw_counts_path, n.cores and pf.
#
# sample:    sample ID as it appears in the DGE_/Meta_ file names.
# seed:      seed passed to liana_wrap and run_cellchat.
# expr_prop: expression-proportion parameter passed to liana_wrap and run_cellchat.
# ...:       other arguments to run_cellchat.
#
# Returns the list produced by liana_wrap with the extra 'cellchat' element.
run.liana<-function(sample, seed, expr_prop, ...){
    input.csv <- function(prefix){
        paste0(raw_counts_path, prefix, sample, '_External_Tool.csv')
    }
    counts <- read.csv(input.csv('DGE_'), row.names = 1)
    cell.meta <- read.csv(input.csv('Meta_'), row.names = 1)

    so <- CreateSeuratObject(counts = counts, meta.data = cell.meta)
    so <- Seurat::NormalizeData(so, normalization.method = "LogNormalize", scale.factor = 1e6)
    Idents(so) <- 'celltype'

    call.params <- list(expr_prop = expr_prop, complex_policy = 'min0')
    res <- liana_wrap(so, assay = "RNA",
                      method = c('natmi', 'sca', 'cellphonedb'),
                      resource = c('CellChatDB'),
                      seed = seed,
                      workers = n.cores, parallelize = TRUE,
                      expr_prop = expr_prop, complex_policy = 'min0',
                      liana_call.params = call.params,
                      natmi.params = list(prop_filt = pf),
                      sca.params = list(prop_filt = pf),
                      cellphonedb.params = list(prop_filt = pf, workers = n.cores, parallelize = TRUE)
                     )
    res[['cellchat']] <- run_cellchat(so, seed = seed, expr_prop = expr_prop, ...)
    res
}

In [4]:
# get samples
# Sample IDs come from the count files that run.liana() reads
# (DGE_<sample>_External_Tool.csv). Matching the full file-name pattern, rather
# than splitting every directory entry on '_', ignores unrelated files (a name
# without '_' would otherwise raise "subscript out of bounds") and keeps sample
# IDs that themselves contain '_' intact.
dge_pattern <- '^DGE_(.+)_External_Tool\\.csv$'
dge_files <- list.files(raw_counts_path, pattern = dge_pattern)
samples <- unique(sub(dge_pattern, '\\1', dge_files))


# get scores
liana.res<-list() # NOTE(review): never populated in the visible code

# Create the output directory up front: fwrite() does not create missing
# directories, and a failure would otherwise only surface after the first
# (expensive) sample has been scored.
out_dir <- paste0(rev_path, 'interim/tc2c_external_inputs/liana/liana_outputs/')
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

for (sample in samples){
    print(sample)
    # get the communication scores (one table per method)
    scores <- run.liana(sample = sample, seed = seed, expr_prop = ep, nboot = n_perm)

    # write the communication scores, one CSV per sample and method
    for (method in names(scores)){
        fwrite(scores[[method]],
               paste0(out_dir, sample, '_communication_scores_', method, '.csv'))
    }
}

In [9]:
# record the R version and the loaded packages used for this run
sessionInfo()

R version 4.1.2 (2021-11-01)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 18.04.6 LTS

Matrix products: default
BLAS/LAPACK: /home/hratch/anaconda3/envs/liana/lib/libopenblasp-r0.3.18.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] magrittr_2.0.2      data.table_1.14.2   SeuratObject_4.0.4 
 [4] Seurat_4.1.0        liana_0.0.7         CellChat_1.1.3     
 [7] Biobase_2.54.0      BiocGenerics_0.40.0 ggplot2_3.3.5      
[10] igraph_1.2.11       dplyr_1.0.8        

loaded via a namespace (and not attached):
  [1] utf8_1.2.2      