Use LIANA to compute cell-cell communication (CCC) scores on the BALF COVID-19 dataset with multiple CCC tools

env_name: liana

In [1]:
# Attach the packages used by this notebook. Startup banners are suppressed so
# the cell output stays clean. `TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
suppressPackageStartupMessages({
#     library(CellChat, quietly = TRUE)
    library(liana, quietly = TRUE)
    library(Seurat, quietly = TRUE)
})
# Never convert character columns to factors implicitly.
options(stringsAsFactors = FALSE)

# Project-level locations.
rev_path <- "/data3/hratch/tc2c_analyses_1/natcomm_revisions/"
expr_files <- "/data2/eric/Tensor-Revisions/COVID-19-BALF-log1p.h5ad"

# Directory holding the per-sample DGE_*/Meta_* CSV files read by run.liana().
raw_counts_path <- "/data2/hratch/immune_CCI/covid/expression_data/covid_data/"
# normalized_counts_path<-paste0(rev_path, 'interim/tc2c_external_inputs/liana/liana_inputs2/')

# Worker count and random seed handed to liana_wrap().
n.cores <- 20
seed <- 888

In [3]:
# Score cell-cell communication for one sample with LIANA.
#
# Reads `DGE_<sample>_External_Tool.csv` (counts) and
# `Meta_<sample>_External_Tool.csv` (cell metadata) from the global
# `raw_counts_path`, taking the first column of each file as row names. It then
# builds a Seurat object, log-normalizes it with a scale factor of 1e6, sets the
# cell identities from the `celltype` metadata column, and runs `liana_wrap()`
# with the CellChatDB resource.
#
# Args:
#   sample: sample identifier as it appears in the two file names.
#
# Returns:
#   Whatever `liana_wrap()` returns for the requested methods.
#
# Globals read: `raw_counts_path`, `seed`, `n.cores`.
#
# Errors if the metadata has no `celltype` column; warns if some cells in the
# count table have no metadata row.
run.liana <- function(sample) {
    counts.file <- paste0(raw_counts_path, 'DGE_', sample, '_External_Tool.csv')
    meta.file <- paste0(raw_counts_path, 'Meta_', sample, '_External_Tool.csv')

    raw.counts <- read.csv(counts.file, row.names = 1)
    md <- read.csv(meta.file, row.names = 1)

    # `Idents(so) <- 'celltype'` only pulls labels from the metadata when that
    # column exists; otherwise the string itself becomes the identity of every
    # cell, which would silently collapse the sample into one "cell type".
    if (!('celltype' %in% colnames(md))) {
        stop("Metadata for sample '", sample, "' has no 'celltype' column",
             call. = FALSE)
    }

    # read.csv() may rewrite column names (check.names) while metadata row
    # names are kept verbatim, so flag cells that end up without metadata.
    unmatched <- setdiff(colnames(raw.counts), rownames(md))
    if (length(unmatched) > 0) {
        warning(length(unmatched), " cell(s) in sample '", sample,
                "' have no matching metadata row", call. = FALSE)
    }

    so <- CreateSeuratObject(counts = raw.counts, meta.data = md)
    so <- Seurat::NormalizeData(so, normalization.method = "LogNormalize", scale.factor = 1e6)
    Idents(so) <- 'celltype'

    # NOTE(review): depending on the liana version, `workers` may only take
    # effect when `parallelize = TRUE` is also passed - confirm.
    liana_wrap(so, assay = "RNA",
               method = c('natmi', 'logfc', 'cellchat', 'sca', 'cellphonedb', 'connectome'),
               resource = c('CellChatDB'),
               cellchat.params = list(nboot=1000, expr_prop = 0.1, de_thresh = 1), #Dimitrov recommendation
               seed = seed,
               workers = n.cores
              )
}

In [4]:
# get samples
# Sample IDs are derived from the count files that run.liana() reads
# (DGE_<sample>_External_Tool.csv). Matching on the full pattern ignores
# unrelated files in the directory (which previously caused a subscript error
# when a name had no underscore) and keeps IDs that contain underscores intact.
dge.pattern <- '^DGE_(.+)_External_Tool\\.csv$'
dge.files <- list.files(raw_counts_path, pattern = dge.pattern)
samples <- unique(sub(dge.pattern, '\\1', dge.files))
if (length(samples) == 0) {
    warning('No DGE_<sample>_External_Tool.csv files found in ', raw_counts_path)
}

# get scores
# One slot per sample, filled in order; results are keyed by sample ID.
liana.res <- vector('list', length(samples))
names(liana.res) <- samples
for (sample in samples){
    print(sample)
    liana.res[[sample]] <- run.liana(sample)
}
saveRDS(liana.res, 'trash.rds')

In [290]:
# to.SCE<-function(sample){
#     # read in data
#     expr<-read.csv(paste0(normalized_counts_path, fns[[sample]]$expr))
#     md<-read.csv(paste0(normalized_counts_path, fns[[sample]]$metadata), 
#                 row.names=1)
#     md.barcode<-unname(sapply(rownames(md), function(x) strsplit(x, '-')))
#     md.barcode<-unlist(lapply(md.barcode, function(x) paste0(x, collapse='.')))
#     rownames(md)<-md.barcode

#     # convert to SCE
#     genes <- expr[[1]]
#     expr<-expr[,-1]
#     barcodes<-colnames(expr)
#     mat <- methods::as(expr, "Matrix")
#     dimnames(mat) <- list(genes, barcodes)
#     sce <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = NULL, logcounts = mat))
#     SummarizedExperiment::rowData(sce) <- S4Vectors::DataFrame(feature = gene)
#     SummarizedExperiment::colData(sce) <- S4Vectors::DataFrame(annotation = md[barcodes, ], row.names = barcodes)
#     return(sce)
# }
                              
# to.SCE<-function(sample){
#     log.counts<-read.csv(paste0(normalized_counts_path, fns[[sample]]$expr))
#     raw.counts<-read.csv(paste0(raw_counts_path, 'DGE_', sample, '_External_Tool.csv'))

#     # deal with genes
#     log.counts.genes<-log.counts[[1]]
#     log.counts<-log.counts[,-1]

#     raw.counts.genes<-raw.counts[[1]]
#     raw.counts<-raw.counts[,-1]

#     # take intersect between raw and log counts
#     genes<-intersect(raw.counts.genes, log.counts.genes)

#     raw.genes.map<-1:length(raw.counts.genes)
#     names(raw.genes.map)<-raw.counts.genes
#     log.genes.map<-1:length(log.counts.genes)
#     names(log.genes.map)<-log.counts.genes

#     # deal with barcodes
#     log.counts.barcodes<-colnames(log.counts)
#     log.counts.barcodes<-unname(sapply(log.counts.barcodes, function(x) strsplit(x, '[.]')))
#     log.counts.barcodes<-unlist(unname(lapply(log.counts.barcodes, function(x) x[[1]]))) 

#     raw.counts.barcodes<-colnames(raw.counts)
#     raw.counts.barcodes<-unname(sapply(raw.counts.barcodes, function(x) strsplit(x, '_')))
#     raw.counts.barcodes<-unlist(unname(lapply(raw.counts.barcodes, function(x) x[[1]])))

#     # take intersection between raw and log counts
#     barcodes<-intersect(raw.counts.barcodes, log.counts.barcodes)

#     raw.barcodes.map<-1:length(raw.counts.barcodes)
#     names(raw.barcodes.map)<-raw.counts.barcodes
#     log.barcodes.map<-1:length(log.counts.barcodes)
#     names(log.barcodes.map)<-log.counts.barcodes

#     log.counts<-log.counts[log.genes.map[genes],log.barcodes.map[barcodes]]
#     raw.counts<-raw.counts[raw.genes.map[genes],raw.barcodes.map[barcodes]]

#     #make barcode identifier unique
#     barcodes<-paste0(barcodes, '.', sample)
#     colnames(log.counts)<-barcodes
#     colnames(raw.counts)<-barcodes

#     rownames(log.counts)<-1:dim(log.counts)[[1]]
#     rownames(raw.counts)<-1:dim(raw.counts)[[1]]


#     md<-read.csv(paste0(normalized_counts_path, fns[[sample]]$metadata), 
#                 row.names=1)
#     md.barcode<-unname(sapply(rownames(md), function(x) strsplit(x, '-')))
#     md.barcode<-lapply(md.barcode, function(x) x[[1]])
#     md.barcode<-unlist(lapply(md.barcode, function(x) paste0(x, '.', sample)))
#     rownames(md)<-md.barcode

#     raw.counts <- methods::as(raw.counts, "Matrix")
#     dimnames(raw.counts) <- list(genes, barcodes)

#     log.counts <- methods::as(log.counts, "Matrix")
#     dimnames(log.counts) <- list(genes, barcodes)

#     sce <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = raw.counts, logcounts = log.counts))
#     SummarizedExperiment::rowData(sce) <- S4Vectors::DataFrame(gene = genes)
#     SummarizedExperiment::colData(sce) <- S4Vectors::DataFrame(row.names = barcodes)  
#     colLabels(sce)<-md[barcodes, ]                         
#     return(sce)
                          
# }                              