Compute CellChat communication scores for each context of the BALF data, to be used as input to Tensor-cell2cell.

env_name: cellchat

In [38]:
suppressPackageStartupMessages({
    library(CellChat, quietly = T)
    library(patchwork, quietly = T)
    library(RhpcBLASctl, quietly = T)
    library(Matrix, quietly = T)
    library(data.table, quietly = T)
    library(ggplot2, quietly = T)
    library(reshape2, quietly = T)
})
options(stringsAsFactors = FALSE)
# RhpcBLASctl::blas_set_num_threads(25) # no multithreading

# ---- input / output locations ----
# NATMI directory; the ligand-receptor table is read from it (see script 04Aii for details)
natmi_path <- '/data2/hratch/Software/NATMI/'
# per-sample expression files; alternative source: '/data2/eric/CCC-Benchmark/data/External/'
expression_data_path <- '/data2/hratch/immune_CCI/covid/expression_data/covid_data/'
# set to TRUE when using the alternative source above (small parsing differences)
external_expression_path <- FALSE
rev_path <- '/data3/hratch/tc2c_analyses_1/natcomm_revisions/'
input_data_path <- '/data2/hratch/immune_CCI/covid/inputs/'

# ---- run parameters ----
type_ <- 'functional'
group <- 0
seed <- 888
set.seed(seed)

# name of the metadata column holding the cell-type label; it differs between the two sources
cell_grouper <- if (external_expression_path) 'cell_type' else 'celltype'

version <- 2

# Prepare LR input
Format the connectomeDB2020 ligand-receptor pairs for use with CellChat.

In [54]:
# Assemble a CellChat-style database: the interaction table comes from the
# NATMI ligand-receptor file, every other component is taken from CellChatDB.human.
humandb <- CellChatDB.human

lr_pairs_file <- paste0(natmi_path, 'lrdbs/lrc2p.csv')
connectome_lr <- read.csv(lr_pairs_file)
names(connectome_lr) <- c('ligand', 'receptor')

# interaction identifier "<ligand>_<receptor>", stored as a column and as the row names
connectome_lr$interaction_name <- apply(connectome_lr, 1, paste, collapse = '_')
rownames(connectome_lr) <- connectome_lr[['interaction_name']]

connectome_db <- list(
    interaction = connectome_lr,
    complex = humandb$complex,
    cofactor = humandb$cofactor,
    geneInfo = humandb$geneInfo
)

# Preprocess 
as in cellchat_benchmark/05B

In [55]:
# Index the expression directory as fns[[sample]][[type]] -> file name.
# Each file name is split on '_': the first token is used as the file type
# (the readers look up `DGE` and `Meta`), the second token as the sample name.
fns <- list()
for (fn in list.files(expression_data_path)) {
    sn <- strsplit(fn, '_', fixed = TRUE)[[1]]
    type <- sn[[1]]
    sample.name <- sn[[2]]
    # Start every sample as an explicit list. Filling a NULL slot through `[[<-`
    # with a length-one value yields an atomic vector on older R versions, and
    # the later `fns[[sn]]$DGE` / `fns[[sn]]$Meta` lookups fail on atomic vectors.
    if (is.null(fns[[sample.name]])) {
        fns[[sample.name]] <- list()
    }
    fns[[sample.name]][[type]] <- fn
}
sample.names <- names(fns)

In [56]:
# load the UMI counts
#' Load the count table of one sample as a matrix.
#'
#' Reads the file registered as `fns[[sn]]$DGE` from `expression_data_path`,
#' moves its `Gene` column into the row names and returns the remaining columns.
#' Relies on the globals `expression_data_path`, `fns` and
#' `external_expression_path`.
#'
#' @param sn Sample name; must be a key of `fns`.
#' @return Matrix with one row per `Gene` entry and one column per remaining
#'   input column (presumably cells - confirm against the input files).
read_sample_csv <- function(sn) {
    counts <- as.data.frame(fread(paste0(expression_data_path, fns[[sn]]$DGE)))
    rownames(counts) <- counts$Gene
    # drop = FALSE keeps a data.frame (and its row/column names) even when only
    # a single column is left after removing `Gene`
    counts <- counts[, !(colnames(counts) %in% c('Gene')), drop = FALSE]
    if (external_expression_path) {
        # prefix the column names with the sample name for consistency with metadata
        colnames(counts) <- paste0(sn, '.', colnames(counts))
    }
    as.matrix(counts)
}

# Read the metadata table registered as fns[[sn]]$Meta for sample `sn`
# and use its `Cell` column as the row names.
read_meta <- function(sn) {
    meta_file <- paste0(expression_data_path, fns[[sn]]$Meta)
    meta <- read.csv(meta_file)
    rownames(meta) <- meta$Cell
    meta
}


# Load one count matrix and one metadata table per sample (lists named by sample).
if (!group) {
    named_samples <- setNames(sample.names, sample.names)
    counts <- lapply(named_samples, read_sample_csv)
    meta <- lapply(named_samples, read_meta)
} else { # group by context
    stop('Should not group by context for classification')
}

# Stack the per-sample metadata into a single per-cell table.
md.cell <- do.call("rbind", meta)
# In the default (non-external) layout the rows are keyed by the `ID` column.
if (!external_expression_path) {
    rownames(md.cell) <- md.cell$ID
}

if (type_ == 'functional') { # functional requires same cell types to work
    # filter for intersection of cell types across samples/contexts - to make comparable with Tensor-cell2cell
    cell.types <- Reduce(
        intersect,
        lapply(counts, function(df) unique(md.cell[colnames(df), cell_grouper]))
    )
    # take the row names first, then mask: avoids the data.frame collapsing
    # to a vector when the metadata has a single column
    cell.ids <- rownames(md.cell)[md.cell[[cell_grouper]] %in% cell.types]
    # drop = FALSE keeps each sample a matrix even if only one cell survives the filter
    counts <- lapply(counts, function(df) {
        df[, colnames(df) %in% cell.ids, drop = FALSE]
    })
}

# Run Cellchat

In [None]:
print('Run cellchat')

# Make sure the output directory exists before the long-running loop, so the
# final saveRDS() cannot fail on a missing directory after all samples were processed.
cellchat_out_dir <- paste0(rev_path, 'interim/tc2c_external_inputs/cellchat/')
dir.create(cellchat_out_dir, recursive = TRUE, showWarnings = FALSE)

suppressWarnings({
    suppressMessages({
        # one CellChat object per sample, keyed by sample name
        covid <- list()
        for (sample.name in names(counts)) {
            print(sample.name)
            # normalize this sample's counts and build the CellChat object,
            # grouping cells by the cell-type column of the metadata
            expr <- CellChat::normalizeData(counts[[sample.name]])
            cellchat <- createCellChat(object = as(expr, "dgCMatrix"),
                                       meta = md.cell[colnames(expr), , drop = FALSE],
                                       group.by = cell_grouper)
            # custom database: NATMI ligand-receptor pairs plus the
            # complex/cofactor/geneInfo tables of CellChatDB.human
            cellchat@DB <- connectome_db

            cellchat <- subsetData(cellchat) # subset the expression data of signaling genes, assign to @data.signalling
            cellchat <- identifyOverExpressedGenes(cellchat)
            cellchat <- identifyOverExpressedInteractions(cellchat) # generate @ LR slot used by computeCommunProb
            cellchat <- projectData(cellchat, PPI.human) # shallow sequencing depth

            # raw.use = FALSE: score on the projected data produced by projectData() above
            cellchat <- computeCommunProb(cellchat, raw.use = FALSE, type = 'triMean', trim = NULL,
                                          seed.use = seed, population.size = FALSE)

            # The functional similarity analysis requires the same cell population composition between two datasets.
            cellchat <- filterCommunication(cellchat, min.cells = 10)
            covid[[sample.name]] <- cellchat
        }
    })
})
saveRDS(covid,
        paste0(cellchat_out_dir, 'covid_balf_cellchatobjects.rds'))
print('complete')


In [11]:
# reformat for input to tensor-cell2cell:
# one (LR x Sender-Receiver) score table per sample, stacked row-wise with a `Sample` column
suppressWarnings({
    cm_list <- list()
    # `sample_id` rather than `sample`, so base::sample is not shadowed
    for (sample_id in names(covid)) {
        cc <- covid[[sample_id]]

        # get communication array in long format: one row per (Sender, Receiver, LR) entry.
        # reshape2:: is spelled out so the result does not depend on package attach order
        # (data.table also exports melt).
        cm <- reshape2::melt(cc@net$prob)
        colnames(cm) <- c('Sender', 'Receiver', 'LR', 'Score')

        # map LRs to appropriate naming convention: "<col 1>&<col 2>" of the LRsig table,
        # named by its row names
        lr_map <- cc@LR$LRsig
        lr_map <- apply(lr_map[, c(1, 2)], 1, paste, collapse = '&')
        # Look up by NAME. melt() can return LR as a factor, and indexing a named
        # vector with a factor uses the integer codes instead of the labels.
        cm[['LR']] <- unname(lr_map[as.character(cm$LR)])

        # recast into CCxLR
        cm[['Sender.Receiver']] <- paste(cm$Sender, cm$Receiver, sep = '-')
        cm <- cm[!(colnames(cm) %in% c('Sender', 'Receiver'))]
        cm <- reshape2::dcast(cm, LR ~ Sender.Receiver, value.var = 'Score')
        rownames(cm) <- cm$LR
        cm <- cm[colnames(cm) != 'LR']

        cm[['Sample']] <- sample_id
        cm_list[[sample_id]] <- cm
    }
})
cm_all <- do.call(rbind, cm_list)

# create the output directory if needed so the write cannot fail on a missing path
tc2c_out_dir <- paste0(rev_path, 'interim/tc2c_external_inputs/cellchat/')
dir.create(tc2c_out_dir, recursive = TRUE, showWarnings = FALSE)
write.csv(cm_all, paste0(tc2c_out_dir, 'cellchat_balf.csv'))