In [1]:
suppressPackageStartupMessages({
    library(CellChat, quietly = T)
    library(patchwork, quietly = T)
    library(RhpcBLASctl, quietly = T)
    library(Matrix, quietly = T)
    library(data.table, quietly = T)
})
options(stringsAsFactors = FALSE)
# RhpcBLASctl::blas_set_num_threads(25) # no multithreading

In [2]:
# Input/output locations. Every path ends with '/' because file names are appended with paste0.
# Earlier expression data location: '/data2/hratch/immune_CCI/covid/expression_data/'
expression_data_path <- '/data2/eric/CCC-Benchmark/data/External/'
output_path <- '/data2/hratch/immune_CCI/covid/balf_classification/'
input_data_path <- '/data2/hratch/immune_CCI/covid/inputs/'

# Analysis settings
type_ <- 'functional'        # similarity type; the alternative is 'structural'
cell_grouper <- 'cell_type'  # metadata column used to group cells
group <- 0                   # 0: keep samples separate (grouping by context is rejected later on)
seed <- 888                  # seed handed to computeCommunProb

Load data:

In [3]:
# Human ligand-receptor database assigned to every CellChat object further down.
humandb <- CellChatDB.human

# Index the files found in expression_data_path by sample and by file type.
# File names are split on '_': the first field is the file type (the readers
# look up 'DGE' and 'Meta'), the second field is used as the sample name.
fns <- list()
for (fn in list.files(expression_data_path)) {
    sn <- strsplit(fn, '_')[[1]]
    type <- sn[[1]]
    sample.name <- sn[[2]]
    fns[[sample.name]][[type]] <- fn
}

# samples are processed in the order in which they were indexed
sample.names <- names(fns)

In [5]:
# Load the UMI counts of one sample as a genes x cells matrix.
#
# Reads the file registered under fns[[sn]]$DGE inside expression_data_path.
# The 'Gene' column supplies the row names and is then removed; every other
# column is kept and its name is prefixed with '<sn>.'.
#
# Args:
#   sn: sample name, a key of the global `fns` index.
# Returns:
#   A matrix with one row per gene and one column per remaining input column.
read_sample_csv<-function(sn){
    counts<-as.data.frame(fread(paste0(expression_data_path, fns[[sn]]$DGE)))
    rownames(counts)<-counts$Gene
    # drop = FALSE: without it a table with a single cell column collapses to a
    # plain vector and the colnames assignment below fails
    counts<-counts[ , !(colnames(counts) %in% c('Gene')), drop = FALSE]
    colnames(counts)<-paste0(sn, '.', colnames(counts)) # consistency with metadata
    return(as.matrix(counts))
}

# Load the cell metadata of one sample.
#
# Reads the file registered under fns[[sn]]$Meta inside expression_data_path
# and uses its 'Cell' column as row names.
#
# Args:
#   sn: sample name, a key of the global `fns` index.
# Returns:
#   A data.frame with one row per cell, indexed by the 'Cell' column.
read_meta <- function(sn) {
    meta_file <- paste0(expression_data_path, fns[[sn]]$Meta)
    sample_meta <- read.csv(meta_file)
    rownames(sample_meta) <- sample_meta$Cell
    sample_meta
}


# Samples must stay separate for classification, so grouping by context is rejected up front.
if (group){
    stop('Should not group by context for classification')
}

# One count matrix and one metadata table per sample, both named by sample.
counts <- lapply(setNames(sample.names, sample.names), read_sample_csv)
meta <- lapply(setNames(sample.names, sample.names), read_meta)

# Stack the per-sample metadata into a single cell-level table.
# NOTE(review): `meta` is a named list, so rbind is expected to prefix the row
# names with '<sample>.', which is what the count matrix column names look
# like -- confirm that colnames(counts[[i]]) are all found in rownames(md.cell).
md.cell<-do.call("rbind", meta)
if (type_ == 'functional'){# functional requires same cell types to work
    # keep only the cell types present in every sample/context - to make comparable with Tensor-cell2cell
    cell.types<-Reduce(intersect, lapply(counts, function(df) unique(md.cell[colnames(df), cell_grouper])))
    cell.ids<-rownames(md.cell)[md.cell[[cell_grouper]] %in% cell.types]
    # drop = FALSE keeps each element a matrix even if a single cell survives the
    # filter; a collapsed vector has no column names for the later metadata lookup
    counts<-lapply(counts, function(df) df[, colnames(df) %in% cell.ids, drop = FALSE])
}

Run cellchat:

In [13]:
# Build one CellChat object per sample, merge them, then compute the pairwise
# network similarity and its embedding. Warnings and messages raised inside
# the whole block are silenced.
suppressWarnings(suppressMessages({
    covid.list <- list()
    for (sample.name in names(counts)){
        # wrap the normalized expression of this sample, together with the
        # metadata rows of its cells, in a CellChat object grouped by cell_grouper
        expr <- CellChat::normalizeData(counts[[sample.name]])
        cellchat <- createCellChat(
            object = as(expr, "dgCMatrix"),
            meta = md.cell[colnames(expr), ],
            group.by = cell_grouper
        )
        cellchat@DB <- humandb # human database

        # restrict the expression data to signaling genes (stored in @data.signalling)
        cellchat <- subsetData(cellchat)
        cellchat <- identifyOverExpressedGenes(cellchat)
        # fills the @LR slot that computeCommunProb relies on
        cellchat <- identifyOverExpressedInteractions(cellchat)
        # projection onto PPI.human, applied because of the shallow sequencing depth
        cellchat <- projectData(cellchat, PPI.human)

        cellchat <- computeCommunProb(
            cellchat,
            raw.use = FALSE,
            type = 'triMean',
            trim = NULL,
            seed.use = seed,
            population.size = FALSE
        )

        # The functional similarity analysis requires the same cell population
        # composition between two datasets.
        cellchat <- filterCommunication(cellchat, min.cells = 10)
        cellchat <- computeCommunProbPathway(cellchat)
        covid.list[[sample.name]] <- cellchat
    }

    # merge the per-sample objects and analyze them jointly
    cellchat <- mergeCellChat(covid.list, add.names = names(covid.list))
    cellchat <- computeNetSimilarityPairwise(cellchat, type = type_)
    cellchat.embed <- netEmbedding(cellchat, type = type_)
}))

The cell groups used for CellChat analysis are  B Epithelial Macrophages mDC NK T 
The cell-cell communication related with the following cell groups are excluded due to the few number of cells:  B 
The cell groups used for CellChat analysis are  B Epithelial Macrophages mDC NK T 
Compute signaling network similarity for datasets 1 2 
Manifold learning of the signaling networks for datasets 1 2 


In [None]:
# Export the network embedding with one row per (signalling pathway, sample)
# pair, annotated with the sample's context (the 'severity' metadata column).
embed<-cellchat.embed@netP$similarity[[type_]]$dr[[1]]
idx<-rownames(embed)
# seq_len stays valid for an empty embedding, where 1:0 would yield c(1, 0)
rownames(embed)<-seq_len(nrow(embed))
embed<-as.data.frame(embed)

# the original row names are split on '--': pathway first, sample second
id.parts<-strsplit(idx, '--', fixed = TRUE)
embed[['Sample']]<-vapply(id.parts, function(x) x[[2]], character(1))
embed[['Signalling.Pathway']]<-vapply(id.parts, function(x) x[[1]], character(1))

# each sample must map to exactly one context
sample_context_map<-list()
for (sn in sample.names){
    # %in% instead of ==: a missing value in the 'sample' column must not add NA rows
    context<-unique(md.cell[md.cell$sample %in% sn, 'severity'])
    if (length(context) != 1){stop('Incorrect sample to context mapping for sample ', sn)}
    sample_context_map[[sn]]<-context
}

# a sample without a context would be dropped by unlist and the shorter vector
# could be recycled silently into the Context column, so fail explicitly
unmapped<-setdiff(unique(embed[['Sample']]), names(sample_context_map))
if (length(unmapped) > 0){
    stop('No context available for sample(s): ', paste(unmapped, collapse = ', '))
}
embed[['Context']]<-unlist(unname(sample_context_map[embed[['Sample']]]))

write.csv(embed, paste0(output_path, 'cellchat_embeddings_balf_', type_, '.csv'))