Use CellChat to classify samples

In [1]:
suppressPackageStartupMessages({
    library(CellChat, quietly = T)
    library(patchwork, quietly = T)
    library(RhpcBLASctl, quietly = T)
    library(Matrix, quietly = T)
    library(rhdf5, quietly = T)
    library(stringr, quietly = T)
})
# Keep strings as character (not factor) when building data frames.
options(stringsAsFactors = FALSE)

# Directory holding metadata.csv and umi_per_sample.h5, and the directory
# results are written to. Both end in '/' because they are joined with paste0().
# expression_data_path <- '/data2/hratch/immune_CCI/covid/covid_atlas/interim/umi_for_timing/'
input_path <- '/data2/hratch/immune_CCI/covid/covid_atlas/interim/classification_inputs/'
output_path <- '/data2/hratch/immune_CCI/covid/covid_atlas/interim/'
# Optional cap on BLAS threads (disabled):
# RhpcBLASctl::blas_set_num_threads(20)

In [2]:
# ---- parameters ----
# metadata column whose values define the cell groups passed to CellChat
cell_grouper <- 'majorType'
# seed handed to computeCommunProb (seed.use)
seed <- 888

# ---- inputs ----
# FALSE: one CellChat object per sample; TRUE: pool samples by context
group <- FALSE
# samples to analyse (names of the groups read from umi_per_sample.h5)
sample.names <- c('S-S083', 'S-M001', 'S-HC018-1')

Load data:

In [3]:
# load metadata; the first CSV column becomes the row names, which are later
# used to look up cells by the column names of the count matrices
md.cell <- read.csv(paste0(input_path, 'metadata.csv'), row.names = 1)

# Map every sample ID to its context (the 'CoVID.19.severity' value).
# A sample must carry exactly one context; anything else is a metadata error.
sample_names <- unique(md.cell$sampleID)
sample_context_map <- lapply(setNames(sample_names, sample_names), function(sn) {
    context <- unique(md.cell[md.cell$sampleID == sn, 'CoVID.19.severity'])
    if (length(context) != 1) {
        stop('Incorrect sample to context mapping for sample ', sn,
             ' (contexts found: ', paste(context, collapse = ', '), ')',
             call. = FALSE)
    }
    context
})
# distinct contexts, kept as a list in order of first appearance
contexts <- unique(sample_context_map)

# load LR pairs
# The full human CellChat database is used as-is. The disabled lines below
# would restrict it to the LR pairs used by Tensor-cell2cell.
# NOTE(review): they refer to input_data_path / output_results_path, which are
# not defined in the visible part of this notebook.
# lr_pairs<-read.csv(paste0(input_data_path,'Tensor-cell2cell-LRpairs.csv'))
# lr_pairs<-lr_pairs$interaction_name
humandb <- CellChatDB.human
# humandb$interaction<-CellChatDB.human$interaction[CellChatDB.human$interaction$interaction_name %in% lr_pairs, ]
# saveRDS(humandb, paste0(output_results_path, 'humandb.rds'))

In [4]:
# load the UMI counts

#' Read the UMI count matrix of one sample from `umi_per_sample.h5`.
#'
#' @param sn Name of the HDF5 group to read (a sample ID in this notebook).
#' @return The fourth dataset of the group, with column names taken from the
#'   second dataset and row names taken from the first dataset after replacing
#'   every '-' with '.'.
read_sample_h5 <- function(sn) {
    h5_group <- h5read(paste0(input_path, 'umi_per_sample.h5'), sn)
    # NOTE(review): datasets are addressed by position; presumably
    # 1 = row labels, 2 = column labels, 4 = count values -- confirm against
    # the code that wrote the file.
    count <- h5_group[[4]]
    colnames(count) <- h5_group[[2]]
    # str_replace_all is vectorised, so no per-element sapply is needed;
    # fixed() makes '-' an explicit literal rather than a regex.
    rownames(count) <- str_replace_all(as.character(h5_group[[1]]), fixed('-'), '.')
    count
}


# Assemble `counts`: a named list of UMI count matrices, one entry per CellChat
# object to build (one per sample, or one per context when `group` is TRUE).
if (!group) {
    counts <- lapply(setNames(sample.names, sample.names), read_sample_h5)
} else { # group by context
    unknown.samples <- setdiff(sample.names, names(sample_context_map))
    if (length(unknown.samples) > 0) {
        stop('Samples without a context in the metadata: ',
             paste(unknown.samples, collapse = ', '), call. = FALSE)
    }

    # Keep only contexts that contain at least one selected sample. A context
    # with no samples would give do.call(cbind, list()) == NULL and leave a
    # NULL entry in `counts`. Context order follows `contexts`.
    selected.context <- unlist(sample_context_map[sample.names])
    used.contexts <- intersect(unlist(contexts), selected.context)
    by.context <- split(names(selected.context),
                        factor(selected.context, levels = used.contexts))

    # Read every sample of one context and bind the matrices column-wise.
    group_by_context <- function(context) {
        sample.counts <- lapply(by.context[[context]], read_sample_h5)
        # cbind() matches rows by position, so the row names must agree
        first.rows <- rownames(sample.counts[[1]])
        same.rows <- vapply(sample.counts,
                            function(m) identical(rownames(m), first.rows),
                            logical(1))
        if (!all(same.rows)) {
            stop('Samples of context ', context,
                 ' do not share the same row names; cannot cbind them',
                 call. = FALSE)
        }
        do.call(cbind, sample.counts)
    }
    counts <- lapply(setNames(names(by.context), names(by.context)),
                     group_by_context)
}

Run CellChat:

In [5]:
# Build one CellChat object per entry of `counts` (a sample, or a context when
# grouping) and run the communication-probability pipeline on each.
covid.list <- list()
for (sample.name in names(counts)) {
    expr <- CellChat::normalizeData(counts[[sample.name]])

    # Every cell (column) needs a metadata row; indexing a data frame with an
    # unknown row name would silently yield an all-NA row instead of failing.
    missing.cells <- setdiff(colnames(expr), rownames(md.cell))
    if (length(missing.cells) > 0) {
        stop(length(missing.cells), ' cells of ', sample.name,
             ' have no row in the metadata (e.g. ', missing.cells[1], ')',
             call. = FALSE)
    }
    cellchat <- createCellChat(object = as(expr, "dgCMatrix"),
                               meta = md.cell[colnames(expr), , drop = FALSE],
                               group.by = cell_grouper)
    cellchat@DB <- humandb # human organism

    cellchat <- subsetData(cellchat) # subset the expression data of signaling genes, assign to @data.signaling
    cellchat <- identifyOverExpressedGenes(cellchat)
    cellchat <- identifyOverExpressedInteractions(cellchat) # generate @LR slot used by computeCommunProb
    # project expression onto the PPI network (for shallow sequencing depth);
    # raw.use = FALSE below makes computeCommunProb use the projected data
    cellchat <- projectData(cellchat, PPI.human)

    cellchat <- computeCommunProb(cellchat, raw.use = FALSE, type = 'triMean',
                                  trim = NULL, seed.use = seed,
                                  population.size = FALSE)

    # Drop communication involving cell groups with fewer than 10 cells.
    # Original note: the functional similarity analysis requires the same cell
    # population composition between two datasets.
    cellchat <- filterCommunication(cellchat, min.cells = 10)
    cellchat <- computeCommunProbPathway(cellchat)
    covid.list[[sample.name]] <- cellchat
}

# Merge the per-sample objects into a single CellChat object, labelling each
# dataset with its name in covid.list.
cellchat <- mergeCellChat(object.list = covid.list,
                          add.names = names(covid.list))
# Pairwise structural similarity of the signaling networks across datasets,
# followed by the manifold embedding of those networks.
cellchat <- computeNetSimilarityPairwise(object = cellchat,
                                         type = 'structural')
cellchat.embed <- netEmbedding(object = cellchat, type = 'structural')

Create a CellChat object from a data matrix

Set cell identities for the new CellChat object



The cell groups used for CellChat analysis are  B CD4 CD8 DC Macro Mega Mono Neu NK Plasma 
The cell-cell communication related with the following cell groups are excluded due to the few number of cells:  Macro Neu Plasma 


Create a CellChat object from a data matrix

Set cell identities for the new CellChat object



The cell groups used for CellChat analysis are  B CD4 CD8 DC Mega Mono NK Plasma 
The cell-cell communication related with the following cell groups are excluded due to the few number of cells:  CD4 Mega Plasma 


Create a CellChat object from a data matrix

Set cell identities for the new CellChat object



The cell groups used for CellChat analysis are  B CD4 CD8 DC Mega Mono NK Plasma 
The cell-cell communication related with the following cell groups are excluded due to the few number of cells:  DC Mega 


Merge the following slots: 'data.signaling','net', 'netP','meta', 'idents', 'var.features' , 'DB', and 'LR'.



Compute signaling network similarity for datasets 1 2 3 
Manifold learning of the signaling networks for datasets 1 2 3 


In [21]:
# Save the first structural-similarity embedding (UMAP1/UMAP2 coordinates,
# one row per "<pathway>--<sample>") to the output directory.
write.csv(
    x = cellchat.embed@netP$similarity$structural$dr[[1]],
    file = paste0(output_path, 'cellchat_embeddings.csv')
)

In [22]:
# Display the embedding saved to cellchat_embeddings.csv: the first entry of the
# structural-similarity `dr` list, one row per "<pathway>--<sample>".
cellchat.embed@netP$similarity$structural$dr[[1]]

Unnamed: 0,UMAP1,UMAP2
TGFb--S-S083,5.585470,7.3091879
GDNF--S-S083,26.390963,15.3708105
ACTIVIN--S-S083,25.557961,14.0986662
EGF--S-S083,5.915041,5.3643494
NRG--S-S083,2.190008,-8.5898752
PDGF--S-S083,25.082098,13.5163832
VEGF--S-S083,5.563293,6.2629442
IGF--S-S083,5.438999,7.6065140
CCL--S-S083,9.411817,5.0678596
CXCL--S-S083,-3.377059,21.2127209
