In [1]:
# install packages - did this in command line conda environment
# remotes::install_github("jokergoo/circlize@9b21578")
# remotes::install_github("jokergoo/ComplexHeatmap@7d95ca5")
# remotes::install_github("immunogenomics/presto@31dc97f")
# remotes::install_github("jinworks/CellChat@88c2e13")
# BiocManager::install("GenomeInfoDbData")

In [1]:
suppressPackageStartupMessages({
    library(tidyverse)
    library(zellkonverter)
    # library(scater)
    #library(scran)
    # library(scuttle)
    library(SingleCellExperiment)
    library(CellChat)
})

In [2]:
# Root folder of the distance-analysis outputs.
dist_out_dir <- "/home/workspace/spatial_mouse_lung_outputs/downstream_analysis/distance"

# CellChat results get their own subfolder; create it (and any missing
# parents) only when it is not there yet.
cellchat_out_dir <- file.path(dist_out_dir, "cellchat")
if (isFALSE(dir.exists(cellchat_out_dir))) {
    dir.create(cellchat_out_dir, recursive = TRUE)
}



In [3]:
# Read adata_cellchat_prepped.h5ad from dist_out_dir into `sce`.
sce <- readH5AD(
    file.path(dist_out_dir, "adata_cellchat_prepped.h5ad")
)

“[1m[22mThe names of these selected [32muns[39m items have been modified to match R
conventions: [32m'_scvi_manager_uuid' -> 'X_scvi_manager_uuid'[39m and [32m'_scvi_uuid' ->[39m
[32m'X_scvi_uuid'[39m”
“[1m[22mThe names of these selected [32mobs[39m columns have been modified to match R
conventions: [32m'_scvi_batch' -> 'X_scvi_batch'[39m and [32m'_scvi_labels' ->[39m
[32m'X_scvi_labels'[39m”


In [4]:
#sce$Timepoint <- stringr::str_extract(sce$batch, "\\d+")
# Store sample_label as a factor with a fixed level order (day 3 before
# day 30). Any value not listed in `levels` becomes NA.
sce$sample_label <- factor(
    sce$sample_label,
    levels = c("HDM_day3", "HDM_day30")
)

In [5]:
# List the reduced dimensions stored in `sce`; run_cellchat() reads the one
# named 'spatial' as cell coordinates.
reducedDimNames(sce)

In [6]:
#' Run the CellChat spatial workflow on one sample and save the result.
#'
#' Builds a spatial CellChat object from the "X" assay, the `label_fine` and
#' `sample_label_cp` cell annotations and the 'spatial' reduced dimension of
#' `sce_tmp`, runs the inference steps against `CellChatDB.mouse`, and writes
#' the object to `<out_dir>/cellchat_<name>.rds`.
#'
#' @param sce_tmp SingleCellExperiment holding the cells of one sample. Must
#'   provide assay "X", reduced dimension 'spatial' and the colData columns
#'   `label_fine` and `sample_label_cp`.
#' @param name Label used in the output file name and the progress message.
#' @param out_dir Existing directory that receives the `.rds` file.
#' @param trim Passed to `computeCommunProb(trim = )`.
#' @param interaction_range Passed to `computeCommunProb(interaction.range = )`.
#' @param contact_range Passed to `computeCommunProb(contact.range = )`.
#' @param min_cells Passed to `filterCommunication(min.cells = )`.
#' @return The CellChat object that was saved, invisibly.
run_cellchat <- function(sce_tmp, name, out_dir,
                         trim = 0.1,
                         interaction_range = 100,
                         contact_range = 10,
                         min_cells = 10) {

    # Print a progress line and flush it immediately so it is visible while
    # the long-running steps below are still computing.
    log_step <- function(msg) {
        print(msg)
        flush.console()
    }

    # Fail fast. Without these checks a missing output directory only errors
    # at saveRDS(), after all the computation, and a missing colData column is
    # NULL, which data.frame() drops without complaint.
    if (!dir.exists(out_dir)) {
        stop("Output directory does not exist: ", out_dir, call. = FALSE)
    }
    if (!"X" %in% assayNames(sce_tmp)) {
        stop("Assay 'X' not found in sce_tmp", call. = FALSE)
    }
    if (!"spatial" %in% reducedDimNames(sce_tmp)) {
        stop("Reduced dimension 'spatial' not found in sce_tmp", call. = FALSE)
    }
    missing_cols <- setdiff(
        c("label_fine", "sample_label_cp"),
        colnames(colData(sce_tmp))
    )
    if (length(missing_cols) > 0) {
        stop(
            "Missing colData column(s) in sce_tmp: ",
            paste(missing_cols, collapse = ", "),
            call. = FALSE
        )
    }

    log_step("step: data.input")
    data.input <- assay(sce_tmp, "X") # X are the log norm counts here, see part 1
    meta <- data.frame(
        labels = sce_tmp$label_fine,
        # KA - I only have one sample, this is just a copy of sample_label
        samples = sce_tmp$sample_label_cp,
        row.names = colnames(sce_tmp)
    )

    log_step("step: spatial.locs")
    # KA: wrapping this in as.data.frame() gave an error and as.matrix() took
    # a long time, so the reduced dimension is passed on as returned.
    spatial.locs <- reducedDim(sce_tmp, "spatial")
    spatial.factors <- data.frame(ratio = 1, tol = 5)

    log_step("running: createCellChat")
    cellchat <- createCellChat(
        object = data.input,
        meta = meta,
        group.by = "labels",
        datatype = "spatial",
        coordinates = spatial.locs,
        spatial.factors = spatial.factors
    )

    # Use the full mouse database (CellChatDB.human for human data). To limit
    # the analysis to a subset, use e.g.
    # subsetDB(CellChatDB.mouse, search = "Secreted Signaling").
    cellchat@DB <- CellChatDB.mouse

    # subset the expression data of signaling genes for saving computation cost
    cellchat <- subsetData(cellchat) # This step is necessary even if using the whole database

    # future::plan("multisession", workers = 8) # do parallel
    log_step("running: identifyOverExpressedGenes, identifyOverExpressedInteractions")
    cellchat <- identifyOverExpressedGenes(cellchat)
    cellchat <- identifyOverExpressedInteractions(cellchat)

    # Typically, contact.range = 10, which is a typical human cell size
    log_step("running: computeCommunProb")
    cellchat <- computeCommunProb(
        cellchat,
        type = "truncatedMean", trim = trim, # default 0.1, could lower to 0.05
        distance.use = TRUE, interaction.range = interaction_range,
        scale.distance = 1,
        contact.dependent = TRUE, contact.range = contact_range
    )

    # Filter out the cell-cell communication if there are only few number of cells in certain cell groups
    log_step("running: filterCommunication")
    cellchat <- filterCommunication(cellchat, min.cells = min_cells)

    log_step("running: computeCommunProbPathway")
    cellchat <- computeCommunProbPathway(cellchat)
    cellchat <- aggregateNet(cellchat)

    log_step(paste("running: saveRDS for sample: ", name))
    saveRDS(cellchat, file = file.path(out_dir, paste0("cellchat_", name, ".rds")))

    # Hand the fitted object back so callers can keep working with it without
    # re-reading the .rds file; invisible so nothing extra is printed.
    invisible(cellchat)
}

In [7]:
#' Run CellChat on the cells of a single sample.
#'
#' Subsets `sce` to the cells whose `sample_label` equals
#' `sample_label_select`, drops unused factor levels from the sample and
#' `label_fine` annotations, and passes the subset to `run_cellchat()`.
#'
#' @param sce SingleCellExperiment with `sample_label` and `label_fine`
#'   colData columns.
#' @param sample_label_select Single sample label to analyse; also used by
#'   `run_cellchat()` as the name of the output file.
#' @param out_dir Directory passed through to `run_cellchat()`.
#' @return Whatever `run_cellchat()` returns.
cellchat_sample <- function(sce, sample_label_select, out_dir) {

    if (length(sample_label_select) != 1 || is.na(sample_label_select)) {
        stop("sample_label_select must be a single non-NA label", call. = FALSE)
    }

    # %in% never returns NA, so cells whose sample_label is NA are simply
    # excluded; `==` would pass those NAs on into the column subscript.
    keep <- sce$sample_label %in% sample_label_select
    if (!any(keep)) {
        stop(
            "No cells with sample_label '", sample_label_select, "' in sce",
            call. = FALSE
        )
    }
    sce_tmp <- sce[, keep]

    # # try on subset
    # sce_tmp <- sce_tmp[, sample(1:ncol(sce_tmp), 1000)]

    # This is absolutely key. Otherwise Cellchat does not WORK!
    # KA note here - we have only one sample per condition - make a copy of
    # the sample_label column with the unused levels dropped.
    # as.factor() lets droplevels() also accept a non-factor column.
    sce_tmp$sample_label_cp <- droplevels(as.factor(sce_tmp$sample_label))
    # KA also do this for label column
    sce_tmp$label_fine <- droplevels(as.factor(sce_tmp$label_fine))

    print("run_cellchat"); flush.console()
    run_cellchat(sce_tmp, sample_label_select, out_dir)
}

In [8]:
# Show the directory the CellChat .rds files will be written to.
cellchat_out_dir

In [9]:
# Run the CellChat workflow once per condition, day 3 first, then day 30.
for (sample_name in c('HDM_day3', 'HDM_day30')) {
    cellchat_sample(sce, sample_name, cellchat_out_dir)
}


[1] "run_cellchat"
[1] "step: data.input"
[1] "step: spatial.locs"
[1] "running: createCellChat"
[1] "Create a CellChat object from a data matrix"
Create a CellChat object from spatial transcriptomics data... 
Set cell identities for the new CellChat object 
The cell groups used for CellChat analysis are  AT1, AT2, Alv Mf, Art, B cell, CD4 act (TLS), CD4 act (adventitia), CD4 act (parenchyma), CD4 naive, CD8 act, CD8 naive, Cap, Cap-a, Ccr7+ cDC2, Ccr7- cDC2, Ciliated, Club, Col13a1+ fibroblast, Col14a1+ fibroblast, ILC2, Int Mf, Lymph, Mesothelial, Mono, Myofibroblast, NK cell, Neut, Pericyte 1, Pericyte 2, Plasmablast, SMC, Vein, cDC1, gd T cell 
[1] "running: identifyOverExpressedGenes, identifyOverExpressedInteractions"
The number of highly variable ligand-receptor pairs used for signaling inference is 209 
[1] "running: computeCommunProb"
truncatedMean is used for calculating the average gene expression per cell group. 
[1] ">>> Run CellChat on spatial transcriptomics data using d

In [10]:
# Print a summary of the loaded SingleCellExperiment.
sce

class: SingleCellExperiment 
dim: 480 499432 
metadata(15): X_scvi_manager_uuid X_scvi_uuid ... sample_label_colors
  umap
assays(2): X counts
rownames(480): Ache Acta2 ... Xist Zdhhc14
rowData names(1): n_cells
colnames(499432): aaaaagmb-1_0 aaaaboon-1_0 ... oileejmi-1_1
  oilegecp-1_1
colData names(85): cell_id x_centroid ... inside_bronchi n_genes
reducedDimNames(7): X_pca X_scANVI ... X_umap_scvi_refalign spatial
mainExpName: NULL
altExpNames(0):