# IRD Xenium cell type annotation

This notebook documents the process of annotating cell types for the merged Xenium object in the IRD project. The approach combines sketching (intelligent subsampling) with unsupervised clustering to identify distinct cell types from their spatial expression profiles.

In [None]:
library(Seurat)
library(tidyverse)
library(qs2)

## Section 1: Create Seurat object from counts matrix and metadata converted from Scanpy

In [None]:
# Build the Seurat object from the matrix-market export converted from Scanpy.
# All four input files live in the same export directory.
mtx_dir <- "/diskmnt/Projects/myeloma_scRNA_analysis/MMY_IRD/Xenium/analysis/rds_workflow/mtx"

# The first column of features.tsv / barcodes.tsv supplies the feature / cell
# names used for the count matrix.
counts <- ReadMtx(
  mtx = file.path(mtx_dir, "matrix.mtx"),
  features = file.path(mtx_dir, "features.tsv"),
  cells = file.path(mtx_dir, "barcodes.tsv"),
  feature.column = 1,
  cell.column = 1
)

# Per-cell metadata table; its first column becomes the row names.
# NOTE(review): these row names are presumably the same cell barcodes as the
# columns of `counts` -- confirm against the export.
cell_metadata <- read.csv(
  file.path(mtx_dir, "metadata.tsv"),
  sep = "\t",
  header = TRUE,  # spelled out: `T` is an ordinary variable and can be reassigned
  row.names = 1
)

obj <- CreateSeuratObject(
  counts = counts,
  meta.data = cell_metadata
)

In [None]:
# Rename the "RNA" assay to "Xenium"; later steps refer to the assay by
# this name (e.g. the --assay argument passed to the sketch script).
obj <- RenameAssays(object = obj, RNA = "Xenium")

In [None]:
# Display the object summary to check the result of the assay rename.
obj

### Remove SNV probe genes and genes not shared between custom panels

These genes would skew normalization and HVG selection, and thereby affect the clustering results.

In [None]:
# Collect feature names containing "-WT" or "-ALT" (the SNV probe features
# referred to in this section's heading), then show them for inspection.
snv_genes <- grep("-WT|-ALT", rownames(obj), value = TRUE)
snv_genes

In [None]:
# Load one processed Xenium object per custom panel version (v5 and v6);
# only their feature names are used here.
v5_obj <- readRDS(
  file = "/diskmnt/Projects/SenNet_analysis/Main.analysis/bm/Xenium/NBM/SN151R1-Ma1Fd2-2U1/SN151R1-Ma1Fd2-2U1_processed.rds"
)
v6_obj <- readRDS(
  file = "/diskmnt/Projects/SenNet_analysis/Main.analysis/bm/Xenium/NBM/SN222R1-Ma1Fd2-1U1/SN222R1-Ma1Fd2-1U1_processed.rds"
)

# Features present in both panel versions, and how many there are.
shared_genes <- intersect(
  x = rownames(v5_obj),
  y = rownames(v6_obj)
)
print(length(shared_genes))

In [None]:
# Keep only features that are (a) present in this object, (b) shared between
# the v5 and v6 custom panels, and (c) not SNV probes. `snv_genes` was
# computed earlier in this section but was previously never applied, so any
# SNV probe present on both panels would have been retained.
keep_genes <- setdiff(intersect(rownames(obj), shared_genes), snv_genes)
obj <- subset(obj, features = keep_genes)
obj


In [None]:
# Write the gene-filtered object to disk; this file is the "-i" input of the
# sketch script launched in Section 2.
qs_save(
  object = obj,
  file = "/diskmnt/Users2/chouw/Projects/BM_spatial/IRD/IRD_JW_merge_seurat_sharedGenesOnly.qs"
)

## Section 2: Run the Seurat sketch data pipeline with Harmony integration

In [None]:
# Launch the external sketching script through nohup. stdout and stderr are
# both sent to the same log file. `wait` is left at its default, so this cell
# returns only once the command has finished.
# The meaning of each flag is defined by sketch_data.R (not shown here).
ird_sketch_log <- '/diskmnt/Users2/chouw/Projects/SenNet_bone/src/spatial/logs/nohup_IRDsketch.out'
ird_sketch_args <- c(
  "Rscript",
  "/diskmnt/Users2/chouw/Projects/SenNet_bone/src/spatial/Xenium_processing/sketch_data.R",
  "-i", "/diskmnt/Users2/chouw/Projects/BM_spatial/IRD/IRD_JW_merge_seurat_sharedGenesOnly.qs",
  "-o", "/diskmnt/Users2/chouw/Projects/BM_spatial/IRD",
  "-x", "IRD_JW_Xenium_merge",
  "-n", "3000",
  "--split_by", "Sample",
  "--assay", "Xenium"
)
system2(
  command = "nohup",
  args = ird_sketch_args,
  stdout = ird_sketch_log,
  stderr = ird_sketch_log
)

## Section 3: Inspect sketch results and annotate cell types

In [None]:
# Load the sketched, Harmony-integrated object (per its file name) from the
# Section 2 output directory.
obj <- qs_read(
  file = "/diskmnt/Users2/chouw/Projects/BM_spatial/IRD/IRD_JW_Xenium_merge_Xenium_sketched_harmonized.qs"
)

In [None]:
# Switch to the sketch assay and show the clusters on the Harmony UMAP,
# labelled in place of a legend.
DefaultAssay(object = obj) <- "sketch"
DimPlot(
  object = obj,
  reduction = "sketched.Xenium.harmony_umap",
  group.by = "seurat_clusters",
  label = TRUE,
  repel = TRUE
) +
  NoLegend()

In [None]:
# Marker genes per expected cell type, used to read off cluster identities
# from the dot plot below. Commented-out genes are alternates that are
# currently not plotted.
all_markers <- list(
  HSC           = c("CD34", "AVP", "SPINK2", "SMIM24"),
  Erythroblast  = c("PCNA", "MYC", "CENPF"),  # "PARP1", "GATA1",
  LateErythroid = c("AHSP", "ALAS2", "SLC4A1", "HEMGN"),
  Megakaryocyte = c("PF4", "PLEK", "CAVIN2", "MMRN1"),
  GMP           = c("ELANE", "MPO", "CTSG", "FUT4"),
  LateMyeloid   = c("LTF", "CAMP", "RETN"),
  Neutrophil    = c("MMP9", "AQP9", "IL1R2", "S100A12"),  ## "ITGAM",
  BaEoMast      = c("GATA2", "MS4A2", "CPA3"),  ## "MS4A3",
  CD14Monocyte  = c("VCAN", "FCN1", "CD14"),
  Macrophage    = c("CD5L", "CD163", "VSIG4"),
  pDC           = c("IRF8", "RUNX2", "LILRA4", "IL3RA"),
  cDC2          = c("CLEC10A", "CD1C", "FCER1A"),
  CD4T          = c("CD3D", "CD3E", "TRAC", "IL7R", "CD4", "CCL5"),
  CD8T          = c("CD8A", "NKG7"),
  NKcell        = c("GNLY", "CD247", "PRF1", "KLRD1"),
  EarlyB        = c("DNTT"),  ## "IGLL1", "VPREB1"
  MatureB       = c("TCL1A", "MS4A1", "CD79A", "CD19", "BANK1"),
  Plasma        = c("MZB1", "SDC1", "SLAMF7", "TNFRSF17"),
  MSC           = c("LEPR", "CXCL12", "FBN1", "ALDH1A3"),
  Adipocyte     = c("FABP4", "PLIN4", "ADIPOQ", "LPL"),
  Osteoblastic  = c("SPP1", "BGLAP", "SFRP4", "CTSK"),
  AEC           = c("BTNL9"),
  SEC           = c("DNASE1L3"),
  Endothelial   = c("KDR", "FLT4", "PECAM1"),  # "ENG"
  vSMC          = c("ACTA2", "MYH11", "PDGFRB")
)

# Wide figure so that all marker groups fit on one row of the dot plot.
options(repr.plot.width = 20, repr.plot.height = 6)
DotPlot(
  object = obj,
  features = all_markers,
  group.by = "seurat_clusters"
) +
  RotatedAxis()

### T subtyping

In [None]:
# List the graphs stored in the object; FindSubCluster below needs the name
# of one of them as its graph.name argument.
Graphs(obj)

In [None]:
# List the metadata columns currently available on the object.
colnames(obj@meta.data)

In [None]:
# Sub-cluster cluster 4 on the sketch SNN graph; results are stored in the
# "T.subcluster" metadata column.
Idents(object = obj) <- "seurat_clusters"
obj <- FindSubCluster(
  object = obj,
  cluster = "4",
  graph.name = "sketch.Xenium_snn",
  subcluster.name = "T.subcluster",
  resolution = 0.2,
  algorithm = 4
)

# UMAP coloured by the new sub-cluster labels.
options(repr.plot.width = 10, repr.plot.height = 8)
DimPlot(
  object = obj,
  reduction = "sketched.Xenium.harmony_umap",
  group.by = "T.subcluster",
  label = TRUE,
  repel = TRUE
) +
  NoLegend()

# Marker panel used to tell the sub-clusters apart.
t_markers <- c(
  "CD3D", "CD3E", "CD4", "CD8A", "TRAC", "SELL", "CCR7", "IL7R",
  "GZMA", "GZMB", "GZMK", "GNLY", "PRF1", "NKG7", "KLRB1", "KLRC1", "KLRD1"
)
options(repr.plot.width = 12, repr.plot.height = 6)
DotPlot(object = obj, features = t_markers, group.by = "T.subcluster") +
  RotatedAxis()

### B subtyping

In [None]:
# Sub-cluster cluster 9 on the sketch SNN graph; results are stored in the
# "B.subcluster" metadata column.
obj <- FindSubCluster(
  object = obj,
  cluster = "9",
  graph.name = "sketch.Xenium_snn",
  subcluster.name = "B.subcluster",
  resolution = 0.1,
  algorithm = 4
)

# UMAP coloured by the new sub-cluster labels.
options(repr.plot.width = 10, repr.plot.height = 8)
DimPlot(
  object = obj,
  reduction = "sketched.Xenium.harmony_umap",
  group.by = "B.subcluster",
  label = TRUE,
  repel = TRUE
) +
  NoLegend()

# Marker panel used to tell the sub-clusters apart.
b_markers <- c("DNTT", "MS4A1", "CD79A", "CD19", "BANK1")
options(repr.plot.width = 12, repr.plot.height = 6)
DotPlot(object = obj, features = b_markers, group.by = "B.subcluster") +
  RotatedAxis()

### GMP subtyping

HSC markers are also expressed within the GMP cluster, so this cluster is subclustered to separate the HSC-like cells from the GMPs.

In [None]:
# Show SPINK2 expression on the Harmony UMAP.
options(repr.plot.width = 10, repr.plot.height = 8)
FeaturePlot(
  object = obj,
  features = "SPINK2",
  reduction = "sketched.Xenium.harmony_umap"
)

In [None]:
# Sub-cluster cluster 2 on the sketch SNN graph; results are stored in the
# "GMP.subcluster" metadata column.
obj <- FindSubCluster(
  object = obj,
  cluster = "2",
  graph.name = "sketch.Xenium_snn",
  subcluster.name = "GMP.subcluster",
  resolution = 0.1,
  algorithm = 4
)

# UMAP coloured by the new sub-cluster labels.
options(repr.plot.width = 10, repr.plot.height = 8)
DimPlot(
  object = obj,
  reduction = "sketched.Xenium.harmony_umap",
  group.by = "GMP.subcluster",
  label = TRUE,
  repel = TRUE
) +
  NoLegend()

# Marker panel used to tell the sub-clusters apart.
gmp_markers <- c(
  "CD34", "AVP", "CRHBP", "SMIM24", "SPINK2",
  "ELANE", "MPO", "CTSG", "FUT4"
)
options(repr.plot.width = 12, repr.plot.height = 6)
DotPlot(object = obj, features = gmp_markers, group.by = "GMP.subcluster") +
  RotatedAxis()

### Unify the subcluster annotations and annotate

In [None]:
# List the metadata columns again; the three *.subcluster columns added above
# should now be present.
colnames(obj@meta.data)

In [None]:
# Combine the three sub-clusterings into one column: cells from clusters 4, 9
# and 2 take their T / B / GMP sub-cluster label, all other cells keep their
# original cluster id.
#
# The cluster ids are converted to character first. If seurat_clusters is a
# factor, ifelse() drops the factor attributes and would return the integer
# level codes instead of the cluster labels, which differ from the labels
# whenever the levels are not exactly "1", "2", ... in order.
seurat_cluster_values <- obj$seurat_clusters
cluster_id <- stats::setNames(
  as.character(seurat_cluster_values),
  names(seurat_cluster_values)
)
obj$sketch_subcluster <- ifelse(
  cluster_id == "4", obj$T.subcluster,
  ifelse(
    cluster_id == "9", obj$B.subcluster,
    ifelse(cluster_id == "2", obj$GMP.subcluster, cluster_id)
  )
)

options(repr.plot.width = 8, repr.plot.height = 6)
DimPlot(
  obj,
  reduction = "sketched.Xenium.harmony_umap",
  group.by = "sketch_subcluster",
  label = TRUE,
  repel = TRUE
) +
  NoLegend()

In [None]:
# Translate each (sub)cluster id into a cell type label. Clusters 2, 4 and 9
# appear only through their sub-clusters (2_*, 4_*, 9_*). Several ids map to
# the same label (e.g. 7 and 11 -> Erythroid; 3, 14 and 23 -> Low confidence).
Idents(object = obj) <- "sketch_subcluster"
obj <- RenameIdents(
  object = obj,
  "1"   = "Plasma cell",
  "2_1" = "GMP",
  "2_2" = "HSPC",
  "3"   = "Low confidence",
  "4_1" = "CD8 T",
  "4_2" = "CD4 T",
  "4_3" = "NK_T",
  "5"   = "Neutrophil",
  "6"   = "Late Myeloid",
  "7"   = "Erythroid",
  "8"   = "Endothelial",
  "9_1" = "Mature B",
  "9_2" = "Early B",
  "9_3" = "Mature B",
  "10"  = "MSC",
  "11"  = "Erythroid",
  "12"  = "Macrophage",
  "13"  = "Monocyte",
  "14"  = "Low confidence",
  "15"  = "Osteoblast",
  "16"  = "Ba/Eo/Ma",
  "17"  = "Megakaryocyte",
  "18"  = "Adipocyte",
  "19"  = "vSMC",
  "20"  = "pDC",
  "21"  = "NK",
  "22"  = "cDC2",
  "23"  = "Low confidence"
)

# Store the labels in the metadata column that is passed to the projection
# script (its "-n" argument) in Section 4.
obj$manual_anno_WC_sketch <- Idents(object = obj)

In [None]:
# Save the annotated object. Note that this overwrites the file that was read
# at the start of Section 3.
qs_save(
  object = obj,
  file = "/diskmnt/Users2/chouw/Projects/BM_spatial/IRD/IRD_JW_Xenium_merge_Xenium_sketched_harmonized.qs"
)

## Section 4: Project annotation back to full dataset

In [None]:
# Launch the external projection script through nohup. stdout and stderr are
# both sent to the same log file. `wait` is left at its default, so this cell
# returns only once the command has finished.
# The meaning of each flag is defined by project_sketch_data.R (not shown
# here); "-n" receives the annotation column created in Section 3.
ird_project_log <- '/diskmnt/Users2/chouw/Projects/SenNet_bone/src/spatial/logs/nohup_IRDsketchProject.out'
ird_project_args <- c(
  "Rscript",
  "/diskmnt/Users2/chouw/Projects/SenNet_bone/src/spatial/Xenium_processing/project_sketch_data.R",
  "-i", "/diskmnt/Users2/chouw/Projects/BM_spatial/IRD/IRD_JW_Xenium_merge_Xenium_sketched_harmonized.qs",
  "-o", "/diskmnt/Users2/chouw/Projects/BM_spatial/IRD",
  "-x", "IRD_JW_Xenium_merge",
  "-a", "Xenium",
  "-s", "sketch.Xenium",
  "-r", "sketched.Xenium.harmony",
  "-n", "manual_anno_WC_sketch",
  "--group_by_col", "Sample"
)
system2(
  command = "nohup",
  args = ird_project_args,
  stdout = ird_project_log,
  stderr = ird_project_log
)

## Section 5: Inspect projection results

In [None]:
# Load the object with projected annotations from the Section 4 output
# directory.
obj <- qs_read(
  file = "/diskmnt/Users2/chouw/Projects/BM_spatial/IRD/IRD_JW_Xenium_merge_sketched_projected_umap.qs"
)

Some projected cell types with lower confidence were rescued by Julia. This is documented in a separate notebook: `/diskmnt/Projects/myeloma_scRNA_analysis/MMY_IRD/Xenium/analysis/manual_plus_clustering_celltyping.ipynb`

In [None]:
# Read the revised cell type table; its first column becomes the row names,
# which are used below to look up cells. Preview the first rows.
revised_ct <- read.csv(
  file = "/diskmnt/Projects/myeloma_scRNA_analysis/MMY_IRD/Xenium/analysis/merged_metadata.csv",
  row.names = 1
)
head(revised_ct)

In [None]:
# Preview the object's metadata, e.g. to compare its row names with those of
# revised_ct before joining.
head(obj@meta.data)

In [None]:
# Map the ct column from revised_ct onto obj@meta.data by row name.
#
# match() is used instead of revised_ct[rownames(...), "ct"] because character
# row indexing of a data.frame partially matches row names, so a cell id that
# is a unique prefix of another id could silently pick up the wrong row.
stopifnot("ct" %in% colnames(revised_ct))
ct_row_idx <- match(rownames(obj@meta.data), rownames(revised_ct))

# Cells without an entry in revised_ct get NA; report them instead of letting
# them pass unnoticed.
n_unmatched <- sum(is.na(ct_row_idx))
if (n_unmatched > 0) {
  warning(
    n_unmatched, " of ", length(ct_row_idx),
    " cells have no entry in revised_ct; their ct is set to NA",
    call. = FALSE
  )
}

obj@meta.data$ct <- revised_ct[["ct"]][ct_row_idx]
head(obj@meta.data)

In [None]:
# Make the sketch assay the default for the steps that follow.
DefaultAssay(object = obj) <- "sketch"

In [None]:
# Display the object summary to confirm the active assay and contents.
obj

In [None]:
# Colour per cell type label, passed as `cols` when plotting the `ct` column.
# NOTE(review): names here must match the values of `ct` exactly. Several
# differ from the labels assigned in the sketch annotation (e.g. "PC" vs
# "Plasma cell", "cDC" vs "cDC2", "Low Confidence" vs "Low confidence") --
# presumably the revised table uses these names; confirm.
ct_palette <- c(
  # Progenitor, erythroid and megakaryocyte
  HSPC = "#d6e376",
  Erythroid = "#cfcfcf",
  Megakaryocyte = "#8f8f8f",
  # Myeloid and dendritic
  GMP = "#88cf46",
  "Late Myeloid" = "#4ab300",
  Neutrophil = "#95ad74",
  "Ba/Eo/Ma" = "#618038",
  cDC = "#3bff8c",
  Monocyte = "#3dd49f",
  Macrophage = "#03ab70",
  pDC = "#a5c3c4",
  # Lymphoid
  "CD4 T" = "#ff8400",
  "CD8 T" = "#ff0000",
  NK = "#9302d1",
  "Early B" = "#7cb2e6",
  "Mature B" = "#045eb5",
  PC = "#ffbafd",
  # Stromal and vascular
  MSC = "#cfc10a",
  "Fibro/Osteo" = "#ba9e00",
  Adipocyte = "#ffe600",
  Endothelial = "#cc7e7e",
  "vSMC/Pericyte" = "#ad4b8e",
  # Unassigned; drawn in white
  "Low Confidence" = "#FFFFFF"
)

In [None]:
# UMAP of the sketched cells coloured by the revised cell type (`ct`), with
# boxed labels instead of a legend and rasterised points.
ct_umap <- DimPlot(
  obj,
  reduction = "sketched.Xenium.harmony_umap",
  group.by = "ct",
  cols = ct_palette,
  label = TRUE,
  repel = TRUE,
  label.size = 5,
  label.box = TRUE,
  raster = TRUE
) +
  NoLegend()

# Show the figure in the notebook.
ct_umap

# Pass the plot explicitly: ggsave() otherwise saves last_plot(), which is
# only this figure if it was the most recently displayed ggplot (not the case
# when the code is source()d or another plot is drawn in between).
ggsave(
  filename = "/diskmnt/Users2/chouw/Projects/BM_spatial/IRD/IRD_JW_Xenium_merge_sketched_projected_umap_annotated.pdf",
  plot = ct_umap,
  width = 10,
  height = 8,
  dpi = 300
)