In [None]:
# conda env cellchat
#-------------
library(RColorBrewer)
library(Seurat)
library(ggplot2)
library(reshape2)
library(viridis)
library(dplyr)
library(Matrix)
library(cowplot)
library(getopt)
library(tidyr)
library(tidyverse)
library(plyr)
library(ComplexHeatmap)
library(ggpubr)
library(enrichR)
library(scProportionTest)
library(DESeq2)

# ---- Gene annotations ----
# Named list of gene-level resources; each element is exposed as a global.
gs <- readRDS("/diskmnt/Projects/myeloma_scRNA_analysis/MMY_IRD/analysis/resources/gene_sets.rds")
annotations          <- gs$annotations
s.genes              <- gs$s.genes
g2m.genes            <- gs$g2m.genes
ig_genes             <- gs$ig_genes  # length = 436
hemogenes            <- gs$hemo_genes
protein_coding_genes <- gs$protein_coding_genes

# ---- Palettes ----
# Named list of colour vectors shared by the plotting code.
colors <- readRDS("/diskmnt/Projects/myeloma_scRNA_analysis/MMY_IRD/analysis/resources/colors.rds")
dotplot.color <- colors$dotplot.color
afine_cols    <- colors$afine
time_cols     <- colors$time
proj_cols     <- colors$proj
cols          <- colors$bcols
#----------------------------------------------
# Drop uninformative genes from a vector of gene symbols.
#
# Removes every symbol listed in `exclude` and every symbol that starts with
# "MT-", "RPL" or "RPS". Input order and duplicates are preserved.
#
# Args:
#   genes:   vector of gene symbols.
#   exclude: symbols to remove outright. Defaults to the globals `ig_genes`
#            and `hemogenes`; the default is evaluated lazily, so those
#            globals are only needed when `exclude` is not supplied.
# Returns:
#   The elements of `genes` that pass both filters.
filter_genes <- function(genes, exclude = c(ig_genes, hemogenes)) {
  # One base-R regex pass replaces three stringr::str_starts() passes, so the
  # helper no longer depends on stringr being attached.
  is_dropped <- genes %in% exclude | grepl("^(MT-|RPL|RPS)", genes)
  genes[!is_dropped]
}
# Return only protein-coding genes (excl. mito, hemo, Ig, ribo).
#
# Keeps a symbol when it is listed in `keep`, is not listed in `exclude`, and
# does not start with "MT-", "RPL" or "RPS". Input order and duplicates are
# preserved.
#
# Args:
#   genes:   vector of gene symbols.
#   exclude: symbols to remove outright. Defaults to the globals `ig_genes`
#            and `hemogenes`.
#   keep:    allow-list of symbols. Defaults to the global
#            `protein_coding_genes`.
#            Both defaults are evaluated lazily, so the globals are only
#            needed when the corresponding argument is not supplied.
# Returns:
#   The elements of `genes` that pass all filters.
filter_genes_p <- function(genes,
                           exclude = c(ig_genes, hemogenes),
                           keep = protein_coding_genes) {
  # One base-R regex pass replaces three stringr::str_starts() passes.
  is_kept <- genes %in% keep &
    !(genes %in% exclude) &
    !grepl("^(MT-|RPL|RPS)", genes)
  genes[is_kept]
}
#-----------------------------------------
# Every relative path used below (the cached RDS file and the PDF outputs)
# resolves against this directory.
setwd("/diskmnt/Projects/myeloma_scRNA_analysis/MMY_IRD/revision/merge/no_harmony/Mye/")

In [None]:
# Load the exported matrix; feature and cell names come from the first column
# of features.tsv and barcodes.tsv respectively.
counts <- ReadMtx(
  cells = "/diskmnt/Projects/myeloma_scRNA_analysis/MMY_IRD/revision/merge/no_harmony/Mye/mtx/barcodes.tsv",
  cell.column = 1,
  features = "/diskmnt/Projects/myeloma_scRNA_analysis/MMY_IRD/revision/merge/no_harmony/Mye/mtx/features.tsv",
  feature.column = 1,
  mtx = "/diskmnt/Projects/myeloma_scRNA_analysis/MMY_IRD/revision/merge/no_harmony/Mye/mtx/matrix.mtx"
)

# Display the loaded matrix.
counts

In [None]:
# Build the Seurat object from the counts and the tab-separated per-cell
# metadata table, whose first column supplies the row names.
obj <- CreateSeuratObject(
  counts = counts,
  meta.data = read.delim(
    "/diskmnt/Projects/myeloma_scRNA_analysis/MMY_IRD/revision/merge/no_harmony/Mye/mtx/metadata.tsv",
    row.names = 1
  )
)

In [None]:
# Build one pseudobulk id per cell: "<UPN without '_'>-<Collection>-<subset
# without spaces>", then aggregate expression per id.
meta <- obj@meta.data
meta$id <- paste0(
  str_replace_all(meta$UPN, "_", ""),
  "-",
  meta$Collection,
  "-",
  str_replace_all(meta$subset, " ", "")
)
obj@meta.data <- meta

pb <- AggregateExpression(
  obj,
  assays = "RNA",
  group.by = "id",
  return.seurat = TRUE
)
# Keep the aggregated profile name as an explicit metadata column.
pb$id <- rownames(pb@meta.data)

head(pb@meta.data)

In [None]:
# Recover UPN / Collection / subset from the id by splitting on "-" into
# exactly three pieces; any further "-" stays inside the third piece.
# NOTE(review): assumes UPN and Collection themselves contain no "-" - confirm.
pb@meta.data[, c("UPN", "Collection", "subset")] <- str_split_fixed(
  string = pb$id,
  pattern = "-",
  n = 3
)
pbm <- pb@meta.data
head(pbm)

In [None]:
# Count pseudobulk profiles per subset (rows) and Collection (columns).
table(pbm$subset, pbm$Collection)

In [None]:
# Cache the pseudobulk object in the working directory.
saveRDS(pb, 'pseudobulk_mye.rds')

In [None]:
# Reload the cached pseudobulk object (presumably so later cells can start
# from here without re-aggregating).
pb <- readRDS('pseudobulk_mye.rds')

In [None]:
# List the subset labels present in the pseudobulk object.
unique(pb$subset)

In [None]:
# Group profiles by Collection for the FindMarkers calls in the next cells.
Idents(pb) <- 'Collection'
DefaultAssay(pb) <- "RNA"
# Pull the current counts layer of the RNA assay.
cts <- GetAssayData(pb, assay='RNA', layer='counts')
# Add a pseudocount of 1 to every entry.
# NOTE(review): an earlier remark claimed this keeps an integer type; the
# storage type of `cts` is not visible here, and adding a constant to a
# sparse matrix generally produces a dense one - confirm this is acceptable.
cts_plus1 <- cts + 1L

In [None]:
# Overwrite the RNA counts layer with the +1 counts. Because `cts` is read
# back from `pb`, re-running the previous cell and this one adds another +1.
pb[['RNA']]$counts <- cts_plus1

In [None]:
# NDMM vs PT across all pseudobulk profiles (DESeq2); keep genes with
# adjusted p < 0.05 and carry the gene symbol as a column.
ndmm_pt <- FindMarkers(
  object = pb,
  ident.1 = "NDMM",
  ident.2 = "PT",
  test.use = "DESeq2"
)
ndmm_pt[["gene"]] <- rownames(ndmm_pt)
ndmm_pt <- filter(ndmm_pt, p_val_adj < 0.05)

ndmm_pt

In [None]:
# DESeq2 comparison between two Collection groups of `pb`; returns genes with
# adjusted p < 0.05, with the gene symbol added as a column.
collection_degs <- function(group_1, group_2) {
  res <- FindMarkers(
    object = pb,
    ident.1 = group_1,
    ident.2 = group_2,
    test.use = "DESeq2"
  )
  res$gene <- rownames(res)
  res %>% filter(p_val_adj < 0.05)
}

ndmm_nbm <- collection_degs("NDMM", "NBM")
pt_nbm <- collection_degs("PT", "NBM")


In [None]:
# Show whether TNFSF13 / TNFSF13B are among the genes kept (adjusted
# p < 0.05) in each comparison against NBM.
ndmm_nbm %>% filter(gene %in% c('TNFSF13', 'TNFSF13B'))
pt_nbm %>% filter(gene %in% c('TNFSF13', 'TNFSF13B'))


In [None]:
# get time DEGs
# For each listed subset, test NDMM vs PT on that subset's pseudobulk
# profiles (DESeq2) and keep genes with adjusted p < 0.05 that also pass
# filter_genes_p().

time_degs <- list()
Idents(pb) <- "subset"
for (subset_name in c("CD14Mc", "CD16Mc/TAM", "cDC", "Neutrophil")) {
  print(subset_name)
  pb_subset <- subset(pb, idents = subset_name)
  Idents(pb_subset) <- "Collection"

  res <- FindMarkers(
    object = pb_subset,
    ident.1 = "NDMM",
    ident.2 = "PT",
    test.use = "DESeq2"
  )
  res$comp <- "NDMMvsPT"
  res$gene <- rownames(res)
  rownames(res) <- NULL
  res$subset <- subset_name

  # NA adjusted p-values are dropped together with non-significant genes.
  sig <- res[!is.na(res$p_val_adj) & res$p_val_adj < 0.05, ]
  time_degs[[subset_name]] <- sig[sig$gene %in% filter_genes_p(sig$gene), ]
}

# Long table with one row per subset/gene.
tdf <- bind_rows(time_degs)

In [None]:
# Show whether TNFSF13 / TNFSF13B are among the filtered NDMM-vs-PT DEGs of
# each subset.
time_degs[['Neutrophil']] %>% filter(gene %in% c('TNFSF13', 'TNFSF13B'))
time_degs[['CD16Mc/TAM']] %>% filter(gene %in% c('TNFSF13', 'TNFSF13B'))
time_degs[['CD14Mc']] %>% filter(gene %in% c('TNFSF13', 'TNFSF13B'))
time_degs[['cDC']] %>% filter(gene %in% c('TNFSF13', 'TNFSF13B'))


In [None]:
# ---- enrichR setup ----
# Table of libraries returned by listEnrichrDbs().
dbs <- listEnrichrDbs()
# Library names passed to enrichr() in the cells below.
gob   <- "GO_Biological_Process_2021"
gom   <- "GO_Molecular_Function_2021"
kegg  <- "KEGG_2019_Human"
react <- "Reactome_2022"
# -----------------------

In [None]:
# enrichR of CD14Mc DEGs
# Split the CD14Mc DEG table by comparison and direction, then fetch the
# significantly enriched GO Biological Process terms for each gene list.
# NOTE: this reassigns `ndmm_pt`, `ndmm_nbm` and `pt_nbm`, replacing the
# all-subset FindMarkers tables of the same names created in earlier cells.
df <- time_degs[["CD14Mc"]]
ndmm_pt <- df %>% filter(comp == "NDMMvsPT" & avg_log2FC > 0)
pt_ndmm <- df %>% filter(comp == "NDMMvsPT" & avg_log2FC < 0)
# The time_degs loop in this file labels every row "NDMMvsPT", so the two NBM
# selections below are empty unless time_degs was extended elsewhere.
ndmm_nbm <- df %>% filter(comp == "NDMMvsNBM" & avg_log2FC > 0)
pt_nbm <- df %>% filter(comp == "PTvsNBM" & avg_log2FC > 0)

# Terms with Adjusted.P.value < 0.05 for one gene list against one library.
# An empty gene list returns an empty data frame without calling enrichr(),
# instead of relying on how enrichr() reacts to empty input.
enrich_sig <- function(genes, db) {
  if (length(genes) == 0) {
    message("No genes to test against ", db, "; skipping Enrichr query")
    return(data.frame())
  }
  enrichr(genes, db)[[1]] %>% filter(Adjusted.P.value < 0.05)
}

g1 <- enrich_sig(ndmm_pt$gene, gob)
g2 <- enrich_sig(pt_ndmm$gene, gob)
g3 <- enrich_sig(ndmm_nbm$gene, gob)
g4 <- enrich_sig(pt_nbm$gene, gob)

# plot bubbleplot
# Bubble plots of up to 20 enriched terms per direction: x = fraction of the
# pathway's genes found among the DEGs, point size = number of DEGs in the
# term. One page per direction.
# rep() keeps these assignments valid when a table has zero rows.
g1$HigherIn <- rep("NDMM", nrow(g1))
g2$HigherIn <- rep("PT", nrow(g2))

# head() returns at most 20 rows; `[1:20, ]` padded short tables with all-NA
# rows that then showed up as NA terms. bind_rows() tolerates an empty table
# whose columns differ.
epg <- bind_rows(head(g1, 20), head(g2, 20))
if (nrow(epg) == 0) {
  stop("No significantly enriched terms to plot", call. = FALSE)
}

# Number of DEGs per term = number of ';'-separated symbols in `Genes`.
epg$nGenes <- lengths(strsplit(epg$Genes, ";", fixed = TRUE))
rownames(epg) <- NULL
# Pathway size is the denominator of the "k/N" Overlap string.
epg$ngenes_pathway <- str_split_fixed(epg$Overlap, "/", 2)[, 2]
epg$ratio <- epg$nGenes / as.numeric(epg$ngenes_pathway)

# Order terms by ratio so the y axis is sorted.
epg <- epg[order(epg$ratio), ]
epg$Term <- factor(epg$Term, levels = unique(epg$Term))
head(epg)

# Bubble plot for one direction's terms.
enrich_bubble_plot <- function(terms_df) {
  ggplot(terms_df,
         aes(x = as.numeric(ratio), y = Term, size = as.numeric(nGenes))) +
    geom_point(alpha = 1) +
    scale_size(range = c(1, 10)) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
}

p1 <- enrich_bubble_plot(epg[epg$HigherIn == "NDMM", ])
p2 <- enrich_bubble_plot(epg[epg$HigherIn == "PT", ])

pdf("enrichR_CD14_NDMMvsPT_bubbleplot.pdf", width = 11, height = 7)
# Close the device even if printing fails, so no PDF device is left open.
tryCatch(
  {
    print(p1)
    print(p2)
  },
  finally = dev.off()
)

# Heatmap for hand-picked PT-higher terms: a term-membership panel (black =
# gene belongs to the term) next to row-scaled average expression per
# Collection in CD14Mc.

# Positions of the chosen terms within `g2`; positions beyond the end of `g2`
# are dropped rather than turned into NA terms.
term_idx <- c(1, 2, 4, 10, 14, 16)
if (any(term_idx > nrow(g2))) {
  message("g2 has only ", nrow(g2), " terms; dropping out-of-range selections")
}
terms_genesplot <- g2$Term[term_idx[term_idx <= nrow(g2)]]

# The terms come from `g2` (PT-higher), so take gene membership only from the
# PT rows of `epg`; a term that is also enriched among NDMM-higher genes would
# otherwise contribute those genes too. `%in%` keeps NA rows out.
epg_genesplot <- epg[epg$HigherIn %in% "PT" & epg$Term %in% terms_genesplot, ]
term_genes <- strsplit(epg_genesplot$Genes, ";", fixed = TRUE)
names(term_genes) <- as.character(epg_genesplot$Term)
common_genes <- unique(unlist(term_genes, use.names = FALSE))
common_genes

# Term x gene indicator matrix.
mat_toplot <- matrix(
  0,
  nrow = length(terms_genesplot),
  ncol = length(common_genes),
  dimnames = list(terms_genesplot, common_genes)
)
for (term in names(term_genes)) {
  print(term_genes[[term]])
  mat_toplot[term, term_genes[[term]]] <- 1
}
mat_toplot
mat_toplot <- t(mat_toplot)
colplot <- c("0" = "white", "1" = "black")
hm_paths <- Heatmap(mat_toplot, name = "Enriched", col = colplot,
                    cluster_rows = TRUE, cluster_columns = TRUE)

Idents(pb) <- "subset"
# `idents` spelled out; the earlier `ident =` only worked through partial
# argument matching.
cd14mc <- subset(pb, idents = "CD14Mc")

exp <- as.data.frame(
  AverageExpression(object = cd14mc, features = common_genes,
                    group.by = "Collection")[[1]]
)
exp <- exp[, c("NBM", "NDMM", "PT")]
# Row-wise z-score; a gene with zero variance across the three groups becomes
# NaN, so the colour range ignores missing values.
exp_scaled <- as.data.frame(t(scale(t(exp))))
col_exp <- circlize::colorRamp2(
  c(min(exp_scaled, na.rm = TRUE), 0, max(exp_scaled, na.rm = TRUE)),
  c("blue", "white", "red")
)
# Same gene order as the membership panel.
exp_scaled <- exp_scaled[rownames(mat_toplot), ]
hm <- Heatmap(as.matrix(exp_scaled), name = "Expr", col = col_exp,
              cluster_rows = FALSE, cluster_columns = FALSE)
hm_combined <- hm_paths + hm

pdf("enrichR_CD14_upinPT_selectPathways_heatmap.pdf", width = 8, height = 10)
# Close the device even if drawing fails.
tryCatch(print(hm_combined), finally = dev.off())

In [None]:
# enrichR of Neutrophils
# Split the Neutrophil DEG table by comparison and direction, then fetch the
# significantly enriched terms for each gene list.
# NOTE: this reassigns `ndmm_pt`, `pt_ndmm`, `ndmm_nbm` and `pt_nbm`.
df <- time_degs[["Neutrophil"]]
ndmm_pt <- df %>% filter(comp == "NDMMvsPT" & avg_log2FC > 0)
pt_ndmm <- df %>% filter(comp == "NDMMvsPT" & avg_log2FC < 0)
# The time_degs loop in this file labels every row "NDMMvsPT", so the two NBM
# selections below are empty unless time_degs was extended elsewhere.
ndmm_nbm <- df %>% filter(comp == "NDMMvsNBM" & avg_log2FC > 0)
pt_nbm <- df %>% filter(comp == "PTvsNBM" & avg_log2FC > 0)

# Terms with Adjusted.P.value < 0.05 for one gene list against one library.
# An empty gene list returns an empty data frame without calling enrichr(),
# instead of relying on how enrichr() reacts to empty input.
enrich_sig <- function(genes, db) {
  if (length(genes) == 0) {
    message("No genes to test against ", db, "; skipping Enrichr query")
    return(data.frame())
  }
  enrichr(genes, db)[[1]] %>% filter(Adjusted.P.value < 0.05)
}

g1 <- enrich_sig(ndmm_pt$gene, gob)
# NOTE(review): the PT-higher list is tested against KEGG while the other
# lists use GO Biological Process - confirm this is intended.
g2 <- enrich_sig(pt_ndmm$gene, kegg)
g3 <- enrich_sig(ndmm_nbm$gene, gob)
g4 <- enrich_sig(pt_nbm$gene, gob)

In [None]:
# Inspect the genes behind `pt_ndmm` (NDMMvsPT rows with negative avg_log2FC).
pt_ndmm$gene

In [None]:
# Curated gene list, grouped by the author's functional categories.
ifn_related_genes <- c(
  # Core IFN transcriptional regulators
  "IRF1", "IRF2BP2",

  # Signal transduction & feedback modulators
  "TRIM38", "RIOK3", "CXXC5", "SMAD7", "MAFG",

  # Interferon-responsive stress & chromatin modifiers
  "PRNP", "C15orf48", "BAZ2A", "DHRS3",

  # IFN-modulated trafficking / effector genes
  "RAB31", "FMNL1", "ARL8A", "ZBTB43"
)

Idents(pb) <- "subset"
# `idents` spelled out; the earlier `ident =` only worked through partial
# argument matching.
neutro <- subset(pb, idents = "Neutrophil")

# Average expression per Collection, then row-wise z-score for display.
exp <- as.data.frame(
  AverageExpression(object = neutro, features = ifn_related_genes,
                    group.by = "Collection")[[1]]
)
exp <- exp[, c("NBM", "NDMM", "PT")]
# A gene with zero variance across the three groups becomes NaN after scaling,
# so the colour range ignores missing values.
exp_scaled <- as.data.frame(t(scale(t(exp))))
col_exp <- circlize::colorRamp2(
  c(min(exp_scaled, na.rm = TRUE), 0, max(exp_scaled, na.rm = TRUE)),
  c("blue", "white", "red")
)
hm <- Heatmap(as.matrix(exp_scaled), name = "Expr", col = col_exp,
              cluster_rows = FALSE, cluster_columns = FALSE)

pdf("enrichR_neutrophil_upinPT_IFNrelated_heatmap.pdf", width = 4, height = 6)
# Close the device even if drawing fails.
tryCatch(print(hm), finally = dev.off())


In [None]:
# Keep every pseudobulk profile whose subset label is neither MSC nor HSPC.
mye <- subset(pb, subset = !(subset %in% c("MSC", "HSPC")))

In [None]:
# List the subset labels that remain after excluding MSC and HSPC.
unique(mye$subset)

In [None]:
# subset pseudobulk object to only include paired samples
# A UPN counts as paired when it has at least one NDMM and one PT profile.
meta <- mye@meta.data

paired_upns <- meta %>%
  distinct(UPN, Collection) %>%        # unique UPN-Collection combos
  group_by(UPN) %>%
  filter(all(c("NDMM", "PT") %in% Collection)) %>%   # UPN must contain both
  ungroup() %>%
  pull(UPN) %>%
  unique()

length(paired_upns)
# Keep every profile of a paired UPN. The subset was previously computed
# twice with identical arguments; once is enough.
mye_paired <- subset(mye, subset = UPN %in% paired_upns)
unique(mye_paired$Collection)

In [None]:
# NDMM vs PT differential expression per subset, restricted to profiles from
# paired UPNs. Keeps genes with adjusted p < 0.05 that pass filter_genes_p().
# NOTE(review): FindMarkers is called without any patient term, so this
# compares the two groups of paired samples rather than fitting a paired
# design - confirm that is intended.
paired_degs <- list()
# Iterate over the subsets actually present in `mye_paired`; a subset found
# only in `mye` would make subset() fail on an empty selection.
for (ct in unique(mye_paired$subset)) {
  paired_sub <- subset(mye_paired, subset = subset == ct)

  # FindMarkers refuses groups with fewer than 3 profiles (its default
  # min.cells.group); skip those subsets instead of aborting the whole loop.
  n_per_group <- table(factor(paired_sub$Collection, levels = c("NDMM", "PT")))
  if (any(n_per_group < 3)) {
    message("Skipping ", ct, ": fewer than 3 NDMM or PT profiles")
    next
  }

  Idents(paired_sub) <- "Collection"
  dp <- FindMarkers(object = paired_sub, ident.1 = "NDMM", ident.2 = "PT",
                    test.use = "DESeq2")
  dp$comp <- "NDMMvsPT"
  dp$gene <- rownames(dp)
  df <- dp[which(dp$p_val_adj < 0.05), ]
  df <- df[df$gene %in% filter_genes_p(df$gene), ]
  paired_degs[[ct]] <- df
}

In [None]:
# Display the per-subset paired DEG tables.
paired_degs

In [None]:
# Violin plots of selected genes, one page per gene x subset, using only
# profiles from paired UPNs.
genes <- c("HIF1A", "IRF1", "SMAD7", "PRNP")

pdf("violin_plots_mye.pdf", width = 3, height = 4)
# Close the device even if plotting fails.
tryCatch(
  {
    for (g in genes) {
      # Fetch once per gene, including `subset` so each page can be limited
      # to its own cell type.
      gene_df <- FetchData(mye, vars = c(g, "Collection", "UPN", "subset"))
      colnames(gene_df)[1] <- "expr"
      gene_df <- gene_df[gene_df$UPN %in% paired_upns, ]

      for (ct in unique(mye$subset)) {
        # The data was previously not filtered by `ct`, so every page for a
        # gene showed the same pooled data under a different title.
        df <- gene_df[gene_df$subset %in% ct, ]
        if (nrow(df) == 0) {
          next  # no paired profiles for this subset
        }

        p <- ggplot(df, aes(Collection, expr, fill = Collection)) +
          geom_violin(alpha = 0.7) +
          geom_jitter(width = 0.15, size = 1, alpha = 0.6) +
          theme_bw() +
          ggtitle(paste0(g, ":", ct))

        print(p)
      }
    }
  },
  finally = dev.off()
)