# Preparation

In [None]:
# Print the R session details (R version, attached packages)
sessionInfo()
# Fix the random seed so that steps using random numbers are repeatable
set.seed(1)
# Show the library paths that packages are loaded from
.libPaths()

## Imports

In [None]:
library(devtools)
library(Seurat)
library(dplyr)
library(Matrix)
library(tidyr)
library(limma)
library(ggplot2)
library(ggthemes)
library(patchwork)
library(gprofiler2)
library(ggrepel)
library(scales)
library(ggthemes)
library(purrr)
library(MAST)
library(qpcR)
library(enrichR)
library(harmony)
library(Nebulosa)
library(celldex)
library(ggrastr)
library(cowplot)
library(ggpubr)

## Custom Utility Functions

Figure sizes in R are specified in inches (1 inch = 2.54 cm).

In [None]:
# Set the size of plots rendered in the notebook.
#   width, height: plot dimensions, stored in the `repr.plot.width` and
#                  `repr.plot.height` options.
# Returns (invisibly) the previous values of both options, as options() does.
set_figsize <- function(width, height){
    figure_dims <- list(repr.plot.width = width,
                        repr.plot.height = height)
    # options() accepts a single named list and sets every entry in it
    options(figure_dims)
}

# Load in the data

In [None]:
# Read the serialized object from disk; its structure is inspected in the next cells

obj <- readRDS('pathway/Preparation/rpca_integrated.rds')

In [None]:
# Display the object summary
obj
# Identity levels of the object (used below as the list of clusters to analyse)
levels(obj)

In [None]:
# List the column names of the metadata table stored on the object
colnames(obj@meta.data)

In [None]:
# Distinct values of the `Type` metadata column (used below as the DGE grouping variable)
unique(obj@meta.data$Type)

In [None]:
# Distinct values of the `clusters_all` metadata column (used below as the identity for subsetting)
unique(obj@meta.data$clusters_all)

In [None]:
# Create the output directory; stay silent if it already exists so the cell can be re-run
dir.create('./outs/', showWarnings = FALSE)

In [None]:
# Display the object summary again
obj

# Run DGE for each cluster

In [None]:
# Per-cluster differential gene expression (group1 vs group2) followed by
# volcano plots, heatmaps and Enrichr pathway enrichment analysis (PEA).
# All outputs are written to 'outs/<cluster>/'; the combined DGE table is
# written to './outs/de_general_all.csv'.

# type in the parameters
cluster_of_interest <- levels(obj)
group1 <- 'HL'
group2 <- 'RLN'

# create a clustering column ("<cluster>_<Type>") for the heatmap
obj@meta.data$cluster_dis <- paste0(obj@meta.data$clusters_all, '_', obj@meta.data$Type)
levels_hm <- c(paste0(levels(obj), '_', 'HL'), paste0(levels(obj), '_', 'RLN'))
# keep only the cluster/Type combinations that are present in the data
levels_hm <- levels_hm[levels_hm %in% unique(obj@meta.data$cluster_dis)]

# for subset: identities are switched to the `clusters_all` annotation
object <- obj
Idents(object) <- 'clusters_all'
levels(object) <- levels(obj)

# for dge
grouping_dge <- 'Type'
cutoff_expression <- 0.05 # min.pct passed to FindMarkers
logfc_cutoff <- 0.02 # logfc.threshold passed to FindMarkers
padj_cutoff <- 0.05 # adjusted p-value cutoff (volcano highlights, genes entering the PEA)
min_cells_group <- 3 # FindMarkers' default min.cells.group, made explicit for the guard in the loop
features <- rownames(object)
# genes whose symbols match these prefixes are excluded from testing
# (presumably TCR/Ig, ribosomal and mitochondrial genes)
markers.remove <- grep(pattern = "^TRAV|^TRBV|^TRGV|^TRDV|^RPL|^RPS|^MT-|^IGKV|^IGLV|^IGHV|^IGH|^IGKC|^TRBC", x = rownames(object), value = TRUE)
features <- features[!(features %in% markers.remove)]
de_general_all <- c()

# for volcano
#colours_diagnosis_groups <- c('#5cadfb', '#1d9d01', '#ff8a01', '#bebebe')
colour1 <- '#D3556E'
colour2 <- 'lightgrey'
number_of_genes <- 20 # number of top genes per direction to highlight / plot
mhc2_genes <- grep(pattern = '^HLA-D', x = rownames(obj), value = TRUE)
genes_of_interest_ihb <- c('CD28', 'CTLA4', 'CD80', 'CD86',
                           mhc2_genes, 'LAG3',
                           'CD274', 'PDCD1LG2', 'PDCD1', #PDL1, PDL2, PD1
                           'TNFRSF14', 'BTLA', 'CD160', #HVEM, BTLA, CD160
                           'CEACAM1', 'LGALS9', 'HMGB1', 'HAVCR2', #CEACAM1, Galectin9
                           'NECTIN2', 'NECTIN3', 'PVR', 'TIGIT', 'CD226') #CD112, CD113, CD155
genes_of_interest_act <- c('CD48', 'CD58', 'CD2',
                           'TNFSF15', 'TNFRSF25', #TL1 DR3
                           'TNFSF18', 'TNFRSF18', #GITRL GITR
                           'TNFSF9', 'TNFRSF9', #41BBL 41BB
                           'ICOSLG', 'ICOS',
                           'TNFSF4', 'TNFRSF4', #OX40L OX40
                           'TNFSF8', 'TNFRSF8', #CD30L CD30
                           'CD40LG', 'CD40',
                           'CD70', 'CD27')

# for PEA
databases_list <- c('GO_Biological_Process_2021', 'Reactome_2016')

# for heatmap: average expression per "<cluster>_<Type>" group
object_hm <- obj
Idents(object_hm) <- 'cluster_dis'
levels(object_hm) <- levels_hm
object_av <- AverageExpression(object_hm, assay = "RNA", return.seurat = TRUE, verbose = FALSE)


# Reversed log axis transformation: smaller values are drawn further along the axis.
reverselog_trans <- function(base = exp(1)) {
    trans <- function(x) -log(x, base)
    inv <- function(x) base^(-x)
    trans_new(paste0("reverselog-", format(base)), trans, inv,
              log_breaks(base = base),
              domain = c(1e-100, Inf))
}

# Save heatmaps of `genes` on the averaged object as PDF and EPS, once with the
# default layout and once with coord_equal().
#   genes:       character vector of gene names (nothing is done when empty)
#   group_label: prefix of the output file names
#   out_dir:     directory the files are written to
#   avg_object:  object passed to DoHeatmap
save_de_heatmaps <- function(genes, group_label, out_dir, avg_object) {
    if (length(genes) == 0) {
        return(invisible(NULL))
    }
    hm <- DoHeatmap(avg_object, features = genes, draw.lines = FALSE, size = 7, raster = FALSE) +
        theme(
            text = element_text(size = 19, colour = 'black', face = 'plain'),
            axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')) +
        scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
    for (ext in c('.pdf', '.eps')) {
        ggsave(filename = paste0(out_dir, '/', group_label, '_heatmap', ext),
               plot = hm, width = 30, height = 20)
    }
    hm_equal <- hm + coord_equal()
    for (ext in c('.pdf', '.eps')) {
        ggsave(filename = paste0(out_dir, '/', group_label, '_heatmap_coordequal', ext),
               plot = hm_equal, width = 30, height = 20)
    }
    invisible(NULL)
}

# Run Enrichr on `gene_symbols` for one database and save the term table (CSV),
# a plot of the 20 most significant terms (PDF) and a heatmap of up to 20 genes
# taken from those terms (PDF).
#   gene_symbols:   character vector of genes (the database is skipped when empty)
#   group_label:    prefix of the output file names and part of the plot title
#   database:       Enrichr library name
#   point_fill:     fill colour of the points in the term plot
#   heatmap_suffix: file-name suffix of the gene heatmap
#   out_dir:        directory the files are written to
#   avg_object:     object passed to DoHeatmap
# Returns the ordered enrichment table invisibly (NULL when skipped).
run_pea <- function(gene_symbols, group_label, database, point_fill,
                    heatmap_suffix, out_dir, avg_object) {
    if (length(gene_symbols) == 0) {
        # nothing to send to Enrichr
        message('No genes for ', group_label, ' in ', out_dir, '; skipping ', database)
        return(invisible(NULL))
    }
    enriched <- enrichr(gene_symbols, databases = database)[[1]]
    # descending adjusted p-value: the most significant terms are the last rows
    enriched <- enriched[order(-enriched$Adjusted.P.value), ]
    enriched$Term <- factor(enriched$Term, levels = unique(enriched$Term))
    write.csv(enriched, file = paste0(out_dir, '/', group_label, '_', database, '_pea_list.csv'))
    if (nrow(enriched) == 0) {
        return(invisible(enriched))
    }

    options(repr.plot.width=22, repr.plot.height=11)
    top_terms <- tail(enriched, 20) # the (up to) 20 most significant terms
    pea_plot <- ggplot(top_terms, aes(y = Term, x = Adjusted.P.value)) +
        geom_vline(xintercept = .05, color = "grey", linetype = "dashed") +
        geom_segment(aes(yend = Term, xend = 1), col = "black") +
        geom_point(shape = 21, aes(size = abs(Combined.Score)), fill = point_fill) +
        scale_x_continuous(trans = reverselog_trans(10)) +
        scale_size_continuous(range = c(2, 13)) +
        theme_tufte() + xlab("p value (adj)") + ylab("") +
        theme(text = element_text(family = "Helvetica", size = 26),
              axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") +
        ggtitle(paste(group_label, database))
    ggsave(filename = paste0(out_dir, '/', group_label, '_', database, '_PEA.pdf'),
           plot = pea_plot, width = 30, height = 11)

    # genes of the plotted terms; reversed so genes of the most significant terms come first
    pathway_genes <- unlist(strsplit(as.character(top_terms$Genes), split = ';', fixed = TRUE))
    pathway_genes <- head(unique(rev(pathway_genes)), 20)
    heatmap_plot <- DoHeatmap(avg_object, features = pathway_genes, draw.lines = FALSE, raster = FALSE) +
        theme(text = element_text(size = 20, face = "bold")) +
        scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
    ggsave(filename = paste0(out_dir, '/', group_label, '_', database, heatmap_suffix),
           plot = heatmap_plot, width = 30, height = 20)
    invisible(enriched)
}


#here starts the loop with clusters of interest
for (cl in seq_along(cluster_of_interest)) {
    cluster_name <- cluster_of_interest[cl]

    #create the dir with output
    dir_path <- paste0('outs/', cluster_name)
    dir.create(dir_path, showWarnings = FALSE, recursive = TRUE)

    object_dge <- subset(object, idents = cluster_name)

    # FindMarkers stops with an error when a group has fewer than min.cells.group
    # cells; skip such clusters instead of aborting the whole loop
    cells_per_group <- vapply(
        c(group1, group2),
        function(g) sum(object_dge@meta.data[[grouping_dge]] == g, na.rm = TRUE),
        integer(1))
    if (any(cells_per_group < min_cells_group)) {
        message('Skipping ', cluster_name, ': fewer than ', min_cells_group,
                ' cells in ', group1, ' or ', group2)
        next
    }

    de_genes <- FindMarkers(object = object_dge, features = features, only.pos = FALSE,
                            group.by = grouping_dge, ident.1 = group1, ident.2 = group2,
                            min.pct = cutoff_expression, logfc.threshold = logfc_cutoff,
                            min.cells.group = min_cells_group)
    de_genes$genes <- rownames(de_genes)

    #subset positive genes (strongest up-regulation first)
    de_positive <- de_genes[de_genes$avg_log2FC > 0, ]
    de_positive <- de_positive[order(-de_positive$avg_log2FC), ]

    #subset negative genes (strongest down-regulation first)
    de_negative <- de_genes[de_genes$avg_log2FC < 0, ]
    de_negative <- de_negative[order(de_negative$avg_log2FC), ]
    de_genes <- rbind(de_positive, de_negative)

    de_genes$cell.type <- cluster_name
    # appended inside the loop so that an interrupted run keeps the clusters finished so far
    de_general_all <- rbind(de_general_all, de_genes)


    #build and save the volcano plot
    #additional genes of interest that were tested in this cluster
    additional_markers_ihb <- genes_of_interest_ihb[genes_of_interest_ihb %in% de_genes$genes]
    additional_markers_act <- genes_of_interest_act[genes_of_interest_act %in% de_genes$genes]

    # head() returns at most number_of_genes rows and never pads with NA rows
    sig_genes <- subset(de_genes, p_val_adj < padj_cutoff)
    top_up <- head(subset(sig_genes, avg_log2FC > 0), number_of_genes)
    top_down <- head(subset(sig_genes, avg_log2FC < 0), number_of_genes)
    sig_act <- subset(sig_genes, genes %in% additional_markers_act) #activatory genes
    sig_ihb <- subset(sig_genes, genes %in% additional_markers_ihb) #inhibitory genes

    # NOTE(review): the y axis shows the adjusted p-value although the label reads "p-value"
    volcano <- ggplot(de_genes, aes(x = avg_log2FC, y = -log10(p_val_adj))) +
        geom_vline(xintercept = 0) +
        geom_hline(yintercept = -log10(padj_cutoff), color = "grey", linetype = "dashed") +
        geom_point(data = de_genes, color = "grey", alpha = 0.5) +
        geom_point(data = top_up, fill = colour1, alpha = 1, shape = 21, size = 2.5) +
        geom_point(data = top_down, fill = colour2, alpha = 1, shape = 21, size = 2.5) +
        geom_point(data = sig_act, fill = '#2881C1', alpha = 1, shape = 21, size = 2.5) +
        geom_point(data = sig_ihb, fill = '#9A0000', alpha = 1, shape = 21, size = 2.5) +
        geom_text_repel(data = rbind(top_up, top_down), max.overlaps = 50, aes(label = genes)) +
        geom_text_repel(data = sig_act, max.overlaps = 50, aes(label = genes), colour = '#003366') +
        geom_text_repel(data = sig_ihb, max.overlaps = 50, aes(label = genes), colour = '#9A0000') +
        theme_linedraw() +
        theme(panel.grid = element_blank(), legend.position = "none",
              plot.title = element_text(size = 15, hjust = 0.5)) +
        ggtitle(paste0(group2, ' (left)', ' vs ', group1, ' (right)')) +
        xlab("log2(average fold change)") +
        ylab("-log10(p-value)")
    ggsave(filename = paste0(dir_path, '/', group1, '_vs_', group2, '_volcano_plot.pdf'),
           plot = volcano, height = 6, width = 10)

    #prepare the subset of genes for PEA (filter for p value)
    de_positive <- subset(de_genes, avg_log2FC > 0 & p_val_adj < padj_cutoff)
    de_positive <- de_positive[order(-de_positive$avg_log2FC), ]
    de_negative <- subset(de_genes, avg_log2FC < 0 & p_val_adj < padj_cutoff)
    de_negative <- de_negative[order(de_negative$avg_log2FC), ]

    de_all <- rbind(de_positive, de_negative)
    #save the de_all
    write.csv(de_all, file = paste0(dir_path, '/', group1,  '_vs_', group2, '_de_list.csv'))

    #heatmaps of the top `number_of_genes` significant genes per direction
    save_de_heatmaps(head(de_positive$genes, number_of_genes), group1, dir_path, object_av)
    save_de_heatmaps(head(de_negative$genes, number_of_genes), group2, dir_path, object_av)

    #start the enrichr PEA
    for (db in seq_along(databases_list)) {
        # the group1 gene-heatmap name keeps its historical double underscore
        run_pea(de_positive$genes, group1, databases_list[db], colour1,
                '__HEATMAP_genes.pdf', dir_path, object_av)
        run_pea(de_negative$genes, group2, databases_list[db], colour2,
                '_HEATMAP_genes.pdf', dir_path, object_av)
    }
    #end of the enrichr loop
}
#end of the clusters loop

write.csv(de_general_all, file = './outs/de_general_all.csv')

In [None]:
# type in the parameters
cluster_of_interest <- levels(obj)
group1 <- 'HL'
group2 <- 'RLN'

#create a clustering column for the heatmap
obj@meta.data$cluster_dis <- paste0(obj@meta.data$clusters_all, '_', obj@meta.data$Type)
levels_hm <- c(paste0(levels(obj), '_', 'HL'), paste0(levels(obj), '_', 'RLN'))
levels_hm <- levels_hm[levels_hm %in% unique(obj@meta.data$cluster_dis)]

#for subset
object <- obj
Idents(object) <- 'clusters_all'
levels(object) <- levels(obj)

#for dge
grouping_dge <- 'Type'
cutoff_expression <- 0.05 #for the DGE min.pct
logfc_cutoff <- 0.02 #for the DGE analysis
padj_cutoff <- 0.05 #for the genes coming into the GSEA analysis
features <- rownames(object)
markers.remove <- grep(pattern = "^TRAV|^TRBV|^TRGV|^TRDV|^RPL|^RPS|^MT-|^IGKV|^IGLV|^IGHV|^IGH|^IGKC|^TRBC", x = rownames(object), value = TRUE)
features <- features[!(features%in%markers.remove)]

#for volcano
#colours_diagnosis_groups <- c('#5cadfb', '#1d9d01', '#ff8a01', '#bebebe')
colour1 <- '#D3556E'
colour2 <- 'lightgrey'
number_of_genes <- 20
mhc2_genes <- rownames(obj)[grep(rownames(obj), pattern = '^HLA-D')]
genes_of_interest_ihb <- c('CD28', 'CTLA4', 'CD80', 'CD86',
                      mhc2_genes, 'LAG3',
                      'CD274', 'PDCD1LG2', 'PDCD1', #PDL1, PDL2, PD1
                      'TNFRSF14', 'BTLA', 'CD160', #HVEM, BTLA, CD150
                      'CEACAM1', 'LGALS9', 'HMGB1', 'HAVCR2', #CEACAM1, Galectin9
                      'NECTIN2', 'NECTIN3', 'PVR', 'TIGIT', 'CD226') #CD112, CD113, CD155
genes_of_interest_act <- c('CD48', 'CD58', 'CD2',
                          'TNFSF15', 'TNFRSF25', #TL1 DR3
                          'TNFSF18', 'TNFRSF18', #GITRL GITR
                           'TNFSF9', 'TNFRSF9', #41BBL 41BB
                          'ICOSLG', 'ICOS', 
                           'TNFSF4', 'TNFRSF4', #OX40L OX40
                           'TNFSF8', 'TNFRSF8', #CD30L CD30
                            'CD40LG', 'CD40', 
                           'CD70', 'CD27'
                          )


#for PEA
databases_list <- c('GO_Biological_Process_2021', 'Reactome_2016')

#for heatmap
object_hm <- obj
Idents(object_hm) <- 'cluster_dis'
levels(object_hm) <- levels_hm
object_av <- AverageExpression(object_hm, assay = "RNA", return.seurat = TRUE, verbose = FALSE)


#here starts the loop with clusters of interest
for(cl in 11:length(cluster_of_interest)){
    
    #create the dir with output
    dir_path <- paste0('outs/', cluster_of_interest[cl])
    dir.create(dir_path)
    
    object_dge <- subset(object, idents = cluster_of_interest[cl])
    
    de_genes <- FindMarkers(object = object_dge, features = features, only.pos = FALSE, group.by = grouping_dge, ident.1 = group1, ident.2 = group2, min.pct = cutoff_expression, logfc.threshold = logfc_cutoff)
    de_genes$genes <- rownames(de_genes)
    
    
    #subset positive genes
    de_positive <- de_genes[de_genes$avg_log2FC > 0, ]
    de_positive <- de_positive[order(-de_positive$avg_log2FC), ]
    
    #subset negative genes
    de_negative <- de_genes[de_genes$avg_log2FC < 0, ]
    de_negative <- de_negative[order(de_negative$avg_log2FC), ]
    de_genes <- rbind(de_positive, de_negative)
    
    de_genes$cell.type <- cluster_of_interest[cl]
    de_general_all <- rbind(de_general_all, de_genes)
    
    
    #build and save the volcano plot
   #or additional genes of interest
    additional_markers_ihb <- genes_of_interest_ihb[genes_of_interest_ihb %in% de_genes$genes]
    additional_markers_act <- genes_of_interest_act[genes_of_interest_act %in% de_genes$genes]

     volcano <- ggplot(de_genes, aes(x = avg_log2FC, y = -log10(p_val_adj))) +
        geom_vline(xintercept = 0) +
        geom_hline(yintercept = -log10(padj_cutoff), color ="grey", linetype ="dashed") +
        geom_point(data = de_genes,
                    color = "grey", alpha = 0.5) +
        geom_point(data = subset(de_genes, avg_log2FC > 0 & p_val_adj < padj_cutoff)[1:number_of_genes, ],
                    fill = colour1, alpha = 1, shape=21, size= 2.5) +
        geom_point(data = subset(de_genes, avg_log2FC < 0 & p_val_adj < padj_cutoff)[1:number_of_genes, ],
                    fill = colour2, alpha = 1, shape=21, size= 2.5) +
        geom_point(data = subset(de_genes, p_val_adj < padj_cutoff & genes %in% additional_markers_act), #iclude activatory genes
                    fill = '#2881C1', alpha = 1, shape=21, size= 2.5) +
        geom_point(data = subset(de_genes, p_val_adj < padj_cutoff & genes %in% additional_markers_ihb), #include inhibitory genes
                    fill = '#9A0000', alpha = 1, shape=21, size= 2.5) +
        geom_text_repel(data=rbind(subset(de_genes, avg_log2FC > 0 & p_val_adj < padj_cutoff)[1:number_of_genes, ], subset(de_genes, avg_log2FC < 0 & p_val_adj < padj_cutoff)[1:number_of_genes, ]), max.overlaps = 50, aes(label = genes))+
        geom_text_repel(data=subset(de_genes, p_val_adj < padj_cutoff & genes %in% additional_markers_act), max.overlaps = 50, aes(label = genes), colour = '#003366')+
        geom_text_repel(data=subset(de_genes, p_val_adj < padj_cutoff & genes %in% additional_markers_ihb), max.overlaps = 50, aes(label = genes), colour = '#9A0000')+
        theme_linedraw() +
        theme(panel.grid = element_blank(), legend.position = "none", 
              plot.title = element_text(size = 15, hjust = 0.5)) + ggtitle(paste0(group2, ' (left)', ' vs ', group1, ' (right)')) + 
        xlab("log2(average fold change)") +
        ylab("-log10(p-value)")
    ggsave(volcano, file = paste0(dir_path, '/', group1, '_vs_', group2, '_volcano_plot.pdf'), height = 6, width = 10)
    
    #prepare the subset of genes for PEA (filter for p value)
    #subset positive genes
    de_positive <- de_genes[de_genes$avg_log2FC > 0, ]
    de_positive <- de_positive[de_positive$p_val_adj < padj_cutoff, ]
    de_positive <- de_positive[order(-de_positive$avg_log2FC), ]
    
    #subset negative genes
    de_negative <- de_genes[de_genes$avg_log2FC < 0, ]
    de_negative <- de_negative[de_negative$p_val_adj < padj_cutoff, ]
    de_negative <- de_negative[order(de_negative$avg_log2FC), ]
    
    de_all <- rbind(de_positive, de_negative)
    #save the de_all 
    write.csv(de_all, file = paste0(dir_path, '/', group1,  '_vs_', group2, '_de_list.csv'))
    
    #top 30 sorted
    if(length(de_positive$genes) > number_of_genes){
    ordered_genes_plot <- de_positive$genes[1:number_of_genes]
    } else {ordered_genes_plot <- de_positive$genes}
    
    
    if(length(ordered_genes_plot) > 0){
    heatmap <- DoHeatmap(object_av, features = ordered_genes_plot, draw.lines = FALSE, size = 7, raster = FALSE) + 
    theme(
        text = element_text(size = 19, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
    ggsave(heatmap, file = paste0(dir_path, '/', group1, '_heatmap.pdf'), width = 30, height = 20)
    ggsave(heatmap, file = paste0(dir_path, '/', group1, '_heatmap.eps'), width = 30, height = 20)
    
    heatmap <- DoHeatmap(object_av, features = ordered_genes_plot, draw.lines = FALSE, size = 7, raster = FALSE) + 
    theme(
        text = element_text(size = 19, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E")) + coord_equal()
    ggsave(heatmap, file = paste0(dir_path, '/', group1, '_heatmap_coordequal.pdf'), width = 30, height = 20)
    ggsave(heatmap, file = paste0(dir_path, '/', group1, '_heatmap_coordequal.eps'), width = 30, height = 20)
    }
    

    
    #top 30 sorted
    if(length(de_negative$genes) > number_of_genes){
    ordered_genes_plot <- de_negative$genes[1:number_of_genes]
    } else {ordered_genes_plot <- de_negative$genes}
    
    if(length(ordered_genes_plot) > 0){
    heatmap <- DoHeatmap(object_av, features = ordered_genes_plot, draw.lines = FALSE, size = 7, raster = FALSE) + 
    theme(
        text = element_text(size = 19, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
    ggsave(heatmap, file = paste0(dir_path, '/', group2, '_heatmap.pdf'), width = 30, height = 20)
    ggsave(heatmap, file = paste0(dir_path, '/', group2, '_heatmap.eps'), width = 30, height = 20)
    
    heatmap <- DoHeatmap(object_av, features = ordered_genes_plot, draw.lines = FALSE, size = 7, raster = FALSE) + 
    theme(
        text = element_text(size = 19, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E")) + coord_equal()
    ggsave(heatmap, file = paste0(dir_path, '/', group2, '_heatmap_coordequal.pdf'), width = 30, height = 20)
    ggsave(heatmap, file = paste0(dir_path, '/', group2, '_heatmap_coordequal.eps'), width = 30, height = 20)
    }
    
    #start the enrichr PEA 
    for(db in 1:length(databases_list)){
        enriched <- enrichr(de_positive$gene, databases = databases_list[db])
        enriched <- enriched[[1]]
        enriched <- enriched[order(-enriched$Adjusted.P.value), ]
        enriched$Term <- factor(enriched$Term, levels = unique(enriched$Term))
        #save the positive enriched pathways
        write.csv(enriched, file = paste0(dir_path, '/', group1, '_', databases_list[db], '_pea_list.csv'))
            
        
        reverselog_trans <- function(base = exp(1)) {
            trans <- function(x) -log(x, base)
            inv <- function(x) base^(-x)
            trans_new(paste0("reverselog-", format(base)), trans, inv,
                      log_breaks(base = base),
                      domain = c(1e-100, Inf))
            }
        #2881c1 - for blue
        #d3556e - for red
        if(nrow(enriched) > 0){
        options(repr.plot.width=22, repr.plot.height=11)
        if(nrow(enriched) > 20){
            plot_positive <- ggplot(enriched[(nrow(enriched)-19):nrow(enriched), ], aes(y=Term, x= Adjusted.P.value))+
                geom_vline(xintercept = .05, color = "grey", linetype="dashed") +
                geom_segment( aes(yend=Term, xend=1), col= "black") +
                geom_point(shape=21, aes(size = abs(Combined.Score)), fill = colour1) +
                scale_x_continuous(trans=reverselog_trans(10))+
                scale_size_continuous(range = c(2, 13)) +
                theme_tufte()+ xlab("p value (adj)") + ylab("") +
                theme(text=element_text(family="Helvetica", size = 26),
                  axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") + ggtitle(paste(group1, databases_list[db]))
            } else {
            plot_positive <- ggplot(enriched, aes(y=Term, x= Adjusted.P.value))+
                geom_vline(xintercept = .05, color = "grey", linetype="dashed") +
                geom_segment( aes(yend=Term, xend=1), col= "black") +
                geom_point(shape=21, aes(size = abs(Combined.Score)), fill = colour1) +
                scale_x_continuous(trans=reverselog_trans(10))+
                scale_size_continuous(range = c(2, 13)) +
                theme_tufte()+ xlab("p value (adj)") + ylab("") +
                theme(text=element_text(family="Helvetica", size = 26),
                  axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") + ggtitle(paste(group1, databases_list[db]))
        }
        ggsave(plot_positive, file = paste0(dir_path, '/', group1, '_', databases_list[db], '_PEA.pdf'), width = 30, height = 11)
            
        #plot the genes
        if(nrow(enriched) > 20){ 
            genes_to_plot <- paste0(x = enriched[(nrow(enriched)-19):nrow(enriched), 'Genes'], ';')
            genes_to_plot <- paste(genes_to_plot, collapse = '')
            genes_to_plot <- strsplit(genes_to_plot, split = ';')[[1]]
            #reverse because of ascending ordering of the enriched table to put the most significant at the beginning
            genes_to_plot <- rev(genes_to_plot)
            genes_to_plot <- unique(genes_to_plot)
            }else{
            genes_to_plot <- paste0(x = enriched[, 'Genes'], ';')
            genes_to_plot <- paste(genes_to_plot, collapse = '')
            genes_to_plot <- strsplit(genes_to_plot, split = ';')[[1]]
            genes_to_plot <- rev(genes_to_plot)
            genes_to_plot <- unique(genes_to_plot)
        }
        
        if(length(genes_to_plot) > 20){genes_to_plot <- genes_to_plot[1:20]}
        heatmap_plot <- DoHeatmap(object_av, features = genes_to_plot, draw.lines = FALSE, raster = FALSE) + theme(text = element_text(size = 20, face = "bold")) + scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
        ggsave(heatmap_plot, file = paste0(dir_path, '/', group1, '_', databases_list[db], '_',  '_HEATMAP_genes', '.pdf'), width = 30, height = 20)
        }
        
        
        #perform the enrichment on negative genes
        enriched <- enrichr(de_negative$gene, databases = databases_list[db])
        enriched <- enriched[[1]]
        enriched <- enriched[order(-enriched$Adjusted.P.value), ]
        enriched$Term <- factor(enriched$Term, levels = unique(enriched$Term))
        #save the negative enriched pathways
        write.csv(enriched, file = paste0(dir_path, '/', group2, '_', databases_list[db], '_pea_list.csv'))
            
        reverselog_trans <- function(base = exp(1)) {
            trans <- function(x) -log(x, base)
            inv <- function(x) base^(-x)
            trans_new(paste0("reverselog-", format(base)), trans, inv,
                      log_breaks(base = base),
                      domain = c(1e-100, Inf))
            }
        #2881c1 - for blue
        #d3556e - for red
        if(nrow(enriched) > 0){
        options(repr.plot.width=22, repr.plot.height=11)
        if(nrow(enriched) > 20){
            plot_negative <- ggplot(enriched[(nrow(enriched)-19):nrow(enriched), ], aes(y=Term, x= Adjusted.P.value))+
                geom_vline(xintercept = .05, color = "grey", linetype="dashed") +
                geom_segment( aes(yend=Term, xend=1), col= "black") +
                geom_point(shape=21, aes(size = abs(Combined.Score)), fill = colour2) +
                scale_x_continuous(trans=reverselog_trans(10))+
                scale_size_continuous(range = c(2, 13)) +
                theme_tufte()+ xlab("p value (adj)") + ylab("") +
                theme(text=element_text(family="Helvetica", size = 26),
                  axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") + ggtitle(paste(group2, databases_list[db]))
            } else {
            plot_negative <- ggplot(enriched, aes(y=Term, x= Adjusted.P.value))+
                geom_vline(xintercept = .05, color = "grey", linetype="dashed") +
                geom_segment( aes(yend=Term, xend=1), col= "black") +
                geom_point(shape=21, aes(size = abs(Combined.Score)), fill = colour2) +
                scale_x_continuous(trans=reverselog_trans(10))+
                scale_size_continuous(range = c(2, 13)) +
                theme_tufte()+ xlab("p value (adj)") + ylab("") +
                theme(text=element_text(family="Helvetica", size = 26),
                  axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") + ggtitle(paste(group2, databases_list[db]))
        }
        ggsave(plot_negative, file = paste0(dir_path, '/', group2, '_', databases_list[db], '_PEA','.pdf'), width = 30, height = 11)
            
        #plot the genes
        if(nrow(enriched) > 20){ 
            genes_to_plot <- paste0(x = enriched[(nrow(enriched)-19):nrow(enriched), 'Genes'], ';')
            genes_to_plot <- paste(genes_to_plot, collapse = '')
            genes_to_plot <- strsplit(genes_to_plot, split = ';')[[1]]
            #reverse because of ascending ordering of the enriched table to put the most significant at the beginning
            genes_to_plot <- rev(genes_to_plot)
            genes_to_plot <- unique(genes_to_plot)
            genes_to_plot
            }else{
            genes_to_plot <- paste0(x = enriched[, 'Genes'], ';')
            genes_to_plot <- paste(genes_to_plot, collapse = '')
            genes_to_plot <- strsplit(genes_to_plot, split = ';')[[1]]
            genes_to_plot <- rev(genes_to_plot)
            genes_to_plot <- unique(genes_to_plot)
            genes_to_plot
        }
        if(length(genes_to_plot) > 20){genes_to_plot <- genes_to_plot[1:20]}
        heatmap_plot <- DoHeatmap(object_av, features = genes_to_plot, draw.lines = FALSE, raster = FALSE) + theme(text = element_text(size = 20, face = "bold")) + scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
        ggsave(heatmap_plot, file = paste0(dir_path, '/', group2, '_', databases_list[db], '_HEATMAP_genes', '.pdf'), width = 30, height = 20)
        }
        }
    #end of the einrichr loop
}
#end of the clusters loop

# Export the combined DGE table to ./outs (the directory is created near the top of the notebook).
# NOTE(review): de_general_all is initialised as c() before the clusters loop and is
# presumably filled inside that loop - confirm it holds one row per gene and cluster.
write.csv(de_general_all, file = './outs/de_general_all.csv')

# Run the DGE for Macrophages

In [None]:
# Show the identity levels of obj; the cluster label chosen in the next cell must be one of them
levels(obj)

In [None]:
# Parameters for the macrophage comparison: the 'Macrophage' identity is group1;
# group2 is left empty.
cluster_of_interest <- 'Macrophage'
group1 <- 'Macrophage'
group2 <- ''

# Tag every cell with "<cluster>_<Type>" and keep only the HL/RLN combinations
# that actually occur; this ordering drives the averaged heatmap columns.
obj@meta.data$cluster_dis <- paste0(obj@meta.data$clusters_all, '_', obj@meta.data$Type)
levels_hm <- unlist(lapply(c('HL', 'RLN'), function(type) paste0(levels(obj), '_', type)))
levels_hm <- levels_hm[levels_hm %in% unique(obj@meta.data$cluster_dis)]

# Working copy used for the DGE call, with identities set to the cluster annotation
object <- obj
Idents(object) <- 'clusters_all'
levels(object) <- levels(obj)

# DGE settings
grouping_dge <- 'clusters_all'
cutoff_expression <- 0.05 # min.pct passed to FindMarkers
logfc_cutoff <- 0.02      # logfc.threshold passed to FindMarkers
padj_cutoff <- 0.05       # adjusted p-value cutoff for plots and for the PEA input

# Test every gene except TCR/Ig segments, ribosomal and mitochondrial genes
markers.remove <- grep(pattern = "^TRAV|^TRBV|^TRGV|^TRDV|^RPL|^RPS|^MT-|^IGKV|^IGLV|^IGHV|^IGH|^IGKC|^TRBC", x = rownames(object), value = TRUE)
features <- rownames(object)[!(rownames(object) %in% markers.remove)]

# Volcano settings
#colours_diagnosis_groups <- c('#5cadfb', '#1d9d01', '#ff8a01', '#bebebe')
colour1 <- '#D3556E'
colour2 <- 'lightgrey'
number_of_genes <- 30

# All genes whose symbol starts with HLA-D
mhc2_genes <- grep(pattern = '^HLA-D', x = rownames(obj), value = TRUE)

# Genes highlighted as inhibitory in the volcano plot
genes_of_interest_ihb <- c(
    'CD28', 'CTLA4', 'CD80', 'CD86',
    mhc2_genes, 'LAG3',
    'CD274', 'PDCD1LG2', 'PDCD1',            # PDL1, PDL2, PD1
    'TNFRSF14', 'BTLA', 'CD160',             # HVEM, BTLA, CD160
    'CEACAM1', 'LGALS9', 'HMGB1', 'HAVCR2',  # CEACAM1, Galectin9
    'NECTIN2', 'NECTIN3', 'PVR', 'TIGIT', 'CD226' # CD112, CD113, CD155
)

# Genes highlighted as activatory in the volcano plot
genes_of_interest_act <- c(
    'CD48', 'CD58', 'CD2',
    'TNFSF15', 'TNFRSF25', # TL1 DR3
    'TNFSF18', 'TNFRSF18', # GITRL GITR
    'TNFSF9', 'TNFRSF9',   # 41BBL 41BB
    'ICOSLG', 'ICOS',
    'TNFSF4', 'TNFRSF4',   # OX40L OX40
    'TNFSF8', 'TNFRSF8',   # CD30L CD30
    'CD40LG', 'CD40',
    'CD70', 'CD27'
)

# Enrichr libraries queried in the pathway enrichment step
databases_list <- c('GO_Biological_Process_2021', 'Reactome_2016')

# Averaged expression per cluster_dis group, used as input for every heatmap
object_hm <- obj
Idents(object_hm) <- 'cluster_dis'
levels(object_hm) <- levels_hm
object_av <- AverageExpression(object_hm, assay = "RNA", return.seurat = TRUE, verbose = FALSE)


# Macrophage DGE pipeline. For every entry of cluster_of_interest it
#   1. runs FindMarkers (positive markers of group1 within grouping_dge),
#   2. saves a volcano plot, the significant gene list and averaged heatmaps,
#   3. runs an Enrichr pathway enrichment per database and saves the table,
#      a lollipop plot of the top terms and a heatmap of their genes.
# Reads: object, object_av, features, grouping_dge, group1, group2,
#        cutoff_expression, logfc_cutoff, padj_cutoff, number_of_genes,
#        genes_of_interest_ihb, genes_of_interest_act, colour1, colour2,
#        databases_list. Writes everything below outs/<cluster>/.
# NOTE(review): FindMarkers is called with ident.1 = group1 and does not depend
# on the loop index, so several clusters of interest would repeat the same test.

# Reversed log axis so that the smallest adjusted p-values end up on the right.
# Defined once here instead of on every database iteration.
reverselog_trans <- function(base = exp(1)) {
    trans <- function(x) -log(x, base)
    inv <- function(x) base^(-x)
    trans_new(paste0("reverselog-", format(base)), trans, inv,
              log_breaks(base = base),
              domain = c(1e-100, Inf))
}

# group2 may be the empty string (comparison against all remaining cells);
# use a readable label for the plot title in that case.
reference_label <- if (nzchar(group2)) group2 else 'all'

for (cl in seq_along(cluster_of_interest)) {

    # output directory for this cluster (no warning when it already exists)
    dir_path <- paste0('outs/', cluster_of_interest[cl])
    dir.create(dir_path, showWarnings = FALSE, recursive = TRUE)

    de_genes <- FindMarkers(object = object, features = features, only.pos = TRUE, group.by = grouping_dge, ident.1 = group1, min.pct = cutoff_expression, logfc.threshold = logfc_cutoff)
    de_genes$genes <- rownames(de_genes)

    # keep positive genes only, strongest fold change first
    de_genes <- de_genes[de_genes$avg_log2FC > 0, ]
    de_genes <- de_genes[order(-de_genes$avg_log2FC), ]
    de_genes$cell.type <- cluster_of_interest[cl]

    # genes of interest that are present in the DGE result
    additional_markers_ihb <- genes_of_interest_ihb[genes_of_interest_ihb %in% de_genes$genes]
    additional_markers_act <- genes_of_interest_act[genes_of_interest_act %in% de_genes$genes]

    # head() instead of [1:n, ]: indexing past the last row would pad the
    # layer data with all-NA rows whenever fewer than n genes are significant
    top_up <- head(subset(de_genes, avg_log2FC > 0 & p_val_adj < padj_cutoff), number_of_genes)
    top_down <- head(subset(de_genes, avg_log2FC < 0 & p_val_adj < padj_cutoff), number_of_genes)
    act_hits <- subset(de_genes, p_val_adj < padj_cutoff & genes %in% additional_markers_act)
    ihb_hits <- subset(de_genes, p_val_adj < padj_cutoff & genes %in% additional_markers_ihb)

    # build and save the volcano plot
    volcano <- ggplot(de_genes, aes(x = avg_log2FC, y = -log10(p_val_adj))) +
        geom_vline(xintercept = 0) +
        geom_hline(yintercept = -log10(padj_cutoff), color = "grey", linetype = "dashed") +
        geom_point(data = de_genes, color = "grey", alpha = 0.5) +
        geom_point(data = top_up, fill = colour1, alpha = 1, shape = 21, size = 2.5) +
        geom_point(data = top_down, fill = colour2, alpha = 1, shape = 21, size = 2.5) +
        geom_point(data = act_hits, fill = '#2881C1', alpha = 1, shape = 21, size = 2.5) + # activatory genes
        geom_point(data = ihb_hits, fill = '#9A0000', alpha = 1, shape = 21, size = 2.5) + # inhibitory genes
        geom_text_repel(data = rbind(top_up, top_down), max.overlaps = 50, aes(label = genes)) +
        geom_text_repel(data = act_hits, max.overlaps = 50, aes(label = genes), colour = '#003366') +
        geom_text_repel(data = ihb_hits, max.overlaps = 50, aes(label = genes), colour = '#9A0000') +
        theme_linedraw() +
        theme(panel.grid = element_blank(), legend.position = "none",
              plot.title = element_text(size = 15, hjust = 0.5)) +
        ggtitle(paste0(reference_label, ' (left)', ' vs ', group1, ' (right)')) +
        xlab("log2(average fold change)") +
        ylab("-log10(p-value)")
    ggsave(filename = paste0(dir_path, '/', group1, '_vs_', 'all', '_volcano_plot.pdf'), plot = volcano, height = 6, width = 10)

    # significant positive genes: input for the heatmaps and the PEA
    de_positive <- de_genes[de_genes$avg_log2FC > 0, ]
    de_positive <- de_positive[de_positive$p_val_adj < padj_cutoff, ]
    de_positive <- de_positive[order(-de_positive$avg_log2FC), ]

    de_all <- de_positive
    # file name kept as before (group2 may be empty) so existing outputs stay comparable
    write.csv(de_all, file = paste0(dir_path, '/', group1,  '_vs_', group2, '_de_list.csv'))

    # top genes by fold change
    ordered_genes_plot <- head(de_positive$genes, number_of_genes)

    if (length(ordered_genes_plot) > 0) {
        heatmap <- DoHeatmap(object_av, features = ordered_genes_plot, draw.lines = FALSE, size = 7, raster = FALSE) +
            theme(
                text = element_text(size = 19, colour = 'black', face = 'plain'),
                axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')) +
            scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
        ggsave(filename = paste0(dir_path, '/', group1, '_heatmap.pdf'), plot = heatmap, width = 30, height = 20)
        ggsave(filename = paste0(dir_path, '/', group1, '_heatmap.eps'), plot = heatmap, width = 30, height = 20)

        # same heatmap with square tiles
        heatmap <- heatmap + coord_equal()
        ggsave(filename = paste0(dir_path, '/', group1, '_heatmap_coordequal.pdf'), plot = heatmap, width = 30, height = 20)
        ggsave(filename = paste0(dir_path, '/', group1, '_heatmap_coordequal.eps'), plot = heatmap, width = 30, height = 20)
    }

    # nothing to enrich without significant genes
    if (nrow(de_positive) == 0) {
        message('No significant genes for ', cluster_of_interest[cl], '; skipping the pathway enrichment.')
        next
    }

    # Enrichr PEA, one database at a time
    for (db in seq_along(databases_list)) {
        # `genes` spelled out: `$gene` only worked through partial matching
        enriched <- enrichr(de_positive$genes, databases = databases_list[db])
        enriched <- enriched[[1]]
        # descending adjusted p-value, i.e. the most significant terms come last
        enriched <- enriched[order(-enriched$Adjusted.P.value), ]
        enriched$Term <- factor(enriched$Term, levels = unique(enriched$Term))
        write.csv(enriched, file = paste0(dir_path, '/', group1, '_', databases_list[db], '_pea_list.csv'))

        #2881c1 - for blue
        #d3556e - for red
        if (nrow(enriched) > 0) {
            options(repr.plot.width = 22, repr.plot.height = 11)

            # the (up to) 20 most significant terms; tail() covers both the
            # ">20 rows" and the "<=20 rows" case
            top_terms <- tail(enriched, 20)

            plot_positive <- ggplot(top_terms, aes(y = Term, x = Adjusted.P.value)) +
                geom_vline(xintercept = .05, color = "grey", linetype = "dashed") +
                geom_segment(aes(yend = Term, xend = 1), col = "black") +
                geom_point(shape = 21, aes(size = abs(Combined.Score)), fill = colour1) +
                scale_x_continuous(trans = reverselog_trans(10)) +
                scale_size_continuous(range = c(2, 13)) +
                theme_tufte() + xlab("p value (adj)") + ylab("") +
                theme(text = element_text(family = "Helvetica", size = 26),
                      axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") +
                ggtitle(paste(group1, databases_list[db]))
            ggsave(filename = paste0(dir_path, '/', group1, '_', databases_list[db], '_PEA.pdf'), plot = plot_positive, width = 30, height = 11)

            # genes behind the top terms; reversed so that genes of the most
            # significant term come first, then de-duplicated and capped at 20
            genes_to_plot <- strsplit(paste(top_terms[, 'Genes'], collapse = ';'), split = ';')[[1]]
            genes_to_plot <- unique(rev(genes_to_plot))
            genes_to_plot <- head(genes_to_plot, 20)

            heatmap_plot <- DoHeatmap(object_av, features = genes_to_plot, draw.lines = FALSE, raster = FALSE) +
                theme(text = element_text(size = 20, face = "bold")) +
                scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
            ggsave(filename = paste0(dir_path, '/', group1, '_', databases_list[db], '_',  '_HEATMAP_genes', '.pdf'), plot = heatmap_plot, width = 30, height = 20)
        }
    }
}

# Create the DGE heatmaps with inhibitory and activation signatures

In [None]:
# Output folder for the signature heatmaps. The trailing slash matters:
# later cells build file names with paste0(dir_path, name, '.pdf').
dir_path <- './outs/markers_summ/'
# recursive: also creates ./outs when missing; no warning on notebook re-runs
dir.create(dir_path, showWarnings = FALSE, recursive = TRUE)

In [None]:
# Inhibitory signature; the order is the top-to-bottom row order of the heatmap
genes_of_interest_ihb <- c(
    'CD86', 'CD80', 'CTLA4',
    'HLA-DRA', 'LAG3',
    'CD274', 'PDCD1LG2', 'PDCD1',            # PDL1, PDL2, PD1
    'TNFRSF14', 'BTLA', 'CD160',             # HVEM, BTLA, CD160
    'CEACAM1', 'LGALS9', 'HMGB1', 'HAVCR2',  # CEACAM1, Galectin9
    'NECTIN2', 'NECTIN3', 'PVR', 'TIGIT', 'CD226' # CD112, CD113, CD155
)

# Activatory signature; CD86 and CD80 are listed in both signatures
genes_of_interest_act <- c(
    'CD86', 'CD80', 'CD28',
    'CD48', 'CD58', 'CD2',
    'TNFSF15', 'TNFRSF25', # TL1 DR3
    'TNFSF18', 'TNFRSF18', # GITRL GITR
    'TNFSF9', 'TNFRSF9',   # 41BBL 41BB
    'ICOSLG', 'ICOS',
    'TNFSF4', 'TNFRSF4',   # OX40L OX40
    'TNFSF8', 'TNFRSF8',   # CD30L CD30
    'CD40', 'CD40LG',
    'CD70', 'CD27'
)

In [None]:
# Build the long (gene x cluster) table behind the inhibitory-signature heatmap:
# one row per pair, holding avg_log2FC and p_val_adj looked up in the DGE output.
clusters <- levels(obj)
genes <- genes_of_interest_ihb
# NOTE(review): the table exported earlier is de_general_all; confirm that
# de_general really contains the rows of every cluster and not only the last one.
de_output <- de_general

# one row per pair: genes vary slowest, clusters cycle within each gene
n_pairs <- length(genes) * length(clusters)
df <- data.frame(
    genes = rep(genes, each = length(clusters)),
    clusters = rep(clusters, times = length(genes)),
    avg_log2FC = rep(0, n_pairs), # default: no change
    p_val_adj = rep(1, n_pairs),  # default: not significant
    stringsAsFactors = FALSE
)

# single keyed lookup instead of two filter() scans per row; like the
# row-wise version, the first matching DGE row wins
pair_key <- paste(df$clusters, df$genes, sep = '\t')
de_key <- paste(de_output$cell.type, de_output$genes, sep = '\t')
hit <- match(pair_key, de_key)
found <- !is.na(hit)
df$avg_log2FC[found] <- de_output$avg_log2FC[hit[found]]
df$p_val_adj[found] <- de_output$p_val_adj[hit[found]]

# blank out everything that is not significant
df$p_val_adj[df$p_val_adj > 0.05] <- 1
df$avg_log2FC[df$p_val_adj > 0.05] <- 0

# plotting order: clusters as in obj, genes top-to-bottom as listed
df$clusters <- factor(df$clusters, levels = clusters)
df$genes <- factor(df$genes, levels = rev(genes))

# zero is the "nothing to show" sentinel; NA is drawn with the na.value colour
df$avg_log2FC[df$avg_log2FC == 0] <- NA

In [None]:
# Tile heatmap of the inhibitory signature: log2FC per gene (rows) and cluster (columns)
name <- 'inhibitory_markers_heatmap_logfc'
width <- 10
height <- 10
set_figsize(width, height)

plot <- ggplot(df, aes(x = clusters, y = genes, fill = avg_log2FC)) +
  geom_tile() +
  # diverging scale; NA cells are drawn white
  scale_fill_gradient2(low = '#2881C1', mid = "white", high = "#A20606", na.value = 'white') +
  coord_equal() +
  cowplot::theme_cowplot() +
  #grids(linetype = "dashed", size = 0.1) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
    axis.ticks = element_blank()
  ) +
  ylab('')
plot
ggsave(filename = paste0(dir_path, name, '.pdf'), plot = plot, width = width, height = height)

In [None]:
# Build the long (gene x cluster) table behind the activatory-signature heatmap:
# one row per pair, holding avg_log2FC and p_val_adj looked up in the DGE output.
clusters <- levels(obj)
genes <- genes_of_interest_act
# NOTE(review): the table exported earlier is de_general_all; confirm that
# de_general really contains the rows of every cluster and not only the last one.
de_output <- de_general

# one row per pair: genes vary slowest, clusters cycle within each gene
n_pairs <- length(genes) * length(clusters)
df <- data.frame(
    genes = rep(genes, each = length(clusters)),
    clusters = rep(clusters, times = length(genes)),
    avg_log2FC = rep(0, n_pairs), # default: no change
    p_val_adj = rep(1, n_pairs),  # default: not significant
    stringsAsFactors = FALSE
)

# single keyed lookup instead of two filter() scans per row; like the
# row-wise version, the first matching DGE row wins
pair_key <- paste(df$clusters, df$genes, sep = '\t')
de_key <- paste(de_output$cell.type, de_output$genes, sep = '\t')
hit <- match(pair_key, de_key)
found <- !is.na(hit)
df$avg_log2FC[found] <- de_output$avg_log2FC[hit[found]]
df$p_val_adj[found] <- de_output$p_val_adj[hit[found]]

# blank out everything that is not significant
df$p_val_adj[df$p_val_adj > 0.05] <- 1
df$avg_log2FC[df$p_val_adj > 0.05] <- 0

# plotting order: clusters as in obj, genes top-to-bottom as listed
df$clusters <- factor(df$clusters, levels = clusters)
df$genes <- factor(df$genes, levels = rev(genes))

# zero is the "nothing to show" sentinel; NA is drawn with the na.value colour
df$avg_log2FC[df$avg_log2FC == 0] <- NA

In [None]:
# Tile heatmap of the activation signature: log2FC per gene (rows) and cluster (columns)
name <- 'activation_markers_heatmap_logfc'
width <- 10
height <- 10
set_figsize(width, height)

plot <- ggplot(df, aes(x = clusters, y = genes, fill = avg_log2FC)) +
  geom_tile() +
  # diverging scale; NA cells are drawn white
  scale_fill_gradient2(low = '#2881C1', mid = "white", high = "#A20606", na.value = 'white') +
  coord_equal() +
  cowplot::theme_cowplot() +
  #grids(linetype = "dashed", size = 0.1) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
    axis.ticks = element_blank()
  ) +
  ylab('')
plot
ggsave(filename = paste0(dir_path, name, '.pdf'), plot = plot, width = width, height = height)

# Calculate the FC of each cluster within the disease

In [None]:
# Print the summary of obj before computing cluster abundances
obj

In [None]:
# List the metadata columns; the next cell reads clusters_all and Type
colnames(obj@meta.data)

In [None]:
# Fraction of cells per cluster within each group, and their ratio:
#   fr_groupX = cells of the cluster in groupX / all cells in groupX
#   FC        = fr_group1 / fr_group2
# A cluster without cells in group2 gets FC = Inf (or NaN when it is empty in both).
clusters <- levels(obj)
object <- obj
object@meta.data$clusters <- object@meta.data$clusters_all
object@meta.data$grouping <- object@meta.data$Type
group1 <- 'HL'
group2 <- 'RLN'

# group membership and group sizes are computed once instead of once per cluster
meta <- object@meta.data
in_group1 <- meta$grouping == group1
in_group2 <- meta$grouping == group2
n_group1 <- sum(in_group1, na.rm = TRUE)
n_group2 <- sum(in_group2, na.rm = TRUE)

data <- data.frame(
    clusters = clusters,
    # na.rm = TRUE: cells with a missing cluster or group are not counted
    fr_group1 = vapply(clusters, function(cl) sum(meta$clusters == cl & in_group1, na.rm = TRUE),
                       numeric(1), USE.NAMES = FALSE) / n_group1,
    fr_group2 = vapply(clusters, function(cl) sum(meta$clusters == cl & in_group2, na.rm = TRUE),
                       numeric(1), USE.NAMES = FALSE) / n_group2,
    stringsAsFactors = FALSE
)

data$FC <- data$fr_group1 / data$fr_group2

In [None]:
set_figsize(10, 8)

# Fill colours keyed both by the logical "FC above threshold" flag and by group name
color_qual_flow2 <- c("TRUE" = "#D3556E", HL = "#D3556E", "FALSE" = "lightgrey", RLN = "lightgrey")

# Reversed log transformation for ggplot2 scales (kept available for later cells)
reverselog_trans <- function(base = exp(1)) {
    trans_new(
        paste0("reverselog-", format(base)),
        function(x) -log(x, base),
        function(x) base^(-x),
        log_breaks(base = base),
        domain = c(1e-100, Inf)
    )
}

# Sort ascending by FC and freeze that order as factor levels for the y axis
data <- data[order(data$FC), ]
data$clusters <- factor(data$clusters, levels = unique(data$clusters))
data

In [None]:
# Lollipop plot of the cluster abundance fold change; the dashed line marks
# FC = 1 and points above it are coloured red.
# x limits: same window as before (-max + 4 .. max) whenever that already covers
# everything, widened otherwise so that neither data points nor the reference
# line at 1 are clipped. Non-finite FC values are ignored for the limits.
fc_finite <- data$FC[is.finite(data$FC)]
fc_limits <- c(min(-max(fc_finite) + 4, fc_finite), max(fc_finite, 1))

AB_Name_balloon_path <- ggplot(data, aes(y=clusters, x= FC, fill = FC > 1))+
        scale_fill_manual(values = color_qual_flow2)+
        geom_vline(xintercept = 1, color = "grey", linetype="dashed") +
        geom_segment( aes(yend=clusters, xend=1), col= "black") +
        # constant point size is set outside aes(); mapping it inside aes()
        # creates a meaningless size legend
        geom_point(shape=21, size = 4) +
        xlim(fc_limits[1], fc_limits[2]) +
        theme_tufte()+ xlab("Fold Change") + ylab("") +
        theme(text=element_text(family="Helvetica"),
            title = element_text(colour = 'black', size = 16),
            axis.text.y = element_text(colour = 'black', size = 16),
            axis.text.x = element_text(angle = 45, hjust = 1),
              legend.position = "right")+ ggtitle('FC of cluster abundancies HL vs RLN')
AB_Name_balloon_path
ggsave(filename = paste0('./outs/cluster_abundancies_fcsorted.pdf'), plot = AB_Name_balloon_path, width = 10, height = 8)

In [None]:
# log2 fold change, recomputed from the group fractions rather than from data$FC,
# so re-running this cell does not apply log2 a second time
data$FC <- log2(data$fr_group1 / data$fr_group2)
data

In [None]:
# Lollipop plot of the log2 cluster abundance fold change (data$FC holds log2
# values at this point); the dashed line marks 0 and positive values are red.
set_figsize(10, 8)
color_qual_flow2 <- c("TRUE" = "#D3556E", HL = "#D3556E", "FALSE" = "lightgrey", RLN = "lightgrey")
# Reversed log transformation for ggplot2 scales (not used by this plot)
reverselog_trans <- function(base = exp(1)) {
    trans <- function(x) -log(x, base)
    inv <- function(x) base^(-x)
    trans_new(paste0("reverselog-", format(base)), trans, inv,
              log_breaks(base = base),
              domain = c(1e-100, Inf))
    }
data  <- data[order(data$FC, decreasing = FALSE),]
data$clusters  <- factor(data$clusters, levels = unique(data$clusters))

# limits from finite values only: log2 of a zero or infinite ratio is -Inf/Inf
fc_finite <- data$FC[is.finite(data$FC)]

AB_Name_balloon_path <- ggplot(data, aes(y=clusters, x= FC, fill = FC > 0))+
        scale_fill_manual(values = color_qual_flow2)+
        geom_vline(xintercept = 0, color = "grey", linetype="dashed") +
        geom_segment( aes(yend=clusters, xend=0), col= "black") +
        # constant point size is set outside aes(); mapping it inside aes()
        # creates a meaningless size legend
        geom_point(shape=21, size = 4) +
        xlim(min(fc_finite) - 0.5, max(fc_finite) + 0.5) +
        # the axis shows log2 values, so label it accordingly
        theme_tufte()+ xlab("log2(Fold Change)") + ylab("") +
        theme(text=element_text(family="Helvetica"),
            title = element_text(colour = 'black', size = 16),
            axis.text.y = element_text(colour = 'black', size = 16),
            axis.text.x = element_text(angle = 45, hjust = 1),
              legend.position = "right")+ ggtitle('FC of cluster abundancies HL vs RLN')
AB_Name_balloon_path
ggsave(filename = paste0('./outs/cluster_abundancies_fcsorted_log2.pdf'), plot = AB_Name_balloon_path, width = 10, height = 8)

In [None]:
# Print the summary of obj before the fraction plots
obj

In [None]:
# Base figure size in inches; the fraction plots below scale their width from it
default_width <- 8

In [None]:
# Stacked bar chart: share of RLN / HL cells inside every cluster, with the
# clusters ordered by increasing HL share.
set_figsize(1.5*default_width, default_width)

df <- obj@meta.data
df$clusters <- df$clusters_all
clusters <- unique(df$clusters)
dis_state <- 'HL'
df$dis <- factor(df$Type, levels = c('RLN', 'HL'))

# share of dis_state cells within each cluster
is_dis <- df$dis == dis_state
order_df <- data.frame(cluster = clusters, stringsAsFactors = FALSE)
order_df$dis <- vapply(seq_along(clusters), function(i) {
    in_cluster <- df$clusters == clusters[i]
    # na.rm = TRUE: cells with a missing cluster or type are not counted
    sum(in_cluster & is_dis, na.rm = TRUE) / sum(in_cluster, na.rm = TRUE)
}, numeric(1))

order_df <- order_df[order(order_df$dis), ]
order_list <- order_df$cluster

# order the clusters in the df
df$clusters <- factor(df$clusters, levels = order_list)

# plot the type of disease state (aes() replaces the deprecated aes_string())
plot <- ggplot(df, aes(x = clusters, fill = dis)) +
    geom_bar(position="fill") +
    theme(
        plot.title = element_text(hjust = 0.45),
        text = element_text(size=25),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        axis.text.x = element_text(angle = 55, vjust = 1, hjust=1, colour = 'black')) +
        scale_fill_manual('legend', values = c('lightgrey', '#D3556E'))+ ylab('Fraction')+
    ggtitle("Fraction of disease group within cluster")
print(plot)
# make sure the target folder exists before saving
dir.create('pathway/Preparation/outs_plots', showWarnings = FALSE, recursive = TRUE)
ggsave(filename = 'pathway/Preparation/outs_plots/fractions_dis.pdf', plot = plot, width = 1.5*default_width, height = default_width)

set_figsize(2*default_width, default_width)

# Stacked bar chart: contribution of every sample to each cluster.
# assumes the metadata has a `sample` column - TODO confirm
# aes() replaces the deprecated aes_string(); fill colours are the ggplot2 defaults
plot <- ggplot(df, aes(x = clusters, fill = sample)) +
    geom_bar(position="fill") +
    theme(
        plot.title = element_text(hjust = 0.45),
        text = element_text(size=25),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        axis.text.x = element_text(angle = 55, vjust = 1, hjust=1, colour = 'black')) +
        ylab('Fraction') +
    ggtitle("Relative")
plot
ggsave(filename = 'pathway/Preparation/outs_plots/fractions_samples.pdf', plot = plot, width = 2*default_width, height = default_width)







# Stacked bar chart: cluster composition of each disease state, with the
# clusters stacked by their share among the dis_state (HL) cells.
set_figsize(1.5*default_width, default_width)

# share of every cluster among the dis_state cells
is_dis <- df$dis == dis_state
n_dis <- sum(is_dis, na.rm = TRUE)
order_df <- data.frame(cluster = clusters, stringsAsFactors = FALSE)
order_df$dis <- vapply(seq_along(clusters), function(i) {
    # na.rm = TRUE: cells with a missing cluster or type are not counted
    sum(df$clusters == clusters[i] & is_dis, na.rm = TRUE) / n_dis
}, numeric(1))

order_df <- order_df[order(order_df$dis), ]
order_list <- order_df$cluster

# order the clusters in the df
df$clusters <- factor(df$clusters, levels = order_list)

# qualitative palette with 25 colours, one per cluster
c25 <- c(
  "dodgerblue2", "#E31A1C", # red
  "green4",
  "#6A3D9A", # purple
  "#FF7F00", # orange
  "black", "gold1",
  "skyblue2", "#FB9A99", # lt pink
  "palegreen2",
  "#CAB2D6", # lt purple
  "#FDBF6F", # lt orange
  "gray70", "khaki2",
  "maroon", "orchid1", "deeppink1", "blue1", "steelblue4",
  "darkturquoise", "green1", "yellow4", "yellow3",
  "darkorange4", "brown"
)
# NOTE(review): with more than 25 clusters the extra entries are NA
cols <- c25[seq_along(clusters)]

# aes() replaces the deprecated aes_string()
ggplot(df, aes(x = dis, fill = clusters)) +
    geom_bar(position="fill") +
    theme(
        plot.title = element_text(hjust = 0.45),
        text = element_text(size=25),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        axis.text.x = element_text(angle = 55, vjust = 1, hjust=1, colour = 'black')) +
        ylab('Fraction')+ scale_fill_manual('legend', values = cols)+
    ggtitle("Relative")

In [None]:
# UMAP coloured by the Phase metadata column, exported with and without a legend
width <- 10
height <- 10
set_figsize(width, height)

dir_plots <- 'pathway/Preparation/outs_plots/'
name <- 'umap_cell_phase'
grouping <- 'Phase'

# shared text sizes for both versions
umap_text_theme <- theme(
    text = element_text(size = 20),
    axis.text = element_text(size = 20),
    plot.title = element_text(size = 20, face = 'plain'),
    legend.text=element_text(size=20))

# version that keeps the legend; the legend-free one is derived from it
umap_plot <- DimPlot(obj, reduction = 'umap', group.by = grouping, pt.size = 7, label = TRUE, repel  = TRUE, label.size = 7, raster = TRUE, raster.dpi = c(5000, 5000)) +
    umap_text_theme + NoAxes()

umap_plot_nolegend <- umap_plot + NoLegend()
print(umap_plot_nolegend)
ggsave(filename = paste0(dir_plots, name, '.pdf'), plot = umap_plot_nolegend, width = width, height = height)

print(umap_plot)
ggsave(filename = paste0(dir_plots, name, '_withlegend.pdf'), plot = umap_plot, width = width, height = height)