Analysis: Vladyslav Kavaka (vladyslav.kavaka@med.uni-muenchen.de), Eduardo Beltran (eduardo.beltran@med.uni-muenchen.de)
Institute of Clinical Neuroimmunology, LMU, Munich

In [8]:
# print the R session information (R version, platform, attached packages) and fix the RNG seed
sessionInfo()
set.seed(1)

R version 4.0.5 (2021-03-31)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 18.04.6 LTS

Matrix products: default
BLAS/LAPACK: /home/INIM/vladyslav.kavaka/miniconda3/envs/azimuth/lib/libopenblasp-r0.3.17.so

locale:
 [1] LC_CTYPE=C.UTF-8    LC_NUMERIC=C        LC_TIME=C          
 [4] LC_COLLATE=C        LC_MONETARY=C       LC_MESSAGES=C      
 [7] LC_PAPER=C          LC_NAME=C           LC_ADDRESS=C       
[10] LC_TELEPHONE=C      LC_MEASUREMENT=C    LC_IDENTIFICATION=C

attached base packages:
[1] parallel  stats4    stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] ggpubr_0.4.0                cowplot_1.1.1              
 [3] ggrastr_1.0.1               enrichR_3.0                
 [5] qpcR_1.4-1                  robustbase_0.93-9          
 [7] rgl_0.108.3                 minpack.lm_1.2-1           
 [9] MASS_7.3-55                 MAST_1.16.0                
[11] SingleCellExperiment_1.12.0 SummarizedExperiment

In [None]:
library(devtools)
library(Seurat)
library(dplyr)
library(Matrix)
library(tidyr)
library(limma)
library(ggplot2)
library(ggthemes)
library(patchwork)
library(gprofiler2)
library(ggrepel)
library(scales)
library(ggthemes)
library(purrr)
library(MAST)
library(qpcR)
library(enrichR)
library(ggrastr)
library(cowplot)
library(ggpubr)
library(enrichR)
library(UpSetR)
library(ComplexUpset)
setEnrichrSite("Enrichr")

In [None]:
# Set the figure size options used by the notebook plots.
# width, height: values stored in the `repr.plot.width` / `repr.plot.height` options.
# Returns (invisibly) the previous values of the two options, as `options()` does.
set_figsize <- function(width, height) {
    figure_dims <- list(repr.plot.width = width, repr.plot.height = height)
    options(figure_dims)
}

In [None]:
# default figure size (repr plot options) for the following cells
options(repr.plot.width=11, repr.plot.height=11)

# Load in the object

In [None]:
# NOTE: the file name is a placeholder; replace it with the path to the .rds file of the full PBMC object
pbmc <- readRDS(file = 'pathway to the full PBMC object')

In [None]:
# use the sample labels as identities so that whole samples can be dropped
Idents(pbmc) <- 'sample'
# samples that were taken on another date than the CSF are removed prior to analysis;
# 'AU-MS-2' and 'AU-H-2' are from the same date as the corresponding CSF and are therefore kept
samples_other_date <- c('AU-MS-3', 'AU-H-3', 'AR-MS-2', 'AR-H-2', 'AR-MS-3', 'AR-H-3')
pbmc <- subset(pbmc, idents = samples_other_date, invert = TRUE)
# show the reduced object and the remaining sample labels
pbmc
unique(pbmc@meta.data$sample)

In [None]:
# use the 'cd8_coded' metadata column as cell identity and fix the order of its levels
Idents(pbmc) <- 'cd8_coded'
levels(pbmc) <- c('NK-like', 'MAIT', '1_CCR7', '2_NELL2', '3_NT5E', '4_CD82', '5_MAL', '6_GZMK', '7_MX1', '8_CD74', '9_IKZF2', '10_FGFBP2')

In [None]:
# 'samplenumb' is a copy of 'sample' in which the time-point suffix of the AU samples is removed
pbmc@meta.data$samplenumb <- pbmc@meta.data$sample
renamed_samples <- c('AU-MS-2' = "AU-MS", 'AU-H-2' = "AU-H")
for (old_label in names(renamed_samples)) {
    pbmc@meta.data$samplenumb[pbmc@meta.data$sample == old_label] <- renamed_samples[[old_label]]
}
unique(pbmc@meta.data$samplenumb)
# clusters that all downstream steps are restricted to
clusters_of_interest <- c('6_GZMK', '8_CD74', '9_IKZF2', '10_FGFBP2')

In [None]:
pbmc

In [None]:
#read in the pbmc overlap object
# NOTE: the file name is a placeholder; replace it with the path to the .rds file that carries the
# CSF-matching clonotype information (the 'overlap' metadata column is used further below)
pbmc_overlap <- readRDS(file = 'pathway to the file with csf matching clonotypes information')

In [None]:
pbmc_overlap

# Filtering algorithm: create expression matrix for each diagnosis subgroup with selection of features and filtering steps

## Create the matrix for MS features

In [None]:
Sys.time()

In [None]:
# Feature selection for the MS group (all MS samples); takes a while because every sample is densified.
# A gene is kept when, in more than half of the MS samples, more than `cutoff_cells` of the sample's
# cells express it above `cutoff_expression` times the gene's maximum within that sample.

# restrict to the clusters of interest and collect the MS samples
object <- subset(pbmc, idents = clusters_of_interest)
samples <- unique(filter(object@meta.data, diagnosis_simp == "MS")$samplenumb)
genes <- rownames(object)
cutoff_expression <- 0.1 # fraction of the per-sample gene maximum that a cell has to exceed
cutoff_cells <- 0.1      # fraction of the cells of a sample that has to exceed that threshold

# genes x samples table of 0/1 flags (1 = the gene passes both cutoffs in that sample)
expression <- data.frame(matrix(NA, ncol = length(samples), nrow = length(genes)))
colnames(expression) <- samples
rownames(expression) <- genes
# seq_along() skips the loop for an empty sample list (1:0 would iterate over 1 and 0)
for(sample_idx in seq_along(samples)){
    cells <- rownames(object@meta.data[object@meta.data$samplenumb == colnames(expression)[sample_idx], ])
    # drop = FALSE keeps a genes x cells matrix even if the sample consists of a single cell
    expression_matrix <- as.matrix(object@assays$RNA@data[genes, cells, drop = FALSE])
    expression_min <- cutoff_expression * apply(expression_matrix, 1, max)
    # expression_min holds one value per gene and is recycled down every cell column
    number_above_cutoff <- rowSums(expression_matrix > expression_min)
    expression[, sample_idx] <- as.numeric(number_above_cutoff > (cutoff_cells * length(cells)))
}
expression$sum <- rowSums(expression)
#select the features that pass the cutoffs in more than half of the samples
features_all_samples <- rownames(expression[expression$sum > (0.5 * length(samples)), ])

In [None]:
# Feature selection for the untreated MS samples; takes a while because every sample is densified.
# A gene is kept when, in more than half of these samples, more than `cutoff_cells` of the sample's
# cells express it above `cutoff_expression` times the gene's maximum within that sample.

# restrict to the clusters of interest and collect the MS samples without treatment
object <- subset(pbmc, idents = clusters_of_interest)
samples <- unique(filter(object@meta.data, diagnosis_simp == "MS" & treatment == "none")$samplenumb)
genes <- rownames(object)
cutoff_expression <- 0.1 # fraction of the per-sample gene maximum that a cell has to exceed
cutoff_cells <- 0.1      # fraction of the cells of a sample that has to exceed that threshold

# genes x samples table of 0/1 flags (1 = the gene passes both cutoffs in that sample)
expression <- data.frame(matrix(NA, ncol = length(samples), nrow = length(genes)))
colnames(expression) <- samples
rownames(expression) <- genes
# seq_along() skips the loop for an empty sample list (1:0 would iterate over 1 and 0)
for(sample_idx in seq_along(samples)){
    cells <- rownames(object@meta.data[object@meta.data$samplenumb == colnames(expression)[sample_idx], ])
    # drop = FALSE keeps a genes x cells matrix even if the sample consists of a single cell
    expression_matrix <- as.matrix(object@assays$RNA@data[genes, cells, drop = FALSE])
    expression_min <- cutoff_expression * apply(expression_matrix, 1, max)
    # expression_min holds one value per gene and is recycled down every cell column
    number_above_cutoff <- rowSums(expression_matrix > expression_min)
    expression[, sample_idx] <- as.numeric(number_above_cutoff > (cutoff_cells * length(cells)))
}
expression$sum <- rowSums(expression)
#select the features that pass the cutoffs in more than half of the samples
features_untreated_samples <- rownames(expression[expression$sum > (0.5 * length(samples)), ])

In [None]:
# Feature selection for the MS cells flagged as "overlap" (clonotypes present also in CSF);
# takes a while because every sample is densified.
# A gene is kept when, in more than half of the MS samples, more than `cutoff_cells` of the sample's
# overlapping cells express it above `cutoff_expression` times the gene's maximum within that sample.

# restrict to the clusters of interest and to the overlapping cells, then collect the MS samples
object <- subset(pbmc_overlap, idents = clusters_of_interest)
object <- subset(object, overlap == "overlap")
samples <- unique(filter(object@meta.data, diagnosis_simp == "MS")$samplenumb)
genes <- rownames(object)
cutoff_expression <- 0.1 # fraction of the per-sample gene maximum that a cell has to exceed
cutoff_cells <- 0.1      # fraction of the cells of a sample that has to exceed that threshold

# genes x samples table of 0/1 flags (1 = the gene passes both cutoffs in that sample)
expression <- data.frame(matrix(NA, ncol = length(samples), nrow = length(genes)))
colnames(expression) <- samples
rownames(expression) <- genes
# seq_along() skips the loop for an empty sample list (1:0 would iterate over 1 and 0)
for(sample_idx in seq_along(samples)){
    cells <- rownames(object@meta.data[object@meta.data$samplenumb == colnames(expression)[sample_idx], ])
    # drop = FALSE keeps a genes x cells matrix even if the sample consists of a single cell
    expression_matrix <- as.matrix(object@assays$RNA@data[genes, cells, drop = FALSE])
    expression_min <- cutoff_expression * apply(expression_matrix, 1, max)
    # expression_min holds one value per gene and is recycled down every cell column
    number_above_cutoff <- rowSums(expression_matrix > expression_min)
    expression[, sample_idx] <- as.numeric(number_above_cutoff > (cutoff_cells * length(cells)))
}
expression$sum <- rowSums(expression)
#select the features that pass the cutoffs in more than half of the samples
features_overlap_samples <- rownames(expression[expression$sum > (0.5 * length(samples)), ])

In [None]:
# put the three MS feature lists side by side (qpcR's internal cbind.na; presumably pads the shorter
# lists with NA - verify) and save them as csv
features_MS <- qpcR:::cbind.na(features_all_samples, features_untreated_samples, features_overlap_samples)
# NOTE(review): "utreated" looks like a typo for "untreated"; it ends up in the csv header, so it is left unchanged here
colnames(features_MS) <- c("all samples MS", "utreated samples MS", "overlapped samples MS")
write.csv(features_MS, file = "features_MS.csv")

In [None]:
Sys.time()

## Create the matrix for SCNI features

In [None]:
# Feature selection for the SCNI group (all SCNI samples); takes a while because every sample is densified.
# A gene is kept when, in more than half of the SCNI samples, more than `cutoff_cells` of the sample's
# cells express it above `cutoff_expression` times the gene's maximum within that sample.

# restrict to the clusters of interest and collect the SCNI samples
object <- subset(pbmc, idents = clusters_of_interest)
samples <- unique(filter(object@meta.data, diagnosis_simp == "SCNI")$samplenumb)
genes <- rownames(object)
cutoff_expression <- 0.1 # fraction of the per-sample gene maximum that a cell has to exceed
cutoff_cells <- 0.1      # fraction of the cells of a sample that has to exceed that threshold

# genes x samples table of 0/1 flags (1 = the gene passes both cutoffs in that sample)
expression <- data.frame(matrix(NA, ncol = length(samples), nrow = length(genes)))
colnames(expression) <- samples
rownames(expression) <- genes
# seq_along() skips the loop for an empty sample list (1:0 would iterate over 1 and 0)
for(sample_idx in seq_along(samples)){
    cells <- rownames(object@meta.data[object@meta.data$samplenumb == colnames(expression)[sample_idx], ])
    # drop = FALSE keeps a genes x cells matrix even if the sample consists of a single cell
    expression_matrix <- as.matrix(object@assays$RNA@data[genes, cells, drop = FALSE])
    expression_min <- cutoff_expression * apply(expression_matrix, 1, max)
    # expression_min holds one value per gene and is recycled down every cell column
    number_above_cutoff <- rowSums(expression_matrix > expression_min)
    expression[, sample_idx] <- as.numeric(number_above_cutoff > (cutoff_cells * length(cells)))
}
expression$sum <- rowSums(expression)
#select the features that pass the cutoffs in more than half of the samples
features_scni <- rownames(expression[expression$sum > (0.5 * length(samples)), ])

In [None]:
# Feature selection for the SCNI cells flagged as "overlap"; takes a while because every sample is densified.
# A gene is kept when, in more than half of the SCNI samples, more than `cutoff_cells` of the sample's
# overlapping cells express it above `cutoff_expression` times the gene's maximum within that sample.

# restrict to the clusters of interest and to the overlapping cells, then collect the SCNI samples
object <- subset(pbmc_overlap, idents = clusters_of_interest)
object <- subset(object, overlap == "overlap")
samples <- unique(filter(object@meta.data, diagnosis_simp == "SCNI")$samplenumb)
genes <- rownames(object)
cutoff_expression <- 0.1 # fraction of the per-sample gene maximum that a cell has to exceed
cutoff_cells <- 0.1      # fraction of the cells of a sample that has to exceed that threshold

# genes x samples table of 0/1 flags (1 = the gene passes both cutoffs in that sample)
expression <- data.frame(matrix(NA, ncol = length(samples), nrow = length(genes)))
colnames(expression) <- samples
rownames(expression) <- genes
# seq_along() skips the loop for an empty sample list (1:0 would iterate over 1 and 0)
for(sample_idx in seq_along(samples)){
    cells <- rownames(object@meta.data[object@meta.data$samplenumb == colnames(expression)[sample_idx], ])
    # drop = FALSE keeps a genes x cells matrix even if the sample consists of a single cell
    expression_matrix <- as.matrix(object@assays$RNA@data[genes, cells, drop = FALSE])
    expression_min <- cutoff_expression * apply(expression_matrix, 1, max)
    # expression_min holds one value per gene and is recycled down every cell column
    number_above_cutoff <- rowSums(expression_matrix > expression_min)
    expression[, sample_idx] <- as.numeric(number_above_cutoff > (cutoff_cells * length(cells)))
}
expression$sum <- rowSums(expression)
#select the features that pass the cutoffs in more than half of the samples
features_overlap_SCNI <- rownames(expression[expression$sum > (0.5 * length(samples)), ])

In [None]:
# put the two SCNI feature lists side by side (qpcR's internal cbind.na; presumably pads the shorter
# list with NA - verify) and save them as csv
features_scni_matrix <- qpcR:::cbind.na(features_scni, features_overlap_SCNI)
colnames(features_scni_matrix) <- c("all samples scni", "overlapped samples scni")
write.csv(features_scni_matrix, file = "features_scni.csv")

In [None]:
Sys.time()

## Create the matrix for healthy features

In [None]:
# Feature selection for the Healthy group (all healthy samples); takes a while because every sample is densified.
# A gene is kept when, in more than half of the healthy samples, more than `cutoff_cells` of the sample's
# cells express it above `cutoff_expression` times the gene's maximum within that sample.

# restrict to the clusters of interest and collect the healthy samples
object <- subset(pbmc, idents = clusters_of_interest)
samples <- unique(filter(object@meta.data, diagnosis_simp == "Healthy")$samplenumb)
genes <- rownames(object)
cutoff_expression <- 0.1 # fraction of the per-sample gene maximum that a cell has to exceed
cutoff_cells <- 0.1      # fraction of the cells of a sample that has to exceed that threshold

# genes x samples table of 0/1 flags (1 = the gene passes both cutoffs in that sample)
expression <- data.frame(matrix(NA, ncol = length(samples), nrow = length(genes)))
colnames(expression) <- samples
rownames(expression) <- genes
# seq_along() skips the loop for an empty sample list (1:0 would iterate over 1 and 0)
for(sample_idx in seq_along(samples)){
    cells <- rownames(object@meta.data[object@meta.data$samplenumb == colnames(expression)[sample_idx], ])
    # drop = FALSE keeps a genes x cells matrix even if the sample consists of a single cell
    expression_matrix <- as.matrix(object@assays$RNA@data[genes, cells, drop = FALSE])
    expression_min <- cutoff_expression * apply(expression_matrix, 1, max)
    # expression_min holds one value per gene and is recycled down every cell column
    number_above_cutoff <- rowSums(expression_matrix > expression_min)
    expression[, sample_idx] <- as.numeric(number_above_cutoff > (cutoff_cells * length(cells)))
}
expression$sum <- rowSums(expression)
#select the features that pass the cutoffs in more than half of the samples
features_healthy <- rownames(expression[expression$sum > (0.5 * length(samples)), ])

In [None]:
# Feature selection for the healthy cells flagged as "overlap" (clonotypes present also in CSF);
# takes a while because every sample is densified.
# A gene is kept when, in more than half of the healthy samples, more than `cutoff_cells` of the sample's
# overlapping cells express it above `cutoff_expression` times the gene's maximum within that sample.

# restrict to the clusters of interest and to the overlapping cells, then collect the healthy samples
object <- subset(pbmc_overlap, idents = clusters_of_interest)
object <- subset(object, overlap == "overlap")
samples <- unique(filter(object@meta.data, diagnosis_simp == "Healthy")$samplenumb)
genes <- rownames(object)
cutoff_expression <- 0.1 # fraction of the per-sample gene maximum that a cell has to exceed
cutoff_cells <- 0.1      # fraction of the cells of a sample that has to exceed that threshold

# genes x samples table of 0/1 flags (1 = the gene passes both cutoffs in that sample)
expression <- data.frame(matrix(NA, ncol = length(samples), nrow = length(genes)))
colnames(expression) <- samples
rownames(expression) <- genes
# seq_along() skips the loop for an empty sample list (1:0 would iterate over 1 and 0)
for(sample_idx in seq_along(samples)){
    cells <- rownames(object@meta.data[object@meta.data$samplenumb == colnames(expression)[sample_idx], ])
    # drop = FALSE keeps a genes x cells matrix even if the sample consists of a single cell
    expression_matrix <- as.matrix(object@assays$RNA@data[genes, cells, drop = FALSE])
    expression_min <- cutoff_expression * apply(expression_matrix, 1, max)
    # expression_min holds one value per gene and is recycled down every cell column
    number_above_cutoff <- rowSums(expression_matrix > expression_min)
    expression[, sample_idx] <- as.numeric(number_above_cutoff > (cutoff_cells * length(cells)))
}
expression$sum <- rowSums(expression)
#select the features that pass the cutoffs in more than half of the samples
features_overlap_healthy <- rownames(expression[expression$sum > (0.5 * length(samples)), ])

In [None]:
# put the two healthy feature lists side by side (qpcR's internal cbind.na; presumably pads the shorter
# list with NA - verify) and save them as csv
features_healthy_matrix <- qpcR:::cbind.na(features_healthy, features_overlap_healthy)
colnames(features_healthy_matrix) <- c("all samples healthy", "overlapped samples healthy")
write.csv(features_healthy_matrix, file = "features_healthy.csv")

In [None]:
Sys.time()

# Intersect and combine all features

In [None]:
# genes that passed the filter in all three MS selections (all, untreated, overlapping cells)
features_MS_intersect <- Reduce(intersect, list(features_all_samples, features_untreated_samples, features_overlap_samples))
length(features_MS_intersect)

In [None]:
# sizes of the two SCNI selections, followed by the genes they have in common
length(features_scni)
length(features_overlap_SCNI)
features_SCNI_intersect <- Reduce(intersect, list(features_scni, features_overlap_SCNI))
length(features_SCNI_intersect)

In [None]:
# sizes of the two healthy selections, followed by the genes they have in common
length(features_healthy)
length(features_overlap_healthy)
features_healthy_intersect <- Reduce(intersect, list(features_healthy, features_overlap_healthy))
length(features_healthy_intersect)

In [None]:
# collect the intersected feature lists per group, plus the non-intersected healthy list as fourth column,
# and save them; "features_intersect.csv" is read back in further below to define the DE feature sets
features_intersect <- qpcR:::cbind.na(features_MS_intersect, features_SCNI_intersect, features_healthy_intersect, features_healthy)
colnames(features_intersect) <- c("features_ms", "features_scni", "features_healthy", "features_healthy_nonintersected")
write.csv(features_intersect, file = "features_intersect.csv")

# Prepare the object for analysis

In [None]:
#prepare the twin pairs for analysis: 'twin_pair' holds the pair id shared by the '-MS' and '-H' sample
#of a pair; every sample that is not listed below keeps the default 'FALSE'
unique(pbmc@meta.data$samplenumb)
pbmc@meta.data$twin_pair <- 'FALSE'

#NOTE(review): unlike all other pairs, the two AR samples keep their own sample label instead of a
#shared pair id - confirm that this is intended
for(unpaired_sample in c('AR-MS', 'AR-H')){
    pbmc@meta.data$twin_pair[pbmc@meta.data$samplenumb == unpaired_sample] <- unpaired_sample
}

twin_pair_ids <- c('AU', 'AV', 'A', 'B', 'BE', 'Y', 'BF', 'CE', 'CI', 'BJ', 'U')
for(pair_id in twin_pair_ids){
    for(twin_suffix in c('-MS', '-H')){
        pbmc@meta.data$twin_pair[pbmc@meta.data$samplenumb == paste0(pair_id, twin_suffix)] <- pair_id
    }
}

unique(pbmc@meta.data$twin_pair)

In [None]:
#cluster of interest
clusters_of_interest <- c('6_GZMK', '8_CD74', '9_IKZF2', '10_FGFBP2')
# subset of the full object restricted to these clusters (selected through the active identities)
object <- subset(pbmc, idents = clusters_of_interest) #will be used for plotting

#remove the IFN treated subject to unbias the analysis
# (cells whose 'treatment' metadata value is 'IFN' are dropped; 'object' itself still contains them)
object_analysis_general <- subset(object, treatment != 'IFN')
object_analysis_general

In [None]:
# determine the markers for comparison: read the saved feature table back in
markers <- read.csv(file = 'features_intersect.csv', row.names = 1)
# genes whose names start with one of these prefixes are excluded from the comparison
# NOTE(review): '^MT' matches every gene starting with "MT", not only names with the "MT-" prefix,
# and '^TRKC' looks unusual next to the other prefixes - confirm both patterns
markers.remove <- grep(pattern = "^TRAV|^TRBV|^TRGV|^TRDV|^RPL|^RPS|^IGKV|^IGLV|^IGHV|^IGHG|^IGLC|^TRKC|^MT", x = rownames(object), value = TRUE)

# drop the NA entries of a column of the feature table and every gene listed in markers.remove
keep_valid_features <- function(candidates) {
    candidates <- candidates[!is.na(candidates)]
    candidates[!(candidates %in% markers.remove)]
}

features_ms <- keep_valid_features(markers$features_ms)
length(features_ms)

features_scni <- keep_valid_features(markers$features_scni)
length(features_scni)

features_healthy <- keep_valid_features(markers$features_healthy)
length(features_healthy)

In [None]:
#start enrichR
library('enrichR')
setEnrichrSite("Enrichr")
# top-level output directory; the per-comparison directories are created inside it further below
dir.create('outs')

# SCNI vs Healthy analysis NON-pairwise

In [None]:
#prepare the file
# start from the IFN-free object created above
object_analysis <- object_analysis_general
object_analysis

In [None]:
# drop the samples 'AU-MS-2' and 'AU-H-2' from the object used for this comparison
Idents(object_analysis) <- 'sample'
object_analysis <- subset(object_analysis, idents = c('AU-MS-2', 'AU-H-2'), invert = TRUE)
object_analysis

## Start for the first partner

In [None]:
#find first the markers per cluster
#Non-pairwise differential expression of partner1 against partner2, run separately in every cluster of
#interest, followed by the selection of the significant genes and the plots used to verify them.
partner1 <- 'SCNI'
partner2 <- 'Healthy'
features_1 <- features_scni
features_2 <- features_healthy
clusters_of_interest <- c('6_GZMK', '8_CD74', '9_IKZF2', '10_FGFBP2')
databases_list <- c('GO_Biological_Process_2021', 'Reactome_2016', 'MSigDB_Hallmark_2020')
grouping_de <- 'diagnosis_simp'
logfc.threshold <- 0.05
pvalue <- 0.05

#create output dirs (recursive and silent: the cell can be re-run and does not require './outs' to exist)
dir_path <- paste0('./outs/', partner1, '_vs_', partner2, '_NON_pairwise')
dir.create(dir_path, recursive = TRUE, showWarnings = FALSE)
plots_dir <- paste0(dir_path, '/plots/')
dir.create(plots_dir, recursive = TRUE, showWarnings = FALSE)
#define colors
colours_diagnosis_groups <- c('#bbbbbb', '#d1988f', '#8e2311')
colour1 <- '#d1988f'
colour2 <- '#bbbbbb'

#object - plotting object, averaged per diagnosis group for the heatmaps
Idents(object) <- grouping_de
levels(object) <- c('Healthy', 'SCNI', 'MS')
object_av <- AverageExpression(object, assay = "RNA", return.seurat = TRUE, verbose = FALSE)
Idents(object_analysis) <- 'cd8_coded'
levels(object_analysis) <- clusters_of_interest

#one table of significant partner1 markers per cluster
de_list_partner1 <- vector('list', length(clusters_of_interest))
for(i in seq_along(clusters_of_interest)){
        object_subset <- subset(object_analysis, idents = clusters_of_interest[i])
        markers_partner1 <- FindMarkers(object_subset, ident.1 = partner1, ident.2 = partner2, group.by = grouping_de, features = features_1, 
                   only.pos = TRUE, logfc.threshold = logfc.threshold, verbose = FALSE)
        markers_partner1$genes <- rownames(markers_partner1)
        #rep() keeps the assignment valid for a result table without rows
        markers_partner1$cluster <- rep(clusters_of_interest[i], nrow(markers_partner1))
        markers_partner1 <- markers_partner1[markers_partner1$p_val_adj < pvalue, ]
        de_list_partner1[[i]] <- markers_partner1
}

#search through the markers
#a gene is selected when it is significant in more than `hits` clusters
#NOTE(review): the cell for the second partner sets hits <- 1 - confirm that the different thresholds are intended
hits <- 0
selected_markers_partner1 <- c()

for(i in seq_along(de_list_partner1)){
    markers_cluster <- de_list_partner1[[i]]
    #determine the numbers of the other clusters
    number_of_othersamples <- setdiff(seq_along(de_list_partner1), i)
    
    #start to search for the markers in other clusters
    #seq_len() skips clusters without significant markers (1:0 would iterate over 1 and 0 and fail)
    for(r in seq_len(nrow(markers_cluster))){
        gene_to_test <- markers_cluster$genes[r]
        intersect_gene <- filter(markers_cluster, genes == gene_to_test)
        #collect the rows of the same gene from the other clusters
        for(o in number_of_othersamples){
           markers_other_cluster <- de_list_partner1[[o]]
           if(gene_to_test %in% markers_other_cluster$genes){
             intersect_gene <- rbind(intersect_gene, filter(markers_other_cluster, genes == gene_to_test))
           }
        }
        #add the result to the final table, summarised over all clusters in which the gene is significant
        if(nrow(intersect_gene) > hits){
            gene_to_add <- intersect_gene[1, ]
            gene_to_add$avg_log2FC <- mean(intersect_gene$avg_log2FC)
            gene_to_add$max_log2FC <- max(intersect_gene$avg_log2FC)
            gene_to_add$min_log2FC <- min(intersect_gene$avg_log2FC)
            gene_to_add$avg_p_val_adj <- mean(intersect_gene$p_val_adj)
            gene_to_add$cluster <- paste0(intersect_gene$cluster, collapse = ', ')
            selected_markers_partner1 <- rbind(selected_markers_partner1, gene_to_add)
        }
    }
}
#stop with a clear message instead of failing on the indexing below
if(is.null(selected_markers_partner1)){
    stop('no significant ', partner1, ' markers were found in more than ', hits, ' cluster(s)', call. = FALSE)
}
#a gene found in several clusters was added once per cluster; keep its first entry only
selected_markers_partner1 <- selected_markers_partner1[!duplicated(selected_markers_partner1$genes), ]
selected_markers_partner1 <- selected_markers_partner1[order(-selected_markers_partner1$avg_log2FC), ]

#plot the intersected markers for the next verification step
#create the dir for vln plots per partner
plots_partner1 <- paste0(plots_dir, partner1, '/')
dir.create(plots_partner1, recursive = TRUE, showWarnings = FALSE)
#scaled average expression of the selected genes, sorted by the partner1 column
#drop = FALSE keeps a genes x groups table when only one gene was selected
ordered_genes <- object_av@assays$RNA@scale.data[selected_markers_partner1$genes, , drop = FALSE]
ordered_genes <- as.data.frame(ordered_genes)
ordered_genes <- ordered_genes[order(-ordered_genes[, partner1]), , drop = FALSE]

#attention: the comparison uses the averaged plotting object, not the object used for the DE test
#save the genes higher as the partner on average
genes_higher <- rownames(ordered_genes)[ordered_genes[[partner1]] > ordered_genes[[partner2]]]
write.csv(genes_higher, file = paste0(plots_partner1, 'above_the_partner_average.csv'))

#save the genes lower as the partner on average
genes_lower <- rownames(ordered_genes)[ordered_genes[[partner1]] < ordered_genes[[partner2]]]
write.csv(genes_lower, file = paste0(plots_partner1, 'below_the_partner_average.csv'))

#prepare the genes for heatmap
ordered_genes <- rownames(ordered_genes)

#top 20 sorted
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width=5, repr.plot.height=12)
heatmap <- DoHeatmap(object_av, features = ordered_genes_plot, draw.lines = FALSE, size = 7,  angle = 270, hjust = 1, raster = FALSE, group.colors = colours_diagnosis_groups) + 
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
         axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')) + 
scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
ggsave(heatmap, file = paste0(plots_partner1, 'heatmap_top20_1.pdf'), width = 5, height = 12)

#one violin plot per selected gene, saved under the gene name
for(g in seq_along(ordered_genes)){
    plot <- VlnPlot(object, features = ordered_genes[g], pt.size = 0.01, cols = colours_diagnosis_groups)
    #make the points of the second plot layer more transparent
    plot$layers[[2]]$aes_params$alpha <- 0.1
    ggsave(plot, file = paste0(plots_partner1, ordered_genes[g], '.pdf'), width = 6, height = 6)
}

In [None]:
#now look through the genes and select the ones validated on the full cohort:
#genes whose scaled average expression is lower in partner1 than in partner2 are removed
genes_remove <- genes_lower
selected_markers_partner1 <- selected_markers_partner1[!selected_markers_partner1$genes %in% genes_remove, ]

#scaled average expression of the remaining genes, sorted by the partner1 column
#drop = FALSE keeps a genes x groups table when only one gene is left
ordered_genes <- object_av@assays$RNA@scale.data[selected_markers_partner1$genes, , drop = FALSE]
ordered_genes <- as.data.frame(ordered_genes)
ordered_genes <- ordered_genes[order(-ordered_genes[, partner1]), , drop = FALSE]
ordered_genes <- rownames(ordered_genes)

#top 20 sorted
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width=5, repr.plot.height=12)
heatmap <- DoHeatmap(object_av, features = ordered_genes_plot, draw.lines = FALSE, size = 7,  angle = 270, hjust = 1, raster = FALSE, group.colors = colours_diagnosis_groups) + 
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
         axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')) + 
scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
ggsave(heatmap, file = paste0(dir_path, '/', partner1, '_heatmap_top20.pdf'), width = 5, height = 12)

In [None]:
#start the enrichr PEA for first partner
#For every database: query enrichr with the selected partner1 genes, save the full result table and
#plot the (up to) 20 terms with the lowest adjusted p value together with the genes behind them.
dir_pea_partner1 <- paste0(dir_path, '/PEA_', partner1, '/')
dir.create(dir_pea_partner1, recursive = TRUE, showWarnings = FALSE)

#axis transformation -log(x, base) for the adjusted p values; defined once instead of in every iteration
reverselog_trans <- function(base = exp(1)) {
    trans <- function(x) -log(x, base)
    inv <- function(x) base^(-x)
    trans_new(paste0("reverselog-", format(base)), trans, inv,
              log_breaks(base = base),
              domain = c(1e-100, Inf))
}

for(db in seq_along(databases_list)){
        enriched <- enrichr(selected_markers_partner1$genes, databases = databases_list[db])
        #skip the database when the query returned nothing usable
        if(length(enriched) == 0 || !is.data.frame(enriched[[1]])){
            warning('enrichr returned no result for ', databases_list[db], '; skipping this database', call. = FALSE)
            next
        }
        enriched <- enriched[[1]]
        #descending adjusted p value: the most significant terms are in the last rows
        enriched <- enriched[order(-enriched$Adjusted.P.value), ]
        enriched$Term <- factor(enriched$Term, levels = unique(enriched$Term))
        #save the positive enriched pathways
        write.csv(enriched, file = paste0(dir_pea_partner1, partner1, '_', databases_list[db], '_positive_', '_pea_list.csv'))

        if(nrow(enriched) == 0){
            message('no enriched terms for ', databases_list[db], '; nothing to plot')
            next
        }

        #the last 20 rows (or all rows of a shorter table) are plotted
        top_terms <- tail(enriched, 20)

        #2881c1 - for blue
        #d3556e - for red
        options(repr.plot.width=22, repr.plot.height=11)
        plot_positive <- ggplot(top_terms, aes(y=Term, x= Adjusted.P.value))+
            geom_vline(xintercept = .05, color = "grey", linetype="dashed") +
            geom_segment( aes(yend=Term, xend=1), col= "black") +
            geom_point(shape=21, aes(size = abs(Combined.Score)), fill = colour1) +
            scale_x_continuous(trans=reverselog_trans(10))+
            scale_size_continuous(range = c(2, 13)) +
            theme_tufte()+ xlab("p value (adj)") + ylab("") +
            theme(text=element_text(family="Helvetica", size = 26),
              axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") + ggtitle(paste(partner1,  'positive', databases_list[db]))
        ggsave(plot_positive, file = paste0(dir_pea_partner1, partner1, '_', databases_list[db], '_positive', '.pdf'), width = 30, height = 11)

        #plot the genes: split the ';'-separated gene lists of the plotted terms into single genes
        genes_to_plot <- strsplit(paste(top_terms[, 'Genes'], collapse = ';'), split = ';')[[1]]
        #reverse because the table is sorted with the most significant terms last, so that their genes come first
        genes_to_plot <- unique(rev(genes_to_plot))
        genes_to_plot <- head(genes_to_plot, 20)

        heatmap_plot <- DoHeatmap(object_av, features = genes_to_plot, draw.lines = FALSE, raster = FALSE, group.colors = colours_diagnosis_groups) + theme(text = element_text(size = 20, face = "bold")) + scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
        ggsave(heatmap_plot, file = paste0(dir_pea_partner1, partner1, '_', databases_list[db], '_HEATMAP_genes_positive', '.pdf'), width = 8, height = 11)

        vlnplot <- VlnPlot(object, features = genes_to_plot, stack = TRUE, flip = TRUE, cols = colours_diagnosis_groups, fill.by = "ident") +
                        theme(legend.position = "none",
                          text = element_text(size = 17),
                          axis.text = element_text(size = 17))
        ggsave(vlnplot, file = paste0(dir_pea_partner1, partner1, '_', databases_list[db], '_StackedVIOLIN_genes_positive', '.pdf'), width = 8, height = 11)
        }
    #end of the enrichr loop

## Start for second partner

In [None]:
# start the same for the second partner
Idents(object_analysis) <- 'cd8_coded'
levels(object_analysis) <- clusters_of_interest
de_list_partner2 <- c()

for(i in 1:length(clusters_of_interest)){
        object_subset <- subset(object_analysis, idents = clusters_of_interest[i])
        markers_partner2 <- FindMarkers(object_subset, ident.1 = partner2, ident.2 = partner1, group.by = grouping_de, features = features_2, 
                                        only.pos = TRUE, logfc.threshold = logfc.threshold)
        markers_partner2$genes <- rownames(markers_partner2)
        markers_partner2$cluster <- clusters_of_interest[i]
        markers_partner2 <- markers_partner2[markers_partner2$p_val_adj < pvalue, ]
        de_list_partner2 <- append(de_list_partner2, list(markers_partner2))
}

#search through the markers
# A gene is selected when it is significant in more than `hits` clusters.
# For each selected gene the per-cluster rows are collapsed into one row:
# mean / max / min of avg_log2FC, mean adjusted p-value and the list of clusters.
hits <- 1
# rows are collected in a list and bound once at the end (no rbind() inside the loop)
selected_rows <- list()
# genes that were already evaluated while walking through an earlier cluster
genes_seen <- character(0)

for(i in seq_along(de_list_partner2)){
    markers_cluster <- de_list_partner2[[i]]
    #determine the numbers of the other samples
    number_of_othersamples <- setdiff(seq_along(de_list_partner2), i)
    
    #start to search for the markers in other clusters
    # Iterating over the gene names is safe for a cluster without any significant gene
    # (1:nrow() would iterate over c(1, 0) there). A gene seen in an earlier cluster is
    # skipped: its hit count is the same from every cluster, and only the first
    # occurrence was kept by the former de-duplication step anyway.
    for(gene_to_test in setdiff(markers_cluster$genes, genes_seen)){
        intersect_gene <- filter(markers_cluster, genes == gene_to_test)
        #open the loop for the other samples
        for(o in number_of_othersamples){
           markers_other_cluster <- de_list_partner2[[o]]
           if(gene_to_test %in% markers_other_cluster$genes){
             intersect_gene <- rbind(intersect_gene, filter(markers_other_cluster, genes == gene_to_test))
           }
        }
        #add the result to the final table
        if(nrow(intersect_gene) > hits){
            # the row of the current cluster is the template; the statistics are overwritten
            gene_to_add <- intersect_gene[1, ]
            gene_to_add$avg_log2FC <- mean(intersect_gene$avg_log2FC)
            gene_to_add$max_log2FC <- max(intersect_gene$avg_log2FC)
            gene_to_add$min_log2FC <- min(intersect_gene$avg_log2FC)
            gene_to_add$avg_p_val_adj <- mean(intersect_gene$p_val_adj)
            gene_to_add$cluster <- paste0(intersect_gene$cluster, collapse = ', ')
            selected_rows[[length(selected_rows) + 1]] <- gene_to_add
        }
    }
    genes_seen <- c(genes_seen, markers_cluster$genes)
}
selected_markers_partner2 <- do.call(rbind, selected_rows)
# fail early with a clear message instead of continuing with an empty result
if(is.null(selected_markers_partner2)){
    stop('No gene is significant for ', partner2, ' in more than ', hits, ' cluster(s)', call. = FALSE)
}
# strongest average log2 fold change first
selected_markers_partner2 <- selected_markers_partner2[order(-selected_markers_partner2$avg_log2FC), ]

#plot the intersected markers for the next verification step
#create the dir for vln plots per partner
plots_partner2 <- paste0(plots_dir, partner2, '/')
# create the directory only when it is missing, so re-running the cell does not warn
if(!dir.exists(plots_partner2)){dir.create(plots_partner2)}

#create ordered heatmap
# values from the scale.data slot of object_av for the selected genes, sorted by
# decreasing value in the partner2 column; drop = FALSE keeps the two-dimensional
# shape when only a single gene was selected
ordered_genes <- object_av@assays$RNA@scale.data[selected_markers_partner2$genes, , drop = FALSE]
ordered_genes <- as.data.frame(ordered_genes)
ordered_genes <- ordered_genes[order(-ordered_genes[[partner2]]), , drop = FALSE]

# genes whose value is higher in partner2 than in partner1 #attention!
genes_higher <- rownames(ordered_genes)[ordered_genes[[partner2]] > ordered_genes[[partner1]]]
#save the genes higher as the partner on average
write.csv(genes_higher, file = paste0(plots_partner2, 'above_the_partner_average.csv'))

# genes whose value is lower in partner2 than in partner1 #attention!
genes_lower <- rownames(ordered_genes)[ordered_genes[[partner2]] < ordered_genes[[partner1]]]
#save the genes lower as the partner on average
write.csv(genes_lower, file = paste0(plots_partner2, 'below_the_partner_average.csv'))

#prepare the genes for heatmap
ordered_genes <- rownames(ordered_genes)

#top 20 sorted
# head() returns all genes when there are fewer than 20
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width=5, repr.plot.height=12)
heatmap <- DoHeatmap(object_av, features = ordered_genes_plot, draw.lines = FALSE, size = 7,  angle = 270, hjust = 1, raster = FALSE, group.colors = colours_diagnosis_groups) + 
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
         axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')) + 
scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
# full argument names instead of relying on partial matching of `file` to `filename`
ggsave(filename = paste0(plots_partner2, 'heatmap_top20_1.pdf'), plot = heatmap, width = 5, height = 12)

# one violin plot per selected gene, saved under the gene name
for(g in seq_along(ordered_genes)){
    plot <- VlnPlot(object, features = ordered_genes[g], pt.size = 0.01, cols = colours_diagnosis_groups)
    # make the second layer of the plot (presumably the points - TODO confirm) nearly transparent
    plot$layers[[2]]$aes_params$alpha <- 0.1
    ggsave(filename = paste0(plots_partner2, ordered_genes[g], '.pdf'), plot = plot, width = 6, height = 6)
}

In [None]:
#now look through the genes and select the ones validated on the full cohort

# remove the genes that are lower in partner2 than in partner1 on average
# (genes_lower is computed in the preceding cell)
genes_remove <- genes_lower
selected_markers_partner2 <- selected_markers_partner2[!selected_markers_partner2$genes %in% genes_remove, ]

#create ordered heatmap
# drop = FALSE keeps the two-dimensional shape when only a single gene is left
ordered_genes <- object_av@assays$RNA@scale.data[selected_markers_partner2$genes, , drop = FALSE]
ordered_genes <- as.data.frame(ordered_genes)
ordered_genes <- ordered_genes[order(-ordered_genes[[partner2]]), , drop = FALSE]
ordered_genes <- rownames(ordered_genes)

#top 20 sorted
# head() returns all genes when there are fewer than 20
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width=5, repr.plot.height=12)
heatmap <- DoHeatmap(object_av, features = ordered_genes_plot, draw.lines = FALSE, size = 7,  angle = 270, hjust = 1, raster = FALSE, group.colors = colours_diagnosis_groups) + 
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
         axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')) + 
scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
# full argument names instead of relying on partial matching of `file` to `filename`
ggsave(filename = paste0(dir_path, '/', partner2, '_heatmap_top20.pdf'), plot = heatmap, width = 5, height = 12)

In [None]:
#start the enrichr PEA for the second partner
dir_pea_partner2 <- paste0(dir_path, '/PEA_', partner2, '/')
# create the directory only when it is missing, so re-running the cell does not warn
if(!dir.exists(dir_pea_partner2)){dir.create(dir_pea_partner2)}

# Axis transformation that plots -log(x, base), so smaller p-values are drawn
# further along the axis. Defined once instead of in every loop iteration.
reverselog_trans <- function(base = exp(1)) {
    trans <- function(x) -log(x, base)
    inv <- function(x) base^(-x)
    trans_new(paste0("reverselog-", format(base)), trans, inv,
              log_breaks(base = base),
              domain = c(1e-100, Inf))
}

# per database: query Enrichr, save the table, plot the top terms and the genes behind them
for(db in seq_along(databases_list)){
        enriched <- enrichr(selected_markers_partner2$genes, databases = databases_list[db])
        enriched <- enriched[[1]]
        # descending adjusted p-value: the most significant terms are the LAST rows
        enriched <- enriched[order(-enriched$Adjusted.P.value), ]
        enriched$Term <- factor(enriched$Term, levels = unique(enriched$Term))
        #save the positive enriched pathways
        write.csv(enriched, file = paste0(dir_pea_partner2, partner2, '_', databases_list[db], '_positive_', '_pea_list.csv'))

        # nothing to plot for a database that returned no term
        if(nrow(enriched) == 0){
            message('No enriched terms for ', partner2, ' in ', databases_list[db], ' - plots skipped')
            next
        }

        # the 20 most significant terms; tail() returns all rows when there are fewer
        top_terms <- tail(enriched, 20)

        #2881c1 - for blue
        #d3556e - for red
        options(repr.plot.width=22, repr.plot.height=11)
        plot_positive <- ggplot(top_terms, aes(y=Term, x= Adjusted.P.value))+
            geom_vline(xintercept = .05, color = "grey", linetype="dashed") +
            geom_segment( aes(yend=Term, xend=1), col= "black") +
            geom_point(shape=21, aes(size = abs(Combined.Score)), fill = colour2) +
            scale_x_continuous(trans=reverselog_trans(10))+
            scale_size_continuous(range = c(2, 13)) +
            theme_tufte()+ xlab("p value (adj)") + ylab("") +
            theme(text=element_text(family="Helvetica", size = 26),
              axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") + ggtitle(paste(partner2,  'positive', databases_list[db]))
        # full argument names instead of relying on partial matching of `file` to `filename`
        ggsave(filename = paste0(dir_pea_partner2, partner2, '_', databases_list[db], '_positive', '.pdf'), plot = plot_positive, width = 30, height = 11)

        #plot the genes
        # split the ';'-separated gene lists of the plotted terms
        genes_to_plot <- unlist(strsplit(as.character(top_terms$Genes), split = ';'))
        #reverse because of ascending ordering of the enriched table to put the most significant at the beginning
        genes_to_plot <- unique(rev(genes_to_plot))
        # at most 20 genes are plotted
        genes_to_plot <- head(genes_to_plot, 20)

        heatmap_plot <- DoHeatmap(object_av, features = genes_to_plot, draw.lines = FALSE, raster = FALSE, group.colors = colours_diagnosis_groups) + theme(text = element_text(size = 20, face = "bold")) + scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
        ggsave(filename = paste0(dir_pea_partner2, partner2, '_', databases_list[db], '_HEATMAP_genes_positive', '.pdf'), plot = heatmap_plot, width = 8, height = 11)

        vlnplot <- VlnPlot(object, features = genes_to_plot, stack = TRUE, flip = TRUE, cols = colours_diagnosis_groups, fill.by = "ident") +
                        theme(legend.position = "none",
                          text = element_text(size = 17),
                          axis.text = element_text(size = 17))
        ggsave(filename = paste0(dir_pea_partner2, partner2, '_', databases_list[db], '_StackedVIOLIN_genes_positive', '.pdf'), plot = vlnplot, width = 8, height = 11)
}
#end of the enrichr loop

## Combine the final result and plot volcano

In [None]:
# Save the per-partner DGE tables, combine them and draw a volcano plot in which
# the top genes of each partner are highlighted and labelled.
# number of top genes per partner that are highlighted and labelled
number_of_genes_volcano <- 25
options(repr.plot.width=10, repr.plot.height=8)
selected_markers_partner1$partner <- partner1
write.csv(selected_markers_partner1, file = paste0(dir_path, '/DGE_', partner1, '.csv'))
selected_markers_partner2$partner <- partner2
write.csv(selected_markers_partner2, file = paste0(dir_path, '/DGE_', partner2, '.csv'))

# Mirror partner2 to the negative side of the x axis.
# NOTE(review): this changes selected_markers_partner2 in place, so running the cell a
# second time flips the sign back; max_log2FC / min_log2FC are not flipped.
selected_markers_partner2$avg_log2FC <- -selected_markers_partner2$avg_log2FC 
selected_markers_combined <- rbind(selected_markers_partner1, selected_markers_partner2)

# head() returns fewer rows when a table is shorter than the requested number;
# indexing with 1:n would pad the table with all-NA rows in that case
top_markers_partner1 <- head(selected_markers_partner1, number_of_genes_volcano)
top_markers_partner2 <- head(selected_markers_partner2, number_of_genes_volcano)

volcano <- ggplot(selected_markers_combined, aes(x = avg_log2FC, y = -log10(avg_p_val_adj))) +
        geom_vline(xintercept = 0) +
        geom_hline(yintercept = -log10(0.05), color ="grey", linetype ="dashed") +
        geom_point(data = selected_markers_combined,
                    color = "grey", alpha = 1) +
        geom_point(data = top_markers_partner1,
                    fill = colour1, alpha = 1, shape=21, size= 2.5) +
        geom_point(data = top_markers_partner2,
                    fill = colour2, alpha = 1, shape=21, size= 2.5) +
        geom_text_repel(data= rbind(top_markers_partner1, top_markers_partner2), max.overlaps = number_of_genes_volcano, aes(label = genes))+
        theme_linedraw() +
        theme(panel.grid = element_blank(), legend.position = "none") +
        xlab("log2(average fold change)") +
        ylab("-log10(p-value)") + ggtitle(paste(partner2, '(left)', 'vs', partner1, '(right)'))
# full argument names instead of relying on partial matching of `file` to `filename`
ggsave(filename = paste0(dir_path, '/', partner1, '_vs_', partner2, '_volcano_plot.pdf'), plot = volcano, height = 8, width = 10)

write.csv(selected_markers_combined, file = paste0(dir_path, '/DGE_', partner1, '_vs_', partner2, '.csv'))

# SCNI vs MS analysis NON-pairwise

In [None]:
#prepare the file
# Re-assign the working object from object_analysis_general (defined outside this
# section); the bare name on the last line prints the object as the cell output.
object_analysis <- object_analysis_general
object_analysis

In [None]:
# Make 'sample' the active identity and remove the two samples listed below
# (invert = TRUE keeps everything except the given identities).
Idents(object_analysis) <- 'sample'
samples_to_drop <- c('AU-MS-2', 'AU-H-2')
object_analysis <- subset(object_analysis, idents = samples_to_drop, invert = TRUE)
object_analysis

## Start for the first partner

In [None]:
#find first the markers per cluster
# Parameters of the comparison: the two groups, their candidate feature sets,
# the clusters to test, the Enrichr libraries and the DE thresholds.
partner1 <- 'SCNI'
partner2 <- 'MS'
features_1 <- features_scni
features_2 <- features_ms
clusters_of_interest <- c('6_GZMK', '8_CD74', '9_IKZF2', '10_FGFBP2')
databases_list <- c('GO_Biological_Process_2021', 'Reactome_2016', 'MSigDB_Hallmark_2020')
grouping_de <- 'diagnosis_simp'
logfc.threshold <- 0.05
pvalue <- 0.05

#create output dirs
# recursive = TRUE also creates a missing './outs' parent directory; existing
# directories are left untouched, so re-running the cell does not warn
dir_path <- paste0('./outs/', partner1, '_vs_', partner2, '_NON_pairwise')
if(!dir.exists(dir_path)){dir.create(dir_path, recursive = TRUE)}
plots_dir <- paste0(dir_path, '/plots/')
if(!dir.exists(plots_dir)){dir.create(plots_dir, recursive = TRUE)}
#define colors
# presumably one colour per identity level set below (Healthy, SCNI, MS) - TODO confirm;
# colour1 / colour2 repeat the second and third entry
colours_diagnosis_groups <- c('#bbbbbb', '#d1988f', '#8e2311')
colour1 <- '#d1988f'
colour2 <- '#8e2311'

#object - plotting object
Idents(object) <- grouping_de
levels(object) <- c('Healthy', 'SCNI', 'MS')
# per-identity average expression returned as a Seurat object; its scale.data
# slot is used for the heatmaps further down
object_av <- AverageExpression(object, assay = "RNA", return.seurat = TRUE, verbose = FALSE)
Idents(object_analysis) <- 'cd8_coded'
levels(object_analysis) <- clusters_of_interest

# For every cluster of interest: test partner1 against partner2 and keep the
# genes that are up-regulated in partner1 (only.pos = TRUE) and pass the
# adjusted p-value cut-off. The result is one data frame per cluster.
# preallocated list that is filled by index instead of being grown with append()
de_list_partner1 <- vector('list', length(clusters_of_interest))
# seq_along() does not iterate over an empty vector, unlike 1:length(x)
for(i in seq_along(clusters_of_interest)){
        object_subset <- subset(object_analysis, idents = clusters_of_interest[i])
        markers_partner1 <- FindMarkers(object_subset, ident.1 = partner1, ident.2 = partner2, group.by = grouping_de, features = features_1, 
                   only.pos = TRUE, logfc.threshold = logfc.threshold, verbose = FALSE)
        # keep gene name and cluster as columns so the tables can be combined later
        markers_partner1$genes <- rownames(markers_partner1)
        markers_partner1$cluster <- clusters_of_interest[i]
        markers_partner1 <- markers_partner1[markers_partner1$p_val_adj < pvalue, ]
        de_list_partner1[[i]] <- markers_partner1
}

#search through the markers
# A gene is selected when it is significant in more than `hits` clusters.
# For each selected gene the per-cluster rows are collapsed into one row:
# mean / max / min of avg_log2FC, mean adjusted p-value and the list of clusters.
# NOTE(review): hits is 0 here (one cluster is enough) while the second-partner
# search further down uses hits <- 1 - confirm that this asymmetry is intended.
hits <- 0
# rows are collected in a list and bound once at the end (no rbind() inside the loop)
selected_rows <- list()
# genes that were already evaluated while walking through an earlier cluster
genes_seen <- character(0)

for(i in seq_along(de_list_partner1)){
    markers_cluster <- de_list_partner1[[i]]
    #determine the numbers of the other samples
    number_of_othersamples <- setdiff(seq_along(de_list_partner1), i)
    
    #start to search for the markers in other clusters
    # Iterating over the gene names is safe for a cluster without any significant gene
    # (1:nrow() would iterate over c(1, 0) there). A gene seen in an earlier cluster is
    # skipped: its hit count is the same from every cluster, and only the first
    # occurrence was kept by the former de-duplication step anyway.
    for(gene_to_test in setdiff(markers_cluster$genes, genes_seen)){
        intersect_gene <- filter(markers_cluster, genes == gene_to_test)
        #open the loop for the other samples
        for(o in number_of_othersamples){
           markers_other_cluster <- de_list_partner1[[o]]
           if(gene_to_test %in% markers_other_cluster$genes){
             intersect_gene <- rbind(intersect_gene, filter(markers_other_cluster, genes == gene_to_test))
           }
        }
        #add the result to the final table
        if(nrow(intersect_gene) > hits){
            # the row of the current cluster is the template; the statistics are overwritten
            gene_to_add <- intersect_gene[1, ]
            gene_to_add$avg_log2FC <- mean(intersect_gene$avg_log2FC)
            gene_to_add$max_log2FC <- max(intersect_gene$avg_log2FC)
            gene_to_add$min_log2FC <- min(intersect_gene$avg_log2FC)
            gene_to_add$avg_p_val_adj <- mean(intersect_gene$p_val_adj)
            gene_to_add$cluster <- paste0(intersect_gene$cluster, collapse = ', ')
            selected_rows[[length(selected_rows) + 1]] <- gene_to_add
        }
    }
    genes_seen <- c(genes_seen, markers_cluster$genes)
}
selected_markers_partner1 <- do.call(rbind, selected_rows)
# fail early with a clear message instead of continuing with an empty result
if(is.null(selected_markers_partner1)){
    stop('No gene is significant for ', partner1, ' in more than ', hits, ' cluster(s)', call. = FALSE)
}
# strongest average log2 fold change first
selected_markers_partner1 <- selected_markers_partner1[order(-selected_markers_partner1$avg_log2FC), ]

#plot the intersected markers for the next verification step
#create the dir for vln plots per partner
plots_partner1 <- paste0(plots_dir, partner1, '/')
# create the directory only when it is missing, so re-running the cell does not warn
if(!dir.exists(plots_partner1)){dir.create(plots_partner1)}

#create ordered heatmap
# values from the scale.data slot of object_av for the selected genes, sorted by
# decreasing value in the partner1 column; drop = FALSE keeps the two-dimensional
# shape when only a single gene was selected
ordered_genes <- object_av@assays$RNA@scale.data[selected_markers_partner1$genes, , drop = FALSE]
ordered_genes <- as.data.frame(ordered_genes)
ordered_genes <- ordered_genes[order(-ordered_genes[[partner1]]), , drop = FALSE]

# genes whose value is higher in partner1 than in partner2 #attention!
genes_higher <- rownames(ordered_genes)[ordered_genes[[partner1]] > ordered_genes[[partner2]]]
#save the genes higher as the partner on average
write.csv(genes_higher, file = paste0(plots_partner1, 'above_the_partner_average.csv'))

# genes whose value is lower in partner1 than in partner2 #attention!
genes_lower <- rownames(ordered_genes)[ordered_genes[[partner1]] < ordered_genes[[partner2]]]
#save the genes lower as the partner on average
write.csv(genes_lower, file = paste0(plots_partner1, 'below_the_partner_average.csv'))

#prepare the genes for heatmap
ordered_genes <- rownames(ordered_genes)

#top 20 sorted
# head() returns all genes when there are fewer than 20
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width=5, repr.plot.height=12)
heatmap <- DoHeatmap(object_av, features = ordered_genes_plot, draw.lines = FALSE, size = 7,  angle = 270, hjust = 1, raster = FALSE, group.colors = colours_diagnosis_groups) + 
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
         axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')) + 
scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
# full argument names instead of relying on partial matching of `file` to `filename`
ggsave(filename = paste0(plots_partner1, 'heatmap_top20_1.pdf'), plot = heatmap, width = 5, height = 12)

# one violin plot per selected gene, saved under the gene name
for(g in seq_along(ordered_genes)){
    plot <- VlnPlot(object, features = ordered_genes[g], pt.size = 0.01, cols = colours_diagnosis_groups)
    # make the second layer of the plot (presumably the points - TODO confirm) nearly transparent
    plot$layers[[2]]$aes_params$alpha <- 0.1
    ggsave(filename = paste0(plots_partner1, ordered_genes[g], '.pdf'), plot = plot, width = 6, height = 6)
}

In [None]:
#now look through the genes and select the ones validated on the full cohort
# remove the genes that are lower in partner1 than in partner2 on average
# (genes_lower is computed in the preceding cell)
genes_remove <- genes_lower
selected_markers_partner1 <- selected_markers_partner1[!selected_markers_partner1$genes %in% genes_remove, ]

#create ordered heatmap
# drop = FALSE keeps the two-dimensional shape when only a single gene is left
ordered_genes <- object_av@assays$RNA@scale.data[selected_markers_partner1$genes, , drop = FALSE]
ordered_genes <- as.data.frame(ordered_genes)
ordered_genes <- ordered_genes[order(-ordered_genes[[partner1]]), , drop = FALSE]
ordered_genes <- rownames(ordered_genes)

#top 20 sorted
# head() returns all genes when there are fewer than 20
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width=5, repr.plot.height=12)
heatmap <- DoHeatmap(object_av, features = ordered_genes_plot, draw.lines = FALSE, size = 7,  angle = 270, hjust = 1, raster = FALSE, group.colors = colours_diagnosis_groups) + 
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
         axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')) + 
scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
# full argument names instead of relying on partial matching of `file` to `filename`
ggsave(filename = paste0(dir_path, '/', partner1, '_heatmap_top20.pdf'), plot = heatmap, width = 5, height = 12)

In [None]:
#start the enrichr PEA for first partner
dir_pea_partner1 <- paste0(dir_path, '/PEA_', partner1, '/')
# create the directory only when it is missing, so re-running the cell does not warn
if(!dir.exists(dir_pea_partner1)){dir.create(dir_pea_partner1)}

# Axis transformation that plots -log(x, base), so smaller p-values are drawn
# further along the axis. Defined once instead of in every loop iteration.
reverselog_trans <- function(base = exp(1)) {
    trans <- function(x) -log(x, base)
    inv <- function(x) base^(-x)
    trans_new(paste0("reverselog-", format(base)), trans, inv,
              log_breaks(base = base),
              domain = c(1e-100, Inf))
}

# per database: query Enrichr, save the table, plot the top terms and the genes behind them
for(db in seq_along(databases_list)){
        enriched <- enrichr(selected_markers_partner1$genes, databases = databases_list[db])
        enriched <- enriched[[1]]
        # descending adjusted p-value: the most significant terms are the LAST rows
        enriched <- enriched[order(-enriched$Adjusted.P.value), ]
        enriched$Term <- factor(enriched$Term, levels = unique(enriched$Term))
        #save the positive enriched pathways
        write.csv(enriched, file = paste0(dir_pea_partner1, partner1, '_', databases_list[db], '_positive_', '_pea_list.csv'))

        # nothing to plot for a database that returned no term
        if(nrow(enriched) == 0){
            message('No enriched terms for ', partner1, ' in ', databases_list[db], ' - plots skipped')
            next
        }

        # the 20 most significant terms; tail() returns all rows when there are fewer
        top_terms <- tail(enriched, 20)

        #2881c1 - for blue
        #d3556e - for red
        options(repr.plot.width=22, repr.plot.height=11)
        plot_positive <- ggplot(top_terms, aes(y=Term, x= Adjusted.P.value))+
            geom_vline(xintercept = .05, color = "grey", linetype="dashed") +
            geom_segment( aes(yend=Term, xend=1), col= "black") +
            geom_point(shape=21, aes(size = abs(Combined.Score)), fill = colour1) +
            scale_x_continuous(trans=reverselog_trans(10))+
            scale_size_continuous(range = c(2, 13)) +
            theme_tufte()+ xlab("p value (adj)") + ylab("") +
            theme(text=element_text(family="Helvetica", size = 26),
              axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") + ggtitle(paste(partner1,  'positive', databases_list[db]))
        # full argument names instead of relying on partial matching of `file` to `filename`
        ggsave(filename = paste0(dir_pea_partner1, partner1, '_', databases_list[db], '_positive', '.pdf'), plot = plot_positive, width = 30, height = 11)

        #plot the genes
        # split the ';'-separated gene lists of the plotted terms
        genes_to_plot <- unlist(strsplit(as.character(top_terms$Genes), split = ';'))
        #reverse because of ascending ordering of the enriched table to put the most significant at the beginning
        genes_to_plot <- unique(rev(genes_to_plot))
        # at most 20 genes are plotted
        genes_to_plot <- head(genes_to_plot, 20)

        heatmap_plot <- DoHeatmap(object_av, features = genes_to_plot, draw.lines = FALSE, raster = FALSE, group.colors = colours_diagnosis_groups) + theme(text = element_text(size = 20, face = "bold")) + scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
        ggsave(filename = paste0(dir_pea_partner1, partner1, '_', databases_list[db], '_HEATMAP_genes_positive', '.pdf'), plot = heatmap_plot, width = 8, height = 11)

        vlnplot <- VlnPlot(object, features = genes_to_plot, stack = TRUE, flip = TRUE, cols = colours_diagnosis_groups, fill.by = "ident") +
                        theme(legend.position = "none",
                          text = element_text(size = 17),
                          axis.text = element_text(size = 17))
        ggsave(filename = paste0(dir_pea_partner1, partner1, '_', databases_list[db], '_StackedVIOLIN_genes_positive', '.pdf'), plot = vlnplot, width = 8, height = 11)
}
#end of the enrichr loop

## Start for second partner

In [None]:
# start the same for the second partner
# For every cluster of interest: test partner2 against partner1 and keep the
# genes that are up-regulated in partner2 (only.pos = TRUE) and pass the
# adjusted p-value cut-off. The result is one data frame per cluster.
Idents(object_analysis) <- 'cd8_coded'
levels(object_analysis) <- clusters_of_interest
# preallocated list that is filled by index instead of being grown with append()
de_list_partner2 <- vector('list', length(clusters_of_interest))

# seq_along() does not iterate over an empty vector, unlike 1:length(x)
for(i in seq_along(clusters_of_interest)){
        object_subset <- subset(object_analysis, idents = clusters_of_interest[i])
        markers_partner2 <- FindMarkers(object_subset, ident.1 = partner2, ident.2 = partner1, group.by = grouping_de, features = features_2, 
                                        only.pos = TRUE, logfc.threshold = logfc.threshold)
        # keep gene name and cluster as columns so the tables can be combined later
        markers_partner2$genes <- rownames(markers_partner2)
        markers_partner2$cluster <- clusters_of_interest[i]
        markers_partner2 <- markers_partner2[markers_partner2$p_val_adj < pvalue, ]
        de_list_partner2[[i]] <- markers_partner2
}

#search through the markers
# A gene is selected when it is significant in more than `hits` clusters.
# For each selected gene the per-cluster rows are collapsed into one row:
# mean / max / min of avg_log2FC, mean adjusted p-value and the list of clusters.
# NOTE(review): hits is 1 here (at least two clusters) while the first-partner
# search of this analysis uses hits <- 0 - confirm that this asymmetry is intended.
hits <- 1
# rows are collected in a list and bound once at the end (no rbind() inside the loop)
selected_rows <- list()
# genes that were already evaluated while walking through an earlier cluster
genes_seen <- character(0)

for(i in seq_along(de_list_partner2)){
    markers_cluster <- de_list_partner2[[i]]
    #determine the numbers of the other samples
    number_of_othersamples <- setdiff(seq_along(de_list_partner2), i)
    
    #start to search for the markers in other clusters
    # Iterating over the gene names is safe for a cluster without any significant gene
    # (1:nrow() would iterate over c(1, 0) there). A gene seen in an earlier cluster is
    # skipped: its hit count is the same from every cluster, and only the first
    # occurrence was kept by the former de-duplication step anyway.
    for(gene_to_test in setdiff(markers_cluster$genes, genes_seen)){
        intersect_gene <- filter(markers_cluster, genes == gene_to_test)
        #open the loop for the other samples
        for(o in number_of_othersamples){
           markers_other_cluster <- de_list_partner2[[o]]
           if(gene_to_test %in% markers_other_cluster$genes){
             intersect_gene <- rbind(intersect_gene, filter(markers_other_cluster, genes == gene_to_test))
           }
        }
        #add the result to the final table
        if(nrow(intersect_gene) > hits){
            # the row of the current cluster is the template; the statistics are overwritten
            gene_to_add <- intersect_gene[1, ]
            gene_to_add$avg_log2FC <- mean(intersect_gene$avg_log2FC)
            gene_to_add$max_log2FC <- max(intersect_gene$avg_log2FC)
            gene_to_add$min_log2FC <- min(intersect_gene$avg_log2FC)
            gene_to_add$avg_p_val_adj <- mean(intersect_gene$p_val_adj)
            gene_to_add$cluster <- paste0(intersect_gene$cluster, collapse = ', ')
            selected_rows[[length(selected_rows) + 1]] <- gene_to_add
        }
    }
    genes_seen <- c(genes_seen, markers_cluster$genes)
}
selected_markers_partner2 <- do.call(rbind, selected_rows)
# fail early with a clear message instead of continuing with an empty result
if(is.null(selected_markers_partner2)){
    stop('No gene is significant for ', partner2, ' in more than ', hits, ' cluster(s)', call. = FALSE)
}
# strongest average log2 fold change first
selected_markers_partner2 <- selected_markers_partner2[order(-selected_markers_partner2$avg_log2FC), ]

#plot the intersected markers for the next verification step
#create the dir for vln plots per partner
plots_partner2 <- paste0(plots_dir, partner2, '/')
# create the directory only when it is missing, so re-running the cell does not warn
if(!dir.exists(plots_partner2)){dir.create(plots_partner2)}

#create ordered heatmap
# values from the scale.data slot of object_av for the selected genes, sorted by
# decreasing value in the partner2 column; drop = FALSE keeps the two-dimensional
# shape when only a single gene was selected
ordered_genes <- object_av@assays$RNA@scale.data[selected_markers_partner2$genes, , drop = FALSE]
ordered_genes <- as.data.frame(ordered_genes)
ordered_genes <- ordered_genes[order(-ordered_genes[[partner2]]), , drop = FALSE]

# genes whose value is higher in partner2 than in partner1 #attention!
genes_higher <- rownames(ordered_genes)[ordered_genes[[partner2]] > ordered_genes[[partner1]]]
#save the genes higher as the partner on average
write.csv(genes_higher, file = paste0(plots_partner2, 'above_the_partner_average.csv'))

# genes whose value is lower in partner2 than in partner1 #attention!
genes_lower <- rownames(ordered_genes)[ordered_genes[[partner2]] < ordered_genes[[partner1]]]
#save the genes lower as the partner on average
write.csv(genes_lower, file = paste0(plots_partner2, 'below_the_partner_average.csv'))

#prepare the genes for heatmap
ordered_genes <- rownames(ordered_genes)

#top 20 sorted
# head() returns all genes when there are fewer than 20
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width=5, repr.plot.height=12)
heatmap <- DoHeatmap(object_av, features = ordered_genes_plot, draw.lines = FALSE, size = 7,  angle = 270, hjust = 1, raster = FALSE, group.colors = colours_diagnosis_groups) + 
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
         axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')) + 
scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
# full argument names instead of relying on partial matching of `file` to `filename`
ggsave(filename = paste0(plots_partner2, 'heatmap_top20_1.pdf'), plot = heatmap, width = 5, height = 12)

# one violin plot per selected gene, saved under the gene name
for(g in seq_along(ordered_genes)){
    plot <- VlnPlot(object, features = ordered_genes[g], pt.size = 0.01, cols = colours_diagnosis_groups)
    # make the second layer of the plot (presumably the points - TODO confirm) nearly transparent
    plot$layers[[2]]$aes_params$alpha <- 0.1
    ggsave(filename = paste0(plots_partner2, ordered_genes[g], '.pdf'), plot = plot, width = 6, height = 6)
}

In [None]:
#now look through the genes and select the ones validated on the full cohort

# remove the genes that are lower in partner2 than in partner1 on average
# (genes_lower is computed in the preceding cell)
genes_remove <- genes_lower
selected_markers_partner2 <- selected_markers_partner2[!selected_markers_partner2$genes %in% genes_remove, ]

#create ordered heatmap
# drop = FALSE keeps the two-dimensional shape when only a single gene is left
ordered_genes <- object_av@assays$RNA@scale.data[selected_markers_partner2$genes, , drop = FALSE]
ordered_genes <- as.data.frame(ordered_genes)
ordered_genes <- ordered_genes[order(-ordered_genes[[partner2]]), , drop = FALSE]
ordered_genes <- rownames(ordered_genes)

#top 20 sorted
# head() returns all genes when there are fewer than 20
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width=5, repr.plot.height=12)
heatmap <- DoHeatmap(object_av, features = ordered_genes_plot, draw.lines = FALSE, size = 7,  angle = 270, hjust = 1, raster = FALSE, group.colors = colours_diagnosis_groups) + 
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
         axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')) + 
scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
# full argument names instead of relying on partial matching of `file` to `filename`
ggsave(filename = paste0(dir_path, '/', partner2, '_heatmap_top20.pdf'), plot = heatmap, width = 5, height = 12)

In [None]:
# Start the enrichr pathway enrichment analysis (PEA) for the second partner.
dir_pea_partner2 <- paste0(dir_path, '/PEA_', partner2, '/')
# showWarnings = FALSE: re-running the cell must not warn about an existing directory
dir.create(dir_pea_partner2, showWarnings = FALSE, recursive = TRUE)

# Axis transformation that plots -log(x, base), so small adjusted p-values end
# up on the right. Defined once instead of on every loop iteration.
reverselog_trans <- function(base = exp(1)) {
    trans <- function(x) -log(x, base)
    inv <- function(x) base^(-x)
    trans_new(paste0("reverselog-", format(base)), trans, inv,
              log_breaks(base = base),
              domain = c(1e-100, Inf))
}

# One pass per Enrichr library: save the table, a lollipop plot of the terms,
# and a heatmap plus stacked violin plot of the genes behind the top terms.
for(db in seq_along(databases_list)){
    database <- databases_list[db]
    enriched <- enrichr(selected_markers_partner2$genes, databases = database)
    enriched <- enriched[[1]]
    # nothing to rank or plot when no term is returned for this library
    if(is.null(enriched) || nrow(enriched) == 0){
        message('No enriched terms for ', partner2, ' in ', database, '; skipping')
        next
    }
    # descending adjusted p-value: the most significant terms are the last rows
    enriched <- enriched[order(-enriched$Adjusted.P.value), ]
    enriched$Term <- factor(enriched$Term, levels = unique(enriched$Term))
    #save the positive enriched pathways
    write.csv(enriched, file = paste0(dir_pea_partner2, partner2, '_', database, '_positive_', '_pea_list.csv'))

    # the 20 most significant terms (all terms when there are 20 or fewer)
    top_terms <- tail(enriched, 20)

    #2881c1 - for blue
    #d3556e - for red
    options(repr.plot.width = 22, repr.plot.height = 11)
    plot_positive <- ggplot(top_terms, aes(y = Term, x = Adjusted.P.value)) +
        geom_vline(xintercept = .05, color = "grey", linetype = "dashed") +
        geom_segment(aes(yend = Term, xend = 1), col = "black") +
        geom_point(shape = 21, aes(size = abs(Combined.Score)), fill = colour2) +
        scale_x_continuous(trans = reverselog_trans(10)) +
        scale_size_continuous(range = c(2, 13)) +
        theme_tufte() + xlab("p value (adj)") + ylab("") +
        theme(text = element_text(family = "Helvetica", size = 26),
              axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") +
        ggtitle(paste(partner2, 'positive', database))
    ggsave(filename = paste0(dir_pea_partner2, partner2, '_', database, '_positive', '.pdf'), plot = plot_positive, width = 30, height = 11)

    #plot the genes
    # Genes of the top terms; reversed because of the ascending significance
    # order of the table, so genes of the most significant term come first.
    genes_to_plot <- unlist(strsplit(as.character(top_terms$Genes), split = ';', fixed = TRUE))
    genes_to_plot <- unique(rev(genes_to_plot))
    genes_to_plot <- head(genes_to_plot, 20)

    heatmap_plot <- DoHeatmap(object_av, features = genes_to_plot, draw.lines = FALSE, raster = FALSE, group.colors = colours_diagnosis_groups) +
        theme(text = element_text(size = 20, face = "bold")) +
        scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
    ggsave(filename = paste0(dir_pea_partner2, partner2, '_', database, '_HEATMAP_genes_positive', '.pdf'), plot = heatmap_plot, width = 8, height = 11)

    vlnplot <- VlnPlot(object, features = genes_to_plot, stack = TRUE, flip = TRUE, cols = colours_diagnosis_groups, fill.by = "ident") +
        theme(legend.position = "none",
              text = element_text(size = 17),
              axis.text = element_text(size = 17))
    ggsave(filename = paste0(dir_pea_partner2, partner2, '_', database, '_StackedVIOLIN_genes_positive', '.pdf'), plot = vlnplot, width = 8, height = 11)
}
#end of the enrichr loop

## Combine the final result and plot volcano

In [None]:
# Number of top genes per partner that are highlighted and labelled.
number_of_genes_volcano <- 25
options(repr.plot.width = 10, repr.plot.height = 8)

# Tag each marker table with its partner and save it.
selected_markers_partner1$partner <- partner1
write.csv(selected_markers_partner1, file = paste0(dir_path, '/DGE_', partner1, '.csv'))
selected_markers_partner2$partner <- partner2
write.csv(selected_markers_partner2, file = paste0(dir_path, '/DGE_', partner2, '.csv'))

# Mirror the partner2 fold changes so partner2 genes are drawn left of zero.
# NOTE: this negates in place, so re-running the cell flips the sign back.
selected_markers_partner2$avg_log2FC <- -selected_markers_partner2$avg_log2FC
selected_markers_combined <- rbind(selected_markers_partner1, selected_markers_partner2)

# head() instead of [1:n, ]: with fewer than n markers the index form pads the
# table with all-NA rows, which ggplot then drops with warnings.
top_partner1 <- head(selected_markers_partner1, number_of_genes_volcano)
top_partner2 <- head(selected_markers_partner2, number_of_genes_volcano)

# x: mean log2FC over the clusters, y: -log10 of the mean adjusted p-value.
volcano <- ggplot(selected_markers_combined, aes(x = avg_log2FC, y = -log10(avg_p_val_adj))) +
    geom_vline(xintercept = 0) +
    geom_hline(yintercept = -log10(0.05), color = "grey", linetype = "dashed") +
    geom_point(data = selected_markers_combined,
               color = "grey", alpha = 1) +
    geom_point(data = top_partner1,
               fill = colour1, alpha = 1, shape = 21, size = 2.5) +
    geom_point(data = top_partner2,
               fill = colour2, alpha = 1, shape = 21, size = 2.5) +
    geom_text_repel(data = rbind(top_partner1, top_partner2), max.overlaps = number_of_genes_volcano, aes(label = genes)) +
    theme_linedraw() +
    theme(panel.grid = element_blank(), legend.position = "none") +
    xlab("log2(average fold change)") +
    ylab("-log10(p-value)") + ggtitle(paste(partner2, '(left)', 'vs', partner1, '(right)'))
ggsave(filename = paste0(dir_path, '/', partner1, '_vs_', partner2, '_volcano_plot.pdf'), plot = volcano, height = 8, width = 10)

write.csv(selected_markers_combined, file = paste0(dir_path, '/DGE_', partner1, '_vs_', partner2, '.csv'))

# MS vs Healthy analysis NON-pairwise

In [None]:
# Prepare the object for this comparison: reset object_analysis to the
# general analysis object, so filtering done for an earlier comparison is discarded.
object_analysis <- object_analysis_general
# show the object summary in the notebook output
object_analysis

In [None]:
# Use the 'sample' metadata column as identity so that subset() can select by sample.
Idents(object_analysis) <- 'sample'
# invert = TRUE: keep every cell except those of the two listed samples
# (the reason for excluding exactly these samples is not stated here - TODO document)
object_analysis <- subset(object_analysis, idents = c('AU-MS-2', 'AU-H-2'), invert = TRUE)
# show the object summary after filtering
object_analysis

## Start for the first partner

In [None]:
# Configuration of the MS vs Healthy comparison; markers are searched per cluster.
partner1 <- 'MS'
partner2 <- 'Healthy'
# candidate gene sets tested for partner1 and partner2, respectively
features_1 <- features_ms
features_2 <- features_healthy
clusters_of_interest <- c('6_GZMK', '8_CD74', '9_IKZF2', '10_FGFBP2')
databases_list <- c('GO_Biological_Process_2021', 'Reactome_2016', 'MSigDB_Hallmark_2020')
# metadata column that holds the groups compared by FindMarkers()
grouping_de <- 'diagnosis_simp'
logfc.threshold <- 0.05
# cut-off applied to the adjusted p-value of every per-cluster test
pvalue <- 0.05

#create output dirs
# recursive = TRUE also creates './outs' when it is missing;
# showWarnings = FALSE keeps re-runs quiet when the directories already exist.
dir_path <- paste0('./outs/', partner1, '_vs_', partner2, '_NON_pairwise')
dir.create(dir_path, showWarnings = FALSE, recursive = TRUE)
plots_dir <- paste0(dir_path, '/plots/')
dir.create(plots_dir, showWarnings = FALSE, recursive = TRUE)
#define colors
# one colour per identity level in the order set below (Healthy, SCNI, MS)
colours_diagnosis_groups <- c('#bbbbbb', '#d1988f', '#8e2311')
colour1 <- '#8e2311'
colour2 <- '#bbbbbb'

#object - plotting object
Idents(object) <- grouping_de
levels(object) <- c('Healthy', 'SCNI', 'MS')
# per-group average expression, returned as a Seurat object for the heatmaps
object_av <- AverageExpression(object, assay = "RNA", return.seurat = TRUE, verbose = FALSE)
# the analysis object is split by cluster in the differential expression loop
Idents(object_analysis) <- 'cd8_coded'
levels(object_analysis) <- clusters_of_interest

# Differential expression of partner1 vs partner2 inside every cluster of interest.
# The result is a list with one marker table per cluster (same order as
# clusters_of_interest); it is preallocated instead of grown with append().
de_list_partner1 <- vector('list', length(clusters_of_interest))
for(i in seq_along(clusters_of_interest)){
    object_subset <- subset(object_analysis, idents = clusters_of_interest[i])
    # only.pos = TRUE: keep genes that are higher in partner1
    markers_partner1 <- FindMarkers(object_subset, ident.1 = partner1, ident.2 = partner2, group.by = grouping_de, features = features_1,
                                    only.pos = TRUE, logfc.threshold = logfc.threshold, verbose = FALSE)
    markers_partner1$genes <- rownames(markers_partner1)
    markers_partner1$cluster <- clusters_of_interest[i]
    # keep the significant hits only
    markers_partner1 <- markers_partner1[markers_partner1$p_val_adj < pvalue, ]
    de_list_partner1[[i]] <- markers_partner1
}

# Search through the per-cluster marker tables and collect, for every gene, the
# hits from all clusters. A gene is kept when it is significant in more than
# `hits` clusters.
# NOTE(review): hits is 0 here (one cluster is enough), whereas the
# second-partner search of this comparison sets hits <- 1 - confirm the
# asymmetry is intended.
hits <- 0
selected_markers_partner1 <- c()

for(i in seq_along(de_list_partner1)){
    markers_cluster <- de_list_partner1[[i]]
    # indices of the other clusters
    number_of_othersamples <- setdiff(seq_along(de_list_partner1), i)

    # seq_len() instead of 1:nrow(): for a cluster without significant markers
    # 1:0 iterates over 1 and 0, and the zero-length gene of the second pass
    # makes the if() below fail with "argument is of length zero".
    for(r in seq_len(nrow(markers_cluster))){
        gene_to_test <- markers_cluster$genes[r]
        intersect_gene <- dplyr::filter(markers_cluster, genes == gene_to_test)
        # look the gene up in the other clusters
        for(o in number_of_othersamples){
            markers_other_cluster <- de_list_partner1[[o]]
            if(gene_to_test %in% markers_other_cluster$genes){
                intersect_gene <- rbind(intersect_gene, dplyr::filter(markers_other_cluster, genes == gene_to_test))
            }
        }
        # add the result to the final table
        if(nrow(intersect_gene) > hits){
            # the first row is the hit of the current cluster; its remaining
            # columns are kept as they are
            gene_to_add <- intersect_gene[1, ]
            gene_to_add$avg_log2FC <- mean(intersect_gene$avg_log2FC)
            gene_to_add$max_log2FC <- max(intersect_gene$avg_log2FC)
            gene_to_add$min_log2FC <- min(intersect_gene$avg_log2FC)
            # arithmetic mean of the adjusted p-values over the clusters
            gene_to_add$avg_p_val_adj <- mean(intersect_gene$p_val_adj)
            gene_to_add$cluster <- paste0(intersect_gene$cluster, collapse = ', ')
            selected_markers_partner1 <- rbind(selected_markers_partner1, gene_to_add)
        }
    }
}
# fail with a clear message instead of an opaque error in the sorting step
if(is.null(selected_markers_partner1)){
    stop('No ', partner1, ' markers passed the cluster-intersection filter', call. = FALSE)
}
# a shared gene is found once per cluster: keep its first occurrence only
selected_markers_partner1 <- selected_markers_partner1[!duplicated(selected_markers_partner1$genes), ]
selected_markers_partner1 <- selected_markers_partner1[order(-selected_markers_partner1$avg_log2FC), ]

#plot the intersected markers for the next verification step
#create the dir for vln plots per partner
plots_partner1 <- paste0(plots_dir, partner1, '/')
dir.create(plots_partner1, showWarnings = FALSE, recursive = TRUE)

# Scaled group averages of the candidate markers, ordered by partner1 (descending).
# drop = FALSE keeps a matrix even when only one marker was selected; without
# it the row collapses to a vector and the group columns are lost.
ordered_genes <- object_av@assays$RNA@scale.data[selected_markers_partner1$genes, , drop = FALSE]
ordered_genes <- as.data.frame(ordered_genes)
ordered_genes <- ordered_genes[order(-ordered_genes[, partner1]), , drop = FALSE]

# attention! genes whose scaled average is higher in partner1 than in partner2
genes_higher <- rownames(ordered_genes)[ordered_genes[[partner1]] > ordered_genes[[partner2]]]
#save the genes higher as the partner on average
write.csv(genes_higher, file = paste0(plots_partner1, 'above_the_partner_average.csv'))

# attention! genes whose scaled average is lower in partner1 than in partner2
genes_lower <- rownames(ordered_genes)[ordered_genes[[partner1]] < ordered_genes[[partner2]]]
#save the genes lower as the partner on average
write.csv(genes_lower, file = paste0(plots_partner1, 'below_the_partner_average.csv'))

#prepare the genes for heatmap
ordered_genes <- rownames(ordered_genes)

# top 20 sorted (head() returns the whole vector when fewer than 20 genes exist)
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width = 5, repr.plot.height = 12)
heatmap <- DoHeatmap(
    object_av,
    features = ordered_genes_plot,
    draw.lines = FALSE,
    size = 7,
    angle = 270,
    hjust = 1,
    raster = FALSE,
    group.colors = colours_diagnosis_groups
) +
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')
    ) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
ggsave(filename = paste0(plots_partner1, 'heatmap_top20_1.pdf'), plot = heatmap, width = 5, height = 12)

# One violin plot per candidate marker; seq_along() avoids the 1:0 iteration
# of 1:length() when there is no gene to plot.
for(g in seq_along(ordered_genes)){
    plot <- VlnPlot(object, features = ordered_genes[g], pt.size = 0.01, cols = colours_diagnosis_groups)
    # fade the second layer (presumably the jittered points - TODO confirm)
    plot$layers[[2]]$aes_params$alpha <- 0.1
    ggsave(filename = paste0(plots_partner1, ordered_genes[g], '.pdf'), plot = plot, width = 6, height = 6)
}

In [None]:
# Now look through the genes and keep only the validated ones: every gene
# listed in genes_lower (scaled group average below the other partner) is removed.
genes_remove <- genes_lower
selected_markers_partner1 <- selected_markers_partner1[!selected_markers_partner1$genes %in% genes_remove, ]

# Order the remaining genes by their scaled group average in partner1 (descending).
# drop = FALSE keeps a matrix even when a single gene is left; without it the
# row collapses to a vector and the lookup of the partner column below fails.
ordered_genes <- object_av@assays$RNA@scale.data[selected_markers_partner1$genes, , drop = FALSE]
ordered_genes <- as.data.frame(ordered_genes)
ordered_genes <- ordered_genes[order(-ordered_genes[, partner1]), , drop = FALSE]
ordered_genes <- rownames(ordered_genes)

# top 20 sorted (head() returns the whole vector when fewer than 20 genes remain)
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width = 5, repr.plot.height = 12)
heatmap <- DoHeatmap(
    object_av,
    features = ordered_genes_plot,
    draw.lines = FALSE,
    size = 7,
    angle = 270,
    hjust = 1,
    raster = FALSE,
    group.colors = colours_diagnosis_groups
) +
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')
    ) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
ggsave(filename = paste0(dir_path, '/', partner1, '_heatmap_top20.pdf'), plot = heatmap, width = 5, height = 12)

In [None]:
# Start the enrichr pathway enrichment analysis (PEA) for the first partner.
dir_pea_partner1 <- paste0(dir_path, '/PEA_', partner1, '/')
# showWarnings = FALSE: re-running the cell must not warn about an existing directory
dir.create(dir_pea_partner1, showWarnings = FALSE, recursive = TRUE)

# Axis transformation that plots -log(x, base), so small adjusted p-values end
# up on the right. Defined once instead of on every loop iteration.
reverselog_trans <- function(base = exp(1)) {
    trans <- function(x) -log(x, base)
    inv <- function(x) base^(-x)
    trans_new(paste0("reverselog-", format(base)), trans, inv,
              log_breaks(base = base),
              domain = c(1e-100, Inf))
}

# One pass per Enrichr library: save the table, a lollipop plot of the terms,
# and a heatmap plus stacked violin plot of the genes behind the top terms.
for(db in seq_along(databases_list)){
    database <- databases_list[db]
    enriched <- enrichr(selected_markers_partner1$genes, databases = database)
    enriched <- enriched[[1]]
    # nothing to rank or plot when no term is returned for this library
    if(is.null(enriched) || nrow(enriched) == 0){
        message('No enriched terms for ', partner1, ' in ', database, '; skipping')
        next
    }
    # descending adjusted p-value: the most significant terms are the last rows
    enriched <- enriched[order(-enriched$Adjusted.P.value), ]
    enriched$Term <- factor(enriched$Term, levels = unique(enriched$Term))
    #save the positive enriched pathways
    write.csv(enriched, file = paste0(dir_pea_partner1, partner1, '_', database, '_positive_', '_pea_list.csv'))

    # the 20 most significant terms (all terms when there are 20 or fewer)
    top_terms <- tail(enriched, 20)

    #2881c1 - for blue
    #d3556e - for red
    options(repr.plot.width = 22, repr.plot.height = 11)
    plot_positive <- ggplot(top_terms, aes(y = Term, x = Adjusted.P.value)) +
        geom_vline(xintercept = .05, color = "grey", linetype = "dashed") +
        geom_segment(aes(yend = Term, xend = 1), col = "black") +
        geom_point(shape = 21, aes(size = abs(Combined.Score)), fill = colour1) +
        scale_x_continuous(trans = reverselog_trans(10)) +
        scale_size_continuous(range = c(2, 13)) +
        theme_tufte() + xlab("p value (adj)") + ylab("") +
        theme(text = element_text(family = "Helvetica", size = 26),
              axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") +
        ggtitle(paste(partner1, 'positive', database))
    ggsave(filename = paste0(dir_pea_partner1, partner1, '_', database, '_positive', '.pdf'), plot = plot_positive, width = 30, height = 11)

    #plot the genes
    # Genes of the top terms; reversed because of the ascending significance
    # order of the table, so genes of the most significant term come first.
    genes_to_plot <- unlist(strsplit(as.character(top_terms$Genes), split = ';', fixed = TRUE))
    genes_to_plot <- unique(rev(genes_to_plot))
    genes_to_plot <- head(genes_to_plot, 20)

    heatmap_plot <- DoHeatmap(object_av, features = genes_to_plot, draw.lines = FALSE, raster = FALSE, group.colors = colours_diagnosis_groups) +
        theme(text = element_text(size = 20, face = "bold")) +
        scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
    ggsave(filename = paste0(dir_pea_partner1, partner1, '_', database, '_HEATMAP_genes_positive', '.pdf'), plot = heatmap_plot, width = 8, height = 11)

    vlnplot <- VlnPlot(object, features = genes_to_plot, stack = TRUE, flip = TRUE, cols = colours_diagnosis_groups, fill.by = "ident") +
        theme(legend.position = "none",
              text = element_text(size = 17),
              axis.text = element_text(size = 17))
    ggsave(filename = paste0(dir_pea_partner1, partner1, '_', database, '_StackedVIOLIN_genes_positive', '.pdf'), plot = vlnplot, width = 8, height = 11)
}
#end of the enrichr loop

## Start for the second partner

In [None]:
# Repeat the per-cluster differential expression for the second partner
# (partner2 vs partner1).
Idents(object_analysis) <- 'cd8_coded'
levels(object_analysis) <- clusters_of_interest
# one marker table per cluster, preallocated instead of grown with append()
de_list_partner2 <- vector('list', length(clusters_of_interest))

for(i in seq_along(clusters_of_interest)){
    object_subset <- subset(object_analysis, idents = clusters_of_interest[i])
    # only.pos = TRUE: keep genes that are higher in partner2;
    # verbose = FALSE matches the first-partner call and silences the progress output
    markers_partner2 <- FindMarkers(object_subset, ident.1 = partner2, ident.2 = partner1, group.by = grouping_de, features = features_2,
                                    only.pos = TRUE, logfc.threshold = logfc.threshold, verbose = FALSE)
    markers_partner2$genes <- rownames(markers_partner2)
    markers_partner2$cluster <- clusters_of_interest[i]
    # keep the significant hits only
    markers_partner2 <- markers_partner2[markers_partner2$p_val_adj < pvalue, ]
    de_list_partner2[[i]] <- markers_partner2
}

# Search through the per-cluster marker tables and collect, for every gene, the
# hits from all clusters. A gene is kept when it is significant in more than
# `hits` clusters.
# NOTE(review): hits is 1 here (at least two clusters), whereas the
# first-partner search of this comparison sets hits <- 0 - confirm the
# asymmetry is intended.
hits <- 1
selected_markers_partner2 <- c()

for(i in seq_along(de_list_partner2)){
    markers_cluster <- de_list_partner2[[i]]
    # indices of the other clusters
    number_of_othersamples <- setdiff(seq_along(de_list_partner2), i)

    # seq_len() instead of 1:nrow(): for a cluster without significant markers
    # 1:0 iterates over 1 and 0, and the zero-length gene of the second pass
    # makes the if() below fail with "argument is of length zero".
    for(r in seq_len(nrow(markers_cluster))){
        gene_to_test <- markers_cluster$genes[r]
        intersect_gene <- dplyr::filter(markers_cluster, genes == gene_to_test)
        # look the gene up in the other clusters
        for(o in number_of_othersamples){
            markers_other_cluster <- de_list_partner2[[o]]
            if(gene_to_test %in% markers_other_cluster$genes){
                intersect_gene <- rbind(intersect_gene, dplyr::filter(markers_other_cluster, genes == gene_to_test))
            }
        }
        # add the result to the final table
        if(nrow(intersect_gene) > hits){
            # the first row is the hit of the current cluster; its remaining
            # columns are kept as they are
            gene_to_add <- intersect_gene[1, ]
            gene_to_add$avg_log2FC <- mean(intersect_gene$avg_log2FC)
            gene_to_add$max_log2FC <- max(intersect_gene$avg_log2FC)
            gene_to_add$min_log2FC <- min(intersect_gene$avg_log2FC)
            # arithmetic mean of the adjusted p-values over the clusters
            gene_to_add$avg_p_val_adj <- mean(intersect_gene$p_val_adj)
            gene_to_add$cluster <- paste0(intersect_gene$cluster, collapse = ', ')
            selected_markers_partner2 <- rbind(selected_markers_partner2, gene_to_add)
        }
    }
}
# fail with a clear message instead of an opaque error in the sorting step
if(is.null(selected_markers_partner2)){
    stop('No ', partner2, ' markers passed the cluster-intersection filter', call. = FALSE)
}
# a shared gene is found once per cluster: keep its first occurrence only
selected_markers_partner2 <- selected_markers_partner2[!duplicated(selected_markers_partner2$genes), ]
selected_markers_partner2 <- selected_markers_partner2[order(-selected_markers_partner2$avg_log2FC), ]

#plot the intersected markers for the next verification step
#create the dir for vln plots per partner
plots_partner2 <- paste0(plots_dir, partner2, '/')
dir.create(plots_partner2, showWarnings = FALSE, recursive = TRUE)

# Scaled group averages of the candidate markers, ordered by partner2 (descending).
# drop = FALSE keeps a matrix even when only one marker was selected; without
# it the row collapses to a vector and the group columns are lost.
ordered_genes <- object_av@assays$RNA@scale.data[selected_markers_partner2$genes, , drop = FALSE]
ordered_genes <- as.data.frame(ordered_genes)
ordered_genes <- ordered_genes[order(-ordered_genes[, partner2]), , drop = FALSE]

# attention! genes whose scaled average is higher in partner2 than in partner1
genes_higher <- rownames(ordered_genes)[ordered_genes[[partner2]] > ordered_genes[[partner1]]]
#save the genes higher as the partner on average
write.csv(genes_higher, file = paste0(plots_partner2, 'above_the_partner_average.csv'))

# attention! genes whose scaled average is lower in partner2 than in partner1
genes_lower <- rownames(ordered_genes)[ordered_genes[[partner2]] < ordered_genes[[partner1]]]
#save the genes lower as the partner on average
write.csv(genes_lower, file = paste0(plots_partner2, 'below_the_partner_average.csv'))

#prepare the genes for heatmap
ordered_genes <- rownames(ordered_genes)

# top 20 sorted (head() returns the whole vector when fewer than 20 genes exist)
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width = 5, repr.plot.height = 12)
heatmap <- DoHeatmap(
    object_av,
    features = ordered_genes_plot,
    draw.lines = FALSE,
    size = 7,
    angle = 270,
    hjust = 1,
    raster = FALSE,
    group.colors = colours_diagnosis_groups
) +
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')
    ) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
ggsave(filename = paste0(plots_partner2, 'heatmap_top20_1.pdf'), plot = heatmap, width = 5, height = 12)

# One violin plot per candidate marker; seq_along() avoids the 1:0 iteration
# of 1:length() when there is no gene to plot.
for(g in seq_along(ordered_genes)){
    plot <- VlnPlot(object, features = ordered_genes[g], pt.size = 0.01, cols = colours_diagnosis_groups)
    # fade the second layer (presumably the jittered points - TODO confirm)
    plot$layers[[2]]$aes_params$alpha <- 0.1
    ggsave(filename = paste0(plots_partner2, ordered_genes[g], '.pdf'), plot = plot, width = 6, height = 6)
}

In [None]:
# Now look through the genes and keep only the validated ones: every gene
# listed in genes_lower (scaled group average below the other partner) is removed.
genes_remove <- genes_lower
selected_markers_partner2 <- selected_markers_partner2[!selected_markers_partner2$genes %in% genes_remove, ]

# Order the remaining genes by their scaled group average in partner2 (descending).
# drop = FALSE keeps a matrix even when a single gene is left; without it the
# row collapses to a vector and the lookup of the partner column below fails.
ordered_genes <- object_av@assays$RNA@scale.data[selected_markers_partner2$genes, , drop = FALSE]
ordered_genes <- as.data.frame(ordered_genes)
ordered_genes <- ordered_genes[order(-ordered_genes[, partner2]), , drop = FALSE]
ordered_genes <- rownames(ordered_genes)

# top 20 sorted (head() returns the whole vector when fewer than 20 genes remain)
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width = 5, repr.plot.height = 12)
heatmap <- DoHeatmap(
    object_av,
    features = ordered_genes_plot,
    draw.lines = FALSE,
    size = 7,
    angle = 270,
    hjust = 1,
    raster = FALSE,
    group.colors = colours_diagnosis_groups
) +
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')
    ) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
ggsave(filename = paste0(dir_path, '/', partner2, '_heatmap_top20.pdf'), plot = heatmap, width = 5, height = 12)

In [None]:
# Start the enrichr pathway enrichment analysis (PEA) for the second partner.
dir_pea_partner2 <- paste0(dir_path, '/PEA_', partner2, '/')
# showWarnings = FALSE: re-running the cell must not warn about an existing directory
dir.create(dir_pea_partner2, showWarnings = FALSE, recursive = TRUE)

# Axis transformation that plots -log(x, base), so small adjusted p-values end
# up on the right. Defined once instead of on every loop iteration.
reverselog_trans <- function(base = exp(1)) {
    trans <- function(x) -log(x, base)
    inv <- function(x) base^(-x)
    trans_new(paste0("reverselog-", format(base)), trans, inv,
              log_breaks(base = base),
              domain = c(1e-100, Inf))
}

# One pass per Enrichr library: save the table, a lollipop plot of the terms,
# and a heatmap plus stacked violin plot of the genes behind the top terms.
for(db in seq_along(databases_list)){
    database <- databases_list[db]
    enriched <- enrichr(selected_markers_partner2$genes, databases = database)
    enriched <- enriched[[1]]
    # nothing to rank or plot when no term is returned for this library
    if(is.null(enriched) || nrow(enriched) == 0){
        message('No enriched terms for ', partner2, ' in ', database, '; skipping')
        next
    }
    # descending adjusted p-value: the most significant terms are the last rows
    enriched <- enriched[order(-enriched$Adjusted.P.value), ]
    enriched$Term <- factor(enriched$Term, levels = unique(enriched$Term))
    #save the positive enriched pathways
    write.csv(enriched, file = paste0(dir_pea_partner2, partner2, '_', database, '_positive_', '_pea_list.csv'))

    # the 20 most significant terms (all terms when there are 20 or fewer)
    top_terms <- tail(enriched, 20)

    #2881c1 - for blue
    #d3556e - for red
    options(repr.plot.width = 22, repr.plot.height = 11)
    plot_positive <- ggplot(top_terms, aes(y = Term, x = Adjusted.P.value)) +
        geom_vline(xintercept = .05, color = "grey", linetype = "dashed") +
        geom_segment(aes(yend = Term, xend = 1), col = "black") +
        geom_point(shape = 21, aes(size = abs(Combined.Score)), fill = colour2) +
        scale_x_continuous(trans = reverselog_trans(10)) +
        scale_size_continuous(range = c(2, 13)) +
        theme_tufte() + xlab("p value (adj)") + ylab("") +
        theme(text = element_text(family = "Helvetica", size = 26),
              axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") +
        ggtitle(paste(partner2, 'positive', database))
    ggsave(filename = paste0(dir_pea_partner2, partner2, '_', database, '_positive', '.pdf'), plot = plot_positive, width = 30, height = 11)

    #plot the genes
    # Genes of the top terms; reversed because of the ascending significance
    # order of the table, so genes of the most significant term come first.
    genes_to_plot <- unlist(strsplit(as.character(top_terms$Genes), split = ';', fixed = TRUE))
    genes_to_plot <- unique(rev(genes_to_plot))
    genes_to_plot <- head(genes_to_plot, 20)

    heatmap_plot <- DoHeatmap(object_av, features = genes_to_plot, draw.lines = FALSE, raster = FALSE, group.colors = colours_diagnosis_groups) +
        theme(text = element_text(size = 20, face = "bold")) +
        scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
    ggsave(filename = paste0(dir_pea_partner2, partner2, '_', database, '_HEATMAP_genes_positive', '.pdf'), plot = heatmap_plot, width = 8, height = 11)

    vlnplot <- VlnPlot(object, features = genes_to_plot, stack = TRUE, flip = TRUE, cols = colours_diagnosis_groups, fill.by = "ident") +
        theme(legend.position = "none",
              text = element_text(size = 17),
              axis.text = element_text(size = 17))
    ggsave(filename = paste0(dir_pea_partner2, partner2, '_', database, '_StackedVIOLIN_genes_positive', '.pdf'), plot = vlnplot, width = 8, height = 11)
}
#end of the enrichr loop

## Combine the final result and plot volcano

In [None]:
# Number of top genes per partner that are highlighted and labelled.
number_of_genes_volcano <- 25
options(repr.plot.width = 10, repr.plot.height = 8)

# Tag each marker table with its partner and save it.
selected_markers_partner1$partner <- partner1
write.csv(selected_markers_partner1, file = paste0(dir_path, '/DGE_', partner1, '.csv'))
selected_markers_partner2$partner <- partner2
write.csv(selected_markers_partner2, file = paste0(dir_path, '/DGE_', partner2, '.csv'))

# Mirror the partner2 fold changes so partner2 genes are drawn left of zero.
# NOTE: this negates in place, so re-running the cell flips the sign back.
selected_markers_partner2$avg_log2FC <- -selected_markers_partner2$avg_log2FC
selected_markers_combined <- rbind(selected_markers_partner1, selected_markers_partner2)

# head() instead of [1:n, ]: with fewer than n markers the index form pads the
# table with all-NA rows, which ggplot then drops with warnings.
top_partner1 <- head(selected_markers_partner1, number_of_genes_volcano)
top_partner2 <- head(selected_markers_partner2, number_of_genes_volcano)

# x: mean log2FC over the clusters, y: -log10 of the mean adjusted p-value.
volcano <- ggplot(selected_markers_combined, aes(x = avg_log2FC, y = -log10(avg_p_val_adj))) +
    geom_vline(xintercept = 0) +
    geom_hline(yintercept = -log10(0.05), color = "grey", linetype = "dashed") +
    geom_point(data = selected_markers_combined,
               color = "grey", alpha = 1) +
    geom_point(data = top_partner1,
               fill = colour1, alpha = 1, shape = 21, size = 2.5) +
    geom_point(data = top_partner2,
               fill = colour2, alpha = 1, shape = 21, size = 2.5) +
    geom_text_repel(data = rbind(top_partner1, top_partner2), max.overlaps = number_of_genes_volcano, aes(label = genes)) +
    theme_linedraw() +
    theme(panel.grid = element_blank(), legend.position = "none") +
    xlab("log2(average fold change)") +
    ylab("-log10(p-value)") + ggtitle(paste(partner2, '(left)', 'vs', partner1, '(right)'))
ggsave(filename = paste0(dir_path, '/', partner1, '_vs_', partner2, '_volcano_plot.pdf'), plot = volcano, height = 8, width = 10)

write.csv(selected_markers_combined, file = paste0(dir_path, '/DGE_', partner1, '_vs_', partner2, '.csv'))

# Combine the results and run combined PEA

In [None]:
# Read the per-partner DGE tables saved by the non-pairwise comparisons.
# Every group appears in two comparisons, hence two tables per group.
# (The MS_vs_Healthy files are written by the volcano cell of that comparison;
# the SCNI_* files are presumably produced the same way - verify they exist.)
#healthy
healthy_markers1 <- read.csv('./outs/MS_vs_Healthy_NON_pairwise/DGE_Healthy.csv')
healthy_markers2 <- read.csv('./outs/SCNI_vs_Healthy_NON_pairwise/DGE_Healthy.csv')
#SCNI
scni_markers1 <- read.csv('./outs/SCNI_vs_Healthy_NON_pairwise/DGE_SCNI.csv')
scni_markers2 <- read.csv('./outs/SCNI_vs_MS_NON_pairwise/DGE_SCNI.csv')
#MS
ms_markers1 <- read.csv('./outs/MS_vs_Healthy_NON_pairwise/DGE_MS.csv')
ms_markers2 <- read.csv('./outs/SCNI_vs_MS_NON_pairwise/DGE_MS.csv')

In [None]:
# Pool the marker genes of each group over its two comparisons; a gene found
# in both comparisons is listed once (first occurrence kept).
healthy_markers <- unique(c(healthy_markers1$genes, healthy_markers2$genes))
scni_markers <- unique(c(scni_markers1$genes, scni_markers2$genes))
ms_markers <- unique(c(ms_markers1$genes, ms_markers2$genes))

In [None]:
#object - plotting object
# Group the plotting object by diagnosis and fix the order of the groups.
grouping_de <- 'diagnosis_simp'
Idents(object) <- grouping_de
levels(object) <- c('Healthy', 'SCNI', 'MS')
# per-group average expression, returned as a Seurat object (used for the heatmaps)
object_av <- AverageExpression(object, assay = "RNA", return.seurat = TRUE, verbose = FALSE)

In [None]:
# Run the PEA and the heatmap for the pooled healthy markers.
# Output directory of the combined analysis; recursive = TRUE also creates
# './outs' when missing, showWarnings = FALSE keeps re-runs quiet.
dir_pea_combined <- './outs/combined_non_pairwise/'
dir.create(dir_pea_combined, showWarnings = FALSE, recursive = TRUE)
databases_list <- c('GO_Biological_Process_2021', 'Reactome_2016', 'MSigDB_Hallmark_2020')
colours_diagnosis_groups <- c('#bbbbbb', '#d1988f', '#8e2311')
# group analysed in this cell, its pooled marker genes and its plot colour
partner <- "Healthy"
features <- healthy_markers
colour <- '#bbbbbb'

# Order the genes by their scaled group average in `partner` (descending).
# drop = FALSE keeps a matrix even for a single gene; without it the row
# collapses to a vector and the lookup of the partner column below fails.
ordered_genes <- object_av@assays$RNA@scale.data[features, , drop = FALSE]
ordered_genes <- as.data.frame(ordered_genes)
ordered_genes <- ordered_genes[order(-ordered_genes[, partner]), , drop = FALSE]
ordered_genes <- rownames(ordered_genes)

# top 20 sorted (head() returns the whole vector when fewer than 20 genes exist)
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width = 5, repr.plot.height = 12)
heatmap <- DoHeatmap(
    object_av,
    features = ordered_genes_plot,
    draw.lines = FALSE,
    size = 7,
    angle = 270,
    hjust = 1,
    raster = FALSE,
    group.colors = colours_diagnosis_groups
) +
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')
    ) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
ggsave(filename = paste0(dir_pea_combined, partner, '_heatmap_top20.pdf'), plot = heatmap, width = 5, height = 12)

for(db in 1:length(databases_list)){
        enriched <- enrichr(features, databases = databases_list[db])
        enriched <- enriched[[1]]
        enriched <- enriched[order(-enriched$Adjusted.P.value), ]
        enriched$Term <- factor(enriched$Term, levels = unique(enriched$Term))
        #save the positive enriched pathways
        write.csv(enriched, file = paste0(dir_pea_combined, partner, '_', databases_list[db], '_positive_', '_pea_list.csv'))
            
        
        reverselog_trans <- function(base = exp(1)) {
            trans <- function(x) -log(x, base)
            inv <- function(x) base^(-x)
            trans_new(paste0("reverselog-", format(base)), trans, inv,
                      log_breaks(base = base),
                      domain = c(1e-100, Inf))
            }
        #2881c1 - for blue
        #d3556e - for red
        options(repr.plot.width=22, repr.plot.height=11)
        if(nrow(enriched) > 20){
            plot_positive <- ggplot(enriched[(nrow(enriched)-19):nrow(enriched), ], aes(y=Term, x= Adjusted.P.value))+
                geom_vline(xintercept = .05, color = "grey", linetype="dashed") +
                geom_segment( aes(yend=Term, xend=1), col= "black") +
                geom_point(shape=21, aes(size = abs(Combined.Score)), fill = colour) +
                scale_x_continuous(trans=reverselog_trans(10))+
                scale_size_continuous(range = c(2, 13)) +
                theme_tufte()+ xlab("p value (adj)") + ylab("") +
                theme(text=element_text(family="Helvetica", size = 26),
                  axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") + ggtitle(paste(partner,  'positive', databases_list[db]))
            } else {
            plot_positive <- ggplot(enriched, aes(y=Term, x= Adjusted.P.value))+
                geom_vline(xintercept = .05, color = "grey", linetype="dashed") +
                geom_segment( aes(yend=Term, xend=1), col= "black") +
                geom_point(shape=21, aes(size = abs(Combined.Score)), fill = colour) +
                scale_x_continuous(trans=reverselog_trans(10))+
                scale_size_continuous(range = c(2, 13)) +
                theme_tufte()+ xlab("p value (adj)") + ylab("") +
                theme(text=element_text(family="Helvetica", size = 26),
                  axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") + ggtitle(paste(partner,  'positive', databases_list[db]))
        }
        ggsave(plot_positive, file = paste0(dir_pea_combined, partner, '_', databases_list[db], '_positive', '.pdf'), width = 30, height = 11)
            
        #plot the genes
        if(nrow(enriched) > 20){ 
            genes_to_plot <- paste0(x = enriched[(nrow(enriched)-19):nrow(enriched), 'Genes'], ';')
            genes_to_plot <- paste(genes_to_plot, collapse = '')
            genes_to_plot <- strsplit(genes_to_plot, split = ';')[[1]]
            #reverse because of ascending ordering of the enriched table to put the most significant at the beginning
            genes_to_plot <- rev(genes_to_plot)
            genes_to_plot <- unique(genes_to_plot)
            }else{
            genes_to_plot <- paste0(x = enriched[, 'Genes'], ';')
            genes_to_plot <- paste(genes_to_plot, collapse = '')
            genes_to_plot <- strsplit(genes_to_plot, split = ';')[[1]]
            genes_to_plot <- rev(genes_to_plot)
            genes_to_plot <- unique(genes_to_plot)
        }
        
        if(length(genes_to_plot) > 20){genes_to_plot <- genes_to_plot[1:20]}
        heatmap_plot <- DoHeatmap(object_av, features = genes_to_plot, draw.lines = FALSE, raster = FALSE, group.colors = colours_diagnosis_groups) + theme(text = element_text(size = 20, face = "bold")) + scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
        ggsave(heatmap_plot, file = paste0(dir_pea_combined, partner, '_', databases_list[db], '_HEATMAP_genes_positive', '.pdf'), width = 8, height = 11)
        
        vlnplot <- VlnPlot(object, features = genes_to_plot, stack = TRUE, flip = TRUE, cols = colours_diagnosis_groups, fill.by = "ident") +
                        theme(legend.position = "none",
                          text = element_text(size = 17),
                          axis.text = element_text(size = 17))
        ggsave(vlnplot, file = paste0(dir_pea_combined, partner, '_', databases_list[db], '_StackedVIOLIN_genes_positive', '.pdf'), width = 8, height = 11)
        }
    #end of the einrichr loop

In [None]:
# Run the pathway enrichment analysis (PEA) and the heatmaps for the SCNI markers.
# Relies on `scni_markers`, `object` and `object_av` created in earlier cells.
dir_pea_combined <- paste0('./outs/combined_non_pairwise/')
# recursive = TRUE also creates './outs' if needed; showWarnings = FALSE keeps re-runs quiet
dir.create(dir_pea_combined, showWarnings = FALSE, recursive = TRUE)
databases_list <- c('GO_Biological_Process_2021', 'Reactome_2016', 'MSigDB_Hallmark_2020')
colours_diagnosis_groups <- c('#bbbbbb', '#d1988f', '#8e2311')
partner <- "SCNI"
features <- scni_markers
colour <- '#d1988f'

# Order the markers by decreasing scaled average expression in `partner`.
# drop = FALSE keeps the matrix shape when there is only a single marker.
ordered_genes <- object_av@assays$RNA@scale.data[features, , drop = FALSE]
ordered_genes <- as.data.frame(ordered_genes)
ordered_genes <- ordered_genes[order(-ordered_genes[, partner]), , drop = FALSE]
ordered_genes <- rownames(ordered_genes)

# heatmap of the (at most) 20 top-ranked markers
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width=5, repr.plot.height=12)
heatmap <- DoHeatmap(object_av, features = ordered_genes_plot, draw.lines = FALSE, size = 7,  angle = 270, hjust = 1, raster = FALSE, group.colors = colours_diagnosis_groups) +
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
ggsave(heatmap, filename = paste0(dir_pea_combined, partner, '_heatmap_top20.pdf'), width = 5, height = 12)

# Axis transformation -log_base(x) for the adjusted p-values.
# Defined once here instead of being re-created in every loop iteration.
reverselog_trans <- function(base = exp(1)) {
    trans <- function(x) -log(x, base)
    inv <- function(x) base^(-x)
    trans_new(paste0("reverselog-", format(base)), trans, inv,
              log_breaks(base = base),
              domain = c(1e-100, Inf))
}

# one Enrichr query, lollipop plot, heatmap and stacked violin plot per database
for (db in seq_along(databases_list)) {
    enriched <- enrichr(features, databases = databases_list[db])
    enriched <- enriched[[1]]
    # nothing to rank or plot for this database: skip it instead of failing in the plots below
    if (is.null(enriched) || nrow(enriched) == 0) {
        message('No enrichment results for ', partner, ' in ', databases_list[db], '; skipping.')
        next
    }
    # descending adjusted p-value: the most significant terms end up in the last rows
    enriched <- enriched[order(-enriched$Adjusted.P.value), ]
    enriched$Term <- factor(enriched$Term, levels = unique(enriched$Term))
    #save the positive enriched pathways
    write.csv(enriched, file = paste0(dir_pea_combined, partner, '_', databases_list[db], '_positive_', '_pea_list.csv'))

    # the 20 most significant terms, or all terms when there are fewer than 20
    enriched_top <- tail(enriched, 20)

    options(repr.plot.width=22, repr.plot.height=11)
    plot_positive <- ggplot(enriched_top, aes(y=Term, x= Adjusted.P.value))+
        geom_vline(xintercept = .05, color = "grey", linetype="dashed") +
        geom_segment( aes(yend=Term, xend=1), col= "black") +
        geom_point(shape=21, aes(size = abs(Combined.Score)), fill = colour) +
        scale_x_continuous(trans=reverselog_trans(10))+
        scale_size_continuous(range = c(2, 13)) +
        theme_tufte()+ xlab("p value (adj)") + ylab("") +
        theme(text=element_text(family="Helvetica", size = 26),
              axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") + ggtitle(paste(partner,  'positive', databases_list[db]))
    ggsave(plot_positive, filename = paste0(dir_pea_combined, partner, '_', databases_list[db], '_positive', '.pdf'), width = 30, height = 11)

    # Genes behind the plotted terms (';'-separated in the `Genes` column).
    # Reversed because the table is sorted with the most significant term last.
    genes_to_plot <- unlist(strsplit(as.character(enriched_top$Genes), split = ';'))
    genes_to_plot <- unique(rev(genes_to_plot))
    genes_to_plot <- head(genes_to_plot, 20)

    heatmap_plot <- DoHeatmap(object_av, features = genes_to_plot, draw.lines = FALSE, raster = FALSE, group.colors = colours_diagnosis_groups) + theme(text = element_text(size = 20, face = "bold")) + scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
    ggsave(heatmap_plot, filename = paste0(dir_pea_combined, partner, '_', databases_list[db], '_HEATMAP_genes_positive', '.pdf'), width = 8, height = 11)

    vlnplot <- VlnPlot(object, features = genes_to_plot, stack = TRUE, flip = TRUE, cols = colours_diagnosis_groups, fill.by = "ident") +
        theme(legend.position = "none",
              text = element_text(size = 17),
              axis.text = element_text(size = 17))
    ggsave(vlnplot, filename = paste0(dir_pea_combined, partner, '_', databases_list[db], '_StackedVIOLIN_genes_positive', '.pdf'), width = 8, height = 11)
}
#end of the enrichr loop

In [None]:
# Run the pathway enrichment analysis (PEA) and the heatmaps for the MS markers.
# Relies on `ms_markers`, `object` and `object_av` created in earlier cells.
dir_pea_combined <- paste0('./outs/combined_non_pairwise/')
# recursive = TRUE also creates './outs' if needed; showWarnings = FALSE keeps re-runs quiet
dir.create(dir_pea_combined, showWarnings = FALSE, recursive = TRUE)
databases_list <- c('GO_Biological_Process_2021', 'Reactome_2016', 'MSigDB_Hallmark_2020')
colours_diagnosis_groups <- c('#bbbbbb', '#d1988f', '#8e2311')
partner <- "MS"
features <- ms_markers
colour <- '#8e2311'

# Order the markers by decreasing scaled average expression in `partner`.
# drop = FALSE keeps the matrix shape when there is only a single marker.
ordered_genes <- object_av@assays$RNA@scale.data[features, , drop = FALSE]
ordered_genes <- as.data.frame(ordered_genes)
ordered_genes <- ordered_genes[order(-ordered_genes[, partner]), , drop = FALSE]
ordered_genes <- rownames(ordered_genes)

# heatmap of the (at most) 20 top-ranked markers
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width=5, repr.plot.height=12)
heatmap <- DoHeatmap(object_av, features = ordered_genes_plot, draw.lines = FALSE, size = 7,  angle = 270, hjust = 1, raster = FALSE, group.colors = colours_diagnosis_groups) +
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
ggsave(heatmap, filename = paste0(dir_pea_combined, partner, '_heatmap_top20.pdf'), width = 5, height = 12)

# Axis transformation -log_base(x) for the adjusted p-values.
# Defined once here instead of being re-created in every loop iteration.
reverselog_trans <- function(base = exp(1)) {
    trans <- function(x) -log(x, base)
    inv <- function(x) base^(-x)
    trans_new(paste0("reverselog-", format(base)), trans, inv,
              log_breaks(base = base),
              domain = c(1e-100, Inf))
}

# one Enrichr query, lollipop plot, heatmap and stacked violin plot per database
for (db in seq_along(databases_list)) {
    enriched <- enrichr(features, databases = databases_list[db])
    enriched <- enriched[[1]]
    # nothing to rank or plot for this database: skip it instead of failing in the plots below
    if (is.null(enriched) || nrow(enriched) == 0) {
        message('No enrichment results for ', partner, ' in ', databases_list[db], '; skipping.')
        next
    }
    # descending adjusted p-value: the most significant terms end up in the last rows
    enriched <- enriched[order(-enriched$Adjusted.P.value), ]
    enriched$Term <- factor(enriched$Term, levels = unique(enriched$Term))
    #save the positive enriched pathways
    write.csv(enriched, file = paste0(dir_pea_combined, partner, '_', databases_list[db], '_positive_', '_pea_list.csv'))

    # the 20 most significant terms, or all terms when there are fewer than 20
    enriched_top <- tail(enriched, 20)

    options(repr.plot.width=22, repr.plot.height=11)
    plot_positive <- ggplot(enriched_top, aes(y=Term, x= Adjusted.P.value))+
        geom_vline(xintercept = .05, color = "grey", linetype="dashed") +
        geom_segment( aes(yend=Term, xend=1), col= "black") +
        geom_point(shape=21, aes(size = abs(Combined.Score)), fill = colour) +
        scale_x_continuous(trans=reverselog_trans(10))+
        scale_size_continuous(range = c(2, 13)) +
        theme_tufte()+ xlab("p value (adj)") + ylab("") +
        theme(text=element_text(family="Helvetica", size = 26),
              axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") + ggtitle(paste(partner,  'positive', databases_list[db]))
    ggsave(plot_positive, filename = paste0(dir_pea_combined, partner, '_', databases_list[db], '_positive', '.pdf'), width = 30, height = 11)

    # Genes behind the plotted terms (';'-separated in the `Genes` column).
    # Reversed because the table is sorted with the most significant term last.
    genes_to_plot <- unlist(strsplit(as.character(enriched_top$Genes), split = ';'))
    genes_to_plot <- unique(rev(genes_to_plot))
    genes_to_plot <- head(genes_to_plot, 20)

    heatmap_plot <- DoHeatmap(object_av, features = genes_to_plot, draw.lines = FALSE, raster = FALSE, group.colors = colours_diagnosis_groups) + theme(text = element_text(size = 20, face = "bold")) + scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
    ggsave(heatmap_plot, filename = paste0(dir_pea_combined, partner, '_', databases_list[db], '_HEATMAP_genes_positive', '.pdf'), width = 8, height = 11)

    vlnplot <- VlnPlot(object, features = genes_to_plot, stack = TRUE, flip = TRUE, cols = colours_diagnosis_groups, fill.by = "ident") +
        theme(legend.position = "none",
              text = element_text(size = 17),
              axis.text = element_text(size = 17))
    ggsave(vlnplot, filename = paste0(dir_pea_combined, partner, '_', databases_list[db], '_StackedVIOLIN_genes_positive', '.pdf'), width = 8, height = 11)
}
#end of the enrichr loop

# Map onto the extended database; create UMAPs with enrichment scores and heatmaps for the signaling signatures

## Prepare the signatures reference

In [None]:
# `object` is the object used for plotting; it is not created in this cell.
grouping_de <- 'diagnosis_simp'
# use the diagnosis column as identity and fix the order of the groups
Idents(object) <- grouping_de
levels(object) <- c('Healthy', 'SCNI', 'MS')
# average expression of the RNA assay per identity, returned as a Seurat object
object_av <- AverageExpression(object, assay = "RNA", return.seurat = TRUE, verbose = FALSE)

In [None]:
# output folder for the signature plots and gene lists
dir_plots <- './outs/signatures/'
dir.create(path = dir_plots)

In [None]:
# merge the filtered SCNI and MS features, keeping each feature once
features_filtered <- unique(c(features_scni, features_ms))

In [None]:
# folder holding the reference gene-set tables
folder <- './genesets/'
files <- list.files(folder)

# Read every reference table and keep, per column, only the genes that are part
# of `features_filtered`. Kept genes are packed at the top of their column;
# the remaining rows stay NA.
reference_list <- list()
for (i in seq_along(files)) {
    reference <- read.csv2(file = paste0(folder, files[i]))

    # NA-filled frame with the same shape and column names as the reference
    filtered_reference <- data.frame(matrix(NA, ncol = ncol(reference), nrow = nrow(reference)))
    colnames(filtered_reference) <- colnames(reference)
    for (col_idx in seq_len(ncol(filtered_reference))) {
        genes <- reference[[col_idx]][reference[[col_idx]] %in% features_filtered]
        if (length(genes) > 0) {
            filtered_reference[seq_along(genes), col_idx] <- genes
        }
    }

    # store the filtered table in place of the raw one
    reference_list[[i]] <- filtered_reference
}
# name each table after its file: drop the '.csv' extension and anything after it
# (the dot is escaped so that only a literal '.csv' is matched)
names(reference_list) <- gsub(x = files, pattern = '\\.csv.*', replacement = '')


## Look into the signaling patterns

In [None]:
# colours handed to the plots below for the diagnosis groups
# (presumably in the order Healthy, SCNI, MS used for the identity levels - confirm)
colours_diagnosis_groups <- c('#bbbbbb', '#d1988f', '#8e2311')

In [None]:
# Find out which DGE genes also occur in the signaling gene lists:
# reload the SCNI and MS marker tables first.

# SCNI
scni_markers1 <- read.csv(file = './outs/SCNI_vs_Healthy_NON_pairwise/DGE_SCNI.csv')
scni_markers2 <- read.csv(file = './outs/SCNI_vs_MS_NON_pairwise/DGE_SCNI.csv')
# MS
ms_markers1 <- read.csv(file = './outs/MS_vs_Healthy_NON_pairwise/DGE_MS.csv')
ms_markers2 <- read.csv(file = './outs/SCNI_vs_MS_NON_pairwise/DGE_MS.csv')

# pooled, de-duplicated gene names per group
scni_markers <- unique(c(scni_markers1$genes, scni_markers2$genes))
ms_markers <- unique(c(ms_markers1$genes, ms_markers2$genes))

In [None]:
# For every reference signature: take the SCNI/MS marker genes that belong to it,
# keep those whose scaled average expression is higher in MS than in Healthy, and
# save a stacked violin plot, a heatmap, a dot plot, a highlighted UMAP and the gene list.
for (i in seq_along(reference_list)) {
    name <- names(reference_list)[i]

    # all genes of this signature (from any column) that exist in the object
    genes_list <- unlist(lapply(reference_list[[i]], function(column) {
        column[column %in% rownames(object)]
    }), use.names = FALSE)
    genes_list <- unique(genes_list)

    genes_scni <- scni_markers[scni_markers %in% genes_list]
    genes_ms <- ms_markers[ms_markers %in% genes_list]

    genes_matched <- unique(c(genes_scni, genes_ms))
    genes_to_plot <- genes_matched

    partner1 <- 'MS'
    partner2 <- 'Healthy'

    # Scaled average expression of the matched genes, sorted by the `partner1` column.
    # drop = FALSE keeps the matrix shape when only one gene matched.
    ordered_genes <- object_av@assays$RNA@scale.data[genes_to_plot, , drop = FALSE]
    ordered_genes <- as.data.frame(ordered_genes)
    ordered_genes <- ordered_genes[order(ordered_genes[, partner1]), , drop = FALSE]

    # genes higher in partner1 than in partner2; which() also drops NA comparisons
    genes_higher <- rownames(ordered_genes)[which(ordered_genes[[partner1]] > ordered_genes[[partner2]])]

    # none of the plots below can be drawn without genes
    if (length(genes_higher) == 0) {
        message('No genes higher in ', partner1, ' than in ', partner2, ' for signature ', name, '; skipping.')
        next
    }

    # Shared size of the violin plot, heatmap and dot plot.
    # NOTE(review): the height follows the number of matched genes although only
    # `genes_higher` is plotted - confirm this is intended.
    width <- 6
    height <- length(genes_matched)/3.5
    options(repr.plot.width=width, repr.plot.height=height)

    vlnplot <- VlnPlot(object, features = genes_higher, stack = TRUE, flip = TRUE, cols = colours_diagnosis_groups, fill.by = "ident") +
        theme(legend.position = "none",
              text = element_text(size = 17),
              axis.text = element_text(size = 17))
    ggsave(vlnplot, filename = paste0(dir_plots, name, '_stackedviolin.pdf'), width = width, height = height)

    #plot the heatmap
    heatmap <- DoHeatmap(object_av, features = genes_higher, draw.lines = FALSE, size = 7, raster = FALSE, group.colors = colours_diagnosis_groups) +
        theme(text = element_text(size = 20, face = "plain", colour = 'black'),
              axis.text.y=element_text(colour="black", size = 15)) +
        scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
    ggsave(heatmap, filename = paste0(dir_plots, name, '_heatmap.pdf'), width = width, height = height)

    #plot the dotplot
    dot_plot <- DotPlot(object, features = rev(genes_higher), dot.scale = 10) +
        scale_colour_gradient2(low = "#2881C1", mid = "white", high = "#D3556E") +
        coord_flip()
    ggsave(dot_plot, filename = paste0(dir_plots, name, '_dotplot.pdf'), width = width, height = height)

    ##create the split umap with the genes##

    # Per-cell mean of the signature genes, read from the `data` slot of the RNA assay.
    # One colMeans() call replaces the former cell-by-cell loop over a data frame.
    genes <- genes_higher
    signature_matrix <- object@assays$RNA@data[genes, , drop = FALSE]
    object@meta.data$sum_genes <- unname(Matrix::colMeans(signature_matrix))

    # cells whose score lies above the 99th percentile are highlighted
    cells_to_highlight <- rownames(object@meta.data)[object@meta.data$sum_genes > quantile(object$sum_genes, 0.99)[[1]]]

    # UMAP split by diagnosis, with the 'cd8_coded' identities as labels
    object_split <- object
    Idents(object_split) <- 'cd8_coded'
    width <- 18
    height <- 7
    options(repr.plot.width=18, repr.plot.height=7)
    object_split$diagnosis_simp <- factor(x = object_split$diagnosis_simp, levels = c('Healthy', 'SCNI', 'MS'))
    umap_plot <- DimPlot(object_split, reduction = 'umap', label = TRUE, repel  = TRUE, label.size = 6, cells.highlight = cells_to_highlight, split.by = 'diagnosis_simp', cols.highlight = '#D3556E') +
        theme(
            text = element_text(size = 20),
            axis.text = element_text(size = 20),
            legend.text=element_text(size=20))
    options(repr.plot.width=11, repr.plot.height=11)
    ggsave(umap_plot, filename = paste0(dir_plots, name, '_umap.pdf'), width = width, height = height)

    #save the genes
    write.csv(genes_higher, file = paste0(dir_plots, name, '.csv'))
}

In [None]:
#load in the signatures for the umaps
dir <- './outs/signatures/'
# keep only the gene-list tables; the pattern is escaped and anchored so that
# only file names ending in '.csv' are returned
sign_list <- list.files(dir, pattern = '\\.csv$')
sign_list

In [None]:
#create average expression object for the heatmaps
# use the diagnosis column as identity and fix the order of the groups
Idents(object) <- 'diagnosis_simp'
levels(object) <- c('Healthy', 'SCNI', 'MS')
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned
object_av <- AverageExpression(object, assay = "RNA", return.seurat = TRUE, verbose = FALSE)

In [None]:
dir_plots <- dir
colours_diagnosis_groups <- c('#bbbbbb', '#d1988f', '#8e2311')

# Redraw the heatmap of every saved signature with a four-colour scale whose
# break points are taken from the scaled values of that signature.
for (i in seq_along(sign_list)) {
    # file name without its '.csv' extension (escaped and anchored pattern)
    name <- sub(x = sign_list[i], pattern = '\\.csv$', replacement = '')
    sign <- read.csv(file = paste0(dir, sign_list[i]), row.names = 1)
    sign <- sign[[1]]

    markers <- sign
    object_plot <- object_av

    # value range of the signature genes, used as margins of the colour scale
    data <- object_av@assays$RNA@scale.data
    data <- data[markers, ]
    max.value <- max(data)
    min.value <- min(data)

    #create the heatmap
    width <- 6
    height <- length(markers)/3.5
    options(repr.plot.width=width, repr.plot.height=height)

    # colour stops: minimum, 0, half of the maximum, maximum
    heatmap <- DoHeatmap(object_plot, features = markers, draw.lines = FALSE, size = 7, raster = FALSE, group.colors = colours_diagnosis_groups) +
        theme(text = element_text(size = 20, face = "plain", colour = 'black'),
              axis.text.y=element_text(colour="black", size = 15)) +
        scale_fill_gradientn(colours = c("#2881C1", "white", "#D3556E", "#671727"), values = scales::rescale(c(min.value, 0, max.value/2, max.value)))
    ggsave(heatmap, filename = paste0(dir_plots, name, '_heatmap_new.pdf'), width = width, height = height)
}

In [None]:
# number of cells per diagnosis group in the full object (inspected before downsampling)
table(object@meta.data$diagnosis_simp)

In [None]:
# Downsample every diagnosis group to the number of Healthy cells.
Idents(object) <- 'diagnosis_simp'
# NOTE(review): Seurat's WhichCells() documents its own `seed` argument (default 1),
# which subset() may use for the downsampling - confirm whether this set.seed()
# call actually influences which cells are drawn.
set.seed(1234)
# target size: number of cells annotated as Healthy
number_downsample <- nrow(filter(object@meta.data, diagnosis_simp == 'Healthy'))
object_subset <- subset(object, downsample = number_downsample)
# group sizes after downsampling, followed by a summary of the new object
table(object_subset$diagnosis_simp)
object_subset

In [None]:
# Which samples contribute cells to each diagnosis group of the downsampled object?
unique(subset(object_subset@meta.data, diagnosis_simp == 'Healthy')$samplenumb)
unique(subset(object_subset@meta.data, diagnosis_simp == 'SCNI')$samplenumb)
unique(subset(object_subset@meta.data, diagnosis_simp == 'MS')$samplenumb)

# cells per sample in the MS group: downsampled object ...
table(subset(object_subset@meta.data, diagnosis_simp == 'MS')$samplenumb)

# ... versus the original object
table(subset(object@meta.data, diagnosis_simp == 'MS')$samplenumb)

In [None]:
dir_plots <- dir

# For every saved signature: score each cell of the downsampled object by its mean
# expression of the signature genes, highlight the cells above the 99th percentile
# on the UMAP split by diagnosis, and save a rasterised and a vector version.
for (i in seq_along(sign_list)) {
    # file name without its '.csv' extension (escaped and anchored pattern)
    name <- sub(x = sign_list[i], pattern = '\\.csv$', replacement = '')
    sign <- read.csv(file = paste0(dir, sign_list[i]), row.names = 1)
    sign <- sign[[1]]

    markers <- sign
    object_plot <- object_subset

    # Per-cell mean of the signature genes, read from the `data` slot of the RNA assay.
    # One colMeans() call replaces the former cell-by-cell loop over a data frame,
    # which also reused the outer loop index `i`.
    signature_matrix <- object_plot@assays$RNA@data[markers, , drop = FALSE]
    object_plot@meta.data$sum_genes <- unname(Matrix::colMeans(signature_matrix))

    # cells whose score lies above the 99th percentile are highlighted
    cells_to_highlight <- rownames(object_plot@meta.data)[object_plot@meta.data$sum_genes > quantile(object_plot$sum_genes, 0.99)[[1]]]

    # UMAP split by diagnosis, with the 'cd8_coded' identities as labels
    object_split <- object_plot
    Idents(object_split) <- 'cd8_coded'
    width <- 18
    height <- 7
    options(repr.plot.width=18, repr.plot.height=7)
    object_split$diagnosis_simp <- factor(x = object_split$diagnosis_simp, levels = c('Healthy', 'SCNI', 'MS'))

    # rasterised version
    umap_plot <- DimPlot(object_split, reduction = 'umap', pt.size = 9.5, label = TRUE, repel  = TRUE, label.size = 6, cells.highlight = cells_to_highlight, split.by = 'diagnosis_simp', cols.highlight = '#D3556E', raster = TRUE, raster.dpi = c(2048, 2048)) +
        theme(
            text = element_text(size = 20),
            axis.text = element_text(size = 20),
            legend.text=element_text(size=20)) + NoAxes()
    options(repr.plot.width=11, repr.plot.height=11)
    ggsave(umap_plot, filename = paste0(dir_plots, name, '_umap_featureplot.pdf'), width = width, height = height)

    # vector version (no raster arguments)
    umap_plot <- DimPlot(object_split, reduction = 'umap', label = TRUE, repel  = TRUE, label.size = 6, cells.highlight = cells_to_highlight, split.by = 'diagnosis_simp', cols.highlight = '#D3556E') +
        theme(
            text = element_text(size = 20),
            axis.text = element_text(size = 20),
            legend.text=element_text(size=20)) + NoAxes()
    options(repr.plot.width=11, repr.plot.height=11)
    ggsave(umap_plot, filename = paste0(dir_plots, name, '_umap_featureplot_vector.pdf'), width = width, height = height)
}

# Create the upset plots from the selected signatures

In [None]:
#load in the signatures from the umaps
dir <- './outs/signatures/'
# keep only the gene-list tables; the pattern is escaped and anchored so that
# only file names ending in '.csv' are returned
sign_list <- list.files(dir, pattern = '\\.csv$')
sign_list

In [None]:
# Stack all signature gene lists into one long table with one row per
# pathway/gene pair. The per-file tables are collected in a preallocated list
# and bound once, instead of growing a data frame with rbind() in the loop.
signature_tables <- vector("list", length(sign_list))
for (i in seq_along(sign_list)) {
    # file name without its '.csv' extension (escaped and anchored pattern)
    name <- sub(x = sign_list[i], pattern = '\\.csv$', replacement = '')
    sign <- read.csv(file = paste0(dir, sign_list[i]), row.names = 1)
    sign <- sign[[1]]
    # rep() keeps both columns the same length, also for a signature without genes
    sign_df <- data.frame(pathway = rep(name, length(sign)), genes = sign)
    signature_tables[[i]] <- sign_df
}
combined_signatures <- do.call(rbind, signature_tables)

In [None]:
#format before plotting: one row per gene with the list of pathways it belongs to
combined_formatted <- combined_signatures %>%
  group_by(genes) %>%
  summarize(Pathways = list(pathway))

# number of pathways per gene
combined_formatted$list_number <- lengths(combined_formatted$Pathways)

#remove the unique genes and leave only the overlap
combined_formatted <- combined_formatted[combined_formatted$list_number > 1, ]

In [None]:
# list the distinct pathway names present in the combined table
unique(combined_signatures$pathway)

In [None]:
# Build a gene x pathway membership table: one row per gene, a `markers` column
# with the gene name and one 0/1 column per pathway.
markers <- unique(combined_signatures$genes)
pathways <- unique(combined_signatures$pathway)
data <- data.frame(matrix(0, nrow = length(markers), ncol = (length(pathways) + 1)))
colnames(data) <- c('markers', pathways)
data$markers <- markers

# Fill one whole pathway column at a time instead of filtering per gene.
# Columns are addressed by position (pathway p sits in column p + 1), so the
# `markers` column is never touched.
for (p in seq_along(pathways)) {
    pathway_genes <- combined_signatures$genes[combined_signatures$pathway == pathways[p]]
    data[[p + 1]] <- as.numeric(markers %in% pathway_genes)
}

# number of pathways each gene belongs to
data$sum <- rowSums(data[2:ncol(data)])

# genes shared by at least two pathways
data_subset <- data[data$sum > 1, ]

In [None]:
#create the combi of all possible intersections

# all subsets of the pathways, grouped by subset size (size 1 first)
my_combi <- unlist(lapply(seq_along(pathways),
                           combinat::combn,
                           x = pathways,
                           simplify = FALSE),
                    recursive = FALSE)
length(pathways)
# Drop the leading single-pathway subsets; only real intersections remain.
# Negative indexing stays correct when nothing is left (a single pathway),
# where `(n + 1):length(x)` would count backwards.
my_combi <- my_combi[-seq_len(length(pathways))]
head(my_combi)



In [None]:
# Create the named vector with the number of genes shared by every pathway
# combination. Names are the pathway names joined by '&'.

# Gene sets per pathway, computed once instead of filtering the long table
# for every member of every combination.
genes_by_pathway <- split(combined_signatures$genes, combined_signatures$pathway)

# Size of the intersection of all gene sets in a combination.
data_vector <- vapply(my_combi, function(combi){
    length(Reduce(intersect, genes_by_pathway[combi]))
}, integer(1))
names(data_vector) <- vapply(my_combi, paste, character(1), collapse = '&')

In [None]:
# Upset plot built from the intersection-size vector; the sets are shown in
# the fixed order given below and the figure is written to PDF.
width <- 8
height <- 6
options(repr.plot.width = width, repr.plot.height = height)
ordering <- rev(c('IFN_g', 'IFN_ab', 'IL2', 'TNFa_NFkB', 'Myc', 'PI3K', 'Complement', 'Allograft', 'Hypoxia', 'Ox_phos', 'Fatty_Met'))
plot <- ComplexUpset::upset(
    fromExpression(data_vector),
    ordering,
    sort_sets = FALSE,
    n_intersections = 20,
    width_ratio = 0.15,
    keep_empty_groups = TRUE,
    height_ratio = 1.5
)

ggsave(plot, width = width, height = height, file = './outs/shared_genes_intersection.pdf')
plot

In [None]:
# Second upset plot, drawn from the table `data` directly, to get the right
# number of genes per set.
# NOTE(review): presumably `data` is still the gene x pathway membership
# table built above - confirm the cells are run in order.
width <- 8
height <- 6
options(repr.plot.width = width, repr.plot.height = height)
ordering <- rev(c('IFN_g', 'IFN_ab', 'IL2', 'TNFa_NFkB', 'Myc', 'PI3K', 'Complement', 'Allograft', 'Hypoxia', 'Ox_phos', 'Fatty_Met'))
plot <- ComplexUpset::upset(
    data,
    ordering,
    sort_sets = FALSE,
    n_intersections = 20,
    width_ratio = 0.15,
    height_ratio = 1.5
)

ggsave(plot, width = width, height = height, file = './outs/shared_genes_intersection_correctsetnumber.pdf')
plot

# Create the UMAPs and heatmap with immunological modules (twins dataset)

In [None]:
# Load the non-pairwise DGE tables; every diagnosis group has one table from
# each of the two comparisons it takes part in.

# Healthy
healthy_markers1 <- read.csv(file = './outs/MS_vs_Healthy_NON_pairwise/DGE_Healthy.csv')
healthy_markers2 <- read.csv(file = './outs/SCNI_vs_Healthy_NON_pairwise/DGE_Healthy.csv')

# SCNI
scni_markers1 <- read.csv(file = './outs/SCNI_vs_Healthy_NON_pairwise/DGE_SCNI.csv')
scni_markers2 <- read.csv(file = './outs/SCNI_vs_MS_NON_pairwise/DGE_SCNI.csv')

# MS
ms_markers1 <- read.csv(file = './outs/MS_vs_Healthy_NON_pairwise/DGE_MS.csv')
ms_markers2 <- read.csv(file = './outs/SCNI_vs_MS_NON_pairwise/DGE_MS.csv')


In [None]:
# Per diagnosis group: the set of genes reported in either of its two
# comparisons (duplicates removed).
healthy_markers_np <- union(healthy_markers1$genes, healthy_markers2$genes)
scni_markers_np <- union(scni_markers1$genes, scni_markers2$genes)
ms_markers_np <- union(ms_markers1$genes, ms_markers2$genes)

In [6]:
# genes reported for at least one of the two disease groups (SCNI or MS)
combined_dis_np <- union(scni_markers_np, ms_markers_np)

In [None]:
# fix the random seed for the steps below
set.seed(1234)

In [None]:
# Output directory; only created when missing so that re-running the cell
# does not raise an "already exists" warning.
outs_dir <- './outs/'
if(!dir.exists(outs_dir)){
    dir.create(outs_dir)
}

In [None]:
# Genes of the immunological modules, restricted to those reported for SCNI
# or MS and sorted alphabetically.
data <- read.csv2(file = './modules/immunological_markers_plot.csv')
# unique() mirrors the metabolic module loader: a gene listed twice would
# otherwise be selected twice in the expression tables built from this vector
data <- unique(data$Gene)

data <- data[data %in% combined_dis_np]
data <- data[order(data)]
data
length(data)

In [None]:
# Average expression object for the heatmaps, with the diagnosis groups as
# identities in the order Healthy, SCNI, MS.
Idents(object) <- 'diagnosis_simp'
levels(object) <- c('Healthy', 'SCNI', 'MS')
object_av <- AverageExpression(
    object,
    assay = "RNA",
    return.seurat = TRUE,
    verbose = FALSE
)

In [None]:
# Heatmap of the immunological module genes on the averaged object, with the
# genes ordered by their scaled expression in the group of interest.
dir_plots <- './outs/signatures/'
colours_diagnosis_groups <- c('#1D5B60', '#939393', '#8D2413')
group_intereset <- 'MS'
sign <- data
markers <- sign
object_plot <- object_av

# Scaled values of the selected genes; their range anchors the colour scale.
data_markers <- object_av@assays$RNA@scale.data[markers, ]
min.value <- min(data_markers)
max.value <- max(data_markers)

# Sort the genes by increasing value in the group of interest.
data_markers <- data.frame(data_markers)
data_markers <- data_markers[order(data_markers[[group_intereset]], decreasing = FALSE), ]
markers <- rownames(data_markers)

# Figure size: the height grows with the number of genes.
width <- 5
height <- length(markers)/3.5
options(repr.plot.width = width, repr.plot.height = height)

# Colour stops are placed at min, 0, max/2 and max of the scaled values.
heatmap <- DoHeatmap(
    object_plot,
    features = markers,
    draw.lines = FALSE,
    size = 7,
    raster = FALSE,
    group.colors = colours_diagnosis_groups
) +
    theme(
        text = element_text(size = 20, face = "plain", colour = 'black'),
        axis.text.y = element_text(colour = "black", size = 15)
    ) +
    scale_fill_gradientn(
        colours = c("#2881C1", "white", "#D3556E", "#671727"),
        values = scales::rescale(c(min.value, 0, max.value/2, max.value))
    )
heatmap
ggsave(heatmap, file = paste0(dir_plots, 'combined_immunological_heatmap_new_vertical.pdf'), width = width, height = height)

In [None]:
# Average expression object for the heatmaps, this time with the identities
# in the order MS, SCNI, Healthy.
Idents(object) <- 'diagnosis_simp'
levels(object) <- c('MS', 'SCNI', 'Healthy')
object_av <- AverageExpression(
    object,
    assay = "RNA",
    return.seurat = TRUE,
    verbose = FALSE
)

In [None]:
# Second heatmap of the immunological module genes (rotated group labels,
# reversed group colours), genes ordered by the group of interest.
dir_plots <- './outs/signatures/'
colours_diagnosis_groups <- c('#8D2413', '#939393', '#1D5B60')
group_intereset <- 'MS'
sign <- data
markers <- sign
object_plot <- object_av

# Scaled values of the selected genes; their range anchors the colour scale.
data_markers <- object_av@assays$RNA@scale.data[markers, ]
min.value <- min(data_markers)
max.value <- max(data_markers)

# Sort the genes by increasing value in the group of interest.
data_markers <- data.frame(data_markers)
data_markers <- data_markers[order(data_markers[[group_intereset]], decreasing = FALSE), ]
markers <- rownames(data_markers)

# Figure size: the height grows with the number of genes.
width <- 4.5
height <- length(markers)/4
options(repr.plot.width = width, repr.plot.height = height)

# Colour stops are placed at min, 0, max/2 and max of the scaled values.
heatmap <- DoHeatmap(
    object_plot,
    features = markers,
    draw.lines = FALSE,
    size = 7,
    raster = FALSE,
    angle = 270,
    hjust = 1,
    group.colors = colours_diagnosis_groups
) +
    theme(
        text = element_text(size = 20, face = "plain", colour = 'black'),
        axis.text.y = element_text(colour = "black", size = 14, angle = 330, vjust = 1)
    ) +
    scale_fill_gradientn(
        colours = c("#2881C1", "white", "#D3556E", "#671727"),
        values = scales::rescale(c(min.value, 0, max.value/2, max.value))
    )
heatmap
ggsave(heatmap, file = paste0(dir_plots, 'combined_immunological_heatmap_new_horizontal.pdf'), width = width, height = height)

In [None]:
# Downsample each diagnosis group to at most the number of Healthy cells.
Idents(object) <- 'diagnosis_simp'
set.seed(1234)
number_downsample <- sum(object@meta.data$diagnosis_simp == 'Healthy', na.rm = TRUE)
object_subset <- subset(object, downsample = number_downsample)

# cells per group after downsampling, then the object summary
table(object_subset$diagnosis_simp)
object_subset

In [None]:
# Check which samples are present per diagnosis group in the downsampled
# object, and how many MS cells each sample contributes.
unique(object_subset@meta.data$samplenumb[which(object_subset@meta.data$diagnosis_simp == 'Healthy')])
unique(object_subset@meta.data$samplenumb[which(object_subset@meta.data$diagnosis_simp == 'SCNI')])
unique(object_subset@meta.data$samplenumb[which(object_subset@meta.data$diagnosis_simp == 'MS')])
table(object_subset@meta.data$samplenumb[which(object_subset@meta.data$diagnosis_simp == 'MS')])

# same count for the MS group of the original (not downsampled) object
table(object@meta.data$samplenumb[which(object@meta.data$diagnosis_simp == 'MS')])

In [None]:
# Per-cell module score: mean over the module genes of the values stored in
# the RNA assay `data` slot. Cells above the 99th percentile are highlighted.
# NOTE(review): `markers` is whatever gene vector the preceding cell left in
# the workspace - presumably the ordered heatmap genes; confirm run order.
dir_plots <- './outs/signatures/'
object_plot <- object_subset

# cells x genes table of expression values (one column per module gene);
# drop = FALSE keeps the matrix shape when only one gene is selected
expression <- data.frame(as.matrix(t(object_plot@assays$RNA@data[markers, , drop = FALSE])))

# Mean per cell. rowMeans() replaces the former loop that indexed the data
# frame one row at a time for every cell.
expression$sum <- rowMeans(expression)

object_plot@meta.data$sum_genes <- expression$sum

# define the cells above the threshold to plot
cells_to_highlight <- rownames(object_plot@meta.data)[object_plot@meta.data$sum_genes > quantile(object_plot$sum_genes, 0.99)[[1]]]

In [None]:
# UMAP split by diagnosis with the selected cells highlighted. Written twice:
# once rasterized and once as a vector graphic.
object_split <- object_plot
Idents(object_split) <- 'cd8_coded'
width <- 19
height <- 7
options(repr.plot.width = 18, repr.plot.height = 7)
object_split$diagnosis_simp <- factor(x = object_split$diagnosis_simp, levels = c('Healthy', 'SCNI', 'MS'))

# rasterized version
umap_plot <- DimPlot(
    object_split,
    reduction = 'umap',
    pt.size = 9.5,
    label = TRUE,
    repel = TRUE,
    label.size = 6,
    cells.highlight = cells_to_highlight,
    split.by = 'diagnosis_simp',
    cols.highlight = '#D3556E',
    raster = TRUE,
    raster.dpi = c(2048, 2048)
) +
    theme(
        text = element_text(size = 20),
        axis.text = element_text(size = 20),
        legend.text = element_text(size = 20)
    ) +
    NoAxes() +
    NoLegend()
umap_plot
ggsave(umap_plot, file = paste0(dir_plots, 'combined_immunological_umap_featureplot.pdf'), width = width, height = height)

# vector version (default point size, no rasterization)
umap_plot <- DimPlot(
    object_split,
    reduction = 'umap',
    label = TRUE,
    repel = TRUE,
    label.size = 6,
    cells.highlight = cells_to_highlight,
    split.by = 'diagnosis_simp',
    cols.highlight = '#D3556E'
) +
    theme(
        text = element_text(size = 20),
        axis.text = element_text(size = 20),
        legend.text = element_text(size = 20)
    ) +
    NoAxes() +
    NoLegend()
ggsave(umap_plot, file = paste0(dir_plots, 'combined_immunological_umap_featureplot_vector.pdf'), width = width, height = height)

# Create the UMAPs and heatmap with metabolic modules (twins dataset)

In [None]:
# fix the random seed for the steps below
set.seed(1234)

In [None]:
# Genes of the metabolic modules: de-duplicated, restricted to genes reported
# for SCNI or MS that are also features of `pbmc`, sorted alphabetically.
data <- read.csv2(file = './modules/metabolic_markers_plot.csv')
data <- unique(data$Gene)

data <- data[data %in% combined_dis_np & data %in% rownames(pbmc)]
data <- data[order(data)]
data
length(data)

In [None]:
# Average expression object for the heatmaps, with the diagnosis groups as
# identities in the order Healthy, SCNI, MS.
Idents(object) <- 'diagnosis_simp'
levels(object) <- c('Healthy', 'SCNI', 'MS')
object_av <- AverageExpression(
    object,
    assay = "RNA",
    return.seurat = TRUE,
    verbose = FALSE
)

In [None]:
# Heatmap of the metabolic module genes on the averaged object, with the
# genes ordered by their scaled expression in the group of interest.
dir_plots <- './outs/signatures/'
colours_diagnosis_groups <- c('#1D5B60', '#939393', '#8D2413')
group_intereset <- 'MS'
sign <- data
markers <- sign
object_plot <- object_av

# Scaled values of the selected genes; their range anchors the colour scale.
data_markers <- object_av@assays$RNA@scale.data[markers, ]
min.value <- min(data_markers)
max.value <- max(data_markers)

# Sort the genes by increasing value in the group of interest.
data_markers <- data.frame(data_markers)
data_markers <- data_markers[order(data_markers[[group_intereset]], decreasing = FALSE), ]
markers <- rownames(data_markers)

# Figure size: the height grows with the number of genes.
width <- 5
height <- length(markers)/3.5
options(repr.plot.width = width, repr.plot.height = height)

# Colour stops are placed at min, 0, max/2 and max of the scaled values.
heatmap <- DoHeatmap(
    object_plot,
    features = markers,
    draw.lines = FALSE,
    size = 7,
    raster = FALSE,
    group.colors = colours_diagnosis_groups
) +
    theme(
        text = element_text(size = 20, face = "plain", colour = 'black'),
        axis.text.y = element_text(colour = "black", size = 15)
    ) +
    scale_fill_gradientn(
        colours = c("#2881C1", "white", "#D3556E", "#671727"),
        values = scales::rescale(c(min.value, 0, max.value/2, max.value))
    )
heatmap
ggsave(heatmap, file = paste0(dir_plots, 'combined_metabolic_heatmap_new_vertical.pdf'), width = width, height = height)

In [None]:
# Average expression object for the heatmaps, this time with the identities
# in the order MS, SCNI, Healthy.
Idents(object) <- 'diagnosis_simp'
levels(object) <- c('MS', 'SCNI', 'Healthy')
object_av <- AverageExpression(
    object,
    assay = "RNA",
    return.seurat = TRUE,
    verbose = FALSE
)

In [None]:
# Second heatmap of the metabolic module genes (rotated group labels,
# reversed group colours), genes ordered by the group of interest.
dir_plots <- './outs/signatures/'
colours_diagnosis_groups <- c('#8D2413', '#939393', '#1D5B60')
group_intereset <- 'MS'
sign <- data
markers <- sign
object_plot <- object_av

# Scaled values of the selected genes; their range anchors the colour scale.
data_markers <- object_av@assays$RNA@scale.data[markers, ]
min.value <- min(data_markers)
max.value <- max(data_markers)

# Sort the genes by increasing value in the group of interest.
data_markers <- data.frame(data_markers)
data_markers <- data_markers[order(data_markers[[group_intereset]], decreasing = FALSE), ]
markers <- rownames(data_markers)

# Figure size: the height grows with the number of genes.
width <- 4.2
height <- length(markers)/4
options(repr.plot.width = width, repr.plot.height = height)

# Colour stops are placed at min, 0, max/2 and max of the scaled values.
heatmap <- DoHeatmap(
    object_plot,
    features = markers,
    draw.lines = FALSE,
    size = 7,
    raster = FALSE,
    angle = 270,
    hjust = 1,
    group.colors = colours_diagnosis_groups
) +
    theme(
        text = element_text(size = 20, face = "plain", colour = 'black'),
        axis.text.y = element_text(colour = "black", size = 14, angle = 330, vjust = 1)
    ) +
    scale_fill_gradientn(
        colours = c("#2881C1", "white", "#D3556E", "#671727"),
        values = scales::rescale(c(min.value, 0, max.value/2, max.value))
    )
heatmap
ggsave(heatmap, file = paste0(dir_plots, 'combined_metabolic_heatmap_new_horizontal.pdf'), width = width, height = height)

In [None]:
# Downsample each diagnosis group to at most the number of Healthy cells.
Idents(object) <- 'diagnosis_simp'
set.seed(1234)
number_downsample <- sum(object@meta.data$diagnosis_simp == 'Healthy', na.rm = TRUE)
object_subset <- subset(object, downsample = number_downsample)

# cells per group after downsampling, then the object summary
table(object_subset$diagnosis_simp)
object_subset

In [None]:
# Check which samples are present per diagnosis group in the downsampled
# object, and how many MS cells each sample contributes.
unique(object_subset@meta.data$samplenumb[which(object_subset@meta.data$diagnosis_simp == 'Healthy')])
unique(object_subset@meta.data$samplenumb[which(object_subset@meta.data$diagnosis_simp == 'SCNI')])
unique(object_subset@meta.data$samplenumb[which(object_subset@meta.data$diagnosis_simp == 'MS')])
table(object_subset@meta.data$samplenumb[which(object_subset@meta.data$diagnosis_simp == 'MS')])

# same count for the MS group of the original (not downsampled) object
table(object@meta.data$samplenumb[which(object@meta.data$diagnosis_simp == 'MS')])

In [None]:
# Per-cell module score for the metabolic genes (mean over the module genes
# of the values stored in the RNA assay `data` slot), followed by the split
# UMAPs highlighting the cells above the 99th percentile.
# NOTE(review): `markers` is whatever gene vector the preceding cell left in
# the workspace - presumably the ordered heatmap genes; confirm run order.
dir_plots <- './outs/signatures/'
object_plot <- object_subset

# cells x genes table of expression values (one column per module gene);
# drop = FALSE keeps the matrix shape when only one gene is selected
expression <- data.frame(as.matrix(t(object_plot@assays$RNA@data[markers, , drop = FALSE])))

# Mean per cell. rowMeans() replaces the former loop that indexed the data
# frame one row at a time for every cell.
expression$sum <- rowMeans(expression)

object_plot@meta.data$sum_genes <- expression$sum

# define the cells above the threshold to plot
cells_to_highlight <- rownames(object_plot@meta.data)[object_plot@meta.data$sum_genes > quantile(object_plot$sum_genes, 0.99)[[1]]]

# plot the cells split by diagnosis with the selected threshold
object_split <- object_plot
Idents(object_split) <- 'cd8_coded'
width <- 19
height <- 7
options(repr.plot.width=18, repr.plot.height=7)
object_split$diagnosis_simp <- factor(x = object_split$diagnosis_simp, levels = c('Healthy', 'SCNI', 'MS'))

# rasterized version
umap_plot <- DimPlot(object_split, reduction = 'umap', pt.size = 9.5, label = TRUE, repel = TRUE, label.size = 6,
                     cells.highlight = cells_to_highlight, split.by = 'diagnosis_simp', cols.highlight = '#D3556E',
                     raster = TRUE, raster.dpi = c(2048, 2048)) +
    theme(
          text = element_text(size = 20),
          axis.text = element_text(size = 20),
          legend.text = element_text(size = 20)) + NoAxes() + NoLegend()
umap_plot
ggsave(umap_plot, file = paste0(dir_plots, 'combined_metabolic_umap_featureplot.pdf'), width = width, height = height)

# Vector version. NoAxes() and NoLegend() belong to the plot: they used to be
# added to the options() call below, so the saved vector PDF kept its axes
# and legend, unlike the immunological counterpart.
umap_plot <- DimPlot(object_split, reduction = 'umap', label = TRUE, repel = TRUE, label.size = 6,
                     cells.highlight = cells_to_highlight, split.by = 'diagnosis_simp', cols.highlight = '#D3556E') +
    theme(
          text = element_text(size = 20),
          axis.text = element_text(size = 20),
          legend.text = element_text(size = 20)) + NoAxes() + NoLegend()
# set the notebook figure size back to 11 x 11
options(repr.plot.width=11, repr.plot.height=11)
ggsave(umap_plot, file = paste0(dir_plots, 'combined_metabolic_umap_featureplot_vector.pdf'), width = width, height = height)

# Normalize expression against healthy individuals: immunological modules

In [None]:
# fix the random seed for the steps below
set.seed(1234)

In [None]:
# Output directory; only created when missing so that re-running the cell
# does not raise an "already exists" warning.
outs_dir <- './outs/'
if(!dir.exists(outs_dir)){
    dir.create(outs_dir)
}

In [None]:
# Genes of the immunological modules, restricted to those reported for SCNI
# or MS and sorted alphabetically.
data <- read.csv2(file = './modules/immunological_markers_plot.csv')
# unique() mirrors the metabolic module loader: a gene listed twice would
# otherwise be selected twice in the expression tables built from this vector
data <- unique(data$Gene)

data <- data[data %in% combined_dis_np]
data <- data[order(data)]
data

In [None]:
# use the diagnosis as identity and show the available groups
Idents(object) <- 'diagnosis_simp'
levels(object)

In [None]:
# Normalize SCNI expression to Healthy: log2 of the ratio between the two
# group averages for every module gene.
# NOTE(review): the ratio is taken on the values stored in the `data` slot of
# the averaged object - confirm these are on the intended scale. A Healthy
# average of 0 gives Inf/NaN here.
obj <- subset(object, idents = c('SCNI', 'Healthy'))
obj

object_av <- AverageExpression(obj, assay = "RNA", return.seurat = TRUE, verbose = FALSE)
object_av <- data.frame(object_av@assays$RNA@data[data, ])
object_av$FC <- object_av[['SCNI']] / object_av[['Healthy']]
object_av$logFC <- log2(object_av$FC)

# one-column table (genes as row names) holding the SCNI log2 ratios
scni_expression <- data.frame(SCNI = object_av$logFC, row.names = rownames(object_av))

In [None]:
# Normalize MS expression to Healthy: log2 of the ratio between the two
# group averages for every module gene.
# NOTE(review): the ratio is taken on the values stored in the `data` slot of
# the averaged object - confirm these are on the intended scale. A Healthy
# average of 0 gives Inf/NaN here.
obj <- subset(object, idents = c('MS', 'Healthy'))
obj

object_av <- AverageExpression(obj, assay = "RNA", return.seurat = TRUE, verbose = FALSE)
object_av <- data.frame(object_av@assays$RNA@data[data, ])
object_av$FC <- object_av[['MS']] / object_av[['Healthy']]
object_av$logFC <- log2(object_av$FC)

# one-column table (genes as row names) holding the MS log2 ratios
ms_expression <- data.frame(MS = object_av$logFC, row.names = rownames(object_av))

In [None]:
# Combine the SCNI and MS log2 ratios and reshape to long format
# (one row per gene and group).
expression_combined <- cbind(scni_expression, ms_expression)
expression_combined$genes <- rownames(expression_combined)
# Pivot every column except the trailing `genes` column. The former
# `1:ncol(x)-1` evaluated as `(1:ncol(x)) - 1`, i.e. positions 0..n-1,
# because `:` binds tighter than `-`.
df <- pivot_longer(expression_combined, cols = seq_len(ncol(expression_combined) - 1), names_to = "group", values_to = "expression")
min(df$expression)
max(df$expression)

In [None]:
# preview the long-format table
head(df)

In [None]:
# Tile plot of the log2 ratios against Healthy (SCNI and MS columns).
# Colour notes kept from earlier drafts: #800000, #D3556E, #fbe2e4, #f0aab3
width <- 5
height <- 12
name <- 'immunological_markers'
options(repr.plot.width = width, repr.plot.height = height)
df$group <- factor(df$group, levels = c('SCNI', 'MS'))

# colour stops at min, 0, max/6 and max of the plotted values
plot <- ggplot(df, aes(x = group, y = genes, fill = expression)) +
  geom_tile() +
  cowplot::theme_cowplot() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.ticks = element_blank()
  ) +
  ylab('') +
  scale_fill_gradientn(
    colours = c("#2881C1", "white", "#D3556E", "#671727"),
    values = scales::rescale(c(min(df$expression), 0, max(df$expression)/6, max(df$expression)))
  ) +
  coord_equal()
plot
ggsave(plot, file=paste0(outs_dir, name, '_scale.pdf'), width = width, height = height)

# Normalize expression against healthy individuals: metabolic modules

In [None]:
# Output directory; only created when missing so that re-running the cell
# does not raise an "already exists" warning.
outs_dir <- './outs/'
if(!dir.exists(outs_dir)){
    dir.create(outs_dir)
}

In [None]:
# Genes of the metabolic modules, prepared the same way as for the metabolic
# heatmaps: de-duplicated, restricted to genes reported for SCNI or MS that
# are features of `pbmc`, and sorted. Without the last filter a module gene
# absent from the expression matrix makes the row selection below fail.
data <- read.csv2(file = './modules/metabolic_markers_plot.csv')
data <- unique(data$Gene)

data <- data[data %in% combined_dis_np]
data <- data[data %in% rownames(pbmc)]
data <- data[order(data)]
data

In [None]:
# use the diagnosis as identity and show the available groups
Idents(object) <- 'diagnosis_simp'
levels(object)

In [None]:
# Normalize SCNI expression to Healthy: log2 of the ratio between the two
# group averages for every module gene.
# NOTE(review): the ratio is taken on the values stored in the `data` slot of
# the averaged object - confirm these are on the intended scale. A Healthy
# average of 0 gives Inf/NaN here.
obj <- subset(object, idents = c('SCNI', 'Healthy'))
obj

object_av <- AverageExpression(obj, assay = "RNA", return.seurat = TRUE, verbose = FALSE)
object_av <- data.frame(object_av@assays$RNA@data[data, ])
object_av$FC <- object_av[['SCNI']] / object_av[['Healthy']]
object_av$logFC <- log2(object_av$FC)

# one-column table (genes as row names) holding the SCNI log2 ratios
scni_expression <- data.frame(SCNI = object_av$logFC, row.names = rownames(object_av))

In [None]:
# Normalize MS expression to Healthy: log2 of the ratio between the two
# group averages for every module gene.
# NOTE(review): the ratio is taken on the values stored in the `data` slot of
# the averaged object - confirm these are on the intended scale. A Healthy
# average of 0 gives Inf/NaN here.
obj <- subset(object, idents = c('MS', 'Healthy'))
obj

object_av <- AverageExpression(obj, assay = "RNA", return.seurat = TRUE, verbose = FALSE)
object_av <- data.frame(object_av@assays$RNA@data[data, ])
object_av$FC <- object_av[['MS']] / object_av[['Healthy']]
object_av$logFC <- log2(object_av$FC)

# one-column table (genes as row names) holding the MS log2 ratios
ms_expression <- data.frame(MS = object_av$logFC, row.names = rownames(object_av))

In [None]:
# Combine the SCNI and MS log2 ratios and reshape to long format
# (one row per gene and group).
expression_combined <- cbind(scni_expression, ms_expression)
expression_combined$genes <- rownames(expression_combined)
# Pivot every column except the trailing `genes` column. The former
# `1:ncol(x)-1` evaluated as `(1:ncol(x)) - 1`, i.e. positions 0..n-1,
# because `:` binds tighter than `-`.
df <- pivot_longer(expression_combined, cols = seq_len(ncol(expression_combined) - 1), names_to = "group", values_to = "expression")
min(df$expression)
max(df$expression)

In [None]:
# Tile plot of the log2 ratios against Healthy (SCNI and MS columns).
# Colour notes kept from earlier drafts: #800000, #D3556E, #fbe2e4, #f0aab3
width <- 5
height <- 12
name <- 'metabolic_markers'
options(repr.plot.width = width, repr.plot.height = height)
df$group <- factor(df$group, levels = c('SCNI', 'MS'))

# three-colour scale with stops at min, max/2 and max of the plotted values
plot <- ggplot(df, aes(x = group, y = genes, fill = expression)) +
  geom_tile() +
  cowplot::theme_cowplot() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.ticks = element_blank()
  ) +
  ylab('') +
  scale_fill_gradientn(
    colours = c("white", "#D3556E", "#671727"),
    values = scales::rescale(c(min(df$expression), max(df$expression)/2, max(df$expression)))
  ) +
  coord_equal()
plot
ggsave(plot, file=paste0(outs_dir, name, '_scale.pdf'), width = width, height = height)

# Pairwise analysis

In [None]:
# Restrict the full object to the clusters of interest and keep a copy as the
# starting point for the pairwise analyses.
# NOTE(review): subsetting by `idents` assumes the identities of `pbmc` are
# the cluster labels at this point - confirm.
clusters_of_interest <- c(
    '6_GZMK',
    '8_CD74',
    '9_IKZF2',
    '10_FGFBP2'
)
object <- subset(pbmc, idents = clusters_of_interest)
object_analysis_general <- object
object_analysis_general

In [None]:
# Determine the candidate genes for each comparison partner: one column per
# diagnosis group in 'features_intersect.csv', with the NA padding and
# the genes matching the removal patterns dropped.
markers <- read.csv(file = 'features_intersect.csv', row.names = 1)
# NOTE(review): "^MT" matches every gene starting with "MT", not only the
# "MT-" prefixed ones - confirm this is intended.
markers.remove <- grep(pattern = "^TRAV|^TRBV|^TRGV|^TRDV|^RPL|^RPS|^IGKV|^IGLV|^IGHV|^IGHG|^IGLC|^TRKC|^MT", x = rownames(object), value = TRUE)

# MS
features_ms <- markers$features_ms
features_ms <- features_ms[!is.na(features_ms) & !(features_ms %in% markers.remove)]
length(features_ms)

# SCNI
features_scni <- markers$features_scni
features_scni <- features_scni[!is.na(features_scni) & !(features_scni %in% markers.remove)]
length(features_scni)

# Healthy
features_healthy <- markers$features_healthy
features_healthy <- features_healthy[!is.na(features_healthy) & !(features_healthy %in% markers.remove)]
length(features_healthy)

# MS vs SCNI analysis pairwise

In [None]:
# Prepare the object for the MS vs SCNI comparison: keep both diagnosis
# groups, drop the two excluded samples, then keep only the twin pairs that
# are present in both groups.
object_analysis <- subset(object_analysis_general, diagnosis_simp == 'MS' | diagnosis_simp == 'SCNI')
Idents(object_analysis) <- 'sample'
object_analysis <- subset(object_analysis, idents = c('AU-MS-2', 'AU-H-2'), invert = TRUE)

# twin pairs found in the SCNI group that also occur in the MS group
twin_pairs_ms <- unique(filter(object_analysis@meta.data, diagnosis_simp == 'MS')$twin_pair)
twin_pairs <- unique(filter(object_analysis@meta.data, diagnosis_simp == 'SCNI')$twin_pair)
twin_pairs <- twin_pairs[twin_pairs %in% twin_pairs_ms]

object_analysis <- subset(object_analysis, twin_pair %in% twin_pairs)
unique(object_analysis$sample)
object_analysis

In [None]:
# Attach enrichR, select the Enrichr site and create the output folder of the
# pairwise analysis.
library(enrichR)
setEnrichrSite("Enrichr")
dir.create(path = 'outs_new_pairwise')

## Start for the first partner

In [None]:
# Parameters of the pairwise comparison (partner1 vs partner2).
partner1 <- 'MS'
partner2 <- 'SCNI'
features_1 <- features_ms
features_2 <- features_scni
clusters_of_interest <- c('6_GZMK', '8_CD74', '9_IKZF2', '10_FGFBP2')
databases_list <- c(
    'GO_Biological_Process_2021',
    'Reactome_2016',
    'MSigDB_Hallmark_2020'
)
grouping_de <- 'diagnosis_simp'
logfc.threshold <- 0.05
pvalue <- 0.05

# Output directories: one folder per comparison with a plots/ subfolder.
dir_path <- paste0('./outs_new_pairwise/', partner1, '_vs_', partner2, '_pairwise')
dir.create(dir_path)
plots_dir <- paste0(dir_path, '/plots/')
dir.create(plots_dir)

# Colours: all three diagnosis groups, plus one colour per partner.
colours_diagnosis_groups <- c('#1D5B60', '#939393', '#8D2413')
colour1 <- '#8D2413'
colour2 <- '#939393'

# Plotting object: averages per diagnosis group over `object`.
Idents(object) <- grouping_de
levels(object) <- c('Healthy', 'SCNI', 'MS')
object_av <- AverageExpression(
    object,
    assay = "RNA",
    return.seurat = TRUE,
    verbose = FALSE
)

# Analysis object: identities are the clusters, in the order given above.
Idents(object_analysis) <- 'cd8_coded'
levels(object_analysis) <- clusters_of_interest

# Differential expression of partner1 vs partner2 within every cluster of
# interest (logistic regression with the twin pair as latent variable, only
# genes higher in partner1). Returns one table per cluster, keeping the rows
# with an adjusted p value below `pvalue` and adding the columns `genes` and
# `cluster`. lapply() builds the list in one pass instead of appending inside
# a `1:length()` loop.
de_list_partner1 <- lapply(clusters_of_interest, function(cluster){
    object_subset <- subset(object_analysis, idents = cluster)
    markers_partner1 <- FindMarkers(object_subset, ident.1 = partner1, ident.2 = partner2, group.by = grouping_de,
                                    features = features_1, test.use = 'LR', latent.vars = 'twin_pair',
                                    only.pos = TRUE, logfc.threshold = logfc.threshold)
    markers_partner1$genes <- rownames(markers_partner1)
    markers_partner1$cluster <- cluster
    markers_partner1[markers_partner1$p_val_adj < pvalue, ]
})

# Search through the markers: for every DE gene collect its rows from all
# clusters in which it was found and summarise them into a single row
# (mean / max / min log2FC, mean adjusted p value, list of clusters).
# A gene is kept when it was found in more than `hits` clusters.
hits <- 0
selected_markers_partner1 <- c()

for(i in seq_along(de_list_partner1)){
    markers_cluster <- de_list_partner1[[i]]
    # indices of the other clusters
    number_of_othersamples <- setdiff(seq_along(de_list_partner1), i)

    # seq_len() skips a cluster without significant genes; `1:nrow()` would
    # iterate over c(1, 0) there and fail on the zero-length gene name
    for(r in seq_len(nrow(markers_cluster))){
        gene_to_test <- markers_cluster$genes[r]
        intersect_gene <- filter(markers_cluster, genes == gene_to_test)
        # look the gene up in the other clusters
        for(o in number_of_othersamples){
           markers_other_cluster <- de_list_partner1[[o]]
           if(gene_to_test %in% markers_other_cluster$genes){
             intersect_gene <- rbind(intersect_gene, filter(markers_other_cluster, genes == gene_to_test))
           }
        }
        # add the summarised result to the final table
        if(nrow(intersect_gene) > hits){
            gene_to_add <- intersect_gene[1, ]
            gene_to_add$avg_log2FC <- mean(intersect_gene$avg_log2FC)
            gene_to_add$max_log2FC <- max(intersect_gene$avg_log2FC)
            gene_to_add$min_log2FC <- min(intersect_gene$avg_log2FC)
            gene_to_add$avg_p_val_adj <- mean(intersect_gene$p_val_adj)
            gene_to_add$cluster <- paste0(intersect_gene$cluster, collapse = ', ')
            selected_markers_partner1 <- rbind(selected_markers_partner1, gene_to_add)
        }
    }
}

# fail with a clear message instead of a subsetting error on NULL below
if(is.null(selected_markers_partner1)){
    stop('No differentially expressed genes passed the filters for ', partner1, call. = FALSE)
}

# a gene found in several clusters was added once per cluster: keep the first
# entry, then sort by decreasing mean log2FC
selected_markers_partner1 <- selected_markers_partner1[!duplicated(selected_markers_partner1$genes), ]
selected_markers_partner1 <- selected_markers_partner1[order(-selected_markers_partner1$avg_log2FC), ]

# Prepare the verification step on the averaged object: order the selected
# genes by their scaled average in partner1 and record which of them lie
# above / below partner2 on average.

# directory for the violin plots of this partner
plots_partner1 <- paste0(plots_dir, partner1, '/')
dir.create(plots_partner1)

# scaled averages of the selected genes, highest partner1 value first
ordered_genes <- as.data.frame(object_av@assays$RNA@scale.data[selected_markers_partner1$genes, ])
ordered_genes <- ordered_genes[order(-ordered_genes[, partner1]), ]

# genes higher in partner1 than in partner2 on average
genes_higher <- rownames(ordered_genes)[ordered_genes[[partner1]] > ordered_genes[[partner2]]]
write.csv(genes_higher, file = paste0(plots_partner1, 'above_the_partner_average.csv'))

# genes lower in partner1 than in partner2 on average
genes_lower <- rownames(ordered_genes)[ordered_genes[[partner1]] < ordered_genes[[partner2]]]
write.csv(genes_lower, file = paste0(plots_partner1, 'below_the_partner_average.csv'))

# from here on only the ordered gene names are needed
ordered_genes <- rownames(ordered_genes)

# Heatmap of the (at most) 20 top-ordered genes before validation.
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width = 5, repr.plot.height = 12)
heatmap <- DoHeatmap(
    object_av,
    features = ordered_genes_plot,
    draw.lines = FALSE,
    size = 7,
    angle = 270,
    hjust = 1,
    raster = FALSE,
    group.colors = colours_diagnosis_groups
) +
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')
    ) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
ggsave(heatmap, file = paste0(plots_partner1, 'heatmap_top20_1.pdf'), width = 5, height = 12)

# One violin plot per ordered gene, saved as <gene>.pdf in the partner folder.
# Iterating over the gene names directly does nothing for an empty vector,
# where `1:length()` would try to plot a missing gene.
for(gene in ordered_genes){
    plot <- VlnPlot(object, features = gene, pt.size = 0.01, cols = colours_diagnosis_groups)
    # set the transparency on the second layer of the plot
    plot$layers[[2]]$aes_params$alpha <- 0.1
    ggsave(plot, file = paste0(plots_partner1, gene, '.pdf'), width = 6, height = 6)
}

In [None]:
# Keep only the genes validated on the full cohort: drop those that are lower
# in partner1 than in partner2 on average, then redraw the top-20 heatmap.
genes_remove <- genes_lower
selected_markers_partner1 <- selected_markers_partner1[!selected_markers_partner1$genes %in% genes_remove, ]

# remaining genes ordered by their scaled average in partner1, highest first
ordered_genes <- as.data.frame(object_av@assays$RNA@scale.data[selected_markers_partner1$genes, ])
ordered_genes <- ordered_genes[order(-ordered_genes[, partner1]), ]
ordered_genes <- rownames(ordered_genes)

# at most the 20 top-ordered genes
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width = 5, repr.plot.height = 12)
heatmap <- DoHeatmap(
    object_av,
    features = ordered_genes_plot,
    draw.lines = FALSE,
    size = 7,
    angle = 270,
    hjust = 1,
    raster = FALSE,
    group.colors = colours_diagnosis_groups
) +
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')
    ) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
ggsave(heatmap, file = paste0(dir_path, '/', partner1, '_heatmap_top20.pdf'), width = 5, height = 12)

In [None]:
# Enrichr pathway enrichment analysis (PEA) for the first partner.
# For every database in `databases_list`:
#   1. the full result table is written to CSV,
#   2. the (up to) 20 terms with the lowest adjusted p-value are drawn as a
#      lollipop plot,
#   3. up to 20 genes behind those terms are shown as a heatmap of the averaged
#      object and as a stacked violin plot.
dir_pea_partner1 <- paste0(dir_path, '/PEA_', partner1, '/')
dir.create(dir_pea_partner1, showWarnings = FALSE)

# Axis transformation x -> -log_base(x), so smaller p-values are drawn further
# along the axis. Defined once instead of on every loop iteration.
reverselog_trans <- function(base = exp(1)) {
    trans <- function(x) -log(x, base)
    inv <- function(x) base^(-x)
    trans_new(paste0("reverselog-", format(base)), trans, inv,
              log_breaks(base = base),
              domain = c(1e-100, Inf))
}

for (db in seq_along(databases_list)) {
    db_name <- databases_list[db]
    enriched <- enrichr(selected_markers_partner1$genes, databases = db_name)[[1]]

    # Nothing to save or plot when the database returned no terms; the plotting
    # calls below cannot handle an empty table.
    if (is.null(enriched) || nrow(enriched) == 0) {
        message('No enriched terms for ', partner1, ' in ', db_name, '; skipping.')
        next
    }

    # Sort by descending adjusted p-value, i.e. the most significant terms are
    # the LAST rows; the factor levels keep this order for the y axis.
    enriched <- enriched[order(-enriched$Adjusted.P.value), ]
    enriched$Term <- factor(enriched$Term, levels = unique(enriched$Term))
    #save the positive enriched pathways
    write.csv(enriched, file = paste0(dir_pea_partner1, partner1, '_', db_name, '_positive_', '_pea_list.csv'))

    # Last 20 rows = 20 most significant terms (all rows if there are fewer).
    top_terms <- tail(enriched, 20)

    #2881c1 - for blue
    #d3556e - for red
    options(repr.plot.width = 22, repr.plot.height = 11)
    plot_positive <- ggplot(top_terms, aes(y = Term, x = Adjusted.P.value)) +
        geom_vline(xintercept = .05, color = "grey", linetype = "dashed") +
        geom_segment(aes(yend = Term, xend = 1), colour = "black") +
        geom_point(shape = 21, aes(size = abs(Combined.Score)), fill = colour1) +
        scale_x_continuous(trans = reverselog_trans(10)) +
        scale_size_continuous(range = c(2, 13)) +
        theme_tufte() + xlab("p value (adj)") + ylab("") +
        theme(text = element_text(family = "Helvetica", size = 26),
              axis.text.x = element_text(angle = 45, hjust = 1),
              legend.position = "right") +
        ggtitle(paste(partner1, 'positive', db_name))
    ggsave(
        filename = paste0(dir_pea_partner1, partner1, '_', db_name, '_positive', '.pdf'),
        plot = plot_positive,
        width = 30,
        height = 11
    )

    #plot the genes
    # The `Genes` column holds ';'-separated gene lists. They are concatenated
    # and reversed so the genes of the most significant term (last row) come
    # first, then de-duplicated and capped at 20.
    genes_to_plot <- strsplit(paste(top_terms$Genes, collapse = ';'), split = ';')[[1]]
    genes_to_plot <- unique(rev(genes_to_plot))
    genes_to_plot <- head(genes_to_plot, 20)

    heatmap_plot <- DoHeatmap(
        object_av,
        features = genes_to_plot,
        draw.lines = FALSE,
        raster = FALSE,
        group.colors = colours_diagnosis_groups
    ) +
        theme(text = element_text(size = 20, face = "bold")) +
        scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
    ggsave(
        filename = paste0(dir_pea_partner1, partner1, '_', db_name, '_HEATMAP_genes_positive', '.pdf'),
        plot = heatmap_plot,
        width = 8,
        height = 11
    )

    # The stacked violin plot is only drawn for more than one gene, matching
    # the guard used in the second partner's loop.
    if (length(genes_to_plot) > 1) {
        vlnplot <- VlnPlot(
            object,
            features = genes_to_plot,
            stack = TRUE,
            flip = TRUE,
            cols = colours_diagnosis_groups,
            fill.by = "ident"
        ) +
            theme(legend.position = "none",
                  text = element_text(size = 17),
                  axis.text = element_text(size = 17))
        ggsave(
            filename = paste0(dir_pea_partner1, partner1, '_', db_name, '_StackedVIOLIN_genes_positive', '.pdf'),
            plot = vlnplot,
            width = 8,
            height = 11
        )
    }
}
# end of the enrichr loop

## Start for the second partner

In [None]:
# Differential expression for the second partner, computed separately in every
# cluster of interest: partner2 (ident.1) vs partner1 (ident.2), logistic
# regression test with `twin_pair` as latent variable, positive markers only.
Idents(object_analysis) <- 'cd8_coded'
levels(object_analysis) <- clusters_of_interest

# One result table per cluster; the list is preallocated instead of being grown
# with append(), and seq_along() is safe for an empty cluster vector.
de_list_partner2 <- vector("list", length(clusters_of_interest))

for (i in seq_along(clusters_of_interest)) {
    object_subset <- subset(object_analysis, idents = clusters_of_interest[i])
    markers_partner2 <- FindMarkers(
        object_subset,
        ident.1 = partner2,
        ident.2 = partner1,
        group.by = grouping_de,
        features = features_2,
        test.use = 'LR',
        latent.vars = 'twin_pair',
        only.pos = TRUE,
        logfc.threshold = logfc.threshold
    )
    markers_partner2$genes <- rownames(markers_partner2)
    markers_partner2$cluster <- clusters_of_interest[i]
    # keep only genes below the adjusted p-value cutoff
    markers_partner2 <- markers_partner2[markers_partner2$p_val_adj < pvalue, ]
    de_list_partner2[[i]] <- markers_partner2
}

# Collapse the per-cluster DE tables of the second partner into one table with
# a single row per gene. For every gene, the rows of all clusters in which it
# passed the filters are gathered; a gene is kept when it was found in more
# than `hits` clusters (hits = 0 keeps every gene). The kept row is the gene's
# first hit, with avg_log2FC replaced by the mean over clusters, plus the
# max/min log2FC, the mean adjusted p-value and the list of clusters.
hits <- 0
rows_to_keep <- list()

for (i in seq_along(de_list_partner2)) {
    markers_cluster <- de_list_partner2[[i]]
    # indices of all the other clusters
    number_of_othersamples <- setdiff(seq_along(de_list_partner2), i)

    # seq_len() gives zero iterations for a cluster without significant genes;
    # 1:nrow() would iterate over r = 1 and r = 0 and fail on the empty gene name.
    for (r in seq_len(nrow(markers_cluster))) {
        gene_to_test <- markers_cluster$genes[r]
        intersect_gene <- dplyr::filter(markers_cluster, genes == gene_to_test)
        # look the gene up in every other cluster
        for (o in number_of_othersamples) {
            markers_other_cluster <- de_list_partner2[[o]]
            if (gene_to_test %in% markers_other_cluster$genes) {
                intersect_gene <- rbind(intersect_gene, dplyr::filter(markers_other_cluster, genes == gene_to_test))
            }
        }
        # summarise the gene over all clusters where it was found
        if (nrow(intersect_gene) > hits) {
            gene_to_add <- intersect_gene[1, ]
            gene_to_add$avg_log2FC <- mean(intersect_gene$avg_log2FC)
            gene_to_add$max_log2FC <- max(intersect_gene$avg_log2FC)
            gene_to_add$min_log2FC <- min(intersect_gene$avg_log2FC)
            gene_to_add$avg_p_val_adj <- mean(intersect_gene$p_val_adj)
            gene_to_add$cluster <- paste0(intersect_gene$cluster, collapse = ', ')
            rows_to_keep[[length(rows_to_keep) + 1]] <- gene_to_add
        }
    }
}

# Bind once instead of growing the table with rbind() inside the loop.
selected_markers_partner2 <- do.call(rbind, rows_to_keep)
if (is.null(selected_markers_partner2)) {
    stop('No marker genes passed the filters for ', partner2, '.', call. = FALSE)
}
# A gene found in several clusters was added once per cluster: keep its first row.
selected_markers_partner2 <- selected_markers_partner2[!duplicated(selected_markers_partner2$genes), ]
selected_markers_partner2 <- selected_markers_partner2[order(-selected_markers_partner2$avg_log2FC), ]

# Plot the intersected markers of the second partner for the next verification
# step: gene lists above/below the other partner's average, a top-20 heatmap
# and one violin plot per gene.
plots_partner2 <- paste0(plots_dir, partner2, '/')
dir.create(plots_partner2, showWarnings = FALSE)

# Scaled average expression of the selected genes, sorted by the partner2
# column (highest first). drop = FALSE keeps the matrix shape when only one
# gene was selected; otherwise the column lookups below fail.
scaled_avg <- object_av@assays$RNA@scale.data[selected_markers_partner2$genes, , drop = FALSE]
scaled_avg <- as.data.frame(scaled_avg)
scaled_avg <- scaled_avg[order(-scaled_avg[[partner2]]), , drop = FALSE]

# Genes whose scaled average is higher in partner2 than in partner1. #attention!
genes_higher <- rownames(scaled_avg)[scaled_avg[[partner2]] > scaled_avg[[partner1]]]
#save the genes higher as the partner on average
write.csv(genes_higher, file = paste0(plots_partner2, 'above_the_partner_average.csv'))

# Genes whose scaled average is lower in partner2 than in partner1. #attention!
genes_lower <- rownames(scaled_avg)[scaled_avg[[partner2]] < scaled_avg[[partner1]]]
#save the genes lower as the partner on average
write.csv(genes_lower, file = paste0(plots_partner2, 'below_the_partner_average.csv'))

#prepare the genes for heatmap
ordered_genes <- rownames(scaled_avg)

# Plot at most the 20 top-ranked genes.
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width = 5, repr.plot.height = 12)
heatmap <- DoHeatmap(
    object_av,
    features = ordered_genes_plot,
    draw.lines = FALSE,
    size = 7,
    angle = 270,
    hjust = 1,
    raster = FALSE,
    group.colors = colours_diagnosis_groups
) +
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')
    ) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
# `filename`/`plot` are named explicitly instead of relying on partial matching of `file =`.
ggsave(
    filename = paste0(plots_partner2, 'heatmap_top20_1.pdf'),
    plot = heatmap,
    width = 5,
    height = 12
)

# One violin plot PDF per gene; seq_along() is safe for an empty gene vector.
for (g in seq_along(ordered_genes)) {
    plot <- VlnPlot(
        object,
        features = ordered_genes[g],
        pt.size = 0.01,
        cols = colours_diagnosis_groups
    )
    # Make the second layer of the plot semi-transparent
    # (presumably the point layer drawn over the violins -- confirm).
    plot$layers[[2]]$aes_params$alpha <- 0.1
    ggsave(
        filename = paste0(plots_partner2, ordered_genes[g], '.pdf'),
        plot = plot,
        width = 6,
        height = 6
    )
}

In [None]:
# Keep only the markers validated on the full cohort: every gene listed in
# `genes_remove` (by default the genes that are lower in this partner than in
# the other one on average) is dropped from the second partner's marker table.
genes_remove <- genes_lower
selected_markers_partner2 <- selected_markers_partner2[!selected_markers_partner2$genes %in% genes_remove, ]

# Rank the remaining genes by their scaled average expression in partner2
# (highest first). drop = FALSE keeps the matrix shape when a single gene is
# left; without it the row collapses to a vector and the column lookup fails.
scaled_avg <- object_av@assays$RNA@scale.data[selected_markers_partner2$genes, , drop = FALSE]
ordered_genes <- rownames(scaled_avg)[order(-scaled_avg[, partner2])]

# Plot at most the 20 top-ranked genes.
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width = 5, repr.plot.height = 12)
heatmap <- DoHeatmap(
    object_av,
    features = ordered_genes_plot,
    draw.lines = FALSE,
    size = 7,
    angle = 270,
    hjust = 1,
    raster = FALSE,
    group.colors = colours_diagnosis_groups
) +
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')
    ) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
# `filename`/`plot` are named explicitly instead of relying on partial matching of `file =`.
ggsave(
    filename = paste0(dir_path, '/', partner2, '_heatmap_top20.pdf'),
    plot = heatmap,
    width = 5,
    height = 12
)

In [None]:
# Enrichr pathway enrichment analysis (PEA) for the second partner.
# For every database in `databases_list`:
#   1. the full result table is written to CSV,
#   2. the (up to) 20 terms with the lowest adjusted p-value are drawn as a
#      lollipop plot,
#   3. up to 20 genes behind those terms are shown as a heatmap of the averaged
#      object and as a stacked violin plot.
dir_pea_partner2 <- paste0(dir_path, '/PEA_', partner2, '/')
dir.create(dir_pea_partner2, showWarnings = FALSE)

# Axis transformation x -> -log_base(x), so smaller p-values are drawn further
# along the axis. Defined once instead of on every loop iteration.
reverselog_trans <- function(base = exp(1)) {
    trans <- function(x) -log(x, base)
    inv <- function(x) base^(-x)
    trans_new(paste0("reverselog-", format(base)), trans, inv,
              log_breaks(base = base),
              domain = c(1e-100, Inf))
}

for (db in seq_along(databases_list)) {
    db_name <- databases_list[db]
    enriched <- enrichr(selected_markers_partner2$genes, databases = db_name)[[1]]

    # Nothing to save or plot when the database returned no terms; the plotting
    # calls below cannot handle an empty table.
    if (is.null(enriched) || nrow(enriched) == 0) {
        message('No enriched terms for ', partner2, ' in ', db_name, '; skipping.')
        next
    }

    # Sort by descending adjusted p-value, i.e. the most significant terms are
    # the LAST rows; the factor levels keep this order for the y axis.
    enriched <- enriched[order(-enriched$Adjusted.P.value), ]
    enriched$Term <- factor(enriched$Term, levels = unique(enriched$Term))
    #save the positive enriched pathways
    write.csv(enriched, file = paste0(dir_pea_partner2, partner2, '_', db_name, '_positive_', '_pea_list.csv'))

    # Last 20 rows = 20 most significant terms (all rows if there are fewer).
    top_terms <- tail(enriched, 20)

    #2881c1 - for blue
    #d3556e - for red
    options(repr.plot.width = 22, repr.plot.height = 11)
    plot_positive <- ggplot(top_terms, aes(y = Term, x = Adjusted.P.value)) +
        geom_vline(xintercept = .05, color = "grey", linetype = "dashed") +
        geom_segment(aes(yend = Term, xend = 1), colour = "black") +
        geom_point(shape = 21, aes(size = abs(Combined.Score)), fill = colour2) +
        scale_x_continuous(trans = reverselog_trans(10)) +
        scale_size_continuous(range = c(2, 13)) +
        theme_tufte() + xlab("p value (adj)") + ylab("") +
        theme(text = element_text(family = "Helvetica", size = 26),
              axis.text.x = element_text(angle = 45, hjust = 1),
              legend.position = "right") +
        ggtitle(paste(partner2, 'positive', db_name))
    ggsave(
        filename = paste0(dir_pea_partner2, partner2, '_', db_name, '_positive', '.pdf'),
        plot = plot_positive,
        width = 30,
        height = 11
    )

    #plot the genes
    # The `Genes` column holds ';'-separated gene lists. They are concatenated
    # and reversed so the genes of the most significant term (last row) come
    # first, then de-duplicated and capped at 20.
    genes_to_plot <- strsplit(paste(top_terms$Genes, collapse = ';'), split = ';')[[1]]
    genes_to_plot <- unique(rev(genes_to_plot))
    genes_to_plot <- head(genes_to_plot, 20)

    heatmap_plot <- DoHeatmap(
        object_av,
        features = genes_to_plot,
        draw.lines = FALSE,
        raster = FALSE,
        group.colors = colours_diagnosis_groups
    ) +
        theme(text = element_text(size = 20, face = "bold")) +
        scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
    ggsave(
        filename = paste0(dir_pea_partner2, partner2, '_', db_name, '_HEATMAP_genes_positive', '.pdf'),
        plot = heatmap_plot,
        width = 8,
        height = 11
    )

    # The stacked violin plot is only drawn for more than one gene.
    if (length(genes_to_plot) > 1) {
        vlnplot <- VlnPlot(
            object,
            features = genes_to_plot,
            stack = TRUE,
            flip = TRUE,
            cols = colours_diagnosis_groups,
            fill.by = "ident"
        ) +
            theme(legend.position = "none",
                  text = element_text(size = 17),
                  axis.text = element_text(size = 17))
        ggsave(
            filename = paste0(dir_pea_partner2, partner2, '_', db_name, '_StackedVIOLIN_genes_positive', '.pdf'),
            plot = vlnplot,
            width = 8,
            height = 11
        )
    }
}
# end of the enrichr loop

## Combine the final result and plot volcano

In [None]:
# Combine the marker tables of both partners, save them as CSV and draw a
# volcano plot: partner1 genes on the right (positive log2FC), partner2 genes
# on the left (negated log2FC). The first `number_of_genes_volcano` rows of
# each table are highlighted and labelled.
number_of_genes_volcano <- 25
options(repr.plot.width = 10, repr.plot.height = 8)
selected_markers_partner1$partner <- partner1
write.csv(selected_markers_partner1, file = paste0(dir_path, '/DGE_', partner1, '.csv'))
selected_markers_partner2$partner <- partner2
write.csv(selected_markers_partner2, file = paste0(dir_path, '/DGE_', partner2, '.csv'))

# Mirror partner2 onto the negative side of the x axis. The markers were
# computed with only.pos = TRUE, so their log2FC is positive and -abs() equals a
# plain negation on the first run, while re-running this cell no longer flips
# the sign back. Only avg_log2FC is mirrored; max_log2FC/min_log2FC keep their
# original sign.
selected_markers_partner2$avg_log2FC <- -abs(selected_markers_partner2$avg_log2FC)
selected_markers_combined <- rbind(selected_markers_partner1, selected_markers_partner2)

# head() returns fewer rows when a table is shorter than the requested number;
# indexing with 1:n would pad the table with all-NA rows.
top_markers_partner1 <- head(selected_markers_partner1, number_of_genes_volcano)
top_markers_partner2 <- head(selected_markers_partner2, number_of_genes_volcano)

volcano <- ggplot(selected_markers_combined, aes(x = avg_log2FC, y = -log10(avg_p_val_adj))) +
    geom_vline(xintercept = 0) +
    geom_hline(yintercept = -log10(0.05), color = "grey", linetype = "dashed") +
    geom_point(data = selected_markers_combined,
               color = "grey", alpha = 1) +
    geom_point(data = top_markers_partner1,
               fill = colour1, alpha = 1, shape = 21, size = 2.5) +
    geom_point(data = top_markers_partner2,
               fill = colour2, alpha = 1, shape = 21, size = 2.5) +
    geom_text_repel(data = rbind(top_markers_partner1, top_markers_partner2),
                    max.overlaps = number_of_genes_volcano,
                    aes(label = genes)) +
    theme_linedraw() +
    theme(panel.grid = element_blank(), legend.position = "none") +
    xlab("log2(average fold change)") +
    # NOTE(review): the y axis shows the adjusted p-value averaged over clusters
    # (avg_p_val_adj); the label text is kept as in the original figure.
    ylab("-log10(p-value)") +
    ggtitle(paste(partner2, '(left)', 'vs', partner1, '(right)'))
# `filename`/`plot` are named explicitly instead of relying on partial matching of `file =`.
ggsave(
    filename = paste0(dir_path, '/', partner1, '_vs_', partner2, '_volcano_plot.pdf'),
    plot = volcano,
    height = 8,
    width = 10
)

write.csv(selected_markers_combined, file = paste0(dir_path, '/DGE_', partner1, '_vs_', partner2, '.csv'))

# MS vs Healthy analysis pairwise

In [None]:
# Build the analysis object for the MS vs Healthy comparison: restrict the
# general object to these two diagnosis groups first.
object_analysis <- subset(object_analysis_general, diagnosis_simp == 'MS' | diagnosis_simp == 'Healthy')

# Twin pairs that are represented in BOTH groups (Healthy pairs that also
# occur among the MS cells); only cells of those pairs are kept.
twin_pairs <- local({
    cell_meta <- object_analysis@meta.data
    pairs_healthy <- unique(cell_meta$twin_pair[which(cell_meta$diagnosis_simp == 'Healthy')])
    pairs_ms <- unique(cell_meta$twin_pair[which(cell_meta$diagnosis_simp == 'MS')])
    pairs_healthy[pairs_healthy %in% pairs_ms]
})
object_analysis <- subset(object_analysis, twin_pair %in% twin_pairs)

# Show the remaining sample numbers and a summary of the object.
unique(object_analysis$samplenumb)
object_analysis

In [None]:
# Attach enrichR and select the "Enrichr" site for the enrichment queries below.
library(enrichR)
setEnrichrSite("Enrichr")

## Start for the first partner

In [None]:
# Configuration of the pairwise MS vs Healthy comparison, followed by the
# per-cluster differential expression for the first partner.
partner1 <- 'MS'
partner2 <- 'Healthy'
# gene sets tested for each partner (defined elsewhere in the notebook)
features_1 <- features_ms
features_2 <- features_healthy
clusters_of_interest <- c('6_GZMK', '8_CD74', '9_IKZF2', '10_FGFBP2')
# Enrichr databases queried in the enrichment cells
databases_list <- c('GO_Biological_Process_2021', 'Reactome_2016', 'MSigDB_Hallmark_2020')
# metadata column holding the groups that are compared
grouping_de <- 'diagnosis_simp'
# log fold-change threshold passed to FindMarkers and adjusted p-value cutoff
logfc.threshold <- 0.05
pvalue <- 0.05

#create output dirs
dir_path <- paste0('./outs_new_pairwise/', partner1, '_vs_', partner2, '_pairwise')
# recursive = TRUE also creates './outs_new_pairwise/' when it does not exist
# yet; showWarnings = FALSE keeps re-runs quiet when the directories exist.
dir.create(dir_path, recursive = TRUE, showWarnings = FALSE)
plots_dir <- paste0(dir_path, '/plots/')
dir.create(plots_dir, showWarnings = FALSE)

#define colors
# one colour per identity level of `object` set below (Healthy, SCNI, MS)
colours_diagnosis_groups <- c('#1D5B60', '#939393', '#8D2413')
colour1 <- '#8D2413'
colour2 <- '#1D5B60'

#object - plotting object
Idents(object) <- grouping_de
levels(object) <- c('Healthy', 'SCNI', 'MS')
# per-group average expression, returned as a Seurat object for the heatmaps
object_av <- AverageExpression(object, assay = "RNA", return.seurat = TRUE, verbose = FALSE)
Idents(object_analysis) <- 'cd8_coded'
levels(object_analysis) <- clusters_of_interest

# Differential expression per cluster: partner1 (ident.1) vs partner2
# (ident.2), logistic regression test with `twin_pair` as latent variable,
# positive markers only. The list is preallocated instead of being grown with
# append(), and seq_along() is safe for an empty cluster vector.
de_list_partner1 <- vector("list", length(clusters_of_interest))
for (i in seq_along(clusters_of_interest)) {
    object_subset <- subset(object_analysis, idents = clusters_of_interest[i])
    markers_partner1 <- FindMarkers(
        object_subset,
        ident.1 = partner1,
        ident.2 = partner2,
        group.by = grouping_de,
        features = features_1,
        test.use = 'LR',
        latent.vars = 'twin_pair',
        only.pos = TRUE,
        logfc.threshold = logfc.threshold,
        verbose = FALSE
    )
    markers_partner1$genes <- rownames(markers_partner1)
    markers_partner1$cluster <- clusters_of_interest[i]
    # keep only genes below the adjusted p-value cutoff
    markers_partner1 <- markers_partner1[markers_partner1$p_val_adj < pvalue, ]
    de_list_partner1[[i]] <- markers_partner1
}

# Collapse the per-cluster DE tables of the first partner into one table with
# a single row per gene. For every gene, the rows of all clusters in which it
# passed the filters are gathered; a gene is kept when it was found in more
# than `hits` clusters (hits = 0 keeps every gene). The kept row is the gene's
# first hit, with avg_log2FC replaced by the mean over clusters, plus the
# max/min log2FC, the mean adjusted p-value and the list of clusters.
hits <- 0
rows_to_keep <- list()

for (i in seq_along(de_list_partner1)) {
    markers_cluster <- de_list_partner1[[i]]
    # indices of all the other clusters
    number_of_othersamples <- setdiff(seq_along(de_list_partner1), i)

    # seq_len() gives zero iterations for a cluster without significant genes;
    # 1:nrow() would iterate over r = 1 and r = 0 and fail on the empty gene name.
    for (r in seq_len(nrow(markers_cluster))) {
        gene_to_test <- markers_cluster$genes[r]
        intersect_gene <- dplyr::filter(markers_cluster, genes == gene_to_test)
        # look the gene up in every other cluster
        for (o in number_of_othersamples) {
            markers_other_cluster <- de_list_partner1[[o]]
            if (gene_to_test %in% markers_other_cluster$genes) {
                intersect_gene <- rbind(intersect_gene, dplyr::filter(markers_other_cluster, genes == gene_to_test))
            }
        }
        # summarise the gene over all clusters where it was found
        if (nrow(intersect_gene) > hits) {
            gene_to_add <- intersect_gene[1, ]
            gene_to_add$avg_log2FC <- mean(intersect_gene$avg_log2FC)
            gene_to_add$max_log2FC <- max(intersect_gene$avg_log2FC)
            gene_to_add$min_log2FC <- min(intersect_gene$avg_log2FC)
            gene_to_add$avg_p_val_adj <- mean(intersect_gene$p_val_adj)
            gene_to_add$cluster <- paste0(intersect_gene$cluster, collapse = ', ')
            rows_to_keep[[length(rows_to_keep) + 1]] <- gene_to_add
        }
    }
}

# Bind once instead of growing the table with rbind() inside the loop.
selected_markers_partner1 <- do.call(rbind, rows_to_keep)
if (is.null(selected_markers_partner1)) {
    stop('No marker genes passed the filters for ', partner1, '.', call. = FALSE)
}
# A gene found in several clusters was added once per cluster: keep its first row.
selected_markers_partner1 <- selected_markers_partner1[!duplicated(selected_markers_partner1$genes), ]
selected_markers_partner1 <- selected_markers_partner1[order(-selected_markers_partner1$avg_log2FC), ]

# Plot the intersected markers of the first partner for the next verification
# step: gene lists above/below the other partner's average, a top-20 heatmap
# and one violin plot per gene.
plots_partner1 <- paste0(plots_dir, partner1, '/')
dir.create(plots_partner1, showWarnings = FALSE)

# Scaled average expression of the selected genes, sorted by the partner1
# column (highest first). drop = FALSE keeps the matrix shape when only one
# gene was selected; otherwise the column lookups below fail.
scaled_avg <- object_av@assays$RNA@scale.data[selected_markers_partner1$genes, , drop = FALSE]
scaled_avg <- as.data.frame(scaled_avg)
scaled_avg <- scaled_avg[order(-scaled_avg[[partner1]]), , drop = FALSE]

# Genes whose scaled average is higher in partner1 than in partner2. #attention!
genes_higher <- rownames(scaled_avg)[scaled_avg[[partner1]] > scaled_avg[[partner2]]]
#save the genes higher as the partner on average
write.csv(genes_higher, file = paste0(plots_partner1, 'above_the_partner_average.csv'))

# Genes whose scaled average is lower in partner1 than in partner2. #attention!
genes_lower <- rownames(scaled_avg)[scaled_avg[[partner1]] < scaled_avg[[partner2]]]
#save the genes lower as the partner on average
write.csv(genes_lower, file = paste0(plots_partner1, 'below_the_partner_average.csv'))

#prepare the genes for heatmap
ordered_genes <- rownames(scaled_avg)

# Plot at most the 20 top-ranked genes.
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width = 5, repr.plot.height = 12)
heatmap <- DoHeatmap(
    object_av,
    features = ordered_genes_plot,
    draw.lines = FALSE,
    size = 7,
    angle = 270,
    hjust = 1,
    raster = FALSE,
    group.colors = colours_diagnosis_groups
) +
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')
    ) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
# `filename`/`plot` are named explicitly instead of relying on partial matching of `file =`.
ggsave(
    filename = paste0(plots_partner1, 'heatmap_top20_1.pdf'),
    plot = heatmap,
    width = 5,
    height = 12
)

# One violin plot PDF per gene; seq_along() is safe for an empty gene vector.
for (g in seq_along(ordered_genes)) {
    plot <- VlnPlot(
        object,
        features = ordered_genes[g],
        pt.size = 0.01,
        cols = colours_diagnosis_groups
    )
    # Make the second layer of the plot semi-transparent
    # (presumably the point layer drawn over the violins -- confirm).
    plot$layers[[2]]$aes_params$alpha <- 0.1
    ggsave(
        filename = paste0(plots_partner1, ordered_genes[g], '.pdf'),
        plot = plot,
        width = 6,
        height = 6
    )
}

In [None]:
# Keep only the markers validated on the full cohort: every gene listed in
# `genes_remove` (by default the genes that are lower in this partner than in
# the other one on average) is dropped from the first partner's marker table.
genes_remove <- genes_lower
selected_markers_partner1 <- selected_markers_partner1[!selected_markers_partner1$genes %in% genes_remove, ]

# Rank the remaining genes by their scaled average expression in partner1
# (highest first). drop = FALSE keeps the matrix shape when a single gene is
# left; without it the row collapses to a vector and the column lookup fails.
scaled_avg <- object_av@assays$RNA@scale.data[selected_markers_partner1$genes, , drop = FALSE]
ordered_genes <- rownames(scaled_avg)[order(-scaled_avg[, partner1])]

# Plot at most the 20 top-ranked genes.
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width = 5, repr.plot.height = 12)
heatmap <- DoHeatmap(
    object_av,
    features = ordered_genes_plot,
    draw.lines = FALSE,
    size = 7,
    angle = 270,
    hjust = 1,
    raster = FALSE,
    group.colors = colours_diagnosis_groups
) +
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')
    ) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
# `filename`/`plot` are named explicitly instead of relying on partial matching of `file =`.
ggsave(
    filename = paste0(dir_path, '/', partner1, '_heatmap_top20.pdf'),
    plot = heatmap,
    width = 5,
    height = 12
)

In [None]:
# Enrichr pathway enrichment analysis (PEA) for the first partner.
# For every database in `databases_list`:
#   1. the full result table is written to CSV,
#   2. the (up to) 20 terms with the lowest adjusted p-value are drawn as a
#      lollipop plot,
#   3. up to 20 genes behind those terms are shown as a heatmap of the averaged
#      object and as a stacked violin plot.
dir_pea_partner1 <- paste0(dir_path, '/PEA_', partner1, '/')
dir.create(dir_pea_partner1, showWarnings = FALSE)

# Axis transformation x -> -log_base(x), so smaller p-values are drawn further
# along the axis. Defined once instead of on every loop iteration.
reverselog_trans <- function(base = exp(1)) {
    trans <- function(x) -log(x, base)
    inv <- function(x) base^(-x)
    trans_new(paste0("reverselog-", format(base)), trans, inv,
              log_breaks(base = base),
              domain = c(1e-100, Inf))
}

for (db in seq_along(databases_list)) {
    db_name <- databases_list[db]
    enriched <- enrichr(selected_markers_partner1$genes, databases = db_name)[[1]]

    # Nothing to save or plot when the database returned no terms; the plotting
    # calls below cannot handle an empty table.
    if (is.null(enriched) || nrow(enriched) == 0) {
        message('No enriched terms for ', partner1, ' in ', db_name, '; skipping.')
        next
    }

    # Sort by descending adjusted p-value, i.e. the most significant terms are
    # the LAST rows; the factor levels keep this order for the y axis.
    enriched <- enriched[order(-enriched$Adjusted.P.value), ]
    enriched$Term <- factor(enriched$Term, levels = unique(enriched$Term))
    #save the positive enriched pathways
    write.csv(enriched, file = paste0(dir_pea_partner1, partner1, '_', db_name, '_positive_', '_pea_list.csv'))

    # Last 20 rows = 20 most significant terms (all rows if there are fewer).
    top_terms <- tail(enriched, 20)

    #2881c1 - for blue
    #d3556e - for red
    options(repr.plot.width = 22, repr.plot.height = 11)
    plot_positive <- ggplot(top_terms, aes(y = Term, x = Adjusted.P.value)) +
        geom_vline(xintercept = .05, color = "grey", linetype = "dashed") +
        geom_segment(aes(yend = Term, xend = 1), colour = "black") +
        geom_point(shape = 21, aes(size = abs(Combined.Score)), fill = colour1) +
        scale_x_continuous(trans = reverselog_trans(10)) +
        scale_size_continuous(range = c(2, 13)) +
        theme_tufte() + xlab("p value (adj)") + ylab("") +
        theme(text = element_text(family = "Helvetica", size = 26),
              axis.text.x = element_text(angle = 45, hjust = 1),
              legend.position = "right") +
        ggtitle(paste(partner1, 'positive', db_name))
    ggsave(
        filename = paste0(dir_pea_partner1, partner1, '_', db_name, '_positive', '.pdf'),
        plot = plot_positive,
        width = 30,
        height = 11
    )

    #plot the genes
    # The `Genes` column holds ';'-separated gene lists. They are concatenated
    # and reversed so the genes of the most significant term (last row) come
    # first, then de-duplicated and capped at 20.
    genes_to_plot <- strsplit(paste(top_terms$Genes, collapse = ';'), split = ';')[[1]]
    genes_to_plot <- unique(rev(genes_to_plot))
    genes_to_plot <- head(genes_to_plot, 20)

    heatmap_plot <- DoHeatmap(
        object_av,
        features = genes_to_plot,
        draw.lines = FALSE,
        raster = FALSE,
        group.colors = colours_diagnosis_groups
    ) +
        theme(text = element_text(size = 20, face = "bold")) +
        scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
    ggsave(
        filename = paste0(dir_pea_partner1, partner1, '_', db_name, '_HEATMAP_genes_positive', '.pdf'),
        plot = heatmap_plot,
        width = 8,
        height = 11
    )

    # The stacked violin plot is only drawn for more than one gene, matching
    # the guard used in the second partner's loop.
    if (length(genes_to_plot) > 1) {
        vlnplot <- VlnPlot(
            object,
            features = genes_to_plot,
            stack = TRUE,
            flip = TRUE,
            cols = colours_diagnosis_groups,
            fill.by = "ident"
        ) +
            theme(legend.position = "none",
                  text = element_text(size = 17),
                  axis.text = element_text(size = 17))
        ggsave(
            filename = paste0(dir_pea_partner1, partner1, '_', db_name, '_StackedVIOLIN_genes_positive', '.pdf'),
            plot = vlnplot,
            width = 8,
            height = 11
        )
    }
}
# end of the enrichr loop

## Start for the second partner

In [None]:
# Differential expression for the second partner, computed separately in every
# cluster of interest: partner2 (ident.1) vs partner1 (ident.2), logistic
# regression test with `twin_pair` as latent variable, positive markers only.
Idents(object_analysis) <- 'cd8_coded'
levels(object_analysis) <- clusters_of_interest

# One result table per cluster; the list is preallocated instead of being grown
# with append(), and seq_along() is safe for an empty cluster vector.
de_list_partner2 <- vector("list", length(clusters_of_interest))

for (i in seq_along(clusters_of_interest)) {
    object_subset <- subset(object_analysis, idents = clusters_of_interest[i])
    markers_partner2 <- FindMarkers(
        object_subset,
        ident.1 = partner2,
        ident.2 = partner1,
        group.by = grouping_de,
        features = features_2,
        test.use = 'LR',
        latent.vars = 'twin_pair',
        only.pos = TRUE,
        logfc.threshold = logfc.threshold
    )
    markers_partner2$genes <- rownames(markers_partner2)
    markers_partner2$cluster <- clusters_of_interest[i]
    # keep only genes below the adjusted p-value cutoff
    markers_partner2 <- markers_partner2[markers_partner2$p_val_adj < pvalue, ]
    de_list_partner2[[i]] <- markers_partner2
}

# Collapse the per-cluster DE tables of the second partner into one table with
# a single row per gene. For every gene, the rows of all clusters in which it
# passed the filters are gathered; a gene is kept when it was found in more
# than `hits` clusters (hits = 0 keeps every gene). The kept row is the gene's
# first hit, with avg_log2FC replaced by the mean over clusters, plus the
# max/min log2FC, the mean adjusted p-value and the list of clusters.
hits <- 0
rows_to_keep <- list()

for (i in seq_along(de_list_partner2)) {
    markers_cluster <- de_list_partner2[[i]]
    # indices of all the other clusters
    number_of_othersamples <- setdiff(seq_along(de_list_partner2), i)

    # seq_len() gives zero iterations for a cluster without significant genes;
    # 1:nrow() would iterate over r = 1 and r = 0 and fail on the empty gene name.
    for (r in seq_len(nrow(markers_cluster))) {
        gene_to_test <- markers_cluster$genes[r]
        intersect_gene <- dplyr::filter(markers_cluster, genes == gene_to_test)
        # look the gene up in every other cluster
        for (o in number_of_othersamples) {
            markers_other_cluster <- de_list_partner2[[o]]
            if (gene_to_test %in% markers_other_cluster$genes) {
                intersect_gene <- rbind(intersect_gene, dplyr::filter(markers_other_cluster, genes == gene_to_test))
            }
        }
        # summarise the gene over all clusters where it was found
        if (nrow(intersect_gene) > hits) {
            gene_to_add <- intersect_gene[1, ]
            gene_to_add$avg_log2FC <- mean(intersect_gene$avg_log2FC)
            gene_to_add$max_log2FC <- max(intersect_gene$avg_log2FC)
            gene_to_add$min_log2FC <- min(intersect_gene$avg_log2FC)
            gene_to_add$avg_p_val_adj <- mean(intersect_gene$p_val_adj)
            gene_to_add$cluster <- paste0(intersect_gene$cluster, collapse = ', ')
            rows_to_keep[[length(rows_to_keep) + 1]] <- gene_to_add
        }
    }
}

# Bind once instead of growing the table with rbind() inside the loop.
selected_markers_partner2 <- do.call(rbind, rows_to_keep)
if (is.null(selected_markers_partner2)) {
    stop('No marker genes passed the filters for ', partner2, '.', call. = FALSE)
}
# A gene found in several clusters was added once per cluster: keep its first row.
selected_markers_partner2 <- selected_markers_partner2[!duplicated(selected_markers_partner2$genes), ]
selected_markers_partner2 <- selected_markers_partner2[order(-selected_markers_partner2$avg_log2FC), ]

#plot the intersected markers for the next verification step
#create the dir for vln plots per partner
plots_partner2 <- paste0(plots_dir, partner2, '/')
# Avoid the "already exists" warning when the cell is re-run.
if (!dir.exists(plots_partner2)) {
    dir.create(plots_partner2)
}

#create ordered heatmap
# Scaled values of the selected markers from `object_av`, sorted by the partner2
# column (highest first). drop = FALSE keeps the matrix shape when only a single
# marker was selected; without it the column lookup below fails.
ordered_genes <- object_av@assays$RNA@scale.data[selected_markers_partner2$genes, , drop = FALSE]
ordered_genes <- as.data.frame(ordered_genes)
ordered_genes <- ordered_genes[order(-ordered_genes[, partner2]), ]

# Genes whose partner2 value is above the partner1 value  #attention!
genes_higher <- rownames(ordered_genes)[ordered_genes[[partner2]] > ordered_genes[[partner1]]]
#save the genes higher as the partner on average
write.csv(genes_higher, file = paste0(plots_partner2, 'above_the_partner_average.csv'))

# Genes whose partner2 value is below the partner1 value  #attention!
genes_lower <- rownames(ordered_genes)[ordered_genes[[partner2]] < ordered_genes[[partner1]]]
#save the genes lower as the partner on average
write.csv(genes_lower, file = paste0(plots_partner2, 'below_the_partner_average.csv'))

# From here on only the gene names (already sorted) are needed.
ordered_genes <- rownames(ordered_genes)

# Plot at most the first 20 genes of the sorted list.
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width = 5, repr.plot.height = 12)
heatmap <- DoHeatmap(
    object_av,
    features = ordered_genes_plot,
    draw.lines = FALSE,
    size = 7,
    angle = 270,
    hjust = 1,
    raster = FALSE,
    group.colors = colours_diagnosis_groups
) +
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')
    ) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
ggsave(filename = paste0(plots_partner2, 'heatmap_top20_1.pdf'), plot = heatmap, width = 5, height = 12)

# Save one violin plot per selected gene into the partner directory.
# seq_along() makes the loop a no-op for an empty gene vector (1:0 would run twice).
for (g in seq_along(ordered_genes)) {
    plot <- VlnPlot(object, features = ordered_genes[g], pt.size = 0.01, cols = colours_diagnosis_groups)
    # Make the second plot layer (presumably the jittered points - TODO confirm) translucent.
    plot$layers[[2]]$aes_params$alpha <- 0.1
    ggsave(filename = paste0(plots_partner2, ordered_genes[g], '.pdf'), plot = plot, width = 6, height = 6)
}

In [None]:
#now look through the genes and select the ones validated on the full cohort
# Genes that are below the partner on average are removed from the marker table.
genes_remove <- genes_lower
selected_markers_partner2 <- selected_markers_partner2[!selected_markers_partner2$genes %in% genes_remove, ]

#create ordered heatmap
# drop = FALSE keeps the matrix shape when a single marker is left after filtering;
# without it the column lookup in the ordering step fails.
ordered_genes <- object_av@assays$RNA@scale.data[selected_markers_partner2$genes, , drop = FALSE]
ordered_genes <- as.data.frame(ordered_genes)
ordered_genes <- ordered_genes[order(-ordered_genes[, partner2]), ]
ordered_genes <- rownames(ordered_genes)

#top 20 sorted
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width = 5, repr.plot.height = 12)
heatmap <- DoHeatmap(object_av, features = ordered_genes_plot, draw.lines = FALSE, size = 7,
                     angle = 270, hjust = 1, raster = FALSE, group.colors = colours_diagnosis_groups) +
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
ggsave(filename = paste0(dir_path, '/', partner2, '_heatmap_top20.pdf'), plot = heatmap, width = 5, height = 12)

In [None]:
#start the enrichr PEA for the second partner
# For every database in `databases_list`: query Enrichr with the selected partner2
# markers, save the full table, plot the 20 most significant terms, and plot the
# genes behind those terms as a heatmap and as stacked violins.
dir_pea_partner2 <- paste0(dir_path, '/PEA_', partner2, '/')
if (!dir.exists(dir_pea_partner2)) {
    dir.create(dir_pea_partner2)
}

# Axis transformation -log_base(x): smaller p-values are drawn further to the right.
# Defined once here instead of on every loop iteration.
reverselog_trans <- function(base = exp(1)) {
    trans <- function(x) -log(x, base)
    inv <- function(x) base^(-x)
    trans_new(paste0("reverselog-", format(base)), trans, inv,
              log_breaks(base = base),
              domain = c(1e-100, Inf))
}

for (db in seq_along(databases_list)) {
    db_name <- databases_list[db]
    enrichr_result <- enrichr(selected_markers_partner2$genes, databases = db_name)
    enriched <- if (length(enrichr_result) > 0) enrichr_result[[1]] else NULL
    # An empty or failed query would make the plotting calls below fail and abort the
    # remaining databases, so such a database is skipped with a note instead.
    if (!is.data.frame(enriched) || nrow(enriched) == 0) {
        message('No enrichment results for ', partner2, ' in ', db_name, '; skipping this database.')
        next
    }
    # Descending adjusted p-value: the most significant terms are the LAST rows.
    enriched <- enriched[order(-enriched$Adjusted.P.value), ]
    enriched$Term <- factor(enriched$Term, levels = unique(enriched$Term))
    #save the positive enriched pathways
    write.csv(enriched, file = paste0(dir_pea_partner2, partner2, '_', db_name, '_positive_', '_pea_list.csv'))

    # The 20 most significant terms (all terms when there are fewer).
    top_terms <- tail(enriched, 20)

    #2881c1 - for blue
    #d3556e - for red
    options(repr.plot.width = 22, repr.plot.height = 11)
    plot_positive <- ggplot(top_terms, aes(y=Term, x= Adjusted.P.value))+
        geom_vline(xintercept = .05, color = "grey", linetype="dashed") +
        geom_segment( aes(yend=Term, xend=1), col= "black") +
        geom_point(shape=21, aes(size = abs(Combined.Score)), fill = colour2) +
        scale_x_continuous(trans=reverselog_trans(10))+
        scale_size_continuous(range = c(2, 13)) +
        theme_tufte()+ xlab("p value (adj)") + ylab("") +
        theme(text=element_text(family="Helvetica", size = 26),
              axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") +
        ggtitle(paste(partner2,  'positive', db_name))
    ggsave(filename = paste0(dir_pea_partner2, partner2, '_', db_name, '_positive', '.pdf'), plot = plot_positive, width = 30, height = 11)

    #plot the genes
    # `Genes` holds ';'-separated gene lists, one string per term.
    genes_to_plot <- unlist(strsplit(as.character(top_terms$Genes), split = ';'))
    #reverse because of ascending ordering of the enriched table to put the most significant at the beginning
    genes_to_plot <- unique(rev(genes_to_plot))
    genes_to_plot <- head(genes_to_plot, 20)

    heatmap_plot <- DoHeatmap(object_av, features = genes_to_plot, draw.lines = FALSE, raster = FALSE, group.colors = colours_diagnosis_groups) +
        theme(text = element_text(size = 20, face = "bold")) +
        scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
    ggsave(filename = paste0(dir_pea_partner2, partner2, '_', db_name, '_HEATMAP_genes_positive', '.pdf'), plot = heatmap_plot, width = 8, height = 11)

    vlnplot <- VlnPlot(object, features = genes_to_plot, stack = TRUE, flip = TRUE, cols = colours_diagnosis_groups, fill.by = "ident") +
        theme(legend.position = "none",
              text = element_text(size = 17),
              axis.text = element_text(size = 17))
    ggsave(filename = paste0(dir_pea_partner2, partner2, '_', db_name, '_StackedVIOLIN_genes_positive', '.pdf'), plot = vlnplot, width = 8, height = 11)
}
#end of the enrichr loop

## Combine the final result and plot volcano

In [None]:
# Number of genes per partner that are highlighted and labelled in the volcano plot.
number_of_genes_volcano <- 25
options(repr.plot.width = 10, repr.plot.height = 8)

# Save the per-partner marker tables (partner2 is written BEFORE its sign is flipped).
selected_markers_partner1$partner <- partner1
write.csv(selected_markers_partner1, file = paste0(dir_path, '/DGE_', partner1, '.csv'))
selected_markers_partner2$partner <- partner2
write.csv(selected_markers_partner2, file = paste0(dir_path, '/DGE_', partner2, '.csv'))

# Mirror partner2 to the left side of the plot.
# NOTE(review): this modifies `selected_markers_partner2` in place, so re-running the
# cell flips the sign back and rewrites DGE_<partner2>.csv with negated values.
# NOTE(review): max_log2FC / min_log2FC are not negated - confirm this is intended
# for the combined table.
selected_markers_partner2$avg_log2FC <- -selected_markers_partner2$avg_log2FC
selected_markers_combined <- rbind(selected_markers_partner1, selected_markers_partner2)

# head() returns only existing rows; indexing with 1:n would add all-NA rows when a
# partner has fewer than `number_of_genes_volcano` markers.
top_partner1 <- head(selected_markers_partner1, number_of_genes_volcano)
top_partner2 <- head(selected_markers_partner2, number_of_genes_volcano)

volcano <- ggplot(selected_markers_combined, aes(x = avg_log2FC, y = -log10(avg_p_val_adj))) +
        geom_vline(xintercept = 0) +
        geom_hline(yintercept = -log10(0.05), color ="grey", linetype ="dashed") +
        geom_point(data = selected_markers_combined,
                    color = "grey", alpha = 1) +
        geom_point(data = top_partner1,
                    fill = colour1, alpha = 1, shape=21, size= 2.5) +
        geom_point(data = top_partner2,
                    fill = colour2, alpha = 1, shape=21, size= 2.5) +
        geom_text_repel(data = rbind(top_partner1, top_partner2), max.overlaps = number_of_genes_volcano, aes(label = genes))+
        theme_linedraw() +
        theme(panel.grid = element_blank(), legend.position = "none") +
        xlab("log2(average fold change)") +
        ylab("-log10(p-value)") + ggtitle(paste(partner2, '(left)', 'vs', partner1, '(right)'))
ggsave(filename = paste0(dir_path, '/', partner1, '_vs_', partner2, '_volcano_plot.pdf'), plot = volcano, height = 8, width = 10)

write.csv(selected_markers_combined, file = paste0(dir_path, '/DGE_', partner1, '_vs_', partner2, '.csv'))

# Combine the results and run combined PEA

In [None]:
# Pairwise DGE tables written by the MS vs Healthy comparison
healthy_markers <- read.csv(file = './outs_new_pairwise/MS_vs_Healthy_pairwise/DGE_Healthy.csv')
ms_markers1 <- read.csv(file = './outs_new_pairwise/MS_vs_Healthy_pairwise/DGE_MS.csv')
# Pairwise DGE tables written by the MS vs SCNI comparison
scni_markers <- read.csv(file = './outs_new_pairwise/MS_vs_SCNI_pairwise/DGE_SCNI.csv')
ms_markers2 <- read.csv(file = './outs_new_pairwise/MS_vs_SCNI_pairwise/DGE_MS.csv')

In [None]:
# Keep only the gene names; the MS set is the de-duplicated pool of both MS tables.
ms_markers <- unique(c(ms_markers1$genes, ms_markers2$genes))
scni_markers <- scni_markers$genes
healthy_markers <- healthy_markers$genes

In [None]:
# run the PEA and heatmap for the healthy markers
# For every database: query Enrichr with the marker genes, save the full table, plot
# the 20 most significant terms, and plot the genes behind those terms.
# NOTE(review): `object_av` and `object` are not defined in this cell; they are taken
# from whatever an earlier cell left in the session - confirm they contain the
# `partner` group.
dir_pea_combined <- paste0('./outs_new_pairwise/combined_ONLY_pairwise/')
# Avoid the "already exists" warning when this or a sibling cell has run before.
if (!dir.exists(dir_pea_combined)) {
    dir.create(dir_pea_combined)
}
databases_list <- c('GO_Biological_Process_2021', 'Reactome_2016', 'MSigDB_Hallmark_2020')
colours_diagnosis_groups <- c('#1D5B60', '#939393', '#8D2413')
partner <- "Healthy"
features <- healthy_markers
colour <- '#1D5B60'

#create ordered heatmap
# drop = FALSE keeps the matrix shape when `features` holds a single gene; without it
# the column lookup in the ordering step fails.
ordered_genes <- object_av@assays$RNA@scale.data[features, , drop = FALSE]
ordered_genes <- as.data.frame(ordered_genes)
ordered_genes <- ordered_genes[order(-ordered_genes[, partner]), ]
ordered_genes <- rownames(ordered_genes)

#top 20 sorted
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width = 5, repr.plot.height = 12)
heatmap <- DoHeatmap(object_av, features = ordered_genes_plot, draw.lines = FALSE, size = 7,
                     angle = 270, hjust = 1, raster = FALSE, group.colors = colours_diagnosis_groups) +
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
ggsave(filename = paste0(dir_pea_combined, partner, '_heatmap_top20.pdf'), plot = heatmap, width = 5, height = 12)

# Axis transformation -log_base(x): smaller p-values are drawn further to the right.
# Defined once here instead of on every loop iteration.
reverselog_trans <- function(base = exp(1)) {
    trans <- function(x) -log(x, base)
    inv <- function(x) base^(-x)
    trans_new(paste0("reverselog-", format(base)), trans, inv,
              log_breaks(base = base),
              domain = c(1e-100, Inf))
}

for (db in seq_along(databases_list)) {
    db_name <- databases_list[db]
    enrichr_result <- enrichr(features, databases = db_name)
    enriched <- if (length(enrichr_result) > 0) enrichr_result[[1]] else NULL
    # An empty or failed query would make the plotting calls below fail and abort the
    # remaining databases, so such a database is skipped with a note instead.
    if (!is.data.frame(enriched) || nrow(enriched) == 0) {
        message('No enrichment results for ', partner, ' in ', db_name, '; skipping this database.')
        next
    }
    # Descending adjusted p-value: the most significant terms are the LAST rows.
    enriched <- enriched[order(-enriched$Adjusted.P.value), ]
    enriched$Term <- factor(enriched$Term, levels = unique(enriched$Term))
    #save the positive enriched pathways
    write.csv(enriched, file = paste0(dir_pea_combined, partner, '_', db_name, '_positive_', '_pea_list.csv'))

    # The 20 most significant terms (all terms when there are fewer).
    top_terms <- tail(enriched, 20)

    #2881c1 - for blue
    #d3556e - for red
    options(repr.plot.width = 22, repr.plot.height = 11)
    plot_positive <- ggplot(top_terms, aes(y=Term, x= Adjusted.P.value))+
        geom_vline(xintercept = .05, color = "grey", linetype="dashed") +
        geom_segment( aes(yend=Term, xend=1), col= "black") +
        geom_point(shape=21, aes(size = abs(Combined.Score)), fill = colour) +
        scale_x_continuous(trans=reverselog_trans(10))+
        scale_size_continuous(range = c(2, 13)) +
        theme_tufte()+ xlab("p value (adj)") + ylab("") +
        theme(text=element_text(family="Helvetica", size = 26),
              axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") +
        ggtitle(paste(partner,  'positive', db_name))
    ggsave(filename = paste0(dir_pea_combined, partner, '_', db_name, '_positive', '.pdf'), plot = plot_positive, width = 30, height = 11)

    #plot the genes
    # `Genes` holds ';'-separated gene lists, one string per term.
    genes_to_plot <- unlist(strsplit(as.character(top_terms$Genes), split = ';'))
    #reverse because of ascending ordering of the enriched table to put the most significant at the beginning
    genes_to_plot <- unique(rev(genes_to_plot))
    genes_to_plot <- head(genes_to_plot, 20)

    heatmap_plot <- DoHeatmap(object_av, features = genes_to_plot, draw.lines = FALSE, raster = FALSE, group.colors = colours_diagnosis_groups) +
        theme(text = element_text(size = 20, face = "bold")) +
        scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
    ggsave(filename = paste0(dir_pea_combined, partner, '_', db_name, '_HEATMAP_genes_positive', '.pdf'), plot = heatmap_plot, width = 8, height = 11)

    vlnplot <- VlnPlot(object, features = genes_to_plot, stack = TRUE, flip = TRUE, cols = colours_diagnosis_groups, fill.by = "ident") +
        theme(legend.position = "none",
              text = element_text(size = 17),
              axis.text = element_text(size = 17))
    ggsave(filename = paste0(dir_pea_combined, partner, '_', db_name, '_StackedVIOLIN_genes_positive', '.pdf'), plot = vlnplot, width = 8, height = 11)
}
#end of the enrichr loop

In [None]:
# run the PEA and heatmap for the SCNI markers
# For every database: query Enrichr with the marker genes, save the full table, plot
# the 20 most significant terms, and plot the genes behind those terms.
# NOTE(review): `object_av` and `object` are not defined in this cell; they are taken
# from whatever an earlier cell left in the session - confirm they contain the
# `partner` group.
dir_pea_combined <- paste0('./outs_new_pairwise/combined_ONLY_pairwise/')
# Avoid the "already exists" warning when this or a sibling cell has run before.
if (!dir.exists(dir_pea_combined)) {
    dir.create(dir_pea_combined)
}
databases_list <- c('GO_Biological_Process_2021', 'Reactome_2016', 'MSigDB_Hallmark_2020')
colours_diagnosis_groups <- c('#1D5B60', '#939393', '#8D2413')
partner <- "SCNI"
features <- scni_markers
colour <- '#939393'

#create ordered heatmap
# drop = FALSE keeps the matrix shape when `features` holds a single gene; without it
# the column lookup in the ordering step fails.
ordered_genes <- object_av@assays$RNA@scale.data[features, , drop = FALSE]
ordered_genes <- as.data.frame(ordered_genes)
ordered_genes <- ordered_genes[order(-ordered_genes[, partner]), ]
ordered_genes <- rownames(ordered_genes)

#top 20 sorted
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width = 5, repr.plot.height = 12)
heatmap <- DoHeatmap(object_av, features = ordered_genes_plot, draw.lines = FALSE, size = 7,
                     angle = 270, hjust = 1, raster = FALSE, group.colors = colours_diagnosis_groups) +
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
ggsave(filename = paste0(dir_pea_combined, partner, '_heatmap_top20.pdf'), plot = heatmap, width = 5, height = 12)

# Axis transformation -log_base(x): smaller p-values are drawn further to the right.
# Defined once here instead of on every loop iteration.
reverselog_trans <- function(base = exp(1)) {
    trans <- function(x) -log(x, base)
    inv <- function(x) base^(-x)
    trans_new(paste0("reverselog-", format(base)), trans, inv,
              log_breaks(base = base),
              domain = c(1e-100, Inf))
}

for (db in seq_along(databases_list)) {
    db_name <- databases_list[db]
    enrichr_result <- enrichr(features, databases = db_name)
    enriched <- if (length(enrichr_result) > 0) enrichr_result[[1]] else NULL
    # An empty or failed query would make the plotting calls below fail and abort the
    # remaining databases, so such a database is skipped with a note instead.
    if (!is.data.frame(enriched) || nrow(enriched) == 0) {
        message('No enrichment results for ', partner, ' in ', db_name, '; skipping this database.')
        next
    }
    # Descending adjusted p-value: the most significant terms are the LAST rows.
    enriched <- enriched[order(-enriched$Adjusted.P.value), ]
    enriched$Term <- factor(enriched$Term, levels = unique(enriched$Term))
    #save the positive enriched pathways
    write.csv(enriched, file = paste0(dir_pea_combined, partner, '_', db_name, '_positive_', '_pea_list.csv'))

    # The 20 most significant terms (all terms when there are fewer).
    top_terms <- tail(enriched, 20)

    #2881c1 - for blue
    #d3556e - for red
    options(repr.plot.width = 22, repr.plot.height = 11)
    plot_positive <- ggplot(top_terms, aes(y=Term, x= Adjusted.P.value))+
        geom_vline(xintercept = .05, color = "grey", linetype="dashed") +
        geom_segment( aes(yend=Term, xend=1), col= "black") +
        geom_point(shape=21, aes(size = abs(Combined.Score)), fill = colour) +
        scale_x_continuous(trans=reverselog_trans(10))+
        scale_size_continuous(range = c(2, 13)) +
        theme_tufte()+ xlab("p value (adj)") + ylab("") +
        theme(text=element_text(family="Helvetica", size = 26),
              axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") +
        ggtitle(paste(partner,  'positive', db_name))
    ggsave(filename = paste0(dir_pea_combined, partner, '_', db_name, '_positive', '.pdf'), plot = plot_positive, width = 30, height = 11)

    #plot the genes
    # `Genes` holds ';'-separated gene lists, one string per term.
    genes_to_plot <- unlist(strsplit(as.character(top_terms$Genes), split = ';'))
    #reverse because of ascending ordering of the enriched table to put the most significant at the beginning
    genes_to_plot <- unique(rev(genes_to_plot))
    genes_to_plot <- head(genes_to_plot, 20)

    heatmap_plot <- DoHeatmap(object_av, features = genes_to_plot, draw.lines = FALSE, raster = FALSE, group.colors = colours_diagnosis_groups) +
        theme(text = element_text(size = 20, face = "bold")) +
        scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
    ggsave(filename = paste0(dir_pea_combined, partner, '_', db_name, '_HEATMAP_genes_positive', '.pdf'), plot = heatmap_plot, width = 8, height = 11)

    vlnplot <- VlnPlot(object, features = genes_to_plot, stack = TRUE, flip = TRUE, cols = colours_diagnosis_groups, fill.by = "ident") +
        theme(legend.position = "none",
              text = element_text(size = 17),
              axis.text = element_text(size = 17))
    ggsave(filename = paste0(dir_pea_combined, partner, '_', db_name, '_StackedVIOLIN_genes_positive', '.pdf'), plot = vlnplot, width = 8, height = 11)
}
#end of the enrichr loop

In [None]:
# run the PEA and heatmap for the MS markers
# For every database: query Enrichr with the marker genes, save the full table, plot
# the 20 most significant terms, and plot the genes behind those terms.
# NOTE(review): `object_av` and `object` are not defined in this cell; they are taken
# from whatever an earlier cell left in the session - confirm they contain the
# `partner` group.
dir_pea_combined <- paste0('./outs_new_pairwise/combined_ONLY_pairwise/')
# Avoid the "already exists" warning when this or a sibling cell has run before.
if (!dir.exists(dir_pea_combined)) {
    dir.create(dir_pea_combined)
}
databases_list <- c('GO_Biological_Process_2021', 'Reactome_2016', 'MSigDB_Hallmark_2020')
colours_diagnosis_groups <- c('#1D5B60', '#939393', '#8D2413')
partner <- "MS"
features <- ms_markers
colour <- '#8D2413'

#create ordered heatmap
# drop = FALSE keeps the matrix shape when `features` holds a single gene; without it
# the column lookup in the ordering step fails.
ordered_genes <- object_av@assays$RNA@scale.data[features, , drop = FALSE]
ordered_genes <- as.data.frame(ordered_genes)
ordered_genes <- ordered_genes[order(-ordered_genes[, partner]), ]
ordered_genes <- rownames(ordered_genes)

#top 20 sorted
ordered_genes_plot <- head(ordered_genes, 20)
options(repr.plot.width = 5, repr.plot.height = 12)
heatmap <- DoHeatmap(object_av, features = ordered_genes_plot, draw.lines = FALSE, size = 7,
                     angle = 270, hjust = 1, raster = FALSE, group.colors = colours_diagnosis_groups) +
    theme(
        text = element_text(size = 23, colour = 'black', face = 'plain'),
        axis.text.y = element_text(size = 19, colour = 'black', face = 'plain')) +
    scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
ggsave(filename = paste0(dir_pea_combined, partner, '_heatmap_top20.pdf'), plot = heatmap, width = 5, height = 12)

# Axis transformation -log_base(x): smaller p-values are drawn further to the right.
# Defined once here instead of on every loop iteration.
reverselog_trans <- function(base = exp(1)) {
    trans <- function(x) -log(x, base)
    inv <- function(x) base^(-x)
    trans_new(paste0("reverselog-", format(base)), trans, inv,
              log_breaks(base = base),
              domain = c(1e-100, Inf))
}

for (db in seq_along(databases_list)) {
    db_name <- databases_list[db]
    enrichr_result <- enrichr(features, databases = db_name)
    enriched <- if (length(enrichr_result) > 0) enrichr_result[[1]] else NULL
    # An empty or failed query would make the plotting calls below fail and abort the
    # remaining databases, so such a database is skipped with a note instead.
    if (!is.data.frame(enriched) || nrow(enriched) == 0) {
        message('No enrichment results for ', partner, ' in ', db_name, '; skipping this database.')
        next
    }
    # Descending adjusted p-value: the most significant terms are the LAST rows.
    enriched <- enriched[order(-enriched$Adjusted.P.value), ]
    enriched$Term <- factor(enriched$Term, levels = unique(enriched$Term))
    #save the positive enriched pathways
    write.csv(enriched, file = paste0(dir_pea_combined, partner, '_', db_name, '_positive_', '_pea_list.csv'))

    # The 20 most significant terms (all terms when there are fewer).
    top_terms <- tail(enriched, 20)

    #2881c1 - for blue
    #d3556e - for red
    options(repr.plot.width = 22, repr.plot.height = 11)
    plot_positive <- ggplot(top_terms, aes(y=Term, x= Adjusted.P.value))+
        geom_vline(xintercept = .05, color = "grey", linetype="dashed") +
        geom_segment( aes(yend=Term, xend=1), col= "black") +
        geom_point(shape=21, aes(size = abs(Combined.Score)), fill = colour) +
        scale_x_continuous(trans=reverselog_trans(10))+
        scale_size_continuous(range = c(2, 13)) +
        theme_tufte()+ xlab("p value (adj)") + ylab("") +
        theme(text=element_text(family="Helvetica", size = 26),
              axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") +
        ggtitle(paste(partner,  'positive', db_name))
    ggsave(filename = paste0(dir_pea_combined, partner, '_', db_name, '_positive', '.pdf'), plot = plot_positive, width = 30, height = 11)

    #plot the genes
    # `Genes` holds ';'-separated gene lists, one string per term.
    genes_to_plot <- unlist(strsplit(as.character(top_terms$Genes), split = ';'))
    #reverse because of ascending ordering of the enriched table to put the most significant at the beginning
    genes_to_plot <- unique(rev(genes_to_plot))
    genes_to_plot <- head(genes_to_plot, 20)

    heatmap_plot <- DoHeatmap(object_av, features = genes_to_plot, draw.lines = FALSE, raster = FALSE, group.colors = colours_diagnosis_groups) +
        theme(text = element_text(size = 20, face = "bold")) +
        scale_fill_gradientn(colors = c("#2881C1", "white", "#D3556E"))
    ggsave(filename = paste0(dir_pea_combined, partner, '_', db_name, '_HEATMAP_genes_positive', '.pdf'), plot = heatmap_plot, width = 8, height = 11)

    vlnplot <- VlnPlot(object, features = genes_to_plot, stack = TRUE, flip = TRUE, cols = colours_diagnosis_groups, fill.by = "ident") +
        theme(legend.position = "none",
              text = element_text(size = 17),
              axis.text = element_text(size = 17))
    ggsave(filename = paste0(dir_pea_combined, partner, '_', db_name, '_StackedVIOLIN_genes_positive', '.pdf'), plot = vlnplot, width = 8, height = 11)
}
#end of the enrichr loop

# Load the non-pairwise results

In [None]:
# Non-pairwise DGE tables, grouped by the comparison that produced them.
# MS vs Healthy
healthy_markers1 <- read.csv(file = './outs/MS_vs_Healthy_NON_pairwise/DGE_Healthy.csv')
ms_markers1 <- read.csv(file = './outs/MS_vs_Healthy_NON_pairwise/DGE_MS.csv')
# SCNI vs Healthy
healthy_markers2 <- read.csv(file = './outs/SCNI_vs_Healthy_NON_pairwise/DGE_Healthy.csv')
scni_markers1 <- read.csv(file = './outs/SCNI_vs_Healthy_NON_pairwise/DGE_SCNI.csv')
# SCNI vs MS
scni_markers2 <- read.csv(file = './outs/SCNI_vs_MS_NON_pairwise/DGE_SCNI.csv')
ms_markers2 <- read.csv(file = './outs/SCNI_vs_MS_NON_pairwise/DGE_MS.csv')


In [None]:
# Per group: pool the gene names of both comparisons and drop repeats.
healthy_markers_np <- unique(c(healthy_markers1$genes, healthy_markers2$genes))
scni_markers_np <- unique(c(scni_markers1$genes, scni_markers2$genes))
ms_markers_np <- unique(c(ms_markers1$genes, ms_markers2$genes))

In [None]:
# All non-pairwise marker genes of the two disease groups (SCNI and MS), without repeats.
combined_dis_np <- union(scni_markers_np, ms_markers_np)

# Check the immunological signatures

In [None]:
# Pairwise DGE tables written by the MS vs Healthy comparison
healthy_markers <- read.csv(file = './outs_new_pairwise/MS_vs_Healthy_pairwise/DGE_Healthy.csv')
ms_markers1 <- read.csv(file = './outs_new_pairwise/MS_vs_Healthy_pairwise/DGE_MS.csv')
# Pairwise DGE tables written by the MS vs SCNI comparison
scni_markers <- read.csv(file = './outs_new_pairwise/MS_vs_SCNI_pairwise/DGE_SCNI.csv')
ms_markers2 <- read.csv(file = './outs_new_pairwise/MS_vs_SCNI_pairwise/DGE_MS.csv')

In [None]:
# Keep only the gene names; the MS set is the de-duplicated pool of both MS tables.
ms_markers <- unique(c(ms_markers1$genes, ms_markers2$genes))
scni_markers <- scni_markers$genes
healthy_markers <- healthy_markers$genes

In [None]:
# Read the curated immunological marker list; the file order defines the factor
# levels of `Gene`.
markers_ordered <- read.csv2('./modules/immunological_markers_plot.csv')
markers_ordered$Gene <- factor(markers_ordered$Gene, levels = markers_ordered$Gene)

# Restrict the list to genes that are among the non-pairwise disease markers.
data <- markers_ordered$Gene[markers_ordered$Gene %in% combined_dis_np]
markers <- data
data

In [None]:
# Which of the markers are also found in the paired (pairwise) MS results?
markers_matched <- markers[is.element(markers, ms_markers)]
length(markers)
markers_matched
length(markers_matched)

## Building the immunological heatmap

In [None]:
# Build the long table for the heatmap: one row per matched marker and comparison.
# Markers that are absent from a comparison get a placeholder row with every numeric
# column set to 0 and p_val_adj set to 'ns'.

# --- MS vs Healthy ---
ms_vs_h <- ms_markers1
ms_vs_h$comparison <- 'MS_vs_Healthy'
ms_vs_h <- ms_vs_h[ms_vs_h$genes %in% markers_matched, ]
missing_genes <- markers_matched[!(markers_matched %in% ms_vs_h$genes)]
missing_genes_df <- data.frame(matrix(0, nrow = length(missing_genes), ncol = ncol(ms_vs_h)))
colnames(missing_genes_df) <- colnames(ms_vs_h)
missing_genes_df$genes <- missing_genes
# rep() instead of a scalar: assigning a length-1 value to a zero-row data frame
# (no missing genes) is an error.
missing_genes_df$p_val_adj <- rep('ns', length(missing_genes))
# Literal label instead of comparison[1], which is NA when no marker matched.
missing_genes_df$comparison <- rep('MS_vs_Healthy', length(missing_genes))
ms_vs_h <- rbind(ms_vs_h, missing_genes_df)


# --- MS vs SCNI ---
ms_vs_scni <- ms_markers2
ms_vs_scni$comparison <- 'MS_vs_SCNI'
ms_vs_scni <- ms_vs_scni[ms_vs_scni$genes %in% markers_matched, ]
missing_genes <- markers_matched[!(markers_matched %in% ms_vs_scni$genes)]
missing_genes_df <- data.frame(matrix(0, nrow = length(missing_genes), ncol = ncol(ms_vs_scni)))
colnames(missing_genes_df) <- colnames(ms_vs_scni)
missing_genes_df$genes <- missing_genes
missing_genes_df$p_val_adj <- rep('ns', length(missing_genes))
missing_genes_df$comparison <- rep('MS_vs_SCNI', length(missing_genes))
ms_vs_scni <- rbind(ms_vs_scni, missing_genes_df)

data_to_plot <- rbind(ms_vs_h, ms_vs_scni)

In [None]:
# Draw the marker heatmap: genes on x (in the order of the curated list),
# comparisons on y, tile fill = avg_log2FC.
data_to_plot$comparison <- factor(data_to_plot$comparison, levels = c('MS_vs_SCNI', 'MS_vs_Healthy'))
data_to_plot$genes <- factor(
    data_to_plot$genes,
    levels = markers_ordered$Gene[markers_ordered$Gene %in% data_to_plot$genes]
)

# colour notes kept from the original: #800000, #D3556E

width <- 15
height <- 3
name <- 'immunological_markers_matched_paired'
set_figsize(width, height)
plot <- ggplot(data_to_plot, aes(x = genes, y = comparison, fill = avg_log2FC)) +
    geom_tile() +
    cowplot::theme_cowplot() +
    #grids(linetype = "dashed", size = 0.1) +
    theme(
        axis.text.x = element_text(angle = 60, vjust = 0.95, hjust=0.95, size = 13),
        axis.ticks = element_blank()
    ) +
    labs(y = '') +
    # white at 0, darkest colour at the largest avg_log2FC in the table
    scale_fill_gradientn(
        colours = c("white", "#D3556E", "#671727"),
        values = scales::rescale(c(0, max(data_to_plot$avg_log2FC)/2, max(data_to_plot$avg_log2FC)))
    ) #+ coord_equal()
plot
ggsave(filename = paste0(outs_dir, name, '.pdf'), plot = plot, width = width, height = height)

# Check the metabolic signatures

In [None]:
# Read the pairwise DGE tables.
# MS has one table per comparison folder, hence ms_markers1 / ms_markers2.

# Tables from the MS_vs_Healthy_pairwise output folder
healthy_markers <- read.csv(file = './outs_new_pairwise/MS_vs_Healthy_pairwise/DGE_Healthy.csv')
ms_markers1 <- read.csv(file = './outs_new_pairwise/MS_vs_Healthy_pairwise/DGE_MS.csv')

# Tables from the MS_vs_SCNI_pairwise output folder
scni_markers <- read.csv(file = './outs_new_pairwise/MS_vs_SCNI_pairwise/DGE_SCNI.csv')
ms_markers2 <- read.csv(file = './outs_new_pairwise/MS_vs_SCNI_pairwise/DGE_MS.csv')

In [None]:
# Reduce each DGE table to its gene column.
# NOTE: healthy_markers and scni_markers are overwritten in place (data frame
# -> gene vector), so re-running this cell requires re-reading the CSVs first.
scni_markers <- scni_markers[["genes"]]
healthy_markers <- healthy_markers[["genes"]]

# MS genes: de-duplicated concatenation of both MS tables; ms_markers1 and
# ms_markers2 themselves stay intact as data frames.
ms_markers <- unique(c(ms_markers1[["genes"]], ms_markers2[["genes"]]))

In [None]:
# Load the metabolic marker list (read.csv2: ';'-separated file).

markers_ordered <- read.csv2(file = './modules/metabolic_markers_plot.csv')
# Freeze the file order of the genes as the factor level order.
markers_ordered$Gene <- with(markers_ordered, factor(Gene, levels = Gene))

# Keep only the markers that are also listed in combined_dis_np.
data <- markers_ordered$Gene[markers_ordered$Gene %in% combined_dis_np]
markers <- data
data

In [None]:
# Which of the markers appear among the MS genes of the paired comparisons?
markers_matched <- markers[is.element(markers, ms_markers)]

# Report: number of candidate markers, the matched markers, and their count.
length(markers)
markers_matched
length(markers_matched)

## Building the metabolic heatmap

In [None]:
# Build the heatmap input for the metabolic markers: for each comparison, keep
# the matched markers and add a placeholder row (0 in every column, 'ns' in
# p_val_adj) for every matched marker the comparison does not contain.

# --- MS vs Healthy ---
ms_vs_h <- ms_markers1
ms_vs_h$comparison <- 'MS_vs_Healthy'
# The missing-gene lookup and the comparison label are taken from the table
# BEFORE it is filtered: the filtered table can have zero rows, in which case
# `ms_vs_h$comparison[1]` would be NA.
missing_genes <- markers_matched[!(markers_matched %in% ms_vs_h$genes)]
missing_genes_df <- data.frame(matrix(0, nrow = length(missing_genes), ncol = ncol(ms_vs_h)))
colnames(missing_genes_df) <- colnames(ms_vs_h)
# as.character(): markers_matched is a factor; store plain gene names.
missing_genes_df$genes <- as.character(missing_genes)
# rep(): assigning a length-1 value to a column of a zero-row data frame is an
# error ("replacement has 1 row, data has 0"), so the fill values are sized to
# the number of missing genes explicitly.
missing_genes_df$p_val_adj <- rep('ns', length(missing_genes))
missing_genes_df$comparison <- rep(ms_vs_h$comparison[1], length(missing_genes))
ms_vs_h <- rbind(ms_vs_h[ms_vs_h$genes %in% markers_matched, ], missing_genes_df)


# --- MS vs SCNI ---
ms_vs_scni <- ms_markers2
ms_vs_scni$comparison <- 'MS_vs_SCNI'
missing_genes <- markers_matched[!(markers_matched %in% ms_vs_scni$genes)]
missing_genes_df <- data.frame(matrix(0, nrow = length(missing_genes), ncol = ncol(ms_vs_scni)))
colnames(missing_genes_df) <- colnames(ms_vs_scni)
missing_genes_df$genes <- as.character(missing_genes)
missing_genes_df$p_val_adj <- rep('ns', length(missing_genes))
missing_genes_df$comparison <- rep(ms_vs_scni$comparison[1], length(missing_genes))
ms_vs_scni <- rbind(ms_vs_scni[ms_vs_scni$genes %in% markers_matched, ], missing_genes_df)

# Long-format input for the heatmap: one row per (gene, comparison).
data_to_plot <- rbind(ms_vs_h, ms_vs_scni)

In [None]:
# Heatmap of avg_log2FC for the matched metabolic markers:
# genes on the x axis, comparisons on the y axis.

# Re-level the genes in the order of markers_ordered$Gene and fix the order of
# the two comparison levels.
data_to_plot$genes <- factor(
  data_to_plot$genes,
  levels = markers_ordered$Gene[markers_ordered$Gene %in% data_to_plot$genes]
)
data_to_plot$comparison <- factor(
  data_to_plot$comparison,
  levels = c('MS_vs_SCNI', 'MS_vs_Healthy')
)

# Colour hex codes kept from the original notes: #800000, #D3556E

name <- 'metabolic_markers_matched_pairwise'
width <- 15
height <- 3
set_figsize(width, height)

# Disabled options kept for reference:
#   grids(linetype = "dashed", size = 0.1)
#   coord_equal()
plot <- ggplot(data_to_plot, aes(x = genes, y = comparison, fill = avg_log2FC)) +
  geom_tile() +
  # Gradient stops placed at 0, half of the maximum and the maximum avg_log2FC.
  scale_fill_gradientn(
    colours = c("white", "#D3556E", "#671727"),
    values = scales::rescale(max(data_to_plot$avg_log2FC) * c(0, 0.5, 1))
  ) +
  cowplot::theme_cowplot() +
  theme(
    axis.text.x = element_text(angle = 60, vjust = 0.95, hjust = 0.95, size = 13),
    axis.ticks = element_blank()
  ) +
  labs(y = '')
plot

# Save a PDF with the same dimensions as the on-screen figure.
ggsave(
  filename = paste0(outs_dir, name, '.pdf'),
  plot = plot,
  width = width,
  height = height
)