# Part III: Gene Set Enrichment Analysis

## Load the required packages

In [None]:
library(MOFA2)
library(msigdbr)
library(data.table)

## Load the trained MOFA model

In [None]:
# Path to the serialized MOFA model, one directory above the working directory.
serialized_mofa_file <- file.path(
  getwd(),
  "..",
  "220503.MOFAobject.LUAD.hdf5"
)

# Read the model from the HDF5 file.
MOFAobject.trained <- load_model(serialized_mofa_file)

## Load and explore MSIGDB data

In [None]:
# Show the MSigDB gene-set table for human.
msigdbr(species = "Homo sapiens")

# List the available collections as a plain data frame.
data.frame(msigdbr_collections())

# EXTRACT SPECIFIC DBs AS BINARY MATRIX

# Build a gene-set x gene-symbol membership matrix for one MSigDB collection.
#
# Arguments:
#   category     MSigDB category code passed to msigdbr(), e.g. "H", "C2".
#   subcategory  Optional sub-category code, e.g. "CP:REACTOME"; NULL to omit.
#   species      Species name passed to msigdbr().
#
# Returns a numeric matrix with one row per gene set (rownames = gs_name) and
# one column per gene symbol; entries are 1 where the symbol is listed in the
# set and 0 otherwise.
#
# NOTE(review): newer msigdbr releases appear to rename `category` /
# `subcategory` to `collection` / `subcollection` -- confirm against the
# installed version before upgrading.
msigdb_binary_matrix <- function(category, subcategory = NULL,
                                 species = "Homo sapiens") {
  gene_sets <- if (is.null(subcategory)) {
    msigdbr(species = species, category = category)
  } else {
    msigdbr(species = species, category = category, subcategory = subcategory)
  }

  # Keep each (set, symbol) pair once. With repeated pairs dcast() has no
  # aggregate function and falls back to length(), which yields counts > 1
  # instead of a 0/1 indicator.
  membership <- unique(as.data.table(gene_sets)[, .(gs_name, gene_symbol)])
  membership[, id := 1]

  wide <- dcast(membership, gs_name ~ gene_symbol, value.var = "id", fill = 0)

  # data.table's as.matrix() method moves the gs_name column into the rownames,
  # so no helper from a package outside the attached ones is required.
  as.matrix(wide, rownames = "gs_name")
}

msigdb.hallmark <- msigdb_binary_matrix(category = "H")

msigdb.reactome <- msigdb_binary_matrix(
  category = "C2",
  subcategory = "CP:REACTOME"
)

msigdb.gobp <- msigdb_binary_matrix(
  category = "C5",
  subcategory = "GO:BP"
)

# Force the membership matrices to be binary: every entry above 1 (a gene
# counted more than once for a set) is reset to 1.
msigdb.hallmark[msigdb.hallmark > 1] <- 1
msigdb.reactome[msigdb.reactome > 1] <- 1
msigdb.gobp[msigdb.gobp > 1] <- 1

# Rename the gene columns so they are identical to the MOFA feature names.
# Layer-specific: the "rna_" prefix only fits the RNA layer, so this step has
# to be redone with the matching prefix for protein and CNV.
# paste0() is used because it is base R; no string package is attached by the
# library() calls of this notebook.
colnames(msigdb.hallmark) <- paste0("rna_", colnames(msigdb.hallmark))
colnames(msigdb.reactome) <- paste0("rna_", colnames(msigdb.reactome))
colnames(msigdb.gobp) <- paste0("rna_", colnames(msigdb.gobp))

## Run *Gene Set Enrichment Analysis*

In [None]:
# Hallmark gene sets tested against the RNA view, once per weight sign.

# Positive weights
gsea.hallmark.positive <- run_enrichment(
  MOFAobject.trained,
  view = "RNA", # NOTE: possibly change the factors here as well
  feature.sets = msigdb.hallmark,
  sign = "positive"
)

# Negative weights
gsea.hallmark.negative <- run_enrichment(
  MOFAobject.trained,
  view = "RNA",
  feature.sets = msigdb.hallmark,
  sign = "negative"
)

# TODO: repeat both runs for the REACTOME and GO:BP matrices.

### Visualize results

In [None]:
# Simple GSEA overview for factor 1, one plot per weight sign
plot_enrichment(gsea.hallmark.positive, factor = 1)
plot_enrichment(gsea.hallmark.negative, factor = 1)

# Detailed GSEA overview for factor 1, one plot per weight sign
plot_enrichment_detailed(gsea.hallmark.positive, factor = 1)
plot_enrichment_detailed(gsea.hallmark.negative, factor = 1)

# GSEA heatmap of the adjusted p-values from the feature set enrichment
# analysis: rows are feature sets, columns are factors.
plot_enrichment_heatmap(gsea.hallmark.positive)
plot_enrichment_heatmap(gsea.hallmark.negative)

# Write the adjusted p-values of both Hallmark runs to CSV files in the
# working directory.
# The timestamp is taken once so both files carry the same stamp even if the
# clock rolls over to the next minute between the two writes, and an
# underscore separates it from the label (previously the two were fused,
# e.g. "... 14.30gsea.hallmark...").
run_stamp <- format(Sys.time(), "%d-%b-%Y %H.%M")

write.csv(
  as.data.frame(gsea.hallmark.positive$pval.adj),
  paste0(run_stamp, "_gsea.hallmark.positive.pvals", ".csv")
)
write.csv(
  as.data.frame(gsea.hallmark.negative$pval.adj),
  paste0(run_stamp, "_gsea.hallmark.negative.pvals", ".csv")
)