In [1]:
library(tidyr)
library(fgsea)
library(ggplot2)
library(Rcpp)
library(data.table)
library(stringi)

Loading required package: Rcpp


In [2]:
# Input locations: the per-cluster gene table (TSV) and the gene-set
# collection (GMT) that the cells below read.
cluster_path <- "/home/jovyan/work/single_cell/week_13_no_batch_correction/gene_for_gsea_list.tsv"
gene_set_path <- "combined_genesets.gmt"

In [3]:
# Load the gene sets from the GMT file at `gene_set_path` via gmtPathways().
pathways <- gmtPathways(gene_set_path) 

In [None]:
# Preview the first few entries of `pathways`.
head(pathways)

In [45]:

# Strip wrapper characters from a gene-set entry and split it into gene symbols.
#
# NOTE(review): the "', '" delimiter and the 2-character prefix suggest the
# GMT entries are Python-style list literals such as "['A', 'B']"; the
# 3-character suffix is carried over unchanged from the original offsets and
# implies one extra trailing character -- confirm against the actual file.
#
# Args:
#   raw_genes: character vector holding the raw entry (or entries) of one set.
#   n_prefix:  number of leading characters to drop (default 2).
#   n_suffix:  number of trailing characters to drop (default 3).
#
# Returns: character vector of gene symbols (character(0) if nothing is left).
strip_gene_list_wrapper <- function(raw_genes, n_prefix = 2L, n_suffix = 3L) {
    # Base substr() yields "" when the string is shorter than the suffix,
    # rather than wrapping a negative end index around to the string's tail.
    without_suffix <- substr(raw_genes, 1L, nchar(raw_genes) - n_suffix)
    without_wrapper <- substring(without_suffix, n_prefix + 1L)
    # as.character() keeps an empty result as character(0) instead of NULL.
    as.character(unlist(strsplit(without_wrapper, "', '", fixed = TRUE)))
}

# lapply() hands each gene set to the helper directly. The previous loop used
# `pathways$paths`, which looks up an element literally named "paths" and so
# never touched the real entries; it also discarded every result.
# The cleaned sets go into a new list so `pathways` itself stays unchanged.
cleaned_pathways <- lapply(pathways, strip_gene_list_wrapper)

In [27]:
# Preview the first few entries of `pathways`.
head(pathways)

In [3]:
# Read the tab-separated file at `cluster_path` (first row = column names)
# and hand back the resulting data frame.
generate_cluster_table <- function(cluster_path) {
    read.table(cluster_path, header = TRUE, sep = "\t")
}

In [4]:
# Load the cluster table from `cluster_path` into a data frame.
cluster_table <- generate_cluster_table(cluster_path)

In [5]:
# NOTE(review): a negative `warn` makes R ignore every warning for the rest of
# the session, not only those raised by the function defined below; consider
# suppressing specific warnings at the call site instead.
options(warn=-1)
# Run fgsea once per cluster and write two files for each cluster:
#   <output_label>_<cluster>_gseaTable.png  - table plot of the top pathways
#   <output_label>_<cluster>_fgseaRes.txt   - tab-separated fgsea results
#
# Args:
#   gene_set_path: path to a GMT file, read with gmtPathways().
#   cluster_table: data frame with columns `cluster_number`, `gene`, `z_score`.
#   output_label:  prefix used for both output file names.
#   min_size, max_size, nperm, gsea_param: forwarded to fgsea() as minSize,
#                  maxSize, nperm and gseaParam (gsea_param also goes to
#                  plotGseaTable()). Defaults match the previous fixed values.
#   n_top:         number of lowest-p-value pathways taken from each
#                  enrichment direction for the table plot.
#
# Returns: NULL, invisibly; the function is called for its output files.
# Raises:  an error if `cluster_table` lacks one of the required columns.
execute_fgsea <- function(gene_set_path, cluster_table, output_label,
                          min_size = 15, max_size = 500, nperm = 15000,
                          gsea_param = 0.5, n_top = 5){

    # Fail loudly: a missing column would otherwise make the loop a silent no-op.
    required_cols <- c("cluster_number", "gene", "z_score")
    missing_cols <- setdiff(required_cols, names(cluster_table))
    if (length(missing_cols) > 0) {
        stop("cluster_table is missing column(s): ",
             paste(missing_cols, collapse = ", "), call. = FALSE)
    }

    cluster_ids <- unique(cluster_table$cluster_number)
    # Rows without a cluster label cannot be assigned to any run.
    cluster_ids <- cluster_ids[!is.na(cluster_ids)]
    pathways <- gmtPathways(gene_set_path)

    for (cluster in cluster_ids){
        # which() drops NA comparisons, which `[` would turn into all-NA rows.
        in_cluster <- which(cluster_table$cluster_number == cluster)
        cluster_set <- cluster_table[in_cluster, ]
        ranks <- setNames(cluster_set$z_score, cluster_set$gene)
        fgseaRes <- fgsea(pathways,
                          ranks,
                          minSize = min_size,
                          maxSize = max_size,
                          nperm = nperm,
                          gseaParam = gsea_param)

        # Top pathways per direction; the negative side is reversed so the
        # most significant negative pathway ends up last in the table.
        topPathwaysUp <- fgseaRes[ES > 0, ][head(order(pval), n = n_top), pathway]
        topPathwaysDown <- fgseaRes[ES < 0, ][head(order(pval), n = n_top), pathway]
        topPathways <- c(topPathwaysUp, rev(topPathwaysDown))

        output_gsea_table_file_name <- paste(output_label, cluster, "gseaTable.png", sep="_")
        png(filename = output_gsea_table_file_name,
            units = "in",
            width = 15,
            height = 10,
            pointsize = 12,
            res = 72)
        # `finally` closes the PNG device even when plotting fails, so a bad
        # cluster cannot leave a device open for the rest of the session.
        tryCatch({
            gsea_table <- plotGseaTable(pathways[topPathways], ranks, fgseaRes,
                                        gseaParam = gsea_param)
            # A returned ggplot object is not drawn automatically inside a
            # function, so print it explicitly; a call that already drew to
            # the device returns something else and is left alone.
            if (inherits(gsea_table, "ggplot")) {
                print(gsea_table)
            }
        }, finally = dev.off())

        output_fgseaRes_file_name <- paste(output_label, cluster, "fgseaRes.txt", sep="_")
        # sep2 joins the elements of list columns with a single space.
        fwrite(fgseaRes, file = output_fgseaRes_file_name, sep = "\t", sep2 = c("", " ", ""))
    }
    invisible(NULL)
}

In [None]:
# Run the per-cluster analysis; output file names start with "week_13_cluster".
execute_fgsea(gene_set_path, cluster_table, 'week_13_cluster')