In [1]:
library(tidyr)
library(fgsea)
library(ggplot2)
library(Rcpp)
library(data.table)
library(stringi)

Loading required package: Rcpp


In [2]:
cluster_path <- '/home/jovyan/work/single_cell/week_13_no_batch_correction/gene_for_gsea_list.tsv'
gene_set_path <- 'combined_genesets.gmt'

In [3]:
pathways <- gmtPathways(gene_set_path) 

In [18]:
for (paths in names(pathways)){
    print(pathways$paths)
}

[1] "KEGG_CELL_CYCLE"
[1] "GO_OUTFLOW_TRACT_MORPHOGENESIS"
[1] "GO_VENTRICULAR_SEPTUM_MORPHOGENESIS"
[1] "GO_VENOUS_BLOOD_VESSEL_DEVELOPMENT"
[1] "GO_POSITIVE_REGULATION_OF_HEART_GROWTH"
[1] "GO_HEART_FORMATION"
[1] "GO_CARDIAC_CONDUCTION"
[1] "GO_REGULATION_OF_HEART_GROWTH"
[1] "GO_REGULATION_OF_HEART_RATE"
[1] "GO_HEART_DEVELOPMENT"
[1] "GO_ARTERY_DEVELOPMENT"
[1] "GO_PERICARDIUM_DEVELOPMENT"
[1] "GO_ENDOCARDIAL_CUSHION_FORMATION"
[1] "GO_EMBRYONIC_HEART_TUBE_DEVELOPMENT"
[1] "GO_REGULATION_OF_THE_FORCE_OF_HEART_CONTRACTION"
[1] "GO_CARDIAC_MUSCLE_CELL_DIFFERENTIATION"
[1] "GO_EMBRYONIC_HEART_TUBE_MORPHOGENESIS"
[1] "GO_ADULT_HEART_DEVELOPMENT"
[1] "GO_POSITIVE_REGULATION_OF_HEART_CONTRACTION"
[1] "GO_HEART_MORPHOGENESIS"
[1] "GO_ENDOCARDIUM_DEVELOPMENT"
[1] "GO_ARTERY_MORPHOGENESIS"
[1] "GO_AORTA_DEVELOPMENT"
[1] "GO_AORTA_MORPHOGENESIS"
[1] "GO_NEGATIVE_REGULATION_OF_HEART_CONTRACTION"
[1] "GO_ORGAN_FORMATION"
[1] "GO_ENDOCARDIAL_CUSHION_MORPHOGENESIS"
[1] "GO_ENDOCARDIAL_CUSHION_D

In [13]:
length <- nchar(pathways$KEGG_CELL_CYCLE)
check <- stri_sub(pathways$KEGG_CELL_CYCLE,1, length-3)
ex <- stri_sub(check, 3)
ex <- strsplit(ex, '\', \'')

In [14]:
ex

In [3]:
generate_cluster_table <- function(cluster_path){
    cluster_table <- read.table(file = cluster_path, sep = '\t', header = TRUE)
    return(cluster_table)
}

In [4]:
cluster_table <- generate_cluster_table(cluster_path)

In [5]:
options(warn=-1)
execute_fgsea <- function(gene_set_path, cluster_table, output_label){
    
    num_clusters <- unique(cluster_table$cluster_number)
    pathways <- gmtPathways(gene_set_path) 
    for (cluster in num_clusters){
        cluster_set <- cluster_table[cluster_table$cluster_number == cluster,]
        ranks <- setNames(cluster_set$z_score, cluster_set$gene)
        fgseaRes <- fgsea(pathways, 
                      ranks, 
                      minSize=15, 
                      maxSize=500, 
                      nperm=15000,
                      gseaParam = 0.5)
        output_gsea_table_file_name <- paste(output_label, cluster, "gseaTable.png", sep="_")
        topPathwaysUp <- fgseaRes[ES > 0, ][head(order(pval), n=5), pathway]
        topPathwaysDown <- fgseaRes[ES < 0, ][head(order(pval), n=5), pathway]
        topPathways <- c(topPathwaysUp, rev(topPathwaysDown))
        
        png(filename=output_gsea_table_file_name, 
            units="in", 
            width=15, 
            height=10, 
            pointsize=12, 
            res=72)
        plotGseaTable(pathways[topPathways], ranks, fgseaRes, gseaParam = 0.5)
        dev.off()
        
        output_fgseaRes_file_name <- paste(output_label, cluster, "fgseaRes.txt", sep="_")
        fwrite(fgseaRes, file=output_fgseaRes_file_name, sep="\t", sep2=c("", " ", "")) 
    }
}

In [None]:
execute_fgsea(gene_set_path, cluster_table, 'week_13_cluster')