## Gene set overrepresentation analysis

Perform gene set overrepresentation analysis (ORA) using the genes most commonly impacted by high-MAS, low-TAS compounds.

In [1]:
suppressPackageStartupMessages(library(dplyr))
library("WebGestaltR")

******************************************


*                                        *


*          Welcome to WebGestaltR !      *


*                                        *


******************************************




In [2]:
# Gene Ontology databases passed to WebGestaltR as `enrichDatabase`;
# every entry shares the "geneontology_" prefix.
gene_sets <- paste0(
    "geneontology_",
    c("Biological_Process", "Cellular_Component", "Molecular_Function")
)

In [3]:
# Load data
# Column specifications are declared first, using readr's abbreviated type
# codes ("c" = character, "d" = double), so both reads are explicitly typed.
background_cols <- readr::cols(
    probe = "c",
    gene_symbol = "c"
)

cpd_gene_cols <- readr::cols(
    pert_iname = "c",
    moa = "c",
    L1000_probe = "c",
    L1000_readout = "d",
    L1000_abs_readout = "d",
    gene_symbol = "c"
)

# Background table: its gene symbols are used as the ORA reference list.
background_file <- file.path("results", "background_gene_list.tsv")
background_df <- readr::read_tsv(file = background_file, col_types = background_cols)
reference_genes <- background_df[["gene_symbol"]]

# Per-compound gene table: one row per (compound, gene) pair.
cpd_gene_file <- file.path("results", "differential_mas_vs_tas_genes.tsv")
cpd_gene_df <- readr::read_tsv(file = cpd_gene_file, col_types = cpd_gene_cols)

In [4]:
# Perform the analysis
# Run one overrepresentation analysis per compound against the three gene set
# databases in `gene_sets`, using `reference_genes` as the background. Each
# result table is tagged with its compound and stored in `ora_list` under the
# compound name.
ora_list <- list()
for (compound in unique(cpd_gene_df$pert_iname)) {
    # cat() writes the text as-is; print() would show a quoted string with the
    # "\n" escape displayed literally instead of emitting a newline.
    cat("Now performing ORA for", compound, "genes...\n")

    # Genes of interest: every gene symbol listed for this compound.
    cpd_genes_of_interest <- cpd_gene_df %>%
        dplyr::filter(pert_iname == !!compound) %>%
        dplyr::pull(gene_symbol)

    results_df <- WebGestaltR(
        enrichMethod = "ORA",
        organism = "hsapiens",
        enrichDatabase = gene_sets,
        interestGene = cpd_genes_of_interest,
        interestGeneType = "genesymbol",
        referenceGene = reference_genes,
        referenceGeneType = "genesymbol",
        sigMethod = "top",
        isOutput = FALSE
    )

    # NOTE(review): assumes WebGestaltR() can return NULL when it produces no
    # result table -- confirm against the package documentation. Without this
    # guard, mutate() on NULL would abort the whole loop.
    if (is.null(results_df)) {
        warning(
            "No ORA results returned for ", compound, "; skipping.",
            call. = FALSE
        )
        next
    }

    # `!!compound` injects the loop variable's value, so it cannot be confused
    # with a column of the same name.
    results_df <- dplyr::mutate(results_df, compound = !!compound)

    ora_list[[compound]] <- results_df
    cat("Done.\n")
}

[1] "Now performing ORA for alisertib genes..."
Loading the functional categories...
Loading the ID list...
Loading the reference list...
Performing the enrichment analysis...
[1] "Done.\n"
[1] "Now performing ORA for aphidicolin genes..."
Loading the functional categories...
Loading the ID list...
Loading the reference list...
Performing the enrichment analysis...
[1] "Done.\n"
[1] "Now performing ORA for at13387 genes..."
Loading the functional categories...
Loading the ID list...
Loading the reference list...
Performing the enrichment analysis...
[1] "Done.\n"
[1] "Now performing ORA for brequinar genes..."
Loading the functional categories...
Loading the ID list...
Loading the reference list...
Performing the enrichment analysis...
[1] "Done.\n"
[1] "Now performing ORA for dasatinib genes..."
Loading the functional categories...
Loading the ID list...
Loading the reference list...
Performing the enrichment analysis...
[1] "Done.\n"
[1] "Now performing ORA for l-ergothioneine genes.

In [5]:
# Output the results
# Stack the per-compound ORA tables into a single data frame and drop the
# row names left over from the row-binding step.
ora_df <- tibble::remove_rownames(do.call(rbind, ora_list))

# Persist the combined table as a tab-separated file.
output_file <- file.path("results", "ora_compound_results.tsv")
readr::write_tsv(ora_df, output_file)

# Show the dimensions, then the table itself as the cell's value.
print(dim(ora_df))
ora_df

[1] 80 13


geneSet,description,link,size,overlap,expect,enrichmentRatio,pValue,FDR,overlapId,database,userId,compound
<chr>,<chr>,<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
GO:0003824,catalytic activity,http://amigo.geneontology.org/amigo/term/GO:0003824,448,22,13.9249732,1.579895,0.0018514671,1.0000000,329;998;1017;1026;1643;1738;1983;2778;3162;5255;5603;8318;9133;10038;10730;23139;23536;27244;55008;55256;55699;60493,geneontology_Molecular_Function,PHKA1;MAST2;SESN1;CDK2;IARS2;CDKN1A;MAPK13;DLD;CDC42;HMOX1;ADAT1;BIRC2;HERC6;CDC45;ADI1;YME1L1;FASTKD5;GNAS;CCNB2;EIF5;DDB2;PARP2,alisertib
GO:0004674,protein serine/threonine kinase activity,http://amigo.geneontology.org/amigo/term/GO:0004674,68,7,2.1136120,3.311866,0.0034387253,1.0000000,998;1017;1026;5255;5603;9133;23139,geneontology_Molecular_Function,PHKA1;MAST2;CDK2;CDKN1A;MAPK13;CDC42;CCNB2,alisertib
GO:1902554,serine/threonine protein kinase complex,http://amigo.geneontology.org/amigo/term/GO:1902554,23,4,0.7148982,5.595202,0.0044347790,1.0000000,1017;1026;5255;9133,geneontology_Cellular_Component,PHKA1;CDK2;CDKN1A;CCNB2,alisertib
GO:0140096,"catalytic activity, acting on a protein",http://amigo.geneontology.org/amigo/term/GO:0140096,207,13,6.4340836,2.020490,0.0049195169,1.0000000,329;998;1017;1026;1643;5255;5603;9133;10038;10730;23139;55008;60493,geneontology_Molecular_Function,PHKA1;MAST2;CDK2;CDKN1A;MAPK13;CDC42;BIRC2;HERC6;YME1L1;FASTKD5;CCNB2;DDB2;PARP2,alisertib
GO:0043232,intracellular non-membrane-bounded organelle,http://amigo.geneontology.org/amigo/term/GO:0043232,322,17,10.0085745,1.698544,0.0060103040,1.0000000,329;998;1017;1026;2553;3162;7416;8204;8318;9093;9133;10038;10320;23139;27244;60493;85236,geneontology_Cellular_Component,MAST2;IKZF1;SESN1;CDK2;CDKN1A;NRIP1;CDC42;HMOX1;BIRC2;CDC45;VDAC1;DNAJA3;HIST1H2BK;FASTKD5;CCNB2;GABPB1;PARP2,alisertib
GO:1902911,protein kinase complex,http://amigo.geneontology.org/amigo/term/GO:1902911,25,4,0.7770632,5.147586,0.0060660263,1.0000000,1017;1026;5255;9133,geneontology_Cellular_Component,PHKA1;CDK2;CDKN1A;CCNB2,alisertib
GO:1901363,heterocyclic compound binding,http://amigo.geneontology.org/amigo/term/GO:1901363,415,20,12.8992497,1.550478,0.0060873681,1.0000000,998;1017;1643;1738;1983;2553;2778;3162;5603;8204;8318;9093;10038;10320;10730;23139;23536;55699;60493;85236,geneontology_Molecular_Function,MAST2;IKZF1;CDK2;IARS2;MAPK13;DLD;NRIP1;CDC42;HMOX1;ADAT1;CDC45;DNAJA3;HIST1H2BK;YME1L1;FASTKD5;GNAS;EIF5;DDB2;GABPB1;PARP2,alisertib
GO:0004672,protein kinase activity,http://amigo.geneontology.org/amigo/term/GO:0004672,96,8,2.9839228,2.681034,0.0064835517,1.0000000,998;1017;1026;5255;5603;9133;23139;60493,geneontology_Molecular_Function,PHKA1;MAST2;CDK2;CDKN1A;MAPK13;CDC42;FASTKD5;CCNB2,alisertib
GO:0043228,non-membrane-bounded organelle,http://amigo.geneontology.org/amigo/term/GO:0043228,325,17,10.1018221,1.682865,0.0066997103,1.0000000,329;998;1017;1026;2553;3162;7416;8204;8318;9093;9133;10038;10320;23139;27244;60493;85236,geneontology_Cellular_Component,MAST2;IKZF1;SESN1;CDK2;CDKN1A;NRIP1;CDC42;HMOX1;BIRC2;CDC45;VDAC1;DNAJA3;HIST1H2BK;FASTKD5;CCNB2;GABPB1;PARP2,alisertib
GO:0097159,organic cyclic compound binding,http://amigo.geneontology.org/amigo/term/GO:0097159,419,20,13.0235798,1.535676,0.0069574134,1.0000000,998;1017;1643;1738;1983;2553;2778;3162;5603;8204;8318;9093;10038;10320;10730;23139;23536;55699;60493;85236,geneontology_Molecular_Function,MAST2;IKZF1;CDK2;IARS2;MAPK13;DLD;NRIP1;CDC42;HMOX1;ADAT1;CDC45;DNAJA3;HIST1H2BK;YME1L1;FASTKD5;GNAS;EIF5;DDB2;GABPB1;PARP2,alisertib
