## Set up and install packages if they are missing

In [1]:
# Utility routine for installing packages
#
# Installs every package named in `packages` that is not already installed.
#
# Args:
#   packages: character vector of package names to check.
#
# Returns:
#   NULL, invisibly. The function is called for its side effect.
install_if_missing <- function(packages) {
    # Look up the installed packages once and reuse the result; the set
    # difference used to be computed twice (once to test, once to install).
    missing_packages <- setdiff(packages, rownames(installed.packages()))
    if (length(missing_packages) > 0) {
        install.packages(missing_packages)
    }
    invisible(NULL)
}

In [57]:
# Make sure the packages attached in the "Load libraries" section are present.
# scales, assertthat, bigrquery and data.table are attached there with
# library() but were not part of this list, so a fresh environment could fail
# at the library() calls.
# NOTE(review): Ronaldo is also attached below but is not added here —
# presumably it is supplied by the hosting environment; confirm.
install_if_missing(c('tidyverse', 'viridis', 'ggthemes', 'pryr', 'skimr',
                     'testthat', 'reticulate', 'WebGestaltR', 'readxl',
                     'scales', 'assertthat', 'bigrquery', 'data.table'))

## Load libraries

In [58]:
library(viridis)    # A nice color scheme for plots.
library(ggthemes)   # Common themes to change the look and feel of plots.
library(scales)     # Graphical scales map data to aesthetics in plots.
library(testthat)   # Testing functions.
library(assertthat) # Assertion functions.
library(pryr)       # Memory usage functions.
library(skimr)      # Summary statistics for dataframes.
library(bigrquery)  # BigQuery R client.
library(tidyverse)  # Data wrangling packages.
library(reticulate)  # R Interface to Python

library(Ronaldo)    # Leonardo R package.

library(data.table) # Data Table package for faster reading and processing


library(reticulate) # reticulate for calling the FireCloud Python API

library(ggplot2)    # Load ggplot2 for graphs

library(bigrquery)  # Load biqrquery for interacting with BigQuery

library(WebGestaltR)# For pathway/gene set enrichment analysis for this pipeline

library(readxl)

## Set up environment variables

In [4]:
# Workspace configuration, taken from the process environment.
# A variable that is not set yields the empty string.
BILLING_PROJECT_ID <- Sys.getenv("GOOGLE_PROJECT", unset = "")
WORKSPACE_NAMESPACE <- Sys.getenv("WORKSPACE_NAMESPACE", unset = "")
WORKSPACE_NAME <- Sys.getenv("WORKSPACE_NAME", unset = "")
WORKSPACE_BUCKET <- Sys.getenv("WORKSPACE_BUCKET", unset = "")

## Set up utility functions

In [5]:
# Echo a shell command, then run it.
#
# Args:
#   command: the shell command line, as a single string.
#
# Returns:
#   The value of system(command, intern = TRUE).
shell_do <- function(command) {
    announcement <- paste('Executing: ', command)
    print(announcement)
    system(command, intern = TRUE)
}

In [6]:
# Utility routines for reading files from Google Cloud Storage
#
# gcs_read_file: build a `gsutil cat` command for `path`, using
# BILLING_PROJECT_ID as the -u argument, and return a pipe() connection to it.
#
# Args:
#   path: object path interpolated verbatim into the gsutil command line.
#
# Returns:
#   The connection created by pipe().
gcs_read_file <- function(path) {
    cat_command <- str_glue('gsutil -u {BILLING_PROJECT_ID} cat {path}')
    pipe(cat_command)
}
# Read a delimited text file through gcs_read_file() with readr.
#
# Args:
#   path: object path handed to gcs_read_file().
#   sep:  field delimiter; defaults to ','.
#
# Returns:
#   The parsed table as returned by readr.
gcs_read_csv <- function(path, sep=',') {
    if (identical(sep, ',')) {
        # Default delimiter: exactly the call this function always made.
        return(readr::read_csv(gcs_read_file(path)))
    }
    # `sep` used to be accepted but silently ignored; honour it now.
    # trim_ws = TRUE keeps read_csv's whitespace trimming for other delimiters.
    readr::read_delim(gcs_read_file(path), delim = sep, trim_ws = TRUE)
}

# Utility routines for reading files from Google BigQuery
#
# bq_query: run `query` with bigrquery under BILLING_PROJECT_ID and download
# the result.
#
# Args:
#   query: the query text passed to bigrquery::bq_project_query().
#
# Returns:
#   The value of bigrquery::bq_table_download() for the query result.
bq_query <- function(query) {
    query_result <- bigrquery::bq_project_query(BILLING_PROJECT_ID, query = query)
    bigrquery::bq_table_download(query_result)
}

## Import GRCh37 gene coordinates into workspace

NCBI genome build 37.3 (this is an old version)

Obtained from MAGMA auxiliary files: https://ctg.cncr.nl/software/magma

In [9]:
# Copy NCBI37.3.zip from the workspace bucket into ~/bin/data_temp/.
# BILLING_PROJECT_ID is passed to gsutil through the -u flag.
shell_do(str_glue('gsutil -mu {BILLING_PROJECT_ID} cp {WORKSPACE_BUCKET}/NCBI37.3.zip ~/bin/data_temp/'))

[1] "Executing:  gsutil -mu gp2-ipdgc-hackathon cp gs://fc-secure-b9f9d17f-b38c-407d-85e0-7a759f13cea0/NCBI37.3.zip ~/bin/data_temp/"


In [17]:
# List files in ~/bin/data_temp/.
# intern = TRUE captures the listing as the value of the cell.
system('ls ~/bin/data_temp/', intern=TRUE) 

In [24]:
# Unzip gene coordinates file.
# NOTE(review): no -d option is given, so the archive is presumably extracted
# into the current working directory rather than ~/bin/data_temp/; the
# recorded run of this cell also reported exit status 1 — confirm the
# extraction actually succeeded.
system('unzip ~/bin/data_temp/NCBI37.3.zip', intern=TRUE) 

“running command 'unzip ~/bin/data_temp/NCBI37.3.zip' had status 1”


Download current genome build gene coordinates

Downloaded from: http://ftp.ensembl.org/pub/grch37/current/gtf/homo_sapiens/

In [30]:
# Fetch the gzipped Ensembl GRCh37 (release 87) GTF with wget.
# NOTE(review): the return status of system() is not checked here.
system('wget http://ftp.ensembl.org/pub/grch37/current/gtf/homo_sapiens/Homo_sapiens.GRCh37.87.gtf.gz')

In [35]:
# List the current working directory to check that the download is there.
system('ls', intern = TRUE)

In [34]:
# Decompress the downloaded GTF; later cells read Homo_sapiens.GRCh37.87.gtf.
system('gunzip Homo_sapiens.GRCh37.87.gtf.gz', intern=TRUE) 

## Read in PD GWAS summary statistics

In [12]:
# Load the GWAS summary statistics table; the first line holds column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
# The object name `data` is kept because later cells refer to it.
data <- fread("~/bin/data_temp/nallsEtAl2019_excluding23andMe_allVariants.tab", header = TRUE)

In [13]:
# Preview the first rows of the summary statistics table.
head(data)

SNP,A1,A2,freq,b,se,p,N_cases,N_controls
<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<int>
chr11:88249377,T,C,0.9931,0.1575,0.1783,0.3771,7161,5356
chr1:60320992,A,G,0.9336,0.0605,0.0456,0.1846,26421,442271
chr2:18069070,T,C,0.9988,-0.6774,1.3519,0.6163,582,905
chr8:135908647,A,G,0.2081,-0.0358,0.0273,0.1887,26421,442271
chr12:3871714,A,C,0.9972,0.1489,1.0636,0.8886,749,658
chr16:77148858,A,G,0.9976,-0.1213,0.3874,0.7543,6248,4391


## Filter for SNPs with p-value < 5e-08

In [14]:
# Keep only the variants whose p-value is below 5e-08.
data_filtered <- filter(data, p < 5e-08)

In [15]:
# Show the number of rows and columns left after the p-value filter.
dim(data_filtered)

## Read in gene coordinates file and tidy

In [38]:
# Read the decompressed GTF from the current working directory.
# skip = 5 makes fread start after the first five lines of the file —
# presumably the GTF header comment lines; re-check if the file changes.
gene_coords <- fread("Homo_sapiens.GRCh37.87.gtf", skip = 5)

In [39]:
# Preview the raw GTF records (columns V1..V9).
head(gene_coords)

V1,V2,V3,V4,V5,V6,V7,V8,V9
<chr>,<chr>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<chr>
1,ensembl_havana,gene,11869,14412,.,+,.,"gene_id ""ENSG00000223972""; gene_version ""4""; gene_name ""DDX11L1""; gene_source ""ensembl_havana""; gene_biotype ""pseudogene"";"
1,havana,transcript,11869,14409,.,+,.,"gene_id ""ENSG00000223972""; gene_version ""4""; transcript_id ""ENST00000456328""; transcript_version ""2""; gene_name ""DDX11L1""; gene_source ""ensembl_havana""; gene_biotype ""pseudogene""; transcript_name ""DDX11L1-002""; transcript_source ""havana""; transcript_biotype ""processed_transcript""; havana_transcript ""OTTHUMT00000362751""; havana_transcript_version ""1""; tag ""basic"";"
1,havana,exon,11869,12227,.,+,.,"gene_id ""ENSG00000223972""; gene_version ""4""; transcript_id ""ENST00000456328""; transcript_version ""2""; exon_number ""1""; gene_name ""DDX11L1""; gene_source ""ensembl_havana""; gene_biotype ""pseudogene""; transcript_name ""DDX11L1-002""; transcript_source ""havana""; transcript_biotype ""processed_transcript""; havana_transcript ""OTTHUMT00000362751""; havana_transcript_version ""1""; exon_id ""ENSE00002234944""; exon_version ""1""; tag ""basic"";"
1,havana,exon,12613,12721,.,+,.,"gene_id ""ENSG00000223972""; gene_version ""4""; transcript_id ""ENST00000456328""; transcript_version ""2""; exon_number ""2""; gene_name ""DDX11L1""; gene_source ""ensembl_havana""; gene_biotype ""pseudogene""; transcript_name ""DDX11L1-002""; transcript_source ""havana""; transcript_biotype ""processed_transcript""; havana_transcript ""OTTHUMT00000362751""; havana_transcript_version ""1""; exon_id ""ENSE00003582793""; exon_version ""1""; tag ""basic"";"
1,havana,exon,13221,14409,.,+,.,"gene_id ""ENSG00000223972""; gene_version ""4""; transcript_id ""ENST00000456328""; transcript_version ""2""; exon_number ""3""; gene_name ""DDX11L1""; gene_source ""ensembl_havana""; gene_biotype ""pseudogene""; transcript_name ""DDX11L1-002""; transcript_source ""havana""; transcript_biotype ""processed_transcript""; havana_transcript ""OTTHUMT00000362751""; havana_transcript_version ""1""; exon_id ""ENSE00002312635""; exon_version ""1""; tag ""basic"";"
1,ensembl,transcript,11872,14412,.,+,.,"gene_id ""ENSG00000223972""; gene_version ""4""; transcript_id ""ENST00000515242""; transcript_version ""2""; gene_name ""DDX11L1""; gene_source ""ensembl_havana""; gene_biotype ""pseudogene""; transcript_name ""DDX11L1-201""; transcript_source ""ensembl""; transcript_biotype ""transcribed_unprocessed_pseudogene"";"


Filter just for genes

In [40]:
# Keep only the GTF records whose feature column (V3) is "gene".
gene_coords_genesonly <- filter(gene_coords, V3 == "gene")

In [41]:
# Preview the gene-level records.
head(gene_coords_genesonly)

V1,V2,V3,V4,V5,V6,V7,V8,V9
<chr>,<chr>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<chr>
1,ensembl_havana,gene,11869,14412,.,+,.,"gene_id ""ENSG00000223972""; gene_version ""4""; gene_name ""DDX11L1""; gene_source ""ensembl_havana""; gene_biotype ""pseudogene"";"
1,ensembl_havana,gene,14363,29806,.,-,.,"gene_id ""ENSG00000227232""; gene_version ""4""; gene_name ""WASH7P""; gene_source ""ensembl_havana""; gene_biotype ""pseudogene"";"
1,ensembl_havana,gene,29554,31109,.,+,.,"gene_id ""ENSG00000243485""; gene_version ""2""; gene_name ""MIR1302-10""; gene_source ""ensembl_havana""; gene_biotype ""lincRNA"";"
1,ensembl_havana,gene,34554,36081,.,-,.,"gene_id ""ENSG00000237613""; gene_version ""2""; gene_name ""FAM138A""; gene_source ""ensembl_havana""; gene_biotype ""lincRNA"";"
1,ensembl_havana,gene,52473,54936,.,+,.,"gene_id ""ENSG00000268020""; gene_version ""2""; gene_name ""OR4G4P""; gene_source ""ensembl_havana""; gene_biotype ""pseudogene"";"
1,havana,gene,62948,63887,.,+,.,"gene_id ""ENSG00000240361""; gene_version ""1""; gene_name ""OR4G11P""; gene_source ""havana""; gene_biotype ""pseudogene"";"


Separate column 9 to get gene names

In [43]:
# Split the attribute column (V9) into one column per attribute, by reference.
# The separator pattern also consumes the whitespace that follows each ";".
# Splitting on a bare ";" left every field after the first with a leading
# space (e.g. ' gene_name "DDX11L1"').
# This relies on each gene record carrying exactly these five attributes in
# this order, as in the previewed rows.
setDT(gene_coords_genesonly)[, c("gene_id", "gene_version", "gene_name", "gene_source", "gene_biotype") := tstrsplit(V9, ";\\s*")]

In [44]:
# Preview the table with the five attribute columns added.
head(gene_coords_genesonly)

V1,V2,V3,V4,V5,V6,V7,V8,V9,gene_id,gene_version,gene_name,gene_source,gene_biotype
<chr>,<chr>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,ensembl_havana,gene,11869,14412,.,+,.,"gene_id ""ENSG00000223972""; gene_version ""4""; gene_name ""DDX11L1""; gene_source ""ensembl_havana""; gene_biotype ""pseudogene"";","gene_id ""ENSG00000223972""","gene_version ""4""","gene_name ""DDX11L1""","gene_source ""ensembl_havana""","gene_biotype ""pseudogene"""
1,ensembl_havana,gene,14363,29806,.,-,.,"gene_id ""ENSG00000227232""; gene_version ""4""; gene_name ""WASH7P""; gene_source ""ensembl_havana""; gene_biotype ""pseudogene"";","gene_id ""ENSG00000227232""","gene_version ""4""","gene_name ""WASH7P""","gene_source ""ensembl_havana""","gene_biotype ""pseudogene"""
1,ensembl_havana,gene,29554,31109,.,+,.,"gene_id ""ENSG00000243485""; gene_version ""2""; gene_name ""MIR1302-10""; gene_source ""ensembl_havana""; gene_biotype ""lincRNA"";","gene_id ""ENSG00000243485""","gene_version ""2""","gene_name ""MIR1302-10""","gene_source ""ensembl_havana""","gene_biotype ""lincRNA"""
1,ensembl_havana,gene,34554,36081,.,-,.,"gene_id ""ENSG00000237613""; gene_version ""2""; gene_name ""FAM138A""; gene_source ""ensembl_havana""; gene_biotype ""lincRNA"";","gene_id ""ENSG00000237613""","gene_version ""2""","gene_name ""FAM138A""","gene_source ""ensembl_havana""","gene_biotype ""lincRNA"""
1,ensembl_havana,gene,52473,54936,.,+,.,"gene_id ""ENSG00000268020""; gene_version ""2""; gene_name ""OR4G4P""; gene_source ""ensembl_havana""; gene_biotype ""pseudogene"";","gene_id ""ENSG00000268020""","gene_version ""2""","gene_name ""OR4G4P""","gene_source ""ensembl_havana""","gene_biotype ""pseudogene"""
1,havana,gene,62948,63887,.,+,.,"gene_id ""ENSG00000240361""; gene_version ""1""; gene_name ""OR4G11P""; gene_source ""havana""; gene_biotype ""pseudogene"";","gene_id ""ENSG00000240361""","gene_version ""1""","gene_name ""OR4G11P""","gene_source ""havana""","gene_biotype ""pseudogene"""


In [48]:
# Reduce the gene table to chromosome, start, end and a clean gene symbol.
# gene_name holds text such as ' gene_name "DDX11L1"'. Removing the
# 'gene_name' tag and the quotes used to leave the surrounding spaces in the
# symbol, so trimws() strips them before the quotes are removed.
gene_coords_genesonly_select <- gene_coords_genesonly %>%
    select(V1, V4, V5, gene_name) %>% #Select just relevant columns - chr, bp start, bp end, gene name
    mutate(gene = trimws(gsub("gene_name", "", gene_name))) %>% #Remove string 'gene_name' and surrounding whitespace
    mutate(gene_final = gsub('"', "", gene)) #Remove the "" from gene column and make gene_final col

In [49]:
# Preview the intermediate gene-name columns.
head(gene_coords_genesonly_select)

V1,V4,V5,gene_name,gene,gene_final
<chr>,<int>,<int>,<chr>,<chr>,<chr>
1,11869,14412,"gene_name ""DDX11L1""","""DDX11L1""",DDX11L1
1,14363,29806,"gene_name ""WASH7P""","""WASH7P""",WASH7P
1,29554,31109,"gene_name ""MIR1302-10""","""MIR1302-10""",MIR1302-10
1,34554,36081,"gene_name ""FAM138A""","""FAM138A""",FAM138A
1,52473,54936,"gene_name ""OR4G4P""","""OR4G4P""",OR4G4P
1,62948,63887,"gene_name ""OR4G11P""","""OR4G11P""",OR4G11P


Select only relevant columns and rename

In [50]:
# Final gene coordinate table: pick the four columns and give them
# descriptive names in a single select().
gene_coords_final <- gene_coords_genesonly_select %>%
    select(chr = V1, bp_start = V4, bp_end = V5, gene = gene_final)

In [51]:
# Preview the tidy gene coordinate table (chr, bp_start, bp_end, gene).
head(gene_coords_final)

chr,bp_start,bp_end,gene
<chr>,<int>,<int>,<chr>
1,11869,14412,DDX11L1
1,14363,29806,WASH7P
1,29554,31109,MIR1302-10
1,34554,36081,FAM138A
1,52473,54936,OR4G4P
1,62948,63887,OR4G11P


## Annotate GWAS sumstats with nearest genes

In [53]:
# Show the last rows of the filtered summary statistics.
tail(data_filtered)

SNP,A1,A2,freq,b,se,p,N_cases,N_controls
<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<int>
chr5:60345424,T,G,0.8883,-0.1916,0.0266,5.618e-13,33674,449056
chr17:43758382,T,C,0.7814,0.2527,0.0269,5.906e-21,32505,448088
chr17:44201667,A,G,0.2176,-0.2554,0.0271,4.138e-21,32505,448088
chr12:40556767,T,C,0.9971,-4.0052,0.594,1.554e-11,8488,7671
chr17:43858187,T,G,0.2187,-0.2521,0.027,8.896e-21,32505,448088
chr17:43992233,T,C,0.2171,-0.251,0.0271,1.749e-20,32505,448088


## Read in GWAS top hits annotated with nearest genes

First import file into workspace

In [55]:
# Copy the supplementary spreadsheet from the workspace bucket into
# ~/bin/data_temp/. The object name contains spaces, so it is wrapped in
# double quotes inside the shell command.
shell_do(str_glue('gsutil -mu {BILLING_PROJECT_ID} cp {WORKSPACE_BUCKET}/"Table S2. Detailed summary statistics.xlsx" ~/bin/data_temp/'))

[1] "Executing:  gsutil -mu gp2-ipdgc-hackathon cp gs://fc-secure-b9f9d17f-b38c-407d-85e0-7a759f13cea0/\"Table S2. Detailed summary statistics.xlsx\" ~/bin/data_temp/"


In [56]:
# List ~/bin/data_temp to confirm the spreadsheet was copied.
system('ls ~/bin/data_temp', intern = TRUE)

Read GWAS hits into R

In [59]:
# Load the copied spreadsheet into GWAS_toploci with read_xlsx().
GWAS_toploci <- read_xlsx("~/bin/data_temp/Table S2. Detailed summary statistics.xlsx")

In [60]:
# Preview the first rows of the top-loci table.
head(GWAS_toploci)

SNP,CHR,BP,Nearest Gene,QTL Nominated Gene (nearest QTL),Effect allele,Other allele,Effect allele frequency,"Beta, all studies","SE, all studies",⋯,"Freq1, new studies","Beta, new studies","StdErr, new studies","P, new studies","I2, new studies",Passes pooled 23andMe QC,Known GWAS locus within 1MB,Failed final filtering and QC,Locus within 250KB,Locus Number
<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<chr>
rs114138760,1,154898185,PMVK,,c,g,0.0112,0.2812,0.0478,⋯,0.0109,0.1997,0.0843,0.01779,0.0,T,1,0,1,1
rs35749011,1,155135036,KRTCAP2,,a,g,0.0169,0.6068,0.0342,⋯,0.0177,0.6798,0.0615,2.16e-28,0.0,T,1,0,1,1
rs76763715,1,155205634,GBAP1,GBAP1,t,c,0.9953,-0.7467,0.0765,⋯,0.9941,-0.6693,0.1269,1.34e-07,0.0,F,1,0,1,1
rs6658353,1,161469054,FCGR2A,FCGR2A,c,g,0.5011,0.065,0.0094,⋯,0.5105,0.0662,0.0171,0.0001101,46.4,T,0,0,0,2
rs11578699,1,171719769,VAMP4,VAMP4,t,c,0.1949,-0.0704,0.012,⋯,0.1947,-0.0663,0.0226,0.003313,13.4,T,0,0,0,3
rs823118,1,205723572,NUCKS1,NUCKS1,t,c,0.566,0.1066,0.0094,⋯,0.5674,0.0842,0.0172,1.02e-06,0.0,T,1,0,1,4


Make list of SNPs in .txt file

In [88]:
# Build the one-column SNP list, ordered by the `P, all studies` column.
# The sort must come before select(): afterwards the p-value column is gone.
# The column is referenced with backticks — the previous quoted form
# arrange('P, all studies') sorted by a constant string and so left the rows
# in their original order.
SNPs_only <- GWAS_toploci %>%
    arrange(`P, all studies`) %>%
    select(SNP)

In [89]:
# Write the SNP identifiers, one per line, without header, row names or quotes.
# TRUE/FALSE are spelled out because T and F are reassignable variables.
write.table(SNPs_only, "~/bin/data_temp/PDSNPs_sorted.txt", quote = FALSE, row.names = FALSE, col.names = FALSE, sep = "\t")

In [92]:
# List ~/bin/data_temp/ to confirm the SNP file was written.
system('ls ~/bin/data_temp/', intern = TRUE)

## Make gene list ranked by p-values

In [62]:
# Two-column table of nearest gene and p-value, renamed to `gene` and `p`
# directly inside select().
PDgenes <- GWAS_toploci %>%
    select(gene = `Nearest Gene`, p = `P, all studies`)

In [63]:
# Preview the gene / p-value table.
head(PDgenes)

gene,p
<chr>,<dbl>
PMVK,4.19e-09
KRTCAP2,1.7200000000000002e-70
GBAP1,1.59e-22
FCGR2A,6.1e-12
VAMP4,4.47e-09
NUCKS1,1.1100000000000001e-29


Remove any missing nearest gene and rank by p-value

In [67]:
# Drop rows without a nearest gene, then order by ascending p-value.
PDgenes_sorted <- arrange(filter(PDgenes, !is.na(gene)), p)

In [69]:
# Preview the genes with the smallest p-values.
head(PDgenes_sorted)

gene,p
<chr>,<dbl>
SNCA,3.89e-154
SNCA,5.16e-149
LRRK2,3.6100000000000004e-148
KRTCAP2,1.7200000000000002e-70
TMEM175,9.980000000000001e-70
CRHR1,3.5799999999999998e-68


In [119]:
# Export gene and raw p-value as a tab-separated, header-less .rnk file.
# TRUE/FALSE are spelled out because T and F are reassignable variables.
write.table(PDgenes_sorted, "~/bin/data_temp/PDgenes_sorted_pvals.rnk",
           quote = FALSE, col.names = FALSE, row.names = FALSE, sep = "\t")

Make a dataframe with genes and -log10 pvalues

In [121]:
# Add the -log10 transform of the p-value as column neg_log10p.
PDgenes_sorted_neglogp <- mutate(PDgenes_sorted, neg_log10p = -log10(p))

In [122]:
# Preview the table with the added neg_log10p column.
head(PDgenes_sorted_neglogp)

gene,p,neg_log10p
<chr>,<dbl>,<dbl>
SNCA,3.89e-154,153.41005
SNCA,5.16e-149,148.28735
LRRK2,3.6100000000000004e-148,147.44249
KRTCAP2,1.7200000000000002e-70,69.76447
TMEM175,9.980000000000001e-70,69.00087
CRHR1,3.5799999999999998e-68,67.44612


In [123]:
# Keep only the gene symbol and its -log10 p-value for the rank file.
PDgenes_sorted_neglogp <- PDgenes_sorted_neglogp %>%
    select(gene, neg_log10p)

#Export as rnk file
# Tab-separated, no header, row names or quotes. TRUE/FALSE are spelled out
# because T and F are reassignable variables.
write.table(PDgenes_sorted_neglogp, "~/bin/data_temp/PDgenes_sorted_pvals_neglog.rnk",
           quote = FALSE, col.names = FALSE, row.names = FALSE, sep = "\t")

Make a dataframe with just the unique gene names

In [76]:
# One row per distinct gene symbol; distinct() on a named column keeps only
# that column.
PDgenes_sorted_geneonly <- distinct(PDgenes_sorted, gene)

In [83]:
# Write the unique gene symbols, one per line.
# The separator used to be "/n" — a slash followed by "n", not a newline.
# It never reached the file because the table has a single column; rows are
# ended by write.table's default eol. A tab is used here, as in the other
# exports. TRUE/FALSE are spelled out because T and F are reassignable.
write.table(PDgenes_sorted_geneonly, "~/bin/data_temp/PDgenes_sorted_geneonly.txt",
           quote = FALSE, col.names = FALSE, row.names = FALSE, sep = "\t")

In [84]:
# Show the first lines of the written gene list to check its format.
system('head ~/bin/data_temp/PDgenes_sorted_geneonly.txt', intern = TRUE)

Write gene list to workspace bucket to check

In [103]:
# Copy the gene list file to the workspace bucket.
# BILLING_PROJECT_ID is passed to gsutil through the -u flag.
shell_do(str_glue('gsutil -u {BILLING_PROJECT_ID} cp -r ~/bin/data_temp/PDgenes_sorted_geneonly.txt {WORKSPACE_BUCKET}'))

[1] "Executing:  gsutil -u gp2-ipdgc-hackathon cp -r ~/bin/data_temp/PDgenes_sorted_geneonly.txt gs://fc-secure-b9f9d17f-b38c-407d-85e0-7a759f13cea0"


## Show WebGestaltR options

In [77]:
# Ask WebGestaltR for the reference sets it offers for organism "hsapiens",
# using the given host and no cache.
listReferenceSet(
organism = "hsapiens",
hostName = "http://www.webgestalt.org/",
cache = NULL
)

In [94]:
# List the identifier types WebGestaltR accepts for "hsapiens".
listIdType(organism="hsapiens")

In [127]:
# List the gene set databases WebGestaltR offers for "hsapiens".
listGeneSet(organism = "hsapiens")

Unnamed: 0_level_0,name,description,idType
Unnamed: 0_level_1,<chr>,<chr>,<chr>
1,geneontology_Biological_Process,The gene ontology biological process database was downloaded from http://www.geneontology.org/.,entrezgene
2,geneontology_Biological_Process_noRedundant,"The gene ontology biological process database was downloaded from http://www.geneontology.org/. Then, we only contain the non-redundant categories by selecting the most general categories in each branch of the GO DAG structure from all categories with the number of annotated genes from 20 to 500.",entrezgene
3,geneontology_Cellular_Component,The gene ontology cellular component database was downloaded from http://www.geneontology.org/.,entrezgene
4,geneontology_Cellular_Component_noRedundant,"The gene ontology cellular component database was downloaded from http://www.geneontology.org/. Then, we only contain the non-redundant categories by selecting the most general categories in each branch of the GO DAG structure from all categories with the number of annotated genes from 20 to 500.",entrezgene
5,geneontology_Molecular_Function,The gene ontology molecular function database was downloaded from http://www.geneontology.org/.,entrezgene
6,geneontology_Molecular_Function_noRedundant,"The gene ontology molecular function database was downloaded from http://www.geneontology.org/. Then, we only contain the non-redundant categories by selecting the most general categories in each branch of the GO DAG structure from all categories with the number of annotated genes from 20 to 500.",entrezgene
7,pathway_KEGG,The KEGG pathway database was downloaded from http://www.kegg.jp/.,entrezgene
8,pathway_Panther,The PANTHER pathway database was downloaded from http://www.pantherdb.org/pathway/.,entrezgene
9,pathway_Reactome,The Reactome pathway database was downloaded from http://www.reactome.org/.,entrezgene
10,pathway_Wikipathway,The Wikipathway database was downloaded from http://www.wikipathway.org/.,entrezgene


## Run WebGestaltR Overrepresentation Analysis (ORA)

Run with gene list

In [125]:
# Over-representation analysis of the unique PD gene symbols against
# pathway_KEGG. Output is written under ~/bin/data_temp/.
# refFile is the referenceGenes.txt file in WebGestaltR's extdata directory.
# NOTE(review): that looks like the package's bundled example reference list
# rather than a background chosen for this study — confirm it is the intended
# reference set.
refFile <- system.file("extdata", "referenceGenes.txt", package="WebGestaltR")
enrichResult <- WebGestaltR(enrichMethod="ORA", organism="hsapiens",
                            enrichDatabase="pathway_KEGG", interestGeneFile="~/bin/data_temp/PDgenes_sorted_geneonly.txt",
                            interestGeneType="genesymbol", referenceGeneFile=refFile,
                            referenceGeneType="genesymbol", isOutput=TRUE,
                            outputDirectory="~/bin/data_temp/", projectName=NULL)


Loading the functional categories...
Loading the ID list...
Loading the reference list...
Summarizing the input ID list by GO Slim data...
Performing the enrichment analysis...


“No significant gene set is identified based on FDR 0.05!”


Run with SNP list

In [112]:
# Same over-representation analysis, but the input file is the SNP list and
# its identifier type is "dbSNP"; the reference list is still declared as
# "genesymbol". This call overwrites enrichResult from the previous cell.
# NOTE(review): refFile is again the referenceGenes.txt bundled with
# WebGestaltR — confirm it is the intended background.
refFile <- system.file("extdata", "referenceGenes.txt", package="WebGestaltR")
enrichResult <- WebGestaltR(enrichMethod="ORA", organism="hsapiens",
                            enrichDatabase="pathway_KEGG", interestGeneFile="~/bin/data_temp/PDSNPs_sorted.txt",
                            interestGeneType="dbSNP", referenceGeneFile=refFile,
                            referenceGeneType="genesymbol", isOutput=TRUE,
                            outputDirectory="~/bin/data_temp/", projectName=NULL)

Loading the functional categories...
Loading the ID list...
Loading the reference list...
Summarizing the input ID list by GO Slim data...
Performing the enrichment analysis...


“No significant gene set is identified based on FDR 0.05!”


## Run WebGestaltR Gene Set Enrichment Analysis

In [130]:
#Run with original pvalues
# GSEA on the gene / p-value rank file against
# geneontology_Biological_Process_noRedundant. sigMethod="top" with topThr=10
# and minNum=5 are passed as shown; results go under ~/bin/data_temp.
# rankFile (the GeneRankList.rnk path inside the WebGestaltR package) is
# assigned here but not passed to the call below; interestGeneFile supplies
# the rank file instead.
# NOTE(review): the scores in this file appear to be raw p-values, so smaller
# means more significant — confirm that ordering is what GSEA should rank on.
rankFile <- system.file("extdata", "GeneRankList.rnk", package="WebGestaltR")
enrichResult <- WebGestaltR(enrichMethod="GSEA", organism="hsapiens",
enrichDatabase="geneontology_Biological_Process_noRedundant", interestGeneFile="~/bin/data_temp/PDgenes_sorted_pvals.rnk",
interestGeneType="genesymbol", sigMethod="top", topThr=10, minNum=5,
outputDirectory="~/bin/data_temp")

Loading the functional categories...
Loading the ID list...
Summarizing the uploaded ID list by GO Slim data...
Performing the enrichment analysis...


“score does not contain minimum number of items in set for GO:0000041
”
“score does not contain minimum number of items in set for GO:0000075
”
“score does not contain minimum number of items in set for GO:0000209
”
“score does not contain minimum number of items in set for GO:0000910
”
“score does not contain minimum number of items in set for GO:0001101
”
“score does not contain minimum number of items in set for GO:0001503
”
“score does not contain minimum number of items in set for GO:0001525
”
“score does not contain minimum number of items in set for GO:0001539
”
“score does not contain minimum number of items in set for GO:0001655
”
“score does not contain minimum number of items in set for GO:0001667
”
“score does not contain minimum number of items in set for GO:0001763
”
“score does not contain minimum number of items in set for GO:0001764
”
“score does not contain minimum number of items in set for GO:0001818
”
“score does not contain minimum number of items in set for GO:00

“score does not contain minimum number of items in set for GO:0006631
”
“score does not contain minimum number of items in set for GO:0006638
”
“score does not contain minimum number of items in set for GO:0006643
”
“score does not contain minimum number of items in set for GO:0006720
”
“score does not contain minimum number of items in set for GO:0006766
”
“score does not contain minimum number of items in set for GO:0006790
”
“score does not contain minimum number of items in set for GO:0006814
”
“score does not contain minimum number of items in set for GO:0006839
”
“score does not contain minimum number of items in set for GO:0006909
”
“score does not contain minimum number of items in set for GO:0006925
”
“score does not contain minimum number of items in set for GO:0006959
”
“score does not contain minimum number of items in set for GO:0006968
”
“score does not contain minimum number of items in set for GO:0006970
”
“score does not contain minimum number of items in set for GO:00

“score does not contain minimum number of items in set for GO:0009123
”
“score does not contain minimum number of items in set for GO:0009132
”
“score does not contain minimum number of items in set for GO:0009141
”
“score does not contain minimum number of items in set for GO:0009266
”
“score does not contain minimum number of items in set for GO:0009268
”
“score does not contain minimum number of items in set for GO:0009308
”
“score does not contain minimum number of items in set for GO:0009314
”
“score does not contain minimum number of items in set for GO:0009410
”
“score does not contain minimum number of items in set for GO:0009581
”
“score does not contain minimum number of items in set for GO:0009582
”
“score does not contain minimum number of items in set for GO:0009593
”
“score does not contain minimum number of items in set for GO:0009595
”
“score does not contain minimum number of items in set for GO:0009612
”
“score does not contain minimum number of items in set for GO:00

“score does not contain minimum number of items in set for GO:0019216
”
“score does not contain minimum number of items in set for GO:0019233
”
“score does not contain minimum number of items in set for GO:0019748
”
“score does not contain minimum number of items in set for GO:0019882
”
“score does not contain minimum number of items in set for GO:0019932
”
“score does not contain minimum number of items in set for GO:0021510
”
“score does not contain minimum number of items in set for GO:0021700
”
“score does not contain minimum number of items in set for GO:0021953
”
“score does not contain minimum number of items in set for GO:0022406
”
“score does not contain minimum number of items in set for GO:0022412
”
“score does not contain minimum number of items in set for GO:0022613
”
“score does not contain minimum number of items in set for GO:0022616
”
“score does not contain minimum number of items in set for GO:0030048
”
“score does not contain minimum number of items in set for GO:00

“score does not contain minimum number of items in set for GO:0033028
”
“score does not contain minimum number of items in set for GO:0033044
”
“score does not contain minimum number of items in set for GO:0033500
”
“score does not contain minimum number of items in set for GO:0033555
”
“score does not contain minimum number of items in set for GO:0033619
”
“score does not contain minimum number of items in set for GO:0033865
”
“score does not contain minimum number of items in set for GO:0034067
”
“score does not contain minimum number of items in set for GO:0034248
”
“score does not contain minimum number of items in set for GO:0034330
”
“score does not contain minimum number of items in set for GO:0034340
”
“score does not contain minimum number of items in set for GO:0034341
”
“score does not contain minimum number of items in set for GO:0034394
”
“score does not contain minimum number of items in set for GO:0034612
”
“score does not contain minimum number of items in set for GO:00

“score does not contain minimum number of items in set for GO:0043279
”
“score does not contain minimum number of items in set for GO:0043491
”
“score does not contain minimum number of items in set for GO:0043543
”
“score does not contain minimum number of items in set for GO:0043583
”
“score does not contain minimum number of items in set for GO:0043620
”
“score does not contain minimum number of items in set for GO:0043900
”
“score does not contain minimum number of items in set for GO:0043954
”
“score does not contain minimum number of items in set for GO:0044070
”
“score does not contain minimum number of items in set for GO:0044262
”
“score does not contain minimum number of items in set for GO:0044282
”
“score does not contain minimum number of items in set for GO:0044706
”
“score does not contain minimum number of items in set for GO:0044772
”
“score does not contain minimum number of items in set for GO:0044843
”
“score does not contain minimum number of items in set for GO:00

“score does not contain minimum number of items in set for GO:0050803
”
“score does not contain minimum number of items in set for GO:0050817
”
“score does not contain minimum number of items in set for GO:0050866
”
“score does not contain minimum number of items in set for GO:0050878
”
“score does not contain minimum number of items in set for GO:0050879
”
“score does not contain minimum number of items in set for GO:0050890
”
“score does not contain minimum number of items in set for GO:0050900
”
“score does not contain minimum number of items in set for GO:0050905
”
“score does not contain minimum number of items in set for GO:0050906
”
“score does not contain minimum number of items in set for GO:0050918
”
“score does not contain minimum number of items in set for GO:0050919
”
“score does not contain minimum number of items in set for GO:0050920
”
“score does not contain minimum number of items in set for GO:0050953
”
“score does not contain minimum number of items in set for GO:00

“score does not contain minimum number of items in set for GO:0061448
”
“score does not contain minimum number of items in set for GO:0061458
”
“score does not contain minimum number of items in set for GO:0061512
”
“score does not contain minimum number of items in set for GO:0061564
”
“score does not contain minimum number of items in set for GO:0061919
”
“score does not contain minimum number of items in set for GO:0062012
”
“score does not contain minimum number of items in set for GO:0070085
”
“score does not contain minimum number of items in set for GO:0070371
”
“score does not contain minimum number of items in set for GO:0070482
”
“score does not contain minimum number of items in set for GO:0070555
”
“score does not contain minimum number of items in set for GO:0070585
”
“score does not contain minimum number of items in set for GO:0070661
”
“score does not contain minimum number of items in set for GO:0070841
”
“score does not contain minimum number of items in set for GO:00

“score does not contain minimum number of items in set for GO:0097581
”
“score does not contain minimum number of items in set for GO:0097696
”
“score does not contain minimum number of items in set for GO:0098542
”
“score does not contain minimum number of items in set for GO:0098656
”
“score does not contain minimum number of items in set for GO:0098732
”
“score does not contain minimum number of items in set for GO:0098739
”
“score does not contain minimum number of items in set for GO:0098742
”
“score does not contain minimum number of items in set for GO:0098780
”
“score does not contain minimum number of items in set for GO:0098876
”
“score does not contain minimum number of items in set for GO:0099072
”
“score does not contain minimum number of items in set for GO:0104004
”
“score does not contain minimum number of items in set for GO:0106027
”
“score does not contain minimum number of items in set for GO:0150076
”
“score does not contain minimum number of items in set for GO:01

1000 permutations of score complete...
Begin affinity propagation...
End affinity propagation...
Begin weighted set cover...
Remain is 0, ending weighted set cover
Generate the final report...
Results can be found in the ~/bin/data_temp/Project_1620831213!


In [132]:
# List the files WebGestaltR wrote for this run. The directory name is the
# one reported in the run's log output and is hard-coded here.
system('ls ~/bin/data_temp/Project_1620831213', intern = TRUE)

Write results to workspace bucket

In [133]:
# Copy the GO Slim summary image from the hard-coded project directory to
# the workspace bucket.
shell_do(str_glue('gsutil -u {BILLING_PROJECT_ID} cp -r ~/bin/data_temp/Project_1620831213/goslim_summary_1620831213.png {WORKSPACE_BUCKET}'))

[1] "Executing:  gsutil -u gp2-ipdgc-hackathon cp -r ~/bin/data_temp/Project_1620831213/goslim_summary_1620831213.png gs://fc-secure-b9f9d17f-b38c-407d-85e0-7a759f13cea0"


In [134]:
# GSEA run using the -log10 p-value rank file
# NOTE(review): rankFile resolves the example rank list shipped inside the
# WebGestaltR package, but the call below takes its input from
# interestGeneFile instead -- confirm whether rankFile is still needed.
rankFile <- system.file("extdata", "GeneRankList.rnk", package = "WebGestaltR")

# Enrichment against the non-redundant GO Biological Process gene sets;
# WebGestaltR creates a Project_<id> folder under outputDirectory.
enrichResult <- WebGestaltR(
    enrichMethod = "GSEA",
    organism = "hsapiens",
    enrichDatabase = "geneontology_Biological_Process_noRedundant",
    interestGeneFile = "~/bin/data_temp/PDgenes_sorted_pvals_neglog.rnk",
    interestGeneType = "genesymbol",
    sigMethod = "top",
    topThr = 10,
    minNum = 5,
    outputDirectory = "~/bin/data_temp"
)

Loading the functional categories...
Loading the ID list...
Summarizing the uploaded ID list by GO Slim data...
Performing the enrichment analysis...


“score does not contain minimum number of items in set for GO:0000041
”
“score does not contain minimum number of items in set for GO:0000075
”
“score does not contain minimum number of items in set for GO:0000209
”
“score does not contain minimum number of items in set for GO:0000910
”
“score does not contain minimum number of items in set for GO:0001101
”
“score does not contain minimum number of items in set for GO:0001503
”
“score does not contain minimum number of items in set for GO:0001525
”
“score does not contain minimum number of items in set for GO:0001539
”
“score does not contain minimum number of items in set for GO:0001655
”
“score does not contain minimum number of items in set for GO:0001667
”
“score does not contain minimum number of items in set for GO:0001763
”
“score does not contain minimum number of items in set for GO:0001764
”
“score does not contain minimum number of items in set for GO:0001818
”
“score does not contain minimum number of items in set for GO:00

“score does not contain minimum number of items in set for GO:0006631
”
“score does not contain minimum number of items in set for GO:0006638
”
“score does not contain minimum number of items in set for GO:0006643
”
“score does not contain minimum number of items in set for GO:0006720
”
“score does not contain minimum number of items in set for GO:0006766
”
“score does not contain minimum number of items in set for GO:0006790
”
“score does not contain minimum number of items in set for GO:0006814
”
“score does not contain minimum number of items in set for GO:0006839
”
“score does not contain minimum number of items in set for GO:0006909
”
“score does not contain minimum number of items in set for GO:0006925
”
“score does not contain minimum number of items in set for GO:0006959
”
“score does not contain minimum number of items in set for GO:0006968
”
“score does not contain minimum number of items in set for GO:0006970
”
“score does not contain minimum number of items in set for GO:00

“score does not contain minimum number of items in set for GO:0009123
”
“score does not contain minimum number of items in set for GO:0009132
”
“score does not contain minimum number of items in set for GO:0009141
”
“score does not contain minimum number of items in set for GO:0009266
”
“score does not contain minimum number of items in set for GO:0009268
”
“score does not contain minimum number of items in set for GO:0009308
”
“score does not contain minimum number of items in set for GO:0009314
”
“score does not contain minimum number of items in set for GO:0009410
”
“score does not contain minimum number of items in set for GO:0009581
”
“score does not contain minimum number of items in set for GO:0009582
”
“score does not contain minimum number of items in set for GO:0009593
”
“score does not contain minimum number of items in set for GO:0009595
”
“score does not contain minimum number of items in set for GO:0009612
”
“score does not contain minimum number of items in set for GO:00

“score does not contain minimum number of items in set for GO:0019216
”
“score does not contain minimum number of items in set for GO:0019233
”
“score does not contain minimum number of items in set for GO:0019748
”
“score does not contain minimum number of items in set for GO:0019882
”
“score does not contain minimum number of items in set for GO:0019932
”
“score does not contain minimum number of items in set for GO:0021510
”
“score does not contain minimum number of items in set for GO:0021700
”
“score does not contain minimum number of items in set for GO:0021953
”
“score does not contain minimum number of items in set for GO:0022406
”
“score does not contain minimum number of items in set for GO:0022412
”
“score does not contain minimum number of items in set for GO:0022613
”
“score does not contain minimum number of items in set for GO:0022616
”
“score does not contain minimum number of items in set for GO:0030048
”
“score does not contain minimum number of items in set for GO:00

“score does not contain minimum number of items in set for GO:0033028
”
“score does not contain minimum number of items in set for GO:0033044
”
“score does not contain minimum number of items in set for GO:0033500
”
“score does not contain minimum number of items in set for GO:0033555
”
“score does not contain minimum number of items in set for GO:0033619
”
“score does not contain minimum number of items in set for GO:0033865
”
“score does not contain minimum number of items in set for GO:0034067
”
“score does not contain minimum number of items in set for GO:0034248
”
“score does not contain minimum number of items in set for GO:0034330
”
“score does not contain minimum number of items in set for GO:0034340
”
“score does not contain minimum number of items in set for GO:0034341
”
“score does not contain minimum number of items in set for GO:0034394
”
“score does not contain minimum number of items in set for GO:0034612
”
“score does not contain minimum number of items in set for GO:00

“score does not contain minimum number of items in set for GO:0043279
”
“score does not contain minimum number of items in set for GO:0043491
”
“score does not contain minimum number of items in set for GO:0043543
”
“score does not contain minimum number of items in set for GO:0043583
”
“score does not contain minimum number of items in set for GO:0043620
”
“score does not contain minimum number of items in set for GO:0043900
”
“score does not contain minimum number of items in set for GO:0043954
”
“score does not contain minimum number of items in set for GO:0044070
”
“score does not contain minimum number of items in set for GO:0044262
”
“score does not contain minimum number of items in set for GO:0044282
”
“score does not contain minimum number of items in set for GO:0044706
”
“score does not contain minimum number of items in set for GO:0044772
”
“score does not contain minimum number of items in set for GO:0044843
”
“score does not contain minimum number of items in set for GO:00

“score does not contain minimum number of items in set for GO:0050803
”
“score does not contain minimum number of items in set for GO:0050817
”
“score does not contain minimum number of items in set for GO:0050866
”
“score does not contain minimum number of items in set for GO:0050878
”
“score does not contain minimum number of items in set for GO:0050879
”
“score does not contain minimum number of items in set for GO:0050890
”
“score does not contain minimum number of items in set for GO:0050900
”
“score does not contain minimum number of items in set for GO:0050905
”
“score does not contain minimum number of items in set for GO:0050906
”
“score does not contain minimum number of items in set for GO:0050918
”
“score does not contain minimum number of items in set for GO:0050919
”
“score does not contain minimum number of items in set for GO:0050920
”
“score does not contain minimum number of items in set for GO:0050953
”
“score does not contain minimum number of items in set for GO:00

“score does not contain minimum number of items in set for GO:0061448
”
“score does not contain minimum number of items in set for GO:0061458
”
“score does not contain minimum number of items in set for GO:0061512
”
“score does not contain minimum number of items in set for GO:0061564
”
“score does not contain minimum number of items in set for GO:0061919
”
“score does not contain minimum number of items in set for GO:0062012
”
“score does not contain minimum number of items in set for GO:0070085
”
“score does not contain minimum number of items in set for GO:0070371
”
“score does not contain minimum number of items in set for GO:0070482
”
“score does not contain minimum number of items in set for GO:0070555
”
“score does not contain minimum number of items in set for GO:0070585
”
“score does not contain minimum number of items in set for GO:0070661
”
“score does not contain minimum number of items in set for GO:0070841
”
“score does not contain minimum number of items in set for GO:00

“score does not contain minimum number of items in set for GO:0097581
”
“score does not contain minimum number of items in set for GO:0097696
”
“score does not contain minimum number of items in set for GO:0098542
”
“score does not contain minimum number of items in set for GO:0098656
”
“score does not contain minimum number of items in set for GO:0098732
”
“score does not contain minimum number of items in set for GO:0098739
”
“score does not contain minimum number of items in set for GO:0098742
”
“score does not contain minimum number of items in set for GO:0098780
”
“score does not contain minimum number of items in set for GO:0098876
”
“score does not contain minimum number of items in set for GO:0099072
”
“score does not contain minimum number of items in set for GO:0104004
”
“score does not contain minimum number of items in set for GO:0106027
”
“score does not contain minimum number of items in set for GO:0150076
”
“score does not contain minimum number of items in set for GO:01

1000 permutations of score complete...
Begin affinity propagation...
End affinity propagation...
Begin weighted set cover...
Remain is 0, ending weighted set cover
Generate the final report...
Results can be found in the ~/bin/data_temp/Project_1620831683!


In [136]:
# List the files written to the results folder of this run
system(command = 'ls ~/bin/data_temp/Project_1620831683', intern = TRUE)

Write the GO Slim summary plot from the results to the workspace bucket

In [137]:
# Copy the GO Slim summary image of this run into the workspace bucket.
# The command is echoed by shell_do before it is executed.
shell_do(
    command = str_glue('gsutil -u {BILLING_PROJECT_ID} cp -r ~/bin/data_temp/Project_1620831683/goslim_summary_1620831683.png {WORKSPACE_BUCKET}')
)

[1] "Executing:  gsutil -u gp2-ipdgc-hackathon cp -r ~/bin/data_temp/Project_1620831683/goslim_summary_1620831683.png gs://fc-secure-b9f9d17f-b38c-407d-85e0-7a759f13cea0"
