# GO_MWU

started 01/06/2025

following protocol outlined [here](https://github.com/z0on/GO_MWU)

**Needed in the same directory:**
- scripts: GO_MWU.R, gomwu_a.pl, gomwu_b.pl, gomwu.functions.R
- GO hierarchy file ([go.obo](http://www.geneontology.org/GO.downloads.ontology.shtml))
- table of GO annotations for your sequences: two-column (gene id - GO terms), tab-delimited, one line per gene, multiple GO terms separated by semicolon. If you have multiple lines per gene, use nrify_GOtable.pl to merge them. Do NOT include genes without GO annotations.
- table of measure of interest for your sequences: two columns of comma-separated values: gene id, continuous measure of change such as log(fold-change). To perform standard GO enrichment analysis based on Fisher's exact test, use binary measure (1 or 0, i.e., either sgnificant or not). To analyze modules derived from WGCNA, specify 0 for genes not included in the module and the kME value (number between 0 and 1, module membership score) for genes included in the module.

In [25]:
library(tidyverse)
library(ape)

In [41]:
setwd('/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_MethylRAD_analysis_2018/analysis/DMGs_analysis/go_mwu')
getwd()

### CH vs. HC
table of measure of interest for sequences: 

In [23]:
allGenes_CH <- read.csv('/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_MethylRAD_analysis_2018/analysis/deseq_res_files/geneFeatures_res/CH_vs_HC.csv')

# select only gene id and continuous measure of change (log fold change)
allGenes_CH_lfc <- select(allGenes_CH, X, log2FoldChange)

colnames(allGenes_CH_lfc) <- c('Symbol', 'log2fc')

head(allGenes_CH_lfc)

# binary measure for significance
allGenes_CH$significant <- ifelse(is.na(allGenes_CH$padj) | allGenes_CH$padj > 0.05, 0, 1)

allGenes_CH_sig <- select(allGenes_CH, X, significant)
colnames(allGenes_CH_sig) <- c('Symbol', 'signif')

head(allGenes_CH_sig)

Unnamed: 0_level_0,Symbol,log2fc
Unnamed: 0_level_1,<chr>,<dbl>
1,LOC111116054,0.0
2,LOC111126949,-0.01768994
3,LOC111110729,-0.06356103
4,LOC111112434,0.11826212
5,LOC111120752,0.20389041
6,LOC111128944,


Unnamed: 0_level_0,Symbol,signif
Unnamed: 0_level_1,<chr>,<dbl>
1,LOC111116054,0
2,LOC111126949,0
3,LOC111110729,0
4,LOC111112434,0
5,LOC111120752,0
6,LOC111128944,0


In [111]:
write.csv(allGenes_CH_lfc, '/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_MethylRAD_analysis_2018/analysis/DMGs_analysis/go_mwu/allGenes_CH_lfc.csv', row.names=FALSE, quote = FALSE)

In [112]:
write.csv(allGenes_CH_sig, '/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_MethylRAD_analysis_2018/analysis/DMGs_analysis/go_mwu/allGenes_CH_sig.csv', row.names=FALSE, quote=FALSE)

table of GO annotations for your sequences (gene id and GO terms):

In [11]:
# Read the GAF file into R
GOobo <- read.table('/project/pi_sarah_gignouxwolfsohn_uml_edu/Reference_genomes/Cvirginica_genome/GCF_002022765.2_C_virginica-3.0_gene_ontology.gaf', 
                          sep = "\t", 
                          header = FALSE, 
                          comment.char = "!", 
                          stringsAsFactors = FALSE)

# assign column names
colnames(GOobo) <- c('DB', 'GeneID', 'Symbol', 'Qualifier', 'GO_ID', 'Reference', 'Evidence_Code', 'With,From', 'Aspect', 'Gene_Name', 'Gene_Synonym', 'Type', 'Taxon', 'Date', 'Assigned_By')

# only need gene id and GO terms
gene_GO <- select(GOobo, Symbol, GO_ID)

head(gene_GO)

Unnamed: 0_level_0,Symbol,GO_ID
Unnamed: 0_level_1,<chr>,<chr>
1,LOC111099029,GO:0005261
2,LOC111099029,GO:0098655
3,LOC111099032,GO:0004930
4,LOC111099032,GO:0004930
5,LOC111099032,GO:0005886
6,LOC111099032,GO:0007186


In [14]:
# Aggregate GO IDs for each gene symbol
geneGO <- aggregate(GO_ID ~ Symbol, data = gene_GO, function(x) paste(unique(x), collapse = "; "))

# Print the aggregated data frame
head(geneGO)

Unnamed: 0_level_0,Symbol,GO_ID
Unnamed: 0_level_1,<chr>,<chr>
1,LOC111099029,GO:0005261; GO:0098655
2,LOC111099032,GO:0004930; GO:0005886; GO:0007186
3,LOC111099033,GO:0016020; GO:0022857; GO:0055085
4,LOC111099034,GO:0005515
5,LOC111099036,GO:0008028; GO:0015718; GO:0055085
6,LOC111099039,GO:0004930; GO:0005886; GO:0007189


In [56]:
write.table(geneGO, '/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_MethylRAD_analysis_2018/analysis/DMGs_analysis/go_mwu/allGenes2GO.tab', sep = '\t', row.names = FALSE, quote = FALSE)

#### GO_MWU.R

**molecular function**

In [72]:
# Edit these to match your data file names: 
input="allGenes_CH_lfc.csv" # two columns of comma-separated values: gene id, continuous measure of significance. To perform standard GO enrichment analysis based on Fisher's exact test, use binary measure (0 or 1, i.e., either sgnificant or not).
goAnnotations="allGenes2GO.tab" # two-column, tab-delimited, one line per gene, multiple GO terms separated by semicolon. If you have multiple lines per gene, use nrify_GOtable.pl prior to running this script.
goDatabase="go.obo" # download from http://www.geneontology.org/GO.downloads.ontology.shtml
goDivision="MF" # either MF, or BP, or CC
source("gomwu.functions.R")


In [73]:
# Calculating stats. It might take ~3 min for MF and BP. Do not rerun it if you just want to replot the data with different cutoffs, go straight to gomwuPlot. If you change any of the numeric values below, delete the files that were generated in previos runs first.
gomwuStats(input, goDatabase, goAnnotations, goDivision,
           perlPath="perl", # replace with full path to perl executable if it is not in your system's PATH already
           largest=0.1,  # a GO category will not be considered if it contains more than this fraction of the total number of genes
           smallest=5,   # a GO category should contain at least this many genes to be considered
           clusterCutHeight=0.25) # threshold for merging similar (gene-sharing) terms. See README for details.
# There are no GO term at 10% FDR

Continuous measure of interest: will perform MWU test


0 GO terms at 10% FDR



In [92]:
mwu_MF_ch <- read.csv('/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_MethylRAD_analysis_2018/analysis/DMGs_analysis/go_mwu/MWU_MF_allGenes_CH_lfc.csv', sep= ' ')
head(mwu_MF_ch)

Unnamed: 0_level_0,delta.rank,pval,level,nseqs,term,name,p.adj
Unnamed: 0_level_1,<int>,<dbl>,<int>,<int>,<chr>,<chr>,<dbl>
1,1665,0.09931216,2,5,GO:0000026,"alpha-1,2-mannosyltransferase activity",0.6784797
2,171,0.72876525,3,21,GO:0000030,mannosyltransferase activity,0.9388837
3,773,0.02890507,4,41,GO:0000049,tRNA binding,0.6102001
4,-239,0.76502275,6,8,GO:0000062;GO:0120227;GO:1901567;GO:1901681,acyl-CoA binding,0.9494588
5,667,0.37617952,4,9,GO:0000146,microfilament motor activity,0.7793527
6,-102,0.79309877,2,34,GO:0000149,SNARE binding,0.9555081


In [93]:
filter(mwu_MF_ch, mwu_MF_ch$p.adj < 0.05)

delta.rank,pval,level,nseqs,term,name,p.adj
<int>,<dbl>,<int>,<int>,<chr>,<chr>,<dbl>


no significantly enriched GO terms for molecular function

**biological process**

In [89]:
# Edit these to match your data file names: 
input="allGenes_CH_lfc.csv" # two columns of comma-separated values: gene id, continuous measure of significance. To perform standard GO enrichment analysis based on Fisher's exact test, use binary measure (0 or 1, i.e., either sgnificant or not).
goAnnotations="allGenes2GO.tab" # two-column, tab-delimited, one line per gene, multiple GO terms separated by semicolon. If you have multiple lines per gene, use nrify_GOtable.pl prior to running this script.
goDatabase="go.obo" # download from http://www.geneontology.org/GO.downloads.ontology.shtml
goDivision="BP" # either MF, or BP, or CC
source("gomwu.functions.R")


In [90]:
# Calculating stats. It might take ~3 min for MF and BP. Do not rerun it if you just want to replot the data with different cutoffs, go straight to gomwuPlot. If you change any of the numeric values below, delete the files that were generated in previos runs first.
gomwuStats(input, goDatabase, goAnnotations, goDivision,
           perlPath="perl", # replace with full path to perl executable if it is not in your system's PATH already
           largest=0.1,  # a GO category will not be considered if it contains more than this fraction of the total number of genes
           smallest=5,   # a GO category should contain at least this many genes to be considered
           clusterCutHeight=0.25) # threshold for merging similar (gene-sharing) terms. See README for details.
# There are no GO term at 10% FDR

Continuous measure of interest: will perform MWU test


0 GO terms at 10% FDR



In [80]:
mwu_BP_ch <- read.csv('/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_MethylRAD_analysis_2018/analysis/DMGs_analysis/go_mwu/MWU_BP_allGenes_CH_lfc.csv', sep= ' ')
head(mwu_BP_ch)

Unnamed: 0_level_0,delta.rank,pval,level,nseqs,term,name,p.adj
Unnamed: 0_level_1,<int>,<dbl>,<int>,<int>,<chr>,<chr>,<dbl>
1,115,0.61561963,4,6,GO:0000012,single strand break repair,0.8846008
2,67,0.61571333,2,18,GO:0000027,ribosomal large subunit assembly,0.8846008
3,-271,0.12691169,4,10,GO:0000028,ribosomal small subunit assembly,0.6039774
4,179,0.04102924,2,42,GO:0000045;GO:0070925;GO:1905037;GO:0007033,organelle assembly,0.4669454
5,-54,0.74003195,7,12,GO:0000054;GO:0033750;GO:0071428;GO:0071426;GO:0006611;GO:0031503,ribonucleoprotein complex export from nucleus,0.9399743
6,234,0.30712519,2,6,GO:0000055,ribosomal large subunit export from nucleus,0.7996669


In [94]:
filter(mwu_BP_ch, mwu_BP_ch$p.adj < 0.05)

delta.rank,pval,level,nseqs,term,name,p.adj
<int>,<dbl>,<int>,<int>,<chr>,<chr>,<dbl>


no significantly enriched GO terms for biological processes

### HH vs. HC
table of measure of interest for sequences: 

In [100]:
getwd()

In [95]:
allGenes_HH <- read.csv('/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_MethylRAD_analysis_2018/analysis/deseq_res_files/geneFeatures_res/HH_vs_HC.csv')

# select only gene id and continuous measure of change (log fold change)
allGenes_HH_lfc <- select(allGenes_HH, X, log2FoldChange)

colnames(allGenes_HH_lfc) <- c('Symbol', 'log2fc')

head(allGenes_HH_lfc)

# binary measure for significance
allGenes_HH$significant <- ifelse(is.na(allGenes_HH$padj) | allGenes_HH$padj > 0.05, 0, 1)

allGenes_HH_sig <- select(allGenes_HH, X, significant)
colnames(allGenes_HH_sig) <- c('Symbol', 'signif')

head(allGenes_HH_sig)

Unnamed: 0_level_0,Symbol,log2fc
Unnamed: 0_level_1,<chr>,<dbl>
1,LOC111116054,0.023547556
2,LOC111126949,-0.008575931
3,LOC111110729,0.016854142
4,LOC111112434,-0.446553473
5,LOC111120752,-0.332082994
6,LOC111128944,


Unnamed: 0_level_0,Symbol,signif
Unnamed: 0_level_1,<chr>,<dbl>
1,LOC111116054,0
2,LOC111126949,0
3,LOC111110729,0
4,LOC111112434,0
5,LOC111120752,0
6,LOC111128944,0


In [102]:
write.csv(allGenes_HH_lfc, '/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_MethylRAD_analysis_2018/analysis/DMGs_analysis/go_mwu/allGenes_HH_lfc.csv', row.names=FALSE, quote = FALSE)

In [101]:
write.csv(allGenes_HH_sig, '/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_MethylRAD_analysis_2018/analysis/DMGs_analysis/go_mwu/allGenes_HH_sig.csv', row.names=FALSE, quote=FALSE)

#### GO_MWU.R

**molecular function**

In [103]:
# Edit these to match your data file names: 
input="allGenes_HH_lfc.csv" # two columns of comma-separated values: gene id, continuous measure of significance. To perform standard GO enrichment analysis based on Fisher's exact test, use binary measure (0 or 1, i.e., either sgnificant or not).
goAnnotations="allGenes2GO.tab" # two-column, tab-delimited, one line per gene, multiple GO terms separated by semicolon. If you have multiple lines per gene, use nrify_GOtable.pl prior to running this script.
goDatabase="go.obo" # download from http://www.geneontology.org/GO.downloads.ontology.shtml
goDivision="MF" # either MF, or BP, or CC
source("gomwu.functions.R")


In [104]:
# Calculating stats. It might take ~3 min for MF and BP. Do not rerun it if you just want to replot the data with different cutoffs, go straight to gomwuPlot. If you change any of the numeric values below, delete the files that were generated in previos runs first.
gomwuStats(input, goDatabase, goAnnotations, goDivision,
           perlPath="perl", # replace with full path to perl executable if it is not in your system's PATH already
           largest=0.1,  # a GO category will not be considered if it contains more than this fraction of the total number of genes
           smallest=5,   # a GO category should contain at least this many genes to be considered
           clusterCutHeight=0.25) # threshold for merging similar (gene-sharing) terms. See README for details.
# There are no GO term at 10% FDR

Continuous measure of interest: will perform MWU test


0 GO terms at 10% FDR



In [105]:
mwu_MF_hh <- read.csv('/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_MethylRAD_analysis_2018/analysis/DMGs_analysis/go_mwu/MWU_MF_allGenes_HH_lfc.csv', sep= ' ')
head(mwu_MF_hh)

Unnamed: 0_level_0,delta.rank,pval,level,nseqs,term,name,p.adj
Unnamed: 0_level_1,<int>,<dbl>,<int>,<int>,<chr>,<chr>,<dbl>
1,-1258,0.21320839,2,5,GO:0000026,"alpha-1,2-mannosyltransferase activity",0.7602279
2,-178,0.71876607,2,21,GO:0000030,mannosyltransferase activity,0.9697456
3,-634,0.07278138,2,41,GO:0000049,tRNA binding,0.5785622
4,628,0.43183942,6,8,GO:0000062;GO:0120227;GO:1901567;GO:1901681,acyl-CoA binding,0.891458
5,-1446,0.05480457,3,9,GO:0000146,microfilament motor activity,0.5515916
6,-32,0.9340641,2,34,GO:0000149,SNARE binding,0.9801257


In [106]:
filter(mwu_MF_hh, mwu_MF_hh$p.adj < 0.05)

delta.rank,pval,level,nseqs,term,name,p.adj
<int>,<dbl>,<int>,<int>,<chr>,<chr>,<dbl>


no significantly enriched GO terms for molecular function

**biological process**

In [107]:
# Edit these to match your data file names: 
input="allGenes_HH_lfc.csv" # two columns of comma-separated values: gene id, continuous measure of significance. To perform standard GO enrichment analysis based on Fisher's exact test, use binary measure (0 or 1, i.e., either sgnificant or not).
goAnnotations="allGenes2GO.tab" # two-column, tab-delimited, one line per gene, multiple GO terms separated by semicolon. If you have multiple lines per gene, use nrify_GOtable.pl prior to running this script.
goDatabase="go.obo" # download from http://www.geneontology.org/GO.downloads.ontology.shtml
goDivision="BP" # either MF, or BP, or CC
source("gomwu.functions.R")


In [108]:
# Calculating stats. It might take ~3 min for MF and BP. Do not rerun it if you just want to replot the data with different cutoffs, go straight to gomwuPlot. If you change any of the numeric values below, delete the files that were generated in previos runs first.
gomwuStats(input, goDatabase, goAnnotations, goDivision,
           perlPath="perl", # replace with full path to perl executable if it is not in your system's PATH already
           largest=0.1,  # a GO category will not be considered if it contains more than this fraction of the total number of genes
           smallest=5,   # a GO category should contain at least this many genes to be considered
           clusterCutHeight=0.25) # threshold for merging similar (gene-sharing) terms. See README for details.
# There are no GO term at 10% FDR

Continuous measure of interest: will perform MWU test


0 GO terms at 10% FDR



In [109]:
mwu_BP_hh <- read.csv('/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_MethylRAD_analysis_2018/analysis/DMGs_analysis/go_mwu/MWU_BP_allGenes_HH_lfc.csv', sep= ' ')
head(mwu_BP_hh)

Unnamed: 0_level_0,delta.rank,pval,level,nseqs,term,name,p.adj
Unnamed: 0_level_1,<int>,<dbl>,<int>,<int>,<chr>,<chr>,<dbl>
1,-82,0.72222335,2,6,GO:0000012,single strand break repair,0.9519533
2,-101,0.44475592,7,18,GO:0000027,ribosomal large subunit assembly,0.9512008
3,303,0.08829177,3,10,GO:0000028,ribosomal small subunit assembly,0.5567287
4,-47,0.59043612,2,42,GO:0000045;GO:0070925;GO:1905037;GO:0007033,organelle assembly,0.9512008
5,75,0.64263966,6,12,GO:0000054;GO:0033750;GO:0071428;GO:0071426;GO:0006611;GO:0031503,protein export from nucleus,0.9512008
6,-265,0.24702006,2,6,GO:0000055,ribosomal large subunit export from nucleus,0.801241


In [110]:
filter(mwu_BP_hh, mwu_BP_hh$p.adj < 0.05)

delta.rank,pval,level,nseqs,term,name,p.adj
<int>,<dbl>,<int>,<int>,<chr>,<chr>,<dbl>


no significantly enriched GO terms for biological processes

### CC vs. HC
table of measure of interest for sequences: 

In [113]:
allGenes_CC <- read.csv('/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_MethylRAD_analysis_2018/analysis/deseq_res_files/geneFeatures_res/CC_vs_HC.csv')

# select only gene id and continuous measure of change (log fold change)
allGenes_CC_lfc <- select(allGenes_CC, X, log2FoldChange)

colnames(allGenes_CC_lfc) <- c('Symbol', 'log2fc')

head(allGenes_CC_lfc)

# binary measure for significance
allGenes_CC$significant <- ifelse(is.na(allGenes_CC$padj) | allGenes_CC$padj > 0.05, 0, 1)

allGenes_CC_sig <- select(allGenes_CC, X, significant)
colnames(allGenes_CC_sig) <- c('Symbol', 'signif')

head(allGenes_CC_sig)

Unnamed: 0_level_0,Symbol,log2fc
Unnamed: 0_level_1,<chr>,<dbl>
1,LOC111116054,0.0
2,LOC111126949,-0.01938737
3,LOC111110729,-0.06519374
4,LOC111112434,0.07479191
5,LOC111120752,0.01438339
6,LOC111128944,


Unnamed: 0_level_0,Symbol,signif
Unnamed: 0_level_1,<chr>,<dbl>
1,LOC111116054,0
2,LOC111126949,0
3,LOC111110729,0
4,LOC111112434,0
5,LOC111120752,0
6,LOC111128944,0


In [124]:
write.csv(allGenes_CC_lfc, '/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_MethylRAD_analysis_2018/analysis/DMGs_analysis/go_mwu/allGenes_CC_lfc.csv', row.names=FALSE, quote = FALSE)

In [125]:
write.csv(allGenes_CC_sig, '/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_MethylRAD_analysis_2018/analysis/DMGs_analysis/go_mwu/allGenes_CC_sig.csv', row.names=FALSE, quote=FALSE)

#### GO_MWU.R

**molecular function**

In [126]:
# Edit these to match your data file names: 
input="allGenes_CC_lfc.csv" # two columns of comma-separated values: gene id, continuous measure of significance. To perform standard GO enrichment analysis based on Fisher's exact test, use binary measure (0 or 1, i.e., either sgnificant or not).
goAnnotations="allGenes2GO.tab" # two-column, tab-delimited, one line per gene, multiple GO terms separated by semicolon. If you have multiple lines per gene, use nrify_GOtable.pl prior to running this script.
goDatabase="go.obo" # download from http://www.geneontology.org/GO.downloads.ontology.shtml
goDivision="MF" # either MF, or BP, or CC
source("gomwu.functions.R")

In [127]:
# Calculating stats. It might take ~3 min for MF and BP. Do not rerun it if you just want to replot the data with different cutoffs, go straight to gomwuPlot. If you change any of the numeric values below, delete the files that were generated in previos runs first.
gomwuStats(input, goDatabase, goAnnotations, goDivision,
           perlPath="perl", # replace with full path to perl executable if it is not in your system's PATH already
           largest=0.1,  # a GO category will not be considered if it contains more than this fraction of the total number of genes
           smallest=5,   # a GO category should contain at least this many genes to be considered
           clusterCutHeight=0.25) # threshold for merging similar (gene-sharing) terms. See README for details.
# There are no GO term at 10% FDR

Continuous measure of interest: will perform MWU test


0 GO terms at 10% FDR



In [128]:
mwu_MF_cc <- read.csv('/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_MethylRAD_analysis_2018/analysis/DMGs_analysis/go_mwu/MWU_MF_allGenes_CC_lfc.csv', sep= ' ')
head(mwu_MF_cc)

Unnamed: 0_level_0,delta.rank,pval,level,nseqs,term,name,p.adj
Unnamed: 0_level_1,<int>,<dbl>,<int>,<int>,<chr>,<chr>,<dbl>
1,916,0.36468387,2,5,GO:0000026,"alpha-1,2-mannosyltransferase activity",0.8429826
2,-102,0.83650913,5,21,GO:0000030,mannosyltransferase activity,0.9956253
3,492,0.16411439,2,41,GO:0000049,tRNA binding,0.6509256
4,372,0.64140733,6,8,GO:0000062;GO:0120227;GO:1901567;GO:1901681,fatty-acyl-CoA binding,0.9535436
5,1359,0.07114113,2,9,GO:0000146,microfilament motor activity,0.5920605
6,-187,0.62934759,3,34,GO:0000149,SNARE binding,0.9535436


In [129]:
filter(mwu_MF_cc, mwu_MF_cc$p.adj < 0.05)

delta.rank,pval,level,nseqs,term,name,p.adj
<int>,<dbl>,<int>,<int>,<chr>,<chr>,<dbl>


no significantly enriched GO terms for molecular function

**biological process**

In [130]:
# Edit these to match your data file names: 
input="allGenes_CC_lfc.csv" # two columns of comma-separated values: gene id, continuous measure of significance. To perform standard GO enrichment analysis based on Fisher's exact test, use binary measure (0 or 1, i.e., either sgnificant or not).
goAnnotations="allGenes2GO.tab" # two-column, tab-delimited, one line per gene, multiple GO terms separated by semicolon. If you have multiple lines per gene, use nrify_GOtable.pl prior to running this script.
goDatabase="go.obo" # download from http://www.geneontology.org/GO.downloads.ontology.shtml
goDivision="BP" # either MF, or BP, or CC
source("gomwu.functions.R")


In [131]:
# Calculating stats. It might take ~3 min for MF and BP. Do not rerun it if you just want to replot the data with different cutoffs, go straight to gomwuPlot. If you change any of the numeric values below, delete the files that were generated in previos runs first.
gomwuStats(input, goDatabase, goAnnotations, goDivision,
           perlPath="perl", # replace with full path to perl executable if it is not in your system's PATH already
           largest=0.1,  # a GO category will not be considered if it contains more than this fraction of the total number of genes
           smallest=5,   # a GO category should contain at least this many genes to be considered
           clusterCutHeight=0.25) # threshold for merging similar (gene-sharing) terms. See README for details.
# There are no GO term at 10% FDR

Continuous measure of interest: will perform MWU test


0 GO terms at 10% FDR



In [132]:
mwu_BP_cc <- read.csv('/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_MethylRAD_analysis_2018/analysis/DMGs_analysis/go_mwu/MWU_BP_allGenes_CC_lfc.csv', sep= ' ')
head(mwu_BP_cc)

Unnamed: 0_level_0,delta.rank,pval,level,nseqs,term,name,p.adj
Unnamed: 0_level_1,<int>,<dbl>,<int>,<int>,<chr>,<chr>,<dbl>
1,60,0.79489448,5,6,GO:0000012,single strand break repair,0.9547015
2,53,0.68749613,7,18,GO:0000027,ribosomal large subunit assembly,0.9445336
3,-375,0.03477232,2,10,GO:0000028,ribosomal small subunit assembly,0.4239334
4,27,0.75894488,2,42,GO:0000045;GO:0070925;GO:1905037;GO:0007033,organelle assembly,0.9519334
5,5,0.97775236,5,12,GO:0000054;GO:0033750;GO:0071428;GO:0071426;GO:0006611;GO:0031503,rRNA-containing ribonucleoprotein complex export from nucleus,0.9903915
6,216,0.34538114,3,6,GO:0000055,ribosomal large subunit export from nucleus,0.7804439


In [133]:
filter(mwu_BP_cc, mwu_BP_cc$p.adj < 0.05)

delta.rank,pval,level,nseqs,term,name,p.adj
<int>,<dbl>,<int>,<int>,<chr>,<chr>,<dbl>


no significantly enriched GO terms for biological processes