# Co-Expression Module Annotation

**Created**: 16 October 2021

## Environment

In [1]:
# Install enrichR via install.packages() only when its namespace cannot be
# loaded; otherwise do nothing.
if (!requireNamespace("enrichR")) install.packages("enrichR")

Loading required namespace: enrichR



In [2]:
# Attach the tidyverse packages and enrichR; later cells use the %>% pipe,
# dplyr verbs, and the enrichR query functions.
library(tidyverse)
library(enrichR)

# Keep strings as character vectors (not factors) when data frames are built.
options(stringsAsFactors = FALSE)

# Move to the project root so the relative path passed to source() below
# resolves.
setwd("~/eQTL_pQTL_Characterization/")

# Run the shared ggplot theme script.
# NOTE(review): presumably this only defines plotting theme objects - the
# script is not visible here, confirm it has no other side effects.
source("04_Expression/scripts/utils/ggplot_theme.R")

── [1mAttaching packages[22m ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.5     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.6     [32m✔[39m [34mdplyr  [39m 1.0.7
[32m✔[39m [34mtidyr  [39m 1.1.4     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.1.1     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

Welcome to enrichR
Checking connection ... 

Enrichr ... 
Connection is Live!

FlyEnrichr 

## Load Data

In [3]:
# Gene metadata; later cells join on its gene_id column and read gene_name.
gene.info <- read.table(
  file = "/nfs/team282/data/gains_team282/Gene_info_864_20416.txt"
)

# Gene-to-module assignments; later cells read its Gene and Module columns.
modules <- read.csv(
  file = "~/gains_team282/nikhil/expression/gene_expression/modules.clr.csv"
)

# Module eigengenes, with the first CSV column used as row names. The number
# of columns determines how many modules are annotated below.
eigengenes <- read.csv(
  file = "~/gains_team282/nikhil/expression/gene_expression/eigengenes.clr.csv",
  row.names = 1
)

# Variance explained table (loaded here; not used in the cells shown below).
variance.explained <- read.csv(
  file = "~/gains_team282/nikhil/expression/gene_expression/variance.explained.clr.csv"
)

## Ontology and Pathway Enrichment

I test each co-expression module's gene set for enrichment of ontology terms and pathways from GO, KEGG, and Reactome using Enrichr.

In [4]:
# Select the "Enrichr" site and fetch the catalogue of libraries it offers.
setEnrichrSite("Enrichr")
enrichr.dbs <- listEnrichrDbs()

# Display the catalogue rows whose library name mentions GO, KEGG, or
# Reactome, so the library versions chosen below can be checked by eye.
subset(enrichr.dbs, grepl("GO", libraryName))
subset(enrichr.dbs, grepl("KEGG", libraryName))
subset(enrichr.dbs, grepl("Reactome", libraryName))

# Libraries queried for every module.
selected.dbs <- c(
  "GO_Biological_Process_2021",
  "GO_Cellular_Component_2021",
  "GO_Molecular_Function_2021",
  "KEGG_2021_Human",
  "Reactome_2016"
)

# Module labels "Module_1" ... "Module_k", one per eigengene column.
# sprintf() over seq_len() yields an empty vector when there are no columns;
# `1:n` would count down to c(1, 0) and paste0() would recycle the empty
# input to "".
module.names <- sprintf("Module_%d", seq_len(ncol(eigengenes)))

# For each module, keep the genes assigned to it and attach their gene names.
# merge() is an inner join, so genes without a gene.info entry are dropped.
# The result is a list named by module, each element a data frame with
# columns Gene.ID and Gene.Name.
module.list <- lapply(setNames(nm = module.names), function(module.name) {
  modules %>%
    dplyr::filter(Module == module.name) %>%
    merge(., gene.info, by.x = "Gene", by.y = "gene_id") %>%
    dplyr::select(Gene.ID = Gene, Gene.Name = gene_name)
})

# Query Enrichr once per module with that module's gene names.
# The result is a list (one element per module) of lists (one data frame per
# selected library, named by library).
module.annotations <- lapply(module.list, function(module.genes) {
  results <- enrichr(module.genes$Gene.Name, selected.dbs)

  # Pull out each selected library's result as a plain data frame; naming the
  # input vector makes lapply() return a list keyed by library name.
  lapply(setNames(nm = selected.dbs), function(db) {
    as.data.frame(results[[db]])
  })
})

# Regroup the enrichment results by library instead of by module.
# The result is a list named by library; each element is a list named by
# module holding that module's result table with a leading Module column.
annotations.by.db <- lapply(setNames(nm = selected.dbs), function(db) {

  # seq_along() iterates zero times when there are no modules, whereas
  # 1:length(x) would iterate over c(1, 0).
  per.module <- lapply(seq_along(module.names), function(i) {
    module.annotations[[i]][[db]] %>%
      dplyr::mutate(Module = module.names[i]) %>%
      dplyr::select(Module, everything())
  })

  setNames(per.module, module.names)
})

Connection changed to https://maayanlab.cloud/Enrichr/

Connection is Live!



Unnamed: 0_level_0,geneCoverage,genesPerTerm,libraryName,link,numTerms,appyter,categoryId
Unnamed: 0_level_1,<dbl>,<dbl>,<chr>,<chr>,<dbl>,<chr>,<dbl>
15,12753,57,GO_Molecular_Function_2015,http://www.geneontology.org/GO.downloads.annotations.shtml,1136,f531ac2b6acdf7587a54b79b465a5f4aab8f00f9,7
20,13236,82,GO_Cellular_Component_2015,http://www.geneontology.org/GO.downloads.annotations.shtml,641,e1d004d5797cbd2363ef54b1c3b361adb68795c6,7
21,14264,58,GO_Biological_Process_2015,http://www.geneontology.org/GO.downloads.annotations.shtml,5192,bf120b6e11242b1a64c80910d8e89f87e618e235,7
46,7682,78,GO_Biological_Process_2013,http://www.geneontology.org/GO.downloads.annotations.shtml,941,5216d1ade194ffa5a6c00f105e2b1899f64f45fe,7
47,7324,172,GO_Cellular_Component_2013,http://www.geneontology.org/GO.downloads.annotations.shtml,205,fd1332a42395e0bc1dba82868b39be7983a48cc5,7
48,8469,122,GO_Molecular_Function_2013,http://www.geneontology.org/GO.downloads.annotations.shtml,402,7e3e99e5aae02437f80b0697b197113ce3209ab0,7
112,10427,38,GO_Cellular_Component_2017,http://www.geneontology.org/,636,8fed21d22dfcc3015c05b31d942fdfc851cc8e04,7
113,10601,25,GO_Molecular_Function_2017,http://www.geneontology.org/,972,b4018906e0a8b4e81a1b1afc51e0a2e7655403eb,7
114,13822,21,GO_Biological_Process_2017,http://www.geneontology.org/,3166,d9da4dba4a3eb84d4a28a3835c06dfbbe5811f92,7
115,8002,143,GO_Cellular_Component_2017b,http://www.geneontology.org/,816,ecf39c41fa5bc7deb625a2b5761a708676e9db7c,7


Unnamed: 0_level_0,geneCoverage,genesPerTerm,libraryName,link,numTerms,appyter,categoryId
Unnamed: 0_level_1,<dbl>,<dbl>,<chr>,<chr>,<dbl>,<chr>,<dbl>
11,4128,48,KEGG_2013,http://www.kegg.jp/kegg/download/,200,eb26f55d3904cb0ea471998b6a932a9bf65d8e50,7
60,3800,48,KEGG_2015,http://www.kegg.jp/kegg/download/,179,e80d25c56de53c704791ddfdc6ab5eec28ae7243,7
94,7010,87,KEGG_2016,http://www.kegg.jp/kegg/download/,293,43f56da7540195ba3c94eb6e34c522a699b36da9,7
148,7802,92,KEGG_2019_Human,https://www.kegg.jp/,308,3477bc578c4ea5d851dcb934fe2a41e9fd789bb4,7
149,8551,98,KEGG_2019_Mouse,https://www.kegg.jp/,303,187eb44b2d6fa154ebf628eba1f18537f64e797c,7
173,8078,102,KEGG_2021_Human,https://www.kegg.jp/,320,,2


Unnamed: 0_level_0,geneCoverage,genesPerTerm,libraryName,link,numTerms,appyter,categoryId
Unnamed: 0_level_1,<dbl>,<dbl>,<chr>,<chr>,<dbl>,<chr>,<dbl>
8,3185,73,Reactome_2013,http://www.reactome.org/download/index.html,78,b343994a1b68483b0122b08650201c9b313d5c66,7
64,6768,47,Reactome_2015,http://www.reactome.org/download/index.html,1389,36e541bee015eddb8d53827579549e30fe7a3286,7
93,8973,64,Reactome_2016,http://www.reactome.org/download/index.html,1530,1f54638e8f45075fb79489f0e0ef906594cb0678,2


Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2021... Done.
  Querying GO_Cellular_Component_2021... Done.
  Querying GO_Molecular_Function_2021... Done.
  Querying KEGG_2021_Human... Done.
  Querying Reactome_2016... Done.
Parsing results... Done.
Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2021... Done.
  Querying GO_Cellular_Component_2021... Done.
  Querying GO_Molecular_Function_2021... Done.
  Querying KEGG_2021_Human... Done.
  Querying Reactome_2016... Done.
Parsing results... Done.
Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2021... Done.
  Querying GO_Cellular_Component_2021... Done.
  Querying GO_Molecular_Function_2021... Done.
  Querying KEGG_2021_Human... Done.
  Querying Reactome_2016... Done.
Parsing results... Done.
Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2021... Done.
  Querying GO_Cellular_Component_2021... Done.
  Querying GO_Molecular_Function_2021... Done.
  Querying 

## Save Annotations

In [5]:
# Output directory for the per-library annotation tables; defined once so
# dir.create() and the file names below cannot drift apart.
annotation.dir <- "~/gains_team282/nikhil/expression/gene_expression_annotations/"
dir.create(annotation.dir, recursive = TRUE, showWarnings = FALSE)

# Write one CSV per library: stack every module's results and keep only the
# terms with an adjusted p-value below 0.05.
for (db in selected.dbs) {

  output.annot <- do.call(rbind, annotations.by.db[[db]]) %>%
    dplyr::filter(Adjusted.P.value < 0.05)

  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  write.csv(
    output.annot,
    paste0(annotation.dir, db, ".csv"),
    row.names = FALSE
  )
}