# KEGG Analysis

# Phase 1 vs. Phase 1

Using `enrichKEGG` in the clusterProfiler package to investigate enrichment of KEGG categories in our DEGs


## 0. load libraries

In [2]:
library(stringr)
library(dplyr)
library(ggplot2)
library(clusterProfiler)

## 1. read CSVs

read in as data frames

In [5]:
##### DEG tables for every Phase 1 pairwise comparison
# All DEG CSVs live in the same directory, so the directory is stored once and
# each file path is built from it with file.path(); moving the results then
# only requires changing this one line.
deg_dir <- '/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_2024/CE24_RNA-seq/analysis/diff_expression/phase1_v_phase1/deseq_res_files/DEGs'

##### comparisons vs. control
# warm vs. control
w.c_deg <- read.csv(file.path(deg_dir, 'DEG_w_c.csv'))

# both vs. control
b.c_deg <- read.csv(file.path(deg_dir, 'DEG_b_c.csv'))

# hyp vs. control
h.c_deg <- read.csv(file.path(deg_dir, 'DEG_h_c.csv'))

##### other comparisons
# hyp vs. both
h.b_deg <- read.csv(file.path(deg_dir, 'DEG_h_b.csv'))

# warm vs. both
w.b_deg <- read.csv(file.path(deg_dir, 'DEG_w_b.csv'))

# warm vs. hyp
# NOTE(review): every other file name matches its object name (w_c, b_c, ...),
# but this one is DEG_h_w.csv -- confirm the contrast direction (sign of the
# fold changes) in this file is really "warm vs. hyp".
w.h_deg <- read.csv(file.path(deg_dir, 'DEG_h_w.csv'))

Create a vector of Entrez gene IDs (numbers only) for each comparison by removing the leading `LOC` prefix from the gene names.

In [6]:
# Remove the leading 'LOC' from each gene name so that only the numeric ID
# remains (column X is presumably the gene-name column that read.csv named
# automatically -- verify against the CSV header).
# The pattern is anchored with '^' so that only a prefix is stripped; an
# unanchored 'LOC' would delete the first 'LOC' found anywhere in a name.
wc <- str_replace(w.c_deg$X, '^LOC', '')
hc <- str_replace(h.c_deg$X, '^LOC', '')
bc <- str_replace(b.c_deg$X, '^LOC', '')
hb <- str_replace(h.b_deg$X, '^LOC', '')
wb <- str_replace(w.b_deg$X, '^LOC', '')
wh <- str_replace(w.h_deg$X, '^LOC', '')

# check it worked
head(hc)

In [8]:
# Background ("universe") gene set for the enrichment tests: read from the
# all-genes results table for warm vs. control rather than from a DEG table.
universe_list <- read.csv('/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_2024/CE24_RNA-seq/analysis/diff_expression/phase1_v_phase1/deseq_res_files/all_genes/p1.warm_v_cont.csv')

# Keep only the gene-name column and strip the leading 'LOC'. The '^' anchor
# restricts the removal to a prefix instead of the first 'LOC' anywhere.
universe_list <- str_replace(universe_list$X, '^LOC', '')

# check it worked
head(universe_list)

## 2. run `enrichKEGG`

### Phase 1 Warm vs. Control

In [9]:
# KEGG enrichment for the warm vs. control DEGs, tested against the
# background gene set in universe_list.
wc.kegg <- enrichKEGG(
  gene          = wc,
  universe      = universe_list,
  organism      = 'cvn',
  keyType       = 'kegg',
  pAdjustMethod = 'BH',
  pvalueCutoff  = 0.05
)

# preview the first rows of the result
head(wc.kegg)

Reading KEGG annotation online: "https://rest.kegg.jp/link/cvn/pathway"...

Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/cvn"...



Unnamed: 0_level_0,category,subcategory,ID,Description,GeneRatio,BgRatio,pvalue,p.adjust,qvalue,geneID,Count
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<int>
cvn00910,Metabolism,Energy metabolism,cvn00910,Nitrogen metabolism - Crassostrea virginica (eastern oyster),2/6,27/5527,0.000340629,0.001362516,0.0007171138,111137424/111117514,2
cvn04120,Genetic Information Processing,"Folding, sorting and degradation",cvn04120,Ubiquitin mediated proteolysis - Crassostrea virginica (eastern oyster),2/6,186/5527,0.015453701,0.030907402,0.0162670539,111123894/111130636,2


### Phase 1 Hypoxic vs. Control

In [10]:
# KEGG enrichment for the hypoxic vs. control DEGs, tested against the
# background gene set in universe_list.
hc.kegg <- enrichKEGG(
  gene          = hc,
  universe      = universe_list,
  organism      = 'cvn',
  keyType       = 'kegg',
  pAdjustMethod = 'BH',
  pvalueCutoff  = 0.05
)

# preview the first rows of the result (prints NULL when no input gene
# could be mapped)
head(hc.kegg)

--> No gene can be mapped....

--> Expected input gene ID: 111127323,111138520,111134492,111109888,111111035,111109436

--> return NULL...



NULL

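A `NULL` result could mean either that none of the 53 DEGs are annotated in the KEGG database or that none of the pathways are enriched. Because the comparisons below with no enriched pathways return an empty data frame rather than `NULL`, and the message here says "No gene can be mapped", I think the former explanation is the correct one: none of these DEGs have a KEGG annotation.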

### Phase 1 Both vs. Control

In [11]:
# KEGG enrichment for the both vs. control DEGs, tested against the
# background gene set in universe_list.
bc.kegg <- enrichKEGG(
  gene          = bc,
  universe      = universe_list,
  organism      = 'cvn',
  keyType       = 'kegg',
  pAdjustMethod = 'BH',
  pvalueCutoff  = 0.05
)

# preview the first rows of the result
head(bc.kegg)

Unnamed: 0_level_0,category,subcategory,ID,Description,GeneRatio,BgRatio,pvalue,p.adjust,qvalue,geneID,Count
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<int>
cvn00520,Metabolism,Carbohydrate metabolism,cvn00520,Amino sugar and nucleotide sugar metabolism - Crassostrea virginica (eastern oyster),6/24,75/5527,5.655051e-07,1.187561e-05,8.333759e-06,111119837/111124157/111125216/111125215/111127514/111125342,6
cvn04977,Organismal Systems,Digestive system,cvn04977,Vitamin digestion and absorption - Crassostrea virginica (eastern oyster),4/24,83/5527,0.0003996778,0.004196617,0.002944994,111125053/111103328/111103330/111111573,4


### Phase 1 Hypoxic vs. Both

In [12]:
# KEGG enrichment for the hypoxic vs. both DEGs, tested against the
# background gene set in universe_list.
hb.kegg <- enrichKEGG(
  gene          = hb,
  universe      = universe_list,
  organism      = 'cvn',
  keyType       = 'kegg',
  pAdjustMethod = 'BH',
  pvalueCutoff  = 0.05
)

# preview the first rows of the result
head(hb.kegg)

Unnamed: 0_level_0,category,subcategory,ID,Description,GeneRatio,BgRatio,pvalue,p.adjust,qvalue,geneID,Count
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<int>
cvn04977,Organismal Systems,Digestive system,cvn04977,Vitamin digestion and absorption - Crassostrea virginica (eastern oyster),3/14,83/5527,0.001054943,0.0147692,0.01110466,111103328/111103330/111111573,3


### Phase 1 Warm vs. Both

In [13]:
# KEGG enrichment for the warm vs. both DEGs, tested against the
# background gene set in universe_list.
wb.kegg <- enrichKEGG(
  gene          = wb,
  universe      = universe_list,
  organism      = 'cvn',
  keyType       = 'kegg',
  pAdjustMethod = 'BH',
  pvalueCutoff  = 0.05
)

# preview the first rows of the result
head(wb.kegg)

category,subcategory,ID,Description,GeneRatio,BgRatio,pvalue,p.adjust,qvalue,geneID,Count
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<int>


so no enriched KEGG pathways

### Phase 1 Warm vs. Hypoxic

In [14]:
# KEGG enrichment for the warm vs. hypoxic DEGs, tested against the
# background gene set in universe_list.
wh.kegg <- enrichKEGG(
  gene          = wh,
  universe      = universe_list,
  organism      = 'cvn',
  keyType       = 'kegg',
  pAdjustMethod = 'BH',
  pvalueCutoff  = 0.05
)

# preview the first rows of the result
head(wh.kegg)

category,subcategory,ID,Description,GeneRatio,BgRatio,pvalue,p.adjust,qvalue,geneID,Count
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<int>


no enriched KEGG pathways