# KEGG Analysis

# Phase 1 vs. Phase 1

This notebook uses `enrichKEGG` from the clusterProfiler package to test for enrichment of KEGG pathways among our differentially expressed genes (DEGs).


## 0. load libraries

In [6]:
library(stringr)
library(dplyr)
library(ggplot2)
library(clusterProfiler)

## 1. read CSVs

Read in each DEG results file as a data frame.

In [3]:
# Load the phase 1 vs. phase 1 DEG tables, one data frame per comparison.
# All six files sit in the same directory, so the directory is spelled out
# once and every file path is built from it with file.path().
deg_dir <- file.path(
  '/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_2024/CE24_RNA-seq',
  'analysis/diff_expression/phase1_v_phase1/deseq_res_files'
)

# Read one DEG results CSV from deg_dir and return it as a data frame.
read_deg <- function(file_name) {
  read.csv(file.path(deg_dir, file_name))
}

##### comparisons vs. control
# warm vs. control
w.c_deg <- read_deg('p1.DEG_warm_v_cont.csv')

# both vs. control
b.c_deg <- read_deg('p1.DEG_both_v_cont.csv')

# hyp vs. control
h.c_deg <- read_deg('p1.DEG_hyp_v_cont.csv')

##### other comparisons
# hyp vs. both
h.b_deg <- read_deg('p1.DEG_hyp_v_both.csv')

# warm vs. both
w.b_deg <- read_deg('p1.DEG_warm_v_both.csv')

# warm vs. hyp
w.h_deg <- read_deg('p1.DEG_warm_v_hyp.csv')

Create a vector of Entrez gene IDs (numbers only) for each comparison.

In [19]:
# Reduce the identifiers in each DEG table's X column to bare IDs by removing
# the leading 'LOC' (assumes X holds LOC-prefixed gene identifiers -- confirm).
# The pattern is anchored with '^' so only a prefix is removed; an unanchored
# 'LOC' would also cut the first 'LOC' out of the middle of an identifier
# that does not start with it.
strip_loc <- function(ids) {
  str_replace(ids, '^LOC', '')
}

wc <- strip_loc(w.c_deg$X)
hc <- strip_loc(h.c_deg$X)
bc <- strip_loc(b.c_deg$X)
hb <- strip_loc(h.b_deg$X)
wb <- strip_loc(w.b_deg$X)
wh <- strip_loc(w.h_deg$X)

# check it worked
head(hc)

In [11]:
# Background ("universe") gene IDs for the enrichment tests, taken from the X
# column of the warm vs. control results file (presumably the unfiltered
# results table -- confirm).
# The data frame gets its own name so universe_list is only ever the ID vector.
universe_res <- read.csv('/project/pi_sarah_gignouxwolfsohn_uml_edu/julia/CE_2024/CE24_RNA-seq/analysis/diff_expression/phase1_v_phase1/deseq_res_files/p1.warm_v_control.csv')

# '^' anchors the pattern so that only a leading 'LOC' is removed
universe_list <- str_replace(universe_res$X, '^LOC', '')
head(universe_list)

## 2. run `enrichKEGG`

### Phase 1 Warm vs. Control

In [25]:
# KEGG enrichment for the warm vs. control DEGs.
# IDs are passed as KEGG gene IDs for organism code 'cvn', tested against
# universe_list as background, with BH adjustment and a 0.05 p-value cutoff.
wc.kegg <- enrichKEGG(gene = wc, universe = universe_list,
                      organism = "cvn", keyType = "kegg",
                      pAdjustMethod = "BH", pvalueCutoff = 0.05)

# preview the first rows of the result
head(wc.kegg)

Unnamed: 0_level_0,category,subcategory,ID,Description,GeneRatio,BgRatio,pvalue,p.adjust,qvalue,geneID,Count
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<int>
cvn00910,Metabolism,Energy metabolism,cvn00910,Nitrogen metabolism - Crassostrea virginica (eastern oyster),2/7,30/5530,0.0005875247,0.002937624,0.001236894,111137424/111117514,2


### Phase 1 Hypoxic vs. Control

In [27]:
# KEGG enrichment for the hypoxic vs. control DEGs, using the same settings
# as the other comparisons (organism 'cvn', BH adjustment, 0.05 cutoff,
# universe_list as background).
# In the recorded run this call reported that no gene could be mapped and
# returned NULL, so head() below showed NULL.
hc.kegg <- enrichKEGG(gene = hc, universe = universe_list,
                      organism = "cvn", keyType = "kegg",
                      pAdjustMethod = "BH", pvalueCutoff = 0.05)

# preview the first rows of the result
head(hc.kegg)

--> No gene can be mapped....

--> Expected input gene ID: 111111942,111131063,111131432,111121750,111117164,111128802

--> return NULL...



NULL

### Phase 1 Both vs. Control

In [16]:
# KEGG enrichment for the both-stressors vs. control DEGs.
# Same settings as the other comparisons: KEGG gene IDs for organism 'cvn',
# universe_list as background, BH adjustment, 0.05 p-value cutoff.
bc.kegg <- enrichKEGG(gene = bc, universe = universe_list,
                      organism = "cvn", keyType = "kegg",
                      pAdjustMethod = "BH", pvalueCutoff = 0.05)

# preview the first rows of the result
head(bc.kegg)

Unnamed: 0_level_0,category,subcategory,ID,Description,GeneRatio,BgRatio,pvalue,p.adjust,qvalue,geneID,Count
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<int>
cvn00520,Metabolism,Carbohydrate metabolism,cvn00520,Amino sugar and nucleotide sugar metabolism - Crassostrea virginica (eastern oyster),6/27,75/5530,1.20043e-06,2.881033e-05,2.148138e-05,111119837/111124157/111125216/111125215/111127514/111125342,6
cvn04977,Organismal Systems,Digestive system,cvn04977,Vitamin digestion and absorption - Crassostrea virginica (eastern oyster),4/27,83/5530,0.0006365782,0.007638939,0.0056957,111125053/111103328/111103330/111111573,4
cvn04142,Cellular Processes,Transport and catabolism,cvn04142,Lysosome - Crassostrea virginica (eastern oyster),5/27,192/5530,0.002073084,0.01658467,0.01236576,111127514/111106922/111113026/111113028/111113029,5


### Phase 1 Hypoxic vs. Both

In [17]:
# KEGG enrichment for the hypoxic vs. both-stressors DEGs.
# Same settings as the other comparisons: KEGG gene IDs for organism 'cvn',
# universe_list as background, BH adjustment, 0.05 p-value cutoff.
hb.kegg <- enrichKEGG(gene = hb, universe = universe_list,
                      organism = "cvn", keyType = "kegg",
                      pAdjustMethod = "BH", pvalueCutoff = 0.05)

# preview the first rows of the result
head(hb.kegg)

Unnamed: 0_level_0,category,subcategory,ID,Description,GeneRatio,BgRatio,pvalue,p.adjust,qvalue,geneID,Count
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<int>
cvn04977,Organismal Systems,Digestive system,cvn04977,Vitamin digestion and absorption - Crassostrea virginica (eastern oyster),3/15,83/5530,0.00130242,0.0195363,0.01508065,111103328/111103330/111111573,3


### Phase 1 Warm vs. Both

In [18]:
# KEGG enrichment for the warm vs. both-stressors DEGs.
# Same settings as the other comparisons: KEGG gene IDs for organism 'cvn',
# universe_list as background, BH adjustment, 0.05 p-value cutoff.
# The recorded output for this comparison is a table with no rows.
wb.kegg <- enrichKEGG(gene = wb, universe = universe_list,
                      organism = "cvn", keyType = "kegg",
                      pAdjustMethod = "BH", pvalueCutoff = 0.05)

# preview the first rows of the result
head(wb.kegg)

category,subcategory,ID,Description,GeneRatio,BgRatio,pvalue,p.adjust,qvalue,geneID,Count
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<int>


### Phase 1 Warm vs. Hypoxic

In [31]:
# KEGG enrichment for the warm vs. hypoxic DEGs.
# Same settings as the other comparisons: KEGG gene IDs for organism 'cvn',
# universe_list as background, BH adjustment, 0.05 p-value cutoff.
# The recorded output for this comparison is a table with no rows.
wh.kegg <- enrichKEGG(gene = wh, universe = universe_list,
                      organism = "cvn", keyType = "kegg",
                      pAdjustMethod = "BH", pvalueCutoff = 0.05)

# preview the first rows of the result
head(wh.kegg)

category,subcategory,ID,Description,GeneRatio,BgRatio,pvalue,p.adjust,qvalue,geneID,Count
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<int>
