In [None]:
# Load R packages
library(dplyr)
packageVersion('dplyr')
library(tidyverse)
packageVersion('tidyverse')
library(xlsx)
packageVersion('xlsx')

In [None]:
# Directory settings used by the cells below.
# The '...' values look like redacted placeholders -- set real paths before running.
#   project.dir : where the ChemRICH input workbook is written and where the
#                 ChemRICH result workbook is read from
#   data.dir    : defined here but not referenced in the cells below
#   results.dir : where the linear-regression result CSV is read from
project.dir <- '...'
data.dir <- '...'
results.dir <- '...'

# Prepare input file

In [None]:
# Read the linear regression result table from the results directory,
# then display its dimensions and first rows.
linear.square <- file.path(results.dir, 'linear_regression_cortisol_result.csv') %>%
  read.csv()
dim(linear.square)
head(linear.square)

In [None]:
# Keep only the columns that go into the ChemRICH input file:
# metabolite name, SMILES, p-value, beta coefficient and sub-pathway.
linear.square.sel.cols <- select(
  linear.square,
  CHEMICAL_NAME, SMILES, p.metabolite, beta.coef.mets, SUB_PATHWAY
)
head(linear.square.sel.cols)

In [None]:
# Count missing values per column (SMILES is the column of interest)
linear.square.sel.cols %>%
  is.na() %>%
  colSums()

In [None]:
# Show the rows that have no SMILES string
# (original note: because cis and trans)
linear.square.sel.cols %>%
  filter(is.na(SMILES))

In [None]:
# Manually assign a SMILES string to 'aconitate [cis or trans]':
#   C(/C(=C\C(=O)O)/C(=O)O)C(=O)O
# (the backslash has to be doubled inside the R string literal)
linear.square.sel.cols <- linear.square.sel.cols %>%
  mutate(
    SMILES = replace(
      SMILES,
      CHEMICAL_NAME %in% 'aconitate [cis or trans]',
      'C(/C(=C\\C(=O)O)/C(=O)O)C(=O)O'
    )
  )
# Display the updated row(s) to confirm the assignment
linear.square.sel.cols %>%
  filter(CHEMICAL_NAME == 'aconitate [cis or trans]')

In [None]:
# Re-count missing values per column after the manual SMILES assignment
linear.square.sel.cols %>%
  is.na() %>%
  colSums()

In [None]:
# Flag every row whose SMILES already occurred in an earlier row
# (the first occurrence of each SMILES is not flagged)
linear.square.sel.cols <- linear.square.sel.cols %>%
  mutate(dup = duplicated(SMILES))
table(linear.square.sel.cols$dup) # 6 duplicated SMILES

In [None]:
# Remove duplicated SMILES IDs: keep exactly one row per SMILES, namely the
# row with the smallest p-value.

# Number of rows sharing each SMILES (attached below as `freq.SMILES`).
count.dup.ID <- as.data.frame(table(linear.square.sel.cols$SMILES))
colnames(count.dup.ID) <- c('SMILES', 'freq.SMILES')

# Inner merge on SMILES. table() omits NA by default, so any row whose SMILES
# is still NA has no match in `count.dup.ID` and is dropped here.
linear.square.sel.cols <- merge(linear.square.sel.cols, count.dup.ID,
                                by = 'SMILES')

# Rank p-values within each SMILES and keep only the rank-1 row.
# ties.method = 'first' guarantees exactly one rank-1 row per SMILES. With the
# default ('average'), duplicates sharing the same p-value would each get rank
# 1.5, so none of them would pass the filter and the compound would silently
# disappear from the ChemRICH input.
linear.square.sel.cols <- linear.square.sel.cols %>%
  arrange(SMILES, p.metabolite) %>%
  group_by(SMILES) %>%
  mutate(pval_rank = rank(p.metabolite, ties.method = 'first')) %>%
  ungroup() %>%
  filter(pval_rank == 1L) %>% # drop duplicated SMILES rows with the higher p-value
  as.data.frame()
dim(linear.square.sel.cols)
head(linear.square.sel.cols)

In [None]:
# Drop the helper columns that were only needed for de-duplication
# (duplicate flag, SMILES frequency and within-SMILES p-value rank)
linear.square.sel.cols <- linear.square.sel.cols %>%
  select(-any_of(c('dup', 'freq.SMILES', 'pval_rank')))
dim(linear.square.sel.cols)
head(linear.square.sel.cols)

In [None]:
# Move the metabolite name column so that it sits directly before SMILES
linear.square.sel.cols <- relocate(
  linear.square.sel.cols,
  CHEMICAL_NAME,
  .before = SMILES
)
head(linear.square.sel.cols)

In [None]:
# Rename the columns to the names used for the ChemRICH input file.
# The mapping is done by column NAME rather than by position, so a change in
# the upstream column order (or a leftover helper column) cannot silently
# attach the wrong label to a column. select() also fixes the output to
# exactly these five columns, in this order.
linear.square.sel.cols <- linear.square.sel.cols %>%
  select(
    compound_name = CHEMICAL_NAME,
    smiles = SMILES,
    pvalue = p.metabolite,
    effect_size = beta.coef.mets,
    set = SUB_PATHWAY
  )
head(linear.square.sel.cols)

In [None]:
# Save the ChemRICH input table as an Excel workbook in the project directory.
# - `xlsx::` pins the call to the xlsx package, so another attached package
#   that also exports a `write.xlsx` cannot mask it.
# - `FALSE` is spelled out because `F` is an ordinary variable that can be
#   reassigned.
xlsx::write.xlsx(
  linear.square.sel.cols,
  file.path(project.dir, 'chemrich_input_imputed_metabolon.xlsx'),
  row.names = FALSE
)

# Run ChemRICH

In [None]:
# ChemRICH setup.
# Source the two ChemRICH scripts; the paths are relative, so both files must
# be in the current working directory.
invisible(lapply(
  c('chemrich_chemical_classes.R', 'predict_mesh_chemical_class.R'),
  source
))
# Package loader -- presumably defined by one of the scripts sourced above.
load.ChemRICH.Packages()

In [None]:
# Run the ChemRICH chemical-class analysis on the input workbook in the
# project directory.
# (original note: output files are saved in ChemRICH folder)
file.path(project.dir, 'chemrich_input_imputed_metabolon.xlsx') %>%
  run_chemrich_chemical_classes()

***Supplemental Figure 1: chemrich_class_impact_plot***

# Check result

In [None]:
# Path of the ChemRICH result workbook, and the sheets it contains.
# `readxl::` is explicit because no library() call in this notebook attaches
# readxl (library(tidyverse) installs it but does not attach it).
chemrich.output <- file.path(project.dir, 'chemRICH_class_results.xlsx')
readxl::excel_sheets(path = chemrich.output)

In [None]:
# Load the cluster-level results (sheet 'ChemRICH_Results') and display the
# dimensions and first rows.
# `readxl::` is explicit because no library() call in this notebook attaches
# readxl (library(tidyverse) installs it but does not attach it).
cluster <- readxl::read_excel(
  file.path(project.dir, 'chemRICH_class_results.xlsx'),
  sheet = 'ChemRICH_Results'
)
dim(cluster)
head(cluster)

In [None]:
# Table 4: clusters with FDR below 0.05, ordered by increasing FDR
sig.cluster <- cluster %>%
  filter(FDR < 0.05) %>%
  arrange(FDR)
sig.cluster

In [None]:
# Clusters with a nominal (unadjusted) p-value below 0.05.
# The rows are ordered by FDR, not by the nominal p-value.
nominal.sig.cluster <- cluster %>%
  filter(`p-values` < 0.05) %>%
  arrange(FDR)
nominal.sig.cluster

In [None]:
# Load the compound-level results (sheet 'Compound_ChemRICH') and display the
# dimensions and first rows.
# `readxl::` is explicit because no library() call in this notebook attaches
# readxl (library(tidyverse) installs it but does not attach it).
compound <- readxl::read_excel(
  file.path(project.dir, 'chemRICH_class_results.xlsx'),
  sheet = 'Compound_ChemRICH'
)
dim(compound)
head(compound)

In [None]:
# Compounds with FDR below 0.05, ordered by increasing FDR
sig.compound <- compound %>%
  filter(FDR < 0.05) %>%
  arrange(FDR)
sig.compound