06_pathway_enrich_figs.Rmd

---
title: "06_pathway_enrich_figs.Rmd"
author: "Puvvula"
date: "2023-03-14"
output: pdf_document
---

```{r}
library(pacman)
pacman::p_load(tidyverse, janitor)
```

# Read and combine pathway enrichment results obtained with multiple p-value thresholds
```{r}
# Collect the pathway enrichment result CSVs (one or more files matching
# "mummi_import_combined*.csv") and stack them row-wise into `dat`.
# The pattern is anchored with `$` so only names that END in ".csv" match
# (e.g. a stray "*.csv.bak" is ignored).
file_list <- list.files(path = "~/Documents/MWAS_home/pj_result/pathway_enrich_results/", 
                        pattern = "mummi_import_combined.*\\.csv$", full.names = TRUE)

# do.call(rbind, list()) silently returns NULL; stop here with a clear message
# instead of failing later with an unrelated error.
if (length(file_list) == 0) {
  stop("No 'mummi_import_combined*.csv' files found in pathway_enrich_results/",
       call. = FALSE)
}

# rbind() requires every file to share the same column names.
dat <- do.call(rbind, lapply(file_list, read.csv))
```


```{r}
# Standardise the combined enrichment table:
#   * snake_case column names (janitor::clean_names)
#   * `fet` -> `p_fisher`, `x` -> `name`
#   * mom/baby codes replaced by display labels
#   * `mb_thresh` = "<display label> - <p_cutoff>", built from the recoded label
pathways <- clean_names(dat) |>
  rename(name = "x", p_fisher = "fet") |>
  mutate(
    mom_baby = fct_recode(mom_baby, "Maternal" = "mom", "Newborn" = "baby")
  ) |>
  mutate(
    mb_thresh = paste(mom_baby, p_cutoff, sep = " - ")
  )
  

#p_fisher = one-tailed Fisher Exact Test p-value (based on Gaussian hypergeometric probability distribution)
#p_ease = EASE score (a conservative adjustment to the Fisher exact probability that weights significance in favor of themes supported by more hits) Expression Analysis Systematic Explorer
#p_gamma = Gamma p values - using Gamma distribution to model permutation results

# Build the plotting table from the nominally significant pathways.
#   logp              : -log10 of the Fisher p-value
#   enrichment        : significant hits relative to the expected count
#   pct_sig_hits      : significant hits as a percentage of pathway size
#   pct_sig_hits_disc : pct_sig_hits binned in steps of 15
# Pathways with a single member (pathway_total <= 1) are dropped.
plot.mcg <- pathways |> 
  filter(p_fisher < 0.05, pathway_total > 1) |>
  mutate(
    logp = -log10(p_fisher),
    enrichment = hits_sig / expected,
    pct_sig_hits = (hits_sig / pathway_total) * 100,
    # right = FALSE gives left-closed bins [0,15), [15,30), ... so that the
    # bins agree with the "15 - <30" style labels (the default right-closed
    # bins would put exactly 15 under "< 15" and turn 0 into NA).
    # Values >= 90 fall outside the breaks and remain NA.
    pct_sig_hits_disc = cut(pct_sig_hits,
                            breaks = seq(0, 90, by = 15),
                            right = FALSE,
                            labels = c("< 15", "15 - <30", "30 - <45",
                                       "45 - <60", "60 - <75", "75 - <90"))
  )

# Rename labels for maternal and newborn
# Update the levels of fact_new with new labels
#plot.mcg$fact_new <- factor(plot.mcg$fact_new, levels = c("baby-neg", "baby-pos", "mom-neg", "mom-pos"),labels = c("Newborn C18", "Newborn HILIC", "Maternal C18", "Maternal HILIC"))

#########################################
# Tag each pathway with the first keyword found in its name
# (case-insensitive), checked in this order: metabolism, biosynthesis,
# oxidation, degradation. Pathways matching none are labelled "na".
plot.mcg <- plot.mcg |>
  mutate(metabolism_cluster = case_when(
    grepl("metabolism", name, ignore.case = TRUE) ~ "metabolism",
    grepl("biosynthesis", name, ignore.case = TRUE) ~ "biosynthesis",
    grepl("oxidation", name, ignore.case = TRUE) ~ "oxidation",
    grepl("degradation", name, ignore.case = TRUE) ~ "degradation",
    TRUE ~ "na"
  ))


# Creating new variable to cluster y axis.
# Each pathway name is matched (case-insensitive) against keyword lists; the
# first matching rule wins and anything unmatched is labelled "other".
plot.mcg <- plot.mcg |>
  mutate(group = case_when(
    grepl("Vitamin", name, ignore.case = TRUE) ~ "vit",
    grepl("fatty acid|Arachidonic|Carnitine|Phytanic|Butanoate|Glycerophospholipid|Linoleate", name, ignore.case = TRUE) ~ "fatty",
    grepl("steroid|androgen", name, ignore.case = TRUE) ~ "ster",
    grepl("porphyrin|Pyrimidine|Purine|Glyoxylate", name, ignore.case = TRUE) ~ "organic",
    grepl("Methionine|Glutamate|Glycine|Beta-Alanine|Arginine|Glutathione|Aspartate|Lysine|Tyrosine|Alanine|Tryptophan|Histidine",
          name, ignore.case = TRUE) ~ "amino",
    grepl("Galactose|Fructose|Pentose", name, ignore.case = TRUE) ~ "sug",
    TRUE ~ "other"
  ))

# "other" must be listed as a level: factor() converts any value missing from
# `levels` to NA, which would silently discard the fallback label assigned above.
plot.mcg$group <- factor(plot.mcg$group,
                         levels = c("vit", "amino", "sug", "fatty", "ster",
                                    "organic", "other"))

#export results csv
# Note: a CSV stores only the labels, so the level order set above is not
# preserved when this file is read back in.
write_csv(plot.mcg, "~/Documents/MWAS_home/pj_result/plot_mcg_with_p_sensit.csv")
```


```{r}
# Heatmap of enriched pathways (rows) by chemical (columns), one column of
# panels per mom_baby value, tiles filled by the binned % of significant hits.
plot.mcg<- read_csv("~/Documents/MWAS_home/pj_result/plot_mcg_with_p_sensit.csv")
my_colors <- colorRampPalette(c("#F8766D", "#7CAE00"))(7)

# read_csv() returns plain character columns, so every ordering has to be
# declared explicitly; otherwise ggplot2 sorts the values alphabetically.
pct_hit_levels <- c("< 15", "15 - <30", "30 - <45",
                    "45 - <60", "60 - <75", "75 - <90")
group_order <- c("vit", "amino", "sug", "fatty", "ster", "organic", "other")
cluster_order <- c("metabolism", "oxidation", "degradation", "biosynthesis", "na")
chemical_order <- c("1-Hydroxynapthalene", "2-Hydroxynapthalene", "2-Hydroxyfluorene",
                    "1-Hydroxyphenanthrene", "2,3-Hydroxyphenanthrene",
                    "4-Hydroxyphenanthrene","9-Hydroxyphenanthrene", 
                    "sigma-LMWT-PAH", "1-Hydroxypyrene")

# Reorder y-axis based on the cluster labels
pathway_heatmap <- plot.mcg |>
  mutate(pct_sig_hits_disc = factor(pct_sig_hits_disc, levels = pct_hit_levels),
         group = factor(group, levels = group_order)) |>
  ggplot(aes(x = factor(chemical, levels = chemical_order), 
             y = name)) + 
  geom_tile(aes(fill = pct_sig_hits_disc), color = "black") +
  # The tiles map `fill`, not `colour`, so the palette has to be attached to
  # the fill scale; a colour scale is ignored here and leaves my_colors unused.
  # Colours are assigned to the ordered bins in sequence.
  scale_fill_manual(values = my_colors) +
  xlab("") +
  ylab("") +
  theme_bw() +
  theme(axis.text=element_text(size=10), 
        axis.title=element_text(size=10,face="bold"),
        strip.text = element_text(size=10),
        legend.text=element_text(size=10),
        legend.title=element_text(size=10),
        legend.position="bottom",
        legend.justification="right",
        legend.box="horizontal",
        legend.box.just="center",
        legend.margin=margin(t=0.1, r=0.1, b=2, l=0.1),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.major.y = element_blank(),
        axis.text.x = element_text(angle = 40, hjust = 1, vjust = 1, size = 10),
        axis.text.y = element_text(size = 10), 
        panel.spacing.x=unit(0.8, "lines"),
        panel.spacing.y=unit(0.02, "lines"),
        strip.text.y = element_blank())+
  guides(fill = guide_legend(nrow = 1))+
  # Rows are faceted by metabolism cluster and pathway group; free y scales
  # and spacing let each panel show only its own pathways.
  facet_grid(factor(metabolism_cluster, levels = cluster_order)*group~ mom_baby,
             scales = "free_y", 
             switch = "y", space = "free_y")+
  labs(fill = "% significant hits per pathway")

# Show the figure in the knitted document.
pathway_heatmap

# Pass the plot explicitly instead of relying on last_plot().
ggsave("~/Documents/MWAS_home/pj_result/pah_pathway_senstiv.tiff", 
       plot = pathway_heatmap,
       width = 18, height = 14, dpi=300)
```

# Chemicals with overlapping pathways for mom and baby
```{r}
data <- read_csv("~/Documents/MWAS_home/pj_result/plot_mcg.csv")

# Quick ggupset view: for every pathway, gather the chemicals it is linked to
# and count how many pathways share each chemical combination.
# (This plot does not show the set sizes.)
library(ggupset)
data |>
  group_by(name) |>
  summarise(chemicals = list(chemical)) |>
  ggplot(aes(x = chemicals)) +
  geom_bar() +
  theme_bw() +
  # kept as the last component, as in the original pipeline
  scale_x_upset(order_by = "degree")
  

#### Overlapping pathways for newborns
library(UpSetR)

# Build a pathway x chemical 0/1 membership table for the newborn rows and
# draw the UpSet plot of pathways shared across chemicals.
newborn_upset <- data |>
  filter(mom_baby == "Newborn") |>
  distinct(name, chemical) |>
  mutate(GenreMember = 1) |>
  pivot_wider(names_from = chemical, values_from = GenreMember,
              values_fill = list(GenreMember = 0)) |>
  as.data.frame() |>
  UpSetR::upset(sets = c("1-Hydroxypyrene", "sigma-LMWT-PAH", "9-Hydroxyphenanthrene", 
                         "4-Hydroxyphenanthrene", "2,3-Hydroxyphenanthrene", "1-Hydroxyphenanthrene",
                         "2-Hydroxyfluorene", "2-Hydroxynapthalene", "1-Hydroxynapthalene"), 
                keep.order = TRUE, set_size.show=TRUE,
                order.by = "freq", text.scale = 2, point.size = 5, line.size = 1.5,
                set_size.scale_max = 15, mainbar.y.max=4,
                mainbar.y.label="No.of Overlapping pathways \nacross chemicals", 
                sets.x.label="Total no.of pathways \nper chemical")

# Show the figure in the knitted document.
newborn_upset

# UpSetR draws with grid and does not return a ggplot object, so ggsave()
# would write the most recent *ggplot* instead of this figure. Save through a
# graphics device instead. The size is in inches; the earlier 0.1 x 0.15 inch
# canvas was too small to hold the plot, so the 12 x 8 size of the other
# UpSet figures is used.
tiff(path.expand("~/Documents/MWAS_home/pj_result/pah_pathway_overlapp_newborn.tiff"),
     width = 12, height = 8, units = "in", res = 300)
print(newborn_upset)
invisible(dev.off())


#### Overlapping pathways for mothers
# Build a pathway x chemical 0/1 membership table for the maternal rows and
# draw the UpSet plot of pathways shared across chemicals.
maternal_upset <- data |>
  filter(mom_baby == "Maternal") |>
  distinct(name, chemical) |>
  mutate(GenreMember = 1) |>
  pivot_wider(names_from = chemical, values_from = GenreMember,
              values_fill = list(GenreMember = 0)) |>
  as.data.frame() |>
  UpSetR::upset(sets = c("1-Hydroxypyrene", "sigma-LMWT-PAH", "9-Hydroxyphenanthrene", 
                         "4-Hydroxyphenanthrene", "2,3-Hydroxyphenanthrene", "1-Hydroxyphenanthrene",
                         "2-Hydroxyfluorene", "2-Hydroxynapthalene", "1-Hydroxynapthalene"), 
                keep.order = TRUE, set_size.show=TRUE,
                order.by = "freq", text.scale = 2, point.size = 5, line.size = 1.5,
                set_size.scale_max = 30, mainbar.y.max=20,
                mainbar.y.label="No.of Overlapping pathways \nacross chemicals", 
                sets.x.label="Total no.of pathways \nper chemical")

# Show the figure in the knitted document.
maternal_upset

# UpSetR draws with grid and does not return a ggplot object, so ggsave()
# would write the most recent *ggplot* instead of this figure. Save through a
# graphics device instead (size in inches, 300 dpi).
tiff(path.expand("~/Documents/MWAS_home/pj_result/pah_pathway_overlapp_mom.tiff"),
     width = 12, height = 8, units = "in", res = 300)
print(maternal_upset)
invisible(dev.off())

```

# Overlapping pathways between mom and baby
```{r}
# Pathway membership by sample type (one row per pathway / mom_baby pair).
data_mb<- read_csv("~/Documents/MWAS_home/pj_result/plot_mcg.csv")|>
  select(mom_baby, name)

# Spread to a pathway x {Maternal, Newborn} 0/1 table and draw the UpSet plot
# of pathways found in one or both sample types.
# NOTE(review): sets.x.label still reads "per chemical" although the sets here
# are Maternal/Newborn - confirm the intended label.
mom_baby_upset <- data_mb |>
  distinct(name, mom_baby) |>
  mutate(GenreMember = 1) |>
  pivot_wider(names_from = mom_baby, values_from = GenreMember, 
              values_fill = list(GenreMember = 0)) |>
  as.data.frame() |>
  UpSetR::upset(sets = c("Newborn","Maternal"), 
                keep.order = TRUE, set_size.show=TRUE,
                order.by = "freq", text.scale = 2, point.size = 5, line.size = 1.5,
                set_size.scale_max = 55, mainbar.y.max=36,
                mainbar.y.label="No.of Overlapping pathways \nacross maternal and newborn's", 
                sets.x.label="Total no.of pathways \nper chemical")

# Show the figure in the knitted document.
mom_baby_upset

# UpSetR draws with grid and does not return a ggplot object, so ggsave()
# would write the most recent *ggplot* instead of this figure. Save through a
# graphics device instead (size in inches, 300 dpi).
tiff(path.expand("~/Documents/MWAS_home/pj_result/pathway_overlapp_mom_baby.tiff"),
     width = 12, height = 8, units = "in", res = 300)
print(mom_baby_upset)
invisible(dev.off())
```

# Sensitivity analysis: overlapping pathways across p-value thresholds
```{r}
# Pathway membership by sample type and p-value threshold (mb_thresh).
# NOTE(review): this reads plot_mcg.csv, whereas the table exported earlier in
# this document (which carries mb_thresh) is plot_mcg_with_p_sensit.csv -
# confirm that plot_mcg.csv is the intended input.
data<- read_csv("~/Documents/MWAS_home/pj_result/plot_mcg.csv")|>
  select(mb_thresh, name)

# Spread to a pathway x threshold 0/1 table and draw the UpSet plot of
# pathways recovered under each threshold.
# NOTE(review): sets.x.label still reads "per chemical" although the sets here
# are sample type / threshold combinations - confirm the intended label.
sensitivity_upset <- data |>
  distinct(name, mb_thresh) |>
  mutate(GenreMember = 1) |>
  pivot_wider(names_from = mb_thresh, values_from = GenreMember, 
              values_fill = list(GenreMember = 0)) |>
  as.data.frame() |>
  UpSetR::upset(sets = c("Maternal - FDR-BH","Maternal - P<0.05", "Maternal - p<0.1", "Maternal - p<0.5",
                         "Newborn - FDR-BH","Newborn - P<0.05", "Newborn - p<0.1", "Newborn - p<0.5"), 
                keep.order = TRUE, set_size.show=TRUE,
                order.by = "freq", text.scale = 2, point.size = 5, line.size = 1.5,
                set_size.scale_max = 55, mainbar.y.max=8,
                mainbar.y.label="No.of Overlapping pathways \nby p-value thresholds \nin functional analysis", 
                sets.x.label="Total no.of pathways \nper chemical")

# Show the figure in the knitted document.
sensitivity_upset

# UpSetR draws with grid and does not return a ggplot object, so ggsave()
# would write the most recent *ggplot* instead of this figure. Save through a
# graphics device instead (size in inches, 300 dpi).
tiff(path.expand("~/Documents/MWAS_home/pj_result/pathway_overlapp_p_senst.tiff"),
     width = 12, height = 8, units = "in", res = 300)
print(sensitivity_upset)
invisible(dev.off())
```


# Narrating results
```{r}
#pathways per mom and baby
# Number of distinct significant pathways (Fisher p < 0.05) per sample type.
pathways |>
  filter(p_fisher < 0.05) |>
  group_by(mom_baby) |>
  summarise(unique_count = n_distinct(name))

#overlapping categories
# Significant pathways present in BOTH Maternal and Newborn results.
# Stored under its own name: it was previously assigned to `x` and then
# immediately overwritten by the next statement, so it could never be inspected.
shared_pathways <- pathways |>
  filter(p_fisher < 0.05, mom_baby %in% c("Maternal", "Newborn")) |>
  group_by(name) |>
  filter(n_distinct(mom_baby) == 2) |>
  ungroup() |>
  distinct(name)

#list name of pathways by criteria
x <- plot.mcg |>
  filter(mom_baby == "Maternal" & chemical == "9-Hydroxyphenanthrene") |>
  distinct(name)

#number of rows per pathway and sample type (mom_baby)
# .groups = "drop" returns an ungrouped table, so later verbs on `y` are not
# silently applied per pathway.
y <- plot.mcg |>
  group_by(name, mom_baby) |>
  summarise(times_repeat = n(), .groups = "drop")
```