In [1]:
library(tidyverse)
library(glue)
library(stringr)
library(ggplotify)
library(janitor)
library(gridExtra)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

✔ ggplot2 3.3.5     ✔ purrr   0.3.4
✔ tibble  3.1.6     ✔ dplyr   1.0.8
✔ tidyr   1.2.0     ✔ stringr 1.4.0
✔ readr   2.1.2     ✔ forcats 0.5.1

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

“package ‘glue’ was built under R version 4.1.2”
“package ‘ggplotify’ was built under R version 4.1.3”

Attaching package: ‘janitor’


The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test



Attaching package: ‘gridExtra’


The following object is masked from ‘package:dplyr’:

    combine




# Load Data from Supplementary Table 6

In [1]:
# List the worksheet names contained in the ST6 workbook.
"../../../data/manuscript/ST6.xlsx" %>% readxl::excel_sheets()

In [3]:
# Read the three ST6 worksheets used by the rest of the notebook:
# module gene membership, all enrichment results, and module overlaps.
big_mod_tbl <- "../../../data/manuscript/ST6.xlsx" %>%
    readxl::read_excel(sheet = "ST6-1-Module-Genes")
enrich_tbl <- "../../../data/manuscript/ST6.xlsx" %>%
    readxl::read_excel(sheet = "ST6-2-All-Enrichment")
overlap_tbl <- "../../../data/manuscript/ST6.xlsx" %>%
    readxl::read_excel(sheet = "ST6-6-Module-Overlap")

In [4]:
# FDR-adjust the p-values across every row of the enrichment table (no
# grouping is applied), then flag rows with FDR <= 0.05 as significant (1/0).
enrich_tbl <- mutate(
    enrich_tbl,
    fdr = p.adjust(p, method = "fdr"),
    sig = if_else(fdr <= 0.05, 1, 0)
)

In [5]:
# Unique module identifiers, sorted so embedded numbers order naturally
# (numeric = TRUE).
all_module_ids <- big_mod_tbl$assigned_module %>%
    unique() %>%
    stringr::str_sort(numeric = TRUE)
all_module_ids

In [6]:
# Build a one-row-per-module lookup table holding the module's dendrogram name,
# its classic name, and the colour parsed from the classic name.
#
# The colour is the third underscore-separated field of `classic_name`
# (e.g. "tr2_gene_lightyellow" -> "lightyellow"). `str_match()` is vectorised,
# so column 2 of its result (the first capture group) is taken for all rows at
# once instead of going row by row. The table is explicitly ungrouped so later
# `filter()` / `pull()` calls operate on a plain tibble.
wgcna_color_tbl <- big_mod_tbl %>%
    select(assigned_module, full_dendro_name, classic_name) %>%
    group_by(assigned_module) %>%
    slice_head(n = 1) %>%
    ungroup() %>%
    mutate(wgcna_color = str_match(classic_name, "[^_]+_[^_]+_([^_]*)")[, 2])

head(wgcna_color_tbl, 3)

assigned_module,full_dendro_name,classic_name,wgcna_color
<chr>,<chr>,<chr>,<chr>
M1,M1-tr2,tr2_gene_lightyellow,lightyellow
M10,M10-iso,dev_iso_pink,pink
M100,M100-xx,xx_gene_yellow,yellow


In [7]:
# Load the per-dataset rare-variant gene lists and stack them into one table.
rare_var_list <- readRDS("../../../data/accessory_files/rare_variant_list.rds")
rare_var_tbl <- bind_rows(rare_var_list)

# Within each dataset, convert -log10 p-values back to p-values and FDR-adjust
# them. Rows whose FDR is NA are kept alongside rows with FDR <= 0.05.
rare_var_top_genes <- rare_var_tbl %>%
    group_by(dataset) %>%
    mutate(
        p = 10^(-p_log),
        fdr = p.adjust(p, method = "fdr")
    ) %>%
    ungroup() %>%
    filter(is.na(fdr) | fdr <= 0.05) %>%
    select(dataset, gene_id, gene_symbol)

# Number of retained genes per dataset
table(rare_var_top_genes$dataset)


    ASD_Fu2022_TADA    ASD_SFARI_CAT1&2 ASD_SFARI_SYNDROMIC      DD_Fu2022_TADA 
                177                 895                 153                 450 
       DDD_DECIPHER              EPI_25               RUZZO          SCZ_SCHEMA 
               1269                   2                 116                  32 

In [8]:
# Build and save one dashboard PDF per module.
#
# For every id in `all_module_ids` the loop draws seven panels (module colour
# swatch, top genes by kME, top overlapping modules, cell-type enrichment,
# GWAS enrichment, rare-variant enrichment, g:Profiler enrichment), arranges
# them in a single row and writes the result to "dashboards/<name>.pdf".

# Silence warnings only while the dashboards are generated; the previous
# setting is restored after the loop so later cells still report warnings.
old_opts <- options(warn = -1)

# ggsave() does not create missing directories in the ggplot2 version used
# here, so make sure the output directory exists.
dashboard_dir <- "dashboards"
dir.create(dashboard_dir, showWarnings = FALSE, recursive = TRUE)

# Shared margin applied to every panel
panel_margin <- margin(t = 100, r = 50, b = 50, l = 0, unit = "pt")

# Background for GO term enrichment
# Set of all genes with kME greater than 0 with any module
go_bg <- big_mod_tbl %>%
    filter(kME > 0) %>%
    pull(gene_id) %>%
    unique()

for (module_id in all_module_ids) {
    ################
    ### Preamble ###
    ################

    print(glue("[INFO] Generating module dashboard for: {module_id}"))

    #####################
    ### Main Plotting ###
    #####################

    # Rectangle filled with the module's WGCNA colour
    rect_color <- wgcna_color_tbl %>%
        filter(assigned_module == module_id) %>%
        pull(wgcna_color)
    rect_plot <- ggplot() +
        geom_rect(aes(xmin = 1, xmax = 1.5, ymin = 10, ymax = 15),
                  fill = rect_color, alpha = 1, color = "black") +
        theme_void()

    # Top 25 module genes by kME (re-sorted ascending so the highest kME is
    # drawn at the top of the horizontal bar chart)
    one_mod_kme_tbl <- big_mod_tbl %>%
        filter(assigned_module == module_id) %>%
        arrange(desc(kME)) %>%
        slice_head(n = 25) %>%
        arrange(kME) %>%
        mutate(rare_hit = if_else(gene_id %in% rare_var_top_genes$gene_id, 1, 0))
    # Gene names are made unique so each row gets its own factor level;
    # as_factor() keeps the levels in row order.
    one_mod_kme_tbl$gene_name <- make.names(one_mod_kme_tbl$gene_name, unique = TRUE)
    one_mod_kme_tbl$gene_name <- as_factor(one_mod_kme_tbl$gene_name)

    kme_plot <- ggplot(one_mod_kme_tbl, aes(x = kME, y = gene_name, fill = factor(rare_hit))) +
        geom_col() +
        geom_text(aes(label = format(round(kME, 2), nsmall = 2)), hjust = -0.1) +
        coord_cartesian(xlim = c(0.7, 1)) +
        scale_x_continuous(expand = c(0, 0), limits = c(0, 1)) +
        theme_classic() + xlab("kME") + ylab("") +
        theme(axis.text = element_text(size = 13), legend.position = "none") +
        theme(plot.margin = panel_margin) +
        # Named values: the colour must follow the level ("0"/"1"), not the
        # position, otherwise a module containing only rare hits would be
        # drawn with the "0" colour.
        scale_fill_manual(values = c("0" = "#9BAA88", "1" = "#535F44"))

    # Overlap top other modules (10 largest significant odds ratios)
    one_mod_compare_tbl <- overlap_tbl %>%
        filter(module == module_id) %>%
        filter(sig == 1) %>%
        arrange(desc(or)) %>%
        slice_head(n = 10) %>%
        arrange(or) %>%
        mutate(in_prev = if_else(grepl("werling_|walker_|li_", compare_against), 1, 0))
    one_mod_compare_tbl$compare_against <- factor(one_mod_compare_tbl$compare_against,
                                                  unique(one_mod_compare_tbl$compare_against))
    # max(0, ...) keeps the upper limit finite when a module has no
    # significant overlaps (max() of an empty vector is -Inf).
    compare_xmax <- max(0, one_mod_compare_tbl$or) + 40
    compare_plot <- ggplot(one_mod_compare_tbl, aes(x = or, y = compare_against, fill = factor(in_prev))) +
        geom_col() +
        geom_text(aes(label = format(round(or, 1), nsmall = 1)), hjust = -0.1) +
        coord_cartesian(xlim = c(0, compare_xmax)) +
        scale_x_continuous(expand = c(0, 0), limits = c(0, 100000)) +
        theme_classic() + xlab("Overlap OR") + ylab("") +
        theme(axis.text = element_text(size = 13), legend.position = "none") +
        theme(plot.margin = panel_margin) +
        # Named for the same reason as in the kME panel
        scale_fill_manual(values = c("0" = "#D897A4", "1" = "#175676"))

    # Cell type results
    one_mod_cell_tbl <- enrich_tbl %>%
        filter(enrich_method == "cell_type_ORA") %>%
        filter(module == module_id) %>%
        arrange(desc(enrich_group))
    one_mod_cell_tbl$enrich_group <- factor(one_mod_cell_tbl$enrich_group,
                                            unique(one_mod_cell_tbl$enrich_group))
    # max(50, x + 10) equals max(50, max(x) + 10) but does not warn or
    # return -Inf when x is empty.
    cell_xmax <- max(50, -log10(one_mod_cell_tbl$p) + 10)
    cell_plot <- ggplot(one_mod_cell_tbl, aes(x = -log10(p), y = enrich_group, fill = factor(sig))) +
        geom_col(position = position_dodge(), width = 0.8) +
        scale_x_continuous(expand = c(0, 0), limits = c(0, cell_xmax)) +
        theme_classic() + xlab("-log10(P)") + ylab("Cell Type (Polioudakis)") +
        theme(axis.text = element_text(size = 12), legend.position = "none") +
        theme(plot.margin = panel_margin) +
        scale_fill_manual(values = c("0" = "#F7F3F7",
                                     "1" = "#885A89"))

    # Common variation: LDSC and MAGMA rows share one panel; `method`
    # concatenates the tool name with the significance flag to drive the fill.
    one_mod_ldsc_tbl <- enrich_tbl %>%
        filter(enrich_method == "gwas_LDSC") %>%
        filter(module == module_id) %>%
        mutate(method = paste0("LDSC", sig))
    one_mod_magma_tbl <- enrich_tbl %>%
        filter(enrich_method == "gwas_MAGMA") %>%
        filter(module == module_id) %>%
        mutate(method = paste0("MAGMA", sig))
    combined <- bind_rows(one_mod_ldsc_tbl, one_mod_magma_tbl) %>%
        arrange(desc(enrich_group))
    combined$enrich_group <- factor(combined$enrich_group, levels = unique(combined$enrich_group))
    common_plot <- ggplot(combined, aes(x = -log10(p), y = enrich_group,
                                        fill = factor(method,
                                                      levels = c("LDSC0", "LDSC1", "MAGMA0", "MAGMA1")))) +
        geom_col(position = position_dodge(), width = 0.8) +
        scale_x_continuous(expand = c(0, 0), limits = c(0, 20)) +
        theme_classic() + xlab("-log10(P)") + ylab("GWAS") +
        theme(axis.text = element_text(size = 12), legend.position = "none") +
        theme(plot.margin = panel_margin) +
        scale_fill_manual(values = c("LDSC0" = "#EFF4FB",
                                     "LDSC1" = "#6DA0D9",
                                     "MAGMA0" = "#FCEDED",
                                     "MAGMA1" = "#E66060"))

    # Rare variation: only rows with OR > 1 keep their significance flag;
    # the bar label is the OR, with "*" appended when significant.
    one_mod_rare_tbl <- enrich_tbl %>%
        filter(enrich_method == "rare_logit") %>%
        filter(module == module_id) %>%
        mutate(sig = if_else(or > 1, sig, 0)) %>%
        arrange(desc(enrich_group)) %>%
        mutate(label = paste0(format(round(or, 2), nsmall = 2), if_else(sig == 1, "*", "")))
    one_mod_rare_tbl$enrich_group <- factor(one_mod_rare_tbl$enrich_group,
                                            levels = unique(one_mod_rare_tbl$enrich_group))
    rare_xmax <- max(20, -log10(one_mod_rare_tbl$p) + 10)
    rare_plot <- ggplot(one_mod_rare_tbl, aes(x = -log10(p), y = enrich_group, fill = factor(sig))) +
        geom_col(position = position_dodge(), width = 0.8) +
        geom_text(aes(label = label), hjust = -0.1) +
        scale_x_continuous(expand = c(0, 0), limits = c(0, rare_xmax)) +
        theme_classic() + xlab("-log10(P)\n# is OR") + ylab("Study") +
        theme(axis.text = element_text(size = 12), legend.position = "none") +
        theme(plot.margin = panel_margin) +
        scale_fill_manual(values = c("0" = "#FFEBD6",
                                     "1" = "#CC6900"))

    # GO enrichment: module genes with positive kME, ordered by decreasing
    # kME because gost() is run with ordered_query = TRUE.
    go_genes <- big_mod_tbl %>%
        filter(assigned_module == module_id) %>%
        group_by(gene_id) %>%
        filter(kME == max(kME)) %>%
        ungroup() %>%
        filter(kME > 0) %>%
        arrange(desc(kME)) %>%
        pull(gene_id) %>%
        unique()

    gostres <- gprofiler2::gost(query = go_genes,
                                organism = "hsapiens",
                                domain_scope = "custom",
                                custom_bg = go_bg,
                                ordered_query = TRUE,
                                significant = FALSE)

    if (is.null(gostres)) {
        # gost() returns NULL when it has no results; use an empty panel so
        # the rest of the dashboard can still be produced.
        print(glue("[INFO] No g:Profiler results for {module_id}; GO panel left empty"))
        go_plot <- ggplot() + theme_void() + theme(plot.margin = panel_margin)
    } else {
        result_tbl <- gostres$result %>%
            filter(significant == TRUE) %>%
            select(source, term_size, intersection_size, p_value, term_id, term_name) %>%
            filter(term_size < 2000)

        # Take the top results for important categories (up to 3 per source)
        top_terms <- result_tbl %>%
            filter(source %in% c("GO:BP", "GO:MF", "GO:CC", "KEGG", "REAC")) %>%
            group_by(source) %>%
            arrange(p_value, term_size) %>%
            slice(1:3) %>%
            ungroup() %>%
            arrange(p_value) %>%
            pull(term_id)

        # Nothing significant: fall back to the first 5 terms with p < 1
        if (is_empty(top_terms)) {
            top_terms <- gostres$result %>%
                filter(p_value < 1) %>%
                slice_head(n = 5) %>%
                pull(term_id)
        }

        # The gostplot function automatically plots the results, suppress that
        # with pdf(file=NULL). The device is closed in `finally` so it is not
        # leaked when plotting fails.
        pdf(file = NULL)
        go_plot <- tryCatch({
            gplot <- gprofiler2::gostplot(gostres, capped = TRUE, interactive = FALSE)
            gprofiler2::publish_gostplot(gplot, highlight_terms = top_terms) +
                theme(plot.margin = panel_margin)
        }, finally = dev.off())
    }

    ##########################
    ### Assemble Dashboard ###
    ##########################
    dashboard <- arrangeGrob(rect_plot, kme_plot, compare_plot, cell_plot, common_plot, rare_plot, go_plot,
                             ncol = 7, nrow = 1, widths = c(1, 15, 15, 15, 15, 15, 35))

    # Number of rows assigned to this module in big_mod_tbl
    mod_size <- big_mod_tbl %>%
        filter(assigned_module == module_id) %>%
        count() %>%
        pull()

    # The y-axis title doubles as the dashboard label: "<name> [<size>]"
    full_dendro_name <- wgcna_color_tbl %>%
        filter(assigned_module == module_id) %>%
        pull(full_dendro_name)
    dash_ggplot <- ggplotify::as.ggplot(dashboard) +
        ylab(paste0(full_dendro_name, " [", mod_size, "]")) +
        theme(axis.title.y = element_text(vjust = 0.5, angle = 90, size = 12,
                                          face = "bold", color = "black",
                                          margin = margin(0, -1.35, 0, 0.7, unit = "cm")))
    module_plot_list <- list(dash_ggplot)

    print(glue("[INFO] Saving module dashboard for {module_id} as: {full_dendro_name}.pdf"))

    # Write to PDF
    ggsave(file.path(dashboard_dir, glue("{full_dendro_name}.pdf")),
           plot = marrangeGrob(module_plot_list, nrow = 1, ncol = 1, top = NULL),
           height = 10, width = 40, dpi = "retina", units = "in", device = "pdf")
}

# Restore the warning level that was active before the loop
options(old_opts)

[INFO] Generating module dashboard for: M1
[INFO] Saving module dashboard for M1 as: M1-tr2.pdf
[INFO] Generating module dashboard for: M2
[INFO] Saving module dashboard for M2 as: M2-gene.pdf
[INFO] Generating module dashboard for: M3
[INFO] Saving module dashboard for M3 as: M3-xx.pdf
[INFO] Generating module dashboard for: M4
[INFO] Saving module dashboard for M4 as: M4-xy.pdf
[INFO] Generating module dashboard for: M5
[INFO] Saving module dashboard for M5 as: M5-iso.pdf
[INFO] Generating module dashboard for: M6
[INFO] Saving module dashboard for M6 as: M6-iso.pdf
[INFO] Generating module dashboard for: M7
[INFO] Saving module dashboard for M7 as: M7-tr1.pdf
[INFO] Generating module dashboard for: M8
[INFO] Saving module dashboard for M8 as: M8-xy.pdf
[INFO] Generating module dashboard for: M9
[INFO] Saving module dashboard for M9 as: M9-iso.pdf
[INFO] Generating module dashboard for: M10
[INFO] Saving module dashboard for M10 as: M10-iso.pdf
[INFO] Generating module dashboard for:

None of the term IDs in the 'highlight_terms' was found from the results.



[INFO] Saving module dashboard for M13 as: M13-xy.pdf
[INFO] Generating module dashboard for: M14
[INFO] Saving module dashboard for M14 as: M14-xx.pdf
[INFO] Generating module dashboard for: M15
[INFO] Saving module dashboard for M15 as: M15-gene.pdf
[INFO] Generating module dashboard for: M16
[INFO] Saving module dashboard for M16 as: M16-xy.pdf
[INFO] Generating module dashboard for: M17
[INFO] Saving module dashboard for M17 as: M17-tr1.pdf
[INFO] Generating module dashboard for: M18
[INFO] Saving module dashboard for M18 as: M18-xx.pdf
[INFO] Generating module dashboard for: M19
[INFO] Saving module dashboard for M19 as: M19-gene.pdf
[INFO] Generating module dashboard for: M20
[INFO] Saving module dashboard for M20 as: M20-tr2.pdf
[INFO] Generating module dashboard for: M21
[INFO] Saving module dashboard for M21 as: M21-xy.pdf
[INFO] Generating module dashboard for: M22
[INFO] Saving module dashboard for M22 as: M22-iso.pdf
[INFO] Generating module dashboard for: M23
[INFO] Saving

“no non-missing arguments to max; returning -Inf”


[INFO] Saving module dashboard for M29 as: M29-iso.pdf
[INFO] Generating module dashboard for: M30
[INFO] Saving module dashboard for M30 as: M30-gene.pdf
[INFO] Generating module dashboard for: M31
[INFO] Saving module dashboard for M31 as: M31-iso.pdf
[INFO] Generating module dashboard for: M32
[INFO] Saving module dashboard for M32 as: M32-tr1.pdf
[INFO] Generating module dashboard for: M33
[INFO] Saving module dashboard for M33 as: M33-tr1.pdf
[INFO] Generating module dashboard for: M34
[INFO] Saving module dashboard for M34 as: M34-tr2.pdf
[INFO] Generating module dashboard for: M35
[INFO] Saving module dashboard for M35 as: M35-iso.pdf
[INFO] Generating module dashboard for: M36
[INFO] Saving module dashboard for M36 as: M36-xy.pdf
[INFO] Generating module dashboard for: M37
[INFO] Saving module dashboard for M37 as: M37-gene.pdf
[INFO] Generating module dashboard for: M38
[INFO] Saving module dashboard for M38 as: M38-xx.pdf
[INFO] Generating module dashboard for: M39
[INFO] Sav

In [29]:
# Rename files for filesystem ordering
old_names <- paste0(wgcna_color_tbl$full_dendro_name,".pdf")
old_names

new_names <- sprintf("%03d", as.integer(str_extract(wgcna_color_tbl$full_dendro_name, "[0-9]+")))
new_names <- str_replace(old_names, "[0-9]+", new_names)
new_names

In [30]:
# Rename each dashboard file; the result is one TRUE/FALSE per file.
file.rename(from = old_names, to = new_names)