In [286]:
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(cowplot))
suppressPackageStartupMessages(library(reticulate))

In [287]:
#Load data
data_dir <- file.path("../4.gene_expression_signatures/results")

results_file <- file.path(data_dir, "combined_z_matrix_gsea_results.csv")
gsea_results_df <- readr::read_csv(
    results_file,
)

[1mRows: [22m[34m6906852[39m [1mColumns: [22m[34m8[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (2): model, reactome_pathway
[32mdbl[39m (5): z, full_model_z, gsea_es_score, nes_score, p_value
[33mlgl[39m (1): shuffled

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [288]:
# Source the themes.R file from the utils folder
source("../utils/themes.r")

In [289]:
# Prepare the data
# Filter for the single highest ES score for each model at each dimension
max_es_df <- gsea_results_df %>%
  group_by(model, full_model_z) %>%
  summarize(max_es = max(abs(`gsea_es_score`), na.rm = TRUE)) %>%
  ungroup()

[1m[22m`summarise()` has grouped output by 'model'. You can override using the
`.groups` argument.


In [290]:
# Prepare the data
# Filter for some specific pathway the single highest ES score for each model at each dimension
path_es_df <- gsea_results_df %>%
  filter(`reactome_pathway` == "Cardiac Conduction R-HSA-5576891") %>%
  group_by(model, full_model_z) %>%
  summarize(max_es = max(abs(`gsea_es_score`), na.rm = TRUE)) %>%
  ungroup()

[1m[22m`summarise()` has grouped output by 'model'. You can override using the
`.groups` argument.


In [291]:
# Plot the data
latent_plot <- ggplot(max_es_df, aes(x = factor(full_model_z), y = log(max_es), color = model, fill = model)) +
  geom_point(size = 3, shape = 21) +  # Points on the line
  geom_smooth(aes(group = model), method = "loess", se = TRUE, size = 1, alpha = 0.1) +  # Trend line with shading 
  scale_color_manual(name = "Algorithm", values = model_colors, labels = model_labels) +
  scale_fill_manual(name = "Algorithm", values = model_colors, labels = model_labels) +
  labs(x = "Latent Dimensions", y = "Highest ES Score", title = "Highest ES Score Across All Pathways by Latent Dimension for Each Model") +
  custom_theme()

In [292]:
# Save the plot with custom dimensions
ggsave("./visualize/latent_plot.png", plot = latent_plot, width = 10, height = 8, units = "in")

[1m[22m`geom_smooth()` using formula = 'y ~ x'


In [293]:
# Plot the data
path_plot <- ggplot(path_es_df, aes(x = factor(full_model_z), y = log(max_es), color = model, fill = model)) +
  geom_point(size = 3, shape = 21) +  # Points on the line
  geom_smooth(aes(group = model), method = "loess", se = TRUE, size = 1, alpha = 0.1) +  # Trend line with shading 
  scale_color_manual(name = "Algorithm", values = model_colors, labels = model_labels) +
  scale_fill_manual(name = "Algorithm", values = model_colors, labels = model_labels) +
  labs(x = "Latent Dimensions", y = "Highest ES Score", title = "Highest ES Score for Regulation of Cardiac Conduction Pathway by Latent Dimension for Each Model") +
  theme(legend.position = "right")

In [294]:
# Save the plot with custom dimensions
ggsave("./visualize/cardiac_conduction_latent_plot.png", plot = path_plot, width = 10, height = 8, units = "in")

[1m[22m`geom_smooth()` using formula = 'y ~ x'
