In [1]:
suppressPackageStartupMessages(suppressWarnings(library(ggplot2)))
suppressPackageStartupMessages(suppressWarnings(library(dplyr)))
suppressPackageStartupMessages(suppressWarnings(library(tidyr)))


In [2]:
cell_type = "PBMC"


In [3]:
# path set
input_file_path <- file.path(paste0("../results/","regression/",cell_type))
# read in the data
output_path <- file.path(paste0("../figures/","regression/",cell_type,"/"))
# create output directory if it doesn't exist
dir.create(output_path, recursive = TRUE, showWarnings = FALSE)


In [4]:
## function to process the data for visualization
process_subset_data <- function(data_path){
    # read in the data
    data <- read.csv(data_path, header = TRUE, sep = ",", stringsAsFactors = FALSE)
    # get the basename of the files

    data <- data %>%
        dplyr::arrange(desc(abs(coefficients))) %>%
        tidyr::separate(
            feature_names,
            into = c(
                "compartment",
                "feature_group",
                "measurement",
                "channel", 
                "parameter1", 
                "parameter2"
            ),
            sep = "_",
            remove = FALSE
        ) %>%
        dplyr::mutate(channel_cleaned = channel) %>%
        dplyr::arrange(desc(abs(coefficients)))

    # Clean channel for visualization
    data$channel_learned <- dplyr::recode(data$channel,
            "CorrDNA" = "nuclei",
            "CorrMito" = "Mito",
            "CorrER" = "ER",
            "CorrGasdermin" = "gasdermin",
            "CorrPM" = "PM",
            .default = "other",
            .missing="other"
    )
    data <- data %>%
        dplyr::group_by(feature_group, channel_learned, compartment) %>%
        dplyr::slice_max(order_by = coefficients, n = 1)
    return(data)
}


In [5]:
# get all files in a directory
files <- list.files(path = input_file_path, pattern = "*.csv", full.names = TRUE)
coef_gg_file <- file.path(paste0(output_path,"/","top_abs_val_coefficients_enet.pdf"))
pdf(file=coef_gg_file, width=7, height=4)
for (i in files){
    filename <- basename(i)
    # split the string at the first _
    filename <- strsplit(filename, "_", fixed = TRUE)[[1]]
    cytokine <- filename[1]
    shuffle <- filename[2]
    # preprocess the data
    data <- process_subset_data(i)
    # plot the data
    coef_gg <- (
        ggplot(data, aes(x = channel_learned, y = feature_group))
        + geom_point(aes(fill = abs(coefficients)), pch = 22, size = 6)
        + facet_wrap("~compartment", ncol = 3)
        + theme_bw()
        + scale_fill_continuous(
            name="Top Abs. val\ntreatment\nlinear model\ncoefficient",
            low = "darkblue",
            high = "yellow",
        )
        + xlab("Channel")
        + ylab("Feature")
        + theme(
            axis.text = element_text(size = 10),
            axis.title = element_text(size = 10),
            title = element_text(size = 14),
            legend.title = element_text(size = 12),
            legend.text = element_text(size = 12),
        )
        # rotate x axis labels
        + theme(axis.text.x = element_text(angle = 90, hjust = 1))
        + ggtitle(paste0("Top Abs. val treatment ElasticNet coefficients for \n",cytokine,"\n",shuffle," model"))
        + theme(plot.title = element_text(hjust = 0.5))
    )
    plot(coef_gg)
}
dev.off()


“[1m[22mExpected 6 pieces. Additional pieces discarded in 265 rows [13, 15, 16, 17, 18,
19, 20, 21, 22, 24, 26, 28, 46, 47, 48, 67, 69, 71, 72, 73, ...].”
“[1m[22mExpected 6 pieces. Missing pieces filled with `NA` in 329 rows [1, 2, 3, 4, 5,
6, 8, 9, 10, 11, 12, 14, 23, 25, 27, 29, 30, 31, 32, 33, ...].”
“[1m[22mExpected 6 pieces. Additional pieces discarded in 265 rows [4, 11, 15, 17, 19,
20, 26, 33, 38, 39, 43, 47, 48, 49, 60, 71, 74, 79, 81, 88, ...].”
“[1m[22mExpected 6 pieces. Missing pieces filled with `NA` in 329 rows [3, 6, 7, 8, 10,
13, 14, 16, 22, 23, 27, 29, 32, 34, 46, 50, 52, 53, 54, 55, ...].”
“[1m[22mExpected 6 pieces. Additional pieces discarded in 265 rows [7, 8, 9, 10, 11,
17, 18, 20, 21, 24, 40, 41, 62, 64, 65, 66, 67, 68, 69, 392, ...].”
“[1m[22mExpected 6 pieces. Missing pieces filled with `NA` in 329 rows [1, 2, 3, 4, 5,
6, 12, 13, 14, 15, 16, 19, 22, 23, 25, 26, 27, 28, 29, 30, ...].”
“[1m[22mExpected 6 pieces. Additional pieces discarded in 265 row