In [1]:
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(argparse))

“package ‘ggplot2’ was built under R version 4.2.3”


## One Beta Model Vizualization

In [None]:
# define command line arguments
parser <- ArgumentParser(description = "Visualize linear modeling results")
# add arguments
parser$add_argument('--celltype', type='character', help='Cell type to visualize')

# parse arguments from command line
args <- parser$parse_args()

# define cell type
celltype <- args$celltype


In [2]:

lm_file <- file.path(paste0("./results/", celltype, "/lm_one_beta.tsv"))

lm_cp_fig <- file.path(paste0("./figures/", celltype, "/lm_one_beta.pdf"))

# if path does not exist, create it
if (!dir.exists(file.path(paste0("./figures/", celltype)))) {
    dir.create(file.path(paste0("./figures/", celltype)))
}

# read in linear modeling results
lm_df <- readr::read_tsv(lm_file, col_types = readr::cols(.default = "d", feature ="c", dosage_treatments_list = "c"))
head(lm_df)

feature,r2_score,Metadata_number_of_singlecells,oneb_Metadata_Treatment_Dose_Inhibitor_Dose,dosage_treatments_list
<chr>,<dbl>,<dbl>,<dbl>,<chr>
Cytoplasm_AreaShape_Area,0.0009343787,1.695962e-05,0.069624186,media ctr_0_Media ctr_0.0-DMSO_0.100_DMSO_0.025
Cytoplasm_AreaShape_BoundingBoxArea,0.001740451,-3.367326e-05,0.069933662,media ctr_0_Media ctr_0.0-DMSO_0.100_DMSO_0.025
Cytoplasm_AreaShape_BoundingBoxMaximum_X,0.0001077223,-2.358903e-05,-0.02646906,media ctr_0_Media ctr_0.0-DMSO_0.100_DMSO_0.025
Cytoplasm_AreaShape_BoundingBoxMaximum_Y,0.0001354607,-2.231822e-05,0.009834249,media ctr_0_Media ctr_0.0-DMSO_0.100_DMSO_0.025
Cytoplasm_AreaShape_BoundingBoxMinimum_X,0.0001515569,-2.184418e-05,-0.031244872,media ctr_0_Media ctr_0.0-DMSO_0.100_DMSO_0.025
Cytoplasm_AreaShape_BoundingBoxMinimum_Y,8.637752e-05,-2.176965e-05,0.004278855,media ctr_0_Media ctr_0.0-DMSO_0.100_DMSO_0.025


In [3]:
unique(lm_df$dosage_treatments_list)

In [4]:

# Arrange by absolute value coefficient
# Split out components of feature name for visualization
lm_df <- lm_df %>%
    dplyr::arrange(desc(abs(oneb_Metadata_Treatment_Dose_Inhibitor_Dose))) %>%
    tidyr::separate(
        feature,
        into = c(
            "compartment",
            "feature_group",
            "measurement",
            "channel",
            "parameter1",
            "parameter2"
        ),
        sep = "_",
        remove = FALSE
    ) %>%
    dplyr::mutate(channel_cleaned = channel)



“[1m[22mExpected 6 pieces. Additional pieces discarded in 28080 rows [14, 15, 19, 21, 22, 23, 24, 25, 27, 29, 31, 32, 34, 35, 36, 40, 41, 42, 43, 44,
...].”
“[1m[22mExpected 6 pieces. Missing pieces filled with `NA` in 42012 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, 17, 18, 20, 26, 28, 30, ...].”


In [5]:
unique(lm_df$channel)

In [6]:
unique(lm_df$channel_cleaned)

In [7]:
# Clean channel for visualization
lm_df$channel_learned <- dplyr::recode(lm_df$channel_cleaned,
        "CorrDNA" = "nuclei",
        "CorrMito" = "Mito",
        "CorrER" = "ER",
        "CorrGasdermin" = "gasdermin",
        "CorrPM" = "PM",
        .default = "other",
        .missing="other"
    )

print(dim(lm_df))
head(lm_df)
unique(lm_df$channel_learned)
lm_df$abs_Metadata_number_of_singlecells <- abs(lm_df$Metadata_number_of_singlecells)
lm_df$abs_oneb_Metadata_Treatment_Dose_Inhibitor_Dose <- abs(lm_df$oneb_Metadata_Treatment_Dose_Inhibitor_Dose)

[1] 102492     13


feature,compartment,feature_group,measurement,channel,parameter1,parameter2,r2_score,Metadata_number_of_singlecells,oneb_Metadata_Treatment_Dose_Inhibitor_Dose,dosage_treatments_list,channel_cleaned,channel_learned
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
Cytoplasm_Correlation_Manders_CorrPM_CorrMito,Cytoplasm,Correlation,Manders,CorrPM,CorrMito,,0.3974507,-1.330689e-05,-2.772459,H2O2_100.000_Disulfiram_1.0-DMSO_0.100_DMSO_0.025,CorrPM,PM
Cells_Correlation_Manders_CorrPM_CorrMito,Cells,Correlation,Manders,CorrPM,CorrMito,,0.3931833,-4.000428e-05,-2.737578,H2O2_100.000_Disulfiram_1.0-DMSO_0.100_DMSO_0.025,CorrPM,PM
Cells_Correlation_Manders_CorrDNA_CorrMito,Cells,Correlation,Manders,CorrDNA,CorrMito,,0.2683756,-6.649527e-05,-2.689573,H2O2_100.000_Disulfiram_1.0-DMSO_0.100_DMSO_0.025,CorrDNA,nuclei
Nuclei_Correlation_Manders_CorrPM_CorrMito,Nuclei,Correlation,Manders,CorrPM,CorrMito,,0.1566935,2.525405e-05,-2.646582,H2O2_100.000_Disulfiram_1.0-DMSO_0.100_DMSO_0.025,CorrPM,PM
Cytoplasm_Correlation_Manders_CorrDNA_CorrMito,Cytoplasm,Correlation,Manders,CorrDNA,CorrMito,,0.2701912,5.697376e-06,-2.534237,H2O2_100.000_Disulfiram_1.0-DMSO_0.100_DMSO_0.025,CorrDNA,nuclei
Nuclei_Correlation_Manders_CorrDNA_CorrMito,Nuclei,Correlation,Manders,CorrDNA,CorrMito,,0.139789,5.213628e-05,-2.498875,H2O2_100.000_Disulfiram_1.0-DMSO_0.100_DMSO_0.025,CorrDNA,nuclei


In [8]:
loop_list <- unique(lm_df$dosage_treatments_list)

In [9]:
pdf(file=lm_cp_fig)
for (i in 1:length(loop_list)){

    df <- lm_df[lm_df$dosage_treatments_list == loop_list[i],]
    lm_fig_gg <- (
        ggplot(df, aes(x = abs_oneb_Metadata_Treatment_Dose_Inhibitor_Dose, y = r2_score))
        + geom_point(aes(size = abs_Metadata_number_of_singlecells, color = channel_learned), alpha = 0.7)

        + theme_bw()
        + guides(
            color = guide_legend(title = "Channel\n(if applicable)", order = 1),
            size = guide_legend(title = "Cell count contributution")
        )
        + ylab("R2 score of LM feature")
        + xlab("Treatment and Dose contribution (LM beta coefficient)")
        + ggtitle(paste0("How CellProfiler features contribute\nto ",loop_list[i], " treatments and cell density"))
    )

    plot(lm_fig_gg)
}
dev.off()