In [17]:
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(argparse))

## Two Beta Model Vizualization

In [None]:
# define command line arguments
parser <- ArgumentParser(description = "Visualize linear modeling results")
# add arguments
parser$add_argument('--celltype', type='character', help='Cell type to visualize')

# parse arguments from command line
args <- parser$parse_args()

# define cell type
celltype <- args$celltype


In [18]:
celltype <- "SHSY5Y"

In [19]:

lm_file <- file.path(paste0("./results/", celltype, "/lm_two_beta.tsv"))

lm_cp_fig <- file.path(paste0("./figures/", celltype, "/lm_two_beta.pdf"))
lm_cp_fig_abs <- file.path(paste0("./figures/", celltype, "/lm_two_beta_abs.pdf"))

# if path does not exist, create it
if (!dir.exists(file.path(paste0("./figures/", celltype)))) {
    dir.create(file.path(paste0("./figures/", celltype)))
}
lm_df <- readr::read_tsv(lm_file, col_types = readr::cols(.default = "d", feature ="c", inducer1_inhibitor_inhibitor_dose__inducer1_dose = "c"))
head(lm_df, 2)

feature,r2_score,Metadata_number_of_singlecells,twob_Metadata_Treatment_Inhibitor_Dose,Treatment_Dose,inducer1_inhibitor_inhibitor_dose__inducer1_dose
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
Cytoplasm_AreaShape_Compactness,0.0010758123,-7.546792e-05,-0.0403419414,0.0403419414,media ctr_Media ctr_0.0__0
Cytoplasm_AreaShape_Eccentricity,0.0005340345,-6.444612e-05,-0.0004922216,0.0004922216,media ctr_Media ctr_0.0__0


In [20]:
unique(lm_df$inducer1_inhibitor_inhibitor_dose__inducer1_dose)

In [21]:

# Arrange by absolute value coefficient
# Split out components of feature name for visualization
lm_df <- lm_df %>%
    dplyr::arrange(desc(abs(twob_Metadata_Treatment_Inhibitor_Dose))) %>%
    tidyr::separate(
        feature,
        into = c(
            "compartment",
            "feature_group",
            "measurement",
            "channel",
            "parameter1",
            "parameter2"
        ),
        sep = "_",
        remove = FALSE
    ) %>%
    dplyr::mutate(channel_cleaned = channel)



“[1m[22mExpected 6 pieces. Additional pieces discarded in 7956 rows [2, 4, 5, 8, 10,
12, 13, 14, 17, 18, 19, 21, 23, 24, 25, 26, 27, 30, 32, 33, ...].”
“[1m[22mExpected 6 pieces. Missing pieces filled with `NA` in 15264 rows [1, 3, 6, 7,
9, 11, 15, 16, 20, 22, 28, 29, 31, 34, 39, 40, 41, 43, 44, 45, ...].”


In [22]:
unique(lm_df$channel)

In [23]:
unique(lm_df$channel_cleaned)

In [24]:
# Clean channel for visualization
lm_df$channel_learned <- dplyr::recode(lm_df$channel_cleaned,
        "CorrDNA" = "nuclei",
        "CorrMito" = "Mito",
        "CorrER" = "ER",
        "CorrGasdermin" = "gasdermin",
        "CorrPM" = "PM",
        .default = "other",
        .missing="other"
    )

print(dim(lm_df))
head(lm_df,2)
unique(lm_df$channel_learned)
lm_df$abs_Metadata_number_of_singlecells <- abs(lm_df$Metadata_number_of_singlecells)
lm_df$abs_twob_Metadata_Treatment_Inhibitor_Dose <- abs(lm_df$twob_Metadata_Treatment_Inhibitor_Dose)
lm_df$abs_Treatment_Dose <- abs(lm_df$Treatment_Dose)

[1] 45036    14


feature,compartment,feature_group,measurement,channel,parameter1,parameter2,r2_score,Metadata_number_of_singlecells,twob_Metadata_Treatment_Inhibitor_Dose,Treatment_Dose,inducer1_inhibitor_inhibitor_dose__inducer1_dose,channel_cleaned,channel_learned
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
Cytoplasm_Correlation_RWC_CorrPM_CorrMito,Cytoplasm,Correlation,RWC,CorrPM,CorrMito,,0.3742011,-0.0002044911,-1.380149,-1.380149,H2O2_Disulfiram_1.0__100.000,CorrPM,PM
Cytoplasm_Texture_DifferenceEntropy_CorrMito_3_00_256,Cytoplasm,Texture,DifferenceEntropy,CorrMito,3,0.0,0.4806035,0.000374263,1.348076,1.348076,LPS_Nigericin_DMSO_0.025__100.000_10.0,CorrMito,Mito


In [25]:
loop_list <- unique(lm_df$inducer1_inhibitor_inhibitor_dose__inducer1_dose)
x_list <- c('abs_twob_Metadata_Treatment_Inhibitor_Dose','abs_Treatment_Dose')

In [26]:
pdf(file=lm_cp_fig_abs )
for (i in 1:length(loop_list)){
    df <- lm_df[lm_df$inducer1_inhibitor_inhibitor_dose__inducer1_dose == loop_list[i],]
    for (j in 1:length(x_list)){
        lm_fig_gg <- (
            ggplot(df, aes(x = .data[[x_list[j]]], y = r2_score))
            + geom_point(aes(size = abs_Metadata_number_of_singlecells, color = channel_learned), alpha = 0.7)
            + theme_bw()
            + guides(
                color = guide_legend(title = "Channel\n(if applicable)", order = 1),
                size = guide_legend(title = "Cell count contributution")
            )
            + ylab("R2 score of LM feature")
            + xlab(paste0(x_list[j]," contribution (LM beta coefficient)"))
            + ggtitle(paste0("How CellProfiler features contribute\nto ",loop_list[i], "\ntreatments and cell density"))
        )
    plot(lm_fig_gg)    
    }
}
dev.off()

In [31]:
head(lm_df)

feature,compartment,feature_group,measurement,channel,parameter1,parameter2,r2_score,Metadata_number_of_singlecells,twob_Metadata_Treatment_Inhibitor_Dose,Treatment_Dose,inducer1_inhibitor_inhibitor_dose__inducer1_dose,channel_cleaned,channel_learned,abs_Metadata_number_of_singlecells,abs_twob_Metadata_Treatment_Inhibitor_Dose,abs_Treatment_Dose
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>
Cytoplasm_Correlation_RWC_CorrPM_CorrMito,Cytoplasm,Correlation,RWC,CorrPM,CorrMito,,0.3742011,-0.0002044911,-1.380149,-1.380149,H2O2_Disulfiram_1.0__100.000,CorrPM,PM,0.0002044911,1.380149,1.380149
Cytoplasm_Texture_DifferenceEntropy_CorrMito_3_00_256,Cytoplasm,Texture,DifferenceEntropy,CorrMito,3,0.0,0.4806035,0.000374263,1.348076,1.348076,LPS_Nigericin_DMSO_0.025__100.000_10.0,CorrMito,Mito,0.000374263,1.348076,1.348076
Nuclei_Correlation_Overlap_CorrMito_CorrPM,Nuclei,Correlation,Overlap,CorrMito,CorrPM,,0.3394022,-0.0002052585,-1.338728,-1.338728,H2O2_Disulfiram_1.0__100.000,CorrMito,Mito,0.0002052585,1.338728,1.338728
Cytoplasm_Texture_DifferenceEntropy_CorrMito_3_00_256,Cytoplasm,Texture,DifferenceEntropy,CorrMito,3,0.0,0.3247935,0.0005011261,1.30795,1.30795,H2O2_Disulfiram_1.0__100.000,CorrMito,Mito,0.0005011261,1.30795,1.30795
Cells_Texture_InverseDifferenceMoment_CorrMito_3_00_256,Cells,Texture,InverseDifferenceMoment,CorrMito,3,0.0,0.4846559,-0.0002868892,-1.295927,-1.295927,LPS_Nigericin_DMSO_0.025__100.000_10.0,CorrMito,Mito,0.0002868892,1.295927,1.295927
Nuclei_Intensity_MaxIntensityEdge_CorrMito,Nuclei,Intensity,MaxIntensityEdge,CorrMito,,,0.2796684,0.0001916182,1.278755,1.278755,H2O2_Disulfiram_1.0__100.000,CorrMito,Mito,0.0001916182,1.278755,1.278755


In [28]:
pdf(file=lm_cp_fig )
for (i in 1:length(loop_list)){
    df <- lm_df[lm_df$inducer1_inhibitor_inhibitor_dose__inducer1_dose == loop_list[i],]
    for (j in 1:length(x_list)){
        lm_fig_gg <- (
            ggplot(df, aes(x = .data[[x_list[j]]], y = Metadata_number_of_singlecells))
            + geom_point(aes(size = r2_score, color = channel_learned), alpha = 0.7)
            + theme_bw()
            + guides(
                color = guide_legend(title = "Channel\n(if applicable)", order = 1),
                size = guide_legend(title = "Cell count contributution")
            )
            + geom_vline(xintercept = 0, linetype = "dashed", color = "red")
            + geom_hline(yintercept = 0, linetype = "dashed", color = "red")
            + geom_density2d(color="black", show.legend = FALSE)
            + ylab("R2 score of LM feature")
            + xlab(paste0(x_list[j]," contribution (LM beta coefficient)"))
            + ggtitle(paste0("How CellProfiler features contribute\nto ",loop_list[i], "\ntreatments and cell density"))
        )
    plot(lm_fig_gg)    
    }
}
dev.off()

“[1m[22mComputation failed in `stat_density2d()`
Caused by error in `MASS::kde2d()`:
[33m![39m bandwidths must be strictly positive”
“no non-missing arguments to min; returning Inf”
“no non-missing arguments to max; returning -Inf”
“no non-missing arguments to max; returning -Inf”


ERROR: [1m[33mError[39m in `geom_density2d()`:[22m
[1m[22m[33m![39m Problem while computing stat.
[36mℹ[39m Error occurred in the 4th layer.
[1mCaused by error in `seq_len()`:[22m
[33m![39m argument must be coercible to non-negative integer


In [None]:
pdf(file=lm_cp_fig)
for (i in 1:length(loop_list)){
    df <- lm_df[lm_df$dosage_treatments_list == loop_list[i],]
    lm_fig_gg <- (
        ggplot(df, aes(x = oneb_Metadata_Treatment_Dose_Inhibitor_Dose, y = Metadata_number_of_singlecells))

        + geom_point(aes(size = r2_score, color = channel_learned,), alpha = 0.7)

        + scale_size_continuous(range = c(2, 8), limits = c(0, 1))


        + geom_vline(xintercept = 0, linetype = "dashed", color = "red")
        + geom_hline(yintercept = 0, linetype = "dashed", color = "red")
        + geom_density2d(color="black", show.legend = FALSE)
        + theme_bw()
        + guides(
            color = guide_legend(title = "Channel\n(if applicable)", order = 1),
            size = guide_legend(title = "R2 score")
        )
        # make legend dots bigger
        + ylab("WT genotype contribution (LM beta coefficient)")
        + xlab("Cell count contribution (LM beta coefficient)")
        + ggtitle(paste0("How CellProfiler features contribute\nto ",loop_list[i], " treatments and cell density"))
    )
    plot(lm_fig_gg)
}
dev.off()


In [None]:
pdf(file=lm_cp_fig_abs)
for (i in 1:length(loop_list)){

    df <- lm_df[lm_df$dosage_treatments_list == loop_list[i],]
    lm_fig_gg <- (
        ggplot(df, aes(x = abs_oneb_Metadata_Treatment_Dose_Inhibitor_Dose, y = r2_score))
        + geom_point(aes(size = abs_Metadata_number_of_singlecells, color = channel_learned), alpha = 0.7)

        + theme_bw()
        + guides(
            color = guide_legend(title = "Channel\n(if applicable)", order = 1),
            size = guide_legend(title = "Cell count contributution")
        )
        + ylab("R2 score of LM feature")
        + xlab("Treatment and Dose contribution (LM beta coefficient)")
        + ggtitle(paste0("How CellProfiler features contribute\nto ",loop_list[i], " treatments and cell density"))
    )

    plot(lm_fig_gg)
}
dev.off()