In [1]:
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(argparse))

“package ‘ggplot2’ was built under R version 4.2.3”


## Three Beta Model Vizualization

In [None]:
# define command line arguments
parser <- ArgumentParser(description = "Visualize linear modeling results")
# add arguments
parser$add_argument('--celltype', type='character', help='Cell type to visualize')

# parse arguments from command line
args <- parser$parse_args()

# define cell type
celltype <- args$celltype


In [2]:

lm_file <- file.path(paste0("./results/", celltype, "/lm_three_beta.tsv"))

lm_cp_fig <- file.path(paste0("./figures/", celltype, "/lm_three_beta.pdf"))

# if path does not exist, create it
if (!dir.exists(file.path(paste0("./figures/", celltype)))) {
    dir.create(file.path(paste0("./figures/", celltype)))
}
lm_df <- readr::read_tsv(lm_file, col_types = readr::cols(.default = "d", feature ="c", inducer1__inducer1_dose__inhibitor_inhibitor_dose = "c"))
head(lm_df)

feature,r2_score,Metadata_number_of_singlecells,threeb_Treatment,threeb_Treatment_Dose,threeb_Inhibitor_and_Dose,inducer1__inducer1_dose__inhibitor_inhibitor_dose
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
Cytoplasm_AreaShape_Area,0.0009343787,1.695962e-05,0.023208062,-0.023208062,0.023208062,media ctr__0__Media ctr_0.0
Cytoplasm_AreaShape_BoundingBoxArea,0.001740451,-3.367326e-05,0.023311221,-0.023311221,0.023311221,media ctr__0__Media ctr_0.0
Cytoplasm_AreaShape_BoundingBoxMaximum_X,0.0001077223,-2.358903e-05,-0.00882302,0.00882302,-0.00882302,media ctr__0__Media ctr_0.0
Cytoplasm_AreaShape_BoundingBoxMaximum_Y,0.0001354607,-2.231822e-05,0.003278083,-0.003278083,0.003278083,media ctr__0__Media ctr_0.0
Cytoplasm_AreaShape_BoundingBoxMinimum_X,0.0001515569,-2.184418e-05,-0.010414957,0.010414957,-0.010414957,media ctr__0__Media ctr_0.0
Cytoplasm_AreaShape_BoundingBoxMinimum_Y,8.637752e-05,-2.176965e-05,0.001426285,-0.001426285,0.001426285,media ctr__0__Media ctr_0.0


In [3]:
unique(lm_df$inducer1__inducer1_dose__inhibitor_inhibitor_dose)

In [4]:

# Arrange by absolute value coefficient
# Split out components of feature name for visualization
lm_df <- lm_df %>%
    dplyr::arrange(desc(abs(threeb_Treatment_Dose))) %>%
    tidyr::separate(
        feature,
        into = c(
            "compartment",
            "feature_group",
            "measurement",
            "channel",
            "parameter1",
            "parameter2"
        ),
        sep = "_",
        remove = FALSE
    ) %>%
    dplyr::mutate(channel_cleaned = channel)



“[1m[22mExpected 6 pieces. Additional pieces discarded in 28080 rows [3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 15, 16, 18, 19, 20, 23, 24, 25, 26, 27, ...].”
“[1m[22mExpected 6 pieces. Missing pieces filled with `NA` in 42012 rows [1, 2, 11, 14, 17, 21, 22, 40, 42, 45, 49, 52, 55, 56, 57, 64, 66, 67, 70, 76,
...].”


In [5]:
unique(lm_df$channel)

In [6]:
unique(lm_df$channel_cleaned)

In [7]:
# Clean channel for visualization
lm_df$channel_learned <- dplyr::recode(lm_df$channel_cleaned,
        "CorrDNA" = "nuclei",
        "CorrMito" = "Mito",
        "CorrER" = "ER",
        "CorrGasdermin" = "gasdermin",
        "CorrPM" = "PM",
        .default = "other",
        .missing="other"
    )

print(dim(lm_df))
head(lm_df)
unique(lm_df$channel_learned)
lm_df$abs_Metadata_number_of_singlecells <- abs(lm_df$Metadata_number_of_singlecells)
lm_df$abs_threeb_Treatment <- abs(lm_df$threeb_Treatment)
lm_df$abs_threeb_Treatment_Dose <- abs(lm_df$threeb_Treatment_Dose)
lm_df$abs_threeb_Inhibitor_and_Dose <- abs(lm_df$threeb_Inhibitor_and_Dose)

[1] 102492     15


feature,compartment,feature_group,measurement,channel,parameter1,parameter2,r2_score,Metadata_number_of_singlecells,threeb_Treatment,threeb_Treatment_Dose,threeb_Inhibitor_and_Dose,inducer1__inducer1_dose__inhibitor_inhibitor_dose,channel_cleaned,channel_learned
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
Nuclei_Intensity_MeanIntensityEdge_CorrMito,Nuclei,Intensity,MeanIntensityEdge,CorrMito,,,0.4552401,-0.0002177032,1.237168,1.237168,0,LPS_Nigericin__100.000_10.0__DMSO_0.025,CorrMito,Mito
Nuclei_Intensity_MaxIntensityEdge_CorrMito,Nuclei,Intensity,MaxIntensityEdge,CorrMito,,,0.4135021,-0.0002126844,1.168403,1.168403,0,LPS_Nigericin__100.000_10.0__DMSO_0.025,CorrMito,Mito
Cells_Texture_DifferenceEntropy_CorrMito_3_00_256,Cells,Texture,DifferenceEntropy,CorrMito,3.0,0.0,0.5400279,-0.0002267092,1.16435,1.16435,0,LPS_Nigericin__100.000_10.0__DMSO_0.025,CorrMito,Mito
Cells_Texture_DifferenceEntropy_CorrMito_3_02_256,Cells,Texture,DifferenceEntropy,CorrMito,3.0,2.0,0.5326017,-0.0002233205,1.145984,1.145984,0,LPS_Nigericin__100.000_10.0__DMSO_0.025,CorrMito,Mito
Cytoplasm_Texture_DifferenceEntropy_CorrMito_3_00_256,Cytoplasm,Texture,DifferenceEntropy,CorrMito,3.0,0.0,0.516508,-0.000220056,1.121624,1.121624,0,LPS_Nigericin__100.000_10.0__DMSO_0.025,CorrMito,Mito
Cytoplasm_Texture_DifferenceEntropy_CorrMito_3_02_256,Cytoplasm,Texture,DifferenceEntropy,CorrMito,3.0,2.0,0.5100435,-0.000216112,1.105229,1.105229,0,LPS_Nigericin__100.000_10.0__DMSO_0.025,CorrMito,Mito


In [8]:
head(lm_df)

feature,compartment,feature_group,measurement,channel,parameter1,parameter2,r2_score,Metadata_number_of_singlecells,threeb_Treatment,threeb_Treatment_Dose,threeb_Inhibitor_and_Dose,inducer1__inducer1_dose__inhibitor_inhibitor_dose,channel_cleaned,channel_learned,abs_Metadata_number_of_singlecells,abs_threeb_Treatment,abs_threeb_Treatment_Dose,abs_threeb_Inhibitor_and_Dose
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
Nuclei_Intensity_MeanIntensityEdge_CorrMito,Nuclei,Intensity,MeanIntensityEdge,CorrMito,,,0.4552401,-0.0002177032,1.237168,1.237168,0,LPS_Nigericin__100.000_10.0__DMSO_0.025,CorrMito,Mito,0.0002177032,1.237168,1.237168,0
Nuclei_Intensity_MaxIntensityEdge_CorrMito,Nuclei,Intensity,MaxIntensityEdge,CorrMito,,,0.4135021,-0.0002126844,1.168403,1.168403,0,LPS_Nigericin__100.000_10.0__DMSO_0.025,CorrMito,Mito,0.0002126844,1.168403,1.168403,0
Cells_Texture_DifferenceEntropy_CorrMito_3_00_256,Cells,Texture,DifferenceEntropy,CorrMito,3.0,0.0,0.5400279,-0.0002267092,1.16435,1.16435,0,LPS_Nigericin__100.000_10.0__DMSO_0.025,CorrMito,Mito,0.0002267092,1.16435,1.16435,0
Cells_Texture_DifferenceEntropy_CorrMito_3_02_256,Cells,Texture,DifferenceEntropy,CorrMito,3.0,2.0,0.5326017,-0.0002233205,1.145984,1.145984,0,LPS_Nigericin__100.000_10.0__DMSO_0.025,CorrMito,Mito,0.0002233205,1.145984,1.145984,0
Cytoplasm_Texture_DifferenceEntropy_CorrMito_3_00_256,Cytoplasm,Texture,DifferenceEntropy,CorrMito,3.0,0.0,0.516508,-0.000220056,1.121624,1.121624,0,LPS_Nigericin__100.000_10.0__DMSO_0.025,CorrMito,Mito,0.000220056,1.121624,1.121624,0
Cytoplasm_Texture_DifferenceEntropy_CorrMito_3_02_256,Cytoplasm,Texture,DifferenceEntropy,CorrMito,3.0,2.0,0.5100435,-0.000216112,1.105229,1.105229,0,LPS_Nigericin__100.000_10.0__DMSO_0.025,CorrMito,Mito,0.000216112,1.105229,1.105229,0


In [9]:
loop_list <- unique(lm_df$inducer1__inducer1_dose__inhibitor_inhibitor_dose)
x_list <- c('abs_threeb_Inhibitor_and_Dose','abs_threeb_Treatment','abs_threeb_Treatment_Dose')

In [10]:
pdf(file=lm_cp_fig )
for (i in 1:length(loop_list)){
    df <- lm_df[lm_df$inducer1__inducer1_dose__inhibitor_inhibitor_dose == loop_list[i],]
    for (j in 1:length(x_list)){
        lm_fig_gg <- (
            ggplot(df, aes(x = .data[[x_list[j]]], y = r2_score))
            + geom_point(aes(size = abs_Metadata_number_of_singlecells, color = channel_learned), alpha = 0.7)
            + theme_bw()
            + guides(
                color = guide_legend(title = "Channel\n(if applicable)", order = 1),
                size = guide_legend(title = "Cell count contributution")
            )
            + ylab("R2 score of LM feature")
            + xlab(paste0(x_list[j]," contribution (LM beta coefficient)"))
            + ggtitle(paste0("How CellProfiler features contribute\nto ",loop_list[i], "\ntreatments and cell density"))
        )
    plot(lm_fig_gg)    
    }
}
dev.off()