In [1]:
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(argparse))

“package ‘ggplot2’ was built under R version 4.2.3”


## Two Beta Model Vizualization

In [None]:
# define command line arguments
parser <- ArgumentParser(description = "Visualize linear modeling results")
# add arguments
parser$add_argument('--celltype', type='character', help='Cell type to visualize')

# parse arguments from command line
args <- parser$parse_args()

# define cell type
celltype <- args$celltype


In [2]:

lm_file <- file.path(paste0("./results/", celltype, "/lm_two_beta.tsv"))

lm_cp_fig <- file.path(paste0("./figures/", celltype, "/lm_two_beta.pdf"))

# if path does not exist, create it
if (!dir.exists(file.path(paste0("./figures/", celltype)))) {
    dir.create(file.path(paste0("./figures/", celltype)))
}
lm_df <- readr::read_tsv(lm_file, col_types = readr::cols(.default = "d", feature ="c", inducer1_inhibitor_inhibitor_dose__inducer1_dose = "c"))
head(lm_df)

feature,r2_score,Metadata_number_of_singlecells,twob_Metadata_Treatment_Inhibitor_Dose,Treatment_Dose,inducer1_inhibitor_inhibitor_dose__inducer1_dose
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
Cytoplasm_AreaShape_Area,0.0009343787,1.695962e-05,0.034812093,-0.034812093,media ctr_Media ctr_0.0__0
Cytoplasm_AreaShape_BoundingBoxArea,0.001740451,-3.367326e-05,0.034966831,-0.034966831,media ctr_Media ctr_0.0__0
Cytoplasm_AreaShape_BoundingBoxMaximum_X,0.0001077223,-2.358903e-05,-0.01323453,0.01323453,media ctr_Media ctr_0.0__0
Cytoplasm_AreaShape_BoundingBoxMaximum_Y,0.0001354607,-2.231822e-05,0.004917124,-0.004917124,media ctr_Media ctr_0.0__0
Cytoplasm_AreaShape_BoundingBoxMinimum_X,0.0001515569,-2.184418e-05,-0.015622436,0.015622436,media ctr_Media ctr_0.0__0
Cytoplasm_AreaShape_BoundingBoxMinimum_Y,8.637752e-05,-2.176965e-05,0.002139427,-0.002139427,media ctr_Media ctr_0.0__0


In [3]:
unique(lm_df$inducer1_inhibitor_inhibitor_dose__inducer1_dose)

In [4]:

# Arrange by absolute value coefficient
# Split out components of feature name for visualization
lm_df <- lm_df %>%
    dplyr::arrange(desc(abs(twob_Metadata_Treatment_Inhibitor_Dose))) %>%
    tidyr::separate(
        feature,
        into = c(
            "compartment",
            "feature_group",
            "measurement",
            "channel",
            "parameter1",
            "parameter2"
        ),
        sep = "_",
        remove = FALSE
    ) %>%
    dplyr::mutate(channel_cleaned = channel)



“[1m[22mExpected 6 pieces. Additional pieces discarded in 28080 rows [14, 15, 19, 21, 22, 23, 24, 25, 28, 31, 33, 34, 36, 37, 38, 42, 43, 44, 45, 46,
...].”
“[1m[22mExpected 6 pieces. Missing pieces filled with `NA` in 42012 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, 17, 18, 20, 26, 27, 29, ...].”


In [5]:
unique(lm_df$channel)

In [6]:
unique(lm_df$channel_cleaned)

In [7]:
# Clean channel for visualization
lm_df$channel_learned <- dplyr::recode(lm_df$channel_cleaned,
        "CorrDNA" = "nuclei",
        "CorrMito" = "Mito",
        "CorrER" = "ER",
        "CorrGasdermin" = "gasdermin",
        "CorrPM" = "PM",
        .default = "other",
        .missing="other"
    )

print(dim(lm_df))
head(lm_df)
unique(lm_df$channel_learned)
lm_df$abs_Metadata_number_of_singlecells <- abs(lm_df$Metadata_number_of_singlecells)
lm_df$abs_twob_Metadata_Treatment_Inhibitor_Dose <- abs(lm_df$twob_Metadata_Treatment_Inhibitor_Dose)
lm_df$abs_Treatment_Dose <- abs(lm_df$Treatment_Dose)

[1] 102492     14


feature,compartment,feature_group,measurement,channel,parameter1,parameter2,r2_score,Metadata_number_of_singlecells,twob_Metadata_Treatment_Inhibitor_Dose,Treatment_Dose,inducer1_inhibitor_inhibitor_dose__inducer1_dose,channel_cleaned,channel_learned
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
Cytoplasm_Correlation_Manders_CorrPM_CorrMito,Cytoplasm,Correlation,Manders,CorrPM,CorrMito,,0.3974507,-1.330689e-05,-1.386229,-1.386229,H2O2_Disulfiram_1.0__100.000,CorrPM,PM
Cells_Correlation_Manders_CorrPM_CorrMito,Cells,Correlation,Manders,CorrPM,CorrMito,,0.3931833,-4.000428e-05,-1.368789,-1.368789,H2O2_Disulfiram_1.0__100.000,CorrPM,PM
Cells_Correlation_Manders_CorrDNA_CorrMito,Cells,Correlation,Manders,CorrDNA,CorrMito,,0.2683756,-6.649527e-05,-1.344787,-1.344787,H2O2_Disulfiram_1.0__100.000,CorrDNA,nuclei
Nuclei_Correlation_Manders_CorrPM_CorrMito,Nuclei,Correlation,Manders,CorrPM,CorrMito,,0.1566935,2.525405e-05,-1.323291,-1.323291,H2O2_Disulfiram_1.0__100.000,CorrPM,PM
Cytoplasm_Correlation_Manders_CorrDNA_CorrMito,Cytoplasm,Correlation,Manders,CorrDNA,CorrMito,,0.2701912,5.697376e-06,-1.267118,-1.267118,H2O2_Disulfiram_1.0__100.000,CorrDNA,nuclei
Nuclei_Correlation_Manders_CorrDNA_CorrMito,Nuclei,Correlation,Manders,CorrDNA,CorrMito,,0.139789,5.213628e-05,-1.249437,-1.249437,H2O2_Disulfiram_1.0__100.000,CorrDNA,nuclei


In [8]:
head(lm_df)

feature,compartment,feature_group,measurement,channel,parameter1,parameter2,r2_score,Metadata_number_of_singlecells,twob_Metadata_Treatment_Inhibitor_Dose,Treatment_Dose,inducer1_inhibitor_inhibitor_dose__inducer1_dose,channel_cleaned,channel_learned,abs_Metadata_number_of_singlecells,abs_twob_Metadata_Treatment_Inhibitor_Dose,abs_Treatment_Dose
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>
Cytoplasm_Correlation_Manders_CorrPM_CorrMito,Cytoplasm,Correlation,Manders,CorrPM,CorrMito,,0.3974507,-1.330689e-05,-1.386229,-1.386229,H2O2_Disulfiram_1.0__100.000,CorrPM,PM,1.330689e-05,1.386229,1.386229
Cells_Correlation_Manders_CorrPM_CorrMito,Cells,Correlation,Manders,CorrPM,CorrMito,,0.3931833,-4.000428e-05,-1.368789,-1.368789,H2O2_Disulfiram_1.0__100.000,CorrPM,PM,4.000428e-05,1.368789,1.368789
Cells_Correlation_Manders_CorrDNA_CorrMito,Cells,Correlation,Manders,CorrDNA,CorrMito,,0.2683756,-6.649527e-05,-1.344787,-1.344787,H2O2_Disulfiram_1.0__100.000,CorrDNA,nuclei,6.649527e-05,1.344787,1.344787
Nuclei_Correlation_Manders_CorrPM_CorrMito,Nuclei,Correlation,Manders,CorrPM,CorrMito,,0.1566935,2.525405e-05,-1.323291,-1.323291,H2O2_Disulfiram_1.0__100.000,CorrPM,PM,2.525405e-05,1.323291,1.323291
Cytoplasm_Correlation_Manders_CorrDNA_CorrMito,Cytoplasm,Correlation,Manders,CorrDNA,CorrMito,,0.2701912,5.697376e-06,-1.267118,-1.267118,H2O2_Disulfiram_1.0__100.000,CorrDNA,nuclei,5.697376e-06,1.267118,1.267118
Nuclei_Correlation_Manders_CorrDNA_CorrMito,Nuclei,Correlation,Manders,CorrDNA,CorrMito,,0.139789,5.213628e-05,-1.249437,-1.249437,H2O2_Disulfiram_1.0__100.000,CorrDNA,nuclei,5.213628e-05,1.249437,1.249437


In [9]:
loop_list <- unique(lm_df$inducer1_inhibitor_inhibitor_dose__inducer1_dose)
x_list <- c('abs_twob_Metadata_Treatment_Inhibitor_Dose','abs_Treatment_Dose')

In [10]:
pdf(file=lm_cp_fig )
for (i in 1:length(loop_list)){
    df <- lm_df[lm_df$inducer1_inhibitor_inhibitor_dose__inducer1_dose == loop_list[i],]
    for (j in 1:length(x_list)){
        lm_fig_gg <- (
            ggplot(df, aes(x = .data[[x_list[j]]], y = r2_score))
            + geom_point(aes(size = abs_Metadata_number_of_singlecells, color = channel_learned), alpha = 0.7)
            + theme_bw()
            + guides(
                color = guide_legend(title = "Channel\n(if applicable)", order = 1),
                size = guide_legend(title = "Cell count contributution")
            )
            + ylab("R2 score of LM feature")
            + xlab(paste0(x_list[j]," contribution (LM beta coefficient)"))
            + ggtitle(paste0("How CellProfiler features contribute\nto ",loop_list[i], "\ntreatments and cell density"))
        )
    plot(lm_fig_gg)    
    }
}
dev.off()