In [2]:
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(argparse))

“package ‘ggplot2’ was built under R version 4.2.3”


## One Beta Model Vizualization

In [3]:
# define command line arguments
parser <- ArgumentParser(description = "Visualize linear modeling results")
# add arguments
parser$add_argument('--celltype', type='character', help='Cell type to visualize')

# parse arguments from command line
args <- parser$parse_args()

# define cell type
celltype <- args$celltype


ERROR: Error in "argparse::parse_args_output(output)": parse error:
usage: PROGRAM [-h] [--celltype CELLTYPE]
PROGRAM: error: unrecognized arguments: /home/lippincm/.local/share/jupyter/runtime/kernel-v2-562366qI74kefqDRYk.json


In [9]:
celltype = "SHSY5Y"

In [10]:

lm_file <- file.path(paste0("./results/", celltype, "/lm_one_beta.tsv"))

lm_cp_fig <- file.path(paste0("./figures/", celltype, "/lm_one_beta.pdf"))

lm_cp_fig_abs <- file.path(paste0("./figures/", celltype, "/lm_one_beta_abs.pdf"))

# if path does not exist, create it
if (!dir.exists(file.path(paste0("./figures/", celltype)))) {
    dir.create(file.path(paste0("./figures/", celltype)))
}

# read in linear modeling results
lm_df <- readr::read_tsv(lm_file, col_types = readr::cols(.default = "d", feature ="c", dosage_treatments_list = "c"))
head(lm_df)

feature,r2_score,Metadata_number_of_singlecells,oneb_Metadata_Treatment_Dose_Inhibitor_Dose,dosage_treatments_list
<chr>,<dbl>,<dbl>,<dbl>,<chr>
Cytoplasm_AreaShape_Compactness,0.0010758123,-7.546792e-05,-0.0806838829,media ctr_0_Media ctr_0.0-DMSO_0.100_DMSO_0.025
Cytoplasm_AreaShape_Eccentricity,0.0005340345,-6.444612e-05,-0.0009844432,media ctr_0_Media ctr_0.0-DMSO_0.100_DMSO_0.025
Cytoplasm_AreaShape_Extent,0.0010686948,9.424593e-05,0.0782239535,media ctr_0_Media ctr_0.0-DMSO_0.100_DMSO_0.025
Cytoplasm_AreaShape_FormFactor,0.0006766865,7.923037e-05,0.0608552602,media ctr_0_Media ctr_0.0-DMSO_0.100_DMSO_0.025
Cytoplasm_AreaShape_MajorAxisLength,0.001552285,-7.570263e-05,0.0336568321,media ctr_0_Media ctr_0.0-DMSO_0.100_DMSO_0.025
Cytoplasm_AreaShape_MinorAxisLength,0.0004700079,8.941444e-06,0.0483550044,media ctr_0_Media ctr_0.0-DMSO_0.100_DMSO_0.025


In [11]:
unique(lm_df$dosage_treatments_list)

In [12]:

# Arrange by absolute value coefficient
# Split out components of feature name for visualization
lm_df <- lm_df %>%
    dplyr::arrange(desc(abs(oneb_Metadata_Treatment_Dose_Inhibitor_Dose))) %>%
    tidyr::separate(
        feature,
        into = c(
            "compartment",
            "feature_group",
            "measurement",
            "channel",
            "parameter1",
            "parameter2"
        ),
        sep = "_",
        remove = FALSE
    ) %>%
    dplyr::mutate(channel_cleaned = channel)



“[1m[22mExpected 6 pieces. Additional pieces discarded in 7956 rows [2, 4, 5, 8, 10,
12, 13, 14, 17, 18, 19, 21, 23, 24, 25, 26, 27, 30, 32, 33, ...].”
“[1m[22mExpected 6 pieces. Missing pieces filled with `NA` in 15264 rows [1, 3, 6, 7,
9, 11, 15, 16, 20, 22, 28, 29, 31, 34, 39, 40, 41, 43, 44, 45, ...].”


In [13]:
unique(lm_df$channel)

In [14]:
unique(lm_df$channel_cleaned)

In [15]:
# Clean channel for visualization
lm_df$channel_learned <- dplyr::recode(lm_df$channel_cleaned,
        "CorrDNA" = "nuclei",
        "CorrMito" = "Mito",
        "CorrER" = "ER",
        "CorrGasdermin" = "gasdermin",
        "CorrPM" = "PM",
        .default = "other",
        .missing="other"
    )

print(dim(lm_df))
head(lm_df, 2)
unique(lm_df$channel_learned)
lm_df$abs_Metadata_number_of_singlecells <- abs(lm_df$Metadata_number_of_singlecells)
lm_df$abs_oneb_Metadata_Treatment_Dose_Inhibitor_Dose <- abs(lm_df$oneb_Metadata_Treatment_Dose_Inhibitor_Dose)

[1] 45036    13


feature,compartment,feature_group,measurement,channel,parameter1,parameter2,r2_score,Metadata_number_of_singlecells,oneb_Metadata_Treatment_Dose_Inhibitor_Dose,dosage_treatments_list,channel_cleaned,channel_learned
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
Cytoplasm_Correlation_RWC_CorrPM_CorrMito,Cytoplasm,Correlation,RWC,CorrPM,CorrMito,,0.3742011,-0.0002044911,-2.760298,H2O2_100.000_Disulfiram_1.0-DMSO_0.100_DMSO_0.025,CorrPM,PM
Cytoplasm_Texture_DifferenceEntropy_CorrMito_3_00_256,Cytoplasm,Texture,DifferenceEntropy,CorrMito,3,0.0,0.4806035,0.000374263,2.696151,LPS_Nigericin_100.000_10.0_DMSO_0.025-DMSO_0.100_DMSO_0.025,CorrMito,Mito


In [30]:
df <- lm_df[lm_df$dosage_treatments_list == 'LPS_100.000_DMSO_0.025-DMSO_0.100_DMSO_0.025',]

In [31]:
df <- df[df$channel_learned == 'Mito',]
head(df,2)
# get the max row of the oneb_Metadata_Treatment_Dose_Inhibitor_Dose channel
df <- df[which.min(df$oneb_Metadata_Treatment_Dose_Inhibitor_Dose),]
df

feature,compartment,feature_group,measurement,channel,parameter1,parameter2,r2_score,Metadata_number_of_singlecells,oneb_Metadata_Treatment_Dose_Inhibitor_Dose,dosage_treatments_list,channel_cleaned,channel_learned,abs_Metadata_number_of_singlecells,abs_oneb_Metadata_Treatment_Dose_Inhibitor_Dose
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>
Cells_Intensity_MinIntensityEdge_CorrMito,Cells,Intensity,MinIntensityEdge,CorrMito,,,0.3625977,0.0002094766,1.833204,LPS_100.000_DMSO_0.025-DMSO_0.100_DMSO_0.025,CorrMito,Mito,0.0002094766,1.833204
Nuclei_Intensity_MinIntensityEdge_CorrMito,Nuclei,Intensity,MinIntensityEdge,CorrMito,,,0.246732,0.0001475019,1.465545,LPS_100.000_DMSO_0.025-DMSO_0.100_DMSO_0.025,CorrMito,Mito,0.0001475019,1.465545


feature,compartment,feature_group,measurement,channel,parameter1,parameter2,r2_score,Metadata_number_of_singlecells,oneb_Metadata_Treatment_Dose_Inhibitor_Dose,dosage_treatments_list,channel_cleaned,channel_learned,abs_Metadata_number_of_singlecells,abs_oneb_Metadata_Treatment_Dose_Inhibitor_Dose
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>
Cells_Texture_InverseDifferenceMoment_CorrMito_3_00_256,Cells,Texture,InverseDifferenceMoment,CorrMito,3,0,0.210258,-0.0002194226,-1.244634,LPS_100.000_DMSO_0.025-DMSO_0.100_DMSO_0.025,CorrMito,Mito,0.0002194226,1.244634


In [16]:
loop_list <- unique(lm_df$dosage_treatments_list)
# drop 'DMSO_0.100_DMSO_0.025-DMSO_0.100_DMSO_0.025' from loop_list to avoid error in plotting
loop_list <- loop_list[!grepl('DMSO_0.100_DMSO_0.025-DMSO_0.100_DMSO_0.025', loop_list)]

In [17]:
pdf(file=lm_cp_fig)
for (i in 1:length(loop_list)){
    df <- lm_df[lm_df$dosage_treatments_list == loop_list[i],]
    lm_fig_gg <- (
        ggplot(df, aes(x = Metadata_number_of_singlecells,, y = oneb_Metadata_Treatment_Dose_Inhibitor_Dose))

        + geom_point(aes(size = r2_score, color = channel_learned,), alpha = 0.7)

        + scale_size_continuous(range = c(2, 8), limits = c(0, 1))


        + geom_vline(xintercept = 0, linetype = "dashed", color = "red")
        + geom_hline(yintercept = 0, linetype = "dashed", color = "red")
        + geom_density2d(color="black", show.legend = FALSE)
        + theme_bw()
        + guides(
            color = guide_legend(title = "Channel\n(if applicable)", order = 1),
            size = guide_legend(title = "R2 score")
        )
        # make legend dots bigger
        + ylab("Treatment contribution (LM beta coefficient)")
        + xlab("Cell count contribution (LM beta coefficient)")
        + ggtitle(paste0("How CellProfiler features contribute\nto ",loop_list[i], " treatments and cell density"))
    )
    plot(lm_fig_gg)
}
dev.off()


In [18]:
pdf(file=lm_cp_fig_abs)
for (i in 1:length(loop_list)){

    df <- lm_df[lm_df$dosage_treatments_list == loop_list[i],]
    lm_fig_gg <- (
        ggplot(df, aes(x = abs_oneb_Metadata_Treatment_Dose_Inhibitor_Dose, y = r2_score))
        + geom_point(aes(size = abs_Metadata_number_of_singlecells, color = channel_learned), alpha = 0.7)

        + theme_bw()
        + guides(
            color = guide_legend(title = "Channel\n(if applicable)", order = 1),
            size = guide_legend(title = "Cell count contributution")
        )
        + ylab("R2 score of LM feature")
        + xlab("Treatment and Dose contribution (LM beta coefficient)")
        + ggtitle(paste0("How CellProfiler features contribute\nto ",loop_list[i], " treatments and cell density"))
    )

    plot(lm_fig_gg)
}
dev.off()