In [None]:
#
library(ggplot2)

#install.packages("drc", dependencies = TRUE)

devtools::install_github("jessica-ewald/fastbmdR")

library(fastbmdR)
library(dplyr)
library(knitr)
library(arrow)

library(ggplot2)
library(ggforce)


## Normalization of cell count of CP data


In [None]:
import pandas as pd
from pycytominer import normalize

# no need to re run just download the df_CC_Proba file



df = pd.read_csv("./outputs/df_CC_Proba.csv")

features = ["Metadata_Count_Cells"]

normalized_list = []

for plate, plate_df in df.groupby("Metadata_plate_map_name"):
    normalized = normalize(
        plate_df,
        features=features,
        samples="Metadata_Compound == 'DMSO'",
        method="mad_robustize"
    )

    normalized_list.append(normalized)

# Fusionner toutes les plaques normalisées
normalized_df = pd.concat(normalized_list, ignore_index=True)

normalized_df = normalized_df.rename(columns={
    'Metadata_Count_Cells_x': 'Metadata_Count_Cells',
    'Metadata_Count_Cells_y': 'Metadata_Count_Cells_Normalized'
})


normalized_df.to_csv("./outputs/df_CC_Proba.csv", index=False)


In [3]:
# we keep only the columns of interest

df <- read.csv("./outputs/df_CC_Proba.csv", sep= ',')


df_cc <- df[,c("Metadata_Compound", "Metadata_Concentration", "Metadata_Count_Cells", "Metadata_Count_Cells_Normalized")]


df_cc <- df_cc[df_cc$Metadata_Compound != "UNTREATED", ]


df_cc <- df_cc %>%
  rename(
    CC1 = Metadata_Count_Cells,
    CC2 = Metadata_Count_Cells_Normalized
  )

print(head(df_cc))


          Metadata_Compound Metadata_Concentration  CC1       CC2
1                 LY2109761                  5.100 1570 -6.069453
2                Cladribine                  1.235 3059 -3.199977
3              Treprostinil                  3.704 3355 -2.629550
4 Aminodarone Hydrochloride                  0.154 3784 -1.802817
5             Actinomycin D                300.000 1822 -5.583820
6                 MUPIROCIN                100.000 3626 -2.107302


### Curves of all compounds : Cell Count  vs Dose

In [4]:
models <- c("Exp2", "Exp3", "Exp4", "Exp5", "Poly2", "Lin", "Power", "Hill")
ncpus <- 1  

unique_compounds <- unique(df_cc$Metadata_Compound)
feat_cols <- colnames(df_cc)[!grepl("Metadata", colnames(df_cc))]

res <- list()
gene_table <- list()

all_bmd_pass <- data.frame()
results_df <- data.frame()

for (compound in unique_compounds) {

    compound_data <- df_cc[df_cc$Metadata_Compound %in% c(compound, "DMSO"), ]
    dose <- compound_data$Metadata_Concentration
    dose_log <- dose
    dose_log[dose_log > 0] <- log10(dose_log[dose_log > 0])

    rank_dose <- unique(dose) %>% sort(decreasing = TRUE)
    dose_spacing <- abs(log10(rank_dose[2] / rank_dose[1]))
    dose_shift <- abs(min(dose_log)) + dose_spacing

    dose_shifted <- dose_log
    dose_shifted[dose_shifted != 0] <- dose_shifted[dose_shifted != 0] + dose_shift

    if (length(unique(compound_data$Metadata_Concentration)) > 2) {

        mat <- t(compound_data[, feat_cols])
        min_val <- abs(min(mat, na.rm = TRUE))
        mat_shifted <- mat + min_val + 0.1 * min_val

        if (length(dose_shifted) == ncol(mat_shifted)) {
            cat("Processing compound:", compound, "\n")
            
            tryCatch({
                fit <- PerformCurveFitting(data = mat_shifted, dose = dose_shifted, ncpus = ncpus, models = models)
                fit_filtered <- FilterDRFit(fit, lof.pval = 0.1, filt.var = "AIC.model")
                fit_final <- PerformBMDCalc(fit_filtered, ncpus = ncpus, num.sds = 2, bmr.method = "sample.mean", log10.dose = TRUE)

                bmd_pass <- fit_final$bmd_res
                bmd_pass <- bmd_pass[bmd_pass$gene.id == 'CC2', ]

                if (nrow(bmd_pass) > 0) {
                    bmd_pass_valid <- bmd_pass[bmd_pass$all.pass == TRUE, c("bmd", "bmdl", "bmdu")]

                    if (nrow(bmd_pass_valid) > 0) {
                        plot_data <- plot_bmd_curve('CC2', fit_final, return_type = "plot.data")
                        plot_data$compound_name <- compound

                        for (i in 1:nrow(bmd_pass_valid)) {
                            plot_data$bmd <- bmd_pass_valid$bmd[i]
                            plot_data$bmd_l <- bmd_pass_valid$bmdl[i]
                            plot_data$bmd_u <- bmd_pass_valid$bmdu[i]

                            results_df <- rbind(results_df, plot_data)
                        }
                    } else {
                        message(sprintf("Aucun bmd valide pour %s / CC2", compound))
                    }
                } else {
                    message(sprintf("Pas de données pour gene.id == 'CC2' dans %s", compound))
                }
            }, error = function(e) {
                message(sprintf("Error with compound %s: %s", compound, e$message))
            })  # <-- ici la parenthèse manquait
        }
    }
}

# Afficher les premiers résultats de results_df
print(head(results_df))


Processing compound: Cladribine 


Aucun bmd valide pour Cladribine / CC2



Processing compound: Treprostinil 


Aucun bmd valide pour Treprostinil / CC2



Processing compound: Aminodarone Hydrochloride 


Aucun bmd valide pour Aminodarone Hydrochloride / CC2



Processing compound: Actinomycin D 


Aucun bmd valide pour Actinomycin D / CC2



Processing compound: MUPIROCIN 


Aucun bmd valide pour MUPIROCIN / CC2



Processing compound: Staurosporine 


Aucun bmd valide pour Staurosporine / CC2



Processing compound: 5,8,11-Eicosatriynoic acid 


Pas de donn<U+00E9>es pour gene.id == 'CC2' dans 5,8,11-Eicosatriynoic acid



Processing compound: Lys05 


Aucun bmd valide pour Lys05 / CC2



Processing compound: Nedocromil (sodium salt) 


Aucun bmd valide pour Nedocromil (sodium salt) / CC2



Processing compound: Rifampicin 


Aucun bmd valide pour Rifampicin / CC2



Processing compound: Amperozide 


Aucun bmd valide pour Amperozide / CC2



Processing compound: IOPANOIC ACID 


Aucun bmd valide pour IOPANOIC ACID / CC2



Processing compound: Triamcinolone acetonide 


Aucun bmd valide pour Triamcinolone acetonide / CC2



Processing compound: Colistin Methanesulfonate (sodium salt) 
Processing compound: 5,6-benzoflavone 


Aucun bmd valide pour 5,6-benzoflavone / CC2



Processing compound: Calcipotriol (hydrate) 


Aucun bmd valide pour Calcipotriol (hydrate) / CC2



Processing compound: Cucurbitacin I 


Aucun bmd valide pour Cucurbitacin I / CC2



Processing compound: Berberine chloride 


Aucun bmd valide pour Berberine chloride / CC2



Processing compound: CLIOQUINOL 


Aucun bmd valide pour CLIOQUINOL / CC2



Processing compound: PEMIROLAST POTASSIUM 


Aucun bmd valide pour PEMIROLAST POTASSIUM / CC2



Processing compound: FCCP 
Processing compound: Bevirimat 


Aucun bmd valide pour Bevirimat / CC2



Processing compound: Natamycin 


Aucun bmd valide pour Natamycin / CC2



Processing compound: Imiquimod 


Aucun bmd valide pour Imiquimod / CC2



Processing compound: MILTEFOSINE 


Aucun bmd valide pour MILTEFOSINE / CC2



Processing compound: Cycloheximide 


Aucun bmd valide pour Cycloheximide / CC2



Processing compound: Fluazinam 


Aucun bmd valide pour Fluazinam / CC2



Processing compound: Ethoxyquin 
  x Observations     f_x                           compound_name      bmd
1 0     5.205924 5.14552 Colistin Methanesulfonate (sodium salt) 4.757576
2 0     6.302454 5.14552 Colistin Methanesulfonate (sodium salt) 4.757576
3 0     5.864998 5.14552 Colistin Methanesulfonate (sodium salt) 4.757576
4 0     5.527753 5.14552 Colistin Methanesulfonate (sodium salt) 4.757576
5 0     7.784406 5.14552 Colistin Methanesulfonate (sodium salt) 4.757576
6 0     4.396536 5.14552 Colistin Methanesulfonate (sodium salt) 4.757576
     bmd_l    bmd_u
1 4.742565 4.771508
2 4.742565 4.771508
3 4.742565 4.771508
4 4.742565 4.771508
5 4.742565 4.771508
6 4.742565 4.771508


In [5]:
models <- c("Exp2", "Exp3", "Exp4", "Exp5", "Poly2", "Lin", "Power", "Hill")
ncpus <- 1  

unique_compounds <- unique(df_cc$Metadata_Compound)
feat_cols <- colnames(df_cc)[!grepl("Metadata", colnames(df_cc))]

res <- list()
gene_table <- list()

all_bmd_pass <- data.frame()
results_df <- data.frame()

for (compound in unique_compounds) {

    compound_data <- df_cc[df_cc$Metadata_Compound %in% c(compound, "DMSO"), ]
    dose <- compound_data$Metadata_Concentration
    dose_log <- dose
    dose_log[dose_log > 0] <- log10(dose_log[dose_log > 0])

    rank_dose <- unique(dose) %>% sort(decreasing = TRUE)
    dose_spacing <- abs(log10(rank_dose[2] / rank_dose[1]))
    dose_shift <- abs(min(dose_log)) + dose_spacing

    dose_shifted <- dose_log
    dose_shifted[dose_shifted != 0] <- dose_shifted[dose_shifted != 0] + dose_shift

    if (length(unique(compound_data$Metadata_Concentration)) > 2) {
       
        # for the negative values
        mat <- t(compound_data[, feat_cols])
        min_val <- abs(min(mat, na.rm = TRUE))
        mat_shifted <- mat + min_val + 0.1 * min_val

        if (length(dose_shifted) == ncol(mat_shifted)) {
            cat("Processing compound:", compound, "\n")
            
            tryCatch({
                fit <- PerformCurveFitting(data = mat_shifted, dose = dose_shifted, ncpus = ncpus, models = models)
                fit_filtered <- FilterDRFit(fit, lof.pval = 0.1, filt.var = "AIC.model")
                fit_final <- PerformBMDCalc(fit_filtered, ncpus = ncpus, num.sds = 2, bmr.method = "sample.mean", log10.dose = TRUE)

                bmd_pass <- fit_final$bmd_res
                bmd_pass <- bmd_pass[bmd_pass$gene.id == 'CC2', ]

                if (nrow(bmd_pass) > 0) {
                    bmd_pass_valid <- bmd_pass[bmd_pass$all.pass == TRUE, c("bmd", "bmdl", "bmdu")]

                    if (nrow(bmd_pass_valid) > 0) {
                        plot_data <- plot_bmd_curve('CC2', fit_final, return_type = "plot.data")
                        plot_data$compound_name <- compound

                        for (i in 1:nrow(bmd_pass_valid)) {
                            plot_data$bmd <- bmd_pass_valid$bmd[i]
                            plot_data$bmd_l <- bmd_pass_valid$bmdl[i]
                            plot_data$bmd_u <- bmd_pass_valid$bmdu[i]

                            results_df <- rbind(results_df, plot_data)
                        }
                    } else {
        
                        plot_data <- plot_bmd_curve('CC2', fit_final, return_type = "plot.data")
                        plot_data$compound_name <- compound
                        plot_data$bmd <- NaN
                        plot_data$bmd_l <- NaN
                        plot_data$bmd_u <- NaN

                        results_df <- rbind(results_df, plot_data)
                    }
                }
            }, error = function(e) {
                message(sprintf("Error with compound %s: %s", compound, e$message))
            })
        }
    }
}

# Afficher les premiers résultats de results_df
print(head(results_df))


Processing compound: Cladribine 
Processing compound: Treprostinil 
Processing compound: Aminodarone Hydrochloride 
Processing compound: Actinomycin D 
Processing compound: MUPIROCIN 
Processing compound: Staurosporine 
Processing compound: 5,8,11-Eicosatriynoic acid 
Processing compound: Lys05 
Processing compound: Nedocromil (sodium salt) 
Processing compound: Rifampicin 
Processing compound: Amperozide 
Processing compound: IOPANOIC ACID 
Processing compound: Triamcinolone acetonide 
Processing compound: Colistin Methanesulfonate (sodium salt) 
Processing compound: 5,6-benzoflavone 
Processing compound: Calcipotriol (hydrate) 
Processing compound: Cucurbitacin I 
Processing compound: Berberine chloride 
Processing compound: CLIOQUINOL 
Processing compound: PEMIROLAST POTASSIUM 
Processing compound: FCCP 
Processing compound: Bevirimat 
Processing compound: Natamycin 
Processing compound: Imiquimod 
Processing compound: MILTEFOSINE 
Processing compound: Cycloheximide 
Processing comp

In [6]:
n_per_page <- 9  


plot_groups_CC0 <- unique(results_df$compound_name)
n_pages_CC0 <- ceiling(length(plot_groups_CC0) / n_per_page)


pdf_file_CC0 <- "./outputs/cc_norm_bmd_curves_all.pdf"
pdf(pdf_file_CC0, width = 15, height = 10)

for (i in 1:n_pages_CC0) {
  tryCatch({
    p <- ggplot(results_df, aes(x = x, y = Observations)) +
      geom_point(show.legend = FALSE) +
      geom_line(aes(y = f_x), show.legend = FALSE) +
      geom_vline(aes(xintercept = bmd), linetype = "solid", color = "red") +  
      geom_vline(aes(xintercept = bmd_l), linetype = "dashed", color = "red") + 
      geom_vline(aes(xintercept = bmd_u), linetype = "dashed", color = "red") +  
      facet_wrap_paginate(~compound_name, ncol = 3, nrow = 3, page = i, scales = "free_y") +
      labs(title = sprintf("BMD curves - Normalized CC  - Page %d", i),
           x = "Concentration",
           y = "Observation") +
      theme_bw() +
      theme(strip.text = element_text(size = 8))
    
    print(p) 
  }, error = function(e) {
    message(sprintf("Error in the page %d : %s", i, e$message))
  })
}

dev.off()

"[1m[22mRemoved 2646 rows containing missing values or values outside the scale range
(`geom_point()`)."
"[1m[22mRemoved 4368 rows containing missing values or values outside the scale range
(`geom_vline()`)."
"[1m[22mRemoved 4368 rows containing missing values or values outside the scale range
(`geom_vline()`)."
"[1m[22mRemoved 4368 rows containing missing values or values outside the scale range
(`geom_vline()`)."
"[1m[22mRemoved 2646 rows containing missing values or values outside the scale range
(`geom_point()`)."
"[1m[22mRemoved 4368 rows containing missing values or values outside the scale range
(`geom_vline()`)."
"[1m[22mRemoved 4368 rows containing missing values or values outside the scale range
(`geom_vline()`)."
"[1m[22mRemoved 4368 rows containing missing values or values outside the scale range
(`geom_vline()`)."
"[1m[22mRemoved 2646 rows containing missing values or values outside the scale range
(`geom_point()`)."
"[1m[22mRemoved 4368 rows containin