## Curve fitting of proteins by dose 

In order to start plotting the curves, we made some modifications to the dose values:  

- we transformed the doses to a logarithmic scale: `dose_log <- log10(dose)`  
- we set the dose spacing to 1: `dose_spacing = 1`  
- we shifted the dose values to ensure all were positive:  
  `dose_shift = abs(min(dose_log)) + dose_spacing`  
- finally, we applied the shift: `dose_log_shifted <- dose_log + dose_shift`  

We then used the **fastbmdR** package on our **norm_signal** data, after filtering out proteins that had 100% missing values.

In [None]:
# Install dplyr only when it is missing, so re-running this cell does not
# re-download and reinstall the package every time.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}


Les packages binaires téléchargés sont dans
	/var/folders/s6/prvmffjx7bl2mwcbcpxy05ph0000gn/T//RtmpSpTsiO/downloaded_packages


In [15]:
# Install the GitHub tooling only when it is missing, so re-running this cell
# does not reinstall packages that are already available.
tooling_pkgs <- c("devtools", "remotes")
is_installed <- vapply(
  tooling_pkgs,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(tooling_pkgs[!is_installed])
}

# install_github() itself skips the install when the remote SHA is unchanged.
devtools::install_github("jessica-ewald/fastbmdR")


Les packages binaires téléchargés sont dans
	/var/folders/s6/prvmffjx7bl2mwcbcpxy05ph0000gn/T//RtmpSpTsiO/downloaded_packages


Using GitHub PAT from the git credential store.

Skipping install of 'fastbmdR' from a github remote, the SHA1 (b3681861) has not changed since last install.
  Use `force = TRUE` to force installation



In [16]:
# Install ggplot2 only when it is missing, then attach it.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Install drc (with its optional dependencies) only when it is missing.
if (!requireNamespace("drc", quietly = TRUE)) {
  install.packages("drc", dependencies = TRUE)
}

# install_github() itself skips the install when the remote SHA is unchanged.
devtools::install_github("jessica-ewald/fastbmdR")



Les packages binaires téléchargés sont dans
	/var/folders/s6/prvmffjx7bl2mwcbcpxy05ph0000gn/T//RtmpSpTsiO/downloaded_packages

Les packages binaires téléchargés sont dans
	/var/folders/s6/prvmffjx7bl2mwcbcpxy05ph0000gn/T//RtmpSpTsiO/downloaded_packages


Using GitHub PAT from the git credential store.

Skipping install of 'fastbmdR' from a github remote, the SHA1 (b3681861) has not changed since last install.
  Use `force = TRUE` to force installation



In [1]:
library(fastbmdR)
library(dplyr)
library(knitr)


Attachement du package : 'dplyr'


Les objets suivants sont masqués depuis 'package:stats':

    filter, lag


Les objets suivants sont masqués depuis 'package:base':

    intersect, setdiff, setequal, union




In [15]:
# Fit dose-response curves per compound with fastbmdR and collect, for each
# compound, the features whose benchmark dose (BMD) passes all filters.
#
# Outputs (left in the calling environment):
#   res        - named list: compound -> fastbmdR fit object, with an extra
#                `bmd_pass` element holding the rows of `bmd_res` where
#                `all.pass` is TRUE.
#   gene_table - named list: compound -> data.frame(gene_id, bmd_value).
df <- read.csv("./outputs/norm_signal_filtered.csv", sep = ",")

models <- c("Exp2", "Exp3", "Exp4", "Exp5", "Poly2", "Lin", "Power", "Hill")

ncpus <- 1

# DMSO rows are appended to every compound below, so DMSO is not fitted as a
# compound of its own. NA labels are dropped so they cannot select rows.
unique_compounds <- setdiff(
  unique(df$Metadata_Compound),
  c("UNTREATED", "DMSO", NA)
)
feat_cols <- colnames(df)[!grepl("Metadata", colnames(df))]
res <- list()
gene_table <- list()

dose_spacing <- 1

# Loop-invariant: which rows of `df` are DMSO samples.
is_dmso <- df$Metadata_Compound %in% "DMSO"

for (compound in unique_compounds) {
  # Compound rows plus DMSO rows, kept in the row order of `df`.
  compound_data <- df[df$Metadata_Compound %in% compound | is_dmso, ]

  # Same threshold as before: more than two distinct concentrations
  # (DMSO included) are required to attempt a fit.
  if (length(unique(compound_data$Metadata_Concentration)) <= 2) {
    next
  }

  print(paste("Processing compound:", compound))

  # Build the dose vector row by row so that dose[i] always belongs to
  # sample i of `compound_data`. Concatenating compound doses and DMSO doses
  # would only line up when all DMSO rows come after the compound rows.
  treated <- compound_data$Metadata_Compound %in% compound
  dose <- as.numeric(compound_data$Metadata_Concentration)
  dose_log <- log10(dose[treated])
  # Shift the log10 doses so that they are all positive.
  dose_shift <- abs(min(dose_log)) + dose_spacing
  # Treated samples get the shifted log10 dose; DMSO samples keep their raw
  # concentration value.
  dose[treated] <- dose_log + dose_shift

  # Features in rows, samples in columns; drop = FALSE keeps a data frame
  # even when there is a single feature column.
  compound_mat <- t(compound_data[, feat_cols, drop = FALSE])

  # Offset the signal by 1.1 * |global minimum|.
  min_val <- abs(min(compound_mat, na.rm = TRUE))
  add_min <- min_val + 0.1 * min_val
  mat_new <- compound_mat + add_min

  # `dose` has one entry per row of `compound_data`, i.e. one per column of
  # `mat_new`, so no separate length check is needed.
  tryCatch({
    fit_obj <- PerformCurveFitting(
      data = mat_new, dose = dose, ncpus = ncpus, models = models
    )

    fit_obj <- FilterDRFit(fit_obj, lof.pval = 0.1, filt.var = "AIC.model")

    fit_obj <- PerformBMDCalc(
      fit_obj,
      ncpus = ncpus, num.sds = 2, bmr.method = "sample.mean",
      log10.dose = TRUE
    )

    # Keep only the BMD rows flagged as passing every filter.
    bmd_res <- fit_obj$bmd_res
    bmd_pass <- bmd_res[bmd_res$all.pass, ]
    fit_obj$bmd_pass <- bmd_pass

    res[[compound]] <- fit_obj

    gene_table[[compound]] <- data.frame(
      gene_id = bmd_pass[["gene.id"]],
      bmd_value = bmd_pass[["bmd"]]
    )
  }, error = function(e) {
    print(paste("Error with compound:", compound, "- ignoring this compound"))
    # Report the reason so that failures can be diagnosed.
    message("  Reason: ", conditionMessage(e))
  })
}



[1] "Processing compound: Actinomycin D"
[1] "Processing compound: Bevirimat"
[1] "Processing compound: Ethoxyquin"
[1] "Processing compound: Amperozide"
[1] "Processing compound: IOPANOIC ACID"
[1] "Processing compound: Treprostinil"
[1] "Processing compound: MILTEFOSINE"
[1] "Error with compound: MILTEFOSINE - ignoring this compound"
[1] "Processing compound: Lys05"
[1] "Processing compound: 5,8,11-Eicosatriynoic acid"
[1] "Processing compound: Cycloheximide"
[1] "Processing compound: CLIOQUINOL"
[1] "Processing compound: Berberine chloride"
[1] "Processing compound: Rifampicin"


"Production de NaN"


[1] "Processing compound: Cucurbitacin I"
[1] "Processing compound: Triamcinolone acetonide"
[1] "Processing compound: Imiquimod"
[1] "Processing compound: MUPIROCIN"
[1] "Processing compound: Nedocromil (sodium salt)"
[1] "Processing compound: Staurosporine"
[1] "Processing compound: Cladribine"
[1] "Processing compound: 5,6-benzoflavone"


"Production de NaN"


[1] "Processing compound: Natamycin"
[1] "Error with compound: Natamycin - ignoring this compound"
[1] "Processing compound: FCCP"
[1] "Processing compound: PEMIROLAST POTASSIUM"
[1] "Processing compound: Calcipotriol (hydrate)"
[1] "Processing compound: Aminodarone Hydrochloride"
[1] "Processing compound: Colistin Methanesulfonate (sodium salt)"
[1] "Processing compound: Fluazinam"


In [None]:
## Build a table of the protein ids that show a response, together with the
## corresponding compound names. # nolint

gene_df <- do.call(rbind, lapply(names(gene_table), function(compound) {
  gene_data <- gene_table[[compound]]  # per-compound data.frame of BMD hits

  # Compounds without any passing protein contribute no rows.
  if (is.null(gene_data) || nrow(gene_data) == 0) {
    return(NULL)
  }

  data.frame(
    gene_id = gene_data$gene_id,
    bmd_value = gene_data$bmd_value,
    compound_name = compound,
    stringsAsFactors = FALSE
  )
}))

# do.call(rbind, ...) yields NULL when no compound produced a hit; fall back
# to an empty table with the same columns so the code below still runs.
if (is.null(gene_df)) {
  gene_df <- data.frame(
    gene_id = character(0),
    bmd_value = numeric(0),
    compound_name = character(0),
    stringsAsFactors = FALSE
  )
}

## Convert each BMD value back to an actual concentration by undoing the
## log10 transform and the per-compound dose shift.

# The shift depends only on the compound, so compute it once per compound
# instead of once per row of gene_df.
hit_compounds <- unique(gene_df$compound_name)
dose_shifts <- vapply(hit_compounds, function(compound) {
  dose <- df$Metadata_Concentration[df$Metadata_Compound %in% compound]
  abs(min(log10(dose))) + 1
}, numeric(1))

gene_df$Concentration <-
  10^(gene_df$bmd_value - unname(dose_shifts[gene_df$compound_name]))

kable(head(gene_df))



|gene_id | bmd_value|compound_name | Concentration|
|:-------|---------:|:-------------|-------------:|
|P04141  |  5.290310|Actinomycin D |      292.6858|
|P12645  |  5.263819|Actinomycin D |      275.3657|
|P39900  |  5.233620|Actinomycin D |      256.8690|
|P49767  |  5.231685|Actinomycin D |      255.7270|
|P60568  |  5.264667|Actinomycin D |      275.9044|
|Q03167  |  5.227861|Actinomycin D |      253.4847|

In [6]:
# Collect, for every (protein, compound) hit in gene_df, the plot data
# returned by plot_bmd_curve(), tagged with the protein and compound ids.
# Entries that fail are reported and left as NULL, which rbind() ignores.
# seq_len(NROW(.)) yields an empty loop when gene_df has no rows or is NULL.
list_plot <- lapply(seq_len(NROW(gene_df)), function(i) {
  gene_id <- gene_df$gene_id[i]
  compound <- gene_df$compound_name[i]

  tryCatch({
    temp <- plot_bmd_curve(gene_id, res[[compound]], return_type = "plot.data")
    temp$protein <- gene_id
    temp$compound <- compound
    temp
  }, error = function(e) {
    message(sprintf("Erreur pour gene_id %s et compound %s : %s", gene_id, compound, e$message))
    NULL
  })
})

final_df <- do.call(rbind, list_plot)

kable(head(final_df))






|  x| Observations|       f_x|protein |compound      |
|--:|------------:|---------:|:-------|:-------------|
|  0|    0.1938238| 0.1930744|P04141  |Actinomycin D |
|  0|    0.1969728| 0.1930744|P04141  |Actinomycin D |
|  0|    0.1887979| 0.1930744|P04141  |Actinomycin D |
|  0|    0.1924886| 0.1930744|P04141  |Actinomycin D |
|  0|    0.1884608| 0.1930744|P04141  |Actinomycin D |
|  0|    0.1857967| 0.1930744|P04141  |Actinomycin D |

In [31]:
library(ggplot2)
library(ggforce)  # provides facet_wrap_paginate

# Write the observed points and fitted curves to a multi-page PDF, one facet
# per compound, n_cols x n_rows facets per page.
plot_results <- final_df
n_cols <- 5
n_rows <- 5
n_per_page <- n_cols * n_rows
pdf_w <- 12
pdf_h <- 10

compounds <- unique(plot_results$compound)
n_pages <- ceiling(length(compounds) / n_per_page)

pdf("plot_results.pdf", width = pdf_w, height = pdf_h)
tryCatch({
  # seq_len() gives an empty loop when there is nothing to plot, whereas
  # 1:n_pages would iterate over c(1, 0).
  for (i in seq_len(n_pages)) {
    tryCatch({
      p <- ggplot(plot_results, aes(x = x)) +
        # Experimental observations
        geom_point(aes(y = Observations), color = "black", alpha = 0.5) +
        # Fitted curves: group by protein so that each protein gets its own
        # line instead of all proteins of a compound being joined together.
        geom_line(aes(y = f_x, group = protein), color = "blue") +
        facet_wrap_paginate(~ compound, ncol = n_cols, nrow = n_rows, page = i, scales = "free_y") +
        theme_bw() +
        theme(strip.text = element_text(size = 6))

      print(p)
    }, error = function(e) {
      message(sprintf("Error in plotting page %d: %s", i, e$message))
    })
  }
}, finally = {
  # Always close the PDF device, even if the loop is interrupted.
  dev.off()
})


"[1m[22mRemoved 4018 rows containing missing values or values outside the scale range
(`geom_point()`)."


In [None]:
library(gridExtra)

# `plot_list` is not created by any earlier cell visible in this notebook.
# When it is missing, build it from the fitted results, one plot per
# (protein, compound) hit.
# NOTE(review): assumes plot_bmd_curve() returns a plot object by default that
# grid.arrange() can lay out - confirm against fastbmdR.
if (!exists("plot_list")) {
  plot_list <- lapply(seq_len(NROW(gene_df)), function(i) {
    gene_id <- gene_df$gene_id[i]
    compound <- gene_df$compound_name[i]
    tryCatch(
      plot_bmd_curve(gene_id, res[[compound]]),
      error = function(e) {
        message(sprintf("Erreur pour gene_id %s et compound %s : %s", gene_id, compound, e$message))
        NULL
      }
    )
  })
  # Drop the entries whose plot could not be produced.
  plot_list <- Filter(Negate(is.null), plot_list)
}

gridExtra::grid.arrange(grobs = plot_list, ncol = 3)


In [None]:
# Print the BMD curve of every (protein, compound) hit listed in gene_df.
# seq_len(NROW(.)) yields an empty loop when gene_df has no rows or is NULL,
# whereas 1:nrow(gene_df) would not.
for (i in seq_len(NROW(gene_df))) {
  # Resolve the ids before tryCatch so the error handler can always report
  # the ids of the current row.
  gene_id <- gene_df$gene_id[i]
  compound <- gene_df$compound_name[i]

  tryCatch({
    temp <- plot_bmd_curve(gene_id, res[[compound]])
    print(temp)
  }, error = function(e) {
    message(sprintf("Erreur pour gene_id %s et compound %s : %s", gene_id, compound, e$message))
  })
}
