<span STYLE="font-size:150%"> 
    Calculate concentrations from emission data
</span>

Docker image: gnasello/datascience-env:2023-03-30 \
Latest update: 31 March 2023

# Load libraries

In [1]:
library(ggplot2)
library(latex2exp)
library(ggpubr)
library(yaml)
library(dplyr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




Clone the [r_utils library](https://github.com/gabnasello/r_utils.git) from GitHub into the same folder as this script. 

You can do this by running the following command in a new terminal (open it from JupyterLab):

`git clone https://github.com/gabnasello/r_utils.git`

How to Reuse Functions That You Create In Scripts, [tutorial](https://www.earthdatascience.org/courses/earth-analytics/multispectral-remote-sensing-data/source-function-in-R/)

In [2]:
source("r_utils/ggplot_utils.R")

# Reading data

<span style="color:red">**User Input**</span>

In [3]:
# Input CSV to process. Note: a later cell writes the table, with the computed
# concentrations, back to this same path.
filename <- file.path("data", "2023-03-28_GN011.csv")

In [4]:
# Load the measurement table from `filename` and preview its first six rows
df <- read.csv(filename)
head(df, n = 6L)

Unnamed: 0_level_0,Well.ID,Name,Well,Conc.Dil,X275480,Count,Mean,Std.Dev,CV....,Mean_blnk,concentration
Unnamed: 0_level_1,<chr>,<lgl>,<chr>,<dbl>,<int>,<int>,<int>,<chr>,<chr>,<int>,<dbl>
1,BLK,,H1,,4249,3,3395,777,22.888000000000002,485,1.088247
2,SPL1,,A1,,10349,1,10349,?????,?????,7439,16.691694
3,SPL2,,A2,,22987,1,22987,?????,?????,20077,45.04895
4,SPL3,,A3,,9415,1,9415,?????,?????,6505,14.595977
5,SPL4,,A4,,8467,1,8467,?????,?????,5557,12.468846
6,SPL5,,A5,,13380,1,13380,?????,?????,10470,23.492679


# Select samples

In [5]:
# Rows whose Well.ID starts with this prefix are treated as samples
prefix <- "SPL"

In [6]:
# Flag the rows whose Well.ID starts with `prefix`.
condition <- startsWith(df$Well.ID, prefix)

# Index through which() so that rows with a missing Well.ID (condition is NA)
# are dropped; indexing directly with a logical vector that contains NA would
# add all-NA rows to df_spl.
df_spl <- df[which(condition), ]
head(df_spl)

Unnamed: 0_level_0,Well.ID,Name,Well,Conc.Dil,X275480,Count,Mean,Std.Dev,CV....,Mean_blnk,concentration
Unnamed: 0_level_1,<chr>,<lgl>,<chr>,<dbl>,<int>,<int>,<int>,<chr>,<chr>,<int>,<dbl>
2,SPL1,,A1,,10349,1,10349,?????,?????,7439,16.69169
3,SPL2,,A2,,22987,1,22987,?????,?????,20077,45.04895
4,SPL3,,A3,,9415,1,9415,?????,?????,6505,14.59598
5,SPL4,,A4,,8467,1,8467,?????,?????,5557,12.46885
6,SPL5,,A5,,13380,1,13380,?????,?????,10470,23.49268
7,SPL6,,A6,,8133,1,8133,?????,?????,5223,11.71941


# Calculate concentrations

Determine sample dilution

<span style="color:red">**User Input**</span>

In [7]:
# Dilution factor: predicted concentrations are multiplied by this value
dilution <- 4

## Load regression model of the calibration curve

In [8]:
# Restore the saved calibration model from disk and display it
model <- readRDS(file = "calibration.rds")
model


Call:
lm(formula = Conc.Dil ~ Mean_blnk + 0, data = df_std)

Coefficients:
Mean_blnk  
 0.000561  


## Compute concentrations based on calibration curve

Of the samples

In [9]:
# Mean_blnk is the only predictor of the calibration model, so pass just that
# column (kept as a one-column data frame) to predict().
sampledata <- df_spl["Mean_blnk"]

# Scale the predicted values by the dilution factor
df_spl$concentration <- dilution * predict(model, newdata = sampledata)
head(df_spl)

Unnamed: 0_level_0,Well.ID,Name,Well,Conc.Dil,X275480,Count,Mean,Std.Dev,CV....,Mean_blnk,concentration
Unnamed: 0_level_1,<chr>,<lgl>,<chr>,<dbl>,<int>,<int>,<int>,<chr>,<chr>,<int>,<dbl>
2,SPL1,,A1,,10349,1,10349,?????,?????,7439,16.69169
3,SPL2,,A2,,22987,1,22987,?????,?????,20077,45.04895
4,SPL3,,A3,,9415,1,9415,?????,?????,6505,14.59598
5,SPL4,,A4,,8467,1,8467,?????,?????,5557,12.46885
6,SPL5,,A5,,13380,1,13380,?????,?????,10470,23.49268
7,SPL6,,A6,,8133,1,8133,?????,?????,5223,11.71941


Of the whole dataset

In [10]:
# Same prediction as for the samples, applied to every row of the dataset
newdata <- df["Mean_blnk"]

# Scale the predicted values by the dilution factor
df$concentration <- dilution * predict(model, newdata = newdata)
head(df)

Unnamed: 0_level_0,Well.ID,Name,Well,Conc.Dil,X275480,Count,Mean,Std.Dev,CV....,Mean_blnk,concentration
Unnamed: 0_level_1,<chr>,<lgl>,<chr>,<dbl>,<int>,<int>,<int>,<chr>,<chr>,<int>,<dbl>
1,BLK,,H1,,4249,3,3395,777,22.888000000000002,485,1.088247
2,SPL1,,A1,,10349,1,10349,?????,?????,7439,16.691694
3,SPL2,,A2,,22987,1,22987,?????,?????,20077,45.04895
4,SPL3,,A3,,9415,1,9415,?????,?????,6505,14.595977
5,SPL4,,A4,,8467,1,8467,?????,?????,5557,12.468846
6,SPL5,,A5,,13380,1,13380,?????,?????,10470,23.492679


## Save data

Overwrite initial dataset after calculating the sample concentration

In [11]:
# Write the table, including the new `concentration` column, back to the
# input path (this replaces the original file); row names are not written.
write.csv(x = df, file = filename, row.names = FALSE)

# Calculate amount (percentage) released

## Get the volume of the released media for each sample tested

Prepare and load the `samples_info.csv` file

In [12]:
# CSV holding the per-sample information (one row per Well.ID)
info_file <- file.path("data", "samples_info.csv")

In [13]:
# Load the sample information table from `info_file` and preview its first six rows
df_info <- read.csv(info_file)
head(df_info, n = 6L)

Unnamed: 0_level_0,Well.ID,volume,day,group,Laponite,sample
Unnamed: 0_level_1,<chr>,<dbl>,<int>,<chr>,<int>,<int>
1,SPL1,1.7,0,4-AS,0,1
2,SPL2,1.7,0,4-AS,0,2
3,SPL3,1.7,0,4-AS,0,3
4,SPL4,1.7,0,4-AS,0,4
5,SPL5,1.7,0,4-AS-L,1,1
6,SPL6,1.7,0,4-AS-L,1,2


Merge `df_spl` and `df_info` on the sample ID (`Well.ID`), keeping only the samples that appear in both tables

In [14]:
# The merge below is an inner join, so a sample with no matching row in
# df_info would disappear silently. Report such samples before merging.
unmatched_ids <- setdiff(df_spl$Well.ID, df_info$Well.ID)
if (length(unmatched_ids) > 0) {
  warning(
    "Samples missing from the sample info table are dropped: ",
    paste(unmatched_ids, collapse = ", "),
    call. = FALSE
  )
}

# Inner join on Well.ID; sort = FALSE keeps the rows in their original order
# instead of sorting them by the key.
df_release <- merge(df_spl, df_info, by = "Well.ID",
                    all.x = FALSE, all.y = FALSE, sort = FALSE)
head(df_release)

Unnamed: 0_level_0,Well.ID,Name,Well,Conc.Dil,X275480,Count,Mean,Std.Dev,CV....,Mean_blnk,concentration,volume,day,group,Laponite,sample
Unnamed: 0_level_1,<chr>,<lgl>,<chr>,<dbl>,<int>,<int>,<int>,<chr>,<chr>,<int>,<dbl>,<dbl>,<int>,<chr>,<int>,<int>
1,SPL1,,A1,,10349,1,10349,?????,?????,7439,16.69169,1.7,0,4-AS,0,1
2,SPL2,,A2,,22987,1,22987,?????,?????,20077,45.04895,1.7,0,4-AS,0,2
3,SPL3,,A3,,9415,1,9415,?????,?????,6505,14.59598,1.7,0,4-AS,0,3
4,SPL4,,A4,,8467,1,8467,?????,?????,5557,12.46885,1.7,0,4-AS,0,4
5,SPL5,,A5,,13380,1,13380,?????,?????,10470,23.49268,1.7,0,4-AS-L,1,1
6,SPL6,,A6,,8133,1,8133,?????,?????,5223,11.71941,1.7,0,4-AS-L,1,2


Alternatively, you can select a volume value equal for all samples by uncommenting the cell below

In [15]:
# Uncomment to use a single volume for every sample. The release calculation
# reads `df_release$volume`, so the value must be assigned to `df_release`.
# volume_sample <- 0.450 # ml (see "Info on units")
# df_release$volume <- volume_sample

## Compute amounts (percentage) released

<span style="color:red">**User Input**</span>

In [16]:
# Reference drug amount used as the 100 % value in the release calculation
drug_amount <- 200 # ug

In [18]:
# Released amount (concentration * volume) expressed as a percentage of
# drug_amount. The operation order is kept unchanged so results are identical.
df_release$release <- with(
  df_release,
  concentration * volume / drug_amount * 100
)
head(df_release)

Unnamed: 0_level_0,Well.ID,Name,Well,Conc.Dil,X275480,Count,Mean,Std.Dev,CV....,Mean_blnk,concentration,volume,day,group,Laponite,sample,release
Unnamed: 0_level_1,<chr>,<lgl>,<chr>,<dbl>,<int>,<int>,<int>,<chr>,<chr>,<int>,<dbl>,<dbl>,<int>,<chr>,<int>,<int>,<dbl>
1,SPL1,,A1,,10349,1,10349,?????,?????,7439,16.69169,1.7,0,4-AS,0,1,14.18794
2,SPL2,,A2,,22987,1,22987,?????,?????,20077,45.04895,1.7,0,4-AS,0,2,38.291608
3,SPL3,,A3,,9415,1,9415,?????,?????,6505,14.59598,1.7,0,4-AS,0,3,12.40658
4,SPL4,,A4,,8467,1,8467,?????,?????,5557,12.46885,1.7,0,4-AS,0,4,10.598519
5,SPL5,,A5,,13380,1,13380,?????,?????,10470,23.49268,1.7,0,4-AS-L,1,1,19.968777
6,SPL6,,A6,,8133,1,8133,?????,?????,5223,11.71941,1.7,0,4-AS-L,1,2,9.961502


## Save data

Save the release data to a new file (the input file name with a `_release` suffix) after calculating the amount of drug released at each timepoint

In [19]:
# Output path: the input path without its extension, plus "_release.csv"
new_filename <- paste0(tools::file_path_sans_ext(filename), "_release.csv")
new_filename

In [20]:
# Write the release table to the new file; row names are not written
write.csv(x = df_release, file = new_filename, row.names = FALSE)

# Info on units

If the concentration of the standard samples used to build the calibration curve is in *ug/ml*, then:

1. `df$concentration` (and `df_release$concentration`) is also in ug/ml
2. `df_release$volume` must be in ml
3. `df_release$release` is in % of the total drug loaded into the gel (`drug_amount`, in ug)