In [1]:
using CSV, DataFrames, Serialization

### Extract Training Data from CSV
This script extracts the relevant inputs from the experimental data reported in Yi et al. For example, it extracts the timepoints at which to save the timecourse simulation, then serializes the results to file. As another example, it extracts the ligand dose inputs and converts them to the correct units (from nM to molecules).

Outputs:

000_processed_active_G_timecourse.dict: <br>

Dict{String, Any} with 4 entries: <br>
  "response"                       => [0.0, 0.35, 0.4, 0.36, 0.39, 0.33, 0.24, … <br>
  "save_at"                        => [0, 10, 30, 60, 120, 210, 300, 450, 600] <br>
  "ligand_stimulation (molecules)" => 6.022e17 <br>
  "average_error"                 => 0.015 <br>

000_processed_active_G_dose_response.dict: <br>

Dict{String, Any} with 5 entries: <br>
  "response"                       => [0.0253298, 0.145646, 0.265963, 0.497098,… <br>
  "save_at"                        => [60] <br>
  "normalize_to_response_at_dose"  => 6.022e17 <br>
  "ligand_stimulation (molecules)" => [6.022e13, 6.022e14, 1.20155e15, 3.01815e… <br>
  "average_error"                 => 0.03 <br>

In [2]:
# Load the active G timecourse and dose-response measurements into DataFrames.
timecourse = CSV.File("data/active_G_timecourse.csv") |> DataFrame
dose_response = CSV.File("data/active_G_dose_response.csv") |> DataFrame;

In [3]:
# Convert the ligand stimulation amount to molecules and save it with the processed
# timecourse dictionary.
ligand_stimulation_M = 1.0e-6 # 1 uM
avogadros_constant = 6.022e23
ligand_stimulation_molecules = ligand_stimulation_M * avogadros_constant
average_error = 0.015
timecourse_dict = Dict(
    "save_at" => timecourse[!, "t"],
    # note: this column name starts with a space
    "response" => timecourse[!, " percent Ga"],
    "ligand_stimulation (molecules)" => ligand_stimulation_molecules,
    "average_error" => average_error,
)
# Create the output directory if it is missing; serialize fails when the target
# directory does not exist.
mkpath("outputs")
serialize("outputs/000_processed_active_G_timecourse.dict", timecourse_dict)

In [4]:
# Convert the ligand stimulation from log scale, and from nM to molecules, and save it
# with the processed dose-response dictionary.
save_at = [60] # seconds, needs to be a vector to be inputted into ODEProblem later
avogadros_constant = 6.022e23
normalize_to = 1.0e-6 * avogadros_constant # 1 uM
average_error = 0.03
# Convert dosages extracted from figure 5B of Yi from log10 nM to molecules, rounded to
# 2 significant digits - note, doesn't include 1 uM stimulation.
dose_response_molecules = round.(
    10.0 .^ dose_response[!, "dose(log nM)"] .* 1.0e-9 .* avogadros_constant;
    sigdigits=2,
)
# save
dose_response_dict = Dict(
    "ligand_stimulation (molecules)" => dose_response_molecules,
    "response" => dose_response[!, "relative Ga"],
    "save_at" => save_at,
    "normalize_to_response_at_dose" => normalize_to,
    "average_error" => average_error,
)
# Create the output directory if it is missing; serialize fails when the target
# directory does not exist.
mkpath("outputs")
serialize("outputs/000_processed_active_G_dose_response.dict", dose_response_dict)

### Extract Relevant Outputs for Test Set

000_processed_binding_affinity_dose_response.dict: <br>

Dict{String, Any} with 5 entries: <br>
  "response"                       => [0.0253298, 0.145646, 0.265963, 0.497098,… <br>
  "save_at"                        => [60] <br>
  "normalize_to_response_at_dose"  => 6.022e17 <br>
  "ligand_stimulation (molecules)" => [6.022e13, 6.022e14, 1.20155e15, 3.01815e… <br>
  "average_error"                 => 0.03 <br>

In [5]:
# Convert the ligand stimulation from nM to molecules and save it with the processed
# binding-affinity dose-response dictionary.
dose_response = CSV.File("data/binding_affinity_dose_response.csv") |> DataFrame
save_at = [60] # seconds, needs to be a vector to be inputted into ODEProblem later
avogadros_constant = 6.022e23
normalize_to = 1.0e-6 * avogadros_constant # 1 uM
average_error = 0.03 # assume error is equal across dose response measurements
# Convert dosages extracted from figure 4A of Yi from nM to molecules, rounded to
# 2 significant digits - note, doesn't include 1 uM stimulation.
# Also note, this is not on log scale, unlike the active G dose-response conversion.
dose_response_molecules = round.(
    dose_response[!, "dose(nM)"] .* 1.0e-9 .* avogadros_constant; sigdigits=2
)
# save
dose_response_dict = Dict(
    "ligand_stimulation (molecules)" => dose_response_molecules,
    # note: this column name starts with a space
    "response" => dose_response[!, " relative RL"],
    "save_at" => save_at,
    "normalize_to_response_at_dose" => normalize_to,
    "average_error" => average_error,
)
# Create the output directory if it is missing; serialize fails when the target
# directory does not exist.
mkpath("outputs")
serialize("outputs/000_processed_binding_affinity_dose_response.dict", dose_response_dict)