The aim of this notebook is to generate the initial training set (or initial plate) for the subsequent active learning loop.

# Python libraries imports

In [1]:
import os
import random

import numpy as np

# Create a folder to save the resulting dataset

In [2]:
# Folder that receives the generated plate file. It is created here (if it
# does not exist yet) so that the final np.savetxt call cannot fail on a
# missing directory.
data_folder = "echo_files"
os.makedirs(data_folder, exist_ok=True)

**Important**  
The seed was not set when the file was first executed.  
The concentrations used for the initial plate in the paper can be found in **data/plate_AL_1_raw_everything.csv**.

**Caution**  
When you run the code with the pre-existing file structure, the results overwrite the existing **echo_files/initial_plate_euCFPS_TL.csv** dataset.

# Defining the tested concentrations and volumes

The concentrations below are defined as a proportion of the maximum allowed concentration.

In [4]:
# Allowed concentration ratios (fractions of the maximum concentration) per component.
# All components share the same four levels, except MgCl2 which has its own set.

_shared_ratio_levels = (0.2, 0.4, 0.6, 1)

# Every component gets its own independent list (no aliasing between components).
KCl_ratios = list(_shared_ratio_levels)
Aminoacids_ratios = list(_shared_ratio_levels)
tRNA_ratios = list(_shared_ratio_levels)
Phosphocreatine_ratios = list(_shared_ratio_levels)
Creatinekinase_ratios = list(_shared_ratio_levels)
mRNA_ratios = list(_shared_ratio_levels)
Ribosomes_ratios = list(_shared_ratio_levels)
RRL_ratios = list(_shared_ratio_levels)
Glucose_ratios = list(_shared_ratio_levels)
MgCl2_ratios = [0.1, 0.3, 0.5, 1]

In [5]:
# Maximum concentration for each component. Resulting concentration = Maximum concentration * Ratio.
# Units are not stated in this notebook — TODO confirm against the experimental protocol.

KCl_max = 125
Aminoacids_max = 500
tRNA_max = 2000
Phosphocreatine_max = 100
Creatinekinase_max = 1000 
mRNA_max = 1000
Ribosomes_max = 20
# NOTE(review): a maximum of 0 makes every RRL concentration 0, whatever ratio is chosen — confirm this is intended.
RRL_max = 0
Glucose_max = 10
MgCl2_max = 1.5

# Defining control samples (maximum and autofluorescence)

This defines the control samples (reference) used in the first plate, that have maximum concentrations for all components. 
The autofluorescence **<sup>1</sup>** control has the same composition but no mRNA.  
**<sup>1</sup>** Autofluorescence is the fluorescence measured in the cell-free reaction supplemented with water and using the reference composition.

In [6]:
# Reference ("maximum") extract: each of the 10 components at its maximum concentration.

# Unit multipliers, one per component (a 1 x 10 integer array of ones).
maximum_extract = np.ones((1, 10), dtype = 'int')

# Component maxima, in the column order used for every sample of this notebook.
_component_maxima = [KCl_max,
                     Aminoacids_max,
                     tRNA_max,
                     Phosphocreatine_max,
                     Creatinekinase_max,
                     mRNA_max,
                     Ribosomes_max,
                     RRL_max,
                     Glucose_max,
                     MgCl2_max]

max_sample_conc = [maximum_extract[0, col] * _component_maxima[col] for col in range(10)]

In [7]:
# Autofluorescence reference: same as the maximum extract, but without mRNA.

# Unit multipliers, one per component (a 1 x 10 integer array of ones).
autofluorescence_extract = np.ones((1, 10), dtype = 'int')

# Per-component factor, in the usual column order; the mRNA slot (index 5) is 0.
_autofluorescence_factors = [KCl_max,
                             Aminoacids_max,
                             tRNA_max,
                             Phosphocreatine_max,
                             Creatinekinase_max,
                             0,  # No mRNA in the autofluorescence control
                             Ribosomes_max,
                             RRL_max,
                             Glucose_max,
                             MgCl2_max]

autofluorescence_sample_conc = [autofluorescence_extract[0, col] * _autofluorescence_factors[col]
                                for col in range(10)]

In [8]:
# Turn both reference samples into 1 x 10 row vectors.
max_sample_conc = np.asarray(max_sample_conc).reshape(1, 10)
autofluorescence_sample_conc = np.asarray(autofluorescence_sample_conc).reshape(1, 10)

# Control array: row 0 is the maximum reference, row 1 the autofluorescence control.
ctrl_array = np.concatenate((max_sample_conc, autofluorescence_sample_conc), axis = 0)

# All components at maximum, except one at minimum 

This defines a set of combinations for the initial plate as follows:  
* 10 forced combinations: All the components are at the maximum concentration, except one at a low concentration (0.1 × its maximum).
* 1 forced combination: All the components are at the maximum concentration.

In [9]:
# Build the "all components at maximum, except one low" samples with a single
# loop instead of eleven copy-pasted cells. The resulting array is identical,
# row for row, to what the copy-pasted cells produced.

# Highest allowed ratio of each component (last entry of its ratio list).
KCl = KCl_ratios[-1]
Aminoacids = Aminoacids_ratios[-1]
tRNA = tRNA_ratios[-1]
Phosphocreatine = Phosphocreatine_ratios[-1]
Creatinekinase = Creatinekinase_ratios[-1]
mRNA = mRNA_ratios[-1]
Ribosomes = Ribosomes_ratios[-1]
RRL = RRL_ratios[-1]
Glucose = Glucose_ratios[-1]
MgCl2 = MgCl2_ratios[-1]

# Ratios and maxima in the column order used for every sample.
top_ratios = [KCl, Aminoacids, tRNA, Phosphocreatine, Creatinekinase,
              mRNA, Ribosomes, RRL, Glucose, MgCl2]
component_maxima = [KCl_max, Aminoacids_max, tRNA_max, Phosphocreatine_max, Creatinekinase_max,
                    mRNA_max, Ribosomes_max, RRL_max, Glucose_max, MgCl2_max]

# Ratio applied to the single lowered component.
# NOTE(review): this is hard-coded to 0.1 for every component, whereas the lowest
# entry of the *_ratios lists is 0.2 for all components except MgCl2 — confirm.
LOW_RATIO = 0.1

# The set starts with the all-maximum reference row.
all_max_except_one_low = max_sample_conc

# Indices 0-9 lower one component each (in column order); the final None keeps
# every component at its maximum.
# NOTE(review): together with the starting row this gives two identical
# all-maximum rows (12 rows in total) — confirm the duplicate is intended.
for lowered_index in list(range(10)) + [None]:
    sample_ratios = list(top_ratios)
    if lowered_index is not None:
        sample_ratios[lowered_index] = LOW_RATIO
    this_sample_conc = np.reshape(
        [ratio * maximum for ratio, maximum in zip(sample_ratios, component_maxima)],
        (1, 10))
    all_max_except_one_low = np.concatenate((all_max_except_one_low, this_sample_conc), axis = 0)

# this_sample_conc is left holding the all-maximum row, exactly as the last
# copy-pasted cell left it.

In [22]:
# Sanity check: 1 starting all-maximum row + 10 one-component-low rows + 1 final all-maximum row.
print(all_max_except_one_low.shape)

(12, 10)


# All components at minimum, except one at maximum 

This defines a set of combinations for the initial plate as follows:  
* 10 forced combinations: All the components are at the minimum concentration, except one at the maximum.
* 1 forced combination: All the components are at the minimum concentration.

In [23]:
# Build the "all components at minimum, except one high" samples with a single
# loop instead of eleven copy-pasted cells. The resulting array is identical,
# row for row, to what the copy-pasted cells produced.

# NOTE(review): at this point this_sample_conc still holds the all-maximum row
# built at the end of the previous section, so this "minimum" set starts with an
# all-maximum sample (12 rows in total) — confirm this is intended.
all_min_except_one_high = this_sample_conc

# Lowest allowed ratio of each component (first entry of its ratio list).
KCl = KCl_ratios[0]
Aminoacids = Aminoacids_ratios[0]
tRNA = tRNA_ratios[0]
Phosphocreatine = Phosphocreatine_ratios[0]
Creatinekinase = Creatinekinase_ratios[0]
mRNA = mRNA_ratios[0]
Ribosomes = Ribosomes_ratios[0]
RRL = RRL_ratios[0]
Glucose = Glucose_ratios[0]
MgCl2 = MgCl2_ratios[0]

# Ratios and maxima in the column order used for every sample.
bottom_ratios = [KCl, Aminoacids, tRNA, Phosphocreatine, Creatinekinase,
                 mRNA, Ribosomes, RRL, Glucose, MgCl2]
component_maxima = [KCl_max, Aminoacids_max, tRNA_max, Phosphocreatine_max, Creatinekinase_max,
                    mRNA_max, Ribosomes_max, RRL_max, Glucose_max, MgCl2_max]

# Ratio applied to the single raised component (its full maximum).
HIGH_RATIO = 1

# Indices 0-9 raise one component each (in column order); the final None keeps
# every component at its minimum.
for raised_index in list(range(10)) + [None]:
    sample_ratios = list(bottom_ratios)
    if raised_index is not None:
        sample_ratios[raised_index] = HIGH_RATIO
    this_sample_conc = np.reshape(
        [ratio * maximum for ratio, maximum in zip(sample_ratios, component_maxima)],
        (1, 10))
    all_min_except_one_high = np.concatenate((all_min_except_one_high, this_sample_conc), axis = 0)

In [36]:
# Sanity check: 1 starting row (taken from this_sample_conc) + 10 one-component-high rows + 1 all-minimum row.
print(all_min_except_one_high.shape)

(12, 10)


# Filling the rest randomly

The rest of the plate will be filled randomly, while avoiding repeats.

In [37]:
# Stack the controls and both forced-combination sets row-wise (one sample per row).
answer_array = np.concatenate((ctrl_array, all_max_except_one_low, all_min_except_one_high), axis = 0)

In [38]:
# Sanity check: number of forced rows (controls + both forced-combination sets) before the random fill.
print(answer_array.shape)

(26, 10)


In [39]:
def present_in_array_index(new_sample, array):
    
    """
    Verify if a sample is present in an array.

    Parameters
    ----------
    new_sample : array-like
        One sample, with as many values as `array` has columns.
    array : numpy.ndarray of shape (n_samples, n_components) or None
        Samples to search, one per row. None means "no samples yet".

    Returns
    -------
    (present, index) : tuple (bool, int)
        `present` is True when a row of `array` equals `new_sample` exactly;
        `index` is then the position of the first matching row. When no row
        matches (or `array` is None or has no rows), returns (False, -1).

    Raises
    ------
    ValueError
        If `new_sample` cannot be reshaped to one row of `array`.
    """
    
    if array is None:
        return(False, -1)
    # One row with the same width as the array, so a sample of the wrong
    # length is rejected instead of being silently broadcast.
    new_sample = np.reshape(np.array(new_sample), (1, array.shape[1]))
    for i in range(array.shape[0]):
        if np.array_equiv(array[i,:],new_sample):
            return(True, i)
    # No match: report -1, consistent with the `array is None` case
    # (this also covers an array without any rows).
    return(False, -1)

In [42]:
def generate_random_grid(array_to_avoid, sample_size = 100, n_levels = 3, rng = None):

    """
    Fill a plate with randomly drawn samples, avoiding repeats.

    For each of the 10 components, a new sample picks one of the first
    `n_levels` entries of that component's ratio list and multiplies it by the
    component's maximum concentration. A sample already present in the array
    is discarded and drawn again.

    Parameters
    ----------
    array_to_avoid : numpy.ndarray of shape (n_samples, 10) or None
        Samples already on the plate; they are kept as the first rows of the
        result. None starts from an empty plate.
    sample_size : int
        Total number of rows wanted in the result (existing rows included).
    n_levels : int
        Number of ratio levels, counted from the start of each ratio list,
        that can be drawn.
        NOTE(review): the default of 3 reproduces the original
        `np.random.randint(0, high = 3, ...)`, whose upper bound is exclusive,
        so the fourth (highest) ratio of each list is never drawn randomly —
        confirm whether 4 was intended.
    rng : numpy.random.Generator or None
        Source of randomness. None uses numpy's global `np.random` state, as
        the original code did; pass `np.random.default_rng(seed)` for a
        reproducible plate.

    Returns
    -------
    numpy.ndarray or None
        `array_to_avoid` followed by the new samples. If `array_to_avoid`
        already has `sample_size` rows or more, it is returned unchanged.

    Raises
    ------
    ValueError
        If `n_levels` does not fit the ratio lists, or if there are not enough
        distinct samples left to reach `sample_size` (the rejection loop would
        otherwise never finish).
    """

    # Ratio lists and maxima in the column order used for every sample.
    ratio_tables = [KCl_ratios, Aminoacids_ratios, tRNA_ratios, Phosphocreatine_ratios,
                    Creatinekinase_ratios, mRNA_ratios, Ribosomes_ratios, RRL_ratios,
                    Glucose_ratios, MgCl2_ratios]
    component_maxima = [KCl_max, Aminoacids_max, tRNA_max, Phosphocreatine_max,
                        Creatinekinase_max, mRNA_max, Ribosomes_max, RRL_max,
                        Glucose_max, MgCl2_max]

    if n_levels < 1 or any(n_levels > len(table) for table in ratio_tables):
        raise ValueError("n_levels must be between 1 and the length of the shortest ratio list")

    active_learning_array = array_to_avoid
    if array_to_avoid is None:
        answerSize = 0
    else:
        answerSize = active_learning_array.shape[0]

    if answerSize < sample_size:
        # Count the distinct samples that can still be drawn, so that an
        # impossible request fails fast instead of looping forever.
        level_sets = [{ratio * maximum for ratio in table[:n_levels]}
                      for table, maximum in zip(ratio_tables, component_maxima)]
        n_available = 1
        for levels in level_sets:
            n_available *= len(levels)
        if active_learning_array is not None:
            already_taken = {tuple(row) for row in active_learning_array.tolist()
                             if all(value in levels for value, levels in zip(row, level_sets))}
            n_available -= len(already_taken)
        if sample_size - answerSize > n_available:
            raise ValueError("not enough distinct samples left to reach sample_size")

    while answerSize < sample_size:
        # One random level index per component (upper bound is exclusive).
        if rng is None:
            this_sample = np.random.randint(0, high = n_levels, size = 10, dtype='int')
        else:
            this_sample = rng.integers(0, high = n_levels, size = 10)
        this_sample_conc = [table[level] * maximum
                            for table, level, maximum in zip(ratio_tables, this_sample, component_maxima)]

        this_sample_conc = np.reshape(this_sample_conc, (1, 10))
        # Keep the sample only if it is not on the plate yet.
        if not present_in_array_index(this_sample_conc, active_learning_array)[0]:
            answerSize = answerSize + 1
            if active_learning_array is None:
                active_learning_array = this_sample_conc
            else:
                active_learning_array = np.concatenate((active_learning_array, this_sample_conc), axis = 0)
    return(active_learning_array)

In [43]:
# Top up the forced rows with random, non-repeated samples until the plate holds 102 rows in total.
answer_array = generate_random_grid(answer_array, sample_size = 102)

In [44]:
# Write the complete plate to a ';'-separated text file, one sample per row,
# with five decimals per value and the component names as header line.
headers = "KCl; Amino acids; tRNA; Phosphocreatine; Creatine kinase; mRNA; Ribosomes; RRL; Glucose; MgCl2"
output_path = "{}/initial_plate_euCFPS_TL.csv".format(data_folder)
np.savetxt(output_path, answer_array, fmt='%.5f', delimiter=";", header = headers)