Mathilde Koch, INRA, Faulon's group.

The aim of this script is to convert a named concentration file and a TECAN result file into yield results.

# Importing necessary libraries

In [1]:
import numpy as np
import csv
import copy

In [2]:
# Input locations: both files are read from <folder>/.
folder = "example"
file_name = "example"
result_name = "example_TECAN"
# Plate-reader results; the "name" and "Time_5" columns are read from it further down.
localisation = "{}/{}.csv".format(folder, result_name)
# Concentration of every component for each named well.
data_source = "{}/{}_concentrations_reconstituted.csv".format(folder, file_name)

In [3]:
# Threshold passed to remove_outlier_cv: a triplicate whose sd/mean * 100 exceeds
# it gets one of its replicates discarded.
CV = 30  # Allowed variation
# Output locations, all inside <folder>/.
# Reduced set of columns plus yield and yield_std (rows with DNA == 0 left out).
export_place = "{}/{}_yield_and_std.csv".format(folder, file_name)
# Every column: concentrations, raw readings, well names, mean, std, yield.
all_together_place = "{}/{}_everything.csv".format(folder, file_name)
# Plate-layout tables (one line per plate row) of mean, std and std/mean ratio.
draw_mean = "{}/{}_draw_mean.csv".format(folder, file_name)
draw_std =  "{}/{}_draw_std.csv".format(folder, file_name)
draw_ratio =  "{}/{}_draw_ratio.csv".format(folder, file_name)
# Original readings of the triplicates in which an outlier was detected.
outliers =  "{}/{}_outliers.csv".format(folder, file_name)
# One-sentence summary of the number of outliers.
comments_file = "{}/{}_comments.txt".format(folder, file_name)


In [4]:
# Load the concentration file as floats, skipping its header line.
# NOTE(review): this array is not referenced by any other visible cell - confirm
# whether it is still needed.
my_current_data = np.genfromtxt(data_source, delimiter=',', skip_header  = 1, dtype = "float")

In [5]:
# Map every well name to its row (a dict of column -> text) of the concentration file.
wells_information = {}

# newline="" is the way the csv module expects its input files to be opened.
with open(data_source, newline="") as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        well_name = row["name"]
        if well_name == "":
            # A row without a well name cannot be matched to a reading: skip it
            # instead of storing it under an empty key.
            continue
        wells_information[well_name] = row
        # Report the wells that contain no DNA; P3 is never reported here.
        if well_name != "P3" and row["DNA"] == "0":
            print(well_name)


A6
A12
A18


# Functions

In [6]:
def present_in_array(new_sample, array):
    """Tell whether ``new_sample`` equals one of the rows of ``array``.

    Parameters
    ----------
    new_sample : sequence
        Values of the candidate row (the callers in this file pass the 16
        concentrations of a well). It is flattened to a single row before
        the comparison.
    array : numpy.ndarray
        2-D array whose rows are compared, in order, with ``new_sample``.

    Returns
    -------
    tuple
        ``(present, i)``. When a row matches, ``present`` is True and ``i`` is
        the index of the first matching row. Otherwise ``present`` is False and
        ``i`` is the index of the last row examined, i.e. ``-1`` when ``array``
        has no rows.
    """
    new_sample = np.reshape(np.array(new_sample), (1, -1))
    # Start from -1 so that the returned index is defined even when the loop
    # body never runs (array without rows).
    i = -1
    for i in range(array.shape[0]):
        if np.array_equiv(array[i, :], new_sample):
            return (True, i)
    return (False, i)

In [7]:
def remove_outlier_cv(row, cv):
    """Discard the most distant replicate of a triplicate that varies too much.

    ``row`` holds 16 concentrations followed by three readings. The spread of
    the readings is measured as ``std / mean * 100``; when it exceeds ``cv``
    the extreme reading (lowest or highest) lying further from the middle one
    is dropped and the third slot is filled with the marker ``-1``.

    Returns ``(True, cleaned_row, row)`` when a reading was dropped and
    ``(False, row, 0)`` otherwise.
    """
    concentrations, readings = row[0:16], row[16:19]
    spread = np.std(readings) / np.mean(readings) * 100
    if not spread > cv:
        return (False, row, 0)

    lowest = np.argmin(readings)
    highest = np.argmax(readings)
    # The three positions are 0, 1 and 2, so the remaining one is 3 minus the others.
    middle = 3 - lowest - highest
    gap_below = readings[middle] - readings[lowest]
    gap_above = readings[highest] - readings[middle]
    if gap_below > gap_above:
        # The lowest reading is the isolated one: keep middle and highest.
        kept = [middle, highest]
    else:
        kept = [lowest, middle]
    trimmed = np.concatenate((readings[kept], np.array([-1])), axis=0)
    return (True, np.concatenate((concentrations, trimmed), axis=0), row)

In [8]:
def calculate_yield_mean_sd(x, autofluo, ref_fluo):
    """Normalise fluorescence readings into a yield and summarise them.

    Each reading is converted with
    ``(reading - mean(autofluo)) / (mean(ref_fluo) - mean(autofluo))``.

    Parameters
    ----------
    x : sequence of numbers
        Readings of one condition. A ``-1`` is the marker written by
        ``remove_outlier_cv`` for a discarded replicate and is ignored.
    autofluo : sequence of numbers
        Readings of the background control; all of them are averaged.
    ref_fluo : sequence of numbers
        Readings of the reference condition; a ``-1`` in the third position
        is ignored.

    Returns
    -------
    dict
        ``{"yield_mean": ..., "yield_std": ...}``: mean and standard deviation
        of the normalised readings.
    """
    auto_value = np.mean(autofluo)
    # The length checks make inputs with fewer than three readings valid
    # instead of raising IndexError.
    if len(ref_fluo) > 2 and ref_fluo[2] == -1:
        ref_fluo = ref_fluo[0:2]
    ref_value = np.mean(ref_fluo)
    if len(x) > 2 and x[2] == -1:
        x = x[0:2]
    normalised_values = [
        (value_x - auto_value) / (ref_value - auto_value)
        for value_x in x
        if value_x != -1
    ]
    return {"yield_mean": np.mean(normalised_values), "yield_std": np.std(normalised_values)}

# Extracting control and plate information

In [9]:
# Wells are split in three groups according to the first letter of their name:
# "A" -> control_array, "P" -> control_max_array, anything else -> plate_array.
# Each group is a 2-D array of text with one row per distinct composition:
# columns 0-15 are the concentrations, columns 16-18 the names of up to three
# wells sharing that composition ('' marks a free slot).
control_array = None
plate_array = None
control_max_array = None

concentration_columns = ("nad", "folinic_acid", "DNA", "coa", "RBS", "peg",
                         "nucleo_mix", "spermidin", "pga", "aa", "trna",
                         "mg_gluta", "hepes", "camp", "K_gluta", "promoter")


def _new_condition_row(sample, well):
    """Return a (1, 19) row: the concentrations, the well name and two free slots."""
    return np.reshape(sample + [well, '', ''], (1, 19))


def _store_replicate(table, index, well):
    """Put ``well`` in the first free slot (column 17, then 18) of row ``index``.

    Return False, leaving ``table`` untouched, when both slots are taken.
    """
    for column in (17, 18):
        if table[index, column] == '':
            table[index, column] = well
            return True
    return False


for well, row in wells_information.items():
    this_sample_conc = [row[column] for column in concentration_columns]

    if well.startswith("A"):
        if control_array is None:
            control_array = _new_condition_row(this_sample_conc, well)
            continue
        present, i = present_in_array(this_sample_conc, control_array[:, 0:16])
        if present:
            if not _store_replicate(control_array, i, well):
                # More than three wells for this composition: report and drop.
                print(row["name"])
        else:
            if well == 'A6':
                print("Empty DNA row is {}".format(i))
            control_array = np.concatenate(
                (control_array, _new_condition_row(this_sample_conc, well)), axis=0)

    elif well.startswith("P"):
        if control_max_array is None:
            control_max_array = _new_condition_row(this_sample_conc, well)
            continue
        present, i = present_in_array(this_sample_conc, control_max_array[:, 0:16])
        if well == 'P3':
            print("P3 is {}".format(i))
        if present:
            if not _store_replicate(control_max_array, i, well):
                print(row["name"])
        else:
            if well == 'P3':
                print("P3 is {}".format(i))
            control_max_array = np.concatenate(
                (control_max_array, _new_condition_row(this_sample_conc, well)), axis=0)

    else:
        if plate_array is None:
            plate_array = _new_condition_row(this_sample_conc, well)
            continue
        present, i = present_in_array(this_sample_conc, plate_array[:, 0:16])
        fresh_row = _new_condition_row(this_sample_conc, well)
        if not present:
            plate_array = np.concatenate((plate_array, fresh_row), axis=0)
        elif not _store_replicate(plate_array, i, well):
            # Row already full: open a new one placed first, so that it is the
            # row found by the next search for this composition.
            plate_array = np.concatenate((fresh_row, plate_array), axis=0)

Empty DNA row is 3
P3 is 0
P3 is 0


In [10]:
# Stack the groups in the order controls, plate, maximum controls.
if control_array is None:
    # Without controls only the plate rows are used.
    whole_array = plate_array
else:
    # Leave out the groups that received no well: np.concatenate cannot take
    # None, and testing for it explicitly keeps genuine shape errors visible.
    available_groups = [
        group
        for group in (control_array, plate_array, control_max_array)
        if group is not None
    ]
    whole_array = np.concatenate(available_groups, axis = 0)

# Replacing named wells with their values

In [11]:
# Reading of every well, keyed by well name. The values are kept as the text
# found in the "Time_5" column of the plate-reader file.
name_yield_dictionnary = {}

with open(localisation) as csvfile:
    reader = csv.DictReader(csvfile)
    name_yield_dictionnary.update(
        (row["name"], row["Time_5"]) for row in reader
    )

In [12]:
# Keep the well names: columns 16-18 are about to become numbers.
wells_name_array = np.copy(whole_array[:,16:19])

# Look every well name up in the plate-reader results; a name without a
# reading (including the '' of a free slot) gets the marker -10000.
# The readings are gathered in their own numeric array: writing them back into
# the text array would silently cut any value longer than its item size.
measurements = np.array(
    [[float(name_yield_dictionnary.get(name, -10000)) for name in names]
     for names in wells_name_array],
    dtype=np.float32,
).reshape(-1, 3)
whole_array = np.concatenate(
    (whole_array[:, 0:16].astype(np.float32), measurements), axis = 1)

# Removing outliers

In [13]:
# Run the outlier filter on every row. Rows are gathered in lists and stacked
# once at the end; growing an array with np.concatenate inside the loop copies
# everything accumulated so far at each step.
outlier_rows = []
cleaned_rows = []

for row in whole_array:
    outlier, row, outlier_row = remove_outlier_cv(row, cv = CV)
    if outlier:
        # Keep the untouched readings of the rows that were modified.
        outlier_rows.append(outlier_row)
    cleaned_rows.append(row)

# Both arrays stay None when there is nothing to put in them.
outliers_array = np.vstack(outlier_rows) if outlier_rows else None
cleaned_array = np.vstack(cleaned_rows) if cleaned_rows else None
    

In [14]:
# Count the rows in which an outlier was found and write a one-line summary.
number_outliers = 0
if outliers_array is not None:
    print(outliers_array.shape)
    number_outliers = outliers_array.shape[0]
number_total = whole_array.shape[0]
number_bis = cleaned_array.shape[0]
# The filter must return exactly one row per input row.
assert number_bis == number_total
percentage = number_outliers/number_total * 100
text = "There are {} outliers out of {} ({}%) for CV of {}".format(
    number_outliers,
    number_total,
    round(percentage, 2),
    CV,
)
with open(comments_file, "w") as file_handle:
    file_handle.write(text)
    

In [15]:
# Show the original rows in which an outlier was found (None when there is none).
print(outliers_array)

None


In [16]:
# Turn every outlier row into a dict ready for csv.DictWriter.
outliers_as_dict = []
fieldnames = ["nad", "folinic_acid", "DNA", "coa", "RBS", "peg", "nucleo_mix", 
              "spermidin", "pga", "aa", "trna", "mg_gluta", "hepes", "camp", "K_gluta", "promoter", 
              "value_1", "value_2", "value_3", "plaque_name"]

# (column name, number of decimals kept), in the order of the array columns:
# the 16 concentrations followed by the three readings.
column_rounding = [
    ("nad", 5), ("folinic_acid", 5), ("DNA", 4), ("coa", 5), ("RBS", 4),
    ("peg", 5), ("nucleo_mix", 5), ("spermidin", 5), ("pga", 5), ("aa", 5),
    ("trna", 5), ("mg_gluta", 4), ("hepes", 4), ("camp", 4), ("K_gluta", 4),
    ("promoter", 4), ("value_1", 4), ("value_2", 4), ("value_3", 4),
]

if outliers_array is not None:
    for row in outliers_array:
        new_dict = {
            name: round(float(value), decimals)
            for (name, decimals), value in zip(column_rounding, row)
        }
        # The plate is identified by file_name, as in the full export further
        # down; the name used here before was not defined anywhere.
        new_dict["plaque_name"] = file_name
        outliers_as_dict.append(new_dict)

In [17]:
# Write the outlier rows. newline="" lets the csv writer control the line
# endings itself; without it some platforms produce doubled line breaks.
with open(outliers, "w", newline="") as csv_handle:
    csv_writer = csv.DictWriter(csv_handle, fieldnames, restval='', extrasaction='ignore')
    csv_writer.writeheader()
    csv_writer.writerows(outliers_as_dict)

# Calculating yield

In [18]:
# Choose the readings used as background (autofluo) and as reference
# (max_extract) for the normalisation.
if control_array is None:
    # No control wells on this plate: fall back on fixed values.
    print("controls are different")
    autofluo = np.array([1427, 1495, 1697])
    max_extract = np.array([8246, 9788, 7592])
else:
    try: 
        max_extract = cleaned_array[109,16:19]  # Will be changed to reference extract in later versions of the code
    except IndexError:
        # Fewer than 110 rows: use row 7 instead. Only the out-of-range index
        # is caught, so that any other error is not hidden.
        max_extract = cleaned_array[7,16:19]
        
    # NOTE(review): taken from whole_array, i.e. before outlier removal, whereas
    # max_extract comes from cleaned_array - confirm this is intended.
    autofluo = whole_array[4,16:19]

print(autofluo)
print(max_extract)

[2287. 2325. 2222.]
[21822. 19555. 20269.]


In [19]:
# Names under which the background and reference readings are used below.
autofluo_from_data = autofluo
max_from_data = max_extract

In [20]:
# Verifications 
# The reference readings normalised against themselves should average 1 ...
print(calculate_yield_mean_sd(max_from_data, autofluo_from_data, max_from_data))
# ... and the background readings should average 0.
print(calculate_yield_mean_sd(autofluo_from_data, autofluo_from_data, max_from_data))

{'yield_mean': 1.0, 'yield_std': 0.051798366}
{'yield_mean': 0.0, 'yield_std': 0.0023276887}


In [21]:
# Concentrations only, and concentrations together with the three readings.
new_array = cleaned_array[:,0:16]
full_array = cleaned_array[:,0:19]
mean_yield_list, std_yield_list = [], []
mean_list, std_list = [], []

# For every condition: raw mean/std of its readings and normalised yield.
for readings in cleaned_array[:, 16:19]:
    # A -1 in the last slot marks a discarded replicate: use the first two only.
    kept = readings[0:2] if readings[2] == -1 else readings
    mean = np.mean(kept)
    std = np.std(kept)
    dict_results = calculate_yield_mean_sd(readings, autofluo_from_data, max_from_data)
    mean_yield_list.append(dict_results["yield_mean"])
    std_yield_list.append(dict_results["yield_std"])
    mean_list.append(mean)
    std_list.append(std)
    

# Exporting results

In [22]:
# Put the well names back next to the readings, then turn each list of
# per-condition statistics into a single-column array.
full_array = np.concatenate((full_array, wells_name_array), axis = 1)
number_of_rows = new_array.shape[0]
mean_array, std_array, mean_yield_array, std_yield_array = (
    np.reshape(values, (number_of_rows, 1))
    for values in (mean_list, std_list, mean_yield_list, std_yield_list)
)


In [23]:
# Concentrations followed by mean, std, yield and yield std.
# NOTE(review): array_for_saving is not used by any other visible cell.
array_for_saving = np.concatenate((new_array, mean_array, std_array, mean_yield_array, std_yield_array), axis = 1)
# Same statistics appended to the full rows (concentrations, readings, well names).
array_for_saving_everything = np.concatenate((full_array, mean_array, std_array, mean_yield_array, std_yield_array), axis = 1)

In [24]:
# Turn every row into a dict ready for csv.DictWriter. Rows without DNA are
# shown and kept out of the list used for the reduced export.
list_of_dict_everything = []
list_of_dict_ML = []
fieldnames = ["nad", "folinic_acid", "DNA", "coa", "RBS", "peg", "nucleo_mix", 
              "spermidin", "pga", "aa", "trna", "mg_gluta", "hepes", "camp", "K_gluta", "promoter", 
              "value_1", "value_2", "value_3", "well_1", "well_2", "well_3", "mean", "std", "yield", "yield_std",
             "plaque_name"]

# (column name, decimals kept) for array columns 0-18 ...
leading_numbers = (
    ("nad", 5), ("folinic_acid", 5), ("DNA", 4), ("coa", 5), ("RBS", 4),
    ("peg", 5), ("nucleo_mix", 5), ("spermidin", 5), ("pga", 5), ("aa", 5),
    ("trna", 5), ("mg_gluta", 4), ("hepes", 4), ("camp", 4), ("K_gluta", 4),
    ("promoter", 4), ("value_1", 4), ("value_2", 4), ("value_3", 4),
)
# ... and for array columns 22-25; columns 19-21 are the well names.
trailing_numbers = (("mean", 1), ("std", 1), ("yield", 4), ("yield_std", 4))

for row in array_for_saving_everything:
    new_dict = {}
    for position, (name, decimals) in enumerate(leading_numbers):
        new_dict[name] = round(float(row[position]), decimals)
    for position, name in enumerate(("well_1", "well_2", "well_3"), start=19):
        new_dict[name] = row[position]
    for position, (name, decimals) in enumerate(trailing_numbers, start=22):
        new_dict[name] = round(float(row[position]), decimals)
    new_dict["plaque_name"] = file_name
    if new_dict["DNA"] == 0:
        print("is zero")
        print(new_dict)
    else:
        list_of_dict_ML.append(new_dict)
    list_of_dict_everything.append(new_dict)


is zero
{'nad': 0.33, 'folinic_acid': 0.068, 'DNA': 0.0, 'coa': 0.26, 'RBS': 10.0, 'peg': 2.0, 'nucleo_mix': 1.5, 'spermidin': 1.0, 'pga': 30.0, 'aa': 1.5, 'trna': 0.2, 'mg_gluta': 4.0, 'hepes': 50.0, 'camp': 0.75, 'K_gluta': 80.0, 'promoter': 10.0, 'value_1': 2287.0, 'value_2': 2325.0, 'value_3': 2222.0, 'well_1': 'A6', 'well_2': 'A12', 'well_3': 'A18', 'mean': 2278.0, 'std': 42.5, 'yield': 0.0, 'yield_std': 0.0023, 'plaque_name': 'example'}


In [25]:
# Write every condition with all its columns. newline="" lets the csv writer
# control the line endings itself; without it some platforms produce doubled
# line breaks.
with open(all_together_place, "w", newline="") as csv_handle:
    csv_writer = csv.DictWriter(csv_handle, fieldnames, restval='', extrasaction='ignore')
    csv_writer.writeheader()
    csv_writer.writerows(list_of_dict_everything)

In [26]:
# Reduced export: only the columns listed here are written, the other keys of
# each dict are dropped by extrasaction='ignore'.
fieldnames_for_ml = ["nad", "folinic_acid", "coa", "nucleo_mix", 
                    "spermidin", "pga", "aa", "trna", "mg_gluta", "camp", "K_gluta", 
                     "yield", "yield_std"]
# newline="" lets the csv writer control the line endings itself; without it
# some platforms produce doubled line breaks.
with open(export_place, "w", newline="") as csv_handle:
    csv_writer = csv.DictWriter(csv_handle, fieldnames_for_ml, restval='', extrasaction='ignore')
    csv_writer.writeheader()
    csv_writer.writerows(list_of_dict_ML)

In [27]:
# Header of the plate-layout files: the row letter, then columns "2" to "23".
fieldnames_plate = ['row'] + [str(column) for column in range(2, 24)]

In [28]:
# Give every well the mean, std and std/mean ratio (in percent) of the
# condition it belongs to.
mean_info, std_info, ratio_info = {}, {}, {}
for element in list_of_dict_everything:
    ratio = float(element["std"])/float(element["mean"]) * 100
    for slot in ("well_1", "well_2", "well_3"):
        well = element[slot]
        if well == "":
            # Free replicate slot: there is no well to describe, and a blank
            # key cannot be placed on the plate layout.
            continue
        mean_info[well] = element["mean"]
        std_info[well] = element["std"]
        ratio_info[well] = ratio

In [29]:
# Write the means as a plate layout: one line per plate row (first character
# of the well name), one column per plate column (rest of the well name).
# newline="" lets the csv writer control the line endings itself.
with open(draw_mean, "w", newline="") as mean_drawing_file:
    writer = csv.DictWriter(mean_drawing_file, fieldnames=fieldnames_plate, restval='0')
    writer.writeheader()
    current_row = 'A'
    row = {"row": current_row}
    for element in sorted(mean_info.keys()):
        if not element:
            # A blank well name has neither row letter nor column: skip it.
            continue
        if not element.startswith(current_row):
            # First well of another plate row: flush the line being built.
            writer.writerow(row)
            current_row = element[0]
            row = {"row": current_row}
        row[element[1:]] = mean_info[element]
    writer.writerow(row)

In [30]:
# Write the standard deviations as a plate layout: one line per plate row
# (first character of the well name), one column per plate column (rest of
# the well name). newline="" lets the csv writer control the line endings.
with open(draw_std, "w", newline="") as std_drawing_file:
    writer = csv.DictWriter(std_drawing_file, fieldnames=fieldnames_plate, restval='0')
    writer.writeheader()
    current_row = 'A'
    row = {"row": current_row}
    for element in sorted(std_info.keys()):
        if not element:
            # A blank well name has neither row letter nor column: skip it.
            continue
        if not element.startswith(current_row):
            # First well of another plate row: flush the line being built.
            writer.writerow(row)
            current_row = element[0]
            row = {"row": current_row}
        row[element[1:]] = std_info[element]
    writer.writerow(row)

In [31]:
# Write the std/mean ratios as a plate layout: one line per plate row (first
# character of the well name), one column per plate column (rest of the well
# name). newline="" lets the csv writer control the line endings itself.
with open(draw_ratio, "w", newline="") as ratio_drawing_file:
    writer = csv.DictWriter(ratio_drawing_file, fieldnames=fieldnames_plate, restval='0')
    writer.writeheader()
    current_row = 'A'
    row = {"row": current_row}
    for element in sorted(ratio_info.keys()):
        if not element:
            # A blank well name has neither row letter nor column: skip it.
            continue
        if not element.startswith(current_row):
            # First well of another plate row: flush the line being built.
            writer.writerow(row)
            current_row = element[0]
            row = {"row": current_row}
        row[element[1:]] = ratio_info[element]
    writer.writerow(row)