In [1]:
import pandas as pd, ipywidgets as widgets, numpy as np, matplotlib.pyplot as plt, io
# One single-file upload widget per input: the iQue3 export and the ELN workbook.
raw_uploader = widgets.FileUpload(multiple = False)
eln_uploader = widgets.FileUpload(multiple = False)

# Show each prompt directly above the widget it belongs to.
display("Upload iQue3 Data (csv)", raw_uploader)
display("Upload ELN w/ Antibodies Sheet (xlsx)", eln_uploader)

'Upload iQue3 Data (csv)'

FileUpload(value={}, description='Upload')

'Upload ELN w/ Antibodies Sheet (xlsx)'

FileUpload(value={}, description='Upload')

In [2]:
def _read_single_upload(uploader, label):
    """Return ``(filename, content_bytes)`` for the file held by a FileUpload widget.

    uploader: a ``widgets.FileUpload`` created with ``multiple = False``.
    label: human-readable name of the expected file, used in the error message.
    Raises ValueError when no file has been uploaded yet.
    """
    uploaded = uploader.value
    if not uploaded:
        raise ValueError(
            f"No file uploaded for {label}: use the upload button above, then re-run this cell."
        )
    if isinstance(uploaded, dict):
        # ipywidgets 7 layout: {filename: {"metadata": ..., "content": bytes}}
        name = next(iter(uploaded))
        return name, uploaded[name]["content"]
    # ipywidgets 8 layout: tuple of {"name": ..., "content": memoryview, ...}
    first = uploaded[0]
    return first["name"], bytes(first["content"])


iquename, ique_bytes = _read_single_upload(raw_uploader, "iQue3 data (csv)")
elnname, eln_bytes = _read_single_upload(eln_uploader, "ELN workbook (xlsx)")

iquedata = pd.read_csv(io.BytesIO(ique_bytes))
# First sheet of the workbook, plus the antibody plate map sheet read by name.
eln = pd.read_excel(io.BytesIO(eln_bytes))
ab_sheet = pd.read_excel(io.BytesIO(eln_bytes), sheet_name = "Antibody Plate Map")
iquedata

Unnamed: 0,Experiment,Analysis,Plate,Export Date and Time,Well ID,Row,Column,Well Type,Count of All Wells,Median BL1-H of singlets,Median RL1-H of singlets,Count of singlets
0,22_09_06 A2a LM Variants Primary Screen Norm +...,Analysis 1,Plate 2,9/6/22 16:11,A01,A,1,A2a,5721,12315476.0,2857980.0,1864
1,22_09_06 A2a LM Variants Primary Screen Norm +...,Analysis 1,Plate 2,9/6/22 16:11,B01,B,1,A2a,6619,10337342.0,2730841.0,2889
2,22_09_06 A2a LM Variants Primary Screen Norm +...,Analysis 1,Plate 2,9/6/22 16:11,C01,C,1,A2a,3840,14080844.0,4170739.5,1224
3,22_09_06 A2a LM Variants Primary Screen Norm +...,Analysis 1,Plate 2,9/6/22 16:11,D01,D,1,A2a,4418,13668953.0,3522645.5,1544
4,22_09_06 A2a LM Variants Primary Screen Norm +...,Analysis 1,Plate 2,9/6/22 16:11,E01,E,1,A2a,5028,12003735.0,2264594.0,2059
...,...,...,...,...,...,...,...,...,...,...,...,...
295,22_09_06 A2a LM Variants Primary Screen Norm +...,Analysis 1,Plate 2,9/6/22 16:11,P22,P,22,Negative,1605,11362.0,693327.0,456
296,22_09_06 A2a LM Variants Primary Screen Norm +...,Analysis 1,Plate 2,9/6/22 16:11,A23,A,23,Negative,1243,11626.0,367201.0,119
297,22_09_06 A2a LM Variants Primary Screen Norm +...,Analysis 1,Plate 2,9/6/22 16:11,B23,B,23,Negative,1221,11390.0,297713.0,373
298,22_09_06 A2a LM Variants Primary Screen Norm +...,Analysis 1,Plate 2,9/6/22 16:11,C23,C,23,Negative,1068,11682.0,767562.5,302


In [3]:
# Keep only the columns the MFI-ratio analysis reads from the iQue3 export.
data_cleaned = iquedata[
    [
        "Plate",
        "Well ID",
        "Well Type",
        "Median RL1-H of singlets",
    ]
]
data_cleaned

Unnamed: 0,Plate,Well ID,Well Type,Median RL1-H of singlets
0,Plate 2,A01,A2a,2857980.0
1,Plate 2,B01,A2a,2730841.0
2,Plate 2,C01,A2a,4170739.5
3,Plate 2,D01,A2a,3522645.5
4,Plate 2,E01,A2a,2264594.0
...,...,...,...,...
295,Plate 2,P22,Negative,693327.0
296,Plate 2,A23,Negative,367201.0
297,Plate 2,B23,Negative,297713.0
298,Plate 2,C23,Negative,767562.5


In [4]:
def clean_for_well_type(well_type_string, data=None):
    """Return only the rows whose "Well Type" equals ``well_type_string``.

    well_type_string: label compared for exact equality against the "Well Type" column.
    data: optional DataFrame with a "Well Type" column; when omitted, the
        notebook-level ``data_cleaned`` frame is filtered.
    Returns the matching rows with their original index labels (an empty
    frame when no row matches).
    """
    if data is None:
        data = data_cleaned
    return data[data["Well Type"] == well_type_string]

def average_MFI(well_type_string):
    """Average the "Median RL1-H of singlets" values of adjacent replicate wells.

    well_type_string: the well type of interest, as marked in the iQue3
        software (or manually changed) in the "Well Type" column.

    Rows of that well type are paired by position: rows 0, 2, 4, ... are taken
    as the first replicate and rows 1, 3, 5, ... as the second, so the result
    is only meaningful if replicate wells sit on consecutive rows of the export
    (assumed from the 384-well duplicate layout; verify for new plate maps).

    Returns a Series indexed 0..n-1 holding the mean MFI of each pair. With an
    odd number of wells the last well has no partner: a warning is emitted and
    its average is NaN.
    """
    import warnings

    selected_wells = clean_for_well_type(well_type_string)
    MFIs = selected_wells["Median RL1-H of singlets"]
    if len(MFIs) % 2:
        warnings.warn(
            f"Odd number of '{well_type_string}' wells ({len(MFIs)}): "
            "the last well has no replicate partner and its average will be NaN."
        )
    # Re-index both halves from 0 so the addition lines up pair by pair
    # rather than by the original row labels.
    firsts = MFIs.iloc[::2].reset_index(drop = True)
    seconds = MFIs.iloc[1::2].reset_index(drop = True)
    return (firsts + seconds) / 2

def calculate_MFI_Ratio(positive_series, negative_series, positive_string):
    """Compute positive/negative MFI ratios and join them to the antibody plate map.

    positive_series: the series of the positive values
    negative_series: the series of the negative (untransfected) values
    positive_string: the string that marks the positive samples in the
        "Well Type" column from the iQue3 csv

    Returns a DataFrame indexed from 1 with the columns "Plate", "Well ID",
    "Well Type", "MFI Ratio", "DNA" and "Protein Concentration"; the negative
    population is not included. Rows are matched to the notebook-level
    ``ab_sheet`` purely by position. If ``ab_sheet`` also has a "Well ID"
    column, both "Well ID" columns appear in the result.
    """
    MFI_ratio = positive_series/negative_series
    # Keep the first well of every replicate pair. reset_index returns an
    # independent frame, so adding a column below does not write into a slice
    # of the filtered data (which would raise SettingWithCopyWarning).
    original_positive = clean_for_well_type(positive_string).iloc[::2].reset_index(drop = True)
    original_positive["MFI Ratio"] = MFI_ratio
    # Side-by-side join on the 0-based row position.
    MFIs_Antibody = pd.concat([original_positive, ab_sheet], axis = 1)
    # Shift to 1-based row labels.
    MFIs_Antibody.index += 1
    return MFIs_Antibody[["Plate", "Well ID", "Well Type", "MFI Ratio", "DNA", "Protein Concentration"]]

In [7]:
# Label that marks the positive wells in the "Well Type" column.
pos_string = "A2a"

# Replicate-averaged MFI for the positive wells, then for the "Negative" wells.
averagePositive, averageNegative = (
    average_MFI(well_type) for well_type in (pos_string, "Negative")
)

In [8]:
#Create new DF for MFI ratios (don't modify originals)
MFI_Ratios_Antibody = calculate_MFI_Ratio(averagePositive, averageNegative, pos_string)

# Save the unsorted table next to the notebook.
MFI_Ratios_Antibody.to_excel(pos_string + "Well_ID_MFI_Ratio.xlsx", sheet_name='Analyzed Data')

# Rank by MFI ratio, highest first. Only a cell's last expression is rendered,
# so the sorted table is the single output of this cell.
AllLeadsSorted = MFI_Ratios_Antibody.sort_values(by="MFI Ratio", ascending = False)
AllLeadsSorted

Unnamed: 0,Plate,Well ID,Well ID.1,Well Type,MFI Ratio,DNA,Protein Concentration
47,Plate 2,M06,G6,A2a,14.945438,TB756-H_control + TB756_L4-few,0.015
49,Plate 2,A07,A7,A2a,7.041022,TB756-H_control + TB756_L7-zero,0.015
45,Plate 2,I06,E6,A2a,6.005863,TB756-H_control + TB756_L1-few,0.015
39,Plate 2,M05,G5,A2a,5.738209,TB756_H4-zero + TB756-L_control,0.015
2,Plate 2,C01,B1,A2a,5.420037,TB754_H1-few + TB754-L_control,0.015
42,Plate 2,C06,B6,A2a,5.2542,TB756_H7-few + TB756-L_control,0.015
44,Plate 2,G06,D6,A2a,5.030903,TB756_H9-few + TB756-L_control,0.015
43,Plate 2,E06,C6,A2a,4.900456,TB756_H8-few + TB756-L_control,0.015
1,Plate 2,A01,A1,A2a,4.769294,TB754-H_control + TB754-L_control_A1,0.015
36,Plate 2,G05,D5,A2a,4.410749,TB756_H1-few + TB756-L_control,0.015


In [None]:
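# NOTE: disabled draft of the hit-plotting cell. It refers to `MFIRatio_Antibody_cleaned` and a
# "Protein Concentration (ug/uL)" column, neither of which is produced by the cells above
# (they yield `MFI_Ratios_Antibody` with a "Protein Concentration" column); rename before re-enabling.
# #Sort DF by highest MFI ratio (top hits)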
# AllLeadsSorted = MFIRatio_Antibody_cleaned.sort_values(by="MFI Ratio", ascending = False)
# Top10 = MFIRatio_Antibody_cleaned.nlargest(10, "MFI Ratio")
# #Top10 = Top10.append(MFIRatio_Antibody_cleaned.loc[MFIRatio_Antibody_cleaned["DNA"] == "SC-175-017"])
# fig, ax = plt.subplots()
# ax.scatter(x = AllLeadsSorted["Protein Concentration (ug/uL)"], y=AllLeadsSorted["MFI Ratio"], alpha = .2, color = "blue")
# ax.scatter(x = Top10["Protein Concentration (ug/uL)"], y = Top10["MFI Ratio"], alpha = .8, color = "blue", label = "Top Hits")
# ax.set_title("CD40 Primary Screen")
# ax.set_xlabel("Concentration (mg/ml)")
# ax.set_ylabel("MFI Ratio \n Transfected/Untransfected")
# ax.legend()

# texts = []
# for row in Top10.itertuples(index = False):
#     ab, conc, MFI = row[4], row[5], row[3]
#     ax.annotate(row[4], xy = (conc, MFI), textcoords = "offset points", xytext = (8, -2), ha ='left')
#     #texts.append(plt.text(ab, conc, MFI))