In [1]:
import pandas as pd, ipywidgets as widgets, numpy as np, matplotlib.pyplot as plt
import io as io
from ipywidgets import interact, interactive, fixed, interact_manual
# One single-file upload widget per input: the raw iQue3 export and the ELN workbook.
raw_uploader = widgets.FileUpload(multiple = False)
eln_uploader = widgets.FileUpload(multiple = False)

# Show each prompt string directly above the widget it belongs to.
display("Upload iQue3 Data (csv)")
display(raw_uploader)

display("Upload ELN w/ Antibodies Sheet (xlsx)")
display(eln_uploader)

'Upload iQue3 Data (csv)'

FileUpload(value={}, description='Upload')

'Upload ELN w/ Antibodies Sheet (xlsx)'

FileUpload(value={}, description='Upload')

In [11]:
# Each uploader's .value is indexed by file name here (dict-style API — assumes the
# ipywidgets version in use exposes it that way; confirm if upgrading ipywidgets).
# Take the first (only) uploaded file name from each widget.
iquename, elnname = (
    list(uploader.value.keys())[0] for uploader in (raw_uploader, eln_uploader)
)

# Parse the raw iQue3 export from the uploaded bytes.
iquedata = pd.read_csv(io.BytesIO(raw_uploader.value[iquename]['content']))

# The ELN workbook is parsed twice from the same bytes: once for its default
# (first) sheet and once for the named antibody plate map sheet.
eln_bytes = eln_uploader.value[elnname]['content']
eln = pd.read_excel(io.BytesIO(eln_bytes))
ab_sheet = pd.read_excel(io.BytesIO(eln_bytes), sheet_name = "Antibody Plate Map")

# Keep only the columns the downstream analysis reads.
columns_to_keep = ["Plate", "Well ID", "Well Type", "Median RL1-H of singlets", "Count of singlets"]
data_cleaned = iquedata[columns_to_keep]
data_cleaned

Unnamed: 0,Plate,Well ID,Well Type,Median RL1-H of singlets,Count of singlets
0,Plate 1,A01,Positive,13761.5,2002
1,Plate 1,B01,Positive,15501.0,873
2,Plate 1,C01,Positive,170104.0,618
3,Plate 1,D01,Positive,116277.0,777
4,Plate 1,E01,Positive,45532.0,779
...,...,...,...,...,...
827,Plate 3,L23,Negative,13390.0,97
828,Plate 3,M23,Negative,7706.0,173
829,Plate 3,N23,Negative,9460.0,209
830,Plate 3,O23,Negative,6346.0,163


In [15]:
def format_platemap(ab_sheet):
    """Sort an antibody plate map column-wise, in place.

    The "Well ID" of every row is split into two new columns:
    "Format Row" (the first character, i.e. the row letter) and
    "Format Column" (everything after the first character, converted to a number).
    The frame is then sorted by "Plate ID/Barcode", then "Format Column",
    then "Format Row", so wells run A1 -> B1 -> ... -> H1 -> A2 -> B2 -> ... -> H12
    within each plate, and the index is renumbered from 0.

    Parameters
    ----------
    ab_sheet : pandas.DataFrame
        Plate map with "Well ID" and "Plate ID/Barcode" columns. It is
        modified in place (two columns added, rows reordered).

    Returns
    -------
    None
    """
    # Row letter: first character of the well ID.
    ab_sheet["Format Row"] = ab_sheet["Well ID"].str[:1]
    # Column number: the remainder, made numeric so that 2 sorts before 10.
    ab_sheet["Format Column"] = pd.to_numeric(ab_sheet["Well ID"].str[1:])
    sort_keys = ["Plate ID/Barcode", "Format Column", "Format Row"]
    ab_sheet.sort_values(by=sort_keys, inplace=True, ignore_index=True)

def clean_for_well_type(well_type_string):
    """Return the rows of the notebook-level ``data_cleaned`` frame for one well type.

    Parameters
    ----------
    well_type_string : str
        Value to match exactly against the "Well Type" column
        (e.g. "Positive" or "Negative").

    Returns
    -------
    pandas.DataFrame
        The matching rows, keeping their original index labels.
    """
    is_requested_type = data_cleaned["Well Type"].eq(well_type_string)
    return data_cleaned[is_requested_type]

def average(well_type_string, to_average_string):
    """Average replicate pairs of one measurement for one well type.

    The rows for ``well_type_string`` are fetched with ``clean_for_well_type``,
    the ``to_average_string`` column is taken from them, and that column is
    handed to ``average_series``, which averages consecutive pairs of values.

    Parameters
    ----------
    well_type_string : str
        Well type to select, e.g. "Positive" or "Negative".
    to_average_string : str
        Name of the column to average, e.g. "Median RL1-H of singlets"
        or "Count of singlets".

    Returns
    -------
    pandas.Series
        Whatever ``average_series`` returns for the selected column:
        one averaged value per consecutive pair of wells.
    """
    return average_series(clean_for_well_type(well_type_string)[to_average_string])

def average_series(series_to_average):
    """Average each consecutive pair of values in a series.

    Values at positions 0, 2, 4, ... are treated as the first replicate (n=1)
    and values at positions 1, 3, 5, ... as the second replicate (n=2).
    Both halves are renumbered from 0 so they line up pair by pair, then the
    mean of each pair is taken.

    Parameters
    ----------
    series_to_average : pandas.Series
        Values with the two replicates of each sample adjacent to each other.
        The input series is not modified.

    Returns
    -------
    pandas.Series
        One averaged value per pair, indexed 0..n-1. If the input has an odd
        number of values, the final unpaired value yields NaN.
    """
    first_replicates = series_to_average.iloc[::2].reset_index(drop=True)
    second_replicates = series_to_average.iloc[1::2].reset_index(drop=True)
    return (first_replicates + second_replicates) / 2


def calculate_analysis_sheet(positive, negative):
    """Build the analysis table: MFI ratio per sample joined to the antibody plate map.

    The MFI ratio is ``positive / negative`` (element-wise, aligned on index
    labels). It is attached to every second "Positive" row returned by
    ``clean_for_well_type`` (one row per replicate pair), and that table is
    placed side by side with the notebook-level ``ab_sheet`` frame, matching
    rows by index label.

    Parameters
    ----------
    positive : pandas.Series
        Averaged signal of the positive population, indexed from 0.
    negative : pandas.Series
        Averaged signal of the negative population, indexed from 0.

    Returns
    -------
    pandas.DataFrame
        Index starting at 1, restricted to the columns "Plate", "Well ID",
        "Well Type", "MFI Ratio", "Protein Name", "Format Row" and
        "Format Column". "Format Row"/"Format Column" must already exist on
        ``ab_sheet`` (they are added by ``format_platemap``). If ``ab_sheet``
        also carries a "Plate" or "Well ID" column, both copies are returned.

    Notes
    -----
    The averaged singlet count is not part of the returned table; a previously
    computed-but-unused average of "Count of singlets" was dropped from here.
    """
    # Build the positive-well table as a fresh frame (no in-place edits on a
    # slice of the filtered data), keeping the first well of each replicate
    # pair and renumbering from 0 so rows align with the ratio and plate map.
    representative_wells = (
        clean_for_well_type("Positive")
        .iloc[::2]
        .reset_index(drop=True)
        .assign(**{"MFI Ratio": positive / negative})
    )

    # Side-by-side join on index label with the antibody plate map.
    MFIs_Antibody = pd.concat([representative_wells, ab_sheet], axis = 1)
    # Number samples from 1 rather than 0.
    MFIs_Antibody.index += 1

    report_columns = [
        "Plate", "Well ID", "Well Type", "MFI Ratio",
        "Protein Name", "Format Row", "Format Column",
    ]
    return MFIs_Antibody[report_columns]

In [17]:
#Create average MFI of positive population series and average MFI of negative population series
averagePositive = average("Positive", "Median RL1-H of singlets")
averageNegative = average("Negative", "Median RL1-H of singlets")

#Format the antibody platemap in place
#(format_platemap requires the plate map frame as its argument; calling it with none raises TypeError)
format_platemap(ab_sheet)

#Create new DF for MFI ratios(don't modify originals)
analyzed_data = calculate_analysis_sheet(averagePositive, averageNegative)
#Write the table to an Excel file in the current working directory
analyzed_data.to_excel("Flow Binding MFI Ratios.xlsx", sheet_name='Analyzed Data')
#Same table ordered from highest to lowest MFI ratio
AllLeadsSorted = analyzed_data.sort_values(by="MFI Ratio", ascending = False)
analyzed_data
#AllLeadsSorted

Unnamed: 0,Plate,Plate.1,Well ID,Well ID.1,Well Type,MFI Ratio,Protein Name,Format Row,Format Column
1,Plate 1,,A01,A1,Positive,1.669138,TB736A-002,A,1
2,Plate 1,,C01,A1,Positive,1.812549,TB737A-076,A,1
3,Plate 1,pPUR_R-IGGP-383-221017_001,E01,A1,Positive,2.272864,TB735A-088,A,1
4,Plate 1,,G01,B1,Positive,1.657103,TB736A-004,B,1
5,Plate 1,,I01,B1,Positive,1.410610,TB737A-077,B,1
...,...,...,...,...,...,...,...,...,...
204,Plate 3,,G11,D12,Positive,1.256535,TB737A-069,D,12
205,Plate 3,,I11,E12,Positive,0.267620,TB737A-071,E,12
206,Plate 3,,K11,F12,Positive,0.612525,TB194-11_IgG4,F,12
207,Plate 3,,M11,G12,Positive,0.881656,TB192-5-IgG4,G,12


In [None]:
# #Sort DF by highest MFI ratio (top hits)
# AllLeadsSorted = MFIRatio_Antibody_cleaned.sort_values(by="MFI Ratio", ascending = False)
# Top10 = MFIRatio_Antibody_cleaned.nlargest(10, "MFI Ratio")
# #Top10 = Top10.append(MFIRatio_Antibody_cleaned.loc[MFIRatio_Antibody_cleaned["DNA"] == "SC-175-017"])
# fig, ax = plt.subplots()
# ax.scatter(x = AllLeadsSorted["Protein Concentration (ug/uL)"], y=AllLeadsSorted["MFI Ratio"], alpha = .2, color = "blue")
# ax.scatter(x = Top10["Protein Concentration (ug/uL)"], y = Top10["MFI Ratio"], alpha = .8, color = "blue", label = "Top Hits")
# ax.set_title("CD40 Primary Screen")
# ax.set_xlabel("Concentration (mg/ml)")
# ax.set_ylabel("MFI Ratio \n Transfected/Untransfected")
# ax.legend()

# texts = []
# for row in Top10.itertuples(index = False):
#     ab, conc, MFI = row[4], row[5], row[3]
#     ax.annotate(row[4], xy = (conc, MFI), textcoords = "offset points", xytext = (8, -2), ha ='left')
#     #texts.append(plt.text(ab, conc, MFI))