In [None]:
import pandas as pd, ipywidgets as widgets, numpy as np, matplotlib.pyplot as plt
import io as io
# One single-file upload button per input document.
raw_uploader = widgets.FileUpload(multiple=False)
eln_uploader = widgets.FileUpload(multiple=False)

# display() renders its arguments in order, so each prompt appears directly
# above the button it belongs to.
display("Upload iQue3 Data (csv)", raw_uploader)
display("Upload ELN w/ Antibodies Sheet (xlsx)", eln_uploader)

In [None]:
def _read_upload(uploader):
    """Return ``(filename, raw_bytes)`` for the single file held by a FileUpload widget.

    Handles both shapes of ``FileUpload.value``:
    a dict keyed by filename (ipywidgets 7) and a tuple of per-file entries that
    carry ``"name"`` and ``"content"`` (ipywidgets 8).

    Raises:
        ValueError: if nothing has been uploaded through the widget yet.
    """
    uploaded = uploader.value
    if not uploaded:
        raise ValueError("No file uploaded yet - use the upload buttons above, then re-run this cell.")
    if isinstance(uploaded, dict):
        name, entry = next(iter(uploaded.items()))
    else:
        entry = uploaded[0]
        name = entry["name"]
    # bytes() accepts both a bytes object and a memoryview.
    return name, bytes(entry["content"])


iquename, ique_bytes = _read_upload(raw_uploader)
elnname, eln_bytes = _read_upload(eln_uploader)

# iQue3 export (csv) and the ELN workbook (xlsx): its default sheet plus the antibody plate map sheet.
iquedata = pd.read_csv(io.BytesIO(ique_bytes))
eln = pd.read_excel(io.BytesIO(eln_bytes))
ab_sheet = pd.read_excel(io.BytesIO(eln_bytes), sheet_name="Antibody Plate Map")

# Keep only the columns the analysis below reads.
data_cleaned = iquedata[["Plate", "Well ID", "Well Type", "Median RL1-H of singlets", "Count of singlets"]]
data_cleaned

In [None]:
def format_platemap():
    """Sort the antibody plate map into column-major well order (A1, B1, ..., H1, A2, B2, ...).

    Operates on the notebook-level ``ab_sheet`` in place:
      * adds "Format Row"    - the first character of "Well ID" (the row letter),
      * adds "Format Column" - the remainder of "Well ID" converted to a number,
      * sorts the rows by column number, then row letter, and renumbers the index from 0.

    Limitations: only a single plate map is handled (no plate-number column is
    considered), and the row is taken to be exactly one character.

    Returns:
        The sorted ``ab_sheet`` (the same object that was updated in place), so the
        result can be used directly as well as through the global.

    Raises:
        ValueError: from ``pd.to_numeric`` if the part of a "Well ID" after the
            first character is not numeric.
    """
    well_ids = ab_sheet["Well ID"]
    ab_sheet["Format Row"] = well_ids.str.slice(start=0, stop=1)
    ab_sheet["Format Column"] = pd.to_numeric(well_ids.str.slice(start=1))
    # Numeric column first so that A10 sorts after A2, then the row letter within a column.
    ab_sheet.sort_values(by=["Format Column", "Format Row"], inplace=True, ignore_index=True)
    return ab_sheet

def clean_for_well_type(well_type_string):
    """Return the rows of ``data_cleaned`` whose "Well Type" equals ``well_type_string``.

    Reads the notebook-level ``data_cleaned`` frame; the original row index is kept.
    """
    is_requested_type = data_cleaned["Well Type"].eq(well_type_string)
    return data_cleaned.loc[is_requested_type]

def _average_replicate_pairs(series_to_average):
    """Average consecutive replicate pairs (n=1, n=2, n=1, n=2, ...) of a Series.

    Values at even positions are treated as the first replicate and values at odd
    positions as the second. Both halves are re-indexed from 0 before being added,
    so the result has one entry per pair, indexed 0..k-1. With an odd number of
    values the last first-replicate has no partner and its result is NaN.
    """
    firsts = series_to_average.iloc[::2].reset_index(drop=True)
    seconds = series_to_average.iloc[1::2].reset_index(drop=True)
    return (firsts + seconds) / 2


def average(well_type_string, to_average_string=None):
    """Average duplicate wells, removing the replicate rows.

    Python keeps only the last ``def`` of a given name, so the two call forms this
    notebook uses are served by this single function:

    * ``average(well_type_string, to_average_string)`` - select the wells of that
      type with ``clean_for_well_type`` and average the named column,
      e.g. ``average("Positive", "Median RL1-H of singlets")``.
    * ``average(series)`` - average a Series that already holds the replicates in
      sequential (n=1, n=2) order.

    Returns:
        A Series with one averaged value per replicate pair, indexed from 0.

    Raises:
        TypeError: if a well type string is given without a column name.
    """
    if to_average_string is None:
        if isinstance(well_type_string, str):
            raise TypeError(
                "average() needs the column to average when called with a well type string"
            )
        # One-argument form: the argument is the Series itself.
        return _average_replicate_pairs(well_type_string)
    selected_wells = clean_for_well_type(well_type_string)
    return _average_replicate_pairs(selected_wells[to_average_string])


def calculate_analysis_sheet(positive, negative):
    """Build the analysis sheet: one row per averaged positive well with its MFI ratio.

    Args:
        positive: Series of averaged values for the positive population, indexed from 0.
        negative: Series of averaged values for the negative population, indexed from 0.

    Steps:
        1. MFI ratio = ``positive / negative`` (element-wise, aligned on the index).
        2. Take every second "Positive" row (the first well of each duplicate),
           re-index from 0 and attach the ratio as "MFI Ratio". The negative wells
           are not part of the sheet.
        3. Place the notebook-level ``ab_sheet`` next to it (aligned on the index)
           and shift the index so it starts at 1.

    ``format_platemap()`` must have been run first: the "Format Row" and
    "Format Column" columns selected below are created there.

    Returns:
        DataFrame with "Plate", "Well ID", "Well Type", "MFI Ratio", "DNA",
        "Format Row" and "Format Column". The average cell count is not included.

    NOTE(review): the iQue data and ``ab_sheet`` both appear to carry a "Well ID"
    column, in which case the result holds both under the same label - confirm
    this is intended.
    NOTE(review): rows are matched purely by position, which presumes the iQue
    wells come in the same column-major order as the sorted plate map - confirm.
    """
    MFI_ratio = positive / negative
    # reset_index/assign hand back fresh frames, so the new column is never written
    # onto a slice of the cleaned data (no SettingWithCopyWarning).
    original_positive = (
        clean_for_well_type("Positive")
        .iloc[::2]
        .reset_index(drop=True)
        .assign(**{"MFI Ratio": MFI_ratio})
    )
    MFIs_Antibody = pd.concat([original_positive, ab_sheet], axis=1)
    MFIs_Antibody.index = MFIs_Antibody.index + 1
    return MFIs_Antibody[["Plate", "Well ID", "Well Type", "MFI Ratio", "DNA", "Format Row", "Format Column"]]

In [None]:
#Create average MFI of positive population series and average MFI of negative population series
#average() is handed the replicate-ordered column of each well type directly
averagePositive = average(clean_for_well_type("Positive")["Median RL1-H of singlets"])
averageNegative = average(clean_for_well_type("Negative")["Median RL1-H of singlets"])

#Format the antibody platemap
format_platemap()

#Create new DF for MFI ratios(don't modify originals)
analyzed_data = calculate_analysis_sheet(averagePositive, averageNegative)
#pandas picks the Excel writer from the file extension, so the name must end in .xlsx
analyzed_data.to_excel("Flow Binding MFI Ratios.xlsx", sheet_name='Analyzed Data')
AllLeadsSorted = analyzed_data.sort_values(by="MFI Ratio", ascending = False)
analyzed_data
#AllLeadsSorted

In [None]:
# #Sort DF by highest MFI ratio (top hits)
# AllLeadsSorted = MFIRatio_Antibody_cleaned.sort_values(by="MFI Ratio", ascending = False)
# Top10 = MFIRatio_Antibody_cleaned.nlargest(10, "MFI Ratio")
# #Top10 = Top10.append(MFIRatio_Antibody_cleaned.loc[MFIRatio_Antibody_cleaned["DNA"] == "SC-175-017"])
# fig, ax = plt.subplots()
# ax.scatter(x = AllLeadsSorted["Protein Concentration (ug/uL)"], y=AllLeadsSorted["MFI Ratio"], alpha = .2, color = "blue")
# ax.scatter(x = Top10["Protein Concentration (ug/uL)"], y = Top10["MFI Ratio"], alpha = .8, color = "blue", label = "Top Hits")
# ax.set_title("CD40 Primary Screen")
# ax.set_xlabel("Concentration (mg/ml)")
# ax.set_ylabel("MFI Ratio \n Transfected/Untransfected")
# ax.legend()

# texts = []
# for row in Top10.itertuples(index = False):
#     ab, conc, MFI = row[4], row[5], row[3]
#     ax.annotate(row[4], xy = (conc, MFI), textcoords = "offset points", xytext = (8, -2), ha ='left')
#     #texts.append(plt.text(ab, conc, MFI))