In [1]:
import io as io
import warnings
from pathlib import Path

import pandas as pd, ipywidgets as widgets, numpy as np, matplotlib.pyplot as plt
#from adjustText import adjust_text
# Two single-file upload widgets: one for the iQue3 export, one for the ELN workbook.
# Their contents are read in the next cell once the user has picked the files.
raw_uploader = widgets.FileUpload(multiple=False)
eln_uploader = widgets.FileUpload(multiple=False)

# Show each prompt directly above the widget it belongs to.
display("Upload iQue3 Data (csv)")
display(raw_uploader)

display("Upload ELN w/ Antibodies Sheet (xlsx)")
display(eln_uploader)

'Upload iQue3 Data (csv)'

FileUpload(value={}, description='Upload')

'Upload ELN w/ Antibodies Sheet (xlsx)'

FileUpload(value={}, description='Upload')

In [2]:
def _first_upload(uploader, label):
    """Return ``(filename, content_bytes)`` for the first file held by a FileUpload widget.

    Handles both shapes of ``uploader.value``: a dict keyed by filename
    (ipywidgets 7.x) and a tuple of per-file dicts carrying a ``"name"`` key
    (ipywidgets 8.x).

    Raises
    ------
    ValueError
        If no file has been uploaded yet.
    """
    uploads = uploader.value
    if not uploads:
        raise ValueError(
            f"No file uploaded for {label}; use the upload button above, then re-run this cell."
        )
    if isinstance(uploads, dict):
        # ipywidgets 7.x: {filename: {"content": bytes, ...}}
        name, record = next(iter(uploads.items()))
    else:
        # ipywidgets 8.x: ({"name": ..., "content": memoryview, ...},)
        record = uploads[0]
        name = record["name"]
    # bytes() normalises both bytes and memoryview payloads.
    return name, bytes(record["content"])


iquename, ique_bytes = _first_upload(raw_uploader, "iQue3 data (csv)")
elnname, eln_bytes = _first_upload(eln_uploader, "ELN workbook (xlsx)")

iquedata = pd.read_csv(io.BytesIO(ique_bytes))

# Open the workbook once and pull both sheets from it instead of re-parsing the bytes.
eln_workbook = pd.ExcelFile(io.BytesIO(eln_bytes))
eln = eln_workbook.parse()  # first sheet, same default as pd.read_excel
ab_sheet = eln_workbook.parse(sheet_name="Antibody Plate Map")
iquedata

Unnamed: 0,Experiment,Analysis,Plate,Export Date and Time,Well ID,Row,Column,Well Type,Count of All Wells,Median BL1-H of singlets,Median RL1-H of singlets,Count of singlets
0,22_07_28 GPRC5D Primary Screen,Analysis 1,Plate 1,7/29/2022 2:10:59 PM,A01,A,1,Positive,5846,7979753.5,144093.0,1062
1,22_07_28 GPRC5D Primary Screen,Analysis 1,Plate 1,7/29/2022 2:10:59 PM,B01,B,1,Positive,8516,8140091.5,129241.5,1878
2,22_07_28 GPRC5D Primary Screen,Analysis 1,Plate 1,7/29/2022 2:10:59 PM,C01,C,1,Positive,6077,5665137.0,77295.0,1025
3,22_07_28 GPRC5D Primary Screen,Analysis 1,Plate 1,7/29/2022 2:10:59 PM,D01,D,1,Positive,7645,7563151.0,99757.0,1725
4,22_07_28 GPRC5D Primary Screen,Analysis 1,Plate 1,7/29/2022 2:10:59 PM,E01,E,1,Positive,7834,8012311.5,150515.5,1714
...,...,...,...,...,...,...,...,...,...,...,...,...
379,22_07_28 GPRC5D Primary Screen,Analysis 1,Plate 1,7/29/2022 2:10:59 PM,L24,L,24,Negative,1521,13867.5,8036.0,752
380,22_07_28 GPRC5D Primary Screen,Analysis 1,Plate 1,7/29/2022 2:10:59 PM,M24,M,24,Negative,1696,13697.0,230107.0,905
381,22_07_28 GPRC5D Primary Screen,Analysis 1,Plate 1,7/29/2022 2:10:59 PM,N24,N,24,Negative,1405,14101.5,142458.5,666
382,22_07_28 GPRC5D Primary Screen,Analysis 1,Plate 1,7/29/2022 2:10:59 PM,O24,O,24,Negative,894,13502.0,26563.0,289


In [3]:
# Keep only the iQue3 columns that the analysis functions below read.
data_cleaned = iquedata[
    [
        "Plate",
        "Well ID",
        "Well Type",
        "Median RL1-H of singlets",
        "Count of singlets",
    ]
]

# Print the plate map's column dtypes as a quick check on how the sheet was parsed.
print(ab_sheet.dtypes)

IGGP Ticket#                          object
BPP Ticket# (If available)            object
Plate ID/Barcode                      object
Well ID                               object
DNA                                   object
Transfection cell line                object
Purification Method                   object
Concentration (mg/mL)                float64
Transfection scale (mL)               object
Volume (uL)                            int64
Yield (ug)                           float64
Lot#                                  object
Date Delivered                datetime64[ns]
Requested by                         float64
dtype: object


In [4]:
def format_platemap(platemap=None):
    """Sort an antibody plate map into column-major well order, in place.

    Column-major means wells run down each plate column before moving on to
    the next one: A1 -> B1 -> ... -> H1 -> A2 -> B2 -> ... -> H12.

    Two helper columns are added to the frame: ``"Format Row"`` (the first
    character of ``"Well ID"``) and ``"Format Column"`` (the remaining
    characters converted to a number). The frame is then sorted by column,
    then row, and its index is renumbered from 0.

    Only a single plate map is handled; sheets that merge several plate maps
    identified by a plate number are not supported.

    Parameters
    ----------
    platemap : pandas.DataFrame, optional
        Plate map with a string ``"Well ID"`` column. When omitted, the
        notebook-level ``ab_sheet`` is used, which keeps existing
        ``format_platemap()`` calls working.

    Returns
    -------
    pandas.DataFrame
        The same object that was sorted (not a copy).

    Raises
    ------
    ValueError
        From ``pandas.to_numeric`` when the part of a well ID after its first
        character cannot be parsed as a number.
    """
    if platemap is None:
        platemap = ab_sheet

    well_ids = platemap["Well ID"]
    # Row is the leading letter; column is everything after it.
    platemap["Format Row"] = well_ids.str.slice(start=0, stop=1)
    platemap["Format Column"] = pd.to_numeric(well_ids.str.slice(start=1))

    # Sort in place so callers holding a reference to the frame (including the
    # global ab_sheet) see the reordered rows.
    platemap.sort_values(by=["Format Column", "Format Row"], inplace=True, ignore_index=True)
    return platemap

def clean_for_well_type(well_type_string):
    """Return the rows of ``data_cleaned`` whose "Well Type" equals ``well_type_string``."""
    matches_type = data_cleaned["Well Type"] == well_type_string
    return data_cleaned.loc[matches_type]

def average(well_type_string, to_average_string):
    """Average one measurement over pairs of consecutive wells of a given well type.

    The wells of the requested type are taken in the order returned by
    ``clean_for_well_type`` and paired positionally: 1st with 2nd, 3rd with
    4th, and so on. No sorting is done here, so replicate wells must already
    be adjacent in the data.

    Parameters
    ----------
    well_type_string : str
        Well type to select (as marked in the iQue3 software, or manually
        changed in the csv), e.g. "Positive" or "Negative".
    to_average_string : str
        Name of the column to average, e.g. "Median RL1-H of singlets".

    Returns
    -------
    pandas.Series
        One value per pair, indexed from 0. With an odd number of wells the
        final entry is NaN, because the last well has no partner.

    Warns
    -----
    UserWarning
        When the number of selected wells is odd.
    """
    values = clean_for_well_type(well_type_string)[to_average_string]

    if len(values) % 2:
        # Surface the unpaired well instead of letting a NaN slip through unnoticed.
        warnings.warn(
            f"{len(values)} '{well_type_string}' wells is an odd number; the last well has no "
            f"replicate partner, so its averaged '{to_average_string}' is NaN.",
            stacklevel=2,
        )

    # Renumber both halves from 0 so the addition lines up pair by pair.
    firsts = values.iloc[::2].reset_index(drop=True)
    seconds = values.iloc[1::2].reset_index(drop=True)
    return (firsts + seconds) / 2

def calculate_analysis_sheet(positive, negative):
    """Build the analysis table: MFI ratio per sample joined to the antibody plate map.

    The MFI ratio is ``positive / negative``. It is attached to every second
    "Positive" well (the first well of each consecutive pair), and that table
    is placed side by side with the notebook-level ``ab_sheet``, matching rows
    by position. Negative wells are not part of the result.

    ``format_platemap()`` must have been run first: the returned columns
    include "Format Row" and "Format Column", which it adds to ``ab_sheet``.

    Parameters
    ----------
    positive : pandas.Series
        Averaged values for the positive wells, indexed from 0.
    negative : pandas.Series
        Averaged values for the negative wells, indexed from 0.

    Returns
    -------
    pandas.DataFrame
        Columns "Plate", "Well ID", "Well Type", "MFI Ratio", "DNA",
        "Format Row", "Format Column", with a 1-based index. "Well ID" appears
        twice because both the iQue3 data and the plate map carry a column of
        that name. If the two tables differ in length, the shorter one is
        padded with NaN.

    Notes
    -----
    The averaged singlet count is not included in the output.
    """
    # First well of each pair, renumbered from 0 so it lines up with the
    # ratio series and with the plate map rows.
    positives = clean_for_well_type("Positive").iloc[::2].reset_index(drop=True)

    # assign() returns a new frame, so nothing is written onto a slice of data_cleaned.
    positives = positives.assign(**{"MFI Ratio": positive / negative})

    combined = pd.concat([positives, ab_sheet], axis=1)
    combined.index = combined.index + 1  # 1-based sample numbering

    return combined[["Plate", "Well ID", "Well Type", "MFI Ratio", "DNA", "Format Row", "Format Column"]]

In [7]:
# Average the replicate wells for the Positive and the Negative populations
averagePositive = average("Positive", "Median RL1-H of singlets")
averageNegative = average("Negative", "Median RL1-H of singlets")

# Sort the antibody plate map into column-major order (adds "Format Row"/"Format Column")
format_platemap()

# Build a new DF for the MFI ratios (the original frames are not modified)
analyzed_data = calculate_analysis_sheet(averagePositive, averageNegative)

# Save the result under the ELN's file name. If a workbook with that name
# already exists in the working directory, only its "Analyzed Data" sheet is
# added/replaced; a plain to_excel() would overwrite the whole workbook and
# drop every other sheet.
if Path(elnname).exists():
    with pd.ExcelWriter(elnname, engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
        analyzed_data.to_excel(writer, sheet_name="Analyzed Data")
else:
    analyzed_data.to_excel(elnname, sheet_name="Analyzed Data")

# Same table ranked by MFI ratio, highest first; display this instead to see top hits
AllLeadsSorted = analyzed_data.sort_values(by="MFI Ratio", ascending=False)
analyzed_data

Unnamed: 0,Plate,Well ID,Well ID.1,Well Type,MFI Ratio,DNA,Format Row,Format Column
1,Plate 1,A01,A1,Positive,1.706922,TB629-037,A,1
2,Plate 1,C01,B1,Positive,2.423843,TB629-002,B,1
3,Plate 1,E01,C1,Positive,1.290765,TB629-014,C,1
4,Plate 1,G01,D1,Positive,1.343032,TB629-050,D,1
5,Plate 1,I01,E1,Positive,1.239374,TB629-074,E,1
...,...,...,...,...,...,...,...,...
92,Plate 1,G12,D12,Positive,1.823506,TB628-107,D,12
93,Plate 1,I12,E12,Positive,15.009795,TB628-167,E,12
94,Plate 1,K12,F12,Positive,2.093933,PUR_Tmab_220423_TN_037,F,12
95,Plate 1,M12,G12,Positive,1.701749,PUR_Tmab_220423_TN_038,G,12


In [None]:
# #Sort DF by highest MFI ratio (top hits)
# AllLeadsSorted = MFIRatio_Antibody_cleaned.sort_values(by="MFI Ratio", ascending = False)
# Top10 = MFIRatio_Antibody_cleaned.nlargest(10, "MFI Ratio")
# #Top10 = Top10.append(MFIRatio_Antibody_cleaned.loc[MFIRatio_Antibody_cleaned["DNA"] == "SC-175-017"])
# fig, ax = plt.subplots()
# ax.scatter(x = AllLeadsSorted["Protein Concentration (ug/uL)"], y=AllLeadsSorted["MFI Ratio"], alpha = .2, color = "blue")
# ax.scatter(x = Top10["Protein Concentration (ug/uL)"], y = Top10["MFI Ratio"], alpha = .8, color = "blue", label = "Top Hits")
# ax.set_title("CD40 Primary Screen")
# ax.set_xlabel("Concentration (mg/ml)")
# ax.set_ylabel("MFI Ratio \n Transfected/Untransfected")
# ax.legend()

# texts = []
# for row in Top10.itertuples(index = False):
#     ab, conc, MFI = row[4], row[5], row[3]
#     ax.annotate(row[4], xy = (conc, MFI), textcoords = "offset points", xytext = (8, -2), ha ='left')
#     #texts.append(plt.text(ab, conc, MFI))