In [1]:
import pandas as pd, ipywidgets as widgets, numpy as np, matplotlib.pyplot as plt
import io as io
#from adjustText import adjust_text
raw_uploader, eln_uploader = widgets.FileUpload(multiple = False), widgets.FileUpload(multiple = False)
display("Upload iQue3 Data (csv)")
display(raw_uploader)
display("Upload ELN w/ Antibodies Sheet (xlsx)")
display(eln_uploader)

'Upload iQue3 Data (csv)'

FileUpload(value={}, description='Upload')

'Upload ELN w/ Antibodies Sheet (xlsx)'

FileUpload(value={}, description='Upload')

In [2]:
iquename, elnname = list(raw_uploader.value.keys())[0], list(eln_uploader.value.keys())[0]
iquedata = pd.read_csv(io.BytesIO(raw_uploader.value[iquename]['content']))
eln = pd.read_excel(io.BytesIO(eln_uploader.value[elnname]['content']))
ab_sheet = pd.read_excel(io.BytesIO(eln_uploader.value[elnname]['content']), sheet_name = "Antibody Plate Map")
iquedata

Unnamed: 0,Experiment,Analysis,Plate,Export Date and Time,Well ID,Row,Column,Well Type,Count of All Wells,Median BL1-H of singlets,Median RL1-H of singlets,Count of singlets
0,22_09_06 A2a LM Variants Primary Screen Norm +...,Analysis 1,Plate 1,9/6/22 12:43,A01,A,1,A2a,5535,10990492.0,494946.0,1332
1,22_09_06 A2a LM Variants Primary Screen Norm +...,Analysis 1,Plate 1,9/6/22 12:43,B01,B,1,A2a,5537,10129510.0,460304.5,1098
2,22_09_06 A2a LM Variants Primary Screen Norm +...,Analysis 1,Plate 1,9/6/22 12:43,C01,C,1,A2a,5279,11871181.0,517101.0,1137
3,22_09_06 A2a LM Variants Primary Screen Norm +...,Analysis 1,Plate 1,9/6/22 12:43,D01,D,1,A2a,4776,10783093.0,382650.0,1221
4,22_09_06 A2a LM Variants Primary Screen Norm +...,Analysis 1,Plate 1,9/6/22 12:43,E01,E,1,A2a,5053,11023892.0,379285.0,1101
...,...,...,...,...,...,...,...,...,...,...,...,...
295,22_09_06 A2a LM Variants Primary Screen Norm +...,Analysis 1,Plate 1,9/6/22 12:43,P22,P,22,Negative,1455,11374.5,19065.0,668
296,22_09_06 A2a LM Variants Primary Screen Norm +...,Analysis 1,Plate 1,9/6/22 12:43,A23,A,23,Negative,1033,13026.0,21227.0,121
297,22_09_06 A2a LM Variants Primary Screen Norm +...,Analysis 1,Plate 1,9/6/22 12:43,B23,B,23,Negative,4885,12612.0,13625.0,3091
298,22_09_06 A2a LM Variants Primary Screen Norm +...,Analysis 1,Plate 1,9/6/22 12:43,C23,C,23,Negative,2706,13100.0,16379.5,1484


In [3]:
data_cleaned = iquedata[["Plate", "Well ID", "Well Type", "Median RL1-H of singlets", "Count of singlets"]]
data_cleaned
data_cleaned

Unnamed: 0,Plate,Well ID,Well Type,Median RL1-H of singlets,Count of singlets
0,Plate 1,A01,A2a,494946.0,1332
1,Plate 1,B01,A2a,460304.5,1098
2,Plate 1,C01,A2a,517101.0,1137
3,Plate 1,D01,A2a,382650.0,1221
4,Plate 1,E01,A2a,379285.0,1101
...,...,...,...,...,...
295,Plate 1,P22,Negative,19065.0,668
296,Plate 1,A23,Negative,21227.0,121
297,Plate 1,B23,Negative,13625.0,3091
298,Plate 1,C23,Negative,16379.5,1484


In [4]:
#Function that ensures a given antibody platemap is formatted correctly (by columns, not rows)
#ie A1->B1-->H1->A2-->B2-->A12-->H12
#Currently only working for single platemap, not multiple platemaps merged and identified by a plate number
#First, separate the Well ID into separate Row and Column values (Row is Letter, Column is Number)
#Sort the platemap into proper Column -> Row format
#Returns a new dataframe that is exactly ab_sheet, but sorted/formatted correctly
def format_platemap():
    ab_sheet["Format Row"] = ab_sheet["Well ID"].str.slice(start = 0, stop = 1)
    numeric_col = ab_sheet["Well ID"].str.slice(start = 1)
    numeric_col = pd.to_numeric(numeric_col)
    ab_sheet["Format Column"] = numeric_col
    ab_sheet.sort_values(by=["Format Column", "Format Row"], inplace=True, ignore_index = True)
    return

#Return data frame with only samples of the well type specified
def clean_for_well_type(well_type_string):
    return data_cleaned[data_cleaned["Well Type"] == well_type_string]

#Average function that takes a string indicating the well type of interest (marked in iQue3 software, or manually changed in csv)
#Also takes a string indicating the data from the csv to be averaged (to_average_string)
#Separates n=1 and n=2 replicates from 384 well format, resets indeces, and averages
#Returns a series with the averaged MFI ratios, removing the replicate wells
# def average_MFI(well_type_string):
#     selected_wells = clean_for_well_type(well_type_string)
#     MFIs = selected_wells["Median RL1-H of singlets"]
#     firsts = MFIs.iloc[::2]
#     seconds = MFIs.iloc[1::2]
#     firsts.reset_index(inplace = True, drop = True)
#     seconds.reset_index(inplace = True, drop = True)
#     average = firsts + seconds
#     average = average/2
#     return average

def average(well_type_string, to_average_string):
    selected_wells = clean_for_well_type(well_type_string)
    values_to_average = selected_wells[to_average_string]
    firsts = values_to_average.iloc[::2]
    seconds = values_to_average.iloc[1::2]
    firsts.reset_index(inplace = True, drop = True)
    seconds.reset_index(inplace = True, drop = True)
    averaged_series = firsts + seconds
    averaged_series = averaged_series/2
    return averaged_series

#Appends the MFI of a series to the original data frame
#Calculates the average cell count for each duplicate, and appends average cell count to original data frame
def create_analysis_sheet(to_add_name, to_add):
    original_positive = clean_for_well_type("Positive")
    original_positive = original_positive[::2]
    original_positive.reset_index(inplace = True, drop = True)
    original_positive[to_add_name] = to_add
    averagePositiveCounts = average("Positive", "Count of singlets")
    original_positive["Cell counts"] = averagePositiveCounts 
    MFIs_Antibody = pd.concat([original_positive, ab_sheet], axis = 1)
    MFIs_Antibody.index += 1
    return MFIs_Antibody[["Plate", "Well ID", "Well Type", to_add_name, "DNA", "Cell counts",
                          "Concentration", "Format Row", "Format Column"]] 

#Calculates the MFI ratio given a positive and negative series, 
#and appends the MFI ratio to the original data frame, excluding the negative population
#Calculates the average cell count for each duplicate, and appends average cell count to original data frame
# def calculate_analysis_sheet(positive, negative):
#     MFI_ratio = positive/negative
#     original_positive = clean_for_well_type("Positive")
#     original_positive = original_positive[::2]
#     original_positive.reset_index(inplace = True, drop = True)
#     original_positive["MFI Ratio"] = MFI_ratio
#     MFIs_Antibody = pd.concat([original_positive, ab_sheet], axis = 1)
#     MFIs_Antibody.index += 1
#     averagePositiveCounts = average("Positive", "Count of singlets")
#     return MFIs_Antibody[["Plate", "Well ID", "Well Type", "MFI Ratio", "DNA", "Format Row", "Format Column"]] 

In [5]:
#Create average positive series and average negative series
averagePositive = average("A2a", "Median RL1-H of singlets")
averageNegative = average("Negative", "Median RL1-H of singlets")
ratio = averagePositive/averageNegative

#Format the antibody platemap
format_platemap()

#Create new DF for MFI ratios(don't modify originals)
to_add_name = "MFI of Transfected"
analyzed_data = create_analysis_sheet(to_add_name, averageNegative)
analyzed_data.to_excel(elnname, sheet_name='Analyzed Data')
AllLeadsSorted = analyzed_data.sort_values(by=to_add_name, ascending = False)
analyzed_data
#AllLeadsSorted

Unnamed: 0,Plate,Well ID,Well ID.1,Well Type,MFI of Transfected,DNA,Cell counts,Concentration,Format Row,Format Column
1,,,A1,,186739.75,TB754-H_control + TB754-L_control_A1,,0.015,A,1
2,,,B1,,188332.5,TB754_H1-few + TB754-L_control,,0.015,B,1
3,,,C1,,237270.5,TB754_H2-few + TB754-L_control,,0.015,C,1
4,,,D1,,276048.0,TB754_H3-few + TB754-L_control,,0.015,D,1
5,,,E1,,113669.25,TB754_H4-few + TB754-L_control,,0.015,E,1
6,,,F1,,494566.75,TB754_H5-few + TB754-L_control,,0.015,F,1
7,,,G1,,125506.0,TB754_H6-few + TB754-L_control,,0.015,G,1
8,,,H1,,632190.0,TB754_H8-zero + TB754-L_control,,0.015,H,1
9,,,A2,,196929.5,TB754-H_control + TB754_L1-few,,0.015,A,2
10,,,B2,,206132.5,TB754-H_control + TB754_L2-few,,0.015,B,2


In [None]:
# #Sort DF by highest MFI ratio (top hits)
# AllLeadsSorted = MFIRatio_Antibody_cleaned.sort_values(by="MFI Ratio", ascending = False)
# Top10 = MFIRatio_Antibody_cleaned.nlargest(10, "MFI Ratio")
# #Top10 = Top10.append(MFIRatio_Antibody_cleaned.loc[MFIRatio_Antibody_cleaned["DNA"] == "SC-175-017"])
# fig, ax = plt.subplots()
# ax.scatter(x = AllLeadsSorted["Protein Concentration (ug/uL)"], y=AllLeadsSorted["MFI Ratio"], alpha = .2, color = "blue")
# ax.scatter(x = Top10["Protein Concentration (ug/uL)"], y = Top10["MFI Ratio"], alpha = .8, color = "blue", label = "Top Hits")
# ax.set_title("CD40 Primary Screen")
# ax.set_xlabel("Concentration (mg/ml)")
# ax.set_ylabel("MFI Ratio \n Transfected/Untransfected")
# ax.legend()

# texts = []
# for row in Top10.itertuples(index = False):
#     ab, conc, MFI = row[4], row[5], row[3]
#     ax.annotate(row[4], xy = (conc, MFI), textcoords = "offset points", xytext = (8, -2), ha ='left')
#     #texts.append(plt.text(ab, conc, MFI))