In [None]:
# This notebook performs the following:
### Imports raw intensities (RFU) exported from Magellan software for DNA quantification, where:
##### Q1 is the standard, laid out in a zig-zag pattern, and
##### Q2-Q4 are triplicate reads of each sample;
### Allows the removal of outlier samples;
### Generates a standard curve;
### Calculates the concentration of the samples; and
### Generates a GWL file for normalizing the samples to a specified concentration.

In [None]:
# IMPORTS
import pandas
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Run configuration: plate metadata, file locations, and dilution targets.
# Edit the values in this cell before running the rest of the notebook.

# Unique name of the pDNA plate being quantified (appears in the plot titles).
PlateName = 'Plasmid Libary MAM92, Raw Quant'

# Column name in the first cell of the CSV; defaults to "<>".
header = "<>"

# CSV file from the Magellan software containing the intensities data.
quant_fn = "/Users/jeffrey.quinn/Desktop/DNA Quant/20220711_MAM92_Quant1.csv"
# Where the final GWL will be saved.
out_fn = "/Users/jeffrey.quinn/Desktop/DNA Quant/20220711_MAM92_Quant1.gwl"

# ng/uL concentration of lambda DNA in each standard; modify as needed.
stdConc = [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

# Target concentration (ng/uL) and total volume (uL) of the diluted DNA plate.
finalC = 12.5
totalV = 60

In [None]:
# Import the quantitation file. The CSV column named by `header` becomes the
# index (plate row letters); the remaining column labels are read as strings.
quant_file = pandas.read_csv(quant_fn, index_col=header)

# Define row and column sets for each quadrant of the 384-well plate:
# odd/even plate columns and every other plate row.
alphabet = 'ABCDEFGHIJKLMNOP'
Q13_cols = [str(i) for i in range(1, 25, 2)]
Q24_cols = [str(i) for i in range(2, 25, 2)]
Q12_rows = list(alphabet[0::2])
Q34_rows = list(alphabet[1::2])


def _quadrant(rows, cols):
    """Return the wells at rows x cols of quant_file as an independent float frame.

    A float copy is used because later cells overwrite individual wells with
    NaN; writing NaN into an integer-typed slice of quant_file would trigger
    pandas' SettingWithCopy / incompatible-dtype warnings.
    """
    return quant_file.loc[rows, cols].astype(float).copy()


# Subset quadrants from the 384-well plate
Q1 = _quadrant(Q12_rows, Q13_cols)
Q2 = _quadrant(Q12_rows, Q24_cols)
Q3 = _quadrant(Q34_rows, Q13_cols)
Q4 = _quadrant(Q34_rows, Q24_cols)

### Calculate AVG and STDEV across three quadrants
# First, reorder the samples in Q1 (standard): the rows at positions 1, 3, 5, 7
# are flipped left-to-right and given Q1's column labels, then merged back with
# the untouched rows (positions 0, 2, 4, 6) in row-label order.
flipped_positions = [1, 3, 5, 7]
untouched_positions = [0, 2, 4, 6]
Q1_inv = Q1.iloc[flipped_positions, ::-1].copy()
Q1_inv.columns = Q1.columns
Q1_ord = pandas.concat([Q1_inv, Q1.iloc[untouched_positions, :]]).sort_index()

# Plot quadrants on a 2x2 grid (row-major: Q1_ord, Q2, Q3, Q4)
fig, axs = plt.subplots(2, 2, figsize=(8,6))
quadrant_panels = [("Q1_ord", Q1_ord), ("Q2", Q2), ("Q3", Q3), ("Q4", Q4)]
quadrant_images = []
for panel_ax, (panel_name, panel_data) in zip(axs.flat, quadrant_panels):
    quadrant_images.append(panel_ax.imshow(panel_data, cmap="viridis"))
    panel_ax.set_title(panel_name)
q1, q2, q3, q4 = quadrant_images
plt.tight_layout()
plt.show()

# Take AVG and STDEV of the three sample quadrants (Q2, Q3, Q4), well by well.
# The replicates are stacked into one (3, rows, cols) float array so the
# statistics are computed in a single vectorized step and the result frames are
# float from the start (no int -> float upcasting on assignment).
sample_rows = ["A","B","C","D","E","F","G","H"]
sample_cols = range(1,13)
replicate_stack = np.stack([
    Q2.to_numpy(dtype=float),
    Q3.to_numpy(dtype=float),
    Q4.to_numpy(dtype=float),
])

Sample_Intensities = pandas.DataFrame(replicate_stack.mean(axis=0), index=sample_rows, columns=sample_cols)
# np.std default (ddof=0, population standard deviation), as in the original loop
Sample_Devs = pandas.DataFrame(replicate_stack.std(axis=0), index=sample_rows, columns=sample_cols)

# Visualize AVG and CV (CV = standard deviation / mean)
fig, axs = plt.subplots(1,2, figsize=(15,5))
p1 = axs[0].imshow(Sample_Intensities, cmap="viridis")
p1title = axs[0].set_title("Average Sample Intensity, 3 Replicates")
fig.colorbar(p1, ax=axs[0])
p2 = axs[1].imshow(Sample_Devs/Sample_Intensities, cmap="viridis")
p2title = axs[1].set_title("CV of Sample Intensity, 3 Replicates")
fig.colorbar(p2, ax=axs[1])
plt.show()

In [None]:
# Drop Outlier Samples
## Each outlier is given as a (row, column) position, as used by iloc;
## e.g. (6, 3) listed for Q2 will delete sample G4 from quadrant Q2.
## Standard samples are deleted from Q1_ord.

standard_outliers = [(1, 3), (6, 5)]
for out_row, out_col in standard_outliers:
    Q1_ord.iloc[out_row, out_col] = np.nan

sample_outliers = [(Q3, (0, 0)), (Q2, (4, 0)), (Q4, (7, 11))]
for quadrant, (out_row, out_col) in sample_outliers:
    quadrant.iloc[out_row, out_col] = np.nan

In [None]:
# Take new AVG and STDEV of three quadrants (outliers removed).
# Wells set to NaN are ignored by nanmean/nanstd; a sample whose three
# replicates are all NaN comes out as NaN (numpy emits a RuntimeWarning).
# The frames are rebuilt here, so this cell does not depend on frames created
# by an earlier cell.
sample_rows = ["A","B","C","D","E","F","G","H"]
sample_cols = range(1,13)
replicate_stack = np.stack([
    Q2.to_numpy(dtype=float),
    Q3.to_numpy(dtype=float),
    Q4.to_numpy(dtype=float),
])

Sample_Intensities = pandas.DataFrame(np.nanmean(replicate_stack, axis=0), index=sample_rows, columns=sample_cols)
# np.nanstd default (ddof=0, population standard deviation), as in the original loop
Sample_Devs = pandas.DataFrame(np.nanstd(replicate_stack, axis=0), index=sample_rows, columns=sample_cols)

# Visualize AVG and CV (CV = standard deviation / mean)
fig, axs = plt.subplots(1,2, figsize=(15,5))
p1 = axs[0].imshow(Sample_Intensities, cmap="viridis")
p1title = axs[0].set_title("Average Sample Intensity, 3 Replicates")
fig.colorbar(p1, ax=axs[0])
p2 = axs[1].imshow(Sample_Devs/Sample_Intensities, cmap="viridis")
p2title = axs[1].set_title("CV of Sample Intensity, 3 Replicates")
fig.colorbar(p2, ax=axs[1])
plt.show()

In [None]:
# Plot standard curve (without dropping)
# x holds the column-wise mean RFU of the reordered standard (NaNs skipped).
x = list(Q1_ord.mean(axis=0))

# The line is fitted to the first 10 standards only; all points are plotted.
fit_rfu, fit_conc = x[0:10], stdConc[0:10]
m, b = np.polyfit(fit_rfu, fit_conc, 1)

curve_fig, curve_ax = plt.subplots()
curve_ax.scatter(x, stdConc, c='k')
curve_ax.plot(x, m*np.asarray(x) + b, c='r')
curve_ax.set_xlabel("RFU")
curve_ax.set_ylabel("DNA Conc.")
curve_ax.set_title("Standard Curve (without dropping)")
plt.show()

In [None]:
# Which standards do you want to exclude?
## List the 0-based positions of the standards to leave out of the fit;
## e.g. if the first 6 samples are linear, but the final 6 are bad, use:
    #excluded_standards = range(6, 12)
# Also, you can remove regions of the standard curve that are not relevant to your range of concentrations;
## e.g. if your samples never register above 20000 RFU, you can exclude standards above that range.

excluded_standards = range(8, 12)

# x and stdConc are left untouched (the kept points go into new lists), so this
# cell and the previous one can be re-run in any order without the two lists
# drifting out of sync.
if len(x) != len(stdConc):
    raise ValueError(
        "x has %d points but stdConc has %d; re-run the configuration and "
        "standard-curve cells before cropping" % (len(x), len(stdConc))
    )
excluded_positions = set(excluded_standards)
x_fit = [rfu for pos, rfu in enumerate(x) if pos not in excluded_positions]
stdConc_fit = [conc for pos, conc in enumerate(stdConc) if pos not in excluded_positions]

# Replot standard curve (with dropping); m and b are the slope and intercept
# used by the following cells to convert RFU to concentration.
plt.scatter(x_fit, stdConc_fit, c='k')
m,b = np.polyfit(x_fit, stdConc_fit, 1)
plt.plot(x_fit, m*np.asarray(x_fit) + b, c='r')
plt.xlabel("RFU")
plt.ylabel("DNA Conc.")
plt.title("Standard Curve (cropped)")
plt.show()

In [None]:
# Calculate concentrations from the fitted line (Conc = m*RFU + b)
Conc = m*Sample_Intensities+b
# A standard deviation scales with the slope only; the intercept shifts the
# mean, not the spread, so it must not be added here.
Conc_Devs = abs(m)*Sample_Devs

# Linearize the data: one row per well, walking the plate column by column
# (A1, B1, ..., H1, A2, ...). Records are collected first and the frame built
# once; DataFrame.append and Series.iteritems no longer exist in pandas 2.x.
well_records = [
    {'Pos': row_label + str(col_label), 'Conc': well_conc}
    for col_label, column in Conc.items()
    for row_label, well_conc in column.items()
]
DF = pandas.DataFrame(well_records, columns=['Pos','Conc'])

# Calculate the volume of DNA and EB required for totalV @ finalC ng/uL
# (uL_DNA * Conc = totalV * finalC; EB makes up the remaining volume)
DF['uL_DNA'] = (totalV*finalC / DF['Conc']).round(2)
DF['uL_EB'] = (totalV - DF['uL_DNA']).round(2)
DF.head()

In [None]:
# Generate final plots for presentation
rows = list("ABCDEFGH")
cols = list(range(1, 13))

# Heatmap of the plate, with tick labels matching the plate rows / columns
fig, ax = plt.subplots(figsize=(7,5))
im = ax.imshow(Conc, cmap="viridis")
ax.set_title("pDNA Concentration [ng/uL], "+PlateName)
ax.set_yticks(np.arange(len(rows)))
ax.set_yticklabels(rows)
ax.set_xticks(np.arange(len(cols)))
ax.set_xticklabels(cols)
fig.colorbar(im, ax=ax, shrink=0.75)
plt.show()

# Plot Histogram of Concentrations (50 bins over 0-60)
hist = plt.hist(DF.Conc, bins=50, range=(0,60), color='k')
title = plt.title("pDNA Concentration, "+PlateName)
xlab = plt.xlabel("pDNA Concentration (ng/uL)")
ylab = plt.ylabel("Frequency")

In [None]:
## Negative volume handling
# Cannot pipette volumes less than or equal to 0, so any well that cannot be
# diluted to finalC gets totalV-1 uL of DNA plus 1 uL of EB instead.

# A well needs the fallback when its concentration is at or below finalC
# (this also covers zero / negative concentrations), or when rounding the
# volumes to 2 decimals has left no EB to pipette.
needs_fallback = (DF['Conc'] <= finalC) | (DF['uL_EB'] <= 0)

for well_pos, well_conc in zip(DF.loc[needs_fallback, 'Pos'], DF.loc[needs_fallback, 'Conc']):
    if well_conc < finalC:
        print(well_pos, "is below", finalC, "ng/uL!")
    else:
        print(well_pos, "leaves no EB volume at", finalC, "ng/uL!")

DF.loc[needs_fallback, 'uL_DNA'] = totalV-1
DF.loc[needs_fallback, 'uL_EB'] = 1

# Wells with no concentration (NaN) are left unchanged but reported, because
# their volumes are NaN as well and would otherwise pass through unnoticed.
for well_pos in DF.loc[DF['Conc'].isna(), 'Pos']:
    print(well_pos, "has no measured concentration; its volumes are NaN!")

DF.head()

In [None]:
# Make A/D commands for GWL script

def _gwl_lines(prefix, volumes):
    """Build one command string per well: prefix + well position + ';;' + volume + ';'."""
    return prefix + DF.Pos + ";;" + volumes.astype(str) + ";"

# First, make commands for EB: aspirate from Trough_EB, dispense into pDNA_P1_Dil
DF['A_EB'] = _gwl_lines("A;Trough_EB;;;", DF.uL_EB)
DF['D_EB'] = _gwl_lines("D;pDNA_P1_Dil;;;", DF.uL_EB)
DF['W_EB'] = "F;"

# Drop tips (W, not F) for the last column of samples:
# rows from index label 88 onward (assumes 96 wells listed column by column)
last_column_start = 88
DF.loc[last_column_start:, 'W_EB'] = "W;"

# Second, make commands for DNA: aspirate from pDNA_P1, dispense into pDNA_P1_Dil
DF['A_DNA'] = _gwl_lines("A;pDNA_P1;;;", DF.uL_DNA)
DF['D_DNA'] = _gwl_lines("D;pDNA_P1_Dil;;;", DF.uL_DNA)
DF['W_DNA'] = "W;"

# Check file
DF.head()

In [None]:
# Write the GWL file out
EB_COMMANDS = ["A_EB", "D_EB", "W_EB"]
DNA_COMMANDS = ["A_DNA", "D_DNA", "W_DNA"]

with open(out_fn,'w+') as outfile:
    # All EB A/D cycles are written first, then all DNA A/D cycles;
    # each well contributes its three command lines in column order.
    for command_columns in (EB_COMMANDS, DNA_COMMANDS):
        for well_commands in DF[command_columns].itertuples(index=False, name=None):
            outfile.writelines(command + "\n" for command in well_commands)