In [None]:
import numpy as np
import pandas as pd
import os

In [None]:
# Locations of the raw qPCR export and of the matching plate diagram.
# NOTE(review): both values are empty placeholders and nothing is read from the
# environment in this cell -- presumably they are filled in before execution.
RAW_SAMPLE_1 = ""
RAW_SAMPLE_1_DIAGRAM = ""

# Load both workbooks into DataFrames: the first sheet of the raw data file,
# and the plate diagram file with pandas' default sheet selection.
df_data = pd.read_excel(io=RAW_SAMPLE_1, sheet_name=0)
df_diagram = pd.read_excel(io=RAW_SAMPLE_1_DIAGRAM)

# Preview the plate diagram (first 5 rows)
df_diagram.head(5)

In [None]:
# Normalise the plate diagram: the first column (the row labels) becomes the
# index, all whitespace inside cell text is removed, and a single space is then
# re-inserted in front of "dup" so duplicate wells read "<sample> dup".
df_diagram = df_diagram.set_index(df_diagram.columns[0])
# Raw string: '\s' inside a plain string literal is an invalid escape sequence.
df_diagram = df_diagram.replace(r'\s+', '', regex=True)
df_diagram = df_diagram.replace('dup', ' dup', regex=True)

# Collect the plate columns, i.e. those whose header can be read as an integer.
# set_index() above already removed the row-label column from .columns, so no
# further column is skipped here: slicing with [1:] would drop the first plate
# column and leave all of its wells unmapped. Headers that are not integers
# cannot form a well ID and are ignored.
plate_columns = []
for col in df_diagram.columns:
    try:
        plate_columns.append((col, int(col)))
    except (TypeError, ValueError):
        continue

# Build the Well ID -> sample name lookup. A well ID is the row label followed
# by the zero-padded, two-digit column number (e.g. row "A", column 1 -> "A01").
sample_map = {}

for row in df_diagram.index:
    for col, col_number in plate_columns:
        sample_map[f"{row}{col_number:02d}"] = df_diagram.loc[row, col]

# Attach the sample name to every raw qPCR row through its Well ID; wells that
# are missing from the lookup get NaN.
df_data["Sample"] = df_data["Well"].map(sample_map)

# Show first 5 rows
df_data.head()

In [None]:
# Keep only the columns needed downstream, add the mtDNA1 / mtDNA2 columns and
# drop rows with missing values.
#
# The whole step is one chain that produces a new frame, so nothing is assigned
# into a slice of df_data (which triggers pandas' SettingWithCopyWarning).
# mtDNA1 / mtDNA2 start out as constant, non-null placeholder strings, so the
# dropna() only removes rows that lack a Well, Sample or Cq value.
df = (
    df_data[['Well', 'Cq', 'Sample']]
    .assign(mtDNA1="mtDNA1", mtDNA2="mtDNA2")
    .loc[:, ["Well", "Sample", "mtDNA1", "mtDNA2", "Cq"]]
    .dropna()
)

# Show first 5 rows
df.head()

In [None]:
# Pair every sample with its duplicate: mtDNA1 is the row's own Cq and mtDNA2 is
# the Cq of the row whose sample name is exactly "<Sample> dup".
#
# Note: the match is exact, so names such as "<Sample> dup **" (or any other
# addition to the name) are not treated as the duplicate.

# Cq of the first row found for each sample name. Keeping the first occurrence
# means a repeated "<Sample> dup" name resolves to its first row.
cq_by_sample = df.drop_duplicates(subset='Sample').set_index('Sample')['Cq']

# Whole-column assignment replaces a row-by-row loop that rescanned the frame
# for every row; samples without a matching duplicate get NaN from map(), so no
# explicit NaN constant (np.NAN is unavailable in NumPy 2.x) is needed, and both
# columns take their dtype from Cq instead of staying object-typed.
df = df.assign(
    mtDNA1=df['Cq'],
    mtDNA2=(df['Sample'] + ' dup').map(cq_by_sample),
)

# Cq is now redundant; rows without a duplicate (NaN in mtDNA2) are removed,
# which also removes the "<Sample> dup" rows themselves unless a
# "<Sample> dup dup" entry exists.
df = df.drop(columns=['Cq'])
df = df.dropna()

# calculate standard deviation of each row (pandas default, ddof=1)
df['St.Dev'] = df[['mtDNA1', 'mtDNA2']].std(axis=1)

# Show first 5 rows
df.head()

In [None]:
# Print a warning for every sample whose standard deviation between mtDNA1 and
# mtDNA2 is greater than the threshold below.
ST_DEV_WARNING_THRESHOLD = .22

# Only the offending rows are visited. iterrows() yields (index label, row), so
# the row Series is read directly instead of being looked up again via df.loc.
# All three numbers are rounded to 3 digits so the message is consistent.
for _, record in df.loc[df['St.Dev'] > ST_DEV_WARNING_THRESHOLD].iterrows():
    print(f"\n Warning: Standard deviation for {record['Sample']} is {round(record['St.Dev'], ndigits=3)} "
          f"(Sample 1: {round(record['mtDNA1'], ndigits=3)} vs Sample 2: {round(record['mtDNA2'], ndigits=3)}) \n")

In [None]:
# Order the rows from the largest to the smallest St.Dev and replace the index
# with a fresh 0..n-1 range.
df = df.sort_values(by='St.Dev', ascending=False).reset_index(drop=True)

# Write the table to an Excel workbook; the index is not written.
df.to_excel("50_gcr_random_name_test_output.xlsx", index=False)