# PCR report Work In Progress :)

In [1]:
DATA_DIR = './data'

In [2]:
from pcrep import parse_input

input = '230901_GN004308-086_20230901_112734_186.csv'
dc = parse_input.parse_inputname(input)
dc

{'date': '230901',
 'gn': 'GN004308-086',
 'dateex': '20230901',
 'time': '112734',
 'n': '186',
 'ext': 'csv'}

In [3]:
# 230901_GN004308-086.md
out_name = dc['date'] + dc['gn'] + '.md'
out_name

'230901GN004308-086.md'

In [4]:
from pcrep.pcrep import well2idx
wrc = well2idx('A02')
print(wrc)

('A', 2)


In [5]:
import pandas as pd
import os

INPUT_PCR_DATA = "./example/230901_GN004308-086/230901_GN004308-086_20230901_112734_186.csv"
df = pd.read_csv(INPUT_PCR_DATA, delimiter=';')
df = df.replace(',', '.', regex=True)
CONC_NAME = 'Conc(copies/µL)'
df[CONC_NAME] = df[CONC_NAME].astype('Float64')
# df.info()

DIL_FINAL_FACTOR_NAME = 'final dilution factor'
DIL_SAMPLE_DESCRIPTION_NAME = 'Sample name'
DIL_TYPE_NAME = 'type'


def get_dir(path_name):
    if not os.path.isfile(path_name):
        raise Exception('Not file!')
    return os.path.split(path_name)[0]


WORK_DIR = get_dir(INPUT_PCR_DATA)
display(WORK_DIR)

'./example/230901_GN004308-086'

In [6]:
INPUT_CONCENTRATION_DATA = "./example/230901_GN004308-086/230901_GN004308-086_conc.csv"
df_conc = pd.read_csv(INPUT_CONCENTRATION_DATA)

df_conc.set_index(['sample_id'], inplace=True)
df_conc

Unnamed: 0_level_0,type,Sample name,final dilution factor
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,nc,Negative control nuclease free dH20,100.0
2,pc,Plasmid control (pXL029_SacI),28600.0
3,rc,Reference control (PP073_1933_FDP),50000.0
4,s,1.1A,500000.0
5,s,2.12,50000.0
6,s,2.19,500000.0
7,s,5.35A,500000.0
8,s,5.36A,500000.0
9,s,5.37A,500000.0
10,s,5.38A,500000.0


In [7]:
df[["wr", "wc"]] = df.apply(lambda x: well2idx(
    x['Well']), axis='columns', result_type='expand')
df.set_index(['wr', 'wc'], inplace=True)
# df

In [8]:
FDL_NAME = "final dilution factor"
SAMPLE_NAME = "Sample"
TYPE_NAME = "sample type"
SAMPLE_NUM_NAME = "Sample description 1"

df.loc[:, [FDL_NAME]] = df[SAMPLE_NUM_NAME].map(
    df_conc[DIL_FINAL_FACTOR_NAME], na_action='ignore')

df.loc[:, [SAMPLE_NAME]] = df[SAMPLE_NUM_NAME].map(
    df_conc[DIL_SAMPLE_DESCRIPTION_NAME], na_action='ignore')

df.loc[:, [TYPE_NAME]] = df[SAMPLE_NUM_NAME].map(
    df_conc[DIL_TYPE_NAME], na_action='ignore')

df = df.dropna(subset=['sample type'])

## Compute results

In [9]:
def result_fn(conc, dil, a=20.0, b=2.0):
    """Compute results

    Parameters:
    conc : float
    dil : float
        final dilution factor of the sample
    a : float
        ddPCR Volume 20 µL
    b : float
        Sample volume in the ddPCR reaction 2 µL
    """
    return ((a * conc) * (1000.0 / b)) * dil


WELL_RESULT_NAME = 'vg/ml'
df.loc[:, [WELL_RESULT_NAME]] = df.apply(lambda x: result_fn(
    x['Conc(copies/µL)'], x['final dilution factor']), axis=1)

### Limits

In [10]:
PLASMID_CONTROL_LIMITS_FILE = 'plasmid_control_limits.csv'
palsmid_control_limits = pd.read_csv(
    os.path.join(DATA_DIR, PLASMID_CONTROL_LIMITS_FILE))
palsmid_control_limits.set_index(['Target'], inplace=True)
palsmid_control_limits

Unnamed: 0_level_0,mean,upper 2s warning,lower 2s warning,upper 3s action,lower 3s action
Target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
IDT,62000000000.0,75600000000.0,48400000000.0,82400000000.0,41600000000.0
FVIII,20400000000.0,31200000000.0,9660000000.0,36600000000.0,4270000000.0
HT2,192000000000.0,233000000000.0,151000000000.0,254000000000.0,131000000000.0
ITR,59400000000.0,73000000000.0,45900000000.0,79700000000.0,39100000000.0
FIX,93200000000.0,141000000000.0,45800000000.0,164000000000.0,22000000000.0


In [11]:
REFERENCE_CONTROL_LIMITS_FILE = 'reference_control_limits.csv'
reference_control_limits = pd.read_csv(
    os.path.join(DATA_DIR, REFERENCE_CONTROL_LIMITS_FILE))
reference_control_limits.set_index(['Target'], inplace=True)
reference_control_limits

Unnamed: 0_level_0,mean,upper 2s warning,lower 2s warning,upper 3s action,lower 3s action
Target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
IDT,132000000000.0,171000000000.0,92000000000.0,191000000000.0,72300000000.0
ITR,187000000000.0,230000000000.0,144000000000.0,251000000000.0,123000000000.0
ITR - IDT,187000000000.0,230000000000.0,144000000000.0,251000000000.0,123000000000.0
FVIII,903000000000.0,1140000000000.0,670000000000.0,1250000000000.0,553000000000.0
ITR - FVIII,517000000000.0,603000000000.0,430000000000.0,647000000000.0,387000000000.0
HT2,191000000000.0,227000000000.0,155000000000.0,245000000000.0,137000000000.0
FIX,172000000000.0,224000000000.0,121000000000.0,249000000000.0,95100000000.0


How to access limits

In [12]:
rcl = reference_control_limits
lmts = rcl.loc['IDT']
lmts['upper 3s action']

191000000000.0

In [13]:
from pcrep.check import METHOD_LIMIT_MULTIPLIER_NEGATIVE_CONTROL
METHOD_TARGET_ID = 'IDT'
METHOD_LIMIT_MULTIPLIER = 1.0e3  # conversion μl -> ml
method_limits = pd.read_csv('./data/method_limits.csv')

# method_limits['Lower [vg/μl]'] = method_limits['Lower [vg/μl]'].multiply(
#     METHOD_LIMIT_MULTIPLIER)
# method_limits.rename(columns={"Lower [vg/μl]": "Lower [vg/ml]"}, inplace=True)

# method_limits['Upper [vg/μl]'] = method_limits['Upper [vg/μl]'].multiply(
#     METHOD_LIMIT_MULTIPLIER)
# method_limits.rename(columns={"Upper [vg/μl]": "Upper [vg/ml]"}, inplace=True)

method_limits.set_index(['target_id'], inplace=True)
display(method_limits)

dc_limits = {'method': method_limits, 'reference_control': reference_control_limits,
             'plasmid_control': palsmid_control_limits}

Unnamed: 0_level_0,Target,Lower [vg/μl],Upper [vg/μl]
target_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
FIX,FIX (TAK-748),17,2675
HT2,Huntington (TAK-686),10,4690
IDT,ID tag,15,3360
FVIII-ITR,FVIII/ITR Duplex (TAK-754 and TAK-709),15,3360
IDT-ITR,ID tag/ITR Duplex,15,3360
ITR,ITR,15,3360
FXN,FXN,15,3360


In [14]:
method_limits.loc['IDT']['Lower [vg/μl]']
mlmts = method_limits.loc['IDT']
mlmts['Lower [vg/μl]']

15

In [15]:
print(method_limits.loc[METHOD_TARGET_ID]['Lower [vg/μl]'],
      method_limits.loc[METHOD_TARGET_ID]['Upper [vg/μl]'])

negative_control_limit = method_limits.loc[:,
                                           'Lower [vg/μl]'][METHOD_TARGET_ID] * METHOD_LIMIT_MULTIPLIER_NEGATIVE_CONTROL
negative_control_limit

15 3360


1.5

In [16]:
from pcrep.check import method_check_routing

METHOD_CHECK_COLNAME = 'Conc(copies/µL)'


def method_check_fn(s):
    return method_check_routing(dc_limits['method'], s['sample type'],
                                s[METHOD_CHECK_COLNAME], s['Target'])


VALUE_CHECK_NAME = 'method_check'
df.loc[:, [VALUE_CHECK_NAME]] = df.apply(lambda x: method_check_fn(x), axis=1)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Well,Sample description 1,Sample description 2,Sample description 3,Sample description 4,Target,Conc(copies/µL),Status,Experiment,SampleType,...,Supermix,DyeName(s),Accepted Droplets,Positives,Negatives,final dilution factor,Sample,sample type,vg/ml,method_check
wr,wc,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
A,1,A01,1,,,,IDT,0.0,Manual,DQ,Unknown,...,ddPCR Supermix for Probes (No dUTP),FAM,20028,0,20028,100.0,Negative control nuclease free dH20,nc,0.0,"(True, None)"
A,1,A01,1,,,,ITR,0.605218,Manual,DQ,Unknown,...,ddPCR Supermix for Probes (No dUTP),VIC,20028,10,20018,100.0,Negative control nuclease free dH20,nc,605217.8,"(True, None)"
A,2,A02,1,,,,IDT,0.0,Manual,DQ,Unknown,...,ddPCR Supermix for Probes (No dUTP),FAM,6643,0,6643,100.0,Negative control nuclease free dH20,nc,0.0,"(True, None)"
A,2,A02,1,,,,ITR,5.075813,Manual,DQ,Unknown,...,ddPCR Supermix for Probes (No dUTP),VIC,6643,26,6617,100.0,Negative control nuclease free dH20,nc,5075813.0,"(False, nc > threshold (5.075813293 > 1.5))"
B,1,B01,2,,,,IDT,193.32196,Manual,DQ,Unknown,...,ddPCR Supermix for Probes (No dUTP),FAM,19578,2903,16675,28600.0,Plasmid control (pXL029_SacI),pc,55290080000.0,"(True, None)"


In [18]:
from pcrep.check import droplets_check

DROPLET_THRESHOLD = int(10000)
DRPLET_CHECK_COLNAME = 'Accepted Droplets'


def droplets_check_fn(s):
    return droplets_check(s[DRPLET_CHECK_COLNAME], DROPLET_THRESHOLD)


DROPLET_CHECK_NAME = 'droplet_check'
df.loc[:, [DROPLET_CHECK_NAME]] = df.apply(
    lambda x: droplets_check_fn(x), axis=1)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Well,Sample description 1,Sample description 2,Sample description 3,Sample description 4,Target,Conc(copies/µL),Status,Experiment,SampleType,...,DyeName(s),Accepted Droplets,Positives,Negatives,final dilution factor,Sample,sample type,vg/ml,method_check,droplet_check
wr,wc,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
A,1,A01,1,,,,IDT,0.0,Manual,DQ,Unknown,...,FAM,20028,0,20028,100.0,Negative control nuclease free dH20,nc,0.0,"(True, None)","(True, None)"
A,1,A01,1,,,,ITR,0.605218,Manual,DQ,Unknown,...,VIC,20028,10,20018,100.0,Negative control nuclease free dH20,nc,605217.8,"(True, None)","(True, None)"
A,2,A02,1,,,,IDT,0.0,Manual,DQ,Unknown,...,FAM,6643,0,6643,100.0,Negative control nuclease free dH20,nc,0.0,"(True, None)","(False, droplets < threshold (6643 < 10000))"
A,2,A02,1,,,,ITR,5.075813,Manual,DQ,Unknown,...,VIC,6643,26,6617,100.0,Negative control nuclease free dH20,nc,5075813.0,"(False, nc > threshold (5.075813293 > 1.5))","(False, droplets < threshold (6643 < 10000))"
B,1,B01,2,,,,IDT,193.32196,Manual,DQ,Unknown,...,FAM,19578,2903,16675,28600.0,Plasmid control (pXL029_SacI),pc,55290080000.0,"(True, None)","(True, None)"


In [19]:
df['Target'].unique()

array(['IDT', 'ITR'], dtype=object)

In [None]:
df_idt = df.query("Target == 'IDT'")
# df_idt

targets = df['Target'].unique()
dc = dict.fromkeys(targets)
for t in targets:
    dc[t] = df.query("Target == @t")

# dc['ITR']

In [None]:
dc['ITR'].head()

In [None]:
df_itr = dc['ITR']

In [None]:
df_itr.head()

In [None]:
def get_sample(dfa, sample_num):
    # TODO: check for valid `type` `and sample_num`
    dfa = dfa.loc[dfa['Sample description 1'] == sample_num]
    return dfa

In [None]:
s = get_sample(dc['ITR'], 22)
s

In [None]:
def process_sample(s):
    m = s.loc[:, WELL_RESULT_NAME].mean()
    t = s.loc[:, WELL_RESULT_NAME].std(ddof=0)
    if isinstance(m, float) and m != 0.0:
        cv = 100.0 * t / m
    else:
        cv = float("nan")
    return {'id': s['Sample description 1'][0], 'name': s[SAMPLE_NAME][0], 'mean': m, 'std': t, 'cv': cv}


process_sample(s)

## Checks control, samples, ...

### Check CV

In [None]:
import json

params_file = './data/params.json'
with open(params_file) as json_file:
    check_params = json.load(json_file)
# print(check_params)


def check_sample_cv(sample, ref):
    cv_hreshold = ref['assesmentCriteria']['cvThreshold'] * 100.0  # in %
    complies = True
    comment = None
    if sample['cv'] > cv_hreshold:
        complies = False
        comment = "CV {:.2f} > {:.2f}".format(sample['cv'], cv_hreshold)
    return {"complies": complies, "comment": comment}


check_sample_cv(process_sample(s), check_params)

In [None]:
def gen_sample_results(df):
    targets = df['Target'].unique()
    df_res = pd.DataFrame(columns=['id', 'target', 'sample name',
                          'result [vg/ml]', 'STDE', 'CV [%]', 'check'])
    for t in targets:
        dft = df.query("Target == @t")
        samples = dft[SAMPLE_NUM_NAME].unique()
        for n in samples:
            s = get_sample(dft, n)
            r = process_sample(s)
            check = check_sample_cv(r, check_params)
            df_res.loc[len(df_res)] = [r['id'], t, r['name'],
                                       r['mean'], r['std'], r['cv'],
                                       check]

    return df_res


df_t = gen_sample_results(df)
df_t = df_t.dropna(subset=['sample name'])
df_t

In [None]:
dfidt = df_t.loc[df_t['target'] == 'IDT']
dfidt = dfidt[['id', 'target', 'sample name', 'result [vg/ml]', 'check']]
dfidt.set_index(['id'], inplace=True)
starget = 'IDT'
dfidt.rename(columns={"target": f"target {starget}",
                      "result [vg/ml]": f"result {starget} [vg/ml]",
                      'check': f'comment {starget}'}, inplace=True)
dfidt

In [None]:
dfitr = df_t.loc[df_t['target'] == 'ITR']
dfitr = dfitr[['id', 'target', 'result [vg/ml]', 'check']]
dfitr.set_index(['id'], inplace=True)
starget = 'ITR'
dfitr.rename(columns={"target": f"target {starget}",
                      "result [vg/ml]": f"result {starget} [vg/ml]",
                      'check': f'comment {starget}'}, inplace=True)
dfitr

In [None]:
dfj = pd.concat([dfidt, dfitr], axis=1, join="inner")

dfj


def fn_concat(s1, s2):

    if s1 and s2:
        return s1 + '/' + s2
    elif s1:
        return s1
    else:
        return s2
    return None


dfj.loc[:, ['target']] = dfj.apply(lambda x: fn_concat(
    x['target IDT'], x['target ITR']), axis=1)
# TODO: !!! Uncomment below to add comments; needs to be refactored to generate comments from dictionary
# dfj.loc[:, ['comment']] = dfj.apply(lambda x: fn_concat(
#     x['comment IDT'], x['comment ITR']), axis=1)

final = dfj.reindex(
    ['target', 'sample name', 'result IDT [vg/ml]', 'result ITR [vg/ml]', 'comment'], axis=1)

final

## Export report

In [None]:
with open('md_intro.md', 'r') as f:
    md_intro = f.read()
with open('md_end.md', 'r') as f:
    md_end = f.read()

md_eval = final.to_markdown()

md = md_intro + md_eval + md_end


def save_md(file_path, md_txt):
    try:
        with open(file_path, 'w') as fl:
            fl.write(md_txt)
    except Exception as e:
        print('Error: ' + str(e))


MD_FILE = './example/230901_GN004308-086/230901_GN004308-086.md'
save_md(MD_FILE, md)

xls_path = os.path.splitext(MD_FILE)[0] + '.xlsx'
final.to_excel(xls_path)

In [None]:
from pcrep import mdhandling

with open(os.path.join(DATA_DIR, "config.json")) as json_file:
    jd = json.load(json_file)
    reference_doc = jd['reference_docx']
    pdflatex_bin = jd['pdflatex_bin']
    pandoc_bin = jd['pandoc_bin']

mdhandling.md2docx(pandoc_bin, reference_doc, MD_FILE)
print("Done.")