# PCR report (work in progress)

In [None]:
import pandas as pd
import os

from pcrep.parse_input import parse_analysis_filepath
from pcrep.constants import CONC_NAME, DIL_FINAL_FACTOR_NAME, DIL_TYPE_NAME, DIL_SAMPLE_DESCRIPTION_NAME

# Semicolon-separated PCR export to analyse.
INPUT_PCR_DATA = "./example/231108_GN004773-019/230811_GN004773-019_20230811_100101_999.csv"
# Directory holding the control-limit CSV files. The historical default is an
# absolute, machine-specific path; set the PCREP_CONFIG_DIR environment
# variable to point elsewhere without editing the notebook.
CONFIG_DIR = os.environ.get("PCREP_CONFIG_DIR", "C:/work/pcr-report/data")

# decimal=',' makes the parser convert decimal-comma numbers while reading.
df = pd.read_csv(INPUT_PCR_DATA, delimiter=';', decimal=',')
# Nullable float dtype for the concentration column.
df[CONC_NAME] = df[CONC_NAME].astype('Float64')

# The parsed path must provide the 'analysis_dir', 'date' and 'gn' entries
# that are read below.
parsedc = parse_analysis_filepath(INPUT_PCR_DATA)
ANALYSIS_DIR = parsedc['analysis_dir']

# Common prefix "<analysis_dir>/<date>_<gn>" for the companion input file and
# for every file this notebook writes.
BASE_FILEPATH = os.path.join(
    ANALYSIS_DIR, '{}_{}'.format(parsedc['date'], parsedc['gn']))
display(ANALYSIS_DIR)
display(BASE_FILEPATH)

In [None]:
# Inspect the freshly loaded PCR table.
df

In [None]:
# Companion table stored next to the analysis as "<BASE_FILEPATH>_conc.csv"
# (semicolon-separated, decimal comma).
INPUT_CONCENTRATION_DATA = BASE_FILEPATH + '_conc.csv'
df_conc = pd.read_csv(INPUT_CONCENTRATION_DATA, sep=";", decimal=',')

# Index the table by sample_id.
df_conc.set_index(['sample_id'], inplace=True)
df_conc

In [None]:
from pcrep.constants import FDL_NAME, SAMPLE_NAME, SAMPLE_TYPE_NAME, SAMPLE_NUM_NAME



# Attach per-sample metadata from df_conc. Series.map() with a Series argument
# looks every value of the sample-number column up in df_conc's index
# (sample_id); values without a match become NaN.

# Final dilution factor of the sample.
df.loc[:, [FDL_NAME]] = df[SAMPLE_NUM_NAME].map(
    df_conc[DIL_FINAL_FACTOR_NAME], na_action='ignore')



# Sample description.
df.loc[:, [SAMPLE_NAME]] = df[SAMPLE_NUM_NAME].map(
    df_conc[DIL_SAMPLE_DESCRIPTION_NAME], na_action='ignore')



# Sample type.
df.loc[:, [SAMPLE_TYPE_NAME]] = df[SAMPLE_NUM_NAME].map(
    df_conc[DIL_TYPE_NAME], na_action='ignore')



# Drop rows that ended up without a sample type after the lookup.
# NOTE(review): this rebinds `df`, so the unfiltered table is no longer
# available to later cells.
df = df.dropna(subset=[SAMPLE_TYPE_NAME])
df

In [None]:
# Distinct targets present in the filtered table.
targets = df['Target'].unique()
display(targets)
# Distinct values of 'Sample description 1', sorted in place.
samples = df['Sample description 1'].unique()
samples.sort()
display(samples)

## Compute results

In [None]:
from pcrep.constants import WELL_RESULT_NAME
from pcrep.pcrep import result_fn



# Per-well result: result_fn receives the row's concentration and its final
# dilution factor.
# NOTE(review): both column names are spelled out literally here although
# CONC_NAME and FDL_NAME are imported in earlier cells — confirm they match.
df.loc[:, [WELL_RESULT_NAME]] = df.apply(lambda x: result_fn(
    x['Conc(copies/µL)'], x['final dilution factor']), axis=1)

### Limits

In [None]:
# Plasmid-control limits, read from CONFIG_DIR and indexed by Target.
# NOTE(review): the variable name is misspelled ("palsmid"); it is referenced
# again where dc_limits is built, so any rename must touch both places.
PLASMID_CONTROL_LIMITS_FILE = 'plasmid_control_limits.csv'
palsmid_control_limits = pd.read_csv(
    os.path.join(CONFIG_DIR, PLASMID_CONTROL_LIMITS_FILE))
palsmid_control_limits.set_index(['Target'], inplace=True)
palsmid_control_limits

In [None]:
# Reference-control limits, read from CONFIG_DIR and indexed by Target.
REFERENCE_CONTROL_LIMITS_FILE = 'reference_control_limits.csv'
reference_control_limits = pd.read_csv(
    os.path.join(CONFIG_DIR, REFERENCE_CONTROL_LIMITS_FILE))
reference_control_limits.set_index(['Target'], inplace=True)
reference_control_limits

How to access limits

In [None]:
# Example lookup: select the limits row for target 'IDT', then read one limit
# from it by column name.
rcl = reference_control_limits
lmts = rcl.loc['IDT']
lmts['upper 3s action']

In [None]:
# Method limits, indexed by target_id.
# NOTE(review): this file is read from a path relative to the working
# directory, while the control-limit files are read from CONFIG_DIR — confirm
# that the two locations are intentionally different.
method_limits = pd.read_csv('./data/method_limits.csv')
method_limits.set_index(['target_id'], inplace=True)
display(method_limits)

# All limit tables bundled by kind; this dict is handed to the check routines.
dc_limits = {'method': method_limits, 'reference_control': reference_control_limits,
             'plasmid_control': palsmid_control_limits}

In [None]:
# Example lookup of the lower method limit for target 'IDT'.
# The value of the first line is discarded: only the last expression of a
# cell is displayed.
method_limits.loc['IDT']['Lower [vg/μl]']
mlmts = method_limits.loc['IDT']
mlmts['Lower [vg/μl]']

### Build the MultiIndex (sample_id, Target, Well)

In [None]:
# Build the analysis frame indexed by (sample_id, Target, Well) as one chain,
# leaving `df` untouched.
# - reset_index() keeps the previous row index as a regular column.
# - 'Sample description 1' becomes the 'sample_id' index level.
# - Rows are sorted by the new index.
# - Columns not needed for the report are dropped.
# The former `dfi.sort_index(axis=1)` call was removed: its result was never
# assigned, so it had no effect on dfi.
dfi = (
    df.reset_index()
    .rename(columns={'Sample description 1': 'sample_id'})
    .set_index(['sample_id', 'Target', 'Well'])
    .sort_index()
    .drop(['Sample description 2', 'Sample description 3', 'Sample description 4',
           'TargetType', 'Supermix', 'Status', 'Experiment', 'SampleType'],
          axis=1)
)

Compute mean and standard deviation of `[vg/ml]`

In [None]:
# Mean of 'vg/ml' per (sample_id, Target) group.
# NOTE(review): the right-hand side is indexed by (sample_id, Target) only,
# while dfi has three index levels; the assignment relies on pandas aligning
# the two — confirm every well row receives its group's value.
dfi.loc[:, ['mean [vg/ml]']
        ] = dfi.groupby(level=["sample_id", 'Target']).apply(lambda x: x['vg/ml'].mean())

# Standard deviation of the same groups with ddof=0 (population form),
# stored under the column name 'STDE'.
dfi.loc[:, ['STDE']] = dfi.groupby(level=["sample_id", 'Target']).apply(
    lambda x: x['vg/ml'].std(ddof=0))
# dfi.head()

In [None]:
def cv_fn(mean_vam: float, std_val: float, stype: str):
    """Return the coefficient of variation in percent.

    Args:
        mean_vam: Mean value of the group.
        std_val: Standard deviation of the group.
        stype: Sample type; 'nc' (negative control) never gets a CV.

    Returns:
        ``100 * std_val / mean_vam`` when ``mean_vam`` is a non-zero real
        number, otherwise NaN (sample type 'nc', zero mean, or a mean that is
        not a real number such as None).
    """
    import numbers  # local import keeps this cell self-contained

    # CV is not applied to negative samples.
    if stype == 'nc':
        return float("nan")

    # Accept any real number (Python int/float, NumPy scalars). The previous
    # float-only check silently produced NaN for integer means.
    if isinstance(mean_vam, numbers.Real) and mean_vam != 0.0:
        return 100.0 * std_val / mean_vam
    return float("nan")


# Name of the column receiving the coefficient of variation.
CV_COLNAME = 'CV [%]'
# Row-wise CV from the group mean, the group standard deviation and the
# sample type of each well row.
dfi.loc[:, [CV_COLNAME]] = dfi.apply(lambda x: cv_fn(
    x['mean [vg/ml]'], x['STDE'], x['sample type']), axis=1)
dfi

### Method check

In [None]:
from pcrep.check import method_check_routing

# Column whose value is handed to the method check.
METHOD_CHECK_COLNAME = 'Conc(copies/µL)'


def method_check_fn(s):
    """Run the method-limit check for one well row.

    The target is read from position 1 of the row label, i.e. the 'Target'
    level of the (sample_id, Target, Well) index.
    """
    sample_type = s[SAMPLE_TYPE_NAME]
    checked_value = s[METHOD_CHECK_COLNAME]
    target = s.name[1]
    return method_check_routing(
        dc_limits['method'], sample_type, checked_value, target)


# Name of the column receiving the method-check outcome.
VALUE_CHECK_NAME = 'method_check'
dfi.loc[:, [VALUE_CHECK_NAME]] = dfi.apply(method_check_fn, axis=1)
dfi.head()

### Droplets check

In [None]:
from pcrep.check import droplets_check

# Droplet-count threshold handed to droplets_check.
DROPLET_THRESHOLD = 10000
# Column holding the accepted-droplet count of each well.
DROPLET_CHECK_COLNAME = 'Accepted Droplets'


def droplets_check_fn(s):
    """Check one well row's accepted-droplet count against DROPLET_THRESHOLD."""
    accepted = s[DROPLET_CHECK_COLNAME]
    return droplets_check(accepted, DROPLET_THRESHOLD)


# Name of the column receiving the droplet-check outcome.
DROPLET_CHECK_NAME = 'droplet_check'
dfi.loc[:, [DROPLET_CHECK_NAME]] = dfi.apply(droplets_check_fn, axis=1)
dfi.head()

### Control check

In [None]:
from pcrep.check import control_check_routing

# Column whose value is handed to the control check.
CONTROL_CHECK_COLNAME_ORIG = 'mean [vg/ml]'


def control_check_fn(s):
    """Return element 0 of control_check_routing's result for one well row."""
    return control_check_routing(
        dc_limits,
        s[SAMPLE_TYPE_NAME],
        s[CONTROL_CHECK_COLNAME_ORIG],
        s.name[1],
    )[0]


def warning_check_fn(s):
    """Return element 1 of control_check_routing's result for one well row."""
    return control_check_routing(
        dc_limits,
        s[SAMPLE_TYPE_NAME],
        s[CONTROL_CHECK_COLNAME_ORIG],
        s.name[1],
    )[1]


# Names of the columns receiving the two outcomes.
CONTROL_CHECK_NAME = 'control_check'
WARNING_CHECK_NAME = 'warning_check'

dfi.loc[:, [CONTROL_CHECK_NAME]] = dfi.apply(control_check_fn, axis=1)
dfi.loc[:, [WARNING_CHECK_NAME]] = dfi.apply(warning_check_fn, axis=1)


dfi.head()

### CV check

In [None]:
from pcrep.check import cv_check


def cv_check_fn(cv_val: float):
    """Pass one CV value on to cv_check and return its outcome."""
    outcome = cv_check(cv_val)
    return outcome


# Name of the column receiving the CV-check outcome.
CV_CHECK_NAME = 'cv_check'
dfi.loc[:, [CV_CHECK_NAME]] = dfi.apply(
    lambda row: cv_check_fn(row[CV_COLNAME]), axis=1)

### Comments and floating point formatting

In [None]:
def add_comment(s, n):
    """Append comment ``n`` to the accumulated comment text ``s``.

    An empty or None ``n`` leaves ``s`` untouched. Otherwise ``n`` starts the
    text when ``s`` is empty, or is appended after ', '.
    """
    if not n:
        return s
    if not s:
        return n
    suffix = ', ' + n
    return s + suffix


def concat_comments(x):
    """Merge the check outcomes of one row into a single comment text.

    The check columns are visited in a fixed order and combined with
    add_comment; the result is None when no column contributes anything.
    """
    merged = None
    for column in ('method_check', 'droplet_check', 'control_check',
                   'cv_check', 'warning_check'):
        merged = add_comment(merged, x[column])
    return merged


# Add the merged per-row comment text as the 'comments' column.
dfi = dfi.assign(comments=dfi.apply(lambda x: concat_comments(x), axis=1))
# A bare expression in the middle of a cell is not displayed.
dfi
# Snapshot that still contains every check column.
dfc = dfi.copy()

In [None]:
# Columns kept for the per-well report, in output order; every other column
# is dropped from dfi here.
col_order = ['Sample', 'final dilution factor', 'Conc(copies/µL)',
             'vg/ml', 'mean [vg/ml]', 'STDE', 'CV [%]', 'comments',
             'Accepted Droplets', 'Positives', 'Negatives', 'sample type']
dfi = dfi.loc[:, col_order]
dfi

In [None]:
# Display format per numeric column (fixed-point or scientific notation).
format_mapping = {'Conc(copies/µL)': '{:.2f}',
                  'vg/ml': '{:.2e}',
                  'mean [vg/ml]': '{:.2e}',
                  'STDE': '{:.2e}',
                  'CV [%]': '{:.2f}',
                  'final dilution factor': '{:.0e}'
                  }
# dff is a Styler here, not a DataFrame.
dff = dfi.style.format(format_mapping)
display(dff)
# NOTE(review): pandas documents that Styler.format is not carried over by
# Styler.to_excel — confirm whether the raw workbook is expected to show
# these formats.
dff.to_excel(BASE_FILEPATH + '-data_analysis_raw.xlsx', engine='openpyxl')
# Unformatted per-well table as a Markdown file.
dfi.to_markdown(BASE_FILEPATH + '-data_analysis.md')

In [None]:
# Create a Pandas Excel writer using XlsxWriter as the engine.
# Write the per-well table to "<BASE_FILEPATH>-data_analysis.xlsx" with Excel
# number formats. The writer is used as a context manager so the workbook is
# saved and its file handle released even if a step below raises.
with pd.ExcelWriter(
        BASE_FILEPATH + '-data_analysis.xlsx', engine="xlsxwriter") as writer:
    # Convert the dataframe to an XlsxWriter Excel object.
    dfi.to_excel(writer, sheet_name="Sheet1")

    # Get the xlsxwriter workbook and worksheet objects.
    workbook = writer.book
    worksheet = writer.sheets["Sheet1"]

    # Add some cell formats.
    fmt_fdl = workbook.add_format({"num_format": "0E+00"})
    fmt_dec2e = workbook.add_format({"num_format": "0.00E+00"})
    fmt_dec2f = workbook.add_format({"num_format": "0.00"})

    # Note: It isn't possible to format any cells that already have a format
    # such as the index or headers or any cells that contain dates or
    # datetimes.

    # Set the column width and format. Columns A-C hold the three index
    # levels, so the data columns start at D in col_order order.
    worksheet.set_column('D:D', 32)
    worksheet.set_column('E:E', 18, fmt_fdl)
    worksheet.set_column('F:F', 14, fmt_dec2f)
    worksheet.set_column('G:I', 14, fmt_dec2e)
    worksheet.set_column('J:J', None, fmt_dec2f)
    worksheet.set_column('K:K', 32)
    worksheet.set_column('L:O', 16)

#### Get sample...

In [None]:
def get_sample(df, samnple_num, target_id=None):
    """Select the rows of one sample from a three-level indexed frame.

    Args:
        df: Frame whose index has three levels; the first is matched against
            ``samnple_num`` and the second against ``target_id``.
        samnple_num: Label to select on the first index level.
        target_id: Optional label for the second level; when empty or None
            every label of that level is kept.

    Returns:
        The matching rows with all columns.
    """
    second_level = target_id if target_id else slice(None)
    row_key = pd.IndexSlice[samnple_num, second_level, :]
    return df.loc[row_key, :]


# Example: all rows of sample 2 (every target and well) from the per-well
# table.
tmps = get_sample(dfi, 2)
tmps

In [None]:
# Shorthand for building MultiIndex slices.
idxs = pd.IndexSlice
# Example: 'mean [vg/ml]' values of sample 2 for target 'IDT', all wells.
tmps.loc[idxs[2, ['IDT'], :], :]['mean [vg/ml]'].values

In [None]:
import json

# Load the check parameters from a JSON file located relative to the working
# directory.
# NOTE(review): check_params is not used by any cell visible in this notebook
# — confirm whether it is still needed.
params_file = './data/params.json'
with open(params_file) as json_file:
    check_params = json.load(json_file)
# print(check_params)

## Export report

### Create final (MS Word) dataframe

In [None]:
# Inspect the (sample_id, Target, Well) index of the example sample.
tmps.index

In [None]:
# Shorthand for building MultiIndex slices.
idxs = pd.IndexSlice
# First well of sample 2 / target 'IDT': its group mean and its sample name.
display(tmps.loc[idxs[2, ['IDT'], :], :]['mean [vg/ml]'].values[0])
display(tmps.loc[idxs[2, ['IDT'], :], :]['Sample'].values[0])
# First full index label of the example sample.
display(tmps.index[0])
# Distinct targets measured for the example sample.
tmps.index.get_level_values('Target').unique()

In [None]:
# The first line's value is discarded; only the last expression of a cell is
# displayed.
tmps.index.get_level_values('sample_id').unique()[0]
# Name of the example sample, taken from its first row.
tmps['Sample'].array[0]

In [None]:
from functools import reduce

# Verdict text per target, keyed by whether the checks passed (True) or
# failed (False).
DC_CONTROLS = {'IDT': {True: 'valid', False: 'not valid'},
               'ITR': {True: 'fulfill assay criteria', False: 'does not fulfill assay criteria'}}


def add_to(first, second, delim):
    """Join two optional text fragments with ``delim``.

    Args:
        first: Text collected so far; may be None or empty.
        second: Text to append; may be None or empty.
        delim: Separator placed between the two when both are present.

    Returns:
        ``first + delim + second`` when both are non-empty, the single
        non-empty fragment when only one is, and None when neither is.
    """
    if first and second:
        return first + delim + second
    # An empty or None `second` used to raise TypeError or leave a dangling
    # delimiter; the existing text is now returned unchanged.
    return first or second or None


def isvalid_nc(s):
    """Evaluate the rows of a negative control for a single target.

    Args:
        s: Rows of one sample and one target; must provide the
            'droplet_check' and 'method_check' columns and a 'Target' index
            level.

    Returns:
        Tuple ``(valid, verdict, comment)``: ``valid`` is False as soon as any
        row carries a droplet or method finding, ``verdict`` is the
        DC_CONTROLS text for the target and that outcome, and ``comment``
        joins the first finding of each failing check with '; ' (None when
        nothing failed).
    """
    # The unused read of 'mean [vg/ml]' and the redundant pre-computation of
    # `valid` were removed; the returned tuple is unchanged.
    target = s.index.get_level_values('Target')[0]
    comment = None
    valid = True
    if any(x is not None for x in s['droplet_check'].values):
        # `or`-reduction yields the first truthy finding of the column.
        comment = reduce(lambda s1, s2: s1 or s2, s['droplet_check'].values)
        valid = False
    if any(x is not None for x in s['method_check'].values):
        comment = add_to(comment, reduce(
            lambda s1, s2: s1 or s2, s['method_check'].values), '; ')
        valid = False

    return (valid, DC_CONTROLS[target][valid], comment)


def isvalid_prs(s):
    """Evaluate the rows of a control or sample for a single target.

    Returns a tuple ``(valid, mean, comment)``: ``mean`` is the first row's
    'mean [vg/ml]' value, ``valid`` turns False when a droplet, method or CV
    finding is present (warnings only add to the comment), and ``comment``
    joins the first finding of every affected check with '; '.
    """
    mean_value = s['mean [vg/ml]'].values[0]
    remark = None
    ok = True
    # (check column, whether a finding invalidates the result)
    checks = (('droplet_check', True),
              ('method_check', True),
              ('cv_check', True),
              ('warning_check', False))
    for position, (column, invalidates) in enumerate(checks):
        findings = s[column].values
        if all(entry is None for entry in findings):
            continue
        # `or`-reduction yields the first truthy finding of the column.
        first_finding = reduce(lambda left, right: left or right, findings)
        if position == 0:
            remark = first_finding
        else:
            remark = add_to(remark, first_finding, '; ')
        if invalidates:
            ok = False

    return (ok, mean_value, remark)


def process_sample(s):
    """Condense all rows of one sample into a single report record.

    Args:
        s: Rows of one sample, indexed by (sample_id, Target, Well), with the
            'sample type' and 'Sample' columns plus the check columns read by
            isvalid_nc / isvalid_prs.

    Returns:
        Dict with 'id', 'target' (targets joined by '/'), 'type' and 'name',
        plus, per target ``t``, 'result t [vg/ml]' and 'comment t'.

    Raises:
        ValueError: If the sample type is not one of 'nc', 'pc', 'rc', 's'.
    """
    # Local slicer: the function no longer depends on a module-level `idxs`
    # that is only defined in an exploratory cell.
    idx = pd.IndexSlice
    targets = s.index.get_level_values('Target').unique()
    sample_id = int(s.index.get_level_values('sample_id').unique()[0])
    stype = s['sample type'].array[0]
    dc = {'id': sample_id,
          'target': '/'.join(targets),
          'type': stype,
          'name': s['Sample'].array[0]
          }
    for t in targets:
        rows = s.loc[idx[:, [t], :], :]
        comment = None
        if stype == 'nc':
            # Negative control: the result is the verdict text; a comment is
            # only written when the control failed.
            valid, result, note = isvalid_nc(rows)
            if not valid:
                verdict = DC_CONTROLS[t][valid]
                comment = (verdict + '; ' + note) if note else verdict
        elif stype == 'pc' or stype == 'rc':
            # Positive / reference control: numeric result plus verdict text.
            valid, result, note = isvalid_prs(rows)
            comment = DC_CONTROLS[t][valid]
        elif stype == 's':
            # Sample: a failed check replaces the number by the failure text.
            valid, result, note = isvalid_prs(rows)
            if not valid:
                result = note
        else:
            # Previously this fell through to an unbound local variable.
            raise ValueError(
                f'unsupported sample type {stype!r} for sample {sample_id}')
        dc[f'result {t} [vg/ml]'] = result
        dc[f'comment {t}'] = comment
    return dc


# One report record per sample, assembled in a single DataFrame construction
# instead of growing the frame with pd.concat inside the loop.
records = [process_sample(get_sample(dfc, n)) for n in samples]
dff = pd.DataFrame(records).set_index(['id'])

# Output column order of the final table.
# NOTE(review): the 'IDT' / 'ITR' columns are hard-coded; a run with other
# targets would fail on this selection.
col_order = ['target', 'type', 'name', 'result IDT [vg/ml]',
             'result ITR [vg/ml]', 'comment IDT', 'comment ITR']
dff = dff.loc[:, col_order]
display(dff)

In [None]:
# Create a Pandas Excel writer using XlsxWriter as the engine.
# Write the final table to "<BASE_FILEPATH>-final.xlsx". The writer is used
# as a context manager so the workbook is saved and its file handle released
# even if a step below raises.
with pd.ExcelWriter(
        BASE_FILEPATH + '-final.xlsx', engine="xlsxwriter") as writer:
    # Convert the dataframe to an XlsxWriter Excel object.
    dff.to_excel(writer, sheet_name="Sheet1")

    # Get the xlsxwriter workbook and worksheet objects.
    workbook = writer.book
    worksheet = writer.sheets["Sheet1"]

    # Add some cell formats.
    fmt_dec2e = workbook.add_format({"num_format": "0.00E+00"})

    # Note: It isn't possible to format any cells that already have a format
    # such as the index or headers or any cells that contain dates or
    # datetimes.

    # Set the column width and format. Column A holds the 'id' index, so the
    # data columns start at B in col_order order.
    worksheet.set_column('D:D', 32)
    worksheet.set_column('E:E', 18, fmt_dec2e)
    worksheet.set_column('F:F', 24, fmt_dec2e)
    worksheet.set_column('G:G', 32)
    worksheet.set_column('H:H', 32)

#### Checks

In [None]:
# Spot check: all rows of sample 1 from the snapshot that still contains the
# check columns.
s = get_sample(dfc, 1)
s

### Markdown and word export

In [None]:
# with open('md_intro.md', 'r') as f:
#     md_intro = f.read()
# with open('md_end.md', 'r') as f:
#     md_end = f.read()

# md_eval = final.to_markdown()

# md = md_intro + md_eval + md_end


# def save_md(file_path, md_txt):
#     try:
#         with open(file_path, 'w') as fl:
#             fl.write(md_txt)
#     except Exception as e:
#         print('Error: ' + str(e))


# MD_FILE = './example/230901_GN004308-086/230901_GN004308-086.md'
# save_md(MD_FILE, md)

# xls_path = os.path.splitext(MD_FILE)[0] + '.xlsx'
# final.to_excel(xls_path)

In [None]:
# from pcrep import mdhandling

# with open(os.path.join(DATA_DIR, "config.json")) as json_file:
#     jd = json.load(json_file)
#     reference_doc = jd['reference_docx']
#     pdflatex_bin = jd['pdflatex_bin']
#     pandoc_bin = jd['pandoc_bin']

# mdhandling.md2docx(pandoc_bin, reference_doc, MD_FILE)
# print("Done.")

In [None]:
# ! pip install jinja2
# ! pip install tabulate
# ! pip install xlsxwriter