# Intro


## Goal
**WHAT**: Automatic report generation from Hamilton measurements.  
**WHY**: Speed up the report generation, and avoid human errors (copying data, subjective evaluation, ....)

## Tools
Fast iteration in an agile way.  
Generic approach - different plate setups, parameters, ... all with the same code, no changes needed.  

**Python** programming language.  
**jupyter** notebook is currently used, with some functions divided into small modules.  
**Visual Studio Code** IDE (Integrated Development Environment).  
**Markdown** (*.md) format for the generated report (simple, human-readable).  

## Input:
 - Worklist file path (*.xls) as used for Hamilton input.
   - Sample name
   - Dilution
   - Viscosity
 - Measurement results file path (*.xls) as output from Hamilton.
 - Parameters; constants in code (file path *.json)
   - CV (Coefficient of variation) threshold
   - Reference value (1.7954e+10 cp/ml)
   - Dilutions [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0]
   - Decimal digits for output

## Output:
  - Report (*.md, printable to pdf)
    - Could be manually edited
    - Image files
    - Result sheets
  - Estimated size <2kB (current)

## Done
  - Invalid sample:
    - CV >THRESHOLD
    - Only one point
  - Parameters file (*.csv, *.json)
  - Multiple plates (in worklist file)

## TODO:
  - Modules
  - Finalize the report
  - Running modes
    - Python script - automatic run (command line with parameters)
    - GUI; use modules to create an App (code remains the same, but used from GUI)
  - Tests (unit, integration)
  - checksum (*.sdax); put into report
  - Extensive testing...
  - Automatic print to *.pdf ?

## Conclusion
End to end evaluation time reduction approximately 2h -> 20min per measurement. (thx Felix)


# Generate report  - POC

## Imports

In [None]:
# Notebook-wide switches.
# When True, intermediate dataframes are shown with display() in the cells below.
VERBOSE_NOTEBOOK = False
# When True, RuntimeWarning and OptimizeWarning are silenced before fitting.
WARNING_DISABLE = True

In [None]:
import pandas as pd
import numpy as np
from os import path
import os

In [None]:
def make_input_paths(input_dir, base_name, sample_num):
    """Build the input and output file paths that belong to one plate.

    Args:
        input_dir: Directory holding the worklist, results and parameter files.
        base_name: File-name prefix shared by all files.
        sample_num: Number inserted into the results file name and into the
            report directory and file name.

    Returns:
        dict with the keys 'worklist', 'results', 'report' and 'params'.
        Only the worklist and results paths are checked for existence.

    Raises:
        FileNotFoundError: If the worklist or the results file does not exist.
    """
    worklist = path.join(input_dir, base_name + 'worklist-ELISA.xls')
    if not path.isfile(worklist):
        raise FileNotFoundError("Worklist file path is invalid: {}".format(worklist))

    results = path.join(input_dir, base_name + 'calc{}.xlsx'.format(sample_num))
    if not path.isfile(results):
        raise FileNotFoundError("Results file path is invalid: {}".format(results))

    # The report goes into a per-plate sub-directory of the input directory.
    report_dir = path.join(input_dir, 'results_{}'.format(sample_num))
    report = path.join(report_dir, '{}report_{}.md'.format(base_name, sample_num))

    params = path.join(input_dir, base_name + 'AAV9-ELISA_Parameters.csv')

    return {'worklist': worklist, 'results': results, 'report': report, 'params': params}

In [None]:
# Directory with the input files and the file-name prefix they share;
# both are handed to make_input_paths() in the next code cell.
WORKING_DIR = './data/input/'
BASE_NAME = '230426_GN004240-033_-_'

## Read data

In [None]:
# Plate whose results are evaluated in this run.
PLATE_ID = 1

# Resolve all file locations for the selected plate in one go.
input_files = make_input_paths(WORKING_DIR, BASE_NAME, PLATE_ID)
WORKLIST_FILE_PATH, RESULT_FILE_PATH, REPORT_FILE_PATH = (
    input_files[key] for key in ('worklist', 'results', 'report')
)
REPORT_DIR = path.dirname(path.abspath(REPORT_FILE_PATH))

# Semicolon-separated parameter sheet, indexed by its 'Variable' column.
params = pd.read_csv(input_files['params'], sep=';').set_index('Variable')

In [None]:
from readdata import read_concat_data

# Load the measurement results of the selected plate and show them.
m = read_concat_data(RESULT_FILE_PATH)
display(m)

### Layouts

In [None]:
from layouthandle import read_plate_layout

# Plate layout tables. The *_ident, *_num and *_dil_id layouts are merged with
# the measurements further below; `plate_layout` itself is not referenced
# again in the visible part of this notebook.
plate_layout = read_plate_layout('./data/plate_layout.csv')
plate_layout_id = read_plate_layout('./data/plate_layout_ident.csv')
plate_layout_num = read_plate_layout('./data/plate_layout_num.csv')
plate_layout_dil_id = read_plate_layout('./data/plate_layout_dil_id.csv')

In [None]:
# Optional inspection of the loaded layout tables.
if VERBOSE_NOTEBOOK:
    display(plate_layout_id)
    display(plate_layout_num)
    display(plate_layout_dil_id)

### Combine read data from XLSX with layouts

In [None]:
from readdata import to_multi_index
from layouthandle import concat_data_and_layout

# Attach the three layout tables to the measurements one after another; the
# result of each step is the input of the next one.
# NOTE(review): the string passed to to_multi_index() presumably becomes the
# column name, since df_all is queried with these names later - confirm.
df_all = concat_data_and_layout(m, to_multi_index(plate_layout_id, 'plate_layout_ident'))
df_all = concat_data_and_layout(df_all, to_multi_index(plate_layout_num, 'plate_layout_num'))
df_all = concat_data_and_layout(df_all, to_multi_index(plate_layout_dil_id, 'plate_layout_dil_id'))

if VERBOSE_NOTEBOOK:
    display(m)
    display(df_all)

Filter data

In [None]:
# Optional: show only the rows whose layout identifier is 'r'
# ('r' is the type used for the reference rows elsewhere in this notebook).
if VERBOSE_NOTEBOOK:
    display(df_all.loc[(df_all['plate_layout_ident']=='r')])

In [None]:
from sample import get_sample

# Optional: show what get_sample() returns for type 's', number 1.
if VERBOSE_NOTEBOOK:
    display(get_sample(df_all, 's', 1))

### Dilution to Concentration

Define the dilution dataframe. The dataframe is indexed according to the plate layout; the index of the reference dataframe corresponds to the reference entries of `plate_layout_dil_id`.

In [None]:
# TODO: read reference value from parameters
# Reference value in cp/ml and the dilution factors of the reference series.
REF_VAL_MAX = 1.7954e+10
DILUTIONS = [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0]

from sample import make_concentration
# The result is used below through its 'concentration' and 'dilution' columns.
reference_conc = make_concentration(REF_VAL_MAX, DILUTIONS)

Check the `reference_conc` indexing

In [None]:
# Optional inspection of the reference concentration table.
if VERBOSE_NOTEBOOK:
    display(reference_conc)

## Fit

In [None]:
import fitdata
import warnings
from scipy.optimize import OptimizeWarning

# Silence runtime and optimizer warnings for the rest of the session when
# the notebook switch WARNING_DISABLE is set.
if WARNING_DISABLE:
    warnings.simplefilter('ignore', RuntimeWarning)
    warnings.simplefilter('ignore', OptimizeWarning)

### Get the fitting data from dataframe

In [None]:
# Reference rows only; the copy keeps the added column out of df_all.
ref = df_all.loc[(df_all['plate_layout_ident']=='r')].copy()
# Translate each dilution id into a concentration via the reference table.
ref['plate_layout_dil'] = ref['plate_layout_dil_id'].map(reference_conc['concentration'])
if VERBOSE_NOTEBOOK:
    display(ref)

### Fit with confidence interval

In [None]:
# NOTE(review): this constant is not referenced anywhere else in the visible
# notebook (fit_image is called with confidence='student-t') - confirm that
# it is still needed.
CONFIDENCE_INTERVAL = 95.0 # 95% confidence interval = 100*(1-alpha)

Backfit

In [None]:
from image import fit_image
from fitdata import fit_reference_auto_rm

# Flatten the first two index levels so that x and y are plain columns of `ref`.
x = ref.reset_index(level=[0,1])['plate_layout_dil']
y = ref.reset_index(level=[0,1])['OD_delta']
fit_result = fit_reference_auto_rm(x, y, verbose=False)
# fit_result[0] presumably holds the fitted parameters and their covariance
# (named popt/pcov here); both are reused by later cells.
popt = fit_result[0][0]
pcov = fit_result[0][1]

# fit_result[1] is handed to fit_image as rm_index; fit_result[3] and
# fit_result[1] are shown as returned.
fit_image(x, y, fit_result[0][0], fit_result[0][1], None, confidence='student-t', rm_index=fit_result[1])
display(fit_result[3])
display(fit_result[1])

In [None]:
from fitdata import backfit

# Back-calculate the reference points with the fitted parameters.
bf = backfit(ref, popt)

# Value ranges of the reference curve; later cells use them to mask and
# annotate the samples.
od_min = bf['Optical density'].min()
od_max = bf['Optical density'].max()

od_fit_min = bf['SV to OD fit'].min()
od_fit_max = bf['SV to OD fit'].max()

sv_min = bf['Standard Value [cp/ml]'].min()
sv_max = bf['Standard Value [cp/ml]'].max()

cb_min = bf['Concentration backfit [cp/ml]'].min()
cb_max = bf['Concentration backfit [cp/ml]'].max()

if VERBOSE_NOTEBOOK:
    display(bf)
    # The upper bound must be cb_max; cb_min was printed twice before.
    print('Concentration backfit [cp/ml] range <{0}, {1}>'.format(cb_min, cb_max))
    print('Standard Value [cp/ml] range <{0}, {1}>'.format(sv_min, sv_max))
    print('SV to OD fit range <{0:.4f}, {1:.4f}>'.format(od_fit_min, od_fit_max))
    print('Optical density range <{0:.4f}, {1:.4f}>'.format(od_min, od_max))

## Sample evaluation

In [None]:
# Optional: preview sample 1 of type 's' before the evaluation starts.
if VERBOSE_NOTEBOOK:
    display(get_sample(df_all, 's', 1))

Fit the data, and apply the inverse function as a check...

In [None]:
from sample import unique_sample_numbers

# Rows of the sample ('s'), control ('k') and reference ('r') wells.
# The explicit copy (as already done for `ref`) makes the column assignments
# below work on an independent frame instead of a slice of df_all.
samplesk = df_all.loc[df_all['plate_layout_ident'].isin(['s', 'k', 'r'])].copy()
# Translate each dilution id into its dilution factor.
samplesk.loc[:, ['plate_layout_dil']] = samplesk['plate_layout_dil_id'].map(reference_conc['dilution'])
display(samplesk)

sample_nums = unique_sample_numbers(samplesk)
display(sample_nums)

### Compute concentration for all `s` and `k` samples

In [None]:
from fitdata import conc_func

# Row-wise evaluation with the fitted parameters `popt`:
# 'concentration' also takes the dilution of the well into account,
# 'backfit' is the inverse fit function applied to the measured OD alone.
samplesk.loc[:, ['concentration']] = samplesk.apply(lambda x: conc_func(x['OD_delta'], x['plate_layout_dil'], *popt), axis=1)
samplesk.loc[:, ['backfit']] = samplesk.apply(lambda x: fitdata.inv_func(x['OD_delta'], *popt), axis=1)

if VERBOSE_NOTEBOOK:
    display(samplesk)

### Sample masking

In [None]:
from sample import mask_reason_fn
from sample import mask_reason_short_fn

# Row-wise mask reasons: one from the measured OD against the reference OD
# range, one from the backfit value against the backfit concentration range.
samplesk.loc[:, ['od_mask_reason']] = samplesk.apply(lambda x: mask_reason_fn(x['OD_delta'], od_min, od_max, 'Measured OD'), axis=1)
samplesk.loc[:, ['mask_reason']] = samplesk.apply(lambda x: mask_reason_short_fn(x['backfit'], cb_min, cb_max, x['plate_layout_dil'], ''), axis=1)

if VERBOSE_NOTEBOOK:
    display(samplesk)
    print('Optical density range = <{0:.4f}, {1:.4f}>'.format(od_min, od_max))
    # Plain floats are formatted here: `Decimal` is only imported in a later
    # cell, so wrapping sv_min in it raised a NameError on a fresh run.
    # NOTE(review): the masking above uses cb_min/cb_max while this line
    # prints sv_min/sv_max - confirm which range is meant.
    print('Backfit range = <{0:.4e}, {1:.4e}>'.format(sv_min, sv_max))

In [None]:
from scipy.stats import variation
import constants as cnt
from sample import process_sample
from sample import sample_check


# def process_sample(samples, stype, sample_num):
#     sample = get_sample(samples, stype, sample_num)
#     smp_t = sample[sample.mask_reason.isna()]
#     cv = np.nan
#     mean = np.nan
#     if len(smp_t['concentration']) > 1:
#         cv = variation(smp_t['concentration'], ddof=1)
#         mean = np.mean(smp_t['concentration'])
#     elif len(smp_t['concentration']) == 1:
#         mean = numpy.mean(smp_t['concentration'])

#     return sample, cv, mean


# def sample_check(samples, stype, sample_num, cv_thresh=CV_THRESHOLD,
#                  min_valid_pts=MIN_VALID_SAMPLE_POINTS):
#     s = process_sample(samples, stype, sample_num)
#     valid = True
#     note = ''
#     if s[1] > cv_thresh:
#         note = 'CV > {}; '.format(cv_thresh)
#         valid = False
#     smp = s[0]
#     valid_pts = smp['mask_reason'].isna().sum()
#     if valid_pts < min_valid_pts:
#         note += 'Not enough valid sample points. Required {}, available {};'.format(min_valid_pts, valid_pts)
#         valid = False
#     elif valid_pts != len(smp['mask_reason']):
#         note += 'Reduced number of sample points. Measured {}, valid {};'.format(len(smp['mask_reason']), valid_pts)
#         valid &= True

#     note_cols = smp[~smp['mask_reason'].isna()]
#     if len(note_cols)!= 0:
#         if (note_cols['mask_reason'] == note_cols['mask_reason'][0]).all():
#             note += note_cols['mask_reason'][0] + ';' + note_cols['od_mask_reason'][0]
#         # else:
#         #     note += note_cols['mask_reason'].str.cat(sep=', ')

#     return {'sample':smp, 'cv':s[1], 'mean':s[2], 'note':note, 'type':stype, 'num':sample_num, 'valid':valid, 'valid_pts': valid_pts}


def print_sample(number, stype, sample, cv, mean):
    """Show one sample table followed by its type, CV and mean.

    Args:
        number: Sample number printed in the headline.
        stype: Sample type key; looked up in ``cnt.SAMPLE_TYPES``.
        sample: Dataframe with at least the columns 'OD_delta',
            'plate_layout_dil', 'concentration', 'backfit' and 'mask_reason'.
        cv: Coefficient of variation as a fraction (printed in percent).
        mean: Mean concentration, printed in cp/ml.
    """
    display(sample[['OD_delta', 'plate_layout_dil', 'concentration', 'backfit', 'mask_reason']])
    # SAMPLE_TYPES lives in the constants module (imported as `cnt`); the bare
    # name is not defined in this notebook.
    print("{1} '{2}' {0}".format(number, cnt.SAMPLE_TYPES[stype], stype))
    print("CV = {:2.3} [%]".format(100 * cv))
    print("mean = {:.4} [cp/ml]".format(mean))


def print_sample_dc(sample_dict):
    """Show the table and the summary lines of one checked sample.

    Args:
        sample_dict: Dictionary with the keys 'sample', 'num', 'type', 'cv',
            'mean', 'valid' and 'note'.
    """
    shown_columns = ['OD_delta', 'plate_layout_dil', 'concentration', 'backfit', 'mask_reason']
    display(sample_dict['sample'][shown_columns])

    stype = sample_dict['type']
    print("{1} '{2}' {0}".format(sample_dict['num'], cnt.SAMPLE_TYPES[stype], stype))
    print("CV = {:2.3} [%]".format(100 * sample_dict['cv']))
    print("mean = {:.4} [cp/ml]".format(sample_dict['mean']))
    # The remaining entries are printed as they are.
    for template, key in (("valid = {}", 'valid'), ("note: {}", 'note')):
        print(template.format(sample_dict[key]))

# Scratch evaluation of hand-picked samples.
# The result of this first call is overwritten inside the loop below.
sc = sample_check(samplesk, 'k', 1)
sample_results = pd.DataFrame(columns=['id', 'cv', 'cp_mean', 'Note', 'Valid'])
# Only sample 14 is evaluated here; the trailing comment lists alternatives.
for i in [14]:#sample_nums: [5, 6, 9]
    stype = 's'
    # s = process_sample(samplesk, 's', i)
    # print_sample(i, 's', *s)
    sc = sample_check(samplesk, 's', i)
    print_sample_dc(sc)
    # Append one result row at the end of the table.
    sample_results.loc[len(sample_results)] = ['sample {:02d}'.format(i),
                                               sc['cv'], sc['mean'], sc['note'], sc['valid']]
    # Stops after sample 3; never reached with the list [14] above.
    if i == 3: break;

display(sample_results)

In [None]:
from enum import Enum
from decimal import Decimal

class SampleInfo(str, Enum):
    """Reason codes stored under the 'enum' key of a sample-info dictionary."""
    # No backfit value at all and every OD below the reference fit range.
    NAN_LOW = 'NaN below reference'
    # No backfit value at all and every OD above the reference fit range.
    NAN_HIGH = 'NaN above reference'
    # Backfit values exist, but their ODs lie below the reference fit range.
    LOW = 'value below reference'
    # Backfit values exist, but their ODs lie above the reference fit range.
    HIGH = 'value above reference'
    # Coefficient of variation exceeds the configured threshold.
    CV = 'CV above threshold'
    # Some, but fewer than the required number of, valid points.
    VALID_PTS = 'few valid points'


def sampleinfo_to_str(info, multiplier=1.0):
    """Render a sample-info dictionary as a short text.

    Args:
        info: Dictionary with the keys 'enum', 'value' and 'sign', or None /
            an empty dictionary when there is nothing to report.
        multiplier: Factor applied to the value of the range messages; it is
            not applied to the CV and valid-point messages.

    Returns:
        The formatted text, or None when `info` is None or empty.
    """
    # Covers both None and the empty dictionary.
    if not info:
        return None

    kind = info['enum']
    if kind == SampleInfo.CV:
        # The threshold comes from the constants module (`cnt`); a bare
        # CV_THRESHOLD is not defined in this notebook.
        return 'CV>{:.1f}%({:.1f}%)'.format(cnt.CV_THRESHOLD * 100, float(info['value']) * 100.0)

    if kind == SampleInfo.VALID_PTS:
        return '{} valid point'.format(info['value'])

    return '{}{:.4e}'.format(info['sign'], float(info['value']) * multiplier)


def sample_info(samples, stype, sample_num, verbose=False):
    """Classify one sample against the range of the reference curve.

    Reads the module-level range values (od_min, od_max, od_fit_min,
    od_fit_max, sv_min, sv_max, cb_min, cb_max) computed from the backfit of
    the reference.

    Args:
        samples: Dataframe passed on to get_sample() and sample_check().
        stype: Sample type key.
        sample_num: Sample number.
        verbose: When True, show the sample and the reference ranges.

    Returns:
        The dictionary returned by sample_check() without its 'sample' and
        'note' entries and with an added 'info' entry. 'info' is either an
        empty dictionary or a dictionary with the keys 'sign', 'value' and
        'enum' (a SampleInfo member).
    """
    s = get_sample(samples, stype, sample_num)
    sc = sample_check(samples, stype, sample_num)
    if verbose:
        display(s)
        print('OD=[{}, {}]'.format(od_min, od_max))
        print('OD_fit=[{:.3}, {:.3}]'.format(Decimal(od_fit_min), Decimal(od_fit_max)))
        print('SV=[{:.3e}, {:.3e}]'.format(Decimal(sv_min), Decimal(sv_max)))
        print('CB=[{}, {}]'.format(cb_min, cb_max))

    above_ref_od_max = s['OD_delta'] > od_fit_max
    below_ref_od_min = s['OD_delta'] < od_fit_min
    msgdc = {}
    if s['backfit'].isna().all():
        # Nothing could be back-calculated: report the violated reference limit.
        if above_ref_od_max.all():
            msgdc = {'sign': '>', 'value': Decimal(sv_max), 'enum': SampleInfo.NAN_HIGH}
        if below_ref_od_min.all():
            msgdc = {'sign': '<', 'value': Decimal(sv_min), 'enum': SampleInfo.NAN_LOW}
    elif sc['cv'] > cnt.CV_THRESHOLD:
        msgdc = {'sign': '>{:.2f}'.format(cnt.CV_THRESHOLD), 'value': sc['cv'], 'enum': SampleInfo.CV}
    elif not s['mask_reason'].isna().all():
        # ODs of the points that do have a backfit value.
        od_backfitted = s.loc[~s['backfit'].isna(), 'OD_delta']

        if od_backfitted.max() < od_fit_min:
            msgdc = {'sign': '<', 'value': Decimal(sv_min * sc['sample']['plate_layout_dil'].min()), 'enum': SampleInfo.LOW}
        elif od_backfitted.min() > od_fit_max:
            msgdc = {'sign': '>', 'value': Decimal(sv_max * sc['sample']['plate_layout_dil'].max()), 'enum': SampleInfo.HIGH}

    # Too few (but not zero) valid points overrides any message set above.
    if sc['valid_pts'] < cnt.MIN_VALID_SAMPLE_POINTS and sc['valid_pts'] != 0:
        msgdc = {'sign': '', 'value': sc['valid_pts'], 'enum': SampleInfo.VALID_PTS}

    # Keep only the scalar summary; the table and the free-text note are dropped.
    del sc['sample']
    del sc['note']
    sc['info'] = msgdc

    return sc

In [None]:
# Quick check: info dictionary of sample 6, shown as the cell output.
si = sample_info(samplesk, 's', 6)
si

In [None]:
# Result table: one row for the control, one for the reference and one per
# sample. Every row is appended at the current end of the table.
sample_results = pd.DataFrame(columns=['id', 'CV [%]', 'Reader Data [cp/ml]', 'Note', 'Valid', 'info'])
knum = 1
s = sample_check(samplesk, 'k', knum)
si = sample_info(samplesk, 'k', knum)
display(si)
sample_results.loc[len(sample_results)] = ['control {:02d}'.format(knum), s['cv'], s['mean'], s['note'], s['valid'], si]

# The reference row uses its own number (rnum); it used to reuse the control
# number (knum) for both the info lookup and the row label.
rnum = 1
s = sample_check(samplesk, 'r', rnum)
si = sample_info(samplesk, 'r', rnum)
sample_results.loc[len(sample_results)] = ['reference {:02d}'.format(rnum), s['cv'], s['mean'], s['note'], s['valid'], si]

for i in sample_nums:
    s = sample_check(samplesk, 's', i)
    si = sample_info(samplesk, 's', i)
    sample_results.loc[len(sample_results)] = ['sample {:02d}'.format(i), s['cv'], s['mean'], s['note'], s['valid'], si]

# Use the row label as index; `sl` is the name read by make_final().
sample_results = sample_results.set_index('id')
sl = sample_results
display(sl)

In [None]:
# Verbose check of sample 1 and of the text rendered from its info dictionary.
si = sample_info(samplesk, 's', 1, True)
display(si)
sampleinfo_to_str(si['info'])

### Plot sample with reference curve

In [None]:
def mask_index(df):
    """Return the flat row positions that carry a mask reason.

    The first two index levels of `df` are moved into columns first, so the
    returned labels refer to the flattened frame.
    """
    flat = df.reset_index(level=[0, 1])
    has_reason = flat['mask_reason'].notna()
    return flat[has_reason].index


def na_index(df):
    """Return the flat row positions whose 'backfit' value is missing.

    The first two index levels of `df` are moved into columns first, so the
    returned labels refer to the flattened frame.
    """
    flat = df.reset_index(level=[0, 1])
    missing = flat['backfit'].isna()
    return flat[missing].index


def sample_img(samples, sample_type, sample_num, img_file=None, show=True, verbose=False):
    """Plot one sample on top of the fitted reference curve.

    The reference points are read from the module-level `ref` frame and are
    fitted again on every call.

    Args:
        samples: Dataframe passed on to sample_check().
        sample_type: Sample type key.
        sample_num: Sample number.
        img_file: Passed to fit_image() as the image file argument.
        show: Passed to fit_image().
        verbose: When True, print the sample and let the fit report progress.
    """
    # Use the frame handed in by the caller; the global `samplesk` was read
    # here before, which silently ignored the `samples` argument.
    sd = sample_check(samples, sample_type, sample_num)
    sample_df = sd['sample']
    if verbose:
        print(sample_type, sample_num)
        display(sample_df)

    mask_idx = mask_index(sample_df)

    ref_flat = ref.reset_index(level=[0, 1])
    x = ref_flat['plate_layout_dil']
    y = ref_flat['OD_delta']
    fit_result = fit_reference_auto_rm(x, y, verbose=verbose)

    # Divide the dilution factor out of the concentration for plotting.
    sample_df.loc[:, ['conc_plot']] = sample_df.apply(
        lambda row: row['concentration'] / row['plate_layout_dil'], axis=1)
    sample_flat = sample_df.reset_index(level=[0, 1])
    sx = sample_flat['conc_plot']
    sy = sample_flat['OD_delta']

    fit_image(x, y, fit_result[0][0], fit_result[0][1], img_file, confidence='student-t',
              rm_index=fit_result[1], mask_index=mask_idx,
              sx=sx, sy=sy, sna_idx=na_index(sample_df), show=show, valid_sample=sd['valid'], interval_ratio=1.0)
    
# Smoke test: plot sample 6 with the default options.
sample_img(samplesk, 's', 6)

## Worklist

In [None]:
def check_worklist(wl):
    """Return the plate numbers (1 to 3) whose 'SampleID_<n>' column has no empty cell."""
    return [
        plate
        for plate in range(1, 4)
        if not wl['SampleID_{}'.format(plate)].isnull().values.any()
    ]


def read_worklist(worklist_file):
    """Read the worklist sheet and label its rows by sample type.

    The sheet rows are labelled in fixed order: control, reference, blank and
    the samples 01 to 21. The 'blank' row is removed from the result.

    Args:
        worklist_file: Path handed to pandas.read_excel().

    Returns:
        The worklist dataframe with the index named 'Sample type'.
    """
    row_labels = ['control 01', 'reference 01', 'blank']
    row_labels += ['sample {:02d}'.format(num) for num in range(1, 22)]

    wl = pd.read_excel(worklist_file).set_index([row_labels])
    # The returned list is not used; the call only fails when one of the
    # 'SampleID_<n>' columns is missing.
    check_worklist(wl)
    wl = wl.drop('blank', axis=0)
    wl.index.name = 'Sample type'

    return wl


def worklist_sample(wl, plate_id):
    """Select the worklist columns that belong to one plate.

    Args:
        wl: Worklist dataframe with '<name>_<plate_id>' columns.
        plate_id: Plate number used as the column suffix.

    Returns:
        ``(frame, mapping)`` where `frame` holds the SampleID, Dilution and
        Viscosity columns of the plate and `mapping` translates the generic
        names to the plate-specific column names; ``(None, None)`` when the
        plate's SampleID column contains an empty cell.
    """
    suffix = '_' + str(plate_id)
    if wl['SampleID' + suffix].isnull().values.any():
        return None, None

    cols_dict = {generic: generic + suffix
                 for generic in ('SampleID', 'Dilution', 'Viscosity')}
    return wl[list(cols_dict.values())], cols_dict


# Load the worklist, list the completely filled plates and pick the columns
# of the plate under evaluation. `wl` and `wl_cols_dict` are both None when
# the plate's sample-ID column has an empty cell.
wl_raw = read_worklist(WORKLIST_FILE_PATH)
valid_plates = check_worklist(wl_raw)
wl, wl_cols_dict = worklist_sample(wl_raw, PLATE_ID)

In [None]:
def final_sample_info(all_info, pre_dilution, verbose=False):
    """Turn a sample-info dictionary into the final comment and validity flag.

    Args:
        all_info: Dictionary with an 'info' entry (and a 'valid_pts' entry for
            the valid-point message).
        pre_dilution: Pre-dilution factor the reported limit is multiplied by.
        verbose: Unused; kept for interface compatibility.

    Returns:
        ``(message, valid)``. `message` is empty when there is nothing to
        report. `valid` is True when there is no info entry and for the
        "below reference" cases, False for "above reference", too few valid
        points and CV above the threshold.

    Raises:
        ValueError: If `all_info` is None or empty.
    """
    # Validate before the first lookup; the check used to come after
    # all_info['info'] and could therefore never report the problem itself.
    if not all_info:
        raise ValueError("Invalid sample info!")

    info = all_info['info']
    if not info:
        return '', True

    kind = info['enum']
    if kind in (SampleInfo.NAN_HIGH, SampleInfo.HIGH):
        # float() on both factors: the limit is stored as Decimal, and
        # Decimal * float raises a TypeError.
        return '>{:.4e}'.format(float(info['value']) * float(pre_dilution)), False
    if kind in (SampleInfo.NAN_LOW, SampleInfo.LOW):
        return '<{:.4e}'.format(float(info['value']) * float(pre_dilution)), True
    if kind == SampleInfo.VALID_PTS:
        return '{} valid point'.format(all_info['valid_pts']), False
    if kind == SampleInfo.CV:
        # The threshold comes from the constants module (`cnt`); a bare
        # CV_THRESHOLD is not defined in this notebook.
        return 'CV>{:.2f}%({:.2f}%)'.format(cnt.CV_THRESHOLD * 100.0, info['value'] * 100.0), False

    # Unknown reason code: nothing to report.
    return '', True

In [None]:
# TODO: nasty, using globals!!!
def make_final():
    """Join the worklist of the current plate with the evaluated results.

    Reads the module-level names `wl` (worklist columns of the plate),
    `wl_cols_dict` (generic to plate-specific column names) and `sl`
    (result table).

    Returns:
        Dataframe indexed by 'Sample type' with the columns 'Sample Name',
        'Pre-dilution', 'Reader Data [cp/ml]', 'Result [cp/ml]', 'CV [%]',
        'Valid', 'info', 'info_ex' and 'valid_ex'.

    Raises:
        ValueError: If no worklist is available for the current plate.
    """
    if wl is None or wl_cols_dict is None:
        raise ValueError("No valid worklist for the selected plate!")

    cd = wl_cols_dict
    final = pd.concat([wl, sl], axis=1)
    final.loc[:, ['Result [cp/ml]']] = final.apply(lambda x: x['Reader Data [cp/ml]'] * x[cd['Dilution']], axis=1)
    # The CV is stored as a fraction; the report shows percent.
    final.loc[:, ['CV [%]']] = final.apply(lambda x: x['CV [%]'] * 100, axis=1)
    # Reorder the columns; the viscosity column is not part of the result and
    # is left out here instead of being re-added and dropped afterwards.
    final = final.reindex([cd['SampleID'], cd['Dilution'], 'Reader Data [cp/ml]', 'Result [cp/ml]', 'CV [%]', 'Valid', 'info'], axis=1)
    final.rename(columns={cd['SampleID']: 'Sample Name', cd['Dilution']: 'Pre-dilution'}, inplace=True)
    final.index.name = 'Sample type'
    # Evaluate each row once and split the (message, valid) pair afterwards.
    verdicts = final.apply(lambda x: final_sample_info(x['info'], x['Pre-dilution']), axis=1)
    final.loc[:, ['info_ex']] = verdicts.map(lambda v: v[0])
    final.loc[:, ['valid_ex']] = verdicts.map(lambda v: v[1])
    return final

# final = make_final()
# final

In [None]:
# Build the combined worklist/result table and show it as the cell output.
final = make_final()
final

## Plate Layout

In [None]:
# Numeric copy of the number layout: 'b' entries become -99 so that the
# whole table can be cast to float (it is colour-coded in the next cell).
df = plate_layout_num.replace({'b':-99}).astype(float)
df

In [None]:
from matplotlib import pyplot as plt

# Colour every cell by its value; the range is widened by one on both sides.
vals = np.around(df.values, 2)
norm = plt.Normalize(vals.min()-1, vals.max()+1)
colours = plt.cm.hot(norm(vals))

# Frameless axes without ticks, so that only the table is visible.
fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(111, frameon=False, xticks=[], yticks=[])

the_table=plt.table(cellText=vals, rowLabels=df.index, colLabels=df.columns,
                    loc='center', cellColours=colours)
plt.show()

## Report  
We build a report here...

### Fit Reference Curve

In [None]:
from fitdata import fit_sheet

def fit_section_md(df_ref, popt, pcov, out_dir):
    """Build the 'Reference Curve Fit' section of the report.

    The reference points are fitted once; the plot, the progress table and
    the backfit table all come from that fit. The parameter sheet is built
    from the `popt` / `pcov` arguments.

    Args:
        df_ref: Reference dataframe with the columns 'plate_layout_dil' and
            'OD_delta'.
        popt: Fit parameters used for the parameter sheet.
        pcov: Covariance of the fit parameters used for the parameter sheet.
        out_dir: Directory that receives 'fit.svg'. The markdown links to
            './img/fit.svg', so this is expected to be the report's 'img'
            directory.

    Returns:
        The markdown text of the section.
    """
    ref_flat = df_ref.reset_index(level=[0, 1])
    x = ref_flat['plate_layout_dil']
    y = ref_flat['OD_delta']
    fit_result = fit_reference_auto_rm(x, y)
    fit_popt, fit_pcov = fit_result[0][0], fit_result[0][1]
    removed = fit_result[1]

    result_img = path.join(out_dir, 'fit.svg')
    fit_image(x, y, fit_popt, fit_pcov, result_img, confidence='student-t', rm_index=removed)

    # Number of points that remain after the removed ones are subtracted.
    n = len(x) - len(removed)
    df_fit = fit_sheet(popt, pcov, n)
    display(df_fit)

    # Reuse the fit from above; fitting the same data a second time is not needed.
    df_backfit = backfit(df_ref, fit_popt)

    md = '## Reference Curve Fit\n\n'
    # Raw string: '\L' and '\o' are LaTeX commands, not Python escape sequences.
    md += r'$\LARGE y = {d + {a - d \over {1 + ({ x \over c })^b}} }$  ' + '\n\n'
    md += '!["alt text"](./img/fit.svg)'

    md += '\n\n'
    md += 'Verbose fitting progress, metric is R-squared:\n\n'
    md += fit_result[3].to_markdown() + '\n\n'

    md += 'Fit parameters\n\n'
    md += df_fit.to_markdown(index=False) + '\n\n'
    md += 'Backfit...'
    md += '\n\n' + df_backfit.to_markdown() + '\n\n'

    return md

# fit_section_md(ref, popt, pcov, REPORT_DIR)

### Sample

In [None]:
def sample_to_md(dc):
    """Render one checked sample as a markdown sub-section.

    Args:
        dc: Dictionary with the keys 'sample', 'type', 'num', 'cv', 'mean',
            'valid' and 'note'.

    Returns:
        Markdown text with the sample table, CV, mean and validity, plus the
        note when it is not empty.
    """
    s_view = dc['sample'][['OD_delta', 'plate_layout_dil', 'concentration', 'mask_reason']]
    # SAMPLE_TYPES lives in the constants module (imported as `cnt`); the bare
    # name is not defined in this notebook.
    md = "### Sample: {0} '{1}' {2}\n\n".format(cnt.SAMPLE_TYPES[dc['type']], dc['type'], dc['num'])
    md += s_view.to_markdown()
    md += '\n\n'
    # Two trailing spaces force a markdown line break.
    md += "CV = {:2.3} [%]  \n".format(100 * dc['cv'])
    md += "mean = {:.4} [cp/ml]  \n".format(dc['mean'])
    md += "valid = {}  \n".format(dc['valid'])
    if dc['note']:
        md += "note: {}  ".format(dc['note'])

    return md

def sample_section_md(samples, img_dir):
    """Build the 'Sample evaluation' section and write one image per sample.

    Args:
        samples: Dataframe with the evaluated rows; its 'plate_layout_num'
            column provides the sample numbers.
        img_dir: Directory that receives the SVG files linked from the text.

    Returns:
        The markdown text of the section.
    """
    parts = ['## Sample evaluation\n\n']

    # Control (type 'k', number 1) first.
    control = sample_check(samples, 'k', 1)
    parts.append(sample_to_md(control))
    control_svg = 'control_{0:02d}.svg'.format(1)
    sample_img(samples, 'k', 1, path.join(img_dir, control_svg), show=False)
    parts.append('!["alt text"](./img/{})\n\n'.format(control_svg))

    # Then every number found in the layout, in ascending order, as type 's'.
    numbers = samples['plate_layout_num'].astype(int).unique()
    numbers.sort()
    for num in numbers:
        checked = sample_check(samples, 's', num)
        parts.append(sample_to_md(checked))

        info_text = sampleinfo_to_str(sample_info(samples, 's', num, verbose=False)['info'])
        if info_text:
            parts.append('\n')
            parts.append('info: ' + info_text + '  ')
        parts.append('\n')

        svg_name = 'sample_{0:02d}.svg'.format(num)
        sample_img(samples, 's', num, img_file=path.join(img_dir, svg_name), show=False, verbose=False)
        parts.append('![{0}](./img/{0})\n\n'.format(svg_name))

    return ''.join(parts)

def save_md(file_path, md_txt):
    """Write the markdown text to `file_path` on a best-effort basis.

    The file is written as UTF-8 so that the result does not depend on the
    platform's default encoding. I/O errors are printed, not raised.

    Args:
        file_path: Destination file; its directory must already exist.
        md_txt: Text to write.
    """
    try:
        with open(file_path, 'w', encoding='utf-8') as fl:
            fl.write(md_txt)
    except OSError as e:
        print('Error: ' + str(e))

### Results

In [None]:
# Fresh combined table for the report; format_results() changes the frame it
# is given in place.
final_result = make_final()

def format_resluts_val(x):
    """Format the result cell of one row of the result table.

    Args:
        x: Row (mapping) with the keys 'Result [cp/ml]', 'Comment' and
            'valid_ex'.

    Returns:
        The result in scientific notation, or the comment when the result is
        missing; wrapped as ``**...**`` for valid rows and as ``( ... )*``
        otherwise.
    """
    result = x['Result [cp/ml]']
    # pd.isna replaces math.isnan: `math` is never imported in this notebook.
    if pd.isna(result):
        text = x['Comment']
    else:
        text = '{:.4e}'.format(result)

    if x['valid_ex']:
        return '**{}**'.format(text)
    return '( {} )*'.format(text)

def format_results(df):
    """Turn the combined result table into its printable form.

    The frame is changed in place: a 'Comment' column is added, 'CV [%]' and
    'Result [cp/ml]' become text, and the helper columns are removed.

    Args:
        df: Table as produced by make_final().

    Returns:
        The same dataframe object.
    """
    def comment_of(row):
        return final_sample_info(row['info'], row['Pre-dilution'])[0]

    def cv_text(row):
        return '{:.2f}'.format(row['CV [%]'])

    df.loc[:, ['Comment']] = df.apply(comment_of, axis=1)
    df.loc[:, ['CV [%]']] = df.apply(cv_text, axis=1)
    df.loc[:, ['Result [cp/ml]']] = df.apply(format_resluts_val, axis=1)

    helper_columns = ['info', 'Valid', 'Reader Data [cp/ml]', 'info_ex', 'valid_ex']
    df.drop(helper_columns, axis=1, inplace=True)

    return df

# format_results(final_result)
# final_result

In [None]:
def result_section(df):
    """Build the 'Analysis Results' section from the combined result table.

    Note that `df` is formatted in place by format_results().
    """
    table = format_results(df).to_markdown()
    return '## Analysis Results\n\n' + table + '\n\n'


### Header

In [None]:
def header_section(date, id, plate_id, msg):
    """Build the 'Header' section: one 'label: value' paragraph per field."""
    fields = (
        ('Date', date),
        ('Identification', id),
        ('Plate', plate_id),
        ('Comment', msg),
    )
    paragraphs = ''.join('{}: {}\n\n'.format(label, value) for label, value in fields)
    return '## Header\n\n' + paragraphs

### Parameters

In [None]:
def param_section(df_params):
    """Build the 'Parameters' section with the parameter table as markdown."""
    pieces = ['## Parameters\n\n', 'Parameters:\n\n', df_params.to_markdown(), '\n\n']
    return ''.join(pieces)

In [None]:
# Show the not yet formatted result table as the cell output.
final_result

### Report Assembly

In [None]:
# Fixed introduction; the sections are appended in report order below.
report = '''
# Automatically Generated Markdown report

This a PoC for automatic report generation...  

'''

# NOTE(review): date, identification and comment are hard-coded here.
report += header_section('05 May 2023', 'GN004240-033', PLATE_ID, ':)')
# The reference row is left out of the printed result table.
report += result_section(final_result.drop('reference 01', axis=0))
report += param_section(params)
# The images live in an 'img' folder next to the report; the markdown links
# are written relative to it ('./img/...').
img_dir = path.join(REPORT_DIR, 'img')
os.makedirs(img_dir, exist_ok=True)
report += fit_section_md(ref, popt, pcov, img_dir) # TODO: !!! global fit_result[3]

report += sample_section_md(samplesk, img_dir)

print(REPORT_FILE_PATH)
save_md(REPORT_FILE_PATH, report)

In [None]:
# from Markdown2docx import Markdown2docx
# report_path_noext = os.path.abspath(os.path.splitext(REPORT_FILE_PATH)[0])
# print(report_path_noext)
# project = Markdown2docx(report_path_noext)
# # project = Markdown2docx('data/input/results_2/aaa')
# project.eat_soup()
# project.save()

### Export to PDF

In [None]:
from md2pdf.core import md2pdf
# The PDF is written next to the markdown report.
# NOTE(review): the base name still carries the '.md' suffix, so the file is
# named '<report>.md.pdf' - confirm that this double extension is intended.
PDF_FILE_PATH = path.join(REPORT_DIR,"{}.pdf".format(os.path.basename(REPORT_FILE_PATH)))

In [None]:
# Convert the in-memory report text; no markdown file, stylesheet or base
# URL is passed.
md2pdf(PDF_FILE_PATH,
       md_content=report,
       md_file_path=None,
       css_file_path=None,
       base_url=None)