# Intro


## Goal
**WHAT**: Automatic report generation from Hamilton measurements.  
**WHY**: Speed up report generation and avoid human errors (copying data, subjective evaluation, ...)

## Tools
Fast iteration in an agile way.  
Generic approach - different plate setups, parameters, ... all handled by the same code, no changes needed.  

**Python** programming language.  
**jupyter** notebook is currently used, with some functions divided into small modules.  
**Visual Studio Code** IDE (Integrated Development Environment).  
**Markdown** (*.md) format for the generated report (simple, human-readable).  

## Input:
 - Worklist file path (*.xls) as used for Hamilton input.
   - Sample name
   - Dilution
   - Viscosity
 - Measurement results file path (*.xls) as output from Hamilton.
 - Parameters; constants in code (file path *.json)
   - CV (Coefficient of variation) threshold
   - Reference value (1.7954e+10 cp/ml)
   - Dilutions [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0]
   - Decimal digits for output

## Output:
  - Report (*.md, printable to pdf)
    - Could be manually edited
    - Image files
    - Result sheets
  - Estimated size <2kB (current)

## Done
  - Invalid sample:
    - CV >THRESHOLD
    - Only one point
  - Parameters file (*.csv, *.json)
  - Multiple plates (in worklist file)

## TODO:
  - Modules
  - Finalize the report
  - Running modes
    - Python script - automatic run (command line with parameters)
    - GUI; use modules to create an App (code remains the same, but used from GUI)
  - Tests (unit, integration)
  - checksum (*.sdax); put into report
  - Extensive testing...
  - Automatic print to *.pdf ?

## Conclusion
End to end evaluation time reduction approximately 2h -> 20min per measurement. (thx Felix)


# Generate report  - POC

## Imports

In [None]:
# Notebook-wide switches.
DEBUG = False             # not read by any of the cells shown in this notebook
WARNING_DISABLE = True    # when True, RuntimeWarning / OptimizeWarning are silenced before fitting
VERBOSE_NOTEBOOK = True   # when True, intermediate tables are shown via display()/print()

In [None]:
import pandas as pd
import numpy as np
from os import path
import os
import constants as cc

In [None]:
def make_input_paths(input_dir, base_name):
    """Build the paths of the input files that belong to one measurement run.

    Args:
        input_dir: Directory that holds the input files.
        base_name: File-name prefix shared by all files of the run.

    Returns:
        dict with the keys ``'worklist'`` (``<base_name>worklist-ELISA.xls``)
        and ``'params'`` (``<base_name>AAV9-ELISA_Parameters.csv``).

    Raises:
        FileNotFoundError: If the worklist file does not exist. The
            parameters file is *not* checked here.
    """
    worklist = path.join(input_dir, base_name + 'worklist-ELISA.xls')
    if not path.isfile(worklist):
        # FileNotFoundError is still an Exception, so callers that catch the
        # generic type keep working.
        raise FileNotFoundError("Worklist file path is invalid: {}".format(worklist))

    params = path.join(input_dir, base_name + 'AAV9-ELISA_Parameters.csv')

    return {'worklist': worklist, 'params': params}

def make_output_paths(input_dir, base_name, sample_num):
    """Build the result-file and report-file paths for one plate.

    Args:
        input_dir: Directory that holds the measurement files.
        base_name: File-name prefix shared by all files of the run.
        sample_num: Number inserted into the file and directory names
            (the caller in this notebook passes the plate id).

    Returns:
        dict with the keys ``'results'`` (``<base_name>calc<sample_num>.xlsx``,
        which must already exist) and ``'report'``
        (``results_<sample_num>/<base_name>report_<sample_num>.md`` below
        ``input_dir``; neither the directory nor the file is created here).

    Raises:
        FileNotFoundError: If the results file does not exist.
    """
    results = path.join(input_dir, base_name + 'calc{}.xlsx'.format(sample_num))
    if not path.isfile(results):
        # FileNotFoundError is still an Exception, so callers that catch the
        # generic type keep working.
        raise FileNotFoundError("Results file path is invalid: {}".format(results))

    report_dir = path.join(input_dir, 'results_{}'.format(sample_num))
    report = path.join(report_dir, '{}report_{}.md'.format(base_name, sample_num))

    return {'results': results, 'report': report}

In [None]:
# Location of one measurement run.
BASE_NAME = '230426_GN004240-033_-_'  # prefix prepended to every input/output file name
WORKING_DIR = './data/input/'         # directory that holds the run's files

## Read data

In [None]:
PLATE_ID = 1 # plate id; selects the calc<N>.xlsx file and the results_<N> directory

# Input side: worklist and parameters file (raises if the worklist is missing).
input_files = make_input_paths(WORKING_DIR, BASE_NAME)
WORKLIST_FILE_PATH = input_files['worklist']
PARAMS_FILE_PATH = input_files['params']

# Measurement results file (must exist) and the report file to be written.
output_files = make_output_paths(WORKING_DIR, BASE_NAME, PLATE_ID)
RESULT_FILE_PATH = output_files['results']
REPORT_FILE_PATH = output_files['report']
# Absolute directory of the report; images and the PDF are placed relative to it.
REPORT_DIR = os.path.dirname(os.path.abspath(REPORT_FILE_PATH))

In [None]:
from readdata import read_concat_data

# Load the measurement results for the selected plate.
g_od = read_concat_data(RESULT_FILE_PATH)
# NOTE(review): shown unconditionally, unlike the other cells that honour VERBOSE_NOTEBOOK.
display(g_od)

### Layouts

In [None]:
from layouthandle import read_plate_layout

# Three plate-layout tables, each read from its own CSV file.
g_plate_layout_id = read_plate_layout('./data/plate_layout_ident.csv')
g_plate_layout_num = read_plate_layout('./data/plate_layout_num.csv')
g_plate_layout_dil_id = read_plate_layout('./data/plate_layout_dil_id.csv')

# Optional visual check of the loaded layouts.
if VERBOSE_NOTEBOOK:
    display(g_plate_layout_id)
    display(g_plate_layout_num)
    display(g_plate_layout_dil_id)

In [None]:
from readdata import concat_data_with_layouts

# Combine the measured data with the three layout tables into one dataframe.
df_all = concat_data_with_layouts(g_od, g_plate_layout_id, g_plate_layout_num, g_plate_layout_dil_id)

# Optional visual check: raw data next to the combined table.
if VERBOSE_NOTEBOOK:
    display(g_od)
    display(df_all)

### Dilution to Concentration

Define the dilution dataframe. The dataframe is indexed according to the plate layout; the index of the reference dataframe corresponds to the reference entries of `plate_layout_dil`.

In [None]:
# TODO: read reference value from parameters
# Reference value (the intro lists it as 1.7954e+10 cp/ml) and the dilution series.
REF_VAL_MAX = 1.7954e+10
DILUTIONS = [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0]

from sample import make_concentration
# Table whose 'concentration' column is later mapped onto the reference wells.
g_reference_conc = make_concentration(REF_VAL_MAX, DILUTIONS)

if VERBOSE_NOTEBOOK:
    display(g_reference_conc)

## Fit

In [None]:
import warnings
from scipy.optimize import OptimizeWarning

# Silence the two warning categories process-wide when the notebook flag is set.
if WARNING_DISABLE:
    warnings.simplefilter('ignore', RuntimeWarning)
    warnings.simplefilter('ignore', OptimizeWarning)

Get the fitting data from dataframe

In [None]:
# Rows marked 'r' in the ident layout are the reference wells; copy them so the
# added column does not touch df_all.
g_ref = df_all.loc[(df_all['plate_layout_ident']=='r')].copy()
# Translate each dilution id into its concentration via the reference table.
g_ref['plate_layout_conc'] = g_ref['plate_layout_dil_id'].map(g_reference_conc['concentration'])
if VERBOSE_NOTEBOOK:
    display(g_ref)

Fit with confidence interval

In [None]:
from image import fit_image
from fitdata import fit_reference_auto_rm

# x: concentration, y: OD delta of the reference wells (index levels 0 and 1 flattened).
x = g_ref.reset_index(level=[0,1])['plate_layout_conc']
y = g_ref.reset_index(level=[0,1])['OD_delta']
g_fit = fit_reference_auto_rm(x, y, verbose=False)
# As used below: g_fit[0] holds (popt, pcov), g_fit[1] is handed to fit_image
# as rm_index, g_fit[3] is a table that gets displayed.
g_popt = g_fit[0][0]
g_pcov = g_fit[0][1]

# Fifth argument None: no image path is given for this interactive plot.
fit_image(x, y, g_fit[0][0], g_fit[0][1], None, confidence='student-t', rm_index=g_fit[1])
display(g_fit[3])
display(g_fit[1])

In [None]:
from sample import data_range

# Range object derived from the reference rows and the fitted parameters;
# the cb, sv, od_fit and od attributes are each read as a (low, high) pair below.
g_dr = data_range(g_ref, g_popt)

if VERBOSE_NOTEBOOK:
    print('Concentration backfit [cp/ml] range <{0}, {1}>'.format(g_dr.cb[0], g_dr.cb[1]))
    print('Standard Value [cp/ml] range <{0}, {1}>'.format(g_dr.sv[0], g_dr.sv[1]))
    print('SV to OD fit range <{0:.4f}, {1:.4f}>'.format(g_dr.od_fit[0], g_dr.od_fit[1]))
    print('Optical density range <{0:.4f}, {1:.4f}>'.format(g_dr.od[0], g_dr.od[1]))

## Sample evaluation

### Compute concentration for all `s` and `k` samples

Fit the data, and apply the inverse function as a check...

In [None]:
from sample import init_samples
from sample import apply_fit
from sample import mask_sample
from sample import generate_results
from image import fit_image
from fitdata import fit_reference_auto_rm
from fitdata import backfit
from sample import data_range

# Build the per-sample working dataframe from the combined data and the reference table.
dfg = init_samples(df_all, g_reference_conc)


# Disabled alternative: redo the reference fit on dfg instead of reusing the
# g_ref / g_fit / g_popt / g_dr globals computed in the cells above.
# g_ref = dfg.loc[(dfg['plate_layout_ident']=='r')].copy()
# display(g_ref)
# x = g_ref.reset_index(level=[0,1])['plate_layout_conc']
# y = g_ref.reset_index(level=[0,1])['OD_delta']
# g_fit = fit_reference_auto_rm(x, y, verbose=False)
# g_popt = g_fit[0][0]
# g_pcov = g_fit[0][1]

# fit_image(x, y, g_fit[0][0], g_fit[0][1], None, confidence='student-t', rm_index=g_fit[1])
# display(g_fit[3])
# display(g_fit[1])

# g_dr = data_range(g_ref, g_popt)

# Apply the fitted parameters to the samples.
dfg = apply_fit(dfg, g_popt)
# NOTE(review): the return value is stored in `sadfgmplesk`, a name that is never
# read again, while `samplesk` below is bound to `dfg`. This only works if
# mask_sample modifies dfg in place - TODO confirm, and fix the variable name.
sadfgmplesk = mask_sample(dfg, g_dr)
# Per-sample result table; later concatenated with the worklist in make_final().
sl = generate_results(dfg, g_dr)

samplesk = dfg

if VERBOSE_NOTEBOOK:
    display(samplesk)
    display(sl)

In [None]:
# Print the four (low, high) ranges held by g_dr; the same summary is also
# printed right after g_dr is computed.
if VERBOSE_NOTEBOOK:
    print('Concentration backfit [cp/ml] range <{0}, {1}>'.format(g_dr.cb[0], g_dr.cb[1]))
    print('Standard Value [cp/ml] range <{0}, {1}>'.format(g_dr.sv[0], g_dr.sv[1]))
    print('SV to OD fit range <{0:.4f}, {1:.4f}>'.format(g_dr.od_fit[0], g_dr.od_fit[1]))
    print('Optical density range <{0:.4f}, {1:.4f}>'.format(g_dr.od[0], g_dr.od[1]))

### Plot sample with reference curve

In [None]:
from image import sample_img

# Preview plot for one hard-coded sample: type 's', number 6.
if VERBOSE_NOTEBOOK:
    sample_img(samplesk, g_ref, 's', 6)

## Worklist

In [None]:
from sample import final_sample_info
import worklist as wk

# Raw worklist as read from the worklist file.
g_wl_raw = wk.read_worklist(WORKLIST_FILE_PATH)
# NOTE(review): g_valid_plates is not read by any other cell shown in this notebook.
g_valid_plates = wk.check_worklist(g_wl_raw)

def make_final(wl_raw, plate_id, results=None):
    """Join the worklist of one plate with the per-sample results.

    Args:
        wl_raw: Raw worklist as returned by ``wk.read_worklist``.
        plate_id: Plate whose worklist rows are selected.
        results: Per-sample result table to join with the worklist. When
            omitted, the notebook global ``sl`` is used, which keeps existing
            two-argument calls working.

    Returns:
        DataFrame indexed by 'Sample type' with the columns 'Sample Name',
        'Pre-dilution', 'Reader Data [cp/ml]', 'Result [cp/ml]', 'CV [%]',
        'Valid', 'info', 'info_ex' and 'valid_ex'.
    """
    if results is None:
        # Backward-compatible fallback to the notebook-level result table.
        results = sl

    wl, wl_cols_dict = wk.worklist_sample(wl_raw, plate_id)

    final = pd.concat([wl, results], axis=1)
    cd = wl_cols_dict
    # Final result = reader value scaled by the sample's dilution from the worklist.
    final.loc[:, ['Result [cp/ml]']] = final.apply(lambda x: x['Reader Data [cp/ml]'] * x[cd['Dilution']], axis=1)
    # CV is stored as a fraction; the report shows percent.
    final.loc[:, ['CV [%]']] = final.apply(lambda x: x['CV [%]'] * 100, axis=1)
    # reorder columns
    final = final.reindex([cd['SampleID'], cd['Dilution'], cd['Viscosity'], 'Reader Data [cp/ml]', 'Result [cp/ml]', 'CV [%]', 'Valid', 'info'], axis=1)
    final.rename(columns={cd['SampleID']: 'Sample Name', cd['Dilution']: 'Pre-dilution'}, inplace=True)
    # NOTE(review): the column name is hard-coded here; presumably it equals
    # cd['Viscosity'] - TODO confirm against wk.worklist_sample.
    final.drop('Viscosity_{}'.format(plate_id), axis=1, inplace=True)
    final.index.name = 'Sample type'
    # final_sample_info returns a pair; element 0 goes to 'info_ex', element 1 to 'valid_ex'.
    final.loc[:, ['info_ex']] = final.apply(lambda x: final_sample_info(x['info'], x['Pre-dilution'])[0], axis=1)
    final.loc[:, ['valid_ex']] = final.apply(lambda x: final_sample_info(x['info'], x['Pre-dilution'])[1], axis=1)
    return final

## Report  
We build a report here...

### Fit Reference Curve

In [None]:
from fitdata import fit_sheet

def fit_section_md(df_ref, popt, pcov, out_dir):
    """Build the 'Reference Curve Fit' section of the Markdown report.

    The reference data is fitted with ``fit_reference_auto_rm``; that fit is
    used for the plot, the fitting-progress table and the backfit table. The
    ``popt`` / ``pcov`` arguments are used only for the fit-parameter sheet.

    Args:
        df_ref: Reference rows with the columns 'plate_layout_conc' and
            'OD_delta'; index levels 0 and 1 are flattened before fitting.
        popt: Fitted parameters passed to ``fit_sheet``.
        pcov: Parameter covariance passed to ``fit_sheet``.
        out_dir: Directory that receives 'fit.svg'. The Markdown links the
            image as './img/fit.svg', so this is expected to be the report's
            'img' directory.

    Returns:
        The Markdown text of the section.
    """
    x = df_ref.reset_index(level=[0,1])['plate_layout_conc']
    y = df_ref.reset_index(level=[0,1])['OD_delta']
    # One fit is enough: the inputs do not change within this function, so the
    # same result serves the plot, the progress table and the backfit.
    fit_result = fit_reference_auto_rm(x, y)
    fit_popt = fit_result[0][0]
    fit_pcov = fit_result[0][1]
    removed = fit_result[1]

    result_img = path.join(out_dir, 'fit.svg')
    fit_image(x, y, fit_popt, fit_pcov, result_img,
              confidence='student-t', rm_index=removed, verbose=False, show=False)

    # Number of points left after the ones listed in the removal index are excluded.
    n = len(x) - len(removed)
    df_fit = fit_sheet(popt, pcov, n)

    md = '## Reference Curve Fit\n\n'
    # Backslashes are escaped explicitly; '\L' and '\o' are not valid escape
    # sequences and trigger a warning in a plain string literal.
    md += '$\\LARGE y = {d + {a - d \\over {1 + ({ x \\over c })^b}} }$  \n\n'
    md += '!["alt text"](./img/fit.svg)'

    md += '\n\n'
    md += 'Verbose fitting progress, metric is R-squared:\n\n'
    md += fit_result[3].to_markdown() + '\n\n'

    md += 'Fit parameters\n\n'
    md += df_fit.to_markdown(index=False) + '\n\n'
    md += 'Backfit...'
    df_backfit = backfit(df_ref, fit_popt)
    md += '\n\n' + df_backfit.to_markdown() + '\n\n'

    return md

### Sample

In [None]:
from sample import sampleinfo_to_str
from sample import sample_check
from sample import sample_info

def sample_to_md(dc):
    """Render one evaluated sample as a Markdown fragment.

    Args:
        dc: Mapping with the keys 'sample' (table with the columns
            'OD_delta', 'plate_layout_dil', 'concentration', 'mask_reason'),
            'type', 'num', 'cv', 'mean', 'valid' and 'note'.

    Returns:
        Markdown text: a heading, the sample table, the CV / mean / valid
        lines and, when 'note' is truthy, a trailing note line.
    """
    shown_columns = ['OD_delta', 'plate_layout_dil', 'concentration', 'mask_reason']
    table = dc['sample'][shown_columns]

    heading = "### Sample: {0} '{1}' {2}\n\n".format(
        cc.SAMPLE_TYPES[dc['type']], dc['type'], dc['num'])
    pieces = [
        heading,
        table.to_markdown(),
        '\n\n',
        # cv is stored as a fraction and shown in percent
        "CV = {:2.3} [%]  \n".format(100 * dc['cv']),
        "mean = {:.4} [cp/ml]  \n".format(dc['mean']),
        "valid = {}  \n".format(dc['valid']),
    ]
    note = dc['note']
    if note:
        pieces.append("note: {}  ".format(note))

    return ''.join(pieces)

def sample_section_md(samples, reference, img_dir):
    """Build the 'Sample evaluation' section and write the per-sample images.

    Control 'k' number 1 is rendered first, followed by one entry of type 's'
    for every distinct value of ``samples['plate_layout_num']`` in ascending
    order. Each entry gets an SVG written to ``img_dir`` and linked from the
    Markdown as './img/<file>'. Reads the notebook global ``g_dr``.

    Args:
        samples: Evaluated samples dataframe.
        reference: Reference rows handed to ``sample_img`` for plotting.
        img_dir: Directory that receives the SVG files.

    Returns:
        The Markdown text of the section.
    """
    chunks = ['## Sample evaluation\n\n']

    # --- control sample ---
    control = sample_check(samples, 'k', 1)
    chunks.append(sample_to_md(control))
    control_svg = 'control_{0:02d}.svg'.format(1)
    sample_img(samples, reference, 'k', 1, path.join(img_dir, control_svg), show=False)
    chunks.append('!["alt text"](./img/{})\n\n'.format(control_svg))

    # --- regular samples, ascending by number ---
    numbers = samples['plate_layout_num'].astype(int).unique()
    numbers.sort()
    kind = 's'
    for num in numbers:
        checked = sample_check(samples, kind, num)
        chunks.append(sample_to_md(checked))

        # optional info line
        details = sample_info(samples, kind, num, g_dr, verbose=False)
        info_text = sampleinfo_to_str(details['info'])
        if info_text:
            chunks.append('\n')
            chunks.append('info: ' + info_text + '  ')
        chunks.append('\n')

        svg_name = 'sample_{0:02d}.svg'.format(num)
        sample_img(samples, reference, kind, num,
                   img_file=path.join(img_dir, svg_name), show=False, verbose=False)
        chunks.append('![{0}](./img/{0})\n\n'.format(svg_name))

    return ''.join(chunks)

def save_md(file_path, md_txt):
    """Write the Markdown text to ``file_path`` on a best-effort basis.

    The file is always written as UTF-8, so the result does not depend on the
    platform's default encoding. I/O and encoding failures are reported on
    stdout and do not raise; nothing is returned.

    Args:
        file_path: Target file; its directory must already exist.
        md_txt: Markdown text to write.
    """
    try:
        with open(file_path, 'w', encoding='utf-8') as fl:
            fl.write(md_txt)
    except (OSError, UnicodeError) as e:
        # Deliberately non-fatal: the report text is still held by the caller.
        print('Error: ' + str(e))

### Results

In [None]:
from sample import final_sample_info
import math

final_result = make_final(g_wl_raw, PLATE_ID)

def format_resluts_val(x):
    """Format the 'Result [cp/ml]' cell of one result row for the report.

    A NaN result is replaced by the row's 'Comment'; any other value is
    written in scientific notation with four decimals. Rows with a truthy
    'valid_ex' are wrapped in ``**...**``, all others in ``( ... )*``.

    Args:
        x: Row-like mapping with the keys 'Result [cp/ml]', 'Comment' and
            'valid_ex'.

    Returns:
        The formatted cell text.
    """
    value = x['Result [cp/ml]']
    text = x['Comment'] if math.isnan(value) else '{:.4e}'.format(value)
    wrapper = '**{}**' if x['valid_ex'] else '( {} )*'
    return wrapper.format(text)

def format_results(df):
    """Turn the final result table into its printable form, in place.

    Adds a 'Comment' column (element 0 of ``final_sample_info``), formats
    'CV [%]' with two decimals, replaces 'Result [cp/ml]' by the text from
    ``format_resluts_val`` and drops the helper columns.

    Args:
        df: Result table; it is modified in place.

    Returns:
        The same ``df`` object.
    """
    def _comment(row):
        return final_sample_info(row['info'], row['Pre-dilution'])[0]

    def _cv_text(row):
        return '{:.2f}'.format(row['CV [%]'])

    df.loc[:, ['Comment']] = df.apply(_comment, axis=1)
    df.loc[:, ['CV [%]']] = df.apply(_cv_text, axis=1)
    # Needs the 'Comment' column added above for rows whose result is NaN.
    df.loc[:, ['Result [cp/ml]']] = df.apply(format_resluts_val, axis=1)

    helper_columns = ['info', 'Valid', 'Reader Data [cp/ml]', 'info_ex', 'valid_ex']
    df.drop(helper_columns, axis=1, inplace=True)

    return df

def result_section(df):
    """Build the 'Analysis Results' section of the Markdown report.

    Args:
        df: Final result table. It is passed through ``format_results``,
            which modifies it in place.

    Returns:
        Markdown text: the heading, the formatted table and the footnote that
        explains the ``*`` marker.
    """
    md = '## Analysis Results\n\n'

    md += format_results(df).to_markdown()
    md += '\n\n'
    # The backslash is escaped explicitly ('\*' is not a valid escape sequence);
    # in Markdown it keeps the asterisk literal.
    md += '\\* sample will be retested\n\n'

    return md

### Header

In [None]:
def header_section(date, id, plate_id, msg):
    """Build the 'Header' section of the Markdown report.

    Args:
        date: Value printed after 'Date:'.
        id: Value printed after 'Identification:'.
        plate_id: Value printed after 'Plate:'.
        msg: Value printed after 'Comment:'.

    Returns:
        Markdown text with the heading and one paragraph per field.
    """
    rows = (
        ('Date: {}\n\n', date),
        ('Identification: {}\n\n', id),
        ('Plate: {}\n\n', plate_id),
        ('Comment: {}\n\n', msg),
    )
    return '## Header\n\n' + ''.join(template.format(value) for template, value in rows)

### Parameters

In [None]:
def param_section(df_params):
    """Build the 'Parameters' section of the Markdown report.

    Args:
        df_params: Object whose ``to_markdown()`` yields the parameter table.

    Returns:
        Markdown text with the heading, a 'Parameters:' label and the table.
    """
    table = df_params.to_markdown()
    return '## Parameters\n\n' + ('Parameters:\n\n' + table + '\n\n')

if VERBOSE_NOTEBOOK:
    # A bare expression nested inside an `if` is not rendered by the notebook,
    # so the table has to be shown explicitly.
    display(final_result)

### Report Assembly

In [None]:
from readdata import read_params
from zlib import crc32

# Parameters table shown in the report's 'Parameters' section.
params = read_params(PARAMS_FILE_PATH)

# Fixed preamble; the sections below are appended in report order.
report = '''
# Automatically Generated Markdown report

This a PoC for automatic report generation...  

'''

# NOTE(review): date, identification and comment are hard-coded for this run.
report += header_section('05 May 2023', 'GN004240-033', PLATE_ID, ':)')
# The 'reference 01' row is left out of the results table.
report += result_section(final_result.drop('reference 01', axis=0))
report += param_section(params)
# Images are written to <report dir>/img, which is what the './img/...' links expect.
img_dir = path.join(REPORT_DIR, 'img')
os.makedirs(img_dir, exist_ok=True)
report += fit_section_md(g_ref, g_popt, g_pcov, img_dir) # TODO: !!! global fit_result[3]

report += sample_section_md(samplesk, g_ref, img_dir)

print(REPORT_FILE_PATH)
save_md(REPORT_FILE_PATH, report)

In [None]:
# Regression guard: the CRC32 of the UTF-8 encoded report must match the
# value recorded for the known-good report.
res = bytearray(report,'utf8')
t = crc32(res)
crc_report = 2898421151
if t != crc_report:
    raise Exception('Report CRC missmatch! {} != {}'.format(t, crc_report))
# Reached only when the checksum matches (this line used to sit, unreachable,
# behind the raise).
print('\nReport CRC  = {}\n'.format(t))

### Export to PDF

In [None]:
from md2pdf.core import md2pdf
# PDF goes next to the report; the full report file name (including its '.md'
# suffix) is kept, so the result is named '<report>.md.pdf'.
PDF_FILE_PATH = path.join(REPORT_DIR,"{}.pdf".format(os.path.basename(REPORT_FILE_PATH)))

In [None]:
# Render the in-memory Markdown report to PDF.
# The report links its images relatively ('./img/...'); because the content is
# passed as a string, a base URL is needed to resolve those links. The report
# file path is used so they resolve against the report's own directory.
# NOTE(review): confirm the images appear in the generated PDF.
md2pdf(PDF_FILE_PATH,
       md_content=report,
       md_file_path=None,
       css_file_path=None,
       base_url=REPORT_FILE_PATH)