# Intro


## Goal
**WHAT**: Automatic report generation from Hamilton measurements.  
**WHY**: Speed up the report generation, and avoid human errors (copying data, subjective evaluation, ....)

## Tools
Fast iteration in an agile way.  
Generic approach - different plate setups, parameters, ... all with the same code, no changes needed.  

**Python** programming language.  
**jupyter** notebook is currently used, with some functions divided into small modules.  
**Visual Studio Code** IDE (Integrated Development Environment).  
**Markdown** (*.md) format for the generated report (simple, human-readable).  

## Input:
 - Worklist file path (*.xls) as used for Hamilton input.
   - Sample name
   - Dilution
   - Viscosity
 - Measurement results file path (*.xls) as output from Hamilton.
 - Parameters; constants in code (file path *.json)
   - CV (Coefficient of variation) threshold
   - Reference value (1.7954e+10 cp/ml)
   - Dilutions [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0]
   - Decimal digits for output

## Output:
  - Report (*.md, printable to pdf)
    - Could be manually edited
    - Image files
    - Result sheets
  - Estimated size <2kB (current)

## Done
  - Invalid sample:
    - CV >THRESHOLD
    - Only one point
  - Parameters file (*.csv, *.json)
  - Multiple plates (in worklist file)

## TODO:
  - Modules
  - Finalize the report
  - Running modes
    - Python script - automatic run (command line with parameters)
    - GUI; use modules to create an App (code remains the same, but used from GUI)
  - Tests (unit, integration)
  - checksum (*.sdax); put into report
  - Extensive testing...
  - Automatic print to *.pdf ?

## Conclusion
End to end evaluation time reduction approximately 2h -> 20min per measurement. (thx Felix)


# Generate report  - POC

## Imports

In [None]:
# Notebook-wide switches.
# When True, intermediate dataframes, ranges and plots are shown via display()/print().
VERBOSE_NOTEBOOK = False
# When True, RuntimeWarning and OptimizeWarning are ignored (filters are installed after the imports).
WARNING_DISABLE = True
# Debug switch; not referenced by any of the cells visible in this notebook export.
DEBUG = False

In [None]:
import pandas as pd
import numpy as np
from os import path
import os
import warnings
from scipy.optimize import OptimizeWarning

# Install process-wide 'ignore' filters for RuntimeWarning and scipy's OptimizeWarning
# when warnings are switched off for this notebook.
if WARNING_DISABLE:
    warnings.simplefilter('ignore', RuntimeWarning)
    warnings.simplefilter('ignore', OptimizeWarning)

In [None]:
def make_input_paths(input_dir, base_name):
    """Build the paths of the input files for one measurement run.

    Args:
        input_dir: Directory that contains the input files.
        base_name: Common file-name prefix of the run. The fixed suffixes
            'worklist-ELISA.xls' and 'AAV9-ELISA_Parameters.csv' are appended
            to it.

    Returns:
        dict with the keys 'worklist' and 'params' holding the file paths.

    Raises:
        FileNotFoundError: If the worklist file does not exist. The parameters
            path is returned without an existence check.
    """
    worklist = path.join(input_dir, base_name + 'worklist-ELISA.xls')
    if not path.isfile(worklist):
        # FileNotFoundError is a subclass of Exception, so callers that catch
        # Exception keep working while new code can catch the specific error.
        raise FileNotFoundError(
            "Worklist file path is invalid: {}".format(worklist))

    params = path.join(input_dir, base_name + 'AAV9-ELISA_Parameters.csv')

    return {'worklist': worklist, 'params': params}

def make_output_paths(input_dir, base_name, sample_num):
    """Build the results-file and report-file paths for one plate.

    Args:
        input_dir: Directory that contains the measurement results file.
        base_name: Common file-name prefix of the run.
        sample_num: Number inserted into the results file name
            ('<base_name>calc<sample_num>.xlsx'), the report directory
            ('results_plate_<sample_num>') and the report file name.

    Returns:
        dict with the keys 'results' (existing results file) and 'report'
        (path of the Markdown report; neither the file nor its directory is
        created here).

    Raises:
        FileNotFoundError: If the results file does not exist.
    """
    results = path.join(input_dir, base_name + 'calc{}.xlsx'.format(sample_num))
    if not path.isfile(results):
        # FileNotFoundError is a subclass of Exception, so callers that catch
        # Exception keep working while new code can catch the specific error.
        raise FileNotFoundError(
            "Results file path is invalid: {}".format(results))

    report = path.join(
        input_dir,
        'results_plate_{}'.format(sample_num),
        '{}report_plate_{}.md'.format(base_name, sample_num),
    )

    return {'results': results, 'report': report}

In [None]:
# Directory with the input files and the common file-name prefix of the evaluated run.
WORKING_DIR = './data/input/'
BASE_NAME = '230426_GN004240-033_-_'

# Resolve the worklist and parameters paths; make_input_paths raises if the worklist file is missing.
input_files = make_input_paths(WORKING_DIR, BASE_NAME)
WORKLIST_FILE_PATH = input_files['worklist']
PARAMS_FILE_PATH = input_files['params']

## Layouts

In [None]:
from layouthandle import read_plate_layout

# Three plate layout tables read from CSV. Judging by the file names they hold the
# per-well identifier, number and dilution id — confirm against the layouthandle module.
g_plate_layout_id = read_plate_layout('./data/plate_layout_ident.csv')
g_plate_layout_num = read_plate_layout('./data/plate_layout_num.csv')
g_plate_layout_dil_id = read_plate_layout('./data/plate_layout_dil_id.csv')

# Show the loaded layouts only in verbose mode.
if VERBOSE_NOTEBOOK:
    display(g_plate_layout_id)
    display(g_plate_layout_num)
    display(g_plate_layout_dil_id)

## Worklist

In [None]:
import worklist as wk

# Read the raw worklist, then run the worklist check; its result is kept as the valid plates.
# NOTE(review): the shape of g_valid_plates is defined in the worklist module — it is not used
# by the cells visible in this export.
g_wl_raw = wk.read_worklist(WORKLIST_FILE_PATH)
g_valid_plates = wk.check_worklist(g_wl_raw)

## Read data

In [None]:
PLATE_ID = 1 # plate id

# Resolve the results file (must exist) and the report path for the selected plate.
output_files = make_output_paths(WORKING_DIR, BASE_NAME, PLATE_ID)
RESULT_FILE_PATH = output_files['results']
REPORT_FILE_PATH = output_files['report']
# Absolute directory of the report file; later used as base for the image directory and the PDF.
REPORT_DIR = os.path.dirname(os.path.abspath(REPORT_FILE_PATH))

print(REPORT_DIR)

In [None]:
from readdata import read_concat_data
from readdata import concat_data_with_layouts

# Read the measurement results of the selected plate and combine them with the three plate layouts.
g_od = read_concat_data(RESULT_FILE_PATH)
df_all = concat_data_with_layouts(g_od, g_plate_layout_id, g_plate_layout_num, g_plate_layout_dil_id)

# Show the raw and the combined data only in verbose mode.
if VERBOSE_NOTEBOOK:
    display(g_od)
    display(df_all)

### Dilution to Concentration

Define the dilution dataframe. The dataframe is indexed according to the plate layout; the index of the reference dataframe corresponds to the reference in `plate_layout_dil`.

In [None]:
# TODO: read reference value from parameters
# Reference value (cp/ml per the notebook introduction) and the list of dilution factors.
REF_VAL_MAX = 1.7954e+10
DILUTIONS = [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0]

from sample import make_concentration
# Build the reference concentration table from the reference value and the dilutions.
g_reference_conc = make_concentration(REF_VAL_MAX, DILUTIONS)

if VERBOSE_NOTEBOOK:
    display(g_reference_conc)

## Sample evaluation

### Compute concentration for all `s` and `k` samples

Fit the data, and apply the inverse function as a check...

In [None]:
from sample import init_samples
from sample import apply_fit
from sample import mask_sample
from sample import generate_results
from image import fit_image
from fitdata import fit_reference_auto_rm
from fitdata import backfit
from sample import data_range

# Working dataframe for the evaluation, built from the combined data and the reference concentrations.
dfg = init_samples(df_all, g_reference_conc)


def fit_func(ref):
    """Fit the reference curve and derive the data range from the fit.

    The first two index levels of ``ref`` are moved to columns, then the
    'plate_layout_conc' column is used as x and 'OD_delta' as y for
    ``fit_reference_auto_rm`` (called with ``verbose=False``).

    Returns:
        Tuple ``(fit, dr)``: the unmodified result of
        ``fit_reference_auto_rm`` and the result of ``data_range(ref, popt)``,
        where ``popt`` is ``fit[0][0]``.
    """
    flat = ref.reset_index(level=[0, 1])
    conc = flat['plate_layout_conc']
    od_delta = flat['OD_delta']

    fit_result = fit_reference_auto_rm(conc, od_delta, verbose=False)
    fitted_params = fit_result[0][0]

    # Debug aid: fit_image(conc, od_delta, fit_result[0][0], fit_result[0][1], None,
    # confidence='student-t', rm_index=fit_result[1]) plots the fit;
    # fit_result[3] and fit_result[1] can be inspected with display().

    valid_range = data_range(ref, fitted_params)
    return fit_result, valid_range

# Rows whose layout identifier equals 'r' are used as the reference data for the fit.
g_ref = dfg.loc[(dfg['plate_layout_ident']=='r')]
g_fit, g_dr = fit_func(g_ref)
# g_fit[0] is a pair; the names suggest fitted parameters and their covariance — confirm in fitdata.
g_popt = g_fit[0][0]
g_pcov = g_fit[0][1]

# Pipeline on the working dataframe: apply the fit, mask with the data range, build the results.
# (apply_fit, mask_sample and generate_results are defined in the sample module.)
dfg = apply_fit(dfg, g_popt)
dfg = mask_sample(dfg, g_dr)
g_sl = generate_results(dfg, g_dr)

if VERBOSE_NOTEBOOK:
    display(dfg)
    display(g_sl)

In [None]:
# In verbose mode, print the lower/upper bounds stored on the data-range object
# (attributes cb, sv, od_fit and od, each indexed with [0] and [1]).
if VERBOSE_NOTEBOOK:
    print('Concentration backfit [cp/ml] range <{0}, {1}>'.format(g_dr.cb[0], g_dr.cb[1]))
    print('Standard Value [cp/ml] range <{0}, {1}>'.format(g_dr.sv[0], g_dr.sv[1]))
    print('SV to OD fit range <{0:.4f}, {1:.4f}>'.format(g_dr.od_fit[0], g_dr.od_fit[1]))
    print('Optical density range <{0:.4f}, {1:.4f}>'.format(g_dr.od[0], g_dr.od[1]))

### Plot sample with reference curve

In [None]:
from image import sample_img

# In verbose mode, plot one sample together with the reference data.
# NOTE(review): 's' and 6 presumably select the sample type and sample number — confirm in image.sample_img.
if VERBOSE_NOTEBOOK:
    sample_img(dfg, g_ref, 's', 6)

## Report  
We build a report here...

### Report Assembly

In [None]:
from readdata import read_params
from zlib import crc32
import reportmd as rmd

# Load the evaluation parameters from the parameters file; they are rendered by rmd.param_section.
params = read_params(PARAMS_FILE_PATH)

report = '''
# Automatically Generated Markdown report

This a PoC for automatic report generation...  

'''

# Append the report sections in order: header, results, parameters, fit, samples.
# NOTE(review): the date and the 'GN004240-033' identifier are hard-coded literals here.
report += rmd.header_section('05 May 2023', 'GN004240-033', PLATE_ID, ':)')
# The row labelled 'reference 01' is dropped from the final table before it is rendered.
report += rmd.result_section(rmd.make_final(g_sl, g_wl_raw, PLATE_ID).drop('reference 01', axis=0))
report += rmd.param_section(params)
# 'img' directory next to the report file; created if missing and handed to the
# fit and sample section builders (presumably as the output location for plots).
img_dir = path.join(REPORT_DIR, 'img')
os.makedirs(img_dir, exist_ok=True)
report += rmd.fit_section_md(g_ref, g_popt, g_pcov, img_dir) # TODO: !!! global fit_result[3]

report += rmd.sample_section_md(dfg, g_ref, g_dr, img_dir)

# Write the assembled Markdown text to the report file.
print(REPORT_FILE_PATH)
rmd.save_md(REPORT_FILE_PATH, report)

In [None]:
# Regression check: CRC32 of the UTF-8 encoded report text is compared with a hard-coded value.
res = bytearray(report,'utf8')
t = crc32(res)
print('\nReport CRC  = {}\n'.format(t))

# Expected checksum; any change of the report text changes the CRC and triggers the
# exception below, so this value has to be updated by hand after intended changes.
crc_test = 2898421151
if t != crc_test:
    raise Exception('Report CRC missmatch! {} != {}'.format(t, crc_test))

### Export to PDF

In [None]:
from md2pdf.core import md2pdf
# PDF is placed in the report directory.
# NOTE(review): basename() keeps the '.md' extension of the report file, so the
# resulting name ends in '.md.pdf' — confirm this is intended.
PDF_FILE_PATH = path.join(REPORT_DIR,"{}.pdf".format(os.path.basename(REPORT_FILE_PATH)))

In [None]:
# Render the in-memory report text to PDF; no Markdown file, CSS file or base URL is supplied.
# NOTE(review): if the report references images by relative path, base_url probably
# needs to point at REPORT_DIR for them to resolve — verify against the md2pdf docs.
md2pdf(PDF_FILE_PATH,
       md_content=report,
       md_file_path=None,
       css_file_path=None,
       base_url=None)