# Useful code snippets for debugging

## Mask multiindex table

In [None]:
from scipy.stats import variation
from itertools import combinations


def mask_sample_cv(df_in, valid_pts, cv_threshold, verbose=True):
    """Pick the largest subset of unmasked rows whose CV is below a threshold.

    Rows of ``df_in`` whose ``'mask'`` value is NaN are the candidates.
    Subsets of the candidate index labels are tried from the largest size
    down to pairs. For each subset the coefficient of variation of
    ``'concentration'`` is computed with ``scipy.stats.variation(ddof=1)``.
    Every subset of a given size is evaluated and the one with the lowest CV
    is remembered; the search stops after the first size at which that CV is
    below ``cv_threshold``.

    The search is exhaustive over combinations, so it is only practical for
    a small number of candidate rows.

    Args:
        df_in: DataFrame with ``'concentration'`` and ``'mask'`` columns.
        valid_pts: Currently unused; kept so existing callers keep working.
        cv_threshold: Exclusive upper bound for an acceptable CV.
        verbose: When true (the default) the intermediate frames are shown
            with ``display`` (expected to be supplied by the notebook
            environment) and progress is printed. Pass ``False`` to run
            silently, e.g. outside a notebook.

    Returns:
        Tuple ``(mask_idx, non_mask_idx, cv_min)``:
        ``non_mask_idx`` is the list of index labels of the accepted subset
        (empty when no subset is below the threshold), ``mask_idx`` is the
        list of the remaining candidate labels in their original order, and
        ``cv_min`` is the CV of the accepted subset (or ``cv_threshold``
        unchanged when nothing was accepted).
    """
    candidates = df_in[df_in['mask'].isna()]
    if verbose:
        display(candidates)
    cv_min = cv_threshold
    non_mask_idx = []
    indices = candidates.index
    # Largest subsets first, so that as few points as possible end up masked.
    for size in range(len(indices), 1, -1):
        for subset in combinations(indices, size):
            comb = list(subset)
            selection = candidates.loc[comb]
            if verbose:
                display(selection)
            cv = variation(selection['concentration'], ddof=1)
            if verbose:
                print(comb, cv)
            if cv < cv_min:
                non_mask_idx = comb
                cv_min = cv
                if verbose:
                    print(f'!!! min {cv}')
        # Stop once some subset of this size dropped below the threshold.
        if cv_min < cv_threshold:
            break

    # Order-preserving difference: a plain set would return the labels in a
    # hash-dependent order that can change from run to run.
    kept = set(non_mask_idx)
    mask_idx = [label for label in dict.fromkeys(indices) if label not in kept]
    return mask_idx, non_mask_idx, cv_min

In [None]:
import pandas as pd
import numpy as np

# Build a small two-level (col, row) indexed frame to exercise mask_sample_cv.
idx = pd.MultiIndex.from_product([['A'],
                                  [1, 2, 3, 4]],
                                 names=['col', 'row'])
col = ['concentration', 'mask']

# Row ('A', 3) already carries a mask ('<8'); the other rows are unmasked.
dfm = pd.DataFrame([(10, np.nan), (11, np.nan),
                   (6, '<8'), (16, np.nan)], idx, col)
display(dfm)

# display(dfm['mask'].isna())
# Only the first returned value (labels to mask) is used here.
m_idx, _, _ = mask_sample_cv(dfm, 2, 0.2)
display(m_idx)
# Flag the rows rejected by the CV search and show the result.
dfm.loc[m_idx, ['mask']] = "cv-masked"
display(dfm)

## Report dir handling

In [None]:
import os

def listdirs(rootdir):
    """Return the paths of the immediate subdirectories of ``rootdir``.

    Args:
        rootdir: Directory to scan (not recursive).

    Returns:
        List of entry paths (``rootdir`` joined with the entry name) for
        every entry that is a directory, in the order the OS reports them.

    Raises:
        OSError: If ``rootdir`` cannot be scanned (e.g. it does not exist).
    """
    # The context manager closes the scandir iterator and its OS handle
    # even if iteration is interrupted by an error.
    with os.scandir(rootdir) as entries:
        return [entry.path for entry in entries if entry.is_dir()]


# Collect the report directories; the bare `dirs` makes the notebook show them.
rootdir = './../reports/all/'
dirs = listdirs(rootdir)
dirs

In [None]:
def parse_dir_name(path_name):
    """Split a report directory name into its underscore-separated fields.

    The last path component is split on ``'_'`` and the first four tokens
    are returned as ``date``, ``protocol``, ``analyst`` and ``gn`` (in that
    order), e.g. ``230530_AAV9-ELISA_sey_GN004240-040``.

    Args:
        path_name: Path to an existing directory; a trailing path separator
            is accepted.

    Returns:
        Dict with the keys ``'date'``, ``'protocol'``, ``'analyst'``, ``'gn'``.

    Raises:
        NotADirectoryError: If ``path_name`` is not an existing directory.
        IndexError: If the directory name has fewer than four tokens.
    """
    if not os.path.isdir(path_name):
        raise NotADirectoryError('Not directory!')
    # normpath strips a trailing separator; without it basename() would
    # return '' for paths such as 'reports/all/<name>/'.
    dir_name = os.path.basename(os.path.normpath(path_name))
    parts = dir_name.split('_')
    return {
        'date': parts[0],
        'protocol': parts[1],
        'analyst': parts[2],
        'gn': parts[3],
    }

def make_base_name(date, gn):
    """Return the shared file-name prefix ``<date>_<gn>_-_``."""
    pieces = (date, '_', gn, '_-_')
    return ''.join(pieces)

# For every report directory print the parsed name fields and the file-name
# prefix derived from its date and gn.
for work_dir in dirs:
    p = parse_dir_name(work_dir)
    print(p)
    b = make_base_name(p['date'], p['gn'])
    print(b)


In [None]:
from os import path

def make_input_paths(input_dir):
    """Build and validate the worklist and parameters file paths of a report.

    The directory name is parsed with ``parse_dir_name`` and the common
    prefix is built with ``make_base_name``; the expected files are
    ``<prefix>worklist-ELISA.xls`` and ``<prefix><protocol>_Parameters.csv``
    inside ``input_dir``. The input directory and the parsed fields are
    printed as debugging output.

    Args:
        input_dir: Path of the report directory.

    Returns:
        Dict with the keys ``'worklist'`` and ``'params'`` holding the two
        file paths.

    Raises:
        FileNotFoundError: If either expected file does not exist.
        Exception: Anything raised by ``parse_dir_name`` is propagated.
    """
    print(input_dir)
    p = parse_dir_name(input_dir)
    print(p)
    base_name = make_base_name(p['date'], p['gn'])

    worklist = path.join(input_dir, base_name + 'worklist-ELISA.xls')
    if not path.isfile(worklist):
        raise FileNotFoundError(
            "Worklist file path is invalid: {}".format(worklist))

    params = path.join(input_dir, base_name + p['protocol'] + '_Parameters.csv')
    if not path.isfile(params):
        raise FileNotFoundError(
            "Parameters file path is invalid: {}".format(params))

    return {'worklist': worklist, 'params': params}

# Try the path builder on the first directory in `dirs`.
make_input_paths(dirs[0])

## Parsing / checking worklist and params path


In [None]:
import os

def parse_file_path(path_name):
    """Split a report file path into directory, file name and name tokens.

    The file name is split on ``'_'`` and its first four tokens are returned
    as ``date``, ``gn``, ``analyst`` and ``protocol`` (in that order).

    NOTE(review): for names shaped like ``<date>_<gn>_-_<rest>`` the third
    token is the literal ``'-'`` separator, so ``'analyst'`` will be ``'-'``
    - confirm that this mapping is what callers expect.

    Args:
        path_name: Path to an existing file.

    Returns:
        Dict with the keys ``'dir'``, ``'file'``, ``'date'``, ``'gn'``,
        ``'analyst'`` and ``'protocol'``.

    Raises:
        FileNotFoundError: If ``path_name`` is not an existing file.
        IndexError: If the file name has fewer than four tokens.
    """
    if not os.path.isfile(path_name):
        # The check is for a file, so the message must not say "directory".
        raise FileNotFoundError('Not a file!')
    dir_name, file_name = os.path.split(path_name)
    parts = file_name.split('_')
    return {
        'dir': dir_name,
        'file': file_name,
        'date': parts[0],
        'gn': parts[1],
        'analyst': parts[2],
        'protocol': parts[3],
    }

# Sample inputs: relative paths are used; the absolute variants are kept
# commented out for reference.
# params_path = 'c:/work/report-gen/reports/all/230530_AAV9-ELISA_sey_GN004240-040/230530_GN004240-040_-_AAV9-ELISA_Parameters.csv'
params_path = './../reports/all/230530_AAV9-ELISA_sey_GN004240-040/230530_GN004240-040_-_AAV9-ELISA_Parameters.csv'
# worklist_path = 'c:/work/report-gen/reports/all/230530_AAV9-ELISA_sey_GN004240-040/230530_GN004240-040_-_worklist-ELISA.xls'
worklist_path = './../reports/all/230530_AAV9-ELISA_sey_GN004240-040/230530_GN004240-040_-_worklist-ELISA.xls'

# `htp` is only consumed by the commented-out print below.
htp = os.path.split(params_path)
# print('params path split {} / {}'.format(htp[0], htp[1]))
pp = parse_file_path(params_path)
print(pp)

# `htw` is only consumed by the commented-out print below.
htw = os.path.split(worklist_path)
# print('worklist path split {} / {}'.format(htw[0], htw[1]))
pw = parse_file_path(worklist_path)
print(pw)

In [None]:
from datetime import datetime, date, time, timezone

# Parsing demo 1: day/month/two-digit-year followed by a 24h time.
dt = datetime.strptime("21/11/06 16:30", "%d/%m/%y %H:%M")
# Parsing demo 2: compact yymmdd date; this rebinds `dt`, so only this value
# is printed.
dt = datetime.strptime('230530', "%y%m%d")
# The format spec is handed to strftime, giving day, month abbreviation, year.
print(f"{dt:%d %b %Y}")

## Read params from json

In [None]:
import json
from os import path

# Load run parameters from JSON: a params.json inside the working directory
# takes precedence over the shared default in ./../data.
working_dir = './../reports/230426_AAV9-ELISA_igi_GN004240-033'
params_path_default = path.join('./../data', 'params.json')
params_path_local = path.join(working_dir, 'params.json')
params_path = None

if path.exists(params_path_local):
    params_path = params_path_local
    print(f'loading local params {params_path}')
elif path.exists(params_path_default):
    params_path = params_path_default
    print(f'loading default params {params_path}')
else:
    # Fail with a clear message instead of trying to open a missing file.
    raise FileNotFoundError(
        f'params.json found neither at {params_path_local} '
        f'nor at {params_path_default}')

# Open the path selected above; opening the default unconditionally would
# silently ignore a local override.
with open(params_path) as json_file:
    data = json.load(json_file)

dilutions = data['dilutions']
ref_val_max = data['referenceValue']

print(f'{ref_val_max}, {dilutions}')

## Convert parameters CSV to json

In [None]:
import pandas as pd

# Read the semicolon-separated parameters CSV, using the 'Variable' column
# as the row index, and show it.
scv_filepath = './../reports/230426_AAV9-ELISA_igi_GN004240-033/230426_GN004240-033_-_AAV9-ELISA_Parameters.csv'
df = pd.read_csv(scv_filepath, sep=';', index_col='Variable', header=0)
# df = pd.read_csv(scv_filepath, sep=';', index_col=False)
display(df)
# Disabled: write the table next to the CSV as a .json file.
# json_filepath = path.splitext(scv_filepath)[0] + '.json'
# df.to_json(json_filepath, indent=4, orient="columns", force_ascii=False)
# df.to_json()

In [None]:
# First value of the 'IncubationTime_Samples' row, converted to int.
int(df.loc['IncubationTime_Samples', :].values[0])

## Parameters json

In [None]:
import json

# Load the parameters JSON file into `p`; the bare `p` makes the notebook
# show the loaded object.
parameters_path = './../reports/230426_AAV9-ELISA_igi_GN004240-033/230426_GN004240-033_-_AAV9-ELISA_Parameters.json'
with open(parameters_path) as json_file:
    p = json.load(json_file)

p