# Metrics

## Notebook Aim 

- Devise metrics that can be dropped into analysis scripts
- Make plots of metrics with example data (e.g. color map to traces)

## Metric Aims

- Detect the really fucked up traces (e.g. scattering events)
    - Classify fucked up traces to mask them
- Quantify signal/noise

## Target Metrics

### Scattering

#### AUC

Will need correction for concentration I reckon

#### Average Gradient

Should be more negative if scattered

#### Scattering Curve Fit After Control Correction

More expensive, but more direct in terms of expected behaviour of scattered curves

---

## Data

- [ ] Gather dataset of bad traces 

In [39]:
import os
import re
import json
from pprint import pprint
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt

import utils

root_dir_walk = os.walk('..')

experiments = {}

for dirpath, dirnames, filenames in root_dir_walk:
    if (e := re.search('\.\./([0-9]+)', dirpath)) is not None:
        experiment_number = int(e.group(1))
        if 1 <= experiment_number <= 22:
            data = {}

            for fname in filenames:
                fpath = os.path.join(dirpath, fname)
                if re.search('experiment-[0-9]+-summary\.csv', fname) is not None:
                    data['summary_fname'] = fname 
                    data['summary_path'] = fpath
                    #data['summary'] = pd.read_csv(fpath, index_col=0)
                if re.search('config_[0-9]+\.json', fname) is not None:
                    data['config_fname'] = fname 
                    with open(fpath, 'r') as f:
                        data['config'] = json.load(f)
            if data != {}:
                experiments[experiment_number] = data

df = pd.concat([pd.read_csv(experiments[i]['summary_path']) for i in experiments]).reset_index(drop=True)
df.head()

Unnamed: 0.1,ligand,km,vmax,rsq,column_num,protein_concentration,a420_max,auc_mean,auc_cv,std_405,...,shaking_seconds,BSA,protein_conc,protein_vol,Unnamed: 0,ligand_dispensing,bulk_dispensing,protein_volume,experiments,column
0,,1.53895,0.01251,0.021696,1.0,4.01,0.093,8.606125,0.575958,0.008799,...,,,,,,,,,,
1,Arachadionic Acid,285.406633,0.079279,-2.393192,2.0,4.01,0.08,10.5975,0.443524,0.012873,...,,,,,,,,,,
2,Arachadionic Acid,337.546311,0.083692,-2.341006,3.0,4.01,0.083,8.540825,0.461805,0.0055,...,,,,,,,,,,
3,Arachadionic Acid,261.523179,0.082059,-2.433887,4.0,4.01,0.082,8.798688,0.503424,0.005182,...,,,,,,,,,,
4,Arachadionic Acid,265.911556,0.077865,-2.452384,5.0,4.01,0.081,8.220812,0.391866,0.005153,...,,,,,,,,,,


In [2]:
 with open('../bad-traces.txt', 'r') as f:
    bad_traces = f.read().splitlines()

bad_traces

['./5_More_Iterations/img/5-None-plate_1-column-1-ligand-None.png',
 './5_More_Iterations/img/5-None-plate_1-column-2-ligand-Arachadionic Acid.png',
 './16_Echo/img/plate_1-block-13-ligand-None.png',
 './16_Echo/img/plate_1-block-1-ligand-None.png',
 './16_Echo/img/plate_1-block-7-ligand-None.png',
 './16_Echo/img/plate_2-block-1-ligand-None.png',
 './16_Echo/img/plate_2-block-3-ligand-None.png',
 './16_Echo/img/plate_2-block-7-ligand-None.png',
 './16_Echo/img/plate_3-block-5-ligand-None.png',
 './16_Echo/img/plate_3-block-7-ligand-None.png',
 './16_Echo/img/plate_4-block-13-ligand-None.png',
 './16_Echo/img/plate_4-block-7-ligand-None.png',
 './7_Moreiterations/img/7-None-plate_3-column-2-ligand-Arachadionic Acid.png',
 './7_Moreiterations/img/7-None-plate_4-column-2-ligand-Arachadionic Acid.png',
 './14_DMSO_dilutionScheme/img/14-None-plate_1-column-12-ligand-SDS.png',
 './19_Validation/img/plate_3-block-18-ligand-Palmitic acid.png',
 './19_Validation/img/plate_3-block-2-ligand-Laur

In [45]:
s = bad_traces[0]

def get_trace_info(path):
    search = lambda p, s : match.group(1) if (match := re.match(p, s)) else None
    root = path.split('/')[1]
    experiment_number = int(search('\./([0-9]+)', path))
    plate = search('.*(plate_[0-9]+)', path) or search('.*img/([0-9]+)', path) 
    column = search('.*column-([0-9]+)', path) or search('.*block-([0-9]+)', path)
    
    config = experiments[experiment_number]['config']
    file_name =  config['experiments'][plate]['file']
    file_path = os.path.join('..', root, 'data', file_name)

    test_wells = None
    control_wells = None
    
    if (blocks := config.get('blocks')):
        if (block := blocks.get(str(column))):
            test_wells = block.get('test_wells') 
            control_wells = block.get('control_wells') 
    else:
        if (test_rows := config.get('test_rows')):
            test_wells = [f'{i}{j}' for i, j in zip(test_rows, range(1,9))]
        if (control_rows := config.get('control_rows') ):
            control_wells = [f'{i}{j}' for i, j in zip(control_rows, range(1,9))]
        
    if test_wells is None:
        if (blocks_kinda := config.get('experiments')):
            if (blocks := blocks_kinda[plate]['blocks']):
                if (block := blocks.get(str(column))):
                    test_wells = block.get('test_wells') 
                    control_wells = block.get('control_wells') 
                    # experiments[18]['config']['experiments']['1']['blocks']['1']['test_wells']

    return {
        'experiment_number': experiment_number,
        'root': root,
        'plate': plate,
        'column': column,
        'file_path': file_path,
        'test_wells': test_wells,
        'control_wells': control_wells,
    }

bad_trace_data = pd.DataFrame((map(get_trace_info, bad_traces)))
bad_trace_data

Unnamed: 0,experiment_number,root,plate,column,file_path,test_wells,control_wells
0,5,5_More_Iterations,plate_1,1,../5_More_Iterations/data/20190619_boi.CSV,"[A1, C2, E3, G4, I5, K6, M7, O8]","[B1, D2, F3, H4, J5, L6, N7, P8]"
1,5,5_More_Iterations,plate_1,2,../5_More_Iterations/data/20190619_boi.CSV,"[A1, C2, E3, G4, I5, K6, M7, O8]","[B1, D2, F3, H4, J5, L6, N7, P8]"
2,16,16_Echo,plate_1,13,../16_Echo/data/data/20191104_Dopt.CSV,"[A13, B13, C13, D13, E13, F13, G13, H13]","[I13, J13, K13, L13, M13, N13, O13, P13]"
3,16,16_Echo,plate_1,1,../16_Echo/data/data/20191104_Dopt.CSV,"[A1, B1, C1, D1, E1, F1, G1, H1]","[I1, J1, K1, L1, M1, N1, O1, P1]"
4,16,16_Echo,plate_1,7,../16_Echo/data/data/20191104_Dopt.CSV,"[A7, B7, C7, D7, E7, F7, G7, H7]","[I7, J7, K7, L7, M7, N7, O7, P7]"
5,16,16_Echo,plate_2,1,../16_Echo/data/data/20191105_Dopt_2.CSV,"[A1, B1, C1, D1, E1, F1, G1, H1]","[I1, J1, K1, L1, M1, N1, O1, P1]"
6,16,16_Echo,plate_2,3,../16_Echo/data/data/20191105_Dopt_2.CSV,"[A3, B3, C3, D3, E3, F3, G3, H3]","[I3, J3, K3, L3, M3, N3, O3, P3]"
7,16,16_Echo,plate_2,7,../16_Echo/data/data/20191105_Dopt_2.CSV,"[A7, B7, C7, D7, E7, F7, G7, H7]","[I7, J7, K7, L7, M7, N7, O7, P7]"
8,16,16_Echo,plate_3,5,../16_Echo/data/data/20191105_Dopt_2_Shaking_r...,"[A5, B5, C5, D5, E5, F5, G5, H5]","[I5, J5, K5, L5, M5, N5, O5, P5]"
9,16,16_Echo,plate_3,7,../16_Echo/data/data/20191105_Dopt_2_Shaking_r...,"[A7, B7, C7, D7, E7, F7, G7, H7]","[I7, J7, K7, L7, M7, N7, O7, P7]"


In [38]:
experiments[18]['config']['experiments']['1']['blocks']['1']['test_wells']

['A1', 'B1', 'C1', 'D1', 'E1', 'F1', 'G1', 'H1']