In [1]:
%matplotlib inline

%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
from os.path import exists

sys.path.append('../..')

In [3]:
import pylab as plt
import pandas as pd
import numpy as np
from loguru import logger
import seaborn as sns
import copy

In [4]:
from vimms.Common import POSITIVE, set_log_level_warning, load_obj, save_obj
from vimms.ChemicalSamplers import UniformRTAndIntensitySampler, GaussianChromatogramSampler, UniformMZFormulaSampler, \
    MZMLFormulaSampler, MZMLRTandIntensitySampler, MZMLChromatogramSampler
from vimms.Noise import UniformSpikeNoise
from vimms.Evaluation import evaluate_real
from vimms.Chemicals import ChemicalMixtureFromMZML
from vimms.Roi import RoiBuilderParams, SmartRoiParams

from mass_spec_utils.data_import.mzmine import load_picked_boxes

from vimms_gym.env import DDAEnv
from vimms_gym.chemicals import generate_chemicals
from vimms_gym.evaluation import evaluate, run_method
from vimms_gym.common import METHOD_RANDOM, METHOD_FULLSCAN, METHOD_TOPN, METHOD_DQN, \
    METHOD_DQN_COV, METHOD_DQN_INT, METHOD_DQN_MID, METHOD_PPO, METHOD_PPO_RECURRENT
from vimms_gym.experiments import preset_qcb_medium

# 1. Parameters

In [5]:
env_alpha = 0.25
env_beta = 0.50
extract = True
params, max_peaks = preset_qcb_medium(None, alpha=env_alpha, beta=env_beta, extract_chromatograms=extract)
params, max_peaks

2023-03-07 13:26:57.403 | INFO     | vimms_gym.experiments:get_samplers:295 - Loaded /Users/joewandy/Work/git/vimms-gym/pickles/samplers_QCB_medium_extracted.p


({'chemical_creator': {'mz_range': (100, 600),
   'rt_range': (400, 800),
   'intensity_range': (10000.0, 1e+20),
   'n_chemicals': (200, 500),
   'mz_sampler': <vimms.ChemicalSamplers.MZMLFormulaSampler at 0x7f8d4b33c5e0>,
   'ri_sampler': <vimms.ChemicalSamplers.MZMLRTandIntensitySampler at 0x7f8d39d0d160>,
   'cr_sampler': <vimms.ChemicalSamplers.MZMLChromatogramSampler at 0x7f8d39d2b400>},
  'noise': {'enable_spike_noise': True,
   'noise_density': 0.1,
   'noise_max_val': 1000.0,
   'mz_range': (100, 600)},
  'env': {'ionisation_mode': 'Positive',
   'rt_range': (400, 800),
   'isolation_window': 0.7,
   'use_dew': False,
   'mz_tol': 10,
   'rt_tol': 5,
   'min_ms1_intensity': 5000,
   'alpha': 0.25,
   'beta': 0.5}},
 200)

In [6]:
max_peaks = 200
out_dir = 'optimise_baselines'

In [7]:
n_eval_episodes = 1

# 2. Evaluation

#### Generate some chemical sets

In [8]:
fname = 'QCB_chems_medium.p'
found = exists(fname)
if found:
    chem_list = load_obj(fname)
    for chems in chem_list:
        print(len(chems))

426
262
394
452
437
200
478
204
424
384
443
249
251
300
206
407
498
372
302
265
468
234
322
310
345
396
203
281
493
304


In [9]:
chem_list = [chem_list[0]]

#### Compare to Top-10

In [10]:
set_log_level_warning()

1

In [11]:
env_name = 'DDAEnv'
method = 'topN'
intensity_threshold = 0.5

In [12]:
rt_tols = [2, 5, 15, 30, 60, 120, 240, 300]
Ns = [1, 2, 5, 10, 15, 20, 25]

In [13]:
min_ms1_intensity = params['env']['min_ms1_intensity']
horizon = 4

In [14]:
topN_res = {}
for topN_rt_tol in rt_tols:
    for topN_N in Ns:

        copy_params = copy.deepcopy(params)            
        custom_objects = {
            "learning_rate": 0.0,
            "lr_schedule": lambda _: 0.0,
            "clip_range": lambda _: 0.0,
        }    

        model = None
        if method == METHOD_TOPN:
            N = topN_N
            effective_rt_tol = topN_rt_tol
            copy_params = dict(params)
            copy_params['env']['use_dew'] = True
            copy_params['env']['rt_tol'] = effective_rt_tol                        

        banner = 'method = %s max_peaks = %d N = %d rt_tol = %d' % (method, max_peaks, N, copy_params['env']['rt_tol'])
        print(banner)
        print()            

        episodic_results = run_method(env_name, copy_params, max_peaks, chem_list, method, out_dir, 
                                      N=N, min_ms1_intensity=min_ms1_intensity, model=model,
                                      print_eval=True, print_reward=False, intensity_threshold=intensity_threshold,
                                      mzml_prefix=method, horizon=horizon, write_mzML=False)
        eval_results = [er.eval_res for er in episodic_results]

        key = (topN_N, topN_rt_tol)
        topN_res[key] = eval_results
        print()    

method = topN max_peaks = 200 N = 1 rt_tol = 2

{'coverage_prop': '0.390', 'intensity_prop': '0.237', 'ms1ms2_ratio': '1.072', 'efficiency': '0.261', 'TP': '107', 'FP': '59', 'FN': '260', 'precision': '0.645', 'recall': '0.292', 'f1': '0.402', 'total_rewards': 119.4966354776849, 'invalid_action_count': 0, 'num_ms1_scans': 682, 'num_ms2_scans': 636}

method = topN max_peaks = 200 N = 2 rt_tol = 2

{'coverage_prop': '0.477', 'intensity_prop': '0.305', 'ms1ms2_ratio': '0.554', 'efficiency': '0.214', 'TP': '141', 'FP': '62', 'FN': '223', 'precision': '0.695', 'recall': '0.387', 'f1': '0.497', 'total_rewards': 170.45632673302185, 'invalid_action_count': 0, 'num_ms1_scans': 526, 'num_ms2_scans': 949}

method = topN max_peaks = 200 N = 5 rt_tol = 2

{'coverage_prop': '0.622', 'intensity_prop': '0.429', 'ms1ms2_ratio': '0.244', 'efficiency': '0.197', 'TP': '198', 'FP': '67', 'FN': '161', 'precision': '0.747', 'recall': '0.552', 'f1': '0.635', 'total_rewards': 236.6349733702281, 'invalid_action

In [16]:
method_eval_results = {
    method: topN_res
}

#### Test classic controllers in ViMMS

In [17]:
from vimms.MassSpec import IndependentMassSpectrometer
from vimms.Controller import TopNController, TopN_SmartRoiController, WeightedDEWController
from vimms.Environment import Environment

In [18]:
params

{'chemical_creator': {'mz_range': (100, 600),
  'rt_range': (400, 800),
  'intensity_range': (10000.0, 1e+20),
  'n_chemicals': (200, 500),
  'mz_sampler': <vimms.ChemicalSamplers.MZMLFormulaSampler at 0x7f8d4b33c5e0>,
  'ri_sampler': <vimms.ChemicalSamplers.MZMLRTandIntensitySampler at 0x7f8d39d0d160>,
  'cr_sampler': <vimms.ChemicalSamplers.MZMLChromatogramSampler at 0x7f8d39d2b400>},
 'noise': {'enable_spike_noise': True,
  'noise_density': 0.1,
  'noise_max_val': 1000.0,
  'mz_range': (100, 600)},
 'env': {'ionisation_mode': 'Positive',
  'rt_range': (400, 800),
  'isolation_window': 0.7,
  'use_dew': True,
  'mz_tol': 10,
  'rt_tol': 300,
  'min_ms1_intensity': 5000,
  'alpha': 0.25,
  'beta': 0.5}}

In [19]:
enable_spike_noise = params['noise']['enable_spike_noise']
ionisation_mode = params['env']['ionisation_mode']
isolation_window = params['env']['isolation_window']
mz_tol = params['env']['mz_tol']
rt_range = params['chemical_creator']['rt_range']

In [20]:
spike_noise = None
if enable_spike_noise:
    noise_params = params['noise']
    noise_density = noise_params['noise_density']
    noise_max_val = noise_params['noise_max_val']
    noise_min_mz = noise_params['mz_range'][0]
    noise_max_mz = noise_params['mz_range'][1]
    spike_noise = UniformSpikeNoise(noise_density, noise_max_val, min_mz=noise_min_mz,
                                    max_mz=noise_max_mz)

Run Top-N Controller

In [21]:
rt_range

(400, 800)

In [22]:
method = 'TopN_Controller'
print('method = %s' % method)
print()

chems = chem_list[0]
res = {}
for rt_tol in rt_tols:
    for N in Ns:

        effective_rt_tol = rt_tol
        mass_spec = IndependentMassSpectrometer(ionisation_mode, chems, spike_noise=spike_noise)
        controller = TopNController(ionisation_mode, N, isolation_window, mz_tol, rt_tol,
                                    min_ms1_intensity)
        env = Environment(mass_spec, controller, rt_range[0], rt_range[1], progress_bar=False)
        env.run()
        eval_res = evaluate(env, intensity_threshold)
        key = (N, rt_tol)
        print(N, rt_tol, eval_res)
        res[key] = eval_res

method_eval_results[method] = res

method = TopN_Controller

1 2 {'coverage_prop': '0.390', 'intensity_prop': '0.237', 'ms1ms2_ratio': '1.072', 'efficiency': '0.261', 'TP': '107', 'FP': '59', 'FN': '260', 'precision': '0.645', 'recall': '0.292', 'f1': '0.402'}
2 2 {'coverage_prop': '0.540', 'intensity_prop': '0.359', 'ms1ms2_ratio': '0.556', 'efficiency': '0.243', 'TP': '169', 'FP': '61', 'FN': '196', 'precision': '0.735', 'recall': '0.463', 'f1': '0.568'}
5 2 {'coverage_prop': '0.768', 'intensity_prop': '0.556', 'ms1ms2_ratio': '0.245', 'efficiency': '0.244', 'TP': '259', 'FP': '68', 'FN': '99', 'precision': '0.792', 'recall': '0.723', 'f1': '0.756'}
10 2 {'coverage_prop': '0.768', 'intensity_prop': '0.567', 'ms1ms2_ratio': '0.144', 'efficiency': '0.211', 'TP': '267', 'FP': '60', 'FN': '99', 'precision': '0.817', 'recall': '0.730', 'f1': '0.771'}
15 2 {'coverage_prop': '0.890', 'intensity_prop': '0.687', 'ms1ms2_ratio': '0.113', 'efficiency': '0.232', 'TP': '335', 'FP': '44', 'FN': '47', 'precision': '0.884', 'recall':

Run SmartROI Controller

In [23]:
alphas = [2, 3, 5, 10, 1E3, 1E6]
betas = [0, 0.1, 0.5, 1, 5]
initial_length_seconds_list = [0, 2, 5]
smartroi_N = 10
smartroi_dew = 5

In [24]:
method = 'SmartROI_Controller'
print('method = %s' % method)
print()

chems = chem_list[0]
res = {}
for alpha in alphas:
    for beta in betas:
        for ils in initial_length_seconds_list:

            mass_spec = IndependentMassSpectrometer(ionisation_mode, chems, spike_noise=spike_noise)

            roi_params = RoiBuilderParams(min_roi_intensity=0, min_roi_length=0)    
            smartroi_params = SmartRoiParams(intensity_increase_factor=alpha, drop_perc=beta/100.0, dew=smartroi_dew, initial_length_seconds=ils)
            controller = TopN_SmartRoiController(ionisation_mode, isolation_window, smartroi_N, mz_tol, smartroi_dew,
                                        min_ms1_intensity, roi_params, smartroi_params)

            env = Environment(mass_spec, controller, rt_range[0], rt_range[1], progress_bar=False)
            env.run()
            eval_res = evaluate(env, intensity_threshold)
            key = (alpha, beta, ils)
            print(alpha, beta, ils, eval_res)
            res[key] = eval_res

method_eval_results[method] = res

method = SmartROI_Controller

2 0 0 {'coverage_prop': '0.979', 'intensity_prop': '0.698', 'ms1ms2_ratio': '0.816', 'efficiency': '0.549', 'TP': '350', 'FP': '67', 'FN': '9', 'precision': '0.839', 'recall': '0.975', 'f1': '0.902'}
2 0 2 {'coverage_prop': '0.960', 'intensity_prop': '0.696', 'ms1ms2_ratio': '0.833', 'efficiency': '0.545', 'TP': '350', 'FP': '59', 'FN': '17', 'precision': '0.856', 'recall': '0.954', 'f1': '0.902'}
2 0 5 {'coverage_prop': '0.955', 'intensity_prop': '0.684', 'ms1ms2_ratio': '0.897', 'efficiency': '0.568', 'TP': '348', 'FP': '59', 'FN': '19', 'precision': '0.855', 'recall': '0.948', 'f1': '0.899'}
2 0.1 0 {'coverage_prop': '0.979', 'intensity_prop': '0.698', 'ms1ms2_ratio': '0.815', 'efficiency': '0.548', 'TP': '350', 'FP': '67', 'FN': '9', 'precision': '0.839', 'recall': '0.975', 'f1': '0.902'}
2 0.1 2 {'coverage_prop': '0.960', 'intensity_prop': '0.697', 'ms1ms2_ratio': '0.825', 'efficiency': '0.542', 'TP': '351', 'FP': '58', 'FN': '17', 'precision': '0.858

Run WeightedDEW Controller

In [25]:
t0s = [1, 3, 10, 15, 30, 60]
t1s = [15, 60, 120, 240, 360, 3600]
weighteddew_N = 10

In [26]:
method = 'WeightedDEW_Controller'
print('method = %s' % method)
print()

chems = chem_list[0]
res = {}
for t0 in t0s:
    for t1 in t1s:

        if t0 > t1:
            print('Invalid combination')
            continue
        
        mass_spec = IndependentMassSpectrometer(ionisation_mode, chems, spike_noise=spike_noise)
        
        controller = WeightedDEWController(ionisation_mode, weighteddew_N, isolation_window, mz_tol, t1,
                                    min_ms1_intensity, exclusion_t_0=t0)
        
        env = Environment(mass_spec, controller, rt_range[0], rt_range[1], progress_bar=False)
        env.run()
        eval_res = evaluate(env, intensity_threshold)
        key = (t0, t1)
        print(t0, t1, eval_res)
        res[key] = eval_res
        
method_eval_results[method] = res

method = WeightedDEW_Controller

1 15 {'coverage_prop': '0.955', 'intensity_prop': '0.746', 'ms1ms2_ratio': '0.138', 'efficiency': '0.260', 'TP': '368', 'FP': '39', 'FN': '19', 'precision': '0.904', 'recall': '0.951', 'f1': '0.927'}
1 60 {'coverage_prop': '0.960', 'intensity_prop': '0.714', 'ms1ms2_ratio': '0.138', 'efficiency': '0.261', 'TP': '356', 'FP': '53', 'FN': '17', 'precision': '0.870', 'recall': '0.954', 'f1': '0.910'}
1 120 {'coverage_prop': '0.960', 'intensity_prop': '0.712', 'ms1ms2_ratio': '0.138', 'efficiency': '0.261', 'TP': '357', 'FP': '52', 'FN': '17', 'precision': '0.873', 'recall': '0.955', 'f1': '0.912'}
1 240 {'coverage_prop': '0.960', 'intensity_prop': '0.711', 'ms1ms2_ratio': '0.138', 'efficiency': '0.261', 'TP': '358', 'FP': '51', 'FN': '17', 'precision': '0.875', 'recall': '0.955', 'f1': '0.913'}
1 360 {'coverage_prop': '0.960', 'intensity_prop': '0.711', 'ms1ms2_ratio': '0.138', 'efficiency': '0.261', 'TP': '358', 'FP': '51', 'FN': '17', 'precision': '0.875'