# 2. Grid Search

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension; mode 2 re-imports edited modules before each cell runs.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
# Put the directory two levels above the notebook on the import path
# (presumably so the local `vimms` package imported below can be found).
sys.path.append('../..')

In [3]:
import os
import zipfile
from pathlib import Path

from pyDOE import *
from vimms.Environment import *

In [4]:
from vimms.Chemicals import ChemicalCreator, GET_MS2_BY_PEAKS, GET_MS2_BY_SPECTRA
from vimms.MassSpec import IndependentMassSpectrometer
from vimms.Controller import *
from vimms.Common import *
from vimms.PlotsForPaper import *
from vimms.Roi import make_roi
from vimms.Chemicals import RoiToChemicalCreator
from vimms.FeatureExtraction import extract_roi
from vimms.SequenceManager import *

In [5]:
# Fixture directory: <cwd>/../../tests/fixtures, with the parent part resolved to an absolute path.
data_dir = os.path.join(
    os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir)),
    'tests', 'fixtures',
)

# Saved dataset; its path (dataset_file) is what Example 1 hands to GridSearchExperiment.
dataset_file = os.path.join(data_dir, 'QCB_22May19_1.p')
dataset = load_obj(dataset_file)

# Peak sampler object, reused as the 'peak_sampler' mass-spec parameter and as `ps=` below.
ps = load_obj(Path(data_dir) / 'peak_sampler_mz_rt_int_beerqcb_fragmentation.p')

In [6]:
# Remote archive holding the example data, and the local folder it is expected in
# (the sibling '01. Data' directory next to this notebook's working directory).
url = 'http://researchdata.gla.ac.uk/870/2/example_data.zip'
base_dir = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir, '01. Data', 'example_data')
)

In [7]:
# Fetch the example data only when it is not already on disk.
if not os.path.isdir(base_dir):
    print('Creating %s' % base_dir)
    # Download and unpack next to base_dir rather than into the current working
    # directory: base_dir lives under '../01. Data', so extracting into the notebook's
    # own folder would leave base_dir missing and the mzML path built from it invalid.
    data_root = os.path.dirname(base_dir)
    os.makedirs(data_root, exist_ok=True)
    out_file = os.path.join(data_root, 'example_data.zip')
    download_file(url, out_file)
    # NOTE(review): assumes the archive contains a top-level 'example_data' folder,
    # so that extraction yields exactly base_dir — confirm against the zip contents.
    with zipfile.ZipFile(out_file) as archive:
        archive.extractall(data_root)
    # Remove the archive once unpacked (the original call passed delete=True).
    os.remove(out_file)
else:
    print('Found %s' % base_dir)

Found C:\Users\Vinny\work\vimms\demo\01. Data\example_data


In [8]:
# Full-scan beer mzML file inside the example data; used to seed Examples 2-5.
mzml_file = os.path.join(
    base_dir, 'beers', 'fullscan', 'mzML', 'Beer_multibeers_1_fullscan1.mzML'
)
# Alternating [None, file] pattern, repeated twice.
# NOTE(review): this list is not referenced by any other cell visible in this notebook.
mzml_file_list = [None, mzml_file] * 2

In [9]:
# Presumably switches the logger to INFO level (going by the helper's name);
# the INFO-level log lines in the cell outputs below are consistent with that.
set_log_level_info()

### Set some default parameters

In [10]:
# Root folder for all grid-search outputs: a 'results' directory under the current working directory.
experiment_dir = os.path.join(os.getcwd(), 'results')

In [11]:
# Mass-spec settings shared by every GridSearchExperiment in this notebook.
mass_spec_params = {
    'ionisation_mode': POSITIVE,
    'peak_sampler': ps,  # peak sampler object loaded from the fixtures
    'add_noise': False,
    'isolation_transition_window': 'rectangular',
    'isolation_transition_window_params': None,
}

In [12]:
# Fixed Top-N controller settings; the grid search overrides the entries that also
# appear in topn_variable_params_dict (N and rt_tol).
controller_params = {
    'ionisation_mode': POSITIVE,
    'N': 10,
    'mz_tol': 10,
    'rt_tol': 30,
    'min_ms1_intensity': 1.75e5,
    'rt_range': [(200, 400)],
    'isolation_width': 1,
}

Note: to run this notebook you need MZmine 2 installed (version 2.40.1 is the one referenced by `mzmine_command` below), placed in the same parent directory as the ViMMS repository.

In [13]:
# Evaluation method name(s) handed to VimmsSequenceManager in the examples below.
evaluation_methods = ['mzmine_peak']

# Absolute path of the MZmine launcher, three directories above the working directory.
# NOTE(review): this points at the Windows .bat launcher; other platforms will need a different script.
mzmine_command = os.path.abspath(
    os.path.join(
        os.getcwd(), os.pardir, os.pardir, os.pardir,
        'MZmine-2.40.1', 'MZmine-2.40.1', 'startMZmine_Windows.bat',
    )
)

### Get a picked peaks file

You don't need to provide a picked peaks file to run a grid search experiment (see Example 3, which runs without one). However, if you already have one, supplying it is more efficient because peak picking does not have to be run again.

In [22]:
# Run MZmine peak picking on the full-scan mzML file, writing into the current
# working directory. This launches the external MZmine process and can take a while.
pick_peaks(
    [mzml_file],
    xml_template=QCB_XML_TEMPLATE_MS1,
    output_dir=os.getcwd(),
    mzmine_command=mzmine_command,
)

2020-09-07 14:02:56.536 | INFO     | vimms.PythonMzmine:pick_peaks:23 - Creating xml batch file for Beer_multibeers_1_fullscan1.mzML
2020-09-07 14:02:56.549 | INFO     | vimms.PythonMzmine:pick_peaks:53 - Running mzMine for Beer_multibeers_1_fullscan1.mzML


In [23]:
# Path of the picked-peaks CSV: '<mzML file stem>_pp.csv' in the current working directory.
ms1_picked_peaks_file = os.path.join(os.getcwd(), '%s_pp.csv' % Path(mzml_file).stem)

### Some parameter settings to search over

In [24]:
# Parameter grid for the Top-N examples: one value of N, two values of rt_tol (two runs).
topn_variable_params_dict = dict(N=[10], rt_tol=[15, 30])

### Example 1 - Top N - Seed with dataset and picked peaks

In [25]:
# Output folder for Example 1, beneath experiment_dir.
output_dir = os.path.join(experiment_dir, 'grid_search_example_1')

In [26]:
# Passed to GridSearchExperiment as `parallel`; with False the log reports 'Running in serial mode'.
parallel = False

In [27]:
# Example 1: grid search seeded with the saved dataset file and an existing picked-peaks file.
# mzmine_command is passed by keyword, matching the other VimmsSequenceManager calls in this
# notebook, so the call no longer depends on the constructor's positional parameter order.
vsm = VimmsSequenceManager(
    None,
    evaluation_methods,
    output_dir,
    mzmine_command=mzmine_command,
    progress_bar=True,
    ms1_picked_peaks_file=ms1_picked_peaks_file,
)
# One run per combination in topn_variable_params_dict, on top of the fixed controller_params.
gs = GridSearchExperiment(
    vsm,
    'TopNController',
    mass_spec_params,
    dataset_file,
    topn_variable_params_dict,
    controller_params,
    parallel=parallel,
)
# Display the results table as the cell output.
gs.results

2020-09-07 14:12:29.652 | INFO     | vimms.Common:create_if_not_exist:148 - Created C:\Users\Vinny\work\vimms\demo\03. MultiSampleMethods\results\grid_search_example_1
2020-09-07 14:12:29.666 | INFO     | vimms.SequenceManager:run:240 - Running in serial mode
2020-09-07 14:12:29.667 | INFO     | vimms.SequenceManager:run_controller:192 - Begun experiment: sample0
2020-09-07 14:12:29.869 | INFO     | vimms.SequenceManager:run_experiment:158 - {'Sample ID': {0: 'sample0'}, 'Controller Method': {0: 'TopNController'}, 'Controller Params': {0: {'ionisation_mode': 'Positive', 'N': 10, 'isolation_width': 1, 'mz_tol': 10, 'rt_tol': 15, 'min_ms1_intensity': 175000.0, 'ms1_shift': 0, 'initial_exclusion_list': None, 'params': None}}, 'MassSpec Params': {0: {'ionisation_mode': 'Positive', 'peak_sampler': <vimms.DataGenerator.PeakSampler object at 0x0000027A017CE5C0>, 'add_noise': False, 'isolation_transition_window': 'rectangular', 'isolation_transition_window_params': None}}, 'Dataset': {0: 'C:\\

False
Loaded 4228 scans


2020-09-07 14:16:04.361 | INFO     | vimms.SequenceManager:run_evaluation:208 - Completed Evaluation: sample0
2020-09-07 14:16:04.363 | INFO     | vimms.SequenceManager:run_serial:252 - Finished 0
2020-09-07 14:16:04.363 | INFO     | vimms.SequenceManager:run_controller:192 - Begun experiment: sample1
2020-09-07 14:16:04.565 | INFO     | vimms.SequenceManager:run_experiment:158 - {'Sample ID': {1: 'sample1'}, 'Controller Method': {1: 'TopNController'}, 'Controller Params': {1: {'ionisation_mode': 'Positive', 'N': 10, 'isolation_width': 1, 'mz_tol': 10, 'rt_tol': 30, 'min_ms1_intensity': 175000.0, 'ms1_shift': 0, 'initial_exclusion_list': None, 'params': None}}, 'MassSpec Params': {1: {'ionisation_mode': 'Positive', 'peak_sampler': <vimms.DataGenerator.PeakSampler object at 0x0000027A017CE5C0>, 'add_noise': False, 'isolation_transition_window': 'rectangular', 'isolation_transition_window_params': None}}, 'Dataset': {1: 'C:\\Users\\Vinny\\work\\vimms\\tests\\fixtures\\QCB_22May19_1.p'}}


False
Loaded 4040 scans


2020-09-07 14:16:53.445 | INFO     | vimms.SequenceManager:run_evaluation:208 - Completed Evaluation: sample1
2020-09-07 14:16:53.446 | INFO     | vimms.SequenceManager:run_serial:252 - Finished 1


Unnamed: 0,Sample ID,Controller Method,N,mz_tol,rt_tol,min_ms1_intensity,ms1_shift,initial_exclusion_list,params,mzmine_peak
0,sample0,TopNController,10,10,15,175000.0,0,,,117.0
1,sample1,TopNController,10,10,30,175000.0,0,,,90.0


### Example 2 - Top N - Seed with mzml and picked peaks

In [28]:
# Output folder for Example 2, beneath experiment_dir.
output_dir = os.path.join(experiment_dir, 'grid_search_example_2')

In [29]:
# Passed to GridSearchExperiment as `parallel`; with False the log reports 'Running in serial mode'.
parallel = False

In [32]:
# Example 2: grid search seeded from the mzML file (no saved dataset), reusing the
# picked-peaks file produced earlier; no MZmine command is supplied here.
vsm = VimmsSequenceManager(
    None,
    evaluation_methods,
    output_dir,
    progress_bar=True,
    ms1_picked_peaks_file=ms1_picked_peaks_file,
)
gs = GridSearchExperiment(
    vsm,
    'TopNController',
    mass_spec_params,
    None,  # no dataset file: the mzML file passed below is used instead
    topn_variable_params_dict,
    controller_params,
    mzml_file,
    ps=ps,
    parallel=parallel,
)
# Display the results table as the cell output.
gs.results

2020-09-07 14:19:55.664 | INFO     | vimms.Common:save_obj:161 - Saving <class 'list'> to C:\Users\Vinny\work\vimms\demo\03. MultiSampleMethods\results\grid_search_example_2\Beer_multibeers_1_fullscan1.p
2020-09-07 14:20:05.282 | INFO     | vimms.SequenceManager:run:241 - Running in serial mode
2020-09-07 14:20:05.283 | INFO     | vimms.SequenceManager:run_controller:193 - Begun experiment: sample0
2020-09-07 14:20:05.486 | INFO     | vimms.SequenceManager:run_experiment:159 - {'Sample ID': {0: 'sample0'}, 'Controller Method': {0: 'TopNController'}, 'Controller Params': {0: {'ionisation_mode': 'Positive', 'N': 10, 'isolation_width': 1, 'mz_tol': 10, 'rt_tol': 15, 'min_ms1_intensity': 175000.0, 'ms1_shift': 0, 'initial_exclusion_list': None, 'params': None}}, 'MassSpec Params': {0: {'ionisation_mode': 'Positive', 'peak_sampler': <vimms.DataGenerator.PeakSampler object at 0x0000027A017CE5C0>, 'add_noise': False, 'isolation_transition_window': 'rectangular', 'isolation_transition_window_p

False
Loaded 6512 scans


2020-09-07 14:22:19.584 | INFO     | vimms.SequenceManager:run_evaluation:209 - Completed Evaluation: sample0
2020-09-07 14:22:19.585 | INFO     | vimms.SequenceManager:run_serial:253 - Finished 0
2020-09-07 14:22:19.586 | INFO     | vimms.SequenceManager:run_controller:193 - Begun experiment: sample1
2020-09-07 14:22:19.789 | INFO     | vimms.SequenceManager:run_experiment:159 - {'Sample ID': {1: 'sample1'}, 'Controller Method': {1: 'TopNController'}, 'Controller Params': {1: {'ionisation_mode': 'Positive', 'N': 10, 'isolation_width': 1, 'mz_tol': 10, 'rt_tol': 30, 'min_ms1_intensity': 175000.0, 'ms1_shift': 0, 'initial_exclusion_list': None, 'params': None}}, 'MassSpec Params': {1: {'ionisation_mode': 'Positive', 'peak_sampler': <vimms.DataGenerator.PeakSampler object at 0x0000027A017CE5C0>, 'add_noise': False, 'isolation_transition_window': 'rectangular', 'isolation_transition_window_params': None}}, 'Dataset': {1: 'C:\\Users\\Vinny\\work\\vimms\\demo\\03. MultiSampleMethods\\result

False
Loaded 6157 scans


2020-09-07 14:24:39.186 | INFO     | vimms.SequenceManager:run_evaluation:209 - Completed Evaluation: sample1
2020-09-07 14:24:39.187 | INFO     | vimms.SequenceManager:run_serial:253 - Finished 1


Unnamed: 0,Sample ID,Controller Method,N,mz_tol,rt_tol,min_ms1_intensity,ms1_shift,initial_exclusion_list,params,mzmine_peak
0,sample0,TopNController,10,10,15,175000.0,0,,,1068.0
1,sample1,TopNController,10,10,30,175000.0,0,,,1425.0


### Example 3 - Top N - Seed with mzml, non-parallel

In [34]:
# Output folder for Example 3, beneath experiment_dir.
output_dir = os.path.join(experiment_dir, 'grid_search_example_3')

In [35]:
# Passed to GridSearchExperiment as `parallel`; with False the log reports 'Running in serial mode'.
parallel = False

In [36]:
# Example 3: grid search seeded from the mzML file with no picked-peaks file supplied;
# the MZmine command is provided instead (the log output shows peak picking being run).
vsm = VimmsSequenceManager(
    None,
    evaluation_methods,
    output_dir,
    progress_bar=True,
    ms1_picked_peaks_file=None,
    mzmine_command=mzmine_command,
)
gs = GridSearchExperiment(
    vsm,
    'TopNController',
    mass_spec_params,
    None,  # no dataset file: the mzML file passed below is used instead
    topn_variable_params_dict,
    controller_params,
    mzml_file,
    ps=ps,
    parallel=parallel,
)
# Display the results table as the cell output.
gs.results

2020-09-07 14:27:29.370 | INFO     | vimms.Common:create_if_not_exist:148 - Created C:\Users\Vinny\work\vimms\demo\03. MultiSampleMethods\results\grid_search_example_3
2020-09-07 14:28:23.969 | INFO     | vimms.Common:save_obj:161 - Saving <class 'list'> to C:\Users\Vinny\work\vimms\demo\03. MultiSampleMethods\results\grid_search_example_3\Beer_multibeers_1_fullscan1.p
2020-09-07 14:28:32.847 | INFO     | vimms.PythonMzmine:pick_peaks:23 - Creating xml batch file for Beer_multibeers_1_fullscan1.mzML
2020-09-07 14:28:32.850 | INFO     | vimms.PythonMzmine:pick_peaks:53 - Running mzMine for Beer_multibeers_1_fullscan1.mzML
2020-09-07 14:36:15.975 | INFO     | vimms.SequenceManager:run:241 - Running in serial mode
2020-09-07 14:36:15.976 | INFO     | vimms.SequenceManager:run_controller:193 - Begun experiment: sample0
2020-09-07 14:36:16.178 | INFO     | vimms.SequenceManager:run_experiment:159 - {'Sample ID': {0: 'sample0'}, 'Controller Method': {0: 'TopNController'}, 'Controller Params'

False
Loaded 6512 scans


2020-09-07 14:38:41.587 | INFO     | vimms.SequenceManager:run_evaluation:209 - Completed Evaluation: sample0
2020-09-07 14:38:41.588 | INFO     | vimms.SequenceManager:run_serial:253 - Finished 0
2020-09-07 14:38:41.589 | INFO     | vimms.SequenceManager:run_controller:193 - Begun experiment: sample1
2020-09-07 14:38:41.791 | INFO     | vimms.SequenceManager:run_experiment:159 - {'Sample ID': {1: 'sample1'}, 'Controller Method': {1: 'TopNController'}, 'Controller Params': {1: {'ionisation_mode': 'Positive', 'N': 10, 'isolation_width': 1, 'mz_tol': 10, 'rt_tol': 30, 'min_ms1_intensity': 175000.0, 'ms1_shift': 0, 'initial_exclusion_list': None, 'params': None}}, 'MassSpec Params': {1: {'ionisation_mode': 'Positive', 'peak_sampler': <vimms.DataGenerator.PeakSampler object at 0x0000027A017CE5C0>, 'add_noise': False, 'isolation_transition_window': 'rectangular', 'isolation_transition_window_params': None}}, 'Dataset': {1: 'C:\\Users\\Vinny\\work\\vimms\\demo\\03. MultiSampleMethods\\result

False
Loaded 6157 scans


2020-09-07 14:41:08.608 | INFO     | vimms.SequenceManager:run_evaluation:209 - Completed Evaluation: sample1
2020-09-07 14:41:08.609 | INFO     | vimms.SequenceManager:run_serial:253 - Finished 1


Unnamed: 0,Sample ID,Controller Method,N,mz_tol,rt_tol,min_ms1_intensity,ms1_shift,initial_exclusion_list,params,mzmine_peak
0,sample0,TopNController,10,10,15,175000.0,0,,,1068.0
1,sample1,TopNController,10,10,30,175000.0,0,,,1425.0


### Example 4 - SmartROI - Seed with mzml, non-parallel

In [37]:
# Output folder for Example 4, beneath experiment_dir.
output_dir = os.path.join(experiment_dir, 'grid_search_example_4')

In [38]:
# Fixed settings for the SmartROI controller: the same base values as the Top-N
# controller_params, plus the ROI-specific entries.
# NOTE(review): 'intensity_increase_factor' also appears in smartROI_variable_params_dict
# with the same single value (2), so the entry here looks redundant — confirm.
smartROI_controller_params = {
    'ionisation_mode': POSITIVE,
    'N': 10,
    'mz_tol': 10,
    'rt_tol': 30,
    'min_ms1_intensity': 1.75e5,
    'rt_range': [(200, 400)],
    'isolation_width': 1,
    # ROI-specific settings
    'min_roi_intensity': 1000,
    'min_roi_length': 1,
    'min_roi_length_for_fragmentation': 1,
    'reset_length_seconds': 100,
    'intensity_increase_factor': 2,
    'length_units': 'scans',
}

In [39]:
# Parameter grid for SmartROI: drop_perc is written as percentages (0 % and 0.1 %)
# and converted to fractions; intensity_increase_factor has a single value.
smartROI_variable_params_dict = {
    'drop_perc': [percent / 100 for percent in (0, 0.1)],
    'intensity_increase_factor': [2],
}

In [40]:
# Passed to GridSearchExperiment as `parallel`; with False the log reports 'Running in serial mode'.
parallel = False

In [42]:
# Example 4: SmartROI grid search seeded from the mzML file, with no picked-peaks file
# supplied; the MZmine command is provided instead.
vsm = VimmsSequenceManager(
    None,
    evaluation_methods,
    output_dir,
    progress_bar=True,
    ms1_picked_peaks_file=None,
    mzmine_command=mzmine_command,
)
gs = GridSearchExperiment(
    vsm,
    'TopN_SmartRoiController',
    mass_spec_params,
    None,  # no dataset file: the mzML file passed below is used instead
    smartROI_variable_params_dict,
    smartROI_controller_params,
    mzml_file,
    ps=ps,
    parallel=parallel,
)
# Display the results table as the cell output.
gs.results

2020-09-07 14:56:38.203 | INFO     | vimms.Common:save_obj:161 - Saving <class 'list'> to C:\Users\Vinny\work\vimms\demo\03. MultiSampleMethods\results\grid_search_example_4\Beer_multibeers_1_fullscan1.p
2020-09-07 14:56:47.066 | INFO     | vimms.PythonMzmine:pick_peaks:23 - Creating xml batch file for Beer_multibeers_1_fullscan1.mzML
2020-09-07 14:56:47.068 | INFO     | vimms.PythonMzmine:pick_peaks:53 - Running mzMine for Beer_multibeers_1_fullscan1.mzML
2020-09-07 15:04:42.247 | INFO     | vimms.SequenceManager:run:241 - Running in serial mode
2020-09-07 15:04:42.248 | INFO     | vimms.SequenceManager:run_controller:193 - Begun experiment: sample0
2020-09-07 15:04:42.452 | INFO     | vimms.SequenceManager:run_experiment:159 - {'Sample ID': {0: 'sample0'}, 'Controller Method': {0: 'TopN_SmartRoiController'}, 'Controller Params': {0: {'ionisation_mode': 'Positive', 'isolation_width': 1, 'mz_tol': 10, 'min_ms1_intensity': 175000.0, 'min_roi_intensity': 1000, 'min_roi_length': 1, 'N': 1

False
Loaded 5864 scans


2020-09-07 15:07:35.879 | INFO     | vimms.SequenceManager:run_evaluation:209 - Completed Evaluation: sample0
2020-09-07 15:07:35.881 | INFO     | vimms.SequenceManager:run_serial:253 - Finished 0
2020-09-07 15:07:35.882 | INFO     | vimms.SequenceManager:run_controller:193 - Begun experiment: sample1
2020-09-07 15:07:36.084 | INFO     | vimms.SequenceManager:run_experiment:159 - {'Sample ID': {1: 'sample1'}, 'Controller Method': {1: 'TopN_SmartRoiController'}, 'Controller Params': {1: {'ionisation_mode': 'Positive', 'isolation_width': 1, 'mz_tol': 10, 'min_ms1_intensity': 175000.0, 'min_roi_intensity': 1000, 'min_roi_length': 1, 'N': 10, 'rt_tol': 30, 'min_roi_length_for_fragmentation': 1, 'reset_length_seconds': 100, 'intensity_increase_factor': 2, 'length_units': 'scans', 'drop_perc': 0.001, 'ms1_shift': 0, 'params': None}}, 'MassSpec Params': {1: {'ionisation_mode': 'Positive', 'peak_sampler': <vimms.DataGenerator.PeakSampler object at 0x0000027A017CE5C0>, 'add_noise': False, 'isol

False
Loaded 5854 scans


2020-09-07 15:10:26.700 | INFO     | vimms.SequenceManager:run_evaluation:209 - Completed Evaluation: sample1
2020-09-07 15:10:26.701 | INFO     | vimms.SequenceManager:run_serial:253 - Finished 1


Unnamed: 0,Sample ID,Controller Method,mz_tol,min_ms1_intensity,min_roi_intensity,min_roi_length,N,rt_tol,min_roi_length_for_fragmentation,reset_length_seconds,intensity_increase_factor,length_units,drop_perc,ms1_shift,params,mzmine_peak
0,sample0,TopN_SmartRoiController,10,175000.0,1000,1,10,30,1,100,2,scans,0.0,0,,1520.0
1,sample1,TopN_SmartRoiController,10,175000.0,1000,1,10,30,1,100,2,scans,0.001,0,,1519.0


### Example 5 - WeightedDEW - Seed with mzml, non-parallel

In [43]:
# Output folder for Example 5, beneath experiment_dir.
output_dir = os.path.join(experiment_dir, 'grid_search_example_5')

In [44]:
# Fixed settings for the WeightedDEW controller: the same base values as the Top-N
# controller_params, plus the WeightedDEW-specific entries. The grid search varies
# 'rt_tol' and 'exclusion_t_0' via weightedDEW_variable_params_dict.
weightedDEW_controller_params = {
    'ionisation_mode': POSITIVE,
    'N': 10,
    'mz_tol': 10,
    'rt_tol': 30,
    'min_ms1_intensity': 1.75e5,
    'rt_range': [(200, 400)],
    'isolation_width': 1,
    'ms1_shift': 0,
    # WeightedDEW-specific settings
    'exclusion_t_0': 15,
    'log_intensity': False,
}

In [45]:
# Parameter grid for WeightedDEW: a single rt_tol and two exclusion_t_0 values (two runs).
weightedDEW_variable_params_dict = dict(rt_tol=[30], exclusion_t_0=[10, 20])

In [46]:
# Passed to GridSearchExperiment as `parallel`; with False the log reports 'Running in serial mode'.
parallel = False

In [47]:
# Example 5: WeightedDEW grid search seeded from the mzML file, with no picked-peaks file
# supplied; the MZmine command is provided instead.
vsm = VimmsSequenceManager(
    None,
    evaluation_methods,
    output_dir,
    progress_bar=True,
    ms1_picked_peaks_file=None,
    mzmine_command=mzmine_command,
)
gs = GridSearchExperiment(
    vsm,
    'WeightedDewController',
    mass_spec_params,
    None,  # no dataset file: the mzML file passed below is used instead
    weightedDEW_variable_params_dict,
    weightedDEW_controller_params,
    mzml_file,
    ps=ps,
    parallel=parallel,
)
# Display the results table as the cell output.
gs.results

2020-09-07 15:10:27.371 | INFO     | vimms.Common:create_if_not_exist:148 - Created C:\Users\Vinny\work\vimms\demo\03. MultiSampleMethods\results\grid_search_example_5
2020-09-07 15:11:22.907 | INFO     | vimms.Common:save_obj:161 - Saving <class 'list'> to C:\Users\Vinny\work\vimms\demo\03. MultiSampleMethods\results\grid_search_example_5\Beer_multibeers_1_fullscan1.p
2020-09-07 15:11:32.527 | INFO     | vimms.PythonMzmine:pick_peaks:23 - Creating xml batch file for Beer_multibeers_1_fullscan1.mzML
2020-09-07 15:11:32.529 | INFO     | vimms.PythonMzmine:pick_peaks:53 - Running mzMine for Beer_multibeers_1_fullscan1.mzML
2020-09-07 15:19:14.864 | INFO     | vimms.SequenceManager:run:241 - Running in serial mode
2020-09-07 15:19:14.865 | INFO     | vimms.SequenceManager:run_controller:193 - Begun experiment: sample0
2020-09-07 15:19:15.066 | INFO     | vimms.SequenceManager:run_experiment:159 - {'Sample ID': {0: 'sample0'}, 'Controller Method': {0: 'WeightedDewController'}, 'Controller 

False
Loaded 6581 scans


2020-09-07 15:21:42.070 | INFO     | vimms.SequenceManager:run_evaluation:209 - Completed Evaluation: sample0
2020-09-07 15:21:42.072 | INFO     | vimms.SequenceManager:run_serial:253 - Finished 0
2020-09-07 15:21:42.072 | INFO     | vimms.SequenceManager:run_controller:193 - Begun experiment: sample1
2020-09-07 15:21:42.274 | INFO     | vimms.SequenceManager:run_experiment:159 - {'Sample ID': {1: 'sample1'}, 'Controller Method': {1: 'WeightedDewController'}, 'Controller Params': {1: {'ionisation_mode': 'Positive', 'N': 10, 'isolation_width': 1, 'mz_tol': 10, 'rt_tol': 30, 'min_ms1_intensity': 175000.0, 'ms1_shift': 0, 'exclusion_t_0': 20, 'log_intensity': False, 'params': None}}, 'MassSpec Params': {1: {'ionisation_mode': 'Positive', 'peak_sampler': <vimms.DataGenerator.PeakSampler object at 0x0000027A017CE5C0>, 'add_noise': False, 'isolation_transition_window': 'rectangular', 'isolation_transition_window_params': None}}, 'Dataset': {1: 'C:\\Users\\Vinny\\work\\vimms\\demo\\03. MultiS

False
Loaded 6390 scans


2020-09-07 15:23:59.259 | INFO     | vimms.SequenceManager:run_evaluation:209 - Completed Evaluation: sample1
2020-09-07 15:23:59.261 | INFO     | vimms.SequenceManager:run_serial:253 - Finished 1


Unnamed: 0,Sample ID,Controller Method,N,mz_tol,rt_tol,min_ms1_intensity,ms1_shift,exclusion_t_0,log_intensity,params,mzmine_peak
0,sample0,WeightedDewController,10,10,30,175000.0,0,10,False,,1199.0
1,sample1,WeightedDewController,10,10,30,175000.0,0,20,False,,1353.0
