# 2. Grid Search

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append('../..')

In [4]:
from vimms.SequenceManager import *

In [5]:
# Integration-test fixtures live two directory levels above the working directory.
data_dir = os.path.join(
    os.path.abspath(os.path.join(os.getcwd(), "..", "..")),
    'tests',
    'integration',
    'fixtures',
)

# Dataset file; its path (dataset_file) is what Example 1 passes to the grid search.
dataset_file = os.path.join(data_dir, 'QCB_22May19_1.p')
dataset = load_obj(dataset_file)

# Peak sampler object, used in mass_spec_params and passed as ps= in Examples 2 and 3.
ps = load_obj(Path(data_dir, 'peak_sampler_mz_rt_int_beerqcb_fragmentation.p'))

In [None]:
# Remote archive holding the example data.
url = 'http://researchdata.gla.ac.uk/870/2/example_data.zip'

# Directory where the unpacked example data is expected to be found
# (inside the sibling '01. Data' folder of the working directory's parent).
base_dir = os.path.abspath(
    os.path.join(os.getcwd(), '..', '01. Data', 'example_data')
)

In [None]:
# Download and unpack the example data only when base_dir is not already present.
# NOTE(review): out_file is a bare relative name while base_dir points into
# '../01. Data'; whether extract_zip_file ends up creating base_dir depends on
# where it unpacks, which is not visible here -- confirm.
if os.path.isdir(base_dir):
    print('Found %s' % base_dir)
else:
    print('Creating %s' % base_dir)
    out_file = 'example_data.zip'
    download_file(url, out_file)
    extract_zip_file(out_file, delete=True)

In [None]:
# Full-scan mzML file inside the example data directory.
mzml_file = os.path.join(
    base_dir,
    'beers',
    'fullscan',
    'mzML',
    'Beer_multibeers_1_fullscan1.mzML',
)

# Four entries alternating between None and the mzML file above.
mzml_file_list = [
    None,
    mzml_file,
    None,
    mzml_file,
]

In [6]:
# Configure logging through the helper brought in by the star import;
# the outputs further down show INFO-level messages.
set_log_level_info()

### Set some default parameters

In [7]:
# Root folder for all example outputs: a 'results' directory under the working directory.
experiment_dir = os.path.join(os.getcwd(), 'results')

In [8]:
# Mass spectrometer settings shared by every example below.
mass_spec_params = {
    'ionisation_mode': POSITIVE,
    'peak_sampler': ps,  # peak sampler object loaded from the fixtures
    'add_noise': False,
    'isolation_transition_window': 'rectangular',
    'isolation_transition_window_params': None,
}

In [9]:
# Default controller settings; the grid search varies 'N' and 'rt_tol'
# (see topn_variable_params_dict), the remaining entries stay fixed.
controller_params = {
    "ionisation_mode": POSITIVE,
    "N": 10,
    "mz_tol": 10,
    "rt_tol": 30,
    "min_ms1_intensity": 1.75E5,
    "rt_range": [(200, 400)],
    "isolation_width": 1,
}

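Note: the next cell expects MZmine 2.40.1 to be installed in the directory that contains the ViMMS repository (i.e. `MZmine-2.40.1/MZmine-2.40.1/` next to the ViMMS checkout) and uses its Windows launcher script. If your installation lives elsewhere, or you are not on Windows, adjust `mzmine_command` accordingly.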

In [10]:
# Evaluation method(s) handed to VimmsSequenceManager.
evaluation_methods = ['mzmine_peak']

# Windows launcher script of MZmine 2.40.1, resolved three directory levels
# above the working directory.
mzmine_command = os.path.abspath(
    os.path.join(
        os.getcwd(),
        '..',
        '..',
        '..',
        'MZmine-2.40.1',
        'MZmine-2.40.1',
        'startMZmine_Windows.bat',
    )
)

In [11]:
# Settings passed as MZML2CHEMS_DICT= to GridSearchExperiment in Examples 2 and 3,
# where the search is seeded from an mzML file instead of a dataset file.
MZML2CHEMS_DICT = {
    'min_ms1_intensity': 1.75E5,  # same value as controller_params['min_ms1_intensity']
    'mz_tol': 5,
    'mz_units': 'ppm',
    'min_length': 1,
    'min_intensity': 0,
    'start_rt': 0,
    'stop_rt': 1560,
}

### Get a picked peaks file

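A picked peaks file is optional for a grid search experiment. Examples 1 and 2 reuse the file created below; Example 3 shows how to run without one (it passes `ms1_picked_peaks_file=None` together with `mzmine_command`).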

In [17]:
# Run MZmine peak picking on the full-scan mzML file, writing into the working
# directory; the result is reused as ms1_picked_peaks_file in Examples 1 and 2.
ms1_picked_peaks_file = pick_peaks(
    [mzml_file],
    xml_template=QCB_XML_TEMPLATE_MS1,
    output_dir=os.getcwd(),
    mzmine_command=mzmine_command,
)

2020-07-17 18:25:21.089 | INFO     | vimms.PythonMzmine:pick_peaks:23 - Creating xml batch file for Beer_multibeers_1_fullscan1.mzML
2020-07-17 18:25:21.099 | INFO     | vimms.PythonMzmine:pick_peaks:53 - Running mzMine for Beer_multibeers_1_fullscan1.mzML


### Some parameter settings to search over

In [18]:
# Parameter grid for the Top-N examples: one value of N, two values of rt_tol,
# i.e. two runs (sample0 and sample1 in the outputs).
topn_variable_params_dict = {
    'N': [10],
    'rt_tol': [15, 30],
}

### Example 1 - Top N - Seed with dataset and picked peaks

In [19]:
# Example 1 writes its results to <experiment_dir>/grid_search_example_1.
output_dir = os.path.join(experiment_dir, 'grid_search_example_1')

In [20]:
# Forwarded as parallel= to GridSearchExperiment; False gives the
# "Running in serial mode" log line in the output below.
parallel = False

In [21]:
# Sequence manager that reuses the picked peaks file created earlier.
vsm = VimmsSequenceManager(
    None,
    evaluation_methods,
    output_dir,
    progress_bar=True,
    ms1_picked_peaks_file=ms1_picked_peaks_file,
)

# Top-N grid search seeded with the dataset file (fourth positional argument).
gs = GridSearchExperiment(
    vsm,
    'TopNController',
    mass_spec_params,
    dataset_file,
    topn_variable_params_dict,
    controller_params,
    parallel=parallel,
)

# Display the results as the cell output.
gs.results

2020-07-17 18:50:12.249 | INFO     | vimms.Common:create_if_not_exist:48 - Created C:\Users\Vinny\work\vimms\demo\03. MultiSampleMethods\results\grid_search_example_1
2020-07-17 18:50:12.262 | INFO     | vimms.SequenceManager:run:242 - Running in serial mode
2020-07-17 18:50:12.263 | INFO     | vimms.SequenceManager:run_controller:198 - Begun experiment: sample0
(504.098s) ms_level=2 N=10 DEW=15:  35%|███████████████████████████████████████████████████▊                                                                                                | 504.0979760000005/1440 [00:51<01:35,  9.78it/s]


KeyboardInterrupt: 

### Example 2 - Top N - Seed with mzml and picked peaks

In [25]:
# Example 2 writes its results to <experiment_dir>/grid_search_example_2.
output_dir = os.path.join(experiment_dir, 'grid_search_example_2')

In [26]:
# Forwarded as parallel= to GridSearchExperiment; False gives the
# "Running in serial mode" log line in the output below.
parallel = False

In [27]:
# Sequence manager that reuses the picked peaks file created earlier.
vsm = VimmsSequenceManager(
    None,
    evaluation_methods,
    output_dir,
    progress_bar=True,
    ms1_picked_peaks_file=ms1_picked_peaks_file,
)

# Top-N grid search seeded from an mzML file: the dataset-file slot (fourth
# positional argument) is None and mzml_file is passed as the seventh.
gs = GridSearchExperiment(
    vsm,
    'TopNController',
    mass_spec_params,
    None,
    topn_variable_params_dict,
    controller_params,
    mzml_file,
    MZML2CHEMS_DICT=MZML2CHEMS_DICT,
    ps=ps,
    parallel=parallel,
)

# Display the results as the cell output.
gs.results

2020-07-15 12:24:44.656 | INFO     | vimms.Roi:__init__:406 - Found 797 ROIs above thresholds
2020-07-15 12:24:44.660 | INFO     | vimms.Common:save_obj:61 - Saving <class 'list'> to C:\Users\Vinny\work\mzmine_files\QCB\fullscan_mzmls\QCB_22May19_1.p
2020-07-15 12:24:46.063 | INFO     | vimms.Common:save_obj:61 - Saving <class 'list'> to C:\Users\Vinny\work\mzmine_files\SimpleExperiments\GridSearch\experiment_2_2_2\QCB_22May19_1.p
2020-07-15 12:24:47.090 | INFO     | vimms.SequenceManager:run:240 - Running in serial mode
2020-07-15 12:24:47.091 | INFO     | vimms.SequenceManager:run_controller:196 - Begun experiment: sample0
2020-07-15 12:24:47.293 | INFO     | vimms.SequenceManager:run_experiment:177 - Experiment already completed. Skipping...
2020-07-15 12:24:47.293 | INFO     | vimms.SequenceManager:run_controller:200 - Completed experiment: sample0
2020-07-15 12:24:47.294 | INFO     | vimms.SequenceManager:run_evaluation:210 - Started Evaluation: sample0


Loaded 3090 scans


2020-07-15 12:24:56.540 | INFO     | vimms.SequenceManager:run_evaluation:212 - Completed Evaluation: sample0
2020-07-15 12:24:56.544 | INFO     | vimms.SequenceManager:run_serial:252 - Finished 0
2020-07-15 12:24:56.547 | INFO     | vimms.SequenceManager:run_controller:196 - Begun experiment: sample1
2020-07-15 12:24:56.749 | INFO     | vimms.SequenceManager:run_experiment:177 - Experiment already completed. Skipping...
2020-07-15 12:24:56.750 | INFO     | vimms.SequenceManager:run_controller:200 - Completed experiment: sample1
2020-07-15 12:24:56.751 | INFO     | vimms.SequenceManager:run_evaluation:210 - Started Evaluation: sample1


Loaded 2463 scans


2020-07-15 12:25:02.765 | INFO     | vimms.SequenceManager:run_evaluation:212 - Completed Evaluation: sample1
2020-07-15 12:25:02.766 | INFO     | vimms.SequenceManager:run_serial:252 - Finished 1


Unnamed: 0,Sample ID,Controller Method,N,mz_tol,rt_tol,min_ms1_intensity,ms1_agc_target,ms1_max_it,ms1_collision_energy,ms1_orbitrap_resolution,ms2_agc_target,ms2_max_it,ms2_collision_energy,ms2_orbitrap_resolution,mzmine_peak
0,sample0,TopNController,10,10,15,175000.0,200000.0,250.0,0.0,120000.0,30000.0,100.0,25.0,7500.0,496.0
1,sample1,TopNController,10,10,30,175000.0,200000.0,250.0,0.0,120000.0,30000.0,100.0,25.0,7500.0,466.0


### Example 3 - Top N - Seed with mzml, non-parallel

In [33]:
# Example 3 writes its results to <experiment_dir>/grid_search_example_3.
output_dir = os.path.join(experiment_dir, 'grid_search_example_3')

In [34]:
# Forwarded as parallel= to GridSearchExperiment; False gives the
# "Running in serial mode" log line in the output below.
parallel = False

In [35]:
# Sequence manager without a pre-computed picked peaks file; the MZmine
# launcher is supplied instead (the log below shows pick_peaks being run).
vsm = VimmsSequenceManager(
    None,
    evaluation_methods,
    output_dir,
    progress_bar=True,
    ms1_picked_peaks_file=None,
    mzmine_command=mzmine_command,
)

# Top-N grid search seeded from an mzML file: the dataset-file slot (fourth
# positional argument) is None and mzml_file is passed as the seventh.
gs = GridSearchExperiment(
    vsm,
    'TopNController',
    mass_spec_params,
    None,
    topn_variable_params_dict,
    controller_params,
    mzml_file,
    MZML2CHEMS_DICT=MZML2CHEMS_DICT,
    ps=ps,
    parallel=parallel,
)

# Display the results as the cell output.
gs.results

2020-07-15 12:48:21.637 | INFO     | vimms.Roi:__init__:406 - Found 797 ROIs above thresholds
2020-07-15 12:48:21.637 | INFO     | vimms.Common:save_obj:61 - Saving <class 'list'> to C:\Users\Vinny\work\mzmine_files\QCB\fullscan_mzmls\QCB_22May19_1.p
2020-07-15 12:48:23.052 | INFO     | vimms.Common:save_obj:61 - Saving <class 'list'> to C:\Users\Vinny\work\mzmine_files\SimpleExperiments\GridSearch\experiment_2_2_3\QCB_22May19_1.p
2020-07-15 12:48:24.005 | INFO     | vimms.PythonMzmine:pick_peaks:23 - Creating xml batch file for QCB_22May19_1.mzML
2020-07-15 12:48:24.007 | INFO     | vimms.PythonMzmine:pick_peaks:53 - Running mzMine for QCB_22May19_1.mzML


C:\Users\Vinny\work\mzmine_files\QCB\fullscan_mzmls\QCB_22May19_1.mzML
C:\Users\Vinny\work\vimms\batch_files\QCB_mzmine_batch_ms1.xml
C:\Users\Vinny\work\mzmine_files\SimpleExperiments\GridSearch\experiment_2_2_3
C:\Users\Vinny\work\MZmine-2.40.1\MZmine-2.40.1\startMZmine_Windows.bat


2020-07-15 12:54:04.612 | INFO     | vimms.SequenceManager:run:244 - Running in serial mode
2020-07-15 12:54:04.614 | INFO     | vimms.SequenceManager:run_controller:200 - Begun experiment: sample0
2020-07-15 12:54:04.814 | INFO     | vimms.SequenceManager:run_experiment:181 - Experiment already completed. Skipping...
2020-07-15 12:54:04.815 | INFO     | vimms.SequenceManager:run_controller:204 - Completed experiment: sample0
2020-07-15 12:54:04.815 | INFO     | vimms.SequenceManager:run_evaluation:214 - Started Evaluation: sample0


Loaded 3100 scans


2020-07-15 12:54:08.311 | INFO     | vimms.SequenceManager:run_evaluation:216 - Completed Evaluation: sample0
2020-07-15 12:54:08.312 | INFO     | vimms.SequenceManager:run_serial:256 - Finished 0
2020-07-15 12:54:08.313 | INFO     | vimms.SequenceManager:run_controller:200 - Begun experiment: sample1
(1440.967s) ms_level=1 N=10 DEW=30: 100%|████████████████████████████████████████████████████████████████████████████████████████████████▉| 1439.278592387769/1440 [00:21<00:00, 66.95it/s]
2020-07-15 12:54:39.384 | INFO     | vimms.SequenceManager:run_controller:204 - Completed experiment: sample1
2020-07-15 12:54:39.385 | INFO     | vimms.SequenceManager:run_evaluation:214 - Started Evaluation: sample1


Loaded 2426 scans


2020-07-15 12:54:42.788 | INFO     | vimms.SequenceManager:run_evaluation:216 - Completed Evaluation: sample1
2020-07-15 12:54:42.790 | INFO     | vimms.SequenceManager:run_serial:256 - Finished 1


Unnamed: 0,Sample ID,Controller Method,N,mz_tol,rt_tol,min_ms1_intensity,ms1_agc_target,ms1_max_it,ms1_collision_energy,ms1_orbitrap_resolution,ms2_agc_target,ms2_max_it,ms2_collision_energy,ms2_orbitrap_resolution,mzmine_peak
0,sample0,TopNController,10,10,15,175000.0,200000.0,250.0,0.0,120000.0,30000.0,100.0,25.0,7500.0,488.0
1,sample1,TopNController,10,10,30,175000.0,200000.0,250.0,0.0,120000.0,30000.0,100.0,25.0,7500.0,466.0


### Example 4 - SmartROI - Seed with mzml, non-parallel