# 1. Sequence Manager

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension and reload all imported modules before each
# cell executes, so edits to local source files are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
# Add the directory two levels above the working directory to the import path.
# NOTE(review): presumably this is the repository root that holds the `vimms`
# package imported in the following cells -- confirm if the notebook is moved.
sys.path.append('../..')

In [3]:
from pathlib import Path
from pyDOE import *
from vimms.Environment import *

In [4]:
from vimms.Chemicals import ChemicalCreator, GET_MS2_BY_PEAKS, GET_MS2_BY_SPECTRA
from vimms.MassSpec import IndependentMassSpectrometer
from vimms.Controller import *
from vimms.Common import *
from vimms.PlotsForPaper import *
from vimms.Roi import make_roi, RoiToChemicalCreator, extract_roi
from vimms.SequenceManager import *

In [5]:
# The fixtures live under tests/integration/fixtures, two directories above
# the current working directory.
repo_root = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))
data_dir = os.path.join(repo_root, 'tests', 'integration', 'fixtures')

# Keep the dataset path as well as the loaded object: the schedule defined
# later refers to the file itself.
dataset_file = os.path.join(data_dir, 'QCB_22May19_1.p')
dataset = load_obj(dataset_file)

# Peak sampler object that is handed to the mass spec parameters below.
peak_sampler_file = Path(data_dir, 'peak_sampler_mz_rt_int_beerqcb_fragmentation.p')
ps = load_obj(peak_sampler_file)

In [20]:
# Remote archive containing the example data.
url = 'http://researchdata.gla.ac.uk/870/2/example_data.zip'

# The unpacked data is looked up in the sibling "01. Data" folder, one level
# above the current working directory.
data_parent = os.path.join(os.getcwd(), '..', '01. Data')
base_dir = os.path.abspath(os.path.join(data_parent, 'example_data'))

In [21]:
# Only download and unpack the example data when it is not already on disk.
if os.path.isdir(base_dir):
    print('Found %s' % base_dir)
else:
    # NOTE(review): the archive is saved and extracted through a relative file
    # name, while base_dir points into "../01. Data" -- confirm that
    # extract_zip_file unpacks into base_dir, otherwise this check will not
    # find the data on the next run.
    print('Creating %s' % base_dir)
    out_file = 'example_data.zip'
    download_file(url, out_file)
    extract_zip_file(out_file, delete=True)

Found C:\Users\Vinny\work\vimms\demo\01. Data\example_data


In [22]:
# Full-scan beer mzML file from the example data.
mzml_dir = os.path.join(base_dir, 'beers', 'fullscan', 'mzML')
mzml_file = os.path.join(mzml_dir, 'Beer_multibeers_1_fullscan1.mzML')

# Four entries alternating None / mzML file.
# NOTE(review): this appears to line up with the blank/sample rows of the
# schedules defined below -- confirm against BasicExperiment.
mzml_file_list = [None, mzml_file] * 2

In [23]:
# Set the log level to INFO (helper made available by the star-imports above);
# the INFO lines in the outputs further down come from this setting.
set_log_level_info()

### Set some default parameters

In [17]:
# Root folder for all outputs of this notebook: "results" under the current
# working directory. Each example below writes into its own sub-folder.
experiment_dir = os.path.join(os.getcwd(), 'results')

In [18]:
# Mass spectrometer settings shared by every sample row in the schedules.
mass_spec_params = dict(
    ionisation_mode=POSITIVE,
    peak_sampler=ps,  # sampler loaded from the fixtures above
    add_noise=False,
    isolation_transition_window='rectangular',
    isolation_transition_window_params=None,
)

In [19]:
# Controller settings shared by every sample row in the schedules
# (used with 'TopNController' below).
controller_params = dict(
    ionisation_mode=POSITIVE,
    N=10,
    mz_tol=10,
    rt_tol=30,
    min_ms1_intensity=1.75E5,
    rt_range=[(200, 400)],
    isolation_width=1,
)

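Note: you will need to install MZmine 2 (version 2.40.1, matching the `mzmine_command` path defined below) in an `MZmine-2.40.1` folder located next to your ViMMS checkout, i.e. three directory levels above this notebook's working directory.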

In [25]:
import platform

# No evaluation methods are requested in this demo.
evaluation_methods = []

# MZmine 2.40.1 ships one launcher script per operating system. Pick the one
# matching the current platform instead of always using the Windows batch
# file; unknown platforms fall back to the Windows name, which keeps the
# previous behaviour.
_MZMINE_LAUNCHERS = {
    'Windows': 'startMZmine_Windows.bat',
    'Linux': 'startMZmine_Linux.sh',
    'Darwin': 'startMZmine_MacOSX.command',
}

# MZmine is expected three directory levels above the working directory, in
# MZmine-2.40.1/MZmine-2.40.1.
mzmine_dir = os.path.abspath(
    os.path.join(os.getcwd(), '..', '..', '..', 'MZmine-2.40.1', 'MZmine-2.40.1')
)
mzmine_command = os.path.join(
    mzmine_dir,
    _MZMINE_LAUNCHERS.get(platform.system(), 'startMZmine_Windows.bat'),
)

In [40]:
# Settings passed to BasicExperiment (Example 2) as MZML2CHEMS_DICT, where the
# chemicals are seeded from an mzML file.
MZML2CHEMS_DICT = dict(
    min_ms1_intensity=1.75E5,
    mz_tol=5,
    mz_units='ppm',
    min_length=1,
    min_intensity=0,
    start_rt=0,
    stop_rt=1560,
)

### Set up some simple schedules

In [42]:
# Schedule 1: four runs alternating blank / sample. Blank rows carry None in
# every column; sample rows use the Top-N controller and point at the pickled
# dataset file.
d = {
    'Sample ID': ['blank1', 'sample1', 'blank2', 'sample2'],
    'Controller Method': [None, 'TopNController'] * 2,
    'Controller Params': [None, controller_params] * 2,
    'MassSpec Params': [None, mass_spec_params] * 2,
    'Dataset': [None, dataset_file] * 2,
}
controller_schedule = pd.DataFrame(d)
controller_schedule

Unnamed: 0,Sample ID,Controller Method,Controller Params,MassSpec Params,Dataset
0,blank1,,,,
1,sample1,TopNController,"{'ionisation_mode': 'Positive', 'N': 10, 'mz_t...","{'ionisation_mode': 'Positive', 'peak_sampler'...",C:\Users\Vinny\work\vimms\demo\01. Data\exampl...
2,blank2,,,,
3,sample2,TopNController,"{'ionisation_mode': 'Positive', 'N': 10, 'mz_t...","{'ionisation_mode': 'Positive', 'peak_sampler'...",C:\Users\Vinny\work\vimms\demo\01. Data\exampl...


In [43]:
# Schedule 2: same runs as the first schedule, but with no dataset on any row
# (Example 2 supplies mzML files instead).
d2 = {
    'Sample ID': ['blank1', 'sample1', 'blank2', 'sample2'],
    'Controller Method': [None, 'TopNController'] * 2,
    'Controller Params': [None, controller_params] * 2,
    'MassSpec Params': [None, mass_spec_params] * 2,
    'Dataset': [None] * 4,
}
controller_schedule2 = pd.DataFrame(d2)
controller_schedule2

Unnamed: 0,Sample ID,Controller Method,Controller Params,MassSpec Params,Dataset
0,blank1,,,,
1,sample1,TopNController,"{'ionisation_mode': 'Positive', 'N': 10, 'mz_t...","{'ionisation_mode': 'Positive', 'peak_sampler'...",
2,blank2,,,,
3,sample2,TopNController,"{'ionisation_mode': 'Positive', 'N': 10, 'mz_t...","{'ionisation_mode': 'Positive', 'peak_sampler'...",


### Example 1 - Seed with dataset, non-parallel

In [44]:
# Output folder for Example 1, passed to VimmsSequenceManager below.
output_dir = os.path.join(experiment_dir, 'sequence_manager_example_1')

In [45]:
# Run the schedule serially. NOTE: parallel=True is not yet implemented.
parallel = False

In [46]:
# Build the sequence manager for the dataset-seeded schedule.
vsm = VimmsSequenceManager(
    controller_schedule,
    evaluation_methods,
    output_dir,
    ms1_picked_peaks_file=None,
    progress_bar=True,
    mzmine_command=mzmine_command,
)

# Constructing the experiment kicks off the runs (see the log output of this
# cell); the outcome is available afterwards as experiment.results.
experiment = BasicExperiment(vsm, parallel=parallel)

2020-07-17 17:55:45.963 | INFO     | vimms.SequenceManager:run:242 - Running in serial mode
2020-07-17 17:55:45.964 | INFO     | vimms.SequenceManager:run_controller:198 - Begun experiment: sample1
2020-07-17 17:55:46.166 | INFO     | vimms.SequenceManager:run_experiment:179 - Experiment already completed. Skipping...
2020-07-17 17:55:46.169 | INFO     | vimms.SequenceManager:run_controller:202 - Completed experiment: sample1
2020-07-17 17:55:46.172 | INFO     | vimms.SequenceManager:run_evaluation:212 - Started Evaluation: sample1
2020-07-17 17:55:46.175 | INFO     | vimms.SequenceManager:run_evaluation:214 - Completed Evaluation: sample1
2020-07-17 17:55:46.177 | INFO     | vimms.SequenceManager:run_serial:254 - Finished 0
2020-07-17 17:55:46.180 | INFO     | vimms.SequenceManager:run_controller:198 - Begun experiment: sample2
(1440.030s) ms_level=1 N=10 DEW=30: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

In [47]:
experiment.results

Unnamed: 0,Sample ID,Controller Method,N,mz_tol,rt_tol,min_ms1_intensity,ms1_agc_target,ms1_max_it,ms1_collision_energy,ms1_orbitrap_resolution,ms2_agc_target,ms2_max_it,ms2_collision_energy,ms2_orbitrap_resolution
0,sample1,TopNController,10,10,30,175000.0,200000.0,250.0,0.0,120000.0,30000.0,100.0,25.0,7500.0
1,sample2,TopNController,10,10,30,175000.0,200000.0,250.0,0.0,120000.0,30000.0,100.0,25.0,7500.0


### Example 2 - Seed with mzML files

In [57]:
# Output folder for Example 2, passed to VimmsSequenceManager below.
output_dir = os.path.join(experiment_dir, 'sequence_manager_example_2')

In [58]:
# Run the schedule serially, as in Example 1.
parallel = False

In [59]:
# Build the sequence manager for the schedule that has no dataset column set.
vsm = VimmsSequenceManager(
    controller_schedule2,
    evaluation_methods,
    output_dir,
    ms1_picked_peaks_file=None,
    progress_bar=True,
    mzmine_command=mzmine_command,
)

# Here the experiment is additionally given the mzML file list, the
# MZML2CHEMS_DICT settings and the peak sampler.
experiment = BasicExperiment(
    vsm,
    parallel=parallel,
    mzml_file_list=mzml_file_list,
    MZML2CHEMS_DICT=MZML2CHEMS_DICT,
    ps=ps,
)

2020-07-17 18:02:47.070 | INFO     | vimms.Roi:__init__:406 - Found 11480 ROIs above thresholds
2020-07-17 18:02:47.071 | INFO     | vimms.Common:save_obj:61 - Saving <class 'list'> to C:\Users\Vinny\work\vimms\demo\01. Data\example_data\beers\fullscan\mzML\Beer_multibeers_1_fullscan1.p
2020-07-17 18:02:56.010 | INFO     | vimms.Common:save_obj:61 - Saving <class 'list'> to C:\Users\Vinny\work\vimms\demo\03. MultiSampleMethods\results\sequence_manager_example_1\Beer_multibeers_1_fullscan1.p
2020-07-17 18:05:17.721 | INFO     | vimms.Roi:__init__:406 - Found 11480 ROIs above thresholds
2020-07-17 18:05:17.722 | INFO     | vimms.Common:save_obj:61 - Saving <class 'list'> to C:\Users\Vinny\work\vimms\demo\01. Data\example_data\beers\fullscan\mzML\Beer_multibeers_1_fullscan1.p
2020-07-17 18:05:26.411 | INFO     | vimms.Common:save_obj:61 - Saving <class 'list'> to C:\Users\Vinny\work\vimms\demo\03. MultiSampleMethods\results\sequence_manager_example_1\Beer_multibeers_1_fullscan1.p
2020-07-