# 1. Sequence Manager

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append('../..')

In [3]:
from vimms.SequenceManager import *

In [4]:
# Test fixtures live under <repo root>/tests/integration/fixtures, where the
# repo root is taken to be two levels above the current working directory.
repo_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
data_dir = os.path.join(repo_root, 'tests', 'integration', 'fixtures')

# NOTE(review): the .p files are presumably pickles read by load_obj -- only
# load files from a trusted source.
dataset_file = os.path.join(data_dir, 'QCB_22May19_1.p')
dataset = load_obj(dataset_file)

# Peak sampler object, later handed to the mass spec parameters as 'peak_sampler'.
ps = load_obj(Path(data_dir, 'peak_sampler_mz_rt_int_beerqcb_fragmentation.p'))

In [5]:
# Remote archive holding the example data, and the local folder
# (../01. Data/example_data relative to the working directory) where the
# extracted data is expected.
url = 'http://researchdata.gla.ac.uk/870/2/example_data.zip'
example_data_parts = (os.getcwd(), os.pardir, '01. Data', 'example_data')
base_dir = os.path.abspath(os.path.join(*example_data_parts))

In [6]:
# Reuse the example data when it is already on disk; otherwise download the
# archive and unpack it.
if os.path.isdir(base_dir):
    print('Found %s' % base_dir)
else:
    print('Creating %s' % base_dir)
    out_file = 'example_data.zip'
    download_file(url, out_file)
    # NOTE(review): out_file is a relative name, so the archive is saved in the
    # current working directory, whereas base_dir points to ../01. Data --
    # confirm that extract_zip_file unpacks into base_dir.
    extract_zip_file(out_file, delete=True)

Found C:\Users\Vinny\work\vimms\demo\01. Data\example_data


In [7]:
# Full-scan beer mzML file from the example data.
mzml_dir = os.path.join(base_dir, 'beers', 'fullscan', 'mzML')
mzml_file = os.path.join(mzml_dir, 'Beer_multibeers_1_fullscan1.mzML')

# Four entries, with None in the same positions as the blank rows of the
# schedules defined below (presumably one entry per schedule row).
mzml_file_list = [None, mzml_file] * 2

In [8]:
# Switch logging to INFO; the run logs printed by the experiment cells below are at this level.
set_log_level_info()

### Set some default parameters

In [9]:
# Root folder for all outputs of this notebook: ./results under the current working directory.
experiment_dir = os.path.join(os.getcwd(), 'results')

In [10]:
# Scan duration per MS level (key = MS level; values presumably in seconds -- TODO confirm).
DEFAULT_SCAN_TIME_DICT = {1: 0.4, 2: 0.2}

# Mass spectrometer settings used for every non-blank row of the schedules.
# No m/z or intensity noise is configured here.
mass_spec_params = {
    'ionisation_mode': POSITIVE,
    'peak_sampler': ps,
    'mz_noise': None,
    'intensity_noise': None,
    'isolation_transition_window': 'rectangular',
    'isolation_transition_window_params': None,
    'scan_duration_dict': DEFAULT_SCAN_TIME_DICT,
}

In [11]:
# Settings for the TopNController rows of the schedules.
# NOTE(review): units of mz_tol / rt_tol are not stated in this notebook -- confirm against the controller.
controller_params = {
    'ionisation_mode': POSITIVE,
    'N': 10,
    'mz_tol': 10,
    'rt_tol': 30,
    'min_ms1_intensity': 1.75E5,
    'rt_range': [(200, 400)],
    'isolation_width': 1,
}

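Note: you will need MZmine 2 installed (version 2.40.1 is used here). The next cell expects it in a `MZmine-2.40.1` folder located next to the ViMMS repository folder; adjust `mzmine_command` if your installation is elsewhere.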

In [12]:
# No evaluation methods are requested in these examples.
evaluation_methods = []

# MZmine 2.40.1 is expected three levels above the working directory.
# The start script below is the Windows batch file, so this path is Windows-specific.
mzmine_dir = os.path.join(os.getcwd(), os.pardir, os.pardir, os.pardir, 'MZmine-2.40.1', 'MZmine-2.40.1')
mzmine_command = os.path.abspath(os.path.join(mzmine_dir, 'startMZmine_Windows.bat'))

In [13]:
# Options passed as MZML2CHEMS_DICT to BasicExperiment in Example 2 (seeding from mzML).
# NOTE(review): rt values are presumably in seconds -- confirm.
MZML2CHEMS_DICT = {
    'min_ms1_intensity': 1.75E5,
    'mz_tol': 5,
    'mz_units': 'ppm',
    'min_length': 1,
    'min_intensity': 0,
    'start_rt': 0,
    'stop_rt': 1560,
}

### Set up some simple schedules

In [14]:
# Schedule for Example 1: one row per injection.
# Blank rows carry no controller, parameters or dataset; sample rows run a
# TopNController and point at the dataset file.
d = {
    'Sample ID': ['blank1', 'sample1', 'blank2', 'sample2'],
    'Controller Method': [None, 'TopNController', None, 'TopNController'],
    'Controller Params': [None, controller_params, None, controller_params],
    'MassSpec Params': [None, mass_spec_params, None, mass_spec_params],
    'Dataset': [None, dataset_file, None, dataset_file],
}
controller_schedule = pd.DataFrame(d)
controller_schedule

Unnamed: 0,Sample ID,Controller Method,Controller Params,MassSpec Params,Dataset
0,blank1,,,,
1,sample1,TopNController,"{'ionisation_mode': 'Positive', 'N': 10, 'mz_t...","{'ionisation_mode': 'Positive', 'peak_sampler'...",C:\Users\Vinny\work\vimms\tests\integration\fi...
2,blank2,,,,
3,sample2,TopNController,"{'ionisation_mode': 'Positive', 'N': 10, 'mz_t...","{'ionisation_mode': 'Positive', 'peak_sampler'...",C:\Users\Vinny\work\vimms\tests\integration\fi...


In [15]:
# Schedule for Example 2: same rows as the first schedule, but with no dataset
# file on any row (the mzML files are supplied separately when the experiment runs).
d2 = {
    'Sample ID': ['blank1', 'sample1', 'blank2', 'sample2'],
    'Controller Method': [None, 'TopNController', None, 'TopNController'],
    'Controller Params': [None, controller_params, None, controller_params],
    'MassSpec Params': [None, mass_spec_params, None, mass_spec_params],
    'Dataset': [None] * 4,
}
controller_schedule2 = pd.DataFrame(d2)
controller_schedule2

Unnamed: 0,Sample ID,Controller Method,Controller Params,MassSpec Params,Dataset
0,blank1,,,,
1,sample1,TopNController,"{'ionisation_mode': 'Positive', 'N': 10, 'mz_t...","{'ionisation_mode': 'Positive', 'peak_sampler'...",
2,blank2,,,,
3,sample2,TopNController,"{'ionisation_mode': 'Positive', 'N': 10, 'mz_t...","{'ionisation_mode': 'Positive', 'peak_sampler'...",


### Example 1 - Seed with dataset, non-parallel

In [16]:
# Results of Example 1 go into their own sub-folder of experiment_dir.
output_dir = os.path.join(experiment_dir, 'sequence_manager_example_1')

In [17]:
parallel = False  # note: True is not yet implemented

In [24]:
# Build the sequence manager for the dataset-seeded schedule.
vsm = VimmsSequenceManager(
    controller_schedule,
    evaluation_methods,
    output_dir,
    ms1_picked_peaks_file=None,
    progress_bar=True,
    mzmine_command=mzmine_command,
)
# Constructing BasicExperiment starts the run (see the log output of this cell).
experiment = BasicExperiment(vsm, parallel=parallel)

2020-08-21 15:46:11.457 | INFO     | vimms.SequenceManager:run:237 - Running in serial mode
2020-08-21 15:46:11.458 | INFO     | vimms.SequenceManager:run_controller:191 - Begun experiment: sample1
2020-08-21 15:46:11.659 | INFO     | vimms.SequenceManager:run_experiment:172 - Experiment already completed. Skipping...
2020-08-21 15:46:11.660 | INFO     | vimms.SequenceManager:run_controller:195 - Completed experiment: sample1
2020-08-21 15:46:11.660 | INFO     | vimms.SequenceManager:run_evaluation:205 - Started Evaluation: sample1
2020-08-21 15:46:11.661 | INFO     | vimms.SequenceManager:run_evaluation:207 - Completed Evaluation: sample1
2020-08-21 15:46:11.662 | INFO     | vimms.SequenceManager:run_serial:249 - Finished 0
2020-08-21 15:46:11.663 | INFO     | vimms.SequenceManager:run_controller:191 - Begun experiment: sample2


False


2020-08-21 15:46:11.866 | INFO     | vimms.SequenceManager:run_experiment:158 - {'Sample ID': {1: 'sample2'}, 'Controller Method': {1: 'TopNController'}, 'Controller Params': {1: {'ionisation_mode': 'Positive', 'N': 10, 'isolation_width': 1, 'mz_tol': 10, 'rt_tol': 30, 'min_ms1_intensity': 175000.0, 'ms1_shift': 0, 'ms1_agc_target': 200000, 'ms1_max_it': 250, 'ms1_collision_energy': 0, 'ms1_orbitrap_resolution': 120000, 'ms1_activation_type': 'HCD', 'ms1_mass_analyser': 'Orbitrap', 'ms1_isolation_mode': 'Quadupole', 'ms2_agc_target': 30000, 'ms2_max_it': 100, 'ms2_collision_energy': 25, 'ms2_orbitrap_resolution': 7500, 'ms2_activation_type': 'HCD', 'ms2_mass_analyser': 'Orbitrap', 'ms2_isolation_mode': 'Quadupole'}}, 'MassSpec Params': {1: {'ionisation_mode': 'Positive', 'peak_sampler': <vimms.DataGenerator.PeakSampler object at 0x00000206C55E9198>, 'mz_noise': None, 'intensity_noise': None, 'isolation_transition_window': 'rectangular', 'isolation_transition_window_params': None, 'scan

False


In [25]:
# Display the results table of the experiment (one row per sample that was run).
experiment.results

Unnamed: 0,Sample ID,Controller Method,N,mz_tol,rt_tol,min_ms1_intensity,ms1_shift,ms1_agc_target,ms1_max_it,ms1_collision_energy,...,ms1_activation_type,ms1_mass_analyser,ms1_isolation_mode,ms2_agc_target,ms2_max_it,ms2_collision_energy,ms2_orbitrap_resolution,ms2_activation_type,ms2_mass_analyser,ms2_isolation_mode
0,sample1,TopNController,10,10,30,175000.0,0,200000,250,0,...,HCD,Orbitrap,Quadupole,30000,100,25,7500,HCD,Orbitrap,Quadupole
1,sample2,TopNController,10,10,30,175000.0,0,200000,250,0,...,HCD,Orbitrap,Quadupole,30000,100,25,7500,HCD,Orbitrap,Quadupole


### Example 2 - Seed with mzML files

In [None]:
# Results of Example 2 go into their own sub-folder of experiment_dir.
output_dir = os.path.join(experiment_dir, 'sequence_manager_example_2')

In [None]:
# Keep the run serial for this example as well.
parallel = False

In [None]:
# Build the sequence manager for the schedule that has no dataset files.
vsm = VimmsSequenceManager(
    controller_schedule2,
    evaluation_methods,
    output_dir,
    ms1_picked_peaks_file=None,
    progress_bar=True,
    mzmine_command=mzmine_command,
)
# The mzML files, the mzML conversion options and the peak sampler are passed
# here instead of a dataset (presumably used to build the chemicals for each
# sample row -- TODO confirm).
experiment = BasicExperiment(
    vsm,
    parallel=parallel,
    mzml_file_list=mzml_file_list,
    MZML2CHEMS_DICT=MZML2CHEMS_DICT,
    ps=ps,
)