# Run DDA vs DIA experiments

# 0. Common imports and parameters

In [1]:
# Load the autoreload extension and reload all modules before each cell executes,
# so edits to the imported project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import os
import platform
import sys
from os.path import exists

In [3]:
import numpy as np
import glob
from loguru import logger

In [4]:
# Seed NumPy's global random number generator so draws from it are repeatable.
RANDOM_SEED = 1234
np.random.seed(RANDOM_SEED)

In [5]:
# Per-platform settings, keyed by the value returned by platform.system():
# (user name, pattern for the ViMMS checkout path, MS-DIAL folder, MS-DIAL console executable).
_platform_settings = {
    'Darwin': ('joewandy', '/Users/%s/Work/git/vimms', 'MSDIAL ver.4.90 OSX', 'MsdialConsoleApp'),
    'Windows': ('joewa', '/Users/%s/Work/git/vimms', 'MSDIAL ver.4.90 Windows', 'MsdialConsoleApp.exe'),
    'Linux': ('joewandy', '/home/%s/vimms', 'MSDIAL ver.4.90 Linux', 'MsdialConsoleApp'),
}

_system = platform.system()
if _system not in _platform_settings:
    # Fail here with a clear message instead of a NameError on `user_vimms` further down.
    raise RuntimeError(
        'Unsupported platform %r: expected one of %s' % (_system, sorted(_platform_settings)))

user, _vimms_pattern, _msdial_folder, _msdial_executable = _platform_settings[_system]
user_vimms = _vimms_pattern % user
# The MS-DIAL folder is resolved relative to the current working directory.
msdial_console_app = os.path.abspath(os.path.join(_msdial_folder, _msdial_executable))

# Make the ViMMS checkout importable; skip the append when re-running the cell
# so sys.path does not accumulate duplicate entries.
if user_vimms not in sys.path:
    sys.path.append(user_vimms)
mzmine_template = os.path.join(user_vimms, "batch_files", "real_smartroi_eval.xml")

In [6]:
# Root of the ViMMS checkout, and the test fixtures folder inside it.
PATH_TO_VIMMS = user_vimms
FIXTURES = os.path.join(PATH_TO_VIMMS, 'tests', 'fixtures')
# Outputs go under ./results, resolved against the current working directory.
EXPERIMENTS_HOME = os.path.abspath('results')
# The checkout may already be on sys.path from the platform cell; avoid a duplicate entry.
if PATH_TO_VIMMS not in sys.path:
    sys.path.append(PATH_TO_VIMMS)

In [7]:
from vimms.Common import load_obj, save_obj, set_log_level_info, set_log_level_warning
from vimms.Common import POSITIVE, ROI_EXCLUSION_WEIGHTED_DEW
from vimms.Chemicals import ChemicalMixtureCreator, MultipleMixtureCreator
from vimms.ChemicalSamplers import DatabaseFormulaSampler, UniformRTAndIntensitySampler, UniformMS2Sampler, GaussianChromatogramSampler

from vimms.Noise import UniformSpikeNoise, GaussianPeakNoiseLevelSpecific
from vimms.Roi import RoiBuilderParams, SmartRoiParams

from vimms.DDA_vs_DIA import make_msp, run_experiment

# 1. Make chems

## a. Create base chemical objects

In [8]:
# Folder under EXPERIMENTS_HOME where the pickled base chemical sets are stored.
chem_pickle_folder = os.path.join(EXPERIMENTS_HOME, 'base_chemicals')

In [9]:
# Load the pickled HMDB compounds from the ViMMS test fixtures folder;
# this object is handed to DatabaseFormulaSampler when the samplers are built.
HMDB = load_obj(os.path.join(FIXTURES, 'hmdb_compounds.p'))

In [10]:
# m/z and retention-time ranges given to the chemical samplers
min_sample_mz, max_sample_mz = 100, 1000
min_sample_rt, max_sample_rt = 0, 440

# m/z and retention-time ranges used as the measurement ranges in the experiment parameters
min_measure_mz, max_measure_mz = 0, 1100
min_measure_rt, max_measure_rt = 0, 500

# natural-log bounds of the sampled intensities (1e4 to 1e7)
min_log_sample_intensity, max_log_sample_intensity = np.log(1e4), np.log(1e7)

# Poisson parameter used to sample the number of MS2 peaks in each chemical
ms2_count_poiss_mean = 10

# sigma of the Gaussian chromatograms
chrom_sigma = 5

# the sizes of chemical sets to generate
n_chem_list = [10, 20, 50, 100, 200, 500, 1000, 2000, 5000]

# number of repetitions per chemical-set size
n_reps = 5

# Adduct prior passed to ChemicalMixtureCreator: for POSITIVE mode the only entry is 'M+H' with value 1.
adduct_prior_dict = {POSITIVE: {'M+H': 1.}}

`DEBUG` is set below to generate a smaller test dataset (fewer chemical-set sizes and a single repetition).

Set `DEBUG = False` to generate the full simulated dataset used in the paper.

In [11]:
# When True, the following cell replaces n_chem_list and n_reps with smaller values.
DEBUG = True

In [12]:
# In DEBUG mode, shrink the run to two chemical-set sizes and a single repetition.
if DEBUG:
    logger.warning('DEBUG is enabled!')
    n_chem_list, n_reps = [100, 1000], 1



In [13]:
# Samplers handed to ChemicalMixtureCreator when the base chemicals are generated.

# formula sampler built from the HMDB compounds, restricted to the sampling m/z range
db = DatabaseFormulaSampler(
    HMDB,
    min_mz=min_sample_mz,
    max_mz=max_sample_mz,
)

# RT and (log) intensity sampler over the sampling ranges
ri = UniformRTAndIntensitySampler(
    min_rt=min_sample_rt,
    max_rt=max_sample_rt,
    min_log_intensity=min_log_sample_intensity,
    max_log_intensity=max_log_sample_intensity,
)

# MS2 sampler parameterised by the Poisson mean for the number of MS2 peaks
ms = UniformMS2Sampler(poiss_peak_mean=ms2_count_poiss_mean)

# Gaussian chromatogram sampler with the configured sigma
cs = GaussianChromatogramSampler(sigma=chrom_sigma)

In [14]:
# Switch logging to INFO level and keep the returned handler id,
# which is later passed to set_log_level_warning().
handler_id = set_log_level_info()

In [15]:
# Sample and pickle one base chemical set per (size, repetition) pair.
# A set whose pickle is already on disk is left untouched, so the cell can be re-run cheaply.
for n_chems in n_chem_list:
    for rep in range(n_reps):
        out_name = os.path.join(chem_pickle_folder, 'chems_{}_{}.p'.format(n_chems, rep))

        if exists(out_name):
            logger.info('Already exists %s' % out_name)
            continue

        cm = ChemicalMixtureCreator(
            db,
            rt_and_intensity_sampler=ri,
            ms2_sampler=ms,
            chromatogram_sampler=cs,
            adduct_prior_dict=adduct_prior_dict,
        )
        chems = cm.sample(n_chems, 2, include_adducts_isotopes=False)
        save_obj(chems, out_name)

2022-12-21 12:24:20.599 | INFO     | vimms.Common:create_if_not_exist:378 - Created /Users/joewandy/Work/git/vimms/examples/04. DDAvsDIA (Wandy et al 2023)/results/base_chemicals
2022-12-21 12:24:20.600 | INFO     | vimms.Common:save_obj:437 - Saving <class 'list'> to /Users/joewandy/Work/git/vimms/examples/04. DDAvsDIA (Wandy et al 2023)/results/base_chemicals/chems_100_0.p
2022-12-21 12:24:25.476 | INFO     | vimms.Common:save_obj:437 - Saving <class 'list'> to /Users/joewandy/Work/git/vimms/examples/04. DDAvsDIA (Wandy et al 2023)/results/base_chemicals/chems_1000_0.p


## b. Make MSP files from base chemicals

The MSP files are used later to match the generated MS2 spectra back to the chemicals, e.g. in MS-DIAL.

In [16]:
# Create MSP files for the pickled chemical sets in chem_pickle_folder
# (the saved log shows one .msp written next to each .p file).
make_msp(chem_pickle_folder)

2022-12-21 12:24:25.608 | INFO     | vimms.DDA_vs_DIA:make_msp:33 - /Users/joewandy/Work/git/vimms/examples/04. DDAvsDIA (Wandy et al 2023)/results/base_chemicals/chems_1000_0.p
2022-12-21 12:24:25.803 | INFO     | vimms.DDA_vs_DIA:make_msp:40 - /Users/joewandy/Work/git/vimms/examples/04. DDAvsDIA (Wandy et al 2023)/results/base_chemicals/chems_1000_0.msp
2022-12-21 12:24:25.883 | INFO     | vimms.DDA_vs_DIA:make_msp:33 - /Users/joewandy/Work/git/vimms/examples/04. DDAvsDIA (Wandy et al 2023)/results/base_chemicals/chems_100_0.p
2022-12-21 12:24:25.896 | INFO     | vimms.DDA_vs_DIA:make_msp:40 - /Users/joewandy/Work/git/vimms/examples/04. DDAvsDIA (Wandy et al 2023)/results/base_chemicals/chems_100_0.msp


# 2. Generate chemicals for the different experiments

In [17]:
# Name of the sub-folder that holds the case-vs-control chemical lists.
case_vs_control_experiment = 'case_v_control_chemicals'

In [18]:
# Folder under EXPERIMENTS_HOME where the case-vs-control pickles are written.
cvc_pickle_folder = os.path.join(EXPERIMENTS_HOME, case_vs_control_experiment)

## a. Case vs control

Load each base chemical list, pass it to a `MultipleMixtureCreator`, and save the resulting list of per-sample chemical lists.

In [19]:
# number of case and control samples to generate
n_case = n_control = 5

# values used for the 'case' group's 'missing_probability' and 'changing_probability'
prob_missing_case, prob_changing_case = 0, 1

In [20]:
# Group label of every sample: all controls first, then all cases.
sample_list = ['control'] * n_control + ['case'] * n_case

# Per-group settings handed to MultipleMixtureCreator; only the 'case' group has an entry.
sample_dict = {
    'case': {
        'missing_probability': prob_missing_case,
        'changing_probability': prob_changing_case,
    },
}

In [21]:
# Pickles currently present in the base chemicals folder (kept for inspection; the loop below
# builds its input paths from n_chem_list and n_reps instead).
original_pickle_files = glob.glob(os.path.join(chem_pickle_folder, '*.p'))

# For each base chemical set, build the case-vs-control chemical lists and pickle them.
# Outputs that already exist are skipped.
for rep in range(n_reps):
    for n_chems in n_chem_list:
        out_name = os.path.join(cvc_pickle_folder, 'chems_{}_{}_cvc.p'.format(n_chems, rep))

        if exists(out_name):
            logger.info('Already exists %s' % out_name)
            continue

        pf = os.path.join(chem_pickle_folder, 'chems_{}_{}.p'.format(n_chems, rep))
        logger.info(pf)
        chems = load_obj(pf)
        mm = MultipleMixtureCreator(chems, sample_list, sample_dict)
        chem_list = mm.generate_chemical_lists()
        save_obj(chem_list, out_name)

2022-12-21 12:24:26.097 | INFO     | __main__:<cell line: 2>:7 - /Users/joewandy/Work/git/vimms/examples/04. DDAvsDIA (Wandy et al 2023)/results/base_chemicals/chems_100_0.p
2022-12-21 12:24:26.669 | INFO     | vimms.Common:create_if_not_exist:378 - Created /Users/joewandy/Work/git/vimms/examples/04. DDAvsDIA (Wandy et al 2023)/results/case_v_control_chemicals
2022-12-21 12:24:26.670 | INFO     | vimms.Common:save_obj:437 - Saving <class 'list'> to /Users/joewandy/Work/git/vimms/examples/04. DDAvsDIA (Wandy et al 2023)/results/case_v_control_chemicals/chems_100_0_cvc.p
2022-12-21 12:24:26.951 | INFO     | __main__:<cell line: 2>:7 - /Users/joewandy/Work/git/vimms/examples/04. DDAvsDIA (Wandy et al 2023)/results/base_chemicals/chems_1000_0.p
2022-12-21 12:24:33.070 | INFO     | vimms.Common:save_obj:437 - Saving <class 'list'> to /Users/joewandy/Work/git/vimms/examples/04. DDAvsDIA (Wandy et al 2023)/results/case_v_control_chemicals/chems_1000_0_cvc.p


# 3. Run controllers

In [22]:
# Raise the log level to WARNING for the controller runs, reusing the handler id from
# set_log_level_info() and keeping the new id that is returned.
handler_id = set_log_level_warning(handler_id)

In [23]:
# Parameters shared by all simulated runs; passed to run_experiment() together with
# the list of controller names.
experiment_params = {
    'ionisation_mode': POSITIVE,

    # measured RT and m/z ranges, taken from the notebook-level settings
    'min_measure_rt': min_measure_rt,
    'max_measure_rt': max_measure_rt,
    'min_measure_mz': min_measure_mz,
    'max_measure_mz': max_measure_mz,

    'rt_box_size': 50,
    'mz_box_size': 1,
    'scan_duration_dict': {1: 0.4, 2: 0.2},      # TODO: confirm these scan durations (keys 1 and 2)
    'spike_noise': UniformSpikeNoise(0.1, 1e3),  # smallest peaks are 10 x bigger than noise at their apex

    # important for correlation-based DIA method to have some noise on the MS2 peaks
    # otherwise the correlation will always be 1
    'mz_noise': GaussianPeakNoiseLevelSpecific({2: 0.01}),
    'intensity_noise': GaussianPeakNoiseLevelSpecific({2: 1000.}),

    'topN_params': {
        "ionisation_mode": POSITIVE,
        "N": 10,
        "isolation_width": 0.7,
        "min_ms1_intensity": 5000,  # same as SmartROI paper; TODO: decide whether this needs optimising
        "mz_tol": 10,
        "rt_tol": 15
    },
    'AIF_params': {
        'ms1_source_cid_energy': 30
    },
    'SWATH_params': {
        # use the measured m/z range rather than repeating its literal values (0 and 1100),
        # so the SWATH range cannot drift from the measurement range
        'min_mz': min_measure_mz,
        'max_mz': max_measure_mz,
        'width': 100,
        'scan_overlap': 0
    },

    # below are for multi-sample (repeated injection) controllers that are not used in the paper

    'non_overlap_params': {
        'roi_params': RoiBuilderParams(min_roi_intensity=500, min_roi_length=0),  # same as SmartROI paper
        'min_roi_length_for_fragmentation': 0,  # same as SmartROI paper
    },
    'smartroi_params': {
        'smartroi_params': SmartRoiParams()
    },
    'weighteddew_params': {
        # weighteddew parameters; an alternative setting previously kept here was
        # rt_tol=120 with exclusion_t_0=15 (same exclusion_method)
        'rt_tol': 15,
        'exclusion_method': ROI_EXCLUSION_WEIGHTED_DEW,
        'exclusion_t_0': 1,
    },
}

# Names of the controllers to run; passed to run_experiment() with experiment_params.
controller_names = [
    'topN',
    'SWATH',
    'AIF',    
    
    # below are for multi-sample (repeated injection) controllers that are not used in the paper
    # 'topN_exclusion', 
    # 'intensity_non_overlap_weighteddew',    
]

## a. Case vs control

In [24]:
# Results of the case-vs-control runs use the same folder as the case-vs-control chemical pickles.
result_folder = cvc_pickle_folder

In [25]:
# Build one uniquely numbered name per sample: controls first, then cases.
# Note: this replaces the earlier sample_list of plain group labels.
n_case = n_control = 5
control_names = ['control_%d' % idx for idx in range(n_control)]
case_names = ['case_%d' % idx for idx in range(n_case)]
sample_list = control_names + case_names
sample_list

['control_0',
 'control_1',
 'control_2',
 'control_3',
 'control_4',
 'case_0',
 'case_1',
 'case_2',
 'case_3',
 'case_4']

In [26]:
# Run the listed controllers for the case-vs-control samples using experiment_params.
# NOTE(review): the saved output shows this call waiting for an ipcontroller-client.json
# connection file before being interrupted, so it presumably expects a running
# ipyparallel cluster — confirm before re-running.
run_experiment(result_folder, sample_list, controller_names, experiment_params)



Waiting for connection file: ~/.ipython/profile_default/security/ipcontroller-client.json



KeyboardInterrupt

