# 1. Main DDA vs DIA results

This notebook generates the experimental data for the main comparison of data-dependent acquisition (DDA) and data-independent acquisition (DIA) methods.

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Enable the autoreload extension in mode 2, so that imported modules are
# reloaded before each cell executes and edits to local code are picked up.
%load_ext autoreload
%autoreload 2

## Import the modules we need

In [2]:
import pandas as pd

In [3]:
import sys
import os
from collections import OrderedDict

In [4]:
# Make the notebook's own directory importable so that `common` can be found.
# Guarded so that re-running this cell does not pile up duplicate entries.
if '.' not in sys.path:
    sys.path.append('.')
from common import get_user_and_vimms_folder

# True: drive the real instrument; False: run the simulated experiments instead.
use_instrument = True
user, vimms_folder, vimms_fusion_folder = get_user_and_vimms_folder(use_instrument)
# Display the resolved folders as a sanity check.
vimms_folder, vimms_fusion_folder

('/Users/Orbitrap Fusion/vimms', '/Users/Orbitrap Fusion/vimms-fusion')

In [5]:
# Put the ViMMS and ViMMS-Fusion folders on the import path, together with the
# directory holding the FusionLibrary build (Debug / netstandard2.0).
ref_dir = os.path.join(vimms_fusion_folder, "Interface", "FusionLibrary", "bin", "Debug", "netstandard2.0")
for extra_path in (vimms_folder, vimms_fusion_folder, ref_dir):
    # Skip entries that are already present so re-running the cell is idempotent.
    if extra_path not in sys.path:
        sys.path.append(extra_path)

In [6]:
# Display the resolved FusionLibrary build directory as a sanity check.
ref_dir

'/Users/Orbitrap Fusion/vimms-fusion\\Interface\\FusionLibrary\\bin\\Debug\\netstandard2.0'

In [7]:
from experiment import get_shared_experiment_params
from vimms.Common import POSITIVE, set_log_level_warning
from vimms.Common import CONTROLLER_FULLSCAN, CONTROLLER_TOPN, CONTROLLER_TOPN_EXCLUSION, \
    CONTROLLER_SWATH, CONTROLLER_AIF
from vimms.scripts.multi_sample_experiment import extract_chemicals, run_batch, generate_sequence_df, write_sequence_csv



## Experiment parameters

Specify the sample names, the initial runs and which controller to run.

`controller_repeat` is a dictionary that tells us which samples to run under which controller, and how many replicates to run:
- key = controller name
- value = `(sample_names, repeat)`, where
  - `sample_names` is a list of samples, e.g. `['beer1', 'beer2']`
  - `repeat` is the number of replicates

In [8]:
# ionisation_mode = POSITIVE
# initial_runs = ['blank1', 'blank2', 'CMW_solvent_blank', 'QCA', 'QCB']
# all_samples = ['beer1', 'beer2', 'beer3', 'beer4', 'beer5', 'beer6']

In [9]:
# controller_repeat = OrderedDict({
#     CONTROLLER_FULLSCAN              : (all_samples, 1),
#     CONTROLLER_TOPN                  : (all_samples, 1),
#     CONTROLLER_TOPN_EXCLUSION        : (all_samples, 1),
#     CONTROLLER_SWATH                 : (all_samples, 1),
#     CONTROLLER_AIF                   : (all_samples, 1)
# })
# controller_repeat

In [10]:
# Active configuration for this run: positive ionisation mode, no initial
# (blank/QC) runs, and the six beer samples.
ionisation_mode = POSITIVE
initial_runs = []
all_samples = ['beer1', 'beer2', 'beer3', 'beer4', 'beer5', 'beer6']

In [11]:
# Map each controller to (samples to run, number of replicates): every sample
# is run once under each of the four controllers listed here, in this order.
controller_repeat = OrderedDict(
    (controller, (all_samples, 1))
    for controller in (
        CONTROLLER_TOPN,
        CONTROLLER_TOPN_EXCLUSION,
        CONTROLLER_SWATH,
        CONTROLLER_AIF,
    )
)
controller_repeat

OrderedDict([('topN',
              (['beer1', 'beer2', 'beer3', 'beer4', 'beer5', 'beer6'], 1)),
             ('topN_exclusion',
              (['beer1', 'beer2', 'beer3', 'beer4', 'beer5', 'beer6'], 1)),
             ('SWATH',
              (['beer1', 'beer2', 'beer3', 'beer4', 'beer5', 'beer6'], 1)),
             ('AIF',
              (['beer1', 'beer2', 'beer3', 'beer4', 'beer5', 'beer6'], 1))])

In [12]:
# Load the controller parameters shared across experiments (from the local
# `experiment` module) and display them for the record.
experiment_params = get_shared_experiment_params()
experiment_params

{'topN_params': {'ionisation_mode': 'Positive',
  'N': 10,
  'isolation_width': 0.7,
  'min_ms1_intensity': 5000,
  'mz_tol': 10,
  'rt_tol': 15},
 'AIF_params': {'ms1_source_cid_energy': 25},
 'SWATH_params': {'min_mz': 70,
  'max_mz': 1000,
  'width': 100,
  'scan_overlap': 0},
 'non_overlap_scoring': {'use_smartroi_exclusion': False,
  'use_weighteddew_exclusion': False},
 'non_overlap_params': {'roi_params': {'mz_tol': 10, 'min_roi_length': 3, 'min_roi_intensity': 0, 'at_least_one_point_above': 0, 'start_rt': 0, 'stop_rt': 100000.0, 'max_gaps_allowed': 0},
  'min_roi_length_for_fragmentation': 3},
 'smartroi_params': {'smartroi_params': {'initial_length_seconds': 5, 'reset_length_seconds': 1000000.0, 'intensity_increase_factor': 10, 'drop_perc': 0.001, 'dew': 15}},
 'weighteddew_params': {'rt_tol': 120,
  'exclusion_method': 'exclusion_weighted_dew',
  'exclusion_t_0': 15},
 'grid_params': {'min_measure_rt': 0,
  'max_measure_rt': 1440,
  'rt_box_size': 50,
  'mz_box_size': 1},
 's

In [13]:
# Passed as the final argument to every `run_batch` call below; presumably the
# folder where results are written (confirm against `run_batch`).
out_dir = 'results_1'

In [14]:
# Reduce ViMMS logging to warnings only (going by the helper's name).
set_log_level_warning()

1

## Simulated runs

In [None]:
# Simulation only: extract the chemicals to simulate from an existing TopN QCB
# mzML file and report how many were found.
# NOTE(review): hard-coded, user-specific absolute path; this only resolves on
# machines that share this folder layout.
if not use_instrument: # for simulated data
    seed_file = '/Users/%s/University of Glasgow/Vinny Davies - CLDS Metabolomics Project/Experimental_Results/20201208_TopN_vs_ROI_replicates/results/seed/TopN_QCB_0.mzML' % user
    dataset = extract_chemicals(seed_file, ionisation_mode)
    print(len(dataset))

In [None]:
if not use_instrument:
    # Simulated batch: run the configured controllers on the chemicals that the
    # previous cell extracted into `dataset`.
    max_time, use_column = 60, False
    pbar = False
    run_batch(
        initial_runs, controller_repeat, experiment_params, all_samples,
        pbar, max_time, ionisation_mode, use_instrument, use_column,
        ref_dir, dataset, out_dir,
    )

## Background runs on instrument

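Short test runs (`max_time = 60`, no column) of each controller on the instrument's background signal, to check that everything works before the actual injections.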

In [None]:
# Reduced configuration for the background test: three samples, each run once
# under all five controllers (fullscan included).
test_all_samples = ['beer1', 'beer2', 'beer3']
test_controller_repeat = OrderedDict(
    (controller, (test_all_samples, 1))
    for controller in (
        CONTROLLER_FULLSCAN,
        CONTROLLER_TOPN,
        CONTROLLER_TOPN_EXCLUSION,
        CONTROLLER_SWATH,
        CONTROLLER_AIF,
    )
)
test_all_samples, test_controller_repeat

In [None]:
if use_instrument:
    # Background test on the instrument: short runs of the reduced test
    # configuration, with `use_column` off and no simulated dataset.
    max_time, use_column = 60, False
    pbar, dataset = False, None
    run_batch(
        initial_runs, test_controller_repeat, experiment_params, test_all_samples,
        pbar, max_time, ionisation_mode, use_instrument, use_column,
        ref_dir, dataset, out_dir,
    )

## Run the actual injections

Generate the sequence file to load into Xcalibur.

In [None]:
# Sample name -> position code, passed to `generate_sequence_df` when building
# the sequence (presumably autosampler vial positions; confirm).
# NOTE(review): 'blank1' and 'blank2' both map to 'RE5'; confirm that sharing a
# position is intentional.
position = {
    'blank1': 'RE5',
    'blank2': 'RE5',
    'CMW_solvent_blank': 'RE1',
    'QCA': 'RA1',
    'QCB': 'RA2',
    'beer1': 'RB1',
    'beer2': 'RB2',
    'beer3': 'RB3',
    'beer4': 'RB4',
    'beer5': 'RB5',
    'beer6': 'RB6'
}
# Windows paths on the instrument PC: the folder for the acquired raw files,
# and the Xcalibur methods used for blank runs and for sample injections.
raw_output_path = 'C:\\Xcalibur\\data\\Joe\\LC_Test\\05Jul2022'
blank_method_path = 'C:\\Xcalibur\\methods\\SII\\pHILIC_new\\Blank_SII_pHILIC_26min'
instrument_method_path = 'C:\\Xcalibur\\methods\\SII\\pHILIC_new\\Injection_SII_pHILIC_26min'

In [None]:
# Build the injection sequence table from the active run configuration, the
# sample positions and the Xcalibur paths, then preview the first ten rows.
df = generate_sequence_df(
    initial_runs,
    controller_repeat,
    all_samples,
    position,
    raw_output_path,
    blank_method_path,
    instrument_method_path,
)
df.head(10)

In [None]:
# Write the sequence table to 'sequence1.csv' (a relative path, so it is
# resolved against the current working directory).
write_sequence_csv(df, 'sequence1.csv')

Run the controllers on the instrument.

In [15]:
if use_instrument:
    # Actual injections: full-length runs (`max_time = 1440`) with `use_column`
    # on, using the active configuration and no simulated dataset.
    max_time, use_column = 1440, True
    pbar, dataset = False, None
    run_batch(
        initial_runs, controller_repeat, experiment_params, all_samples,
        pbar, max_time, ionisation_mode, use_instrument, use_column,
        ref_dir, dataset, out_dir,
    )

