This notebook is an example of how to optimise the parameters of the SmartROI and WeightedDEW methods on a set of samples.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import xml.etree.ElementTree
import os,glob,sys
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
import sys
# Add the directory two levels above the working directory to the import path
# (presumably the repository root, so that `vimms` can be imported - confirm).
sys.path.append('../..')

In [None]:
from vimms.PythonMzmine import *
from vimms.MassSpec import IndependentMassSpectrometer
from vimms.Controller import TopNController,WeightedDEWController
from vimms.PythonMzmine import *

In [None]:
from vimms.Roi import make_roi, RoiToChemicalCreator
from vimms.BOMAS import *
from vimms.Common import *
from vimms.Environment import *
from pathlib import Path
from vimms.PlotsForPaper import get_frag_events

In [None]:
from vimms.DataGenerator import extract_hmdb_metabolite, get_data_source, get_spectral_feature_database

In [None]:
# Helper brought in by one of the star-imports above; going by its name it limits
# log output to warnings and above - confirm against the vimms source.
set_log_level_warning()

# Load files needed to run experiments

In [None]:
# Fixture directory: <two levels above the working directory>/tests/integration/fixtures
data_dir = os.path.join(
    os.path.abspath(os.path.join(os.getcwd(), "..", "..")),
    'tests', 'integration', 'fixtures',
)
# Object handed to mzml2chems and to every simulated mass spectrometer below.
ps = load_obj(Path(data_dir, 'peak_sampler_mz_rt_int_beerqcb_fragmentation.p'))

In [None]:
# Options passed to every mzml2chems call below.
# stop_rt equals 26*60, the same value as max_rt in the experiment parameters.
MZML2CHEMS_DICT = dict(
    min_ms1_intensity=0,
    mz_tol=5,
    mz_units='ppm',
    min_length=1,
    min_intensity=0,
    start_rt=0,
    stop_rt=1560,
)

In [None]:
# Machine-specific location of the MZmine launcher; adjust for your own installation.
mzmine_command = r'C:\Users\Vinny\work\MZmine-2.40.1\MZmine-2.40.1\startMZmine_Windows.bat'
# Batch template used for peak picking: <two levels up>/batch_files/QC_PP.xml
xml_file = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir, os.pardir, 'batch_files', 'QC_PP.xml')
)

# Data

Original mzML files that the methods were optimised on.

In [None]:
# Machine-specific locations of the original mzML files; adjust for your own copy.
# The paths are assembled from a shared prefix so the common part is written once.
_project_dir = r'C:\Users\Vinny\OneDrive - University of Glasgow\CLDS Metabolomics Project'
_smartroi_dir = _project_dir + r'\TopN_vs_ROI'
_weighteddew_dir = _project_dir + r'\Experimental_Results\20200715_TopN_vs_ROI\from_controllers'

SmartROI_QCA_mzml = _smartroi_dir + r'\QCA\from_controller_TopN_QCA.mzML'
SmartROI_QCB_mzml = _smartroi_dir + r'\QCB\from_controller_TopN_QCB.mzML'
WeightedDEW_QCA_mzml = _weighteddew_dir + r'\QCA\TopN_QCA.mzML'
WeightedDEW_QCB_mzml = _weighteddew_dir + r'\QCB\TopN_QCB.mzML'

We can convert them into ViMMS chemicals, which allows us to run the virtual MS on them.

In [None]:
# Convert each original mzML file with the same sampler and options;
# the calls run in the order the target names are listed.
(SmartROI_QCA_dataset,
 SmartROI_QCB_dataset,
 WeightedDEW_QCA_dataset,
 WeightedDEW_QCB_dataset) = [
    mzml2chems(mzml_path, ps, MZML2CHEMS_DICT, n_peaks=None)
    for mzml_path in (
        SmartROI_QCA_mzml,
        SmartROI_QCB_mzml,
        WeightedDEW_QCA_mzml,
        WeightedDEW_QCB_mzml,
    )
]

In [None]:
# Store every converted dataset in the working directory so the conversion
# above does not have to be repeated.
for _file_name, _dataset in (
    ('SmartROI_QCA_dataset.p', SmartROI_QCA_dataset),
    ('SmartROI_QCB_dataset.p', SmartROI_QCB_dataset),
    ('WeightedDEW_QCA_dataset.p', WeightedDEW_QCA_dataset),
    ('WeightedDEW_QCB_dataset.p', WeightedDEW_QCB_dataset),
):
    save_obj(_dataset, Path(os.getcwd(), _file_name))

In [None]:
# Read the four saved datasets back from the working directory.
(SmartROI_QCA_dataset,
 SmartROI_QCB_dataset,
 WeightedDEW_QCA_dataset,
 WeightedDEW_QCB_dataset) = [
    load_obj(Path(os.getcwd(), file_name))
    for file_name in (
        'SmartROI_QCA_dataset.p',
        'SmartROI_QCB_dataset.p',
        'WeightedDEW_QCA_dataset.p',
        'WeightedDEW_QCB_dataset.p',
    )
]

We can pick the peaks from them in order to evaluate performance

In [None]:
# Run peak picking once per original mzML file, each as a single-file batch,
# with the shared batch template and the working directory as output location.
for mzml_path in (
    SmartROI_QCA_mzml,
    SmartROI_QCB_mzml,
    WeightedDEW_QCA_mzml,
    WeightedDEW_QCB_mzml,
):
    pick_peaks([mzml_path], xml_template=xml_file, output_dir=os.getcwd(),
               mzmine_command=mzmine_command)

Load the picked peaks files and convert them into boxes for use in the evaluation

In [None]:
# The CSV names are the mzML base names with a `_pp.csv` suffix
# (presumably the files written by the peak-picking step - confirm).
(SmartROI_QCA_boxes,
 SmartROI_QCB_boxes,
 WeightedDEW_QCA_boxes,
 WeightedDEW_QCB_boxes) = [
    load_picked_boxes(Path(os.getcwd(), csv_name))
    for csv_name in (
        'from_controller_TopN_QCA_pp.csv',
        'from_controller_TopN_QCB_pp.csv',
        'TopN_QCA_pp.csv',
        'TopN_QCB_pp.csv',
    )
]

## Experiment Parameters

In [None]:
# Time window handed to every Environment below
min_rt = 0
max_rt = 26 * 60  # entire run

# Settings passed to both the SmartROI and the WeightedDEW controllers
ionisation_mode = POSITIVE
isolation_width = 1
N = 10
mz_tol = 10
min_ms1_intensity = 5000

# Settings passed to the SmartROI controller only
rt_tol = 15
min_roi_intensity = 500
min_roi_length = 0
min_roi_length_for_fragmentation = 0

# Passed as scan_duration_dict to the simulated mass spectrometer:
# roi_time_dict in the SmartROI experiments, topn_time_dict in the WeightedDEW ones.
# The keys 1 and 2 are presumably MS levels - confirm.
roi_time_dict = {1: 0.71, 2: 0.20}
topn_time_dict = {1: 0.60, 2: 0.20}

SmartROI parameter options

In [None]:
# Candidate intensity_increase_factor values for the SmartROI grid search.
# The int/float mix is deliberate to keep: the values are formatted into file names.
iif_values = [2, 3, 5, 10, 1e3, 1e6]
# Candidate drop percentages; each is divided by 100 before reaching the controller.
dp_values = [0, 0.1, 0.5, 1, 5]
# Large enough that a reset never happens.
reset_length_seconds = 1e6

WeightedDEW parameter options

In [None]:
# Grid for the WeightedDEW search. Each list used to be assigned twice in a row,
# so only the second (effective) definition is kept.
# t0_vals feeds exclusion_t_0; rt_tol_vals feeds the controller's fifth positional argument.
t0_vals = [1, 3, 10, 15, 30, 60]
rt_tol_vals = [15, 60, 120, 240, 360, 3600]

## Experiment 1 - QCA, SmartROI

In [None]:
# Experiment 1 results go into ./experiment_1 under the working directory.
output_folder = Path(os.getcwd()) / 'experiment_1'

In [None]:
# TopN_SmartRoiController is not named in the explicit `vimms.Controller` import near
# the top of the notebook, so import it here before its first use; otherwise a
# fresh-kernel run relies on a star-import happening to re-export it.
from vimms.Controller import TopN_SmartRoiController

# Grid search: simulate one SmartROI run on the QCA chemicals for every
# (intensity increase factor, drop percentage) pair and write the result as mzML.
for iif in iif_values:
    for dp in dp_values:
        print(iif,dp)
        intensity_increase_factor = iif # fragment ROI again if intensity increases by this factor
        drop_perc = dp/100 # dp is a percentage; the controller is given the fraction

        controller = TopN_SmartRoiController(ionisation_mode, isolation_width, mz_tol, min_ms1_intensity,
                                             min_roi_intensity, min_roi_length, N = N, rt_tol = rt_tol, 
                                             min_roi_length_for_fragmentation = min_roi_length_for_fragmentation,
                                             reset_length_seconds = reset_length_seconds,
                                             intensity_increase_factor = intensity_increase_factor,
                                             drop_perc = drop_perc)

        mass_spec = IndependentMassSpectrometer(ionisation_mode, SmartROI_QCA_dataset,
                                                ps,
                                                add_noise=True,
                                                scan_duration_dict=roi_time_dict)

        env = Environment(mass_spec, controller, min_rt, max_rt, progress_bar=True)
        env.run()

        # The file name encodes the parameter pair; the counting cell rebuilds the same name.
        out_name = 'SmartROI_{}_{}.mzml'.format(iif,dp)
        env.write_mzML(output_folder, out_name)

In [None]:
# counts_exp1[i, j] = len(boxes2scans) for the run with iif_values[i] and dp_values[j],
# evaluated against the boxes picked from the original QCA file.
grid_shape = (len(iif_values), len(dp_values))
counts_exp1 = np.zeros(grid_shape)
for i, j in np.ndindex(*grid_shape):  # row-major, same order as nested loops
    iif, dp = iif_values[i], dp_values[j]
    print(iif, dp)
    mzml_path = os.path.join(output_folder, 'SmartROI_{}_{}.mzml'.format(iif, dp))
    scans2boxes, boxes2scans = map_boxes_to_scans(
        MZMLFile(mzml_path), SmartROI_QCA_boxes, half_isolation_window=0)
    counts_exp1[i, j] = len(boxes2scans)
save_obj(counts_exp1, Path(os.getcwd(), 'counts_exp1.p'))

In [None]:
# Heat-map of counts_exp1: rows follow iif_values, columns follow dp_values.
plt.imshow(counts_exp1, aspect='auto')
plt.yticks(range(len(iif_values)), iif_values)
plt.xticks(range(len(dp_values)), dp_values)
# Label the axes so the figure can be read without the code that built the grid.
plt.ylabel('intensity_increase_factor')
plt.xlabel('drop_perc (%)')
plt.title('Experiment 1 - QCA, SmartROI')
plt.colorbar()

## Experiment 2 - QCB, SmartROI

In [None]:
# Experiment 2 results go into ./experiment_2 under the working directory.
output_folder = Path(os.getcwd()) / 'experiment_2'

In [None]:
# Import once for the cell instead of re-running the import statement on every
# pass through the inner loop.
from vimms.Controller import TopN_SmartRoiController

# Grid search: simulate one SmartROI run on the QCB chemicals for every
# (intensity increase factor, drop percentage) pair and write the result as mzML.
for iif in iif_values:
    for dp in dp_values:
        print(iif,dp)
        intensity_increase_factor = iif # fragment ROI again if intensity increases by this factor
        # ROI can also be fragmented again if intensity falls below dp percent of the
        # max *since* last fragmentation; the controller takes this as a fraction
        drop_perc = dp/100

        controller = TopN_SmartRoiController(ionisation_mode, isolation_width, mz_tol, min_ms1_intensity,
                                             min_roi_intensity, min_roi_length, N = N, rt_tol = rt_tol, 
                                             min_roi_length_for_fragmentation = min_roi_length_for_fragmentation,
                                             reset_length_seconds = reset_length_seconds,
                                             intensity_increase_factor = intensity_increase_factor,
                                             drop_perc = drop_perc)

        mass_spec = IndependentMassSpectrometer(ionisation_mode, SmartROI_QCB_dataset,
                                                ps,
                                                add_noise=True,
                                                scan_duration_dict=roi_time_dict)

        env = Environment(mass_spec, controller, min_rt, max_rt, progress_bar=True)
        env.run()

        # The file name encodes the parameter pair; the counting cell rebuilds the same name.
        out_name = 'SmartROI_{}_{}.mzml'.format(iif,dp)
        env.write_mzML(output_folder, out_name)

In [None]:
# counts_exp2[i, j] = len(boxes2scans) for the run with iif_values[i] and dp_values[j],
# evaluated against the boxes picked from the original QCB file.
grid_shape = (len(iif_values), len(dp_values))
counts_exp2 = np.zeros(grid_shape)
for i, j in np.ndindex(*grid_shape):  # row-major, same order as nested loops
    iif, dp = iif_values[i], dp_values[j]
    print(iif, dp)
    mzml_path = os.path.join(output_folder, 'SmartROI_{}_{}.mzml'.format(iif, dp))
    scans2boxes, boxes2scans = map_boxes_to_scans(
        MZMLFile(mzml_path), SmartROI_QCB_boxes, half_isolation_window=0)
    counts_exp2[i, j] = len(boxes2scans)
save_obj(counts_exp2, Path(os.getcwd(), 'counts_exp2.p'))

In [None]:
# Heat-map of counts_exp2: rows follow iif_values, columns follow dp_values.
plt.imshow(counts_exp2, aspect='auto')
plt.yticks(range(len(iif_values)), iif_values)
plt.xticks(range(len(dp_values)), dp_values)
# Label the axes so the figure can be read without the code that built the grid.
plt.ylabel('intensity_increase_factor')
plt.xlabel('drop_perc (%)')
plt.title('Experiment 2 - QCB, SmartROI')
plt.colorbar()

## Experiment 3 - QCA, WeightedDEW

In [None]:
# Experiment 3 results go into ./experiment_3 under the working directory.
output_folder = Path(os.getcwd()) / 'experiment_3'

In [None]:
# Grid search: simulate one WeightedDEW run on the QCA chemicals for every
# (t0, r) pair with t0 <= r, skipping pairs whose mzML file already exists.
for t0 in t0_vals:
    for r in rt_tol_vals:
        if t0 > r:
            continue  # impossible combination
        print(t0, r)

        out_name = 'WeightedDEW_{}_{}.mzml'.format(t0, r)
        if Path(output_folder, out_name).is_file():
            print("Already done")
            continue

        controller = WeightedDEWController(
            ionisation_mode, N, isolation_width, mz_tol, r, min_ms1_intensity,
            exclusion_t_0=t0, log_intensity=True)
        mass_spec = IndependentMassSpectrometer(
            ionisation_mode, WeightedDEW_QCA_dataset, ps,
            add_noise=True, scan_duration_dict=topn_time_dict)

        env = Environment(mass_spec, controller, min_rt, max_rt, progress_bar=True)
        env.run()
        env.write_mzML(output_folder, out_name)

In [None]:
# counts_exp3[i, j] = len(boxes2scans) for the run with t0_vals[i] and rt_tol_vals[j],
# evaluated against the boxes picked from the original QCA file.
grid_shape = (len(t0_vals), len(rt_tol_vals))
counts_exp3 = np.zeros(grid_shape)
for i, j in np.ndindex(*grid_shape):  # row-major, same order as nested loops
    t0, r = t0_vals[i], rt_tol_vals[j]
    print(t0, r)
    if t0 > r:
        # impossible combination: no file was produced, so mark the cell as NaN
        counts_exp3[i, j] = np.nan  # fix for colour on boxplot
        continue
    mzml_path = os.path.join(output_folder, 'WeightedDEW_{}_{}.mzml'.format(t0, r))
    scans2boxes, boxes2scans = map_boxes_to_scans(
        MZMLFile(mzml_path), WeightedDEW_QCA_boxes, half_isolation_window=0)
    counts_exp3[i, j] = len(boxes2scans)
save_obj(counts_exp3, Path(os.getcwd(), 'counts_exp3.p'))

In [None]:
# Heat-map of counts_exp3: rows follow t0_vals, columns follow rt_tol_vals.
# The ticks previously used the SmartROI lists (iif_values / dp_values), which
# mislabelled both axes and left one column without a tick.
plt.imshow(counts_exp3, aspect='auto')
plt.yticks(range(len(t0_vals)), t0_vals)
plt.xticks(range(len(rt_tol_vals)), rt_tol_vals)
plt.ylabel('exclusion_t_0')
plt.xlabel('rt_tol')
plt.title('Experiment 3 - QCA, WeightedDEW')
plt.colorbar()

## Experiment 4 - QCB, WeightedDEW

In [None]:
# Experiment 4 results go into ./experiment_4 under the working directory.
output_folder = Path(os.getcwd()) / 'experiment_4'

In [None]:
# Grid search: simulate one WeightedDEW run on the QCB chemicals for every
# (t0, r) pair with t0 <= r, skipping pairs whose mzML file already exists.
for t0 in t0_vals:
    for r in rt_tol_vals:
        if t0 > r:
            continue  # impossible combination
        print(t0, r)

        out_name = 'WeightedDEW_{}_{}.mzml'.format(t0, r)
        if Path(output_folder, out_name).is_file():
            print("Already done")
            continue

        controller = WeightedDEWController(
            ionisation_mode, N, isolation_width, mz_tol, r, min_ms1_intensity,
            exclusion_t_0=t0, log_intensity=True)
        mass_spec = IndependentMassSpectrometer(
            ionisation_mode, WeightedDEW_QCB_dataset, ps,
            add_noise=True, scan_duration_dict=topn_time_dict)

        env = Environment(mass_spec, controller, min_rt, max_rt, progress_bar=True)
        env.run()
        env.write_mzML(output_folder, out_name)

In [None]:
# counts_exp4[i, j] = len(boxes2scans) for the run with t0_vals[i] and rt_tol_vals[j],
# evaluated against the boxes picked from the original QCB file.
grid_shape = (len(t0_vals), len(rt_tol_vals))
counts_exp4 = np.zeros(grid_shape)
for i, j in np.ndindex(*grid_shape):  # row-major, same order as nested loops
    t0, r = t0_vals[i], rt_tol_vals[j]
    print(t0, r)
    if t0 > r:
        # impossible combination: no file was produced, so mark the cell as NaN
        counts_exp4[i, j] = np.nan  # fix for colour on boxplot
        continue
    mzml_path = os.path.join(output_folder, 'WeightedDEW_{}_{}.mzml'.format(t0, r))
    scans2boxes, boxes2scans = map_boxes_to_scans(
        MZMLFile(mzml_path), WeightedDEW_QCB_boxes, half_isolation_window=0)
    counts_exp4[i, j] = len(boxes2scans)
save_obj(counts_exp4, Path(os.getcwd(), 'counts_exp4.p'))

In [None]:
# Heat-map of counts_exp4: rows follow t0_vals, columns follow rt_tol_vals.
# The ticks previously used the SmartROI lists (iif_values / dp_values), which
# mislabelled both axes and left one column without a tick.
plt.imshow(counts_exp4, aspect='auto')
plt.yticks(range(len(t0_vals)), t0_vals)
plt.xticks(range(len(rt_tol_vals)), rt_tol_vals)
plt.ylabel('exclusion_t_0')
plt.xlabel('rt_tol')
plt.title('Experiment 4 - QCB, WeightedDEW')
plt.colorbar()