# Example notebook to show how the new chemical mixture creator works

In [137]:
import os
import sys
from pathlib import Path

In [147]:
# Location of a local checkout of the ViMMS repository.
# Set the VIMMS_PATH environment variable to point at your own checkout; the
# fallback is the path that was previously hard-coded in this notebook.
vimms_path = os.environ.get('VIMMS_PATH', '/Users/simon/git/vimms')

# Make the checkout importable. The membership test keeps sys.path free of
# duplicate entries when this cell is re-run in the same kernel.
if vimms_path not in sys.path:
    sys.path.append(vimms_path)

# path to fixtures so we can load HMDB
BASE_DIR = os.path.abspath(Path(vimms_path, 'tests', 'integration', 'fixtures'))

In [148]:
%load_ext autoreload
%autoreload 2

from vimms.Common import *
from vimms.Controller import AIF
from vimms.Environment import Environment
from vimms.MassSpec import IndependentMassSpectrometer

# Noise for the MS
from vimms.Noise import GaussianPeakNoise,  UniformSpikeNoise

# Chemical creator import
from vimms.Chemicals import ChemicalMixtureCreator
from vimms.ChemicalSamplers import UniformRTAndIntensitySampler, UniformMS2Sampler, CRPMS2Sampler, GaussianChromatogramSampler




The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Load the HMDB compounds from the test fixtures

In [141]:
# Read the HMDB compound file that sits in the fixtures directory (BASE_DIR).
# load_obj is not imported by name in this notebook; presumably it arrives via
# `from vimms.Common import *` -- confirm.
HMDB = load_obj(Path(BASE_DIR) / 'hmdb_compounds.p')

In [143]:
# Sampling windows, each given as a list holding one (lower, upper) tuple.
RT_RANGE = [(300, 500)]  # retention-time window (presumably seconds -- confirm)
MZ_RANGE = [(0, 1050)]   # m/z window

# How many chemicals to draw for the mixture.
N_CHEMS = 10

In [151]:
# Sample RT and log intensity uniformly. See the class definition for the min
# and max log intensity. Returns actual intensity, but samples in log space.
# The RT bounds are read from RT_RANGE so that the sampler and the sample()
# call below cannot drift apart.
uu = UniformRTAndIntensitySampler(min_rt=RT_RANGE[0][0], max_rt=RT_RANGE[0][1])

# Two alternative MS2 samplers are constructed here; only one of them is
# handed to the mixture creator (see the `us = ...` line below).

# Option 1: a sampler that samples MS2 uniformly between 50 and the mass of
# the formula. The number of MS2 peaks is Poisson with mean poiss_peak_mean.
uniform_ms2_sampler = UniformMS2Sampler(poiss_peak_mean=5)

# Option 2: a CRP MS2 sampler
crp_ms2_sampler = CRPMS2Sampler()

# The sampler actually used. Swap in `uniform_ms2_sampler` to try option 1.
us = crp_ms2_sampler

# generates Gaussian chromatograms
cs = GaussianChromatogramSampler()

# The chemical mixture creator object
cm = ChemicalMixtureCreator(HMDB, rt_and_intensity_sampler=uu,
                            adduct_prior_dict=ADDUCT_DICT_POS_MH,
                            chromatogram_sampler=cs,
                            ms2_sampler=us)

# Draw N_CHEMS chemicals inside the configured m/z and RT windows.
# NOTE(review): the final argument (2) is presumably the number of MS levels --
# the children printed further down are ms_level=2 fragments. Confirm against
# ChemicalMixtureCreator.sample.
# NOTE(review): no random seed is set in this notebook, so the sampled
# chemicals will differ from run to run.
d = cm.sample(MZ_RANGE, RT_RANGE, N_CHEMS, 2)

Print the sampled chemicals, sorted by retention time, together with their children (the MS2 fragments)

In [152]:
# Order the chemicals by retention time. The sort is in place, so `d` stays
# sorted for every cell that uses it afterwards.
d.sort(key=lambda chemical: chemical.rt)

for chem in d:
    # One line for the parent chemical, then one tab-indented line per child.
    print(chem)
    for child in chem.children:
        print(f'\t {child}')

KnownChemical - 'C5H10NO7P' rt=303.83 max_intensity=276339.40
	 MSN Fragment mz=55.1608 ms_level=2
	 MSN Fragment mz=93.0238 ms_level=2
	 MSN Fragment mz=93.8821 ms_level=2
	 MSN Fragment mz=96.7493 ms_level=2
	 MSN Fragment mz=97.3165 ms_level=2
	 MSN Fragment mz=121.1201 ms_level=2
	 MSN Fragment mz=145.3962 ms_level=2
	 MSN Fragment mz=151.4234 ms_level=2
	 MSN Fragment mz=185.1501 ms_level=2
	 MSN Fragment mz=209.1331 ms_level=2
KnownChemical - 'C11H16O7' rt=343.72 max_intensity=3215141.93
	 MSN Fragment mz=124.7622 ms_level=2
	 MSN Fragment mz=134.9777 ms_level=2
	 MSN Fragment mz=183.3021 ms_level=2
	 MSN Fragment mz=208.5835 ms_level=2
	 MSN Fragment mz=234.4510 ms_level=2
	 MSN Fragment mz=256.8066 ms_level=2
KnownChemical - 'C9H20' rt=358.40 max_intensity=32827.53
	 MSN Fragment mz=52.4349 ms_level=2
	 MSN Fragment mz=61.8455 ms_level=2
	 MSN Fragment mz=63.3213 ms_level=2
	 MSN Fragment mz=65.2004 ms_level=2
	 MSN Fragment mz=65.8141 ms_level=2
	 MSN Fragment mz=91.9011 ms_le

Create and run an example controller; in this case, the AIF controller.

In [156]:
# AIF controller covering the configured m/z window: the single
# (lower, upper) tuple in MZ_RANGE is unpacked into the two positional arguments.
controller = AIF(*MZ_RANGE[0])
ionisation_mode = POSITIVE

# Scan durations keyed by integer (presumably MS level -> duration; confirm).
scan_time_dict = {
    1: 0.6,
    2: 0.06,
}

# Gaussian noise models, one for m/z and one for intensity
# (the intensity noise is applied with log_space=True).
mz_noise = GaussianPeakNoise(0.001)
intensity_noise = GaussianPeakNoise(0.1, log_space=True)

# This is new: spike noise, which ensures no scans are empty.
# First argument is the density (how many spikes to sample per unit m/z);
# the second is the maximum spike intensity.
spike_noise = UniformSpikeNoise(0.01, 1e3)

# Note the None passed in place of a peak sampler object -- it isn't used
# anywhere in this example.
mass_spec = IndependentMassSpectrometer(
    ionisation_mode,
    d,
    None,
    scan_duration_dict=scan_time_dict,
    mz_noise=mz_noise,
    intensity_noise=intensity_noise,
    spike_noise=spike_noise,
)


In [157]:
# Tie the mass spectrometer and the controller together.
# NOTE(review): 300 and 600 look like the start and end times of the run
# (the progress bar output ends at 600.000s) -- confirm against Environment.
env = Environment(
    mass_spec,
    controller,
    300,
    600,
    progress_bar=True,
)

In [158]:
# Adjust logging before the run (presumably raises the level to WARNING so the
# progress bar is not buried in log lines -- confirm).
set_log_level_warning()

# Run the simulation ...
env.run()

# ... then write the result as 'test.mzML' in the current directory.
env.write_mzML('.', 'test.mzML')

(600.000s) ms_level=1: 100%|█████████▉| 299.99999999999994/300 [00:07<00:00, 41.59it/s] 
