In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os
import platform
import re

In [3]:
# Location of the vimms checkout that is made importable below. The value is a
# relative path, so it is resolved against the working directory at import
# time; point it at a local clone here rather than hard-coding per-user
# absolute paths.
user_vimms = "vimms"

# Guard the append so that re-running this cell does not stack duplicate
# entries on sys.path.
if user_vimms not in sys.path:
    sys.path.append(user_vimms)

In [4]:
from vimms.Common import (
    POSITIVE, ROI_TYPE_SMART, ROI_EXCLUSION_WEIGHTED_DEW,
    set_log_level_warning,
)

from vimms.Roi import RoiBuilderParams, SmartRoiParams
from vimms.Experiment import ExperimentCase, Experiment
from vimms.Controller import TopNController

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Ionisation mode used throughout: it is fed into the Top-N parameters and into every run_experiment call.
ionisation_mode = POSITIVE
# NOTE(review): not referenced in the cells visible here — presumably a progress-bar toggle; confirm it is still needed.
pbar = False
# Kept as the last statement of the cell, so whatever it returns is echoed as the cell output.
# Presumably restricts vimms logging to warnings and above — confirm against vimms.Common.
set_log_level_warning()

1

### Get seed data

In [6]:
def match_files(data_dir, regex):
    """Return the paths of files in ``data_dir`` matching ``regex``, in numeric order.

    Args:
        data_dir: Directory whose entries are inspected (not recursive).
        regex: Pattern applied with ``re.match`` (anchored at the start of the
            filename only). Its first capture group must capture the digits
            used as the sort key.

    Returns:
        A list of ``os.path.join(data_dir, filename)`` for every matching
        entry, sorted ascending by the integer value of capture group 1.
        Entries with the same index are ordered by filename.

    Raises:
        OSError: If ``data_dir`` cannot be listed.
        IndexError: If ``regex`` has no capture group.
        ValueError: If capture group 1 is not a valid integer.
    """
    pattern = re.compile(regex)

    indexed = []
    # os.listdir returns entries in arbitrary order; iterating a sorted listing
    # makes the ordering of equal indices reproducible.
    for fname in sorted(os.listdir(data_dir)):
        # Match once per filename and reuse the result.
        hit = pattern.match(fname)
        if hit is not None:
            indexed.append((int(hit.group(1)), fname))

    # Stable sort on the numeric index only, so "beer10" comes after "beer2".
    indexed.sort(key=lambda pair: pair[0])
    return [os.path.join(data_dir, fname) for _, fname in indexed]

In [7]:
# Earlier absolute location of the data, kept for reference only (it relies on a
# `user` variable that is not defined by any live code in this notebook):
#multi_dir = "/Users/%s/University of Glasgow/Vinny Davies - CLDS Metabolomics Project/Experimental_Results/20220706_DDAvsDIA" % user
# Root directory of the multi-sample data, given relative to the working directory.
multi_dir = "20220719_multi_samples_main"
# Sub-directory whose fullscan mzML files are matched as seed data.
multibeer_dir = os.path.join(multi_dir, "results_4")

In [8]:
# Seed fullscans, ordered by the beer number captured in the pattern. The dot is
# escaped and the pattern is anchored at the end so that only names ending
# exactly in "_0.mzML" are picked up (not e.g. "..._0.mzML.bak").
multibeer_fullscans = match_files(multibeer_dir, r"fullscan_beer([0-9]+)_0\.mzML$")

# Fail here rather than silently running experiments on an empty file list.
if not multibeer_fullscans:
    raise FileNotFoundError(
        "No fullscan_beer<N>_0.mzML files found in %r" % multibeer_dir
    )

### Specify parameters

In [9]:
# Worker count handed to run_experiment via its num_workers argument.
num_workers = 8 #can't always use number of physical cores because of memory constraints

In [10]:
# Controller settings, grouped into layers that are merged together further down.
experiment_params = dict(
    # Base layer: settings for the Top-N controller.
    topN_params=dict(
        ionisation_mode=ionisation_mode,
        N=20,
        isolation_width=1,
        min_ms1_intensity=5000,
        mz_tol=10,
        rt_tol=30,
    ),
    # ROI layer: extra settings, including how ROIs are built.
    roi_params=dict(
        min_roi_length_for_fragmentation=0,
        roi_params=RoiBuilderParams(min_roi_intensity=0, min_roi_length=3),
    ),
    # Non-overlap layer: currently adds nothing of its own.
    non_overlap_params=dict(),
)

# Retention-time window passed to run_experiment.
min_rt, max_rt = 0, 1440

# Scan durations keyed by integer (presumably the MS level — TODO confirm).
scan_duration_dict = {1: 0.59, 2: 0.19}  # changed from QCB

point_noise_threshold = 0  # changed from QCB

In [11]:
# Parameter sets are layered: each one starts from the previous set and adds
# (or overrides) its own entries.
topN_params = experiment_params["topN_params"]
roi_params = dict(topN_params, **experiment_params["roi_params"])
non_overlap_params = dict(roi_params, **experiment_params["non_overlap_params"])

# One (controller_type, name, params) triple per method under comparison; the
# controller type string doubles as the case name.
cases = [
    (label, label, settings)
    for label, settings in (
        ("topN_exclusion", topN_params),
        ("intensity_non_overlap", non_overlap_params),
    )
]

# 1. Same Beer Repeated Multiple Times

In [12]:
# Every case processes the first fullscan file `repeat` times in a row.
repeat = 20
out_dir = "reoptimised_same_beer"

same_beer_exp = Experiment()
same_beer_exp.add_cases(
    ExperimentCase(
        controller_type,
        multibeer_fullscans[:1] * repeat,
        params,
        name=name,
        pickle_env=True,
    )
    for controller_type, name, params in cases
)

# Keyword arguments for the run, collected in one place.
same_beer_run_options = dict(
    min_rt=min_rt,
    max_rt=max_rt,
    ionisation_mode=ionisation_mode,
    scan_duration_dict=scan_duration_dict,
    overwrite_keyfile=False,
    point_noise_threshold=point_noise_threshold,
    # Filter low intensity signal for memory: half the minimum MS1 intensity.
    chem_noise_threshold=experiment_params["topN_params"]["min_ms1_intensity"] * 0.5,
    num_workers=num_workers,
)
same_beer_exp.run_experiment(out_dir, **same_beer_run_options)

Creating Chemicals...
Generating chemicals for 20220719_multi_samples_main/results_4/fullscan_beer1_0.mzML

Running Experiment of 2 cases...
Outcome being written to: "topN_exclusion_0.mzML"
Outcome being written to: "intensity_non_overlap_0.mzML"
Outcome being written to: "topN_exclusion_1.mzML"
Outcome being written to: "intensity_non_overlap_1.mzML"
Outcome being written to: "topN_exclusion_2.mzML"
Outcome being written to: "intensity_non_overlap_2.mzML"
Outcome being written to: "topN_exclusion_3.mzML"
Outcome being written to: "topN_exclusion_4.mzML"
Outcome being written to: "intensity_non_overlap_3.mzML"
Outcome being written to: "topN_exclusion_5.mzML"
Outcome being written to: "intensity_non_overlap_4.mzML"
Outcome being written to: "topN_exclusion_6.mzML"
Outcome being written to: "intensity_non_overlap_5.mzML"
Outcome being written to: "topN_exclusion_7.mzML"
Outcome being written to: "topN_exclusion_8.mzML"
Outcome being written to: "intensity_non_overlap_6.mzML"
Outcome be

# 3. Repeating Different Beers

In [13]:
# Every case processes the first `bio_repeat` fullscan files, and that whole
# sequence is repeated `tech_repeat` times.
bio_repeat = 6
tech_repeat = 4
out_dir = "reoptimised_repeated_different_beer"

rep_diff_beer_exp = Experiment()
rep_diff_beer_exp.add_cases(
    ExperimentCase(
        controller_type,
        multibeer_fullscans[:bio_repeat] * tech_repeat,
        params,
        name=name,
        pickle_env=True,
    )
    for controller_type, name, params in cases
)

# Keyword arguments for the run, collected in one place.
rep_diff_beer_run_options = dict(
    min_rt=min_rt,
    max_rt=max_rt,
    ionisation_mode=ionisation_mode,
    scan_duration_dict=scan_duration_dict,
    overwrite_keyfile=True,
    point_noise_threshold=point_noise_threshold,
    # Filter low intensity signal for memory: half the minimum MS1 intensity.
    chem_noise_threshold=experiment_params["topN_params"]["min_ms1_intensity"] * 0.5,
    num_workers=num_workers,
)
rep_diff_beer_exp.run_experiment(out_dir, **rep_diff_beer_run_options)

Creating Chemicals...
Generating chemicals for 20220719_multi_samples_main/results_4/fullscan_beer1_0.mzMLGenerating chemicals for 20220719_multi_samples_main/results_4/fullscan_beer3_0.mzMLGenerating chemicals for 20220719_multi_samples_main/results_4/fullscan_beer2_0.mzMLGenerating chemicals for 20220719_multi_samples_main/results_4/fullscan_beer5_0.mzMLGenerating chemicals for 20220719_multi_samples_main/results_4/fullscan_beer6_0.mzMLGenerating chemicals for 20220719_multi_samples_main/results_4/fullscan_beer4_0.mzML






Running Experiment of 2 cases...
Outcome being written to: "topN_exclusion_0.mzML"
Outcome being written to: "intensity_non_overlap_0.mzML"
Outcome being written to: "topN_exclusion_1.mzML"
Outcome being written to: "intensity_non_overlap_1.mzML"
Outcome being written to: "topN_exclusion_2.mzML"
Outcome being written to: "topN_exclusion_3.mzML"
Outcome being written to: "intensity_non_overlap_2.mzML"
Outcome being written to: "topN_exclusion_4.mzML"
Outcome being