# 0. Preliminaries

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os
import platform
import re

In [3]:
# Directory added to the import search path so that the `vimms` imports in the
# following cell can resolve. It is a relative path, so it is resolved against
# the notebook's working directory; change it here if your copy lives elsewhere.
# (Earlier revisions hard-coded per-user absolute paths at this point.)
user_vimms = "vimms"

# Guard the append so that re-running this cell (or Restart & Run All after a
# partial run) does not keep stacking duplicate entries onto sys.path.
if user_vimms not in sys.path:
    sys.path.append(user_vimms)

In [4]:
from vimms.Common import (
    POSITIVE, ROI_TYPE_SMART, ROI_EXCLUSION_WEIGHTED_DEW,
    set_log_level_warning,
)

from vimms.Roi import RoiBuilderParams, SmartRoiParams
from vimms.Experiment import ExperimentCase, Experiment

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Run-wide settings shared by every experiment below.
# `ionisation_mode` is placed into the Top-N parameter dict and is also passed
# to each run_experiment() call.
ionisation_mode = POSITIVE
# NOTE(review): `pbar` is not referenced anywhere else in the visible notebook — confirm it is still needed.
pbar = False
# Helper imported from vimms.Common; presumably lowers log verbosity to warnings — TODO confirm.
# Kept as the last statement of the cell: its return value is what the cell echoes.
set_log_level_warning()

1

### Get seed data

In [6]:
def match_files(data_dir, regex):
    """Return the paths of files in ``data_dir`` matching ``regex``, ordered numerically.

    Each file name in ``data_dir`` is tested with ``re.match`` (anchored at the
    start of the name only). The first capture group of ``regex`` is converted
    to ``int`` and used as the sort key, so ``..._2_...`` sorts before
    ``..._10_...``.

    Args:
        data_dir: Directory whose entries are listed with ``os.listdir``.
        regex: Pattern with at least one capture group; group 1 must capture
            text that ``int()`` can parse.

    Returns:
        A list of ``os.path.join(data_dir, fname)`` for every matching name,
        in ascending order of the captured integer. Names with equal keys keep
        their ``os.listdir`` order. Empty if nothing matches.

    Raises:
        OSError: If ``data_dir`` cannot be listed.
        ValueError: If group 1 of a match is not an integer literal.
        IndexError: If ``regex`` has no capture group.
    """
    pattern = re.compile(regex)

    # Match each name exactly once and keep the result, instead of matching
    # once to filter and a second time to extract the index.
    indexed = []
    for fname in os.listdir(data_dir):
        found = pattern.match(fname)
        if found is not None:
            indexed.append((int(found.group(1)), fname))

    # Sort on the numeric index only (stable), never on the file name.
    indexed.sort(key=lambda pair: pair[0])
    return [os.path.join(data_dir, fname) for _, fname in indexed]

In [7]:
# Root of the seed data set, given relative to the notebook's working directory.
multi_dir = "multibeers_urine_data"

# Both sample types share the same <root>/<sample>/fullscan/mzML layout.
multibeer_dir, multiurine_dir = (
    os.path.join(multi_dir, sample_kind, "fullscan", "mzML")
    for sample_kind in ("beers", "urines")
)

In [8]:
# Collect the fullscan files for each sample type, ordered by the numeric index
# captured in group 1. The extension dot is escaped and the pattern is anchored
# at the end so that only genuine "....mzML" files are picked up (an unescaped
# "." matches any character, and re.match alone does not anchor the end).
multibeer_fullscans = match_files(multibeer_dir, r"Beer_multibeers_([0-9]+)_fullscan1\.mzML$")
multiurine_fullscans = match_files(multiurine_dir, r"Urine_StrokeDrugs_([0-9]+)_fullscan\.mzML$")

### Specify parameters

In [9]:
# Value passed as `num_workers` to every run_experiment() call below.
num_workers = 8 #can't always use number of physical cores because of memory constraints

In [10]:
# Parameter groups, keyed by the feature they configure. Later cells merge these
# dicts with {**a, **b}, so on a key clash the group merged last wins.
experiment_params = {
    # Base settings; every case's parameter dict starts from these.
    "topN_params": {
        "ionisation_mode" : ionisation_mode,
        "N" : 20,
        "isolation_width" : 1,
        "min_ms1_intensity" : 5000,
        "mz_tol" : 10,
        "rt_tol" : 15
    },
    
    # Extra settings merged on top of topN_params for the RoI-based cases.
    "roi_params" : {
        "min_roi_length_for_fragmentation" : 0,
        "roi_params" : RoiBuilderParams(
                            min_roi_intensity=0,
                            min_roi_length=3,
                       )
    },
    
    # Currently empty: the non-overlap cases add nothing on top of roi_params.
    "non_overlap_params": {
    },
    
    # Merged in only for the "<name>_smartroi" variants.
    "smartroi_params": {
        "smartroi_params" : SmartRoiParams(
                                reset_length_seconds=1E6,
                                intensity_increase_factor=2,
                                drop_perc=1E-2
                            )  
    },
    
    # Merged in only for the "<name>_weighteddew" variants. Because it is merged
    # last, this rt_tol (30) replaces the base rt_tol (15) for those variants.
    "weighteddew_params": {
        "rt_tol": 30,
        "exclusion_method": ROI_EXCLUSION_WEIGHTED_DEW,
        "exclusion_t_0": 1
    }    
}

# Arguments passed unchanged to every run_experiment() call below.
# presumably retention-time bounds in seconds — TODO confirm units
min_rt = 0
max_rt = 1440
# keys 1 and 2 are presumably MS levels mapped to scan durations — TODO confirm
scan_duration_dict = { #CHANGED FROM QCB
    1: 0.28,
    2: 0.13
}
point_noise_threshold = 0 #CHANGED FROM QCB

### Specify controllers to run

In [11]:
# Layer the parameter groups: the RoI set extends the Top-N set, and the
# non-overlap set extends the RoI set (later keys override earlier ones).
topN_params = experiment_params["topN_params"]

roi_params = dict(topN_params)
roi_params.update(experiment_params["roi_params"])

non_overlap_params = dict(roi_params)
non_overlap_params.update(experiment_params["non_overlap_params"])

# (controller type, parameter dict) pairs; this order is kept by the cells below.
cases = [
    ("topN", topN_params),
    ("topN_RoI", roi_params),
    ("topN_exclusion", topN_params),
    ("topNEx", non_overlap_params),
    ("hard_roi_exclusion", non_overlap_params),
    ("intensity_roi_exclusion", non_overlap_params),
    ("non_overlap", non_overlap_params),
    ("intensity_non_overlap", non_overlap_params)
]

# Controllers that never receive a "_smartroi" / "_weighteddew" variant.
no_smartroi = ["topN", "topN_RoI", "topN_exclusion"]

# Every remaining controller gets both variants.
# Assign [] to either list to switch that family of variants off.
run_smartroi = [name for name, _ in cases if name not in no_smartroi]
run_weighteddew = list(run_smartroi)

In [12]:
# Expand each (controller_type, params) pair into named cases: the plain case
# first, then its "_smartroi" and "_weighteddew" variants where requested.
# NOTE: this cell rebinds `cases` to 3-tuples, so it cannot be re-run on its own;
# re-run the cell that defines the 2-tuple `cases` first.
new_cases = []
for controller_type, params in cases:
    # The baseline case is named after its controller type.
    new_cases.append((controller_type, controller_type, params))

    if controller_type in run_smartroi:
        new_name = f"{controller_type}_smartroi"
        new_params = dict(params)
        new_params.update(experiment_params["smartroi_params"])
        new_cases.append((controller_type, new_name, new_params))

    if controller_type in run_weighteddew:
        new_name = f"{controller_type}_weighteddew"
        new_params = dict(params)
        new_params.update(experiment_params["weighteddew_params"])
        new_cases.append((controller_type, new_name, new_params))

cases = new_cases

In [13]:
# Dump every expanded case so the final configuration is recorded in the output.
for controller_type, name, params in cases:
    # One call per case; the trailing "" yields the blank separator line.
    print(
        f"NAME: {name}",
        f"CONTROLLER TYPE: {controller_type}",
        f"PARAMS: {params}",
        "",
        sep="\n",
    )

NAME: topN
CONTROLLER TYPE: topN
PARAMS: {'ionisation_mode': 'Positive', 'N': 20, 'isolation_width': 1, 'min_ms1_intensity': 5000, 'mz_tol': 10, 'rt_tol': 15}

NAME: topN_RoI
CONTROLLER TYPE: topN_RoI
PARAMS: {'ionisation_mode': 'Positive', 'N': 20, 'isolation_width': 1, 'min_ms1_intensity': 5000, 'mz_tol': 10, 'rt_tol': 15, 'min_roi_length_for_fragmentation': 0, 'roi_params': {'mz_tol': 10, 'min_roi_length': 3, 'min_roi_intensity': 0, 'at_least_one_point_above': 0, 'start_rt': 0, 'stop_rt': 100000.0, 'max_gaps_allowed': 0}}

NAME: topN_exclusion
CONTROLLER TYPE: topN_exclusion
PARAMS: {'ionisation_mode': 'Positive', 'N': 20, 'isolation_width': 1, 'min_ms1_intensity': 5000, 'mz_tol': 10, 'rt_tol': 15}

NAME: topNEx
CONTROLLER TYPE: topNEx
PARAMS: {'ionisation_mode': 'Positive', 'N': 20, 'isolation_width': 1, 'min_ms1_intensity': 5000, 'mz_tol': 10, 'rt_tol': 15, 'min_roi_length_for_fragmentation': 0, 'roi_params': {'mz_tol': 10, 'min_roi_length': 3, 'min_roi_intensity': 0, 'at_least_on

# 1. Same Beer Repeated Multiple Times

In [14]:
# Scenario 1: for every case, the first beer fullscan file is used `repeat`
# times in a row (`multibeer_fullscans[:1] * repeat` is a 20-element list of
# the same path).
repeat = 20
out_dir = "same_beer"

same_beer_exp = Experiment()
same_beer_exp.add_cases(
    # NOTE(review): this is the only scenario in the notebook that passes
    # pickle_env=True; every other scenario passes False — confirm intentional.
    ExperimentCase(controller_type, multibeer_fullscans[:1] * repeat, params, name=name, pickle_env=True)
    for controller_type, name, params in cases
)
# Run all cases with the shared run-wide settings defined earlier.
same_beer_exp.run_experiment(
    out_dir,
    min_rt=min_rt,
    max_rt=max_rt,
    ionisation_mode=ionisation_mode,
    scan_duration_dict=scan_duration_dict,
    overwrite_keyfile=False,
    point_noise_threshold=point_noise_threshold,
    num_workers=num_workers
)

Creating Chemicals...
Generating chemicals for multibeers_urine_data/beers/fullscan/mzML/Beer_multibeers_1_fullscan1.mzML

Running Experiment of 18 cases...
Outcome being written to: "topN_0.mzML"Outcome being written to: "topN_RoI_0.mzML"Outcome being written to: "topN_exclusion_0.mzML"


Outcome being written to: "topNEx_0.mzML"Outcome being written to: "topNEx_smartroi_0.mzML"Outcome being written to: "hard_roi_exclusion_smartroi_0.mzML"Outcome being written to: "hard_roi_exclusion_0.mzML"



Outcome being written to: "topNEx_weighteddew_0.mzML"
Outcome being written to: "topN_exclusion_1.mzML"
Outcome being written to: "topN_1.mzML"
Outcome being written to: "topN_RoI_1.mzML"
Outcome being written to: "topNEx_1.mzML"
Outcome being written to: "hard_roi_exclusion_1.mzML"
Outcome being written to: "hard_roi_exclusion_smartroi_1.mzML"
Outcome being written to: "topNEx_smartroi_1.mzML"
Outcome being written to: "topNEx_weighteddew_1.mzML"
Outcome being written to: "topN_2.mzML"
Outcome

# 2. Multiple Different Beers

In [15]:
# Scenario 2: for every case, the first `repeat` (19) beer fullscan files are
# used once each, in the numeric order produced by match_files().
repeat = 19
out_dir = "different_beer"

diff_beer_exp = Experiment()
diff_beer_exp.add_cases(
    ExperimentCase(controller_type, multibeer_fullscans[:repeat], params, name=name, pickle_env=False)
    for controller_type, name, params in cases
)
# Run all cases with the shared run-wide settings defined earlier.
diff_beer_exp.run_experiment(
    out_dir,
    min_rt=min_rt,
    max_rt=max_rt,
    ionisation_mode=ionisation_mode,
    scan_duration_dict=scan_duration_dict,
    overwrite_keyfile=False,
    point_noise_threshold=point_noise_threshold,
    num_workers=num_workers
)

Creating Chemicals...
Generating chemicals for multibeers_urine_data/beers/fullscan/mzML/Beer_multibeers_6_fullscan1.mzMLGenerating chemicals for multibeers_urine_data/beers/fullscan/mzML/Beer_multibeers_14_fullscan1.mzMLGenerating chemicals for multibeers_urine_data/beers/fullscan/mzML/Beer_multibeers_10_fullscan1.mzMLGenerating chemicals for multibeers_urine_data/beers/fullscan/mzML/Beer_multibeers_3_fullscan1.mzMLGenerating chemicals for multibeers_urine_data/beers/fullscan/mzML/Beer_multibeers_1_fullscan1.mzMLGenerating chemicals for multibeers_urine_data/beers/fullscan/mzML/Beer_multibeers_18_fullscan1.mzMLGenerating chemicals for multibeers_urine_data/beers/fullscan/mzML/Beer_multibeers_12_fullscan1.mzML
Generating chemicals for multibeers_urine_data/beers/fullscan/mzML/Beer_multibeers_5_fullscan1.mzML






Generating chemicals for multibeers_urine_data/beers/fullscan/mzML/Beer_multibeers_8_fullscan1.mzML
Generating chemicals for multibeers_urine_data/beers/fullscan/mzML/Beer_mu

# 3. Repeating Different Beers

In [16]:
# Scenario 3: the first `bio_repeat` (6) beer fullscan files, with that whole
# list repeated `tech_repeat` (4) times. The resulting order is
# file1..file6, file1..file6, ... (24 entries), not file1 x4, file2 x4, ...
bio_repeat = 6
tech_repeat = 4
out_dir = "repeated_different_beer"

rep_diff_beer_exp = Experiment()
rep_diff_beer_exp.add_cases(
    ExperimentCase(controller_type, multibeer_fullscans[:bio_repeat] * tech_repeat, params, name=name, pickle_env=False)
    for controller_type, name, params in cases
)
# Run all cases with the shared run-wide settings defined earlier.
rep_diff_beer_exp.run_experiment(
    out_dir,
    min_rt=min_rt,
    max_rt=max_rt,
    ionisation_mode=ionisation_mode,
    scan_duration_dict=scan_duration_dict,
    overwrite_keyfile=False,
    point_noise_threshold=point_noise_threshold,
    num_workers=num_workers
)

Creating Chemicals...
Generating chemicals for multibeers_urine_data/beers/fullscan/mzML/Beer_multibeers_3_fullscan1.mzMLGenerating chemicals for multibeers_urine_data/beers/fullscan/mzML/Beer_multibeers_4_fullscan1.mzMLGenerating chemicals for multibeers_urine_data/beers/fullscan/mzML/Beer_multibeers_6_fullscan1.mzMLGenerating chemicals for multibeers_urine_data/beers/fullscan/mzML/Beer_multibeers_1_fullscan1.mzMLGenerating chemicals for multibeers_urine_data/beers/fullscan/mzML/Beer_multibeers_5_fullscan1.mzMLGenerating chemicals for multibeers_urine_data/beers/fullscan/mzML/Beer_multibeers_2_fullscan1.mzML






Running Experiment of 18 cases...
Outcome being written to: "topN_0.mzML"Outcome being written to: "topN_RoI_0.mzML"Outcome being written to: "topN_exclusion_0.mzML"


Outcome being written to: "hard_roi_exclusion_smartroi_0.mzML"Outcome being written to: "hard_roi_exclusion_0.mzML"Outcome being written to: "topNEx_smartroi_0.mzML"


Outcome being written to: "topNEx_weighte

# 4. Same Urine Repeated Multiple Times

In [17]:
# Scenario 4: for every case, the first urine fullscan file is used `repeat`
# times in a row (`multiurine_fullscans[:1] * repeat` is a 20-element list of
# the same path).
repeat = 20
out_dir = "same_urine"

same_urine_exp = Experiment()
same_urine_exp.add_cases(
    ExperimentCase(controller_type, multiurine_fullscans[:1] * repeat, params, name=name, pickle_env=False)
    for controller_type, name, params in cases
)
# Run all cases with the shared run-wide settings defined earlier.
same_urine_exp.run_experiment(
    out_dir,
    min_rt=min_rt,
    max_rt=max_rt,
    ionisation_mode=ionisation_mode,
    scan_duration_dict=scan_duration_dict,
    overwrite_keyfile=False,
    point_noise_threshold=point_noise_threshold,
    num_workers=num_workers
)

Creating Chemicals...
Generating chemicals for multibeers_urine_data/urines/fullscan/mzML/Urine_StrokeDrugs_02_fullscan.mzML

Running Experiment of 18 cases...
Outcome being written to: "topN_0.mzML"Outcome being written to: "topN_exclusion_0.mzML"Outcome being written to: "topN_RoI_0.mzML"


Outcome being written to: "topNEx_smartroi_0.mzML"Outcome being written to: "hard_roi_exclusion_0.mzML"Outcome being written to: "topNEx_0.mzML"Outcome being written to: "topNEx_weighteddew_0.mzML"



Outcome being written to: "hard_roi_exclusion_smartroi_0.mzML"
Outcome being written to: "topN_exclusion_1.mzML"
Outcome being written to: "topN_1.mzML"
Outcome being written to: "topN_RoI_1.mzML"
Outcome being written to: "hard_roi_exclusion_1.mzML"
Outcome being written to: "topNEx_1.mzML"
Outcome being written to: "topNEx_smartroi_1.mzML"
Outcome being written to: "hard_roi_exclusion_smartroi_1.mzML"
Outcome being written to: "topNEx_weighteddew_1.mzML"
Outcome being written to: "topN_2.mzML"
Outc

# 5. Multiple Different Urines

In [18]:
# Scenario 5: for every case, the first `repeat` (22) urine fullscan files are
# used once each, in the numeric order produced by match_files().
repeat = 22
out_dir = "different_urine"

diff_urine_exp = Experiment()
diff_urine_exp.add_cases(
    ExperimentCase(controller_type, multiurine_fullscans[:repeat], params, name=name, pickle_env=False)
    for controller_type, name, params in cases
)
# Run all cases with the shared run-wide settings defined earlier.
diff_urine_exp.run_experiment(
    out_dir,
    min_rt=min_rt,
    max_rt=max_rt,
    ionisation_mode=ionisation_mode,
    scan_duration_dict=scan_duration_dict,
    overwrite_keyfile=False,
    point_noise_threshold=point_noise_threshold,
    num_workers=num_workers
)

Creating Chemicals...
Generating chemicals for multibeers_urine_data/urines/fullscan/mzML/Urine_StrokeDrugs_03_fullscan.mzMLGenerating chemicals for multibeers_urine_data/urines/fullscan/mzML/Urine_StrokeDrugs_09_fullscan.mzMLGenerating chemicals for multibeers_urine_data/urines/fullscan/mzML/Urine_StrokeDrugs_57_fullscan.mzMLGenerating chemicals for multibeers_urine_data/urines/fullscan/mzML/Urine_StrokeDrugs_49_fullscan.mzMLGenerating chemicals for multibeers_urine_data/urines/fullscan/mzML/Urine_StrokeDrugs_54_fullscan.mzMLGenerating chemicals for multibeers_urine_data/urines/fullscan/mzML/Urine_StrokeDrugs_17_fullscan.mzMLGenerating chemicals for multibeers_urine_data/urines/fullscan/mzML/Urine_StrokeDrugs_38_fullscan.mzMLGenerating chemicals for multibeers_urine_data/urines/fullscan/mzML/Urine_StrokeDrugs_02_fullscan.mzML







Generating chemicals for multibeers_urine_data/urines/fullscan/mzML/Urine_StrokeDrugs_51_fullscan.mzML
Generating chemicals for multibeers_urine_data/urin

# 6. Repeating Different Urines

In [19]:
# Scenario 6: the first `bio_repeat` (6) urine fullscan files, with that whole
# list repeated `tech_repeat` (4) times. The resulting order is
# file1..file6, file1..file6, ... (24 entries), not file1 x4, file2 x4, ...
bio_repeat = 6
tech_repeat = 4
out_dir = "repeated_different_urine"

rep_diff_urine_exp = Experiment()
rep_diff_urine_exp.add_cases(
    ExperimentCase(controller_type, multiurine_fullscans[:bio_repeat] * tech_repeat, params, name=name, pickle_env=False)
    for controller_type, name, params in cases
)
# Run all cases with the shared run-wide settings defined earlier.
rep_diff_urine_exp.run_experiment(
    out_dir,
    min_rt=min_rt,
    max_rt=max_rt,
    ionisation_mode=ionisation_mode,
    scan_duration_dict=scan_duration_dict,
    overwrite_keyfile=False,
    point_noise_threshold=point_noise_threshold,
    num_workers=num_workers
)

Creating Chemicals...
Generating chemicals for multibeers_urine_data/urines/fullscan/mzML/Urine_StrokeDrugs_09_fullscan.mzMLGenerating chemicals for multibeers_urine_data/urines/fullscan/mzML/Urine_StrokeDrugs_17_fullscan.mzMLGenerating chemicals for multibeers_urine_data/urines/fullscan/mzML/Urine_StrokeDrugs_03_fullscan.mzMLGenerating chemicals for multibeers_urine_data/urines/fullscan/mzML/Urine_StrokeDrugs_18_fullscan.mzMLGenerating chemicals for multibeers_urine_data/urines/fullscan/mzML/Urine_StrokeDrugs_08_fullscan.mzMLGenerating chemicals for multibeers_urine_data/urines/fullscan/mzML/Urine_StrokeDrugs_02_fullscan.mzML






Running Experiment of 18 cases...
Outcome being written to: "topN_RoI_0.mzML"Outcome being written to: "topN_0.mzML"Outcome being written to: "topN_exclusion_0.mzML"


Outcome being written to: "hard_roi_exclusion_smartroi_0.mzML"
Outcome being written to: "hard_roi_exclusion_0.mzML"Outcome being written to: "topNEx_0.mzML"Outcome being written to: "topNEx_