# 0. Preliminaries

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import copy
import os
import platform
import re

In [3]:
# Root of the local ViMMS checkout; appended to sys.path so that `vimms` can be imported below.
# NOTE(review): absolute, machine-specific Windows path - use the commented-out relative form elsewhere.
user_vimms = os.path.join("C:\\", "Users", "mcbrider5002", "Desktop", "Workspace", "phd", "peak_picking", "vimms")
#user_vimms = "vimms"
sys.path.append(user_vimms)

# Chooses which MZmine batch template is used; the same suffix is appended to
# the aligned-file names built in the experiment sections.
old_mzmine = False
template_suffix = "_old" if old_mzmine else ""
mzmine_template = os.path.join(user_vimms, "batch_files", f"multi_sample_peak_pick{template_suffix}.xml")

# R scripts shipped inside the ViMMS checkout, handed to XCMSScriptParams and the DsDA params.
xcms_r_script = os.path.join(user_vimms, "vimms", "scripts", "xcms_script.R")
dsda_path = os.path.join(user_vimms, "vimms", "scripts", "dsda_script.R")

In [4]:
# MZmine launcher script, passed to MZMineParams as `mzmine_exe`.
# NOTE(review): absolute, machine-specific Windows path.
mzmine_path = os.path.join("C:\\", "Users", "mcbrider5002", "Desktop", "Workspace", "phd", "peak_picking", "MZmine-2.53-Windows", "startMZmine-Windows.bat")

In [5]:
from vimms.Common import (
    POSITIVE, NEGATIVE, ROI_TYPE_SMART, ROI_EXCLUSION_WEIGHTED_DEW,
    set_log_level_warning,
)

from vimms.Roi import RoiBuilderParams, SmartRoiParams
from vimms.Experiment import ExperimentCase, Experiment
from vimms.Controller import TopNController
from vimms.PeakPicking import MZMineParams, XCMSScriptParams
from vimms.Matching import MatchingScan, Matching
from vimms.BoxManager import BoxManager, BoxSplitter



In [6]:
# Run-wide settings: ionisation mode used by every controller, matching and experiment run.
ionisation_mode = NEGATIVE
pbar = False  # NOTE(review): not referenced by any other visible cell
set_log_level_warning()

1

### Get seed data

In [7]:
def match_files(data_dir, regex):
    """Return the paths of the files in ``data_dir`` that match ``regex``, ordered by numeric ID.

    Args:
        data_dir: Directory whose entries (as returned by ``os.listdir``) are tested.
        regex: Pattern applied with ``re.match``, i.e. anchored at the start of
            the filename only. Its first capture group must contain an integer,
            which is used as the sort key.

    Returns:
        A list of ``os.path.join(data_dir, fname)`` for every matching filename,
        sorted ascending by the captured integer. Empty if nothing matches.

    Raises:
        ValueError: If the first capture group of a match is not a valid integer.
    """
    pattern = re.compile(regex)

    # Match each filename once and keep the match object, instead of running
    # the regex a second time just to pull out the ID.
    candidates = ((pattern.match(fname), fname) for fname in os.listdir(data_dir))
    keyed = [
        (int(found.group(1)), fname)
        for found, fname in candidates
        if found is not None
    ]

    # Sort on the numeric ID only, so "ID10" follows "ID2"; the sort is stable,
    # so files with equal IDs keep their directory-listing order.
    keyed.sort(key=lambda pair: pair[0])
    return [os.path.join(data_dir, fname) for _, fname in keyed]

In [8]:
# Directory holding the negative-mode seed fullscans.
# NOTE(review): absolute, machine-specific Windows path.
data_dir = os.path.join("C:\\", "Users", "mcbrider5002", "Desktop", "Workspace", "phd", "data", "CLMS", "new_matching_seeds", "Negative")

# The dot before the extension is escaped so it only matches a literal ".",
# rather than any character. Files are returned sorted by the captured ID.
all_fullscans = match_files(data_dir, r"Vinny_Beers_[0-9]_[0-9]+_ID([0-9]+)\.mzML")
# Split by ID order: the first 12 files feed the "same beer" sections, the rest
# the "repeated different beers" sections.
same_beer_fullscans = all_fullscans[:12]
rep_diff_beer_fullscans = all_fullscans[12:]
# Added to indices into rep_diff_beer_fullscans when building aligned-file names.
id_offset = len(same_beer_fullscans)

### Specify parameters

In [9]:
# Worker count passed to every run_experiment call in this notebook.
num_workers = 8 #can't always use number of physical cores because of memory constraints

In [10]:
# Keyword arguments per controller family. Later cells layer "roi_params" and
# "non_overlap_params" on top of "topN_params", and layer "smartroi_params" /
# "weighteddew_params" on top of a case's params to build its variants.
experiment_params = {
    # Base Top-N settings; also reused (with N overridden) for the DsDA base controllers.
    "topN_params": {
        "ionisation_mode" : ionisation_mode,
        "N" : 20,
        "isolation_width" : 1,
        "min_ms1_intensity" : 5000, # also reused as the matching intensity threshold and, halved, as the chemical noise threshold
        "mz_tol" : 10,
        "rt_tol" : 60
    },
    
    "roi_params" : {
        "min_roi_length_for_fragmentation" : 0,
        "roi_params" : RoiBuilderParams(
                            min_roi_intensity=0,
                            min_roi_length=3,
                       )
    },
    
    # Intentionally empty: non-overlap controllers currently add nothing beyond the RoI settings.
    "non_overlap_params": {
    },
    
    # Merged last when building "_smartroi" variants, so this rt_tol replaces the Top-N one.
    "smartroi_params": {
        "rt_tol" : 15,
        "smartroi_params" : SmartRoiParams(
                                reset_length_seconds=1E6,
                                intensity_increase_factor=3,
                                drop_perc=0.001
                            )  
    },
    
    # Merged last when building "_weighteddew" variants.
    "weighteddew_params": {
        "rt_tol": 60,
        "exclusion_method": ROI_EXCLUSION_WEIGHTED_DEW,
        "exclusion_t_0": 1
    }    
}

# Retention-time window passed to run_experiment and the DsDA params; max_rt is
# also passed to MatchingScan.topN_times (presumably seconds - confirm).
min_rt = 0
max_rt = 1440
# presumably MS level -> scan duration; verify against MatchingScan.topN_times
scan_duration_dict = {
    1: 0.59,
    2: 0.19
}
# Used as min_roi_intensity when building matchings and as run_experiment's point_noise_threshold.
point_noise_threshold = 0

In [11]:
# Peak-picking settings shared by the XCMS script params and the DsDA params
# (the dict is unpacked into both).
centwave_params = {
    "ppm" : 15,
    "pwlower" : 15,
    "pwupper" : 80,
    "snthresh" : 5,
    "noise" : 1000,
    "prefilterlower" : 3,
    "prefilterupper" : 500,
    "mzdiff" : 0.001
}

# MZmine-based peak picker; only takes effect if selected as pp_params below.
mzmine_object = MZMineParams(
    mzmine_template = mzmine_template,
    mzmine_exe = mzmine_path
)

# XCMS-based peak picker driven by the R script, using the settings above.
# NOTE(review): the None values presumably fall back to the script's defaults - confirm.
xcms_object = XCMSScriptParams(
    xcms_r_script = xcms_r_script,
    **centwave_params,
    mzvsrtbalance = None,
    absmz = None,
    absrt = None,
    kNN = None
)

# Peak picker used for all aligned peak picking and matching in this notebook.
#pp_params = mzmine_object
pp_params = xcms_object

In [12]:
# Settings common to both DsDA configurations; the two variants below add a
# "base_controller" and extend the nested "dsda_params" with "maxdepth".
dsda_params = {
    "dsda_loc" : dsda_path,
    "min_rt" : min_rt,
    "max_rt" : max_rt, 
    "scan_duration_dict" : scan_duration_dict,
    "port" : 7011,
    "rscript_loc" : "RScript",
    "dsda_params" : {
        **centwave_params
    }
}

# DsDA config for the same-beer sections: Top-N base controller with N=10, maxdepth 3.
same_dsda_params = {
    **dsda_params,
    "base_controller" : TopNController(**{**experiment_params["topN_params"], "N" : 10}),
    "dsda_params" : {
        **dsda_params["dsda_params"],
        "maxdepth" : 3
    }
}

# DsDA config for the repeated-different-beer sections: Top-N base controller with N=20, maxdepth None.
rep_diff_dsda_params = {
    **dsda_params,
    "base_controller" : TopNController(**{**experiment_params["topN_params"], "N" : 20}),
    "dsda_params" : {
        **dsda_params["dsda_params"],
        "maxdepth" : None
    }
}

In [13]:
# Matching settings derived from the choices above. In the visible cells only
# "intensity_threshold" is read back (halved, as chem_noise_threshold in the
# per-iteration runs).
matching_params = {
    "aligned_reader" : pp_params,
    "ionisation_mode" : ionisation_mode,
    "isolation_width" : experiment_params["topN_params"]["isolation_width"],
    "intensity_threshold" : experiment_params["topN_params"]["min_ms1_intensity"],
}

### Specify controllers to run

In [14]:
# Parameter sets are layered: RoI controllers extend the Top-N settings and
# non-overlap controllers extend the RoI settings.
topN_params = experiment_params["topN_params"]
roi_params = dict(topN_params, **experiment_params["roi_params"])
non_overlap_params = dict(roi_params, **experiment_params["non_overlap_params"])

# (controller_type, params) pairs to run; commented-out entries are disabled controllers.
cases = [
    ("topN", topN_params),
    #("topN_RoI", roi_params),
    ("topN_exclusion", topN_params),
    ("topNEx", non_overlap_params),
    #("hard_roi_exclusion", non_overlap_params),
    #("intensity_roi_exclusion", non_overlap_params),
    #("non_overlap", non_overlap_params),
    ("intensity_non_overlap", non_overlap_params),
]

# Case names whose inclusion-window runs use the BoxManager built with a splitting BoxSplitter.
intensity_methods = ["intensity_roi_exclusion", "intensity_non_overlap"]

# Controllers that never get a "_smartroi" / "_weighteddew" variant.
no_smartroi = ["topN", "topN_RoI", "topN_exclusion", "dsda"]
# Swap in the commented-out empty list to switch a variant family off entirely.
#run_smartroi = []
run_smartroi = [name for name, _ in cases if name not in no_smartroi]
#run_weighteddew = []
run_weighteddew = [name for name, _ in cases if name not in no_smartroi]

In [15]:
# Named matching variants: name -> (weighting mode passed as `weighted`,
# assignment strategy applied to the matching afterwards).
matching_modes = {
    "unweighted_matching": (Matching.UNWEIGHTED, Matching.MATCHING_ONLY),
    "two_step_matching": (Matching.TWOSTEP, Matching.MATCHING_ONLY),
    "two_step_matching_with_recursive_assignment": (Matching.TWOSTEP, Matching.RECURSIVE_ASSIGNMENT),
    "two_step_matching_with_nearest_assignment": (Matching.TWOSTEP, Matching.NEAREST_ASSIGNMENT),
}

# Controllers that additionally get an "_inclusion" case built from the
# matching's inclusion boxes.
run_inclusion = [
    "topNEx", "intensity_non_overlap"
]

In [16]:
# Expand each (controller_type, params) pair into (controller_type, case_name, params)
# triples: the plain case first, then its SmartROI and WeightedDEW variants when the
# controller is enabled for them.
# NOTE: this cell is not re-runnable on its own - it rebinds `cases` to triples and
# appends to `intensity_methods` / `run_inclusion`, so re-run the cells that define
# those names first.
variant_specs = (
    (run_smartroi, "_smartroi", "smartroi_params"),
    (run_weighteddew, "_weighteddew", "weighteddew_params"),
)

new_cases = []
for controller_type, params in cases:
    new_cases.append((controller_type, controller_type, params))

    for enabled_controllers, suffix, params_key in variant_specs:
        if controller_type not in enabled_controllers:
            continue

        new_name = controller_type + suffix
        # Variant settings are merged last, so they override the base params.
        new_params = {**params, **experiment_params[params_key]}
        new_cases.append((controller_type, new_name, new_params))

        # Every variant is registered as an intensity method; it is registered
        # for inclusion runs only if its base controller is.
        intensity_methods.append(new_name)
        if controller_type in run_inclusion:
            run_inclusion.append(new_name)

cases = new_cases

In [17]:
def get_matching_cases(fullscans, times_list, aligned_file, edge_limit=None, pickle_env=False):
    """Build the matching cases and the inclusion-window cases derived from them.

    One base ``Matching`` is created per distinct weighting mode in the global
    ``matching_modes``. Modes whose assignment strategy is
    ``Matching.MATCHING_ONLY`` share that base object; every other mode gets a
    deep copy on which ``assign_remaining_scans`` is run. All matchings are
    ``strip()``-ed once they are no longer needed as a copy source.

    Args:
        fullscans: Fullscan paths forwarded to ``Matching.make_matching`` and to
            every ``ExperimentCase``.
        times_list: Scan schedule forwarded to ``Matching.make_matching``.
        aligned_file: Aligned peak file forwarded to ``Matching.make_matching``.
        edge_limit: Forwarded to ``Matching.make_matching``.
        pickle_env: Forwarded to every ``ExperimentCase``.

    Returns:
        A list of ``ExperimentCase``: one "matching" case per entry of
        ``matching_modes``, followed by one ``<name>_inclusion`` case for every
        triple in the global ``cases`` whose controller type is in
        ``run_inclusion``.
    """
    min_intensity = experiment_params["topN_params"]["min_ms1_intensity"]

    matchings = {}
    for weighting in {mode for mode, _ in matching_modes.values()}:
        builder_params = RoiBuilderParams(
            min_roi_intensity=point_noise_threshold,
            at_least_one_point_above=min_intensity * 0.5,
            min_roi_length=2,
        )

        base = Matching.make_matching(
            fullscans,
            times_list,
            pp_params,
            aligned_file,
            ionisation_mode,
            min_intensity,
            roi_params=builder_params,
            edge_limit=edge_limit,
            weighted=weighting,
            full_assignment_strategy=Matching.MATCHING_ONLY,
        )

        for mode_name, (mode_weighting, assignment) in matching_modes.items():
            if mode_weighting != weighting:
                continue

            if assignment == Matching.MATCHING_ONLY:
                matchings[mode_name] = base
                continue

            # Copy before the base is stripped: the copy still needs the graph.
            assigned = copy.deepcopy(base)
            assigned.assign_remaining_scans(assignment)
            # NetworkX graph uses several GB because there are so many edges -
            # delete graph after matching calculated
            assigned.strip()
            matchings[mode_name] = assigned

        base.strip()

    # Inclusion windows always come from the recursive-assignment matching.
    plan = matchings["two_step_matching_with_recursive_assignment"]
    inclusion_boxes = plan.make_inclusion_boxes(rt_width=10, mz_width=10)
    grid_base = BoxManager(inclusion_boxes=inclusion_boxes)
    intensity_grid_base = BoxManager(
        inclusion_boxes=inclusion_boxes,
        box_splitter=BoxSplitter(split=True),
    )

    matching_cases = []
    for mode_name, matching in matchings.items():
        matching_cases.append(
            ExperimentCase(
                "matching", fullscans, {"isolation_width": 1},
                name=mode_name, shareable_base=matching, pickle_env=pickle_env,
            )
        )

    inclusion_cases = []
    for controller_type, basename, params in cases:
        if controller_type not in run_inclusion:
            continue

        # Intensity methods get the manager whose BoxSplitter has split=True.
        shared_grid = intensity_grid_base if basename in intensity_methods else grid_base
        inclusion_cases.append(
            ExperimentCase(
                controller_type, fullscans, params,
                name=basename + "_inclusion", shareable_base=shared_grid,
                pickle_env=pickle_env,
            )
        )

    return matching_cases + inclusion_cases

# 1. Same Beer Repeated (Same Fullscan)

In [18]:
# Section 1 setup: the single fullscan at index `repeat` is reused for all `repeat` injections.
repeat = 6
out_dir = "same_beer"
fullscans = same_beer_fullscans[repeat:repeat+1] * repeat
aligned_file = f"stefanbeersID{repeat+1}" + template_suffix

# aligned_file is rebound to whatever pick_aligned_peaks returns
# (presumably the path of the aligned peak table - confirm).
# NOTE(review): force=False presumably reuses an existing file instead of re-picking.
aligned_file = pp_params.pick_aligned_peaks(
    fullscans,
    out_dir,
    aligned_file,
    force=False
)
print()

# One scan schedule per injection, generated for N=20.
times_list = [
    list(MatchingScan.topN_times(N, max_rt, scan_duration_dict))
    for N in [20] * repeat
]

# All controller cases plus DsDA with the same-beer configuration.
same_cases = cases + [("dsda", "dsda", same_dsda_params)]

5784 aligned boxes contained in file



In [19]:
# Full-length experiment for this section: every controller case plus the
# matching / inclusion cases.
same_beer_exp = Experiment()

# One case per (controller_type, name, params) triple, DsDA included.
same_beer_exp.add_cases(
    ExperimentCase(controller_type, fullscans, params, name=name, pickle_env=False)
    for controller_type, name, params in same_cases
)

# Matching cases and their derived "_inclusion" cases.
same_beer_exp.add_cases(
    get_matching_cases(fullscans, times_list, aligned_file, edge_limit=None, pickle_env=False)
)

# Results are written under <out_dir>/<repeat>.
same_beer_exp.run_experiment(
    os.path.join(out_dir, str(repeat)),
    min_rt=min_rt,
    max_rt=max_rt,
    ionisation_mode=ionisation_mode,
    scan_duration_dict=scan_duration_dict,
    overwrite_keyfile=False,
    point_noise_threshold=point_noise_threshold,
    chem_noise_threshold=experiment_params["topN_params"]["min_ms1_intensity"] * 0.5, #filter low intensity signal for memory
    num_workers=num_workers
)

Creating Chemicals...

Running Experiment of 19 cases...


In [20]:
# Print the log recorded for each matching variant of the experiment above.
for case_name in matching_modes:
    i = same_beer_exp.case_names.index(case_name)
    case = same_beer_exp.cases[i]

    print(case_name.upper())
    print(case.log, end="\n\n")

UNWEIGHTED_MATCHING
matching_size: 4511
chem_count: 5784
scan_count: 39360
edge_count: 2833788
chems_above_threshold: 4660
start_scan: 2024-09-21 14:37:06.671859
end_scan: 2024-09-21 14:43:24.496871
start_chem: 2024-09-21 14:43:25.275115
end_chem: 2024-09-21 14:43:25.365977
start_matching: 2024-09-21 14:43:25.365977
end_matching: 2024-09-21 14:43:42.327577
start_assign: 2024-09-21 14:43:42.327577
end_assign: 2024-09-21 14:43:42.335654

TWO_STEP_MATCHING
matching_size: 4511
chem_count: 5784
scan_count: 39360
edge_count: 2833788
chems_above_threshold: 4660
start_scan: 2024-09-21 14:43:56.392449
end_scan: 2024-09-21 14:50:15.256191
start_chem: 2024-09-21 14:50:16.041673
end_chem: 2024-09-21 14:50:16.124339
start_matching: 2024-09-21 14:50:16.132392
end_matching: 2024-09-21 14:51:05.144100
start_assign: 2024-09-21 14:51:05.144100
end_assign: 2024-09-21 14:51:05.154181

TWO_STEP_MATCHING_WITH_RECURSIVE_ASSIGNMENT
matching_size: 4511
chem_count: 5784
scan_count: 39360
edge_count: 2833788
che

In [21]:
#run matching exhaustively up to the right number of iterations
# For every shorter run length i (1 .. repeat-1) the matching / inclusion cases
# are rebuilt on the first i fullscans and written to <out_dir>/<i>. Controller-only
# cases are not rerun here.
# NOTE(review): this rebinds same_beer_exp, so afterwards the name refers to the
# last short experiment rather than the full-length one whose logs were printed above.
for i in range(1, repeat):
    iter_dir = os.path.join(out_dir, str(i))
    iter_times = times_list[:i]
    
    same_beer_exp = Experiment()
    same_beer_exp.add_cases(
        get_matching_cases(fullscans[:i], iter_times, aligned_file, edge_limit=None, pickle_env=False)
    )

    same_beer_exp.run_experiment(
        iter_dir,
        min_rt=min_rt,
        max_rt=max_rt,
        ionisation_mode=ionisation_mode,
        scan_duration_dict=scan_duration_dict,
        overwrite_keyfile=False,
        point_noise_threshold=point_noise_threshold,
        chem_noise_threshold=matching_params["intensity_threshold"] * 0.5, #filter low intensity signal for memory
        num_workers=num_workers
    )

Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...


# 2. Same Beer Repeated (Different Fullscans)

In [22]:
# Section 2 setup: a different fullscan of the same beer for each of the `repeat`
# injections (the second half of same_beer_fullscans).
repeat = 6
out_dir = "same_iterbeer"
fullscans = same_beer_fullscans[repeat:2*repeat]
aligned_file = f"stefanbeersID{repeat+1}-{2*repeat}" + template_suffix

# aligned_file is rebound to whatever pick_aligned_peaks returns
# (presumably the path of the aligned peak table - confirm).
aligned_file = pp_params.pick_aligned_peaks(
    fullscans,
    out_dir,
    aligned_file,
    force=False
)
print()

# One scan schedule per injection, generated for N=20.
times_list = [
    list(MatchingScan.topN_times(N, max_rt, scan_duration_dict))
    for N in [20] * repeat
]

# All controller cases plus DsDA with the same-beer configuration.
same_cases = cases + [("dsda", "dsda", same_dsda_params)]

9671 aligned boxes contained in file



In [23]:
# Full-length experiment for this section: every controller case plus the
# matching / inclusion cases.
same_beer_exp = Experiment()

# One case per (controller_type, name, params) triple, DsDA included.
same_beer_exp.add_cases(
    ExperimentCase(controller_type, fullscans, params, name=name, pickle_env=False)
    for controller_type, name, params in same_cases
)

# Matching cases and their derived "_inclusion" cases.
same_beer_exp.add_cases(
    get_matching_cases(fullscans, times_list, aligned_file, edge_limit=None, pickle_env=False)
)

# Results are written under <out_dir>/<repeat>.
same_beer_exp.run_experiment(
    os.path.join(out_dir, str(repeat)),
    min_rt=min_rt,
    max_rt=max_rt,
    ionisation_mode=ionisation_mode,
    scan_duration_dict=scan_duration_dict,
    overwrite_keyfile=False,
    point_noise_threshold=point_noise_threshold,
    chem_noise_threshold=experiment_params["topN_params"]["min_ms1_intensity"] * 0.5, #filter low intensity signal for memory
    num_workers=num_workers
)

Creating Chemicals...

Running Experiment of 19 cases...


In [24]:
# Print the log recorded for each matching variant of the experiment above.
for case_name in matching_modes:
    i = same_beer_exp.case_names.index(case_name)
    case = same_beer_exp.cases[i]

    print(case_name.upper())
    print(case.log, end="\n\n")

UNWEIGHTED_MATCHING
matching_size: 7560
chem_count: 9671
scan_count: 39360
edge_count: 2831327
chems_above_threshold: 7916
start_scan: 2024-09-21 18:12:44.389714
end_scan: 2024-09-21 18:44:44.739995
start_chem: 2024-09-21 18:44:47.885450
end_chem: 2024-09-21 18:44:48.077023
start_matching: 2024-09-21 18:44:48.087090
end_matching: 2024-09-21 18:45:09.891475
start_assign: 2024-09-21 18:45:09.891475
end_assign: 2024-09-21 18:45:09.903571

TWO_STEP_MATCHING
matching_size: 7560
chem_count: 9671
scan_count: 39360
edge_count: 2831327
chems_above_threshold: 7916
start_scan: 2024-09-21 18:45:24.008181
end_scan: 2024-09-21 19:17:25.495372
start_chem: 2024-09-21 19:17:28.662066
end_chem: 2024-09-21 19:17:28.853567
start_matching: 2024-09-21 19:17:28.863644
end_matching: 2024-09-21 19:18:24.761427
start_assign: 2024-09-21 19:18:24.761427
end_assign: 2024-09-21 19:18:24.771532

TWO_STEP_MATCHING_WITH_RECURSIVE_ASSIGNMENT
matching_size: 7560
chem_count: 9671
scan_count: 39360
edge_count: 2831327
che

In [25]:
#run matching exhaustively up to the right number of iterations
# For every shorter run length i (1 .. repeat-1) the matching / inclusion cases
# are rebuilt on the first i fullscans and written to <out_dir>/<i>. Controller-only
# cases are not rerun here.
# NOTE(review): this rebinds same_beer_exp, so afterwards the name refers to the
# last short experiment rather than the full-length one whose logs were printed above.
for i in range(1, repeat):
    iter_dir = os.path.join(out_dir, str(i))
    iter_times = times_list[:i]
    
    same_beer_exp = Experiment()
    same_beer_exp.add_cases(
        get_matching_cases(fullscans[:i], iter_times, aligned_file, edge_limit=None, pickle_env=False)
    )

    same_beer_exp.run_experiment(
        iter_dir,
        min_rt=min_rt,
        max_rt=max_rt,
        ionisation_mode=ionisation_mode,
        scan_duration_dict=scan_duration_dict,
        overwrite_keyfile=False,
        point_noise_threshold=point_noise_threshold,
        chem_noise_threshold=matching_params["intensity_threshold"] * 0.5, #filter low intensity signal for memory
        num_workers=num_workers
    )

Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...


# 3. Same Beer Repeated (Different Fullscans, Different Plan)

In [26]:
# Section 3 setup: the plan (matching) is built from one set of fullscans
# (`fullscans2`) and then evaluated on a different set (`fullscans`).
repeat = 6
out_dir = "same_iterswapbeer"
fullscans = same_beer_fullscans[repeat:repeat*2]
fullscans2 = same_beer_fullscans[:repeat] # use the ones that happened _first_ for the plan
aligned_file = f"stefanbeersID1-{repeat}" + template_suffix

# The aligned file is named after, and later used to plan on, the *plan*
# fullscans, so peaks must be picked from `fullscans2`. Picking from `fullscans`
# only appeared to work while a previously generated file was being reused
# (force=False); a fresh run would have written the wrong peaks under this name.
aligned_file = pp_params.pick_aligned_peaks(
    fullscans2,
    out_dir,
    aligned_file,
    force=False
)
print()

# One scan schedule per injection, generated for N=20.
times_list = [
    list(MatchingScan.topN_times(N, max_rt, scan_duration_dict))
    for N in [20] * repeat
]

# NOTE(review): not used by the visible cells of this section, which only run
# the matching / inclusion cases.
same_cases = cases + [("dsda", "dsda", same_dsda_params)]

10385 aligned boxes contained in file



In [27]:
# Plan on one set of fullscans, evaluate on another: for each run length i
# (1 .. repeat) the matching / inclusion cases are built from the first i plan
# fullscans (`fullscans2`), then each case's fullscan_paths is swapped to the first
# i files of `fullscans` before the experiment is run into <out_dir>/<i>.
for i in range(1, repeat+1):
    iter_dir = os.path.join(out_dir, str(i))
    iter_times = times_list[:i]
    
    same_beer_exp = Experiment()

    matching_cases = get_matching_cases(fullscans2[:i], iter_times, aligned_file, edge_limit=None, pickle_env=False)
    for case in matching_cases: case.fullscan_paths = fullscans[:i]
    
    same_beer_exp.add_cases(matching_cases)

    same_beer_exp.run_experiment(
        iter_dir,
        min_rt=min_rt,
        max_rt=max_rt,
        ionisation_mode=ionisation_mode,
        scan_duration_dict=scan_duration_dict,
        overwrite_keyfile=False,
        point_noise_threshold=point_noise_threshold,
        chem_noise_threshold=matching_params["intensity_threshold"] * 0.5, #filter low intensity signal for memory
        num_workers=num_workers
    )

Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...


# 4. Repeating Different Beers (Same Fullscans)

In [28]:
# Section 4 setup: `bio_repeat` different fullscans, with that whole group
# repeated `tech_repeat` times (the same files are reused for each repetition).
bio_repeat = 4
tech_repeat = 3
repeat = bio_repeat * tech_repeat

out_dir = "repeated_different_beer"
fullscans = rep_diff_beer_fullscans[:bio_repeat] * tech_repeat
aligned_file = f"stefanbeersID{id_offset+1}-{id_offset+bio_repeat}" + template_suffix
edge_limit = None # NOTE(review): the calls in this section pass edge_limit=None literally rather than this variable

# aligned_file is rebound to whatever pick_aligned_peaks returns
# (presumably the path of the aligned peak table - confirm).
aligned_file = pp_params.pick_aligned_peaks(
    fullscans,
    out_dir,
    aligned_file,
    force=False
)
print()

# One scan schedule per injection, generated for N=20.
times_list = [
    list(MatchingScan.topN_times(N, max_rt, scan_duration_dict))
    for N in [20] * (bio_repeat * tech_repeat)
]

# All controller cases plus DsDA with the repeated-different-beer configuration.
repeated_different_cases = cases + [("dsda", "dsda", rep_diff_dsda_params)]

11804 aligned boxes contained in file



In [29]:
# Full-length experiment for this section: every controller case plus the
# matching / inclusion cases.
rep_diff_beer_exp = Experiment()

# One case per (controller_type, name, params) triple, DsDA included.
rep_diff_beer_exp.add_cases(
    ExperimentCase(controller_type, fullscans, params, name=name, pickle_env=False)
    for controller_type, name, params in repeated_different_cases
)

# Matching cases and their derived "_inclusion" cases.
rep_diff_beer_exp.add_cases(
    get_matching_cases(fullscans, times_list, aligned_file, edge_limit=None, pickle_env=False)
)

# Results are written under <out_dir>/<bio_repeat * tech_repeat>.
rep_diff_beer_exp.run_experiment(
    os.path.join(out_dir, str(bio_repeat * tech_repeat)),
    min_rt=min_rt,
    max_rt=max_rt,
    ionisation_mode=ionisation_mode,
    scan_duration_dict=scan_duration_dict,
    overwrite_keyfile=False,
    point_noise_threshold=point_noise_threshold,
    chem_noise_threshold=experiment_params["topN_params"]["min_ms1_intensity"] * 0.5, #filter low intensity signal for memory
    num_workers=num_workers
)

Creating Chemicals...

Running Experiment of 19 cases...


In [30]:
# Print the log recorded for each matching variant of the experiment above.
for case_name in matching_modes:
    i = rep_diff_beer_exp.case_names.index(case_name)
    case = rep_diff_beer_exp.cases[i]

    print(case_name.upper())
    print(case.log, end="\n\n")

UNWEIGHTED_MATCHING
matching_size: 9479
chem_count: 11804
scan_count: 78720
edge_count: 5976762
chems_above_threshold: 9802
start_scan: 2024-09-22 06:26:56.447829
end_scan: 2024-09-22 06:51:52.827853
start_chem: 2024-09-22 06:51:55.147542
end_chem: 2024-09-22 06:51:55.391721
start_matching: 2024-09-22 06:51:55.401886
end_matching: 2024-09-22 06:52:43.424049
start_assign: 2024-09-22 06:52:43.424049
end_assign: 2024-09-22 06:52:43.444203

TWO_STEP_MATCHING
matching_size: 9479
chem_count: 11804
scan_count: 78720
edge_count: 5976762
chems_above_threshold: 9802
start_scan: 2024-09-22 06:53:15.952224
end_scan: 2024-09-22 07:17:55.953347
start_chem: 2024-09-22 07:17:58.253364
end_chem: 2024-09-22 07:17:58.515511
start_matching: 2024-09-22 07:17:58.525587
end_matching: 2024-09-22 07:20:02.068429
start_assign: 2024-09-22 07:20:02.068429
end_assign: 2024-09-22 07:20:02.088583

TWO_STEP_MATCHING_WITH_RECURSIVE_ASSIGNMENT
matching_size: 9479
chem_count: 11804
scan_count: 78720
edge_count: 5976762


In [31]:
#run matching exhaustively up to the right number of iterations
# For every shorter run length i (1 .. repeat-1) the matching / inclusion cases
# are rebuilt on the first i fullscans and written to <out_dir>/<i>. Controller-only
# cases are not rerun here.
# NOTE(review): this rebinds rep_diff_beer_exp, so afterwards the name refers to the
# last short experiment rather than the full-length one whose logs were printed above.
for i in range(1, repeat):
    iter_dir = os.path.join(out_dir, str(i))
    iter_times = times_list[:i]
    
    rep_diff_beer_exp = Experiment()
    rep_diff_beer_exp.add_cases(
        get_matching_cases(fullscans[:i], iter_times, aligned_file, edge_limit=None, pickle_env=False)
    )

    rep_diff_beer_exp.run_experiment(
        iter_dir,
        min_rt=min_rt,
        max_rt=max_rt,
        ionisation_mode=ionisation_mode,
        scan_duration_dict=scan_duration_dict,
        overwrite_keyfile=False,
        point_noise_threshold=point_noise_threshold,
        chem_noise_threshold=matching_params["intensity_threshold"] * 0.5, #filter low intensity signal for memory
        num_workers=num_workers
    )

Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...


# 5. Repeating Different Beers (Different Fullscans)

In [32]:
# Section 5 setup: a distinct fullscan for each of the bio_repeat * tech_repeat
# injections (the slice [repeat:2*repeat] of rep_diff_beer_fullscans).
bio_repeat = 4
tech_repeat = 3
repeat = bio_repeat * tech_repeat

out_dir = "repeated_different_iterbeer"
fullscans = rep_diff_beer_fullscans[repeat:2*repeat]
aligned_file = f"stefanbeersID{id_offset+repeat+1}-{id_offset+2*repeat}" + template_suffix
edge_limit = None # NOTE(review): the calls in this section pass edge_limit=None literally rather than this variable

# aligned_file is rebound to whatever pick_aligned_peaks returns
# (presumably the path of the aligned peak table - confirm).
aligned_file = pp_params.pick_aligned_peaks(
    fullscans,
    out_dir,
    aligned_file,
    force=False
)
print()

# One scan schedule per injection, generated for N=20.
times_list = [
    list(MatchingScan.topN_times(N, max_rt, scan_duration_dict))
    for N in [20] * (bio_repeat * tech_repeat)
]

# All controller cases plus DsDA with the repeated-different-beer configuration.
repeated_different_cases = cases + [("dsda", "dsda", rep_diff_dsda_params)]

17741 aligned boxes contained in file



In [33]:
# Full-length experiment for this section: every controller case plus the
# matching / inclusion cases.
rep_diff_beer_exp = Experiment()

# One case per (controller_type, name, params) triple, DsDA included.
rep_diff_beer_exp.add_cases(
    ExperimentCase(controller_type, fullscans, params, name=name, pickle_env=False)
    for controller_type, name, params in repeated_different_cases
)

# Matching cases and their derived "_inclusion" cases.
rep_diff_beer_exp.add_cases(
    get_matching_cases(fullscans, times_list, aligned_file, edge_limit=None, pickle_env=False)
)

# Results are written under <out_dir>/<bio_repeat * tech_repeat>.
rep_diff_beer_exp.run_experiment(
    os.path.join(out_dir, str(bio_repeat * tech_repeat)),
    min_rt=min_rt,
    max_rt=max_rt,
    ionisation_mode=ionisation_mode,
    scan_duration_dict=scan_duration_dict,
    overwrite_keyfile=False,
    point_noise_threshold=point_noise_threshold,
    chem_noise_threshold=experiment_params["topN_params"]["min_ms1_intensity"] * 0.5, #filter low intensity signal for memory
    num_workers=num_workers
)

Creating Chemicals...

Running Experiment of 19 cases...


In [34]:
# Print the log recorded for each matching variant of the experiment above.
for case_name in matching_modes:
    i = rep_diff_beer_exp.case_names.index(case_name)
    case = rep_diff_beer_exp.cases[i]

    print(case_name.upper())
    print(case.log, end="\n\n")

UNWEIGHTED_MATCHING
matching_size: 14471
chem_count: 17741
scan_count: 78720
edge_count: 5948549
chems_above_threshold: 15021
start_scan: 2024-09-23 00:49:07.044675
end_scan: 2024-09-23 01:58:19.878182
start_chem: 2024-09-23 01:58:26.781962
end_chem: 2024-09-23 01:58:27.330007
start_matching: 2024-09-23 01:58:27.362677
end_matching: 2024-09-23 01:59:12.956208
start_assign: 2024-09-23 01:59:12.956208
end_assign: 2024-09-23 01:59:12.986734

TWO_STEP_MATCHING
matching_size: 14471
chem_count: 17741
scan_count: 78720
edge_count: 5948549
chems_above_threshold: 15021
start_scan: 2024-09-23 01:59:45.810850
end_scan: 2024-09-23 03:08:35.273684
start_chem: 2024-09-23 03:08:42.388924
end_chem: 2024-09-23 03:08:42.928177
start_matching: 2024-09-23 03:08:42.968839
end_matching: 2024-09-23 03:11:10.446926
start_assign: 2024-09-23 03:11:10.446926
end_assign: 2024-09-23 03:11:10.518228

TWO_STEP_MATCHING_WITH_RECURSIVE_ASSIGNMENT
matching_size: 14471
chem_count: 17741
scan_count: 78720
edge_count: 594

In [35]:
#run matching exhaustively up to the right number of iterations
# For every shorter run length i (1 .. repeat-1) the matching / inclusion cases
# are rebuilt on the first i fullscans and written to <out_dir>/<i>. Controller-only
# cases are not rerun here.
# NOTE(review): this rebinds rep_diff_beer_exp, so afterwards the name refers to the
# last short experiment rather than the full-length one whose logs were printed above.
for i in range(1, repeat):
    iter_dir = os.path.join(out_dir, str(i))
    iter_times = times_list[:i]
    
    rep_diff_beer_exp = Experiment()
    rep_diff_beer_exp.add_cases(
        get_matching_cases(fullscans[:i], iter_times, aligned_file, edge_limit=None, pickle_env=False)
    )

    rep_diff_beer_exp.run_experiment(
        iter_dir,
        min_rt=min_rt,
        max_rt=max_rt,
        ionisation_mode=ionisation_mode,
        scan_duration_dict=scan_duration_dict,
        overwrite_keyfile=False,
        point_noise_threshold=point_noise_threshold,
        chem_noise_threshold=matching_params["intensity_threshold"] * 0.5, #filter low intensity signal for memory
        num_workers=num_workers
    )

Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...


# 6. Repeating Different Beers (Different Fullscans, Different Plan)

In [36]:
# Section 6 setup: the plan (matching) is built from one set of fullscans
# (`fullscans2`) and then evaluated on a different set (`fullscans`).
bio_repeat = 4
tech_repeat = 3
repeat = bio_repeat * tech_repeat

out_dir = "repeated_different_iterswapbeer"
fullscans = rep_diff_beer_fullscans[repeat:2*repeat]
fullscans2 = rep_diff_beer_fullscans[:repeat] # use the ones that happened _first_ for the plan
aligned_file = f"stefanbeersID{id_offset+1}-{id_offset+repeat}" + template_suffix
edge_limit = None # NOTE(review): the calls in this section pass edge_limit=None literally rather than this variable

# The aligned file is named after, and later used to plan on, the *plan*
# fullscans, so peaks must be picked from `fullscans2`. Picking from `fullscans`
# only appeared to work while a previously generated file was being reused
# (force=False); a fresh run would have written the wrong peaks under this name.
aligned_file = pp_params.pick_aligned_peaks(
    fullscans2,
    out_dir,
    aligned_file,
    force=False
)
print()

# One scan schedule per injection, generated for N=20.
times_list = [
    list(MatchingScan.topN_times(N, max_rt, scan_duration_dict))
    for N in [20] * (bio_repeat * tech_repeat)
]

# NOTE(review): not used by the visible cells of this section, which only run
# the matching / inclusion cases.
repeated_different_cases = cases + [("dsda", "dsda", rep_diff_dsda_params)]

16916 aligned boxes contained in file



In [37]:
# Plan on one set of fullscans, evaluate on another: for each run length i
# (1 .. repeat) the matching / inclusion cases are built from the first i plan
# fullscans (`fullscans2`), then each case's fullscan_paths is swapped to the first
# i files of `fullscans` before the experiment is run into <out_dir>/<i>.
for i in range(1, repeat+1):
    iter_dir = os.path.join(out_dir, str(i))
    iter_times = times_list[:i]
    
    rep_diff_beer_exp = Experiment()

    matching_cases = get_matching_cases(fullscans2[:i], iter_times, aligned_file, edge_limit=None, pickle_env=False)
    for case in matching_cases: case.fullscan_paths = fullscans[:i]
    
    rep_diff_beer_exp.add_cases(matching_cases)

    rep_diff_beer_exp.run_experiment(
        iter_dir,
        min_rt=min_rt,
        max_rt=max_rt,
        ionisation_mode=ionisation_mode,
        scan_duration_dict=scan_duration_dict,
        overwrite_keyfile=False,
        point_noise_threshold=point_noise_threshold,
        chem_noise_threshold=matching_params["intensity_threshold"] * 0.5, #filter low intensity signal for memory
        num_workers=num_workers
    )

Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
Creating Chemicals...

Running Experiment of 10 cases...
