# 0. Preliminaries

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Enable the autoreload extension; mode 2 picks up edits to imported modules
# live, so local package changes do not require a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os
import platform
import re

In [3]:
# Location of the ViMMS checkout, given relative to the notebook's working
# directory. Point this at your own clone if it lives somewhere else.
user_vimms = "vimms"

# Put the checkout on the import path so the `vimms` imports below resolve.
# The membership check keeps the cell idempotent: re-running it does not keep
# appending duplicate entries to sys.path.
if user_vimms not in sys.path:
    sys.path.append(user_vimms)

In [4]:
from vimms.Common import (
    POSITIVE, ROI_TYPE_SMART, ROI_EXCLUSION_WEIGHTED_DEW,
    set_log_level_warning,
)

from vimms.Roi import RoiBuilderParams, SmartRoiParams
from vimms.Experiment import ExperimentCase, Experiment

In [5]:
# Ionisation mode constant; reused below in shared_params and in the grid-search call.
ionisation_mode = POSITIVE
# NOTE(review): presumably a progress-bar toggle; not referenced in the visible cells.
pbar = False
# Presumably restricts ViMMS logging to warnings and above (inferred from the name; confirm).
# As the last expression in the cell, its return value is echoed as the cell output.
set_log_level_warning()

1

### Get seed data

In [6]:
def match_files(data_dir, regex):
    """Collect the files in ``data_dir`` whose names match ``regex``, in numeric order.

    Args:
        data_dir: Directory whose entries (as returned by ``os.listdir``) are scanned.
        regex: Pattern applied with ``re.match``, i.e. anchored at the start of
            the file name only. Its first capture group must consist of digits;
            the integer value of that group is the primary sort key.

    Returns:
        A list of ``os.path.join(data_dir, fname)`` paths sorted by the captured
        integer. Ties are broken by file name so the result does not depend on
        the arbitrary order in which ``os.listdir`` yields entries.

    Raises:
        ValueError: If the first capture group of a matching name is not an integer.
        IndexError: If ``regex`` has no capture group.
    """
    pattern = re.compile(regex)

    indexed = []
    for fname in os.listdir(data_dir):
        # Match once per name and reuse the result for both the filter and the key.
        found = pattern.match(fname)
        if found is not None:
            indexed.append((int(found.group(1)), fname))

    # Sorting the (index, name) tuples directly gives a deterministic tie-break.
    return [os.path.join(data_dir, fname) for _, fname in sorted(indexed)]

In [7]:
# Root folder of the 20220706 DDA-vs-DIA results, given relative to the working directory.
# Previous per-user location, kept for provenance:
#multi_dir = "/Users/%s/University of Glasgow/Vinny Davies - CLDS Metabolomics Project/Experimental_Results/20220706_DDAvsDIA" % user
multi_dir = "20220706_DDAvsDIA"
# Sub-folder that is scanned below for the seed fullscan files.
multibeer_dir = os.path.join(multi_dir, "results_1")

In [8]:
# Seed fullscans, ordered by the beer number captured from the file name.
# The dot is escaped and the pattern is end-anchored so that only names of the
# form "fullscan_beer<N>_0.mzML" are accepted: an unescaped "." matches any
# character, and a start-anchored match alone would also accept names that
# carry a trailing suffix (e.g. backup copies).
multibeer_fullscans = match_files(multibeer_dir, r"fullscan_beer([0-9]+)_0\.mzML$")

### Specify parameters

In [9]:
# Upper bound on parallel workers: can't always use the number of physical
# cores because of memory constraints, so the count is capped at 48.
max_workers = 48
# Additionally never ask for more workers than this machine has CPUs, so the
# notebook does not oversubscribe smaller machines. os.cpu_count() may return
# None when the count cannot be determined; fall back to a single worker then.
num_workers = min(max_workers, os.cpu_count() or 1)

In [10]:
# Search grids, one per named parameter set. Every parameter maps to the list
# of values to try. The recorded run below reports 230 cases, which is
# consistent with 25 + 80 + 125 combinations across the three grids.
# The commented-out lists are smaller alternatives to the full value lists.
search_params = {
    # 5 values of N x 5 values of rt_tol = 25 combinations.
    "topN_params" : {
        "N" : [1, 3, 5, 10, 20],
        #"N" : [1, 5],
        "rt_tol" : [15, 30, 60, 120, 240]
        #"rt_tol" : [60, 120]
    },
    
    # 5 values of N x 1 rt_tol x 16 SmartRoiParams objects = 80 combinations.
    "smartroi_params" : {
        "N" : [1, 3, 5, 10, 20],
        #"N" : [1, 5],
        "rt_tol" : [15],
        # One SmartRoiParams per (alpha, beta) pair: 4 x 4 = 16 settings, with
        # alpha passed as intensity_increase_factor and beta as drop_perc.
        "smartroi_params" : [
            SmartRoiParams(
                reset_length_seconds=1E6,
                intensity_increase_factor=alpha,
                drop_perc=beta
            )
            for alpha in [2, 3, 5, 10] for beta in [0, 1E-3, 1E-2, 1E-1]
            #for alpha in [2, 3] for beta in [0, 1E-3]
        ]
    },
    
    # 1 exclusion method x 5 x 5 x 5 = 125 combinations.
    "weighteddew_params" : {
        "exclusion_method": [ROI_EXCLUSION_WEIGHTED_DEW],
        "N" : [1, 3, 5, 10, 20],
        #"N" : [1, 5],
        "rt_tol" : [15, 30, 60, 120, 240],
        #"rt_tol" : [15, 30],
        "exclusion_t_0" : [1, 10, 15, 30, 60]
        #"exclusion_t_0" : [1, 10]
    }
}

In [11]:
# Fixed (non-searched) settings, split into a Top-N block and an ROI block;
# the two are merged into a single dict in the next cell.
shared_params = {
    "topN_params": {
        "ionisation_mode" : ionisation_mode,
        "isolation_width" : 1,
        "min_ms1_intensity" : 5000,
        # NOTE(review): unit not visible here; presumably ppm. Confirm against ViMMS.
        "mz_tol" : 10,
    },
    
    "roi_params" : {
        "min_roi_length_for_fragmentation" : 0,
        # ROI builder settings object; only these two fields are set explicitly.
        "roi_params" : RoiBuilderParams(
                            min_roi_intensity=0,
                            min_roi_length=3,
                       )
    }
}

# Retention-time window passed to the grid search (presumably seconds; confirm).
min_rt = 0
max_rt = 1440
# Per-scan durations; keys are presumably MS levels (1 and 2). Confirm against ViMMS.
scan_duration_dict = {
    1: 0.59,
    2: 0.19
}

In [12]:
# Alias for the Top-N block of the shared settings.
topN_shared = shared_params["topN_params"]

# Build a fresh dict for the ROI-based controllers: start from a copy of the
# Top-N settings, then layer the ROI settings on top (ROI values win on any
# duplicate key). Neither source dict is modified.
roi_shared = dict(topN_shared)
roi_shared.update(shared_params["roi_params"])

# Grid Search

In [13]:
bio_repeat = 3   # number of distinct seed fullscans taken from the sorted list
tech_repeat = 2  # how many times that list of seeds is repeated
out_dir = "real_grid_search"

# Fail fast if the seed files were not found: slicing would otherwise silently
# hand fewer (or zero) files to a long-running, many-worker search.
if len(multibeer_fullscans) < bio_repeat:
    raise ValueError(
        "Expected at least %d seed fullscans in %r, found %d"
        % (bio_repeat, multibeer_dir, len(multibeer_fullscans))
    )

# Launch the grid search over every combination in search_params, using
# roi_shared as the fixed settings and writing results under out_dir.
# NOTE(review): the first argument is presumably the name of the evaluation
# method used to score each case; confirm against Experiment.run_grid_search.
search_exp = Experiment.run_grid_search(
    "intensity_non_overlap",
    multibeer_fullscans[:bio_repeat] * tech_repeat,
    roi_shared,
    search_params,
    out_dir,
    min_rt=min_rt,
    max_rt=max_rt,
    ionisation_mode=ionisation_mode,
    scan_duration_dict=scan_duration_dict,
    num_workers=num_workers
)

GRID SEARCH OF 230 CASES
Creating Chemicals...
Generating chemicals for multibeers_urine_data/beers/fullscan/mzML/Beer_multibeers_2_fullscan1.mzMLGenerating chemicals for multibeers_urine_data/beers/fullscan/mzML/Beer_multibeers_1_fullscan1.mzMLGenerating chemicals for multibeers_urine_data/beers/fullscan/mzML/Beer_multibeers_3_fullscan1.mzML



Running Experiment...
Outcome being written to: "topN_params_2_0.mzML"
Outcome being written to: "topN_params_4_0.mzML"
Outcome being written to: "topN_params_10_0.mzML"
Outcome being written to: "topN_params_0_0.mzML"
Outcome being written to: "topN_params_12_0.mzML"
Outcome being written to: "topN_params_16_0.mzML"
Outcome being written to: "topN_params_8_0.mzML"
Outcome being written to: "topN_params_18_0.mzML"
Outcome being written to: "topN_params_14_0.mzML"
Outcome being written to: "topN_params_6_0.mzML"
Outcome being written to: "topN_params_24_0.mzML"
Outcome being written to: "smartroi_params_9_0.mzML"
Outcome being written to: "topN_