In [1]:
# MZMine peak-picking can be slow, and by default we tend to repeat it for every experiment,
# so this notebook runs it the minimum number of times necessary and copies the results around.

In [2]:
import sys
import os
import shutil
import platform
import re

In [3]:
# Root of the local ViMMS checkout. It is appended to sys.path so the `vimms`
# package can be imported, and the template / script paths below are built from it.
# (Alternative for a checkout sitting next to this notebook: user_vimms = "vimms")
user_vimms = os.path.join(
    "C:\\", "Users", "mcbrider5002", "Desktop", "Workspace", "phd", "peak_picking", "vimms"
)
sys.path.append(user_vimms)

# `old_mzmine` selects the "_old" batch template; the same suffix is appended to
# the output file names built in the cells below.
old_mzmine = False
if old_mzmine:
    template_suffix = "_old"
else:
    template_suffix = ""
mzmine_template = os.path.join(
    user_vimms,
    "batch_files",
    f"multi_sample_peak_pick{template_suffix}.xml",
)

# R script handed to XCMSScriptParams further down.
xcms_r_script = os.path.join(
    user_vimms, "vimms", "scripts", "xcms_script.R"
)

In [4]:
# Windows launcher script of the local MZmine 2.53 installation.
mzmine_path = os.path.join(
    "C:\\", "Users", "mcbrider5002", "Desktop", "Workspace", "phd", "peak_picking",
    "MZmine-2.53-Windows", "startMZmine-Windows.bat",
)

In [5]:
from vimms.PeakPicking import MZMineParams, XCMSScriptParams

### Get seed data

In [6]:
def match_files(data_dir, regex):
    """Return the paths of the files in `data_dir` whose names match `regex`, ordered by ID.

    Args:
        data_dir: Directory whose entries are listed (not recursive).
        regex: Pattern applied to each entry name with `re.match`, i.e. anchored
            at the start of the name only. Its first capture group must hold
            the integer ID used for ordering.

    Returns:
        A list of `os.path.join(data_dir, name)` for every matching name,
        sorted by the integer ID; names sharing an ID are ordered by name so
        the result does not depend on the order `os.listdir` happens to return.
    """
    pattern = re.compile(regex)

    keyed = []
    for fname in os.listdir(data_dir):
        # Match once and reuse the result for both the filter and the ID.
        found = pattern.match(fname)
        if found is not None:
            keyed.append((int(found.group(1)), fname))

    return [os.path.join(data_dir, fname) for _, fname in sorted(keyed)]

In [7]:
# Directory holding the seed fullscan .mzML files.
data_dir = os.path.join("C:\\", "Users", "mcbrider5002", "Desktop", "Workspace", "phd", "data", "CLMS", "new_matching_seeds", "Negative")

# The dot before the extension is escaped and the pattern is anchored at the end,
# so only names that really end in ".mzML" are picked up (match_files uses
# re.match, which anchors at the start only). The captured number is the file ID
# that match_files sorts by.
all_fullscans = match_files(data_dir, r"Vinny_Beers_[0-9]_[0-9]+_ID([0-9]+)\.mzML$")
# The 12 lowest IDs form the "same beer" set; everything after them is the
# "repeated different beer" set.
same_beer_fullscans = all_fullscans[:12]
rep_diff_beer_fullscans = all_fullscans[12:]

### Peak-Pick

In [8]:
# Settings from `centwave_params` are forwarded unchanged to XCMSScriptParams
# as keyword arguments.
centwave_params = {
    "ppm": 15,
    "pwlower": 15,
    "pwupper": 80,
    "snthresh": 5,
    "noise": 1000,
    "prefilterlower": 3,
    "prefilterupper": 500,
    "mzdiff": 0.001,
}

# Both parameter objects are built so that either can be selected as `pp_params`.
mzmine_object = MZMineParams(mzmine_template=mzmine_template, mzmine_exe=mzmine_path)

xcms_object = XCMSScriptParams(
    xcms_r_script=xcms_r_script,
    **centwave_params,
    mzvsrtbalance=None,
    absmz=None,
    absrt=None,
    kNN=None,
)

# Peak-picker used by every cell below; assign `mzmine_object` here instead
# to run MZMine rather than XCMS.
pp_params = xcms_object

In [9]:
def peak_pick_master(master_dir, bio_repeat, tech_repeat):
    """Run aligned peak-picking once for every entry of `multibeer_fullscan_dirs`.

    The result for entry number k (counting from 1) is written under
    `master_dir/k` with the output name `multibeers_{bio_repeat}_{tech_repeat}`
    followed by `template_suffix`.

    Args:
        master_dir: Parent directory of the numbered output directories.
        bio_repeat: Number of leading items taken from each entry.
        tech_repeat: Number of times that leading slice is repeated in the
            list handed to the peak-picker.

    NOTE(review): `multibeer_fullscan_dirs` is read as a global but is not
    defined in the visible cells of this notebook -- confirm it exists before
    calling this function.
    """
    output_name = f"multibeers_{bio_repeat}_{tech_repeat}" + template_suffix

    for seed, fs_dir in enumerate(multibeer_fullscan_dirs, start=1):
        # presumably each entry is a list of fullscan paths -- verify where it is built
        repeated_fullscans = fs_dir[:bio_repeat] * tech_repeat
        pp_params.pick_aligned_peaks(
            repeated_fullscans,
            os.path.join(master_dir, str(seed)),
            output_name,
            force=True
        )
        # Blank line after each run.
        print()

def peak_pick_normal(master_dir, nor_dir, bio_repeat, tech_repeat, seed=4):
    """Copy one master peak-picking result into `nor_dir` instead of re-running the picker.

    Args:
        master_dir: Directory containing the numbered master result directories.
        nor_dir: Directory that receives the copy; created if it does not exist.
        bio_repeat: Used only to build the file name `multibeers_{bio_repeat}_{tech_repeat}`.
        tech_repeat: Used only to build the file name, see `bio_repeat`.
        seed: Number of the master sub-directory to copy from (default 4, the
            value that was previously hard-coded).

    Raises:
        FileNotFoundError: If the source file does not exist.
    """
    aligned_file_name = f"multibeers_{bio_repeat}_{tech_repeat}" + template_suffix
    aligned_file_src = pp_params.format_output_path(os.path.join(master_dir, str(seed)), aligned_file_name)
    aligned_file_dst = pp_params.format_output_path(nor_dir, aligned_file_name)

    # shutil.copy does not create missing directories, so make sure the
    # destination's parent exists first.
    dst_parent = os.path.dirname(aligned_file_dst)
    if dst_parent:
        os.makedirs(dst_parent, exist_ok=True)
    shutil.copy(aligned_file_src, aligned_file_dst)

def peak_pick_exhaustive(master_dir, exh_dir, bio_repeat, tech_repeat, seed=4):
    """Copy one master peak-picking result into every numbered sub-directory of `exh_dir`.

    The same source file is copied into `exh_dir/1` ... `exh_dir/N`, where
    N = `bio_repeat * tech_repeat`.

    Args:
        master_dir: Directory containing the numbered master result directories.
        exh_dir: Parent of the numbered destination directories; missing
            directories are created.
        bio_repeat: Factor of the number of copies; also part of the file name.
        tech_repeat: Factor of the number of copies; also part of the file name.
        seed: Number of the master sub-directory to copy from (default 4, the
            value that was previously hard-coded).

    Raises:
        FileNotFoundError: If the source file does not exist.
    """
    aligned_file_name = f"multibeers_{bio_repeat}_{tech_repeat}" + template_suffix
    aligned_file_src = pp_params.format_output_path(os.path.join(master_dir, str(seed)), aligned_file_name)

    for i in range(bio_repeat * tech_repeat):
        aligned_file_dst = pp_params.format_output_path(os.path.join(exh_dir, str(i+1)), aligned_file_name)
        # shutil.copy does not create missing directories.
        dst_parent = os.path.dirname(aligned_file_dst)
        if dst_parent:
            os.makedirs(dst_parent, exist_ok=True)
        shutil.copy(aligned_file_src, aligned_file_dst)
    
def peak_pick_replicates(master_dir, rep_dir, bio_repeat, tech_repeat):
    """Distribute every master peak-picking result over the replicate directories.

    For each entry number i (counting from 1) of `multibeer_fullscan_dirs`, the
    file from `master_dir/i` is copied to `rep_dir/i` and to `rep_dir/j_i` for
    every entry number j.

    Args:
        master_dir: Directory containing the numbered master result directories.
        rep_dir: Parent of the destination directories; missing directories
            are created.
        bio_repeat: Used only to build the file name `multibeers_{bio_repeat}_{tech_repeat}`.
        tech_repeat: Used only to build the file name, see `bio_repeat`.

    Raises:
        FileNotFoundError: If a source file does not exist.

    NOTE(review): `multibeer_fullscan_dirs` is read as a global but is not
    defined in the visible cells of this notebook -- confirm it exists before
    calling this function.
    """
    aligned_file_name = f"multibeers_{bio_repeat}_{tech_repeat}" + template_suffix

    def copy_into(src, dst_dir):
        # Copy `src` to its formatted location under `dst_dir`, creating the
        # directory first because shutil.copy will not.
        dst = pp_params.format_output_path(dst_dir, aligned_file_name)
        dst_parent = os.path.dirname(dst)
        if dst_parent:
            os.makedirs(dst_parent, exist_ok=True)
        shutil.copy(src, dst)

    for i, _ in enumerate(multibeer_fullscan_dirs):
        aligned_file_src = pp_params.format_output_path(os.path.join(master_dir, str(i+1)), aligned_file_name)

        copy_into(aligned_file_src, os.path.join(rep_dir, str(i+1)))

        for j, _ in enumerate(multibeer_fullscan_dirs):
            copy_into(aligned_file_src, os.path.join(rep_dir, f"{j+1}_{i+1}"))

In [10]:
# Directory that receives the output of every pick_aligned_peaks call below;
# the copy cells then read their source files from it.
master_dir = "peak_picking_master"

### 1. Same Beers

In [11]:
# Group size for this section: the next cell picks peaks for
# same_beer_fullscans[repeat:repeat+1], [:repeat], [repeat:2*repeat] and [:2*repeat],
# and the copy cell creates `repeat` numbered sub-directories per experiment.
repeat = 6

In [12]:
# (fullscans, output name) for every alignment this section needs, in run order.
same_beer_jobs = [
    (same_beer_fullscans[repeat:repeat+1], f"stefanbeersID{repeat+1}"),
    (same_beer_fullscans[:repeat], f"stefanbeersID1-{repeat}"),
    (same_beer_fullscans[repeat:2*repeat], f"stefanbeersID{repeat+1}-{2*repeat}"),
    (same_beer_fullscans[:2*repeat], f"stefanbeersID1-{2*repeat}"),
]

# Each job is run exactly once into `master_dir`; the copy cell below
# distributes the results to the experiment directories.
for fullscans, output_name in same_beer_jobs:
    aligned_file = pp_params.pick_aligned_peaks(
        fullscans,
        master_dir,
        output_name + template_suffix,
        force=True
    )

Running MZMine for peak_picking_master\stefanbeersID7_mzmine_aligned.csv
1800 aligned boxes contained in file
Running MZMine for peak_picking_master\stefanbeersID1-6_mzmine_aligned.csv
3499 aligned boxes contained in file
Running MZMine for peak_picking_master\stefanbeersID7-12_mzmine_aligned.csv
3139 aligned boxes contained in file
Running MZMine for peak_picking_master\stefanbeersID1-12_mzmine_aligned.csv
4261 aligned boxes contained in file


In [13]:
# (output name, experiment directory): which master result each experiment needs.
to_copy = [
    (f"stefanbeersID{repeat+1}", "same_beer"),
    (f"stefanbeersID{repeat+1}-{2*repeat}", "same_iterbeer"),
    (f"stefanbeersID1-{repeat}", "same_iterswapbeer"),
    (f"stefanbeersID{repeat+1}-{2*repeat}", "same_iterswapbeer"),
    (f"stefanbeersID1-{2*repeat}", "same_iterswapbeer")
]

# `exp_dir` rather than `dir`, which would shadow the builtin for the rest of the session.
for fname, exp_dir in to_copy:
    aligned_file_name = fname + template_suffix
    aligned_file_src = pp_params.format_output_path(master_dir, aligned_file_name)

    # Destinations: the experiment directory itself, then its numbered
    # sub-directories 1..repeat.
    out_dirs = [exp_dir] + [os.path.join(exp_dir, str(i+1)) for i in range(repeat)]
    for out_dir in out_dirs:
        aligned_file_dst = pp_params.format_output_path(out_dir, aligned_file_name)
        # shutil.copy does not create missing directories, so a run from a
        # clean working directory would otherwise fail here.
        dst_parent = os.path.dirname(aligned_file_dst)
        if dst_parent:
            os.makedirs(dst_parent, exist_ok=True)
        shutil.copy(aligned_file_src, aligned_file_dst)

### 2. Repeated Different Beers

In [14]:
# The repeated-different-beer files follow the same-beer files, so the IDs used
# in the output names below are shifted by the number of same-beer fullscans.
id_offset = len(same_beer_fullscans)

bio_repeat = 4
tech_repeat = 3

# NOTE: this rebinds `repeat` (set to 6 in the same-beers section); re-running
# the earlier cells after this one would pick up the new value.
repeat = tech_repeat * bio_repeat

In [15]:
# (fullscans, output name) for every alignment this section needs, in run order.
rep_diff_jobs = [
    (rep_diff_beer_fullscans[:bio_repeat], f"stefanbeersID{id_offset+1}-{id_offset+bio_repeat}"),
    (rep_diff_beer_fullscans[:repeat], f"stefanbeersID{id_offset+1}-{id_offset+repeat}"),
    (rep_diff_beer_fullscans[repeat:2*repeat], f"stefanbeersID{id_offset+repeat+1}-{id_offset+2*repeat}"),
    (rep_diff_beer_fullscans[:2*repeat], f"stefanbeersID{id_offset+1}-{id_offset+2*repeat}"),
]

# Each job is run exactly once into `master_dir`; the copy cell below
# distributes the results to the experiment directories.
for fullscans, output_name in rep_diff_jobs:
    aligned_file = pp_params.pick_aligned_peaks(
        fullscans,
        master_dir,
        output_name + template_suffix,
        force=True
    )

Running MZMine for peak_picking_master\stefanbeersID13-16_mzmine_aligned.csv
4279 aligned boxes contained in file
Running MZMine for peak_picking_master\stefanbeersID13-24_mzmine_aligned.csv
6187 aligned boxes contained in file
Running MZMine for peak_picking_master\stefanbeersID25-36_mzmine_aligned.csv
6083 aligned boxes contained in file
Running MZMine for peak_picking_master\stefanbeersID13-36_mzmine_aligned.csv
7590 aligned boxes contained in file


In [16]:
# (output name, experiment directory): which master result each experiment needs.
to_copy = [
    (f"stefanbeersID{id_offset+1}-{id_offset+bio_repeat}", "repeated_different_beer"),
    (f"stefanbeersID{id_offset+repeat+1}-{id_offset+2*repeat}", "repeated_different_iterbeer"),
    (f"stefanbeersID{id_offset+1}-{id_offset+repeat}", "repeated_different_iterswapbeer"),
    (f"stefanbeersID{id_offset+repeat+1}-{id_offset+2*repeat}", "repeated_different_iterswapbeer"),
    (f"stefanbeersID{id_offset+1}-{id_offset+2*repeat}", "repeated_different_iterswapbeer")
]

# `exp_dir` rather than `dir`, which would shadow the builtin for the rest of the session.
for fname, exp_dir in to_copy:
    aligned_file_name = fname + template_suffix
    aligned_file_src = pp_params.format_output_path(master_dir, aligned_file_name)

    # Destinations: the experiment directory itself, then its numbered
    # sub-directories 1..repeat.
    out_dirs = [exp_dir] + [os.path.join(exp_dir, str(i+1)) for i in range(repeat)]
    for out_dir in out_dirs:
        aligned_file_dst = pp_params.format_output_path(out_dir, aligned_file_name)
        # shutil.copy does not create missing directories, so a run from a
        # clean working directory would otherwise fail here.
        dst_parent = os.path.dirname(aligned_file_dst)
        if dst_parent:
            os.makedirs(dst_parent, exist_ok=True)
        shutil.copy(aligned_file_src, aligned_file_dst)