In [None]:
# Notebook setup: render matplotlib figures inline and enable IPython's
# autoreload extension (mode 2 re-imports modules before each execution).
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
import math
import sys
import os
import platform
import itertools

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Use seaborn's "poster" plotting context for the figures in this notebook.
sns.set_context('poster')

In [None]:
# Relative path appended to the module search path before the `vimms` imports
# below; it is also the base directory of the MZmine batch template.
# Presumably a local ViMMS checkout -- TODO confirm.
user_vimms = "vimms"
sys.path.append(user_vimms)

In [None]:
# Relative path to the MZmine 2.53 Windows launcher script.
mzmine_path = os.path.join("MZmine-2.53-Windows", "startMZmine-Windows.bat")

# Switch between the regular and the "_old" batch template; the same suffix is
# appended to the aligned-output names in the evaluation cells.
old_mzmine = False
if old_mzmine:
    template_suffix = "_old"
else:
    template_suffix = ""

# Batch template located under <user_vimms>/batch_files.
mzmine_template = os.path.join(
    user_vimms,
    "batch_files",
    f"multi_sample_peak_pick{template_suffix}.xml",
)

In [None]:
from vimms.Common import (
    POSITIVE, load_obj,
    set_log_level_warning,
)
from vimms.PeakPicking import MZMineParams
from vimms.Experiment import Experiment
from vimms.BoxVisualise import (
    mpl_results_plot, plotly_results_plot,
    plotly_timing_hist, seaborn_timing_hist,
    mpl_fragmentation_events, plotly_fragmentation_events,
    mpl_fragmented_boxes,
    seaborn_uncovered_area_hist,
    BoxViewer
)

In [None]:
# Run-wide settings. Neither `ionisation_mode` nor `pbar` is referenced in the
# visible cells; `pbar` is presumably a progress-bar toggle -- TODO confirm.
ionisation_mode = POSITIVE
pbar = False
# Helper imported from vimms.Common; presumably restricts logging to warnings
# and above -- verify against the library.
set_log_level_warning()

In [None]:
# Root directory of the multi-sample data, given relative to the working
# directory. The commented-out line is an earlier absolute location kept for
# reference.
#multi_dir = "/Users/%s/University of Glasgow/Vinny Davies - CLDS Metabolomics Project/Experimental_Results/20220706_DDAvsDIA" % user
multi_dir = "20220719_multi_samples_main"
# Passed as `fullscan_dir` when the experiments are loaded in the evaluation cells.
multibeer_dir = os.path.join(multi_dir, "results_4")

In [None]:
# Worker count passed to Experiment.evaluate and Experiment.get_reports below.
num_workers = 20 #can't always use number of physical cores because of memory constraints

Utilities



In [None]:
def load_pkls(pickle_dir, case_names, repeat, load_rois=False):
    """Load the pickles stored for every case name and repeat index.

    For each name in ``case_names`` the files
    ``<pickle_dir>/<case_name>_<i>.pkl`` (or ``..._<i>_rois.pkl`` when
    ``load_rois`` is true) are read with ``load_obj`` for every ``i`` in
    ``range(repeat)``.

    Returns:
        dict mapping each case name to the list of loaded objects, ordered
        by repeat index.
    """
    # The file-name ending is the same for every case, so build it once.
    suffix = "_rois.pkl" if load_rois else ".pkl"

    loaded = {}
    for case_name in case_names:
        stem = os.path.join(pickle_dir, case_name)
        loaded[case_name] = [
            load_obj(f"{stem}_{idx}{suffix}") for idx in range(repeat)
        ]
    return loaded

Assign each experiment name a consistent colour and marker for the matplotlib plots

In [None]:
# Endless iterator over the colours of the active matplotlib `axes.prop_cycle`.
mpl_colours = itertools.cycle(plt.rcParams["axes.prop_cycle"].by_key()["color"])
# Cache of name -> colour assignments, filled lazily by update_cmap.
colour_map = {}

def update_cmap(name):
    """Return the colour registered for ``name``.

    The first time a name is seen, the next colour from ``mpl_colours`` is
    drawn and stored in ``colour_map``; later calls reuse the stored colour.
    """
    try:
        return colour_map[name]
    except KeyError:
        colour = next(mpl_colours)
        colour_map[name] = colour
        return colour

def get_style(name):
    """Return the ``(colour, marker)`` pair used to draw experiment ``name``.

    Names ending (case-insensitively) in "smartroi" or "weighteddew" take the
    colour registered for the name with its last "_"-separated token removed,
    and use the markers "s" and "^" respectively. Every other name is
    registered under its full name and uses the marker "o".
    """
    lowered = name.lower()
    for suffix, marker in (("smartroi", "s"), ("weighteddew", "^")):
        if lowered.endswith(suffix):
            # Everything before the last underscore ("" if there is none).
            base_name = name.rpartition("_")[0]
            return update_cmap(base_name), marker
    return update_cmap(name), "o"
    
def get_line_styles(experiment_names):
    """Return parallel lists ``(colours, markers)`` for the given names.

    ``get_style`` is called once per name, in input order, and its colour and
    marker are collected into two lists of equal length.
    """
    styles = [get_style(exp_name) for exp_name in experiment_names]
    colours = [colour for colour, _ in styles]
    markers = [marker for _, marker in styles]
    return colours, markers

In [None]:
def get_rep_dirs(out_dir):
    """Return the paths of the consecutively numbered entries of ``out_dir``.

    Entries named "0", "1", "2", ... are collected in ascending order, and the
    scan stops at the first index that has no matching entry. Each returned
    path is ``out_dir`` joined with the entry name.
    """
    entries = os.listdir(out_dir)
    rep_dirs = []

    index = 0
    while str(index) in entries:
        rep_dirs.append(os.path.join(out_dir, str(index)))
        index += 1

    return rep_dirs

In [None]:
# Peak-picking configuration shared by both evaluation loops below: the MZmine
# batch template and the MZmine launcher defined earlier in the notebook.
pp_params = MZMineParams(
    mzmine_template = mzmine_template,
    mzmine_exe = mzmine_path
)

# 1. Same Beer

In [None]:
# Upper bound on the repeats evaluated per case; the evaluation cell caps it at
# the number of mzML entries actually present.
repeat = 10
# Directory scanned by get_rep_dirs for numbered replicate sub-directories.
out_dir = "replicate_same_beer"

# Case names requested from each replicate's keyfile. After the first three,
# the names come in triples: a base case followed by its "_smartroi" and
# "_weighteddew" variants (get_style relies on those suffixes).
same_case_names = [
    "topN",
    "topN_RoI",
    "topN_exclusion",
    "topNEx",
    "topNEx_smartroi",
    "topNEx_weighteddew",
    "hard_roi_exclusion",
    "hard_roi_exclusion_smartroi",
    "hard_roi_exclusion_weighteddew",
    "intensity_roi_exclusion",
    "intensity_roi_exclusion_smartroi",
    "intensity_roi_exclusion_weighteddew",
    "non_overlap",
    "non_overlap_smartroi",
    "non_overlap_weighteddew",
    "intensity_non_overlap",
    "intensity_non_overlap_smartroi",
    "intensity_non_overlap_weighteddew",
]

# One colour and one marker per case name, in the same order as the list above.
line_colours, line_markers = get_line_styles(same_case_names)

In [None]:
# Value passed as `isolation_widths` to Experiment.evaluate.
isolation_width = 1.0
# Collects one get_reports() result per replicate directory, in directory order.
reports_list = []

for rep_dir in get_rep_dirs(out_dir):
    # Load the experiment described by this replicate's keyfile; results are
    # read from and written to the same replicate directory.
    same_beer_exp = Experiment.load_from_json(
        file_dir = rep_dir,
        file_name = "keyfile.json",
        out_dir = rep_dir,
        fullscan_dir = multibeer_dir,
        amend_result_path = True,
        case_names = same_case_names
    )
    
    # Cap the requested repeat count at the number of entries recorded for the
    # first case (assumes every case has the same number -- TODO confirm).
    true_repeat = min(
        repeat,
        len(same_beer_exp.case_mzmls[same_beer_exp.case_names[0]])
    )
    
    # NOTE(review): `force_peak_picking=False` presumably reuses existing
    # peak-picking output, and `check_mzmine="exact"` presumably validates the
    # MZmine setup -- confirm against the Experiment.evaluate documentation.
    same_beer_exp.evaluate(
        pp_params = pp_params,
        num_workers = num_workers,
        isolation_widths = isolation_width,
        max_repeat = true_repeat,
        aligned_names = f"multibeers_1_{true_repeat}" + template_suffix,
        force_peak_picking = False,
        check_mzmine="exact"
    )
    
    reports_list.append(
        same_beer_exp.get_reports(num_workers=num_workers)
    )

In [None]:
# Final cumulative coverage and intensity coverage per case: one inner list per
# entry of same_case_names, holding one value per report in reports_list.
coverages, it_coverages = [], []

for case_name in same_case_names:
    case_reports = [report[case_name] for report in reports_list]
    # The last element of each cumulative series is the final value.
    coverages.append(
        [cr["cumulative_coverage_proportion"][-1] for cr in case_reports]
    )
    it_coverages.append(
        [cr["cumulative_intensity_proportion"][-1] for cr in case_reports]
    )

print(coverages)
print()
print(it_coverages)

In [None]:
fig, axes = plt.subplots(2, 1)
ax1, ax2 = axes

# Dashed vertical separators after every third box, starting after box 3;
# i + 0.5 lies halfway between box i and box i + 1.
for i in range(3, len(same_case_names), 3):
    for ax in (ax1, ax2):
        ax.axvline(i + 0.5, linestyle="--", linewidth=1.2)

# Top panel: coverage. Its x ticks are hidden because both panels share the
# same case order and the bottom panel carries the labels.
ax1.boxplot(coverages)
ax1.tick_params(bottom=False, labelbottom=False)
ax1.set_title("Same Beer Final Cumulative Coverage")
ax1.set_ylabel("Coverage Proportion")

# Bottom panel: intensity coverage, labelled with the case names.
ax2.boxplot(it_coverages)
ax2.set_xticklabels(same_case_names, rotation=40, ha="right")
ax2.set_title("Same Beer Final Cumulative Intensity Coverage")
ax2.set_ylabel("Intensity Coverage Proportion")

fig.set_size_inches(15, 15)

# 2. Repeating Different Beers

In [None]:
# Replicate layout for this section; the product is the upper bound on the
# repeats evaluated per case, and both factors appear in the aligned-output
# name used by the evaluation cell.
bio_repeat = 6
tech_repeat = 4
repeat = bio_repeat * tech_repeat
# Directory scanned by get_rep_dirs for numbered replicate sub-directories.
out_dir = "replicate_repeated_different_beer"

# Case names requested from each replicate's keyfile (the same names, in the
# same order, as `same_case_names` in the "Same Beer" section).
rep_diff_case_names = [
    "topN",
    "topN_RoI",
    "topN_exclusion",
    "topNEx",
    "topNEx_smartroi",
    "topNEx_weighteddew",
    "hard_roi_exclusion",
    "hard_roi_exclusion_smartroi",
    "hard_roi_exclusion_weighteddew",
    "intensity_roi_exclusion",
    "intensity_roi_exclusion_smartroi",
    "intensity_roi_exclusion_weighteddew",
    "non_overlap",
    "non_overlap_smartroi",
    "non_overlap_weighteddew",
    "intensity_non_overlap",
    "intensity_non_overlap_smartroi",
    "intensity_non_overlap_weighteddew",
]

# One colour and one marker per case name, in the same order as the list above.
line_colours, line_markers = get_line_styles(rep_diff_case_names)

In [None]:
# Value passed as `isolation_widths` to Experiment.evaluate.
isolation_width = 1.0
# Collects one get_reports() result per replicate directory, in directory order.
# This rebinds the name used by the "Same Beer" section.
reports_list = []

for rep_dir in get_rep_dirs(out_dir):
    # Load the experiment described by this replicate's keyfile; results are
    # read from and written to the same replicate directory.
    rep_diff_beer_exp = Experiment.load_from_json(
        file_dir = rep_dir,
        file_name = "keyfile.json",
        out_dir = rep_dir,
        fullscan_dir = multibeer_dir,
        amend_result_path = True,
        case_names = rep_diff_case_names
    )
    
    # Cap the requested repeat count at the number of entries recorded for the
    # first case (assumes every case has the same number -- TODO confirm).
    true_repeat = min(
        repeat,
        len(rep_diff_beer_exp.case_mzmls[rep_diff_beer_exp.case_names[0]])
    )
    
    # NOTE(review): unlike the same-beer cell, `aligned_names` is built from
    # bio_repeat/tech_repeat and does not change when `true_repeat` is smaller
    # than `repeat` -- confirm this is intended.
    rep_diff_beer_exp.evaluate(
        pp_params = pp_params,
        num_workers = num_workers,
        isolation_widths = isolation_width,
        max_repeat = true_repeat,
        aligned_names = f"multibeers_{bio_repeat}_{tech_repeat}" + template_suffix,
        force_peak_picking = False,
        check_mzmine="exact"
    )
    
    reports_list.append(
        rep_diff_beer_exp.get_reports(num_workers=num_workers)
    )

In [None]:
# Final cumulative coverage and intensity coverage per case: one inner list per
# entry of rep_diff_case_names, holding one value per report in reports_list.
coverages, it_coverages = [], []

for case_name in rep_diff_case_names:
    case_reports = [report[case_name] for report in reports_list]
    # The last element of each cumulative series is the final value.
    coverages.append(
        [cr["cumulative_coverage_proportion"][-1] for cr in case_reports]
    )
    it_coverages.append(
        [cr["cumulative_intensity_proportion"][-1] for cr in case_reports]
    )

print(coverages)
print()
print(it_coverages)

In [None]:
fig, axes = plt.subplots(2, 1)
ax1, ax2 = axes

# Dashed vertical separators after every third box, starting after box 3;
# i + 0.5 lies halfway between box i and box i + 1.
for i in range(3, len(rep_diff_case_names), 3):
    ax1.axvline(i + 0.5, linestyle="--", linewidth=1.2)
    ax2.axvline(i + 0.5, linestyle="--", linewidth=1.2)

# Build the title prefix from the replicate configuration rather than a
# hard-coded "6-4", so the titles cannot drift from bio_repeat / tech_repeat.
beers_label = f"{bio_repeat}-{tech_repeat} Beers"

# Top panel: coverage. Its x ticks are hidden because both panels share the
# same case order and the bottom panel carries the labels.
ax1.boxplot(coverages)
ax1.tick_params(
    bottom=False,
    labelbottom=False
)
ax1.set(title=f"{beers_label} Final Cumulative Coverage", ylabel="Coverage Proportion")

# Bottom panel: intensity coverage, labelled with the case names.
ax2.boxplot(it_coverages)
ax2.set_xticklabels(rep_diff_case_names, rotation=40, ha="right")
ax2.set(title=f"{beers_label} Final Cumulative Intensity Coverage", ylabel="Intensity Coverage Proportion")

fig.set_size_inches(15, 15)