In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
import math
import sys
import os
import platform
import itertools

import IPython
import matplotlib as mpl
from matplotlib.lines import Line2D
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Directory added to the import path so the `vimms` imports in a later cell
# resolve; it is also the root under which the MZmine batch templates are looked up.
user_vimms = "vimms"

# Guard the append so re-running this cell does not pile up duplicate entries.
if user_vimms not in sys.path:
    sys.path.append(user_vimms)

In [None]:
# Toggle between the two peak-picking batch templates. The suffix selects the
# XML file; the name is only used to label some plot titles further down.
old_mzmine = False
if old_mzmine:
    template_suffix, template_name = "_old", "Permissive"
else:
    template_suffix, template_name = "", "Restrictive"

# MZmine launcher script and the batch template handed to MZMineParams below.
mzmine_path = os.path.join("MZmine-2.53-Windows", "startMZmine-Windows.bat")
mzmine_template = os.path.join(
    user_vimms,
    "batch_files",
    f"multi_sample_peak_pick{template_suffix}.xml",
)

In [None]:
from vimms.Common import (
    POSITIVE, load_obj,
    set_log_level_warning,
)
from vimms.PeakPicking import MZMineParams
from vimms.Experiment import Experiment
from vimms.BoxVisualise import (
    mpl_set_axis_style, mpl_set_figure_style,
    mpl_results_plot, plotly_results_plot,
    mpl_fragmentation_counts,
    plotly_timing_hist, seaborn_timing_hist,
    mpl_fragmentation_events, plotly_fragmentation_events,
    mpl_fragmented_boxes,
    seaborn_uncovered_area_hist,
    BoxViewer
)

In [None]:
# Presumably limits ViMMS logging to warnings and above (going by the helper's
# name) -- TODO confirm against vimms.Common.
set_log_level_warning()

# Notebook-wide run settings.
ionisation_mode = POSITIVE
pbar = False

In [None]:
# Earlier, user-specific location of the results, kept for reference only:
#multi_dir = "/Users/%s/University of Glasgow/Vinny Davies - CLDS Metabolomics Project/Experimental_Results/20220706_DDAvsDIA" % user
# Root folder of the multi-sample data, given relative to the working directory.
multi_dir = "20220719_multi_samples_main"
# Sub-folder passed as `fullscan_dir` when each experiment is loaded below.
multibeer_dir = os.path.join(multi_dir, "results_4")

In [None]:
# Worker count passed to every Experiment.evaluate()/summarise() call in this notebook.
num_workers = 8 #can't always use number of physical cores because of memory constraints

Utilities

In [None]:
def load_pkls(pickle_dir, case_names, repeat, load_rois=False):
    """Load the pickled objects stored for each case.

    For every name in ``case_names`` the files
    ``<pickle_dir>/<case_name>_<i>.pkl`` (or ``..._<i>_rois.pkl`` when
    ``load_rois`` is true) are read with ``load_obj`` for ``i`` in
    ``range(repeat)``.

    Returns:
        dict mapping each case name to the list of loaded objects, ordered
        by repeat index.
    """
    suffix = "_rois.pkl" if load_rois else ".pkl"
    loaded = {}
    for case_name in case_names:
        stem = os.path.join(pickle_dir, case_name)
        loaded[case_name] = [
            load_obj(f"{stem}_{idx}{suffix}") for idx in range(repeat)
        ]
    return loaded

Specify styles for mpl plots

In [None]:
# Colours of the active matplotlib property cycle, plus an endless iterator
# over them for handing out colours to names not listed below.
mpl_colours = plt.rcParams["axes.prop_cycle"].by_key()["color"]
colour_iterator = itertools.cycle(mpl_colours)

# Fixed colour per base scheme, given as an index into the colour cycle.
colour_map = {
    case: mpl_colours[cycle_index]
    for case, cycle_index in (
        ("topN", 5),
        ("topN_RoI", 7),
        ("topN_exclusion", 1),
        ("topNEx", 2),
        ("hard_roi_exclusion", 0),
        ("intensity_roi_exclusion", 8),
        ("non_overlap", 6),
        ("intensity_non_overlap", 3),
    )
}

def update_cmap(name):
    """Return the colour registered for ``name``, registering one if needed.

    Names already present in the module-level ``colour_map`` keep their
    colour. An unknown name is assigned the next colour from
    ``colour_iterator`` and stored in ``colour_map`` so later lookups are
    stable.
    """
    if name not in colour_map:
        # Draw from the cycling iterator: `mpl_colours` is the underlying colour
        # sequence, not an iterator, so calling next() on it raises TypeError.
        colour_map[name] = next(colour_iterator)
    return colour_map[name]

def get_style(name):
    """Return the ``(colour, marker)`` pair used to draw case ``name``.

    A name ending (case-insensitively) in ``smartroi`` or ``weighteddew``
    takes the colour of the name with its last ``_``-separated component
    removed, and gets marker ``"v"`` or ``"^"`` respectively. Any other name
    uses its own colour and marker ``"o"``.
    """
    lowered = name.lower()
    for suffix, marker in (("smartroi", "v"), ("weighteddew", "^")):
        if lowered.endswith(suffix):
            base_name = "_".join(name.split("_")[:-1])
            return update_cmap(base_name), marker
    return update_cmap(name), "o"
    
def get_line_styles(case_names):
    """Return parallel lists ``(colours, markers)`` for ``case_names``.

    Each entry comes from ``get_style`` and is in the same order as the input.
    """
    styles = [get_style(case_name) for case_name in case_names]
    colours = [colour for colour, _ in styles]
    markers = [marker for _, marker in styles]
    return colours, markers

In [None]:
# Human-readable legend labels for the base schemes in `colour_map`.
name_map = {
    "topN" : "TopN",
    "topN_RoI" : "TopN RoI",
    "topN_exclusion" : "TopN Exclusion",
    "topNEx" : "TopNEx",
    "hard_roi_exclusion" : "Hard RoI Exclusion",
    "intensity_roi_exclusion" : "Intensity RoI Exclusion",
    "non_overlap" : "Non-Overlap",
    "intensity_non_overlap" : "Intensity Non-Overlap",
}

markersize = 11

# Marker legend: one black entry per marker shape returned by get_style().
legend_elements = [
    Line2D([0], [0], marker=marker, color="black", label=label, markerfacecolor="black", markersize=markersize)
    for marker, label in (
        ("o", "Standard DEW"),
        ("^", "WeightedDEW"),
        ("v", "SmartRoI"),
    )
]

# Unlabelled white entry acting as a spacer between markers and colour swatches.
legend_elements.append(
    Line2D([0], [0], color="white", label="", markerfacecolor="black", markersize=markersize)
)

# One square colour swatch per registered scheme. A scheme without a display
# name falls back to its raw key instead of raising KeyError.
legend_elements.extend(
    Line2D([0], [0], marker="s", color="white", label=name_map.get(name, name), markerfacecolor=colour, markersize=14)
    for name, colour in colour_map.items()
)

In [None]:
# Shared styling defaults; cells below override individual keys (e.g. the
# suptitle) before passing them to mpl_set_figure_style().
fig_kwargs = dict(
    tick_kwargs=dict(width=2.5, length=12, labelsize=22),
    axis_borderwidth=2.5,
    axis_kwargs=dict(
        labelsize=24,
        titlesize=24,
        linewidth=1.5,
        markersize=6,
        legend_kwargs=dict(handles=legend_elements, fontsize=13),
    ),
    suptitle="Default Suptitle",
    suptitle_size=18,
    figure_sizes=(18, 8),
)

In [None]:
# Peak-picking settings reused by every Experiment.evaluate() call below.
pp_params = MZMineParams(mzmine_template=mzmine_template, mzmine_exe=mzmine_path)

# 1. Same Beer Repeated Multiple Times

### Evaluate

In [None]:
repeat = 20
out_dir = "new_same_beer"

# Cases to evaluate: the three topN* cases first, then each remaining scheme
# on one row together with its "_smartroi" and "_weighteddew" variants.
same_case_names = [
    "topN", "topN_RoI", "topN_exclusion",
    "topNEx", "topNEx_smartroi", "topNEx_weighteddew",
    "hard_roi_exclusion", "hard_roi_exclusion_smartroi", "hard_roi_exclusion_weighteddew",
    "intensity_roi_exclusion", "intensity_roi_exclusion_smartroi", "intensity_roi_exclusion_weighteddew",
    "non_overlap", "non_overlap_smartroi", "non_overlap_weighteddew",
    "intensity_non_overlap", "intensity_non_overlap_smartroi", "intensity_non_overlap_weighteddew",
]

# Colour and marker per case, in the same order as the list above.
line_colours, line_markers = get_line_styles(same_case_names)

In [None]:
# Load the same-beer experiment from the keyfile in `out_dir`, restricted to
# the cases listed above.
same_beer_exp = Experiment.load_from_json(
    file_dir=out_dir,
    file_name="keyfile.json",
    out_dir=out_dir,
    fullscan_dir=multibeer_dir,
    amend_result_path=True,
    case_names=same_case_names,
)

In [None]:
isolation_width = 1.0

# Cap the requested repeat count at the number of runs available for the first case.
true_repeat = min(repeat, len(same_beer_exp.case_mzmls[same_beer_exp.case_names[0]]))

same_beer_exp.evaluate(
    pp_params=pp_params,
    num_workers=num_workers,
    isolation_widths=isolation_width,
    max_repeat=true_repeat,
    aligned_names=f"multibeers_1_{true_repeat}" + template_suffix,
    force_peak_picking=False,
    check_files="exact",
)

same_beer_exp.summarise(
    num_workers=num_workers,
    rank_key="cumulative_intensity_proportion",
)

### Plot

In [None]:
# Static matplotlib version: rendered as an image, so it survives a notebook refresh.
fig, axes = mpl_results_plot(
    same_case_names,
    same_beer_exp.evaluators,
    min_intensity=0.0,
    colours=line_colours,
    markers=line_markers,
    mode="absolute",
)

# Shared figure styling with a section-specific title.
mpl_set_figure_style(
    fig,
    **dict(fig_kwargs, suptitle="Repeated Same Beer", suptitle_size=26)
)
# Place the shared legend in the lower-right corner of the second axis.
mpl_set_axis_style(
    axes[1],
    legend_kwargs=dict(fig_kwargs["axis_kwargs"]["legend_kwargs"], loc="lower right")
)

In [None]:
# Interactive plotly version of the same plot; the output is volatile.
plotly_results_plot(same_case_names, same_beer_exp.evaluators, min_intensity=0.0, suptitle="Repeated Same Beer")

In [None]:
# Only plot the cases that were actually evaluated above.
cases = [
    wanted
    for wanted in ("topN", "topN_exclusion", "non_overlap", "intensity_non_overlap")
    if wanted in same_case_names
]

evals = [same_beer_exp.get_evaluator_by_name(selected) for selected in cases]
colours = [colour for colour, _ in map(get_style, cases)]

fig, axes = mpl_fragmentation_counts(
    evals,
    min_intensity=0.0,
    key="times_covered_summary",
    fcs=colours,
)

# Shared styling without the legend, with a section-specific title.
kwargs = dict(
    fig_kwargs,
    axis_kwargs=dict(fig_kwargs["axis_kwargs"], legend_kwargs=None),
    suptitle=f"Times Same Beer Peaks Covered ({template_name})",
    suptitle_size=26,
)
mpl_set_figure_style(fig, **kwargs)

# Title each panel with the case it shows.
for case_name, ax in zip(cases, axes):
    mpl_set_axis_style(ax, title=case_name, titlesize=24)

In [None]:
# Only plot the cases that were actually evaluated above.
cases = [
    wanted
    for wanted in ("topN", "topN_exclusion", "non_overlap", "intensity_non_overlap")
    if wanted in same_case_names
]

evals = [same_beer_exp.get_evaluator_by_name(selected) for selected in cases]
colours = [colour for colour, _ in map(get_style, cases)]

fig, axes = mpl_fragmentation_counts(
    evals,
    min_intensity=0.0,
    key="times_fragmented_summary",
    fcs=colours,
)

# Shared styling without the legend, with a section-specific title.
kwargs = dict(
    fig_kwargs,
    axis_kwargs=dict(fig_kwargs["axis_kwargs"], legend_kwargs=None),
    suptitle=f"Times Same Beer Peaks Fragmented ({template_name})",
    suptitle_size=26,
)
mpl_set_figure_style(fig, **kwargs)

# Title each panel with the case it shows.
for case_name, ax in zip(cases, axes):
    mpl_set_axis_style(ax, title=case_name, titlesize=24)

### Some other useful plots

In [None]:
# Upper bound on how many runs per case are loaded for the plots below.
plot_repeat = 6

# Cases whose processing-time histograms are drawn (every evaluated case by default).
timings = same_case_names
timings = [t for t in timings if t in same_case_names]

# Cases shown in the fragmentation-event and fragmented-box plots.
frag_plots = [
    "topN_exclusion",
    "hard_roi_exclusion",
    "non_overlap",
    "intensity_non_overlap",
    "non_overlap_smartroi",
]
frag_plots = [fp for fp in frag_plots if fp in same_case_names]

# Cases shown in the uncovered-area histograms.
cover_plots = [
    "non_overlap",
    "intensity_non_overlap"
]
cover_plots = [c for c in cover_plots if c in same_case_names]

pickle_dir = os.path.join(os.path.abspath(out_dir), "pickle")

# Every case whose pickles are read further down. `cover_plots` is included
# because the uncovered-area cell indexes `case_pkls` with those names too.
# dict.fromkeys de-duplicates while keeping the order of `same_case_names`,
# so the load order is deterministic.
needs_pkl = [
    case
    for case in dict.fromkeys(same_case_names)
    if case in timings or case in frag_plots or case in cover_plots
]

# Never load more runs than exist for any selected case; `default` keeps the
# cell runnable when nothing is selected.
plot_repeat = min(
    min(
        (len(same_beer_exp.case_mzmls[case]) for case in needs_pkl),
        default=plot_repeat
    ),
    repeat,
    plot_repeat
)
case_pkls = load_pkls(pickle_dir, needs_pkl, plot_repeat)

In [None]:
# One static histogram of processing times per case, pooled over the loaded runs.
for case_name in timings:
    fig, ax = seaborn_timing_hist(
        [penv.processing_times for penv in case_pkls[case_name]],
        binsize=0.001,
    )
    mpl_set_figure_style(fig, suptitle=case_name, suptitle_size=18)

In [None]:
# Interactive counterpart of the processing-time histograms.
for case_name in timings:
    plotly_timing_hist(
        [penv.processing_times for penv in case_pkls[case_name]],
        case_name,
        binsize=0.001,
    )

In [None]:
# Static fragmentation-event plot per case; each case_mzmls entry is a pair
# whose second element is the mzML passed to the plotter.
for case_name in frag_plots:
    fig, ax = mpl_fragmentation_events(
        case_name,
        [mzml for _, mzml in same_beer_exp.case_mzmls[case_name]],
        colour_minm=math.log(500.0),
    )

In [None]:
# Interactive counterpart of the fragmentation-event plots.
for case_name in frag_plots:
    plotly_fragmentation_events(
        case_name,
        [mzml for _, mzml in same_beer_exp.case_mzmls[case_name]],
        colour_minm=math.log(500.0),
    )

In [None]:
# Fragmented-box plot per case; evaluators are looked up by the case's
# position in the experiment's case_names list.
for case_name in frag_plots:
    eva_i = same_beer_exp.case_names.index(case_name)
    fig, ax = mpl_fragmented_boxes(
        case_name,
        same_beer_exp.evaluators[eva_i],
        min_intensity=0.0,
    )
    mpl_set_figure_style(fig, figure_sizes=(20, 10))

In [None]:
for case_name in cover_plots:
    eva_i = same_beer_exp.case_names.index(case_name)

    # Histograms against the boxes held in each loaded run's box geometry.
    ex_figs = seaborn_uncovered_area_hist(
        same_beer_exp.evaluators[eva_i],
        [penv.bm.box_geometry.get_all_boxes() for penv in case_pkls[case_name]],
        binsize=None
    )
    for fig, ax, name in ex_figs:
        mpl_set_figure_style(
            fig,
            suptitle=case_name + f" {name.lower()} pp box uncovered area w.r.t exclusion box",
            suptitle_size=18
        )

    # Same histograms against the separately pickled "_rois" objects.
    roi_figs = seaborn_uncovered_area_hist(
        same_beer_exp.evaluators[eva_i],
        load_pkls(pickle_dir, [case_name], plot_repeat, load_rois=True)[case_name],
        binsize=None
    )
    for fig, ax, name in roi_figs:
        mpl_set_figure_style(
            fig,
            suptitle=case_name + f" {name.lower()} pp box uncovered area w.r.t roi box",
            suptitle_size=18
        )

### View Individual Features

In [None]:
# Case to inspect box-by-box in the cells below.
to_view = "intensity_non_overlap"
eva_i = same_beer_exp.case_names.index(to_view)
view_rois = load_pkls(pickle_dir, [to_view], plot_repeat, load_rois=True)[to_view]

viewer = BoxViewer()
viewer.set_mzmls(
    [mzml for _, mzml in same_beer_exp.case_mzmls[to_view][:plot_repeat]]
)
# The same evaluator is supplied once per loaded run.
viewer.add_evaluator_boxes(
    list(itertools.repeat(same_beer_exp.evaluators[eva_i], plot_repeat)),
    name="fragmented",
    min_intensity=0.0
)
viewer.add_roi_boxes(view_rois)
viewer.add_geom_boxes([penv.bm.box_geometry for penv in case_pkls[to_view]])
viewer.summarise()

In [None]:
# Note: for the justinbeers with min_intensity=5000 the duty cycle never runs
# out of things to do, so scans happen in exactly the same sequence; with fixed
# scan lengths the points then line up exactly. In other circumstances this
# may not be the case.

# Which box to show, and how much margin to draw around it.
box_index, boxset_index = 0, 0
rt_buffer, mz_buffer = 3, 1E-3

fig, axes = viewer.mpl_show_box(
    box_index=box_index,
    boxset_index=boxset_index,
    rt_buffer=rt_buffer,
    mz_buffer=mz_buffer,
    ms_level=1,
    colour_minm=math.log(500.0),
    abs_scaling=True,
)

mpl_set_figure_style(fig, suptitle=to_view, suptitle_size=18)

In [None]:
# Summary for the same box and margins selected in the previous cell.
viewer.summarise_box(box_index=box_index, boxset_index=boxset_index, rt_buffer=rt_buffer, mz_buffer=mz_buffer)

In [None]:
# Drop the loaded pickles now that the same-beer plots are done.
del case_pkls, view_rois

In [None]:
# Drop the BoxViewer reference; it is not used after the individual-feature cells.
del viewer

# 2. Different Beers

In [None]:
repeat = 6
out_dir = "new_different_beer"

# Cases to evaluate: the three topN* cases first, then each remaining scheme
# on one row together with its "_smartroi" and "_weighteddew" variants.
diff_case_names = [
    "topN", "topN_RoI", "topN_exclusion",
    "topNEx", "topNEx_smartroi", "topNEx_weighteddew",
    "hard_roi_exclusion", "hard_roi_exclusion_smartroi", "hard_roi_exclusion_weighteddew",
    "intensity_roi_exclusion", "intensity_roi_exclusion_smartroi", "intensity_roi_exclusion_weighteddew",
    "non_overlap", "non_overlap_smartroi", "non_overlap_weighteddew",
    "intensity_non_overlap", "intensity_non_overlap_smartroi", "intensity_non_overlap_weighteddew",
]

# Colour and marker per case, in the same order as the list above.
line_colours, line_markers = get_line_styles(diff_case_names)

In [None]:
# Load the different-beers experiment from the keyfile in `out_dir`,
# restricted to the cases listed above.
diff_beer_exp = Experiment.load_from_json(
    file_dir=out_dir,
    file_name="keyfile.json",
    out_dir=out_dir,
    fullscan_dir=multibeer_dir,
    amend_result_path=True,
    case_names=diff_case_names,
)

In [None]:
isolation_width = 1.0

# Cap the requested repeat count at the number of runs available for the first case.
true_repeat = min(
    repeat,
    len(diff_beer_exp.case_mzmls[diff_beer_exp.case_names[0]])
)

diff_beer_exp.evaluate(
    pp_params = pp_params,
    num_workers = num_workers,
    isolation_widths = isolation_width,
    # Pass the capped count so it agrees with the aligned file name below and
    # with how the same-beer section calls evaluate().
    max_repeat = true_repeat,
    aligned_names = f"multibeers_{true_repeat}_1" + template_suffix,
    force_peak_picking = False,
    check_files = "exact"
)

diff_beer_exp.summarise(num_workers=num_workers, rank_key="cumulative_intensity_proportion")

In [None]:
# Static results plot for the different-beers experiment.
fig, axes = mpl_results_plot(
    diff_case_names,
    diff_beer_exp.evaluators,
    min_intensity=0.0,
    colours=line_colours,
    markers=line_markers,
)

# Shared figure styling with a section-specific title.
mpl_set_figure_style(
    fig,
    **dict(fig_kwargs, suptitle="Different Beers", suptitle_size=26)
)

In [None]:
# Interactive results plot for the different-beers experiment. The title is an
# f-string so the active template name is interpolated rather than shown as
# the literal text "{template_name}".
plotly_results_plot(
    diff_case_names,
    diff_beer_exp.evaluators,
    min_intensity=0.0,
    suptitle=f"Different Beers ({template_name})"
)

# 3. Repeating Different Beers

In [None]:
# Total run count is the product of the two repeat counts; both also appear
# in the aligned file name and in the plot titles below.
bio_repeat = 6
tech_repeat = 4
repeat = bio_repeat * tech_repeat
out_dir = "new_repeated_different_beer"

# Cases to evaluate: the three topN* cases first, then each remaining scheme
# on one row together with its "_smartroi" and "_weighteddew" variants.
rep_diff_case_names = [
    "topN", "topN_RoI", "topN_exclusion",
    "topNEx", "topNEx_smartroi", "topNEx_weighteddew",
    "hard_roi_exclusion", "hard_roi_exclusion_smartroi", "hard_roi_exclusion_weighteddew",
    "intensity_roi_exclusion", "intensity_roi_exclusion_smartroi", "intensity_roi_exclusion_weighteddew",
    "non_overlap", "non_overlap_smartroi", "non_overlap_weighteddew",
    "intensity_non_overlap", "intensity_non_overlap_smartroi", "intensity_non_overlap_weighteddew",
]

# Colour and marker per case, in the same order as the list above.
line_colours, line_markers = get_line_styles(rep_diff_case_names)

In [None]:
# Load the repeated-different-beers experiment from the keyfile in `out_dir`,
# restricted to the cases listed above.
rep_diff_beer_exp = Experiment.load_from_json(
    file_dir=out_dir,
    file_name="keyfile.json",
    out_dir=out_dir,
    fullscan_dir=multibeer_dir,
    amend_result_path=True,
    case_names=rep_diff_case_names,
)

In [None]:
isolation_width = 1.0

# Evaluate all runs; the aligned file name encodes both repeat counts.
rep_diff_beer_exp.evaluate(
    pp_params=pp_params,
    num_workers=num_workers,
    isolation_widths=isolation_width,
    max_repeat=repeat,
    aligned_names=f"multibeers_{bio_repeat}_{tech_repeat}" + template_suffix,
    force_peak_picking=False,
    check_files="exact",
)

rep_diff_beer_exp.summarise(
    num_workers=num_workers,
    rank_key="cumulative_intensity_proportion",
)

In [None]:
# Static results plot for the repeated-different-beers experiment.
fig, axes = mpl_results_plot(
    rep_diff_case_names,
    rep_diff_beer_exp.evaluators,
    min_intensity=0.0,
    colours=line_colours,
    markers=line_markers,
)

# Shared figure styling with a title naming both repeat counts.
kwargs = dict(
    fig_kwargs,
    suptitle=f"{bio_repeat} Different Beers with {tech_repeat} Replicates",
    suptitle_size=26,
)
mpl_set_figure_style(fig, **kwargs)

In [None]:
# Interactive results plot for the repeated-different-beers experiment.
plotly_results_plot(rep_diff_case_names, rep_diff_beer_exp.evaluators, min_intensity=0.0, suptitle=f"{bio_repeat} Different Beers with {tech_repeat} Replicates")

In [None]:
# Only plot the cases that were actually evaluated above.
cases = [
    wanted
    for wanted in ("topN", "topN_exclusion", "non_overlap", "intensity_non_overlap")
    if wanted in rep_diff_case_names
]

evals = [rep_diff_beer_exp.get_evaluator_by_name(selected) for selected in cases]
colours = [colour for colour, _ in map(get_style, cases)]

fig, axes = mpl_fragmentation_counts(
    evals,
    min_intensity=0.0,
    key="times_covered_summary",
    fcs=colours,
)

# Shared styling without the legend, with a section-specific title.
kwargs = dict(
    fig_kwargs,
    axis_kwargs=dict(fig_kwargs["axis_kwargs"], legend_kwargs=None),
    suptitle=f"Times Repeated Different Beer Peaks Covered ({template_name})",
    suptitle_size=26,
)
mpl_set_figure_style(fig, **kwargs)

# Title each panel with the case it shows.
for case_name, ax in zip(cases, axes):
    mpl_set_axis_style(ax, title=case_name, titlesize=24)

In [None]:
# Only plot the cases that were actually evaluated above.
cases = [
    wanted
    for wanted in ("topN", "topN_exclusion", "non_overlap", "intensity_non_overlap")
    if wanted in rep_diff_case_names
]

evals = [rep_diff_beer_exp.get_evaluator_by_name(selected) for selected in cases]
colours = [colour for colour, _ in map(get_style, cases)]

fig, axes = mpl_fragmentation_counts(
    evals,
    min_intensity=0.0,
    key="times_fragmented_summary",
    fcs=colours,
)

# Shared styling without the legend, with a section-specific title.
kwargs = dict(
    fig_kwargs,
    axis_kwargs=dict(fig_kwargs["axis_kwargs"], legend_kwargs=None),
    suptitle=f"Times Repeated Different Beer Peaks Fragmented ({template_name})",
    suptitle_size=26,
)
mpl_set_figure_style(fig, **kwargs)

# Title each panel with the case it shows.
for case_name, ax in zip(cases, axes):
    mpl_set_axis_style(ax, title=case_name, titlesize=24)