In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load IPython's autoreload extension; mode 2 reloads all imported modules before each
# cell is executed, so edits to imported source files take effect without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
import math
import sys
import os
import platform
import itertools

from matplotlib.lines import Line2D
import matplotlib.pyplot as plt

In [None]:
# Directory (relative to the working directory) that is added to the module search path;
# presumably the checkout that provides the `vimms` package imported further down.
user_vimms = "vimms"
# Guard the append so that re-running this cell does not pile up duplicate entries.
if user_vimms not in sys.path:
    sys.path.append(user_vimms)

In [None]:
# Switch between the two batch templates: the "_old" file is used only when old_mzmine is set.
old_mzmine = False
template_suffix = "" if not old_mzmine else "_old"

# Launcher script handed to MZMineParams as `mzmine_exe`.
mzmine_path = os.path.join("MZmine-2.53-Windows", "startMZmine-Windows.bat")
# Batch template handed to MZMineParams as `mzmine_template`.
mzmine_template = os.path.join(
    user_vimms,
    "batch_files",
    f"multi_sample_peak_pick{template_suffix}.xml",
)

In [None]:
from vimms.Common import (
    POSITIVE, load_obj,
    set_log_level_warning,
)
from vimms.PeakPicking import MZMineParams
from vimms.Experiment import Experiment
from vimms.BoxVisualise import (
    mpl_set_axis_style, mpl_set_figure_style,
    mpl_results_plot, plotly_results_plot,
    plotly_timing_hist, seaborn_timing_hist,
    mpl_fragmentation_events, plotly_fragmentation_events,
    mpl_fragmented_boxes,
    seaborn_uncovered_area_hist,
    BoxViewer
)

In [None]:
# Adjust the logging level up front (presumably to WARNING, going by the helper's name).
set_log_level_warning()

# Notebook-wide settings.
pbar = False
ionisation_mode = POSITIVE

In [None]:
# Disabled alternative location (absolute, user-specific path). NOTE(review): it formats
# in a `user` variable that is not defined in the visible cells, so it cannot simply be re-enabled.
#multi_dir = "/Users/%s/University of Glasgow/Vinny Davies - CLDS Metabolomics Project/Experimental_Results/20220706_DDAvsDIA" % user
# Root data directory, given relative to the working directory.
multi_dir = "20220719_multi_samples_main"
# Sub-directory passed as `fullscan_dir` when the experiments are loaded from JSON.
multibeer_dir = os.path.join(multi_dir, "results_4")

In [None]:
# Worker count passed to evaluate()/summarise(). Memory constraints mean the number of
# physical cores cannot always be used, hence a fixed, conservative value.
num_workers = 8

In [None]:
def load_pkls(pickle_dir, case_names, repeat, load_rois=False):
    """Load the pickled object of every repeat of every case.

    For each case the files ``<pickle_dir>/<case_name>_<i>.pkl`` (or
    ``<pickle_dir>/<case_name>_<i>_rois.pkl`` when ``load_rois`` is true) are
    read with ``load_obj`` for ``i`` in ``range(repeat)``.

    Args:
        pickle_dir: Directory containing the pickle files.
        case_names: Iterable of case names used as file-name prefixes.
        repeat: Number of repeats to load per case.
        load_rois: If true, load the ``_rois`` variant of each file.

    Returns:
        A dict mapping each case name to the list of loaded objects, ordered
        by repeat index.
    """
    # The suffix depends only on the flag, so work it out once.
    roi_str = "_rois" if load_rois else ""

    def paths_for(case_name):
        # All repeat paths of one case, built before any of them is loaded.
        stem = os.path.join(pickle_dir, case_name)
        return [f"{stem}_{i}{roi_str}.pkl" for i in range(repeat)]

    return {
        case_name: [load_obj(path) for path in paths_for(case_name)]
        for case_name in case_names
    }

In [None]:
# Colours of matplotlib's currently configured property cycle.
mpl_colours = plt.rcParams["axes.prop_cycle"].by_key()["color"]
# Endless iterator over those same colours.
colour_iterator = itertools.cycle(mpl_colours)

# Fixed colour per case, expressed as an index into the property cycle.
colour_map = {
    case_name: mpl_colours[cycle_index]
    for case_name, cycle_index in (
        ("topN", 5),
        ("topN_RoI", 7),
        ("topN_exclusion", 1),
        ("topNEx", 2),
        ("hard_roi_exclusion", 0),
        ("intensity_roi_exclusion", 8),
        ("non_overlap", 6),
        ("intensity_non_overlap", 3),
    )
}

def update_cmap(name):
    """Return the colour registered for ``name``, assigning one if needed.

    A name that is missing from the module-level ``colour_map`` is given the
    next colour drawn from ``colour_iterator`` and stored, so later look-ups of
    the same name return the same colour.

    Args:
        name: Key to look up in ``colour_map``.

    Returns:
        The colour stored in ``colour_map`` under ``name``.
    """
    if name not in colour_map:
        # Draw from the cycling iterator: ``mpl_colours`` itself is a plain list,
        # and calling next() on a list raises TypeError.
        colour_map[name] = next(colour_iterator)
    return colour_map[name]

def get_style(name):
    """Return the ``(colour, marker)`` pair used to draw the case ``name``.

    Names ending (case-insensitively) in ``smartroi`` or ``weighteddew`` get the
    marker ``"v"`` or ``"^"`` respectively, and their colour is looked up under
    the name with its last ``_``-separated component removed. Every other name
    gets the marker ``"o"`` and is looked up unchanged.

    Args:
        name: Case name.

    Returns:
        Tuple ``(colour, marker)``; the colour comes from ``update_cmap``.
    """
    suffix_markers = (("smartroi", "v"), ("weighteddew", "^"))
    lowered = name.lower()
    for suffix, marker in suffix_markers:
        if lowered.endswith(suffix):
            # Everything before the final underscore ("" when there is none).
            base_name = name.rpartition("_")[0]
            return update_cmap(base_name), marker
    return update_cmap(name), "o"
    
def get_line_styles(case_names):
    """Collect the colour and marker of each case in ``case_names``.

    Args:
        case_names: Iterable of case names, each passed to ``get_style``.

    Returns:
        Tuple ``(colours, markers)`` of two lists aligned with ``case_names``.
    """
    styles = [get_style(case_name) for case_name in case_names]
    colours = [colour for colour, _ in styles]
    markers = [marker for _, marker in styles]
    return colours, markers

In [None]:
# Display label for each case identifier.
name_map = {
    "topN": "TopN",
    "topN_RoI": "TopN RoI",
    "topN_exclusion": "TopN Exclusion",
    "topNEx": "TopNEx",
    "hard_roi_exclusion": "Hard RoI Exclusion",
    "intensity_roi_exclusion": "Intensity RoI Exclusion",
    "non_overlap": "Non-Overlap",
    "intensity_non_overlap": "Intensity Non-Overlap",
}

markersize = 11

# Legend handles: first the marker-shape key, then one colour swatch per case.
legend_elements = [
    Line2D(
        [0], [0],
        marker="o",
        color="black",
        label="Standard DEW",
        markerfacecolor="black",
        markersize=markersize,
    ),
    # White line with an empty label and no marker; appears to serve as a blank row.
    Line2D(
        [0], [0],
        color="white",
        label="",
        markerfacecolor="black",
        markersize=markersize,
    ),
]

for name in ("topN_exclusion", "intensity_non_overlap"):
    # Square swatch filled with the case colour and labelled with its display name.
    legend_elements += [
        Line2D(
            [0], [0],
            marker="s",
            color="white",
            label=name_map[name],
            markerfacecolor=colour_map[name],
            markersize=14,
        )
    ]

In [None]:
# Styling defaults shared by the figures in this notebook; the plotting cells override
# "suptitle", "suptitle_size" and the legend "loc" per figure.
fig_kwargs = dict(
    tick_kwargs=dict(width=2.5, length=12, labelsize=22),
    axis_borderwidth=2.5,
    axis_kwargs=dict(
        labelsize=24,
        titlesize=24,
        linewidth=1.5,
        markersize=6,
        legend_kwargs=dict(handles=legend_elements, fontsize=13),
    ),
    suptitle="Default Suptitle",
    suptitle_size=18,
    figure_sizes=(18, 8),
)

In [None]:
# Peak-picking parameters: the batch template and the MZmine launcher configured above.
pp_params = MZMineParams(mzmine_template=mzmine_template, mzmine_exe=mzmine_path)

# 1. Same Beer Repeated Multiple Times

### Evaluate

In [None]:
# Settings for the repeated-same-beer experiment.
out_dir = "reoptimised_same_beer"
repeat = 20  # upper bound on the number of repeats evaluated

same_case_names = ["topN_exclusion", "intensity_non_overlap"]

# Colour and marker per case, in the same order as same_case_names.
line_colours, line_markers = get_line_styles(same_case_names)

In [None]:
# Load the experiment from the key file in out_dir, restricted to the selected cases.
same_beer_exp = Experiment.load_from_json(
    file_dir=out_dir,
    file_name="keyfile.json",
    out_dir=out_dir,
    fullscan_dir=multibeer_dir,
    amend_result_path=True,
    case_names=same_case_names,
)

In [None]:
isolation_width = 1.0

# Repeats actually present for the first case, capped at `repeat`; only used to build
# the name of the aligned file.
true_repeat = min(repeat, len(same_beer_exp.case_mzmls[same_beer_exp.case_names[0]]))

same_beer_exp.evaluate(
    pp_params=pp_params,
    num_workers=num_workers,
    isolation_widths=isolation_width,
    max_repeat=repeat,
    aligned_names=f"multibeers_1_{true_repeat}" + template_suffix,
    force_peak_picking=False,
)

same_beer_exp.summarise(
    num_workers=num_workers,
    rank_key="cumulative_intensity_proportion",
)

### Plot

In [None]:
# Static (non-interactive) matplotlib figure: as a plain image it stays visible when the
# notebook is refreshed.
fig, axes = mpl_results_plot(
    same_case_names,
    same_beer_exp.evaluators,
    min_intensity=0.0,
    colours=line_colours,
    markers=line_markers,
    mode="absolute",
)

# Shared styling with a figure-specific title.
mpl_set_figure_style(
    fig,
    **{**fig_kwargs, "suptitle": "Repeated Same Beer", "suptitle_size": 26}
)
# Re-style the second axis so that its legend sits in the lower-right corner.
mpl_set_axis_style(
    axes[1],
    legend_kwargs={**fig_kwargs["axis_kwargs"]["legend_kwargs"], "loc": "lower right"},
)

# 3. Repeating Different Beers

In [None]:
# Settings for the different-beers experiment: bio_repeat beers with tech_repeat
# replicates each, which gives the total number of runs per case.
out_dir = "reoptimised_repeated_different_beer"
bio_repeat = 6
tech_repeat = 4
repeat = bio_repeat * tech_repeat

rep_diff_case_names = ["topN_exclusion", "intensity_non_overlap"]

# Colour and marker per case, in the same order as rep_diff_case_names.
line_colours, line_markers = get_line_styles(rep_diff_case_names)

In [None]:
# Load the experiment from the key file in out_dir, restricted to the selected cases.
rep_diff_beer_exp = Experiment.load_from_json(
    file_dir=out_dir,
    file_name="keyfile.json",
    out_dir=out_dir,
    fullscan_dir=multibeer_dir,
    amend_result_path=True,
    case_names=rep_diff_case_names,
)

In [None]:
isolation_width = 1.0

# The aligned file name encodes the beer count and the replicate count.
rep_diff_beer_exp.evaluate(
    pp_params=pp_params,
    num_workers=num_workers,
    isolation_widths=isolation_width,
    max_repeat=repeat,
    aligned_names=f"multibeers_{bio_repeat}_{tech_repeat}" + template_suffix,
    force_peak_picking=False,
)

rep_diff_beer_exp.summarise(
    num_workers=num_workers,
    rank_key="cumulative_intensity_proportion",
)

### Plot

In [None]:
# Static matplotlib results figure for the different-beers experiment.
fig, axes = mpl_results_plot(
    rep_diff_case_names,
    rep_diff_beer_exp.evaluators,
    min_intensity=0.0,
    colours=line_colours,
    markers=line_markers,
)

# Shared styling with a figure-specific title.
kwargs = {
    **fig_kwargs,
    "suptitle": f"{bio_repeat} Different Beers with {tech_repeat} Replicates",
    "suptitle_size": 26,
}
mpl_set_figure_style(fig, **kwargs)
# Re-style the second axis so that its legend sits in the lower-right corner.
mpl_set_axis_style(
    axes[1],
    legend_kwargs={**fig_kwargs["axis_kwargs"]["legend_kwargs"], "loc": "lower right"},
)