# RT Alignment
See the [Targeted_Analysis.md](https://github.com/biorack/metatlas/blob/main/docs/Targeted_Analysis.md) file on GitHub for documentation on how to use this notebook.

#### Parameters
The next code block sets parameters that are used throughout the remainder of the notebook.

In [None]:
# pylint: disable=invalid-name,missing-module-docstring

# Every parameter in this cell is left as None in the template. The setup cell
# later snapshots these globals into `parameters` and passes them to
# config.get_config().

# The name of a workflow defined in the configuration file
workflow_name = None

# Experiment ID that must match the parent folder containing the LCMS output files.
# An example experiment ID is '20201116_JGI-AK_LH_506489_SoilWarm_final_QE-HF_HILICZ_USHXG01530'
experiment = None

# An integer; increment it if you need to rerun this notebook for the same experiment
rt_alignment_number = None

# Source atlas name
source_atlas = None

# List of substrings that will group together when creating groups.
# This provides additional grouping beyond the default grouping on field #12
groups_controlled_vocab = None

# A group will only be used in the RT alignment model if its name contains
# one of the substrings in this list.
# NOTE(review): each key below presumably scopes the filter to one output type,
# with 'always' applying to all of them -- confirm in Targeted_Analysis.md.
include_groups = dict(  # noqa: C408
    always=None,
    rt_alignment=None,
    qc_outputs=None,
    gui=None,
    ids_spreadsheet=None,
    chromatograms=None,
    data_sheets=None,
    box_plots=None,
)

# Exclude groups with names containing any of the substrings in this list.
# Generally you will want to list the polarities you are not using,
# such as ['NEG', 'FPS'] for a positive polarity analysis.
exclude_groups = dict(  # noqa: C408
    always=None,
    rt_alignment=None,
    qc_outputs=None,
    gui=None,
    ids_spreadsheet=None,
    chromatograms=None,
    data_sheets=None,
    box_plots=None,
)

# Include LCMS runs with names containing any of the substrings in this list. E.g., ['peas', 'beans']
include_lcmsruns = dict(  # noqa: C408
    always=None,
    rt_alignment=None,
    qc_outputs=None,
    gui=None,
    ids_spreadsheet=None,
    chromatograms=None,
    data_sheets=None,
    box_plots=None,
)

# Exclude files with names containing any of the substrings in this list. E.g., ['peas', 'beans']
exclude_lcmsruns = dict(  # noqa: C408
    always=None,
    rt_alignment=None,
    qc_outputs=None,
    gui=None,
    ids_spreadsheet=None,
    chromatograms=None,
    data_sheets=None,
    box_plots=None,
)

# Override the rt_min and rt_max values in the atlas.
# Both rt_min_delta and rt_max_delta are *added* to rt_peak, so rt_min_delta < rt_max_delta.
# Normally you will have rt_min_delta < 0 and rt_max_delta > 0,
# but you can have both of them be positive or both negative for extreme cases.
# Set to None to use the rt_min and rt_max values saved in the template atlas.
# Only impacts the atlas used for RT alignment, not subsequent atlases.
rt_min_delta = None
rt_max_delta = None

# mz_tolerance sets a ppm threshold for MS1 mz accuracy.
# mz_tolerance values usually come from the atlas, but if a value is not
# set in the atlas, then mz_tolerance_default is used.
mz_tolerance_default = None

# If mz_tolerance_override is not None, then all mz_tolerance values in the
# atlas will be replaced with mz_tolerance_override.
# mz_tolerance_override has precedence over mz_tolerance_default
mz_tolerance_override = None

# Tolerance for matching MS2 fragments in units of Daltons
frag_mz_tolerance = None

# How MSMS spectra are compared. Must be one of 'distance', 'intensity', or 'shape'.
# Default is 'distance'
resolve_msms_matches_by = None

# List of InChi Keys to be ignored when creating the RT alignment model.
inchi_keys_not_in_model = None

# The QC run or name of the summary statistic generated from all QC runs
# that will be used as the data source for the dependent variable for RT model generation.
# Can be the name of a summary statistic generated from all QC runs:
# "median", "mean", "min", "max"
# Or a specific QC run, by supplying the name of an h5 file (without the path)
dependent_data_source = None

# If True, use a 2nd order polynomial model for RT alignment.
# If False, use a linear model
use_poly_model = None

# One of "atlases", "notebook_generation", "notebook_execution", None
# Terminates processing of RT-Alignment.ipynb early
# None generates all outputs
# normal processing order is:
# 1. generate RT alignment model
#    makes Actual_vs_Aligned_RTs.pdf, RT_Alignment_Model_Comparison.csv, rt_alignment_model.txt
# 2. create follow up analysis notebooks
# 3. executes follow up analysis notebooks
stop_before = None


# The rest of this block contains project independent parameters

# Configuration file location
config_file_name = None

# To use an older version of the metatlas source code, set this to a commit id,
# branch name, or tag. If None, then use the "main" branch.
source_code_version_id = None

# Full path to the directory where you want this notebook to store data.
# A subdirectory will be auto created within this directory for each project.
# You can place this anywhere on cori's filesystem, but placing it within your
# global home directory is recommended so that you do not need to worry about
# your data being purged. Each project will take on the order of 100 MB.
project_directory = None

# ID from Google Drive URL for base output folder.
# The default value is the ID that corresponds to 'JGI_Metabolomics_Projects'.
google_folder = None

# Maximum number of CPUs to use.
# When running on jupyter.nersc.gov, you are not allowed to set this above 4
max_cpus = None

# Threshold for how much status information metatlas functions print in the notebook.
# Levels are 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'
log_level = None

In [None]:
# pylint: disable=wrong-import-position,import-error,missing-class-docstring
# Snapshot the globals defined so far, skipping underscore-prefixed names and
# the listed interactive-session names. This must run before the imports
# below so that imported modules are not captured as parameters.
parameters = {
    name: value
    for name, value in globals().items()
    if not (name[0] == "_" or name in ("In", "Out", "get_ipython", "exit", "quit", "open"))
}

import logging  # noqa: E402
from pathlib import Path  # noqa: E402


class StopExecution(Exception):
    """Raised to halt the notebook run after a critical message has been logged."""

    def _render_traceback_(self):
        # NOTE(review): presumably the hook IPython consults when displaying an
        # exception; returning None is meant to suppress the traceback output.
        # Confirm against the IPython documentation.
        return None


logger = logging.getLogger("metatlas.jupyter")
# Kernel spec (JSON) that launches ipykernel inside the metatlas shifter image.
kernel_def = """{"argv":["shifter","--entrypoint","--image=ghcr.io/biorack/metatlas/metatlas_shifter:latest","/usr/local/bin/python","-m",
                 "ipykernel_launcher","-f","{connection_file}"],"display_name": "Metatlas Targeted","language": "python",
                 "metadata": { "debugger": true }}"""
kernel_file_name = Path.home() / ".local" / "share" / "jupyter" / "kernels" / "metatlas-targeted" / "kernel.json"
try:
    has_root_kernel = Path("/root/.local/share/jupyter/kernels/papermill/kernel.json").is_file()
except PermissionError:
    # Not permitted to inspect /root: treat the root kernel as absent.
    has_root_kernel = False
if not has_root_kernel and not kernel_file_name.is_file():
    # No kernel spec found in either location: install one for the current
    # user, then stop so the user can switch kernels and re-run.
    kernel_file_name.parent.mkdir(parents=True, exist_ok=True)
    with kernel_file_name.open(mode="w", encoding="utf-8") as f:
        # write() emits the spec in a single call; writelines() on a str
        # would iterate it and write one character at a time.
        f.write(kernel_def)
    logger.critical('CRITICAL: Notebook kernel has been installed. Set kernel to "Metatlas Targeted" and re-run notebook.')
    raise StopExecution
try:
    from metatlas.tools import config, notebook  # noqa: E402
    from metatlas.targeted import rt_alignment  # noqa: E402
except ImportError as err:
    # metatlas is not importable from the current kernel.
    logger.critical('CRITICAL: Set notebook kernel to "Metatlas Targeted" and re-run notebook.')
    raise StopExecution from err
# Resolve the configuration, workflow and analysis objects from the parameter
# snapshot, then set up the notebook with the resolved log level and source version.
configuration, workflow, analysis = config.get_config(parameters)
notebook.setup(analysis.parameters.log_level, analysis.parameters.source_code_version_id)

In [None]:
# Hand off to metatlas' rt_alignment.run with the experiment ID, the rerun
# counter, the configuration and workflow objects returned by
# config.get_config(), and the snapshot of notebook parameters.
rt_alignment.run(
    experiment=experiment,
    rt_alignment_number=rt_alignment_number,
    configuration=configuration,
    workflow=workflow,
    set_parameters=parameters,
)