# Targeted Analysis
See the [Targeted_Analysis.md](https://github.com/biorack/metatlas/blob/main/docs/Targeted_Analysis.md) file on GitHub for documentation on how to use this notebook.

#### Parameters
The next code block sets parameters that are used throughout the remainder of the notebook.

In [None]:
# pylint: disable=invalid-name,missing-module-docstring

# Project-specific parameters. Edit the values in this cell before running the notebook.

# source atlas name
# NOTE(review): None is only the placeholder default here; presumably a real atlas
# name has to be supplied before running - confirm.
source_atlas = None

# this atlas will be copied to an atlas named projectId_experimentName_sampleSet_polarity_analysisId
# where projectId is JGI Proposal ID Number
#       experimentName is short text description from field 4 (0-indexed) of LCMS filename
#       sampleSet is commonly Pilot, Final - from field 5 (0-indexed) of LCMS filename
#       polarity is 'POS' or 'NEG'
#       analysisId is usernameX where X is the analysis number

# one of 'positive' or 'negative'
polarity = "positive"

# one of 'ISTDsEtc', 'FinalEMA-HILIC', 'FinalEMA-C18'
output_type = "FinalEMA-HILIC"

# an integer, increment if you need to redo your analysis
# will be appended to your username to create analysis_id
analysis_number = 0

# experiment ID that must match the parent folder containing the LCMS output files
# An example experiment ID is '20201116_JGI-AK_LH_506489_SoilWarm_final_QE-HF_HILICZ_USHXG01530'
experiment = "REPLACE ME"

# Exclude files with names containing any of the substrings in this list. E.g., ['peas', 'beans']
exclude_files = []

# Exclude groups with names containing any of the substrings in this list.
# 'POS' or 'NEG' will be auto-appended later, so you shouldn't use them here.
# NOTE(review): exclude_groups is not referenced by any code cell visible in this
# notebook - confirm how (or whether) it reaches metatlas.
exclude_groups = ["QC", "InjBl"]

# thresholds for filtering out compounds with weak MS1 signals
# set to None to disable a filter
num_points = 5
peak_height = 4e5

# threshold for filtering out compounds with poor MS2 spectra similarity
# Should be a value in range 0 to 1. Set to None to disable this filter.
msms_score = None

# include MSMS fragment ions in the output documents?
# NOTE(review): export_msms_fragment_ions is not referenced by any code cell visible
# in this notebook - confirm how (or whether) it reaches metatlas.
export_msms_fragment_ions = False

# list of substrings that will group together when creating groups
# this provides additional grouping beyond the default grouping on field #12
groups_controlled_vocab = ["QC", "InjBl", "ISTD"]

# List of tuples, each containing a color name string and a substring pattern.
# Lines in the EIC plot will be colored by the first substring pattern
# that has a match within the name of the hdf5_file. The order they are
# listed in your list is the order they are displayed in the overlays
# (first is front, last is back). Named colors available in matplotlib
# are here: https://matplotlib.org/3.1.0/gallery/color/named_colors.html
# or use hexadecimal values '#000000'. Lines default to black.
line_colors = [("red", "ExCtrl"), ("green", "TxCtrl"), ("blue", "InjBl")]

# Setting this to True will remove the cache of MSMS hits.
# If you don't see MSMS data for any of your compounds in the RT adjuster GUI,
# then you might want to try setting this to True. However, it will
# make your notebook take significantly longer to run.
# The cache is per experiment, so clearing the cache will impact other
# notebooks for this same experiment.
clear_cache = False

# The rest of this block contains project independent parameters

# to use an older version of the metatlas source code, set this to a commit id,
# branch name, or tag. If None, then use the "main" branch.
source_code_version_id = None

# Full path to the directory where you want this notebook to store data.
# A subdirectory will be auto created within this directory for each project.
# You can place this anywhere on cori's filesystem, but placing it within your
# global home directory is recommended so that you do not need to worry about
# your data being purged. Each project will take on the order of 100 MB.
project_directory = "/global/homes/FIRST-INITIAL-OF-USERNAME/USERNAME/metabolomics_projects"

# ID from Google Drive URL for base output folder.
# The default value is the ID that corresponds to 'JGI_Metabolomics_Projects'.
google_folder = "0B-ZDcHbPi-aqZzE5V3hOZFc0dms"

# maximum number of CPUs to use
# when running on jupyter.nersc.gov, you are not allowed to set this above 4
max_cpus = 4

# Threshold for how much status information metatlas functions print in the notebook
# levels are 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'
log_level = "INFO"

In [None]:
# pylint: disable=wrong-import-position,import-error,missing-class-docstring
import logging  # noqa: E402
from pathlib import Path  # noqa: E402


class StopExecution(Exception):
    """Exception raised to halt the current cell without a traceback being shown.

    IPython lets an exception object render its own traceback through a
    ``_render_traceback_`` method; the one defined here renders nothing.
    """

    def _render_traceback_(self):
        """Return None so that no traceback text is produced for this exception."""


# Make sure the notebook is running on the "Metatlas Targeted" kernel, installing the
# kernel spec if needed, then import metatlas and run its notebook setup.
logger = logging.getLogger("metatlas.jupyter")

# Kernel spec written verbatim to kernel.json. "{connection_file}" is intentionally left
# as a literal placeholder: Jupyter substitutes it when it launches the kernel.
kernel_def = """{"argv":["shifter","--entrypoint","--image=doejgi/metatlas_shifter:latest","/usr/local/bin/python","-m",
                 "ipykernel_launcher","-f","{connection_file}"],"display_name": "Metatlas Targeted","language": "python",
                 "metadata": { "debugger": true }}"""
kernel_file_name = Path.home() / ".local" / "share" / "jupyter" / "kernels" / "metatlas-targeted" / "kernel.json"
kernel_file_name.parent.mkdir(parents=True, exist_ok=True)

# Read the first line of /metatlas_image_version; a missing file is treated as "0".
try:
    with Path("/metatlas_image_version").open(mode="r", encoding="utf-8") as f:
        # readline() yields "" for an empty file (readlines()[0] would raise IndexError),
        # so an empty version file is handled like any other non-"1" version below.
        version = f.readline().rstrip()
except FileNotFoundError:
    version = "0"

# Probing under /root can raise PermissionError; treat that as "no root kernel".
try:
    has_root_kernel = Path("/root/.local/share/jupyter/kernels/papermill/kernel.json").is_file()
except PermissionError:
    has_root_kernel = False

# Unless the version is "1" and a kernel spec already exists (the user's or the root
# papermill one), (re)write the user's kernel spec and stop so the kernel can be restarted.
if not (version == "1" and (kernel_file_name.is_file() or has_root_kernel)):
    with kernel_file_name.open(mode="w", encoding="utf-8") as f:
        # kernel_def is a single str, so write() is the right call (writelines() would
        # iterate it character by character to produce the same bytes).
        f.write(kernel_def)
    logger.critical("CRITICAL: Notebook kernel has been updated. Restart kernel and re-run notebook.")
    raise StopExecution

# An ImportError here is reported as a wrong-kernel problem and stops the cell.
try:
    from metatlas.tools import notebook  # noqa: E402
except ImportError as err:
    logger.critical('CRITICAL: Please check that the kernel is set to "Metatlas Targeted".')
    raise StopExecution from err

# setup() is called before metatlas_dataset is imported; keep this order.
# NOTE(review): presumably setup() applies log_level and selects the source version
# given by source_code_version_id - confirm against metatlas.tools.notebook.
notebook.setup(log_level, source_code_version_id)
from metatlas.datastructures import metatlas_dataset as mads  # noqa: E402

In [None]:
# Call mads.pre_annotation with the values from the parameters cell and keep the result
# as metatlas_dataset, which the remaining cells operate on. All arguments except
# clear_cache are positional, so their order must match the function's signature.
# NOTE(review): exclude_groups and export_msms_fragment_ions are set in the parameters
# cell but are not passed here - confirm whether that is intended.
metatlas_dataset = mads.pre_annotation(
    source_atlas,
    experiment,
    output_type,
    polarity,
    analysis_number,
    project_directory,
    google_folder,
    groups_controlled_vocab,
    exclude_files,
    num_points,
    peak_height,
    max_cpus,
    msms_score,
    clear_cache=clear_cache,
)

#### Annotation GUI
If you are re-running this notebook and do not need to make additional changes to RT min/max bounds, then you can skip running the next code cell. Skipping will save you from calculating MSMS hits twice.

In [None]:
# Create the annotation GUI, starting at compound index 0, with line_colors from the
# parameters cell passed as the colors; the returned object is kept in agui.
# NOTE(review): width and height are presumably the plot dimensions - confirm units.
agui = metatlas_dataset.annotation_gui(compound_idx=0, width=15, height=3, colors=line_colors)

In [None]:
# Hand the dataset to mads.post_annotation; the return value is not used.
# NOTE(review): presumably this generates the output documents for the analysis -
# confirm against metatlas.datastructures.metatlas_dataset.
mads.post_annotation(metatlas_dataset)