# Parameters

The next code block sets parameters that are used throughout the remainder of the notebook.

In [None]:
# pylint: disable=invalid-name,missing-module-docstring

# An integer; increment it if you need to redo your analysis.
# It will be appended to your username to create analysis_id.
analysis_number = 0

# Experiment ID; it must match the name of the parent folder containing the LCMS output files.
# An example experiment ID is '20201116_JGI-AK_LH_506489_SoilWarm_final_QE-HF_HILICZ_USHXG01530'
experiment = "REPLACE ME"

# Groups are only used in RT prediction if their name contains one of the substrings in this list.
# NOTE(review): include_groups, exclude_files and groups_controlled_vocab are not passed to any
# call in the cells of this notebook -- confirm whether they are still consumed elsewhere.
include_groups = ["_QC_"]

# Exclude files with names containing any of the substrings in this list, e.g. ['peas', 'beans'].
exclude_files = []

# List of substrings that will be grouped together when creating groups.
# This provides additional grouping beyond the default grouping on field #12.
groups_controlled_vocab = ["QC", "InjBl", "ISTD"]

# The rest of this block contains project-independent parameters.

# Full path to the directory where you have cloned the metatlas git repo.
# If you ran the 'git clone ...' command in your home directory on Cori,
# then you'll want '/global/homes/FIRST-INITIAL-OF-USERNAME/USERNAME/metatlas'
# where the uppercase letters are replaced based on your NERSC username.
metatlas_repo_path = "/global/homes/FIRST-INITIAL-OF-USERNAME/USERNAME/metatlas"

# Full path to the directory where you want this notebook to store data.
# A subdirectory will be automatically created within this directory for each project.
# You can place this anywhere on Cori's filesystem, but placing it within your
# global home directory is recommended so that you do not need to worry about
# your data being purged. Each project will take on the order of 100 MB.
project_directory = "/global/homes/FIRST-INITIAL-OF-USERNAME/USERNAME/metabolomics_projects"

# ID from the Google Drive URL of the base output folder.
# The default value is the ID that corresponds to 'JGI_Metabolomics_Projects'.
google_folder = "0B-ZDcHbPi-aqZzE5V3hOZFc0dms"

# Thresholds for filtering out compounds with weak MS1 signals.
num_points = 5
peak_height = 4e5

# Maximum number of CPUs to use.
# When running on jupyter.nersc.gov, you are not allowed to set this above 4.
max_cpus = 4

# Threshold for how much status information metatlas functions print in the notebook.
# Levels are 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'.
log_level = "INFO"

In [None]:
# pylint: disable=wrong-import-position,import-error
# Make the user's metatlas clone importable, import the notebook helpers from it,
# and hand the requested log level to notebook.setup().
import logging  # noqa: E402
import os  # noqa: E402
import sys  # noqa: E402

# Insert at position 0 so the user's clone is searched before any other location.
sys.path.insert(0, metatlas_repo_path)
logger = logging.getLogger("metatlas.jupyter")
logger.debug("sys.executable=%s", sys.executable)
logger.debug("sys.path=%s.", sys.path)
logger.debug("metatlas_repo_path=%s.", metatlas_repo_path)
if not os.path.exists(metatlas_repo_path):
    # Use the named logger (not the root-level logging.critical) so this message is
    # routed like every other message in this cell and no implicit basicConfig() runs.
    logger.critical(
        "Directory set for metatlas_repo_path parameter (%s) does not exist or is not accessible.",
        metatlas_repo_path,
    )
    raise ValueError("Invalid metatlas_repo_path parameter in Jupyter Notebook.")
try:
    from metatlas.tools import notebook, predict_rt  # noqa: E402
except ModuleNotFoundError as err:
    # err.name is the module that could not be located. If it is the metatlas package
    # itself (or its tools subpackage), the repo path is wrong; any other missing
    # module points at the kernel's environment instead.
    if err.name in ("metatlas", "metatlas.tools"):
        logger.critical(
            "Could not find metatlas module at %s. "
            "In the Parameters block, please check the value of metatlas_repo_path.",
            metatlas_repo_path,
        )
    else:
        logger.critical('Please check that the kernel is set to "Metatlas Targeted".')
    # Re-raise the original exception so its message and `name` are preserved.
    raise
notebook.setup(log_level)

In [None]:
# Derive the analysis identifiers for RT prediction from the notebook parameters.
ids = predict_rt.get_analysis_ids_for_rt_prediction(
    experiment,
    project_directory,
    google_folder,
    analysis_number,
)

In [None]:
# Generate the RT prediction outputs for the analysis described by `ids`,
# passing the CPU limit, repo path and MS1 signal thresholds set in the parameters cell.
predict_rt.generate_outputs(
    ids,
    max_cpus,
    metatlas_repo_path,
    num_points,
    peak_height,
)