In [1]:
# pylint: disable=wrong-import-position,import-error,missing-class-docstring
import pandas as pd
import sys
from datetime import datetime
from IPython.display import display
import yaml
from pathlib import Path

# Allow up to 300 characters per column when pandas renders a frame.
pd.set_option("display.max_colwidth", 300)
# One timestamp (YYYYmmddHHMMSS) for the whole run; later cells pass it to the save calls.
timestamp = f"{datetime.now():%Y%m%d%H%M%S}"

class StopExecution(Exception):
    """Raised to stop the current cell without rendering a traceback.

    IPython calls ``_render_traceback_`` on an exception, when it is defined,
    to obtain the traceback lines to display. Supplying no lines leaves only
    the message printed just before the ``raise`` visible to the user.
    """

    def _render_traceback_(self):
        """Return an empty list of traceback lines so nothing is rendered."""
        # The hook is expected to return a list of strings; an empty list
        # (instead of an implicit None) keeps the error output well-formed.
        return []

# Kernel spec (JSON text) that launches ipykernel inside the metatlas shifter image.
kernel_def = """{"argv":["shifter","--entrypoint","--image=ghcr.io/biorack/metatlas/metatlas_shifter:latest","/usr/local/bin/python","-m",
                 "ipykernel_launcher","-f","{connection_file}"],"display_name": "Metatlas Targeted","language": "python",
                 "metadata": { "debugger": true }}"""
# Per-user location where the kernel spec is installed.
kernel_file_name = Path.home() / ".local" / "share" / "jupyter" / "kernels" / "metatlas-targeted" / "kernel.json"
try:
    has_root_kernel = Path("/root/.local/share/jupyter/kernels/papermill/kernel.json").is_file()
except PermissionError:
    # /root may not be readable by this user; treat that as "no root kernel".
    has_root_kernel = False
if not has_root_kernel and not kernel_file_name.is_file():
    # First run for this user: write the kernel spec, then stop so the user can switch kernels.
    kernel_file_name.parent.mkdir(parents=True, exist_ok=True)
    with kernel_file_name.open(mode="w", encoding="utf-8") as f:
        # write() emits the string in one call; writelines() on a str would
        # iterate over it character by character.
        f.write(kernel_def)
    print('CRITICAL: Notebook kernel has been installed. Set kernel to "Metatlas Targeted" and re-run notebook.')
    raise StopExecution
# Import the metatlas notebook helpers; if they are not importable, tell the user
# to switch kernels and stop the cell without a traceback.
try:
    from metatlas.tools import notebook  # noqa: E402
except ImportError as err:
    print('CRITICAL: Set notebook kernel to "Metatlas Targeted" and re-run notebook.')
    raise StopExecution from err

# Commit id handed to notebook.setup() together with the "INFO" level string.
source_code_version_id = "f3db7e42d52ed8844421d3124358e0717f0b9a2d"
notebook.setup("INFO", source_code_version_id)

# NOTE(review): imported after notebook.setup() — presumably setup makes this
# package importable; confirm before moving it into the top import block.
import notebooks.standards_library.standard_annotation as sta

2025-05-06 19:06:12 [32mINFO    [0m Running on git commit: b'f3db7e42d52ed8844421d3124358e0717f0b9a2d' from 2025-05-06 11:32:18 -0700 -- 34 minutes ago


2025-05-06 19:06:13 [32mINFO    [0m NERSC=True


# Read config file

In [2]:
# NOTE(review): absolute, user-specific path — point this at your own copy of the config.
config_path = "/global/homes/b/bkieft/metatlas/notebooks/standards_library/ref_std_annotation.yaml"
# Explicit encoding so the file is decoded the same way regardless of locale.
with open(config_path, "r", encoding="utf-8") as config_file:
    config = yaml.safe_load(config_file)
# Later cells index config by key; fail here with a clear message if the file
# is empty or does not contain a top-level mapping.
if not isinstance(config, dict):
    raise ValueError(f"Expected a mapping of settings in {config_path}, got {type(config).__name__}")

# Extract EIC and Spectra information from files in the run table

In [3]:
# Either build the full dataset from the run table and save it, or load a
# previously saved copy, depending on config["full_data_from_cache"].
if config["full_data_from_cache"] is False:
    lcmsruns_table_with_adducts = sta.build_standard_lcmsrun_table(config)
    (
        eics_full,
        top_spectra_full,
        group_names_full,
        rt_peaks_full,
        atlas_full,
        mols_images,
    ) = sta.extract_data(lcmsruns_table_with_adducts, config, method="find_peaks")
    sta.handle_data(
        mode="save",
        config=config,
        timestamp=timestamp,
        file_suffix="full",
        data=(eics_full, top_spectra_full, group_names_full, rt_peaks_full, atlas_full, mols_images),
    )
elif config["full_data_from_cache"] is True:
    (
        eics_full,
        top_spectra_full,
        group_names_full,
        rt_peaks_full,
        atlas_full,
        mols_images,
    ) = sta.handle_data(mode="load", config=config, file_suffix="full")
else:
    # A non-boolean setting would otherwise skip both branches silently and
    # leave stale (or undefined) variables for the cells below.
    raise ValueError(
        f"config['full_data_from_cache'] must be true or false, got {config['full_data_from_cache']!r}"
    )

[19:07:40] ERROR: 

[19:07:40] ERROR: 

[19:07:40] ERROR: 

[19:07:40] ERROR: 

[19:07:40] ERROR: 

[19:07:40] ERROR: 

[19:07:40] ERROR: 

[19:07:40] ERROR: 

[19:07:40] ERROR: 

[19:07:40] ERROR: 

[19:07:40] ERROR: 

[19:07:40] ERROR: 

[19:07:40] ERROR: 

[19:07:40] ERROR: 

[19:07:40] ERROR: 



 Extracting data from lcmsruns:   0%|          | 0/120 [00:00<?, ?compound group/s]

Saving data to: /global/homes/b/bkieft/metatlas_junkdrawer/example_data/schellermetasci/cache/20250506190611_ref_stds_full.pkl


# Create interactive plot to choose adduct rt peaks for each standard compound

In [100]:
# Launch the interactive selection plots unless selections are loaded from cache.
if config["selected_data_from_cache"] is False:
    processed_data = sta.process_data_for_plotting(
        eics_full, top_spectra_full, group_names_full, rt_peaks_full, config
    )
    # Start from empty dicts; they are handed to the plotting call below.
    # NOTE(review): presumably the GUI fills them in as selections are made — confirm.
    selected_adducts_dict = {}
    ambiguous_adducts_dict = {}
    top_adducts_dict = {}
    sta.create_interactive_plots_jupyterlab(
        processed_data, mols_images, selected_adducts_dict, ambiguous_adducts_dict, top_adducts_dict
    )
    # Run next cell after manual selection of adducts

elif config["selected_data_from_cache"] is True:
    print("Not initiating GUI for adduct selection, loading selected adducts from cache below.")

else:
    # A non-boolean setting would otherwise skip both branches without any message.
    raise ValueError(
        f"config['selected_data_from_cache'] must be true or false, got {config['selected_data_from_cache']!r}"
    )

VBox(children=(Output(), Output()))

In [103]:
# Display the current contents of selected_adducts_dict for inspection.
selected_adducts_dict

{'sorgoleone;;/global/cfs/cdirs/metatlas/raw_data/jgi/20241002_JGI_HS_510060_SorghExu_final1_EXP120B_HILICZ_USHXG02558/20241002_JGI_HS_510060_SorghExu_final1_EXP120B_HILICZ_USHXG02558_POS_MS2_RefStd-1_Sorgoleone_1_Rg70to1050-CE102040norm-200uM-S1_Run375.h5': ['[M+H]+||peak1',
  '[M+NH4]+||peak1'],
 'sorgoleone;;/global/cfs/cdirs/metatlas/raw_data/jgi/20241002_JGI_HS_510060_SorghExu_final1_EXP120B_HILICZ_USHXG02558/20241002_JGI_HS_510060_SorghExu_final1_EXP120B_HILICZ_USHXG02558_POS_MS2_RefStd-1_Sorgoleone_1_Rg70to1050-CE205060norm-200uM-S1_Run376.h5': ['[M+H]+||peak1',
  '[M+NH4]+||peak1'],
 'sorgoleone;;/global/cfs/cdirs/metatlas/raw_data/jgi/20241002_JGI_HS_510060_SorghExu_final1_EXP120B_HILICZ_USHXG02558/20241002_JGI_HS_510060_SorghExu_final1_EXP120B_HILICZ_USHXG02558_NEG_MS2_RefStd-1_Sorgoleone_1_Rg70to1050-CE102040norm-200uM-S1_Run377.h5': ['[M-H]-||peak1'],
 'sorgoleone;;/global/cfs/cdirs/metatlas/raw_data/jgi/20241002_JGI_HS_510060_SorghExu_final1_EXP120B_HILICZ_USHXG02558/20241

# Generate static summary reports for each compound after selections are made

In [98]:
# Save the selections and write static summary plots, or load saved selections.
if config["selected_data_from_cache"] is False:
    # Save first so the selections are on disk before the plots are written.
    sta.handle_data(
        mode="save",
        config=config,
        timestamp=timestamp,
        file_suffix="selected",
        data=(selected_adducts_dict, ambiguous_adducts_dict, top_adducts_dict, processed_data),
    )
    # Save summary plots of selected compounds+adducts after completing GUI
    sta.generate_static_summary_plots(processed_data, selected_adducts_dict, top_adducts_dict, config)

elif config["selected_data_from_cache"] is True:
    (
        selected_adducts_dict,
        ambiguous_adducts_dict,
        top_adducts_dict,
        processed_data,
    ) = sta.handle_data(mode="load", config=config, file_suffix="selected")

else:
    # A non-boolean setting would otherwise skip both branches silently and
    # leave stale (or undefined) selections for the cells below.
    raise ValueError(
        f"config['selected_data_from_cache'] must be true or false, got {config['selected_data_from_cache']!r}"
    )

Saving data to: /global/homes/b/bkieft/metatlas_junkdrawer/example_data/schellermetasci/cache/20250506190611_ref_stds_selected.pkl


 Writing summary plots for selected compounds:   0%|          | 0/8 [00:00<?, ? compound group/s]

2025-05-06 21:09:41 [37m[41mCRITICAL[0m Uncaught exception
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3577, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_805171/141666826.py", line 4, in <module>
    sta.generate_static_summary_plots(processed_data, selected_adducts_dict, top_adducts_dict, config) # Save summary plots of selected compounds+adducts after completing GUI
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/metatlas.ety4nerKCN/notebooks/standards_library/standard_annotation.py", line 3146, in generate_static_summary_plots
    fig.write_image(
  File "/usr/local/lib/python3.11/site-packages/plotly/basedatatypes.py", line 3827, in write_image
    return pio.write_image(self, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/p

SystemExit: 128

# Filter RT Peak, EICs, and Top Spectra by the selected compounds+adducts

In [58]:
# Filter the full data down to the selected compounds+adducts, or load the
# filtered data from cache and print a short summary of it.
if config["filtered_data_from_cache"] is False:
    eics_filtered, rt_peaks_filtered, top_spectra_filtered = sta.filter_by_selected(
        eics_full, rt_peaks_full, top_spectra_full, selected_adducts_dict, top_adducts_dict
    )
    # The saved tuple order (eics, top_spectra, rt_peaks) differs from the
    # return order above; it must match the unpacking in the load branch.
    sta.handle_data(
        mode="save",
        config=config,
        timestamp=timestamp,
        file_suffix="filtered",
        data=(eics_filtered, top_spectra_filtered, rt_peaks_filtered),
    )

elif config["filtered_data_from_cache"] is True:
    eics_filtered, top_spectra_filtered, rt_peaks_filtered = sta.handle_data(
        mode="load", config=config, file_suffix="filtered"
    )
    print(f"\nTotal unique compounds selected: {eics_filtered['compound_name'].nunique()}")
    print(f"Total unique compound+adduct entries selected: {eics_filtered['label'].nunique()}\n")
    print(f"Total EICs selected: {eics_filtered.shape[0]}")
    print(f"Total RT peaks selected: {rt_peaks_filtered.shape[0]}")
    print(f"Total MS2 spectra selected: {top_spectra_filtered.shape[0]}")

else:
    # A non-boolean setting would otherwise skip both branches silently and
    # leave stale (or undefined) filtered data for the cells below.
    raise ValueError(
        f"config['filtered_data_from_cache'] must be true or false, got {config['filtered_data_from_cache']!r}"
    )


Total unique compounds selected: 1
Total unique compound+adduct entries selected: 6

Total EICs selected: 6
Total RT peaks selected: 2
Total MS2 spectra selected: 2
Saving data to: /global/homes/b/bkieft/metatlas_junkdrawer/example_data/schellermetasci/cache/20250506190611_ref_stds_filtered.pkl


# Identify compounds not in the metatlas database Compounds table and store if necessary

In [59]:
# Check the selected compounds against the metatlas database (optionally storing
# missing ones), or load the results of a previous check from cache.
if config['metatlas_db_data_from_cache'] is False:
    # Export "all" for MSMS refs and "top" for EMA atlases
    rt_peaks_filtered_all_selected, rt_peaks_filtered_top_selected = sta.format_and_select_top_adducts(
        rt_peaks_filtered, top_adducts_dict
    )
    # Check if selected compounds are in metatlas DB
    in_db, notin_db = sta.search_for_matches_in_metatlas_db(rt_peaks_filtered_all_selected, check_by_flat=True)
    # Store selected compounds+adducts in metatlas db only when explicitly enabled
    if len(notin_db) > 0 and config['direct_store_to_compounds_table'] is True:
        sta.store_in_metatlas_db(notin_db)
    sta.check_db_deposit(rt_peaks_filtered_top_selected)
    sta.handle_data(
        mode="save",
        config=config,
        timestamp=timestamp,
        file_suffix="metatlas_db",
        data=(in_db, notin_db, rt_peaks_filtered_all_selected, rt_peaks_filtered_top_selected),
    )

elif config['metatlas_db_data_from_cache'] is True:
    (
        in_db,
        notin_db,
        rt_peaks_filtered_all_selected,
        rt_peaks_filtered_top_selected,
    ) = sta.handle_data(mode="load", config=config, file_suffix="metatlas_db")
    print(f"\n'All' peaks dataset (for MSMS refs): {rt_peaks_filtered_all_selected.shape[0]} total compound peaks.")
    print(f"'Top' peaks dataset (for EMA atlases): {rt_peaks_filtered_top_selected.shape[0]} best compound peaks.\n")

else:
    # A non-boolean setting would otherwise skip both branches silently and
    # leave stale (or undefined) results for the cells below.
    raise ValueError(
        f"config['metatlas_db_data_from_cache'] must be true or false, got {config['metatlas_db_data_from_cache']!r}"
    )


Working on dataset: top
	Checking for differing RTs between CEs and polarities, which are unexpected...
		Group ('HILICZ', 'sorgoleone'): All RT values for ['102040norm'] and ['POS'] are within 0.05 mins of each other (0.0).

	Grouping by monoisotopic_mass and identify isomers in the datasets...
		No isomers found in top data.

	Selecting best collision energy row by intensity for the top adduct(s) per compound...
		Selected 1 row and removed 0 row(s) for ('HILICZ', 'POS', 'sorgoleone', '[M+H]+').
		Selected 1 row and removed 0 row(s) for ('HILICZ', 'POS', 'sorgoleone', '[M+NH4]+').

Working on dataset: all
	Checking for differing RTs between CEs and polarities, which are unexpected...
		Group ('HILICZ', 'sorgoleone'): All RT values for ['102040norm'] and ['POS'] are within 0.05 mins of each other (0.0).

	Grouping by monoisotopic_mass and identify isomers in the datasets...
		No isomers found in all data.

'All' peaks dataset (for MSMS refs): 2 total compound peaks.
'Top' peaks datas

 Searching for matches in metatlas db:   0%|          | 0/2 [00:00<?, ? compound/s]


Summary of compounds already in the metatlas database:



Unnamed: 0,query_label,query_matching_criterion,query_to_db,db_match
0,sorgoleone,inchi_key,FGWRUVXUQWGLOX-AFJQJTPPSA-N,[FGWRUVXUQWGLOX-AFJQJTPPSA-N]



All compounds are already in the metatlas database.

All new entries found in the database.

Saving data to: /global/homes/b/bkieft/metatlas_junkdrawer/example_data/schellermetasci/cache/20250506190611_ref_stds_metatlas_db.pkl


# Identify compounds+adducts not in atlases and set up new atlas creation

In [60]:
# Find selected compounds+adducts that are not in the current EMA atlases and
# RT-correct them, or load the result of a previous run from cache.
if config["ema_atlas_data_from_cache"] is False:
    ema_atlases_data = sta.get_ema_atlas_data(config["current_ema_atlases"])
    rt_peaks_filtered_top_selected_formatted = sta.convert_rt_peaks_to_atlas_format(rt_peaks_filtered_top_selected)
    matches_to_atlases, nonmatches_to_atlases = sta.search_for_matches_in_atlases(
        rt_peaks_filtered_top_selected_formatted, ema_atlases_data
    )

    # The RT correction step has its own cache switch, independent of the outer one.
    if config["rt_correction_data_from_cache"] is False:
        print("Setting up RT correction for compounds not yet in atlases using baseline correction method:\n")
        baseline_qc, experimental_qc, baseline_to_experimental_qc = sta.get_qc_experimental_atlas(
            nonmatches_to_atlases, config["current_qc_atlases"], include_istds=True
        )
        baseline_correction_inputs = sta.create_baseline_correction_input(
            nonmatches_to_atlases, baseline_to_experimental_qc
        )
        baseline_correction_outputs = sta.rt_correction_from_baseline(baseline_correction_inputs, config)
        sta.handle_data(
            mode="save",
            config=config,
            timestamp=timestamp,
            file_suffix="rt_correction",
            data=(baseline_to_experimental_qc, baseline_correction_outputs),
        )

    elif config["rt_correction_data_from_cache"] is True:
        baseline_to_experimental_qc, baseline_correction_outputs = sta.handle_data(
            mode="load", config=config, file_suffix="rt_correction"
        )

    else:
        # Without this, baseline_correction_outputs below would be stale or undefined.
        raise ValueError(
            "config['rt_correction_data_from_cache'] must be true or false, "
            f"got {config['rt_correction_data_from_cache']!r}"
        )

    nonmatches_to_atlases_rt_corrected = sta.substitute_corrected_rt_values(
        nonmatches_to_atlases, baseline_correction_outputs
    )
    sta.handle_data(
        mode="save",
        config=config,
        timestamp=timestamp,
        file_suffix="ema_atlases",
        data=(
            nonmatches_to_atlases_rt_corrected,
            ema_atlases_data,
            baseline_to_experimental_qc,
            baseline_correction_outputs,
        ),
    )

elif config["ema_atlas_data_from_cache"] is True:
    (
        nonmatches_to_atlases_rt_corrected,
        ema_atlases_data,
        baseline_to_experimental_qc,
        baseline_correction_outputs,
    ) = sta.handle_data(mode="load", config=config, file_suffix="ema_atlases")
    print(f"Total compounds to add to EMA atlases per chromatography: {nonmatches_to_atlases_rt_corrected['chromatography'].nunique()}")

else:
    # A non-boolean setting would otherwise skip both branches silently and
    # leave stale (or undefined) atlas data for the cells below.
    raise ValueError(
        f"config['ema_atlas_data_from_cache'] must be true or false, got {config['ema_atlas_data_from_cache']!r}"
    )

 Searching in HILICZ positive atlas:   0%|          | 0/2 [00:00<?, ? compound/s]

 Searching in HILICZ negative atlas: 0 compound [00:00, ? compound/s]

 Searching in C18 positive atlas: 0 compound [00:00, ? compound/s]

 Searching in C18 negative atlas: 0 compound [00:00, ? compound/s]


None of the compounds+adducts searched were found in the atlases.

There are 2 compounds+adducts are not yet in any atlases. View with 'nonmatches_to_atlases'.

Setting up RT correction for compounds not yet in atlases using baseline correction method:

	Getting all QC files for project /global/cfs/cdirs/metatlas/raw_data/jgi/20241002_JGI_HS_510060_SorghExu_final1_EXP120B_HILICZ_USHXG02558

	Retrieving baseline HILICZ QC atlas: /global/homes/b/bkieft/metatlas-data/HILIC/HILIC_QCv7_positive.tsv

	Collecting QC MS1 data for HILICZ...



 Collecting MS1 data for QC compounds:   0%|          | 0/52 [00:00<?, ? file/s]

	Performing RT correction...



Calculating RT correction model:   0%|          | 0/1 [00:00<?, ? chromatography/s]

	HILICZ RT correction results:


Unnamed: 0,label,adduct,polarity,rt_peak_baseline,rt_peak_experimental,rt_peak_corrected,rt_min_corrected,rt_max_corrected,rt_diff_experimental_vs_corrected
0,sorgoleone,[M+H]+,positive,,0.801184,0.645454,0.145454,1.145454,0.15573
1,sorgoleone,[M+NH4]+,positive,,0.801184,0.645454,0.145454,1.145454,0.15573
2,ABMBA (unlabeled),,QC,1.093806,1.21834,1.096205,0.596205,1.596205,0.122135
3,N-acetyl-glucosamine (U - 13C),,QC,6.707815,6.418233,6.580829,6.080829,7.080829,-0.162596
4,adenine (U - 15N),,QC,2.677602,2.834389,2.827321,2.327321,3.327321,0.007068
5,"alanine (U - 13C, 15N)",,QC,13.405091,13.240681,13.400469,12.900469,13.900469,-0.159788
6,"arginine (U - 13C, 15N)",,QC,16.939915,17.013107,16.987934,16.487934,17.487934,0.025173
7,"asparagine (U - 13C, 15N)",,QC,14.368089,14.252848,14.37583,13.87583,14.87583,-0.122982
8,"aspartic acid (U - 13C, 15N)",,QC,16.13036,16.043652,16.07848,15.57848,16.57848,-0.034828
9,"cystine (U - 13C, 15N)",,QC,16.904308,16.930342,16.910628,16.410628,17.410628,0.019714


Saving data to: /global/homes/b/bkieft/metatlas_junkdrawer/example_data/schellermetasci/cache/20250506190611_ref_stds_rt_correction.pkl
Formatted 2 RT-corrected compounds for insertion into HILICZ atlases.
Saving data to: /global/homes/b/bkieft/metatlas_junkdrawer/example_data/schellermetasci/cache/20250506190611_ref_stds_ema_atlases.pkl



The behavior of array concatenation with empty entries is deprecated. In a future version, this will no longer exclude empty items when determining the result dtype. To retain the old behavior, exclude the empty entries before the concat operation.


The behavior of array concatenation with empty entries is deprecated. In a future version, this will no longer exclude empty items when determining the result dtype. To retain the old behavior, exclude the empty entries before the concat operation.


The behavior of array concatenation with empty entries is deprecated. In a future version, this will no longer exclude empty items when determining the result dtype. To retain the old behavior, exclude the empty entries before the concat operation.



# Create new EMA atlas with top selected reference standards added

In [69]:
# Display the RT-corrected entries that were not matched in any atlas.
nonmatches_to_atlases_rt_corrected

Unnamed: 0,standard_lcmsrun,chromatography,compound_name,adduct,polarity,rt_peak,mz,smiles,peak_index,inchi,inchi_key,neutralized_inchi,neutralized_inchi_key,permanent_charge,formula,mono_isotopic_molecular_weight,collision_energy,label,rt_min,rt_max,mz_tolerance,mz_tolerance_units,in_metatlas
0,/global/cfs/cdirs/metatlas/raw_data/jgi/20241002_JGI_HS_510060_SorghExu_final1_EXP120B_HILICZ_USHXG02558/20241002_JGI_HS_510060_SorghExu_final1_EXP120B_HILICZ_USHXG02558_POS_MS2_RefStd-1_Sorgoleone_1_Rg70to1050-CE102040norm-200uM-S1_Run375.h5,HILICZ,sorgoleone,[M+H]+,positive,0.645454,359.221685,COC1=CC(=O)C(=C(C1=O)CCCCCCC/C=C\C/C=C\CC=C)O,peak1,"InChI=1S/C22H30O4/c1-3-4-5-6-7-8-9-10-11-12-13-14-15-16-18-21(24)19(23)17-20(26-2)22(18)25/h3,5-6,8-9,17,24H,1,4,7,10-16H2,2H3/b6-5-,9-8-",FGWRUVXUQWGLOX-AFJQJTPPSA-N,"InChI=1S/C22H30O4/c1-3-4-5-6-7-8-9-10-11-12-13-14-15-16-18-21(24)19(23)17-20(26-2)22(18)25/h3,5-6,8-9,17,24H,1,4,7,10-16H2,2H3/b6-5-,9-8-",FGWRUVXUQWGLOX-AFJQJTPPSA-N,0,C22H30O4,358.214409,102040norm,sorgoleone,0.145454,1.145454,5,ppm,True
1,/global/cfs/cdirs/metatlas/raw_data/jgi/20241002_JGI_HS_510060_SorghExu_final1_EXP120B_HILICZ_USHXG02558/20241002_JGI_HS_510060_SorghExu_final1_EXP120B_HILICZ_USHXG02558_POS_MS2_RefStd-1_Sorgoleone_1_Rg70to1050-CE102040norm-200uM-S1_Run375.h5,HILICZ,sorgoleone,[M+NH4]+,positive,0.645454,376.248232,COC1=CC(=O)C(=C(C1=O)CCCCCCC/C=C\C/C=C\CC=C)O,peak1,"InChI=1S/C22H30O4/c1-3-4-5-6-7-8-9-10-11-12-13-14-15-16-18-21(24)19(23)17-20(26-2)22(18)25/h3,5-6,8-9,17,24H,1,4,7,10-16H2,2H3/b6-5-,9-8-",FGWRUVXUQWGLOX-AFJQJTPPSA-N,"InChI=1S/C22H30O4/c1-3-4-5-6-7-8-9-10-11-12-13-14-15-16-18-21(24)19(23)17-20(26-2)22(18)25/h3,5-6,8-9,17,24H,1,4,7,10-16H2,2H3/b6-5-,9-8-",FGWRUVXUQWGLOX-AFJQJTPPSA-N,0,C22H30O4,358.214409,102040norm,sorgoleone,0.145454,1.145454,5,ppm,True


In [72]:
# Write updated EMA atlases when enabled in the config; otherwise just show the
# new atlas rows held in memory.
if config['save_new_ema_atlases'] is True:
    # NOTE(review): only the 'HILICZ' entry of ema_atlases_data is passed here —
    # confirm this is intended when other chromatographies are present.
    ema_atlas_ids, ema_atlas_names = sta.update_and_save_ema_atlases(
        nonmatches_to_atlases_rt_corrected, ema_atlases_data['HILICZ'], config, timestamp
    )
    sta.handle_data(
        mode="save",
        config=config,
        timestamp=timestamp,
        file_suffix="new_atlas_ids",
        data=(ema_atlas_ids, ema_atlas_names),
    )
    # NOTE(review): reloads what was just saved; kept as in the original — confirm whether it is needed.
    ema_atlas_ids, ema_atlas_names = sta.handle_data(mode="load", config=config, file_suffix="new_atlas_ids")

    if config['direct_deposit_new_emas'] is True:
        print("New EMA atlases have been saved to disk and deposited in the metatlas database:")
        display(pd.DataFrame.from_dict(ema_atlas_ids))
    # Plain string: the original used an f-string with no placeholders.
    print("\nNew EMA atlas locations:")
    display(pd.DataFrame.from_dict(ema_atlas_names))

elif config['save_new_ema_atlases'] is False:
    print("No new EMA atlases saved to disk, as 'save_new_ema_atlases' is set to False in the config file.")
    print("Here is the new atlas data in memory:")
    display(nonmatches_to_atlases_rt_corrected)

else:
    # A non-boolean setting would otherwise skip both branches without any message.
    raise ValueError(
        f"config['save_new_ema_atlases'] must be true or false, got {config['save_new_ema_atlases']!r}"
    )

2025-05-06 20:39:40 [37m[41mCRITICAL[0m Uncaught exception
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3577, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_805171/3518290779.py", line 2, in <module>
    ema_atlas_ids, ema_atlas_names = sta.update_and_save_ema_atlases(nonmatches_to_atlases_rt_corrected, {ema_atlases_data['HILICZ']}, config, timestamp)
                                                                                                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: unhashable type: 'dict'


SystemExit: 128

# Identify compounds not in MSMS refs and set up new MSMS refs creation

In [11]:
# Compare the selected peaks against the current MSMS refs table, or load the
# result of a previous comparison from cache.
if config["msms_refs_data_from_cache"] is False:
    msms_refs = sta.get_msms_refs(msms_refs_path=config["current_msms_refs_path"])
    rt_peaks_filtered_all_selected_formatted = sta.format_for_msms_refs(
        rt_peaks_filtered_all_selected, top_spectra_filtered, msms_refs, config
    )
    in_msms_refs, notin_msms_refs = sta.search_for_matches_in_msms_refs(
        rt_peaks_filtered_all_selected_formatted, msms_refs, check_by_flat=True
    )
    # in_msms_refs is not part of the saved tuple, so it is unavailable after a cache load.
    sta.handle_data(
        mode="save",
        config=config,
        timestamp=timestamp,
        file_suffix="msms_refs",
        data=(msms_refs, notin_msms_refs, rt_peaks_filtered_all_selected_formatted),
    )

elif config["msms_refs_data_from_cache"] is True:
    msms_refs, notin_msms_refs, rt_peaks_filtered_all_selected_formatted = sta.handle_data(
        mode="load", config=config, file_suffix="msms_refs"
    )

else:
    # A non-boolean setting would otherwise skip both branches silently and
    # leave stale (or undefined) MSMS refs data for the next cell.
    raise ValueError(
        f"config['msms_refs_data_from_cache'] must be true or false, got {config['msms_refs_data_from_cache']!r}"
    )

Loaded MSMS refs with 216409 rows and 17 columns.


 Searching for matches in MSMS refs:   0%|          | 0/12 [00:00<?, ? compound/s]


12 compounds+adducts are not yet in MSMS refs. Check notin_msms_refs to view.

Saving data to: /global/homes/b/bkieft/metatlas/notebooks/standards_library/example_data/schellermetasci/cache/20250506103111_ref_stds_msms_refs.pkl


# Create new MSMS refs table with all selected reference standards added

In [12]:
# Write the updated MSMS refs table when enabled in the config; otherwise just
# show the entries that would be added.
if config['save_new_msms_refs'] is True:
    sta.update_and_save_msms_refs(msms_refs, notin_msms_refs, config, timestamp)

elif config['save_new_msms_refs'] is False:
    print("No new MSMS refs saved to disk, as 'save_new_msms_refs' is set to False in the config file.")
    print("Here is the new MSMS refs data in memory:")
    display(notin_msms_refs)

else:
    # A non-boolean setting would otherwise skip both branches without any message.
    raise ValueError(
        f"config['save_new_msms_refs'] must be true or false, got {config['save_new_msms_refs']!r}"
    )

Existing MSMS refs went from 216409 to 216421 compounds.
	New MSMS refs: /global/homes/b/bkieft/metatlas/notebooks/standards_library/example_data/schellermetasci/updated_MSMS_refs/msms_refs_20250506103111.tab
