In [None]:
import numpy as np
import pandas as pd
import sys
from datetime import datetime
import os
from IPython.display import display
import yaml

# Put the metatlas checkout on the import path before importing the project module below.
# NOTE(review): this is a user-specific absolute path — confirm it matches the
# checkout location for whoever runs this notebook.
sys.path.insert(1, '/global/homes/b/bkieft/metatlas')
import notebooks.standards_library.standard_annotation as sta

# Widen pandas' column display so long cell values are not truncated at the default width.
pd.set_option("display.max_colwidth", 300)

# Single run-wide timestamp (YYYYmmddHHMMSS) reused by every save call in this notebook.
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")

# Enter Reference Standard information

In [None]:
# Load configuration
# NOTE(review): user-specific absolute path — confirm it points at the config
# file intended for this run.
config_path = "/global/homes/b/bkieft/metatlas/notebooks/standards_library/msms_set_config.yaml"

# Decode explicitly as UTF-8 so parsing does not depend on the locale default.
with open(config_path, "r", encoding="utf-8") as config_file:
    config = yaml.safe_load(config_file)

# yaml.safe_load returns None for an empty file and a list/scalar for non-mapping
# YAML; every later cell indexes `config` by key, so fail here with a clear message.
if not isinstance(config, dict):
    raise ValueError(
        f"Expected a YAML mapping in {config_path}, got {type(config).__name__}"
    )

# Get EIC and Spectra information from run table

In [None]:
# Only rebuild the run table and re-extract data when the config asks for fresh
# full data; otherwise this cell is a no-op.
if config["new_full_data"] is True:
    lcmsruns_table_with_adducts = sta.build_standard_lcmsrun_table(
        config['standards_input_file'],
        include_polarities=config["include_polarities"],
        include_chromatographies=config["include_chromatographies"],
        include_adducts=config["include_adducts"],
    )

    # extract_data's result is unpacked into six names used by later cells.
    (
        eics_full,
        top_spectra_full,
        group_names_full,
        rt_peaks_full,
        atlas_full,
        mols_images,
    ) = sta.extract_data(
        lcmsruns_table=lcmsruns_table_with_adducts,
        ppm_tolerance=config["ppm_tolerance"],
        method="find_peaks",
    )

# Save and/or read full data

In [None]:
# Persist freshly extracted full data, or reload a previous save.
# The flag is compared by identity against True/False, so any other value
# (None, a string, a number) would match neither branch; that case is rejected
# explicitly instead of leaving the *_full variables undefined for later cells.
if config["new_full_data"] is True:
    sta.handle_data(
        mode="save",
        save_path=config['standards_output_path'],
        timestamp=timestamp,
        data=(eics_full, top_spectra_full, group_names_full, rt_peaks_full, atlas_full, mols_images),
        file_suffix="full",
    )

elif config["new_full_data"] is False:
    # Unpack in the same order the tuple was saved in above.
    (
        eics_full,
        top_spectra_full,
        group_names_full,
        rt_peaks_full,
        atlas_full,
        mols_images,
    ) = sta.handle_data(
        mode="load",
        save_path=config['standards_output_path'],
        file_suffix="full",
    )

else:
    raise ValueError(
        f'config["new_full_data"] must be a boolean (true/false), got {config["new_full_data"]!r}'
    )

# Create interactive plot and choose adducts

In [None]:
# Prepare plotting inputs and launch the interactive plots, but only when a
# fresh selection is requested in the config.
if config["new_selected_data"] is True:
    processed_data = sta.process_data_for_plotting(
        eics_full,
        top_spectra_full,
        group_names_full,
        rt_peaks_full,
        config["include_adducts"],
        sort_by='run',  # 'run', 'specs'
        # Use the next two arguments to subset by compound (str) or run number (int).
        subset_by_compound="sorgoleone",
        subset_by_run=None,
    )

    # Three separate, initially empty dicts are handed to the plotting call.
    # NOTE(review): presumably populated through the interactive widgets — confirm.
    selected_adducts_dict, ambiguous_adducts_dict, top_adducts_dict = {}, {}, {}
    sta.create_interactive_plots(
        processed_data,
        mols_images,
        selected_adducts_dict,
        ambiguous_adducts_dict,
        top_adducts_dict,
    )

# Save and/or read selected data

In [None]:
# Persist the adduct selections made above, or reload a previous save.
# The flag is compared by identity against True/False, so any other value would
# match neither branch; that case is rejected explicitly instead of leaving the
# selection variables undefined for later cells.
if config["new_selected_data"] is True:
    sta.handle_data(
        mode="save",
        save_path=config['standards_output_path'],
        timestamp=timestamp,
        data=(selected_adducts_dict, ambiguous_adducts_dict, top_adducts_dict, processed_data),
        file_suffix="selected",
    )

elif config["new_selected_data"] is False:
    # Unpack in the same order the tuple was saved in above.
    (
        selected_adducts_dict,
        ambiguous_adducts_dict,
        top_adducts_dict,
        processed_data,
    ) = sta.handle_data(
        mode="load",
        save_path=config['standards_output_path'],
        file_suffix="selected",
    )

else:
    raise ValueError(
        f'config["new_selected_data"] must be a boolean (true/false), got {config["new_selected_data"]!r}'
    )

# Save summary plots of selected compounds+adducts after completing GUI

In [None]:
# Static summary plots are only generated when a fresh selection was made in this run.
if config["new_selected_data"] is True:
    sta.generate_static_summary_plots(
        processed_data,
        selected_adducts_dict,
        top_adducts_dict,
        config["standards_output_path"],
    )

# Filter RT Peak, EICs, and Top Spectra by selected compounds+adducts

In [None]:
# Re-filter the full data by the selected/top adducts only when requested.
# Note the unpacking order here is (eics, rt_peaks, top_spectra).
if config["new_filtered_data"] is True:
    (
        eics_filtered,
        rt_peaks_filtered,
        top_spectra_filtered,
    ) = sta.filter_by_selected(
        eics_full,
        rt_peaks_full,
        top_spectra_full,
        selected_adducts_dict,
        top_adducts_dict,
    )

# Save and/or read filtered data

In [None]:
# Persist the freshly filtered data, or reload a previous save.
# The flag is compared by identity against True/False, so any other value would
# match neither branch; that case is rejected explicitly instead of leaving the
# *_filtered variables undefined for later cells.
if config["new_filtered_data"] is True:
    sta.handle_data(
        mode="save",
        save_path=config['standards_output_path'],
        timestamp=timestamp,
        data=(eics_filtered, top_spectra_filtered, rt_peaks_filtered),
        file_suffix="filtered",
    )

elif config["new_filtered_data"] is False:
    # Unpack in the same order the tuple was saved in above.
    (
        eics_filtered,
        top_spectra_filtered,
        rt_peaks_filtered,
    ) = sta.handle_data(
        mode="load",
        save_path=config['standards_output_path'],
        file_suffix="filtered",
    )

else:
    raise ValueError(
        f'config["new_filtered_data"] must be a boolean (true/false), got {config["new_filtered_data"]!r}'
    )

# Format RT peaks data for adding to metatlas DB, atlases, and MSMS refs

In [None]:
# Export "all" for MSMS refs and "top" for EMA atlases.
(
    rt_peaks_filtered_all_selected,
    rt_peaks_filtered_top_selected,
) = sta.format_and_select_top_adducts(
    rt_peaks_filtered,
    top_adducts_dict,
)

# Check if selected compounds are in metatlas DB

In [None]:
# Search the metatlas DB using the top-selected peaks; the result is unpacked
# into the `in_db` / `notin_db` pair.
in_db, notin_db = sta.search_for_matches_in_metatlas_db(
    rt_peaks_filtered_top_selected,
    check_by_flat=True,
)

# Store selected compounds+adducts in metatlas DB

In [None]:
# NOTE(review): the DB write below is commented out, so this cell currently does
# nothing; confirm whether it is meant to be re-enabled manually when `notin_db`
# should be stored.
# if len(notin_db) > 0:
#     sta.store_in_metatlas_db(notin_db)

In [None]:
# NOTE(review): disabled call with a hard-coded InChIKey; confirm whether this
# check should be kept in the notebook or removed.
# sta.test_metatlas_db_insertion(inchi_key="FGWRUVXUQWGLOX-AFJQJTPPSA-N", table="Compounds")

# Check if selected compounds/adducts are in the atlases

In [None]:
# Fetch atlas data for the EMA atlases listed under `current_ema_atlases` in the config.
ema_atlases_data = sta.get_ema_atlas_data(
    config["current_ema_atlases"],
)

In [None]:
# Convert the top-selected RT peaks into the atlas format used for the atlas search below.
rt_peaks_filtered_top_selected_formatted = sta.convert_rt_peaks_to_atlas_format(
    rt_peaks_filtered_top_selected,
)

In [None]:
# Search the formatted peaks against the EMA atlas data; the result is unpacked
# into the matches / non-matches pair.
(
    matches_to_atlases,
    nonmatches_to_atlases,
) = sta.search_for_matches_in_atlases(
    rt_peaks_filtered_top_selected_formatted,
    ema_atlases_data,
)

# Perform backward RT correction on compound data to align with baseline RTs

In [None]:
# Only rebuild the baseline-to-experimental QC data when fresh RT correction
# data is requested in the config.
if config["new_rt_correction_data"] is True:
    baseline_to_experimental_qc = sta.get_qc_experimental_atlas(
        nonmatches_to_atlases,
        config["current_qc_atlases"],
        include_istds=True,
    )

# Save/load RT correction data

In [None]:
# Persist the freshly built RT correction data, or reload a previous save.
# The flag is compared by identity against True/False, so any other value would
# match neither branch; that case is rejected explicitly instead of leaving
# `baseline_to_experimental_qc` undefined for later cells.
if config["new_rt_correction_data"] is True:
    sta.handle_data(
        mode="save",
        save_path=config['standards_output_path'],
        timestamp=timestamp,
        # Passed as a single object, not a tuple: the previous
        # `(baseline_to_experimental_qc)` was only a parenthesized expression,
        # and the load branch below assigns the result without unpacking.
        data=baseline_to_experimental_qc,
        file_suffix="rt_correction",
    )

elif config["new_rt_correction_data"] is False:
    baseline_to_experimental_qc = sta.handle_data(
        mode="load",
        save_path=config['standards_output_path'],
        file_suffix="rt_correction",
    )

else:
    raise ValueError(
        f'config["new_rt_correction_data"] must be a boolean (true/false), got {config["new_rt_correction_data"]!r}'
    )

In [None]:
# Step 1: assemble the correction inputs from the atlas non-matches and the QC data.
baseline_correction_inputs = sta.create_baseline_correction_input(
    nonmatches_to_atlases,
    baseline_to_experimental_qc,
)

# Step 2: run the RT correction for the chromatographies listed in the config.
baseline_correction_outputs = sta.rt_correction_from_baseline(
    baseline_correction_inputs,
    config["include_chromatographies"],
)

In [None]:
# Combine the atlas non-matches with the correction outputs; the result is kept
# under a new name so `nonmatches_to_atlases` itself is not rebound.
nonmatches_to_atlases_rt_corrected = sta.substitute_corrected_rt_values(
    nonmatches_to_atlases,
    baseline_correction_outputs,
)

# Store selected compound/adduct in metatlas atlases

In [None]:
# Update the EMA atlases with the RT-corrected non-matches; save_atlas=True is passed.
# NOTE(review): timestamp precedes the output path here, while the
# update_and_save_msms_refs call passes them in the opposite order — confirm
# both against the function signatures.
sta.update_and_save_atlases(
    ema_atlases_data,
    nonmatches_to_atlases_rt_corrected,
    timestamp,
    config["standards_output_path"],
    save_atlas=True,
)

# Check if selected compounds/adducts are in MSMS refs

In [None]:
# Read the current MSMS refs from the path given in the config.
msms_refs = sta.get_msms_refs(
    msms_refs_path=config["current_msms_refs_path"],
)

In [None]:
# Format all selected peaks (with their filtered top spectra) for the MSMS refs,
# using the metadata block from the config.
rt_peaks_filtered_all_selected_formatted = sta.format_for_msms_refs(
    rt_peaks_filtered_all_selected,
    top_spectra_filtered,
    msms_refs,
    config["msms_refs_metadata"],
)

In [None]:
# Search the formatted peaks against the current MSMS refs; the result is
# unpacked into the `in_msms_refs` / `notin_msms_refs` pair.
in_msms_refs, notin_msms_refs = sta.search_for_matches_in_msms_refs(
    rt_peaks_filtered_all_selected_formatted,
    msms_refs,
    check_by_flat=True,
)

# Store selected compound/adduct in MSMS refs

In [None]:
# Update the MSMS refs with the entries from `notin_msms_refs`; save_refs=True is passed.
sta.update_and_save_msms_refs(
    msms_refs,
    notin_msms_refs,
    config["standards_output_path"],
    timestamp,
    save_refs=True,
)