In [None]:
import pandas as pd
import re
import pyteomics.mgf
from tqdm.notebook import tqdm

In [None]:
# Export the spectra whose "spectype" equals `spec_filter` from the MS2
# collision-energy library in two flavours:
#   * `spectra`          - all params kept, name rewritten, "specindex" added
#   * `filtered_spectra` - same spectra, params reduced to the keys in `columns`
infile = r"C:\git\msn_library\library/20230312_mce_library_pos_onlyMS2_collenergies.mgf"
spectra = []
filtered_spectra = []
columns = ["name", "pepmass", "charge", "mslevel", "ionmode", "specindex"]
spec_filter = "ALL_ENERGIES"

# Running number of *selected* spectra; skipped spectra do not consume an index.
index = 0
with pyteomics.mgf.MGF(infile) as f_in:
    for spectrum_dict in tqdm(f_in):
        params = spectrum_dict["params"]

        # Spectra without a "spectype" entry are treated as "SINGLE_BEST".
        merge = params.get("spectype", "SINGLE_BEST")
        if merge != spec_filter:
            continue

        # Combined identifier written back as the spectrum name:
        # index_name_formula_adduct_collenergy_spectype
        # (a missing name/formula/adduct/collision energy raises KeyError)
        name = params["name"]
        formula = params["formula"]
        adduct = params["adduct"]
        coll = params["collision energy"]

        params["name"] = f"{index:06d}_{name}_{formula}_{adduct}_{coll}_{merge}"
        params["specindex"] = index

        spectra.append(spectrum_dict)

        # Shallow copy is sufficient: only the "params" entry is replaced on the
        # copy, so the full-metadata spectrum stored above is left untouched.
        spectrum_dict_copy = spectrum_dict.copy()
        spectrum_dict_copy["params"] = {key: params[key] for key in columns if key in params}
        filtered_spectra.append(spectrum_dict_copy)
        index += 1


# file_mode="w" is passed explicitly: depending on the pyteomics version the
# default for a file-name output is append mode, in which case re-running this
# cell would add a second copy of every spectrum to the existing files.
# NOTE(review): the "numbered" file name previously started with "220230312_"
# (stray leading "2"); it now uses the same "20230312_" prefix as every other
# path in this notebook. Update downstream readers if they relied on the old name.
pyteomics.mgf.write(filtered_spectra, r'C:\git\msn_library\library\library_sirius\20230312_mce_library_pos_collenergies_lib_onlyMS2_sirius_{}.mgf'.format(spec_filter), file_mode="w")
pyteomics.mgf.write(spectra, r'C:\git\msn_library\library\library_sirius\20230312_mce_library_pos_collenergies_lib_onlyMS2_numbered_{}.mgf'.format(spec_filter), file_mode="w")

In [None]:
# Export the spectra whose "spectype" equals `spec_filter` AND whose "usi"
# matches the well selection below, in two flavours:
#   * `spectra`          - all params kept, name rewritten, "specindex" added
#   * `filtered_spectra` - same spectra, params reduced to the keys in `columns`
infile = r"C:\git\msn_library\library/20230312_mce_library_pos_all_lib_MSn.mgf"
spectra = []
filtered_spectra = []
columns = ["name", "pepmass", "charge", "mslevel", "ionmode", "specindex"]
spec_filter = "ALL_MSN_TO_PSEUDO_MS2"

# Well selection, e.g. 1D3_A1, ..., 1D3_C24. Compiled once, outside the loop.
well_pattern = re.compile(r"1D3_[ABC][\d]")

# Running number of *selected* spectra; skipped spectra do not consume an index.
index = 0
with pyteomics.mgf.MGF(infile) as f_in:
    for spectrum_dict in tqdm(f_in):
        params = spectrum_dict["params"]

        # Spectra without a "spectype" entry are treated as "SINGLE_BEST".
        # The spectype check runs first so that spectra which are skipped anyway
        # are never required to carry a "usi" entry.
        merge = params.get("spectype", "SINGLE_BEST")
        if merge != spec_filter:
            continue

        # A selected-type spectrum without "usi" still raises KeyError.
        well = params["usi"]
        matches_well_selection = bool(well_pattern.search(well))
        if not matches_well_selection:
            continue

        # Combined identifier written back as the spectrum name:
        # index_name_formula_adduct_collenergy_spectype
        # (a missing name/formula/adduct/collision energy raises KeyError)
        name = params["name"]
        formula = params["formula"]
        adduct = params["adduct"]
        coll = params["collision energy"]

        params["name"] = f"{index:06d}_{name}_{formula}_{adduct}_{coll}_{merge}"
        params["specindex"] = index

        spectra.append(spectrum_dict)

        # Shallow copy is sufficient: only the "params" entry is replaced on the
        # copy, so the full-metadata spectrum stored above is left untouched.
        spectrum_dict_copy = spectrum_dict.copy()
        spectrum_dict_copy["params"] = {key: params[key] for key in columns if key in params}
        filtered_spectra.append(spectrum_dict_copy)
        index += 1


# file_mode="w" is passed explicitly: depending on the pyteomics version the
# default for a file-name output is append mode, in which case re-running this
# cell would add a second copy of every spectrum to the existing files.
pyteomics.mgf.write(filtered_spectra, r'C:\git\msn_library\library\library_sirius\20230312_mce_library_pos_1D3_A_C_MSn_sirius_{}.mgf'.format(spec_filter), file_mode="w")
pyteomics.mgf.write(spectra, r'C:\git\msn_library\library\library_sirius\20230312_mce_library_pos_1D3_A_C_MSn_numbered_{}.mgf'.format(spec_filter), file_mode="w")