## Pré-processamento de Metabolômica Não Direcionada
========================================

O fluxo de trabalho universal para metabolômica não direcionada consiste sempre na detecção de `features` nos arquivos de amostra individuais de MS e sua vinculação a `consensus features` com valores comuns de m/z e tempo de retenção. Além disso, existem etapas opcionais, como detecção de adutos e anotação de `features` com espectros `MS2` associados.

In [None]:
!pip install pyopenms

In [1]:
from urllib.request import urlretrieve

# Base URL of the raw pyopenms-docs repository content on GitHub.
gh = "https://raw.githubusercontent.com/OpenMS/pyopenms-docs/master"
# Download the two example mzML files into the current working directory,
# keeping their original file names.
urlretrieve(gh + "/src/data/Metabolomics_1.mzML", "Metabolomics_1.mzML")
urlretrieve(gh + "/src/data/Metabolomics_2.mzML", "Metabolomics_2.mzML")

('Metabolomics_2.mzML', <http.client.HTTPMessage at 0x7f743410bd10>)

In [1]:
!mkdir data

In [2]:
import os

# Make 'data' the working directory; the download command that follows is
# invoked as ../download_massive.py, i.e. relative to this directory.
# NOTE(review): later cells address the files as 'data/<name>', which
# presumably requires switching back to the parent directory first — confirm.
os.chdir('data')
# Echo the working directory to confirm the change.
os.getcwd()

'/home/rsilva/Desktop/workshop/notebooks/data'

In [3]:
!python ../download_massive.py ftp://massive-ftp.ucsd.edu/v03/MSV000085496/

  dr = re.sub('.+edu/(v\d+/M.+)', '\\1', massive_url)
Downloading 200525_Dog_4h_01.mzXML...
--2025-09-30 07:37:40--  https://massive.ucsd.edu/ProteoSAFe/DownloadResultFile?file=f.MSV000085496%2Fccms_peak%2F200525_Dog_4h_01.mzML
Resolving massive.ucsd.edu (massive.ucsd.edu)... 132.249.211.33
Connecting to massive.ucsd.edu (massive.ucsd.edu)|132.249.211.33|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified
Saving to: ‘200525_Dog_4h_01.mzML’

200525_Dog_4h_01.mz     [           <=>      ]  21,76M   879KB/s    in 45s     

2025-09-30 07:38:26 (497 KB/s) - ‘200525_Dog_4h_01.mzML’ saved [22818869]

Downloading 200525_Dog_4h_02.mzXML...
--2025-09-30 07:38:26--  https://massive.ucsd.edu/ProteoSAFe/DownloadResultFile?file=f.MSV000085496%2Fccms_peak%2F200525_Dog_4h_02.mzML
Resolving massive.ucsd.edu (massive.ucsd.edu)... 132.249.211.33
Connecting to massive.ucsd.edu (massive.ucsd.edu)|132.249.211.33|:443... connected.
HTTP request sent, awaiting response... 200

Para cada arquivo `mzML`, execute a detecção de traços de massa, a detecção de picos de eluição e a detecção de _features_.

In [37]:
import os

# Collect the raw mzML inputs from the data directory. Only entries with an
# .mzML extension are kept, so stray files (logs, checkpoints, other formats)
# are never handed to the mzML loader. Entries whose name contains 'aligned'
# or 'feature' are outputs of later steps of this workflow and are skipped so
# that re-running the notebook does not process its own results.
mzML_files = [
    f'data/{x}'
    for x in os.listdir('data')
    if x.lower().endswith('.mzml') and 'aligned' not in x and 'feature' not in x
]
mzML_files

['data/200525_Mou_4h_02.mzML',
 'data/200525_HUMAN_Blank_03.mzML',
 'data/200525_Rat_Blank_03.mzML',
 'data/200525_Dog_Blank_01.mzML',
 'data/200525_HUMAN_4h_02.mzML',
 'data/200525_Dog_Blank_03.mzML',
 'data/200525_Rat_4h_01.mzML',
 'data/200525_Dog_4h_03.mzML',
 'data/200525_Mou_4h_03.mzML',
 'data/200525_Mou_4h_01.mzML',
 'data/200525_Rat_4h_03.mzML',
 'data/200525_Dog_4h_01.mzML',
 'data/200525_Rat_Blank_01.mzML',
 'data/200525_Dog_4h_02.mzML',
 'data/200525_Rat_4h_02.mzML',
 'data/200525_Dog_Blank_02.mzML',
 'data/200525_Mou_Blank_02.mzML',
 'data/200525_Mou_Blank_03.mzML',
 'data/200525_HUMAN_Blank_01.mzML',
 'data/200525_Rat_Blank_02.mzML',
 'data/200525_Mou_Blank_01.mzML',
 'data/200525_HUMAN_Blank_02.mzML',
 'data/200525_HUMAN_4h_01.mzML',
 'data/200525_HUMAN_4h_03.mzML']

In [38]:
import pyopenms as oms
import os

#mzML_files = ["Metabolomics_1.mzML", "Metabolomics_2.mzML"]

def getFeatureMaps(mzML_list, mass_error_ppm=15.0, noise_threshold_int=1.0e03):
    """Run feature detection on each mzML file and return the feature maps.

    Every file is loaded into an ``MSExperiment`` and passed through three
    steps: mass trace detection, elution peak detection and assembly of the
    deconvoluted mass traces into features (``FeatureFindingMetabo``).

    Args:
        mzML_list: Iterable of mzML file paths (str).
        mass_error_ppm: Value set for the "mass_error_ppm" parameter of the
            mass trace detection. The default of 15.0 is the value this
            function previously hard-coded.
        noise_threshold_int: Value set for the "noise_threshold_int"
            parameter of the mass trace detection. Data-dependent; the
            default of 1.0e03 is the value previously hard-coded.

    Returns:
        List with one ``FeatureMap`` per input file, in input order. Each map
        has unique ids assigned and its primary MS run path set to the
        (bytes-encoded) input path. Empty list for empty input.
    """
    feature_maps = []
    for file in mzML_list:
        # Load the mzML file into an MSExperiment object.
        print('Processing file:', file)
        exp = oms.MSExperiment()
        oms.MzMLFile().load(file, exp)

        # --- Mass trace detection ---
        mass_traces = []  # filled in place by mtd.run
        mtd = oms.MassTraceDetection()
        mtd_par = mtd.getDefaults()  # start from the defaults, then override
        # High-resolution instruments, e.g. orbitraps.
        mtd_par.setValue("mass_error_ppm", mass_error_ppm)
        # Data-dependent (usually works for orbitraps).
        mtd_par.setValue("noise_threshold_int", noise_threshold_int)
        mtd.setParameters(mtd_par)
        mtd.run(exp, mass_traces, 0)

        # --- Elution peak detection ---
        mass_traces_deconvol = []  # filled in place by epd.detectPeaks
        epd = oms.ElutionPeakDetection()
        epd_par = epd.getDefaults()
        # Fixed width filtering removes traces outside the
        # [min_fwhm: 1.0, max_fwhm: 60.0] interval.
        epd_par.setValue("width_filtering", "fixed")
        epd.setParameters(epd_par)
        epd.detectPeaks(mass_traces, mass_traces_deconvol)

        # --- Feature detection ---
        feature_map = oms.FeatureMap()  # receives the detected features
        chrom_out = []  # receives the chromatograms
        ffm = oms.FeatureFindingMetabo()
        ffm_par = ffm.getDefaults()
        # Drop mass traces that have no satellite isotopic traces.
        ffm_par.setValue("remove_single_traces", "true")
        ffm.setParameters(ffm_par)
        ffm.run(mass_traces_deconvol, feature_map, chrom_out)

        # Give every feature a new, valid unique id.
        feature_map.setUniqueIds()
        # Record which mzML file this map was derived from.
        feature_map.setPrimaryMSRunPath([file.encode()])
        feature_maps.append(feature_map)

    return feature_maps

feature_maps = getFeatureMaps(mzML_files)

Processing file: data/200525_Mou_4h_02.mzML
    Progress of 'mass trace detection':
    -- done [took 0.07 s (CPU), 0.03 s (Wall)] -- 
    Progress of 'elution peak detection':
    -- done [took 0.25 s (CPU), 0.02 s (Wall)] -- 
Processing file: data/200525_HUMAN_Blank_03.mzML


/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Mou_4h_02.mzML': Required attribute 'softwareRef' not present!
/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_HUMAN_Blank_03.mzML': Required attribute 'softwareRef' not present!


    Progress of 'assembling mass traces to features':
    -- done [took 0.10 s (CPU), 0.01 s (Wall)] -- 
    Progress of 'mass trace detection':
    -- done [took 0.05 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'elution peak detection':
    -- done [took 0.22 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'assembling mass traces to features':
    -- done [took 0.14 s (CPU), 0.01 s (Wall)] -- 
Processing file: data/200525_Rat_Blank_03.mzML
    Progress of 'mass trace detection':
Processing file: data/200525_Dog_Blank_01.mzML
    -- done [took 0.07 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'elution peak detection':
    -- done [took 0.23 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'assembling mass traces to features':
    -- done [took 0.14 s (CPU), 0.01 s (Wall)] -- 
    Progress of 'mass trace detection':
    -- done [took 0.07 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'elution peak detection':


/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Rat_Blank_03.mzML': Required attribute 'softwareRef' not present!
/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Dog_Blank_01.mzML': Required attribute 'softwareRef' not present!
/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_HUMAN_4h_02.mzML': Required attribute 'softwareRef' not present!


    -- done [took 0.21 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'assembling mass traces to features':
    -- done [took 0.10 s (CPU), 0.01 s (Wall)] -- 
Processing file: data/200525_HUMAN_4h_02.mzML
    Progress of 'mass trace detection':
    -- done [took 0.05 s (CPU), 0.03 s (Wall)] -- 
    Progress of 'elution peak detection':
    -- done [took 0.25 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'assembling mass traces to features':
    -- done [took 0.14 s (CPU), 0.01 s (Wall)] -- 
Processing file: data/200525_Dog_Blank_03.mzML


/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Dog_Blank_03.mzML': Required attribute 'softwareRef' not present!


    Progress of 'mass trace detection':
    -- done [took 0.03 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'elution peak detection':
    -- done [took 0.22 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'assembling mass traces to features':
    -- done [took 0.10 s (CPU), 0.01 s (Wall)] -- 
Processing file: data/200525_Rat_4h_01.mzML


/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Rat_4h_01.mzML': Required attribute 'softwareRef' not present!


    Progress of 'mass trace detection':
    -- done [took 0.03 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'elution peak detection':
    -- done [took 0.24 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'assembling mass traces to features':
Processing file: data/200525_Dog_4h_03.mzML
    -- done [took 0.15 s (CPU), 0.01 s (Wall)] -- 


/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Dog_4h_03.mzML': Required attribute 'softwareRef' not present!
/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Mou_4h_03.mzML': Required attribute 'softwareRef' not present!


    Progress of 'mass trace detection':
Processing file: data/200525_Mou_4h_03.mzML
    -- done [took 0.03 s (CPU), 0.03 s (Wall)] -- 
    Progress of 'elution peak detection':
    -- done [took 0.21 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'assembling mass traces to features':
    -- done [took 0.11 s (CPU), 0.01 s (Wall)] -- 
Processing file: data/200525_Mou_4h_01.mzML
    Progress of 'mass trace detection':
    -- done [took 0.03 s (CPU), 0.03 s (Wall)] -- 
    Progress of 'elution peak detection':
    -- done [took 0.21 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'assembling mass traces to features':
    -- done [took 0.09 s (CPU), 0.01 s (Wall)] -- 
    Progress of 'mass trace detection':
    -- done [took 0.04 s (CPU), 0.03 s (Wall)] -- 
    Progress of 'elution peak detection':
    -- done [took 0.30 s (CPU), 0.03 s (Wall)] -- 
    Progress of 'assembling mass traces to features':
Processing file: data/200525_Rat_4h_03.mzML


/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Mou_4h_01.mzML': Required attribute 'softwareRef' not present!
/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Rat_4h_03.mzML': Required attribute 'softwareRef' not present!
/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Dog_4h_01.mzML': Required attribute 'softwareRef' not present!


    -- done [took 0.11 s (CPU), 0.01 s (Wall)] -- 
    Progress of 'mass trace detection':
    -- done [took 0.04 s (CPU), 0.02 s (Wall)] -- 
Processing file: data/200525_Dog_4h_01.mzML
    Progress of 'elution peak detection':
    -- done [took 0.23 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'assembling mass traces to features':
    -- done [took 0.14 s (CPU), 0.01 s (Wall)] -- 
    Progress of 'mass trace detection':
    -- done [took 0.07 s (CPU), 0.03 s (Wall)] -- 
Processing file: data/200525_Rat_Blank_01.mzML
    Progress of 'elution peak detection':
    -- done [took 0.23 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'assembling mass traces to features':
    -- done [took 0.15 s (CPU), 0.01 s (Wall)] -- 
    Progress of 'mass trace detection':
    -- done [took 0.03 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'elution peak detection':
    -- done [took 0.21 s (CPU), 0.02 s (Wall)] -- 
Processing file: data/200525_Dog_4h_02.mzML
    Progress of 'assembling mass traces to features'

/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Rat_Blank_01.mzML': Required attribute 'softwareRef' not present!
/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Dog_4h_02.mzML': Required attribute 'softwareRef' not present!


    Progress of 'mass trace detection':
    -- done [took 0.04 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'elution peak detection':
Processing file: data/200525_Rat_4h_02.mzML
    -- done [took 0.26 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'assembling mass traces to features':
    -- done [took 0.15 s (CPU), 0.01 s (Wall)] -- 
    Progress of 'mass trace detection':
    -- done [took 0.03 s (CPU), 0.03 s (Wall)] -- 
    Progress of 'elution peak detection':
    -- done [took 0.25 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'assembling mass traces to features':
Processing file: data/200525_Dog_Blank_02.mzML


/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Rat_4h_02.mzML': Required attribute 'softwareRef' not present!
/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Dog_Blank_02.mzML': Required attribute 'softwareRef' not present!


    -- done [took 0.10 s (CPU), 0.01 s (Wall)] -- 
Processing file: data/200525_Mou_Blank_02.mzML
    Progress of 'mass trace detection':
    -- done [took 0.06 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'elution peak detection':
    -- done [took 0.25 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'assembling mass traces to features':
    -- done [took 0.10 s (CPU), 0.01 s (Wall)] -- 
    Progress of 'mass trace detection':
    -- done [took 0.05 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'elution peak detection':
    -- done [took 0.22 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'assembling mass traces to features':
    -- done [took 0.14 s (CPU), 0.01 s (Wall)] -- 
Processing file: data/200525_Mou_Blank_03.mzML


/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Mou_Blank_02.mzML': Required attribute 'softwareRef' not present!
/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Mou_Blank_03.mzML': Required attribute 'softwareRef' not present!


    Progress of 'mass trace detection':
Processing file: data/200525_HUMAN_Blank_01.mzML
    -- done [took 0.07 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'elution peak detection':
    -- done [took 0.24 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'assembling mass traces to features':
    -- done [took 0.15 s (CPU), 0.01 s (Wall)] -- 
    Progress of 'mass trace detection':
    -- done [took 0.06 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'elution peak detection':
Processing file: data/200525_Rat_Blank_02.mzML
    -- done [took 0.21 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'assembling mass traces to features':
    -- done [took 0.10 s (CPU), 0.01 s (Wall)] -- 
Processing file: data/200525_Mou_Blank_01.mzML
    Progress of 'mass trace detection':
    -- done [took 0.06 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'elution peak detection':
    -- done [took 0.24 s (CPU), 0.02 s (Wall)] -- 
Processing file: data/200525_HUMAN_Blank_02.mzML
    Progress of 'assembling mass traces to fea

/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_HUMAN_Blank_01.mzML': Required attribute 'softwareRef' not present!
/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Rat_Blank_02.mzML': Required attribute 'softwareRef' not present!
/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Mou_Blank_01.mzML': Required attribute 'softwareRef' not present!
/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_HUMAN_Blank_02.mzML': Required attribute 'softwareRef' not present!


    Progress of 'mass trace detection':
    -- done [took 0.03 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'elution peak detection':
    -- done [took 0.22 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'assembling mass traces to features':
    -- done [took 0.14 s (CPU), 0.01 s (Wall)] -- 
Processing file: data/200525_HUMAN_4h_01.mzML


/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_HUMAN_4h_01.mzML': Required attribute 'softwareRef' not present!


    Progress of 'mass trace detection':
    -- done [took 0.04 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'elution peak detection':
    -- done [took 0.24 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'assembling mass traces to features':
    -- done [took 0.10 s (CPU), 0.01 s (Wall)] -- 
Processing file: data/200525_HUMAN_4h_03.mzML
    Progress of 'mass trace detection':
    -- done [took 0.03 s (CPU), 0.03 s (Wall)] -- 
    Progress of 'elution peak detection':
    -- done [took 0.24 s (CPU), 0.02 s (Wall)] -- 
    Progress of 'assembling mass traces to features':
    -- done [took 0.10 s (CPU), 0.01 s (Wall)] -- 


/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_HUMAN_4h_03.mzML': Required attribute 'softwareRef' not present!


In [39]:
len(feature_maps)

24

In [40]:
len(mzML_files)

24

Alinhe os tempos de retenção das _features_ com base no `mapa de features` com o maior número de _features_ (mapa de referência).

In [41]:
def alignFeatureRT(feature_maps):
    """Align the retention times of all feature maps to the largest map.

    The feature map with the most features is used as the alignment
    reference (this works well if you have a pooled QC, for example). Every
    other map is aligned against it with
    ``MapAlignmentAlgorithmPoseClustering`` and its retention times are
    transformed in place.

    Args:
        feature_maps: List of ``FeatureMap`` objects. Each map must carry a
            "spectra_data" meta value whose first entry is the bytes-encoded
            path of its source file.

    Returns:
        Dict mapping the source file path (str) of every non-reference map
        to the ``TransformationDescription`` computed for it. Empty when
        ``feature_maps`` is empty.
    """
    trafos = {}
    if not feature_maps:
        # Nothing to align, and no reference map can be chosen.
        return trafos

    # Choose the reference by index instead of list.index(), which would
    # fall back to FeatureMap equality comparisons. Indices are walked
    # backwards so that, among equally sized maps, the last one wins.
    sizes = [feature_map.size() for feature_map in feature_maps]
    ref_index = max(reversed(range(len(sizes))), key=sizes.__getitem__)

    aligner = oms.MapAlignmentAlgorithmPoseClustering()

    # Parameter optimization.
    aligner_par = aligner.getDefaults()
    aligner_par.setValue("max_num_peaks_considered", -1)  # infinite
    # Never pair features with a larger m/z distance than this.
    aligner_par.setValue("pairfinder:distance_MZ:max_difference", 10.0)
    aligner_par.setValue("pairfinder:distance_MZ:unit", "ppm")
    aligner.setParameters(aligner_par)
    aligner.setReference(feature_maps[ref_index])

    transformer = oms.MapAlignmentTransformer()
    for index, feature_map in enumerate(feature_maps):
        if index == ref_index:
            continue  # the reference map keeps its retention times
        trafo = oms.TransformationDescription()  # receives the fitted transformation
        aligner.align(feature_map, trafo)
        trafos[feature_map.getMetaValue("spectra_data")[0].decode()] = trafo
        # NOTE(review): the trailing True is presumably the flag that keeps
        # the original RT as a meta value — confirm against the pyopenms API.
        transformer.transformRetentionTimes(feature_map, trafo, True)

    return trafos

trafos = alignFeatureRT(feature_maps)

Alinhar os arquivos `mzML` com base no alinhamento `FeatureMap` (opcional, somente para GNPS).

In [42]:
# align mzML files based on FeatureMap alignment and store as mzML files (for GNPS!)
def generateAlignedMzML(trafos, mzML_files):
    """Write an RT-aligned copy of every mzML file and return the new paths.

    Each file is loaded, its spectra are sorted, and the original path is
    stored on the experiment under the "mzML_path" meta value. If ``trafos``
    holds a transformation for the path it is applied to the experiment's
    retention times; files without an entry are written unchanged. The copy
    is stored next to the input with its last five characters (the ".mzML"
    suffix) replaced by "_aligned.mzML".

    Args:
        trafos: Mapping of mzML path to its ``TransformationDescription``.
        mzML_files: Sequence of mzML file paths (str).

    Returns:
        List of the "_aligned.mzML" paths, in input order.
    """
    for path in mzML_files:
        experiment = oms.MSExperiment()
        oms.MzMLFile().load(path, experiment)
        experiment.sortSpectra(True)
        experiment.setMetaValue("mzML_path", path)
        if path in trafos.keys():
            aligner = oms.MapAlignmentTransformer()
            aligner.transformRetentionTimes(experiment, trafos[path], True)
        oms.MzMLFile().store(path[:-5] + "_aligned.mzML", experiment)
    return [path[:-5] + "_aligned.mzML" for path in mzML_files]

mzML_files = generateAlignedMzML(trafos, mzML_files)

/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Mou_4h_02.mzML': Required attribute 'softwareRef' not present!
/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_HUMAN_Blank_03.mzML': Required attribute 'softwareRef' not present!
/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Rat_Blank_03.mzML': Required attribute 'softwareRef' not present!
/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Dog_Blank_01.mzML': Required attribute 'softwareRef' not present!
/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_HUMAN_4h_02.mzML': Required attribute 'softwareRef' not present!
/__w/OpenMS/OpenMS/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(105): While loading 'data/200525_Dog_Blank_03.mzML': Requ

In [43]:
mzML_files

['data/200525_Mou_4h_02_aligned.mzML',
 'data/200525_HUMAN_Blank_03_aligned.mzML',
 'data/200525_Rat_Blank_03_aligned.mzML',
 'data/200525_Dog_Blank_01_aligned.mzML',
 'data/200525_HUMAN_4h_02_aligned.mzML',
 'data/200525_Dog_Blank_03_aligned.mzML',
 'data/200525_Rat_4h_01_aligned.mzML',
 'data/200525_Dog_4h_03_aligned.mzML',
 'data/200525_Mou_4h_03_aligned.mzML',
 'data/200525_Mou_4h_01_aligned.mzML',
 'data/200525_Rat_4h_03_aligned.mzML',
 'data/200525_Dog_4h_01_aligned.mzML',
 'data/200525_Rat_Blank_01_aligned.mzML',
 'data/200525_Dog_4h_02_aligned.mzML',
 'data/200525_Rat_4h_02_aligned.mzML',
 'data/200525_Dog_Blank_02_aligned.mzML',
 'data/200525_Mou_Blank_02_aligned.mzML',
 'data/200525_Mou_Blank_03_aligned.mzML',
 'data/200525_HUMAN_Blank_01_aligned.mzML',
 'data/200525_Rat_Blank_02_aligned.mzML',
 'data/200525_Mou_Blank_01_aligned.mzML',
 'data/200525_HUMAN_Blank_02_aligned.mzML',
 'data/200525_HUMAN_4h_01_aligned.mzML',
 'data/200525_HUMAN_4h_03_aligned.mzML']

Mapear espectros MS2 para as _features_ como objetos `PeptideIdentification` (opcional, somente para GNPS).

In [44]:
def mapMS2(mzML_files, feature_maps):
    """Annotate feature maps with the MS2 spectra of their mzML file.

    Every mzML file is loaded and compared against each feature map: a map
    matches when the first entry of its "spectra_data" meta value (decoded)
    equals the experiment's "mzML_path" meta value. Matching maps are
    annotated through ``IDMapper`` and copied into a new ``FeatureMap`` in
    which the first protein identification (if any) and all peptide
    identifications get the identifier ``Identifier_<map index>``.

    Args:
        mzML_files: Sequence of mzML file paths to load.
        feature_maps: Sequence of ``FeatureMap`` objects to annotate.

    Returns:
        List of the newly built feature maps, one per (file, map) match, in
        the order the matches were found.
    """
    mapped_maps = []
    use_centroid_rt = False
    use_centroid_mz = True
    id_mapper = oms.IDMapper()
    for path in mzML_files:
        experiment = oms.MSExperiment()
        oms.MzMLFile().load(path, experiment)
        for index, source_map in enumerate(feature_maps):
            origin = source_map.getMetaValue("spectra_data")[0].decode()
            if origin != experiment.getMetaValue("mzML_path"):
                continue  # this map was derived from a different file

            id_mapper.annotate(
                source_map,
                [],  # no peptide identifications supplied
                [],  # no protein identifications supplied
                use_centroid_rt,
                use_centroid_mz,
                experiment,
            )

            # Copy the map, then empty the copy before refilling it.
            annotated = oms.FeatureMap(source_map)
            annotated.clear(False)

            # Set unique identifiers on protein and peptide identifications.
            identifier = f"Identifier_{index}"
            tagged_proteins = []
            protein_ids = source_map.getProteinIdentifications()
            if len(protein_ids) > 0:
                first_protein = protein_ids[0]
                first_protein.setIdentifier(identifier)
                tagged_proteins.append(first_protein)
            annotated.setProteinIdentifications(tagged_proteins)

            for feature in source_map:
                tagged_peptides = []
                for peptide_id in feature.getPeptideIdentifications():
                    peptide_id.setIdentifier(identifier)
                    tagged_peptides.append(peptide_id)
                feature.setPeptideIdentifications(tagged_peptides)
                annotated.push_back(feature)

            mapped_maps.append(annotated)
    return mapped_maps

feature_maps = mapMS2(mzML_files, feature_maps)

.
..
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<.> occurred 2 times
.
<..> occurred 48 times
3274 spectra and 0 chromatograms stored.
<Loading metabolite isotope model with 5% RMS error> occurred 48 times
3277 spectra and 0 chromatograms stored.
3266 spectra and 0 chromatograms stored.
<3274 spectra and 0 chromatograms stored.> occurred 2 times
3281 spectra and 0 chromatograms stored.
3271 spectra and 0 chromatograms stored.
3269 spectra and 0 chromatograms stored.
3277 spectra and 0 chromatograms stored.
3267 spectra and 0 chro

In [45]:
len(feature_maps)

24

Detectar adutos (opcional, somente para SIRIUS e GNPS Ion Identity Molecular Networking).

In [46]:
def getAdducts(feature_maps):
    """Run adduct detection on every feature map and store the results.

    Each map is processed with ``MetaboliteFeatureDeconvolution`` using a
    fixed list of potential adducts. Once all maps are processed, every
    resulting map is written as a featureXML file (needed for SIRIUS). The
    output path is the first "spectra_data" entry of the map with its last
    four characters replaced by "featureXML".

    Args:
        feature_maps: Iterable of ``FeatureMap`` objects.

    Returns:
        List of the adduct-annotated feature maps, in input order.
    """
    annotated_maps = []
    for source_map in feature_maps:
        deconvolver = oms.MetaboliteFeatureDeconvolution()
        params = deconvolver.getDefaults()
        adducts = [
            b"H:+:0.4",
            b"Na:+:0.2",
            b"NH4:+:0.2",
            b"H-1O-1:+:0.1",
            b"H-3O-2:+:0.1",
        ]
        params.setValue("potential_adducts", adducts)
        deconvolver.setParameters(params)
        result_map = oms.FeatureMap()
        # The two ConsensusMap arguments are throwaway outputs.
        deconvolver.compute(source_map, result_map, oms.ConsensusMap(), oms.ConsensusMap())
        annotated_maps.append(result_map)

    # For SIRIUS, store the feature maps as featureXML files.
    for result_map in annotated_maps:
        source_path = result_map.getMetaValue("spectra_data")[0].decode()
        oms.FeatureXMLFile().store(source_path[:-4] + "featureXML", result_map)
    return annotated_maps

feature_maps = getAdducts(feature_maps)

MassExplainer table size: 312
Generating Masses with threshold: -6.90776 ...
done
19 of 143 valid net charge compomer results did not pass the feature charge constraints
Inferring edges raised edge count from 132 to 159
Found 159 putative edges (of 599) and avg hit-size of 0.955975
Using solver 'coinor' ...
Optimal solution found!
 Branch and cut took 0.006277 seconds,  with objective value: 8.5608.
ILP score is: 8.5608
Agreeing charges: 234/234
MassExplainer table size: 312
Generating Masses with threshold: -6.90776 ...
done
28 of 134 valid net charge compomer results did not pass the feature charge constraints
Inferring edges raised edge count from 112 to 130
Found 130 putative edges (of 554) and avg hit-size of 1.07692
Using solver 'coinor' ...
Optimal solution found!
 Branch and cut took 0.0039 seconds,  with objective value: 7.0724.
ILP score is: 7.0724
Agreeing charges: 196/196
MassExplainer table size: 312
Generating Masses with threshold: -6.90776 ...
done
44 of 173 valid net c

In [47]:
len(feature_maps)

24

Vincular _features_ em um `ConsensusMap`.

In [48]:
def generateConsensusMap(feature_maps, consensus_filename="FeatureMatrix.consensusXML"):
    """Link features across all maps into a ConsensusMap and store it.

    Features are grouped with ``FeatureGroupingAlgorithmKD``. One column
    header is built per input map, carrying the base name of the map's
    source file (first "spectra_data" entry) and the map's feature count.

    Args:
        feature_maps: List of ``FeatureMap`` objects to link.
        consensus_filename: Path of the consensusXML file to write. This
            argument was previously ignored in favor of a hard-coded
            "FeatureMatrix.consensusXML"; the default keeps that file name.

    Returns:
        The resulting ``ConsensusMap``, with column headers and unique ids
        set.
    """
    feature_grouper = oms.FeatureGroupingAlgorithmKD()

    consensus_map = oms.ConsensusMap()
    file_descriptions = consensus_map.getColumnHeaders()

    for i, feature_map in enumerate(feature_maps):
        file_description = file_descriptions.get(i, oms.ColumnHeader())
        file_description.filename = os.path.basename(
            feature_map.getMetaValue("spectra_data")[0].decode()
        )
        file_description.size = feature_map.size()
        file_descriptions[i] = file_description

    feature_grouper.group(feature_maps, consensus_map)
    # Headers are attached after grouping, as in the original statement order.
    consensus_map.setColumnHeaders(file_descriptions)
    consensus_map.setUniqueIds()
    # Honor the caller-supplied output path.
    oms.ConsensusXMLFile().store(consensus_filename, consensus_map)
    return consensus_map

consensus_map = generateConsensusMap(feature_maps)

    Progress of 'computing RT transformations':
    -- done [took 0.03 s (CPU), 0.03 s (Wall)] -- 
    Progress of 'linking features':
    -- done [took 0.06 s (CPU), 0.06 s (Wall)] -- 


Obter uma matriz de _features_ final em um formato de tabela, exportando as _features_ `consensus` em um `DataFrame do pandas`.

In [49]:
df = consensus_map.get_df()
df

Unnamed: 0_level_0,sequence,charge,RT,mz,quality,200525_HUMAN_Blank_01.mzML,200525_HUMAN_Blank_02.mzML,200525_Mou_4h_02.mzML,200525_Mou_4h_01.mzML,200525_Mou_Blank_03.mzML,...,200525_HUMAN_4h_03.mzML,200525_Dog_4h_01.mzML,200525_Dog_Blank_02.mzML,200525_HUMAN_4h_02.mzML,200525_Rat_Blank_02.mzML,200525_Mou_Blank_01.mzML,200525_Rat_4h_03.mzML,200525_Rat_4h_02.mzML,200525_Rat_Blank_03.mzML,200525_Dog_4h_02.mzML
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
14739944058528654579,,1,2246.113352,438.380326,0.001906,1.265751e+06,1.097600e+06,1.146201e+06,1075307.125,1.170430e+06,...,1.111038e+06,1.176974e+06,1.103988e+06,1.128128e+06,1.174037e+06,1.171156e+06,1.167791e+06,1.091322e+06,1.152289e+06,1.181689e+06
4157342119469212697,,1,2238.935716,526.432956,0.003521,1.977040e+06,1.890943e+06,1.976774e+06,1976585.125,1.975116e+06,...,2.032305e+06,1.950890e+06,1.842646e+06,2.011819e+06,1.840831e+06,1.972586e+06,1.829177e+06,1.949732e+06,1.973152e+06,1.821776e+06
12302576997318969048,,1,2099.446080,790.587144,0.003364,1.792398e+06,1.531315e+06,1.608213e+06,1558720.625,1.591925e+06,...,1.532721e+06,1.542612e+06,1.756035e+06,1.525446e+06,1.712583e+06,1.577991e+06,1.787740e+06,1.612728e+06,1.643221e+06,1.540430e+06
16994532679487893418,,1,2137.086734,383.207725,0.001177,8.185056e+05,8.419818e+05,7.111176e+05,806296.500,7.066050e+05,...,7.648841e+05,6.876800e+05,6.781026e+05,8.629775e+05,7.347326e+05,7.108326e+05,7.271477e+05,7.421345e+05,7.264196e+05,6.990809e+05
16111634120984322021,,1,2148.418678,619.909116,0.000275,1.639494e+05,1.622231e+05,1.492542e+05,152787.750,1.774465e+05,...,1.555899e+05,1.567735e+05,1.579209e+05,1.594049e+05,1.780567e+05,1.622634e+05,1.570629e+05,1.765407e+05,1.588426e+05,1.539932e+05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6203773306922151309,,2,2160.010000,530.335074,0.000505,0.000000e+00,0.000000e+00,0.000000e+00,0.000,0.000000e+00,...,3.933802e+05,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
17957731810262048765,,1,2232.490000,578.936439,0.000507,0.000000e+00,0.000000e+00,0.000000e+00,0.000,0.000000e+00,...,4.031701e+05,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
2341387033982192385,,2,2012.010000,982.553693,0.000084,0.000000e+00,0.000000e+00,0.000000e+00,0.000,0.000000e+00,...,6.481848e+04,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
6669415735134456905,,1,2123.760000,343.221418,0.000582,0.000000e+00,0.000000e+00,0.000000e+00,0.000,0.000000e+00,...,4.083494e+05,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00


Como o GNPS só funciona com _features_ que contêm espectros de fragmentação MS2, o primeiro passo é filtrar _features_ do seu `ConsensusMap` que não têm espectros MS2 anotados.

In [50]:
def filterConsensus(input_consensus='FeatureMatrix.consensusXML', out_consensus="filtered.consensusXML"):
    """Write a copy of a consensusXML file without unannotated features.

    The input file is loaded into a ``ConsensusMap``. A copy of that map is
    emptied and refilled with only those consensus features whose
    ``getPeptideIdentifications()`` result is non-empty, then stored.

    Args:
        input_consensus: Path of the consensusXML file to read.
        out_consensus: Path of the filtered consensusXML file to write.
    """
    source_map = oms.ConsensusMap()
    oms.ConsensusXMLFile().load(input_consensus, source_map)

    # Start from a copy of the source map, then empty it before refilling.
    kept_map = oms.ConsensusMap(source_map)
    kept_map.clear(False)
    for consensus_feature in source_map:
        if not consensus_feature.getPeptideIdentifications():
            continue  # no identifications attached to this feature
        kept_map.push_back(consensus_feature)

    oms.ConsensusXMLFile().store(out_consensus, kept_map)

filterConsensus(input_consensus='FeatureMatrix.consensusXML', out_consensus="filtered.consensusXML")

In [51]:
def export2GNPS(mzML_files, consensus_map, consensusXML_file, out_dir='.'):
    """Export the files needed for GNPS feature-based molecular networking.

    Three files are written into ``out_dir``: "MS2data.mgf",
    "FeatureQuantificationTable.txt" and "MetaValueTable.tsv".

    Previously the MGF file was always written to the current working
    directory regardless of ``out_dir``, and ``out_dir`` was never created.
    With the default ``out_dir='.'`` the output locations are unchanged.

    Args:
        mzML_files: Sequence of mzML file paths (str) passed to the MGF
            export.
        consensus_map: ``ConsensusMap`` used for the quantification and
            meta value tables.
        consensusXML_file: Path of the consensusXML file used for the MGF
            export.
        out_dir: Directory receiving all exported files; created if missing.
    """
    # Ensure the target directory exists before anything is written to it.
    os.makedirs(out_dir, exist_ok=True)

    oms.GNPSMGFFile().store(
        oms.String(consensusXML_file),
        [file.encode() for file in mzML_files],
        oms.String(f"{out_dir}/MS2data.mgf"),
    )
    oms.GNPSQuantificationFile().store(consensus_map, f"{out_dir}/FeatureQuantificationTable.txt")
    oms.GNPSMetaValueFile().store(consensus_map, f"{out_dir}/MetaValueTable.tsv")

export2GNPS(mzML_files, consensus_map, "filtered.consensusXML", out_dir='results')

def export2IIMN(consensus_map):
    """Annotate the consensus map for IIMN and write the pair table.

    ``consensus_map`` is passed to
    ``IonIdentityMolecularNetworking.annotateConsensusMap`` and the
    supplementary pair table is then written to "SupplementaryPairTable.csv"
    in the current working directory.

    Args:
        consensus_map: ``ConsensusMap`` to annotate and export.
    """
    pair_table_path = "SupplementaryPairTable.csv"
    # A fresh IonIdentityMolecularNetworking instance is used for each call.
    oms.IonIdentityMolecularNetworking().annotateConsensusMap(consensus_map)
    oms.IonIdentityMolecularNetworking().writeSupplementaryPairTable(consensus_map, pair_table_path)

export2IIMN(consensus_map)