# `Preprocessing workflow`

#### The preprocessing workflow consists of nine steps:

![Preprocessing.png](images/Preprocessing.png)

In [None]:
import os
from pymetabo.core import *
from pymetabo.helpers import *
from pymetabo.dataframes import *

Specify input mzML, result and interim directories

In [None]:
# Directory holding the input mzML files (absolute, machine-specific path).
mzML_dir = "/home/axel/Nextcloud/workspace/Tests/CentogeneWorkflowTest/mzML"

# "results" receives the final outputs; intermediate files of every workflow
# step go into its "interim" sub-directory. Both paths are taken from the
# return value of reset_directory.
results = Helper().reset_directory("results")
interim_path = os.path.join(results, "interim")
interim = Helper().reset_directory(interim_path)

Precursor Peak Correction (TODO: add to pymetabo)

In [None]:
# Step 1: correct the precursor m/z of every input run to the
# highest-intensity MS1 peak and write the result to interim/mzML_PCpeak.
mzML_files = os.listdir(mzML_dir)

pc_peak_dir = os.path.join(interim, "mzML_PCpeak")
Helper().reset_directory(pc_peak_dir)
for filename in mzML_files:
    exp = MSExperiment()
    MzMLFile().load(os.path.join(mzML_dir, filename), exp)
    exp.sortSpectra(True)
    # Output containers required by the correction call; their contents are
    # not used afterwards.
    delta_mzs, mzs, rts = [], [], []
    # Positional arguments 100.0, True: presumably an m/z tolerance of 100 in
    # ppm units -- confirm against the pyOpenMS signature.
    PrecursorCorrection.correctToHighestIntensityMS1Peak(
        exp, 100.0, True, delta_mzs, mzs, rts
    )
    MzMLFile().store(os.path.join(pc_peak_dir, filename), exp)

Feature detection by FFM

In [None]:
# Step 2: run FeatureFinderMetabo (FFM) with the precursor-corrected mzML
# directory as input; results are written to interim/FFM.
ffm_input_dir = os.path.join(interim, "mzML_PCpeak")
ffm_output_dir = os.path.join(interim, "FFM")
ffm_params = {
    "noise_threshold_int": 10000.0,
    "mass_error_ppm": 10.0,
    "remove_single_traces": "true",
}
FeatureFinderMetabo().run(ffm_input_dir, ffm_output_dir, ffm_params)

Precursor Correction to nearest feature (TODO: add in pymetabo)

In [None]:
# Step 3: correct the precursors of every peak-corrected mzML file to the
# nearest feature of its matching FFM feature map and write the result to
# interim/mzML_PCfeature.
pc_peak_dir = os.path.join(interim, "mzML_PCpeak")
ffm_dir = os.path.join(interim, "FFM")
pc_feature_dir = os.path.join(interim, "mzML_PCfeature")

Helper().reset_directory(pc_feature_dir)
mzML_files = os.listdir(pc_peak_dir)
feature_files = os.listdir(ffm_dir)
for mzml in mzML_files:
    # Files are paired by name with the extension sliced off: ".mzML" is
    # 5 characters, ".featureXML" is 11. Match on the names FIRST so that only
    # the paired feature map is parsed, instead of loading every feature file
    # for every mzML file.
    mzml_stem = mzml[:-5]
    matching = [name for name in feature_files if name[:-11] == mzml_stem]
    if not matching:
        # No feature map for this run: nothing is corrected or written.
        continue

    exp = MSExperiment()
    MzMLFile().load(os.path.join(pc_peak_dir, mzml), exp)
    exp.sortSpectra(True)
    correct = PrecursorCorrection()

    for filename in matching:
        feature_map_MFD = FeatureMap()
        FeatureXMLFile().load(os.path.join(ffm_dir, filename), feature_map_MFD)
        # Positional arguments after (features, exp) -- presumably
        # rt_tolerance_s=0.0, mz_tolerance=100.0, ppm=True,
        # believe_charge=False, keep_original=False, all_hypotheses=False,
        # max_trace=3, debug_level=0; confirm against the pyOpenMS signature.
        correct.correctToNearestFeature(
            feature_map_MFD, exp, 0.0, 100.0, True, False, False, False, 3, 0
        )
        corrected_file = os.path.join(pc_feature_dir, mzml)
        MzMLFile().store(corrected_file, exp)

Map alignment of FeatureMaps and mzML files

In [None]:
# Step 4: align the FFM feature maps, then run the aligner on the
# feature-corrected mzML files with the same "Trafo" directory
# (presumably re-using the stored transformations -- confirm in pymetabo).
trafo_dir = os.path.join(interim, "Trafo")
aligner_params = {
    "max_num_peaks_considered": -1,
    "superimposer:mz_pair_max_distance": 0.05,
    "pairfinder:distance_MZ:max_difference": 10.0,
    "pairfinder:distance_MZ:unit": "ppm",
}

MapAligner().run(
    os.path.join(interim, "FFM"),
    os.path.join(interim, "FFM_aligned"),
    trafo_dir,
    aligner_params,
)

MapAligner().run(
    os.path.join(interim, "mzML_PCfeature"),
    os.path.join(interim, "mzML_aligned"),
    trafo_dir,
)

Feature linking

In [None]:
# Step 5: link the aligned feature maps into one consensus map and export it
# as a tab-separated table.
ffm_consensus_file = os.path.join(interim, "FFM.consensusXML")
ffm_consensus_table = os.path.join(interim, "FFM_consensus.tsv")

FeatureLinker().run(os.path.join(interim, "FFM_aligned"), ffm_consensus_file)

DataFrames().create_consensus_table(ffm_consensus_file, ffm_consensus_table)

Gap filling with FFMID

In [None]:
# Step 6: gap filling. Build FFMID libraries for the features missing from the
# consensus map, run FeatureFinderMetaboIdent on the aligned mzML files with
# those libraries, then merge the FFM and FFMID feature maps.
ffmid_library_dir = os.path.join(interim, "FFMID_libraries")
ffmid_dir = os.path.join(interim, "FFMID")

FeatureMapHelper().FFMID_libraries_for_missing_features(
    os.path.join(interim, "FFM.consensusXML"), ffmid_library_dir
)

# The aligner writes the aligned runs to "mzML_aligned" (lower-case "m").
# The previous spelling "MzML_aligned" names a different, non-existent
# directory on case-sensitive filesystems.
FeatureFinderMetaboIdent().run(
    os.path.join(interim, "mzML_aligned"),
    ffmid_dir,
    ffmid_library_dir,
    {"detect:peak_width": 60.0},
)

# NOTE(review): this merges the unaligned "FFM" maps, not "FFM_aligned", with
# the FFMID maps. Confirm that this is intended.
FeatureMapHelper().merge_feature_maps(
    os.path.join(interim, "FeatureMaps_merged"),
    os.path.join(interim, "FFM"),
    ffmid_dir,
)

Adduct decharging

In [None]:
# Step 7: run MetaboliteAdductDecharger on the merged feature maps; output goes
# to interim/FeatureMaps_decharged.
decharger_params = {
    # Adduct definitions are passed as byte strings.
    "potential_adducts": [b"H:+:0.5", b"Na:+:0.3", b"H-1O-1:+:0.2"],
    "charge_min": 1,
    "charge_max": 3,
    "max_neutrals": 2,
}
MetaboliteAdductDecharger().run(
    os.path.join(interim, "FeatureMaps_merged"),
    os.path.join(interim, "FeatureMaps_decharged"),
    decharger_params,
)

Map MS2 spectra to features

In [None]:
# Step 8: map the MS2 spectra of the aligned mzML files to the decharged
# feature maps; output goes to interim/FeatureMaps_ID_mapped.
# The aligner writes the aligned runs to "mzML_aligned" (lower-case "m").
# The previous spelling "MzML_aligned" names a different, non-existent
# directory on case-sensitive filesystems.
MapID().run(
    os.path.join(interim, "mzML_aligned"),
    os.path.join(interim, "FeatureMaps_decharged"),
    os.path.join(interim, "FeatureMaps_ID_mapped"),
)

Link gap-filled features to final ConsensusMap

In [None]:
# Step 9: link the ID-mapped feature maps into the final consensus map and
# export it as the feature matrix table in the results directory.
feature_matrix_file = os.path.join(interim, "FeatureMatrix.consensusXML")
feature_matrix_table = os.path.join(results, "FeatureMatrix.tsv")

FeatureLinker().run(
    os.path.join(interim, "FeatureMaps_ID_mapped"), feature_matrix_file
)

DataFrames().create_consensus_table(feature_matrix_file, feature_matrix_table)