diff --git a/metaboblend/algorithms.py b/metaboblend/algorithms.py
new file mode 100644
index 0000000..624b00a
--- /dev/null
+++ b/metaboblend/algorithms.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2019-2020 Ralf Weber
+#
+# This file is part of MetaboBlend.
+#
+# MetaboBlend is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# MetaboBlend is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with MetaboBlend.  If not, see <https://www.gnu.org/licenses/>.
+#
+
+import numpy
+
+
+def find_path(mass_list, sum_matrix, n, mass, max_subset_length, path=None):
+    """
+    Recursively backtrack through the dynamic programming boolean matrix, lazily yielding every valid subset.
+
+    :param mass_list: A list of masses from which to identify subsets.
+
+    :param mass: The target mass that remains to be reached by the sum of the substructures.
+
+    :param sum_matrix: The dynamic programming boolean matrix, indexed as ``sum_matrix[n][mass]``.
+
+    :param n: The number of leading items of mass_list that are still available for selection.
+
+    :param max_subset_length: The maximum length of subsets to return. Allows the recursive backtracking algorithm to
+        terminate early in many cases, significantly improving runtime.
+
+    :param path: List for keeping track of the current subset; a new list is created when it is omitted.
+
+    :return: Generator of sorted lists containing the masses of valid subsets.
+    """
+
+    # never share one default list between calls: an abandoned generator would leave its partial subset behind
+    path = [] if path is None else path
+
+    # base case - the path has generated a correct solution
+    if mass == 0:
+        yield sorted(path)
+        return
+
+    # stop when we overshoot the mass, or when the remaining masses cannot sum up to the target value
+    if mass < 0 or not sum_matrix[n][mass]:
+        return
+
+    # first explore the subsets that leave out mass_list[n - 1], then (if the subset may grow) those including it
+    yield from find_path(mass_list, sum_matrix, n - 1, mass, max_subset_length, path)
+    if len(path) < max_subset_length:
+        path.append(mass_list[n - 1])
+        yield from find_path(mass_list, sum_matrix, n - 1, mass - mass_list[n - 1], max_subset_length, path)
+        path.pop()
+
+
+def subset_sum(mass_list, mass, max_subset_length=3):
+    """
+    Dynamic programming implementation of subset sum. Note that, whilst this algorithm is pseudo-polynomial, the
+    backtracking algorithm for obtaining all possible subsets has exponential complexity and so remains unsuitable
+    for large input values.  This does, however, tend to perform a lot better than non-sum_matrix implementations, as
+    we're no longer doing sums multiple times and we've cut down the operations performed during the exponential portion
+    of the method.
+
+    :param mass_list: A list of masses from which to identify subsets. The masses are used as indices into the
+        dynamic programming matrix and so must be non-negative integers.
+
+    :param mass: The target mass of the sum of the substructures. Determines the width of the dynamic programming
+        matrix and so must be a non-negative integer.
+
+    :param max_subset_length: The maximum length of subsets to return. Allows the recursive backtracking algorithm to
+        terminate early in many cases, significantly improving runtime.
+
+    :return: Generator of sorted lists containing the masses of valid subsets.
+    """
+
+    n = len(mass_list)
+
+    # initialise dynamic programming array; every cell starts as False, which already covers the rule that
+    # empty subsets do not have non-zero sums
+    sum_matrix = numpy.zeros((n + 1, mass + 1), dtype=bool)
+
+    # subsets can always equal 0
+    sum_matrix[:, 0] = True
+
+    # fill in the remaining boolean matrix, one row (i.e. one additional mass) at a time
+    for i, item_mass in enumerate(mass_list):
+        # any sum that is reachable without this mass remains reachable
+        sum_matrix[i + 1] = sum_matrix[i]
+
+        # a sum j >= item_mass is also reachable if j - item_mass was reachable without this mass
+        if item_mass <= mass:
+            sum_matrix[i + 1, item_mass:] |= sum_matrix[i, :mass + 1 - item_mass]
+
+    # backtrack through the matrix recursively to obtain all solutions; the path is always started from a new
+    # list, so a generator that is abandoned part-way through cannot leak its partial subset into the next call
+    return find_path(mass_list, sum_matrix, n, mass, max_subset_length, [])
diff --git a/metaboblend/auxiliary.py b/metaboblend/auxiliary.py
index 52717b8..4408224 100644
--- a/metaboblend/auxiliary.py
+++ b/metaboblend/auxiliary.py
@@ -20,8 +20,8 @@
 #
 
 import itertools
-import networkx as nx
 import pylab as plt
+import networkx as nx
 
 
 def calculate_complete_multipartite_graphs(max_atoms_available, max_n_substructures):
diff --git a/metaboblend/build_structures.py b/metaboblend/build_structures.py
index 1fa4539..16987dc 100644
--- a/metaboblend/build_structures.py
+++ b/metaboblend/build_structures.py
@@ -20,103 +20,21 @@
 #
 
 import os
-import multiprocessing
 import copy
+import numpy
 import itertools
-from functools import partial
+import multiprocessing
 import networkx as nx
-import numpy
-import sqlite3
-import csv
+from functools import partial
 from operator import itemgetter
 from typing import Sequence, Dict, Union
 
 from rdkit import Chem
 
-from .databases import SubstructureDb, get_elements, calculate_exact_mass
-
-
-def find_path(mass_list, sum_matrix, n, mass, max_subset_length, path=[]):
-    """
-    Recursive solution for backtracking through the dynamic programming boolean matrix. All possible subsets are found
-
-    :param mass_list: A list of masses from which to identify subsets.
-
-    :param mass: The target mass of the sum of the substructures.
-
-    :param sum_matrix: The dynamic programming boolean matrix.
-
-    :param n: The size of mass_list.
-
-    :param max_subset_length: The maximum length of subsets to return. Allows the recursive backtracking algorithm to
-        terminate early in many cases, significantly improving runtime.
-
-    :param path: List for keeping track of the current subset.
-
-    :return: Generates of lists containing the masses of valid subsets.
-    """
-
-    # base case - the path has generated a correct solution
-    if mass == 0:
-        yield sorted(path)
-        return
-
-    # stop running when we overshoot the mass
-    elif mass < 0:
-        return
-
-    # can we sum up to the target value using the remaining masses? recursive call
-    elif sum_matrix[n][mass]:
-        yield from find_path(mass_list, sum_matrix, n - 1, mass, max_subset_length, path)
-
-        if len(path) < max_subset_length:
-            path.append(mass_list[n-1])
-
-            yield from find_path(mass_list, sum_matrix, n - 1, mass - mass_list[n - 1], max_subset_length, path)
-            path.pop()
-
-
-def subset_sum(mass_list, mass, max_subset_length=3):
-    """
-    Dynamic programming implementation of subset sum. Note that, whilst this algorithm is pseudo-polynomial, the
-    backtracking algorithm for obtaining all possible subsets has exponential complexity and so remains unsuitable
-    for large input values.  This does, however, tend to perform a lot better than non-sum_matrix implementations, as
-    we're no longer doing sums multiple times and we've cut down the operations performed during the exponential portion
-    of the method.
-
-    :param mass_list: A list of masses from which to identify subsets.
-
-    :param mass: The target mass of the sum of the substructures.
-
-    :param max_subset_length: The maximum length of subsets to return. Allows the recursive backtracking algorithm to
-        terminate early in many cases, significantly improving runtime.
-
-    :return: Generates of lists containing the masses of valid subsets.
-    """
-
-    n = len(mass_list)
-
-    # initialise dynamic programming array
-    sum_matrix = numpy.ndarray([n + 1, mass + 1], bool)
-
-    # subsets can always equal 0
-    for i in range(n+1):
-        sum_matrix[i][0] = True
-
-    # empty subsets do not have non-zero sums
-    for i in range(mass):
-        sum_matrix[0][i + 1] = False
-
-    # fill in the remaining boolean matrix
-    for i in range(n):
-        for j in range(mass+1):
-            if j >= mass_list[i]:
-                sum_matrix[i + 1][j] = sum_matrix[i][j] or sum_matrix[i][j - mass_list[i]]
-            else:
-                sum_matrix[i + 1][j] = sum_matrix[i][j]
-
-    # backtrack through the matrix recursively to obtain all solutions
-    return find_path(mass_list, sum_matrix, n, mass, max_subset_length)
+from .results import ResultsDb
+from .parse import parse_ms_data
+from .algorithms import subset_sum
+from .databases import SubstructureDb
 
 
 def combine_mfs(precise_mass_grp, db, table_name, accuracy):
@@ -236,6 +154,9 @@ def add_bonds(mols, edges, atoms_available, bond_types, bond_enthalpies):
 
             * **2.0** Double
 
+    :param bond_enthalpies: Dictionary of bond enthalpies, as generated by
+        :py:meth:`metaboblend.build_structures.get_bond_enthalpies`.
+
     :return: If unsuccessful, returns None, else returns an :py:meth:`rdkit.Chem.EditableMol` object containing
         the substructures combined into a final single molecule.
     """
@@ -275,11 +196,12 @@ def add_bonds(mols, edges, atoms_available, bond_types, bond_enthalpies):
             bt_start.remove(bond_matches[0])
             bt_end.remove(bond_matches[0])
 
-        try:
+        try:  # try forming the specified bond
             mol_edit.AddBond(edge[0], edge[1], rdkit_bond_types[bond_matches[0]])
         except KeyError:
             return None, None  # unknown bond type
 
+        # calculate bond dissociation energy of "formed" bonds for the structure
         try:
             total_bde += bond_enthalpies[bond_matches[0]][mols.GetAtomWithIdx(edge[0]).GetSymbol()][mols.GetAtomWithIdx(edge[1]).GetSymbol()]
         except (SyntaxError, TypeError):
@@ -288,285 +210,7 @@ def add_bonds(mols, edges, atoms_available, bond_types, bond_enthalpies):
     return mol_edit, total_bde
 
 
-class ResultsDb:
-    """
-    Methods for interacting with the SQLITE3 results database, as created by
-    :py:meth:`metaboblend.build_structures.annotate_msn`.
-
-    :param path_results: Directory to which results will be written.
-    """
-
-    def __init__(self, path_results, msn=True):
-        """Constructor method."""
-
-        self.path_results = path_results
-        self.path_results_db = os.path.join(self.path_results, "metaboblend_results.sqlite")
-        self.msn = msn
-
-        self.conn = None
-        self.cursor = None
-        
-        self.substructure_combo_id = 0
-
-    def connect(self):
-        """Connects to the results database."""
-
-        self.conn = sqlite3.connect(self.path_results_db)
-        self.cursor = self.conn.cursor()
-
-    def create_results_db(self):
-        """Generates a new results database."""
-
-        if os.path.exists(self.path_results_db):
-            os.remove(self.path_results_db)
-
-        self.connect()
-
-        self.cursor.execute("""CREATE TABLE queries (
-                                   ms_id_num INTEGER PRIMARY KEY,
-                                   ms_id TEXT,
-                                   exact_mass NUMERIC,
-                                   C INTEGER,
-                                   H INTEGER,
-                                   N INTEGER,
-                                   O INTEGER,
-                                   P INTEGER,
-                                   S INTEGER,
-                                   ppm INTEGER,
-                                   ha_min INTEGER,
-                                   ha_max INTEGER,
-                                   max_atoms_available INTEGER,
-                                   max_degree INTEGER,
-                                   max_n_substructures INTEGER,
-                                   hydrogenation_allowance INTEGER,
-                                   isomeric_smiles INTEGER)""")
-
-        if self.msn:
-            self.cursor.execute("""CREATE TABLE spectra (
-                                       ms_id_num INTEGER,
-                                       fragment_id INTEGER,
-                                       neutral_mass NUMERIC,
-                                       PRIMARY KEY (ms_id_num, fragment_id))""")
-
-        self.cursor.execute("""CREATE TABLE structures (
-                                   ms_id_num INTEGER,
-                                   structure_smiles TEXT,
-                                   frequency INTEGER,
-                                   PRIMARY KEY (ms_id_num, structure_smiles))""")
-
-        self.cursor.execute("""CREATE TABLE substructures (
-                                           substructure_combo_id INTEGER,
-                                           substructure_position_id INTEGER,
-                                           ms_id_num INTEGER,
-                                           structure_smiles TEXT,
-                                           fragment_id INTEGER,
-                                           substructure_smiles TEXT,
-                                           bde INTEGER,
-                                           PRIMARY KEY (substructure_combo_id, substructure_position_id))""")
-
-        self.cursor.execute("""CREATE TABLE results (
-                                   ms_id_num INTEGER,
-                                   fragment_id INTEGER,
-                                   structure_smiles TEXT,
-                                   bde INTEGER,
-                                   PRIMARY KEY(ms_id_num, fragment_id, structure_smiles))""")
-
-        self.conn.commit()
-
-    def add_ms(self, msn_data, ms_id, ms_id_num, parameters):
-        """
-        Add entries to the `queries` and `spectra` tables.
-
-        :param msn_data: Dictionary in the form
-            `msn_data[id] = {mf: [C, H, N, O, P, S], exact_mass: float, fragment_masses: []}`. id represents a unique
-            identifier for a given spectral tree or fragmentation spectrum, mf is a list of integers referring to the
-            molecular formula of the structure of interest, exact_mass is the mass of this molecular formula to >=4d.p.
-            and fragment_masses are neutral fragment masses generated by this structure used to inform candidate
-            scoring. See :py:meth:`metaboblend.build_structures.annotate_msn`.
-
-        :param ms_id: Unique identifier for the annotation of a single metabolite.
-
-        :param ms_id_num: Unique numeric identifier for the annotation of a single metaoblite.
-
-        :param parameters: List of parameters, in the form: [ppm, ha_min, ha_max, max_atoms_available, max_degree,
-            max_n_substructures, hydrogenation_allowance, isomeric_smiles]. See
-            :py:meth:`metaboblend.build_structures.annotate_msn`.
-        """
-
-        for i, parameter in enumerate(parameters):
-            if parameter is None:
-                parameters[i] = "NULL"
-            elif isinstance(parameter, bool):
-                parameters[i] = int(parameter)
-
-        self.cursor.execute("""INSERT INTO queries (
-                                   ms_id,
-                                   ms_id_num,
-                                   exact_mass,
-                                   C, H, N, O, P, S,
-                                   ppm,
-                                   ha_min,
-                                   ha_max,
-                                   max_atoms_available,
-                                   max_degree,
-                                   max_n_substructures,
-                                   hydrogenation_allowance,
-                                   isomeric_smiles
-                               ) VALUES ('{}', {}, {}, '{}', '{}', '{}', '{}', '{}', '{}', {})""".format(
-                                   ms_id,
-                                   ms_id_num,
-                                   msn_data[ms_id]["exact_mass"],
-                                   msn_data[ms_id]["mf"][0], msn_data[ms_id]["mf"][1],
-                                   msn_data[ms_id]["mf"][2], msn_data[ms_id]["mf"][3],
-                                   msn_data[ms_id]["mf"][4], msn_data[ms_id]["mf"][5],
-                                   ", ".join([str(p) for p in parameters])
-                               ))
-
-        self.conn.commit()
-
-    def add_results(self, ms_id_num, smi_dict, fragment_mass=None, fragment_id=None, retain_substructures=False):
-        """
-        Record which smiles were generated for a given fragment mass.
-
-        :param ms_id_num: Unique identifier for the annotation of a single metabolite.
-
-        :param smi_dict: The fragment and substructure smiles generated by the annotation of a single peak for a single
-            metabolite.
-
-        :param fragment_mass: The neutral fragment mass that has been annotated.
-
-        :param fragment_id: The unique identifier for the fragment mass that has been annotated.
-
-        :param retain_substructures: If True, record substructures in the results DB.
-        """
-
-        if self.msn:
-            self.cursor.execute("""INSERT OR IGNORE INTO spectra (
-                                                       ms_id_num,
-                                                       fragment_id,
-                                                       neutral_mass
-                                                   ) VALUES ('{}', {}, {})""".format(
-                                                       ms_id_num,
-                                                       fragment_id,
-                                                       fragment_mass
-                                                   ))
-        else:
-            fragment_id = "NULL"
-
-        for structure_smiles in smi_dict.keys():
-
-            self.cursor.execute("""INSERT OR IGNORE INTO results (
-                                       ms_id_num,
-                                       fragment_id,
-                                       structure_smiles,
-                                       bde
-                                   ) VALUES ({}, {}, '{}', {})""".format(
-                                       ms_id_num,
-                                       fragment_id,
-                                       structure_smiles,
-                                       min(smi_dict[structure_smiles]["bdes"])
-                                   ))
-
-            if retain_substructures:
-                for i in range(len(smi_dict[structure_smiles]["substructures"])):  # for each combination
-
-                    for j, substructure in enumerate(smi_dict[structure_smiles]["substructures"][i]):
-
-                        self.cursor.execute("""INSERT INTO substructures (
-                                                           substructure_combo_id,
-                                                           substructure_position_id,
-                                                           ms_id_num,
-                                                           fragment_id,
-                                                           structure_smiles,
-                                                           substructure_smiles,
-                                                           bde
-                                                       ) VALUES ({}, {}, {}, {}, '{}', '{}', {})""".format(
-                                                           self.substructure_combo_id,
-                                                           j,
-                                                           ms_id_num,
-                                                           fragment_id,
-                                                           structure_smiles,
-                                                           substructure,
-                                                           smi_dict[structure_smiles]["bdes"][i]
-                                                       ))
-
-                    self.substructure_combo_id += 1
-
-        self.conn.commit()
-
-    def calculate_frequencies(self, ms_id_num):
-        """
-        Calculates structure frequencies in the SQLite DB.
-
-        :param ms_id_num: Unique identifier for the annotation of a single metabolite.
-        """
-
-        self.cursor.execute("""INSERT INTO structures (ms_id_num, structure_smiles, frequency) 
-                                   SELECT ms_id_num, structure_smiles, COUNT(*)
-                                   FROM results 
-                                   WHERE ms_id_num = {}
-                                   GROUP BY structure_smiles""".format(ms_id_num))
-
-    def get_structures(self, ms_id_num):
-        """
-        Gets smiles of generated structures. In the case of the MSn annotation workflow, also gets structure
-        frequencies.
-
-        :param ms_id_num: Unique identifier for the annotation of a single metabolite.
-
-        :return: In the case of simple structure generation, returns a set of smiles strings for output structures.
-            For the MSn annotation workflow, returns a dictionary with smiles as keys and the number of peaks for which
-            the smiles were generated as values.
-        """
-
-        if self.msn:
-            msn_str = ", frequency"
-        else:
-            msn_str = ""
-
-        self.cursor.execute("""SELECT structure_smiles{} FROM structures 
-                                   WHERE ms_id_num = {}
-                            """.format(msn_str, ms_id_num))
-
-        if self.msn:
-            return [t for t in self.cursor.fetchall()]
-        else:
-            return [item for t in self.cursor.fetchall() for item in t]
-
-    def generate_csv_output(self):
-        """
-        Generate CSV file output for i) queries and tool parameters and ii) structures generated.
-        """
-
-        with open(os.path.join(self.path_results, "metaboblend_queries.csv"), "w", newline="") as results_file, \
-             open(os.path.join(self.path_results, "metaboblend_structures.csv"), "w", newline="") as ms_file:
-
-            results_writer = csv.writer(results_file, delimiter=",")
-            ms_writer = csv.writer(ms_file, delimiter=",")
-
-            results_writer.writerow(["ms_id", "exact_mass", "C", "H", "N", "O", "P", "S", "ppm", "ha_min", "ha_max",
-                                     "max_atoms_available", "max_degree", "max_n_substructures",
-                                     "hydrogenation_allowance", "isomeric_smiles"])
-
-            self.cursor.execute("SELECT * FROM queries")
-
-            for query in self.cursor.fetchall():
-                results_writer.writerow(query)
-
-            ms_writer.writerow(["ms_id", "smiles", "frequency", "exact_mass", "C", "H", "N", "O", "P", "S"])
-
-            self.cursor.execute("SELECT * FROM structures")
-
-            for structure in self.cursor.fetchall():
-                ms_writer.writerow(structure)
-
-    def close(self):
-        """Close the connection to the SQLITE3 database."""
-
-        self.conn.close()
-
-
-def annotate_msn(msn_data: Dict[str, Dict[str, Union[int, list]]],
+def annotate_msn(msn_data: Union[str, os.PathLike, Dict[str, Dict[str, Union[int, list]]]],
                  path_substructure_db: Union[str, bytes, os.PathLike] = os.path.realpath(os.getcwd()),
                  path_out: Union[str, bytes, os.PathLike] = "",
                  ppm: int = 5,
@@ -590,11 +234,18 @@ def annotate_msn(msn_data: Dict[str, Dict[str, Union[int, list]]],
     text format. For the generation of structures without MSn data, see
     :py:meth:`metaboblend.build_structures.generate_structures`.
 
-    :param msn_data: Dictionary in the form
-        `msn_data[id] = {mf: [C, H, N, O, P, S], exact_mass: float, fragment_masses=[]}`. id represents a unique
-        identifier for a given spectral tree or fragmentation spectrum, mf is a list of integers referring to the
-        molecular formula of the structure of interest, exact_mass is the mass of this molecular formula to >=4d.p.
-        and fragment_masses are neutral fragment masses generated by this structure used to inform candidate scoring.
+    :param msn_data: Either a dictionary or the path to an MSP file. MSP files are parsed by
+        :py:meth:`metaboblend.parse.parse_ms_data` before being converted into a dictionary. If a dictionary is
+        provided, it must contain one item per fragmentation spectrum; the keys of the dictionary should be a unique ID
+        for the query and the corresponding value must itself be a dictionary, containing the following:
+
+        - "exact_mass": `float` (neutral mass of query) OR "precursor_mz": `float` (mz of precursor ion)
+        - "mf": `[C, H, N, O, P, S]` (a list of 6 integers)
+        - "neutral_fragment_masses": `[float, float, ...]` (list of neutral fragment masses) OR "fragment_mzs":
+            `[float, float, ...]` (list of fragment mzs)
+        - "precursor_type": `str` (e.g. "[M+H]+", required for calculating neutral masses from ion mzs)
+
+        The dictionary or MSP path is fed to :py:meth:`metaboblend.parse.parse_ms_data`.
 
     :param path_substructure_db: The path to the SQLite 3 substructure database, as generated by
         :py:meth:`metaboblend.databases.SubstructureDb`.
@@ -671,22 +322,25 @@ def annotate_msn(msn_data: Dict[str, Dict[str, Union[int, list]]],
         max_degree=max_degree,
         max_atoms_available=max_atoms_available,
         minimum_frequency=minimum_frequency,
-        max_mass=round(max([msn_data[ms_id]["exact_mass"] for ms_id in msn_data.keys()]))
+        max_mass=None
     )
 
-    for i, ms_id in enumerate(msn_data.keys()):
+    for i, ms in enumerate(parse_ms_data(msn_data)):
+        
+        if ms is None:
+            continue
 
-        results_db.add_ms(msn_data, ms_id, i,
+        results_db.add_ms(msn_data, ms["ms_id"], i,
                           [ppm, ha_min, ha_max, max_atoms_available, max_degree, max_n_substructures, hydrogenation_allowance, isomeric_smiles])
 
-        for j, fragment_mass in enumerate(msn_data[ms_id]["fragment_masses"]):
+        for j, fragment_mass in enumerate(ms["neutral_fragment_masses"]):
 
             for k in range(0 - hydrogenation_allowance, hydrogenation_allowance + 1):
                 hydrogenated_fragment_mass = fragment_mass + (k * 1.007825)  # consider re-arrangements
 
                 smi_dict = build(
-                    mf=msn_data[ms_id]["mf"],
-                    exact_mass=msn_data[ms_id]["exact_mass"],
+                    mf=ms["mf"],
+                    exact_mass=ms["exact_mass"],
                     max_n_substructures=max_n_substructures,
                     path_connectivity_db=path_connectivity_db,
                     path_substructure_db=path_substructure_db,
@@ -705,7 +359,7 @@ def annotate_msn(msn_data: Dict[str, Dict[str, Union[int, list]]],
         results_db.calculate_frequencies(i)
 
         if yield_smis:
-            yield {ms_id: results_db.get_structures(i)}
+            yield {ms["ms_id"]: results_db.get_structures(i)}
 
     if write_csv_output:
         results_db.generate_csv_output()
@@ -714,7 +368,7 @@ def annotate_msn(msn_data: Dict[str, Dict[str, Union[int, list]]],
     results_db.close()
 
 
-def generate_structures(ms_data: Dict[str, Dict[str, Union[int, None]]],
+def generate_structures(ms_data: Union[str, os.PathLike, Dict[str, Dict[str, Union[int, None]]]],
                         path_substructure_db: Union[str, bytes, os.PathLike],
                         path_out: Union[str, bytes, os.PathLike] = os.path.realpath(os.getcwd()),
                         ha_min: Union[int, None] = 2,
@@ -736,11 +390,17 @@ def generate_structures(ms_data: Dict[str, Dict[str, Union[int, None]]],
     text format. For the generation of structures from MSn data, see
     :py:meth:`metaboblend.build_structures.annotate_msn`.
 
-    :param ms_data: Dictionary in the form ms_data[id] =
-        `{mf: [C, H, N, O, P, S], exact_mass: float, prescribed_mass=int}`. id represents a unique identifier for
-        a given test, mf is a list of integers referring to molecular formula of the structure of interest,
-        exact_mass is the mass of this structure to >=4d.p. and prescribed_mass is the neutral mass of a substructure
-        used to limit structures generated.
+    :param ms_data: A dictionary that must contain one item per fragmentation spectrum; the keys of the dictionary
+        should be a unique ID for the query and the corresponding value must itself be a dictionary, containing the
+        following:
+
+        - "exact_mass": `float` (neutral mass of query) OR "precursor_mz": `float` (mz of precursor ion)
+        - "mf": `[C, H, N, O, P, S]` (a list of 6 integers)
+        - "precursor_type": `str` (e.g. "[M+H]+", required for calculating neutral masses from ion mzs)
+        - (optional) "prescribed_mass": `float` (neutral mass of substructure).
+
+        The dictionary or MSP path is fed to :py:meth:`metaboblend.parse.parse_ms_data`. A single neutral substructure
+        mass may be provided ("prescribed_mass") to guide the structure generation process.
 
     :param path_substructure_db: The path to the SQLite 3 substructure database, as generated by
         :py:meth:`metaboblend.databases.SubstructureDb`.
@@ -788,6 +448,8 @@ def generate_structures(ms_data: Dict[str, Dict[str, Union[int, None]]],
 
     :param write_csv_output: Whether to extract results from the SQLite3 database for deposition in CSV files.
 
+    :param retain_substructures: Whether to record the substructures used to generate final structures.
+
     :return: For each input molecule, yields unique SMILEs strings (unless `yield_smis = False`).
     """
 
@@ -809,26 +471,26 @@ def generate_structures(ms_data: Dict[str, Dict[str, Union[int, None]]],
         max_mass=round(max([ms_data[ms_id]["exact_mass"] for ms_id in ms_data.keys()]))
     )
 
-    for i, ms_id in enumerate(ms_data.keys()):
+    for i, ms in enumerate(parse_ms_data(ms_data, False)):
 
-        results_db.add_ms(ms_data, ms_id, i,
+        results_db.add_ms(ms_data, ms["ms_id"], i,
                           [None, ha_min, ha_max, max_atoms_available, max_degree, max_n_substructures, None, isomeric_smiles])
 
         ppm = None
 
         try:
-            if ms_data[ms_id]["prescribed_masses"] is not None:
+            if ms["prescribed_mass"] is not None:
                 ppm = 0
         except KeyError:
-            ms_data[ms_id]["prescribed_masses"] = None
+            ms["prescribed_mass"] = None
 
         smi_dict = build(
-            mf=ms_data[ms_id]["mf"],
-            exact_mass=ms_data[ms_id]["exact_mass"],
+            mf=ms["mf"],
+            exact_mass=ms["exact_mass"],
             max_n_substructures=max_n_substructures,
             path_connectivity_db=path_connectivity_db,
             path_substructure_db=path_substructure_db,
-            prescribed_mass=ms_data[ms_id]["prescribed_masses"],
+            prescribed_mass=ms["prescribed_mass"],
             ppm=ppm,
             table_name=table_name,
             ncpus=ncpus,
@@ -837,13 +499,13 @@ def generate_structures(ms_data: Dict[str, Dict[str, Union[int, None]]],
             retain_substructures=retain_substructures
         )
 
-        results_db.add_results(i, smi_dict, ms_data[ms_id]["prescribed_masses"])
+        results_db.add_results(i, smi_dict, ms["prescribed_mass"])
         smi_dict = None
 
         results_db.calculate_frequencies(i)
 
         if yield_smis:
-            yield {ms_id: results_db.get_structures(i)}
+            yield {ms["ms_id"]: results_db.get_structures(i)}
 
     if write_csv_output:
         results_db.generate_csv_output()
@@ -1040,15 +702,21 @@ def gen_subs_table(db, ha_min, ha_max, max_degree, max_atoms_available, max_mass
         ha_max_statement = """
                               AND heavy_atoms <= %s""" % str(ha_max)
 
+    if max_mass is None:
+        max_mass_statment = ""
+    else:
+        max_mass_statment = """
+                               AND exact_mass__1 < %s""" % str(max_mass)
+
     db.cursor.execute("""CREATE TABLE {} AS
-                             SELECT * FROM substructures WHERE
-                                 atoms_available <= {} AND
-                                 valence <= {} AND
-                                 exact_mass__1 < {}{}{}{}
+                             SELECT * 
+                                 FROM substructures
+                                 WHERE atoms_available <= {}
+                                 AND valence <= {}{}{}{}{}
                       """.format(table_name,
                                  max_atoms_available,
                                  max_degree,
-                                 max_mass,
+                                 max_mass_statment,
                                  freq_statement,
                                  ha_min_statement,
                                  ha_max_statement))
@@ -1136,6 +804,11 @@ def substructure_combination_build(substructure_subset, configs_iso, prescribed_
 
     :param isomeric_smiles: True/False, should output smiles be written with isomeric information?
 
+    :param bond_enthalpies: Dictionary of bond enthalpies, as generated by
+    :py:meth:`metaboblend.build_structures.get_bond_enthalpies`.
+
+    :param retain_substructures: Whether to record the substructures used to generate final structures.
+
     :return: List of smiles representing molecules generated (and the substructures used to generate them).
     """
 
diff --git a/metaboblend/databases.py b/metaboblend/databases.py
index 03d24a8..2c31d12 100644
--- a/metaboblend/databases.py
+++ b/metaboblend/databases.py
@@ -21,13 +21,10 @@
 
 import io
 import os
-import sys
-import subprocess
+import pickle
 import sqlite3
 import tempfile
-import pickle
-from collections import OrderedDict
-import xml.etree.ElementTree as ElementTree
+import subprocess
 import networkx as nx
 from typing import Sequence, Dict, Union
 
@@ -35,97 +32,10 @@
 from rdkit.Chem import Recap
 from rdkit.Chem import BRICS
 
+from .parse import parse_xml
 from .auxiliary import calculate_complete_multipartite_graphs, graph_to_ri, graph_info, sort_subgraphs
 
 
-def reformat_xml(source, encoding="utf8"):
-    """
-    Reformats HMDB xml files to be compatible with :py:meth:`metaboblend.databases.parse_xml`; some such files do not
-    contain a `<hmdb xmlns="http://www.hmdb.ca">` header.
-
-    :param source: Path to file to be reformatted.
-
-    :param encoding: Encoding of source file.
-
-    :return: Source file destination.
-    """
-
-    with io.open(source, "r", encoding=encoding) as xml:
-        xml_contents = xml.readlines()
-        if "hmdb" in xml_contents[1]:
-            return source
-
-        xml_contents.insert(1, "<hmdb xmlns=\"http://www.hmdb.ca\"> \n")
-
-    with io.open(source, "w", encoding=encoding) as xml:
-        xml_contents = "".join(xml_contents)
-        xml.write(xml_contents)
-        xml.write("</hmdb>")
-
-    return source
-
-
-def parse_xml(source, encoding="utf8", reformat=False):
-    """
-    Parses the contents of HMDB xml files to to extract information for the generation of substructures.
-
-    :param source: Source file destination.
-
-    :param encoding: Encoding of source file.
-
-    :param reformat: Whether to apply :py:meth:`metaboblend.databases.reformat_xml` to the XML file. Is required for
-        XML files recording single metabolites.
-
-        * **True** Add a `<hmdb xmlns="http://www.hmdb.ca">` header to the XML file before parsing.
-
-        * **False** Parse the XML file as it is (recommended if header is present).
-
-    :return: The XML file converted to a dictionary.
-    """
-
-    if reformat:
-        reformat_xml(source, encoding)
-
-    with io.open(source, "r", encoding=encoding) as inp:
-        record_out = OrderedDict()
-
-        inp.readline()
-        inp.readline()
-
-        xml_record = ""
-        path = []
-
-        for line in inp:
-            xml_record += line
-            if line == "</metabolite>\n" or line == "</drug>\n":
-
-                if sys.version_info[0] == 3:
-                    inp = io.StringIO(xml_record)
-                else:
-                    inp = io.BytesIO(xml_record.encode('utf-8').strip())
-
-                for event, elem in ElementTree.iterparse(inp, events=("start", "end")):
-                    if event == 'end':
-                        path.pop()
-
-                    if event == 'start':
-                        path.append(elem.tag)
-                        if elem.text is not None:
-                            if elem.text.replace(" ", "") != "\n":
-
-                                path_elem = ".".join(map(str, path[1:]))
-                                if path_elem in record_out:
-                                    if type(record_out[path_elem]) != list:
-                                        record_out[path_elem] = [record_out[path_elem]]
-                                    record_out[path_elem].append(elem.text)
-                                else:
-                                    record_out[path_elem] = elem.text
-
-                xml_record = ""
-                yield record_out
-                record_out = OrderedDict()
-
-
 class SubstructureDb:
     """
     Methods for interacting with the SQLITE3 substructure and connectivity databases. Provides a connection to the
diff --git a/metaboblend/parse.py b/metaboblend/parse.py
new file mode 100644
index 0000000..5495a58
--- /dev/null
+++ b/metaboblend/parse.py
@@ -0,0 +1,351 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2019-2020 Ralf Weber
+#
+# This file is part of MetaboBlend.
+#
+# MetaboBlend is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# MetaboBlend is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with MetaboBlend.  If not, see <https://www.gnu.org/licenses/>.
+#
+
+import io
+import re
+import sys
+import copy
+import warnings
+from collections import OrderedDict
+import xml.etree.ElementTree as ElementTree
+
+
+def parse_ms_data(ms_data, msn=True):
+    """
+    Normalise user-supplied input and yield one formatted record per query. A dictionary is processed entry by
+    entry (neutral masses are derived from precursor/fragment ions wherever they were not supplied directly);
+    anything else is treated as the path to an MSP file and handed to :py:meth:`metaboblend.parse.parse_msp`.
+
+    :param ms_data: Dictionary containing input data or path to an MSP file. Dictionary entries are updated in
+        place: an "ms_id" item is added to each, along with any neutral masses that had to be calculated.
+
+    :param msn: If True, formats the data for use by :py:meth:`metaboblend.build_structures.annotate_msn`; else, formats
+        input data for use by :py:meth:`metaboblend.build_structures.generate_structures`. Only relevant if a
+        dictionary has been provided.
+
+    :return: Yields a dictionary for use by build functions to generate structures.
+    """
+
+    if not isinstance(ms_data, dict):  # not a dictionary - treat as the path to an MSP file
+        yield from parse_msp(ms_data)
+        return
+
+    for ms_id, entry in ms_data.items():
+
+        entry["ms_id"] = ms_id
+
+        # which of the neutral masses did the user already provide?
+        has_exact_mass = "exact_mass" in entry
+        has_neutral_fragments = "neutral_fragment_masses" in entry
+
+        if has_exact_mass:
+            # precursor already neutral; fragments only need converting for MSn data that lacks neutral fragments
+            which = "fragments" if msn and not has_neutral_fragments else "none"
+
+        elif has_neutral_fragments or not msn:
+            # only the precursor is converted: the fragments are already neutral or are not converted at all
+            which = "precursor"
+
+        else:
+            which = "both"
+
+        yield precursor_ions_to_neutral_masses(entry, which)
+
+
+def precursor_ion_to_neutral_mass(mass, precursor_type):
+    """
+    Neutralise a charged mass by subtracting the mass shift associated with its adduct.
+
+    :param mass: Charged mass to be neutralised.
+
+    :param precursor_type: Type of precursor ion, e.g. "[M+H]+". A KeyError is raised for types not listed below.
+
+    :return: Neutral mass.
+    """
+
+    # mass shift subtracted for each supported precursor type
+    adduct_shifts = {
+        "[M+H]+": 1.007276, "[M+Na]+": 22.989221, "[M+K]+": 38.963158,
+        "[M-H]-": -1.007276, "[M+Cl]-": 34.969401,
+        "[M+Na-2H]-": 20.974668, "[M+K-2H]-": 36.948605, "[M+Hac-H]-": 59.013853,
+    }
+
+    shift = adduct_shifts[precursor_type]
+    neutral_mass = mass - shift
+
+    return neutral_mass
+
+
+def precursor_ions_to_neutral_masses(ms_dict, which="both"):
+    """
+    Add neutral masses to `ms_dict`, calculated from the charged precursor and/or fragment ions it contains.
+
+    :param ms_dict: Dictionary used by build functions to generate structures. When a conversion is requested it is
+        read for "precursor_type" plus "precursor_mz" and/or "fragment_mzs". The dictionary is modified in place.
+
+    :param which: Whether to convert the precursor ion ("precursor"), the fragment ions ("fragments") or both ("both")
+        to their respective neutral masses. If which is "none", returns the original dictionary.
+
+    :return: Returns `ms_dict` with additional items corresponding to neutralised masses.
+    """
+
+    convert_precursor = which in ("precursor", "both")
+    convert_fragments = which in ("fragments", "both")
+
+    if convert_precursor:
+        ms_dict["exact_mass"] = precursor_ion_to_neutral_mass(ms_dict["precursor_mz"], ms_dict["precursor_type"])
+
+    if convert_fragments:
+        # fragments are neutralised using the precursor type of the entry
+        neutral_masses = ms_dict["neutral_fragment_masses"] = []
+        neutral_masses.extend(precursor_ion_to_neutral_mass(fragment_ion_mass, ms_dict["precursor_type"])
+                              for fragment_ion_mass in ms_dict["fragment_mzs"])
+
+    return ms_dict
+
+
+def parse_msp(msp_path):
+    """
+    Parse msp files and yield data for each compound. Accepts MSP files in MoNa or MassBank format. We expect that
+    the following are provided in the MSP:
+
+    - A unique accession ID.
+    - The molecular formula of the compound.
+    - The precursor mz representing the mass of the charged precursor ion.
+    - Fragment mzs representing masses of charged fragment ions.
+    - The type of precursor, e.g. "[M+H]+".
+
+    Code adapted from `msp2db` (https://github.com/computational-metabolomics/msp2db/blob/master/msp2db/parse.py).
+
+    :param msp_path: Path of an MSP file to be converted into a dictionary.
+
+    :return: Yields, for each entry, the output of :py:meth:`metaboblend.parse.reformat_msp_input`: a dictionary
+        useable by the build functions, or None if required information could not be extracted for the entry.
+    """
+
+    # compile the patterns once rather than for every line of the file
+    meta_parse = {meta_type: [re.compile(meta_re, re.IGNORECASE) for meta_re in meta_res]
+                  for meta_type, meta_res in get_msp_regex().items()}
+    massbank_prefix = re.compile(r"^(.{2}\$)")  # "XX$" at line start in massbank files
+    spectra_start = re.compile(r"^Num Peaks(.*)$|^PEAK:(.*)", re.IGNORECASE)  # line prior to spectra
+    reached_spectra = False
+
+    empty_dict = {"ms_id": None, "mf": None, "precursor_mz": None, "precursor_type": None, "fragment_mzs": []}
+    entry_dict = copy.deepcopy(empty_dict)
+
+    with open(msp_path, "r") as msp_file:
+        for line in msp_file:
+
+            line = massbank_prefix.sub("", line)
+
+            if reached_spectra:
+                # a blank line or "//" ends the spectra; stripping tolerates stray whitespace around either
+                if line.strip() in ("", "//"):
+                    yield reformat_msp_input(entry_dict)  # completed entry ready for sending to build
+                    entry_dict = copy.deepcopy(empty_dict)
+                    reached_spectra = False
+
+                else:  # add peak - the m/z is the first whitespace-separated value of the line
+                    entry_dict["fragment_mzs"].append(float(line.split()[0]))
+
+            else:
+                for meta_type, meta_res in meta_parse.items():
+                    for meta_re in meta_res:
+                        re_query = meta_re.search(line)
+                        if re_query:  # a later match for the same metadata type overrides an earlier one
+                            entry_dict[meta_type] = re_query.group(1).strip()
+
+                if spectra_start.match(line):
+                    reached_spectra = True
+
+    if entry_dict != empty_dict:  # final entry was not followed by a terminating line
+        yield reformat_msp_input(entry_dict)
+
+
+def reformat_msp_input(entry_dict):
+    """
+    Reformat input for use by build functions.
+
+    :param entry_dict: Dictionary containing MSn information extracted from an MSP file (by
+        :py:meth:`metaboblend.parse.parse_msp`). The dictionary must contain the following:
+
+        - ms_id - a unique accession number
+        - mf - the molecular formula of the compound (in the format "CXHXNXOXPXSX")
+        - precursor_mz - mz representing the mass of the charged precursor ion
+        - precursor_type - the type of precursor ion (e.g. "[M+H]+")
+        - fragment_mzs - mz(s) representing the mass of charged fragment ions
+
+    :return: None (following a warning) if the accession, formula, precursor m/z or precursor type is missing or
+        invalid, or if no fragments were found. Else, the same dictionary with the formula converted by
+        :py:meth:`metaboblend.parse.mc_to_list`, precursor_mz as a float and the ions' neutral masses added.
+    """
+
+    if entry_dict["mf"] is not None:  # convert from C5H6... to [5, 6, ...]
+        entry_dict["mf"] = mc_to_list(entry_dict["mf"])
+
+    for key in ["ms_id", "mf", "precursor_mz", "precursor_type"]:  # required for the tool to function
+        if entry_dict[key] is None:
+            if key == "ms_id":
+                warnings.warn("Entry ignored from MSP file due to lack of accession in MSP file")
+            else:
+                warnings.warn("Entry " + entry_dict["ms_id"] + " removed due to lack of valid " + key + " in MSP file")
+            return None
+    try:  # the msp regex may capture an empty string or a decimal comma, neither of which float() accepts
+        entry_dict["precursor_mz"] = float(str(entry_dict["precursor_mz"]).replace(",", "."))
+    except ValueError:
+        warnings.warn("Entry " + entry_dict["ms_id"] + " removed due to lack of valid precursor_mz in MSP file")
+        return None
+    if len(entry_dict["fragment_mzs"]) == 0:  # require a spectra to annotate
+        warnings.warn("No fragments were identified for " + entry_dict["ms_id"] + " in MSP file")
+        return None
+
+    return precursor_ions_to_neutral_masses(entry_dict)
+
+
+def mc_to_list(mc):
+    """
+    Convert molecular formula string to list format. Elements that occur more than once (e.g. "C2H5OH") are summed.
+
+    :param mc: Molecular formula (in the format "C1H2N3O4P5S6"); a formula already in list format is returned as is.
+
+    :return: Molecular formula (in the format `[1, 2, 3, 4, 5, 6]`), or None if something other than C, H, N, O, P
+        or S is found where an element symbol is expected.
+    """
+
+    if isinstance(mc, list):
+        return mc
+
+    mc_list = [0, 0, 0, 0, 0, 0]
+    element_positions = {"C": 0, "H": 1, "N": 2, "O": 3, "P": 4, "S": 5}
+
+    # make implicit counts of one explicit ("CH4" -> "C1H4"), then separate out the formula into
+    # [letter, number, letter, number, ...]; raw strings keep "\d" from being read as a string escape
+    mc = re.findall(r"[A-Z][a-z]*|\d+", re.sub(r"[A-Z][a-z]*(?![\da-z])", r"\g<0>1", mc))
+
+    for i, substring in enumerate(mc):
+        if i % 2 == 0:  # in case of letter
+            if substring not in element_positions:  # element not in C, H, N, O, P, S
+                return None
+            element_position = element_positions[substring]
+
+        else:  # add the number following the letter; += so that repeated elements accumulate
+            mc_list[element_position] += int(substring)
+
+    return mc_list
+
+
+def get_msp_regex():
+    """ Dictionary of regular expressions for parsing msp metadata. """
+
+    # NOTE: "MS$" is optional as parse_msp strips "XX$" prefixes before matching; raw strings avoid invalid escapes
+    meta_parse = {"ms_id":          [r"^accession(?:=|:)(.*)$", r"^DB#(?:=|:)(.*)$", r"^ACCESSION:(.*)$"],  # use accession as ms_id
+                  "mf":             [r"^molecular formula(?:=|:)(.*)$", r"^formula:(.*)$"],
+                  "precursor_type": [r"^precursor.*type(?:=|:)(.*)$", r"^adduct(?:=|:)(.*)$", r"^(?:MS\$)?FOCUSED_ION:\s+PRECURSOR_TYPE\s+(.*)$"],
+                  "precursor_mz":   [r"^precursor m/z(?:=|:)\s*(\d*[.,]?\d*)$", r"^precursor.*mz(?:=|:)\s*(\d*[.,]?\d*)$", r"^(?:MS\$)?FOCUSED_ION:\s+PRECURSOR_M/Z\s+(\d*[.,]?\d*)$"]}
+    return meta_parse
+
+
+def reformat_xml(source, encoding="utf8"):
+    """
+    Wraps the records of an HMDB xml file in `<hmdb xmlns="http://www.hmdb.ca">` ... `</hmdb>` tags, for
+    compatibility with :py:meth:`metaboblend.parse.parse_xml`; some such files do not contain this header. The file
+    is rewritten in place and is left untouched if its second line already mentions "hmdb".
+
+    :param source: Path to file to be reformatted.
+
+    :param encoding: Encoding of source file.
+
+    :return: Source file destination.
+    """
+
+    with io.open(source, "r", encoding=encoding) as xml_in:
+        lines = xml_in.readlines()
+
+    if "hmdb" not in lines[1]:
+        header = "<hmdb xmlns=\"http://www.hmdb.ca\"> \n"
+        reformatted = "".join(lines[:1] + [header] + lines[1:])  # header becomes the second line of the file
+
+        with io.open(source, "w", encoding=encoding) as xml_out:
+            xml_out.write(reformatted)
+            xml_out.write("</hmdb>")
+
+    return source
+
+
+def parse_xml(source, encoding="utf8", reformat=False):
+    """
+    Parses the contents of HMDB xml files to extract information for the generation of substructures.
+
+    :param source: Source file destination.
+
+    :param encoding: Encoding of source file.
+
+    :param reformat: Whether to apply :py:meth:`metaboblend.parse.reformat_xml` to the XML file. Is required for
+        XML files recording single metabolites.
+
+        * **True** Add a `<hmdb xmlns="http://www.hmdb.ca">` header to the XML file before parsing.
+
+        * **False** Parse the XML file as it is (recommended if header is present).
+
+    :return: Yields one OrderedDict per `<metabolite>`/`<drug>` record, mapping the dot-separated path of each tag
+        (excluding the record's root tag) to its text; tags that occur more than once map to a list of their texts.
+    """
+
+    if reformat:
+        reformat_xml(source, encoding)
+
+    with io.open(source, "r", encoding=encoding) as inp:
+        # skip the first two lines (presumably the XML declaration and the <hmdb> header - verify against input)
+        inp.readline()
+        inp.readline()
+        record_lines = []
+
+        for line in inp:
+            record_lines.append(line)
+
+            # records are parsed one at a time; a record is complete once its closing tag is met on its own line
+            if line != "</metabolite>\n" and line != "</drug>\n":
+                continue
+
+            record_out = OrderedDict()
+            path = []
+            record = io.StringIO("".join(record_lines))  # separate name, so the file being iterated is not shadowed
+            record_lines = []
+
+            # NOTE(review): ElementTree does not guarantee elem.text at "start" events - confirm for large records
+            for event, elem in ElementTree.iterparse(record, events=("start", "end")):
+                if event == "end":
+                    path.pop()
+                    continue
+
+                path.append(elem.tag)
+                if elem.text is None or elem.text.replace(" ", "") == "\n":  # no text, or a newline and spaces only
+                    continue
+
+                path_elem = ".".join(map(str, path[1:]))
+                if path_elem not in record_out:
+                    record_out[path_elem] = elem.text
+                    continue
+                if not isinstance(record_out[path_elem], list):  # second occurrence of the tag - switch to a list
+                    record_out[path_elem] = [record_out[path_elem]]
+                record_out[path_elem].append(elem.text)
+
+            yield record_out
diff --git a/metaboblend/results.py b/metaboblend/results.py
new file mode 100644
index 0000000..898443f
--- /dev/null
+++ b/metaboblend/results.py
@@ -0,0 +1,299 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2019-2020 Ralf Weber
+#
+# This file is part of MetaboBlend.
+#
+# MetaboBlend is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# MetaboBlend is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with MetaboBlend.  If not, see <https://www.gnu.org/licenses/>.
+#
+
+import os
+import csv
+import sqlite3
+
+
+class ResultsDb:
+    """
+    Methods for interacting with the SQLITE3 results database, as created by
+    :py:meth:`metaboblend.build_structures.annotate_msn`.
+
+    :param path_results: Directory to which results will be written.
+
+    :param msn: If True, fragment spectra are recorded (in a `spectra` table) and :py:meth:`get_structures` also
+        returns structure frequencies; if False, fragments are not recorded and only smiles are returned.
+    """
+
+    def __init__(self, path_results, msn=True):
+        """Constructor method."""
+
+        self.path_results = path_results
+        self.path_results_db = os.path.join(self.path_results, "metaboblend_results.sqlite")
+        self.msn = msn
+
+        self.conn = None
+        self.cursor = None
+
+        self.substructure_combo_id = 0
+
+    def connect(self):
+        """ Connects to the results database. """
+
+        self.conn = sqlite3.connect(self.path_results_db)
+        self.cursor = self.conn.cursor()
+
+    @staticmethod
+    def _to_sql(value):
+        """
+        Prepare a value for binding to a `?` placeholder of a query.
+
+        :param value: Value to be bound.
+
+        :return: `value.item()` for objects exposing an `item` method, else the value itself, unchanged.
+        """
+
+        # NOTE(review): presumably the only such objects passed in are numpy scalars, which sqlite3 cannot bind
+        return value.item() if hasattr(value, "item") else value
+
+    def create_results_db(self):
+        """ Generates a new results database. """
+
+        if os.path.exists(self.path_results_db):
+            os.remove(self.path_results_db)
+
+        self.connect()
+
+        self.cursor.execute("""CREATE TABLE queries (
+                                   ms_id_num INTEGER PRIMARY KEY,
+                                   ms_id TEXT,
+                                   exact_mass NUMERIC,
+                                   C INTEGER,
+                                   H INTEGER,
+                                   N INTEGER,
+                                   O INTEGER,
+                                   P INTEGER,
+                                   S INTEGER,
+                                   ppm INTEGER,
+                                   ha_min INTEGER,
+                                   ha_max INTEGER,
+                                   max_atoms_available INTEGER,
+                                   max_degree INTEGER,
+                                   max_n_substructures INTEGER,
+                                   hydrogenation_allowance INTEGER,
+                                   isomeric_smiles INTEGER)""")
+
+        if self.msn:
+            self.cursor.execute("""CREATE TABLE spectra (
+                                       ms_id_num INTEGER,
+                                       fragment_id INTEGER,
+                                       neutral_mass NUMERIC,
+                                       PRIMARY KEY (ms_id_num, fragment_id))""")
+
+        self.cursor.execute("""CREATE TABLE structures (
+                                   ms_id_num INTEGER,
+                                   structure_smiles TEXT,
+                                   frequency INTEGER,
+                                   PRIMARY KEY (ms_id_num, structure_smiles))""")
+
+        self.cursor.execute("""CREATE TABLE substructures (
+                                           substructure_combo_id INTEGER,
+                                           substructure_position_id INTEGER,
+                                           ms_id_num INTEGER,
+                                           structure_smiles TEXT,
+                                           fragment_id INTEGER,
+                                           substructure_smiles TEXT,
+                                           bde INTEGER,
+                                           PRIMARY KEY (substructure_combo_id, substructure_position_id))""")
+
+        self.cursor.execute("""CREATE TABLE results (
+                                   ms_id_num INTEGER,
+                                   fragment_id INTEGER,
+                                   structure_smiles TEXT,
+                                   bde INTEGER,
+                                   PRIMARY KEY(ms_id_num, fragment_id, structure_smiles))""")
+
+        self.conn.commit()
+
+    def add_ms(self, msn_data, ms_id, ms_id_num, parameters):
+        """
+        Add an entry to the `queries` table.
+
+        :param msn_data: Dictionary in the form
+            `msn_data[id] = {mf: [C, H, N, O, P, S], exact_mass: float, ...}`. id represents a unique
+            identifier for a given spectral tree or fragmentation spectrum, mf is a list of integers referring to the
+            molecular formula of the structure of interest and exact_mass is the mass of this molecular formula to
+            >=4d.p. Only the mf and exact_mass of `ms_id` are read here.
+
+        :param ms_id: Unique identifier for the annotation of a single metabolite.
+
+        :param ms_id_num: Unique numeric identifier for the annotation of a single metabolite.
+
+        :param parameters: List of parameters, in the form: [ppm, ha_min, ha_max, max_atoms_available, max_degree,
+            max_n_substructures, hydrogenation_allowance, isomeric_smiles]. See
+            :py:meth:`metaboblend.build_structures.annotate_msn`. The list itself is not modified.
+        """
+
+        query = msn_data[ms_id]
+
+        # values are bound as query parameters (None is stored as NULL, booleans as integers) rather than being
+        # formatted into the statement, so that identifiers containing quotes cannot break the SQL
+        values = [str(ms_id), ms_id_num, query["exact_mass"]] + list(query["mf"][:6]) + list(parameters)
+
+        self.cursor.execute("""INSERT INTO queries (
+                                   ms_id,
+                                   ms_id_num,
+                                   exact_mass,
+                                   C, H, N, O, P, S,
+                                   ppm,
+                                   ha_min,
+                                   ha_max,
+                                   max_atoms_available,
+                                   max_degree,
+                                   max_n_substructures,
+                                   hydrogenation_allowance,
+                                   isomeric_smiles
+                               ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+                            [self._to_sql(value) for value in values])
+
+        self.conn.commit()
+
+    def add_results(self, ms_id_num, smi_dict, fragment_mass=None, fragment_id=None, retain_substructures=False):
+        """
+        Record which smiles were generated for a given fragment mass.
+
+        :param ms_id_num: Unique identifier for the annotation of a single metabolite.
+
+        :param smi_dict: The fragment and substructure smiles generated by the annotation of a single peak for a single
+            metabolite. Maps each structure smiles to a dictionary holding its "bdes" and, if substructures are to be
+            retained, its "substructures" (one group of substructure smiles per entry of "bdes").
+
+        :param fragment_mass: The neutral fragment mass that has been annotated.
+
+        :param fragment_id: The unique identifier for the fragment mass that has been annotated.
+
+        :param retain_substructures: If True, record substructures in the results DB.
+        """
+
+        ms_id_num, fragment_id = self._to_sql(ms_id_num), self._to_sql(fragment_id)
+
+        if self.msn:
+            self.cursor.execute("""INSERT OR IGNORE INTO spectra (
+                                       ms_id_num,
+                                       fragment_id,
+                                       neutral_mass
+                                   ) VALUES (?, ?, ?)""", (ms_id_num, fragment_id, self._to_sql(fragment_mass)))
+        else:
+            fragment_id = None  # fragments are not recorded outside of the MSn workflow; stored as NULL
+
+        for structure_smiles, structure in smi_dict.items():
+
+            self.cursor.execute("""INSERT OR IGNORE INTO results (
+                                       ms_id_num,
+                                       fragment_id,
+                                       structure_smiles,
+                                       bde
+                                   ) VALUES (?, ?, ?, ?)""",
+                                (ms_id_num, fragment_id, str(structure_smiles), self._to_sql(min(structure["bdes"]))))
+
+            if not retain_substructures:
+                continue
+
+            # one row per substructure; rows of the same combination share a substructure_combo_id
+            for i, combination in enumerate(structure["substructures"]):
+
+                for j, substructure in enumerate(combination):
+                    self.cursor.execute("""INSERT INTO substructures (
+                                               substructure_combo_id,
+                                               substructure_position_id,
+                                               ms_id_num,
+                                               fragment_id,
+                                               structure_smiles,
+                                               substructure_smiles,
+                                               bde
+                                           ) VALUES (?, ?, ?, ?, ?, ?, ?)""",
+                                        (self.substructure_combo_id, j, ms_id_num, fragment_id,
+                                         str(structure_smiles), str(substructure), self._to_sql(structure["bdes"][i])))
+
+                self.substructure_combo_id += 1
+
+        self.conn.commit()
+
+    def calculate_frequencies(self, ms_id_num):
+        """
+        Calculates structure frequencies in the SQLite DB.
+
+        :param ms_id_num: Unique identifier for the annotation of a single metabolite.
+        """
+
+        self.cursor.execute("""INSERT INTO structures (ms_id_num, structure_smiles, frequency)
+                                   SELECT ms_id_num, structure_smiles, COUNT(*)
+                                   FROM results
+                                   WHERE ms_id_num = ?
+                                   GROUP BY structure_smiles""", (self._to_sql(ms_id_num),))
+
+        # commit, else the frequencies of the final query are lost if the connection is closed straight afterwards
+        self.conn.commit()
+
+    def get_structures(self, ms_id_num):
+        """
+        Gets smiles of generated structures. In the case of the MSn annotation workflow, also gets structure
+        frequencies.
+
+        :param ms_id_num: Unique identifier for the annotation of a single metabolite.
+
+        :return: In the case of simple structure generation, returns a list of smiles strings for output structures.
+            For the MSn annotation workflow, returns a list of (smiles, frequency) tuples, where frequency is the
+            number of peaks for which the smiles were generated.
+        """
+
+        columns = "structure_smiles, frequency" if self.msn else "structure_smiles"
+
+        self.cursor.execute("SELECT {} FROM structures WHERE ms_id_num = ?".format(columns),
+                            (self._to_sql(ms_id_num),))
+
+        rows = self.cursor.fetchall()
+
+        if self.msn:
+            return rows
+
+        # unpack the single-column rows into a flat list of smiles
+        return [smiles for (smiles,) in rows]
+
+    def generate_csv_output(self):
+        """ Generate CSV file output for i) queries and tool parameters and ii) structures generated. """
+
+        with open(os.path.join(self.path_results, "metaboblend_queries.csv"), "w", newline="") as results_file, \
+                open(os.path.join(self.path_results, "metaboblend_structures.csv"), "w", newline="") as ms_file:
+
+            results_writer = csv.writer(results_file, delimiter=",")
+            ms_writer = csv.writer(ms_file, delimiter=",")
+
+            # each header names exactly the columns returned by `SELECT *`, in table definition order, so that
+            # header and data rows line up
+            results_writer.writerow(["ms_id_num", "ms_id", "exact_mass", "C", "H", "N", "O", "P", "S", "ppm",
+                                     "ha_min", "ha_max", "max_atoms_available", "max_degree", "max_n_substructures",
+                                     "hydrogenation_allowance", "isomeric_smiles"])
+
+            self.cursor.execute("SELECT * FROM queries")
+            results_writer.writerows(self.cursor.fetchall())
+
+            ms_writer.writerow(["ms_id_num", "smiles", "frequency"])
+
+            self.cursor.execute("SELECT * FROM structures")
+            ms_writer.writerows(self.cursor.fetchall())
+
+    def close(self):
+        """ Close the connection to the SQLITE3 database. """
+
+        self.conn.close()
diff --git a/tests/test_build_structures.py b/tests/test_build_structures.py
index e74f81d..32a67c7 100644
--- a/tests/test_build_structures.py
+++ b/tests/test_build_structures.py
@@ -202,7 +202,7 @@ def test_generate_structures(self):  # tests vs build
                 ms_data = {record_dict["HMDB_ID"]: {"mf": [record_dict["C"], record_dict["H"], record_dict["N"],
                                                            record_dict["O"], record_dict["P"], record_dict["S"]],
                                                     "exact_mass": record_dict["exact_mass"],
-                                                    "prescribed_masses": fragments[i]}}
+                                                    "prescribed_mass": fragments[i]}}
 
                 # test prescribed building
                 returned_smis = list(
@@ -280,7 +280,7 @@ def test_annotate_msn(self):  # tests vs build_msn
                 ms_data = {record_dict["HMDB_ID"]: {"mf": [record_dict["C"], record_dict["H"], record_dict["N"],
                                                            record_dict["O"], record_dict["P"], record_dict["S"]],
                                                     "exact_mass": record_dict["exact_mass"],
-                                                    "fragment_masses": fragments}}
+                                                    "neutral_fragment_masses": fragments}}
 
                 # test standard building
                 returned_smis = list(annotate_msn(
@@ -307,7 +307,7 @@ def test_annotate_msn(self):  # tests vs build_msn
                 ms_data[record_dict["HMDB_ID"]] = {"mf": [record_dict["C"], record_dict["H"], record_dict["N"],
                                                           record_dict["O"], record_dict["P"], record_dict["S"]],
                                                    "exact_mass": record_dict["exact_mass"],
-                                                   "fragment_masses": fragments}
+                                                   "neutral_fragment_masses": fragments}
 
             os.mkdir(self.to_test_results("annotate_multi"))
 
@@ -338,7 +338,7 @@ def test_results_db(self):
             ms_data[record_dict["HMDB_ID"]] = {"mf": [record_dict["C"], record_dict["H"], record_dict["N"],
                                                       record_dict["O"], record_dict["P"], record_dict["S"]],
                                                "exact_mass": record_dict["exact_mass"],
-                                               "fragment_masses": fragments}
+                                               "neutral_fragment_masses": fragments}
 
         os.mkdir(self.to_test_results("test_results_db"))
 
diff --git a/tests/test_data/massbank_msp.txt b/tests/test_data/massbank_msp.txt
new file mode 100644
index 0000000..cdfeb00
--- /dev/null
+++ b/tests/test_data/massbank_msp.txt
@@ -0,0 +1,87 @@
+ACCESSION: UO000002
+RECORD_TITLE: 2,3-di-O-Phytanyl-sn-glycerol-1-phosphoserine; EI-B; MS
+DATE: 2016.01.19 (Created 2009.05.29, modified 2011.05.06)
+AUTHORS: Hiroyuki Morii, Department of Chemistry, University of Occupational and Environmental Health
+LICENSE: CC BY-SA
+PUBLICATION: Morii, H., Nishihara, M., Ohga, M., and Koga, Y. 1986. A diphytanyl ether analog of phosphatidylserine from a methanogenic bacterium, Methanobrevibacter arboriphilus. J Lipid Res. 27: 724-730.
+COMMENT: Ammonium salt of the compound was analyzed
+COMMENT: [Analytical] Ionizing Curr 300 uA, Chamber Temp 250 C, Accel Volt 3KV, Ion Multi 1.0 KV,
+CH$NAME: 2,3-di-O-Phytanyl-sn-glycerol-1-phosphoserine
+CH$NAME: archaetidylserine
+CH$COMPOUND_CLASS: Glycerophospholipids; Glycerophosphoserines; Dialkylglycerophosphoserines
+CH$FORMULA: C46H94NO8P
+CH$EXACT_MASS: 819.67171
+CH$SMILES: C(CCC(C)C)C(C)CCCC(CCCC(CCOCC(OCCC(CCCC(C)CCCC(C)CCCC(C)C)C)COP(O)(=O)OCC(C(O)=O)N)C)C
+CH$IUPAC: InChI=1S/C46H94NO8P/c1-36(2)17-11-19-38(5)21-13-23-40(7)25-15-27-42(9)29-31-52-33-44(34-54-56(50,51)55-35-45(47)46(48)49)53-32-30-43(10)28-16-26-41(8)24-14-22-39(6)20-12-18-37(3)4/h36-45H,11-35,47H2,1-10H3,(H,48,49)(H,50,51)/t38-,39-,40-,41-,42-,43-,44-,45-/m1/s1
+CH$LINK: CAS 105662-26-8
+CH$LINK: LIPIDBANK EEL3026
+AC$INSTRUMENT: JMS DX-300/JMS-3500 data system, Japan Electron Optics Laboratory, Japan
+AC$INSTRUMENT_TYPE: EI-B
+AC$MASS_SPECTROMETRY: MS_TYPE MS
+AC$MASS_SPECTROMETRY: ION_MODE POSITIVE
+AC$MASS_SPECTROMETRY: IONIZATION_POTENTIAL 30 eV
+AC$MASS_SPECTROMETRY: SCANNING 5 Sec
+AC$MASS_SPECTROMETRY: SOURCE_TEMPERATURE 320 C
+MS$FOCUSED_ION: ION_TYPE [M]+*
+PK$SPLASH: splash10-05gi-9611001000-5e7663b41cf47681770e
+PK$NUM_PEAK: 58
+PK$PEAK: m/z int. rel.int.
+  36.0 48.935 69
+  43.0 155.642 221
+  55.0 174.853 248
+  56.0 241.397 343
+  57.0 685.069 973
+  69.0 429.724 610
+  70.0 442.816 629
+  71.0 703.562 999
+  74.0 153.368 218
+  81.0 183.718 261
+  82.0 116.090 165
+  83.0 432.501 614
+  84.0 189.394 269
+  85.0 524.513 745
+  95.0 102.128 145
+  96.0 179.720 255
+  97.0 449.439 638
+  98.0 118.287 168
+  99.0 376.928 535
+  111.0 429.907 610
+  112.0 168.627 239
+  113.0 308.186 438
+  123.0 298.191 423
+  124.0 233.188 331
+  125.0 504.097 716
+  126.0 362.310 514
+  127.0 300.450 427
+  139.0 123.338 175
+  140.0 233.340 331
+  141.0 235.767 335
+  153.0 107.469 153
+  155.0 163.241 232
+  169.0 130.189 185
+  182.0 94.773 135
+  183.0 160.234 228
+  196.0 132.555 188
+  197.0 163.622 232
+  211.0 75.654 107
+  278.0 326.649 464
+  279.0 220.386 313
+  280.0 227.649 323
+  281.0 143.572 204
+  296.0 158.358 225
+  297.0 60.502 86
+  309.0 35.370 50
+  325.0 279.819 397
+  326.0 71.000 101
+  340.0 133.760 190
+  341.0 60.486 86
+  343.0 243.579 346
+  344.0 60.028 85
+  354.0 51.468 73
+  373.0 132.555 188
+  374.0 42.069 60
+  383.0 63.767 91
+  634.0 450.446 640
+  635.0 212.497 302
+  636.0 49.622 70
+//
diff --git a/tests/test_data/mona_msp.msp b/tests/test_data/mona_msp.msp
new file mode 100644
index 0000000..b1f6d21
--- /dev/null
+++ b/tests/test_data/mona_msp.msp
@@ -0,0 +1,580 @@
+Name: Sulfaclozine
+Synon: 4-amino-N-(6-chloropyrazin-2-yl)benzenesulfonamide
+SYNON: $:00in-source
+DB#: AU100601
+InChIKey: QKLPUVXBJHRFQZ-UHFFFAOYSA-N
+Precursor_type: [M+H]+
+Spectrum_type: MS2
+PrecursorMZ: 285.0208
+Instrument_type: LC-ESI-QTOF
+Instrument: Bruker maXis Impact
+Ion_mode: P
+Collision_energy: Ramp 21.1-31.6 eV
+Formula: C10H9ClN4O2S
+MW: 284
+ExactMass: 284.013474208
+Comments: "accession=AU100601" "author=Nikiforos Alygizakis, Anna Bletsou, Nikolaos Thomaidis, University of Athens" "license=CC BY" "copyright=Copyright (C) 2015 Department of Chemistry, University of Athens" "exact mass=284.0135" "instrument=Bruker maXis Impact" "instrument type=LC-ESI-QTOF" "ms level=MS2" "ionization=ESI" "fragmentation mode=CID" "collision energy=Ramp 21.1-31.6 eV" "resolution=35000" "column=Acclaim RSLC C18 2.2um, 2.1x100mm, Thermo" "flow gradient=99/1 at 0-1 min, 61/39 at 3 min, 0.1/99.9 at 14-16 min, 99/1 at 16.1-20 min" "flow rate=200 uL/min at 0-3 min, 400 uL/min at 14 min, 480 uL/min at 16-19 min, 200 uL/min at 19.1-20 min" "retention time=4.6 min" "solvent a=water with 0.01% formic acid and 5mM ammonium formate" "solvent b=90:10 methanol:water with 0.01% formic acid and 5mM ammonium formate" "precursor m/z=285.0208" "precursor type=[M+H]+" "ionization mode=positive" "mass accuracy=0.17469602228006656" "mass error=4.9792000027082395E-5" "SMILES=c1cc(ccc1N)S(=O)(=O)Nc2cncc(n2)Cl" "cas=102-65-8" "pubchem cid=66890" "chemspider=60252" "InChI=InChI=1S/C10H9ClN4O2S/c11-9-5-13-6-10(14-9)15-18(16,17)8-3-1-7(12)2-4-8/h1-6H,12H2,(H,14,15)" "InChIKey=QKLPUVXBJHRFQZ-UHFFFAOYSA-N" "molecular formula=C10H9ClN4O2S" "total exact mass=284.013474208" "SMILES=C=1C=C(C=CC1N)S(N=C2C=NC=C(Cl)N2)(=O)=O"
+Num Peaks: 27
+53.0389 0.594951
+54.0333 0.566811
+55.0178 0.522592
+60.0552 0.542692
+65.0382 3.822962
+66.0423 0.506512
+68.049 7.963499
+78.0333 0.727609
+79.0177 1.057244
+92.0498 7.702203
+93.0532 0.731629
+96.0443 0.623091
+108.0457 12.172375
+109.0483 1.181862
+110.0609 4.904325
+120.0562 3.095353
+130.0172 5.656054
+132.0138 1.515517
+156.0118 100.000000
+157.015 8.884065
+158.008 3.891301
+174.0228 0.751729
+184.0757 0.619071
+191.9647 0.590931
+219.0438 0.723589
+285.0221 3.694324
+287.0184 0.840167
+
+
+Name: Sulfachlorpyridazine
+Synon: 4-amino-N-(6-chloropyridazin-3-yl)benzenesulfonamide
+SYNON: $:00in-source
+DB#: AU100701
+InChIKey: XOXHILFPRYWFOD-UHFFFAOYSA-N
+Precursor_type: [M+H]+
+Spectrum_type: MS2
+PrecursorMZ: 285.0208
+Instrument_type: LC-ESI-QTOF
+Instrument: Bruker maXis Impact
+Ion_mode: P
+Collision_energy: Ramp 21.1-31.6 eV
+Formula: C10H9ClN4O2S
+MW: 284
+ExactMass: 284.013474208
+Comments: "accession=AU100701" "author=Nikiforos Alygizakis, Anna Bletsou, Nikolaos Thomaidis, University of Athens" "license=CC BY" "copyright=Copyright (C) 2015 Department of Chemistry, University of Athens" "exact mass=284.0135" "instrument=Bruker maXis Impact" "instrument type=LC-ESI-QTOF" "ms level=MS2" "ionization=ESI" "fragmentation mode=CID" "collision energy=Ramp 21.1-31.6 eV" "resolution=35000" "column=Acclaim RSLC C18 2.2um, 2.1x100mm, Thermo" "flow gradient=99/1 at 0-1 min, 61/39 at 3 min, 0.1/99.9 at 14-16 min, 99/1 at 16.1-20 min" "flow rate=200 uL/min at 0-3 min, 400 uL/min at 14 min, 480 uL/min at 16-19 min, 200 uL/min at 19.1-20 min" "retention time=4.6 min" "solvent a=water with 0.01% formic acid and 5mM ammonium formate" "solvent b=90:10 methanol:water with 0.01% formic acid and 5mM ammonium formate" "precursor m/z=285.0208" "precursor type=[M+H]+" "ionization mode=positive" "mass accuracy=0.17469602228006656" "mass error=4.9792000027082395E-5" "SMILES=c1cc(ccc1N)S(=O)(=O)Nc2ccc(nn2)Cl" "cas=80-32-0" "pubchem cid=6634" "chemspider=6382" "InChI=InChI=1S/C10H9ClN4O2S/c11-9-5-6-10(14-13-9)15-18(16,17)8-3-1-7(12)2-4-8/h1-6H,12H2,(H,14,15)" "InChIKey=XOXHILFPRYWFOD-UHFFFAOYSA-N" "molecular formula=C10H9ClN4O2S" "total exact mass=284.013474208" "SMILES=C=1C=C(C=CC1N)S(NC=2C=CC(Cl)=NN2)(=O)=O"
+Num Peaks: 27
+53.0389 0.594951
+54.0333 0.566811
+55.0178 0.522592
+60.0552 0.542692
+65.0382 3.822962
+66.0423 0.506512
+68.049 7.963499
+78.0333 0.727609
+79.0177 1.057244
+92.0498 7.702203
+93.0532 0.731629
+96.0443 0.623091
+108.0457 12.172375
+109.0483 1.181862
+110.0609 4.904325
+120.0562 3.095353
+130.0172 5.656054
+132.0138 1.515517
+156.0118 100.000000
+157.015 8.884065
+158.008 3.891301
+174.0228 0.751729
+184.0757 0.619071
+191.9647 0.590931
+219.0438 0.723589
+285.0221 3.694324
+287.0184 0.840167
+
+
+Name: Sulfadimidine
+Synon: 4-amino-N-(4,6-dimethylpyrimidin-2-yl)benzenesulfonamide
+SYNON: $:00in-source
+DB#: AU100801
+InChIKey: ASWVTGNCAZCNNR-UHFFFAOYSA-N
+Precursor_type: [M+H]+
+Spectrum_type: MS2
+PrecursorMZ: 279.091
+Instrument_type: LC-ESI-QTOF
+Instrument: Bruker maXis Impact
+Ion_mode: P
+Collision_energy: Ramp 20.8-31.3 eV
+Formula: C12H14N4O2S
+MW: 278
+ExactMass: 278.08374668799996
+Comments: "accession=AU100801" "author=Nikiforos Alygizakis, Anna Bletsou, Nikolaos Thomaidis, University of Athens" "license=CC BY" "copyright=Copyright (C) 2015 Department of Chemistry, University of Athens" "exact mass=278.0837" "instrument=Bruker maXis Impact" "instrument type=LC-ESI-QTOF" "ms level=MS2" "ionization=ESI" "fragmentation mode=CID" "collision energy=Ramp 20.8-31.3 eV" "resolution=35000" "column=Acclaim RSLC C18 2.2um, 2.1x100mm, Thermo" "flow gradient=99/1 at 0-1 min, 61/39 at 3 min, 0.1/99.9 at 14-16 min, 99/1 at 16.1-20 min" "flow rate=200 uL/min at 0-3 min, 400 uL/min at 14 min, 480 uL/min at 16-19 min, 200 uL/min at 19.1-20 min" "retention time=4.4 min" "solvent a=water with 0.01% formic acid and 5mM ammonium formate" "solvent b=90:10 methanol:water with 0.01% formic acid and 5mM ammonium formate" "precursor m/z=279.091" "precursor type=[M+H]+" "ionization mode=positive" "mass accuracy=0.08129248146496051" "mass error=-2.2687999944537296E-5" "SMILES=Cc1cc(nc(n1)NS(=O)(=O)c2ccc(cc2)N)C" "cas=57-68-1" "kegg=C19530" "pubchem cid=5327" "chemspider=5136" "InChI=InChI=1S/C12H14N4O2S/c1-8-7-9(2)15-12(14-8)16-19(17,18)11-5-3-10(13)4-6-11/h3-7H,13H2,1-2H3,(H,14,15,16)" "InChIKey=ASWVTGNCAZCNNR-UHFFFAOYSA-N" "molecular formula=C12H14N4O2S" "total exact mass=278.08374668799996" "SMILES=CC1=CC(C)=NC(=N1)NS(C2=CC=C(C=C2)N)(=O)=O"
+Num Peaks: 46
+53.0379 0.894101
+54.0335 0.661867
+55.0176 0.598003
+65.0379 8.717487
+68.0491 13.013818
+69.0329 1.640153
+78.0334 1.477589
+79.0178 2.261379
+80.0489 1.431143
+81.0444 1.950766
+82.0284 0.606712
+92.0499 30.585230
+93.0558 2.844868
+94.0647 1.686600
+95.0608 3.027752
+96.0443 1.300511
+108.0461 33.946818
+109.0497 2.360079
+110.0616 6.107757
+111.0651 0.519624
+120.0565 1.962378
+122.0716 6.078727
+123.0794 2.246865
+124.0872 71.211681
+125.0905 6.398049
+126.0663 17.911054
+127.0697 0.595100
+156.0117 82.855318
+157.0148 5.739085
+158.0072 1.544357
+174.0224 1.106015
+186.0334 11.263353
+187.0368 0.775081
+188.0128 1.637250
+188.0291 0.534138
+204.0445 100.000000
+205.0473 6.972829
+206.0406 3.358686
+213.1141 18.259405
+214.1167 2.241059
+215.0927 3.071296
+215.1291 1.320831
+279.0925 61.483976
+280.0953 8.438806
+281.0725 7.837901
+282.0742 1.222132
+
+
+Name: Sulfamethazine
+Synon: 4-amino-N-(4,6-dimethylpyrimidin-2-yl)benzenesulfonamide
+SYNON: $:00in-source
+DB#: AU100802
+InChIKey: ASWVTGNCAZCNNR-UHFFFAOYSA-N
+Precursor_type: [M+H]+
+Spectrum_type: MS2
+PrecursorMZ: 279.091
+Instrument_type: LC-ESI-QTOF
+Instrument: Bruker maXis Impact
+Ion_mode: P
+Collision_energy: 20 eV
+Formula: C12H14N4O2S
+MW: 278
+ExactMass: 278.08374668799996
+Comments: "accession=AU100802" "author=Nikiforos Alygizakis, Nikolaos Thomaidis, University of Athens" "license=CC BY-SA" "copyright=Copyright (C) 2015 Department of Chemistry, University of Athens" "exact mass=278.0837467" "instrument=Bruker maXis Impact" "instrument type=LC-ESI-QTOF" "ms level=MS2" "ionization=ESI" "fragmentation mode=CID" "collision energy=20 eV" "resolution=35000" "column=Acclaim RSLC C18 2.2um, 2.1x100mm, Thermo" "flow gradient=99/1 at 0-1 min, 61/39 at 3 min, 0.1/99.9 at 14-16 min, 99/1 at 16.1-20 min" "flow rate=200 uL/min at 0-3 min, 400 uL/min at 14 min, 480 uL/min at 16-19 min, 200 uL/min at 19.1-20 min" "retention time=4.1 min" "solvent a=90:10 water:methanol with 0.01% formic acid and 5mM ammonium formate" "solvent b=methanol with 0.01% formic acid and 5mM ammonium formate" "precursor m/z=279.091" "precursor type=[M+H]+" "ionization mode=positive" "mass accuracy=0.08129248146496051" "mass error=-2.2687999944537296E-5" "SMILES=CC1=CC(C)=NC(NS(=O)(=O)C2=CC=C(N)C=C2)=N1" "cas=57-68-1" "chebi=102265" "kegg=D02436" "pubchem cid=5327" "chemspider=5136" "InChI=InChI=1S/C12H14N4O2S/c1-8-7-9(2)15-12(14-8)16-19(17,18)11-5-3-10(13)4-6-11/h3-7H,13H2,1-2H3,(H,14,15,16)" "InChIKey=ASWVTGNCAZCNNR-UHFFFAOYSA-N" "molecular formula=C12H14N4O2S" "total exact mass=278.08374668799996" "SMILES=CC1=CC(C)=NC(=N1)NS(C2=CC=C(C=C2)N)(=O)=O"
+Num Peaks: 16
+122.0703 0.766124
+124.0861 36.693459
+125.0892 1.930893
+149.0227 0.828453
+156.0104 53.249536
+157.0129 2.999571
+158.0061 1.778967
+174.0209 0.627183
+186.0321 22.621444
+187.0346 1.719235
+188.0285 0.646661
+204.0431 100.000000
+213.1128 8.749399
+214.1159 1.407591
+215.1281 0.658348
+279.0909 80.894937
+
+
+Name: Sulfamethazine
+Synon: 4-amino-N-(4,6-dimethylpyrimidin-2-yl)benzenesulfonamide
+SYNON: $:00in-source
+DB#: AU100803
+InChIKey: ASWVTGNCAZCNNR-UHFFFAOYSA-N
+Precursor_type: [M+H]+
+Spectrum_type: MS2
+PrecursorMZ: 279.091
+Instrument_type: LC-ESI-QTOF
+Instrument: Bruker maXis Impact
+Ion_mode: P
+Collision_energy: 30 eV
+Formula: C12H14N4O2S
+MW: 278
+ExactMass: 278.083746688
+Comments: "accession=AU100803" "author=Nikiforos Alygizakis, Nikolaos Thomaidis, University of Athens" "license=CC BY-SA" "copyright=Copyright (C) 2015 Department of Chemistry, University of Athens" "exact mass=278.0837467" "instrument=Bruker maXis Impact" "instrument type=LC-ESI-QTOF" "ms level=MS2" "ionization=ESI" "fragmentation mode=CID" "collision energy=30 eV" "resolution=35000" "column=Acclaim RSLC C18 2.2um, 2.1x100mm, Thermo" "flow gradient=99/1 at 0-1 min, 61/39 at 3 min, 0.1/99.9 at 14-16 min, 99/1 at 16.1-20 min" "flow rate=200 uL/min at 0-3 min, 400 uL/min at 14 min, 480 uL/min at 16-19 min, 200 uL/min at 19.1-20 min" "retention time=4.2 min" "solvent a=90:10 water:methanol with 0.01% formic acid and 5mM ammonium formate" "solvent b=methanol with 0.01% formic acid and 5mM ammonium formate" "precursor m/z=279.091" "precursor type=[M+H]+" "ionization mode=positive" "mass accuracy=0.08129248166863394" "mass error=-2.2688000001380715E-5" "SMILES=CC1=CC(C)=NC(NS(=O)(=O)C2=CC=C(N)C=C2)=N1" "cas=57-68-1" "chebi=102265" "kegg=D02436" "pubchem cid=5327" "chemspider=5136" "InChI=InChI=1S/C12H14N4O2S/c1-8-7-9(2)15-12(14-8)16-19(17,18)11-5-3-10(13)4-6-11/h3-7H,13H2,1-2H3,(H,14,15,16)" "InChIKey=ASWVTGNCAZCNNR-UHFFFAOYSA-N" "molecular formula=C12H14N4O2S" "total exact mass=278.083746688" "SMILES=CC1=CC(C)=NC(=N1)NS(C2=CC=C(C=C2)N)(=O)=O"
+Num Peaks: 17
+108.0441 1.285794
+122.0704 6.630847
+123.0781 2.170942
+124.0861 100.000000
+125.0889 6.093546
+149.0221 1.388285
+156.0106 50.043481
+158.0064 1.615007
+186.0323 15.118951
+187.0355 1.323064
+196.0858 1.220573
+204.0429 70.964035
+205.0455 4.931983
+213.1123 22.610100
+214.1155 3.003292
+215.1283 0.804398
+279.0903 3.580968
+
+
+Name: Sulfamethazine
+Synon: 4-amino-N-(4,6-dimethylpyrimidin-2-yl)benzenesulfonamide
+SYNON: $:00in-source
+DB#: AU100804
+InChIKey: ASWVTGNCAZCNNR-UHFFFAOYSA-N
+Precursor_type: [M+H]+
+Spectrum_type: MS2
+PrecursorMZ: 279.091
+Instrument_type: LC-ESI-QTOF
+Instrument: Bruker maXis Impact
+Ion_mode: P
+Collision_energy: 40 eV
+Formula: C12H14N4O2S
+MW: 278
+ExactMass: 278.083746688
+Comments: "accession=AU100804" "author=Nikiforos Alygizakis, Nikolaos Thomaidis, University of Athens" "license=CC BY-SA" "copyright=Copyright (C) 2015 Department of Chemistry, University of Athens" "exact mass=278.0837467" "instrument=Bruker maXis Impact" "instrument type=LC-ESI-QTOF" "ms level=MS2" "ionization=ESI" "fragmentation mode=CID" "collision energy=40 eV" "resolution=35000" "column=Acclaim RSLC C18 2.2um, 2.1x100mm, Thermo" "flow gradient=99/1 at 0-1 min, 61/39 at 3 min, 0.1/99.9 at 14-16 min, 99/1 at 16.1-20 min" "flow rate=200 uL/min at 0-3 min, 400 uL/min at 14 min, 480 uL/min at 16-19 min, 200 uL/min at 19.1-20 min" "retention time=4.1 min" "solvent a=90:10 water:methanol with 0.01% formic acid and 5mM ammonium formate" "solvent b=methanol with 0.01% formic acid and 5mM ammonium formate" "precursor m/z=279.091" "precursor type=[M+H]+" "ionization mode=positive" "mass accuracy=0.08129248166863394" "mass error=-2.2688000001380715E-5" "SMILES=CC1=CC(C)=NC(NS(=O)(=O)C2=CC=C(N)C=C2)=N1" "cas=57-68-1" "chebi=102265" "kegg=D02436" "pubchem cid=5327" "chemspider=5136" "InChI=InChI=1S/C12H14N4O2S/c1-8-7-9(2)15-12(14-8)16-19(17,18)11-5-3-10(13)4-6-11/h3-7H,13H2,1-2H3,(H,14,15,16)" "InChIKey=ASWVTGNCAZCNNR-UHFFFAOYSA-N" "molecular formula=C12H14N4O2S" "total exact mass=278.083746688" "SMILES=CC1=CC(C)=NC(=N1)NS(C2=CC=C(C=C2)N)(=O)=O"
+Num Peaks: 22
+108.0445 1.153673
+122.0702 5.323878
+123.0772 2.202467
+124.0862 100.000000
+125.089 6.847126
+134.0701 0.714179
+149.0224 1.747990
+154.0624 0.644259
+155.0685 0.624282
+156.0104 10.373071
+157.0126 0.933926
+172.0852 0.564351
+186.0324 3.845578
+196.0852 5.209010
+197.0903 1.378415
+198.0888 2.362283
+204.0427 15.422264
+205.0463 0.869001
+206.0375 0.759127
+212.1036 0.659242
+213.1121 18.109174
+214.1152 2.577036
+
+
+Name: Sulfamethazine
+Synon: 4-amino-N-(4,6-dimethylpyrimidin-2-yl)benzenesulfonamide
+SYNON: $:00in-source
+DB#: AU100805
+InChIKey: ASWVTGNCAZCNNR-UHFFFAOYSA-N
+Precursor_type: [M+H]+
+Spectrum_type: MS2
+PrecursorMZ: 279.091
+Instrument_type: LC-ESI-QTOF
+Instrument: Bruker maXis Impact
+Ion_mode: P
+Collision_energy: 50 eV
+Formula: C12H14N4O2S
+MW: 278
+ExactMass: 278.083746688
+Comments: "accession=AU100805" "author=Nikiforos Alygizakis, Nikolaos Thomaidis, University of Athens" "license=CC BY-SA" "copyright=Copyright (C) 2015 Department of Chemistry, University of Athens" "exact mass=278.0837467" "instrument=Bruker maXis Impact" "instrument type=LC-ESI-QTOF" "ms level=MS2" "ionization=ESI" "fragmentation mode=CID" "collision energy=50 eV" "resolution=35000" "column=Acclaim RSLC C18 2.2um, 2.1x100mm, Thermo" "flow gradient=99/1 at 0-1 min, 61/39 at 3 min, 0.1/99.9 at 14-16 min, 99/1 at 16.1-20 min" "flow rate=200 uL/min at 0-3 min, 400 uL/min at 14 min, 480 uL/min at 16-19 min, 200 uL/min at 19.1-20 min" "retention time=4.2 min" "solvent a=90:10 water:methanol with 0.01% formic acid and 5mM ammonium formate" "solvent b=methanol with 0.01% formic acid and 5mM ammonium formate" "precursor m/z=279.091" "precursor type=[M+H]+" "ionization mode=positive" "mass accuracy=0.08129248166863394" "mass error=-2.2688000001380715E-5" "SMILES=CC1=CC(C)=NC(NS(=O)(=O)C2=CC=C(N)C=C2)=N1" "cas=57-68-1" "chebi=102265" "kegg=D02436" "pubchem cid=5327" "chemspider=5136" "InChI=InChI=1S/C12H14N4O2S/c1-8-7-9(2)15-12(14-8)16-19(17,18)11-5-3-10(13)4-6-11/h3-7H,13H2,1-2H3,(H,14,15,16)" "InChIKey=ASWVTGNCAZCNNR-UHFFFAOYSA-N" "molecular formula=C12H14N4O2S" "total exact mass=278.083746688" "SMILES=CC1=CC(C)=NC(=N1)NS(C2=CC=C(C=C2)N)(=O)=O"
+Num Peaks: 24
+108.0453 1.770916
+122.0703 2.803951
+123.078 2.792598
+124.0859 100.000000
+125.0891 7.901010
+149.0231 1.623340
+154.0639 2.111477
+155.0605 2.463390
+155.0714 2.690430
+156.01 2.713134
+169.0745 1.475763
+171.0781 1.555228
+172.0869 1.271427
+181.0634 0.930866
+186.1022 1.033034
+195.0786 1.555228
+196.0859 7.628562
+197.0856 3.871041
+198.0886 5.903054
+199.0904 0.998978
+204.0438 2.622318
+212.1048 2.327165
+213.1122 9.342718
+214.1153 1.725508
+
+
+Name: Sulfamethazine
+Synon: 4-amino-N-(4,6-dimethylpyrimidin-2-yl)benzenesulfonamide
+SYNON: $:00in-source
+DB#: AU100806
+InChIKey: ASWVTGNCAZCNNR-UHFFFAOYSA-N
+Precursor_type: [M+H]+
+Spectrum_type: MS2
+PrecursorMZ: 279.091
+Instrument_type: LC-ESI-QTOF
+Instrument: Bruker maXis Impact
+Ion_mode: P
+Collision_energy: 10 eV
+Formula: C12H14N4O2S
+MW: 278
+ExactMass: 278.08374668799996
+Comments: "accession=AU100806" "author=Nikiforos Alygizakis, Nikolaos Thomaidis, University of Athens" "license=CC BY-SA" "copyright=Copyright (C) 2015 Department of Chemistry, University of Athens" "exact mass=278.0837467" "instrument=Bruker maXis Impact" "instrument type=LC-ESI-QTOF" "ms level=MS2" "ionization=ESI" "fragmentation mode=CID" "collision energy=10 eV" "resolution=35000" "column=Acclaim RSLC C18 2.2um, 2.1x100mm, Thermo" "flow gradient=99/1 at 0-1 min, 61/39 at 3 min, 0.1/99.9 at 14-16 min, 99/1 at 16.1-20 min" "flow rate=200 uL/min at 0-3 min, 400 uL/min at 14 min, 480 uL/min at 16-19 min, 200 uL/min at 19.1-20 min" "retention time=4.2 min" "solvent a=90:10 water:methanol with 0.01% formic acid and 5mM ammonium formate" "solvent b=methanol with 0.01% formic acid and 5mM ammonium formate" "precursor m/z=279.091" "precursor type=[M+H]+" "ionization mode=positive" "mass accuracy=0.08129248146496051" "mass error=-2.2687999944537296E-5" "SMILES=CC1=CC(C)=NC(NS(=O)(=O)C2=CC=C(N)C=C2)=N1" "cas=57-68-1" "chebi=102265" "kegg=D02436" "pubchem cid=5327" "chemspider=5136" "InChI=InChI=1S/C12H14N4O2S/c1-8-7-9(2)15-12(14-8)16-19(17,18)11-5-3-10(13)4-6-11/h3-7H,13H2,1-2H3,(H,14,15,16)" "InChIKey=ASWVTGNCAZCNNR-UHFFFAOYSA-N" "molecular formula=C12H14N4O2S" "total exact mass=278.08374668799996" "SMILES=CC1=CC(C)=NC(=N1)NS(C2=CC=C(C=C2)N)(=O)=O"
+Num Peaks: 4
+124.086 0.740586
+156.0098 1.123942
+186.0319 0.831793
+279.0908 100.000000
+
+
+Name: Sulfadimethoxine
+Synon: 4-amino-n-(2,6-dimethoxypyrimidin-4-yl)benzenesulfonamide
+SYNON: $:00in-source
+DB#: AU100902
+InChIKey: ZZORFUFYDOWNEF-UHFFFAOYSA-N
+Precursor_type: [M+H]+
+Spectrum_type: MS2
+PrecursorMZ: 311.0809
+Instrument_type: LC-ESI-QTOF
+Instrument: Bruker maXis Impact
+Ion_mode: P
+Collision_energy: 20 eV
+Formula: C12H14N4O4S
+MW: 310
+ExactMass: 310.07357592799997
+Comments: "accession=AU100902" "author=Nikiforos Alygizakis, Anna Bletsou, Nikolaos Thomaidis, University of Athens" "license=CC BY-SA" "copyright=Copyright (C) 2015 Department of Chemistry, University of Athens" "exact mass=310.0735759" "instrument=Bruker maXis Impact" "instrument type=LC-ESI-QTOF" "ms level=MS2" "ionization=ESI" "fragmentation mode=CID" "collision energy=20 eV" "resolution=35000" "column=Acclaim RSLC C18 2.2um, 2.1x100mm, Thermo" "flow gradient=99/1 at 0-1 min, 61/39 at 3 min, 0.1/99.9 at 14-16 min, 99/1 at 16.1-20 min" "flow rate=200 uL/min at 0-3 min, 400 uL/min at 14 min, 480 uL/min at 16-19 min, 200 uL/min at 19.1-20 min" "retention time=4.6 min" "solvent a=water with 0.01% formic acid and 5mM ammonium formate" "solvent b=90:10 methanol:water with 0.01% formic acid and 5mM ammonium formate" "precursor m/z=311.0809" "precursor type=[M+H]+" "ionization mode=positive" "mass accuracy=0.15453214911901536" "mass error=4.8072000026877504E-5" "SMILES=COc1cc(nc(n1)OC)NS(=O)(=O)c2ccc(cc2)N" "cas=122-11-2" "chebi=32161" "pubchem=5323" "chemspider=5132" "InChI=InChI=1S/C12H14N4O4S/c1-19-11-7-10(14-12(15-11)20-2)16-21(17,18)9-5-3-8(13)4-6-9/h3-7H,13H2,1-2H3,(H,14,15,16)" "InChIKey=ZZORFUFYDOWNEF-UHFFFAOYSA-N" "molecular formula=C12H14N4O4S" "total exact mass=310.07357592799997" "SMILES=COC=1C=C(N=C(N1)OC)NS(C2=CC=C(C=C2)N)(=O)=O"
+Num Peaks: 15
+140.0447 6.249276
+141.0515 0.699085
+154.0604 5.781932
+155.0683 3.398864
+156.0107 100.000000
+156.0763 16.893901
+157.0134 4.171334
+157.0794 0.857441
+158.0069 2.371480
+218.0242 0.965586
+245.1032 9.010853
+246.1061 0.834267
+311.0811 75.335059
+312.0835 7.145340
+313.0796 2.301958
+
+
+Name: Sulfadimethoxine
+Synon: 4-amino-n-(2,6-dimethoxypyrimidin-4-yl)benzenesulfonamide
+SYNON: $:00in-source
+DB#: AU100903
+InChIKey: ZZORFUFYDOWNEF-UHFFFAOYSA-N
+Precursor_type: [M+H]+
+Spectrum_type: MS2
+PrecursorMZ: 311.0809
+Instrument_type: LC-ESI-QTOF
+Instrument: Bruker maXis Impact
+Ion_mode: P
+Collision_energy: 30 eV
+Formula: C12H14N4O4S
+MW: 310
+ExactMass: 310.073575928
+Comments: "accession=AU100903" "author=Nikiforos Alygizakis, Anna Bletsou, Nikolaos Thomaidis, University of Athens" "license=CC BY-SA" "copyright=Copyright (C) 2015 Department of Chemistry, University of Athens" "exact mass=310.0735759" "instrument=Bruker maXis Impact" "instrument type=LC-ESI-QTOF" "ms level=MS2" "ionization=ESI" "fragmentation mode=CID" "collision energy=30 eV" "resolution=35000" "column=Acclaim RSLC C18 2.2um, 2.1x100mm, Thermo" "flow gradient=99/1 at 0-1 min, 61/39 at 3 min, 0.1/99.9 at 14-16 min, 99/1 at 16.1-20 min" "flow rate=200 uL/min at 0-3 min, 400 uL/min at 14 min, 480 uL/min at 16-19 min, 200 uL/min at 19.1-20 min" "retention time=4.6 min" "solvent a=water with 0.01% formic acid and 5mM ammonium formate" "solvent b=90:10 methanol:water with 0.01% formic acid and 5mM ammonium formate" "precursor m/z=311.0809" "precursor type=[M+H]+" "ionization mode=positive" "mass accuracy=0.15453214893628664" "mass error=4.8071999970034085E-5" "SMILES=COc1cc(nc(n1)OC)NS(=O)(=O)c2ccc(cc2)N" "cas=122-11-2" "chebi=32161" "pubchem=5323" "chemspider=5132" "InChI=InChI=1S/C12H14N4O4S/c1-19-11-7-10(14-12(15-11)20-2)16-21(17,18)9-5-3-8(13)4-6-9/h3-7H,13H2,1-2H3,(H,14,15,16)" "InChIKey=ZZORFUFYDOWNEF-UHFFFAOYSA-N" "molecular formula=C12H14N4O4S" "total exact mass=310.073575928" "SMILES=COC=1C=C(N=C(N1)OC)NS(C2=CC=C(C=C2)N)(=O)=O"
+Num Peaks: 21
+108.0448 1.310092
+124.0204 1.354502
+126.0659 3.563895
+127.0504 0.843788
+138.0294 1.576552
+141.0517 10.458532
+154.0604 60.575108
+155.0672 5.484623
+156.0105 100.000000
+156.0762 63.495059
+157.0131 4.540913
+157.0798 3.452870
+158.0071 2.320417
+201.0772 2.720107
+212.069 3.896969
+218.0235 0.843788
+230.0808 9.270567
+231.0843 1.232375
+245.1039 10.447430
+246.107 1.176862
+311.0829 3.819252
+
+
+Name: Sulfadimethoxine
+Synon: 4-amino-n-(2,6-dimethoxypyrimidin-4-yl)benzenesulfonamide
+SYNON: $:00in-source
+DB#: AU100904
+InChIKey: ZZORFUFYDOWNEF-UHFFFAOYSA-N
+Precursor_type: [M+H]+
+Spectrum_type: MS2
+PrecursorMZ: 311.0809
+Instrument_type: LC-ESI-QTOF
+Instrument: Bruker maXis Impact
+Ion_mode: P
+Collision_energy: 40 eV
+Formula: C12H14N4O4S
+MW: 310
+ExactMass: 310.073575928
+Comments: "accession=AU100904" "author=Nikiforos Alygizakis, Anna Bletsou, Nikolaos Thomaidis, University of Athens" "license=CC BY-SA" "copyright=Copyright (C) 2015 Department of Chemistry, University of Athens" "exact mass=310.0735759" "instrument=Bruker maXis Impact" "instrument type=LC-ESI-QTOF" "ms level=MS2" "ionization=ESI" "fragmentation mode=CID" "collision energy=40 eV" "resolution=35000" "column=Acclaim RSLC C18 2.2um, 2.1x100mm, Thermo" "flow gradient=99/1 at 0-1 min, 61/39 at 3 min, 0.1/99.9 at 14-16 min, 99/1 at 16.1-20 min" "flow rate=200 uL/min at 0-3 min, 400 uL/min at 14 min, 480 uL/min at 16-19 min, 200 uL/min at 19.1-20 min" "retention time=4.7 min" "solvent a=water with 0.01% formic acid and 5mM ammonium formate" "solvent b=90:10 methanol:water with 0.01% formic acid and 5mM ammonium formate" "precursor m/z=311.0809" "precursor type=[M+H]+" "ionization mode=positive" "mass accuracy=0.15453214893628664" "mass error=4.8071999970034085E-5" "SMILES=COc1cc(nc(n1)OC)NS(=O)(=O)c2ccc(cc2)N" "cas=122-11-2" "chebi=32161" "pubchem=5323" "chemspider=5132" "InChI=InChI=1S/C12H14N4O4S/c1-19-11-7-10(14-12(15-11)20-2)16-21(17,18)9-5-3-8(13)4-6-9/h3-7H,13H2,1-2H3,(H,14,15,16)" "InChIKey=ZZORFUFYDOWNEF-UHFFFAOYSA-N" "molecular formula=C12H14N4O4S" "total exact mass=310.073575928" "SMILES=COC=1C=C(N=C(N1)OC)NS(C2=CC=C(C=C2)N)(=O)=O"
+Num Peaks: 27
+112.0515 2.118270
+123.0436 2.184466
+124.0205 1.897617
+124.0508 2.162401
+126.0666 5.803177
+127.0502 2.030009
+132.0558 1.963813
+138.0295 4.898500
+140.045 77.780229
+141.0524 38.989409
+142.058 2.449250
+154.0604 100.000000
+155.0634 5.383936
+156.0104 20.101500
+156.0407 4.236540
+156.0761 54.744042
+157.0639 1.809356
+160.049 1.985878
+178.0597 3.420124
+201.077 8.274492
+202.0789 1.787290
+212.0697 15.114740
+213.0728 2.581642
+229.0713 2.206531
+230.0797 6.421006
+231.0852 1.919682
+245.1026 1.919682
+
+
+Name: Sulfadimethoxine
+Synon: 4-amino-n-(2,6-dimethoxypyrimidin-4-yl)benzenesulfonamide
+SYNON: $:00in-source
+DB#: AU100905
+InChIKey: ZZORFUFYDOWNEF-UHFFFAOYSA-N
+Precursor_type: [M+H]+
+Spectrum_type: MS2
+PrecursorMZ: 311.0809
+Instrument_type: LC-ESI-QTOF
+Instrument: Bruker maXis Impact
+Ion_mode: P
+Collision_energy: 50 eV
+Formula: C12H14N4O4S
+MW: 310
+ExactMass: 310.07357592799997
+Comments: "accession=AU100905" "author=Nikiforos Alygizakis, Anna Bletsou, Nikolaos Thomaidis, University of Athens" "license=CC BY-SA" "copyright=Copyright (C) 2015 Department of Chemistry, University of Athens" "exact mass=310.0735759" "instrument=Bruker maXis Impact" "instrument type=LC-ESI-QTOF" "ms level=MS2" "ionization=ESI" "fragmentation mode=CID" "collision energy=50 eV" "resolution=35000" "column=Acclaim RSLC C18 2.2um, 2.1x100mm, Thermo" "flow gradient=99/1 at 0-1 min, 61/39 at 3 min, 0.1/99.9 at 14-16 min, 99/1 at 16.1-20 min" "flow rate=200 uL/min at 0-3 min, 400 uL/min at 14 min, 480 uL/min at 16-19 min, 200 uL/min at 19.1-20 min" "retention time=4.7 min" "solvent a=water with 0.01% formic acid and 5mM ammonium formate" "solvent b=90:10 methanol:water with 0.01% formic acid and 5mM ammonium formate" "precursor m/z=311.0809" "precursor type=[M+H]+" "ionization mode=positive" "mass accuracy=0.15453214911901536" "mass error=4.8072000026877504E-5" "SMILES=COc1cc(nc(n1)OC)NS(=O)(=O)c2ccc(cc2)N" "cas=122-11-2" "chebi=32161" "pubchem=5323" "chemspider=5132" "InChI=InChI=1S/C12H14N4O4S/c1-19-11-7-10(14-12(15-11)20-2)16-21(17,18)9-5-3-8(13)4-6-9/h3-7H,13H2,1-2H3,(H,14,15,16)" "InChIKey=ZZORFUFYDOWNEF-UHFFFAOYSA-N" "molecular formula=C12H14N4O4S" "total exact mass=310.07357592799997" "SMILES=COC=1C=C(N=C(N1)OC)NS(C2=CC=C(C=C2)N)(=O)=O"
+Num Peaks: 22
+112.051 5.243790
+123.0427 7.773689
+124.0502 6.439742
+126.0287 5.841766
+126.0666 6.255750
+127.0491 3.955842
+132.0559 9.521619
+133.0628 5.105796
+138.0293 10.579577
+140.045 45.768169
+141.0521 46.780129
+142.0539 3.817847
+154.0606 100.000000
+156.0102 3.679853
+156.0405 5.243790
+156.0769 17.157314
+157.0629 5.887764
+160.0507 3.909844
+178.0613 9.429623
+184.0741 4.737810
+201.0768 9.015639
+212.0705 7.589696
+
+
+Name: Sulfadoxine
+Synon: 4-amino-N-(5,6-dimethoxypyrimidin-4-yl)benzenesulfonamide
+SYNON: $:00in-source
+DB#: AU101001
+InChIKey: PJSFRIWCGOHTNF-UHFFFAOYSA-N
+Precursor_type: [M+H]+
+Spectrum_type: MS2
+PrecursorMZ: 311.0809
+Instrument_type: LC-ESI-QTOF
+Instrument: Bruker maXis Impact
+Ion_mode: P
+Collision_energy: Ramp 21.8-32.7 eV
+Formula: C12H14N4O4S
+MW: 310
+ExactMass: 310.07357592799997
+Comments: "accession=AU101001" "author=Nikiforos Alygizakis, Anna Bletsou, Nikolaos Thomaidis, University of Athens" "license=CC BY" "copyright=Copyright (C) 2015 Department of Chemistry, University of Athens" "exact mass=310.0736" "instrument=Bruker maXis Impact" "instrument type=LC-ESI-QTOF" "ms level=MS2" "ionization=ESI" "fragmentation mode=CID" "collision energy=Ramp 21.8-32.7 eV" "resolution=35000" "column=Acclaim RSLC C18 2.2um, 2.1x100mm, Thermo" "flow gradient=99/1 at 0-1 min, 61/39 at 3 min, 0.1/99.9 at 14-16 min, 99/1 at 16.1-20 min" "flow rate=200 uL/min at 0-3 min, 400 uL/min at 14 min, 480 uL/min at 16-19 min, 200 uL/min at 19.1-20 min" "retention time=4.8 min" "solvent a=water with 0.01% formic acid and 5mM ammonium formate" "solvent b=90:10 methanol:water with 0.01% formic acid and 5mM ammonium formate" "precursor m/z=311.0809" "precursor type=[M+H]+" "ionization mode=positive" "mass accuracy=0.15453214911901536" "mass error=4.8072000026877504E-5" "SMILES=COc1c(ncnc1OC)NS(=O)(=O)c2ccc(cc2)N" "cas=2447-57-6" "kegg=C07630" "pubchem cid=17134" "chemspider=16218" "InChI=InChI=1S/C12H14N4O4S/c1-19-10-11(14-7-15-12(10)20-2)16-21(17,18)9-5-3-8(13)4-6-9/h3-7H,13H2,1-2H3,(H,14,15,16)" "InChIKey=PJSFRIWCGOHTNF-UHFFFAOYSA-N" "molecular formula=C12H14N4O4S" "total exact mass=310.07357592799997" "SMILES=COC1=C(N=CN=C1OC)NS(C2=CC=C(C=C2)N)(=O)=O"
+Num Peaks: 42
+53.0386 0.535490
+54.0339 0.505437
+65.0381 7.755041
+68.0491 10.088247
+69.0329 1.049123
+78.0332 1.038195
+79.0179 1.721217
+80.0363 0.707612
+80.0493 1.446642
+92.0498 46.272062
+93.0559 3.808535
+96.0447 1.331894
+108.0463 57.395771
+109.049 3.816731
+109.0643 0.531392
+110.0614 7.111633
+113.0359 0.703513
+120.0568 1.860554
+124.0215 1.529971
+124.0512 0.572373
+126.0665 2.939730
+138.0301 0.707612
+140.0457 34.351948
+141.0528 5.744222
+154.0615 32.562428
+155.0682 7.798754
+156.0118 100.000000
+156.0771 40.377575
+157.0147 7.961314
+157.0796 2.486203
+158.0078 3.766188
+201.0773 1.349653
+212.0697 2.576362
+213.0752 0.527294
+218.0236 1.945249
+230.0808 5.531119
+231.085 0.811431
+245.1045 18.128791
+246.1073 2.479373
+311.0829 49.986340
+312.0854 8.491339
+313.0812 2.222556
+
+
+Name: Sulfadiazine
+Synon: 4-amino-n-pyrimidin-2-ylbenzenesulfonamide
+SYNON: $:00in-source
+DB#: AU101101
+InChIKey: SEEPANYCNGTZFQ-UHFFFAOYSA-N
+Precursor_type: [M+H]+
+Spectrum_type: MS2
+PrecursorMZ: 251.0597
+Instrument_type: LC-ESI-QTOF
+Instrument: Bruker maXis Impact
+Ion_mode: P
+Collision_energy: 10 eV
+Formula: C10H10N4O2S
+MW: 250
+ExactMass: 250.05244656
+Comments: "accession=AU101101" "author=Nikiforos Alygizakis, Anna Bletsou, Nikolaos Thomaidis, University of Athens" "license=CC BY-SA" "copyright=Copyright (C) 2015 Department of Chemistry, University of Athens" "exact mass=250.0524466" "instrument=Bruker maXis Impact" "instrument type=LC-ESI-QTOF" "ms level=MS2" "ionization=ESI" "fragmentation mode=CID" "collision energy=10 eV" "resolution=35000" "column=Acclaim RSLC C18 2.2um, 2.1x100mm, Thermo" "flow gradient=99/1 at 0-1 min, 61/39 at 3 min, 0.1/99.9 at 14-16 min, 99/1 at 16.1-20 min" "flow rate=200 uL/min at 0-3 min, 400 uL/min at 14 min, 480 uL/min at 16-19 min, 200 uL/min at 19.1-20 min" "retention time=3.3 min" "solvent a=water with 0.01% formic acid and 5mM ammonium formate" "solvent b=90:10 methanol:water with 0.01% formic acid and 5mM ammonium formate" "precursor m/z=251.0597" "precursor type=[M+H]+" "ionization mode=positive" "mass accuracy=0.08985910518808851" "mass error=-2.2559999990789947E-5" "SMILES=c1cnc(nc1)NS(=O)(=O)c2ccc(cc2)N" "cas=141582-64-1" "chebi=9328" "kegg=C07658" "pubchem=5215" "chemspider=5026" "InChI=InChI=1S/C10H10N4O2S/c11-8-2-4-9(5-3-8)17(15,16)14-10-12-6-1-7-13-10/h1-7H,11H2,(H,12,13,14)" "InChIKey=SEEPANYCNGTZFQ-UHFFFAOYSA-N" "molecular formula=C10H10N4O2S" "total exact mass=250.05244656" "SMILES=C1=CN=C(N=C1)NS(C2=CC=C(C=C2)N)(=O)=O"
+Num Peaks: 6
+156.0106 9.361897
+174.0199 0.724251
+176.012 0.693756
+251.0596 100.000000
+252.0616 7.867653
+253.0565 2.729283
\ No newline at end of file
diff --git a/tests/test_databases.py b/tests/test_databases.py
index daae038..20c7ff7 100644
--- a/tests/test_databases.py
+++ b/tests/test_databases.py
@@ -25,6 +25,7 @@
 import shutil
 import pickle
 from metaboblend.databases import *
+from metaboblend.parse import reformat_xml
 
 
 class DatabasesTestCase(unittest.TestCase):
diff --git a/tests/test_isomorphism_database.py b/tests/test_isomorphism_database.py
index 96ff705..6ad11a4 100644
--- a/tests/test_isomorphism_database.py
+++ b/tests/test_isomorphism_database.py
@@ -21,6 +21,7 @@
 
 
 import os
+import sys
 import unittest
 import shutil
 import tempfile
@@ -45,46 +46,50 @@ def setUpClass(cls):
         shutil.copytree(os.path.join(os.path.dirname(os.path.realpath(__file__)), "test_data"),
                         cls.to_test_results("test_data"))
 
+    def test_create_connectivity_database(self):
+        """Create a connectivity database and compare its subgraphs table with the reference database."""
         pkg_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+
         if sys.platform == "win32" or sys.platform == "win64":  # TODO: add RI as dependency
-            cls.path_ri = os.path.join(pkg_path, "tools", "RI_win", "RI3.6-release", "ri36")
+            self.path_ri = os.path.join(pkg_path, "tools", "RI_win", "RI3.6-release", "ri36")
 
-        elif sys.platform == "darwin":
-            cls.path_ri = os.path.join(pkg_path, "tools", "RI_mac", "RI3.6-release", "ri36")
+        else:  # NOTE(review): the database is only created and checked in this branch, i.e. never on Windows
 
-        elif sys.platform == "linux2":
-            if "bb" in "socket.gethostname":
-                cls.path_ri = os.path.join(pkg_path, "tools", "RI_unix", "RI3.6-release", "ri36")
-            else:
-                cls.path_ri = os.path.join(pkg_path, "tools", "RI_bb", "RI3.6-release", "ri36")
+            if sys.platform == "darwin":
+                self.path_ri = os.path.join(pkg_path, "tools", "RI_mac", "RI3.6-release", "ri36")
 
-        elif sys.platform == "linux":
-            cls.path_ri = os.path.join(pkg_path, "tools", "RI_unix", "RI3.6-release", "ri36")
+            elif sys.platform == "linux2":  # NOTE(review): Python 3.3+ reports "linux", so this looks unreachable there
+                if "bb" in "socket.gethostname":  # NOTE(review): tests a string literal (always False), not the hostname
+                    self.path_ri = os.path.join(pkg_path, "tools", "RI_unix", "RI3.6-release", "ri36")
+                else:
+                    self.path_ri = os.path.join(pkg_path, "tools", "RI_bb", "RI3.6-release", "ri36")
 
-        create_connectivity_database(cls.to_test_results("connectivity.sqlite"),
-                                    3,  # sizes
-                                    [1, 2],  # boxes
-                                    cls.path_ri
-                                    )
+            elif sys.platform == "linux":
+                self.path_ri = os.path.join(pkg_path, "tools", "RI_unix", "RI3.6-release", "ri36")
 
-    def test_create_connectivity_database(self):
-        ref_db = sqlite3.connect(self.to_test_data("connectivity.sqlite"))
-        ref_db_cursor = ref_db.cursor()
-        ref_db_cursor.execute("SELECT * FROM subgraphs")
+            create_connectivity_database(self.to_test_results("connectivity.sqlite"),
+                                         3,  # sizes
+                                         [1, 2],  # boxes
+                                         self.path_ri
+                                         )
+
+            ref_db = sqlite3.connect(self.to_test_data("connectivity.sqlite"))
+            ref_db_cursor = ref_db.cursor()
+            ref_db_cursor.execute("SELECT * FROM subgraphs")
 
-        test_db = sqlite3.connect(self.to_test_results("connectivity.sqlite"))
-        test_db_cursor = test_db.cursor()
-        test_db_cursor.execute("SELECT * FROM subgraphs")
+            test_db = sqlite3.connect(self.to_test_results("connectivity.sqlite"))
+            test_db_cursor = test_db.cursor()
+            test_db_cursor.execute("SELECT * FROM subgraphs")
 
-        ref_rows = {}
-        for row in ref_db_cursor.fetchall():
-            ref_rows[row[0]] = row
+            test_rows = {}
+            for row in test_db_cursor.fetchall():
+                test_rows[row[0]] = row
 
-        for row in test_db_cursor.fetchall():
-            self.assertEqual(row, ref_rows[row[0]])
+            for row in ref_db_cursor.fetchall():  # rows that exist only in the generated database go unnoticed
+                self.assertEqual(row, test_rows[row[0]])
 
-        ref_db.close()
-        test_db.close()
+            ref_db.close()
+            test_db.close()
 
 
 if __name__ == '__main__':
diff --git a/tests/test_parse.py b/tests/test_parse.py
new file mode 100644
index 0000000..513fb68
--- /dev/null
+++ b/tests/test_parse.py
@@ -0,0 +1,177 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2019-2020 Ralf Weber
+#
+# This file is part of MetaboBlend.
+#
+# MetaboBlend is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# MetaboBlend is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with MetaboBlend.  If not, see <https://www.gnu.org/licenses/>.
+#
+
+
+import os
+import copy
+import shutil
+import tempfile
+import unittest
+from metaboblend.parse import *
+
+
+class IsomorphDbTestCase(unittest.TestCase):
+    temp_results_dir = None
+
+    @classmethod
+    def to_test_results(cls, *args):  # build a path below the temporary results directory
+        return os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.temp_results_dir.name, *args)
+
+    @classmethod
+    def to_test_data(cls, *args):
+        return cls.to_test_results("test_data", *args)  # the copy of test_data made by setUpClass
+
+    @classmethod
+    def setUpClass(cls):
+        """Copy the bundled test data into a temporary directory and define the reference values for AU101101."""
+        tests_dir = os.path.dirname(os.path.realpath(__file__))
+        cls.temp_results_dir = tempfile.TemporaryDirectory(dir=tests_dir)
+        shutil.copytree(os.path.join(tests_dir, "test_data"), cls.to_test_results("test_data"))
+
+        cls.precursor_mz = 251.0597
+        cls.fragment_mzs = [156.0106, 174.0199, 176.012, 251.0596, 252.0616, 253.0565]
+        cls.mf = [10, 10, 4, 2, 0, 1]
+        cls.exact_mass = 250.052424
+        cls.neutral_fragment_masses = [155.00332400000002, 173.01262400000002, 175.004724,
+                                       250.052324, 251.054324, 252.049224]
+
+    def test_parse_msp(self):
+        """Check the records yielded by parse_msp and that parse_ms_data agrees when given the same file."""
+        expected_last = {"ms_id": "AU101101", "mf": self.mf, "precursor_mz": self.precursor_mz,
+                         "fragment_mzs": self.fragment_mzs, "precursor_type": "[M+H]+",
+                         "exact_mass": self.exact_mass, "neutral_fragment_masses": self.neutral_fragment_masses}
+
+        # only the first two records of the file are expected to be yielded as None
+        for index, record in enumerate(parse_msp(self.to_test_data("mona_msp.msp"))):
+            if index >= 2:
+                self.assertNotEqual(record, None)
+            else:
+                self.assertEqual(record, None)
+
+        self.assertEqual(record, expected_last)  # the loop variable still holds the final record
+        self.assertEqual(list(parse_msp(self.to_test_data("massbank_msp.txt")))[0], None)
+
+        msp_path = self.to_test_data("mona_msp.msp")
+        for from_msp, from_ms_data in zip(parse_msp(msp_path), parse_ms_data(msp_path)):
+            self.assertEqual(from_msp, from_ms_data)
+
+    def test_parse_ms_data(self):
+        """
+        Check that parse_ms_data calculates the exact mass and neutral fragment masses only when they are missing
+        from the input dictionary.
+        """
+
+        def first_parsed(key, ms_dict, *extra_args):
+            # a deep copy is parsed so that the dictionaries used as expectations cannot be modified
+            return list(parse_ms_data({key: copy.deepcopy(ms_dict)}, *extra_args))[0]
+
+        precursor_only = {"ms_id": "AU101101", "mf": self.mf, "precursor_mz": self.precursor_mz,
+                          "precursor_type": "[M+H]+"}
+        with_fragments = dict(precursor_only, fragment_mzs=self.fragment_mzs)
+
+        # exact mass and neutral fragment masses should not be overwritten by parse_ms_data
+        both_given = dict(with_fragments, exact_mass="abcd",
+                          neutral_fragment_masses=["a", "b", "c", "d"])
+        self.assertEqual(first_parsed("AU101101", both_given), both_given)
+
+        # a given exact mass is kept, the neutral fragment masses are calculated and the ms_id becomes the key
+        exact_mass_given = dict(with_fragments, exact_mass="abc")
+        expected = dict(exact_mass_given, ms_id="test",
+                        neutral_fragment_masses=self.neutral_fragment_masses)
+        self.assertEqual(first_parsed("test", exact_mass_given), expected)
+
+        # given neutral fragment masses are kept and the exact mass is calculated
+        fragment_masses_given = dict(with_fragments,
+                                     neutral_fragment_masses=["a", "b", "c", "d"])
+        expected = dict(fragment_masses_given, exact_mass=self.exact_mass)
+        self.assertEqual(first_parsed("AU101101", fragment_masses_given), expected)
+
+        # nothing given: both values are calculated
+        expected = dict(with_fragments, exact_mass=self.exact_mass,
+                        neutral_fragment_masses=self.neutral_fragment_masses)
+        self.assertEqual(first_parsed("AU101101", with_fragments), expected)
+
+        # test with msn=False: the exact mass is calculated without any fragment information
+        structures_input = dict(precursor_only, prescribed_mass="m")
+        expected = dict(structures_input, exact_mass=self.exact_mass)
+        self.assertEqual(first_parsed("AU101101", structures_input, False), expected)
+
+        # test with msn=False and the exact mass provided
+        structures_input_with_mass = dict(structures_input, exact_mass="a")
+        self.assertEqual(first_parsed("AU101101", structures_input_with_mass, False),
+                         structures_input_with_mass)
+
+    def test_precursor_ions_to_neutral_masses(self):
+        """
+        Check the neutral masses calculated for [M+H]+ and [M-H]- records for each value of the second argument.
+        """
+        mass_shift = 1.007276  # amount the assertions expect to be added to the m/z values of an [M-H]- record
+
+        # (precursor type, expected neutral fragment masses, expected exact mass)
+        expectations = [
+            ("[M+H]+", self.neutral_fragment_masses, self.exact_mass),
+            ("[M-H]-", [mz + mass_shift for mz in self.fragment_mzs], self.precursor_mz + mass_shift),
+        ]
+
+        for precursor_type, expected_fragment_masses, expected_exact_mass in expectations:
+            ms_dict = {"ms_id": "AU101101", "mf": self.mf, "precursor_mz": self.precursor_mz,
+                       "fragment_mzs": self.fragment_mzs, "precursor_type": precursor_type}
+
+            # "none" is only required to run; nothing is asserted for it
+            for which in ["both", "fragments", "precursor", "none"]:
+                processed = precursor_ions_to_neutral_masses(copy.deepcopy(ms_dict), which)
+
+                if which in ["both", "fragments"]:
+                    self.assertEqual(processed["neutral_fragment_masses"], expected_fragment_masses)
+
+                if which in ["both", "precursor"]:
+                    self.assertEqual(processed["exact_mass"], expected_exact_mass)
+
+    def test_reformat_msp_input(self):
+        """Check the reformatting of an msp record and that missing precursor or fragment m/z values warn."""
+        unformatted_msp_dict = {'ms_id': 'AU101101', 'mf': 'C10H10N4O2S', 'precursor_mz': '251.0597',
+                                'fragment_mzs': self.fragment_mzs, 'precursor_type': '[M+H]+'}
+
+        formatted_msp_dict = {'ms_id': 'AU101101', 'mf': self.mf, 'precursor_mz': self.precursor_mz,
+                              'fragment_mzs': self.fragment_mzs, 'precursor_type': '[M+H]+',
+                              'exact_mass': self.exact_mass,
+                              'neutral_fragment_masses': self.neutral_fragment_masses}
+
+        self.assertEqual(reformat_msp_input(unformatted_msp_dict), formatted_msp_dict)
+
+        # assertWarns must make the call itself, so the function and its argument are passed separately
+        unformatted_msp_dict["precursor_mz"] = None
+        self.assertWarns(UserWarning, reformat_msp_input, unformatted_msp_dict)
+
+        unformatted_msp_dict["precursor_mz"] = self.precursor_mz
+        unformatted_msp_dict["fragment_mzs"] = []
+        self.assertWarns(UserWarning, reformat_msp_input, unformatted_msp_dict)
+
+    def test_mc_to_list(self):
+        """Check the element counts returned by mc_to_list, including one formula for which None is expected."""
+        formulae = ["C12H14N4O4S", "C10H10N4O2S", "C46H94NO8P", "C46H94NO8P1", "C10H9ClN4O2S"]
+        expected = [[12, 14, 4, 4, 0, 1], [10, 10, 4, 2, 0, 1], [46, 94, 1, 8, 1, 0], [46, 94, 1, 8, 1, 0], None]
+        for word_formula, mc_list in zip(formulae, expected):
+            self.assertEqual(mc_to_list(word_formula), mc_list)
+
+
+if __name__ == "__main__":  # run the tests when this file is executed directly
+    unittest.main()
diff --git a/tests/test_suite_auxiliary.py b/tests/test_suite_auxiliary.py
index c44171d..cc211e2 100644
--- a/tests/test_suite_auxiliary.py
+++ b/tests/test_suite_auxiliary.py
@@ -27,6 +27,7 @@
 from pathlib import Path
 
 from . import test_auxiliary
+from . import test_parse
 
 sys.path.insert(0, str(Path(__file__).parent.parent.resolve()))
 
@@ -35,6 +36,7 @@
     suite = unittest.TestSuite()
 
     suite.addTest(unittest.findTestCases(test_auxiliary))
+    suite.addTest(unittest.findTestCases(test_parse))
 
     report = os.path.join(os.path.abspath(os.path.join(__file__, os.pardir)), 'results', 'results_test_suite_auxiliary')
     runTestSuite(suite, report, title='Process Test Suite Report', verbosity=2)