Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
ae0bf23
Return metabolite ID with msn annotation results
jackgisby Sep 19, 2020
de6eab2
Rewrite MSn method for limiting connectivity to fragment edges only
jackgisby Sep 19, 2020
cf2378e
Add option for use of smiles without non-structural isomeric information
jackgisby Sep 19, 2020
4726b3a
Update tests
jackgisby Sep 19, 2020
dd2013b
Correct return type hints
jackgisby Sep 19, 2020
071e9a2
Implement SQLITE3 annotate_msn results database
jackgisby Sep 19, 2020
4d52c40
Add msn option to ResultsDb
jackgisby Nov 21, 2020
918b7b0
Re-structure results db tables
jackgisby Nov 21, 2020
030083b
Update add_ms to add ms information to the queries table
jackgisby Nov 21, 2020
b91c1ae
Add function to insert entries into the results and substructures tables
jackgisby Nov 21, 2020
4125307
Add function to get structure frequencies and/or SMILEs
jackgisby Nov 21, 2020
6017bda
Update user-facing functions for compatibility with ResultsDb
jackgisby Nov 21, 2020
b7f405b
Remove text-based output and return substructure smiles from build fu…
jackgisby Nov 21, 2020
3c04cf0
Update build unit tests for ResultsDb
jackgisby Nov 21, 2020
8a9dd0e
Add CSV output for build functions
jackgisby Nov 22, 2020
c75ac38
Check if ResultsDb output matches reference files
jackgisby Nov 22, 2020
94e31cd
Check ResultsDb CSV files line by line vs reference
jackgisby Nov 22, 2020
38c8ee0
Implement simple bond dissociation energy calculations
jackgisby Nov 22, 2020
be844fc
Add integer MS integer IDs and implement calculate_frequencies to mor…
jackgisby Nov 25, 2020
a5d7dcb
Re-format get_bond_enthalpies
jackgisby Nov 25, 2020
f36e174
Use integer IDs for results DB
jackgisby Nov 27, 2020
580219d
Add retain_substructures option
jackgisby Nov 27, 2020
a9b2cfd
Make filter_hmdbid_substructures a filtered version of the hmdbid_sub…
jackgisby Nov 27, 2020
26e2c89
Implement the substructure network generation algorithm in SQLite ins…
jackgisby Nov 27, 2020
9eaea12
Add get_substructure_network function to convert SQLite3 substructure…
jackgisby Nov 27, 2020
8a19a0e
Implement get_single_edge to get substructure edge weights without th…
jackgisby Nov 27, 2020
3e43432
Add integer substructure key
jackgisby Nov 27, 2020
151a189
Update unit tests
jackgisby Nov 27, 2020
d9f1607
Add parse_ms_data function to convert user provided raw data into a l…
jackgisby Nov 29, 2020
a10ca66
Implement msp parsing, update existing tests and spread functions acr…
jackgisby Nov 30, 2020
5caf8f6
Add unit testing and documentation for parse.py
jackgisby Nov 30, 2020
bdd71e4
Amend connectivity database unit tests so that they fail in case of t…
jackgisby Nov 30, 2020
40c516a
Amend results docstrings
jackgisby Nov 30, 2020
04260a7
Update docstrings of user-facing build functions
jackgisby Nov 30, 2020
b919249
Merge remote-tracking branch 'origin/dev' into feat-alternate_inputs
jackgisby Dec 3, 2020
399b9a8
Only test isomorphism database on non-windows systems
jackgisby Dec 3, 2020
298283c
Merge remote-tracking branch 'origin/dev' into feat-alternate_inputs
jackgisby Dec 7, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions metaboblend/algorithms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2019-2020 Ralf Weber
#
# This file is part of MetaboBlend.
#
# MetaboBlend is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# MetaboBlend is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with MetaboBlend. If not, see <https://www.gnu.org/licenses/>.
#

import numpy


def find_path(mass_list, sum_matrix, n, mass, max_subset_length, path=None):
    """
    Recursive solution for backtracking through the dynamic programming boolean matrix. Every subset of at most
    ``max_subset_length`` masses that sums to the target is generated.

    :param mass_list: A list of masses from which to identify subsets.

    :param sum_matrix: The dynamic programming boolean matrix. It is only read as ``sum_matrix[n][mass]``, which must
        be truthy when the target ``mass`` can be reached using the first ``n`` items of ``mass_list``.

    :param n: The number of leading items of ``mass_list`` that may still be used (the size of mass_list on the
        initial call).

    :param mass: The target mass of the sum of the substructures (the mass still to be accounted for on recursive
        calls).

    :param max_subset_length: The maximum length of subsets to return. Allows the recursive backtracking algorithm to
        terminate early in many cases, significantly improving runtime.

    :param path: List for keeping track of the current subset. A new empty list is created when omitted, so separate
        calls never share state. A list supplied by the caller is extended in place while the search runs and is
        restored to its original contents afterwards, even if the generator is closed before it is exhausted.

    :return: Generator of lists, each sorted in ascending order, containing the masses of a valid subset.
    """

    # a mutable default would be shared by every call; create the working list per call instead
    if path is None:
        path = []

    # base case - the path has generated a correct solution
    if mass == 0:
        yield sorted(path)
        return

    # stop running when we overshoot the mass (checked first so the matrix is never indexed with a negative value)
    # or when the remaining masses cannot sum up to the target value
    if mass < 0 or not sum_matrix[n][mass]:
        return

    # first branch: leave mass_list[n - 1] out of the subset
    yield from find_path(mass_list, sum_matrix, n - 1, mass, max_subset_length, path)

    # second branch: include mass_list[n - 1], provided the subset is still allowed to grow
    if len(path) < max_subset_length:
        path.append(mass_list[n - 1])

        try:
            yield from find_path(mass_list, sum_matrix, n - 1, mass - mass_list[n - 1], max_subset_length, path)
        finally:
            # undo the append even when the consumer abandons the generator part-way through
            path.pop()


def subset_sum(mass_list, mass, max_subset_length=3):
    """
    Dynamic programming implementation of subset sum. Note that, whilst this algorithm is pseudo-polynomial, the
    backtracking algorithm for obtaining all possible subsets has exponential complexity and so remains unsuitable
    for large input values. This does, however, tend to perform a lot better than non-sum_matrix implementations, as
    we're no longer doing sums multiple times and we've cut down the operations performed during the exponential portion
    of the method.

    :param mass_list: A list of masses from which to identify subsets. The masses are used as array indices, so they
        are expected to be non-negative integers.

    :param mass: The target mass of the sum of the substructures. It sizes the dynamic programming matrix, so it is
        expected to be a non-negative integer.

    :param max_subset_length: The maximum length of subsets to return. Allows the recursive backtracking algorithm to
        terminate early in many cases, significantly improving runtime.

    :return: Generator of lists containing the masses of valid subsets. The matrix is built eagerly when this
        function is called; the subsets themselves are produced lazily by find_path.
    """

    n = len(mass_list)

    # initialise dynamic programming array; every cell starts as False, which already covers the rule that empty
    # subsets do not have non-zero sums (row 0, columns 1..mass)
    sum_matrix = numpy.zeros((n + 1, mass + 1), dtype=bool)

    # subsets can always equal 0
    sum_matrix[:, 0] = True

    # fill in the remaining boolean matrix
    for i, item_mass in enumerate(mass_list):
        for j in range(mass + 1):
            if j >= item_mass:
                # j is reachable if it already was, or if it becomes reachable by adding the current mass
                sum_matrix[i + 1][j] = sum_matrix[i][j] or sum_matrix[i][j - item_mass]
            else:
                # the current mass is too large to contribute to j
                sum_matrix[i + 1][j] = sum_matrix[i][j]

    # backtrack through the matrix recursively to obtain all solutions; hand over a fresh path list so that
    # separate calls never share backtracking state
    return find_path(mass_list, sum_matrix, n, mass, max_subset_length, [])
2 changes: 1 addition & 1 deletion metaboblend/auxiliary.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
#

import itertools
import networkx as nx
import pylab as plt
import networkx as nx


def calculate_complete_multipartite_graphs(max_atoms_available, max_n_substructures):
Expand Down
Loading