Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 6 additions & 23 deletions src/haddock/libs/libcns.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ def filter_empty_vars(v):
raise TypeError(emsg)


def load_workflow_params(default_params):
def load_workflow_params(
params,
param_header=f'{linesep}! Parameters{linesep}',
):
"""
Write the values at the header section.

Expand All @@ -72,7 +75,7 @@ def load_workflow_params(default_params):

Parameters
----------
default_params : dict
params : dict
Dictionary containing the key:value pairs for the parameters to
be written to CNS. Values cannot be of dictionary type.

Expand All @@ -81,11 +84,9 @@ def load_workflow_params(default_params):
str
The string with the CNS parameters defined.
"""
param_header = f'{linesep}! Parameters{linesep}'

non_empty_parameters = (
(k, v)
for k, v in default_params.items()
for k, v in params.items()
if filter_empty_vars(v)
)

Expand All @@ -96,24 +97,6 @@ def load_workflow_params(default_params):
return param_header


def load_input_mols(mols):
    """
    Build the CNS eval lines for the per-molecule input parameters.

    Parameters
    ----------
    mols : dict
        Maps a molecule identifier to a dictionary of that molecule's
        parameters (name -> value), as defined by topoaa/defaults.cfg.

    Returns
    -------
    str
        The concatenation of one eval line per retained parameter.
        Each parameter is written under the name ``<param>_<mol>``.
        Values rejected by `filter_empty_vars` are skipped. An empty
        string is returned when nothing is retained.
    """
    # A single lazy pass: each value is filtered right before its eval
    # line is written, in the iteration order of the dictionaries.
    return ''.join(
        write_eval_line(f'{name}_{mol_id}', val)
        for mol_id, mol_params in mols.items()
        for name, val in mol_params.items()
        if filter_empty_vars(val)
        )


def write_eval_line(param, value, eval_line='eval (${}={})'):
"""Write the CNS eval line depending on the type of `value`."""
eval_line += linesep
Expand Down
4 changes: 2 additions & 2 deletions src/haddock/libs/libpdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pdbtools.pdb_tidy import tidy_pdbfile

from haddock.core.cns_paths import topology_file
from haddock.libs.libutil import get_result_or_same_in_list
from haddock.libs.libutil import get_result_or_same_in_list, sort_numbered_paths
from haddock.modules import working_directory


Expand Down Expand Up @@ -44,7 +44,7 @@ def split_ensemble(pdb_file_path):
with working_directory(abs_path):
split_model(input_handler)

return get_new_models(pdb_file_path)
return sort_numbered_paths(*get_new_models(pdb_file_path))


def split_by_chain(pdb_file_path):
Expand Down
70 changes: 70 additions & 0 deletions src/haddock/libs/libutil.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""General utilities."""
import collections.abc
import re
import shutil
import subprocess
from copy import deepcopy
Expand Down Expand Up @@ -218,3 +219,72 @@ def recursive_dict_update(d, u):
else:
d[k] = v
return d


def get_number_from_path_stem(path):
    """
    Extract the last integer found in the stem of a path.

    The stem is the final path component without its last suffix. Every
    run of digits in the stem is located and the last one is converted
    to an integer, so leading zeros are dropped.

    Examples
    --------
    >>> get_number_from_path_stem('src/file_1.pdb')
    1

    >>> get_number_from_path_stem('src/file_3.pdb')
    3

    >>> get_number_from_path_stem('file_1231.pdb')
    1231

    >>> get_number_from_path_stem('src/file11')
    11

    >>> get_number_from_path_stem('src/file_1234_1.pdb')
    1

    Parameters
    ----------
    path : str or pathlib.Path
        The path to evaluate.

    Returns
    -------
    int
        The last integer in the stem of `path`.

    Raises
    ------
    IndexError
        If the stem of `path` contains no digits.
    """
    stem = Path(path).stem
    digit_runs = re.findall(r'\d+', stem)
    # Indexing an empty list raises IndexError on purpose:
    # `sort_numbered_paths` catches it to detect unnumbered paths.
    return int(digit_runs[-1])


def sort_numbered_paths(*paths):
    """
    Sort input paths by the last number in their stem.

    The sort key is provided by :py:func:`get_number_from_path_stem`.
    If at least one path has no number in its stem, all paths are
    sorted alphabetically by stem instead.

    Parameters
    ----------
    *paths : str or pathlib.Path
        Paths to files. Pass them as separate positional arguments
        (unpack lists with ``*``), not as a single list.

    Returns
    -------
    list
        The sorted pathlist. The original types are not modified. If
        strings are given, strings are returned; if Paths are given,
        Paths are returned.

    Raises
    ------
    TypeError
        If an argument cannot be interpreted as a path, for example
        when a list is given without unpacking.
    """
    try:
        return sorted(paths, key=get_number_from_path_stem)
    except TypeError as err:
        log.exception(err)
        emsg = (
            "Mind the packing *argument, input should be strings or Paths, "
            "not a list."
            )
        # chain explicitly so the original cause stays in the traceback
        raise TypeError(emsg) from err
    except IndexError:
        # at least one stem has no digits: fall back to alphabetical order
        return sorted(paths, key=lambda x: Path(x).stem)
32 changes: 21 additions & 11 deletions src/haddock/modules/topology/topoaa/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from haddock.libs import libpdb
from haddock.libs.libcns import (
generate_default_header,
load_input_mols,
load_workflow_params,
prepare_output,
prepare_single_input,
Expand All @@ -22,14 +21,13 @@
DEFAULT_CONFIG = Path(RECIPE_PATH, "defaults.cfg")


def generate_topology(input_pdb, step_path, recipe_str, defaults,
def generate_topology(input_pdb, step_path, recipe_str, defaults, mol_params,
protonation=None):
"""Generate a HADDOCK topology file from input_pdb."""
# this is a special case that only applies to topoaa.
input_mols = defaults.pop('input', {})

general_param = load_workflow_params(defaults)
input_mols_params = load_input_mols(input_mols)

input_mols_params = load_workflow_params(mol_params, param_header='')

general_param = general_param + input_mols_params

Expand Down Expand Up @@ -77,6 +75,12 @@ def run(self, molecules, **params):
super().run(params)

molecules = make_molecules(molecules)
# extracts `input` key from params. The `input` keyword needs to
# be treated separately
mol_params = self.params.pop('input')
# to facilitate the for loop down the line, we create a list with the keys
# of `mol_params` with inverted order (we will use .pop)
mol_params_keys = list(mol_params.keys())[::-1]

# Pool of jobs to be executed by the CNS engine
jobs = []
Expand All @@ -91,8 +95,11 @@ def run(self, molecules, **params):

# Split models
log.info(f"Split models if needed for {step_molecule_path}")
ens = libpdb.split_ensemble(step_molecule_path)
splited_models = sorted(ens)
splited_models = libpdb.split_ensemble(step_molecule_path)

# nice variable name, isn't it? :-)
# molecule parameters are shared among models of the same molecule
parameters_for_this_molecule = mol_params[mol_params_keys.pop()]

# Sanitize the different PDB files
for model in splited_models:
Expand All @@ -110,10 +117,13 @@ def run(self, molecules, **params):
libpdb.sanitize(model, overwrite=True)

# Prepare generation of topologies jobs
topology_filename = generate_topology(model,
self.path,
self.recipe_str,
self.params)
topology_filename = generate_topology(
model,
self.path,
self.recipe_str,
self.params,
parameters_for_this_molecule,
)
log.info(f"Topology CNS input created in {topology_filename}")

# Add new job to the pool
Expand Down
58 changes: 57 additions & 1 deletion tests/test_libutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@

import pytest

from haddock.libs.libutil import file_exists, non_negative_int
from haddock.libs.libutil import (
file_exists,
get_number_from_path_stem,
non_negative_int,
sort_numbered_paths,
)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -52,3 +57,54 @@ def test_file_exists_wrong(i):
"""."""
with pytest.raises(ValueError):
file_exists(i)


@pytest.mark.parametrize(
    'in1,expected',
    [
        ('pdb_1.pdb', 1),
        ('pdb2.pdb', 2),  # number directly attached to the name
        ('pdb_3.pdb', 3),
        ('pdb_1231.pdb', 1231),
        ('pdb_0011.pdb', 11),  # leading zeros are dropped
        ('pdb_1_.pdb', 1),  # trailing non-digit characters are ignored
        ('pdb_1', 1),  # no suffix
        ('5', 5),  # stem made only of digits
        ('pdb_20200101_1.pdb', 1),  # only the last run of digits counts
        ],
    )
def test_get_number(in1, expected):
    """Test the last number in the path stem is returned as an integer."""
    result = get_number_from_path_stem(in1)
    assert result == expected


@pytest.mark.parametrize(
    'in1,expected',
    [
        # numbered stems: numeric order, not lexicographic
        (['f_1.pdb', 'f_11.pdb', 'f_2.pdb'], ['f_1.pdb', 'f_2.pdb', 'f_11.pdb']),
        # stems without numbers: alphabetical order
        (['b.pdb', 'c.pdb', 'a.pdb'], ['a.pdb', 'b.pdb', 'c.pdb']),
        ],
    )
def test_sort_numbered_input_1(in1, expected):
    """Check unpacked paths come back as a correctly ordered list."""
    assert sort_numbered_paths(*in1) == expected


@pytest.mark.parametrize(
    'in1,error',
    [(['f_1.pdb', 'f_11.pdb', 'f_2.pdb'], TypeError)],
    )
def test_sort_numbered_inputs_error(in1, error):
    """Check that giving the list itself, without unpacking, raises."""
    # `in1` is deliberately passed as a single argument (no `*`)
    with pytest.raises(error):
        sort_numbered_paths(in1)