Skip to content

Commit

Permalink
Merge pull request #758 from haddocking/show_gzipped_structures
Browse files Browse the repository at this point in the history
show gzipped structures in analysis report
  • Loading branch information
mgiulini committed Dec 14, 2023
2 parents b00edff + 035c063 commit e56da8f
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 5 deletions.
14 changes: 12 additions & 2 deletions src/haddock/clis/cli_analyse.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,14 @@ def update_paths(
"--scale", help="scale for images", required=False, type=float, default=1.0
)

# Flag telling the report generator that output PDBs will be gzipped.
# NOTE: the original used `type=bool`, which is an argparse pitfall —
# bool() on any non-empty string (including "False") is True, so
# `--is_cleaned False` would still enable the flag. `store_true` makes
# the bare flag set True and its absence leave the default False, while
# the parsed attribute remains a plain bool for downstream callers.
ap.add_argument(
    "--is_cleaned",
    help="is the directory going to be cleaned?",
    required=False,
    action="store_true",
    default=False,
)

ap.add_argument(
"-p",
"--other-params",
Expand Down Expand Up @@ -289,6 +297,7 @@ def analyse_step(
top_cluster: int,
format: Optional[ImgFormat],
scale: Optional[float],
is_cleaned: Optional[bool],
) -> None:
"""
Analyse a step.
Expand Down Expand Up @@ -343,7 +352,7 @@ def analyse_step(
log.info("Plotting results..")
scatters = scatter_plot_handler(ss_file, cluster_ranking, format, scale)
boxes = box_plot_handler(ss_file, cluster_ranking, format, scale)
tables = clt_table_handler(clt_file, ss_file)
tables = clt_table_handler(clt_file, ss_file, is_cleaned)
report_generator(boxes, scatters, tables, step)


Expand All @@ -353,6 +362,7 @@ def main(
top_cluster: int,
format: Optional[ImgFormat],
scale: Optional[float],
is_cleaned: Optional[bool],
**kwargs: Any,
) -> None:
"""
Expand Down Expand Up @@ -424,7 +434,7 @@ def main(
error = False
try:
analyse_step(
step, Path("./"), capri_dict, target_path, top_cluster, format, scale
step, Path("./"), capri_dict, target_path, top_cluster, format, scale, is_cleaned
)
except Exception as e:
error = True
Expand Down
11 changes: 10 additions & 1 deletion src/haddock/libs/libplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,7 +808,7 @@ def _pandas_df_to_json(df):
return data_string, headers_string


def clt_table_handler(clt_file, ss_file):
def clt_table_handler(clt_file, ss_file, is_cleaned=False):
"""
Create a dataframe including data for tables.
Expand All @@ -821,6 +821,8 @@ def clt_table_handler(clt_file, ss_file):
path to capri_clt.tsv file
ss_file: str or Path
path to capri_ss.tsv file
is_cleaned: bool
is the run going to be cleaned?
Returns
-------
Expand All @@ -834,6 +836,13 @@ def clt_table_handler(clt_file, ss_file):
# table of structures
structs_df = find_best_struct(ss_file, number_of_struct=10)

# if the run will be cleaned, the structures are going to be gzipped
if is_cleaned:
# substitute the values in the df by adding .gz at the end
structs_df = structs_df.replace(
to_replace=r"(\.pdb)$", value=r".pdb.gz", regex=True
)

# Order structs by best (lowest score) cluster on top
structs_df = structs_df.set_index("Cluster ID")
structs_df = structs_df.reindex(index=statistics_df["Cluster ID"])
Expand Down
5 changes: 4 additions & 1 deletion src/haddock/libs/libworkflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,15 @@ def clean(self, terminated: Optional[int] = None) -> None:

def postprocess(self) -> None:
"""Postprocess the workflow."""
# is the workflow going to be cleaned?
is_cleaned = self.recipe.steps[0].config['clean']

capri_steps: list[int] = []
for step in self.recipe.steps:
if step.module_name == "caprieval":
capri_steps.append(step.order) # type: ignore
# call cli_analyse (no need for capri_dicts, it's all precalculated)
cli_analyse("./", capri_steps, top_cluster=10, format=None, scale=None)
cli_analyse("./", capri_steps, top_cluster=10, format=None, scale=None, is_cleaned=is_cleaned)
# call cli_traceback. If it fails, it's not a big deal
try:
cli_traceback("./")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_cli_analyse.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def test_main(example_capri_ss, example_capri_clt):
shutil.copy(example_capri_clt, Path(step_dir, "capri_clt.tsv"))

# run haddock3-analyse
main(run_dir, [2], 5, format=None, scale=None)
main(run_dir, [2], 5, format=None, scale=None, is_cleaned=False)

# check analysis directory exists
ana_dir = Path(run_dir, "analysis/")
Expand Down
27 changes: 27 additions & 0 deletions tests/test_libworkflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from haddock.libs.libworkflow import WorkflowManager
from haddock.core.typing import Any, ModuleParams
import tempfile
from pathlib import Path
import os

def test_WorkflowManager(caplog):
    """Test WorkflowManager.postprocess logging.

    Builds a minimal single-step (topoaa) workflow with ``clean: True`` and
    checks that ``postprocess`` reads the step instructions and invokes
    haddock3-analyse, by asserting on the first two INFO log records.

    Parameters
    ----------
    caplog : pytest.LogCaptureFixture
        Pytest fixture capturing log records emitted during the run.
    """
    caplog.set_level("INFO")
    # Minimal parameter dict; 'clean': True exercises the is_cleaned
    # propagation from postprocess() to cli_analyse.
    params = {
        'topoaa.1':
            {'autohis': True,
             'molecules': ['fake.pdb'],
             'clean': True
             }
    }
    # Scratch directory so any artifacts are cleaned up automatically;
    # the name itself is not needed, only the managed lifetime.
    with tempfile.TemporaryDirectory(dir="."):
        workflow = WorkflowManager(
            params,
            start=0,
            other_params=Any,)
        workflow.postprocess()
        # .message is already a str — compare the captured records directly.
        first_log_line = caplog.records[0].message
        second_log_line = caplog.records[1].message
        assert first_log_line == "Reading instructions step 0_topoaa"
        assert second_log_line == "Running haddock3-analyse on ./, modules [], with top_cluster = 10"

0 comments on commit e56da8f

Please sign in to comment.