Skip to content

Commit

Permalink
Merge pull request #758 from haddocking/show_gzipped_structures
Browse files Browse the repository at this point in the history
show gzipped structures in analysis report
  • Loading branch information
mgiulini committed Dec 14, 2023
2 parents b00edff + 035c063 commit e56da8f
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 5 deletions.
14 changes: 12 additions & 2 deletions src/haddock/clis/cli_analyse.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,14 @@ def update_paths(
"--scale", help="scale for images", required=False, type=float, default=1.0
)

# Flag telling the report generator that output PDBs will be gzipped.
# NOTE: the original used `type=bool`, which is an argparse pitfall —
# bool() on any non-empty string (including "False") is True, so
# `--is_cleaned False` would still enable the flag. `store_true` makes
# the bare flag set True and its absence leave the default False, while
# the parsed attribute remains a plain bool for downstream callers.
ap.add_argument(
    "--is_cleaned",
    help="is the directory going to be cleaned?",
    required=False,
    action="store_true",
    default=False,
)

ap.add_argument(
"-p",
"--other-params",
Expand Down Expand Up @@ -289,6 +297,7 @@ def analyse_step(
top_cluster: int,
format: Optional[ImgFormat],
scale: Optional[float],
is_cleaned: Optional[bool],
) -> None:
"""
Analyse a step.
Expand Down Expand Up @@ -343,7 +352,7 @@ def analyse_step(
log.info("Plotting results..")
scatters = scatter_plot_handler(ss_file, cluster_ranking, format, scale)
boxes = box_plot_handler(ss_file, cluster_ranking, format, scale)
tables = clt_table_handler(clt_file, ss_file)
tables = clt_table_handler(clt_file, ss_file, is_cleaned)
report_generator(boxes, scatters, tables, step)


Expand All @@ -353,6 +362,7 @@ def main(
top_cluster: int,
format: Optional[ImgFormat],
scale: Optional[float],
is_cleaned: Optional[bool],
**kwargs: Any,
) -> None:
"""
Expand Down Expand Up @@ -424,7 +434,7 @@ def main(
error = False
try:
analyse_step(
step, Path("./"), capri_dict, target_path, top_cluster, format, scale
step, Path("./"), capri_dict, target_path, top_cluster, format, scale, is_cleaned
)
except Exception as e:
error = True
Expand Down
11 changes: 10 additions & 1 deletion src/haddock/libs/libplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,7 +808,7 @@ def _pandas_df_to_json(df):
return data_string, headers_string


def clt_table_handler(clt_file, ss_file):
def clt_table_handler(clt_file, ss_file, is_cleaned=False):
"""
Create a dataframe including data for tables.
Expand All @@ -821,6 +821,8 @@ def clt_table_handler(clt_file, ss_file):
path to capri_clt.tsv file
ss_file: str or Path
path to capri_ss.tsv file
is_cleaned: bool
is the run going to be cleaned?
Returns
-------
Expand All @@ -834,6 +836,13 @@ def clt_table_handler(clt_file, ss_file):
# table of structures
structs_df = find_best_struct(ss_file, number_of_struct=10)

# if the run will be cleaned, the structures are going to be gzipped
if is_cleaned:
# substitute the values in the df by adding .gz at the end
structs_df = structs_df.replace(
to_replace=r"(\.pdb)$", value=r".pdb.gz", regex=True
)

# Order structs by best (lowest score) cluster on top
structs_df = structs_df.set_index("Cluster ID")
structs_df = structs_df.reindex(index=statistics_df["Cluster ID"])
Expand Down
5 changes: 4 additions & 1 deletion src/haddock/libs/libworkflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,15 @@ def clean(self, terminated: Optional[int] = None) -> None:

def postprocess(self) -> None:
"""Postprocess the workflow."""
# is the workflow going to be cleaned?
is_cleaned = self.recipe.steps[0].config['clean']

capri_steps: list[int] = []
for step in self.recipe.steps:
if step.module_name == "caprieval":
capri_steps.append(step.order) # type: ignore
# call cli_analyse (no need for capri_dicts, it's all precalculated)
cli_analyse("./", capri_steps, top_cluster=10, format=None, scale=None)
cli_analyse("./", capri_steps, top_cluster=10, format=None, scale=None, is_cleaned=is_cleaned)
# call cli_traceback. If it fails, it's not a big deal
try:
cli_traceback("./")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_cli_analyse.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def test_main(example_capri_ss, example_capri_clt):
shutil.copy(example_capri_clt, Path(step_dir, "capri_clt.tsv"))

# run haddock3-analyse
main(run_dir, [2], 5, format=None, scale=None)
main(run_dir, [2], 5, format=None, scale=None, is_cleaned=False)

# check analysis directory exists
ana_dir = Path(run_dir, "analysis/")
Expand Down
27 changes: 27 additions & 0 deletions tests/test_libworkflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from haddock.libs.libworkflow import WorkflowManager
from haddock.core.typing import Any, ModuleParams
import tempfile
from pathlib import Path
import os

def test_WorkflowManager(caplog):
    """Test WorkflowManager.postprocess logging.

    Builds a minimal single-step (topoaa) workflow with ``clean: True`` and
    checks that ``postprocess`` reads the step instructions and invokes
    haddock3-analyse, by asserting on the first two INFO log records.

    Parameters
    ----------
    caplog : pytest.LogCaptureFixture
        Pytest fixture capturing log records emitted during the run.
    """
    caplog.set_level("INFO")
    # Minimal parameter dict; 'clean': True exercises the is_cleaned
    # propagation from postprocess() to cli_analyse.
    params = {
        'topoaa.1':
            {'autohis': True,
             'molecules': ['fake.pdb'],
             'clean': True
             }
    }
    # Scratch directory so any artifacts are cleaned up automatically;
    # the name itself is not needed, only the managed lifetime.
    with tempfile.TemporaryDirectory(dir="."):
        workflow = WorkflowManager(
            params,
            start=0,
            other_params=Any,)
        workflow.postprocess()
        # .message is already a str — compare the captured records directly.
        first_log_line = caplog.records[0].message
        second_log_line = caplog.records[1].message
        assert first_log_line == "Reading instructions step 0_topoaa"
        assert second_log_line == "Running haddock3-analyse on ./, modules [], with top_cluster = 10"

0 comments on commit e56da8f

Please sign in to comment.