haddocking · mgiulini · Mar 27, 2024 · Mar 20, 2024 · Mar 20, 2024 · Mar 20, 2024
diff --git a/examples/compare_runs.py b/examples/compare_runs.py
@@ -69,6 +69,7 @@
     ("docking-protein-DNA"         , "docking-protein-DNA-mdref-test.cfg"),  # noqa: E203, E501
     ("docking-protein-homotrimer"  , "docking-protein-homotrimer-test.cfg"),  # noqa: E203, E501
     ("docking-protein-glycan"      , "docking-protein-glycan-test.cfg"),  # noqa: E203, E501
+    ("docking-protein-glycan"      , "docking-protein-glycan-ilrmsd-test.cfg"),  # noqa: E203, E501
     ("docking-protein-glycan"      , "docking-flexref-protein-glycan-test.cfg"),  # noqa: E203, E501
     ("docking-protein-ligand-shape", "docking-protein-ligand-shape-test.cfg"),  # noqa: E203, E501
     ("docking-protein-ligand"      , "docking-protein-ligand-test.cfg"),  # noqa: E203, E501

diff --git a/examples/run_tests.py b/examples/run_tests.py
@@ -56,6 +56,7 @@
     ("docking-protein-DNA"         , "docking-protein-DNA-mdref-test.cfg"),  # noqa: E203, E501
     ("docking-protein-homotrimer"  , "docking-protein-homotrimer-test.cfg"),  # noqa: E203, E501
     ("docking-protein-glycan"      , "docking-protein-glycan-test.cfg"),  # noqa: E203, E501
+    ("docking-protein-glycan"      , "docking-protein-glycan-ilrmsd-test.cfg"),  # noqa: E203, E501
     ("docking-protein-glycan"      , "docking-flexref-protein-glycan-test.cfg"),  # noqa: E203, E501
     ("docking-protein-ligand-shape", "docking-protein-ligand-shape-test.cfg"),  # noqa: E203, E501
     ("docking-protein-ligand"      , "docking-protein-ligand-test.cfg"),  # noqa: E203, E501

diff --git a/src/haddock/clis/cli_traceback.py b/src/haddock/clis/cli_traceback.py
@@ -139,6 +139,40 @@ def traceback_dataframe(
     df_ord.loc[unk_records, last_step] = "-"
     return df_ord
 
+def order_traceback_df(df_output, sel_step):
+    """
+    Order the traceback dataframe, ranking models step by step.
+
+    Models ranked in the last step come first, followed by those ranked only
+    in earlier steps. Ranks are compared numerically; ties keep input order.
+
+    Parameters
+    ----------
+    df_output : pandas.DataFrame
+        Traceback data; ``<step>_rank`` columns hold ``"-"`` if unranked.
+    sel_step : list
+        List of selected steps.
+
+    Returns
+    -------
+    df_output : pandas.DataFrame
+        Ordered traceback data (returned unchanged if ``sel_step`` is empty).
+    """
+    if not sel_step:  # pd.concat would raise on an empty list
+        return df_output
+    sorted_list = []
+    placed = []  # index labels already assigned by a later step
+    for step in reversed(sel_step):
+        rank_col = step + "_rank"
+        # models ranked at this step and not already placed
+        todo = (df_output[rank_col] != "-") & ~df_output.index.isin(placed)
+        # numeric key: string ranks would otherwise sort "10" before "2"
+        chunk = df_output[todo].sort_values(
+            by=rank_col, key=pd.to_numeric, kind="mergesort")
+        sorted_list.append(chunk)
+        placed.extend(chunk.index)
+    return pd.concat(sorted_list)
+
 
 # Command line interface parser
 ap = argparse.ArgumentParser(
@@ -234,20 +268,22 @@ def main(run_dir):
                     # this is the first step in which the pdbfile appears.
                     # This means that it was discarded for the subsequent steps
                     # We need to add the pdbfile to the data_dict
-                    key = f"unk{unk_idx}"
-                    data_dict[key] = ["-" for el in range(delta - 1)]
-                    data_dict[key].append(str(pdbfile.rel_path))
-                    rank_dict[key] = ["-" for el in range(delta)]
+                    keys = [f"unk{unk_idx}"]
+                    data_dict[keys[0]] = ["-" for el in range(delta - 1)]
+                    data_dict[keys[0]].append(str(pdbfile.rel_path))
+                    rank_dict[keys[0]] = ["-" for el in range(delta)]
                     unk_idx += 1
                 else:
                     # we've already seen this pdb before.
-                    idx = ls_values.index(str(pdbfile.rel_path))
-                    key = list(data_dict.keys())[idx // delta]
+                    idxs = [i for i, el in enumerate(ls_values) if el==str(pdbfile.rel_path)]
+                    keys = [list(data_dict.keys())[idx // delta] for idx in idxs]
 
                 # assignment
                 for el in ori_names:
-                    data_dict[key].append(el)
-                rank_dict[key].append(rank)
+                    for key in keys:
+                        data_dict[key].append(el)
+                for key in keys:
+                    rank_dict[key].append(rank)
             else:  # last step of the workflow
                 data_dict[str(pdbfile.rel_path)] = [on for on in ori_names]
                 rank_dict[str(pdbfile.rel_path)] = [rank]
@@ -268,6 +304,9 @@ def main(run_dir):
     df_output = traceback_dataframe(
         final_data_dict, final_rank_dict, sel_step, max_topo_len
     )
+
+    # ordering the dataframe
+    df_output = order_traceback_df(df_output, sel_step)
     # dumping the dataframe
     track_filename = Path(run_dir, TRACK_FOLDER, "traceback.tsv")
     log.info(

diff --git a/tests/test_cli_traceback.py b/tests/test_cli_traceback.py
@@ -60,8 +60,8 @@ def test_main(rigid_json, flexref_json):
     exp_tr = [["00_topo1", "00_topo2", "1_rigidbody", "1_rigidbody_rank", "4_flexref", "4_flexref_rank"],  # noqa: E501
               ["4G6K_fv_haddock.psf", "4I1B-matched_haddock.psf", "rigidbody_3.pdb", "1", "flexref_1.pdb", "1"],  # noqa: E501
               ["4G6K_fv_haddock.psf", "4I1B-matched_haddock.psf", "rigidbody_1.pdb", "2", "flexref_2.pdb", "2"],  # noqa: E501
-              ["4G6K_fv_haddock.psf", "4I1B-matched_haddock.psf", "rigidbody_2.pdb", "4", "-", "-"],  # noqa: E501
-              ["4G6K_fv_haddock.psf", "4I1B-matched_haddock.psf", "rigidbody_4.pdb", "3", "-", "-"]]  # noqa: E501
+              ["4G6K_fv_haddock.psf", "4I1B-matched_haddock.psf", "rigidbody_4.pdb", "3", "-", "-"],  # noqa: E501
+              ["4G6K_fv_haddock.psf", "4I1B-matched_haddock.psf", "rigidbody_2.pdb", "4", "-", "-"]]  # noqa: E501
     exp_tr_df = pd.DataFrame(exp_tr[1:], columns=exp_tr[0])
 
     assert obs_tr.columns.tolist() == exp_tr_df.columns.tolist()