Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement rank column in caprieval #213

Merged
merged 2 commits into from
Dec 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
61 changes: 36 additions & 25 deletions src/haddock/modules/analysis/caprieval/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,11 +234,11 @@ def irmsd(self, cutoff=5.0):

if P.shape != Q.shape:
log.warning(
'[{RECIPE_PATH}] Cannot align these models,'
' the number of atoms is in the interface'
' is different.'
f"[{RECIPE_PATH}] Cannot align these models,"
" the number of atoms is in the interface"
" is different."
)
i_rmsd = float('nan')
i_rmsd = float("nan")

else:
P = P - centroid(P)
Expand Down Expand Up @@ -404,13 +404,17 @@ def fnat(self, cutoff=5.0):
self.fnat_dic[model] = fnat
return self.fnat_dic

def output(self, output_f, sortby_key, ascending):
def output(self, output_f, sortby_key, sort_ascending, rankby_key,
rank_ascending):
"""Output the CAPRI results to a .tsv file."""
output_l = []
for model in self.model_list:
data = {}
# keep always 'model' the first key
data["model"] = Path(model.parent.name, model.name)
# create the empty rank here so that it will appear
# as the second column
data["rank"] = None
data["score"] = self.score_dic[model]
if model in self.irmsd_dic:
data["irmsd"] = self.irmsd_dic[model]
Expand All @@ -423,14 +427,21 @@ def output(self, output_f, sortby_key, ascending):
# list of dictionaries
output_l.append(data)

# Get the ranking of each model
rankkey_values = [(i, k[rankby_key]) for i, k in enumerate(output_l)]
rankkey_values.sort(key=lambda x: x[1], reverse=not rank_ascending)
for i, k in enumerate(rankkey_values, start=1):
idx, _ = k
output_l[idx]["rank"] = i

# Sort the column
key_values = [(i, k[sortby_key]) for i, k in enumerate(output_l)]
key_values.sort(key=lambda x: x[1], reverse=not ascending)
key_values.sort(key=lambda x: x[1], reverse=not sort_ascending)

max_model_space = max(len(str(_d['model'])) for _d in output_l) + 2
hmodel = 'model'.center(max_model_space, ' ')
header = hmodel + ''.join(
_.rjust(10, " ")
for _ in list(output_l[0].keys())[1:]
max_model_space = max(len(str(_d["model"])) for _d in output_l) + 2
hmodel = "model".center(max_model_space, " ")
header = hmodel + "".join(
_.rjust(10, " ") for _ in list(output_l[0].keys())[1:]
)

with open(output_f, "w") as out_fh:
Expand All @@ -440,22 +451,20 @@ def output(self, output_f, sortby_key, ascending):
for value in output_l[idx].values():
if isinstance(value, Path):
row_l.append(str(value).ljust(max_model_space, " "))
# elif isinstance(value, (int, float)):
elif isinstance(value, int):
row_l.append(f"{value}".rjust(10, " "))
else:
# better to have the else: statment so errors are
# spotted. Only int and floats should go here
row_l.append(f"{value:.3f}".rjust(10, " "))
out_fh.write(''.join(row_l) + os.linesep)
out_fh.write("".join(row_l) + os.linesep)


class HaddockModule(BaseHaddockModule):
"""HADDOCK3 module to calculate the CAPRI metrics."""

name = RECIPE_PATH.name

def __init__(
self, order, path, *ignore, init_params=DEFAULT_CONFIG, **everything
):
def __init__(self, order, path, *ignore, init_params=DEFAULT_CONFIG,
**everything):
super().__init__(order, path, init_params)

@classmethod
Expand Down Expand Up @@ -500,22 +509,22 @@ def _run(self):
if self.params["fnat"]:
self.log("Calculating FNAT")
fnat_cutoff = self.params["fnat_cutoff"]
self.log(f' cutoff: {fnat_cutoff}A')
self.log(f" cutoff: {fnat_cutoff}A")
capri.fnat(cutoff=fnat_cutoff)

if self.params["irmsd"]:
self.log("Calculating I-RMSD")
irmsd_cutoff = self.params["irmsd_cutoff"]
self.log(f' cutoff: {irmsd_cutoff}A')
self.log(f" cutoff: {irmsd_cutoff}A")
capri.irmsd(cutoff=irmsd_cutoff)

if self.params["lrmsd"]:
self.log("Calculating L-RMSD")
lrmsd_receptor_chain = self.params["receptor_chain"]
lrmsd_ligand_chain = self.params["ligand_chain"]

self.log(f' Receptor chain: {lrmsd_receptor_chain}')
self.log(f' Ligand chain: {lrmsd_ligand_chain}')
self.log(f" Receptor chain: {lrmsd_receptor_chain}")
self.log(f" Ligand chain: {lrmsd_ligand_chain}")
capri.lrmsd(
receptor_chain=lrmsd_receptor_chain,
ligand_chain=lrmsd_ligand_chain,
Expand All @@ -526,8 +535,8 @@ def _run(self):
ilrmsd_ligand_chain = self.params["ligand_chain"]
ilrmsd_cutoff = self.params["irmsd_cutoff"]

self.log(f' Ligand chain: {ilrmsd_ligand_chain}')
self.log(f' cutoff: {ilrmsd_cutoff}A')
self.log(f" Ligand chain: {ilrmsd_ligand_chain}")
self.log(f" cutoff: {ilrmsd_cutoff}A")

capri.ilrmsd(
ligand_chain=ilrmsd_ligand_chain,
Expand All @@ -539,7 +548,9 @@ def _run(self):
capri.output(
output_fname,
sortby_key=self.params["sortby"],
ascending=self.params["ascending"],
sort_ascending=self.params["sort_ascending"],
rankby_key=self.params["rankby"],
# the ranking direction has its own `rank_ascending` parameter and
# must not silently follow the direction used to sort the table
rank_ascending=self.params["rank_ascending"],
)

selected_models = models_to_calc
Expand Down
5 changes: 4 additions & 1 deletion src/haddock/modules/analysis/caprieval/defaults.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,7 @@ atoms = ['C','N','CA','O']
# this CA will NOT be used for the alignment.
ignore_missing = true
sortby = 'score'
ascending = true
sort_ascending = true
# which field should be used to create the ranking
rankby = 'score'
rank_ascending = true
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What will happen if both sort_ascending and rank_ascending are true and sortby is set to e.g. i-rmsd while rankby is set by score?

Shouldn't we have only sort_ascending?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What will happen if both sort_ascending and rank_ascending are true and sortby is set to e.g. i-rmsd while rankby is set by score?

This scenario is the example I posted above

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But aren't sort_ascending and rank_ascending redundant??? I don't see the need for the rank_ascending parameter

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One defines the direction in which the table will be sorted; the other defines the direction of the value by which we rank the models:
for sorting: irmsd would be sorted ascending (the lower the better), fnat descending (the higher the better)
for ranking: haddock-score ascending (the lower the better); for some other module in which higher is better, descending

But up to you, I can remove it as well. Rule # 1 !