-
Notifications
You must be signed in to change notification settings - Fork 4
New clash score script and improvements to the CIF patching script. #87
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
0ddb18d
adding script to compute clashscores using phenix.clashscore; making …
marcuscollins 545e72c
Addressing coderabbit f/b on PR 87
marcuscollins 8732a23
Addressing k.chrispen's f/b on PR 87
marcuscollins 2d90cc7
addressing more coderabbit f/b
marcuscollins File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,127 @@ | ||
| import argparse | ||
| import json | ||
| import subprocess | ||
| from pathlib import Path | ||
|
|
||
| import joblib | ||
| import pandas as pd | ||
| from loguru import logger | ||
| from sampleworks.eval.eval_dataclasses import Experiment | ||
| from sampleworks.eval.grid_search_eval_utils import scan_grid_search_results | ||
|
|
||
|
|
||
| def parse_args(description: str | None = None) -> argparse.Namespace: | ||
| """ | ||
| Return a common set of arguments for grid search evaluation scripts, | ||
| with a custom description, which is passed to argparse.ArgumentParser. | ||
|
|
||
| All eval scripts should use this same framework | ||
| """ | ||
| parser = argparse.ArgumentParser(description=description) | ||
| parser.add_argument( | ||
| "--workspace-root", | ||
| type=Path, | ||
| required=True, | ||
| help="Path containing the grid search results directory, e.g. if results are " | ||
| "at $HOME/grid_search_results, $HOME should be what you pass", | ||
| ) | ||
| parser.add_argument( | ||
| "--n-jobs", | ||
| type=int, | ||
| help="Number of parallel jobs to run. -1 uses all CPUs.", | ||
| default=16, | ||
| ) | ||
| return parser.parse_args() | ||
|
|
||
|
|
||
def main(args) -> None:
    """
    Compute clashscores for every experiment found under the workspace and
    write them to a single CSV file.

    `args` must provide `workspace_root` and `n_jobs` (see `parse_args`).
    Results are written to
    `<workspace_root>/grid_search_results/clashscore_metrics.csv`.

    Raises
    ------
    RuntimeError
        If the `phenix.clashscore` executable cannot be found.
    """
    # check that phenix is installed and available, bail early if not.
    try:
        subprocess.call("phenix.clashscore", stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except FileNotFoundError as err:
        # Chain the original error so the traceback shows what was looked up.
        raise RuntimeError(
            "phenix.clashscore is not available, make sure phenix is installed "
            " and that you have activated it, e.g. `source phenix-dir/phenix_env.sh`"
        ) from err

    workspace_root = Path(args.workspace_root)
    grid_search_dir = workspace_root / "grid_search_results"  # TODO make more general

    all_experiments = scan_grid_search_results(grid_search_dir)
    logger.info(f"Found {len(all_experiments)} experiments with refined.cif files")

    # Now loop over experiments with joblib and get back one DataFrame of
    # model-level metrics per experiment.
    per_experiment = joblib.Parallel(n_jobs=args.n_jobs)(
        joblib.delayed(process_one_experiment)(experiment) for experiment in all_experiments
    )

    # process_one_experiment returns an empty DataFrame when phenix.clashscore
    # fails, so drop those here; otherwise the "nothing processed" check below
    # could never fire when every experiment failed.
    clashscore_metrics = [
        df for df in (per_experiment or []) if df is not None and not df.empty
    ]
    if not clashscore_metrics:
        logger.error(
            "No experiments successfully processed, check that result files are available."
        )
        return

    clashscore_df = pd.concat(clashscore_metrics)  # pyright: ignore
    clashscore_df.to_csv(grid_search_dir / "clashscore_metrics.csv", index=False)
|
|
||
|
|
||
def process_one_experiment(experiment: Experiment) -> pd.DataFrame:
    """
    Run `phenix.clashscore` on one experiment's refined CIF file and return
    its model-level clash metrics.

    Three files are written next to `experiment.refined_cif_path`:
    `nonan.cif` (the CIF with nan-containing lines removed), `clashscore.json`
    (the phenix output) and `clashscore.log` (stderr of both subprocesses).

    Returns
    -------
    pd.DataFrame
        The parsed summary from `process_clashscore_json_output`, or an empty
        DataFrame if `phenix.clashscore` exits with a non-zero code.

    Raises
    ------
    RuntimeError
        If the `grep` step exits with a non-zero code.
    """
    # make sure there are no nan lines in the CIF file; this is an extra
    # precaution, even though our CIF writers should now avoid writing nans
    output_dir = experiment.refined_cif_path.parent
    file_with_no_nans = output_dir / "nonan.cif"
    json_output = output_dir / "clashscore.json"
    logfile = output_dir / "clashscore.log"
    logger.info(f"Removing nans from {experiment.refined_cif_path}")

    # One log handle is shared by both subprocesses so that the error messages
    # below, which point the user at `logfile`, refer to a file that actually
    # holds the failing command's stderr.
    with logfile.open("w") as log:
        with file_with_no_nans.open("w") as cleaned:
            retcode = subprocess.call(
                ["grep", "-viP", r"\bnan\b", str(experiment.refined_cif_path)],
                stdout=cleaned,
                stderr=log,
            )

        if retcode != 0:
            raise RuntimeError(f"grep failed with code {retcode}, see {logfile} for details")

        # phenix needs to be installed and on path for this to work. Also sh won't work with
        # phenix.clashscore because of that pesky period in the name.
        # phenix.clashscore generates a JSON file with both per-model scores as well as
        # per-model lists of clashes.
        retcode = subprocess.call(
            ["phenix.clashscore", str(file_with_no_nans), "--json-filename", str(json_output)],
            stderr=log,
        )

    if retcode != 0:
        logger.error(f"phenix.clashscore failed, see {logfile} for details")
        return pd.DataFrame()
    return process_clashscore_json_output(json_output)
|
|
||
|
|
||
def process_clashscore_json_output(json_output: Path) -> pd.DataFrame:
    """
    Opens the json output file `json_output` and parses out the
    "summary_results", flattening it into rows which include the "model_name" field

    Returns
    -------
    pd.DataFrame
        One row per entry in "summary_results" with the columns
        "model_name", "model_id", "clashscore" and "num_clashes". Keys absent
        from the JSON become missing values. The frame is empty (no rows, no
        columns) when "summary_results" is absent, null or empty.
    """
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # default encoding.
    with open(json_output, encoding="utf-8") as f:
        json_data = json.load(f)

    model_name = json_data.get("model_name")
    # For now we're only collecting model-level summary statistics, but
    # there are lists of specific clashes in each model too.
    # `or {}` also covers an explicit JSON null, which .get()'s default does not.
    summary_results = json_data.get("summary_results") or {}

    rows = [
        {
            "model_name": model_name,
            "model_id": model_id,
            "clashscore": results.get("clashscore"),
            "num_clashes": results.get("num_clashes"),
        }
        for model_id, results in summary_results.items()
    ]

    return pd.DataFrame(rows)
|
|
||
|
|
||
# Script entry point: parse the command line, then run the clashscore evaluation.
if __name__ == "__main__":
    main(parse_args())
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.