Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 0 additions & 129 deletions scripts/run_and_process_phenix_clashscore.py

This file was deleted.

15 changes: 13 additions & 2 deletions src/sampleworks/eval/grid_search_eval_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pathlib import Path

from loguru import logger
from sampleworks.eval.constants import OCCUPANCY_LEVELS
from sampleworks.eval.eval_dataclasses import Experiment, ExperimentList
from sampleworks.eval.occupancy_utils import extract_protein_and_occupancy
from sampleworks.utils.guidance_constants import StructurePredictor
Expand All @@ -24,7 +25,7 @@ def parse_experiment_dir(exp_dir: Path) -> dict[str, int | float | None]:
- pure_guidance format: ens{N}_gw{W}
"""
dir_name = exp_dir.name
logger.debug(f"Parsing experiment directory: {dir_name}")
logger.debug(f"Parsing experiment directory: {exp_dir}")

# Extract ensemble size
ens_match = re.search(r"ens(\d+)", dir_name)
Expand Down Expand Up @@ -128,7 +129,10 @@ def scan_grid_search_results(
# Recurse into subdirectories
for item in current_directory.iterdir():
if item.is_dir() and not item.name.endswith(".json"):
experiments.extend(scan_grid_search_results(item, current_depth + 1, target_depth))
grid_search_experiments = scan_grid_search_results(
item, current_depth + 1, target_depth, target_filename=target_filename
)
experiments.extend(grid_search_experiments)

return experiments

Expand Down Expand Up @@ -175,4 +179,11 @@ def parse_args(description: str | None = None):
"Defaults to sampleworks/data/protein_configs.csv",
default=files("sampleworks.data") / "protein_configs.csv",
)
parser.add_argument(
"--occupancies",
nargs="+",
type=float,
help="Occupancies to evaluate",
default=OCCUPANCY_LEVELS,
)
return parser.parse_args()
5 changes: 4 additions & 1 deletion src/sampleworks/eval/occupancy_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ def extract_protein_and_occupancy(dir_name):
protein = parts[0]

# Parse occupancy
if "1.0occa" in dir_name.lower() or "1occa" in dir_name.lower():
if "native" in dir_name.lower():
# this is a hack, it would be better to properly name the directory
occ_a = 0.5
elif "1.0occa" in dir_name.lower() or "1occa" in dir_name.lower():
# Check it's not a mixed case like 0.1occA
if not any(f"0.{i}occa" in dir_name.lower() for i in range(1, 10)):
occ_a = 1.0
Expand Down
5 changes: 4 additions & 1 deletion src/sampleworks/metrics/lddt.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,10 @@ def compute(
if selection is not None:
mask_fn = predicted_aa_stack.mask
if mask_fn is None:
raise RuntimeError("predicted_aa_stack does not support mask()")
raise RuntimeError(
"predicted_aa_stack does not support mask() You should read in atom arrays"
"using `atomworks.io.utils.io_utils.load_any()` to access this method"
)
Comment thread
marcuscollins marked this conversation as resolved.
mask = mask_fn(selection)
selected_arr = cast(AtomArray, predicted_aa_stack[0, mask])
selected_token_ids = selected_arr.token_id
Expand Down
5 changes: 2 additions & 3 deletions src/sampleworks/utils/cif_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ def find_altloc_selections(
- Iterable[str]: Iterable of alternative location selections, keyed by altloc ID.

Example: for RCSB PDB entry 5SOP, this should yield items like:
['chain A and resi 3-6', 'chain A and resi 10-12', 'chain A and resi 20-26', ...,
'chain_id == 'A' and (res_id == 3 or res_id == 10 or res_id == 20 or ...)]
['chain A and resi 125-137', "chain_id == 'A' and ((res_id >= 3 and res_id <= 6) or ...)"]

"""
cif_file = Path(cif_file)
Expand All @@ -44,7 +43,7 @@ def find_altloc_selections(
altlocs = OrderedDict()
for altloc_id in find_all_altloc_ids(structure):
altk = select_altloc(structure, altloc_id=altloc_id)
unique_altk = set((ch, res) for ch, res in zip(altk.chain_id, altk.res_id))
unique_altk = set((ch, res) for ch, res in zip(altk.chain_id, altk.res_id, strict=True))
# probably unnecessary but making sure these are consistently ordered
# FIXME? This is a little clunky. Perhaps should be hierarchical by chain then altloc?
# At some point though we'll do altloc selections using correlations/contacts
Expand Down
5 changes: 5 additions & 0 deletions src/sampleworks/utils/density_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,11 @@ def compute_density_from_atomarray(
atom_array, device
)

# need to make sure these all have the same batch dimension or the transformer will fail.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@k-chrispens this may no longer be needed with the changes you made in real_space_density.py but I'm going to leave it here for now, then test everything in my next PR, if that's okay with you.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think I made any changes affecting this

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do think it would be clearer to do match_batch here in the next PR though, esp. since these are potentially already at batch size n_model from an AtomArrayStack, which could lead to errors that we would catch and report well in match_batch

elements = elements.expand(coords.shape[0], -1)
b_factors = b_factors.expand(coords.shape[0], -1)
occupancies = occupancies.expand(coords.shape[0], -1)

with torch.no_grad():
density = transformer(
coordinates=coords,
Expand Down