Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 0 additions & 129 deletions scripts/run_and_process_phenix_clashscore.py

This file was deleted.

15 changes: 13 additions & 2 deletions src/sampleworks/eval/grid_search_eval_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pathlib import Path

from loguru import logger
from sampleworks.eval.constants import OCCUPANCY_LEVELS
from sampleworks.eval.eval_dataclasses import Experiment, ExperimentList
from sampleworks.eval.occupancy_utils import extract_protein_and_occupancy
from sampleworks.utils.guidance_constants import StructurePredictor
Expand All @@ -24,7 +25,7 @@ def parse_experiment_dir(exp_dir: Path) -> dict[str, int | float | None]:
- pure_guidance format: ens{N}_gw{W}
"""
dir_name = exp_dir.name
logger.debug(f"Parsing experiment directory: {dir_name}")
logger.debug(f"Parsing experiment directory: {exp_dir}")

# Extract ensemble size
ens_match = re.search(r"ens(\d+)", dir_name)
Expand Down Expand Up @@ -128,7 +129,10 @@ def scan_grid_search_results(
# Recurse into subdirectories
for item in current_directory.iterdir():
if item.is_dir() and not item.name.endswith(".json"):
experiments.extend(scan_grid_search_results(item, current_depth + 1, target_depth))
grid_search_experiments = scan_grid_search_results(
item, current_depth + 1, target_depth, target_filename=target_filename
)
experiments.extend(grid_search_experiments)

return experiments

Expand Down Expand Up @@ -175,4 +179,11 @@ def parse_args(description: str | None = None):
"Defaults to sampleworks/data/protein_configs.csv",
default=files("sampleworks.data") / "protein_configs.csv",
)
parser.add_argument(
"--occupancies",
nargs="+",
type=float,
help="Occupancies to evaluate",
default=OCCUPANCY_LEVELS,
)
return parser.parse_args()
5 changes: 4 additions & 1 deletion src/sampleworks/eval/occupancy_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ def extract_protein_and_occupancy(dir_name):
protein = parts[0]

# Parse occupancy
if "1.0occa" in dir_name.lower() or "1occa" in dir_name.lower():
if "native" in dir_name.lower():
# this is a hack, it would be better to properly name the directory
occ_a = 0.5
elif "1.0occa" in dir_name.lower() or "1occa" in dir_name.lower():
# Check it's not a mixed case like 0.1occA
if not any(f"0.{i}occa" in dir_name.lower() for i in range(1, 10)):
occ_a = 1.0
Expand Down
5 changes: 4 additions & 1 deletion src/sampleworks/metrics/lddt.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,10 @@ def compute(
if selection is not None:
mask_fn = predicted_aa_stack.mask
if mask_fn is None:
raise RuntimeError("predicted_aa_stack does not support mask()")
raise RuntimeError(
"predicted_aa_stack does not support mask() You should read in atom arrays"
"using `atomworks.io.utils.io_utils.load_any()` to access this method"
)
Comment thread
marcuscollins marked this conversation as resolved.
mask = mask_fn(selection)
selected_arr = cast(AtomArray, predicted_aa_stack[0, mask])
selected_token_ids = selected_arr.token_id
Expand Down
5 changes: 2 additions & 3 deletions src/sampleworks/utils/cif_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ def find_altloc_selections(
- Iterable[str]: Iterable of alternative location selections, keyed by altloc ID.

Example: for RCSB PDB entry 5SOP, this should yield items like:
['chain A and resi 3-6', 'chain A and resi 10-12', 'chain A and resi 20-26', ...,
'chain_id == 'A' and (res_id == 3 or res_id == 10 or res_id == 20 or ...)]
['chain A and resi 125-137', "chain_id == 'A' and ((res_id >= 3 and res_id <= 6) or ...)"]

"""
cif_file = Path(cif_file)
Expand All @@ -44,7 +43,7 @@ def find_altloc_selections(
altlocs = OrderedDict()
for altloc_id in find_all_altloc_ids(structure):
altk = select_altloc(structure, altloc_id=altloc_id)
unique_altk = set((ch, res) for ch, res in zip(altk.chain_id, altk.res_id))
unique_altk = set((ch, res) for ch, res in zip(altk.chain_id, altk.res_id, strict=True))
# probably unnecessary but making sure these are consistently ordered
# FIXME? This is a little clunky. Perhaps should be hierarchical by chain then altloc?
# At some point though we'll do altloc selections using correlations/contacts
Expand Down
5 changes: 5 additions & 0 deletions src/sampleworks/utils/density_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,11 @@ def compute_density_from_atomarray(
atom_array, device
)

# need to make sure these all have the same batch dimension or the transformer will fail.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@k-chrispens this may no longer be needed with the changes you made in real_space_density.py but I'm going to leave it here for now, then test everything in my next PR, if that's okay with you.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think I made any changes affecting this

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do think it would be clearer to do match_batch here in the next PR though, esp. since these are potentially already at batch size n_model from an AtomArrayStack, which could lead to errors that we would catch and report well in match_batch

elements = elements.expand(coords.shape[0], -1)
b_factors = b_factors.expand(coords.shape[0], -1)
occupancies = occupancies.expand(coords.shape[0], -1)

with torch.no_grad():
density = transformer(
coordinates=coords,
Expand Down