# 02 - Run De Novo Sequence Design

Run de novo protein design with BoltzGen via the OpenProtein API, optionally followed by ProteinMPNN sequence generation.


## Python Path Setup
Ensure project imports work whether Jupyter starts from repo root or `notebooks/`.


In [13]:
from pathlib import Path
import sys

# Locate the repository root: the parent directory when Jupyter was started
# from `notebooks/`, otherwise the current working directory itself.
cwd = Path.cwd().resolve()
if cwd.name == 'notebooks':
    repo_root = cwd.parent
else:
    repo_root = cwd
src_path = repo_root / 'src'

# Prepend the repo root first and `src/` second; each is only added when it is
# not already on sys.path, so re-running the cell does not create duplicates.
for _import_dir in (str(repo_root), str(src_path)):
    if _import_dir not in sys.path:
        sys.path.insert(0, _import_dir)

print('repo_root:', repo_root)


repo_root: /Users/charmainechia/Documents/projects/agentic-protein-design


## Imports
Load step wrapper and path config.


In [3]:
from project_config.variables import address_dict, subfolders
from agentic_protein_design.steps.run_denovo_sequence_design import (
    run_denovo_sequence_design,
    build_denovo_design_kwargs,
    run_or_load_postdesign_pipeline,
    render_overlay_views,
)
from tools.openprotein import (
    validate_design_with_boltzgen_kwargs,
    filter_and_select_designs,
)
from tools.struct.struct_utils import visualize_structures, visualize_overlay_structures


## User Inputs
Fill in either the direct inputs (`target_sequence`, optional `ligand_smiles`) or set `design_spec_yaml` to drive the run from a YAML spec. When `design_spec_yaml` is set, it takes precedence over the direct fields.


In [4]:
# Key into `address_dict` that selects the data root for this run.
root_key = 'examples' # alternative noted by the author: 'PIPS2'

# All user-editable settings for this notebook. Later cells read this dict
# directly (output naming, chain ids for visualization, filter thresholds) and
# also pass it as a whole to the imported step functions.
user_inputs = {
    # Tool selection
    'design_tool': 'boltzgen_openprotein',

    # Option A: YAML-driven run (relative to data_root or absolute path).
    # If set, this takes precedence over direct fields below.
    'design_spec_yaml': 'design_input/brilacidin.yaml',
    'yaml_include_query_fields': False,  # keep False unless you need explicit query kwargs from YAML

    # Option B: direct inputs for design_with_boltzgen(...)
    # Design chain specifications
    # (`design_chain_id` is also used as the protein chain id when visualizing designs)
    'design_chain_id': 'A',
    'design_structure_path': '',
    'design_pdb_id': '',
    'design_source_chain_id': '',
    'design_sequence': '',
    'design_fasta_path': '',
    'design_is_cyclic': False,
    'design_group_id': 1,

    # Target can be ligand-only, peptide, or protein structure.
    # (`target_chain_id` is also used as the ligand chain id when visualizing designs)
    'target_chain_id': 'B',
    'target_structure_path': '',
    'target_pdb_id': '',
    'target_sequence': '',
    'target_fasta_path': '',
    'target_is_cyclic': False,

    # Ligands (SMILES list and/or path to .txt with one SMILES per line)
    'ligand_smiles': [],
    'ligand_smiles_path': '',
    'ligand_ccds': [],
    'ligand_ccd_path': '',

    # Optional BoltzGen design spec JSON/YAML (in addition to direct fields)
    'design_spec_path': '',

    # Runtime controls
    'n_structures': 4,

    # Optional ProteinMPNN pass (set False if ProteinMPNN will be run separately in the cell after BoltzGen)
    'run_proteinmpnn': False,
    'proteinmpnn_samples': 5,
    'proteinmpnn_temperature': 0.1,
    'proteinmpnn_seed': 42,
    'mpnn_scaffold_sequence': '',

    # Postdesign options
    # NOTE(review): presumably False skips the postdesign run — confirm in run_or_load_postdesign_pipeline.
    'run_postdesign_pipeline': True, # alternative: False
    'postdesign_run_boltz2_refold': True,
    'postdesign_compute_metrics': True,


    # Boltz-2 refold settings for ProteinMPNN-derived sequences
    # If design_spec_yaml is used, ligand targets can be auto-loaded from that spec.
    'refold_target_mode': 'auto',  # auto|ligand|peptide|protein
    'refold_ligand_smiles': [],
    'refold_ligand_ccds': [],
    'refold_target_structure_path': '',  # optional PDB/CIF target template for refold
    'refold_target_structure_chain_id': 'A',
    'auto_refold_targets_from_design_spec': True,

    # Filtering thresholds (passed to filter_and_select_designs in the Final Selection cell)
    'filter_rmsd_max': 2.5,
    'filter_ipae_max': 12.0, # the Final Selection cell falls back to 10.0 if this key is missing
    'filter_binder_rmsd_max': 1.0,
    'filter_binder_plddt_min': 80.0,
    # Overlay settings: `top_k_overlay` is floored at 1 by the top-refold overlay cell;
    # `selected_overlay_max` is additionally capped at 3 by the final overlay cell.
    'top_k_overlay': 1,
    'selected_overlay_max': 3,
    'overlay_generated_color': 'deepskyblue',
    'overlay_predicted_color': 'tomato',
    'overlay_generated_opacity': 1.0,
    'overlay_predicted_opacity': 0.35,
    'overlay_generated_radius_scale': 1.0,
    'overlay_predicted_radius_scale': 0.85,
    'overlay_show_res_near_target': True,
    'overlay_show_target_representation': True,
    'overlay_target_representation': 'ball+stick',

    # Output naming: the tag names the output subfolder and the summary JSON, and is
    # the file prefix used to locate generated structures in later cells.
    'output_tag': 'denovo_boltzgen', # change this per run
}

# Echo the settings as the cell output.
user_inputs


{'design_tool': 'boltzgen_openprotein',
 'design_spec_yaml': 'design_input/brilacidin.yaml',
 'yaml_include_query_fields': False,
 'design_chain_id': 'A',
 'design_structure_path': '',
 'design_pdb_id': '',
 'design_source_chain_id': '',
 'design_sequence': '',
 'design_fasta_path': '',
 'design_is_cyclic': False,
 'design_group_id': 1,
 'target_chain_id': 'B',
 'target_structure_path': '',
 'target_pdb_id': '',
 'target_sequence': '',
 'target_fasta_path': '',
 'target_is_cyclic': False,
 'ligand_smiles': [],
 'ligand_smiles_path': '',
 'ligand_ccds': [],
 'ligand_ccd_path': '',
 'design_spec_path': '',
 'n_structures': 4,
 'run_proteinmpnn': False,
 'proteinmpnn_samples': 5,
 'proteinmpnn_temperature': 0.1,
 'proteinmpnn_seed': 42,
 'mpnn_scaffold_sequence': '',
 'run_postdesign_pipeline': True,
 'postdesign_run_boltz2_refold': True,
 'postdesign_compute_metrics': True,
 'refold_target_mode': 'auto',
 'refold_ligand_smiles': [],
 'refold_ligand_ccds': [],
 'refold_target_structure_pa

## Resolve Output Paths
Outputs are written under `processed/02_run_denovo_sequence_design/<output_tag>/` inside the selected `data_root`.


In [5]:
# Build the output location one level at a time:
#   <data_root>/<processed>/02_run_denovo_sequence_design/<output_tag>/
data_root = (repo_root / address_dict[root_key]).resolve()
processed_dir = data_root.joinpath(subfolders['processed']).resolve()
step_dir = processed_dir.joinpath('02_run_denovo_sequence_design').resolve()
out_dir = step_dir.joinpath(user_inputs['output_tag']).resolve()
out_dir.mkdir(parents=True, exist_ok=True)

# The summary JSON sits inside the output directory and carries the same tag.
out_summary = out_dir.joinpath(f"{user_inputs['output_tag']}_summary.json").resolve()

for _label, _resolved in (
    ('data_root:', data_root),
    ('out_dir:', out_dir),
    ('out_summary:', out_summary),
):
    print(_label, _resolved)


data_root: /Users/charmainechia/Documents/projects/agentic-protein-design/examples
out_dir: /Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen
out_summary: /Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/denovo_boltzgen_summary.json


## Pre-Run Validation
Validate BoltzGen inputs before submitting the OpenProtein job.


In [6]:
# Assemble the BoltzGen keyword arguments and point them at this run's outputs.
design_kwargs = build_denovo_design_kwargs(user_inputs, data_root=data_root)
design_kwargs.update(out_dir=out_dir, out_summary_json=out_summary)

validation_report = validate_design_with_boltzgen_kwargs(design_kwargs)
print('validation ok:', validation_report['ok'])

# Warnings are listed when present; they do not stop the cell.
found_warnings = validation_report['warnings']
if found_warnings:
    print('warnings:')
    for warning_text in found_warnings:
        print(' -', warning_text)

# A failed report is printed and then raised as an error.
if not validation_report['ok']:
    print('errors:')
    for error_text in validation_report['errors']:
        print(' -', error_text)
    raise ValueError('Pre-run validation failed. Fix inputs before running BoltzGen.')


validation ok: True
 - Target may be defined inside `design_spec`; skipping strict target field enforcement.


## Run BoltzGen
Calls OpenProtein BoltzGen through the step function.


In [12]:
# Run the de novo design step with this run's data root, output directory and
# summary path, then echo the returned result as the cell output.
run_kwargs = dict(data_root=data_root, out_dir=out_dir, out_summary_json=out_summary)
result = run_denovo_sequence_design(user_inputs, **run_kwargs)
result

resolved_design_spec: {'entities': [{'protein': {'id': 'A', 'sequence': '140..180'}}, {'ligand': {'id': 'B', 'smiles': 'C1CNC[C@@H]1OC2=C(C=C(C=C2NC(=O)C3=CC(=NC=N3)C(=O)NC4=CC(=CC(=C4O[C@@H]5CCNC5)NC(=O)CCCCN=C(N)N)C(F)(F)F)C(F)(F)F)NC(=O)CCCCN=C(N)N'}}]}


Waiting: 100%|██████████| 100/100 [05:45<00:00,  3.45s/it, status=SUCCESS]


Running ProteinMPNN


{'summary': {'job_id': '13ae944a-4f3e-4d70-87e7-304ab995a4c5',
  'status': 'JobStatus.SUCCESS',
  'n_structures_requested': 4,
  'n_structures_returned': 4,
  'structure_paths': []},
 'query_metadata': {'query_chain_ids': [],
  'target_source': 'none',
  'design_source': 'none',
  'num_ligands': 0},
 'tool': 'boltzgen_openprotein',
 'implemented': True}

## Load Generated Structures
Load BoltzGen structures from disk for postdesign sequence generation.


In [7]:
from openprotein.molecules import Structure as OPStructure

# File prefix of the generated structures; a missing or blank tag falls back to
# the default 'boltzgen_design'.
prefix = str(user_inputs.get('output_tag', 'boltzgen_design')).strip() or 'boltzgen_design'

# Look for CIFs named after the tag first, then under the default prefix
# (backward compatibility). The `else` branch runs only if neither matched.
for _cif_pattern in (f'{prefix}_*.cif', 'boltzgen_design_*.cif'):
    generated_cif_paths = sorted(out_dir.glob(_cif_pattern))
    if generated_cif_paths:
        break
else:
    raise FileNotFoundError(f'No BoltzGen CIF files found in {out_dir}')

# Only element [0] of each from_filepath(...) result is kept.
# NOTE(review): presumably that is the first structure/model in the file — confirm.
generated_structures = [
    OPStructure.from_filepath(str(cif_path))[0] for cif_path in generated_cif_paths
]
print('Loaded generated structures:', len(generated_structures))

Loaded generated structures: 4


## Visualize BoltzGen Designed Structures
Plot the designed complex structures (PDB), up to the first three, right after BoltzGen generation.


In [8]:
from IPython.display import display

# File prefix of the designed structures; a missing or blank tag falls back to
# the default 'boltzgen_design'.
prefix = str(user_inputs.get('output_tag', 'boltzgen_design')).strip() or 'boltzgen_design'

# Try PDBs named after the tag first, then the default prefix (backward
# compatibility). The `else` branch runs only if neither pattern matched.
for _pdb_pattern in (f'{prefix}_*.pdb', 'boltzgen_design_*.pdb'):
    designed_pdb_paths = sorted(out_dir.glob(_pdb_pattern))
    if designed_pdb_paths:
        break
else:
    raise FileNotFoundError(f'No designed PDB files found in {out_dir}')

# Preview at most three designs, each in its own view.
preview_n = min(3, len(designed_pdb_paths))
print('Displaying', preview_n, 'designed structure(s) as separate views')

# Chain ids are the same for every view, so read them once.
protein_chain = str(user_inputs.get('design_chain_id', 'A'))
ligand_chain = str(user_inputs.get('target_chain_id', 'B'))

design_views = []
for idx, pdb_file in enumerate(designed_pdb_paths[:preview_n]):
    print(f'Design {idx}: {pdb_file}')
    view = visualize_structures(
        [str(pdb_file)],
        show_res_near_ligand=None,
        protein_chain_id=protein_chain,
        ligand_chain_id=ligand_chain,
    )
    design_views.append(view)
    display(view)

print('Rendered', len(design_views), 'NGL views.')


Displaying 3 designed structure(s) as separate views
Design 0: /Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/denovo_boltzgen_000.pdb




0 /Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/denovo_boltzgen_000.pdb


NGLWidget()

Design 1: /Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/denovo_boltzgen_001.pdb
0 /Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/denovo_boltzgen_001.pdb


NGLWidget()

Design 2: /Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/denovo_boltzgen_002.pdb
0 /Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/denovo_boltzgen_002.pdb


NGLWidget()

Rendered 3 NGL views.


## ProteinMPNN Postdesign (Optional Boltz-2 + Metrics)
Run ProteinMPNN from generated structures; optionally refold with Boltz-2 and compute evaluation metrics.


In [9]:
# Run the postdesign pipeline on the loaded structures, or load earlier results
# (as the helper's name suggests; which of the two happens is decided inside it).
postdesign_payload = run_or_load_postdesign_pipeline(
    user_inputs=user_inputs,
    generated_structures=generated_structures,
    out_dir=out_dir,
    design_kwargs=design_kwargs,
)

# Unpack the payload into notebook-level names that the later cells rely on.
postdesign = postdesign_payload['postdesign']
postdesign_out_dir = postdesign_payload['postdesign_out_dir']
mpnn_csv_path = postdesign_payload['mpnn_csv_path']
metrics_csv_path = postdesign_payload['metrics_csv_path']
refold_summary_csv_path = postdesign_payload['refold_summary_csv_path']
mpnn_df = postdesign_payload['mpnn_df']
metrics_df = postdesign_payload['metrics_df']
refold_summaries_df = postdesign_payload['refold_summaries_df']

# Report where each table lives and how many rows it has; a table that is
# None is reported as 0 rows.
print('ProteinMPNN CSV:', mpnn_csv_path)
print('Metrics CSV:', metrics_csv_path)
print('Refold summary CSV:', refold_summary_csv_path)
print('ProteinMPNN rows:', 0 if mpnn_df is None else len(mpnn_df))
print('Metrics rows:', 0 if metrics_df is None else len(metrics_df))
print('Refold summary rows:', 0 if refold_summaries_df is None else len(refold_summaries_df))

# Jupyter only renders the last top-level expression of a cell, so a bare
# `mpnn_df.head()` inside this `if` would be silently discarded. Display the
# preview explicitly instead.
if mpnn_df is not None:
    # Imported here so this cell does not depend on another cell's import.
    from IPython.display import display

    display(mpnn_df.head())


Session: <openprotein.OpenProtein object at 0x16a981810>


Waiting: 100%|██████████| 100/100 [03:00<00:00,  1.81s/it, status=SUCCESS]


Aligned "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s000_q000.pdb" to "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/generated_ref_s000.pdb".
Session: <openprotein.OpenProtein object at 0x17d49f010>


Waiting: 100%|██████████| 100/100 [00:43<00:00,  2.31it/s, status=SUCCESS]


Aligned "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s000_q001.pdb" to "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/generated_ref_s000.pdb".
Session: <openprotein.OpenProtein object at 0x17d4cb7d0>


Waiting: 100%|██████████| 100/100 [00:43<00:00,  2.30it/s, status=SUCCESS]


Aligned "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s000_q002.pdb" to "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/generated_ref_s000.pdb".
Session: <openprotein.OpenProtein object at 0x16a9e8550>


Waiting: 100%|██████████| 100/100 [00:43<00:00,  2.32it/s, status=SUCCESS]


Aligned "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s000_q003.pdb" to "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/generated_ref_s000.pdb".
Session: <openprotein.OpenProtein object at 0x17d513750>


Waiting: 100%|██████████| 100/100 [00:43<00:00,  2.31it/s, status=SUCCESS]


Aligned "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s000_q004.pdb" to "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/generated_ref_s000.pdb".
Session: <openprotein.OpenProtein object at 0x16a9fd010>


Waiting: 100%|██████████| 100/100 [00:42<00:00,  2.33it/s, status=SUCCESS]


Aligned "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s001_q000.pdb" to "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/generated_ref_s001.pdb".
Session: <openprotein.OpenProtein object at 0x137daec90>


Waiting: 100%|██████████| 100/100 [00:43<00:00,  2.30it/s, status=SUCCESS]


Aligned "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s001_q001.pdb" to "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/generated_ref_s001.pdb".
Session: <openprotein.OpenProtein object at 0x16a9fc290>


Waiting: 100%|██████████| 100/100 [00:43<00:00,  2.31it/s, status=SUCCESS]


Aligned "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s001_q002.pdb" to "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/generated_ref_s001.pdb".
Session: <openprotein.OpenProtein object at 0x17d525f50>


Waiting: 100%|██████████| 100/100 [00:42<00:00,  2.33it/s, status=SUCCESS]


Aligned "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s001_q003.pdb" to "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/generated_ref_s001.pdb".
Session: <openprotein.OpenProtein object at 0x17ed015d0>


Waiting: 100%|██████████| 100/100 [00:43<00:00,  2.32it/s, status=SUCCESS]


Aligned "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s001_q004.pdb" to "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/generated_ref_s001.pdb".
Session: <openprotein.OpenProtein object at 0x17d4a4510>


Waiting: 100%|██████████| 100/100 [00:42<00:00,  2.34it/s, status=SUCCESS]


Aligned "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s002_q000.pdb" to "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/generated_ref_s002.pdb".
Session: <openprotein.OpenProtein object at 0x17ed002d0>


Waiting: 100%|██████████| 100/100 [00:42<00:00,  2.33it/s, status=SUCCESS]


Aligned "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s002_q001.pdb" to "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/generated_ref_s002.pdb".
Session: <openprotein.OpenProtein object at 0x17d525910>


Waiting: 100%|██████████| 100/100 [00:43<00:00,  2.31it/s, status=SUCCESS]


Aligned "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s002_q002.pdb" to "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/generated_ref_s002.pdb".
Session: <openprotein.OpenProtein object at 0x17ed02c90>


Waiting: 100%|██████████| 100/100 [00:43<00:00,  2.32it/s, status=SUCCESS]


Aligned "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s002_q003.pdb" to "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/generated_ref_s002.pdb".
Session: <openprotein.OpenProtein object at 0x17d4680d0>


Waiting: 100%|██████████| 100/100 [00:43<00:00,  2.32it/s, status=SUCCESS]


Aligned "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s002_q004.pdb" to "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/generated_ref_s002.pdb".
Session: <openprotein.OpenProtein object at 0x112afc2d0>


Waiting: 100%|██████████| 100/100 [00:44<00:00,  2.26it/s, status=SUCCESS]


Aligned "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s003_q000.pdb" to "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/generated_ref_s003.pdb".
Session: <openprotein.OpenProtein object at 0x17d49f090>


Waiting: 100%|██████████| 100/100 [00:42<00:00,  2.35it/s, status=SUCCESS]


Aligned "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s003_q001.pdb" to "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/generated_ref_s003.pdb".
Session: <openprotein.OpenProtein object at 0x16a9e8990>


Waiting: 100%|██████████| 100/100 [00:42<00:00,  2.33it/s, status=SUCCESS]


Aligned "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s003_q002.pdb" to "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/generated_ref_s003.pdb".
Session: <openprotein.OpenProtein object at 0x16a9b3950>


Waiting: 100%|██████████| 100/100 [00:43<00:00,  2.32it/s, status=SUCCESS]


Aligned "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s003_q003.pdb" to "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/generated_ref_s003.pdb".
Session: <openprotein.OpenProtein object at 0x17ed01390>


Waiting: 100%|██████████| 100/100 [00:42<00:00,  2.34it/s, status=SUCCESS]


Aligned "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s003_q004.pdb" to "/Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/generated_ref_s003.pdb".
ProteinMPNN CSV: /Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/denovo_boltzgen_proteinmpnn_sequences.csv
Metrics CSV: /Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/denovo_boltzgen_proteinmpnn_predicted_structure_metrics.csv
Refold summary CSV: /Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/denovo_boltzgen_proteinmpnn_refold_summary.csv
ProteinMP

## Visualize Top Boltz-2 Refold vs Original Design
Overlay the top-ranked refolded complex with its original designed structure.


In [10]:
from IPython.display import display

refolds_missing = refold_summaries_df is None or refold_summaries_df.empty
if refolds_missing:
    print('No Boltz-2 refolds available. Set postdesign_run_boltz2_refold=True and rerun postdesign cell.')
else:
    # Rank by ascending `ipae` from the metrics table when it has rows;
    # otherwise fall back to ascending ProteinMPNN `score`. Missing values sort last.
    if metrics_df is not None and len(metrics_df) > 0:
        ranking_df = metrics_df.reset_index().sort_values(by='ipae', na_position='last')
    elif mpnn_df is not None and len(mpnn_df) > 0:
        ranking_df = mpnn_df.sort_values(by='score', na_position='last')
    else:
        raise ValueError('Need metrics_df or mpnn_df loaded (run postdesign cell or load CSVs).')

    # Always request at least one overlay, whatever `top_k_overlay` is set to.
    overlay_count = max(1, int(user_inputs.get('top_k_overlay', 1)))
    overlay_views = render_overlay_views(
        ranking_df=ranking_df,
        refold_summaries_df=refold_summaries_df,
        out_dir=out_dir,
        user_inputs=user_inputs,
        n_rows=overlay_count,
    )
    for view in overlay_views:
        display(view)
    if not overlay_views:
        print('No overlays rendered.')


0 /Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/denovo_boltzgen_000.pdb A #00bfff
1 /Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s000_q003.pdb A #ff6347


NGLWidget()

## Final Selection
Apply thresholds to metrics and select top designs (best iPAE per structure).


In [11]:
# Filtering needs the metrics table produced by the postdesign cell.
if metrics_df is None:
    raise ValueError('metrics_df is not available. Enable postdesign_run_boltz2_refold and postdesign_compute_metrics first.')

# Thresholds come from user_inputs; the second argument of each .get(...) is the
# fallback used when the key is absent.
filter_thresholds = {
    'rmsd_max': float(user_inputs.get('filter_rmsd_max', 2.5)),
    'ipae_max': float(user_inputs.get('filter_ipae_max', 10.0)),
    'binder_rmsd_max': float(user_inputs.get('filter_binder_rmsd_max', 1.0)),
    'binder_plddt_min': float(user_inputs.get('filter_binder_plddt_min', 80.0)),
}
df_filtered, df_selected = filter_and_select_designs(
    metrics_df,
    **filter_thresholds,
    rank_by='ipae',
)

print('# designs passing filters', len(df_filtered))
# Count distinct structures via the `structure_idx` index level; an empty
# result is reported as 0 without touching the index.
if len(df_filtered):
    n_unique_structures = df_filtered.index.get_level_values('structure_idx').nunique()
else:
    n_unique_structures = 0
print('# unique structures passing filters', n_unique_structures)
df_selected.head(3)


# designs passing filters 1
# unique structures passing filters 1


Unnamed: 0_level_0,Unnamed: 1_level_0,design_idx,rmsd,ipae,binder_rmsd,binder_plddt,score,sequence
structure_idx,sequence_idx,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
3,1,16,0.935175,10.857935,0.935175,95.928154,0.7591,MKVIKYTAVLVLQPDRILLARRLDGPGKGELVPFGGKSTSTTLEGA...


## Visualize Best Final Selections
Overlay up to the top 3 selected designs: Boltz-2 predicted vs original BoltzGen generated structure.


In [12]:
from IPython.display import display

if df_selected is None or not len(df_selected):
    print('No selected designs to visualize.')
elif refold_summaries_df is None or refold_summaries_df.empty:
    raise ValueError('No Boltz-2 refold summaries found. Run postdesign refolding first.')
else:
    # Show no more than the configured maximum, never more than three, and
    # never more than the number of selected designs.
    overlay_cap = int(user_inputs.get('selected_overlay_max', 3))
    n_show = min(overlay_cap, 3, len(df_selected))
    selected_rows = df_selected.reset_index().head(n_show)

    selected_views = render_overlay_views(
        ranking_df=selected_rows,
        refold_summaries_df=refold_summaries_df,
        out_dir=out_dir,
        user_inputs=user_inputs,
        n_rows=len(selected_rows),
    )
    for view in selected_views:
        display(view)
    if not selected_views:
        print('No selected overlays rendered.')


0 /Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/denovo_boltzgen_003.pdb A #00bfff
1 /Users/charmainechia/Documents/projects/agentic-protein-design/examples/processed/02_run_denovo_sequence_design/denovo_boltzgen/postdesign/boltz2_refolds/boltz2_refold_s003_q001.pdb A #ff6347


NGLWidget()