# 06 - Get Structures (Apo/Holo)

Notebook for retrieving/predicting protein structures using selectable tools (RCSB search, Boltz-2 via OpenProtein, Boltz-2 local).

## Python Path Setup
Ensure project-root imports work whether Jupyter starts from repo root or `notebooks/`.

In [1]:
from pathlib import Path
import sys

# Jupyter may start in the repository root or in its `notebooks/` subfolder;
# in the latter case the repository root is one directory up.
cwd = Path.cwd().resolve()
repo_root = cwd if cwd.name != "notebooks" else cwd.parent
src_root = repo_root / "src"

# Prepend the repository root, then `src/` (only when it exists), to sys.path.
# Entries already present are skipped, so re-running the cell adds nothing.
for _import_root, _must_exist in ((repo_root, False), (src_root, True)):
    if _must_exist and not _import_root.exists():
        continue
    if str(_import_root) not in sys.path:
        sys.path.insert(0, str(_import_root))
del _import_root, _must_exist

## Imports
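Load the structure-prediction step (`run_structure_prediction`), the project path configuration (`address_dict`, `subfolders`), and the CIF-to-PDB conversion and structure visualization helpers.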

In [2]:
import json
from agentic_protein_design.core import resolve_input_path
from project_config.variables import address_dict, subfolders
from agentic_protein_design.steps.get_structures_apo_holo import run_structure_prediction
from tools.struct.struct_utils import convert_cif_to_pdb_pymol, visualize_structures

## User Inputs
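Specify the data root key, an optional known PDB id, the structure name used for output file names, the protein sequence(s), the ligand(s) as SMILES strings, the preferred structure tool, and whether to predict affinity.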

In [3]:
# Key into `address_dict` selecting the project data root.
root_key = "PIPS2"

# Protein chain(s) to model, as one-letter amino-acid sequences.
protein_sequences = [
    "MKLTLLLSAVFSGAVATLAETSEWSPPESGDARSPCPLLNSLANHGYLPHDGKNITGDVLSKAITTTLNMDDSVSAAFMAALRNSITTAETFSLDELNKHNGIEHDASLSRQDFYFGNVQAFNETIFNQTRSYWTDPVTIDIHQAANARNARIETSKATNPTYNETAVNRASALETAAYILSFGDKVTGSVPKAFVEYFFENERLPFHLGWYKSAESISFADFQNMSTRVSQAGSQSPRAIEL",
]

# Ligands as SMILES strings.
ligand_smiles = [
    # Iron-oxo porphyrin (heme-like) -- presumably the cofactor; confirm.
    "CCC1=C(C2=[N]3C1=Cc4c(c(c5n4[Fe+4](=O)36[N]7=C(C=C8N6C(=C2)C(=C8CCC(=O)O)C)C(=C(C7=C5)C)CCC(=O)O)C=C)C)C",
    # Magnesium ion.
    "[Mg+2]",
    # Appears to be 3-methylindole -- TODO confirm this is the intended ligand.
    "c1cccc(c12)c(C)c[nH]2",
]

user_inputs = {
    # Optional known structure id; empty when none is supplied.
    "pdb_id": "",
    # Used as the prefix of the output file names.
    "struct_name": "ET096_S82",
    "sequences": protein_sequences,
    "ligands": ligand_smiles,
    # One of: rcsb_search | boltz2_openprotein | boltz2_local
    "preferred_structure_tool": "boltz2_openprotein",
    "predict_affinity": False,
}

user_inputs

{'pdb_id': '',
 'struct_name': 'ET096_S82',
 'sequences': ['MKLTLLLSAVFSGAVATLAETSEWSPPESGDARSPCPLLNSLANHGYLPHDGKNITGDVLSKAITTTLNMDDSVSAAFMAALRNSITTAETFSLDELNKHNGIEHDASLSRQDFYFGNVQAFNETIFNQTRSYWTDPVTIDIHQAANARNARIETSKATNPTYNETAVNRASALETAAYILSFGDKVTGSVPKAFVEYFFENERLPFHLGWYKSAESISFADFQNMSTRVSQAGSQSPRAIEL'],
 'ligands': ['CCC1=C(C2=[N]3C1=Cc4c(c(c5n4[Fe+4](=O)36[N]7=C(C=C8N6C(=C2)C(=C8CCC(=O)O)C)C(=C(C7=C5)C)CCC(=O)O)C=C)C)C',
  '[Mg+2]',
  'c1cccc(c12)c(C)c[nH]2'],
 'preferred_structure_tool': 'boltz2_openprotein',
 'predict_affinity': False}

## Resolve Output Paths
Set output paths under selected data root (`pdb/` subfolder).

In [4]:
# Resolve the data root selected by `root_key`, then its PDB subfolder.
project_root = repo_root
data_root = project_root.joinpath(address_dict[root_key]).resolve()
pdb_dir = data_root.joinpath(subfolders["pdb"]).resolve()
pdb_dir.mkdir(parents=True, exist_ok=True)  # create the folder on first use

# Both artifacts are named after the structure and live in the PDB folder.
struct_name = user_inputs["struct_name"]
out_cif = pdb_dir / f"{struct_name}_boltz2.cif"
out_summary = pdb_dir / f"{struct_name}_boltz2_summary.json"

pdb_dir, out_cif, out_summary

(PosixPath('/Users/charmainechia/Documents/projects/PIPS/PIPS2-UPOs-data/pdb'),
 PosixPath('/Users/charmainechia/Documents/projects/PIPS/PIPS2-UPOs-data/pdb/ET096_S82_boltz2.cif'),
 PosixPath('/Users/charmainechia/Documents/projects/PIPS/PIPS2-UPOs-data/pdb/ET096_S82_boltz2_summary.json'))

## Run Prediction
Run selected tool/API and return prediction outputs (Boltz-2 OpenProtein path implemented).

In [5]:
# Run the structure-prediction step for `user_inputs`, passing the CIF and
# summary paths resolved above. Presumably the helper writes its artifacts to
# those paths -- confirm in `get_structures_apo_holo`.
# NOTE(review): the recorded run below waited about three minutes on the job,
# so re-running this cell is not cheap.
result = run_structure_prediction(user_inputs, out_cif, out_summary)
# Bare expression so the notebook renders the returned value.
result

Session: <openprotein.OpenProtein object at 0x120051810>


Waiting: 100%|██████████| 100/100 [03:07<00:00,  1.87s/it, status=SUCCESS]


{'job_id': '8a62f5b4-cec4-402f-a186-43b798626bc5',
 'protein_chains': ['A'],
 'ligand_chains': ['B', 'C', 'D'],
 'predict_affinity': False,
 'binder_chain': 'B',
 'status': 'JobStatus.SUCCESS',
 'cif_path': '/Users/charmainechia/Documents/projects/PIPS/PIPS2-UPOs-data/pdb/ET096_S82_boltz2.cif',
 'plddt_shape': [1, 298],
 'pae_shape': [1, 298, 298],
 'pde_shape': [1, 298, 298],
 'confidence': {'confidence_score': 0.9248332381248474,
  'ptm': 0.9538129568099976,
  'iptm': 0.9820447564125061,
  'ligand_iptm': 0.9820447564125061,
  'protein_iptm': 0.0,
  'complex_plddt': 0.9105302691459656,
  'complex_iplddt': 0.9342793226242065,
  'complex_pde': 0.39162781834602356,
  'complex_ipde': 0.49203985929489136,
  'chains_ptm': {'0': 0.9480785131454468,
   '1': 0.9129278063774109,
   '2': 0.0,
   '3': 0.9625590443611145},
  'pair_chains_iptm': {'0': {'0': 0.9480785131454468,
    '1': 0.7068461179733276,
    '2': 0.0,
    '3': 0.5531360507011414},
   '1': {'0': 0.9867264032363892,
    '1': 0.91292

## Inspect Saved Outputs
Display saved summary content and output artifact paths.

In [6]:
# Report the summary file when it exists: its path first, then its JSON content
# re-serialised with 2-space indentation and cut to the first 4000 characters.
if not out_summary.exists():
    print("No summary file written yet.")
else:
    print(out_summary)
    summary_payload = json.loads(out_summary.read_text(encoding="utf-8"))
    summary_text = json.dumps(summary_payload, indent=2)
    print(summary_text[:4000])

# Always report whether the CIF artifact is present, and where it is expected.
print("CIF exists:", out_cif.exists(), out_cif)

/Users/charmainechia/Documents/projects/PIPS/PIPS2-UPOs-data/pdb/ET096_S82_boltz2_summary.json
{
  "job_id": "8a62f5b4-cec4-402f-a186-43b798626bc5",
  "protein_chains": [
    "A"
  ],
  "ligand_chains": [
    "B",
    "C",
    "D"
  ],
  "predict_affinity": false,
  "binder_chain": "B",
  "status": "JobStatus.SUCCESS",
  "cif_path": "/Users/charmainechia/Documents/projects/PIPS/PIPS2-UPOs-data/pdb/ET096_S82_boltz2.cif",
  "plddt_shape": [
    1,
    298
  ],
  "pae_shape": [
    1,
    298,
    298
  ],
  "pde_shape": [
    1,
    298,
    298
  ],
  "confidence": {
    "confidence_score": 0.9248332381248474,
    "ptm": 0.9538129568099976,
    "iptm": 0.9820447564125061,
    "ligand_iptm": 0.9820447564125061,
    "protein_iptm": 0.0,
    "complex_plddt": 0.9105302691459656,
    "complex_iplddt": 0.9342793226242065,
    "complex_pde": 0.39162781834602356,
    "complex_ipde": 0.49203985929489136,
    "chains_ptm": {
      "0": 0.9480785131454468,
      "1": 0.9129278063774109,
      "2":

In [5]:
# Step 1: convert the predicted CIF to PDB with PyMOL, writing it next to the CIF.
# `with_suffix` swaps only the final extension. The previous
# `str(out_cif).replace('.cif', '.pdb')` would also rewrite any ".cif" occurring
# earlier in the path (for example in a directory or structure name).
pdb_target = out_cif.with_suffix(".pdb")
# The target is passed as a string, as before.
out_pdb = convert_cif_to_pdb_pymol(out_cif, str(pdb_target))

# Step 2: show the converted structure in NGLView.
# Chains "A" and "D" match the protein chain and the last ligand chain reported
# by the recorded prediction output; adjust them if the ligand list changes.
# `show_res_near_ligand=6` is presumably a distance cutoff -- confirm the unit
# in `visualize_structures`.
view = visualize_structures(
    [out_pdb],
    show_res_near_ligand=6,
    protein_chain_id="A",
    ligand_chain_id="D",
)
view



0 /Users/charmainechia/Documents/projects/PIPS/PIPS2-UPOs-data/pdb/ET096_S82_boltz2.pdb
[('ALA', 77), ('PHE', 78), ('MET', 79), ('ALA', 81), ('LEU', 82), ('ILE', 103), ('ARG', 170), ('ALA', 171), ('SER', 172), ('LEU', 174), ('GLU', 175)]


NGLWidget()