<a href="https://colab.research.google.com/github/espickle1/boltz-2/blob/main/boltz_yaml_interface.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install boltz[cuda] -U



In [7]:
!boltz predict /content/drive/MyDrive/boltz/ns1_v2.yaml --out_dir /content/drive/MyDrive/boltz_results_pocket --use_msa_server

MSA server enabled: https://api.colabfold.com
MSA server authentication: no credentials provided
Checking input data.
Processing 1 inputs with 1 threads.
  0% 0/1 [00:00<?, ?it/s]Generating MSA for /content/drive/MyDrive/boltz/ns1_v2.yaml with 1 protein entities.
Calling MSA server for target ns1_v2 with 1 sequences
MSA server URL: https://api.colabfold.com
MSA pairing strategy: greedy
No authentication provided for MSA server

  0% 0/150 [00:00<?, ?it/s][A
SUBMIT:   0% 0/150 [00:00<?, ?it/s][A
COMPLETE:   0% 0/150 [00:00<?, ?it/s][A
COMPLETE: 100% 150/150 [00:01<00:00, 108.40it/s]
100% 1/1 [00:01<00:00,  1.50s/it]
Using bfloat16 Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Running structure prediction for 1 input.
/usr/local/lib/python3.12/dist-packages/pytorch_lightning/utilities/migration/utils.py:56: The loaded checkpoint was produced with Lightning v2.5.0.post0, which is newe

In [8]:
id = 'A'
sequence = 'ASDF'

In [14]:
import yaml

# Define your values here
protein_id = "P12345"
protein_sequence = "MKTAYIAKQRQISFVKSHFSRQ"

# Build the structure
data = {
    "sequences": [
        {
            "protein": {
                "id": protein_id,
                "sequence": protein_sequence
            }
        }
    ]
}

# Write to YAML file
yaml_path = "/content/drive/MyDrive/boltz/"
yaml_file_name = "protein.yaml"
with open(yaml_path + yaml_file_name, "w") as f:
    yaml.dump(data, f, sort_keys=False)

# Show the YAML output in the notebook
print(yaml.dump(data, sort_keys=False))

sequences:
- protein:
    id: P12345
    sequence: MKTAYIAKQRQISFVKSHFSRQ



In [15]:
# Define values
protein_id = "P12345"
protein_sequence = "MKTAYIAKQRQISFVKSHFSRQ"

# Build the YAML string manually
yaml_text = f"""
sequences:
\t- protein:
\t\t\tid: {protein_id}
\t\t\tsequence: {protein_sequence}
"""

# Save to file
yaml_path = "/content/drive/MyDrive/boltz/"
yaml_file_name = "protein.yaml"
with open(yaml_path + yaml_file_name, "w") as f:
# with open("protein.yaml", "w") as f:
    f.write(yaml_text)

# Print to check
print(yaml_text)



sequences:
	- protein:
			id: P12345
			sequence: MKTAYIAKQRQISFVKSHFSRQ



In [17]:
# === Interactive YAML builder (ONLY id + sequence) ===
# Works in Google Colab
!pip -q install ipywidgets
from google.colab import output, files
output.enable_custom_widget_manager()

import ipywidgets as W
from IPython.display import display, Markdown, clear_output

yaml_path = "/content/drive/MyDrive/boltz/"
yaml_file_name = "protein.yaml"

# ----- protein entry factory: ONLY 'id' and 'sequence' -----
def make_entry():
    id_field = W.Text(placeholder="protein id", layout=W.Layout(width="350px"))
    seq_field = W.Textarea(placeholder="protein sequence", layout=W.Layout(width="600px", height="80px"))
    row = W.VBox([
        W.HBox([W.Label("id:", layout=W.Layout(width="30px")), id_field]),
        W.HBox([W.Label("sequence:", layout=W.Layout(width="70px")), seq_field]),
        W.HTML("<hr>")
    ])
    return {"box": row, "id": id_field, "seq": seq_field}

# container for multiple entries
entries = []

def add_entry(_=None):
    e = make_entry()
    entries.append(e)
    entries_box.children = [x["box"] for x in entries]

def remove_last(_=None):
    if entries:
        entries.pop()
        entries_box.children = [x["box"] for x in entries]

# initial one
entries_box = W.VBox([])
add_btn = W.Button(description="Add protein", button_style="info")
remove_btn = W.Button(description="Remove last", button_style="")
preview_btn = W.Button(description="Preview YAML", button_style="primary")
save_btn = W.Button(description="Save protein.yaml", button_style="success")
out = W.Output()

add_btn.on_click(add_entry)
remove_btn.on_click(remove_last)

# ---- YAML (manual, tabs) ----
def build_yaml_text():
    # start with header line exactly as you used (no '---' lines)
    parts = ["sequences:"]
    for e in entries:
        pid = e["id"].value.strip()
        pseq = e["seq"].value.strip()
        if not pid and not pseq:
            continue
        parts.append("\t- protein:")
        parts.append(f"\t\t\tid: {pid}")
        parts.append(f"\t\t\tsequence: {pseq}")
    return "\n".join(parts) + ("\n" if parts else "")

def on_preview(_):
    with out:
        clear_output()
        y = build_yaml_text()
        if not entries:
            display(Markdown("**Add at least one protein entry.**"))
            return
        display(Markdown("### YAML Preview"))
        # show the YAML exactly; (tabs are literal \t characters)
        display(Markdown(f"```\n{y}```"))

def on_save(_):
    with out:
        clear_output()
        y = build_yaml_text()
        if not entries:
            display(Markdown("**Add at least one protein entry.**"))
            return
        with open(yaml_path + yaml_file_name, "w", encoding="utf-8") as f:
            f.write(y)
        display(Markdown(f"**Saved:** `{yaml_file_name}` — downloading…"))
        files.download(yaml_file_name)

preview_btn.on_click(on_preview)
save_btn.on_click(on_save)

# layout
controls = W.HBox([add_btn, remove_btn, preview_btn, save_btn])
display(W.VBox([
    W.HTML("<h3>YAML: sequences → - protein → (id, sequence)</h3>"),
    controls,
    entries_box,
    out
]))

# add one empty entry by default
add_entry()


VBox(children=(HTML(value='<h3>YAML: sequences → - protein → (id, sequence)</h3>'), HBox(children=(Button(butt…

In [18]:
# === Interactive YAML builder (ONLY id + sequence) ===
# Prompts for YAML directory and output filename
!pip -q install ipywidgets
from google.colab import output, files
output.enable_custom_widget_manager()

import ipywidgets as W
from IPython.display import display, Markdown, clear_output
import os

# ----- protein entry factory: ONLY 'id' and 'sequence' -----
def make_entry():
    id_field = W.Text(placeholder="protein id", layout=W.Layout(width="350px"))
    seq_field = W.Textarea(placeholder="protein sequence", layout=W.Layout(width="600px", height="80px"))
    row = W.VBox([
        W.HBox([W.Label("id:", layout=W.Layout(width="30px")), id_field]),
        W.HBox([W.Label("sequence:", layout=W.Layout(width="70px")), seq_field]),
        W.HTML("<hr>")
    ])
    return {"box": row, "id": id_field, "seq": seq_field}

# container for multiple entries
entries = []

def add_entry(_=None):
    e = make_entry()
    entries.append(e)
    entries_box.children = [x["box"] for x in entries]

def remove_last(_=None):
    if entries:
        entries.pop()
        entries_box.children = [x["box"] for x in entries]

# initial one
entries_box = W.VBox([])
add_btn = W.Button(description="Add protein", button_style="info")
remove_btn = W.Button(description="Remove last", button_style="")
preview_btn = W.Button(description="Preview YAML", button_style="primary")
save_btn = W.Button(description="Save YAML", button_style="success")
out = W.Output()

# Directory and filename widgets
dir_text = W.Text(value="/content/", description="Directory:", layout=W.Layout(width="500px"))
file_text = W.Text(value="protein.yaml", description="File name:", layout=W.Layout(width="300px"))

add_btn.on_click(add_entry)
remove_btn.on_click(remove_last)

# ---- YAML (manual, tabs) ----
def build_yaml_text():
    parts = ["sequences:"]
    for e in entries:
        pid = e["id"].value.strip()
        pseq = e["seq"].value.strip()
        if not pid and not pseq:
            continue
        parts.append("\t- protein:")
        parts.append(f"\t\t\tid: {pid}")
        parts.append(f"\t\t\tsequence: {pseq}")
    return "\n".join(parts) + ("\n" if parts else "")

def on_preview(_):
    with out:
        clear_output()
        y = build_yaml_text()
        if not entries:
            display(Markdown("**Add at least one protein entry.**"))
            return
        display(Markdown("### YAML Preview"))
        display(Markdown(f"```\n{y}```"))

def on_save(_):
    with out:
        clear_output()
        y = build_yaml_text()
        if not entries:
            display(Markdown("**Add at least one protein entry.**"))
            return
        # Get path and filename
        directory = dir_text.value.strip()
        fname = file_text.value.strip()
        if not (fname.endswith(".yaml") or fname.endswith(".yml")):
            fname += ".yaml"
        # Ensure directory exists
        os.makedirs(directory, exist_ok=True)
        full_path = os.path.join(directory, fname)
        with open(full_path, "w", encoding="utf-8") as f:
            f.write(y)
        display(Markdown(f"**Saved:** `{full_path}`"))
        # Also offer download
        files.download(full_path)

preview_btn.on_click(on_preview)
save_btn.on_click(on_save)

# layout
controls = W.HBox([add_btn, remove_btn, preview_btn, save_btn])
display(W.VBox([
    W.HTML("<h3>YAML: sequences → - protein → (id, sequence)</h3>"),
    dir_text,
    file_text,
    controls,
    entries_box,
    out
]))

# add one empty entry by default
add_entry()


VBox(children=(HTML(value='<h3>YAML: sequences → - protein → (id, sequence)</h3>'), Text(value='/content/', de…