<a href="https://colab.research.google.com/github/espickle1/boltz-2/blob/main/src/input_config.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title 1. Load dependencies
!pip -q install ipywidgets
from google.colab import output, files
output.enable_custom_widget_manager()

import ipywidgets as W
from IPython.display import display, Markdown, clear_output
import os

In [None]:
#@title 2. Protein and ligand information
def _yaml_squote(s: str) -> str:
    """Wrap *s* in YAML single quotes, doubling every embedded single quote.

    ``None`` is rendered as the empty scalar ``''``; any other value is
    converted with ``str()`` before quoting.
    """
    if s is None:
        text = ""
    else:
        text = str(s)
    # Inside a single-quoted YAML scalar the only escape is '' for a quote.
    escaped = "''".join(text.split("'"))
    return f"'{escaped}'"

def make_entry(entry_type="protein"):
    """Create the widgets for a single sequence entry.

    An entry consists of a type dropdown and two alternative field groups
    (protein: id and sequence; ligand: id, CCD code and SMILES). Only the
    group matching the dropdown's current value is shown, and the view is
    swapped whenever the dropdown value changes.

    Args:
        entry_type: Initial dropdown selection, ``"protein"`` or ``"ligand"``.

    Returns:
        A dict holding the outer ``"box"`` widget, the ``"type"`` dropdown and
        the individual input widgets under ``"p_id"``, ``"p_seq"``,
        ``"l_ccd"``, ``"l_id"`` and ``"l_smiles"``.
    """
    def _row(caption, field):
        # One form line: a fixed-width caption followed by its input widget.
        return W.HBox([W.Label(caption, layout=W.Layout(width="70px")), field])

    selector = W.Dropdown(
        options=["protein", "ligand"],
        value=entry_type,
        description="Type:",
        layout=W.Layout(width="160px"),
    )

    # Protein inputs.
    protein_id = W.Text(
        placeholder="protein id (e.g., A1)",
        layout=W.Layout(width="320px"),
    )
    protein_seq = W.Textarea(
        placeholder="protein sequence",
        layout=W.Layout(width="560px", height="70px"),
    )

    # Ligand inputs, shown in the order id, ccd, smiles.
    ligand_id = W.Text(
        placeholder="ligand id (e.g., L1)",
        layout=W.Layout(width="320px"),
    )
    ligand_ccd = W.Text(
        placeholder="CCD (e.g., ATP, HEM)",
        layout=W.Layout(width="320px"),
    )
    ligand_smiles = W.Text(
        placeholder="(optional) SMILES",
        layout=W.Layout(width="560px"),
    )

    protein_panel = W.VBox([
        _row("id:", protein_id),
        _row("sequence:", protein_seq),
    ])
    ligand_panel = W.VBox([
        _row("id:", ligand_id),
        _row("ccd:", ligand_ccd),
        _row("smiles:", ligand_smiles),
    ])

    # Placeholder whose single child is swapped to match the selected type.
    holder = W.VBox([])

    def _show_selected(*_):
        if selector.value == "protein":
            holder.children = [protein_panel]
        else:
            holder.children = [ligand_panel]

    selector.observe(_show_selected, names="value")
    _show_selected()  # populate the placeholder for the initial selection

    outer = W.VBox([W.HBox([selector]), holder, W.HTML("<hr>")])
    return {
        "box": outer,
        "type": selector,
        "p_id": protein_id,
        "p_seq": protein_seq,
        "l_ccd": ligand_ccd,
        "l_id": ligand_id,
        "l_smiles": ligand_smiles,
    }

# Shared UI state: the entry dicts created so far (in display order) and the
# container widget that renders their boxes.
entries = []
entries_box = W.VBox([])
def add_entry(_=None, default_type="protein"):
    """Append a new entry of *default_type* and re-render the entry list.

    Args:
        _: Ignored placeholder for a leading positional argument.
        default_type: Initial type of the new entry, passed to ``make_entry``.
    """
    entries.append(make_entry(default_type))
    entries_box.children = [item["box"] for item in entries]
def remove_last(_=None):
    """Drop the most recently added entry, if any, and re-render the list.

    Args:
        _: Ignored; lets the function be registered as a click callback.
    """
    if not entries:
        return
    del entries[-1]
    entries_box.children = [item["box"] for item in entries]

# Where the YAML file is written when saving.
dir_text = W.Text(
    value="/content/",
    description="Directory:",
    layout=W.Layout(width="500px"),
)
file_text = W.Text(
    value="sequences.yaml",
    description="File name:",
    layout=W.Layout(width="300px"),
)

# Output area that the preview/save handlers render into.
out = W.Output()

# Buttons that edit the entry list; each callback is registered right after
# its button is created.
add_btn = W.Button(description="Add protein", button_style="info")
add_btn.on_click(lambda _btn: add_entry(default_type="protein"))

add_lig_btn = W.Button(description="Add ligand", button_style="info")
add_lig_btn.on_click(lambda _btn: add_entry(default_type="ligand"))

remove_btn = W.Button(description="Remove last")
remove_btn.on_click(remove_last)

# Buttons whose handlers are attached further down, once they are defined.
preview_btn = W.Button(description="Preview YAML", button_style="primary")
save_btn = W.Button(description="Save YAML", button_style="success")

# Bare words that YAML 1.1 loaders may resolve to a boolean or null instead
# of a string; matched case-insensitively.
_YAML_NON_STRING_WORDS = frozenset(
    {"y", "n", "yes", "no", "on", "off", "true", "false", "null", "~"}
)


def _yaml_plain_or_quoted(value):
    """Return *value* unquoted unless a YAML loader could misread its type.

    Ordinary values (``A``, ``L1``, ``ATP``) are returned unchanged so the
    output keeps its unquoted style. Values that are a boolean/null keyword
    (e.g. the CCD code ``NO``) or that start like a number (e.g. ``010`` or
    ``1``) are single-quoted so they are loaded as strings.
    """
    if not value:
        return value
    starts_like_number = value[0] in "0123456789+-."
    if starts_like_number or value.lower() in _YAML_NON_STRING_WORDS:
        return _yaml_squote(value)
    return value


def build_yaml_text():
    """Render the current entries as the text of a ``sequences:`` document.

    Reads the module-level ``entries`` list and emits one list item per
    entry, skipping entries whose fields are all blank.

    * protein: ``id`` as a flow list (comma-separated ids become separate
      elements) and ``sequence`` with all whitespace removed.
    * ligand: optional ``id``, then ``ccd`` if given, otherwise ``smiles``
      (always single-quoted) if given. ``smiles`` is never emitted together
      with ``ccd``.

    Returns:
        The YAML text, always starting with ``sequences:`` and ending with a
        newline.
    """
    lines = ["sequences:"]
    for e in entries:
        if e["type"].value == "protein":
            pid = (e["p_id"].value or "").strip()
            # The sequence comes from a multi-line textarea; line breaks or
            # spaces inside it would break the YAML block if written verbatim.
            pseq = "".join((e["p_seq"].value or "").split())
            if not pid and not pseq:
                continue
            ids = [
                _yaml_plain_or_quoted(part.strip())
                for part in pid.split(",")
                if part.strip()
            ]
            lines.append("  - protein:")
            lines.append(f"      id: [{', '.join(ids)}]")
            lines.append(f"      sequence: {_yaml_plain_or_quoted(pseq)}")
            continue

        # Any type other than "protein" is rendered as a ligand.
        lid = (e["l_id"].value or "").strip()
        ccd = (e["l_ccd"].value or "").strip()
        smiles = (e["l_smiles"].value or "").strip()
        if not (lid or ccd or smiles):
            continue

        lines.append("  - ligand:")
        if lid:
            lines.append(f"      id: {_yaml_plain_or_quoted(lid)}")
        if ccd:
            # ccd takes precedence: smiles is dropped when ccd is present.
            lines.append(f"      ccd: {_yaml_plain_or_quoted(ccd)}")
        elif smiles:
            # SMILES strings are full of YAML-significant characters, so
            # they are always quoted.
            lines.append(f"      smiles: {_yaml_squote(smiles)}")
        # With neither ccd nor smiles, only the id line (if any) is emitted.
    return "\n".join(lines) + "\n"

def on_preview(_):
    """Render the generated YAML in the output area.

    Clears the output widget, then shows either the YAML as a fenced code
    block or a hint when no entry has produced any content.

    Args:
        _: Ignored; supplied by the button click callback.
    """
    with out:
        clear_output()
        yaml_text = build_yaml_text()
        # A document consisting solely of the "sequences:" header is empty.
        has_entries = len(yaml_text.strip().splitlines()) > 1
        if has_entries:
            display(Markdown("### YAML Preview"))
            display(Markdown(f"```yaml\n{yaml_text}```"))
        else:
            display(Markdown("**Add at least one entry.**"))

def on_save(_):
    """Write the generated YAML to disk and pass it to ``files.download``.

    The target path is built from the directory and file-name text fields.
    A ``.yaml`` extension is appended unless the name already ends in
    ``.yaml`` or ``.yml`` (case-insensitive). A blank file name falls back to
    ``sequences.yaml`` and a blank directory means the current working
    directory. Nothing is written when no entry has produced any content.

    Args:
        _: Ignored; supplied by the button click callback.
    """
    with out:
        clear_output()
        y = build_yaml_text()
        # A document consisting solely of the "sequences:" header is empty.
        if len(y.strip().splitlines()) <= 1:
            display(Markdown("**Add at least one entry.**"))
            return

        directory = dir_text.value.strip()
        # A blank name would otherwise turn into the hidden file ".yaml".
        fname = file_text.value.strip() or "sequences.yaml"
        if not fname.lower().endswith((".yaml", ".yml")):
            fname += ".yaml"

        # os.makedirs("") raises FileNotFoundError, so only create the
        # directory when one was actually given.
        if directory:
            os.makedirs(directory, exist_ok=True)
        full_path = os.path.join(directory, fname)
        with open(full_path, "w", encoding="utf-8") as f:
            f.write(y)
        display(Markdown(f"**Saved:** `{full_path}`"))
        files.download(full_path)

# Attach the preview/save handlers now that they are defined.
preview_btn.on_click(on_preview)
save_btn.on_click(on_save)

# Assemble and render the form: heading, target path fields, button row,
# the entry list and the output area.
controls = W.HBox([add_btn, add_lig_btn, remove_btn, preview_btn, save_btn])
display(W.VBox([
    W.HTML("<h3>YAML: sequences → - protein|ligand (ligand rule: ccd > smiles, omit missing lines)</h3>"),
    dir_text, file_text,
    controls,
    entries_box,
    out
]))

# Start with one protein entry. The type must be passed by keyword because
# add_entry's first positional parameter is an ignored placeholder.
add_entry(default_type="protein")