<a href="https://colab.research.google.com/github/espickle1/boltz-2/blob/main/boltz_yaml_creator_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
# === Interactive YAML builder (ONLY id + sequence) ===
# Prompts for YAML directory and output filename
!pip -q install ipywidgets
from google.colab import output, files
output.enable_custom_widget_manager()

import ipywidgets as W
from IPython.display import display, Markdown, clear_output
import os

# ----- protein entry factory: ONLY 'id' and 'sequence' -----
def make_entry():
    id_field = W.Text(placeholder="protein id", layout=W.Layout(width="350px"))
    seq_field = W.Textarea(placeholder="protein sequence", layout=W.Layout(width="600px", height="80px"))
    row = W.VBox([
        W.HBox([W.Label("id:", layout=W.Layout(width="30px")), id_field]),
        W.HBox([W.Label("sequence:", layout=W.Layout(width="70px")), seq_field]),
        W.HTML("<hr>")
    ])
    return {"box": row, "id": id_field, "seq": seq_field}

# container for multiple entries
entries = []

def add_entry(_=None):
    e = make_entry()
    entries.append(e)
    entries_box.children = [x["box"] for x in entries]

def remove_last(_=None):
    if entries:
        entries.pop()
        entries_box.children = [x["box"] for x in entries]

# initial one
entries_box = W.VBox([])
add_btn = W.Button(description="Add protein", button_style="info")
remove_btn = W.Button(description="Remove last", button_style="")
preview_btn = W.Button(description="Preview YAML", button_style="primary")
save_btn = W.Button(description="Save YAML", button_style="success")
out = W.Output()

# Directory and filename widgets
dir_text = W.Text(value="/content/", description="Directory:", layout=W.Layout(width="500px"))
file_text = W.Text(value="protein.yaml", description="File name:", layout=W.Layout(width="300px"))

add_btn.on_click(add_entry)
remove_btn.on_click(remove_last)

# ---- YAML (manual, tabs) ----
def build_yaml_text():
    parts = ["sequences:"]
    for e in entries:
        pid = e["id"].value.strip()
        pseq = e["seq"].value.strip()
        if not pid and not pseq:
            continue
        parts.append("\t- protein:")
        parts.append(f"\t\t\tid: {pid}")
        parts.append(f"\t\t\tsequence: {pseq}")
    return "\n".join(parts) + ("\n" if parts else "")

def on_preview(_):
    with out:
        clear_output()
        y = build_yaml_text()
        if not entries:
            display(Markdown("**Add at least one protein entry.**"))
            return
        display(Markdown("### YAML Preview"))
        display(Markdown(f"```\n{y}```"))

def on_save(_):
    with out:
        clear_output()
        y = build_yaml_text()
        if not entries:
            display(Markdown("**Add at least one protein entry.**"))
            return
        # Get path and filename
        directory = dir_text.value.strip()
        fname = file_text.value.strip()
        if not (fname.endswith(".yaml") or fname.endswith(".yml")):
            fname += ".yaml"
        # Ensure directory exists
        os.makedirs(directory, exist_ok=True)
        full_path = os.path.join(directory, fname)
        with open(full_path, "w", encoding="utf-8") as f:
            f.write(y)
        display(Markdown(f"**Saved:** `{full_path}`"))
        # Also offer download
        files.download(full_path)

preview_btn.on_click(on_preview)
save_btn.on_click(on_save)

# layout
controls = W.HBox([add_btn, remove_btn, preview_btn, save_btn])
display(W.VBox([
    W.HTML("<h3>YAML: sequences → - protein → (id, sequence)</h3>"),
    dir_text,
    file_text,
    controls,
    entries_box,
    out
]))

# add one empty entry by default
add_entry()

VBox(children=(HTML(value='<h3>YAML: sequences → - protein → (id, sequence)</h3>'), Text(value='/content/', de…

In [5]:
# Helper: YAML single-quote escaping
def _yaml_single_quote(s: str) -> str:
    # YAML single-quoted scalar rule: single quote is doubled
    return s.replace("'", "''")

# Define your values:
# Use r"..." or "\\t" if you want a LITERAL backslash+t in the value
protein_id = r"P12345\toops"           # literal \t kept
protein_sequence = r"MKT\tAYI\tag"     # literal \t kept

# (Optional) If your current variable has REAL tabs and you want to turn them into \t text:
# protein_sequence = protein_sequence.replace("\t", "\\t")

# Build YAML text with tab indentation and single-quoted values
lines = [
    "sequences:",
    "\t- protein:",
    f"\t\t\tid: '{_yaml_single_quote(protein_id)}'",
    f"\t\t\tsequence: '{_yaml_single_quote(protein_sequence)}'",
]
yaml_text = "\n".join(lines) + "\n"

# Save
yaml_path = "/content/"
yaml_file_name = "protein.yaml"
with open(yaml_path + yaml_file_name, "w", encoding="utf-8") as f:
    f.write(yaml_text)

# Visual check: prints with backslashes intact
print(yaml_text)


sequences:
	- protein:
			id: 'P12345\toops'
			sequence: 'MKT\tAYI\tag'



In [9]:
# Interactive YAML builder (id + sequence ONLY) with correct YAML SPACES (no tabs)
# Prompts for directory + filename; writes ids as one-element lists: id: [A1]

!pip -q install ipywidgets
from google.colab import output, files
output.enable_custom_widget_manager()

import ipywidgets as W
from IPython.display import display, Markdown, clear_output
import os

# ----- protein entry factory: ONLY 'id' and 'sequence' -----
def make_entry():
    id_field = W.Text(placeholder="protein id (e.g., A1)", layout=W.Layout(width="350px"))
    seq_field = W.Textarea(placeholder="protein sequence (AA letters)", layout=W.Layout(width="600px", height="80px"))
    row = W.VBox([
        W.HBox([W.Label("id:", layout=W.Layout(width="60px")), id_field]),
        W.HBox([W.Label("sequence:", layout=W.Layout(width="60px")), seq_field]),
        W.HTML("<hr>")
    ])
    return {"box": row, "id": id_field, "seq": seq_field}

entries = []
def add_entry(_=None):
    e = make_entry()
    entries.append(e)
    entries_box.children = [x["box"] for x in entries]

def remove_last(_=None):
    if entries:
        entries.pop()
        entries_box.children = [x["box"] for x in entries]

entries_box = W.VBox([])
add_btn = W.Button(description="Add protein", button_style="info")
remove_btn = W.Button(description="Remove last")
preview_btn = W.Button(description="Preview YAML", button_style="primary")
save_btn = W.Button(description="Save YAML", button_style="success")
out = W.Output()

# Directory and filename widgets
dir_text = W.Text(value="/content/", description="Directory:", layout=W.Layout(width="500px"))
file_text = W.Text(value="protein.yaml", description="File name:", layout=W.Layout(width="300px"))

add_btn.on_click(add_entry)
remove_btn.on_click(remove_last)

# ---- YAML (spaces, not tabs). Target:
# sequences:
#   - protein:
#       id: [A1]
#       sequence: MDSN...
IND_2 = "  "      # 2 spaces
IND_6 = "      "  # 6 spaces

def normalize_id_to_list(pid: str) -> str:
    pid = pid.strip()
    # If already looks like a YAML list, keep as-is; else wrap in [pid]
    if pid.startswith('[') and pid.endswith(']'):
        return pid
    return f"[{pid}]"

def build_yaml_text():
    lines = ["sequences:"]
    any_item = False
    for e in entries:
        pid = e["id"].value.strip()
        pseq = e["seq"].value.strip()
        if not pid and not pseq:
            continue
        any_item = True
        lines.append(f"{IND_2}- protein:")
        lines.append(f"{IND_6}id: {normalize_id_to_list(pid)}")
        lines.append(f"{IND_6}sequence: {pseq}")
    return ("\n".join(lines) + "\n") if any_item else ""

def on_preview(_):
    with out:
        clear_output()
        y = build_yaml_text()
        if not y:
            display(Markdown("**Add at least one protein entry (id and/or sequence).**"))
            return
        display(Markdown("### YAML Preview"))
        display(Markdown(f"```yaml\n{y}```"))

def on_save(_):
    with out:
        clear_output()
        y = build_yaml_text()
        if not y:
            display(Markdown("**Add at least one protein entry (id and/or sequence).**"))
            return
        directory = dir_text.value.strip()
        fname = file_text.value.strip()
        if not (fname.endswith(".yaml") or fname.endswith(".yml")):
            fname += ".yaml"
        os.makedirs(directory, exist_ok=True)
        full_path = os.path.join(directory, fname)
        with open(full_path, "w", encoding="utf-8") as f:
            f.write(y)
        display(Markdown(f"**Saved:** `{full_path}`"))
        files.download(full_path)

preview_btn.on_click(on_preview)
save_btn.on_click(on_save)

controls = W.HBox([add_btn, remove_btn, preview_btn, save_btn])
display(W.VBox([
    W.HTML("<h3>YAML: sequences → - protein → (id: [ID], sequence)</h3>"),
    dir_text,
    file_text,
    controls,
    entries_box,
    out
]))

# start with one entry
add_entry()


VBox(children=(HTML(value='<h3>YAML: sequences → - protein → (id: [ID], sequence)</h3>'), Text(value='/content…