<a href="https://colab.research.google.com/github/alexjisa/ColabDNMP/blob/main/ColabDNMP_Borrador.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **ColabMiniprot v1.0**

ColabMiniprot integrates state-of-the-art tools such as P2Rank, RFdiffusion, ProteinMPNN, and AlphaFold2/ColabFold to design and evaluate novel binders against any target protein.

For more details, see the bottom of the notebook and check out the ColabMiniprot GitHub repository.

**🧑‍💻 Author**

Alejandro Jiménez-Sánchez

**📅 Version & License**

Version: v1.0-2025

License: MIT License – free to use and modify with credit

In [None]:
# @title **Create your output directory in Google Drive üìÅ**
#@markdown This cell reads **p2rank_output/target_input.pdb_predictions.csv**, lets you pick a **pocket by rank**,
#@markdown extracts its `residue_ids`, computes **per-residue SASA** only for those residues, and returns **ranked hotspots**.
%%time
from google.colab import drive
import os

# ---- User input ----
project_name = "Prueba_1"  # @param {type:"string"}

# ---- Mount Drive ----
drive.mount('/content/drive', force_remount=True)

# ---- Create main project folder ----
WORKDIR = f"/content/drive/MyDrive/{project_name}"
os.makedirs(WORKDIR, exist_ok=True)

# ---- Subfolders ----
SUBFOLDERS = [
    "01_target",
    "02_p2rank",
    "03_hotspots",
    "04_trimming",
    "05_rfdiffusion",
    "06_sequences",
    "07_alphafold",
    "08_metrics",
]
for sf in SUBFOLDERS:
    os.makedirs(os.path.join(WORKDIR, sf), exist_ok=True)

print(f"‚úÖ Project folder created: {WORKDIR}")
print("üìÇ Subfolders:")
for sf in SUBFOLDERS:
    print("   -", sf)

In [None]:
#@title **1) Upload your target protein structure üéØ**
from google.colab import files
import os, shutil

# --- Settings ---
# Fall back to the default project folder when the setup cell was not run.
if "WORKDIR" not in globals():
    WORKDIR = "/content/drive/MyDrive/Prueba_1"

TARGET_DIR = os.path.join(WORKDIR, "01_target")
os.makedirs(TARGET_DIR, exist_ok=True)

print("üìé Please upload your target structure (.pdb / .cif / .mmcif)")
uploaded = files.upload()  # mapping keyed by the uploaded file names

# Use the first uploaded file whose extension marks it as a structure file.
valid_ext = (".pdb", ".cif", ".mmcif")
src = next((name for name in uploaded if name.lower().endswith(valid_ext)), None)
if src is None:
    raise ValueError("‚ùå No valid file uploaded (need .pdb / .cif / .mmcif)")

# The structure is stored under one fixed name, both on Drive and in the
# current working directory (for tools that expect it in CWD).
fixed_name = "target_input.pdb"
local_out  = fixed_name
drive_out  = os.path.join(TARGET_DIR, fixed_name)

# --- Convert CIF -> PDB if needed (simple, using Biopython) ---
def convert_cif_to_pdb(cif_path, pdb_path):
    """Read an mmCIF structure and write it back out in PDB format.

    Biopython is imported lazily; if it is missing it is installed with pip
    into the interpreter that runs this kernel and the import is retried.

    Args:
        cif_path: path of the mmCIF file to read.
        pdb_path: path of the PDB file to write (overwritten if present).

    Raises:
        subprocess.CalledProcessError: if the on-demand pip install fails.
    """
    try:
        from Bio.PDB import MMCIFParser, PDBIO
    except ImportError:
        import importlib
        import subprocess
        import sys
        # sys.executable guarantees the package lands in this kernel's
        # environment; check_call surfaces a failed install immediately.
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "biopython"])
        importlib.invalidate_caches()  # make the fresh install importable
        from Bio.PDB import MMCIFParser, PDBIO
    parser = MMCIFParser(QUIET=True)
    structure = parser.get_structure("x", cif_path)
    io = PDBIO()
    io.set_structure(structure)
    io.save(pdb_path)

# Decide which file gets stored: the upload itself, or its PDB conversion.
is_cif = src.lower().endswith((".cif", ".mmcif"))
if is_cif:
    print("üîÑ Converting CIF ‚Üí PDB ...")
    tmp_pdb = "_tmp_converted.pdb"
    convert_cif_to_pdb(src, tmp_pdb)
    staged = tmp_pdb
else:
    # already a PDB
    staged = src

# Store on Drive first, then keep a local working copy in CWD.
shutil.move(staged, drive_out)
shutil.copy(drive_out, local_out)
if is_cif:
    # the original CIF upload is no longer needed
    os.remove(src)
print(f"‚úÖ Saved (Drive): {drive_out}")

üìé Please upload your target structure (.pdb / .cif / .mmcif)


In [None]:
#@title **2) Predict pockets on the target surface üîç**
%%time
import os, pandas as pd

P2RANK_VERSION = "2.5.1"
P2RANK_FOLDER  = f"p2rank_{P2RANK_VERSION}"
OUT_DIR        = "p2rank_output"
PRED_CSV       = f"{OUT_DIR}/target_input.pdb_predictions.csv"

# --- Check input file ---
if not os.path.exists("target_input.pdb"):
    raise FileNotFoundError("‚ùå target_input.pdb not found. Run Step 1 first.")

# --- Install P2Rank if needed ---
if not os.path.exists(P2RANK_FOLDER):
    print("‚öôÔ∏è Installing P2Rank ...")
    !apt-get -qq install -y openjdk-17-jdk-headless
    !wget -q https://github.com/rdk/p2rank/releases/download/{P2RANK_VERSION}/p2rank_{P2RANK_VERSION}.tar.gz
    !tar -xzf p2rank_{P2RANK_VERSION}.tar.gz
    print("‚úÖ P2Rank installed.")

# --- Run P2Rank ---
os.makedirs(OUT_DIR, exist_ok=True)
print("üöÄ Running P2Rank ...")
!{P2RANK_FOLDER}/prank predict -f target_input.pdb -o {OUT_DIR} -visualizations 0

# --- Load results ---
if not os.path.exists(PRED_CSV):
    raise FileNotFoundError(f"‚ùå {PRED_CSV} not found. Something went wrong.")

df = pd.read_csv(PRED_CSV, sep=None, engine="python")
df.columns = [c.strip() for c in df.columns]  # clean header spaces

# --- Keep only desired columns ---
needed_cols = ["rank", "probability", "residue_ids"]
missing = [c for c in needed_cols if c not in df.columns]
if missing:
    raise ValueError(f"‚ùå Missing columns in P2Rank output: {missing}")

df_simple = df[needed_cols].sort_values("rank")
print("\n‚úÖ Pocket summary:")
display(df_simple)

In [None]:
#@title **3) Select pocket and compute hotspot residues (SASA on specific residues)**
#@markdown This cell reads **p2rank_output/target_input.pdb_predictions.csv**, lets you pick a **pocket by rank**,
#@markdown extracts its `residue_ids`, computes **per-residue SASA** only for those residues, and returns **ranked hotspots**.
# Install dependency
!pip install biopython --quiet
pocket_rank = 1            #@param {type:"integer"}
sasa_cutoff = 40         #@param {type:"number"}
filter_mode = "aromatic_or_hydrophobic"  #@param ["none", "aromatic_only", "hydrophobic_only", "aromatic_or_hydrophobic"]
top_n_show = 5            #@param {type:"integer"}

import os
import pandas as pd
from Bio.PDB import PDBParser, ShrakeRupley

pred_csv = "p2rank_output/target_input.pdb_predictions.csv"
pdb_file = "target_input.pdb"

# --- basic checks ---
if not os.path.exists(pred_csv):
    raise FileNotFoundError(f"‚ùå Missing file: {pred_csv}. Run Step 2 first.")
if not os.path.exists(pdb_file):
    raise FileNotFoundError(f"‚ùå Missing file: {pdb_file}. Run Step 1 first.")

# --- load P2Rank results and get chosen pocket residue_ids ---
df = pd.read_csv(pred_csv, sep=None, engine="python")
df.columns = [c.strip() for c in df.columns]
if "rank" not in df.columns or "residue_ids" not in df.columns:
    raise ValueError("‚ùå P2Rank CSV must contain 'rank' and 'residue_ids' columns.")

# Pick the pocket by exact rank
pocket_df = df[df["rank"] == pocket_rank]
if pocket_df.empty:
    available = sorted(df["rank"].unique().tolist())
    raise ValueError(f"‚ùå Pocket with rank={pocket_rank} not found. Available ranks: {available}")
resids_str = str(pocket_df.iloc[0]["residue_ids"])
residue_ids = resids_str.split()  # e.g., ["A_185","A_208",...]

if len(residue_ids) == 0:
    raise ValueError("‚ùå The selected pocket has no residue_ids listed.")

# --- parse PDB and compute SASA on a single model ---
parser = PDBParser(QUIET=True)
structure = parser.get_structure("target", pdb_file)

# Multi-model files (e.g. NMR ensembles) repeat every residue once per model.
# Computing SASA over all models at once would treat the overlaid copies as
# neighbours, and walking every model would list each pocket residue several
# times, so everything below uses the first model only.
first_model = next(iter(structure), None)
if first_model is None:
    raise ValueError(f"No models could be parsed from {pdb_file}.")

sr = ShrakeRupley()  # default probe radius
sr.compute(first_model, level="R")  # annotate .sasa per residue

# --- helpers for residue class filters ---
AROMATIC = {"PHE", "TYR", "TRP", "HIS"}
HYDROPHOBIC = {"ALA", "VAL", "ILE", "LEU", "MET", "PHE", "TRP", "TYR", "PRO"}  # common set

def is_aromatic(resn): return resn in AROMATIC
def is_hydrophobic(resn): return resn in HYDROPHOBIC

# --- extract SASA for the requested residue_ids only ---
records = []
target_set = set(residue_ids)  # e.g., {"A_185", ...}

for chain in first_model:
    ch = chain.id
    for res in chain:
        # Keep standard residues only: hetero groups and waters have a
        # non-blank first id field.
        if res.id[0] != " ":
            continue
        resn = res.get_resname().strip()
        resi = res.id[1]
        # Same "<chain>_<number>" form as the entries of residue_ids
        key = f"{ch}_{resi}"
        if key not in target_set:
            continue
        # A missing or None .sasa is recorded as 0.0
        sasa = float(getattr(res, "sasa", 0.0) or 0.0)
        records.append({
            "chain": ch,
            "resi": resi,
            "resn": resn,
            "sasa_A2": sasa,
            "is_aromatic": is_aromatic(resn),
            "is_hydrophobic": is_hydrophobic(resn)
        })

hotspots_df = pd.DataFrame(records)
if len(hotspots_df) == 0:
    raise ValueError("‚ùå None of the pocket residue_ids were found in the PDB. Check chain/indexing consistency.")

# --- apply filters ---
# Residue-class mask per mode; any other mode ("none") keeps every residue.
mode_masks = {
    "aromatic_only": hotspots_df["is_aromatic"],
    "hydrophobic_only": hotspots_df["is_hydrophobic"],
    "aromatic_or_hydrophobic": hotspots_df["is_aromatic"] | hotspots_df["is_hydrophobic"],
}
filt = mode_masks.get(filter_mode, True)

exposed = hotspots_df["sasa_A2"] >= float(sasa_cutoff)
hotspots_sel = hotspots_df[filt & exposed]

# --- sort and show: most exposed first, ties broken by name then number ---
hotspots_ranked = (
    hotspots_sel
    .sort_values(["sasa_A2","resn","resi"], ascending=[False, True, True])
    .reset_index(drop=True)
)

print(f"\nPocket rank selected: {pocket_rank}")
print(f"Residues listed by P2Rank for this pocket ({len(residue_ids)}): {residue_ids}")
print(f"\nFilter mode: {filter_mode}  |  SASA cutoff: {sasa_cutoff} √Ö¬≤")
print("\nTop hotspots (sorted by SASA):")
shown_cols = ["chain","resi","resn","sasa_A2","is_aromatic","is_hydrophobic"]
display(hotspots_ranked.head(top_n_show)[shown_cols])

# --- also print a compact list for RFdiffusion hotspots flag ---
if hotspots_ranked.empty:
    print("\nNo residues passed the filters. Consider lowering the SASA cutoff or changing filter_mode.")
else:
    hotspot_flags = [
        f"{chain_name}{res_number}"
        for chain_name, res_number in zip(hotspots_ranked["chain"], hotspots_ranked["resi"])
    ]
    print("\nRFdiffusion hotspot list (chain+index):")
    print(hotspot_flags)

[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/3.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[91m‚ï∏[0m[90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.0/3.2 MB[0m [31m29.6 MB/s[0m eta [36m0:00:01[0m[2K   [91m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[91m‚ï∏[0m [32m3.2/3.2 MB[0m [31m52.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m3.2/3.2 MB[0m [31m42.7 MB/s[0m eta [36m0:00:00[0m
[?25h

FileNotFoundError: ‚ùå Missing file: p2rank_output/target_input.pdb_predictions.csv. Run Step 2 first.

In [None]:
#@title **4) Trim PDB by ¬±N residues around hotspots (contiguous block, BioPython)** ‚úÇÔ∏è
%%time
chain_id     = "A"                      #@param {type:"string"}
hotspots_str = "A79, A145, A173"    #@param {type:"string"}
flank        = 10                       #@param {type:"integer"}

src_pdb   = "target_input.pdb"          # fixed name from Step 1
out_pdb   = "target_trimmed_seqwin.pdb" # fixed output for downstream steps

import re, os
from Bio.PDB import PDBParser, PDBIO, Select

# --- checks ---
if not os.path.exists(src_pdb):
    raise FileNotFoundError("target_input.pdb not found. Upload/rename in Step 1 first.")

# Hotspots arrive as one string such as "A185,A208, A215": split on commas
# and/or whitespace, then require "<chain letter><residue number>" per token.
hotspot_pattern = re.compile(r"([A-Za-z])(\d+)")
tokens = list(filter(None, re.split(r"[,\s]+", hotspots_str.strip())))
hot_nums = []
for token in tokens:
    parsed = hotspot_pattern.fullmatch(token)
    if parsed is None:
        raise ValueError(f"Invalid hotspot token: '{token}' (expected e.g. A185)")
    # Chain letters are compared case-insensitively
    token_chain = parsed.group(1).upper()
    if token_chain != chain_id.upper():
        raise ValueError(f"Hotspot '{token}' is on chain {token_chain}, but chain_id is '{chain_id}'.")
    hot_nums.append(int(parsed.group(2)))
if len(hot_nums) == 0:
    raise ValueError("No valid hotspots parsed.")

# Outermost hotspots define the window that gets padded by `flank`
min_hot, max_hot = min(hot_nums), max(hot_nums)

# load structure
parser = PDBParser(QUIET=True)
structure = parser.get_structure("target", src_pdb)

# collect available residue numbers on requested chain (standard residues only)
avail = []
the_chain = None
for model in structure:
    for ch in model:
        if ch.id == chain_id:
            the_chain = ch
            for res in ch:
                # Blank hetero flag = standard residue (excludes HETATM and
                # water). Insertion codes are NOT filtered here: res.id[1] is
                # the number only, so "100" and "100A" both contribute 100.
                if res.id[0] == " ":
                    avail.append(res.id[1])

if the_chain is None or not avail:
    raise ValueError(f"No standard residues found on chain '{chain_id}' in {src_pdb}.")

# Hotspots that do not exist on the chain usually mean a numbering mismatch;
# report them instead of trimming around residues that are not there.
missing_hot = sorted(set(hot_nums) - set(avail))
if missing_hot:
    print(f"WARNING: hotspot residue(s) not present on chain {chain_id}: {missing_hot}")

# Window = outermost hotspots padded by `flank`, clipped to the chain's range
start = max(min(avail), min_hot - flank)
end   = min(max(avail), max_hot + flank)

# All hotspots outside the chain's numbering (or a negative flank) give an
# empty window, which would otherwise silently write an empty PDB.
if start > end:
    raise ValueError(
        f"Empty trimming window ({start}..{end}): hotspots {sorted(hot_nums)} with flank {flank} "
        f"do not overlap chain {chain_id} residues {min(avail)}..{max(avail)}."
    )

class RangeSelect(Select):
    """PDBIO filter that keeps standard residues of `chain_id` numbered within [start, end].

    `chain_id`, `start` and `end` are read from the notebook's global
    namespace each time a chain or residue is tested.
    """

    def accept_chain(self, chain):
        # 1 for the requested chain, 0 for every other chain
        return int(chain.id == chain_id)

    def accept_residue(self, residue):
        hetero_flag, rnum = residue.id[0], residue.id[1]
        if hetero_flag != " ":
            # hetero groups and waters are always dropped
            return 0
        return int(start <= rnum <= end)

# write trimmed PDB (RangeSelect decides what is kept)
io = PDBIO()
io.set_structure(structure)
io.save(out_pdb, select=RangeSelect())

# small report: how many standard residues of the chain fall in the window
std_resnums = [res.id[1] for res in the_chain if res.id[0] == " "]
total_std = len(std_resnums)
kept = sum(1 for resnum in std_resnums if start <= resnum <= end)
hotspot_labels = ', '.join(f'{chain_id}{n}' for n in sorted(hot_nums))

print("‚úÖ Trim by contiguous hotspot window completed.")
print(f" - Input PDB : {src_pdb}")
print(f" - Chain     : {chain_id}")
print(f" - Hotspots  : {hotspot_labels}")
print(f" - Flank     : ¬±{flank} residues")
print(f" - Kept range: {chain_id}{start} .. {chain_id}{end} (inclusive)")
print(f" - Residues  : kept {kept} / total {total_std} (chain {chain_id})")
print(f" - Output    : {out_pdb}")

‚úÖ Trim by contiguous hotspot window completed.
 - Input PDB : target_input.pdb
 - Chain     : A
 - Hotspots  : A79, A145, A173
 - Flank     : ¬±10 residues
 - Kept range: A69 .. A183 (inclusive)
 - Residues  : kept 115 / total 226 (chain A)
 - Output    : target_trimmed_seqwin.pdb
CPU times: user 19.1 ms, sys: 3.01 ms, total: 22.1 ms
Wall time: 21.9 ms


In [None]:
#@title **setup RFdiffusion**
%%time
import os, time, signal
import sys, random, string, re
if not os.path.isdir("params"):
  # -y: never stall on apt's confirmation prompt in a non-interactive kernel
  os.system("apt-get install -y aria2")
  os.system("mkdir params")
  # send param download into background; the files are fetched one after the
  # other and params/done.txt is created last as the completion marker
  os.system("(\
  aria2c -q -x 16 https://files.ipd.uw.edu/krypton/schedules.zip; \
  aria2c -q -x 16 http://files.ipd.uw.edu/pub/RFdiffusion/6f5902ac237024bdd0c176cb93063dc4/Base_ckpt.pt; \
  aria2c -q -x 16 http://files.ipd.uw.edu/pub/RFdiffusion/e29311f6f1bf1af907f9ef9f44b8328b/Complex_base_ckpt.pt; \
  aria2c -q -x 16 https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar; \
  tar -xf alphafold_params_2022-12-06.tar -C params; \
  touch params/done.txt) &")

if not os.path.isdir("RFdiffusion"):
  print("installing RFdiffusion...")
  os.system("git clone https://github.com/sokrypton/RFdiffusion.git")
  os.system("pip install jedi omegaconf hydra-core icecream pyrsistent pynvml decorator")
  os.system("pip install git+https://github.com/NVIDIA/dllogger#egg=dllogger")
  # 17Mar2024: adding --no-dependencies to avoid installing nvidia-cuda-* dependencies
  # 25Aug2025: updating dgi install to work with latest pytorch
  os.system("pip install --no-dependencies dgl -f https://data.dgl.ai/wheels/torch-2.4/cu124/repo.html")
  os.system("pip install --no-dependencies e3nn==0.5.5 opt_einsum_fx")
  os.system("cd RFdiffusion/env/SE3Transformer; pip install .")
  os.system("wget -qnc https://files.ipd.uw.edu/krypton/ananas")
  os.system("chmod +x ananas")

if not os.path.isdir("colabdesign"):
  print("installing ColabDesign...")
  os.system("pip -q install git+https://github.com/sokrypton/ColabDesign.git@v1.1.1")
  # expose the installed package as ./colabdesign so its scripts can be run by path
  os.system("ln -s /usr/local/lib/python3.*/dist-packages/colabdesign colabdesign")

if not os.path.isdir("RFdiffusion/models"):
  print("downloading RFdiffusion params...")
  models = ["Base_ckpt.pt","Complex_base_ckpt.pt"]
  # A checkpoint is ready only once the file exists AND its "<name>.aria2"
  # in-progress marker is gone. Checking the marker alone (as before) lets the
  # loop fall through when the background download has not started yet.
  deadline = time.time() + 30 * 60
  for m in models:
    while not os.path.isfile(m) or os.path.isfile(f"{m}.aria2"):
      if time.time() > deadline:
        raise RuntimeError(f"Timed out waiting for {m} to finish downloading.")
      time.sleep(5)
  # The folder is created only after the wait so a failed attempt can be
  # retried by simply re-running the cell.
  os.system("mkdir RFdiffusion/models")
  os.system(f"mv {' '.join(models)} RFdiffusion/models")
  os.system("unzip schedules.zip; rm schedules.zip")

if 'RFdiffusion' not in sys.path:
  os.environ["DGLBACKEND"] = "pytorch"
  sys.path.append('RFdiffusion')

from google.colab import files
import json
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML
import ipywidgets as widgets
import py3Dmol

from inference.utils import parse_pdb
from colabdesign.rf.utils import get_ca
from colabdesign.rf.utils import fix_contigs, fix_partial_contigs, fix_pdb, sym_it
from colabdesign.shared.protein import pdb_to_string
from colabdesign.shared.plot import plot_pseudo_3D

def get_pdb(pdb_code=None):
  """Resolve a structure source to a local file path.

  Args:
    pdb_code: one of
      * None or "": prompt for an upload and save it as "tmp.pdb";
      * a path to an existing file: returned unchanged;
      * a 4-character code: "<code>.pdb1.gz" is downloaded from files.rcsb.org
        and unpacked (skipped if "<code>.pdb1" is already present);
      * anything else: treated as an accession for alphafold.ebi.ac.uk.

  Returns:
    Path of the local structure file.

  Raises:
    ValueError: if the upload dialog returned no file.
    FileNotFoundError: if a download did not produce the expected file.
  """
  if pdb_code is None or pdb_code == "":
    upload_dict = files.upload()
    if not upload_dict:
      raise ValueError("No file was uploaded.")
    # only the first uploaded file is used
    pdb_string = next(iter(upload_dict.values()))
    with open("tmp.pdb","wb") as out: out.write(pdb_string)
    return "tmp.pdb"

  if os.path.isfile(pdb_code):
    return pdb_code

  if len(pdb_code) == 4:
    target = f"{pdb_code}.pdb1"
    if not os.path.isfile(target):
      os.system(f"wget -qnc https://files.rcsb.org/download/{pdb_code}.pdb1.gz")
      os.system(f"gunzip {pdb_code}.pdb1.gz")
  else:
    # NOTE(review): the model version "v3" is hard-coded in the URL — confirm
    # the database still serves it.
    target = f"AF-{pdb_code}-F1-model_v3.pdb"
    os.system(f"wget -qnc https://alphafold.ebi.ac.uk/files/{target}")

  # os.system does not raise on failure; without this check a bad code would
  # only surface later as an unrelated parsing error.
  if not os.path.isfile(target):
    raise FileNotFoundError(f"Could not obtain a structure for '{pdb_code}' (expected file: {target}).")
  return target

def run_ananas(pdb_str, path, sym=None):
  """Run the AnAnaS binary on a structure and reduce it to the asymmetric unit.

  The structure text is written to outputs/<path>/ananas_input.pdb, ./ananas
  is invoked with JSON output, and the JSON is parsed. ATOM records are kept
  only for the chains listed under the asymmetric unit; for groups starting
  with "c" or "d" their coordinates are passed through `sym_it`. Non-ATOM
  lines are kept unchanged.

  Args:
    pdb_str: structure in PDB text format.
    path: run name; files are placed in outputs/<path>/ (must already exist).
    sym: optional extra argument appended to the ananas command line.

  Returns:
    (results, new_pdb_str) on success, where `results` is the first entry of
    the AnAnaS JSON output; (None, pdb_str) if the output is missing or cannot
    be interpreted.
  """
  pdb_filename = f"outputs/{path}/ananas_input.pdb"
  out_filename = f"outputs/{path}/ananas.json"
  with open(pdb_filename,"w") as handle:
    handle.write(pdb_str)

  cmd = f"./ananas {pdb_filename} -u -j {out_filename}"
  if sym is not None:
    cmd = f"{cmd} {sym}"
  os.system(cmd)

  # parse results
  try:
    with open(out_filename,"r") as handle:
      out = json.load(handle)
    results,AU = out[0], out[-1]["AU"]
    group = AU["group"]
    chains = AU["chain names"]
    rmsd = results["Average_RMSD"]
    print(f"AnAnaS detected {group} symmetry at RMSD:{rmsd:.3}")

    C = np.array(results['transforms'][0]['CENTER'])
    A = [np.array(t["AXIS"]) for t in results['transforms']]

    # apply symmetry and filter to the asymmetric unit
    new_lines = []
    for line in pdb_str.split("\n"):
      if not line.startswith("ATOM"):
        new_lines.append(line)
        continue
      if line[21:22] not in chains:
        # ATOM records of chains outside the asymmetric unit are dropped
        continue
      # x, y, z occupy three consecutive 8-character fields from column 30
      x = np.array([float(line[i:(i+8)]) for i in [30,38,46]])
      if group[0] == "c":
        x = sym_it(x,C,A[0])
      if group[0] == "d":
        x = sym_it(x,C,A[1],A[0])
      coord_str = "".join(["{:8.3f}".format(a) for a in x])
      new_lines.append(line[:30]+coord_str+line[54:])
    return results, "\n".join(new_lines)

  except Exception as err:
    # Best effort: any failure (binary missing, no JSON, unexpected layout)
    # means "no symmetry information". Unlike a bare `except:`, this lets
    # KeyboardInterrupt/SystemExit propagate.
    print(f"AnAnaS result unavailable ({type(err).__name__}: {err})")
    return None, pdb_str

def run(command, steps, num_designs=1, visual="none"):
  """Launch `command` in the background and follow its progress via per-step PDB files.

  The function expects the launched process to write /dev/shm/<n>.pdb for
  n = 0 .. steps-1, once per design. Each file is polled until it exists and
  its text ends with "TER"; it is then optionally rendered and deleted.

  Args:
    command: shell command line to execute.
    steps: number of per-step files expected for each design.
    num_designs: how many times the full sequence of steps is expected.
    visual: "none" (progress bar only), "image" (static plot per step) or
      "interactive" (py3Dmol view per step).

  Returns:
    None. Progress and failure are reported through the displayed widgets.
  """

  def run_command_and_get_pid(command):
    """Start `command` under nohup in the background and return the PID echoed by the shell."""
    pid_file = '/dev/shm/pid'
    # `$!` is the PID of the job just sent to the background; it is passed back through a file
    os.system(f'nohup {command} > /dev/null & echo $! > {pid_file}')
    with open(pid_file, 'r') as f:
      pid = int(f.read().strip())
    os.remove(pid_file)
    return pid
  def is_process_running(pid):
    """Return True if signal 0 can be delivered to `pid`, False if that raises OSError."""
    try:
      os.kill(pid, 0)
    except OSError:
      return False
    else:
      return True

  run_output = widgets.Output()
  progress = widgets.FloatProgress(min=0, max=1, description='running', bar_style='info')
  display(widgets.VBox([progress, run_output]))

  # clear previous run
  for n in range(steps):
    if os.path.isfile(f"/dev/shm/{n}.pdb"):
      os.remove(f"/dev/shm/{n}.pdb")

  pid = run_command_and_get_pid(command)
  try:
    fail = False
    for _ in range(num_designs):

      # for each step check if output generated
      for n in range(steps):
        wait = True
        while wait and not fail:
          time.sleep(0.1)
          if os.path.isfile(f"/dev/shm/{n}.pdb"):
            pdb_str = open(f"/dev/shm/{n}.pdb").read()
            # a trailing "TER" is taken as the sign that the file is complete
            if pdb_str[-3:] == "TER":
              wait = False
            elif not is_process_running(pid):
              # incomplete file and the process is gone: give up
              fail = True
          elif not is_process_running(pid):
            # no file and the process is gone: give up
            fail = True

        if fail:
          progress.bar_style = 'danger'
          progress.description = "failed"
          break

        else:
          progress.value = (n+1) / steps
          if visual != "none":
            with run_output:
              # replace the previous frame instead of stacking outputs
              run_output.clear_output(wait=True)
              if visual == "image":
                xyz, bfact = get_ca(f"/dev/shm/{n}.pdb", get_bfact=True)
                fig = plt.figure()
                fig.set_dpi(100);fig.set_figwidth(6);fig.set_figheight(6)
                ax1 = fig.add_subplot(111);ax1.set_xticks([]);ax1.set_yticks([])
                plot_pseudo_3D(xyz, c=bfact, cmin=0.5, cmax=0.9, ax=ax1)
                plt.show()
              if visual == "interactive":
                view = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js')
                view.addModel(pdb_str,'pdb')
                view.setStyle({'cartoon': {'colorscheme': {'prop':'b','gradient': 'roygb','min':0.5,'max':0.9}}})
                view.zoomTo()
                view.show()
        # remove the consumed file so the same index can be detected again for the next design
        if os.path.exists(f"/dev/shm/{n}.pdb"):
          os.remove(f"/dev/shm/{n}.pdb")
      # propagate a failure from the step loop out of the design loop
      if fail:
        progress.bar_style = 'danger'
        progress.description = "failed"
        break

    # block until the background process has exited
    while is_process_running(pid):
      time.sleep(0.1)

  except KeyboardInterrupt:
    # interrupting the cell also terminates the background process
    os.kill(pid, signal.SIGTERM)
    progress.bar_style = 'danger'
    progress.description = "stopped"

def run_diffusion(contigs, path, pdb=None, iterations=50,
                  symmetry="none", order=1, hotspot=None,
                  chains=None, add_potential=False,
                  num_designs=1, visual="none"):
  """Assemble the RFdiffusion command line from the arguments, run it, and post-process the outputs.

  Args:
    contigs: contig specification as one string; items may be separated by
      commas, colons or whitespace.
    path: run name; outputs use the prefix outputs/<path>.
    pdb: structure source handed to get_pdb() when an input structure is needed.
    iterations: number of diffusion steps (rescaled in "partial" mode, see below).
    symmetry: "auto", "cyclic" or "dihedral"; any other value disables symmetry.
    order: symmetry order used for "cyclic"/"dihedral".
    hotspot: text inserted into ppi.hotspot_res=[...]; None or "" omits the option.
    chains: chain selection passed to pdb_to_string(); "" is treated as None.
    add_potential: add the olig_contacts guiding potential when symmetry is active.
    num_designs: number of designs requested.
    visual: forwarded to run().

  Returns:
    (contigs, copies): the final contig list and the number of symmetric copies.
  """

  full_path = f"outputs/{path}"
  os.makedirs(full_path, exist_ok=True)
  opts = [f"inference.output_prefix={full_path}",
          f"inference.num_designs={num_designs}"]

  if chains == "": chains = None

  # determine symmetry type
  if symmetry in ["auto","cyclic","dihedral"]:
    if symmetry == "auto":
      # resolved further down, and only if an input structure is used
      sym, copies = None, 1
    else:
      sym, copies = {"cyclic":(f"c{order}",order),
                     "dihedral":(f"d{order}",order*2)}[symmetry]
  else:
    symmetry = None
    sym, copies = None, 1

  # determine mode
  contigs = contigs.replace(","," ").replace(":"," ").split()
  is_fixed, is_free = False, False
  fixed_chains = []
  for contig in contigs:
    for x in contig.split("/"):
      a = x.split("-")[0]
      # a segment starting with a letter refers to a chain of the input structure
      if a[0].isalpha():
        is_fixed = True
        if a[0] not in fixed_chains:
          fixed_chains.append(a[0])
      # a purely numeric segment is a stretch to be generated
      if a.isnumeric():
        is_free = True
  if len(contigs) == 0 or not is_free:
    mode = "partial"
  elif is_fixed:
    mode = "fixed"
  else:
    mode = "free"

  # fix input contigs
  if mode in ["partial","fixed"]:
    pdb_str = pdb_to_string(get_pdb(pdb), chains=chains)
    if symmetry == "auto":
      a, pdb_str = run_ananas(pdb_str, path)
      if a is None:
        print(f'ERROR: no symmetry detected')
        symmetry = None
        sym, copies = None, 1
      else:
        if a["group"][0] == "c":
          symmetry = "cyclic"
          sym, copies = a["group"], int(a["group"][1:])
        elif a["group"][0] == "d":
          symmetry = "dihedral"
          sym, copies = a["group"], 2 * int(a["group"][1:])
        else:
          print(f'ERROR: the detected symmetry ({a["group"]}) not currently supported')
          symmetry = None
          sym, copies = None, 1

    elif mode == "fixed":
      # NOTE(review): pdb_to_string is given PDB text here rather than a path — presumably it accepts both; confirm
      pdb_str = pdb_to_string(pdb_str, chains=fixed_chains)

    pdb_filename = f"{full_path}/input.pdb"
    with open(pdb_filename, "w") as handle:
      handle.write(pdb_str)

    parsed_pdb = parse_pdb(pdb_filename)
    opts.append(f"inference.input_pdb={pdb_filename}")
    if mode in ["partial"]:
      # partial mode rescales the requested step count before passing it as partial_T
      iterations = int(80 * (iterations / 200))
      opts.append(f"diffuser.partial_T={iterations}")
      contigs = fix_partial_contigs(contigs, parsed_pdb)
    else:
      opts.append(f"diffuser.T={iterations}")
      contigs = fix_contigs(contigs, parsed_pdb)
  else:
    opts.append(f"diffuser.T={iterations}")
    parsed_pdb = None
    contigs = fix_contigs(contigs, parsed_pdb)

  if hotspot is not None and hotspot != "":
    opts.append(f"ppi.hotspot_res=[{hotspot}]")

  # setup symmetry
  if sym is not None:
    sym_opts = ["--config-name symmetry", f"inference.symmetry={sym}"]
    if add_potential:
      sym_opts += ["'potentials.guiding_potentials=[\"type:olig_contacts,weight_intra:1,weight_inter:0.1\"]'",
                   "potentials.olig_intra_all=True","potentials.olig_inter_all=True",
                   "potentials.guide_scale=2","potentials.guide_decay=quadratic"]
    opts = sym_opts + opts
    # the contig list is repeated once per symmetric copy
    contigs = sum([contigs] * copies,[])

  opts.append(f"'contigmap.contigs=[{' '.join(contigs)}]'")
  # per-step structures are dumped to /dev/shm, the directory run() polls
  opts += ["inference.dump_pdb=True","inference.dump_pdb_path='/dev/shm'"]

  print("mode:", mode)
  print("output:", full_path)
  print("contigs:", contigs)

  opts_str = " ".join(opts)
  cmd = f"./RFdiffusion/run_inference.py {opts_str}"
  print(cmd)

  # RUN
  # `iterations` doubles as the number of per-step files run() waits for
  run(cmd, iterations, num_designs, visual=visual)

  # fix pdbs
  # Each listed file is rewritten in place with fix_pdb(); open(..., "r")
  # raises FileNotFoundError if the run did not produce it.
  for n in range(num_designs):
    pdbs = [f"outputs/traj/{path}_{n}_pX0_traj.pdb",
            f"outputs/traj/{path}_{n}_Xt-1_traj.pdb",
            f"{full_path}_{n}.pdb"]
    # NOTE(review): the loop variable below reuses the name of the `pdb` parameter
    for pdb in pdbs:
      with open(pdb,"r") as handle: pdb_str = handle.read()
      with open(pdb,"w") as handle: handle.write(fix_pdb(pdb_str, contigs))

  return contigs, copies

installing RFdiffusion...
installing ColabDesign...
downloading RFdiffusion params...


  Extract \sigma(t) corresponding to chosen sigma schedule.
  sigma(t)^2 := \int_0^t g(s)^2 ds,
  @torch.cuda.amp.autocast(enabled=False)


CPU times: user 9.66 s, sys: 962 ms, total: 10.6 s
Wall time: 1min 42s


In [None]:
# @title ‚ñ∂Ô∏è **Run ProteinMPNN + AlphaFold per-backbone**
import os, glob, time, subprocess
from pathlib import Path

# Glob pattern selecting the backbone PDBs to process.
# NOTE(review): absolute Drive path of one specific project — adjust it to the current project.
BACKBONE_GLOB = "/content/drive/MyDrive/RFdiffusion_CDA/*.pdb"  # <- already verified
BINDER_CHAIN  = "B"      # chain ID of the binder in your PDBs
NUM_SEQS      = 8
INITIAL_GUESS = True
USE_MULTIMER  = True
NUM_RECYCLES  = 3
RM_AA         = "C"
MPNN_TEMP     = 0.1

# Minimal dependency: install Biopython only if the import fails
try:
    from Bio.PDB import PDBParser
except ImportError:
    !pip -q install biopython
    from Bio.PDB import PDBParser

def count_chain_len(pdb_path, chain_id):
    """Count the standard residues that carry a CA atom on one chain.

    Models are scanned in order and the first chain whose ID equals
    `chain_id` is counted. Returns 0 if no chain has that ID.
    """
    structure = PDBParser(QUIET=True).get_structure("x", pdb_path)
    for model in structure:
        for chain in model:
            if chain.id != chain_id:
                continue
            # blank hetero flag = standard residue (no hetero group / water)
            return sum(1 for res in chain if res.id[0] == " " and res.has_id("CA"))
    return 0

# Wait until the AlphaFold parameters are in place (no-op if already present)
params_marker = "params/done.txt"
if not os.path.isfile(params_marker):
    print("‚è≥ waiting for AlphaFold params...")
while not os.path.isfile(params_marker):
    time.sleep(5)
print("‚úÖ AlphaFold params available.")

# Collect the backbones, leaving out files that are not designs:
# "input.pdb" and names ending in "_0_0.pdb" (possible trajectory leftovers).
all_pdbs = sorted(glob.glob(BACKBONE_GLOB))
pdb_list = [
    candidate
    for candidate in all_pdbs
    if os.path.basename(candidate) != "input.pdb"
    and not os.path.basename(candidate).endswith("_0_0.pdb")
]

print(f"Found {len(pdb_list)} backbones to process.")

# Main loop: stops at the first failing design
for i, pdb_path in enumerate(pdb_list, 1):
    design_name = Path(pdb_path).name
    L = count_chain_len(pdb_path, BINDER_CHAIN)
    if L <= 0:
        print(f"‚ö†Ô∏è  Skipping {design_name}: chain {BINDER_CHAIN} not found or empty.")
        continue

    contigs_str = f"{BINDER_CHAIN}:{L}-{L}"  # e.g. B:72-72
    # per-design output folder: the PDB path without its extension
    loc = str(Path(pdb_path).with_suffix(""))
    os.makedirs(loc, exist_ok=True)

    opts = [
        f"--pdb={pdb_path}",
        f"--loc={loc}",
        f"--contig={contigs_str}",
        f"--copies=1",
        f"--num_seqs={NUM_SEQS}",
        f"--num_recycles={NUM_RECYCLES}",
        f"--rm_aa={RM_AA}",
        f"--mpnn_sampling_temp={MPNN_TEMP}",
        f"--num_designs=1",
    ]
    opts += ["--initial_guess"] if INITIAL_GUESS else []
    opts += ["--use_multimer"] if USE_MULTIMER else []

    print(f"\n[{i}/{len(pdb_list)}] ‚ñ∂Ô∏è {design_name} | contig={contigs_str}")
    res = subprocess.run(["python", "colabdesign/rf/designability_test.py"] + opts,
                         capture_output=True, text=True)

    if res.returncode == 0:
        print("‚úÖ done")
        continue

    print("‚ùå designability_test.py failed")
    # show stderr if there is any, otherwise stdout
    print((res.stderr or res.stdout).strip())
    print("‚õî Stopping loop due to failure.")
    break

In [None]:
# @title üîé Find backbone PDBs (Drive + local) and set BACKBONE_GLOB automatically
import os, glob
from pathlib import Path

# 1) Mount Drive if needed
from google.colab import drive
drive_root = "/content/drive"
if not os.path.ismount(drive_root):
    drive.mount("/content/drive", force_remount=True)

# 2) Candidate folders and patterns (adjust/add if you changed names)
candidates = [
    "/content/drive/MyDrive/RFdiffusion_CDA/*.pdb",              # <- most likely (your batches folder)
    "/content/drive/MyDrive/RFdiffusion_CDA/CDA_run_b*/*.pdb",   # in case subfolders were left behind
    "/content/outputs/*.pdb",
    "outputs/*.pdb",
]

# Keep every pattern that matches at least one file, in candidate order
found = []
for pat in candidates:
    hits = sorted(glob.glob(pat))
    if not hits:
        continue
    print(f"‚úÖ Found {len(hits)} PDB(s) with pattern: {pat}")
    # show a few examples
    for example in hits[:5]:
        print("  -", os.path.basename(example))
    found.append((pat, hits))

if len(found) == 0:
    raise SystemExit("‚ùå No PDBs found. Revisa la carpeta en Drive y c√≥mo se llaman los archivos.")

# 3) Pick the first non-empty pattern as default BACKBONE_GLOB
BACKBONE_GLOB = found[0][0]
print("\nüìå Using BACKBONE_GLOB =", BACKBONE_GLOB)

# To force a specific pattern, edit it here:
# BACKBONE_GLOB = "/content/drive/MyDrive/RFdiffusion_CDA/CDA_run_b0*_*.pdb"

Mounted at /content/drive
‚úÖ Found 126 PDB(s) with pattern: /content/drive/MyDrive/RFdiffusion_CDA/*.pdb
  - CDA_run_b00_0.pdb
  - CDA_run_b00_0_0.pdb
  - CDA_run_b00_1.pdb
  - CDA_run_b00_10.pdb
  - CDA_run_b00_10_0.pdb
‚úÖ Found 3 PDB(s) with pattern: /content/drive/MyDrive/RFdiffusion_CDA/CDA_run_b*/*.pdb
  - input.pdb
  - input.pdb
  - input.pdb

üìå Using BACKBONE_GLOB = /content/drive/MyDrive/RFdiffusion_CDA/*.pdb


In [None]:
# @title Run ProteinMPNN + AlphaFold per-backbone (reads from Drive, autodetect binder chain)
import os, glob, time, subprocess
from pathlib import Path

# ==== user settings ====
DRIVE_DIR      = "/content/drive/MyDrive/RFdiffusion_CDA"  # folder holding the batch PDBs
BACKBONE_GLOB  = f"{DRIVE_DIR}/CDA_run_b0*_*.pdb"          # file-name pattern
NUM_SEQS       = 8
INITIAL_GUESS  = True
USE_MULTIMER   = True
NUM_RECYCLES   = 3
RM_AA          = "C"
MPNN_TEMP      = 0.1
# =======================

# Make sure the AF2 parameters are ready: poll for the completion marker
if not os.path.isfile("params/done.txt"):
    print("waiting for AlphaFold params...")
while not os.path.isfile("params/done.txt"):
    time.sleep(5)

# small utility: residue count and GLY fraction per chain
def chain_info(pdb_path):
    """Summarise the chains of a PDB file by residue count and glycine fraction.

    Residues are counted through their CA atoms in ATOM records. Each
    (chain, residue number + insertion code) is counted once, so alternate
    locations and repeated models do not inflate the length. ATOM lines too
    short to hold a chain ID are ignored.

    Args:
        pdb_path: path of the PDB file to read.

    Returns:
        List of (chain_id, length, gly_fraction) tuples sorted by length,
        longest first; chains of equal length keep their order of first
        appearance. Empty if the file has no CA atoms.
    """
    counts = {}
    gly = {}
    seen = set()
    with open(pdb_path) as fh:
        for line in fh:
            # column 22 (index 21) holds the chain ID; shorter records are unusable
            if not line.startswith("ATOM") or len(line) < 22:
                continue
            if line[12:16].strip() != "CA":  # count by CA
                continue
            ch = line[21]
            res_key = (ch, line[22:27])  # residue number + insertion code
            if res_key in seen:
                continue
            seen.add(res_key)
            counts[ch] = counts.get(ch, 0) + 1
            if line[17:20].strip() == "GLY":
                gly[ch] = gly.get(ch, 0) + 1
    info = [(ch, n, gly.get(ch, 0) / n) for ch, n in counts.items()]
    # sort by descending length (stable)
    info.sort(key=lambda x: x[1], reverse=True)
    return info  # list of (chain, length, gly_fraction)

# choose the binder chain heuristically:
# 1) a chain that looks like a poly-Gly backbone (high GLY fraction) of plausible binder length
# 2) otherwise the second-longest chain, taking the longest one to be the target
def pick_binder_chain(info, min_len=40, max_len=120, min_gly_frac=0.5):
    """Pick the binder chain from a per-chain summary.

    Args:
        info: non-empty list of (chain_id, length, gly_fraction) tuples sorted
            by length, longest first.
        min_len: minimum length for a poly-Gly candidate (inclusive).
        max_len: maximum length for a poly-Gly candidate (inclusive).
        min_gly_frac: minimum glycine fraction for a poly-Gly candidate.

    Returns:
        (chain_id, length) of the selected chain.

    Raises:
        IndexError: if `info` is empty.
    """
    # candidates by length window and GLY fraction
    cand = [x for x in info if min_len <= x[1] <= max_len and x[2] >= min_gly_frac]
    if cand:
        # highest GLY fraction wins; on a tie the earlier (longer) entry is kept
        best = max(cand, key=lambda x: x[2])
        return best[0], best[1]
    # no clear poly-GLY chain: longest is the target, the next one the binder
    if len(info) >= 2:
        return info[1][0], info[1][1]
    # fallback: the only chain present
    return info[0][0], info[0][1]

# Collect the backbone PDBs selected by BACKBONE_GLOB (sorted for a stable order).
pdb_list = sorted(glob.glob(BACKBONE_GLOB))
print(f"Found {len(pdb_list)} backbones under {DRIVE_DIR}.")
for p in pdb_list[:5]:  # preview at most the first five file names
    print(" -", os.path.basename(p))
if not pdb_list:
    # A regular exception (instead of SystemExit) stops the cell with an ordinary
    # error and avoids IPython's "To exit: use 'exit', 'quit', or Ctrl-D." warning.
    raise FileNotFoundError("‚ùå No PDBs found. Revisa DRIVE_DIR/BACKBONE_GLOB.")

# Process the backbones one at a time; the loop stops at the first failed run.
for i, pdb_path in enumerate(pdb_list, 1):
    info = chain_info(pdb_path)
    if not info:
        print(f"‚ö†Ô∏è  Skipping {Path(pdb_path).name}: no chains with CA found.")
        continue
    binder_chain, L = pick_binder_chain(info)
    # NOTE(review): the contig is built as "<chain>:<L>-<L>"; confirm that
    # designability_test.py accepts this form.
    contigs_str = f"{binder_chain}:{L}-{L}"
    # Per-backbone results folder: the PDB path without its ".pdb" suffix.
    loc = str(Path(pdb_path).with_suffix(""))
    Path(loc).mkdir(parents=True, exist_ok=True)

    opts = [
        f"--pdb={pdb_path}",
        f"--loc={loc}",
        f"--contig={contigs_str}",
        "--copies=1",
        f"--num_seqs={NUM_SEQS}",
        f"--num_recycles={NUM_RECYCLES}",
        f"--rm_aa={RM_AA}",
        f"--mpnn_sampling_temp={MPNN_TEMP}",
        "--num_designs=1",
    ]
    if INITIAL_GUESS: opts.append("--initial_guess")
    if USE_MULTIMER:  opts.append("--use_multimer")

    print(f"\n[{i}/{len(pdb_list)}] ‚ñ∂Ô∏è {Path(pdb_path).name} | binder={binder_chain} len={L} | contig={contigs_str}")
    cmd = ["python", "colabdesign/rf/designability_test.py"] + opts
    # Output is captured, so nothing is shown while the script runs.
    res = subprocess.run(cmd, capture_output=True, text=True)
    if res.returncode != 0:
        print("‚ùå designability_test.py failed")
        # Show the last 20 lines of the log (stderr, or stdout when stderr is empty).
        tail = (res.stderr or res.stdout).strip().splitlines()[-20:]
        print("\n".join(tail))
        print("‚õî Stopping loop due to failure.")
        break
    else:
        print("‚úÖ done")

Found 0 backbones under /content/drive/MyDrive/RFdiffusion_CDA.


SystemExit: ‚ùå No PDBs found. Revisa DRIVE_DIR/BACKBONE_GLOB.

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
# @title Run ProteinMPNN + AlphaFold per-backbone (reads from Drive, autodetect binder chain)
import os, glob, time, subprocess
from pathlib import Path

# NOTE(review): this cell repeats the settings, helpers and loop of the preceding
# cell; consider keeping only one copy.
# ---- User settings ---------------------------------------------------------
# Drive folder that holds the backbone PDBs moved there from the batches.
DRIVE_DIR = "/content/drive/MyDrive/RFdiffusion_CDA"
# File-name pattern used to select the backbones inside DRIVE_DIR.
BACKBONE_GLOB = DRIVE_DIR + "/CDA_run_b0*_*.pdb"

# Values passed as command-line options to designability_test.py further down.
NUM_SEQS = 8           # --num_seqs
NUM_RECYCLES = 3       # --num_recycles
RM_AA = "C"            # --rm_aa
MPNN_TEMP = 0.1        # --mpnn_sampling_temp
INITIAL_GUESS = True   # appends --initial_guess when true
USE_MULTIMER = True    # appends --use_multimer when true
# -----------------------------------------------------------------------------

# Make sure the AF2 params are available: announce once, then poll every 5 s
# until the marker file params/done.txt exists (there is no timeout).
if not os.path.isfile("params/done.txt"):
    print("waiting for AlphaFold params...")
while not os.path.isfile("params/done.txt"):
    time.sleep(5)

# Small utility: count residues and the glycine fraction of every chain.
def chain_info(pdb_path):
    """Summarise the chains of a PDB file by residue count and glycine fraction.

    Residues are counted through their CA atoms in ``ATOM`` records. Each
    residue (chain id + residue number + insertion code) is counted once, so
    alternate-location CA records do not inflate the length, and only the
    first model of a multi-model file is read. ``ATOM`` lines too short to
    carry a chain id are ignored.

    Args:
        pdb_path: path of the PDB file to read.

    Returns:
        A list of ``(chain, length, gly_fraction)`` tuples sorted by length,
        longest first. The list is empty when no CA atom is found.
    """
    counts = {}
    gly = {}
    seen = set()  # (chain, resSeq + iCode) keys of residues already counted
    with open(pdb_path) as fh:
        for line in fh:
            if line.startswith("ENDMDL"):
                break  # later models repeat the same residues
            if not line.startswith("ATOM") or len(line) < 22:
                continue
            if line[12:16].strip() != "CA":  # count residues by their CA atom
                continue
            ch = line[21]
            residue_key = (ch, line[22:27])
            if residue_key in seen:  # alternate location of a counted residue
                continue
            seen.add(residue_key)
            counts[ch] = counts.get(ch, 0) + 1
            if line[17:20].strip() == "GLY":
                gly[ch] = gly.get(ch, 0) + 1
    info = [(ch, n, gly.get(ch, 0) / n) for ch, n in counts.items()]
    # Sort by length, longest chain first.
    info.sort(key=lambda x: x[1], reverse=True)
    return info  # list of (chain, length, gly_fraction)

# Binder-chain heuristic. Author's assumptions behind it:
# 1) the trimmed target is usually chain A (the longest one, ~100+ residues);
# 2) the binder usually shows up as a chain with a high glycine fraction
#    (poly-Gly backbone) and a length of 40-120 residues.
def pick_binder_chain(info):
    """Choose the binder chain from the output of ``chain_info``.

    Args:
        info: list of ``(chain, length, gly_fraction)`` entries, expected to
            be ordered longest chain first.

    Returns:
        ``(chain, length)`` of the selected chain:
        1. among chains with 40 <= length <= 120 and gly_fraction >= 0.5, the
           one with the highest glycine fraction (first such entry on ties);
        2. otherwise the second entry of ``info`` when it has at least two;
        3. otherwise the only entry.

    Raises:
        IndexError: if ``info`` is empty.
    """
    poly_gly = [entry for entry in info if 40 <= entry[1] <= 120 and entry[2] >= 0.5]
    if poly_gly:
        # max() keeps the first entry among equal glycine fractions.
        best = max(poly_gly, key=lambda entry: entry[2])
        return best[0], best[1]
    # No clear poly-Gly chain: treat the first entry as the target and take the
    # next one as binder; with a single chain, that chain is the only option.
    chosen = info[1] if len(info) >= 2 else info[0]
    return chosen[0], chosen[1]

# Collect the backbone PDBs selected by BACKBONE_GLOB (sorted for a stable order).
pdb_list = sorted(glob.glob(BACKBONE_GLOB))
print(f"Found {len(pdb_list)} backbones under {DRIVE_DIR}.")
for p in pdb_list[:5]:  # preview at most the first five file names
    print(" -", os.path.basename(p))
if not pdb_list:
    # A regular exception (instead of SystemExit) stops the cell with an ordinary
    # error and avoids IPython's "To exit: use 'exit', 'quit', or Ctrl-D." warning.
    raise FileNotFoundError("‚ùå No PDBs found. Revisa DRIVE_DIR/BACKBONE_GLOB.")

# Process the backbones one at a time; the loop stops at the first failed run.
for i, pdb_path in enumerate(pdb_list, 1):
    info = chain_info(pdb_path)
    if not info:
        print(f"‚ö†Ô∏è  Skipping {Path(pdb_path).name}: no chains with CA found.")
        continue
    binder_chain, L = pick_binder_chain(info)
    # NOTE(review): the contig is built as "<chain>:<L>-<L>"; confirm that
    # designability_test.py accepts this form.
    contigs_str = f"{binder_chain}:{L}-{L}"
    # Per-backbone results folder: the PDB path without its ".pdb" suffix.
    loc = str(Path(pdb_path).with_suffix(""))
    Path(loc).mkdir(parents=True, exist_ok=True)

    opts = [
        f"--pdb={pdb_path}",
        f"--loc={loc}",
        f"--contig={contigs_str}",
        "--copies=1",
        f"--num_seqs={NUM_SEQS}",
        f"--num_recycles={NUM_RECYCLES}",
        f"--rm_aa={RM_AA}",
        f"--mpnn_sampling_temp={MPNN_TEMP}",
        "--num_designs=1",
    ]
    if INITIAL_GUESS: opts.append("--initial_guess")
    if USE_MULTIMER:  opts.append("--use_multimer")

    print(f"\n[{i}/{len(pdb_list)}] ‚ñ∂Ô∏è {Path(pdb_path).name} | binder={binder_chain} len={L} | contig={contigs_str}")
    cmd = ["python", "colabdesign/rf/designability_test.py"] + opts
    # Output is captured, so nothing is shown while the script runs.
    res = subprocess.run(cmd, capture_output=True, text=True)
    if res.returncode != 0:
        print("‚ùå designability_test.py failed")
        # Show the last 20 lines of the log (stderr, or stdout when stderr is empty).
        tail = (res.stderr or res.stdout).strip().splitlines()[-20:]
        print("\n".join(tail))
        print("‚õî Stopping loop due to failure.")
        break
    else:
        print("‚úÖ done")

Found 0 backbones under /content/drive/MyDrive/RFdiffusion_CDA.


SystemExit: ‚ùå No PDBs found. Revisa DRIVE_DIR/BACKBONE_GLOB.

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
#@title Display best result
import py3Dmol
def plot_pdb(num = "best"):
  """Display a design and its best predicted model together in one py3Dmol view.

  Uses the module-level ``path`` (not defined in this cell) to locate the
  files under ``outputs/``.

  Args:
    num: design index used in the file names, or "best" to take the index
      from the fourth whitespace-separated token of the first line of
      ``outputs/{path}/best.pdb``.

  Raises:
    FileNotFoundError: if one of the expected PDB files does not exist.
    IndexError: if the first line of best.pdb has fewer than four tokens.
  """
  if num == "best":
    with open(f"outputs/{path}/best.pdb","r") as f:
      # Expected first line: REMARK 001 design {m} N {n} RMSD {rmsd}
      info = f.readline().strip('\n').split()
    num = info[3]  # the {m} field of that line
  hbondCutoff = 4.0
  view = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js')
  # Model 0 is outputs/{path}_{num}.pdb, model 1 is outputs/{path}/best_design{num}.pdb.
  # Each file is read inside a context manager so its handle is closed right away.
  for pdb_file in (f"outputs/{path}_{num}.pdb", f"outputs/{path}/best_design{num}.pdb"):
    with open(pdb_file,'r') as f:
      pdb_str = f.read()
    view.addModel(pdb_str,'pdb',{'hbondCutoff':hbondCutoff})

  # Model 0: plain cartoon. Model 1: cartoon coloured by the B-factor column
  # over 0-100 (presumably holds pLDDT here; confirm).
  view.setStyle({"model":0},{'cartoon':{}})
  view.setStyle({"model":1},{'cartoon':{'colorscheme': {'prop':'b','gradient': 'roygb','min':0,'max':100}}})
  view.zoomTo()
  view.show()

if not (num_designs > 1):
  # A single design: nothing to choose from, draw it directly.
  plot_pdb()
else:
  # Several designs: show a dropdown and redraw the viewer on every selection.
  output = widgets.Output()

  def on_change(change):
    """Redraw the viewer inside `output` when the dropdown value changes."""
    if change['name'] != 'value':
      return
    with output:
      output.clear_output(wait=True)
      plot_pdb(change['new'])

  dropdown = widgets.Dropdown(
    description='design:',
    value="best",
    options=["best", *map(str, range(num_designs))],
  )
  dropdown.observe(on_change)
  display(widgets.VBox([dropdown, output]))
  # Initial rendering for the default selection.
  with output:
    plot_pdb(dropdown.value)

VBox(children=(Dropdown(description='design:', options=('best', '0', '1', '2', '3', '4', '5', '6', '7', '8', '‚Ä¶

In [None]:
#@title Package and download results
#@markdown If you are having issues downloading the result archive,
#@markdown try disabling your adblocker and run this cell again.
#@markdown  If that fails click on the little folder icon to the
#@markdown  left, navigate to file: `name.result.zip`,
#@markdown  right-click and select \"Download\"
#@markdown (see [screenshot](https://pbs.twimg.com/media/E6wRW2lWUAEOuoe?format=jpg&name=small)).
# Archive everything under outputs/ whose name starts with `path`, plus the matching
# entries under outputs/traj/, into <path>.result.zip in the current directory.
# NOTE(review): `path` is not defined in this cell and is interpolated unquoted into
# the shell command; a value with spaces or shell metacharacters would break it.
!zip -r {path}.result.zip outputs/{path}* outputs/traj/{path}*
# Trigger a browser download of the archive; `files` is expected to be
# google.colab.files imported by an earlier cell.
files.download(f"{path}.result.zip")

  adding: outputs/CDA_trimmed_50i_32d/ (stored 0%)
  adding: outputs/CDA_trimmed_50i_32d/best_design16.pdb (deflated 78%)
  adding: outputs/CDA_trimmed_50i_32d/best_design4.pdb (deflated 78%)
  adding: outputs/CDA_trimmed_50i_32d/best_design13.pdb (deflated 78%)
  adding: outputs/CDA_trimmed_50i_32d/all_pdb/ (stored 0%)
  adding: outputs/CDA_trimmed_50i_32d/all_pdb/design28_n3.pdb (deflated 77%)
  adding: outputs/CDA_trimmed_50i_32d/all_pdb/design13_n2.pdb (deflated 78%)
  adding: outputs/CDA_trimmed_50i_32d/all_pdb/design29_n0.pdb (deflated 78%)
  adding: outputs/CDA_trimmed_50i_32d/all_pdb/design31_n0.pdb (deflated 78%)
  adding: outputs/CDA_trimmed_50i_32d/all_pdb/design19_n1.pdb (deflated 78%)
  adding: outputs/CDA_trimmed_50i_32d/all_pdb/design4_n2.pdb (deflated 78%)
  adding: outputs/CDA_trimmed_50i_32d/all_pdb/design0_n0.pdb (deflated 78%)
  adding: outputs/CDA_trimmed_50i_32d/all_pdb/design8_n2.pdb (deflated 78%)
  adding: outputs/CDA_trimmed_50i_32d/all_pdb/design12_n3.pdb (de

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Celda 1 ‚Äî Instala el helper y lanza el instalador de PyRosetta
!pip -q install pyrosetta-help

import sys, subprocess
# --no-clipboard evita pedir acceso al portapapeles de Colab
completed = subprocess.run([sys.executable, "-m", "pyrosetta_help", "install", "--no-clipboard"], check=True)
print("Instalaci√≥n finalizada (si no hubo errores arriba).")

CalledProcessError: Command '['/usr/bin/python3', '-m', 'pyrosetta_help', 'install', '--no-clipboard']' returned non-zero exit status 1.

In [None]:
# Quietly install the analysis packages: Biopython, FreeSASA, MDAnalysis, pandas, NumPy.
# NOTE(review): versions are not pinned, so a re-run may resolve to different releases.
!pip -q install biopython freesasa mdanalysis pandas numpy

[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m270.1/270.1 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m108.9/108.9 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m13.3/13.3 MB[0m [31m67.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m2.1/2.1 MB[0m [31m82.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m45.0/45

**Instructions**
---
---