#**RFdiffusion aa**
RFdiffusion aa is a method for structure generation, with or without conditional information (a motif, target etc). It can perform a whole range of protein design challenges as we have outlined in the RFdiffusion [manuscript](https://www.science.org/doi/10.1126/science.adl2528).

**<font color="red">NOTE:</font>** This notebook is in development, we are still working on adding all the options from the manuscript above.

For **instructions**, see end of Notebook.



This is a modified version of Sergey's notebook by Felipe Engelberger, see [original version](https://colab.research.google.com/github/sokrypton/ColabDesign/blob/main/rf/examples/diffusion_ori.ipynb) of this notebook (from 31Mar2023).



In [2]:
#@title COLAB ONLY setup **RFdiffusion All Atom** (~5m)
#%%time

import os
import subprocess
import time

# Function to detect if running on Google Colab
def is_colab():
    return "COLAB_GPU" in os.environ

# Base directory setup
if is_colab():
    BASE_DIR = "/content"
else:
    # For local setup, adjust this path as per your local environment
    # By default we will assume you are in the devcontainer path
    BASE_DIR = "/workspaces/all_atom_binder_diffusion"
    
    # For setups outside the devcontainer, you may need to adjust this path
    #BASE_DIR = os.path.expanduser("~")

# Adjust paths based on the environment
PARAMS_DIR = os.path.join(BASE_DIR, "params")
RF_DIFFUSION_DIR = os.path.join(BASE_DIR, "RFdiffusion")
RF_ALL_ATOM_DIR = os.path.join(BASE_DIR, "rf_diffusion_all_atom")

# Ensure the params directory exists
os.makedirs(PARAMS_DIR, exist_ok=True)

def run_command(command, progress_message, wait=True):
    """
    Run a system command with a progress message.
    If wait is False, the command is executed in the background.
    """
    print(f"Starting: {progress_message}")
    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
    if wait:
        stdout, stderr = process.communicate()
        if process.returncode != 0:
            print(f"Error during {progress_message}: {stderr}")
            raise subprocess.CalledProcessError(process.returncode, command)
        print(f"Completed: {progress_message}")
    return process

def setup_environment_colab():
    # Install aria2 if not already installed (for faster downloads)
    run_command("apt-get install -y aria2", "Installing aria2 for faster downloads")

    # Install Python dependencies
    run_command("pip install jedi omegaconf hydra-core icecream pyrsistent pynvml decorator git+https://github.com/sokrypton/ColabDesign.git@v1.1.1 py3Dmol", "Installing Python dependencies")

    # If parameters are already downloaded, skip the download process
    if not os.path.isfile(os.path.join(PARAMS_DIR, "done.txt")):
        print("Downloading parameters and models...")
        
        # Start downloading parameters and models in the background
        download_process = run_command(
            f"cd {PARAMS_DIR} && aria2c -q -x 16 https://files.ipd.uw.edu/krypton/schedules.zip && "
            #"aria2c -q -x 16 https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar && "
            "aria2c -q -x 16 http://files.ipd.uw.edu/pub/RF-All-Atom/weights/RFDiffusionAA_paper_weights.pt && "
            "tar -xf alphafold_params_2022-12-06.tar && touch done.txt",
            "Downloading and extracting parameters", wait=False)

    # Clone RFdiffusion repository
    if not os.path.isdir(RF_DIFFUSION_DIR):
        run_command(f"git clone --branch max https://github.com/engelberger/RFdiffusion.git {RF_DIFFUSION_DIR}", "Cloning RFdiffusion repository")

    # Clone RFdiffusion all atom repository
    if not os.path.isdir(RF_ALL_ATOM_DIR):
        run_command(f"git clone --recurse-submodules --branch colab_march_2024 https://github.com/engelberger/rf_diffusion_all_atom.git {RF_ALL_ATOM_DIR}", "Cloning RFdiffusion all atom repository")

    # Install DGL
    run_command("pip install dgl -f https://data.dgl.ai/wheels/cu121/repo.html", "Installing DGL")

    # Install SE3 Transformer
    run_command(f"cd {os.path.join(RF_DIFFUSION_DIR, 'env/SE3Transformer')} && pip install -q --no-cache-dir -r requirements.txt && pip install -q .", "Installing SE3 Transformer")

    # Download and set execute permissions for 'ananas'
    run_command(f"wget -qnc https://files.ipd.uw.edu/krypton/ananas -P {BASE_DIR} && chmod +x {os.path.join(BASE_DIR, 'ananas')}", "Downloading and setting up 'ananas'")

    # Wait for the download process to complete
    download_process.communicate()
    print("Environment setup complete.")

# Call the setup function
setup_environment_colab()

Starting: Installing aria2 for faster downloads
Completed: Installing aria2 for faster downloads
Starting: Installing Python dependencies
Completed: Installing Python dependencies
Downloading parameters and models...
Starting: Downloading and extracting parameters
Starting: Installing DGL
Completed: Installing DGL
Starting: Installing SE3 Transformer
Completed: Installing SE3 Transformer
Starting: Downloading and setting up 'ananas'
Completed: Downloading and setting up 'ananas'
Environment setup complete.


In [3]:
import os
import requests
import random
import string
import yaml
import subprocess

import shutil
import glob


# Function to detect if running on Google Colab
def is_colab():
    return "COLAB_GPU" in os.environ

# Base directory setup
BASE_DIR = "/content" if is_colab() else os.path.expanduser("~")
INPUT_DIR = os.path.join(BASE_DIR, "input")
OUTPUT_DIR = os.path.join(BASE_DIR, "output")

# Ensure the input and output directories exist
os.makedirs(INPUT_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)

def download_pdb(pdb_code, output_dir=INPUT_DIR):
    """
    Download a PDB file given a PDB code.
    """
    url = f"https://files.rcsb.org/download/{pdb_code}.pdb"
    response = requests.get(url)
    if response.status_code == 200:
        pdb_path = os.path.join(output_dir, f"{pdb_code}.pdb")
        with open(pdb_path, 'w') as file:
            file.write(response.text)
        return pdb_path
    else:
        raise ValueError(f"Failed to download PDB file for {pdb_code}")

def handle_pdb_input(pdb_input_type, pdb_code=None, output_dir=INPUT_DIR):
    """
    Handle PDB input by either uploading a file or downloading it using a PDB code.
    """
    if pdb_input_type == "upload":
        if is_colab():
            from google.colab import files
            uploaded = files.upload()
            pdb_filename = next(iter(uploaded))
            pdb_path = os.path.join(output_dir, pdb_filename)
            with open(pdb_path, 'wb') as file:
                file.write(uploaded[pdb_filename])
            return pdb_path
        else:
            raise EnvironmentError("File upload is only supported on Google Colab.")
    elif pdb_input_type == "pdb_code":
        return download_pdb(pdb_code, output_dir)
    else:
        raise ValueError("Invalid PDB input type")

def run_rfdiffusion_all_atom(config, output_subfolder="ligand_protein_motif", output_prefix="sample", show_last_n_lines=5, save_stdout=True):
    """
    Wrapper function to run rfdiffusion all atom with specified options, using a YAML configuration file.
    The configuration is passed as a dictionary.
    """
    # Generate a base output directory name without duplicating parts of the path
    base_output_path = os.path.join(OUTPUT_DIR, output_subfolder)
    print(f"Output base directory: {base_output_path}")

    # Initialize counter to generate a unique output directory
    counter = 0
    unique_output_path = f"{base_output_path}/{output_prefix}_{counter}"
    while os.path.exists(unique_output_path):
        counter += 1
        unique_output_path = f"{base_output_path}/{output_prefix}_{counter}"

    final_output_path = unique_output_path
    print(f"Final output path: {final_output_path}")
    # Ensure the final output directory exists
    os.makedirs(final_output_path, exist_ok=True)

    # Update the output_prefix in the config with the actual output path
    config["inference"]["output_prefix"] = os.path.join(final_output_path, output_prefix)

    # Write the configuration to a YAML file inside the correct output directory
    config_filename = "config.yaml"  # Configuration file name
    config_file_path = os.path.join(final_output_path, config_filename)  # Full path to the configuration file
    with open(config_file_path, 'w') as file:
        yaml.dump(config, file)

    # Correct the command to run the inference script with the YAML config file
    cmd = [
        "python", "./rf_diffusion_all_atom/run_inference.py",
        f"--config-name={config_filename[:-5]}",  # Remove the '.yaml' extension
        f"--config-dir={final_output_path}",
        f"diffuser.T={config['diffuser']['T']}" # I do not know why this need to be added again if its already in the config.yaml
    ]

    # Print the command to the console
    print(f"Running command: {' '.join(cmd)}")

    # Initialize a list to keep track of the output lines
    output_lines = []

    # Use subprocess.Popen to run the command and capture stdout in real-time
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)

    # Correct the path for the log file to ensure it's saved in the final_output_path
    if save_stdout:
        log_file_path = os.path.join(final_output_path, "run_inference.log")  # Corrected path
        log_file = open(log_file_path, "w")

    # Periodically check for new output
    while True:
        output = process.stdout.readline()
        if output == '' and process.poll() is not None:
            break
        if output:
            output_lines.append(output.strip())
            # Save to log file if required
            if save_stdout:
                log_file.write(output)

            # Display the last N lines if required
            if show_last_n_lines > 0:
                display_lines = output_lines[-show_last_n_lines:]
                print("\n".join(display_lines))

        # time.sleep(1)  # Adjust the sleep time as needed

    # Ensure the process has finished and close the log file if it was opened
    process.poll()
    if save_stdout:
        log_file.close()




#@title ### Small molecule binder design with protein motif

# Interface for specifying PDB input
pdb_input_type = "pdb_code" #@param ["upload", "pdb_code", "manual_path"]
pdb_code = "7v11" #@param {type:"string"}

if pdb_input_type == "pdb_code":
    input_pdb = handle_pdb_input(pdb_input_type, pdb_code)
elif pdb_input_type == "upload":
    print("Please upload your PDB file:")
    # Assuming running in Colab
    input_pdb = handle_pdb_input(pdb_input_type)
elif pdb_input_type == "manual_path":
    input_pdb = pdb_code  # Assuming the user has manually specified the path

contigs = "100-100" #@param {type:"string"}
contig_length = "" #@param {type:"string"}
ligand = "OQO" #@param {type:"string"}
num_designs = 1 #@param {type:"integer"}
design_startnum = 0 #@param {type:"integer"}
output_prefix = "sample" #@param {type:"string"}
output_subfolder = "ligand_protein_motif" #@param {type:"string"}

deterministic = True #@param {type:"boolean"}
T = 50 #@param {type:"integer"}

# Split contigs string into list
contigs_list = contigs.split(',')

# Convert contigs list to string format for YAML
contigs_yaml = [f"{contig}" for contig in contigs_list]
contig_length = contig_length if contig_length else None

# Define the configuration dictionary based on the user inputs
config = {
    "inference": {
        "deterministic": deterministic,
        "input_pdb": input_pdb,
        "ligand": ligand,
        "num_designs": num_designs,
        "design_startnum": design_startnum,
        "ckpt_path": "./params/RFDiffusionAA_paper_weights.pt",
        "model_runner": "NRBStyleSelfCond"
    },
    "diffuser": {
        "T": T
    },
    "contigmap": {
        "contigs": contigs_yaml,
        "length": contig_length
    },
    "model": {"freeze_track_motif": "True"},
    "defaults": ["aa"]
}

# Specify additional options for the run function
show_last_n_lines = 1  # Show only the last 1 lines of stdout to avoid cluttering the notebook, above 1 is not working at the moment
save_stdout = True  # Save the stdout as a logfile in the output folder

# Call the run_rfdiffusion_all_atom function with the specified configuration and options
run_rfdiffusion_all_atom(config, output_subfolder=output_subfolder, output_prefix=output_prefix, show_last_n_lines=show_last_n_lines, save_stdout=save_stdout)

#@markdown After running the diffusion function, you can zip the last job's output for download:

#@markdown Run the following cell to zip and download the last job's output.

Output base directory: /root/output/ligand_protein_motif
Final output path: /root/output/ligand_protein_motif/sample_0
Running command: python ./rf_diffusion_all_atom/run_inference.py --config-name=config --config-dir=/root/output/ligand_protein_motif/sample_0 diffuser.T=50
Error executing job with overrides: ['diffuser.T=50']
Traceback (most recent call last):
File "/workspaces/all_atom_binder_diffusion/./rf_diffusion_all_atom/run_inference.py", line 74, in main
sampler = get_sampler(conf)
File "/workspaces/all_atom_binder_diffusion/./rf_diffusion_all_atom/run_inference.py", line 79, in get_sampler
make_deterministic()
File "/workspaces/all_atom_binder_diffusion/./rf_diffusion_all_atom/run_inference.py", line 58, in make_deterministic
warm_up_spherical_harmonics()
File "/workspaces/all_atom_binder_diffusion/./rf_diffusion_all_atom/run_inference.py", line 50, in warm_up_spherical_harmonics
relative_pos = torch.tensor([[1.,1.,1.], [1.,1.,1.]]).to(device).to(torch.float32)
RuntimeError: 

In [4]:
#@title Display 3D structure {run: "auto"}
animate = "interactive" #@param ["none", "movie", "interactive"]
color = "chain" #@param ["rainbow", "chain", "plddt"]
denoise = True
dpi = 100 #@param ["100", "200", "400"] {type:"raw"}

from colabdesign.shared.plot import pymol_color_list
from colabdesign.rf.utils import get_ca, get_Ls, make_animation
from string import ascii_uppercase, ascii_lowercase
import os
import ipywidgets as widgets
from IPython.display import display, HTML
import py3Dmol

alphabet_list = list(ascii_uppercase + ascii_lowercase)


# Construct the base output directory
base_output_dir = os.path.join(OUTPUT_DIR, output_subfolder)

def find_latest_output_dir(base_dir, prefix):
    """Find the latest output directory based on the prefix."""
    dirs = [d for d in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, d)) and d.startswith(prefix)]
    if not dirs:
        raise FileNotFoundError(f"No output directories found with prefix '{prefix}' in '{base_dir}'")
    latest_dir = sorted(dirs, key=lambda x: int(x.split('_')[-1]))[-1]
    return os.path.join(base_dir, latest_dir)

def plot_pdb(num=0):
    # Find the latest output directory
    latest_output_dir = find_latest_output_dir(base_output_dir, output_prefix)

    # Construct the path to the PDB file
    pdb_path = os.path.join(latest_output_dir, f"{output_prefix}_{num}.pdb")

    # Load the PDB file
    pdb_str = open(pdb_path, 'r').read()

    # Initialize the 3Dmol.js viewer
    view = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js')
    view.addModel(pdb_str, 'pdb')

    # Apply color scheme
    if color == "rainbow":
        view.setStyle({'cartoon': {'color':'spectrum'}})
    elif color == "chain":
        # Example: Apply color by chain
        for n, chain, c in zip(range(len(contigs)), alphabet_list, pymol_color_list):
            view.setStyle({'chain': chain}, {'cartoon': {'color': c}})
            # If chain == B the visualization should be atoms
            if chain == "B":
                view.setStyle({'chain': chain}, {'stick': {}})
    else:
        # Example: Apply a custom color scheme
        view.setStyle({'cartoon': {'colorscheme': {'prop':'b','gradient': 'roygb','min':0.5,'max':0.9}}})

    # Zoom to fit and display the viewer
    view.zoomTo()
    view.show()



if num_designs > 1:
  output = widgets.Output()
  def on_change(change):
    if change['name'] == 'value':
      with output:
        output.clear_output(wait=True)
        plot_pdb(change['new'])
  dropdown = widgets.Dropdown(
      options=[(f'{k}',k) for k in range(num_designs)],
      value=0, description='design:',
  )
  dropdown.observe(on_change)
  display(widgets.VBox([dropdown, output]))
  with output:
    plot_pdb(dropdown.value)
else:
  plot_pdb()

FileNotFoundError: [Errno 2] No such file or directory: '/root/output/ligand_protein_motif/sample_0/sample_0.pdb'

In [None]:
#@title Package and download results
#@markdown If you are having issues downloading the result archive,
#@markdown try disabling your adblocker and run this cell again.
#@markdown  If that fails click on the little folder icon to the
#@markdown  left, navigate to file: `name.result.zip`,
#@markdown  right-click and select \"Download\"
#@markdown (see [screenshot](https://pbs.twimg.com/media/E6wRW2lWUAEOuoe?format=jpg&name=small)).
import shutil

def zip_last_job(output_subfolder=output_subfolder, output_prefix=output_prefix, counter):
    """
    Zip the last job's output directory for download.
    """
    output_prefix = output_prefix + "/" + output_subfolder + "_" + counter
    base_output_path = os.path.join(OUTPUT_DIR, output_prefix)
    counter = 0
    while os.path.exists(f"{base_output_path}_{counter}"):
        counter += 1
    # Adjust counter to get the last existing directory
    counter -= 1
    if counter >= 0:
        output_path = f"{base_output_path}_{counter}"
        shutil.make_archive(output_path, 'zip', output_path)
        return f"{output_path}.zip"
    else:
        print("No output directory found.")
        return None


# Assuming you're in a Jupyter notebook cell
from google.colab import files

# This cell should be run after the diffusion function to zip and download the output
zip_path = zip_last_job(output_prefix=output_prefix)

if zip_path and is_colab():
    from google.colab import files
    files.download(zip_path)
else:
    print("Zip file path:", zip_path)
    print("Note: Automatic download is only supported in Google Colab.")