In [None]:
#@title Download RGN2

#@markdown Please execute this cell by pressing the *Play* button on
#@markdown the left.
#@markdown The kernel will restart automatically after execution.

import os
import sys
import subprocess
from IPython import get_ipython
from IPython.utils import io

# Remote locations fetched during installation.
GIT_REPO = 'https://github.com/aqlaboratory/rgn2'
AF2_GIT_REPO = 'https://github.com/deepmind/alphafold'
RGN2_PARAM_SOURCE_URL = 'https://huggingface.co/christinafl/rgn2'
AF2_SOURCE_URL = 'https://storage.googleapis.com/alphafold/alphafold_params_2022-03-02.tar'
REFINER_URL = 'https://zhanggroup.org/ModRefiner/ModRefiner-l.zip'

# Local layout of the RGN2 checkout; every path below hangs off WORKDIR.
WORKDIR = './rgn2'
REQUIREMENTS, RGN2_PARAMS_DIR, RGN2_RUN_DIR, REFINER_DIR = (
    os.path.join(WORKDIR, child)
    for child in ('requirements.txt', 'resources', 'runs', 'ter2pdb')
)
RGN2_PARAM_RUN_DIR = os.path.join(RGN2_PARAMS_DIR, 'rgn2_runs')
REFINER_PATH = os.path.join(REFINER_DIR, 'ModRefiner-l.zip')

# The AlphaFold parameter archive is stored under its remote file name.
AF2_PARAMS_DIR = './alphafold/data/params'
AF2_PARAMS_PATH = os.path.join(AF2_PARAMS_DIR, os.path.basename(AF2_SOURCE_URL))

try:
  with io.capture_output() as captured:
    %cd '/content'

    %shell rm -rf {WORKDIR}
    %shell git clone {GIT_REPO} {WORKDIR}

    %shell for i in $(seq ${CONDA_SHLVL}); do conda deactivate; done
    %shell rm -rf /opt/conda
    %shell pip install -r {REQUIREMENTS}

    %shell wget -q -P /tmp \
      https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
        && bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \
        && rm /tmp/Miniconda3-latest-Linux-x86_64.sh

    CONDA_INIT = 'source /opt/conda/etc/profile.d/conda.sh && conda init'

    %shell rm -rf alphafold
    %shell git clone --branch main {AF2_GIT_REPO} alphafold
    %shell {CONDA_INIT} && conda create -y -q --name af2 python=3.7
    %shell {CONDA_INIT} && conda activate af2; pip install -r ./alphafold/requirements.txt
    %shell {CONDA_INIT} && conda activate af2; pip install --no-dependencies ./alphafold
    %shell {CONDA_INIT} && conda activate af2; pip install --upgrade jax==0.3.17 \
      jaxlib==0.3.15+cuda11.cudnn805 \
      -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
    %shell mkdir --parents "{AF2_PARAMS_DIR}"
    %shell wget -O "{AF2_PARAMS_PATH}" "{AF2_SOURCE_URL}"

    %shell tar --extract --verbose --file="{AF2_PARAMS_PATH}" \
      --directory="{AF2_PARAMS_DIR}" --preserve-permissions
    %shell rm "{AF2_PARAMS_PATH}"

    %shell tar --extract --verbose --file="{AF2_PARAMS_PATH}" \
      --directory="{AF2_PARAMS_DIR}" --preserve-permissions
    %shell rm "{AF2_PARAMS_PATH}"

    %shell GIT_LFS_SKIP_SMUDGE=1 git clone "{RGN2_PARAM_SOURCE_URL}" "{RGN2_PARAMS_DIR}"
    %shell cd {RGN2_PARAMS_DIR} && git lfs pull
    %shell mv {RGN2_PARAM_RUN_DIR} {RGN2_RUN_DIR}

    %shell wget -O {REFINER_PATH} {REFINER_URL}
    %shell unzip -o {REFINER_PATH} -d {REFINER_DIR}
    %shell rm {REFINER_PATH}
except subprocess.CalledProcessError:
  print(captured)
  raise

print("Installation complete!")
print("Restarting kernel...")
get_ipython().kernel.do_shutdown(True)

In [None]:
#@title Import Python packages
#@markdown Please execute this cell by pressing the *Play* button on 
#@markdown the left.

# Work from the RGN2 checkout: the aminobert path added to sys.path below is
# built from the current directory, and later cells use relative paths.
%cd '/content/rgn2'

import os
import sys
import subprocess
from pathlib import Path
from IPython.utils import io
from google.colab import files

# Reload modules automatically before running code, so edits to the checked
# out sources are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

# Make the `aminobert` directory under the current directory and the
# AlphaFold checkout importable. These must run before the imports below.
sys.path.append(os.path.join(os.getcwd(), 'aminobert'))
sys.path.append('/content/alphafold')
# NOTE(review): `data_processing` and `ter2pdb` are presumably resolved from
# the current working directory set by %cd above -- confirm.
from aminobert.prediction import aminobert_predict_sequence
from data_processing.aminobert_postprocessing import aminobert_postprocess
from ter2pdb import ter2pdb

In [None]:
#@markdown ### Enter the amino acid sequence to fold ⬇️
seq_id = '6WRW'  #@param {type:"string"}
sequence = 'DEEEIQKAIEELLRKGVSEEEAAIIIVQRFNVAVVVVVQDERQGKHISEYIRRYIPEADVILFANLVVIKVETHELSTRVWEAAQKAY'  #@param {type:"string"}

# Longest sequence (in residues) accepted by the length check below.
MAX_SEQUENCE_LENGTH = 1023

# Remove every whitespace character (spaces, tabs, line breaks including
# carriage returns from pasted text); upper-case.
sequence = ''.join(sequence.split()).upper()
aatypes = set('ACDEFGHIKLMNPQRSTVWY')  # 20 standard aatypes
# An empty sequence would slip through both checks below (the empty set is a
# subset of anything), so reject it here before any downstream step runs.
if not sequence:
  raise ValueError('Input sequence is empty. Please enter an amino acid sequence.')
if not set(sequence).issubset(aatypes):
  raise ValueError(f'Input sequence contains non-amino acid letters: {set(sequence) - aatypes}. AlphaFold only supports 20 standard amino acids as inputs.')
if len(sequence) > MAX_SEQUENCE_LENGTH:
  raise ValueError(f'Input sequence is too long: {len(sequence)} amino acids, while the maximum is {MAX_SEQUENCE_LENGTH}. Please use the full AlphaFold system for long sequences.')

# Locations, relative to the current working directory, used by later cells.
DATA_DIR = 'aminobert_output'
RUN_DIR = 'runs/15106000'
OUTPUT_DIR = 'output'
FULINIT_DIR = 'output/modref'
REFINE_DIR = 'output/refine_model1'
SEQ_PATH = os.path.join(DATA_DIR, f'{seq_id}.fa')
TER_PATH = os.path.join(RUN_DIR, '1', 'outputsTesting', f'{seq_id}.tertiary')
# Creating FULINIT_DIR also creates its parent, OUTPUT_DIR.
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(FULINIT_DIR, exist_ok=True)

In [None]:
#@title Generate Aminobert Embeddings
#@markdown Please execute this cell by pressing the *Play* button on 
#@markdown the left.

# Passed as `dataset_name` to the post-processing step.
DATASET_NAME = '1'
# Forwarded as `prepend_m` to both AminoBERT calls below.
PREPEND_M = True
AMINOBERT_CHKPT_DIR = 'resources/aminobert_checkpoint/AminoBERT_runs_v2_uniparc_dataset_v2_5-1024_fresh_start_model.ckpt-1100000'

try:
  with io.capture_output() as captured:
    aminobert_predict_sequence(seq=sequence, header=seq_id, prepend_m=PREPEND_M,
                           checkpoint=AMINOBERT_CHKPT_DIR, data_dir=DATA_DIR)
    aminobert_postprocess(data_dir=DATA_DIR, dataset_name=DATASET_NAME, prepend_m=PREPEND_M)
except Exception:
  # Both steps are plain Python calls, so a failure is not necessarily a
  # subprocess.CalledProcessError. Print the captured log for any error,
  # then re-raise so the original traceback is still reported.
  print(captured)
  raise

In [None]:
#@title Run RGN2
#@markdown Please execute this cell by pressing the *Play* button on 
#@markdown the left.
#@markdown Once this cell has been executed, a PDB file with the predicted
#@markdown C-alpha trace will be automatically downloaded to your computer.
try:
  with io.capture_output() as captured:
    cmd = (f"python rgn/protling.py {os.path.join(RUN_DIR, 'configuration')} "
           f"-p -e 'weighted_testing' -a -g 0")
    %shell {cmd}
except subprocess.CalledProcessError:
  print(captured)
  raise

print('Prediction completed!')
ter2pdb.predicted_ter2pdb(seq_path=SEQ_PATH, ter_path=TER_PATH,
                          output_dir=OUTPUT_DIR, seq_id=seq_id)
files.download(os.path.join(OUTPUT_DIR, f'{seq_id}_{ter2pdb.CA_TRACE_FNAME}'))

In [None]:
#@title Refinement
#@markdown Please execute this cell by pressing the *Play* button on 
#@markdown the left.
#@markdown Once this cell has been executed, a PDB file with the AF2Rank-refined
#@markdown structure will be automatically downloaded to your computer.

ter2pdb.run_ca_to_allatom(seq_path=SEQ_PATH, ter_path=TER_PATH,
                          output_dir=FULINIT_DIR, seq_id=seq_id)


env_init = 'source /opt/conda/etc/profile.d/conda.sh && conda init && conda activate af2'
jax_env_vars = 'TF_FORCE_UNIFIED_MEMORY=1 XLA_PYTHON_CLIENT_MEM_FRACTION=2.0'
cmd = (f"{jax_env_vars} python ter2pdb/run_af2rank.py refine_model1 "
       f"--target_list {seq_id} --af2_dir /content/alphafold/ "
       f"--seq_dir {Path(SEQ_PATH).parent} --pdb_dir {FULINIT_DIR} "
       f"--output_dir {OUTPUT_DIR} --deterministic --seq_replacement - "
       f"--mask_sidechains_add_cb --model_num 1")
try:
  with io.capture_output() as captured:
    %shell {env_init} && {cmd}
except subprocess.CalledProcessError:
  print(captured)
  raise

print('Refinement completed!')

files.download(os.path.join(REFINE_DIR, f'{seq_id}_rgn2.pdb'))