# **ProteinMPNN + AlphaFold2 for RFdiffusion Binders**

This notebook uses [ColabDesign](https://github.com/sokrypton/ColabDesign) to generate sequences for RFdiffusion binder backbones with ProteinMPNN, and then runs AlphaFold2 to screen the resulting binders.

**References:**
 - RFdiffusion: [Watson et al. *Nature* 2023.](https://www.nature.com/articles/s41586-023-06415-8)
 - ProteinMPNN: [Dauparas et al. *Science* 2022.](https://www.science.org/doi/full/10.1126/science.add2187)
 - AlphaFold2: [Jumper et al. *Nature* 2021.](https://www.nature.com/articles/s41586-021-03819-2)
 - ColabFold: [Mirdita et al. *Nature Methods* 2022.](https://www.nature.com/articles/s41592-022-01488-1)
 - AF2 initial guess: [Bennett et al. *Nature Communications* 2023.](https://www.nature.com/articles/s41467-023-38328-5)

In [None]:
#@title **1) Setup**
%%time

import os, time, signal, glob
import sys, random, string, re
from google.colab import files

print("Installing ColabDesign...")
os.system("pip -q install git+https://github.com/sokrypton/ColabDesign.git")
# Only create the convenience link once: running `ln -s` again when the link
# already exists would place a second, nested link inside the package directory.
if not os.path.lexists("colabdesign"):
  os.system("ln -s /usr/local/lib/python3.*/dist-packages/colabdesign colabdesign")

print('Getting params...')
if not os.path.isdir("params"):
  # -y keeps apt-get from waiting on a confirmation prompt.
  os.system("apt-get install -y aria2")
  os.system("mkdir params")
  # Download and unpack the parameters. This call blocks until the whole
  # pipeline has finished; params/done.txt marks completion.
  os.system("(\
  aria2c -q -x 16 https://files.ipd.uw.edu/krypton/schedules.zip; \
  aria2c -q -x 16 https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar; \
  tar -xf alphafold_params_2022-12-06.tar -C params; \
  touch params/done.txt)")

# Create the short aliases model_<i>_ptm.npz -> params_model_<i>_ptm.npz INSIDE
# params/. (A `!cd params` line only changes directory in its own subshell, so
# a following `!ln -s` would have created dangling links in the working
# directory instead.) The link target is relative to the link's own directory.
for model_index in range(1, 6):
  target_name = f"params_model_{model_index}_ptm.npz"
  link_path = os.path.join("params", f"model_{model_index}_ptm.npz")
  if os.path.isfile(os.path.join("params", target_name)) and not os.path.lexists(link_path):
    os.symlink(target_name, link_path)

In [None]:
#@title **2) Specifications**
#@markdown Fill out the options below, run this cell, then upload a zipped folder of RFdiffusion pdb files when prompted.

from google.colab import files
import os, glob

# User-editable settings (Colab form fields). The values below are read by the
# "Run ProteinMPNN/AF2" and "Download Results" cells.

# Passed verbatim as --contigs to colabdesign/rf/designability_test.py.
contigs = 'A1-102:80-80' #@param {type:'string'}

# Passed as --loc to the designability test; this folder is also what gets
# zipped and downloaded in the final cell.
output_folder_name = 'output_colab_test11' #@param {type:'string'}

#@markdown *If your RFdiffusion pdbs have the binder as chain A and the target as B, check the box below:*

# When True, chain labels A and B are swapped in every uploaded pdb.
flip_chains = True #@param {type:"boolean"}

#@markdown <u>ProteinMPNN:</u>

# Forwarded as --num_seqs, --mpnn_sampling_temp, --rm_aa and (flag) --use_soluble.
MPNN_seqs_per_struct = 2 #@param {type:"raw"}
MPNN_sampling_temp = 0.000001 #@param {type:"raw"}
MPNN_exclude_aa = 'C' #@param {type:"string"}
MPNN_use_soluble_params = False #@param {type:"boolean"}

#@markdown <u>AlphaFold2:</u>

# Forwarded as --num_recycles and (flags) --use_multimer, --initial_guess.
AF_num_recycles = 3 #@param {type:"raw"}
AF_use_multimer = False #@param {type:"boolean"}
AF_use_initial_guess = True #@param {type:"boolean"}

import zipfile

upload_dict = files.upload()
if not upload_dict:
  # Give a readable error instead of an IndexError on the next line.
  raise RuntimeError('No file was uploaded; re-run this cell and select a .zip archive.')
uploaded_file_name = list(upload_dict.keys())[0]

# Extract with the zipfile module instead of a shell `unzip` call: the file
# name is never parsed by a shell (so spaces or parentheses in it are safe),
# and existing files are overwritten, so re-running this cell always starts
# from freshly extracted pdbs.
with zipfile.ZipFile(uploaded_file_name) as archive:
  archive.extractall()

# The pdbs are expected in a folder named like the archive minus its
# four-character extension (e.g. designs.zip -> designs/).
pdb_folder = uploaded_file_name[:-4]
count = len(glob.glob(os.path.join(pdb_folder, '*.pdb')))

print(f'{count} pdbs unzipped.')
if count == 0:
  print(f"WARNING: no .pdb files found in '{pdb_folder}/'. "
        'The zip must contain a top-level folder with the same name as the archive.')

if flip_chains:
  # Swap chain labels A <-> B by editing the chain-ID column (column 22,
  # index 21) of coordinate records only. A substring replacement such as
  # `sed 's/ A / X /g'` misses chain IDs that are directly followed by a
  # four-digit residue number and can rewrite unrelated " A "/" B " text on
  # other lines.
  chain_swap = {'A': 'B', 'B': 'A'}
  coordinate_records = ('ATOM', 'HETATM', 'ANISOU', 'TER')
  for pdbpath in glob.glob(os.path.join(uploaded_file_name[:-4], '*.pdb')):
    with open(pdbpath, 'r') as pdbfile:
      pdblines = pdbfile.readlines()
    with open(pdbpath, 'w') as pdbfile:
      for line in pdblines:
        if line.startswith(coordinate_records) and len(line) > 21 and line[21] in chain_swap:
          line = line[:21] + chain_swap[line[21]] + line[22:]
        pdbfile.write(line)
  print('Chains flipped.')

In [None]:
#@title **3) Run ProteinMPNN/AF2**
%%time


def run_designability_test(pdbpath):
  label = os.path.basename(pdbpath).split('.')[0]
  arg_string = f'--pdb={pdbpath} --loc={output_folder_name} --contigs=\'{contigs}\' --num_seqs={MPNN_seqs_per_struct} --mpnn_sampling_temp={MPNN_sampling_temp} --rm_aa={MPNN_exclude_aa} --num_recycles={AF_num_recycles}'
  if AF_use_multimer:
    arg_string += ' --use_multimer'
  if AF_use_initial_guess:
    arg_string += ' --initial_guess'
  if MPNN_use_soluble_params:
    arg_string += ' --use_soluble'
  !python colabdesign/rf/designability_test.py {arg_string}
  os.system(f'cat {output_folder_name}/design.fasta')
  with open(f'{output_folder_name}/design.fasta', 'r') as fastafile:
    flines = [line for line in fastafile]
  with open(f'{output_folder_name}/full_fasta.fasta', 'a') as newfasta:
    for i in range(MPNN_seqs_per_struct):
      newfasta.writelines(f'>{label}_seq_' + flines[(i * 2)].split(' n:')[1])
      newfasta.writelines(flines[(i * 2) + 1])
  os.system(f'rm {output_folder_name}/design.fasta')
  os.system(f'rm {output_folder_name}/mpnn_results.csv')
  if not os.path.exists(f'{output_folder_name}/af2_pdbs'):
    os.system(f'mkdir {output_folder_name}/af2_pdbs')
  for pdbresultpath in glob.glob(os.path.join(output_folder_name, 'all_pdb', '*.pdb')):
    seqnum = os.path.basename(pdbresultpath).split('.')[0].split('_n')[-1]
    os.system(f'mv {pdbresultpath} {output_folder_name}/af2_pdbs/{label}_seq_{seqnum}_af2.pdb')
  os.system(f'rm {output_folder_name}/best*.pdb')
  os.system(f'rm -r {output_folder_name}/all_pdb')
  print(f'Done with {pdbpath}!\n')



# glob.glob returns paths in arbitrary order; sort them so backbones are
# processed (and appended to full_fasta.fasta) in a reproducible order.
for pdbpath in sorted(glob.glob(os.path.join(uploaded_file_name[:-4], '*.pdb'))):
  run_designability_test(pdbpath)



In [None]:
#@title **4) Download Results**

import shutil

if not os.path.isdir(output_folder_name):
  raise FileNotFoundError(
      f"Output folder '{output_folder_name}' does not exist; run the ProteinMPNN/AF2 cell first.")

# Build <output_folder_name>.zip with the standard library: no system package
# has to be installed, folder names with spaces are handled, and an archive
# left over from an earlier run is replaced rather than updated in place.
# Entries are stored under '<output_folder_name>/', as `zip -r` would store them.
archive_path = shutil.make_archive(
    output_folder_name, 'zip', root_dir='.', base_dir=output_folder_name)

files.download(archive_path)

print('Done! :)')