<a href="https://colab.research.google.com/github/artemg97/af2bind_prod/blob/main/AF2BIND_prod.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### AF2BIND: Prediction of ligand-binding sites using AlphaFold2

AF2BIND is a simple and fast notebook that runs inference on the output obtained from [AlphaFold](https://github.com/deepmind/alphafold).


The method uses the binder protocol of the [ColabDesign](https://github.com/sokrypton/ColabDesign) framework, which facilitates the identification of binding sites for protein-peptide and protein-ligand complexes.

Authors/Collaborators :

*   Artem Gazizov (agazizov@fas.harvard.edu)
*    Sergey Ovchinnikov (so@fas.harvard.edu)
*    Nicholas Polizzi (nicholasf_polizzi@dfci.harvard.edu)


<figure>
<center>
<img src='https://drive.google.com/uc?export=view&id=1fHB9irpruKRUQBIEd45pp9go4QKsFigg'  width="300" height="150"  align="right" />
</center>
</figure>






In [None]:
%%time
#@title Install AlphaFold2 (~2 mins)
#@markdown Please execute this cell by pressing the *Play* button on
#@markdown the left.

#@markdown **Note**: This installs the ColabDesign framework and downloads the AlphaFold2 and AF2BIND parameters.
import os, time
# One-time setup: the existence of the "params" directory is used as the marker
# that this block has already been run, so re-executing the cell skips it.
if not os.path.isdir("params"):
  # Start the downloads in a background subshell (note the trailing "&") so
  # they overlap with the pip installs below: create params/, install aria2,
  # fetch the AlphaFold parameter tarball and the af2bind archive, unpack both,
  # then create params/done.txt as a completion marker.
  # NOTE(review): the shell commands are chained with ";" rather than "&&", so
  # done.txt is touched even if an earlier download or extraction step failed.
  print("installing ColabDesign")
  os.system("(mkdir params; apt-get install aria2 -qq; \
  aria2c -q -x 16 https://storage.googleapis.com/alphafold/alphafold_params_2022-03-02.tar; \
  aria2c -q -x 16 https://files.ipd.uw.edu/krypton/af2bind.zip; \
  tar -xf alphafold_params_2022-03-02.tar -C params; unzip af2bind.zip; touch params/done.txt )&")

  # Install ColabDesign pinned to tag v1.1.1, symlink the installed package
  # into the working directory, and install atomium.
  os.system("pip -q install git+https://github.com/sokrypton/ColabDesign.git@v1.1.1")
  os.system("ln -s /usr/local/lib/python3.*/dist-packages/colabdesign colabdesign")
  os.system("pip -q install atomium")


  # Block until the background job has written its completion marker.
  # NOTE(review): the marker is polled every 5 seconds with no timeout; if the
  # background subshell is killed before "touch" runs, this loop never exits.
  if not os.path.isfile("params/done.txt"):
    print("downloading AlphaFold params")
    while not os.path.isfile("params/done.txt"):
      time.sleep(5)

import os
from colabdesign import mk_afdesign_model
from IPython.display import HTML
from google.colab import files
import numpy as np

def get_pdb(pdb_code=""):
  """Resolve `pdb_code` to the path of a local PDB file.

  Resolution order:
    1. empty / None      -> prompt for an upload in Colab, saved as "tmp.pdb"
    2. existing file     -> returned unchanged
    3. 4-character code  -> downloaded from files.rcsb.org as "<code>.pdb"
    4. anything else     -> downloaded from alphafold.ebi.ac.uk as
                            "AF-<code>-F1-model_v4.pdb"

  Args:
    pdb_code: identifier or file path (see above).

  Returns:
    Path of the PDB file on local disk.

  Raises:
    ValueError: the upload dialog was dismissed without selecting a file.
    FileNotFoundError: the download did not produce the expected file (unknown
      identifier, network failure) or the `wget` executable is unavailable.
  """
  # Local import so the function does not depend on the notebook's import cell.
  import subprocess

  if pdb_code is None or pdb_code == "":
    upload_dict = files.upload()
    if not upload_dict:
      # Previously surfaced as an opaque IndexError.
      raise ValueError("No file was uploaded; please select a PDB file.")
    # Only the first uploaded file is used.
    pdb_string = next(iter(upload_dict.values()))
    with open("tmp.pdb","wb") as out:
      out.write(pdb_string)
    return "tmp.pdb"

  if os.path.isfile(pdb_code):
    return pdb_code

  if len(pdb_code) == 4:
    url = f"https://files.rcsb.org/view/{pdb_code}.pdb"
    filename = f"{pdb_code}.pdb"
  else:
    url = f"https://alphafold.ebi.ac.uk/files/AF-{pdb_code}-F1-model_v4.pdb"
    filename = f"AF-{pdb_code}-F1-model_v4.pdb"

  # Pass an argument list instead of a shell string: `pdb_code` comes straight
  # from a form field and must never be interpreted by a shell.
  # -q: quiet, -nc: keep an already-downloaded copy instead of re-fetching.
  subprocess.run(["wget", "-qnc", url], check=False)

  # Fail here with a clear message rather than returning a path that does not
  # exist and letting a later cell fail with an unrelated-looking error.
  if not os.path.isfile(filename):
    raise FileNotFoundError(
        f"Could not download '{pdb_code}' from {url}; "
        "check the identifier and the network connection.")
  return filename

In [None]:
#@title **Upload PDB**
#@markdown - Please indicate target pdb and chain (leave pdb blank for custom upload)
# Target identifier handed to get_pdb: an existing local file path, a
# 4-character code (fetched from files.rcsb.org), or any other string (fetched
# from alphafold.ebi.ac.uk as AF-<id>-F1-model_v4.pdb). Blank prompts an upload.
target_pdb = "4OVT" #@param {type:"string"}
# Chain of the target structure; passed to prep_inputs and used to name the
# PyMOL selection in later cells.
target_chain = "A" #@param {type:"string"}
# Local path of the PDB file that the following cells operate on.
pdb_filename = get_pdb(target_pdb)

In [None]:
#@title **Run AlphaFold2** 🔬
# Probe sequence placed in the binder slot: one residue for each of the 20
# letters below. Its length defines the binder length everywhere in this cell,
# so the sequence and the length cannot drift apart.
BINDER_SEQ = "ACDEFGHIKLMNPQRSTVWY"

# Build the model once per session; debug=True is required because the pair
# representation is read from aux["debug"] below.
if "af_model" not in dir():
  af_model = mk_afdesign_model(protocol="binder", debug=True)
af_model.prep_inputs(pdb_filename=pdb_filename, chain=target_chain, binder_len=len(BINDER_SEQ))

print("target_length",af_model._target_len)
print("binder_length",af_model._binder_len)

af_model.predict(seq=BINDER_SEQ, num_recycles=0)

# Per-residue features: for target residue i, concatenate the pair embeddings
# pair[i, target_len + k] for every probe position k, in probe order.
# Done as one slice + reshape instead of repeated np.concatenate in a nested
# loop (which copied the growing vector on every step).
pair_repr = np.asarray(af_model.aux["debug"]["outputs"]["representations"]["pair"])
n_target = af_model._target_len
n_binder = len(BINDER_SEQ)
target_vs_binder = pair_repr[:n_target, n_target:n_target + n_binder]

# Row-major reshape keeps the (probe position, channel) ordering produced by
# the original concatenation; float64 matches the dtype it yielded.
residues_flat = target_vs_binder.reshape(
    n_target, n_binder * target_vs_binder.shape[-1]).astype(np.float64)

# Kept as a list of 1-D vectors (one per target residue) for downstream cells.
residues_repr = list(residues_flat)

In [None]:
#@title **Scale inputs and get the prediction**
model_type = 'ligand_model' #@param ["ligand_model", "peptide_model"]
#@markdown - Number of top-scoring residues to report
top_n = "15" #@param {type:"string"}
top_n=int(top_n)
if top_n < 0:
  # A negative value would silently slice from the wrong end of the ranking.
  raise ValueError(f"top_n must be non-negative, got {top_n}")

# (Re)load the classifier and its input scaler when they are missing or when
# the selected model type changed since the last run of this cell.
# globals().get avoids a NameError on `model_type_old` if a previous run failed
# part-way through loading.
if ("model" not in dir() or "scaler" not in dir()
    or globals().get("model_type_old") != model_type):
  print("loading af2bind params...")
  # Heavy imports are deferred until a model actually has to be loaded.
  from tensorflow.keras.models import Sequential, model_from_json
  import sklearn
  from sklearn.preprocessing import StandardScaler
  from pickle import load
  import os

  if(model_type=="ligand_model"):
    json_path = 'model_ligand_weights/model_ligand.json'
    weights_path = "model_ligand_weights/model_ligand.h5"
    scaler_path = 'model_ligand_weights/scaler_model_ligand.pkl'
  else:
    json_path = 'model_peptide_weights/model_peptide.json'
    weights_path = "model_peptide_weights/model_peptide.h5"
    scaler_path = 'model_peptide_weights/scaler_peptide.pkl'

  # Load into temporaries first so a failure leaves no half-updated state.
  with open(json_path, 'r') as json_file:
    loaded_model_json = json_file.read()
  new_model = model_from_json(loaded_model_json)
  # load weights into new model
  new_model.load_weights(weights_path)
  # load the scaler
  # NOTE(review): pickle.load executes arbitrary code from the file; this is
  # only acceptable because the archive comes from the project's own download.
  with open(scaler_path, 'rb') as scaler_file:
    new_scaler = load(scaler_file)

  model, scaler = new_model, new_scaler
  model_type_old = model_type
  print("Model and scaler loaded model from disk")


# Standardise the per-residue features, then score every target residue.
pw_scaled=scaler.transform(residues_repr)
Y_submit=model.predict(pw_scaled)

# Column 0 of the model output is used as the per-residue score.
preds = Y_submit[:,0]
# Indices of the highest-scoring residues, best first (at most top_n of them).
top_n_idx = preds.argsort()[::-1][:top_n]

print("\n 🧪 Top",top_n, "binding residues sorted by confidence: ")


residues_dict = {}
resi_terms = []
for i in top_n_idx:
  p = preds[i]
  c = af_model._pdb["idx"]["chain"][i]
  r = af_model._pdb["idx"]["residue"][i]
  residues_dict[f"{c}_{r}"] = p
  resi_terms.append(" resi " + str(r))
  print(c,r,p)

# Join the terms instead of appending " + " while n < top_n-1: that left a
# dangling " + " whenever top_n exceeded the number of residues.
pymol_cmd = "select ch"+str(target_chain)+", " + " + ".join(resi_terms)

print("\n🧪Pymol Selection Cmd:")
print(pymol_cmd)

In [None]:
import py3Dmol
import matplotlib.pyplot as plt
from colabdesign.shared.protein import pdb_to_string

#@title **Color the structure by confidence**
# Partly inspired by OpenFold - https://colab.research.google.com/github/aqlaboratory/openfold/blob/main/notebooks/OpenFold.ipynb#scrollTo=rowN0bVYLe9n
#color_map = {i: bands[2] for i, bands in enumerate(PLDDT_BANDS)}
# How the colour thresholds are chosen further down in this cell:
#   "absolute"              -> fixed cut-offs of 0.9 / 0.7 / 0.5
#   "relative_of_top_value" -> the top score reduced by 10% / 30% / 50%
confidence_type = 'relative_of_top_value' #@param ["relative_of_top_value", "absolute"]
# When True, coloured residues are also drawn with a 'stick' style.
view_sidechains = False #@param {type:"boolean"}
# Color bands for visualizing binding sites, as (lower, upper, hex colour)
# tuples on a 0-100 scale.
# NOTE(review): not referenced by the visible cells; the colours are repeated
# as literals where they are used.
CONFIDENCE_BANDS = [
  (0, 50, '#FF7D45'),
  (50, 70, '#FFDB13'),
  (70, 90, '#65CBF3'),
  (90, 100, '#0053D6')
]

def plot_confidence_legend():
  """Draw a standalone colour legend for the four confidence bands.

  Creates a small figure containing only a legend (zero-height bars supply the
  colour swatches; ticks and axis spines are hidden).

  Returns:
    The `matplotlib.pyplot` module, so the caller can chain `.show()`.
  """
  # (label, swatch colour) pairs, lowest confidence first.
  legend_entries = (
      ('Very low (confidence < 50)', '#FF7D45'),
      ('Low (70 > confidence > 50)', '#FFDB13'),
      ('Confident (90 > confidence > 70)', '#65CBF3'),
      ('Very high (confidence > 90)', '#0053D6'),
  )

  plt.figure(figsize=(1, 1))
  # Invisible bars exist only to register one legend handle per colour.
  for _, swatch in legend_entries:
    plt.bar(0, 0, color=swatch)
  labels = [label for label, _ in legend_entries]
  plt.legend(labels, frameon=False, loc='center', fontsize=20)

  # Strip everything except the legend and the title.
  plt.xticks([])
  plt.yticks([])
  axes = plt.gca()
  for side in ('right', 'top', 'left', 'bottom'):
    axes.spines[side].set_visible(False)

  plt.title('Model Confidence', fontsize=20, pad=50)
  return plt

color_map={0: '#FF7D45', 1: '#FFDB13', 2: '#65CBF3', 3: '#0053D6'}

# Write the predicted complex to disk and load chain A into the viewer.
af_model.save_pdb("pred.pdb")

view = py3Dmol.view(width=800, height=400)
view.addModel(pdb_to_string("pred.pdb",chains=["A"]))
# Base style: everything starts in the lowest-confidence colour.
view.setStyle({'cartoon': {'color': '#FF7D45'}})

if(confidence_type=="absolute"):
  confidence_value_90=0.9
  confidence_value_70=0.7
  confidence_value_50=0.5
else:
  # Thresholds relative to the best score. Named `top_value` rather than
  # `max`, which used to shadow the builtin for the rest of the session.
  # preds.max() equals the best of the top-n scores whenever at least one
  # residue was selected, and also works when the selection is empty.
  top_value=preds.max()
  confidence_value_90=top_value-top_value*0.1
  confidence_value_70=top_value-top_value*0.3
  confidence_value_50=top_value-top_value*0.5

# Highest band first: a residue takes the colour of the first threshold it
# reaches; residues below every threshold keep the base style.
threshold_colors = (
  (confidence_value_90, '#0053D6'),
  (confidence_value_70, '#65CBF3'),
  (confidence_value_50, '#FFDB13'),
)

for i in range(af_model._target_len): #top_n_idx:
  p = preds[i]
  # NOTE(review): assumes residues in pred.pdb are numbered 1..target_len in
  # the same order as `preds` - confirm against save_pdb's numbering.
  r = i + 1
  for threshold, color in threshold_colors:
    if p >= threshold:
      residue_style = {'cartoon': {'color': color}}
      if view_sidechains:
        residue_style['stick'] = {}
      view.setStyle({'resi': r}, residue_style)
      break

view.zoomTo()
view.show()

plot_confidence_legend().show()