[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dina-lab3D/Fold-Dock/blob/main/Fold_Dock.ipynb)



# Fold&Dock
Fold&Dock is a deep-learning-based folding and docking tool that, given an
antibody sequence and an antigen structure, produces the structure of their complex.
It provides several candidate solutions and ranks them using a deep-learning-based scoring function.


It can be used to predict the structures of nanobodies and monoclonal antibodies (mAbs), with or without an antigen.


The source code and the trained model can be found [here](https://github.com/dina-lab3D/Fold-Dock)


<br>
<img src=https://drive.google.com/uc?id=1FUTKK5IZPNxNvi-aHA5vcQ0Pe_ba4B6h width="1000">


<strong>For Citations please use: </strong> [paper](https://www.mlsb.io/papers_2022/End_to_end_accurate_and_high_throughput_modeling_of_antibody_antigen_complexes.pdf)


In [None]:
#@title install dependencies and  Clone Fold&Dock trained model
import os
from IPython.display import clear_output

if not os.path.exists('FoldDockReady'):
  !pip -q install biopython
  !pip -q install py3Dmol
  !pip install -q condacolab
  import condacolab
  condacolab.install()
  !conda create --name myenv
  !conda activate myenv
  !conda install -c bioconda abnumber
  !pip install -q scipy
  !pip install -q tensorflow

!touch FoldDockReady

# download model
if not os.path.exists("Fold-Dock/" ):
  !git clone https://tomerco4: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@github.com/dina-lab3D/Fold-Dock --quiet
  !unzip Fold-Dock/DockModel.zip -d Fold-Dock/DockModel

clear_output()

In [None]:

#@title Input antibody sequence


from google.colab import files
import re
import os
from IPython.display import clear_output


input_type = 'Fasta file' #@param ["Sequence (String)", "Fasta file"]
antibody_sequence = '' #@param {type:"string"}

if input_type == "Fasta file":
  # files.upload() returns a mapping keyed by the uploaded file names; from
  # here on `antibody_sequence` holds the fasta file *path*, not a sequence.
  uploaded_ab = files.upload()
  uploaded_fasta_names = list(uploaded_ab.keys())
  if len(uploaded_fasta_names) > 1:
    raise ValueError("Please upload a single fasta file, for multiple sequences in a single run put each antibody sequence in a different entry in the fasta file.")
  if not uploaded_fasta_names:
    # The upload dialog was cancelled; fail with a clear message instead of
    # an IndexError.
    raise ValueError("No fasta file was uploaded, please upload a single fasta file.")
  antibody_sequence = uploaded_fasta_names[0]


#@markdown  **Note**: use **':'** in order to separate the heavy and light chains. **heavy chain should come first!**

#@markdown **Note**: you can model multiple antibody sequences for a single antigen by separating them to different entries in the fasta file.


if input_type == 'Sequence (String)':
  # Keep only letters and the ':' chain separator (this also drops any
  # whitespace), then normalise to upper case.
  antibody_sequence = re.sub(r'[^a-zA-Z:]','', antibody_sequence).upper()
  if not antibody_sequence.replace(":", ""):
    # Nothing but separators/invalid characters: do not write an empty fasta.
    raise ValueError("Please insert an antibody sequence.")
  # Wrap the typed sequence in a single-entry fasta file so both input modes
  # hand a file path to the later cells.
  with open("input_fasta.fa", "w") as fa_file:
    fa_file.write("> model\n")
    fa_file.write("{}\n".format(antibody_sequence))
  antibody_sequence = "input_fasta.fa"

if not os.path.exists(antibody_sequence):
  raise ValueError("can't find fasta file {}.".format(antibody_sequence))




In [None]:
#@title Input antigen structure
#@markdown  If you want to also perform docking run this cell after checking "do_docking" and upload the antigen PDB file (Can have multiple chains).

do_docking = True #@param {type:"boolean"}
antigen_chains = 'all' #@param {type:"string"}
uploaded_antigen = None

if do_docking:
  # files.upload() returns a mapping keyed by the uploaded file names; only
  # the single file name is kept in `uploaded_antigen`.
  uploaded_antigen = files.upload()
  uploaded_antigen = uploaded_antigen.keys()
  if len(uploaded_antigen) > 1:
    raise ValueError("Please upload a single pdb file")
  if len(uploaded_antigen) == 0:
    # The upload dialog was cancelled; fail with a clear message instead of
    # an IndexError.
    raise ValueError("No pdb file was uploaded, please upload a single pdb file")
  uploaded_antigen = list(uploaded_antigen)[0]
  # Strings are immutable: the stripped value has to be assigned back,
  # otherwise inputs such as "A, B" keep their separators and are rejected.
  antigen_chains = antigen_chains.replace(" ", "").replace(",", "")
  # An empty field means "all chains" (see below), so only validate
  # non-empty input.
  if antigen_chains and not antigen_chains.isalpha():
    raise ValueError("Please use valid chain identifiers ([A-Z,a-z])")

# 'all' (any casing) or an empty field selects every chain of the antigen.
specify_ag_chains = True
if antigen_chains.lower() == "all" or antigen_chains == "":
  specify_ag_chains = False


#@markdown If you need only some of the antigen chains for docking, specify them without spaces ('ABC' for example).

#@markdown **Note**: We currently support antigens with sequence length of up to 600 amino acids.



In [None]:

#@markdown ---
#@markdown ### Advanced settings
top_n_pdb_models_to_create = 5 #@param {type:"integer"}
reconsrtuct_side_chains_using_modeller = True #@param {type:"boolean"}
# NOTE(review): a license key is shipped as the default value here -- confirm
# that redistributing it in the notebook is permitted.
modeller_license_key = 'MODELIRANJE' #@param {type:"string"}
visualize_results = True #@param {type:"boolean"}
verbose = True #@param {type:"boolean"}

#@markdown ---
#@markdown ### Saving options
output_dir = 'Results' #@param {type:"string"}
# Keep only word characters (letters, digits, underscore); this also removes
# every whitespace character. The name is later placed on shell command lines.
output_dir = re.sub(r'\W+', '', output_dir)
if not output_dir:
  # An empty name would break the '-o' flag of the prediction step and the
  # zip step that packs the results.
  raise ValueError("Please insert a valid output directory name (letters, digits and underscores only)!")
save_to_google_drive = False #@param {type:"boolean"}
#@markdown ---
# An emptied integer form field is treated as "create no pdb models".
if top_n_pdb_models_to_create is None:
  top_n_pdb_models_to_create = 0

if top_n_pdb_models_to_create < 0:
  raise ValueError("Please insert a non-negative number of pdb models to create!")


# A whitespace-only key is as unusable as an empty one.
if reconsrtuct_side_chains_using_modeller and modeller_license_key.strip() == '':
  raise ValueError("Please insert a valid license key!, you can get one from here: https://salilab.org/modeller/registration.html")

# Always define `drive` so later cells can test it without risking a
# NameError; it only becomes a real handle when Drive saving is enabled.
drive = None
if save_to_google_drive == True:
  from pydrive.drive import GoogleDrive
  from pydrive.auth import GoogleAuth
  from google.colab import auth
  from oauth2client.client import GoogleCredentials
  auth.authenticate_user()
  gauth = GoogleAuth()
  gauth.credentials = GoogleCredentials.get_application_default()
  drive = GoogleDrive(gauth)
  print("Saving results into Drive")


In [None]:
#@title Download Modeller
#@markdown If 'reconsrtuct_side_chains_using_modeller' is set to false, you can skip this step.
if not os.path.exists("ModellerReady") and reconsrtuct_side_chains_using_modeller:
  #@markdown **You can get a license key for Modeller from** **[here](https://salilab.org/modeller/registration.html)** .
  # modeller_license_key = '' #@param {type:"string"}
  #MODELIRANJE
  !wget https://salilab.org/modeller/10.1/modeller-10.1.tar.gz
  !tar -zxf modeller-10.1.tar.gz
  print("MODELLER extraction completed")
  %cd modeller-10.1
  #And we prepare a file containing the minimal setup elements
  #For installing, including a license key
  with open('modeller_config', 'a') as f:
    f.write("3\n")
    f.write("/content/compiled/MODELLER\n")
  #ADD YOUR LICENSE KEY HERE!
    f.write(f"{modeller_license_key}\n")
  !./Install < modeller_config
  print("MODELLER set up completed")

  %cd /content/
  #Creating a symbolic link
  %cd modeller-10.1
  !ln -sf /content/compiled/MODELLER/bin/mod10.1 /usr/bin/
  %cd /content/
  #Checking if MODELLER works
  !mod10.1 | awk 'NR==1{if($1=="usage:") print "MODELLER succesfully installed"; else if($1!="usage:") print "Something went wrong. Please install again"}'

  with open("/content/compiled/MODELLER/modlib/modeller/config.py", "r") as file:
    lines = file.readlines()

  with open("/content/compiled/MODELLER/modlib/modeller/config.py", "w") as file:
    file.write(lines[0])
    file.write(f"license = '{modeller_license_key}'\n")
  with open("ModellerReady","w"):pass
  clear_output()


In [None]:
#@title Predict antibody-antigen complexes
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # or any {'0', '1', '2'}
import tensorflow as tf
import logging
logging.getLogger('tensorflow').disabled = True

os.chdir("/content/")

flags = ""
if do_docking:
  flags+=f" -a {uploaded_antigen}"
if specify_ag_chains:
  flags+=f" -c {antigen_chains}"
if reconsrtuct_side_chains_using_modeller:
  flags+=" -m"

flags+=f" -v {1 if verbose else 0} -o {output_dir} -t {top_n_pdb_models_to_create}"

if reconsrtuct_side_chains_using_modeller:
  !/content/compiled/MODELLER/bin/modpy.sh python Fold-Dock/fold_dock.py $antibody_sequence $flags
else:
  !python Fold-Dock/fold_dock.py $antibody_sequence $flags


In [None]:
#@title Visualize the best scoring complex (for each of the antibody sequences)
import sys

import py3Dmol
import pandas as pd

# Make the cloned repository importable. `sys` has to be imported before
# `sys.path` is inspected, otherwise a fresh kernel raises NameError.
if '/content/Fold-Dock' not in sys.path:
  sys.path.insert(0, '/content/Fold-Dock')
# Imported unconditionally so the name exists even when the path entry was
# already present.
from utils import seq_iterator

# Colour names assigned to chains by position: the i-th chain id in
# `ids_list` gets the i-th colour.
# NOTE(review): several entries look like PyMOL colour names -- confirm that
# the viewer accepts all of them.
color_list = ["aquamarine","black","blue","bluewhite","brightorange","brown",
              "carbon","chartreuse","chocolate","cyan","darksalmon","dash","deepblue","deepolive",
              "deeppurple","deepsalmon","deepteal","density","dirtyviolet","firebrick","forest","gray",
              "green","greencyan","hotpink","hydrogen","lightblue","lightmagenta","lightorange","lightpink",
              "lightteal","lime","limegreen","limon","magenta","marine","nitrogen","olive",
              "orange", "oxygen", "palecyan", "palegreen", "paleyellow", "pink", "purple", "purpleblue", "raspberry",
              "red", "ruby", "salmon", "sand", "skyblue", "slate", "smudge", "splitpea", "sulfur", "teal", "tv_blue",
              "tv_green", "tv_orange", "tv_red", "tv_yellow", "violet", "violetpurple", "warmpink", "wheat", "yellow", "yelloworange"]

# Single-letter chain identifiers: upper case first, then lower case.
ids_list = [*"ABCDEFGHIJKLMNOPQRSTUVWXYZ"] + [*"abcdefghijklmnopqrstuvwxyz"]


def plot_structure(ab_name, score, pdb_path):
    """Print a coloured caption and show one PDB model in a py3Dmol viewer.

    ab_name:  label printed in the caption.
    score:    value printed after "Score:" in the caption.
    pdb_path: path of the PDB file to read and display.
    """
    with open(pdb_path) as pdb_file:
        predicted = pdb_file.read()

    # Caption in blue (24-bit ANSI colour), then switch the text colour to white.
    r, g, b = (0, 0, 255)
    print(f"\033[38;2;{r};{g};{b}m {ab_name} Predicted model, Score: {score}\033[38;2;255;255;255m")

    view = py3Dmol.view(width=500, height=500)
    view.addModelsAsFrames(predicted)
    # Default style for the whole model; the per-chain styles below are set
    # afterwards.
    view.setStyle({'model': 0}, {"cartoon": {'arrows':True, 'color': 'blue'}})
    for chain_name, colour in zip(ids_list, color_list):
        chain_selector = {'chain':chain_name}
        chain_style = {'cartoon': {'color':colour}}
        view.setStyle(chain_selector, chain_style)
    view.zoomTo()
    view.show()



# The file suffix of the top-ranked model depends on whether MODELLER was
# used for side-chain reconstruction.
if reconsrtuct_side_chains_using_modeller:
  file_ending = "relaxed.pdb"
else:
  file_ending = "unrelaxed.pdb"

if visualize_results:
  print("Showing Fold&Dock best scoring predicted structure")
  # One output sub-directory per fasta entry, named after the entry id.
  for ab_seq_obj in seq_iterator(antibody_sequence):
    entry_dir = os.path.join(output_dir, ab_seq_obj.id)
    if do_docking:
      # First row of the "score" column in scores.csv.
      score_table = pd.read_csv(os.path.join(entry_dir, "scores.csv"))
      complex_score = score_table["score"][0]
    else:
      # No docking, so there is no complex score to report.
      complex_score = "-"
    top_model_path = os.path.join(entry_dir, f"{ab_seq_obj.id}_rank_1_{file_ending}")
    plot_structure(ab_seq_obj.id, complex_score, top_model_path)

In [None]:
#@title Download results


!zip -FSr $output_dir".zip" $output_dir
files.download(f"{output_dir}.zip")

if save_to_google_drive == True and drive != None:
  uploaded = drive.CreateFile({'title': f"{output_dir}.zip"})
  uploaded.SetContentFile(f"{output_dir}.zip")
  uploaded.Upload()
  print(f"Uploaded {output_dir}.zip to Google Drive with ID {uploaded.get('id')}")