<a href="https://colab.research.google.com/github/avilella/utils/blob/master/af/design.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# AfDesign (beta version)
Backprop through AlphaFold for protein design.

**WARNING**
1.   This notebook is in active development and was designed for demonstration purposes only.
2.   Using AfDesign as the only "loss" function for design might be a bad idea: you may find adversarial sequences (i.e., sequences that trick AlphaFold).

In [1]:
#@title install
%%bash
# Abort on the first failing command (including a failed curl inside the pipe)
# so a broken install surfaces as an error instead of a half-populated setup.
set -euo pipefail
if [ ! -d params ]; then
  pip -q install git+https://github.com/sokrypton/af_backprop.git
  pip -q install git+https://github.com/sokrypton/ColabDesign.git@beta
  # "params" doubles as the "already installed" marker tested above, so it must
  # only come into existence once the weights have been fully extracted.
  # Unpack into a scratch directory and rename it on success.
  rm -rf params.partial
  mkdir params.partial
  curl -fsSL https://storage.googleapis.com/alphafold/alphafold_params_2021-07-14.tar | tar x -C params.partial
  mv params.partial params
fi

In [2]:
#@title import libraries
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os
from af import mk_design_model, clear_mem
from IPython.display import HTML
from google.colab import files
import numpy as np

#########################
def get_pdb(pdb_code=""):
  """Return the path of a local PDB file, uploading or downloading it first.

  Args:
    pdb_code: PDB identifier (e.g. "1TEN") to fetch from files.rcsb.org.
      If None or empty, the user is asked to upload a file through
      ``files.upload()`` instead.

  Returns:
    Relative path of the PDB file: "tmp.pdb" for an upload,
    "<pdb_code>.pdb" for a download.

  Raises:
    ValueError: if the upload step returns no file.
  """
  if pdb_code is None or pdb_code == "":
    upload_dict = files.upload()
    if not upload_dict:
      raise ValueError("no PDB file was uploaded")
    # Only the first uploaded file is used; any others are ignored.
    pdb_string = next(iter(upload_dict.values()))
    # Write to the very path that is returned, so the caller can open it.
    tmp_path = "tmp.pdb"
    with open(tmp_path, "wb") as out:
      out.write(pdb_string)
    return tmp_path
  else:
    # wget flags: -q quiet, -nc do not re-download if the file already exists.
    os.system(f"wget -qnc https://files.rcsb.org/view/{pdb_code}.pdb")
    return f"{pdb_code}.pdb"

# fixed backbone design (fixbb)
For a given protein backbone, generate/design a new sequence that AlphaFold thinks folds into that conformation. 

In [None]:
# clear_mem() comes from `af`; presumably it releases memory held by a
# previously built model -- confirm against the library.
clear_mem()
# Build a design model for the fixed-backbone ("fixbb") protocol.
model = mk_design_model(protocol="fixbb")
# get_pdb("1TEN") downloads the entry and returns its local path; chain A is
# passed as the input chain.
model.prep_inputs(pdb_filename=get_pdb("1TEN"), chain="A")

# Show the model length and the weights dictionary stored in model.opt.
print("length",  model._len)
print("weights", model.opt["weights"])

In [None]:
# Both calls are made without arguments, so the library defaults apply.
# restart() presumably re-initialises the sequence/optimiser state -- confirm.
model.restart()
model.design_3stage()

In [None]:
# Presumably plots the trajectory recorded during design -- confirm in `af`.
model.plot_traj()  

In [None]:
# Save to a file named after the active protocol, then display the structure.
model.save_pdb(f"{model.protocol}.pdb")
model.plot_pdb()

In [None]:
# Wrap the value returned by model.animate() in HTML for rich notebook display.
HTML(model.animate())

In [None]:
# Last expression of the cell, so its return value is shown as the cell output.
model.get_seqs()

# hallucination
For a given length, generate/hallucinate a protein sequence that AlphaFold thinks folds into a well-structured protein (high pLDDT, low PAE, many contacts).

In [None]:
# clear_mem() comes from `af`; presumably it releases memory held by the
# previous model -- confirm against the library.
clear_mem()
# Build a design model for the "hallucination" protocol; no input structure is
# given, only the desired sequence length.
model = mk_design_model(protocol="hallucination")
model.prep_inputs(length=100)

# Show the model length and the weights dictionary stored in model.opt.
print("length",model._len)
print("weights",model.opt["weights"])

In [None]:
# pre-design with gumbel initialization and softmax activation
model.restart(seq_init="gumbel")
model.design(50, soft=True)

# three stage design  
model.restart(seq_init=model._outs["seq"]["pseudo"], keep_history=True)
model.design_3stage(50,50,10)

In [None]:
# Save to a file named after the active protocol, then display the structure.
model.save_pdb(f"{model.protocol}.pdb")
model.plot_pdb()

In [None]:
# Wrap the value returned by model.animate() in HTML for rich notebook display.
HTML(model.animate())

In [None]:
# Last expression of the cell, so its return value is shown as the cell output.
model.get_seqs()

# binder hallucination
For a given protein target and protein binder length, generate/hallucinate a protein binder sequence that AlphaFold thinks will bind to the target structure. To do this, we minimize PAE and maximize the number of contacts at the interface and within the binder, and we maximize the pLDDT of the binder.

In [None]:
# clear_mem() comes from `af`; presumably it releases memory held by the
# previous model -- confirm against the library.
clear_mem()
# Build a design model for the "binder" protocol.
model = mk_design_model(protocol="binder")
# Target: chain A of PDB entry 4MZK (downloaded by get_pdb).
# binder_len=19 requests a 19-residue binder.
model.prep_inputs(pdb_filename=get_pdb("4MZK"), chain="A", binder_len=19)

# Show target/binder lengths and the weights dictionary stored in model.opt.
print("target_length",model._target_len)
print("binder_length",model._binder_len)
print("weights",model.opt["weights"])

In [None]:
# Restart with the "soft_gumbel" sequence initialisation, then run the
# three-stage design.
# NOTE(review): the meaning of (100, 100, 10) is defined by design_3stage in
# `af` -- presumably steps per stage; confirm.
model.restart(seq_init="soft_gumbel")
model.design_3stage(100,100,10)

In [None]:
# Save to a file named after the active protocol, then display the structure.
model.save_pdb(f"{model.protocol}.pdb")
model.plot_pdb()

In [None]:
# Wrap the value returned by model.animate() in HTML for rich notebook display.
HTML(model.animate())

In [None]:
# Last expression of the cell, so its return value is shown as the cell output.
model.get_seqs()

In [None]:
# Binder design starting from an existing complex: PDB entry 1A0O, with chain A
# as the target and chain B as the binder.
# clear_mem() comes from `af`; presumably it releases memory held by the
# previous model -- confirm against the library.
clear_mem()
model = mk_design_model(protocol="binder")
# NOTE(review): use_binder_template / split_templates are `af` options whose
# exact effect is not visible in this notebook -- confirm in the library.
model.prep_inputs(pdb_filename=get_pdb("1A0O"),
                  chain="A",
                  binder_chain="B",
                  use_binder_template=True,
                  split_templates=True)

# Show target/binder lengths and the weights dictionary stored in model.opt.
print("target_length",model._target_len)
print("binder_length",model._binder_len)
print("weights",model.opt["weights"])

# Restart in "wildtype" mode, overriding the template_dropout option to 0.15.
# The recorded output starts at seqid 1.00, consistent with starting from the
# native binder sequence -- confirm against `af`.
model.restart(mode="wildtype",
              opt={"template_dropout":0.15})
# NOTE(review): the meaning of (100, 50, 10) is defined by design_3stage in
# `af` -- presumably steps per stage; confirm.
model.design_3stage(100,50,10)
# Save to a file named after the active protocol, then display the structure.
model.save_pdb(f"{model.protocol}.pdb")
model.plot_pdb()


target_length 128
binder_length 70
weights {'msa_ent': 0.0, 'helix': 0.0, 'plddt': 0.01, 'pae': 0.01, 'dgram_cce': 1.0, 'fape': 0.0, 'rmsd': 0.0, 'con': 0.0, 'i_pae': 0.01, 'i_con': 0.0}
1	model [0] soft 0.00 temp 1.00 seqid 1.00 loss 1.47 plddt 0.27 pae 0.18 i_pae 0.70 dgram_cce 1.46 rmsd 44.38
2	model [0] soft 0.01 temp 1.00 seqid 1.00 loss 1.35 plddt 0.21 pae 0.15 i_pae 0.79 dgram_cce 1.34 rmsd 29.46
3	model [0] soft 0.02 temp 1.00 seqid 1.00 loss 1.27 plddt 0.16 pae 0.11 i_pae 0.74 dgram_cce 1.26 rmsd 38.49
4	model [1] soft 0.03 temp 1.00 seqid 1.00 loss 1.27 plddt 0.16 pae 0.12 i_pae 0.79 dgram_cce 1.26 rmsd 42.91
5	model [1] soft 0.04 temp 1.00 seqid 1.00 loss 1.27 plddt 0.12 pae 0.10 i_pae 0.75 dgram_cce 1.26 rmsd 42.52
6	model [1] soft 0.05 temp 1.00 seqid 1.00 loss 1.34 plddt 0.11 pae 0.10 i_pae 0.78 dgram_cce 1.33 rmsd 41.49
7	model [1] soft 0.06 temp 1.00 seqid 1.00 loss 1.28 plddt 0.10 pae 0.10 i_pae 0.82 dgram_cce 1.27 rmsd 44.64
8	model [1] soft 0.07 temp 1.00 seqid 1.00 