## Running structure predictions with multimer and monomeric predictions

Before running this notebook, please ensure you:

1. Are logged in by running `dm login EMAIL` in the terminal
2. Have a Token registered and saved on the file


In [None]:
!pip install deepmirror
# !dm login <YOUREMAIL>

In [None]:
import time
import zipfile

import py3Dmol
from IPython.display import SVG, display
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.Draw import rdMolDraw2D

import deepmirror.api as api

### Define some helper functions

In [None]:
def draw_rdmol(sml) -> None:
    """Render a molecule given as SMILES as a 400x300 SVG in the cell output.

    Args:
        sml: SMILES string of the molecule to draw.

    Raises:
        ValueError: If RDKit cannot parse ``sml`` into a molecule.
    """
    mol = Chem.MolFromSmiles(sml)
    if mol is None:
        # MolFromSmiles reports a parse failure by returning None; fail here
        # with a clear message instead of an obscure error inside the drawer.
        raise ValueError(f"Could not parse SMILES: {sml!r}")

    drawer = rdMolDraw2D.MolDraw2DSVG(400, 300)
    drawer.DrawMolecule(mol)
    drawer.FinishDrawing()
    # Strip the "svg:" namespace prefix from the generated markup before display.
    svg = drawer.GetDrawingText().replace("svg:", "")
    display(SVG(svg))

In [None]:
def draw_atom_names(reacted_smiles) -> None:
    """Draw a molecule in 2D with each heavy atom labelled by its PDB atom name.

    The molecule is embedded in 3D, written to a PDB block and read back
    without hydrogens, so that every atom carries the atom name assigned by
    RDKit's PDB writer. Those names are then used as labels in a 400x300 SVG.
    NOTE(review): presumably these are the names the prediction service
    expects in ``res_idx`` fields (e.g. "@C1") - confirm against the API docs.

    Args:
        reacted_smiles: SMILES string of the (post-reaction) ligand.

    Raises:
        ValueError: If the SMILES cannot be parsed, 3D embedding fails, or the
            generated PDB block cannot be read back.
    """
    mol = Chem.MolFromSmiles(reacted_smiles)
    if mol is None:
        # MolFromSmiles returns None on parse failure rather than raising.
        raise ValueError(f"Could not parse SMILES: {reacted_smiles!r}")

    mol = Chem.AddHs(mol)
    # EmbedMolecule returns the new conformer id, or -1 when embedding fails.
    # Without this check the failure only surfaces later, in the UFF step.
    if AllChem.EmbedMolecule(mol, AllChem.ETKDG()) == -1:
        raise ValueError(
            f"Could not generate 3D coordinates for SMILES: {reacted_smiles!r}"
        )
    AllChem.UFFOptimizeMolecule(mol)

    # Round-trip through PDB so atoms pick up PDB residue info (atom names).
    pdb_block = Chem.MolToPDBBlock(mol)
    pdb_mol = Chem.MolFromPDBBlock(pdb_block, removeHs=True)
    if pdb_mol is None:
        raise ValueError("Could not read back the generated PDB block")

    names = [
        atom.GetPDBResidueInfo().GetName().strip() for atom in pdb_mol.GetAtoms()
    ]

    # Replace the 3D conformer with 2D depiction coordinates for drawing.
    pdb_mol.RemoveAllConformers()
    AllChem.Compute2DCoords(pdb_mol)
    drawer = rdMolDraw2D.MolDraw2DSVG(400, 300)
    opts = drawer.drawOptions()

    # Override the default element label of each atom with its PDB atom name.
    for atom_idx, atom_name in enumerate(names):
        opts.atomLabels[atom_idx] = atom_name

    drawer.DrawMolecule(pdb_mol)
    drawer.FinishDrawing()
    svg = drawer.GetDrawingText().replace("svg:", "")
    display(SVG(svg))

In [None]:
def cofold(
    chains: list[dict],
    constraint_settings: dict,
    poll_interval_s: float = 120,
    max_wait_s: float = float("inf"),
    failure_statuses: tuple = ("failed", "error"),
) -> str:
    """Submit a structure prediction, wait for it to finish and download it.

    The result archive is written to ``result-<task_id>.zip`` in the current
    working directory.

    Args:
        chains: Chain definitions passed to ``api.structure_prediction``.
        constraint_settings: Constraint settings passed to
            ``api.structure_prediction``.
        poll_interval_s: Seconds to sleep between status checks.
        max_wait_s: Maximum number of seconds to keep polling before giving
            up. The default (infinity) keeps the original wait-forever
            behaviour.
        failure_statuses: Status values treated as terminal failures.
            NOTE(review): the exact failure status names returned by the
            service are assumed here - confirm against the deepmirror API.

    Returns:
        The task id of the completed prediction.

    Raises:
        RuntimeError: If the task reports one of ``failure_statuses``.
        TimeoutError: If the task has not completed within ``max_wait_s``.
    """
    response = api.structure_prediction(chains, constraint_settings)
    task_id = response["task_id"]

    deadline = time.monotonic() + max_wait_s
    while True:
        response = api.get_structure_prediction(task_id)
        status = response["status"]
        if status == "completed":
            break
        if status in failure_statuses:
            # Without this check a failed task would be polled forever.
            raise RuntimeError(
                f"Structure prediction {task_id} ended with status {status!r}"
            )
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Structure prediction {task_id} did not complete within "
                f"{max_wait_s} s (last status: {status!r})"
            )
        print(
            f"Current status: {status} - "
            f"Waiting {poll_interval_s / 60:g} min for completion..."
        )
        time.sleep(poll_interval_s)

    with open(f"result-{task_id}.zip", "wb") as f:
        f.write(api.download_structure_prediction(task_id))

    return task_id


def view_results(task_id: str):
    """Build a py3Dmol viewer for a downloaded prediction result.

    Reads ``data.cif`` from ``result-<task_id>.zip`` in the current working
    directory and returns a 400x400 viewer showing the model as a cartoon,
    with atoms matching ``hetflag`` additionally drawn as sticks.

    Args:
        task_id: Id of the prediction whose result archive should be shown.

    Returns:
        The configured ``py3Dmol`` view object.
    """
    archive_path = f"result-{task_id}.zip"
    with zipfile.ZipFile(archive_path, "r") as archive:
        cif_text = str(archive.read("data.cif"), "utf-8")

    viewer = py3Dmol.view(width=400, height=400)
    viewer.addModel(cif_text, "cif")
    viewer.setStyle({"cartoon": {}})
    viewer.addStyle({"hetflag": True}, {"stick": {}})
    viewer.zoomTo()
    return viewer

# Protein + Ligand

Draw the structure of the ligand from its SMILES string, before covalent bond formation:
# <img src="./example_images/unreacted_mol.png" width="300"/>



In [None]:
# Ligand as supplied, before reaction with the protein: the SMILES starts with
# the C=C double bond of the acrylamide group.
unreacted_smiles = "C=CC(=O)N1CCN([C@H](C1)C)C2=NC(=O)N(c3c2cc(c(n3)c4c(cccc4F)O)F)c5c(ccnc5C(C)C)C"
draw_rdmol(unreacted_smiles)

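Alter the SMILES to represent the ligand after the reaction, i.e. with any leaving group removed (here, the acrylamide C=C double bond becomes a C–C single bond):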
# <img src="./example_images/reacted_mol.png" width="300"/>



In [None]:
# Post-reaction form of the ligand: identical to `unreacted_smiles` except that
# the leading C=C double bond is written as a C-C single bond.
reacted_smiles = "CCC(=O)N1CCN([C@H](C1)C)C2=NC(=O)N(c3c2cc(c(n3)c4c(cccc4F)O)F)c5c(ccnc5C(C)C)C"
draw_rdmol(reacted_smiles)

Identify the reaction center's atom name
# <img src="./example_images/atom_names_mol.png" width="300"/>



In [None]:
# Show the PDB atom names of the reacted ligand so the atom taking part in the
# covalent bond can be identified by name.
draw_atom_names(reacted_smiles)

In [None]:
# Chains to co-fold: chain "A" is the protein sequence (residue 12 is a
# cysteine), chain "B" is the ligand given as its post-reaction SMILES.
chains = [
    dict(
        label="A",
        value="MTEYKLVVVGACGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQIKRVKDSDDVPMVLVGNKCDLAARTVESRQAQDLARSYGIPYIETSAKTRQGVEDAFYTLVREIRQH",
        type="protein",
    ),
    dict(
        label="B",
        value=reacted_smiles,
        type="ligand",
    ),
]


## Bond constraint. 

This is used to define a covalent constraint between any two atoms within a protein, or between a protein and a small molecule.

* Please note that the residue indices are 1-indexed
* connection_type: "covalent", used to define a covalent bond restraint
* res_idxA and res_idxB: residue indices with the atom names participating in the covalent bond
* For residue and atom names of the Amino Acids, please follow the PDB nomenclature, see below for few common examples.
* min_distance_angstrom: minimum distance between the bonded heavy atoms; the suggested value is the optimal bond length minus 0.5 Å
* max_distance_angstrom: maximum distance between the bonded heavy atoms; the suggested value is the optimal bond length plus 1 Å
* confidence: Weight assigned to the constraint
* comment: User defined comment for the constraint.
* restraint_id: restraint_i, where i is the number assigned to the constraint, starting from 0
* Define as many constraints as needed and add them all to the final list of constraints

For an extensive list, please refer to the PDB atom nomenclature

| Amino Acid (Letter Code) | Reactive Atom Name | 
| --- | --- | 
| CYS (C) | SG |
| SER (S) | OG |
| LYS (K) | NZ |

In [None]:
# Covalent restraint between the SG atom of cysteine 12 on chain A and atom C1
# of the ligand on chain B, allowed to lie between 1.2 and 2.1 angstrom apart.
constraint_1 = {
    "chainA": "A",
    "res_idxA": "C12@SG",  # <one-letter residue><1-based index>@<atom name>
    "chainB": "B",
    "res_idxB": "@C1",  # ligand atom: atom name only
    "connection_type": "covalent",
    "confidence": 1.0,
    "min_distance_angstrom": 1.2,
    "max_distance_angstrom": 2.1,
    # The bonded protein atom is SG; cysteine has no CG atom.
    "comment": "SG-C acrylamide bond",
    "restraint_id": "restraint_0",
}

In [None]:
# Gather every constraint into one list and wrap it in the settings mapping
# that is passed on to the structure prediction call.
constraints = [constraint_1]
constraint_settings = dict(constraints=constraints)

### Initiate Structure Prediction

In [None]:
# Submit the job and block until it completes; the status is polled every
# 2 minutes and the result is saved as result-<task_id>.zip.
task_id = cofold(chains, constraint_settings)

In [None]:
# Load the predicted structure from the downloaded archive; leaving `view` as
# the last expression makes the notebook display it.
view = view_results(task_id)
view