## Modified by @engelberger from the Colab notebook of Brian Naughton @btnaughton


In [1]:
# Colab form inputs. Leaving either one empty makes a later cell fall back to
# the built-in example (PDB 6agt with an example SMILES string).
# PDB_id is passed to download_pdb_file: an RCSB identifier or an http(s) URL.
PDB_id = '' #@param {type:"string"}
# A purely numeric value is looked up as a PubChem CID; anything else is used
# verbatim as a SMILES string.
SMILES_or_pubchem_id = '' #@param {type:"string"}

In [2]:
import os
import time
from random import random
from typing import Optional

import requests

def download_pdb_file(pdb_id: str) -> str:
    """Fetch a PDB file and return the path of its locally cached copy.

    ``pdb_id`` is either an RCSB identifier (e.g. ``"6agt"``), which is
    fetched from files.rcsb.org, or a full URL starting with ``http``, which
    is fetched as given. Files are cached under ``./tmp/pdb/`` keyed by file
    name; when the cached file already exists it is returned without any
    network access.

    Args:
        pdb_id: RCSB PDB identifier, or a URL starting with ``http``.

    Returns:
        Path of the cached PDB file (not the file contents).

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection failure or timeout.
    """
    PDB_DIR ="./tmp/pdb/"
    os.makedirs(PDB_DIR, exist_ok=True)

    # url or pdb_id
    if pdb_id.startswith('http'):
        url = pdb_id
        filename = url.split('/')[-1]
    else:
        url = f"https://files.rcsb.org/view/{pdb_id}.pdb"
        filename = f'{pdb_id}.pdb'

    cache_path = os.path.join(PDB_DIR, filename)
    if os.path.exists(cache_path):
        return cache_path

    # A timeout keeps a stalled connection from hanging the notebook forever.
    pdb_req = requests.get(url, timeout=60)
    pdb_req.raise_for_status()

    # Write to a sibling file and rename it into place: the cache check above
    # trusts any file at cache_path, so a partially written file must never
    # appear under that name.
    partial_path = cache_path + ".part"
    with open(partial_path, 'w') as pdb_out:
        pdb_out.write(pdb_req.text)
    os.replace(partial_path, cache_path)
    return cache_path

def download_smiles_str(pubchem_id: str, retries:int = 2) -> Optional[str]:
    """Look up the canonical SMILES string for a PubChem compound id.

    Queries the PubChem PUG REST endpoint for the ``CanonicalSMILES``
    property in CSV form and returns the second field of the first data row.

    Args:
        pubchem_id: PubChem compound id (CID).
        retries: number of additional attempts after the first request gets a
            non-200 response. Zero or a negative value means a single attempt.

    Returns:
        The SMILES string, or ``None`` if every attempt returned a non-200
        status or the response contained no data row.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/CID/{pubchem_id}/property/CanonicalSMILES/CSV"

    # Clamp so a negative retry count cannot loop forever.
    attempts = max(retries, 0) + 1
    smiles_url_csv = None
    for attempt in range(attempts):
        if attempt:
            # Randomised pause between attempts; never after the last one.
            time.sleep(1+random())
        req = requests.get(url, timeout=30)
        if req.status_code == 200:
            smiles_url_csv = req.text
            break

    if smiles_url_csv is None:
        return None

    # Expected layout: a header row followed by one `cid,"smiles"` data row.
    rows = smiles_url_csv.splitlines()
    if len(rows) < 2:
        return None
    fields = rows[1].split(',')
    if len(fields) < 2:
        return None
    return fields[1].strip('"').strip("'")

In [3]:
# Fall back to the bundled example when either form field was left empty.
if not PDB_id or not SMILES_or_pubchem_id:
    PDB_id = "6agt"
    SMILES_or_pubchem_id = "COc(cc1)ccc1C#N"
    print(f"No input supplied. Using example data: {PDB_id} and {SMILES_or_pubchem_id}")
# to run many PDB+smiles at once, fill in a list of PDB_files and smiles here...
pdb_files = [download_pdb_file(PDB_id)]

# A purely numeric entry is treated as a PubChem CID and resolved to SMILES;
# anything else is assumed to already be a SMILES string.
if str(SMILES_or_pubchem_id).isnumeric():
    smiles_resolved = download_smiles_str(SMILES_or_pubchem_id)
    if smiles_resolved is None:
        # Without this check the literal text "None" would be written to the
        # CSV as the ligand.
        raise ValueError(
            f"PubChem lookup failed for CID {SMILES_or_pubchem_id!r}; "
            "no SMILES string could be retrieved."
        )
else:
    smiles_resolved = SMILES_or_pubchem_id
smiless = [smiles_resolved]

# Make sure the output directory exists even if pdb_files was filled in by
# hand rather than through download_pdb_file.
os.makedirs("./tmp", exist_ok=True)

# One CSV row per (protein file, ligand) combination.
with open("./tmp/input_protein_ligand.csv", 'w') as out:
    out.write("protein_path,ligand\n")
    for pdb_file in pdb_files:
        for smiles in smiless:
            out.write(f"{pdb_file},{smiles}\n")

No input supplied. Using example data: 6agt and COc(cc1)ccc1C#N


In [4]:
# Install torch-sparse pinned to 0.6.12, pointing pip's -f (--find-links) at a
# cp39 / linux_x86_64 wheel file given by relative path.
# NOTE(review): the wheel presumably has to be present in the current working
# directory for this to resolve — confirm where it comes from.
!pip install torch-sparse==0.6.12 -f torch_sparse-0.6.12-cp39-cp39-linux_x86_64.whl

Looking in links: torch_sparse-0.6.12-cp39-cp39-linux_x86_64.whl


In [5]:
# Install the PyTorch Geometric extension packages at pinned versions, using
# the PyG wheel index page for torch 1.9.1 + CUDA 11.1 as the find-links source.
# NOTE(review): torch-sparse==0.6.12 is also installed from a local wheel in
# the preceding cell — confirm whether both installs are needed.
!pip install torch-sparse==0.6.12 -f https://data.pyg.org/whl/torch-1.9.1+cu111.html
!pip install torch-scatter==2.0.9 -f https://data.pyg.org/whl/torch-1.9.1+cu111.html
!pip install torch-spline-conv==1.2.1 -f https://data.pyg.org/whl/torch-1.9.1+cu111.html
# NOTE(review): torch-geometric is the only install here without a version
# pin, so the resolved version can change between runs — consider pinning.
!pip install torch-geometric

Looking in links: https://data.pyg.org/whl/torch-1.9.1+cu111.html
Looking in links: https://data.pyg.org/whl/torch-1.9.1+cu111.html
Looking in links: https://data.pyg.org/whl/torch-1.9.1+cu111.html


In [2]:
# Import check for the PyG stack installed above.
# NOTE(review): the recorded output of this cell is a RuntimeError from
# torch_sparse ("object has no attribute sparse_csr_tensor"), which suggests
# the installed torch_sparse expects a newer torch than the one in this
# kernel — confirm the versions match before relying on this environment.
import torch_geometric


RuntimeError: 
object has no attribute sparse_csr_tensor:
  File "/usr/local/lib/python3.8/dist-packages/torch_sparse/tensor.py", line 511
            value = torch.ones(self.nnz(), dtype=dtype, device=self.device())
    
        return torch.sparse_csr_tensor(rowptr, col, value, self.sizes())
               ~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE


In [2]:
# Install pytorch-cluster from the `pyg` channel using the conda executable
# at /miniconda/bin, answering yes to prompts (-y).
# NOTE(review): the recorded package plan also lists a pytorch-1.10.2 CPU
# build, while the later version-check cell prints 1.9.0+cu111 — confirm
# which environment the kernel and the inference command actually use.
!/miniconda/bin/conda install -y pytorch-cluster -c pyg

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /miniconda

  added / updated specs:
    - pytorch-cluster


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    conda-22.9.0               |   py39h06a4308_0         884 KB
    cudatoolkit-11.3.1         |       h2bc3f7f_2       549.3 MB
    ninja-1.10.2               |       h06a4308_5           8 KB
    ninja-base-1.10.2          |       hd09550d_5         109 KB
    pytorch-1.10.2             |cpu_py39hfa7516b_0        44.1 MB
    pytorch-cluster-1.6.0      |py39_torch_1.10.0_cu113         1.8 MB  pyg
    ------------------------------------------------------------
                                           Total:       596.1 MB

The following NEW packages will be INSTALLED:

  cudatoolkit        pkgs/main/linux-64::cudatoolkit-11.3.1-h2bc3f7f_2
  ninja        

In [10]:
# Sanity check of the PyTorch build that this kernel imports; the recorded
# output for this cell is "1.9.0+cu111".
import torch
# Print the pytorch version
print(torch.__version__)

1.9.0+cu111


  from .autonotebook import tqdm as notebook_tqdm


In [1]:
# Run the `inference` module as a shell command on the protein/ligand CSV
# written earlier (./tmp/input_protein_ligand.csv), sending results to
# results/user_predictions_small. Settings: 20 inference steps, 40 samples
# per complex, batch size 10.
# NOTE(review): `python` here is whatever interpreter the shell resolves,
# which is presumably (not verified) the one the packages above were
# installed into — confirm.
!python -m inference \
    --protein_ligand_csv ./tmp/input_protein_ligand.csv \
        --out_dir results/user_predictions_small \
            --inference_steps 20 \
                --samples_per_complex 40 \
                    --batch_size 10

loading data from memory:  data/cache_torsion/limit0_INDEX_maxLigSizeNone_H0_recRad15.0_recMax24_esmEmbeddings32221061/heterographs.pkl
Number of complexes:  1
radius protein: mean 75.19402313232422, std 0.0, max 75.19402313232422
radius molecule: mean 3.9542152881622314, std 0.0, max 3.9542152881622314
distance protein-mol: mean 0.8058823347091675, std 0.0, max 0.8058823347091675
rmsd matching: mean 0.0, std 0.0, max 0
HAPPENING | confidence model uses different type of graphs than the score model. Loading (or creating if not existing) the data for the confidence model now.
loading data from memory:  data/cache_torsion_allatoms/limit0_INDEX_maxLigSizeNone_H0_recRad15.0_recMax24_atomRad5_atomMax8_esmEmbeddings32221061/heterographs.pkl
Number of complexes:  1
radius protein: mean 75.19402313232422, std 0.0, max 75.19402313232422
radius molecule: mean 3.9588654041290283, std 0.0, max 3.9588654041290283
distance protein-mol: mean 1.3410234451293945, std 0.0, max 1.3410234451293945
rmsd ma