<a href="https://colab.research.google.com/github/gkoorsen/Automatic_multi_docking/blob/main/V2_of_Smina_Multi_Docking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Instructions

1. Set the exhaustiveness parameter
   * 8 for faster calculations
   * 16 for slower, but more accurate results

2. Upload the docking template. You can find the template here:
    https://github.com/gkoorsen/Automatic_multi_docking/blob/main/Docking_template.xlsx

3. That's it! Run the cells in the rest of the notebook to proceed with the
  docking with SMINA.


In [None]:
# Colab form field: the value is read by dock() and passed to smina as --exhaustiveness.
exhaustiveness = 8#@param {type:"integer"}

# Run the cells below to start docking...

## Install the necessary packages and libraries

In [None]:
!pip install biopython
!pip install wget
!pip install requests
!apt-get -qq install -y cmake
!apt-get -qq install -y swig
!apt-get -qq install -y libeigen3-dev
!wget https://github.com/openbabel/openbabel/archive/openbabel-2-4-1.tar.gz
!tar xzvf openbabel-2-4-1.tar.gz
!mkdir openbabel-openbabel-2-4-1/build
%cd openbabel-openbabel-2-4-1/build
!cmake ../ -DPYTHON_BINDINGS=ON -DRUN_SWIG=ON
!make
!make install
%cd /content
import os
os.environ['LD_LIBRARY_PATH'] += ':/usr/local/lib'
!wget https://sourceforge.net/projects/smina/files/smina.static/download -O smina && chmod +x smina
!pip install pubchempy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
--2023-06-26 05:20:31--  https://github.com/openbabel/openbabel/archive/openbabel-2-4-1.tar.gz
Resolving github.com (github.com)... 140.82.114.3
Connecting to github.com (github.com)|140.82.114.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://codeload.github.com/openbabel/openbabel/tar.gz/refs/tags/openbabel-2-4-1 [following]
--2023-06-26 05:20:31--  https://codeload.github.com/openbabel/openbabel/tar.gz/refs/tags/openbabel-2-4-1
Resolving codeload.github.com (codeload.github.com)... 140.82.114.9
Connecting to codeload.github.com (codeload.github.com)|140.82.114.9|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [appli

In [None]:
import os
import re
import time
import warnings
import subprocess
import pandas as pd
import requests
import smtplib
import pubchempy as pcp
import numpy as np
from email import encoders
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from datetime import datetime
from tqdm import tqdm
from Bio import BiopythonWarning
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.PDB import PDBParser, PDBIO, Select, Chain
from Bio.PDB.Polypeptide import three_to_one, protein_letters_3to1
from Bio.Blast import NCBIWWW, NCBIXML
from google.colab import files


## Define functions

In [None]:

class NoHeteroSelect(Select):
    """Selector that keeps a residue only when the first field of its id is a blank."""

    def accept_residue(self, residue):
        """Return 1 when ``residue.get_id()[0]`` is a single space, otherwise 0."""
        hetero_field = residue.get_id()[0]
        return int(hetero_field == " ")

from Bio.PDB import PDBParser, PDBIO, Select

from Bio.PDB import PDBParser, PDBIO, Select

class LigandSelect(Select):
    """Selector that keeps residues named ``ligand_res_name`` on chain ``chain_id``."""

    def __init__(self, chain_id, ligand_res_name, *args, **kwargs):
        """Store the chain id and residue name to keep; extra arguments go to ``Select``."""
        super().__init__(*args, **kwargs)
        self.ligand_res_name = ligand_res_name
        self.chain_id = chain_id

    def accept_chain(self, chain):
        """Return 1 for the configured chain, 0 for every other chain."""
        return int(chain.get_id() == self.chain_id)

    def accept_residue(self, residue):
        """Return 1 when the residue name equals the configured ligand name, else 0."""
        return int(residue.get_resname() == self.ligand_res_name)

class ApoSelect(Select):
    """Selector that keeps chain ``chain_id`` minus residues named ``ligand_res_name``."""

    def __init__(self, chain_id, ligand_res_name, *args, **kwargs):
        """Store the chain id and the residue name to drop; extra arguments go to ``Select``."""
        super().__init__(*args, **kwargs)
        self.ligand_res_name = ligand_res_name
        self.chain_id = chain_id

    def accept_chain(self, chain):
        """Return 1 for the configured chain, 0 for every other chain."""
        return int(chain.get_id() == self.chain_id)

    def accept_residue(self, residue):
        """Return 0 for residues carrying the ligand name, 1 for all other residues."""
        return int(residue.get_resname() != self.ligand_res_name)

def prepare_structure(name, file, ligand_res_name, chain_id):
    """Split a PDB file into ligand and apo files and convert both to PDBQT.

    Files written to the current working directory:
      * ``{name}_{ligand_res_name}.pdb`` / ``.pdbqt`` - residues selected by
        ``LigandSelect(chain_id, ligand_res_name)``
      * ``{name}_apo.pdb`` and ``{name}_apo_prepared.pdbqt`` - residues
        selected by ``ApoSelect(chain_id, ligand_res_name)``

    Args:
        name: Structure id; also the prefix of every output file.
        file: Path of the PDB file to parse.
        ligand_res_name: Residue name of the ligand to extract.
        chain_id: Chain to keep.

    Returns:
        None.  obabel problems are printed rather than raised, matching the
        previous best-effort behaviour.
    """
    structure = PDBParser().get_structure(name, file)

    writer = PDBIO()
    writer.set_structure(structure)

    ligand_pdb = f"{name}_{ligand_res_name}.pdb"
    apo_pdb = f"{name}_apo.pdb"
    writer.save(ligand_pdb, LigandSelect(chain_id, ligand_res_name))
    writer.save(apo_pdb, ApoSelect(chain_id, ligand_res_name))

    conversions = (
        (ligand_pdb, f"{name}_{ligand_res_name}.pdbqt"),
        (apo_pdb, f"{name}_apo_prepared.pdbqt"),
    )
    for source_path, target_path in conversions:
        # Argument list instead of a shell string: names taken from the
        # spreadsheet cannot be re-interpreted by a shell.  subprocess.run
        # drains the pipes, so the child can never block on a full pipe the
        # way Popen(stdout=PIPE) followed by wait() can.
        command = ["obabel", source_path, "-O", target_path, "-h"]
        try:
            finished = subprocess.run(command, capture_output=True, text=True)
        except OSError as err:
            print(f"Could not start obabel for {source_path}: {err}")
            continue
        if finished.returncode != 0:
            print(f"obabel failed for {source_path} (exit code {finished.returncode}): "
                  f"{finished.stderr.strip()}")

def prepare_ligand(name, smiles):
    """Build ``{name}.pdbqt`` from a SMILES string with three obabel calls.

    Steps: SMILES -> ``{name}.sdf`` (``--gen2D``) -> ``{name}_3d.sdf``
    (``--gen3D``) -> ``{name}.pdbqt``.  The stdout of the last conversion is
    printed.

    Args:
        name: Prefix of the files that are written.
        smiles: SMILES text.  ``None``, NaN (what pandas yields for an empty
            spreadsheet cell) and blank strings are skipped without running
            obabel.

    Returns:
        None.
    """
    # Anything that is not a non-empty string cannot be a SMILES; this also
    # catches float('nan') coming from an empty template cell.
    if not isinstance(smiles, str) or not smiles.strip():
        return
    smiles = smiles.strip()

    steps = (
        ["obabel", f"-:{smiles}", "-O", f"{name}.sdf", "--gen2D"],
        ["obabel", f"{name}.sdf", "-O", f"{name}_3d.sdf", "--gen3D"],
        ["obabel", f"{name}_3d.sdf", "-O", f"{name}.pdbqt"],
    )

    last_stdout = ""
    for command in steps:
        # Argument lists keep SMILES characters away from the shell, and
        # subprocess.run drains the pipes so the child cannot stall on them.
        try:
            finished = subprocess.run(command, capture_output=True, text=True)
        except OSError as err:
            print(f"Could not start obabel for {name}: {err}")
            return
        if finished.returncode != 0:
            print(f"obabel failed for {name} (exit code {finished.returncode}): "
                  f"{finished.stderr.strip()}")
        last_stdout = finished.stdout

    # Print the output
    print(last_stdout)

def _run_smina(arguments):
    """Run ``/content/smina`` with ``arguments`` and return its stdout as text.

    ``subprocess.run`` collects the output, so the child cannot block on a
    full pipe.  Start-up failures and non-zero exit codes are printed, not
    raised; callers decide what a missing result means.
    """
    command = ["/content/smina", *arguments]
    try:
        finished = subprocess.run(command, capture_output=True, text=True)
    except OSError as err:
        print(f"Could not start smina: {err}")
        return ""
    if finished.returncode != 0:
        print(f"smina exited with code {finished.returncode}: {finished.stderr.strip()}")
    return finished.stdout


def _top_minimized_affinity(pdbqt_path):
    """Return the third field of the first ``REMARK minimizedAffinity`` line.

    Returns an empty string when the file is missing or has no such line,
    which is what the former grep | head | awk pipeline produced.
    """
    try:
        with open(pdbqt_path) as handle:
            for line in handle:
                if line.startswith("REMARK minimizedAffinity"):
                    fields = line.split()
                    return fields[2] if len(fields) > 2 else ""
    except OSError:
        return ""
    return ""


def _autobox_dock(receptor, ligand, autobox_ligand, search_effort, out_path):
    """Dock ``ligand`` into ``receptor`` and return the top minimized affinity."""
    # Remove a result left by an earlier run so that a failed docking cannot
    # be reported with a stale score.
    if os.path.exists(out_path):
        os.remove(out_path)
    _run_smina([
        "-r", receptor,
        "-l", ligand,
        "--autobox_ligand", autobox_ligand,
        "--autobox_add", "8",
        "--exhaustiveness", str(search_effort),
        "-o", out_path,
    ])
    return _top_minimized_affinity(out_path)


def redock(name, lig, exhaustiveness=8):
    """Re-dock the extracted ligand and also score its original pose.

    Args:
        name: Structure prefix used by ``prepare_structure``.
        lig: Ligand residue name (part of the ligand file name).
        exhaustiveness: Value passed to smina's ``--exhaustiveness``
            (default 8, the value that used to be hard-coded).

    Returns:
        ``(score_only, top_score)`` as strings: the value after
        ``Affinity:`` in smina's ``--score_only`` output, and the first
        ``minimizedAffinity`` of the re-docked poses ('' if none was written).

    Raises:
        ValueError: when the ``--score_only`` output has no ``Affinity:`` value.
    """
    top_score = redock_only(name, lig, exhaustiveness)

    # Each smina command is now run exactly once (previously twice).
    report = _run_smina([
        "--score_only",
        "-r", f"{name}_apo_prepared.pdbqt",
        "-l", f"{name}_{lig}.pdbqt",
    ])
    found = re.search(r"Affinity:\s*(\S+)", report)
    if found is None:
        raise ValueError(f"smina --score_only reported no affinity for {name}_{lig}")

    return found.group(1), top_score


def redock_only(name, lig, exhaustiveness=8):
    """Re-dock the extracted ligand into its own receptor.

    Writes ``{name}_{lig}_redock.pdbqt`` and returns the first
    ``minimizedAffinity`` found in it as a string ('' if there is none).
    ``exhaustiveness`` is passed to smina and defaults to 8.
    """
    ligand_path = f"{name}_{lig}.pdbqt"
    return _autobox_dock(
        f"{name}_apo_prepared.pdbqt",
        ligand_path,
        ligand_path,
        exhaustiveness,
        f"{name}_{lig}_redock.pdbqt",
    )


def dock(name, ligand_name, smiles, lig):
    """Prepare a compound from SMILES and dock it into ``name``'s receptor.

    The search box is taken from the extracted ligand file
    ``{name}_{lig}.pdbqt``; the search effort comes from the notebook-level
    ``exhaustiveness`` setting.

    Args:
        name: Structure prefix used by ``prepare_structure``.
        ligand_name: File prefix for the compound (``{ligand_name}.pdbqt``).
        smiles: SMILES of the compound, handed to ``prepare_ligand``.
        lig: Residue name of the reference ligand that defines the box.

    Returns:
        The first ``minimizedAffinity`` in
        ``{name}_{ligand_name}_result.pdbqt`` as a string ('' if none).
    """
    prepare_ligand(ligand_name, smiles)

    return _autobox_dock(
        f"{name}_apo_prepared.pdbqt",
        f"{ligand_name}.pdbqt",
        f"{name}_{lig}.pdbqt",
        exhaustiveness,
        f"{name}_{ligand_name}_result.pdbqt",
    )

def download_pdb_file(pdb_id: str) -> str:
    """Return a local path for a structure, downloading it on first use.

    ``pdb_id`` may be:
      * a URL (starts with ``http``) - fetched as is, cached under its last
        path component;
      * a path ending in ``.pdb`` - returned unchanged, nothing is fetched;
      * an id starting with ``AF`` - fetched from alphafold.ebi.ac.uk;
      * anything else - fetched from files.rcsb.org.

    Downloads are cached in ``/tmp/pdb/``; an existing cache file is reused.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    PDB_DIR ="/tmp/pdb/"
    os.makedirs(PDB_DIR, exist_ok=True)

    # url or pdb_id
    if pdb_id.startswith('http'):
        url = pdb_id
        filename = url.split('/')[-1]
    elif pdb_id.endswith(".pdb"):
        return pdb_id
    else:
        if pdb_id.startswith("AF"):
            url = f'https://alphafold.ebi.ac.uk/files/{pdb_id}-model_v3.pdb'
        else:
            # HTTPS instead of plain HTTP for the RCSB download.
            url = f'https://files.rcsb.org/view/{pdb_id}.pdb'
        filename = f'{pdb_id}.pdb'

    cache_path = os.path.join(PDB_DIR, filename)
    if os.path.exists(cache_path):
        return cache_path

    # A timeout keeps a stalled server from hanging the notebook forever.
    pdb_req = requests.get(url, timeout=60)
    pdb_req.raise_for_status()

    # Write to a temporary name and rename afterwards, so an interrupted
    # write never leaves a truncated file that the cache check would accept.
    partial_path = cache_path + ".part"
    with open(partial_path, 'w') as handle:
        handle.write(pdb_req.text)
    os.replace(partial_path, cache_path)
    return cache_path




In [None]:

def get_sequence_from_pdb(pdb_file):
    """Return the one-letter sequence of the first model in ``pdb_file``.

    Residues of every chain of the first model are concatenated in file
    order; residues whose name is not a key of ``protein_letters_3to1`` are
    skipped.  Only the first model is read: iterating over all models would
    repeat the sequence once per model for multi-model files.

    Args:
        pdb_file: Path of the PDB file to parse.

    Returns:
        The sequence as a string ('' when the structure has no model).
    """
    structure = PDBParser().get_structure("protein", pdb_file)

    first_model = next(iter(structure), None)
    if first_model is None:
        return ''

    return ''.join(
        protein_letters_3to1[residue.get_resname()]
        for chain in first_model
        for residue in chain
        if residue.get_resname() in protein_letters_3to1
    )


def get_blast_results(sequence, sim, max_evalue=0.01):
    """BLAST ``sequence`` against the ``pdb`` database and return matching ids.

    A hit is kept when one of its HSPs has an e-value below ``max_evalue``
    and an identity fraction (``identities / align_length``) of at least
    ``sim``.  Each id is reported once, in first-seen order, even when
    several HSPs of the same hit qualify.

    Args:
        sequence: Query as a plain string; any other object is converted with
            ``sequence.format("fasta")`` as before.
        sim: Minimum identity fraction, e.g. ``0.9``.
        max_evalue: Exclusive upper bound for the HSP e-value (default 0.01,
            the value that used to be hard-coded).

    Returns:
        List of the second ``|``-separated field of each matching hit title.
    """
    # str.format("fasta") on a plain string just returns the string, so only
    # non-string queries need the conversion.
    query = sequence if isinstance(sequence, str) else sequence.format("fasta")

    # Run BLAST search
    result_handle = NCBIWWW.qblast("blastp", "pdb", query)

    matching_ids = []
    try:
        for blast_record in NCBIXML.parse(result_handle):
            for alignment in blast_record.alignments:
                for hsp in alignment.hsps:
                    if hsp.expect < max_evalue and hsp.identities / float(hsp.align_length) >= sim:
                        pdb_id = alignment.title.split('|')[1].strip()
                        if pdb_id not in matching_ids:
                            matching_ids.append(pdb_id)
    finally:
        # The handle was never closed before.
        result_handle.close()

    return matching_ids


def find_similar_pdb_entries(pdb_file, sim=0.9):
    """Read the sequence of ``pdb_file`` and return its BLAST matches.

    The sequence comes from ``get_sequence_from_pdb``; it is passed, together
    with ``sim``, to ``get_blast_results`` whose result is returned unchanged.
    """
    return get_blast_results(get_sequence_from_pdb(pdb_file), sim)


def get_smiles(compound_name):
    """Look a compound up by name with pubchempy and return its isomeric SMILES.

    Returns the ``isomeric_smiles`` of the first match, or ``None`` when there
    is no match or the lookup raises (the error is printed).
    """
    try:
        matches = pcp.get_compounds(compound_name, 'name')
        return matches[0].isomeric_smiles if matches else None
    except Exception as e:
        print(f"Error occurred while fetching SMILES for {compound_name}: {str(e)}")
    return None


## Upload template

In [None]:
from google.colab import files
import pandas as pd

# Ask for the filled-in docking template and load it into `df`.
uploaded = files.upload()

# A cancelled upload returns an empty mapping; fail with a clear message
# instead of an IndexError.
if not uploaded:
    raise ValueError("No file was uploaded. Run this cell again and select the docking template.")

# Only the first uploaded file is used.
file_name = next(iter(uploaded))
df = pd.read_excel(file_name)

Saving Docking_template_Pfano Monday.xlsx to Docking_template_Pfano Monday.xlsx


In [None]:
import os
import requests

def _strip_text(value):
    """Strip surrounding whitespace from strings; leave other values untouched."""
    return value.strip() if isinstance(value, str) else value


# Receptor rows.  The three columns are filtered TOGETHER: dropping blanks
# column by column would shift the PDB / ligand / chain pairing as soon as a
# single cell is empty.
targets = df[['PDB', 'Ligands', 'Chains']].dropna()

incomplete_rows = int(df['PDB'].notna().sum()) - len(targets)
if incomplete_rows:
    print(f'Skipping {incomplete_rows} PDB row(s) without a ligand or chain entry.')

# The same stripped ids are used for downloading and for all later file names.
PDBs = targets['PDB'].map(_strip_text)
ligs = targets['Ligands'].map(_strip_text)
chains = targets['Chains'].map(_strip_text)

PDB_files = [download_pdb_file(pdb_id) for pdb_id in PDBs]

# Compound rows: drop rows where both the name and the SMILES are blank
# (padding below the last compound); rows with only one of them are kept.
ligand_rows = df[['Compound names', 'SMILES']].dropna(how='all')
compounds = ligand_rows['Compound names']
smiles = ligand_rows['SMILES']

# File-system-safe aliases (compound_0, compound_1, ...) and the mapping back
# to the names given in the template.
compound_names = [f'compound_{i}' for i in range(len(compounds))]
compounds_dict = dict(zip(compound_names, compounds))


## Perform redock controls

In [None]:
#Redock controls
# For every receptor: split ligand and apo protein, re-dock the ligand, and
# collect both the score of the original pose and the best re-docked score.

redock_affinities = []
score_onlys = []
redock_rows = []

with warnings.catch_warnings(record=True) as w:

  # Record every BiopythonWarning in `w` instead of printing it
  warnings.simplefilter("always", BiopythonWarning)

  # PDB_files holds the path returned by download_pdb_file for each entry of PDBs
  for r, pdb_path, l, c in zip(PDBs, PDB_files, ligs, chains):
    try:
      # Preparation is inside the try so one bad structure cannot abort the run
      prepare_structure(r, pdb_path, l, c)
      # redock() returns (score of the original pose, best re-docked score)
      score_only, redock_score = redock(r, l)
      print(f'{r}({c}):{l} score only: {score_only}, redock: {redock_score}')
    except Exception as err:
      print(f'{r}({c}):{l} FAILED: {err}')
      score_only = redock_score = 'FAILED'

    redock_affinities.append(redock_score)
    score_onlys.append(score_only)
    redock_rows.append({
        'PDB': r,
        'Ligands': l,
        'Chains': c,
        'Re-dock affinity': redock_score,
        'Score-only affinity': score_only,
    })

  data = pd.DataFrame(
      redock_rows,
      columns=['PDB', 'Ligands', 'Chains', 'Re-dock affinity', 'Score-only affinity'],
  )

  now = datetime.now()
  date_string = now.strftime("%Y-%m-%d")
  data.to_excel(f'redock_scores_{date_string}.xlsx')

  # Open the file once so every warning is kept (re-opening in 'w' mode per
  # warning left only the last one); the file exists even without warnings.
  with open(f'warnings_{date_string}.txt','w') as f:
    for warning in w:
      f.write(f'{warning.message}\n')

# SECURITY: credentials must not live in the notebook.  The Gmail app password
# that used to be hard-coded here has been published with the notebook and
# should be revoked.  Sender, password and recipient are now read from the
# environment (or from a `recipient` variable defined in an earlier cell).
gmail_user = os.environ.get('GMAIL_USER')
gmail_password = os.environ.get('GMAIL_APP_PASSWORD')
to = globals().get('recipient') or os.environ.get('DOCKING_NOTIFY_TO')

if gmail_user and gmail_password and to and 'send_email' in globals():
  send_email('Re-dock finished', 'Please find attached the redock scores', to, gmail_user, gmail_password, [f'redock_scores_{date_string}.xlsx',f'warnings_{date_string}.txt'])
else:
  print('E-mail notification skipped: define send_email() and set GMAIL_USER, '
        'GMAIL_APP_PASSWORD and a recipient (variable `recipient` or DOCKING_NOTIFY_TO).')


## Perform docking experiments:

In [None]:
#Docking
# Dock every compound of the template into every receptor and write one
# result row per (receptor, compound) pair.

docking_scores = []
docking_rows = []
total = len(PDBs) * len(compound_names)
count = 0

with warnings.catch_warnings(record=True) as w:

  # Record every BiopythonWarning in `w` instead of printing it
  warnings.simplefilter("always", BiopythonWarning)

  for r,l in zip(PDBs,ligs):
    for n,s in zip(compound_names,smiles):
      count += 1
      try:
        # dock() returns '' when smina wrote no pose; report that as a failure too
        score = dock(r, n, s, l) or 'FAILED'
      except Exception as err:
        print(f'  docking {r}:{compounds_dict[n]} raised {err!r}')
        score = 'FAILED'

      print(f'({count} of {total}) {r}:{compounds_dict[n]} score: {score}')
      docking_scores.append(score)
      docking_rows.append({'PDB': r, 'ligand': compounds_dict[n], 'Affinity': score})

  # One row per docking run.  Assigning PDBs, compounds and the score list as
  # separate columns cannot work: they have different lengths.
  results = pd.DataFrame(docking_rows, columns=['PDB', 'ligand', 'Affinity'])

  now = datetime.now()
  date_string = now.strftime("%Y-%m-%d")
  results.to_excel(f'Docking_results_{date_string}.xlsx')

  # Open the file once so every warning is kept (re-opening in 'w' mode per
  # warning left only the last one).
  with open(f'warnings_{date_string}.txt','w') as f:
    for warning in w:
      f.write(f'{warning.message}\n')


