In [2]:
# Set the process name to be human readable in htop
import setproctitle
setproctitle.setproctitle("07_Run_Docking")

# NOTE(review): the star import hides where names come from. `input_dir`,
# `output_dir` and `sp` are used by later cells but are not defined anywhere in
# this notebook, so they presumably come from config -- confirm.
from config import *

import pandas as pd
# Allow up to 999 columns to be shown when a DataFrame is displayed
pd.options.display.max_columns = 999

import glob
import os

from tqdm import tqdm, tqdm_notebook
# Rebinds tqdm_notebook to the object exported by tqdm's private _tqdm_notebook module
from tqdm._tqdm_notebook import tqdm_notebook

# Hook tqdm into pandas; both registration calls are kept exactly as written
tqdm.pandas(tqdm_notebook)
tqdm_notebook.pandas()

This notebook is a wrapper to call the HADDOCK protein-protein interaction docking protocol for all interactions.

- Inputs:
  - Interactions.txt
  - [P1]\_[P2]\_[Chain].pdb (Oriented Structures)
  - [P1]\_[P2].txt (Parsed ECLAIR Predictions)

- Outputs:
  - [P1]\_[P2] (Interaction Haddock Run Directory created under "Docking_Runs")


- Dependencies:
  - Must be run after 02_Fetch_Eclair_Preds and 04_Select_Models
  - Must have HADDOCK installed locally
  - Calls run_haddock.py
    - Which itself calls srescalc.py
      - **NOTE:** srescalc.py *may not* currently be properly extracted from the Yu Lab's server and may not run successfully in this repository. The raw code is provided, but it calls several separate dependencies, and I have not been able to thoroughly confirm that nothing specific to our machine is still linked to it.
      - If any end user encounters errors running srescalc.py from this repository, please contact the authors.

# Run Protein-Protein Interaction Docking

In [3]:
# Load the tab-separated interaction table from the input directory
interactions_file = "{0}/Interactions.txt".format(input_dir)
interactions = pd.read_csv(interactions_file, sep="\t")

In [6]:
# Load every file under Eclair_Predictions into a DataFrame, keyed by the
# file name up to its first "."
inter2preds = {}
for pred_path in glob.glob("{0}/Eclair_Predictions/*".format(output_dir)):
    pred_key = os.path.basename(pred_path).partition(".")[0]
    inter2preds[pred_key] = pd.read_csv(pred_path, sep="\t")

In [5]:
# Rows with Pred >= this cutoff have their Pos passed to run_haddock.py as ires1 / ires2
PRED_CUTOFF = 0.24


def _ensure_end_record(pdb_path):
    """Append an "END" record to the PDB file at `pdb_path` if it lacks one.

    HADDOCK needs the structure file to finish with an "END" line. The file is
    only touched when the record is missing, and the text is appended rather
    than rewriting the whole file.

    Returns True if the file was modified, False otherwise.
    Raises IOError if the file cannot be read or written.
    """
    with open(pdb_path, "r") as handle:
        contents = handle.read()

    # Look at the last non-blank line so a padded "END   ", trailing blank lines
    # or a final "END" without a newline are all recognised as already present
    stripped = contents.rstrip()
    if stripped and stripped.splitlines()[-1].strip() == "END":
        return False

    # Start a fresh line first if the last record is not newline-terminated,
    # otherwise "END" would be glued onto the end of that record
    prefix = "\n" if contents and not contents.endswith("\n") else ""
    with open(pdb_path, "a") as handle:
        handle.write(prefix + "END\n")
    return True


def _residues_above_cutoff(preds, prot):
    """Return the comma-joined `Pos` values of rows for `prot` with Pred >= PRED_CUTOFF."""
    selected = preds[(preds["Prot"] == prot) & (preds["Pred"] >= PRED_CUTOFF)]
    return ",".join(selected["Pos"].map(str).to_list())


# No passive residues are supplied; the literal string "None" is handed to run_haddock.py
passives1 = "None"
passives2 = "None"

# Number of interactions for which ECLAIR predictions were found
work = 0
for uniA, uniB in tqdm_notebook(interactions[["P1", "P2"]].values):
    name = "_".join([uniA, uniB])

    # Try to grab structures for docking this interaction from the Oriented Structures
    # NOTE: This could be replaced with the Undocked_Structures (the initial orientation should
    # not matter for HADDOCK)
    try:
        pdb1 = glob.glob("{0}/Oriented_Structures/{1}*{2}*_A.pdb".format(output_dir, uniA, uniB))[0]
        pdb2 = glob.glob("{0}/Oriented_Structures/{1}*{2}*_B.pdb".format(output_dir, uniA, uniB))[0]
    except IndexError:
        # No oriented structure pair exists for this interaction
        print("B")
        continue

    # May need to manually add "END" line to PDB structures (necessary for HADDOCK)
    try:
        for pdb_path in (pdb1, pdb2):
            _ensure_end_record(pdb_path)
    except IOError:
        print("Missing File {0}".format(pdb_path))
        continue

    # Predictions are keyed by the alphabetically sorted pair. A missing entry is
    # a KeyError (the dict was loaded up front), not an IOError.
    # NOTE(review): Prot 0 / 1 presumably follow the sorted order while pdb1 / pdb2
    # follow the (uniA, uniB) order of Interactions.txt -- confirm these always agree.
    try:
        preds = inter2preds["_".join(sorted([uniA, uniB]))]
    except KeyError:
        print("No ECLAIR predictions for {0}".format(name))
        continue

    ires1 = _residues_above_cutoff(preds, 0)
    ires2 = _residues_above_cutoff(preds, 1)
    work += 1

    # Report interactions where one side has no residue passing the cutoff
    if len(ires1) == 0 or len(ires2) == 0:
        print("{0} {1}".format(uniA, uniB))

    # Pass an argument list instead of a shell string: an empty ires value then
    # stays in its own position instead of vanishing and shifting later arguments,
    # and paths containing spaces or shell metacharacters are passed through intact.
    sp.call([
        "python", "run_haddock.py",
        name,
        os.path.realpath(pdb1),
        os.path.realpath(pdb2),
        ires1,
        ires2,
        passives1,
        passives2,
    ])

HBox(children=(IntProgress(value=0, max=10), HTML(value=u'')))

B
A
A
A
A
B
A
A
A
B



In [118]:
0

0