In [2]:
# Set the process name to be human readable in htop
import setproctitle
setproctitle.setproctitle("Guided_Docking_Test")

import helper as my
import pandas as pd
pd.options.display.max_columns = 999
import numpy as np

import os
os.environ["TMPDIR"] = "/tmp"
import glob

from tqdm import tqdm, tqdm_notebook
from tqdm._tqdm_notebook import tqdm_notebook

tqdm.pandas(tqdm_notebook)
tqdm_notebook.pandas()

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

from collections import defaultdict

import scipy

In [3]:
from mjm_tools import unzip_res_range, zip_res_range

# Initial Test Using Known Structures

In [4]:
# Read in Info for Ires / Sifts Mapping

In [5]:
ires = pd.read_csv("[REDACTED_PATH]/ires/parsed_files/ires_perpdb_alltax.txt", sep="\t")

In [6]:
sifts = pd.read_csv("[REDACTED_PATH]/sifts/parsed_files/pdbresiduemapping.txt", sep="\t")

In [7]:
pos_maps = sifts.set_index(["PDB", "Chain", "UniProt"])[["MappableResInPDBChainOnUniprotBasis", "MappableResInPDBChainOnPDBBasis"]].apply(lambda x: dict(zip(unzip_res_range(x[1]), unzip_res_range(x[0]))), axis=1).to_dict()

In [572]:
# Get access to ECLAIR Predictions
host = "[REDACTED]"
db = "eclair"
user, pw = my.retrieve("MYSQL")
mydb = my.MySQLdb.connect(host=host, user=user, passwd=pw, db=db)

In [575]:
# Select random interfaces from exisint heteromeric PDB chain pairs
selections = []
tmp = ires[(ires["UniProtA"] != ires["UniProtB"])&(ires["TaxIDA"] == 9606.0)&(ires["TaxIDB"] == 9606.0)]
tmp = tmp.sample(len(tmp))
for uniA, uniB, pdb, chainA, chainB, iresA, iresB, in tqdm_notebook(tmp[["UniProtA", "UniProtB", "PDB", "ChainA", "ChainB", "UniProtIresA", "UniProtIresB"]].values):
    if(len(pd.read_sql_query("select * from eclair_predictions where P1 = '{0}' and P2 = '{1}'".format(*sorted([uniA, uniB])), mydb)) > 0):
        selections.append([uniA, uniB, pdb, chainA, chainB, iresA, iresB])
    if(len(selections) >= 50):
        break

HBox(children=(IntProgress(value=0, max=9295), HTML(value=u'')))




In [576]:
len(selections)

50

In [577]:
# Reindex structures so they are numbered by Uniprot pos
for uniA, uniB, pdb, chainA, chainB, iresA, iresB in tqdm_notebook(selections):
    pos_mapA = pos_maps[pdb, chainA, uniA]
    pos_mapB = pos_maps[pdb, chainB, uniB]
    
    pdb_df = my.pdb2df(pdb)
    
    pdb_df = pdb_df[pdb_df["Data Type"] == "ATOM"]
    pdb_df = pdb_df[pdb_df["Chain"].map(lambda x: x == chainA or x == chainB)]
    def do(x):
        chain, pos = x
        try:
            if(chain == chainA):
                return pos_mapA[str(pos)]
            else:
                return pos_mapB[str(pos)]
        except:
            return -pos
    pdb_df["Residue ID"] = pdb_df[["Chain", "Residue ID"]].apply(do, axis=1)
    
    my.df2pdb("reindexed_structures/{0}_{1}.pdb".format(uniA, uniB), pdb_df)

HBox(children=(IntProgress(value=0, max=50), HTML(value=u'')))

I do not know why this is happening, maybe consult the PDB format summary again (http://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html)


In [8]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import normalize

In [93]:
def x_rotation_matrix(theta):
    return np.array([1,              0,              0,
                     0,              np.cos(theta),  -np.sin(theta),
                     0,              np.sin(theta),  np.cos(theta)
                    ]).reshape((3, 3))
# FUNCTION END

def y_rotation_matrix(theta):
    return np.array([np.cos(theta),  0,              np.sin(theta),
                     0,              1,              0,
                     -np.sin(theta), 0,              np.cos(theta)
                    ]).reshape((3, 3))
# FUNCTION END

def z_rotation_matrix(theta):
    return np.array([np.cos(theta),  -np.sin(theta), 0,
                     np.sin(theta),  np.cos(theta), 0,
                     0,               0,             1
                    ]).reshape((3, 3))
# FUNCTION END

def rotate(xyz, theta_x, theta_y, theta_z, origin=np.array([0, 0, 0]), return_function=True):
    rot_matx = x_rotation_matrix(theta_x)
    rot_maty = y_rotation_matrix(theta_y)
    rot_matz = z_rotation_matrix(theta_z)
    
    if(return_function):
        return lambda x: rot_matz.dot(rot_maty.dot(rot_matx.dot(x - origin))) + origin
    return rot_matz.dot(rot_maty.dot(rot_matx.dot(xyz - origin))) + origin
# FUNCTION END

def translate(df, offset):
    df[["X", "Y", "Z"]] = df[["X", "Y", "Z"]] - offset
# FUNCTION END

def rotate_pdb(df, tx=None, ty=None, tz=None, origin=np.zeros(3), rot_func=None, angle_max=180):
    if(origin is "center"):
        origin = center_of_mass(pose, chain)
    if(tx is None):
        tx, ty, tz = (np.random.random(size=3) - 0.5)*angle_max*np.pi/180.0
    
    if(rot_func is None):
        rot_func = rotate(origin, tx, ty, tz, origin, True)
    
    # Apply the rotation
    tmp = np.concatenate(df[["X", "Y", "Z"]].apply(lambda x: rot_func(np.array(x)), axis=1).to_list(), axis=0).reshape(len(df), 3)
    df["X"] = np.round(tmp[:,0], 3)
    df["Y"] = np.round(tmp[:,1], 3)
    df["Z"] = np.round(tmp[:,2], 3)
# FUNCTION END

def orient_pdb(df, refA, refB):
    # Center
    translate(df, refA)
    refB = refB - refA
    
    # Generate First Rotation
    tmp = refB
    
    d_x = (tmp[0] - 0)
    d_z = (tmp[2] - 0)
    if(d_x*d_z != 0):
        sign = 1
        theta1 = sign*np.arctan(d_z / d_x)
        rot_mat1 = y_rotation_matrix(theta1)
    else:
        rot_mat1 = np.identity(3)
    
    # Generate Second Rotation
    tmp = rot_mat1.dot(tmp)
    
    d_x = tmp[0] - 0
    d_y = tmp[1] - 0
    if(d_x*d_y != 0):
        sign = -1#*[-1, 1][d_x*d_z <= 0]
        theta2 = -sign*np.arctan(d_x / d_y)
        rot_mat2 = z_rotation_matrix(theta2)
    else:
        rot_mat2 = np.identity(3)
    
    # Generate Third Rotation (to correct something?)
    tmp = rot_mat2.dot(tmp)
    
    if(tmp[1] < 0):
        theta3 = np.pi
        rot_mat3 = z_rotation_matrix(theta3)
    else:
        rot_mat3 = np.identity(3)
    
    tmp = rot_mat3.dot(tmp)
    
    # Apply the rotation
    rot_func = lambda x: rot_mat3.dot(rot_mat2.dot(rot_mat1.dot(np.array(x))))
    rotate_pdb(df, rot_func=rot_func)
# FUNCTION END

def rot_matrix_from_plane(ref1, ref2, ref3):
    # Center by reference 1
    original_delta = ref1.copy()

    # Rotate along Z axis so that ref 2 is at Y=0
    tmp = ref2 - original_delta

    d_x = tmp[0] - 0
    d_y = tmp[1] - 0
    if(d_x*d_y != 0):
        sign = -1#*[-1, 1][d_x*d_z <= 0]
        theta1 = sign*np.arctan(d_y / d_x)
        rot_mat1 = z_rotation_matrix(theta1)
    else:
        rot_mat1 = np.identity(3)

    # Roate along X axis so that ref 3 is at Z=0
    tmp = rot_mat1.dot(ref3 - original_delta)

    d_y = tmp[1] - 0
    d_z = tmp[2] - 0
    if(d_x*d_y != 0):
        sign = -1#*[-1, 1][d_x*d_z <= 0]
        theta2 = -sign*np.arctan(d_y / d_z)
        rot_mat2 = x_rotation_matrix(theta2)
    else:
        rot_mat2 = np.identity(3)

    # Return a function for this transformation
    return lambda x: rot_mat2.dot(rot_mat1.dot(x - original_delta))
# FUNCTION END

def orient_pdb_by_linear_regression(df, fit_col="Pred", up=True):
    # Generate Linear Regression Fit to the provided Label Column
    clf = LinearRegression()
    clf.fit(df[(~pd.isnull(df[fit_col]))&(df["Atom Name"] == "CA")][["X", "Y", "Z"]], df[(~pd.isnull(df[fit_col]))&(df["Atom Name"] == "CA")][fit_col])
    
    # Generate rotation matrix to match plane
    ref1 = np.array([-clf.intercept_/clf.coef_[0], 0, 0])
    ref2 = np.array([0, -clf.intercept_/clf.coef_[1], 0])
    ref3 = np.array([0, 0, -clf.intercept_/clf.coef_[2]])
    
    rot_func = rot_matrix_from_plane(ref1, ref2, ref3)
    
    # Apply Rotation and Center
    rotate_pdb(df, rot_func=rot_func)
    translate(df, df[["X", "Y", "Z"]].mean().values)
    
    # Final rotation to make sure interface is pointing the right direction
    if(up == True):
        if(clf.coef_[1] < 0):
            rotate_pdb(df, tx=np.deg2rad(180), ty=0, tz=0)
    else:
        if(clf.coef_[1] > 0):
            rotate_pdb(df, tx=np.deg2rad(180), ty=0, tz=0)
# FUNCTION END

In [10]:
# Pymol Imports

In [11]:
import __main__
__main__.pymol_argv = ['pymol','-cqk'] # Pymol: quiet and no GUI

import pymol
pymol.finish_launching()
from pymol import cmd as pymolCmd
pymolCmd.reinitialize()

In [596]:
# Reorient all structures so that interface faces are facing each other
for uniA, uniB, pdb, chainA, chainB, iresA, iresB in tqdm_notebook(selections):
    # Read Structure
    pdb_df = my.pdb2df("reindexed_structures/{0}_{1}.pdb".format(uniA, uniB))
    
    # Split by Chain
    A_df = pdb_df[pdb_df["Chain"] == chainA].copy()
    B_df = pdb_df[pdb_df["Chain"] == chainB].copy()
    
    if(len(A_df) == 0 or len(B_df) == 0):
        print "ERROR: no data for", uniA, uniB, pdb, chainA, chainB
        continue
    
    # Get Eclair Preds
    tmp = pd.read_sql_query("select * from eclair_predictions where P1 = '{0}' and P2 = '{1}'".format(*sorted([uniA, uniB])), mydb)
    prot = int(uniA > uniB)
    #print prot
    #print uniA, uniB
    tmp = tmp.set_index(["Prot", "Pos"])["Pred"].to_dict()
    
    # Assign Prediction Scores
    if(uniA != uniB):
        A_df["Pred"] = A_df["Residue ID"].map(lambda x: tmp[(prot, x)] if x > 0 else np.nan)
        B_df["Pred"] = B_df["Residue ID"].map(lambda x: tmp[(1-prot, x)] if x > 0 else np.nan)
    else:
        A_df["Pred"] = A_df["Residue ID"].map(lambda x: tmp[(0, x)] if x > 0 else np.nan)
        B_df["Pred"] = B_df["Residue ID"].map(lambda x: tmp[(0, x)] if x > 0 else np.nan)
    
    A_df_orig = A_df.copy()
    B_df_orig = B_df.copy()
    
    # Begin Reorientation
    
    # Handle Chain A
    #centroid_A = A_df[["X", "Y", "Z"]].mean().values
    #ires_centroid_A = A_df[(A_df["Atom Name"] == "CA")&(~pd.isnull(A_df["Pred"]))][["X", "Y", "Z", "Pred"]].apply(lambda x: np.array(x[:3])*x[3], axis=1).sum() / A_df[(A_df["Atom Name"] == "CA")&(~pd.isnull(A_df["Pred"]))]["Pred"].sum()
    
    #orient_pdb(A_df, centroid_A, ires_centroid_A)
    orient_pdb_by_linear_regression(A_df, up=True)
    
    
    # Handle Chain B
    #centroid_B = B_df[["X", "Y", "Z"]].mean().values
    #ires_centroid_B = B_df[(B_df["Atom Name"] == "CA")&(~pd.isnull(B_df["Pred"]))][["X", "Y", "Z", "Pred"]].apply(lambda x: np.array(x[:3])*x[3], axis=1).sum() / B_df[(B_df["Atom Name"] == "CA")&(~pd.isnull(B_df["Pred"]))]["Pred"].sum()

    #orient_pdb(B_df, centroid_B, ires_centroid_B)
    orient_pdb_by_linear_regression(B_df, up=False)
    #rotate_pdb(B_df, tx=np.deg2rad(180), ty=0, tz=0)
    
    # Final Translation to separate chains by 5 A
    translate(A_df, np.array([0, A_df["Y"].max() + 2.5, 0]))
    translate(B_df, np.array([0, B_df["Y"].min() - 2.5, 0]))
    
    # ADD CODE TO OPTIMZIE ROTATION ALONG Y AXIS (ONLY IN CHAIN B)
    
    # Record Centroid Locations
    #centroid_A = A_df[["X", "Y", "Z"]].mean().values
    ires_centroid_A = A_df[(A_df["Atom Name"] == "CA")&(~pd.isnull(A_df["Pred"]))][["X", "Y", "Z", "Pred"]].apply(lambda x: np.array(x[:3])*x[3], axis=1).sum() / A_df[(A_df["Atom Name"] == "CA")&(~pd.isnull(A_df["Pred"]))]["Pred"].sum()
    #
    #centroid_B = B_df[["X", "Y", "Z"]].mean().values
    ires_centroid_B = B_df[(B_df["Atom Name"] == "CA")&(~pd.isnull(B_df["Pred"]))][["X", "Y", "Z", "Pred"]].apply(lambda x: np.array(x[:3])*x[3], axis=1).sum() / B_df[(B_df["Atom Name"] == "CA")&(~pd.isnull(B_df["Pred"]))]["Pred"].sum()
    
    my.df2pdb("Test1.pdb".format(uniA, uniB), pd.concat([A_df, B_df]))
    
    # Make sure X:Z centorids of the Interfaces Line Up
    translate(A_df, np.array([ires_centroid_A[0], 0, ires_centroid_A[2]]))
    translate(B_df, np.array([ires_centroid_B[0], 0, ires_centroid_B[2]]))
    
    my.df2pdb("Test2.pdb".format(uniA, uniB), pd.concat([A_df, B_df]))
    

    # Save Reoriented Structures
    my.df2pdb("oriented_structures/{0}_{1}.pdb".format(uniA, uniB), pd.concat([A_df, B_df]))
    
    
    
    # Create Pymol Session
    pymolCmd.reinitialize()
    
    # Load Original Structure
    name = "{0}_{1}".format(uniA, uniB)
    pymolCmd.load("reindexed_structures/{0}.pdb".format(name), name + "_RAW")
    
    my.df2pdb("oriented_structures/{0}_{1}_A.pdb".format(uniA, uniB), A_df)
    my.df2pdb("oriented_structures/{0}_{1}_B.pdb".format(uniA, uniB), B_df)
    
    # Load Reoriented Structures
    pymolCmd.load("oriented_structures/{0}_A.pdb".format(name), name + "_A")
    pymolCmd.load("oriented_structures/{0}_B.pdb".format(name), name + "_B")
    
    
    # Color By Prediction
    cmapA = cm.get_cmap('Greens')   
    cmapB = cm.get_cmap('Blues')
    
    for resi, pred in A_df[["Residue ID", "Pred"]].drop_duplicates().values:
        if(np.isnan(pred)):
            pred = 0
        pymolCmd.color(matplotlib.colors.rgb2hex(cmapA(float(pred))[:3]).replace("#", "0x"), "{0}_A and resi {2}".format(name, chainA, int(resi)))
        pymolCmd.color(matplotlib.colors.rgb2hex(cmapA(float(pred))[:3]).replace("#", "0x"), "{0}_RAW and chain {1} and resi {2}".format(name, chainA, int(resi)))
    
    for resi, pred in B_df[["Residue ID", "Pred"]].drop_duplicates().values:
        if(np.isnan(pred)):
            pred = 0
        pymolCmd.color(matplotlib.colors.rgb2hex(cmapB(float(pred))[:3]).replace("#", "0x"), "{0}_B and resi {2}".format(name, chainB, int(resi)))
        pymolCmd.color(matplotlib.colors.rgb2hex(cmapB(float(pred))[:3]).replace("#", "0x"), "{0}_RAW and chain {1} and resi {2}".format(name, chainB, int(resi)))
    
    
    # Align Raw Structure to A
    pymolCmd.align("{0}_RAW and chain {1}".format(name, chainA), "{0}_A".format(name))
    
    # Save Structure Session
    pymolCmd.save("PyMolSessions/{0}.pse".format(name))
    
    #mins = 10*np.ceil(np.abs(A_df[["X", "Y", "Z"]].min() / 10))*(1-2*(A_df[["X", "Y", "Z"]].min() < 0)) - 20
    #maxes = 10*np.ceil(np.abs(A_df[["X", "Y", "Z"]].max() / 10))*(1-2*(A_df[["X", "Y", "Z"]].max() < 0)) + 20
    #clf = LinearRegression()
    #clf.fit(A_df[(~pd.isnull(A_df["Pred"]))&(A_df["Atom Name"] == "CA")][["X", "Y", "Z"]], A_df[(~pd.isnull(A_df["Pred"]))&(A_df["Atom Name"] == "CA")]["Pred"])
    #print uniA, uniB, "A", clf.coef_
    #
    ## Add colored point cloud
    #for x in tqdm_notebook(range(int(mins[0]), int(maxes[0])+1, 10)):
    #    for y in range(int(mins[1]), int(maxes[1])+1, 10):
    #        for z in range(int(mins[2]), int(maxes[2])+1, 10):
    #            pred2 = clf.predict(np.array([x, y, z]).reshape((1, 3)))
    #            c = matplotlib.colors.rgb2hex(cmapA(float(pred2))[:3]).replace("#", "0x")
    #            pymolCmd.pseudoatom("A_{0}_{1}_{2}".format(x, y, z), pos=[x, y, z], color=c)
    
    
    #mins = 10*np.ceil(np.abs(B_df[["X", "Y", "Z"]].min() / 10))*(1-2*(B_df[["X", "Y", "Z"]].min() < 0)) - 20
    #maxes = 10*np.ceil(np.abs(B_df[["X", "Y", "Z"]].max() / 10))*(1-2*(B_df[["X", "Y", "Z"]].max() < 0)) + 20
    #clf = LinearRegression()
    #clf.fit(B_df[(~pd.isnull(B_df["Pred"]))&(B_df["Atom Name"] == "CA")][["X", "Y", "Z"]], B_df[(~pd.isnull(B_df["Pred"]))&(B_df["Atom Name"] == "CA")]["Pred"])
    #print uniA, uniB, "B", clf.coef_
    #
    ## Add colored point cloud
    #for x in tqdm_notebook(range(int(mins[0]), int(maxes[0])+1, 10)):
    #    for y in range(int(mins[1]), int(maxes[1])+1, 10):
    #        for z in range(int(mins[2]), int(maxes[2])+1, 10):
    #            pred2 = clf.predict(np.array([x, y, z]).reshape((1, 3)))
    #            c = matplotlib.colors.rgb2hex(cmapB(float(pred2))[:3]).replace("#", "0x")
    #            pymolCmd.pseudoatom("B_{0}_{1}_{2}".format(x, y, z), pos=[x, y, z], color=c)

    
    
    
    
    
    continue
    
    # Load Original / Reoriented Structure
    name = "{0}_{1}".format(uniA, uniB)
    pymolCmd.load("reindexed_structures/{0}.pdb".format(name), name + "_RAW")
    pymolCmd.load("oriented_structures/{0}.pdb".format(name), name + "_ORIENTED")
    
    # Color By Prediction Score
    cmapA = cm.get_cmap('Blues')
    cmapB = cm.get_cmap('Greens')
    
    for resi, pred in A_df[["Residue ID", "Pred"]].drop_duplicates().values:
        if(np.isnan(pred)):
            pred = 0
        pymolCmd.color(matplotlib.colors.rgb2hex(cmapA(float(pred))[:3]).replace("#", "0x"), "{0}_RAW and chain {1} and resi {2}".format(name, chainA, int(resi)))
        pymolCmd.color(matplotlib.colors.rgb2hex(cmapA(float(pred))[:3]).replace("#", "0x"), "{0}_ORIENTED and chain {1} and resi {2}".format(name, chainA, int(resi)))
    
    for resi, pred in B_df[["Residue ID", "Pred"]].drop_duplicates().values:
        if(np.isnan(pred)):
            pred = 0
        pymolCmd.color(matplotlib.colors.rgb2hex(cmapB(float(pred))[:3]).replace("#", "0x"), "{0}_RAW and chain {1} and resi {2}".format(name, chainB, int(resi)))
        pymolCmd.color(matplotlib.colors.rgb2hex(cmapB(float(pred))[:3]).replace("#", "0x"), "{0}_ORIENTED and chain {1} and resi {2}".format(name, chainB, int(resi)))
    
    # Visualize the Centroids
    #pymolCmd.pseudoatom("ORIENTED_A_centroid", pos=list(centroid_A))
    #pymolCmd.pseudoatom("ORIENTED_A_ires_centroid", pos=list(ires_centroid_A))
    #pymolCmd.pseudoatom("ORIENTED_B_centroid", pos=list(centroid_B))
    #pymolCmd.pseudoatom("ORIENTED_B_ires_centroid", pos=list(ires_centroid_B))
    #
    #centroid_A = A_df_orig[["X", "Y", "Z"]].mean().values
    #ires_centroid_A = A_df_orig[(A_df_orig["Atom Name"] == "CA")&(~pd.isnull(A_df_orig["Pred"]))][["X", "Y", "Z", "Pred"]].apply(lambda x: np.array(x[:3])*x[3], axis=1).sum() / A_df_orig[(A_df_orig["Atom Name"] == "CA")&(~pd.isnull(A_df_orig["Pred"]))]["Pred"].sum()
    #
    #centroid_B = B_df_orig[["X", "Y", "Z"]].mean().values
    #ires_centroid_B = B_df_orig[(B_df_orig["Atom Name"] == "CA")&(~pd.isnull(B_df_orig["Pred"]))][["X", "Y", "Z", "Pred"]].apply(lambda x: np.array(x[:3])*x[3], axis=1).sum() / B_df_orig[(B_df_orig["Atom Name"] == "CA")&(~pd.isnull(B_df_orig["Pred"]))]["Pred"].sum()
    
    #pymolCmd.pseudoatom("A_centroid", pos=list(centroid_A))
    #pymolCmd.pseudoatom("A_ires_centroid", pos=list(ires_centroid_A))
    #pymolCmd.pseudoatom("B_centroid", pos=list(centroid_B))
    #pymolCmd.pseudoatom("B_ires_centroid", pos=list(ires_centroid_B))
    
    # Save the Sessions
    pymolCmd.save("PyMolSessions/{0}.pse".format(name))

HBox(children=(IntProgress(value=0, max=50), HTML(value=u'')))

ERROR: no data for P68871 P69905 1XZ2 B C
ERROR: no data for P20248 P24941 3EOC D C
ERROR: no data for P03372 Q15596 5DXB A D
ERROR: no data for P68871 P69905 1SDK B C
ERROR: no data for P68871 P69905 1XZU B A
ERROR: no data for P68871 P69905 1QSH D A


# COVID Predictions

In [12]:
# Read ECLAIR Preds
inter2preds = glob.glob("[REDACTED_PATH]/Collaborators/Eclair_Runs/2020_04_22_COVID19_Human_Interactome/Predictions/*")
inter2preds = {os.path.basename(x).split(".")[0]:pd.read_pickle(x) for x in inter2preds}

In [13]:
# Read Interactions
interactions = pd.read_csv("[REDACTED_PATH]/Collaborators/Eclair_Runs/2020_04_22_COVID19_Human_Interactome/Interactions.txt", names=["P1", "P2"], sep="\t")

In [14]:
# Select Homology Models for COVID Protein
#interactions["P1 PDB"] = interactions["P1"].map(lambda x: glob.glob("[REDACTED_PATH]/Collaborators/Lab_Member_Requests/Haiyuan/2020_03_27_COVID19_3DInteractome/modelling_test/{0}.*pdb".format(x.replace("COVID19", "")))[0] if glob.glob("[REDACTED_PATH]/Collaborators/Lab_Member_Requests/Haiyuan/2020_03_27_COVID19_3DInteractome/modelling_test/{0}.*pdb".format(x.replace("COVID19", ""))) else np.nan)
#interactions.ix[interactions["P1"] == "COVID19nsp5C145A", "P1 PDB"] = "[REDACTED_PATH]/Collaborators/Lab_Member_Requests/Haiyuan/2020_03_27_COVID19_3DInteractome/modelling_test/nsp5_C145A.B99990001.pdb"

interactions["P1 PDB"] = interactions["P1"].map(lambda x: glob.glob("[REDACTED_PATH]/eclair/data/modbase/models/hash/{0}.pdb".format(x.replace("COVID19", "")))[0] if glob.glob("[REDACTED_PATH]/eclair/data/modbase/models/hash/{0}.pdb".format(x.replace("COVID19", ""))) else np.nan)
interactions.ix[interactions["P1"] == "COVID19nsp5C145A", "P1 PDB"] = glob.glob("[REDACTED_PATH]/eclair/data/modbase/models/hash/nsp5_C145A.pdb")[0]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated


In [15]:
# Select Best PDB Model for Human Protein (best overlap with ECLAIR Predictions)
def get_best_pdb(cov, uni):
    # First Fetch ECLAIR Predictions
    pair = "_".join(sorted([cov, uni]))
    preds = inter2preds[pair]
    preds = preds[preds["Prot"] == (uni > cov)].set_index("Pos")["Pred"].to_dict()
    
    
    tmp = sifts[sifts["UniProt"] == uni][["PDB", "Chain", "UniProt", "MappableResInPDBChainOnUniprotBasis"]].copy()
    if(len(tmp) == 0):
        return np.nan, np.nan
    elif(len(tmp) == 1):
        return tmp[["PDB", "Chain"]].values[0]
    
    
    tmp["Len"] = tmp["MappableResInPDBChainOnUniprotBasis"].map(lambda x: len(unzip_res_range(x)))
    tmp["Weighted_Len"] = tmp["MappableResInPDBChainOnUniprotBasis"].map(lambda x:sum([preds[int(x)] for x in unzip_res_range(x)]))
    
    tmp.sort_values(["Weighted_Len", "Len"], ascending=False)
    
    return tmp[["PDB", "Chain"]].values[0]    
# FUNCTION END
tmp = interactions[["P1", "P2"]].apply(lambda x: get_best_pdb(*x), axis=1)
interactions["P2 PDB"] = [x[0] for x in tmp]
interactions["P2 Chain"] = [x[1] for x in tmp]

In [16]:
print len(interactions[(~pd.isnull(interactions["P1 PDB"]))&(~pd.isnull(interactions["P2 PDB"]))])

112


In [17]:
# Add in Modbase Models where no PDB structure available
modbase = pd.read_csv("[REDACTED_PATH]/modbase/parsed_files/all_modbase_models.txt", sep="\t")
modbase[modbase["modpipe_quality_score"] >= 1.1].drop_duplicates("uniprot")

uni2modbase = modbase.set_index("uniprot")["modbase_modelID"].map(lambda x: ("[REDACTED_PATH]/modbase/data/hash/{0}.pdb".format(x))).to_dict()

interactions.ix[pd.isnull(interactions["P2 PDB"]), "P2 Chain"] = " "
interactions.ix[pd.isnull(interactions["P2 PDB"]), "P2 PDB"] = interactions.ix[pd.isnull(interactions["P2 PDB"]), "P2"].map(lambda x: uni2modbase[x] if x in uni2modbase else np.nan)

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated


In [18]:
print len(interactions[(~pd.isnull(interactions["P1 PDB"]))&(~pd.isnull(interactions["P2 PDB"]))])

255


In [19]:
# COVID Proteins were already indexed for ECLAIR, just copy them here
f2f = dict()
for f in tqdm_notebook(interactions["P1 PDB"].unique()):
    if(pd.isnull(f)):
        f2f[f] = f
        continue
    os.system("cp {0} reindexed_structures/{1}".format(f, os.path.basename(f)))
    f2f[f] = "reindexed_structures/{0}".format(os.path.basename(f))

HBox(children=(IntProgress(value=0, max=20), HTML(value=u'')))




In [20]:
# Reindex structures so they are numbered by Uniprot pos
for uni, pdb, chain in tqdm_notebook(interactions[["P2", "P2 PDB", "P2 Chain"]].values):
    if(pd.isnull(pdb)):
        continue
    
    # Handle Modbase Structures (should already be indexed, just copy)
    if(chain == " "):
        os.system("cp {0} reindexed_structures/{1}".format(pdb, os.path.basename(pdb)))
        f2f[pdb] = "reindexed_structures/{0}".format(os.path.basename(pdb))
        continue
    
    pos_map = pos_maps[pdb, chain, uni]
    
    pdb_df = my.pdb2df(pdb)
    pdb_df = pdb_df[pdb_df["Data Type"] == "ATOM"]
    pdb_df = pdb_df[pdb_df["Chain"] == chain]
    def do(x):
        chain, pos = x
        try:
            return pos_map[str(pos)]
        except:
            return -pos
    pdb_df["Residue ID"] = pdb_df[["Chain", "Residue ID"]].apply(do, axis=1)
    
    my.df2pdb("reindexed_structures/{0}_{1}_{2}.pdb".format(uni, pdb, chain), pdb_df)
    f2f[pdb] = "reindexed_structures/{0}_{1}_{2}.pdb".format(uni, pdb, chain)

HBox(children=(IntProgress(value=0, max=332), HTML(value=u'')))

  df.header = header
  df.tailer = tailer





In [21]:
interactions["P1 PDB"] = interactions["P1 PDB"].map(lambda x: f2f[x])
interactions["P2 PDB"] = interactions["P2 PDB"].map(lambda x: f2f[x])

In [31]:
from matplotlib import cm

In [96]:
# Reorient all structures so that interface faces are facing each other
for uniA, uniB, pdbA, pdbB, chainB in tqdm_notebook(interactions[["P1", "P2", "P1 PDB", "P2 PDB", "P2 Chain"]].values):
    chainA = " "
    
    if(pd.isnull(pdbA) or pd.isnull(pdbB)):
        continue
    
    # Read Structures
    A_df = my.pdb2df(pdbA)
    if(not chainA == " "):
        A_df = A_df[A_df["Chain"] == chainA].copy()
    
    B_df = my.pdb2df(pdbB)
    if(not chainB == " "):
        B_df = B_df[B_df["Chain"] == chainB].copy()
    
    if(len(A_df) == 0 or len(B_df) == 0):
        print "ERROR: no data for", uniA, uniB, pdbA, pdbB, chainA, chainB, len(A_df), len(B_df)
        continue
    
    # Get Eclair Preds
    tmp = inter2preds["_".join(sorted([uniA, uniB]))]
    tmp = tmp.set_index(["Prot", "Pos"])["Pred"].to_dict()
    prot = int(uniA > uniB)
    
    # Assign Prediction Scores
    if(uniA != uniB):
        A_df["Pred"] = A_df["Residue ID"].map(lambda x: tmp[(prot, x)] if x > 0 else np.nan)
        B_df["Pred"] = B_df["Residue ID"].map(lambda x: tmp[(1-prot, x)] if x > 0 else np.nan)
    else:
        A_df["Pred"] = A_df["Residue ID"].map(lambda x: tmp[(0, x)] if x > 0 else np.nan)
        B_df["Pred"] = B_df["Residue ID"].map(lambda x: tmp[(0, x)] if x > 0 else np.nan)
    
    
    # Begin Reorientation
    
    # Handle Chain A
    orient_pdb_by_linear_regression(A_df, up=True)
    
    # Handle Chain B
    orient_pdb_by_linear_regression(B_df, up=False)
    
    # Final Translation to separate chains by 5 A
    translate(A_df, np.array([0, A_df["Y"].max() + 2.5, 0]))
    translate(B_df, np.array([0, B_df["Y"].min() - 2.5, 0]))
    
    # ADD CODE TO OPTIMZIE ROTATION ALONG Y AXIS (ONLY IN CHAIN B)
    
    
    # Record Centroid Locations
    ires_centroid_A = A_df[(A_df["Atom Name"] == "CA")&(~pd.isnull(A_df["Pred"]))][["X", "Y", "Z", "Pred"]].apply(lambda x: np.array(x[:3])*x[3], axis=1).sum() / A_df[(A_df["Atom Name"] == "CA")&(~pd.isnull(A_df["Pred"]))]["Pred"].sum()
    ires_centroid_B = B_df[(B_df["Atom Name"] == "CA")&(~pd.isnull(B_df["Pred"]))][["X", "Y", "Z", "Pred"]].apply(lambda x: np.array(x[:3])*x[3], axis=1).sum() / B_df[(B_df["Atom Name"] == "CA")&(~pd.isnull(B_df["Pred"]))]["Pred"].sum()
    
    # Make sure X:Z centorids of the Interfaces Line Up
    translate(A_df, np.array([ires_centroid_A[0], 0, ires_centroid_A[2]]))
    translate(B_df, np.array([ires_centroid_B[0], 0, ires_centroid_B[2]]))
    
    # Save Reoriented Structures
    A_df["Chain"] = "A"
    B_df["Chain"] = "B"
    my.df2pdb("oriented_structures/{0}_{1}.pdb".format(uniA, uniB), pd.concat([A_df, B_df]))
    my.df2pdb("oriented_structures/{0}_{1}_A.pdb".format(uniA, uniB), A_df)
    my.df2pdb("oriented_structures/{0}_{1}_B.pdb".format(uniA, uniB), B_df)
    
    
    # Create Pymol Session
    pymolCmd.reinitialize()
    
    # Load Original Structure
    name = "{0}_{1}".format(uniA, uniB)
    
    # Load Reoriented Structures
    pymolCmd.load("oriented_structures/{0}_A.pdb".format(name), name + "_A")
    pymolCmd.load("oriented_structures/{0}_B.pdb".format(name), name + "_B")
    
    
    # Color By Prediction
    cmapA = cm.get_cmap('Greens')   
    cmapB = cm.get_cmap('Blues')
    
    for resi, pred in A_df[["Residue ID", "Pred"]].drop_duplicates().values:
        if(np.isnan(pred)):
            pred = 0
        pymolCmd.color(matplotlib.colors.rgb2hex(cmapA(float(pred))[:3]).replace("#", "0x"), "{0}_A and resi {2}".format(name, "A", int(resi)))
        
    for resi, pred in B_df[["Residue ID", "Pred"]].drop_duplicates().values:
        if(np.isnan(pred)):
            pred = 0
        pymolCmd.color(matplotlib.colors.rgb2hex(cmapB(float(pred))[:3]).replace("#", "0x"), "{0}_B and resi {2}".format(name, "B", int(resi)))
    
    # Save Structure Session
    pymolCmd.save("PyMolSessions/{0}.pse".format(name))

HBox(children=(IntProgress(value=0, max=332), HTML(value=u'')))

ERROR: no data for COVID19nsp13 Q08379 reindexed_structures/nsp13.pdb reindexed_structures/Q9BQQ3_4REY_A.pdb   B 4684 0
ERROR: no data for COVID19nsp4 P62072 reindexed_structures/nsp4.pdb reindexed_structures/Q9Y5J7_2BSK_A.pdb   B 3955 0
ERROR: no data for COVID19nsp8 Q96B26 reindexed_structures/nsp8.pdb reindexed_structures/Q13868_2NN6_H.pdb   C 1486 0
ERROR: no data for COVID19nsp8 Q9NQT4 reindexed_structures/nsp8.pdb reindexed_structures/Q13868_2NN6_H.pdb   D 1486 0
ERROR: no data for COVID19nsp8 Q9NQT5 reindexed_structures/nsp8.pdb reindexed_structures/Q13868_2NN6_H.pdb   G 1486 0



In [97]:
def x_rotation_matrix(theta):
    return np.array([1,              0,              0,
                     0,              np.cos(theta),  -np.sin(theta),
                     0,              np.sin(theta),  np.cos(theta)
                    ]).reshape((3, 3))
# FUNCTION END

def y_rotation_matrix(theta):
    return np.array([np.cos(theta),  0,              np.sin(theta),
                     0,              1,              0,
                     -np.sin(theta), 0,              np.cos(theta)
                    ]).reshape((3, 3))
# FUNCTION END

def z_rotation_matrix(theta):
    return np.array([np.cos(theta),  -np.sin(theta), 0,
                     np.sin(theta),  np.cos(theta), 0,
                     0,               0,             1
                    ]).reshape((3, 3))
# FUNCTION END

def rotate(xyz, theta_x, theta_y, theta_z, origin=np.array([0, 0, 0]), return_function=True):
    rot_matx = x_rotation_matrix(theta_x)
    rot_maty = y_rotation_matrix(theta_y)
    rot_matz = z_rotation_matrix(theta_z)
    
    if(return_function):
        return lambda x: rot_matz.dot(rot_maty.dot(rot_matx.dot(x - origin))) + origin
    return rot_matz.dot(rot_maty.dot(rot_matx.dot(xyz - origin))) + origin
# FUNCTION END

def center_of_mass(pose, chain):
    center = np.zeros(3)
    total = 0
    for r in range(1, 1+pose.total_residue()):
        c = pose.pdb_info().pose2pdb(r).split()[-1]
        if(c != chain):
            continue
        center += np.array(pose.residue(r).xyz("CA"))
        total += 1
    
    return center / float(total)
# FUNCTION END

def translate_chain(pose, chain, xyz):
    # Apply translation vector
    delta_xyz = rosetta.numeric.xyzVector_double_t(*xyz)
    for r in range(1, 1+pose.total_residue()):
        c = pose.pdb_info().pose2pdb(r).split()[-1]
        if(c != chain):
            continue
        
        for a in range(1, pose.residue(r).natoms() + 1):
            pose.residue(r).set_xyz(a, pose.residue(r).xyz(a) + delta_xyz)
# FUNCTION END

def rotate_chain(pose, chain, tx=None, ty=None, tz=None, origin=np.zeros(3), rot_func=None, angle_max=180):
    if(origin is "center"):
        origin = center_of_mass(pose, chain)
    if(tx is None):
        tx, ty, tz = (np.random.random(size=3) - 0.5)*angle_max*np.pi/180.0
    
    if(rot_func is None):
        rot_func = rotate(origin, tx, ty, tz, origin, True)
    
    for r in range(1, 1+pose.total_residue()):
        c = pose.pdb_info().pose2pdb(r).split()[-1]
        if(c != chain):
            continue
        
        for a in range(1, pose.residue(r).natoms() + 1):
            new_xyz = rot_func(np.array(pose.residue(r).xyz(a)))
            new_xyz = rosetta.numeric.xyzVector_double_t(*new_xyz)
            pose.residue(r).set_xyz(a, new_xyz)
# FUNCTION END

def random_translate_chain(pose, chain, magnitude=5, angle=5):
    # Generate random translation vector
    delta_xyz = np.array([random.gauss(magnitude, magnitude*0.25), 0, 0])
    delta_xyz = rotate(delta_xyz, np.random.random()*2*np.pi, np.random.random()*2*np.pi, np.random.random()*2*np.pi)
    
    # Apply translation vector
    delta_xyz = rosetta.numeric.xyzVector_double_t(*delta_xyz)
    for r in range(1, 1+pose.total_residue()):
        c = pose.pdb_info().pose2pdb(r).split()[-1]
        if(c != chain):
            continue
        
        for a in range(1, pose.residue(r).natoms() + 1):
            pose.residue(r).set_xyz(a, pose.residue(r).xyz(a) + delta_xyz)
    
    # Generate random rotation
    theta1, theta2, theta3 = random.gauss(0, angle)*np.pi/180.0, random.gauss(0, angle)*np.pi/180.0, random.gauss(0, angle)*np.pi/180.0
    origin = center_of_mass(pose, chain)
    for r in range(1, 1+pose.total_residue()):
        c = pose.pdb_info().pose2pdb(r).split()[-1]
        if(c != chain):
            continue
        
        for a in range(1, pose.residue(r).natoms() + 1):
            new_xyz = rotate(np.array(pose.residue(r).xyz(a)), theta1, theta2, theta3, origin=origin)
            new_xyz = rosetta.numeric.xyzVector_double_t(*new_xyz)
            pose.residue(r).set_xyz(a, new_xyz)
# FUNCTION END

In [98]:
from pyrosetta import *
from pyrosetta.toolbox import *
init()

PyRosetta-4 2020 [Rosetta PyRosetta4.Release.python27.linux 2020.11+release.ce6f14f37b41d7372ea79cca59fad6b33adc1596 2020-03-12T19:59:10] retrieved from: http://www.pyrosetta.org
(C) Copyright Rosetta Commons Member Institutions. Created in JHU by Sergey Lyskov and PyRosetta Team.
core.init: Checking for fconfig files in pwd and ./rosetta/flags
core.init: Rosetta version: PyRosetta4.Release.python27.linux r249 2020.11+release.ce6f14f ce6f14f37b41d7372ea79cca59fad6b33adc1596 http://www.pyrosetta.org 2020-03-12T19:59:10
core.init: command: PyRosetta -ex1 -ex2aro -database [REDACTED_PATH]/bin/pyrosetta/database
basic.random.init_random_generator: 'RNG device' seed mode, using '/dev/urandom', seed=-1496668198 seed_offset=0 real_seed=-1496668198
basic.random.init_random_generator: RandomGenerator:init: Normal mode, seed=-1496668198 RG_type=mt19937


In [117]:
import subprocess as sp

In [116]:
cleanATOM("oriented_structures/COVID19nsp10_O94973.pdb")
pose = pose_from_file("oriented_structures/COVID19nsp10_O94973.clean.pdb")

core.import_pose.import_pose: File 'oriented_structures/COVID19nsp10_O94973.clean.pdb' automatically determined to be of type PDB


In [148]:
import time

In [166]:
def run_dock(pdb_file, i_num, rot_x=30, rot_y=None, rot_z=30, trans_x=5, trans_y=0, trans_z=5, recp_chain="A", lig_chain="B"):
    # Clean Pose / Load Pose
    cleanATOM(pdb_file)
    pose = pose_from_file(pdb_file.replace(".pdb", ".clean.pdb"))
    
    # Apply Random Spin on Ligand Chain
    if(not rot_x is None):
        tx = min([np.random.normal(0, rot_x/2.5), rot_x], key=lambda x: abs(x))
    else:
        tx = np.random.random()*360
    tx = np.deg2rad(tx)
    if(not rot_y is None):
        ty = min([np.random.normal(0, rot_y/2.5), rot_x], key=lambda x: abs(x))
    else:
        ty = np.random.random()*360
    ty = np.deg2rad(ty)
    if(not rot_z is None):
        tz = min([np.random.normal(0, rot_z/2.5), rot_x], key=lambda x: abs(x))
    else:
        tz = np.random.random()*360
    tz = np.deg2rad(tz)
    rotate_chain(pose, lig_chain, tx=tx, ty=ty, tz=tz, origin=center_of_mass(pose, lig_chain))
    
    # Apply Random Translation on Ligand Chain
    dx = min([np.random.normal(0, trans_x/2.5), trans_x], key=lambda x: abs(x))
    dy = min([np.random.normal(0, trans_y/2.5), trans_x], key=lambda x: abs(x))
    dz = min([np.random.normal(0, trans_z/2.5), trans_x], key=lambda x: abs(x))
    translate_chain(pose, lig_chain, np.array([dx, dy, dz]))
    
    # Apply Random Spin Around Receptor Chain
    if(not rot_x is None):
        tx = min([np.random.normal(0, rot_x/2.5), rot_x], key=lambda x: abs(x))
    else:
        tx = np.random.random()*360
    tx = np.deg2rad(tx)
    if(not rot_y is None):
        ty = min([np.random.normal(0, rot_y/2.5), rot_x], key=lambda x: abs(x))
    else:
        ty = np.random.random()*360
    ty = np.deg2rad(ty)
    if(not rot_z is None):
        tz = min([np.random.normal(0, rot_z/2.5), rot_x], key=lambda x: abs(x))
    else:
        tz = np.random.random()*360
    tz = np.deg2rad(tz)
    rotate_chain(pose, lig_chain, tx=tx, ty=ty, tz=tz, origin=center_of_mass(pose, recp_chain))
    
    # Save Pose as tmp input file
    out_pdb = os.path.abspath("random_docking_inputs/{0}_{1}.pdb".format(os.path.basename(pdb_file).split(".")[0], i_num))
    pose.dump_file(out_pdb)
    
    # Generate Process to run docking
    cmd = "nice python Docking_Script.py --pdb_filename {0} --partners A_B --jobs 1 --job_output {1}".format(out_pdb, os.path.basename(out_pdb).split(".")[0])
    #print cmd
    p = sp.Popen(cmd, shell=True)
    
    return {"p":p, "cmd":cmd, "start_time":time.time(), "end_time":None, "in_f":pdb_file}
# FUNCTION END

In [None]:
i_num = 1
input_pdbs = [x for x in glob.glob("oriented_structures/*COVID19*.pdb") if len(os.path.basename(x).split("_")) == 2 and not "clean" in x]
finished_processes = []
processes = []

os.system("rm [REDACTED_PATH]/Collaborators/Resource_Maintenance/2020_04_27_Guided_Docking_Test/docking_outputs/*.in_progress")
while(True):
    print "Docking Iteration", i_num
    
    # Iterate over all Docking Inputs
    for pdb_file in tqdm_notebook(input_pdbs):
        if(os.path.exists("[REDACTED_PATH]/Collaborators/Resource_Maintenance/2020_04_27_Guided_Docking_Test/docking_outputs/{0}_{1}_0.pdb".format(os.path.basename(pdb_file).split(".")[0], i_num))):
            continue
        # Update Max_Processes from File
        try:
            max_processes = int(my.easyReadLines("max_cores")[0])
        except:
            print "max_cores file formatted incorrectly"
            max_processes = 30
        
        # Make Block new jobs if too many running already
        while(True):
            if(len(processes) <= max_processes):
                p = run_dock(pdb_file, i_num)
                processes.append(p)
                break
            else:
                new_processes = []
                for p in processes:
                    if(p["p"].poll() is None):
                        new_processes.append(p)
                    elif(p["p"].poll() != 0):
                        p["end_time"] = time.time()
                        print "Error", p["p"].poll()
                        print "cmd:", p["cmd"]
                        print "RunTime:", (p["end_time"] - p["start_time"])
                        print
                        finished_processes.append(p)
                    else:
                        p["end_time"] = time.time()
                        print "Finished Docking", p["in_f"], "in", (p["end_time"] - p["start_time"])
                        finished_processes.append(p)
                processes = new_processes
                time.sleep(30)
    i_num += 1

Docking Iteration 1


HBox(children=(IntProgress(value=0, max=250), HTML(value=u'')))


197/|/ 79%|| 197/250 [00:29<00:07,  6.79it/s]Docking Iteration 2


HBox(children=(IntProgress(value=0, max=250), HTML(value=u'')))


Docking Iteration 3


HBox(children=(IntProgress(value=0, max=250), HTML(value=u'')))


Docking Iteration 4


HBox(children=(IntProgress(value=0, max=250), HTML(value=u'')))

Finished Docking oriented_structures/COVID19orf8_P05556.pdb in 474.95126605
Finished Docking oriented_structures/COVID19orf7a_Q7Z4Q2.pdb in 590.414237976
Finished Docking oriented_structures/COVID19orf9c_Q6NXT6.pdb in 524.39854598
Finished Docking oriented_structures/COVID19orf8_Q9H4F8.pdb in 523.533063173
Finished Docking oriented_structures/COVID19orf8_Q6UW63.pdb in 611.945548773
Finished Docking oriented_structures/COVID19nsp7_Q5JTV8.pdb in 685.160368919
Finished Docking oriented_structures/COVID19orf8_Q08431.pdb in 135.676477194
Finished Docking oriented_structures/COVID19nsp7_Q5VT66.pdb in 712.589363098
Finished Docking oriented_structures/COVID19nsp1_Q99959.pdb in 707.10914588
Finished Docking oriented_structures/COVID19orf9c_Q2PZI1.pdb in 130.14140296

Docking Iteration 5


HBox(children=(IntProgress(value=0, max=250), HTML(value=u'')))

Finished Docking oriented_structures/COVID19orf9c_Q9Y276.pdb in 805.728705883
Finished Docking oriented_structures/COVID19orf9c_Q86UT6.pdb in 801.678977013
Finished Docking oriented_structures/COVID19orf9c_Q96K12.pdb in 119.827090025
Finished Docking oriented_structures/COVID19nsp7_P62873.pdb in 930.67772603
Finished Docking oriented_structures/COVID19orf9c_Q8TEQ8.pdb in 130.928827047
Finished Docking oriented_structures/COVID19nsp7_P51148.pdb in 1078.58062005
Finished Docking oriented_structures/COVID19nsp7_Q9NP72.pdb in 1041.89763284
Finished Docking oriented_structures/COVID19orf9c_P33527.pdb in 1087.78267097
Finished Docking oriented_structures/COVID19nsp7_Q8WVC6.pdb in 1160.99965596
Finished Docking oriented_structures/COVID19nsp9_P61962.pdb in 525.253942013
Finished Docking oriented_structures/COVID19orf8_P29122.pdb in 202.341947079
Finished Docking oriented_structures/COVID19orf8_P26358.pdb in 1341.15544105
Finished Docking oriented_structures/COVID19nsp8_Q92552.pdb in 1536.4081

Finished Docking oriented_structures/COVID19nsp13_Q04724.pdb in 2481.39554906
Finished Docking oriented_structures/COVID19nsp13_Q9UHD2.pdb in 3425.33551192
Finished Docking oriented_structures/COVID19nsp8_O76094.pdb in 1147.4468751
Finished Docking oriented_structures/COVID19nsp13_P35241.pdb in 1135.14560986
Finished Docking oriented_structures/COVID19orf8_Q96AY3.pdb in 586.69879508
Finished Docking oriented_structures/COVID19orf8_Q96MM7.pdb in 655.189255953
Error 1
cmd: nice python Docking_Script.py --pdb_filename [REDACTED_PATH]Collaborators/Resource_Maintenance/2020_04_27_Guided_Docking_Test/random_docking_inputs/COVID19nsp1_Q99959_5.pdb --partners A_B --jobs 1 --job_output COVID19nsp1_Q99959_5
RunTime: 2147.65352893

Finished Docking oriented_structures/COVID19nsp8_Q8NEJ9.pdb in 818.385672808
Finished Docking oriented_structures/COVID19nsp12_O43823.pdb in 4624.4099741
Finished Docking oriented_structures/COVID19nsp10_Q969X5.pdb in 184.468966961
Finished Docking oriented_structures/

HBox(children=(IntProgress(value=0, max=250), HTML(value=u'')))

Finished Docking oriented_structures/COVID19nsp7_Q8N183.pdb in 506.986329079
Finished Docking oriented_structures/COVID19nsp13_Q8IWJ2.pdb in 2346.561481
Finished Docking oriented_structures/COVID19nsp8_P82675.pdb in 706.441822052
Finished Docking oriented_structures/COVID19orf9b_Q9H2P9.pdb in 1172.72000504
Finished Docking oriented_structures/COVID19nsp7_Q8NBX0.pdb in 412.617512941
Finished Docking oriented_structures/COVID19nsp7_Q96A26.pdb in 380.377938032
Finished Docking oriented_structures/COVID19nsp7_P62873.pdb in 1081.79391003
Finished Docking oriented_structures/COVID19orf9c_Q9H3K2.pdb in 312.59779501
Finished Docking oriented_structures/COVID19nsp1_P09884.pdb in 126.324621916
Finished Docking oriented_structures/COVID19nsp13_Q9UJC3.pdb in 7111.42616606
Finished Docking oriented_structures/COVID19nsp9_P35556.pdb in 168.734998941
Finished Docking oriented_structures/COVID19nsp14_P12268.pdb in 3046.60288811
Finished Docking oriented_structures/COVID19orf8_Q96F46.pdb in 1414.554130

Finished Docking oriented_structures/COVID19nsp12_O14874.pdb in 6662.94881606
Finished Docking oriented_structures/COVID19orf9c_O00124.pdb in 131.044164896
Finished Docking oriented_structures/COVID19nsp13_O95613.pdb in 9580.09813094
Finished Docking oriented_structures/COVID19orf8_Q9NYU1.pdb in 4778.10576916
Finished Docking oriented_structures/COVID19orf9b_O94826.pdb in 170.998675823
Finished Docking oriented_structures/COVID19nsp7_Q8WTV0.pdb in 160.65277195
Finished Docking oriented_structures/COVID19orf9b_O43633.pdb in 466.352965117
Finished Docking oriented_structures/COVID19nsp13_Q66GS9.pdb in 2445.83800101
Finished Docking oriented_structures/COVID19nsp5C145A_Q9NXH9.pdb in 485.323612928
Finished Docking oriented_structures/COVID19nsp1_P49643.pdb in 710.370737076
Error 1
cmd: nice python Docking_Script.py --pdb_filename [REDACTED_PATH]Collaborators/Resource_Maintenance/2020_04_27_Guided_Docking_Test/random_docking_inputs/COVID19orf8_Q9Y4L1_5.pdb --partners A_B --jobs 1 --job_outp

Finished Docking oriented_structures/COVID19nsp12_Q9H2H8.pdb in 3687.40275002
Finished Docking oriented_structures/COVID19nsp7_Q9BQE4.pdb in 419.420682907
Finished Docking oriented_structures/COVID19nsp13_P31323.pdb in 1009.85044885
Finished Docking oriented_structures/COVID19nsp7_P61019.pdb in 215.808012962
Finished Docking oriented_structures/COVID19nsp9_P61962.pdb in 548.06382823
Finished Docking oriented_structures/COVID19nsp13_Q9BV73.pdb in 9257.49567485
Finished Docking oriented_structures/COVID19nsp13_Q92995.pdb in 2601.61531806
Finished Docking oriented_structures/COVID19nsp13_Q9UHD2.pdb in 1300.10614896
Finished Docking oriented_structures/COVID19orf9c_Q9GZU3.pdb in 124.047619104
Finished Docking oriented_structures/COVID19nsp12_Q5VUA4.pdb in 4000.15999889
Finished Docking oriented_structures/COVID19orf8_P05556.pdb in 409.328941107
Finished Docking oriented_structures/COVID19nsp7_P63218.pdb in 311.534960032
Finished Docking oriented_structures/COVID19nsp13_A7MCY6.pdb in 3806.9

HBox(children=(IntProgress(value=0, max=250), HTML(value=u'')))

Finished Docking oriented_structures/COVID19nsp7_P62873.pdb in 485.544555187
Finished Docking oriented_structures/COVID19nsp10_Q969X5.pdb in 565.657472849
Finished Docking oriented_structures/COVID19nsp7_Q8N183.pdb in 431.809425116
Finished Docking oriented_structures/COVID19orf9b_O14745.pdb in 525.574223042
Finished Docking oriented_structures/COVID19orf9b_Q9P0L2.pdb in 523.635840178
Finished Docking oriented_structures/COVID19nsp7_Q8NBX0.pdb in 476.310188055
Finished Docking oriented_structures/COVID19nsp15_P61970.pdb in 1838.57658887
Finished Docking oriented_structures/COVID19nsp7_Q96A26.pdb in 506.307767868
Finished Docking oriented_structures/COVID19orf9c_Q9H845.pdb in 1154.38309979
Finished Docking oriented_structures/COVID19orf9c_Q9H3K2.pdb in 386.923538923
Finished Docking oriented_structures/COVID19nsp13_O14578.pdb in 7166.00784707
Error 1
cmd: nice python Docking_Script.py --pdb_filename [REDACTED_PATH]Collaborators/Resource_Maintenance/2020_04_27_Guided_Docking_Test/random_

Finished Docking oriented_structures/COVID19orf9b_O43633.pdb in 467.422884941
Finished Docking oriented_structures/COVID19orf9b_O94826.pdb in 540.409012794
Finished Docking oriented_structures/COVID19orf8_Q8N766.pdb in 2729.00532389
Finished Docking oriented_structures/COVID19nsp7_Q8WTV0.pdb in 505.245733023
Finished Docking oriented_structures/COVID19nsp8_P61011.pdb in 337.264401913
Finished Docking oriented_structures/COVID19orf8_Q08431.pdb in 405.189374924
Finished Docking oriented_structures/COVID19nsp9_Q15056.pdb in 163.076268196
Finished Docking oriented_structures/COVID19nsp12_A3KN83.pdb in 3585.55435801
Finished Docking oriented_structures/COVID19nsp13_Q04726.pdb in 2159.16247296
Finished Docking oriented_structures/COVID19nsp1_P49643.pdb in 765.448155165
Finished Docking oriented_structures/COVID19nsp8_O00566.pdb in 763.240442991
Finished Docking oriented_structures/COVID19nsp12_O14874.pdb in 7036.14074802
Finished Docking oriented_structures/COVID19orf9c_Q9Y3A6.pdb in 196.088

Finished Docking oriented_structures/COVID19nsp8_Q9Y399.pdb in 640.518346071
Finished Docking oriented_structures/COVID19nsp7_Q9BQE4.pdb in 475.045727968
Finished Docking oriented_structures/COVID19nsp12_Q5EBL8.pdb in 4667.10730004
Error 1
cmd: nice python Docking_Script.py --pdb_filename [REDACTED_PATH]Collaborators/Resource_Maintenance/2020_04_27_Guided_Docking_Test/random_docking_inputs/COVID19orf8_O76061_7.pdb --partners A_B --jobs 1 --job_output COVID19orf8_O76061_7
RunTime: 746.286458969

Finished Docking oriented_structures/COVID19nsp9_P61962.pdb in 165.240530014
Finished Docking oriented_structures/COVID19nsp13_O95684.pdb in 1075.41540408
Finished Docking oriented_structures/COVID19nsp7_O95573.pdb in 399.662139177
Finished Docking oriented_structures/COVID19nsp7_P61019.pdb in 254.082389116
Finished Docking oriented_structures/COVID19nsp13_Q9BV73.pdb in 9284.35914588
Finished Docking oriented_structures/COVID19orf9c_Q9GZU3.pdb in 365.176368952
Finished Docking oriented_structure

HBox(children=(IntProgress(value=0, max=250), HTML(value=u'')))

Finished Docking oriented_structures/COVID19nsp12_O43823.pdb in 4700.84925485
Finished Docking oriented_structures/COVID19nsp15_P61970.pdb in 1523.15965104
Finished Docking oriented_structures/COVID19nsp7_Q8N183.pdb in 450.022789955
Finished Docking oriented_structures/COVID19nsp10_Q969X5.pdb in 696.747117996
Finished Docking oriented_structures/COVID19nsp13_Q9BQS8.pdb in 2923.18353105
Finished Docking oriented_structures/COVID19nsp8_P82675.pdb in 723.326658964
Finished Docking oriented_structures/COVID19orf9c_Q9H3K2.pdb in 129.283293009
Finished Docking oriented_structures/COVID19nsp13_Q8TD10.pdb in 3452.25784492
Finished Docking oriented_structures/COVID19nsp1_P09884.pdb in 126.957901001
Finished Docking oriented_structures/COVID19nsp7_Q8NBX0.pdb in 377.809907198
Finished Docking oriented_structures/COVID19nsp7_Q96A26.pdb in 345.345775843
Finished Docking oriented_structures/COVID19nsp9_P35556.pdb in 200.689696789
Finished Docking oriented_structures/COVID19nsp9_Q96F45.pdb in 166.396

Finished Docking oriented_structures/COVID19orf9c_Q6NXT6.pdb in 190.765954018
Finished Docking oriented_structures/COVID19orf8_Q13443.pdb in 435.40982604
Finished Docking oriented_structures/COVID19orf9c_O00124.pdb in 126.550062895
Finished Docking oriented_structures/COVID19nsp12_A3KN83.pdb in 3521.05452609
Finished Docking oriented_structures/COVID19orf9b_O94826.pdb in 492.378593206
Finished Docking oriented_structures/COVID19orf7a_Q9NU22.pdb in 2686.93129683
Finished Docking oriented_structures/COVID19nsp9_P35555.pdb in 718.797042847
Finished Docking oriented_structures/COVID19Spike_Q9C0B5.pdb in 3149.17797899
Finished Docking oriented_structures/COVID19orf8_Q9NYU1.pdb in 5274.58367705
Finished Docking oriented_structures/COVID19nsp2_Q5T1M5.pdb in 2812.75300908
Finished Docking oriented_structures/COVID19nsp13_O95613.pdb in 9421.75203705
Error 1
cmd: nice python Docking_Script.py --pdb_filename [REDACTED_PATH]Collaborators/Resource_Maintenance/2020_04_27_Guided_Docking_Test/random_d

Finished Docking oriented_structures/COVID19nsp7_Q9BQE4.pdb in 371.799801826
Finished Docking oriented_structures/COVID19nsp13_Q9BV73.pdb in 8827.82668495
Finished Docking oriented_structures/COVID19nsp12_Q13546.pdb in 5034.99467206
Finished Docking oriented_structures/COVID19nsp12_Q6UUV7.pdb in 7723.13362098
Finished Docking oriented_structures/COVID19orf8_O14656.pdb in 468.559636831
Finished Docking oriented_structures/COVID19nsp7_O95573.pdb in 461.878944159
Finished Docking oriented_structures/COVID19nsp9_P61962.pdb in 496.194296122
Finished Docking oriented_structures/COVID19nsp13_A7MCY6.pdb in 3131.2101779
Finished Docking oriented_structures/COVID19orf9c_Q9GZU3.pdb in 370.928468943
Finished Docking oriented_structures/COVID19nsp13_Q9UHD2.pdb in 1171.53250408
Finished Docking oriented_structures/COVID19orf8_P05556.pdb in 320.544409037
Finished Docking oriented_structures/COVID19nsp12_Q5VUA4.pdb in 3903.17215419
Finished Docking oriented_structures/COVID19nsp7_P61019.pdb in 645.252

HBox(children=(IntProgress(value=0, max=250), HTML(value=u'')))

Error 1
cmd: nice python Docking_Script.py --pdb_filename [REDACTED_PATH]Collaborators/Resource_Maintenance/2020_04_27_Guided_Docking_Test/random_docking_inputs/COVID19orf8_Q96MM7_8.pdb --partners A_B --jobs 1 --job_output COVID19orf8_Q96MM7_8
RunTime: 1069.80312204

Finished Docking oriented_structures/COVID19nsp15_P61970.pdb in 1397.42423487
Finished Docking oriented_structures/COVID19orf9b_O14745.pdb in 430.004554033
Finished Docking oriented_structures/COVID19nsp2_Q6Y7W6.pdb in 1209.01051903
Finished Docking oriented_structures/COVID19nsp7_Q8N183.pdb in 494.671560049
Finished Docking oriented_structures/COVID19nsp8_P82675.pdb in 568.725346088
Finished Docking oriented_structures/COVID19orf9b_Q9P0L2.pdb in 438.206270933
Finished Docking oriented_structures/COVID19orf9c_Q9H3K2.pdb in 132.633143902
Finished Docking oriented_structures/COVID19nsp7_Q8NBX0.pdb in 334.107575893
Finished Docking oriented_structures/COVID19nsp7_Q96A26.pdb in 432.840430975
Finished Docking oriented_structure

Finished Docking oriented_structures/COVID19orf9c_O43292.pdb in 2496.02818799
Error 1
cmd: nice python Docking_Script.py --pdb_filename [REDACTED_PATH]Collaborators/Resource_Maintenance/2020_04_27_Guided_Docking_Test/random_docking_inputs/COVID19nsp8_Q92552_9.pdb --partners A_B --jobs 1 --job_output COVID19nsp8_Q92552_9
RunTime: 2376.62845016

Finished Docking oriented_structures/COVID19orf9c_O00124.pdb in 152.563567162
Finished Docking oriented_structures/COVID19orf8_Q13443.pdb in 503.060024023
Finished Docking oriented_structures/COVID19nsp9_P35555.pdb in 441.269898176
Finished Docking oriented_structures/COVID19nsp13_O95613.pdb in 8767.31651902
Finished Docking oriented_structures/COVID19orf9b_O94826.pdb in 556.708401918
Finished Docking oriented_structures/COVID19orf9b_O43633.pdb in 490.554683924
Finished Docking oriented_structures/COVID19nsp7_Q8WTV0.pdb in 491.219330788
Finished Docking oriented_structures/COVID19nsp7_P00387.pdb in 894.339633942
Finished Docking oriented_structur

Finished Docking oriented_structures/COVID19nsp4_Q9BSF4.pdb in 1922.26120305
Finished Docking oriented_structures/COVID19orf8_O76061.pdb in 424.123075008
Finished Docking oriented_structures/COVID19orf8_Q96DZ1.pdb in 578.759721041
Finished Docking oriented_structures/COVID19nsp7_O95573.pdb in 120.118443966
Finished Docking oriented_structures/COVID19nsp2_P16435.pdb in 1507.2505219
Finished Docking oriented_structures/COVID19nsp8_Q9Y399.pdb in 671.228215933
Finished Docking oriented_structures/COVID19orf8_O14656.pdb in 451.985252142
Finished Docking oriented_structures/COVID19nsp12_O95391.pdb in 2451.58350492
Finished Docking oriented_structures/COVID19nsp13_O95684.pdb in 1127.14312601
Finished Docking oriented_structures/COVID19nsp9_P61962.pdb in 483.928174973
Finished Docking oriented_structures/COVID19nsp13_A7MCY6.pdb in 2609.39453292
Error 1
cmd: nice python Docking_Script.py --pdb_filename [REDACTED_PATH]Collaborators/Resource_Maintenance/2020_04_27_Guided_Docking_Test/random_docki

HBox(children=(IntProgress(value=0, max=250), HTML(value=u'')))

Finished Docking oriented_structures/COVID19nsp7_P62873.pdb in 514.566546917
Finished Docking oriented_structures/COVID19orf9b_O14745.pdb in 476.781852961
Finished Docking oriented_structures/COVID19orf9b_Q9P0L2.pdb in 454.17978096
Finished Docking oriented_structures/COVID19nsp15_P61970.pdb in 1646.76872993
Finished Docking oriented_structures/COVID19nsp2_Q6Y7W6.pdb in 1210.53835917
Finished Docking oriented_structures/COVID19orf9c_Q9H3K2.pdb in 128.697988987
Finished Docking oriented_structures/COVID19nsp7_Q8NBX0.pdb in 356.964604855
Finished Docking oriented_structures/COVID19orf9b_Q9H2P9.pdb in 1034.64205694
Finished Docking oriented_structures/COVID19nsp1_P09884.pdb in 122.248862982
Finished Docking oriented_structures/COVID19nsp7_Q96A26.pdb in 461.676167011
Error 1
cmd: nice python Docking_Script.py --pdb_filename [REDACTED_PATH]Collaborators/Resource_Maintenance/2020_04_27_Guided_Docking_Test/random_docking_inputs/COVID19nsp4_Q2TAA5_9.pdb --partners A_B --jobs 1 --job_output COV

Finished Docking oriented_structures/COVID19nsp13_P17612.pdb in 1999.05853319
Finished Docking oriented_structures/COVID19nsp13_O95613.pdb in 8995.71831512
Error 1
cmd: nice python Docking_Script.py --pdb_filename [REDACTED_PATH]Collaborators/Resource_Maintenance/2020_04_27_Guided_Docking_Test/random_docking_inputs/COVID19orf9c_Q9Y276_10.pdb --partners A_B --jobs 1 --job_output COVID19orf9c_Q9Y276_10
RunTime: 888.666384935

Finished Docking oriented_structures/COVID19orf8_Q9NYU1.pdb in 4655.23952794
Finished Docking oriented_structures/COVID19orf9b_O43633.pdb in 406.809316158
Finished Docking oriented_structures/COVID19nsp7_Q8WTV0.pdb in 376.599058151


In [184]:
input_pdbs

['oriented_structures/COVID19nsp7_Q96A26.pdb',
 'oriented_structures/COVID19nsp9_Q7Z3B4.clean.pdb',
 'oriented_structures/COVID19nsp13_Q8N4C6.pdb',
 'oriented_structures/COVID19orf9c_Q9H3K2.pdb',
 'oriented_structures/COVID19nsp12_Q96IZ5.pdb',
 'oriented_structures/COVID19nsp9_P35556.pdb',
 'oriented_structures/COVID19nsp1_P09884.pdb',
 'oriented_structures/COVID19nsp4_Q9Y5J6.clean.pdb',
 'oriented_structures/COVID19nsp9_Q96F45.pdb',
 'oriented_structures/COVID19nsp15_P62330.pdb',
 'oriented_structures/COVID19nsp13_Q4V328.pdb',
 'oriented_structures/COVID19nsp13_Q96CN9.pdb',
 'oriented_structures/COVID19orf8_Q13438.pdb',
 'oriented_structures/COVID19nsp4_Q9Y5J6.pdb',
 'oriented_structures/COVID19nsp7_O00116.pdb',
 'oriented_structures/COVID19nsp9_Q9NZL9.pdb',
 'oriented_structures/COVID19nsp7_Q7LGA3.clean.pdb',
 'oriented_structures/COVID19orf8_O00469.pdb',
 'oriented_structures/COVID19nsp10_Q96CW1.clean.pdb',
 'oriented_structures/COVID19nsp12_Q92615.pdb',
 'oriented_structures/COVID1

In [187]:
s = 
print s
print os.path.exists(s)

[REDACTED_PATH]/Collaborators/Resource_Maintenance/2020_04_27_Guided_Docking_Test/docking_outputs/COVID19nsp7_Q96A26_1_0.pdb
True


'[REDACTED_PATH]/Collaborators/Resource_Maintenance/2020_04_27_Guided_Docking_Test/docking_outputs/COVID19nsp7_Q96A26_1.pdb'

In [174]:
input_pdbs[0]

'oriented_structures/COVID19nsp7_Q96A26.pdb'

In [196]:
for p in processes:
    try:
        p["p"].kill()
    except:
        pass

In [197]:
0

0

In [143]:
os.getcwd()

'[REDACTED_PATH]Collaborators/Resource_Maintenance/2020_04_27_Guided_Docking_Test'