In [1]:
### GENERATE POCKETVEC DESCRIPTORS FOR 7 MURD STRUCTURES ####

In [55]:
from subprocess import Popen, PIPE
from scipy.spatial import distance
from collections import Counter
import matplotlib.pyplot as plt
from Bio.SeqUtils import seq1
from Bio.PDB import *
from rdkit import Chem
import pandas as pd
import numpy as np
import scipy.stats as ss
import seaborn as sns
import tqdm
import pickle
import os
import time
import shutil
import gzip
import tarfile
import sys
import random
import pybel

In [56]:
# Input folder holding the raw structures and root folder for the prepared ones
path_in = "../data/structures"
path_out = '../pocketvec/MurD/structures/'

# Names of the structures to process (one "<name>.pdb" per entry in path_in)
sts = [
    'SagaMurD_Frag349',
    "SagaMurD_Frag373",
    "SagaMurD_Frag374",
    "SagaMurD_Frag378",
    "SagaMurD_apo",
    "EColiMurD_substrate",
    "EColiMurE",
]

# Working directory at the time this cell runs; the relative paths above are
# resolved against it
st_path = os.getcwd()

In [64]:
### PREPARE STRUCTURES FOR DOCKING

In [63]:
class WaterLIGRemove(Select):
    """Residue filter used when saving the cleaned structure.

    Residues whose hetero flag (first field of ``residue.get_id()``) appears
    in ``REMOVED_HETFLAGS`` are discarded; every other residue is kept.
    """

    # Water ("W") plus the hetero groups LIG, SO4, EDO, IPA, DMS, CL, EPE
    # (CL is listed with both a leading and a trailing blank).
    REMOVED_HETFLAGS = ("H_SO4", "H_EDO", "H_IPA", "W", "H_DMS", "H_ CL", "H_CL ", "H_EPE", "H_LIG")

    def accept_residue(self, residue):
        """Return 1 to keep ``residue`` in the output, 0 to drop it."""
        if residue.get_id()[0] in self.REMOVED_HETFLAGS:
            return 0
        return 1


def _run_check_structure(infile, outfile, options):
    """Run ../src/check_structure.py on ``infile`` -> ``outfile`` and echo its stdout to stderr."""
    command = 'python ../src/check_structure.py -i ' + infile + ' -o ' + outfile + ' --force_save --non_interactive ' + options
    o = os.popen(command).read()
    sys.stderr.write(o + "\n\n")
    sys.stderr.flush()


def prepare(infile, prepared_prot, logfile, all_outputs):
    """Prepare a protein with MOE's ``proteinprep`` unless it is already prepared.

    Parameters
    ----------
    infile : path of the input PDB file handed to MOE.
    prepared_prot : path of the file MOE is asked to write.
    logfile : path of the log file MOE is asked to write.
    all_outputs : list; ``str(stderr)`` of the MOE call is appended to it.

    Returns
    -------
    (bool, list) : whether ``prepared_prot`` exists after the call, and
    ``all_outputs``. If ``prepared_prot`` already exists MOE is not run.
    """
    if os.path.isfile(prepared_prot):
        return True, all_outputs

    moefunct = "../src/moefunctions.svl"
    # PROTEINS ARE PREPARED USING MOE
    # NOTE(review): absolute, user-specific moebatch path -- consider moving it to the config cell.
    command = "/aloy/home/acomajuncosa/programs/MOE/moe2020/bin/moebatch -load " + moefunct + " -exec \"proteinprep['" + infile + "','" + prepared_prot + "','" + logfile + "']\""
    # With shell=True the command is passed as a single string
    process = Popen(command, stdout=PIPE, stderr=PIPE, shell=True)
    stdout, stderr = process.communicate()
    sys.stderr.write(str(stdout) + "\n\n")
    sys.stderr.write(str(stderr) + "\n\n")
    all_outputs.append(str(stderr))

    return os.path.isfile(prepared_prot), all_outputs


# All relative paths below are resolved against the notebook folder
os.chdir(st_path)

all_dir = "../pocketvec/MurD/structures/all"
os.makedirs(all_dir, exist_ok=True)

for st in sts:

    # It takes ~1min/st (substrate takes ~5min)

    ### FIRST STEPS ###

    path_in = os.path.join("../data/structures", st + ".pdb")
    path_out = os.path.join("../pocketvec/MurD/structures/", st)

    infile = os.path.join(path_out, st + '_altloc.pdb')
    prepared_prot = os.path.join(path_out, st + '_MOEprep.mol2')
    logfile = os.path.join(path_out, st + '_log.log')
    archive = os.path.join(path_out, st + "_files.tar.gz")

    # 0. Already processed in a previous run: keep the existing archive (it
    #    holds the only copy of the MOE log) and just refresh the copy in all/
    if os.path.isfile(prepared_prot) and os.path.isfile(archive):
        shutil.copyfile(prepared_prot, os.path.join(all_dir, st + "_MOEprep.mol2"))
        continue

    # 1. Make output folder
    os.makedirs(path_out, exist_ok=True)

    # 2. Remove water molecules and ligands (LIG, SO4, EDO, IPA, DMS, CL, EPE)
    parser = PDBParser()
    structure = parser.get_structure("st", path_in)

    shutil.copyfile(path_in, os.path.join(path_out, st + ".pdb"))

    io = PDBIO()
    io.set_structure(structure)
    io.save(os.path.join(path_out, st + "_water.pdb"), WaterLIGRemove())

    # 3. Remove hydrogens
    _run_check_structure(os.path.join(path_out, st + '_water.pdb'),
                         os.path.join(path_out, st + '_hydrogens.pdb'),
                         'rem_hydrogen --remove Yes')

    # 4. Select occupancies
    _run_check_structure(os.path.join(path_out, st + '_hydrogens.pdb'),
                         infile,
                         'altloc --select occupancy')

    # 5. Prepare st MOE
    all_outputs = []
    result, all_outputs = prepare(infile, prepared_prot, logfile, all_outputs)
    if not result:
        sys.stderr.write("WARNING: MOE did not produce " + prepared_prot + "\n\n")

    # 6. Look for errors (the log is absent when MOE was skipped or failed early)
    if os.path.isfile(logfile):
        with open(logfile, "r") as f:
            if any("error" in line.lower() for line in f):
                print("ERROR WHEN CHECKING LOG FILE! \n\n\n")
                sys.stderr.flush()

    # 7. Tar other files (members are stored under their bare file name)
    files = [st + i for i in ['_water.pdb', "_hydrogens.pdb", "_altloc.pdb", '_log.log']]
    with tarfile.open(archive, "w:gz") as tar:
        for name in files:
            member = os.path.join(path_out, name)
            if os.path.isfile(member):
                tar.add(member, arcname=name)
                os.remove(member)

    # 8. Copy to all/ directory
    shutil.copyfile(prepared_prot, os.path.join(all_dir, st + "_MOEprep.mol2"))


=                   BioBB structure checking utility v3.0.3                   =
=                 A. Hospital, P. Andrio, J.L. Gelpi 2018-20                  =

Structure ../pocketvec/MurD/structures/SagaMurD_Frag349/SagaMurD_Frag349_water.pdb loaded
 Title: 
 Experimental method: unknown
 Resolution: 0.0 A

 Num. models: 1
 Num. chains: 1 (A: Protein)
 Num. residues:  448
 Num. residues with ins. codes:  0
 Num. HETATM residues:  0
 Num. ligands or modified residues:  0
 Num. water mol.:  0
 Num. atoms:  3416

Running rem_hydrogen. Options: --remove Yes
No residues with Hydrogen atoms found
Structure not modified, saving due to --force_save option
Final Num. models: 1
Final Num. chains: 1 (A: Protein)
Final Num. residues:  448
Final Num. residues with ins. codes:  0
Final Num. HETATM residues:  0
Final Num. ligands or modified residues:  0
Final Num. water mol.:  0
Final Num. atoms:  3416
Structure saved on ../pocketvec/MurD/structures/SagaMurD_Frag349/SagaMurD_Frag349_hydrogens.pdb



In [None]:
### GET THE CONSENSUS CENTER OF THE FRAGMENT LIGANDS

In [86]:
def get_centroid(file):
    """
    Computes the mean position of all atoms read from a PDB file.

    Returns the x, y and z means as three values, each rounded to 3 decimals.
    """

    atoms = PDBParser().get_structure("st", file).get_atoms()
    xyz = np.array([atom.get_coord() for atom in atoms])

    # One mean per coordinate column (0 = x, 1 = y, 2 = z)
    cx, cy, cz = (round(np.mean(xyz[:, axis]), 3) for axis in range(3))
    return cx, cy, cz


def print_centroid(x, y, z, file_pdb, file_sd):
    """
    Writes the centroid to a one-atom PDB file and converts it with obabel.

    x, y, z  : coordinates as strings; each is right-justified in an
               8-character field of the HETATM record.
    file_pdb : path of the PDB file to write.
    file_sd  : output path passed to ``obabel -O``.

    A warning is written to stderr when the obabel command returns a non-zero
    status (e.g. obabel is not installed), instead of failing silently.
    """

    ctr = x.rjust(8) + y.rjust(8) + z.rjust(8)
    text = """HEADER\nHETATM    1   C  CTR A   1    """ + ctr + """  1.00  1.00           C\nEND"""

    with open(file_pdb, "w") as f:
        f.write(text)

    command = "obabel " + file_pdb + " -O " + file_sd
    status = os.system(command)
    if status != 0:
        sys.stderr.write("WARNING: '" + command + "' returned non-zero status " + str(status) + "\n")

In [92]:
path_in = "../data/structures"
path_out = '../pocketvec/MurD/structures/'

frags = ["373", "374", "378", "349"]
sts = ['SagaMurD_Frag349', "SagaMurD_Frag373", "SagaMurD_Frag374", "SagaMurD_Frag378", "SagaMurD_apo", "EColiMurD_substrate", "EColiMurE"]

# Centroid of each fragment ligand, one [x, y, z] row per fragment
ctr = []

for frag in frags:
    x, y, z = get_centroid(os.path.join(path_in, frag + "_ligand_only.pdb"))
    ctr.append([x, y, z])

ctr = np.array(ctr)

# Consensus centre = mean of the fragment centroids, rounded to 3 decimals
coords = [round(np.mean(ctr[:, axis]), 3) for axis in range(3)]
x, y, z = str(coords[0]), str(coords[1]), str(coords[2])

# The same consensus centre is written into every structure folder
for st in sts:
    file_pdb = os.path.join(path_out, st, "centroid.pdb")
    file_sd = os.path.join(path_out, st, "centroid.sd")
    print_centroid(x, y, z, file_pdb, file_sd)

# All centroid files are identical, so any one can populate all/. The last
# structure is named explicitly instead of relying on the loop variable
# leaking out of the for-loop above.
last_st = sts[-1]
shutil.copyfile(os.path.join(path_out, last_st, "centroid.sd"), os.path.join(path_out, "all", "centroid.sd"))

'../pocketvec/MurD/structures/all/centroid.sd'