In [1]:
%cd ~/REVIVAL2

/disk2/fli/REVIVAL2


In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from MDAnalysis import Universe

from REVIVAL.chem_helper import apply_mutation
from REVIVAL.preprocess import ZSData
from REVIVAL.zs.vina import ligand_smiles2pdbqt



In [6]:
# Joint ligand: the bromo substrate, the PLP-dependent aminoacrylate and Na+
# written as a single multi-fragment SMILES.
ligand_smiles2pdbqt(
    smiles="C1=CC2=C(C=CN2)C(=C1)Br.[O-]C1=C(/C=[N+]([H])/C(C([O-])=O)=C)C(CP([O-])([O-])=O)=CN=C1C.[Na+]",
    ligand_sdf_file="/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/joint.sdf",
    ligand_pdbqt_file="/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/joint.pdbqt",
    pH=7.4,
)

1 molecule converted


In [7]:
# Cofactor on its own: the PLP-dependent aminoacrylate.
ligand_smiles2pdbqt(
    smiles="[O-]C1=C(/C=[N+]([H])/C(C([O-])=O)=C)C(CP([O-])([O-])=O)=CN=C1C",
    ligand_sdf_file="/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/PLP-dependent_aminoacrylate.sdf",
    ligand_pdbqt_file="/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/PLP-dependent_aminoacrylate.pdbqt",
    pH=7.4,
)

1 molecule converted


In [9]:
# Substrate on its own (written to the `4bromo` files).
ligand_smiles2pdbqt(
    smiles="C1=CC2=C(C=CN2)C(=C1)Br",
    ligand_sdf_file="/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/4bromo.sdf",
    ligand_pdbqt_file="/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/4bromo.pdbqt",
    pH=7.4,
)

1 molecule converted


In [10]:
# Sodium ion on its own.
ligand_smiles2pdbqt(
    smiles="[Na+]",
    ligand_sdf_file="/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/Na+.sdf",
    ligand_pdbqt_file="/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/Na+.pdbqt",
    pH=7.4,
)

1 molecule converted


In [14]:
from REVIVAL.util import calculate_ligand_centroid

In [15]:
from REVIVAL.global_param import ENZYME_INFO_DICT

In [36]:
# Centroid of the ligand described by the PfTrpB "ligand-info" entry;
# later cells write it out as the centre of the Vina search box.
coords = calculate_ligand_centroid(
    ligand_info=ENZYME_INFO_DICT["PfTrpB"]["ligand-info"],
    pdb_file="/disk2/fli/REVIVAL2/data/structure/PfTrpB.pdb",
)
coords

array([ -1.8210907, -37.15664  , -14.083181 ], dtype=float32)

In [19]:
# Path of the Vina config file for the TrpB test docking.
# Note: this name is reassigned (to conf_3.txt) in a later cell before any file is written.
conf_path = "/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/conf.txt"

In [32]:
import os

def pdb_to_pdbqt_protein(input_path: str, output_path=None, pH: float = 7.4):

    """
    Convert a protein pdb file to a rigid-receptor pdbqt file with Open Babel.

    Steps:
        1. Clean ``input_path`` IN PLACE: drop ligand-torsion records
           (ROOT / ENDROOT / BRANCH / ENDBRANCH) and every line containing
           the substring "Fe" (iron removal). The input file is overwritten.
        2. Run ``obabel <input> -xr -p <pH> --partialcharge gasteiger -O <output>``.
        3. Post-process the pdbqt: drop MODEL / TER / ENDMDL / REMARK records
           so everything is kept as one model, then append one final ``TER``.

    Args:
        input_path: path to the pdb file (rewritten in place by step 1).
        output_path: path of the pdbqt file to write. When falsy, the input
            path with its extension swapped for ``.pdbqt`` is used.
        pH: pH passed to obabel's ``-p`` option.

    Raises:
        FileNotFoundError: if the input is missing, the ``obabel`` executable
            cannot be found, or obabel did not produce the output file.
    """
    # Local import so the function does not depend on the notebook's import cells.
    import subprocess

    def _record_name(line: str) -> str:
        # First whitespace-delimited token. Unlike ``line.split(" ")[0]`` this
        # also matches records that stand alone on a line ("ROOT\n", "TER\n"),
        # where the old code compared against "ROOT\n" and never matched.
        fields = line.split(maxsplit=1)
        return fields[0] if fields else ""

    torsion_records = ("ENDBRANCH", "BRANCH", "ROOT", "ENDROOT")
    model_records = ("MODEL", "TER", "ENDMDL", "REMARK")

    # 1. Remove torsion-tree records and iron lines from the input pdb.
    with open(input_path, "r") as fin:
        kept_lines = [
            line
            for line in fin
            if _record_name(line) not in torsion_records and "Fe" not in line
        ]
    with open(input_path, "w") as fout:
        fout.writelines(kept_lines)

    # Only swap the extension; ``str.replace`` would also rewrite ".pdb"
    # occurring in directory names and leaves non-".pdb" paths unchanged,
    # which made obabel overwrite its own input.
    if not output_path:
        output_path = os.path.splitext(input_path)[0] + ".pdbqt"

    # 2. Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed through verbatim.
    subprocess.run(
        [
            "obabel",
            input_path,
            "-xr",
            "-p",
            str(pH),
            "--partialcharge",
            "gasteiger",
            "-O",
            output_path,
        ]
    )
    if not os.path.isfile(output_path):
        raise FileNotFoundError(
            f"obabel did not produce {output_path} from {input_path}"
        )

    # 3. Strip model bookkeeping so a bound heme (or similar) written as a
    # secondary model stays part of the single receptor. Hacky, but works.
    with open(output_path, "r") as fin:
        receptor_lines = [
            line
            for line in fin
            if _record_name(line) not in model_records and "ENDMDL" not in line
        ]
    with open(output_path, "w") as fout:
        fout.writelines(receptor_lines)
        fout.write("TER\n")


In [35]:
# Convert the apo PfTrpB structure into the receptor pdbqt used for the TrpB test.
pdb_to_pdbqt_protein(
    input_path="/disk2/fli/REVIVAL2/data/structure/apo/PfTrpB.pdb",
    output_path="/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/apo_TrpB.pdbqt",
    pH=7.4,
)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is /disk2/fli/REVIVAL2/data/structure/apo/PfTrpB.pdb)

1 molecule converted


In [48]:
# Vina config for the TrpB test: dock the 4bromo ligand together with the
# PLP-dependent aminoacrylate cofactor.
conf_path = "/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/conf_3.txt"
# NOTE(review): apo_TrpB_Na.pdbqt is not produced by any cell shown here;
# the apo receptor still has to be merged with Na+ to create it.
receptor_pdbqt = "/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/apo_TrpB_Na.pdbqt"
ligand_pdbqt = "/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/4bromo.pdbqt"
cofactor2dock = [
    "/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/PLP-dependent_aminoacrylate.pdbqt",
    # "/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/Na+.pdbqt",
]

with open(conf_path, "w") as fout:
    # Receptor first, then one `ligand` entry per molecule to dock.
    conf_lines = [
        f"receptor = {receptor_pdbqt}\n",
        f"ligand = {ligand_pdbqt}\n",
    ]
    if cofactor2dock is not None:
        for cofactor_file in cofactor2dock:
            conf_lines.append(f"ligand = {cofactor_file}\n")

    # Search box: centred on the ligand centroid, 20 units along each axis.
    for idx, axis in enumerate("xyz"):
        conf_lines.append(f"center_{axis} = {coords[idx]}\n")
    for axis in "xyz":
        conf_lines.append(f"size_{axis} = 20\n")

    conf_lines.append("num_modes = 9\n")
    conf_lines.append("exhaustiveness = 32\n")
    fout.writelines(conf_lines)

In [53]:
from REVIVAL.zs.vina import merge_pdbqt

In [54]:
# Combine the apo receptor with the PLS pdbqt into a single file.
merge_pdbqt(
    input_files=[
        "/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/apo_TrpB.pdbqt",
        "/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/PLS.pdbqt",
    ],
    output_file_path="/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/merge.pdbqt",
)

Merging /disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/apo_TrpB.pdbqt
Merging /disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/PLS.pdbqt
Combined ATOM lines saved to /disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/merge.pdbqt


In [None]:

# def dock_apo_lib_parallel(
#     struct_dir: str, # ie 
#     dock_opt: str,  #  ie "substrate",
#     score_only: bool,  # = True,
#     cofactor_dets: str = "cofactor",
#     vina_dir: str = "zs/vina",
#     residues4centriod: list = None,
#     from_pdb: bool = True,
#     pH: float = 7.4,
#     size_x=20.0,
#     size_y=20.0,
#     size_z=20.0,
#     num_modes=9,
#     exhaustiveness=32,
#     regen=False,
#     rerun=False,
#     seed=42,
#     num_cpus=None,  # for each dock function
#     max_workers=24,  # Number of parallelized variants to be docked
# ):

In [None]:
# first clean the apo structure so that it is truly apo
# then mutate the apo structure to carry the variant sequence
# then dock the ligands generated from SMILES

In [45]:
# Heme cofactor without the iron (written to the `heme-no-Fe` files).
ligand_smiles2pdbqt(
    smiles=r"C=CC1=C(/C=C2C(C)=C(C=C)C3=N/2)NC(/C=C4N=C(/C=C(C(CCC([O-])=O)=C/5C)\NC5=C/3)C(CCC([O-])=O)=C\4C)=C1C",
    ligand_sdf_file="/disk2/fli/REVIVAL2/sandbox/test_vina/ParLQ/heme-no-Fe.sdf",
    ligand_pdbqt_file="/disk2/fli/REVIVAL2/sandbox/test_vina/ParLQ/heme-no-Fe.pdbqt",
    pH=7.4,
)

1 molecule converted


In [46]:
# Activated carbene on its own.
ligand_smiles2pdbqt(
    smiles="CCOC([C])=O",
    ligand_sdf_file="/disk2/fli/REVIVAL2/sandbox/test_vina/ParLQ/activated-carbene.sdf",
    ligand_pdbqt_file="/disk2/fli/REVIVAL2/sandbox/test_vina/ParLQ/activated-carbene.pdbqt",
    pH=7.4,
)

1 molecule converted


In [49]:
# Convert the apo ParLQ structure into the receptor pdbqt used for the ParLQ test.
pdb_to_pdbqt_protein(
    input_path="/disk2/fli/REVIVAL2/data/structure/apo/ParLQ.pdb",
    output_path="/disk2/fli/REVIVAL2/sandbox/test_vina/ParLQ/apo_ParLQ.pdbqt",
    pH=7.4,
)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is /disk2/fli/REVIVAL2/data/structure/apo/ParLQ.pdb)

1 molecule converted


In [51]:
# Centroid of the ligand described by the ParLQ "ligand-info" entry;
# this overwrites the TrpB `coords` and is used for the ParLQ search box.
coords = calculate_ligand_centroid(
    ligand_info=ENZYME_INFO_DICT["ParLQ"]["ligand-info"],
    pdb_file="/disk2/fli/REVIVAL2/data/structure/ParLQ.pdb",
)
coords

array([ 1.9006428,  5.4527626, -3.6450245], dtype=float32)

In [52]:
# Vina config for the ParLQ test: dock 4-vinylanisole together with the
# activated carbene and the iron-free heme.
conf_path = "/disk2/fli/REVIVAL2/sandbox/test_vina/ParLQ/conf.txt"
# NOTE(review): neither apo_ParLQ_fe.pdbqt nor 4-vinylanisole.pdbqt is
# produced by a cell shown in this notebook; presumably the receptor is the
# apo structure merged with Fe - confirm before running.
receptor_pdbqt = "/disk2/fli/REVIVAL2/sandbox/test_vina/ParLQ/apo_ParLQ_fe.pdbqt"
ligand_pdbqt = "/disk2/fli/REVIVAL2/sandbox/test_vina/ParLQ/4-vinylanisole.pdbqt"
cofactor2dock = [
    "/disk2/fli/REVIVAL2/sandbox/test_vina/ParLQ/activated-carbene.pdbqt",
    "/disk2/fli/REVIVAL2/sandbox/test_vina/ParLQ/heme-no-Fe.pdbqt"
]

with open(conf_path, "w") as fout:
    # Receptor first, then one `ligand` entry per molecule to dock.
    conf_lines = [
        f"receptor = {receptor_pdbqt}\n",
        f"ligand = {ligand_pdbqt}\n",
    ]
    if cofactor2dock is not None:
        for cofactor_file in cofactor2dock:
            conf_lines.append(f"ligand = {cofactor_file}\n")

    # Search box: centred on the ligand centroid, 20 units along each axis.
    for idx, axis in enumerate("xyz"):
        conf_lines.append(f"center_{axis} = {coords[idx]}\n")
    for axis in "xyz":
        conf_lines.append(f"size_{axis} = 20\n")

    conf_lines.append("num_modes = 9\n")
    conf_lines.append("exhaustiveness = 32\n")
    fout.writelines(conf_lines)

In [None]:
# for trpb and heme based, can do all separate
# for trpb can also do just substrate with PLS Na from pdb
# heme based can do pdb for heme then with carbene and substrate

In [60]:
import shutil
from REVIVAL.global_param import AA_DICT
from REVIVAL.zs.vina import mutate_and_save_pdb, merge_pdbqt
from REVIVAL.util import checkNgen_folder, save_hetatm_only

In [None]:
class VinaApoDock(ZSData):
    """
    Dock a substrate (and optionally its cofactors) into mutated apo
    structures with AutoDock Vina, one output sub-folder per variant.

    Attributes such as ``protein_name``, ``lib_info``, ``substrate_dets`` and
    ``substrate_smiles`` are not defined here; they are presumably provided by
    ``ZSData`` - confirm against that class.
    """

    def __init__(
        self,
        input_csv: str,
        dock_opt: str,  #  ie "substrate", "joint", "all"
        cofactor_dets: str = "cofactor", # or inactivated_cofactor
        in_structure_dir: str = "data/structure/apo",
        combo_col_name: str = "AAs",
        var_col_name: str = "var",
        fit_col_name: str = "fitness",
        output_dir: str = "zs/vina/apo",
        pH: float = 7.4,
        size_x: float = 20.0,
        size_y: float = 20.0,
        size_z: float = 20.0,
        num_modes: int = 9,
        exhaustiveness: int = 32,
        regen: bool = False,
        redock: bool = False,
        max_workers: int = 24,
    ):
        """
        Args:
            input_csv: library csv, forwarded to ``ZSData``.
            dock_opt: docking mode; only the value "all" is special-cased in
                this class (every cofactor is docked as a flexible ligand).
            cofactor_dets: key into ``lib_info`` holding the cofactor names;
                ``f"{cofactor_dets}-smiles"`` must hold the matching SMILES.
            in_structure_dir: folder with the input structures.
            combo_col_name, fit_col_name: column names forwarded to ``ZSData``.
            var_col_name: name of the variant column.
            output_dir: root folder for all docking outputs.
            pH: pH used when converting SMILES to pdbqt.
            size_x, size_y, size_z: Vina search-box dimensions.
            num_modes, exhaustiveness: written verbatim to the Vina config.
            regen, redock: when either is True, variants are docked even if a
                ``log.txt`` already exists for them.
            max_workers: number of variants docked concurrently.
        """

        super().__init__(
            input_csv=input_csv,
            combo_col_name=combo_col_name,
            fit_col_name=fit_col_name,
        )

        # Previously accepted but never stored, although _dock_lib_parallel
        # reads self._var_col_name.
        # NOTE(review): if ZSData also takes var_col_name, forward it there too.
        self._var_col_name = var_col_name

        self._dock_opt = dock_opt
        self._cofactor_dets = cofactor_dets
        self._in_structure_dir = in_structure_dir
        # checkNgen_folder's return value is used as the folder path
        # (presumably it creates the folder when missing).
        self._output_dir = checkNgen_folder(output_dir)
        self._common_pdbqt_dir = checkNgen_folder(
            os.path.join(self._output_dir, "common_pdbqt")
        )

        self._pH = pH
        self._size_x = size_x
        self._size_y = size_y
        self._size_z = size_z
        self._num_modes = num_modes
        self._exhaustiveness = exhaustiveness

        self._regen = regen
        self._redock = redock
        # Previously read in _dock_lib_parallel without ever being set.
        self._max_workers = max_workers

        self._coords = self._get_coords()

        self._ligand_pdbqt = os.path.join(
            self._common_pdbqt_dir, f"{self.substrate_dets}.pdbqt"
        )
        self._cofactor2dock, self._cofactor2freeze = self._prep_common_pdbqt()

    def _get_coords(self):
        """
        Calculates the centroid of the ligand in the clean structure; it is
        used as the centre of the Vina search box.
        """

        return calculate_ligand_centroid(
            pdb_file=self.clean_struct,  # the main pdb file dir
            ligand_info=ENZYME_INFO_DICT[self.protein_name]["ligand-info"],
        )

    def _prep_common_pdbqt(self):
        """
        Prepares the PDBQT files shared by all variants.

        The substrate is always converted from SMILES. For each cofactor:
        with ``dock_opt == "all"`` it is converted from SMILES and docked;
        otherwise a ``hetatm.pdbqt`` is written to be frozen into the receptor.

        Returns:
            (cofactor2dock, cofactor2freeze): two lists of file paths.
        """

        # generate the substrate pdbqt from smiles
        ligand_smiles2pdbqt(
            smiles=self.substrate_smiles,
            ligand_sdf_file=self._ligand_pdbqt.replace(".pdbqt", ".sdf"),
            ligand_pdbqt_file=self._ligand_pdbqt,
            pH=self._pH
        )

        cofactor2dock = []
        cofactor2freeze = []

        for (cofactor_name, cofactor_smiles) in zip(
            self.lib_info[self._cofactor_dets],
            self.lib_info[f"{self._cofactor_dets}-smiles"],
        ):
            sub_folder = checkNgen_folder(
                os.path.join(self._common_pdbqt_dir, cofactor_name)
            )
            cofactor_pdbqt = os.path.join(sub_folder, f"{cofactor_name}.pdbqt")

            if self._dock_opt == "all":
                cofactor2dock.append(cofactor_pdbqt)
                ligand_smiles2pdbqt(
                    smiles=cofactor_smiles,
                    ligand_sdf_file=cofactor_pdbqt.replace(".pdbqt", ".sdf"),
                    ligand_pdbqt_file=cofactor_pdbqt,
                    pH=self._pH
                )

            # substrate only for trpb but sub + carbene for heme
            # extract the HETATM records to their own file
            else:
                hetatm_pdbqt = os.path.join(sub_folder, "hetatm.pdbqt")
                # NOTE(review): the HETATM source is the substrate pdbqt that
                # was just generated from SMILES, not a protein structure
                # (e.g. self.clean_struct) - looks unintended, confirm.
                save_hetatm_only(
                    pdb_file=self._ligand_pdbqt,
                    hetatm_file=hetatm_pdbqt
                )
                cofactor2freeze.append(hetatm_pdbqt)

        return cofactor2dock, cofactor2freeze

    def _mutate_apo(self, var):
        """
        Creates ``<output_dir>/<var>/<var>.pdb`` for the given variant.

        ``var`` is either "WT" or ":"-joined mutations whose middle characters
        are the position and whose last character is the new residue
        (looked up in ``AA_DICT``).

        Returns:
            Path of the variant pdb file.
        """

        # make var_dir
        var_dir = checkNgen_folder(os.path.join(self._output_dir, var))
        var_pdb = os.path.join(var_dir, f"{var}.pdb")

        if var != "WT":
            mutation_dict = {
                int(mut[1:-1]): AA_DICT[mut[-1]] for mut in var.split(":")
            }

            # Mutate the apo structure
            # NOTE(review): this reads <in_structure_dir>/<name>.pdb while the
            # WT branch copies <in_structure_dir>/apo/<name>.pdb (apo_struct);
            # one of the two paths is presumably wrong - confirm.
            mutate_and_save_pdb(
                parent_pdb=f"{self._in_structure_dir}/{self.protein_name}.pdb",
                mutations=mutation_dict,
                output_pdb=var_pdb
            )

        else:
            # copy the apo structure to the output directory
            shutil.copy(self.apo_struct, var_pdb)

        return var_pdb

    def _make_config(self, var):
        """
        Writes the Vina config file for the given variant.

        Returns:
            Path of ``<output_dir>/<var>/conf.txt``.
        """

        var_dir = checkNgen_folder(os.path.join(self._output_dir, var))
        conf_path = os.path.join(var_dir, "conf.txt")

        # NOTE(review): _mutate_apo returns a .pdb file; it is used (or
        # merged) as the receptor without any pdb -> pdbqt conversion.
        if self._dock_opt != "all" and self._cofactor2freeze != []:
            # freeze the cofactors into the receptor by merging the files
            receptor_pdbqt = os.path.join(var_dir, "receptor.pdbqt")
            merge_pdbqt(
                input_files=[self._mutate_apo(var)] + self._cofactor2freeze,
                output_file_path=receptor_pdbqt
            )

        else:
            receptor_pdbqt = self._mutate_apo(var)

        with open(conf_path, "w") as fout:
            fout.write(f"receptor = {receptor_pdbqt}\n")
            fout.write(f"ligand = {self._ligand_pdbqt}\n")

            # Include cofactors that are docked as additional ligands
            for cofactor_file in self._cofactor2dock or []:
                fout.write(f"ligand = {cofactor_file}\n")

            fout.write(f"center_x = {self._coords[0]}\n")
            fout.write(f"center_y = {self._coords[1]}\n")
            fout.write(f"center_z = {self._coords[2]}\n")
            fout.write(f"size_x = {self._size_x}\n")
            fout.write(f"size_y = {self._size_y}\n")
            fout.write(f"size_z = {self._size_z}\n")
            fout.write(f"num_modes = {self._num_modes}\n")
            fout.write(f"exhaustiveness = {self._exhaustiveness}\n")

        return conf_path

    def _dock(self, var):
        """
        Dock the given variant; Vina's log goes to ``<output_dir>/<var>/log.txt``.
        """
        # Local import: subprocess is not imported by the notebook's import cells.
        import subprocess

        # make the config file
        conf_path = self._make_config(var)
        log_path = os.path.join(self._output_dir, var, "log.txt")

        # Argument list instead of a shell string so paths with spaces or
        # shell metacharacters are passed through verbatim.
        subprocess.run(["vina", "--config", conf_path, "--log", log_path])

    def _dock_lib_parallel(self):
        """
        Dock the library in parallel using a thread pool of ``max_workers``.

        A variant is skipped when its ``log.txt`` already exists, unless
        ``regen`` or ``redock`` was requested. Exceptions raised by a docking
        job are re-raised here.
        """
        # Local import: concurrent.futures was used without ever being imported.
        import concurrent.futures

        force = self._regen or self._redock

        with concurrent.futures.ThreadPoolExecutor(
            max_workers=self._max_workers
        ) as executor:
            futures = []
            # NOTE(review): variants are read from lib_info, which elsewhere in
            # this class holds cofactor metadata - confirm this is not meant
            # to be the library dataframe.
            for var in self.lib_info[self._var_col_name]:
                log_path = os.path.join(self._output_dir, var, "log.txt")
                if force or not os.path.exists(log_path):
                    futures.append(executor.submit(self._dock, var))

            for future in concurrent.futures.as_completed(futures):
                future.result()

    @property
    def clean_struct(self):
        """The clean (no water or SO4) pdb file of the protein."""
        return os.path.join(self._in_structure_dir, "clean", f"{self.protein_name}.pdb")

    @property
    def apo_struct(self):
        """The apo pdb file of the protein."""
        return os.path.join(self._in_structure_dir, "apo", f"{self.protein_name}.pdb")

    @property
    def coords(self):
        """Centre of the docking box (ligand centroid in the clean structure)."""
        return self._coords
            

        
