In [1]:
%cd ~/REVIVAL2

/disk2/fli/REVIVAL2


In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from MDAnalysis import Universe

from REVIVAL.chem_helper import apply_mutation
from REVIVAL.preprocess import ZSData
from REVIVAL.zs.vina import ligand_smiles2pdbqt



In [4]:

def mutate_and_save_pdb(parent_pdb, mutations, output_pdb):
    """
    Build a mutated copy of a parent structure and write it to disk.

    Args:
        parent_pdb (str): Path of the PDB file loaded into a `Universe`.
        mutations (dict): Mapping of residue_id -> new_aa. Each entry is handed
            to `apply_mutation` as a `(residue_id, new_aa)` tuple, in the
            dict's iteration order.
        output_pdb (str): Destination path for the mutated structure.

    Returns:
        str: `output_pdb`, exactly as passed in.
    """
    mutated = Universe(parent_pdb)

    # apply_mutation returns the universe to carry forward, so the result of
    # each step is fed into the next one.
    for residue_id, new_aa in mutations.items():
        mutation = (residue_id, new_aa)
        mutated = apply_mutation(mutated, mutation)

    mutated.atoms.write(output_pdb)
    print(f"Mutated structure saved to: {output_pdb}")

    return output_pdb

In [5]:
class VinaApoDock(ZSData):
    """
    Work-in-progress container for Vina docking against apo structures.

    The constructor forwards the dataset arguments to `ZSData` and stores the
    docking options on the instance. `_prep_pdbqt` is still a placeholder.
    """

    def __init__(
        self,
        input_csv: str,
        dock_opt: str,  #  ie "substrate", "joint", "seperate"
        cofactor_dets: str = "cofactor", # or inactivated_cofactor
        in_structure_dir: str = "data/structure/apo",
        combo_col_name: str = "AAs",
        fit_col_name: str = "fitness",
        output_dir: str = "zs/vina/apo",
        withsub: bool = True,
        regen: bool = False,
        redock: bool = False
    ):
        """
        Args:
            input_csv (str): Forwarded to `ZSData.__init__`.
            dock_opt (str): Docking mode; the inline note lists "substrate",
                "joint" and "seperate" (spelling as used by the code).
            cofactor_dets (str): "cofactor" or "inactivated_cofactor".
            in_structure_dir (str): Stored as `_in_structure_dir`.
            combo_col_name (str): Forwarded to `ZSData.__init__`.
            fit_col_name (str): Forwarded to `ZSData.__init__`.
            output_dir (str): Stored as `_output_dir`.
            withsub (bool): Forwarded to `ZSData.__init__`.
            regen (bool): Stored as `_regen`.
            redock (bool): Stored as `_redock`.
        """

        # Dataset-level arguments are handled entirely by the parent class.
        super().__init__(
            input_csv=input_csv,
            combo_col_name=combo_col_name,
            fit_col_name=fit_col_name,
            withsub=withsub,
        )

        # What to dock
        self._dock_opt = dock_opt
        self._cofactor_dets = cofactor_dets

        # Where to read from / write to
        self._in_structure_dir = in_structure_dir
        self._output_dir = output_dir

        # Re-run switches
        self._regen = regen
        self._redock = redock

    def _prep_pdbqt(self):
        """
        Placeholder for converting the ligand smiles to PDBQT files for docking.

        Currently does nothing for any `dock_opt`; the "seperate" branch
        (one PDBQT per substrate, cofactor, and inactivated cofactor) is not
        implemented yet.
        """
        if self._dock_opt != "seperate":
            return

        # TODO: make each substrate, cofactor, and inactivated cofactor into a pdbqt
        return

            

        


In [6]:
# Joint TrpB ligand: the SMILES below is the dot-joined combination of the three
# species that the next cells convert one by one (the "4bromo" substrate, the
# "PLP-dependent_aminoacrylate" cofactor, and "[Na+]").
# NOTE(review): ligand_smiles2pdbqt comes from REVIVAL.zs.vina; judging by its
# argument names it writes an SDF and a PDBQT file at the given paths -- confirm.
# NOTE(review): the /disk2/fli/REVIVAL2 prefix is machine-specific; it is the
# directory the first cell already cd's into.
ligand_smiles2pdbqt(
    smiles="C1=CC2=C(C=CN2)C(=C1)Br.[O-]C1=C(/C=[N+]([H])/C(C([O-])=O)=C)C(CP([O-])([O-])=O)=CN=C1C.[Na+]",
    ligand_sdf_file="/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/joint.sdf",
    ligand_pdbqt_file="/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/joint.pdbqt", pH=7.4   
)

1 molecule converted


In [7]:
# Cofactor on its own: written to PLP-dependent_aminoacrylate.{sdf,pdbqt}.
# The resulting PDBQT is the file listed in `cofactor2dock` by the TrpB
# config-writing cell further down.
ligand_smiles2pdbqt(
    smiles="[O-]C1=C(/C=[N+]([H])/C(C([O-])=O)=C)C(CP([O-])([O-])=O)=CN=C1C",
    ligand_sdf_file="/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/PLP-dependent_aminoacrylate.sdf",
    ligand_pdbqt_file="/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/PLP-dependent_aminoacrylate.pdbqt", pH=7.4   
)

1 molecule converted


In [9]:
# Substrate on its own: written to 4bromo.{sdf,pdbqt}. The PDBQT is used as
# `ligand_pdbqt` by the TrpB config-writing cell further down.
# NOTE(review): the execution counter jumps from In [7] to In [9] here, so a
# cell that ran in between is no longer in the notebook.
ligand_smiles2pdbqt(
    smiles="C1=CC2=C(C=CN2)C(=C1)Br",
    ligand_sdf_file="/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/4bromo.sdf",
    ligand_pdbqt_file="/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/4bromo.pdbqt", pH=7.4   
)

1 molecule converted


In [10]:
# Sodium ion on its own: written to Na+.{sdf,pdbqt}.
# NOTE(review): the TrpB config-writing cell keeps Na+.pdbqt commented out of
# `cofactor2dock` and instead points the receptor at "apo_TrpB_Na.pdbqt", so
# this file does not appear to be docked as a separate ligand -- confirm.
ligand_smiles2pdbqt(
    smiles="[Na+]",
    ligand_sdf_file="/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/Na+.sdf",
    ligand_pdbqt_file="/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/Na+.pdbqt", pH=7.4   
)

1 molecule converted


In [14]:
from REVIVAL.util import calculate_ligand_centroid

In [15]:
from REVIVAL.global_param import ENZYME_INFO_DICT

In [36]:
# Centre of the docking box for TrpB: `coords` is indexed as [0], [1], [2] for
# center_x / center_y / center_z by the TrpB config-writing cell below.
# Note that the centroid is taken from data/structure/PfTrpB.pdb, not from the
# data/structure/apo/ copy that is converted to PDBQT.
# NOTE(review): the contents of ENZYME_INFO_DICT["PfTrpB"]["ligand-info"] are
# not visible here -- presumably they identify the bound ligand atoms; confirm.
# NOTE(review): this cell's execution count (36) is higher than that of the
# cells that follow it (19, 32, 35), so the notebook was not run top to bottom.
coords = calculate_ligand_centroid(
    pdb_file="/disk2/fli/REVIVAL2/data/structure/PfTrpB.pdb",
    ligand_info=ENZYME_INFO_DICT["PfTrpB"]["ligand-info"]
)
coords

array([ -1.8210907, -37.15664  , -14.083181 ], dtype=float32)

In [19]:
# NOTE(review): this value is never read in the visible cells; `conf_path` is
# reassigned to ".../TrpB/conf_3.txt" before the config file is written.
conf_path = "/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/conf.txt"

In [32]:
import os

def pdb_to_pdbqt_protein(input_path: str, output_path=None, pH: float = 7.4):

    """
    Convert a protein PDB file to a PDBQT file with Open Babel (`obabel`).

    Steps:
        1. Rewrite `input_path` IN PLACE, dropping torsion-tree records
           (ROOT / ENDROOT / BRANCH / ENDBRANCH) and every line that
           contains "Fe" (the iron is removed before conversion).
        2. Run `obabel <input> -xr -p <pH> --partialcharge gasteiger -O <output>`.
        3. Rewrite the output, dropping MODEL / TER / ENDMDL / REMARK records
           (and any line containing "ENDMDL"), then append one final "TER".

    Args:
        input_path (str): PDB file to convert. The file itself is modified
            by step 1.
        output_path (str, optional): Where to write the PDBQT. When falsy, the
            trailing ".pdb" extension of `input_path` is swapped for ".pdbqt".
        pH (float): Value passed to obabel's `-p` option.

    Returns:
        None

    Raises:
        FileNotFoundError: if `input_path` does not exist, if the `obabel`
            executable cannot be found, or if obabel produced no output file.
    """
    # Local import: the notebook's import cells do not bring in subprocess.
    import subprocess

    torsion_records = {"ENDBRANCH", "BRANCH", "ROOT", "ENDROOT"}
    model_records = {"MODEL", "TER", "ENDMDL", "REMARK"}

    def _record_name(line):
        """Return the record keyword at the start of a PDB/PDBQT line."""
        # strip() matters: a record with no fields ("ROOT\n", "TER\n") would
        # otherwise keep its newline and never match the keyword sets above.
        return line.split(" ", 1)[0].strip()

    # Step 1: clean the input file in place.
    with open(input_path, "r") as fin:
        cleaned = [
            line
            for line in fin
            if _record_name(line) not in torsion_records and "Fe" not in line
        ]
    with open(input_path, "w") as fout:
        fout.writelines(cleaned)

    if not output_path:
        stem, ext = os.path.splitext(input_path)
        if ext == ".pdb":
            # Swap only the file extension, so a ".pdb" inside a directory
            # name is left alone.
            output_path = stem + ".pdbqt"
        else:
            # Unusual extension: keep the historical substitution behaviour.
            output_path = input_path.replace(".pdb", ".pdbqt")

    # Step 2: an argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact. The exit status is not checked, as before; a
    # failed conversion surfaces when the output file is opened below.
    subprocess.run(
        [
            "obabel",
            input_path,
            "-xr",
            "-p",
            str(pH),
            "--partialcharge",
            "gasteiger",
            "-O",
            output_path,
        ]
    )

    # Step 3: drop model bookkeeping so the receptor reads as one block.
    # This is a hacky way to keep a bound heme or something, seems to work fine.
    with open(output_path, "r") as fin:
        receptor_lines = [
            line
            for line in fin
            if _record_name(line) not in model_records and "ENDMDL" not in line
        ]
    with open(output_path, "w") as fout:
        fout.writelines(receptor_lines)
        fout.write("TER\n")


In [35]:
# Convert the apo TrpB structure to a receptor PDBQT.
# Careful: pdb_to_pdbqt_protein rewrites its input file in place before
# calling obabel, so data/structure/apo/PfTrpB.pdb is modified by this cell.
# NOTE(review): this writes apo_TrpB.pdbqt, while the config-writing cell below
# uses apo_TrpB_Na.pdbqt as the receptor; no visible cell creates that file.
pdb_to_pdbqt_protein(
    input_path="/disk2/fli/REVIVAL2/data/structure/apo/PfTrpB.pdb",
    output_path="/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/apo_TrpB.pdbqt", pH = 7.4)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is /disk2/fli/REVIVAL2/data/structure/apo/PfTrpB.pdb)

1 molecule converted


In [48]:
# Inputs for the TrpB Vina config: receptor, substrate ligand, and any
# cofactors that are docked alongside the substrate.
conf_path = "/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/conf_3.txt"
receptor_pdbqt = "/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/apo_TrpB_Na.pdbqt"
ligand_pdbqt = "/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/4bromo.pdbqt"
cofactor2dock = [
    "/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/PLP-dependent_aminoacrylate.pdbqt",
    # "/disk2/fli/REVIVAL2/sandbox/test_vina/TrpB/Na+.pdbqt",
]
# NEED TO MERGE THE MAIN WITH NA

with open(conf_path, "w") as fout:
    # Receptor first, then the substrate as the first `ligand` entry.
    fout.writelines(
        [
            f"receptor = {receptor_pdbqt}\n",
            f"ligand = {ligand_pdbqt}\n",
        ]
    )

    # One extra `ligand` entry per cofactor file.
    for cofactor_file in (cofactor2dock if cofactor2dock is not None else []):
        fout.write(f"ligand = {cofactor_file}\n")

    # Search box centred on `coords` (set by the centroid cell), followed by
    # the fixed box size and search settings.
    fout.writelines(
        [
            f"center_x = {coords[0]}\n",
            f"center_y = {coords[1]}\n",
            f"center_z = {coords[2]}\n",
            "size_x = 20\n",
            "size_y = 20\n",
            "size_z = 20\n",
            "num_modes = 9\n",
            "exhaustiveness = 32\n",
        ]
    )

In [None]:

# def dock_apo_lib_parallel(
#     struct_dir: str, # ie 
#     dock_opt: str,  #  ie "substrate",
#     score_only: bool,  # = True,
#     cofactor_dets: str = "cofactor",
#     vina_dir: str = "zs/vina",
#     residues4centriod: list = None,
#     from_pdb: bool = True,
#     pH: float = 7.4,
#     size_x=20.0,
#     size_y=20.0,
#     size_z=20.0,
#     num_modes=9,
#     exhaustiveness=32,
#     regen=False,
#     rerun=False,
#     seed=42,
#     num_cpus=None,  # for each dock function
#     max_workers=24,  # Number of parallelized variants to be docked
# ):

In [None]:
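# Planned workflow:
# 1. First, clean the apo structure so that it is truly apo.
# 2. Then mutate the apo structure so that it has the variant sequence.
# 3. Then dock the ligands generated from SMILES.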

In [45]:
# ParLQ cofactor without the iron, written to heme-no-Fe.{sdf,pdbqt}.
# The SMILES is a raw string because it contains backslashes, which must not be
# interpreted as Python escape sequences.
# NOTE(review): this cell's execution count (45) is lower than that of the TrpB
# config-writing cell above (48), so the cells were not run in document order.
ligand_smiles2pdbqt(
    smiles=r"C=CC1=C(/C=C2C(C)=C(C=C)C3=N/2)NC(/C=C4N=C(/C=C(C(CCC([O-])=O)=C/5C)\NC5=C/3)C(CCC([O-])=O)=C\4C)=C1C",
    ligand_sdf_file="/disk2/fli/REVIVAL2/sandbox/test_vina/ParLQ/heme-no-Fe.sdf",
    ligand_pdbqt_file="/disk2/fli/REVIVAL2/sandbox/test_vina/ParLQ/heme-no-Fe.pdbqt", pH=7.4
)

1 molecule converted


In [46]:
# Second ParLQ species, written to activated-carbene.{sdf,pdbqt}; the PDBQT is
# listed in `cofactor2dock` by the ParLQ config-writing cell.
# NOTE(review): "[C]" is a bracket atom with no hydrogens specified -- confirm
# that ligand_smiles2pdbqt (with pH=7.4) leaves it unprotonated as intended.
ligand_smiles2pdbqt(
    smiles="CCOC([C])=O",
    ligand_sdf_file="/disk2/fli/REVIVAL2/sandbox/test_vina/ParLQ/activated-carbene.sdf",
    ligand_pdbqt_file="/disk2/fli/REVIVAL2/sandbox/test_vina/ParLQ/activated-carbene.pdbqt", pH=7.4
)

1 molecule converted


In [49]:
# Convert the apo ParLQ structure to a receptor PDBQT.
# Careful: pdb_to_pdbqt_protein rewrites its input file in place (it also
# drops every line containing "Fe"), so data/structure/apo/ParLQ.pdb is
# modified by this cell.
# NOTE(review): this writes apo_ParLQ.pdbqt, while the config-writing cell below
# uses apo_ParLQ_fe.pdbqt as the receptor; no visible cell creates that file.
pdb_to_pdbqt_protein(
    input_path="/disk2/fli/REVIVAL2/data/structure/apo/ParLQ.pdb",
    output_path="/disk2/fli/REVIVAL2/sandbox/test_vina/ParLQ/apo_ParLQ.pdbqt", pH = 7.4)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is /disk2/fli/REVIVAL2/data/structure/apo/ParLQ.pdb)

1 molecule converted


In [51]:
# Centre of the docking box for ParLQ. This rebinds `coords`, replacing the
# TrpB centroid computed earlier; the ParLQ config-writing cell below reads
# whichever value `coords` currently holds.
# As for TrpB, the centroid comes from data/structure/ParLQ.pdb, not from the
# data/structure/apo/ copy that is converted to PDBQT.
coords = calculate_ligand_centroid(
    pdb_file="/disk2/fli/REVIVAL2/data/structure/ParLQ.pdb",
    ligand_info=ENZYME_INFO_DICT["ParLQ"]["ligand-info"]
)
coords

array([ 1.9006428,  5.4527626, -3.6450245], dtype=float32)

In [52]:
# Inputs for the ParLQ Vina config: receptor, substrate ligand, and the two
# cofactor species docked alongside the substrate.
conf_path = "/disk2/fli/REVIVAL2/sandbox/test_vina/ParLQ/conf.txt"
receptor_pdbqt = "/disk2/fli/REVIVAL2/sandbox/test_vina/ParLQ/apo_ParLQ_fe.pdbqt"
ligand_pdbqt = "/disk2/fli/REVIVAL2/sandbox/test_vina/ParLQ/4-vinylanisole.pdbqt"
cofactor2dock = [
    "/disk2/fli/REVIVAL2/sandbox/test_vina/ParLQ/activated-carbene.pdbqt",
    "/disk2/fli/REVIVAL2/sandbox/test_vina/ParLQ/heme-no-Fe.pdbqt"
]
# NEED TO MERGE THE MAIN WITH NA
# NOTE(review): the line above matches the TrpB cell word for word and looks
# copied; here the receptor file name ends in "_fe", not "_Na" -- confirm.

with open(conf_path, "w") as fout:
    # Receptor first, then the substrate as the first `ligand` entry.
    fout.writelines(
        [
            f"receptor = {receptor_pdbqt}\n",
            f"ligand = {ligand_pdbqt}\n",
        ]
    )

    # One extra `ligand` entry per cofactor file.
    for cofactor_file in (cofactor2dock if cofactor2dock is not None else []):
        fout.write(f"ligand = {cofactor_file}\n")

    # Search box centred on `coords` (set by the centroid cell), followed by
    # the fixed box size and search settings.
    fout.writelines(
        [
            f"center_x = {coords[0]}\n",
            f"center_y = {coords[1]}\n",
            f"center_z = {coords[2]}\n",
            "size_x = 20\n",
            "size_y = 20\n",
            "size_z = 20\n",
            "num_modes = 9\n",
            "exhaustiveness = 32\n",
        ]
    )