# Imports

In [None]:
from asapdiscovery import data
from pathlib import Path
from asapdiscovery.data.openeye import load_openeye_pdb, save_openeye_pdb, openeye_perceive_residues, split_openeye_design_unit

## Paths

In [None]:
# Fixture directories, relative to the directory the notebook is run from.
inputs = Path("../tests/prep_mers_files/inputs/")
output = Path("../tests/prep_mers_files/outputs/")
# mmCIF structure that the cells below load; its stem is reused for output names.
cifpath = inputs.joinpath("rcsb_8DGY-assembly1.cif")

In [None]:
cifpath.resolve()

# OpenFF Toolkit

In [None]:
from openff.toolkit.topology import Molecule
from openff.toolkit.utils import get_data_file_path

In [None]:
# Load the structure with the OpenFF Toolkit.  The call used to be
# `Molecule.f(...)`, which is not a Molecule attribute and raised
# AttributeError; `from_file` is the path-based loader.
# NOTE(review): confirm the active toolkit backend can parse mmCIF input.
protein = Molecule.from_file(str(cifpath))

# OpenMM

In [None]:
from openmm.app import PDBxFile, PDBFile

In [None]:
cif = PDBxFile(str(cifpath))

In [None]:
# Materialise the topology's residues, dropping any that evaluate as falsy.
residues = list(filter(None, cif.topology.residues()))

In [None]:
residues[2].id

In [None]:
outfile = inputs / f"{cifpath.stem}-openmm.pdb"

In [None]:
with open(outfile, 'w') as f:
    PDBFile.writeFile(cif.topology, cif.positions, f, keepIds=True)

## load with openeye

In [None]:
from asapdiscovery.data.openeye import load_openeye_pdb

In [None]:
load_openeye_pdb(str(outfile))

## Convert to OpenFF Toolkit

In [None]:
# NOTE(review): `cif` is an openmm.app.PDBxFile, so this passes an OpenMM
# topology; presumably Molecule.from_topology expects an OpenFF Topology
# instead -- confirm before relying on this cell.
mol = Molecule.from_topology(cif.topology)

# Try loading aligned pdb

In [None]:
from asapdiscovery.data.openeye import load_openeye_sdf
from openeye import oechem, oespruce

In [None]:
pdb = load_openeye_pdb("../tests/prep_mers_files/align_test.pdb")

In [None]:
mol = load_openeye_sdf("/Users/alexpayne/lilac-mount-point/asap-datasets/current/sars_01_prepped_v3/Mpro-P3054_0A_MAT-POS-50a80394-2/Mpro-P3054_0A_MAT-POS-50a80394-2.sdf")

In [None]:
def make_du_from_lig_and_prot(initial_prot, new_lig):
    """
    Build a single OpenEye design unit from a protein and a ligand.

    Explicit hydrogens are added to both inputs in place, the Spruce
    design-unit options are configured, and ``oespruce.OEMakeDesignUnit`` is
    called on the pair.

    Parameters
    ----------
    initial_prot
        Molecule holding the protein; modified in place (hydrogens added).
    new_lig
        Molecule holding the ligand; modified in place (hydrogens added).

    Returns
    -------
    oechem.OEDesignUnit
        The design unit populated by ``oespruce.OEMakeDesignUnit``.

    Raises
    ------
    AssertionError
        If the populated design unit lacks a protein or a ligand.
    """
    ## Both molecules need explicit hydrogens before the design unit is built
    for molecule in (initial_prot, new_lig):
        oechem.OEAddExplicitHydrogens(molecule)

    ## Create the options object and grab the nested option objects once
    opts = oespruce.OEMakeDesignUnitOptions()
    opts.SetSuperpose(False)
    prep_opts = opts.GetPrepOptions()
    build_opts = prep_opts.GetBuildOptions()
    loop_opts = build_opts.GetLoopBuilderOptions()
    ## No loop database is configured in this function.

    ## The settings below are taken from John's receptor-prep script:
    ## https://github.com/FoldingAtHome/covid-moonshot/blob/454098f4255467f4655102e0330ebf9da0d09ccb/synthetic-enumeration/sprint-14-quinolones/00-prep-receptor.py
    prep_opts.SetStrictProtonationMode(True)

    # A ligand needs at least 5 atoms, e.g. a 5-membered ring fragment
    opts.GetSplitOptions().SetMinLigAtoms(5)

    # Keep alternate locations outside the binding pocket (do not collapse
    # them); they matter for later filtering
    prep_opts.GetEnumerateSitesOptions().SetCollapseNonSiteAlts(False)

    # Sequence alignment for loop building: only matches are important
    loop_opts.SetSeqAlignMethod(oechem.OESeqAlignmentMethod_Identity)
    loop_opts.SetSeqAlignGapPenalty(-1)
    loop_opts.SetSeqAlignExtendPenalty(0)

    # Leave both termini uncapped so they are zwitterionic: Mpro cleaves its
    # own N- and C-termini (https://www.pnas.org/content/113/46/12997)
    build_opts.SetCapNTermini(False)
    build_opts.SetCapCTermini(False)
    # Termini must not be truncated, since force fields don't have
    # parameters for this
    build_opts.GetCapBuilderOptions().SetAllowTruncate(False)

    # Missing loops and sidechains are built
    build_opts.SetBuildLoops(True)
    build_opts.SetBuildSidechains(True)

    # Ligand tautomers are generated during protonation
    prep_opts.GetProtonateOptions().SetGenerateTautomers(True)

    ## Build the design unit and check that both components made it in
    design_unit = oechem.OEDesignUnit()
    oespruce.OEMakeDesignUnit(design_unit, initial_prot, new_lig, opts)
    assert design_unit.HasProtein() and design_unit.HasLigand()

    return design_unit

In [None]:
du = make_du_from_lig_and_prot(pdb, mol)

In [None]:
du

In [None]:
from asapdiscovery.data.openeye import split_openeye_design_unit

In [None]:
lig, prot, complex_ = split_openeye_design_unit(du)

In [None]:
from asapdiscovery.data.openeye import save_openeye_pdb, save_openeye_sdf

In [None]:
save_openeye_pdb(complex_, str(output / f"{cifpath.stem}-openmm-openeye.pdb"))

## test making DU without ligand

In [None]:
prot

In [None]:
def prep_du(initial_prot, site_residue, protein_only=True):
    """
    Build design units around a site residue and make a receptor for each.

    Parameters
    ----------
    initial_prot
        Molecule holding the protein; explicit hydrogens are added in place.
    site_residue : str
        Residue specifier handed to ``oespruce.OEMakeDesignUnits`` as the
        site residue, e.g. ``'CYS:148: :A:0: '``.
    protein_only : bool, default=True
        When False, additionally require the first design unit to contain a
        ligand.  The default is True because this helper lives in the
        "making DU without ligand" section and is called on a bare protein;
        previously the name was referenced without ever being defined.

    Returns
    -------
    list
        The design units produced by ``oespruce.OEMakeDesignUnits``, each
        passed through ``oedocking.OEMakeReceptor``.

    Raises
    ------
    AssertionError
        If no design unit is produced, if the first one has no protein, or
        (with ``protein_only=False``) if it has no ligand.
    """
    # Local import so the function does not depend on an earlier cell having
    # imported oedocking.
    from openeye import oedocking

    ## Add Hs to prep protein
    oechem.OEAddExplicitHydrogens(initial_prot)

    ## Set up DU building options
    opts = oespruce.OEMakeDesignUnitOptions()
    opts.SetSuperpose(False)
    prep_opts = opts.GetPrepOptions()
    build_opts = prep_opts.GetBuildOptions()

    ## Options set from John's function ########################################
    ## (https://github.com/FoldingAtHome/covid-moonshot/blob/454098f4255467f4655102e0330ebf9da0d09ccb/synthetic-enumeration/sprint-14-quinolones/00-prep-receptor.py)
    prep_opts.SetStrictProtonationMode(True)
    # set minimal number of ligand atoms to 5, e.g. a 5-membered ring fragment
    opts.GetSplitOptions().SetMinLigAtoms(5)

    # collapse alternate locations that lie outside the site
    prep_opts.GetEnumerateSitesOptions().SetCollapseNonSiteAlts(True)

    # Both N- and C-termini should be zwitterionic
    # Mpro cleaves its own N- and C-termini
    # See https://www.pnas.org/content/113/46/12997
    build_opts.SetCapNTermini(False)
    build_opts.SetCapCTermini(False)
    # Don't allow truncation of termini, since force fields don't have
    #  parameters for this
    build_opts.GetCapBuilderOptions().SetAllowTruncate(False)
    # Build loops and sidechains
    build_opts.SetBuildLoops(True)
    build_opts.SetBuildSidechains(True)

    ## Structure metadata object (left empty)
    metadata = oespruce.OEStructureMetadata()

    ## Finally make new DesignUnits
    dus = list(
        oespruce.OEMakeDesignUnits(initial_prot, metadata, opts, site_residue)
    )
    # Fail with a readable message instead of an IndexError on dus[0]
    assert dus, f"No design units were generated for site residue {site_residue!r}"
    assert dus[0].HasProtein()
    if not protein_only:
        assert dus[0].HasLigand()

    ## Generate docking receptor for each DU
    for du in dus:
        oedocking.OEMakeReceptor(du)

    return dus

In [None]:
hier = oechem.OEHierView(prot)

In [None]:
# Collect the hierarchy view's residues into a list so they can be indexed.
residues = list(hier.GetResidues())

In [None]:
residues[2].GetOEResidue()

In [None]:
help(oechem.OEResidueToString)

In [None]:
oechem.OEResidueToString(residues[145].GetOEResidue(), ":")

In [None]:
save_openeye_pdb(prot, "test.pdb")

In [None]:
du = prep_du(prot, 'CYS:148: :A:0: ')

# Use Spruce Instead of Make OE Design Unit

In [None]:
def spruce_prot(initial_prot):
    """
    Run ``OESpruceFilter.StandardizeAndFilter`` on a protein, in place.

    Parameters
    ----------
    initial_prot
        Molecule holding the protein.  Explicit hydrogens are added and the
        spruce filter is applied directly to this object.

    Returns
    -------
    The same ``initial_prot`` object that was passed in.
    """
    ## Explicit hydrogens go on before anything else
    oechem.OEAddExplicitHydrogens(initial_prot)

    ## Design-unit options; nested option objects are fetched once and reused
    opts = oespruce.OEMakeDesignUnitOptions()
    opts.SetSuperpose(False)
    prep_opts = opts.GetPrepOptions()
    build_opts = prep_opts.GetBuildOptions()

    ## The settings below are taken from John's receptor-prep script:
    ## https://github.com/FoldingAtHome/covid-moonshot/blob/454098f4255467f4655102e0330ebf9da0d09ccb/synthetic-enumeration/sprint-14-quinolones/00-prep-receptor.py
    prep_opts.SetStrictProtonationMode(True)
    # A ligand needs at least 5 atoms, e.g. a 5-membered ring fragment
    opts.GetSplitOptions().SetMinLigAtoms(5)

    # Alternate locations outside the site are collapsed
    prep_opts.GetEnumerateSitesOptions().SetCollapseNonSiteAlts(True)

    # Leave both termini uncapped so they are zwitterionic: Mpro cleaves its
    # own N- and C-termini (https://www.pnas.org/content/113/46/12997)
    build_opts.SetCapNTermini(False)
    build_opts.SetCapCTermini(False)
    # Termini must not be truncated, since force fields don't have
    # parameters for this
    build_opts.GetCapBuilderOptions().SetAllowTruncate(False)
    # Missing loops and sidechains are built
    build_opts.SetBuildLoops(True)
    build_opts.SetBuildSidechains(True)

    ## Empty structure metadata
    metadata = oespruce.OEStructureMetadata()

    ## The spruce filter is built from default filter options plus the
    ## design-unit options configured above
    filter_opts = oespruce.OESpruceFilterOptions()
    spruce_filter = oespruce.OESpruceFilter(filter_opts, opts)

    ## Spruce! An empty skew grid is supplied as the grid argument
    from openeye import oegrid
    empty_grid = oegrid.OESkewGrid()
    spruce_filter.StandardizeAndFilter(initial_prot, empty_grid, metadata)

    return initial_prot

In [None]:
spruced = spruce_prot(prot)

In [None]:
save_openeye_pdb(spruced, str(output / f"{cifpath.stem}-openmm-oespruced.pdb"))

## Now try sprucing with loop addition and seqres addition

In [None]:
loop_path = Path("/Users/alexpayne/Scientific_Projects/mers-drug-discovery/spruce_bace.loop_db")

In [None]:
import yaml
# YAML file holding the SEQRES record; the path is relative to the directory
# the notebook is run from.
seqres_path = Path("../../../../metadata/mpro_mers_seqres.yaml")
with open(seqres_path) as f:
    # safe_load only builds plain Python objects from the YAML document
    seqres_dict = yaml.safe_load(f)

In [None]:
seqres = seqres_dict["SEQRES"]

In [None]:
from asapdiscovery.data.utils import seqres_to_res_list

In [None]:
res_list = seqres_to_res_list(seqres)

In [None]:
from asapdiscovery.docking.modeling import mutate_residues
prot = mutate_residues(prot, res_list, place_h=True)

In [None]:
seqres = " ".join(res_list)

In [None]:
seqres

In [None]:
def spruce_prot(initial_prot, seqres=None, loop_db=None, site_residue="HIS:41: :A:0: "):
    """
    Build loops and sidechains, spruce a protein in place, and make design units.

    Parameters
    ----------
    initial_prot
        Molecule holding the protein.  Hydrogens are added, loops and
        sidechains are built and the spruce filter is applied directly to
        this object.
    seqres : str, optional
        Sequence string attached as sequence metadata to every chain that
        contains a residue not named "LIG" or "HOH".  Skipped when falsy.
    loop_db : str or path-like, optional
        Loop database filename.  When None, no loop database is set on either
        set of loop-builder options (previously None was passed straight to
        ``SetLoopDBFilename``).
    site_residue : str, default='HIS:41: :A:0: '
        Residue specifier handed to ``oespruce.OEMakeDesignUnits`` as the
        site residue.  The default is the value that used to be hard-coded.

    Returns
    -------
    tuple
        ``(initial_prot, dus)``: the same protein object that was passed in
        and the list of design units built from it.
    """
    ## Add Hs to prep protein
    oechem.OEAddExplicitHydrogens(initial_prot)

    ## Set up DU building options
    opts = oespruce.OEMakeDesignUnitOptions()
    opts.SetSuperpose(False)
    prep_opts = opts.GetPrepOptions()
    build_opts = prep_opts.GetBuildOptions()
    du_loop_opts = build_opts.GetLoopBuilderOptions()

    ## Options set from John's function ########################################
    ## (https://github.com/FoldingAtHome/covid-moonshot/blob/454098f4255467f4655102e0330ebf9da0d09ccb/synthetic-enumeration/sprint-14-quinolones/00-prep-receptor.py)
    prep_opts.SetStrictProtonationMode(True)
    # set minimal number of ligand atoms to 5, e.g. a 5-membered ring fragment
    opts.GetSplitOptions().SetMinLigAtoms(5)

    # collapse alternate locations that lie outside the site
    prep_opts.GetEnumerateSitesOptions().SetCollapseNonSiteAlts(True)

    # alignment options, only matches are important
    du_loop_opts.SetSeqAlignMethod(oechem.OESeqAlignmentMethod_Identity)
    du_loop_opts.SetSeqAlignGapPenalty(-1)
    du_loop_opts.SetSeqAlignExtendPenalty(0)

    # Both N- and C-termini should be zwitterionic
    # Mpro cleaves its own N- and C-termini
    # See https://www.pnas.org/content/113/46/12997
    build_opts.SetCapNTermini(False)
    build_opts.SetCapCTermini(False)
    # Don't allow truncation of termini, since force fields don't have
    #  parameters for this
    build_opts.GetCapBuilderOptions().SetAllowTruncate(False)

    ## Stand-alone builder options used by the explicit OEBuildLoops /
    ## OEBuildSidechains calls below (separate from the options inside `opts`)
    sc_opts = oespruce.OESidechainBuilderOptions()

    loop_opts = oespruce.OELoopBuilderOptions()
    loop_opts.SetSeqAlignMethod(oechem.OESeqAlignmentMethod_Identity)
    loop_opts.SetSeqAlignGapPenalty(-1)
    loop_opts.SetSeqAlignExtendPenalty(0)
    loop_opts.SetBuildTails(True)

    ## Allow for adding residues at the beginning/end if they're missing
    du_loop_opts.SetBuildTails(True)

    ## Only touch the loop database setting when one was actually supplied
    if loop_db is not None:
        print("Adding loop")
        loop_db_filename = str(loop_db)
        loop_opts.SetLoopDBFilename(loop_db_filename)
        du_loop_opts.SetLoopDBFilename(loop_db_filename)

    ## Structure metadata object
    metadata = oespruce.OEStructureMetadata()

    ## Add SEQRES metadata
    if seqres:
        print("adding seqres")
        all_prot_chains = {
            res.GetExtChainID()
            for res in oechem.OEGetResidues(initial_prot)
            if (res.GetName() != "LIG") and (res.GetName() != "HOH")
        }
        for chain in all_prot_chains:
            seq_metadata = oespruce.OESequenceMetadata()
            seq_metadata.SetChainID(chain)
            seq_metadata.SetSequence(seqres)
            metadata.AddSequenceMetadata(seq_metadata)

    ## Construct spruce filter
    spruce_opts = oespruce.OESpruceFilterOptions()
    spruce = oespruce.OESpruceFilter(spruce_opts, opts)

    ## Spruce!
    from openeye import oegrid
    grid = oegrid.OESkewGrid()

    oespruce.OEBuildLoops(initial_prot, metadata, sc_opts, loop_opts)
    oespruce.OEBuildSidechains(initial_prot, sc_opts)
    oechem.OEPlaceHydrogens(initial_prot)
    spruce.StandardizeAndFilter(initial_prot, grid, metadata)

    ## Re-perceive residues so that atom numbers and CONECT records don't get
    ## scrambled
    openeye_perceive_residues(initial_prot)

    dus = list(
        oespruce.OEMakeDesignUnits(initial_prot, metadata, opts, site_residue)
    )

    return initial_prot, dus

In [None]:
spruced_v2, dus = spruce_prot(initial_prot=prot, seqres=seqres, loop_db=str(loop_path))
save_openeye_pdb(spruced_v2, str(output / f"{cifpath.stem}-openmm-oespruced-loops-and-seqres.pdb"))

In [None]:
du = dus[0]

In [None]:
oechem.OEWriteDesignUnit(str(output / f"{cifpath.stem}-openmm-oespruced-loops-and-seqres-du.oedu"), du)

In [None]:
du

In [None]:
du = oechem.OEDesignUnit()

In [None]:
oechem.OEReadDesignUnit(str(output / f"{cifpath.stem}-openmm-oespruced-loops-and-seqres-du.oedu"), du)

In [None]:
lig, prot, complex_ = split_openeye_design_unit(du)

In [None]:
prot = oechem.OEGraphMol()
du.GetProtein(prot)

In [None]:
save_openeye_pdb(prot, str(output / f"{cifpath.stem}-openmm-oespruced-loops-and-seqres-du.pdb"))

# Test OpenMM