In [None]:
import os
import re

import yaml
from openeye import oechem, oespruce

In [None]:
## Input/output locations used by the cells below.
# Structure to process (PDB-format file).
pdb_fn = "input_pdbs/pdb8dgy.ent"
# Reference structure passed to the biounit extraction for superposition.
ref_fn = "input_pdbs/reference.pdb"
# Loop database filename handed to the Spruce loop builder options.
loop_db = "rcsb_spruce.loop_db"

# Directory that generated design-unit files are written under.
out_dir = "design_units/"

In [None]:
## Load the structure to process and the reference structure.
def _read_first_pdb_mol(pdb_path):
    """Read the first molecule from a PDB-format file.

    The stream uses the default PDB input flavor plus ``OEIFlavor_PDB_DATA``.

    Parameters
    ----------
    pdb_path : str
        Path of the PDB-format file to read.

    Returns
    -------
    oechem.OEGraphMol
        The first molecule found in the file.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If no molecule can be read from the file.
    """
    ifs = oechem.oemolistream()
    ifs.SetFlavor(
        oechem.OEFormat_PDB,
        oechem.OEIFlavor_PDB_Default | oechem.OEIFlavor_PDB_DATA,
    )
    # open() reports failure through its return value rather than raising,
    # so check it explicitly instead of carrying on with an empty molecule.
    if not ifs.open(pdb_path):
        raise OSError(f"Unable to open {pdb_path} for reading")

    mol = oechem.OEGraphMol()
    try:
        if not oechem.OEReadMolecule(ifs, mol):
            raise ValueError(f"No molecule could be read from {pdb_path}")
    finally:
        ifs.close()
    return mol


in_mol = _read_first_pdb_mol(pdb_fn)

## Load reference molecule
ref_mol = _read_first_pdb_mol(ref_fn)

In [None]:
## First convert ref from mol into design unit
## NOTE: everything in this cell is commented out, so nothing here executes.
## The two snippets are alternative attempts at building `ref_du` from
## `ref_mol`, kept for reference only.
# opts = oespruce.OEMakeDesignUnitOptions()
# ref_du = oespruce.OEMakeDesignUnits(
#     ref_mol, oespruce.OEStructureMetadata(), opts
# )
# ref_du = next(iter(ref_du))

# ref_du = oechem.OEDesignUnit()
# oespruce.OEMakeDesignUnit(ref_du, ref_mol, oechem.OEGraphMol())

In [None]:
## Extract biological units from the input structure, superposed onto the
## reference structure, and keep the first one.
bio_opts = oespruce.OEBioUnitExtractionOptions()
bio_opts.SetSuperpose(True)

biounits = list(oespruce.OEExtractBioUnits(in_mol, ref_mol, bio_opts))
# Fail with a clear message instead of an IndexError on the [0] lookup.
if not biounits:
    raise ValueError(f"No biological units could be extracted from {pdb_fn}")
aligned_prot = biounits[0]

In [None]:
## Options controlling design unit construction
opts = oespruce.OEMakeDesignUnitOptions()
# (disabled) opts.SetBioUnitExtractionOptions(bio_opts)

# Grab the nested option objects once rather than re-walking the getter chain.
prep_opts = opts.GetPrepOptions()
loop_builder_opts = prep_opts.GetBuildOptions().GetLoopBuilderOptions()

# Loop building: do not build tails, and use the configured loop database.
loop_builder_opts.SetBuildTails(False)
loop_builder_opts.SetLoopDBFilename(loop_db)

# Site enumeration: restrict to the reference site.
prep_opts.GetEnumerateSitesOptions().SetRestrictToRefSite(True)

## Structure metadata (left at its defaults)
metadata = oespruce.OEStructureMetadata()

In [None]:
## Build design units from the aligned biounit and materialize them as a list
## so they can be indexed and iterated more than once.
design_units = list(oespruce.OEMakeDesignUnits(aligned_prot, metadata, opts))

# Later cells index design_units[0]; fail here with a clear message instead.
if not design_units:
    raise ValueError(
        f"OEMakeDesignUnits produced no design units for {pdb_fn}"
    )

In [None]:
## Save each design unit as <out_dir>/<input file stem>_<index>.oedu.
## A later cell reads the index-0 file back from disk, so the files must exist
## for the notebook to run from a fresh kernel. Files that are already present
## are left untouched.
du_stem = os.path.splitext(os.path.basename(pdb_fn))[0]
# os.path.join avoids the doubled slash produced by out_dir's trailing "/".
out_base = os.path.join(out_dir, f"{du_stem}_{{}}.oedu")

os.makedirs(out_dir, exist_ok=True)
for i, du in enumerate(design_units):
    du_fn = out_base.format(i)
    if os.path.exists(du_fn):
        continue
    if not oechem.OEWriteDesignUnit(du_fn, du):
        raise OSError(f"Unable to write design unit to {du_fn}")
    print(du_fn)

In [None]:
## Look at the first design unit: its ligand, protein and site residues.
print(design_units)

first_du = design_units[0]

lig_mol = oechem.OEGraphMol()
prot_mol = oechem.OEGraphMol()
first_du.GetLigand(lig_mol)
first_du.GetProtein(prot_mol)

print(first_du.GetSiteResidues())

# Each site residue spec is ":"-separated; the second field is parsed as the
# integer residue number.
du_site_res = [
    int(site_spec.split(":")[1]) for site_spec in first_du.GetSiteResidues()
]
du_site_res.sort()

In [None]:
## Print the residue information attached to every ligand atom.
for atom in lig_mol.GetAtoms():
    # OEAtomGetResidue returns the residue record stored on the atom.
    res = oechem.OEAtomGetResidue(atom)
    print(
        atom.GetName(),
        res.GetName(),
        res.GetResidueNumber(),
        res.GetChainID(),
    )

In [None]:
## Write the protein component of every design unit to its own PDB file,
## named <out_dir>/<input file stem>_du_protein_<index>.pdb.
prot_stem = os.path.splitext(os.path.basename(pdb_fn))[0]
# os.path.join avoids the doubled slash produced by out_dir's trailing "/".
out_base = os.path.join(out_dir, f"{prot_stem}_du_protein_{{}}.pdb")

# The output stream does not create missing directories.
os.makedirs(out_dir, exist_ok=True)

ofs = oechem.oemolostream()
ofs.SetFlavor(
    oechem.OEFormat_PDB,
    oechem.OEOFlavor_PDB_Default,
)
prot_mol = oechem.OEGraphMol()
for i, du in enumerate(design_units):
    prot_fn = out_base.format(i)
    print(prot_fn)

    # Reuse one molecule object; clear it so components never accumulate.
    prot_mol.Clear()
    du.GetProtein(prot_mol)

    # open() reports failure through its return value; without this check a
    # failed open would make the write below a silent no-op.
    if not ofs.open(prot_fn):
        raise OSError(f"Unable to open {prot_fn} for writing")
    try:
        oechem.OEWriteMolecule(ofs, prot_mol)
    finally:
        ofs.close()

In [None]:
## Read the active-site definition. Every value in the mapping is expected to
## be a selection string of the form "resi 1+2+3".
active_site_fn = "mers_active_site.yaml"

# NOTE(review): yaml.CLoader / yaml.Loader can construct arbitrary Python
# objects, so only load trusted files here (or switch to yaml.safe_load).
# Fall back to the pure-Python loader when the C extension is unavailable.
yaml_loader = getattr(yaml, "CLoader", yaml.Loader)
with open(active_site_fn) as fp:
    active_site_dict = yaml.load(fp, Loader=yaml_loader)

## First parse all resids from the atom selection texts
pat_match = r"^resi ((?:[0-9]+\+)*[0-9]+)$"
all_active_res = set()
for sel_name, sel in active_site_dict.items():
    m = re.match(pat_match, sel)
    if m is None:
        # Report selections the pattern cannot parse instead of dropping them
        # without a trace.
        print(f"Skipping unparseable selection {sel_name!r}: {sel!r}")
        continue
    all_active_res.update(int(resid) for resid in m.group(1).split("+"))
print(sorted(all_active_res))

## Format in the site residue format for OE: "<name>:<number>: :<chain>".
## The third field is a hard-coded single space.
hv = oechem.OEHierView(aligned_prot)
res_sites = [
    f"{hres.GetResidueName()}:{hres.GetResidueNumber()}: :{chain.GetChainID()}"
    for chain in hv.GetChains()
    for frag in chain.GetFragments()
    for hres in frag.GetResidues()
    if hres.GetResidueNumber() in all_active_res
]

print(res_sites)

In [None]:
## Build a design unit directly from the aligned structure and the site
## residue list, then check which components it contains.
du = oechem.OEDesignUnit(aligned_prot, res_sites)

prot_mol = oechem.OEGraphMol()
lig_mol = oechem.OEGraphMol()
got_protein = du.GetProtein(prot_mol)
print(got_protein)
got_ligand = du.GetLigand(lig_mol)
print(got_ligand)

print(prot_mol)

# Collect residues named "S9U" found in the protein component.
lig_res = [
    res for res in oechem.OEGetResidues(prot_mol) if res.GetName() == "S9U"
]
print(lig_res)
# oespruce.OEMakeDesignUnit(du, aligned_prot, oechem.OEGraphMol())

# oespruce.OEMakeDesignUnits(aligned_prot, )
# dir(aligned_prot)

# For already generated DesignUnits

In [None]:
# Directory holding previously generated design-unit (.oedu) files.
du_dir = "design_units/"
# Root directory of the fragalysis data; the ligand SDF is read from under it.
frag_dir = "fragalysis/"

In [None]:
## Load a previously saved design unit from disk.
# os.path.join avoids the doubled slash produced by du_dir's trailing "/".
du_fn = os.path.join(du_dir, "pdb8dgy_0.oedu")

du = oechem.OEDesignUnit()
# OEReadDesignUnit signals failure through its return value; stop here rather
# than continuing with an empty design unit.
if not oechem.OEReadDesignUnit(du_fn, du):
    raise OSError(f"Unable to read design unit from {du_fn}")

In [None]:
## Pull the ligand and protein out of the loaded design unit and show
## whether each extraction succeeded.
lig_mol, prot_mol = oechem.OEGraphMol(), oechem.OEGraphMol()

ligand_ok = du.GetLigand(lig_mol)
print(ligand_ok)
protein_ok = du.GetProtein(prot_mol)
print(protein_ok)

# Ligand coordinates, for comparison with the replacement ligand loaded later.
lig_coords = lig_mol.GetCoords()
print(lig_coords)

In [None]:
## Load a ligand from an SDF file (first molecule in the file).
sdf_fn = f"{frag_dir}/aligned/Mpro-P2007_0A/Mpro-P2007_0A.sdf"
ifs = oechem.oemolistream()
ifs.SetFlavor(
    oechem.OEFormat_SDF,
    oechem.OEIFlavor_SDF_Default,
)
# open() reports failure through its return value rather than raising, so
# check it explicitly instead of carrying on with an empty molecule.
if not ifs.open(sdf_fn):
    raise OSError(f"Unable to open {sdf_fn} for reading")

new_lig = oechem.OEGraphMol()
try:
    if not oechem.OEReadMolecule(ifs, new_lig):
        raise ValueError(f"No molecule could be read from {sdf_fn}")
finally:
    ifs.close()

print(new_lig.GetCoords())

In [None]:
## Rebuild `du` in place from the extracted protein and the new ligand, then
## report the call's return value and which components `du` now holds.
made_du = oespruce.OEMakeDesignUnit(du, prot_mol, new_lig)
print(made_du)

has_ligand = du.HasLigand()
has_protein = du.HasProtein()
print(has_ligand, has_protein)

In [None]:
## Save the protein + ligand of the design unit as a single PDB file.
## OEWriteDesignUnit writes the design-unit (.oedu) format and does not take a
## molecule stream, so extract the components into one molecule and write that
## with OEWriteMolecule instead.
out_fn = "test/pdb8dgy_0_P2007_0A_lig.pdb"
# The output stream does not create missing directories.
os.makedirs(os.path.dirname(out_fn), exist_ok=True)

complex_mol = oechem.OEGraphMol()
complex_mask = (
    oechem.OEDesignUnitComponents_Protein
    | oechem.OEDesignUnitComponents_Ligand
)
if not du.GetComponents(complex_mol, complex_mask):
    raise ValueError(
        "Unable to extract protein and ligand from the design unit"
    )

ofs = oechem.oemolostream()
ofs.SetFlavor(
    oechem.OEFormat_PDB,
    oechem.OEOFlavor_PDB_Default,
)
if not ofs.open(out_fn):
    raise OSError(f"Unable to open {out_fn} for writing")
try:
    oechem.OEWriteMolecule(ofs, complex_mol)
finally:
    ofs.close()

In [None]:
## Pull the protein and ligand back out of the design unit as separate
## molecules, e.g. as a starting point for combining them by hand.
for stale_mol in (prot_mol, lig_mol):
    # Empty the molecules left over from earlier cells before refilling them.
    stale_mol.Clear()

print(du)
protein_found = du.GetProtein(prot_mol)
print(protein_found)
ligand_found = du.GetLigand(lig_mol)
print(ligand_found)