# load repos

In [None]:
import argparse
import os
import sys
import pandas as pd

# Make the directory two levels above the working directory importable.
repo_path = os.path.normpath(os.path.join(os.getcwd(), os.pardir, os.pardir))
sys.path += [repo_path]

# Input tables: experimental measurements (relative to the working directory)
# and the two fragalysis CSV files on the mounted cluster file system.
exp_data_fn = (
    "COVID_Moonshot_Takeda_panCorona_"
    "enzyme_measurements.csv"
)
sars2_structures_fn = (
    "/Users/alexpayne/lilac-mount-point/fragalysis/"
    "metadata.csv"
)
sars2_cmpds_fn = (
    "/Users/alexpayne/lilac-mount-point/fragalysis/extra_files/"
    "Mpro_compound_tracker_csv.csv"
)

from asapdiscovery.docking.docking import build_docking_systems,\
    parse_xtal, run_docking

In [None]:
# Export the OpenEye license location before any openeye module is imported.
# NOTE(review): this is a machine-specific absolute path.
os.environ.update(
    OE_LICENSE=(
        "/Users/alexpayne/Scientific_Packages/"
        "oe_license_the_laboratory_of_john_chodera_memorial_"
        "sloankettering_cancer_center_20200828-16-27-18.txt"
    )
)

# Load Datasets

In [None]:
# Fragalysis metadata table; its `alternate_name` column is matched against
# the experimental "External ID" values in a later cell.
sars2_structures = pd.read_csv(sars2_structures_fn)
sars2_structures.head()

In [None]:
# Experimental measurements; every missing value is replaced by an empty
# string (this applies to all columns, not only the text ones).
exp_data = pd.read_csv(exp_data_fn).fillna("")
exp_data

# Filter SARS2 compounds by MERS data

In [None]:
# Keep only the structure rows whose alternate name is one of the
# experimental External IDs.
sars2_filtered = sars2_structures[sars2_structures.alternate_name.isin(exp_data['External ID'])]

In [None]:
sars2_filtered

In [None]:
# Number of structure rows that matched an experimental ID.
len(sars2_filtered)

In [None]:
# Show both ID columns for a manual comparison of their formats.
exp_data["External ID"]

In [None]:
sars2_filtered.alternate_name

# Check what is going on with these ligands

## get smiles strings

In [None]:
# SMILES column of the experimental table (a pandas Series).
exp_smiles = exp_data['SMILES']

In [None]:
exp_smiles

In [None]:
# Check the Python type of the entry labelled 0.
# NOTE(review): `exp_smiles[0]` is a label lookup; it assumes the default
# integer index produced by read_csv.
type(exp_smiles[0])

In [None]:
from kinoml.core.ligands import Ligand

In [None]:
from openeye import oechem, oedepict

In [None]:
# mol = oechem.OEGraphMol()
# if oechem.OESmilesToMol(mol, exp_smiles[0]):
#     oedepict.OEPrepareDepiction(mol)
#     oedepict.OE2DMolDisplay(mol)

In [None]:
# from rdkit import Chem
# mols = [Chem.MolFromSmiles(exp_cmp) for exp_cmp in exp_smiles]
# img = Chem.Draw.MolsToGridImage(mols, legends=list(exp_data["Molecule Name"]))
# img

In [None]:
# Display the "CDD Number" column of the experimental table as a plain list.
list(exp_data["CDD Number"])

# Try a different SARS2 file

In [None]:
# NOTE: this rebinds `sars2_structures` (previously the metadata table) to the
# compound tracker table, so cells above and below see different data.
sars2_structures = pd.read_csv(sars2_cmpds_fn)
sars2_structures.head()

In [None]:
# Same filter as before, this time matching on the tracker's "Compound ID"
# column; `sars2_filtered` is rebound as well.
sars2_filtered = sars2_structures[sars2_structures['Compound ID'].isin(exp_data['External ID'])]

In [None]:
sars2_filtered['Compound ID']

In [None]:
# Number of tracker rows that matched an experimental ID ...
len(sars2_filtered)

In [None]:
# ... versus the number of experimental SMILES entries.
len(exp_smiles)

## Use this csv to get structures

In [None]:
from rdkit import Chem
# `Draw` is a submodule of `rdkit.Chem` that `from rdkit import Chem` does not
# load by itself; import it explicitly so the grid-image call cannot fail with
# an AttributeError on `Chem.Draw`.
from rdkit.Chem import Draw

# One RDKit molecule per experimental SMILES string, drawn in a grid and
# labelled with the matching External ID (both come from `exp_data`, so the
# molecules and legends line up row by row).
mols = [Chem.MolFromSmiles(exp_cmp) for exp_cmp in exp_smiles]
img = Draw.MolsToGridImage(mols, legends=list(exp_data["External ID"]))
img

In [None]:
sars2_filtered

In [None]:
# Compound IDs of the tracker rows that survived the External ID filter.
common_ids = sars2_filtered["Compound ID"].tolist()

In [None]:
# Interactive documentation lookup only; no effect on the analysis.
help(pd.Series.to_string)

In [None]:
# Imported explicitly because `from rdkit import Chem` does not load the
# `Draw` submodule on its own.
from rdkit.Chem import Draw

# Draw only the compounds found in both tables.
mols = []
for common_id in common_ids:
    # The first experimental row carrying this ID supplies the SMILES string.
    smiles = exp_data.loc[exp_data["External ID"] == common_id, "SMILES"].values[0]
    mols.append(Chem.MolFromSmiles(smiles))

# Legends are matched to molecules by position, so they must come from
# `common_ids` (the list `mols` was built from) rather than from every row of
# `exp_data`, which would attach the wrong ID to each drawing.
img = Draw.MolsToGridImage(mols, legends=[str(common_id) for common_id in common_ids])
img

In [None]:
# "Dataset" column of the filtered tracker rows.
# NOTE(review): presumably the crystal structure dataset name of a compound,
# missing (NaN) when there is none; confirm against the tracker schema.
sars2_strucs = sars2_filtered["Dataset"]

In [None]:
sars2_strucs

In [None]:
# Count of filtered rows without a Dataset value.
sum(sars2_strucs.isna())

In [None]:
sars2_strucs.isna()

In [None]:
# Filtered compounds with no Dataset value.
mols_wo_sars2_xtal = sars2_filtered[sars2_strucs.isna()][["Compound ID", "SMILES"]]

In [None]:
mols_wo_sars2_xtal

In [None]:
# Filtered compounds that do have a Dataset value.
mols_w_sars2_xtal = sars2_filtered[~sars2_strucs.isna()][["Compound ID", "SMILES", "Dataset"]]

In [None]:
mols_w_sars2_xtal

In [None]:
# Interactive documentation lookup only.
help(parse_xtal)

## try a version of parse_xtal

In [None]:
from asapdiscovery.data.schema import CrystalCompoundData

In [None]:
## Select the rows whose Dataset entry is a string (i.e. not NaN).
## The stricter '-P' dataset filter from parse_xtal is intentionally not applied.
df = mols_w_sars2_xtal
x_dir = "/Users/alexpayne/lilac-mount-point/fragalysis/aligned/"
idx = [isinstance(d, str) for d in df['Dataset']]

## Build argument dicts for the CrystalCompoundData objects
xtal_dicts = [
    {'smiles': smiles, 'dataset': dataset}
    for smiles, dataset in zip(df.loc[idx, 'SMILES'], df.loc[idx, 'Dataset'])
]

## Add structure filename information
for d in xtal_dicts:
    ## Prefer the chain A structure and fall back to chain B
    for chain in ('A', 'B'):
        entry = f'{d["dataset"]}_0{chain}'
        ## os.path.join avoids the doubled '/' that plain string formatting
        ## produced with the trailing slash of x_dir
        fn = os.path.join(x_dir, entry, f'{entry}_seqres.pdb')
        if os.path.isfile(fn):
            d['str_fn'] = fn
            break
    else:
        ## Raise explicitly: an assert would be stripped under `python -O`
        ## and a non-existent path would be stored silently
        raise FileNotFoundError(f'No structure found for {d["dataset"]}.')

## Build CrystalCompoundData objects for each row
xtal_compounds = [CrystalCompoundData(**d) for d in xtal_dicts]

In [None]:
# Number of compounds for which a structure file was found on disk.
len(xtal_compounds)

In [None]:
xtal_compounds

## But this isn't really what I want; I need to load the MERS structures for docking

In [None]:
from kinoml.features.complexes import OEDockingFeaturizer
from kinoml.core.proteins import Protein
from kinoml.core.ligands import Ligand
from kinoml.core.systems import ProteinLigandComplex
from asapdiscovery.data import pdb

def parse_pdb_structs(pdb_list, pdb_dir):
    """Load one ``Protein`` per entry of ``pdb_list``.

    Each entry is expanded to ``<pdb_dir>/<entry>_aligned_to_frag_ref.pdb``,
    read with ``Protein.from_file`` under the name ``'MPRO'``, and has its
    ``chain_id`` attribute set to ``"A"``.

    Parameters
    ----------
    pdb_list : iterable
        Entries used as file name prefixes.
    pdb_dir : str
        Directory that holds the aligned PDB files.

    Returns
    -------
    list
        The loaded proteins, in the order of ``pdb_list``.
    """
    loaded = []
    for pdb_id in pdb_list:
        struct_path = os.path.join(pdb_dir, f"{pdb_id}_aligned_to_frag_ref.pdb")
        entry = Protein.from_file(struct_path, name='MPRO')
        entry.chain_id = "A"
        loaded.append(entry)
    return loaded

In [None]:
# Entries are read from a YAML file in the working directory and turned into
# Protein objects from the aligned PDB files on the mounted file system.
pdb_list = pdb.load_pdbs_from_yaml("mers-structures.yaml")
proteins = parse_pdb_structs(pdb_list, "/Users/alexpayne/lilac-mount-point/mers-structures/")

In [None]:
# NOTE(review): parse_pdb_structs only passes a file path and a name to
# Protein.from_file; whether `pdb_id` is populated that way is not visible
# here.
proteins[0].pdb_id

In [None]:
# Interactive documentation lookup only.
help(Ligand.from_file)

In [None]:
def get_ligand_coords_from_sdf(df, db_dir, chain="A"):
    """Load a ``Ligand`` from the SDF file of every dataset listed in ``df``.

    For each value of ``df["Dataset"]`` the file
    ``<db_dir>/<dataset>_0<chain>/<dataset>_0<chain>.sdf`` is read with
    ``Ligand.from_file(..., allow_undefined_stereo=True)``. The list of file
    paths is printed before anything is loaded.

    Parameters
    ----------
    df
        Table with a "Dataset" column.
    db_dir : str
        Directory containing one sub-directory per dataset and chain.
    chain : str, optional
        Chain label used in the directory and file names (default ``"A"``).

    Returns
    -------
    list
        One ligand per dataset, in column order.
    """
    sdf_paths = []
    for dataset in df["Dataset"].to_list():
        entry = f"{dataset}_0{chain}"
        sdf_paths.append(os.path.join(db_dir, f"{entry}/{entry}.sdf"))
    print(sdf_paths)

    return [
        Ligand.from_file(sdf_path, allow_undefined_stereo=True)
        for sdf_path in sdf_paths
    ]
    

In [None]:
# Load the chain "A" (default) ligands of every compound with a Dataset value;
# the returned list is only displayed, not stored.
get_ligand_coords_from_sdf(mols_w_sars2_xtal, 
                           db_dir = "/Users/alexpayne/lilac-mount-point/fragalysis/aligned/",
                          )

In [None]:
# Interactive documentation lookup only.
from openff.toolkit.topology import Molecule
help(Molecule)

In [None]:
# Read a single ligand SDF directly through the OpenFF Molecule class.
Molecule.from_file('/Users/alexpayne/lilac-mount-point/fragalysis/aligned/Mpro-P1661_0A/Mpro-P1661_0A.sdf',
                  allow_undefined_stereo=True)

In [None]:
def build_systems(proteins, smiles_df):
    """Pair proteins with ligands to build docking systems (not implemented).

    This function was left as two nested ``for`` headers without a body,
    which is a ``SyntaxError``, and it iterated over ``ligands``, a name that
    is not defined in the function. How a protein/ligand pair should become a
    system is not specified anywhere in this notebook, so the stub keeps the
    signature and fails loudly instead of guessing.

    Parameters
    ----------
    proteins : iterable
        Proteins to dock against (currently unused).
    smiles_df
        Ligand SMILES data (currently unused).
        NOTE(review): presumably a table with a "SMILES" column; confirm.

    Raises
    ------
    NotImplementedError
        Always.
    """
    # TODO: build one system per (protein, ligand) pair once the ligand source
    # and the system type are decided.
    raise NotImplementedError(
        "build_systems is an unfinished stub: the protein/ligand pairing "
        "has not been implemented yet."
    )

# Try build_design_units

In [None]:
from openeye import oechem, oespruce

In [None]:
# -target
# "/Users/alexpayne/lilac-mount-point/mers-structures/rcsb_5WKK.pdb"
# -ref
# "/Users/alexpayne/lilac-mount-point/fragalysis/extra_files/reference.pdb"
# -loop
# "/Users/alexpayne/lilac-mount-point/rcsb_spruce.loop_db"
# -o
# "/Users/alexpayne/lilac-mount-point/mers-structures/spruce_test"

In [None]:
## Load target molecule
target_fn = "/home/paynea/mers-structures/rcsb_8DGY.pdb"
ifs = oechem.oemolistream()
ifs.SetFlavor(
    oechem.OEFormat_PDB,
    oechem.OEIFlavor_PDB_Default | oechem.OEIFlavor_PDB_DATA,
)
## The stream reports failure through its return value, not an exception;
## without this check a missing file would silently yield an empty molecule.
if not ifs.open(target_fn):
    raise OSError(f"Unable to open {target_fn} for reading")
in_mol = oechem.OEGraphMol()
try:
    if not oechem.OEReadMolecule(ifs, in_mol):
        raise ValueError(f"Unable to read a molecule from {target_fn}")
finally:
    ## Close the stream whether or not the read succeeded
    ifs.close()

In [None]:
## Load reference molecule
## `ifs` is the input stream created when the target molecule was loaded; it
## is reused here (presumably so the PDB flavor set on it still applies).
ref_fn = "/home/paynea/fragalysis/extra_files/reference.pdb"
## Check the return values: a failed open or read raises no exception by
## itself and would leave `ref_mol` empty.
if not ifs.open(ref_fn):
    raise OSError(f"Unable to open {ref_fn} for reading")
ref_mol = oechem.OEGraphMol()
try:
    if not oechem.OEReadMolecule(ifs, ref_mol):
        raise ValueError(f"Unable to read a molecule from {ref_fn}")
finally:
    ifs.close()

##Extract and align protein
bio_opts = oespruce.OEBioUnitExtractionOptions()
bio_opts.SetSuperpose(True)

## Materialise the result so an empty extraction is reported clearly instead
## of surfacing as a bare IndexError.
biounits = list(oespruce.OEExtractBioUnits(in_mol, ref_mol, bio_opts))
if not biounits:
    raise ValueError("No biological unit could be extracted from the target structure")
## Continue with the first biological unit only
in_mol = biounits[0]

In [None]:
in_mol

In [None]:
## Set up options for building DesignUnits
opts = oespruce.OEMakeDesignUnitOptions()
# opts.SetBioUnitExtractionOptions(bio_opts)
opts.GetPrepOptions().GetBuildOptions().GetLoopBuilderOptions().SetBuildTails(
    False
)
opts.GetPrepOptions().GetBuildOptions().GetLoopBuilderOptions().SetLoopDBFilename(
    "/home/paynea/rcsb_spruce.loop_db"
)
opts.GetPrepOptions().GetEnumerateSitesOptions().SetRestrictToRefSite(True)

In [None]:
## Build DesignUnits
design_units = oespruce.OEMakeDesignUnits(
    in_mol, oespruce.OEStructureMetadata(), opts
)
design_units = list(design_units)

In [None]:
design_units

In [None]:
## Output locations; the `{}` placeholder is filled with the DesignUnit index
out_dir = "/home/paynea/mers-structures/spruce_test"
out_base_du = out_dir + "/rcsb_8DGY_{}.oedu"
out_base = out_dir + "/rcsb_8DGY_du_protein_{}.pdb"
## Make sure the output directory exists before anything is written to it
os.makedirs(out_dir, exist_ok=True)

ofs = oechem.oemolostream()
ofs.SetFlavor(
    oechem.OEFormat_PDB,
    oechem.OEOFlavor_PDB_Default,
)
prot_mol = oechem.OEGraphMol()

## `design_units` is already a list (see the cell that builds it)
for i, du in enumerate(design_units):
    print(i)
    ## Save the DesignUnit object; the writer signals failure via its return
    ## value, so check it instead of continuing silently
    du_fn = out_base_du.format(i)
    if not oechem.OEWriteDesignUnit(du_fn, du):
        raise OSError(f"Unable to write {du_fn}")

    ## Save the protein as a PDB file
    prot_mol.Clear()
    du.GetProtein(prot_mol)

    prot_fn = out_base.format(i)
    if not ofs.open(prot_fn):
        raise OSError(f"Unable to open {prot_fn} for writing")
    try:
        oechem.OEWriteMolecule(ofs, prot_mol)
    finally:
        ## Always release the file handle, even if writing fails
        ofs.close()

In [None]:
# Work with the first DesignUnit from here on.
du = design_units[0]

In [None]:
# Copy the DesignUnit's protein into a fresh molecule; `res` holds the return
# value of the call.
prot_mol = oechem.OEGraphMol()
res = du.GetProtein(prot_mol)
res

In [None]:
# Same for the ligand component.
lig = oechem.OEGraphMol()
du.GetLigand(lig)

In [None]:
# Look up an atom matching the OEIsHetero predicate.
atom = prot_mol.GetAtom(oechem.OEIsHetero())

In [None]:
atom.GetName()

In [None]:
# NOTE(review): 9191 is a hard-coded atom index; it is only meaningful for
# this particular structure.
atom = prot_mol.GetAtom(oechem.OEHasAtomIdx(9191))

In [None]:
atom.GetName()

In [None]:
# Remove that atom from the protein.
prot_mol.DeleteAtom(atom)

In [None]:
# Ask Spruce to (re)build side chains on the edited protein.
oespruce.OEBuildSidechains(prot_mol)

In [None]:
# Place hydrogens, bypassing every atom that is not a modeled atom.
# NOTE(review): presumably this limits placement to the rebuilt atoms;
# confirm against the OEPlaceHydrogensOptions documentation.
options = oechem.OEPlaceHydrogensOptions()
options.SetBypassPredicate(oechem.OENotAtom(oespruce.OEIsModeledAtom()))
oechem.OEPlaceHydrogens(prot_mol, options)

In [None]:
# Look up index 9191 again after the edits.
atom = prot_mol.GetAtom(oechem.OEHasAtomIdx(9191))

In [None]:
# Residue-level view of the edited protein.
hier_view = oechem.OEHierView(prot_mol)

In [None]:
# Chain A, CYS 148. NOTE: this rebinds `res`, which earlier held the return
# value of du.GetProtein.
res = hier_view.GetResidue("A", "CYS", 148)

In [None]:
res

In [None]:
# List the atoms of that residue with their indices.
for atom in res.GetAtoms():
    print(atom.GetIdx(), atom.GetName())

In [None]:
# Write the edited protein to the working directory, reusing the `ofs` output
# stream created when the DesignUnits were saved.
# NOTE(review): the return value of `ofs.open` is not checked.
ofs.open("test_w_hydrogens_v2.pdb")
oechem.OEWriteMolecule(ofs, prot_mol)
ofs.close()

In [None]:
# Atom names of chain B, CYS 148 as seen through the DesignUnit's receptor.
# NOTE(review): confirm that `du.GetReceptor()` exists in the installed
# toolkit version.
hier_view = oechem.OEHierView(du.GetReceptor())
res = hier_view.GetResidue("B", "CYS", 148)
for atom in res.GetAtoms():
    print(atom.GetName())

In [None]:
# NOTE(review): everywhere else GetProtein is called with an output molecule
# (`du.GetProtein(prot_mol)`); the zero-argument form here may not be valid.
receptor = du.GetProtein()

In [None]:
# Same residue lookup on the extracted biounit (`in_mol`), chain A.
hier_view = oechem.OEHierView(in_mol)
res = hier_view.GetResidue("A", "CYS", 148)
for atom in res.GetAtoms():
    print(atom.GetName())

In [None]:
in_mol

In [None]:
# Dump every residue number and name of `in_mol`.
for residue in hier_view.GetResidues():
    print(residue.GetResidueNumber(), residue.GetResidueName())

In [None]:
# Mutate the residue held in `res` (chain A, CYS 148) to alanine, in place.
oespruce.OEMutateResidue(in_mol, res.GetOEResidue(), "ALA")

In [None]:
# Rebuild the view after the mutation and list the atoms of the new ALA 148.
hier_view = oechem.OEHierView(in_mol)
res = hier_view.GetResidue("A", "ALA", 148)
for atom in res.GetAtoms():
    print(atom.GetName())

In [None]:
# Mutate the residue back to cysteine.
oespruce.OEMutateResidue(in_mol, res.GetOEResidue(), "CYS")