# Imports

In [None]:
from tqdm import tqdm
import pandas as pd, numpy as np
import plotly.express as px
from plotly.graph_objs import Figure
from pathlib import Path
from importlib import reload
import software.analysis as a
from asapdiscovery.data.schema_v2.ligand import Ligand
# Re-execute the already-imported `software.analysis` module (importlib.reload);
# presumably so local edits to it are picked up without restarting the kernel.
reload(a)

# Load Paths

In [None]:
import sys
# Append the directory three levels above the current working directory to the
# import path; the `software` package imported below is presumably located there.
# NOTE(review): `software.analysis` is already imported in the first cell, before
# this path is added -- confirm the package is importable without it, or run
# this cell first.
sys.path.append(str(Path("../../../").resolve()))
from software.paths import paths

In [None]:
# Directory whose top-level *.csv files are read in the next cell; presumably
# the output of a cross-docking run (per the directory name).
# Absolute, machine-specific path -- edit before running on another machine.
datadir = Path("/Users/alexpayne/Scientific_Projects/mers-drug-discovery/sars2-retrospective-analysis/20231030_cross_docked_hybrid_p_only_2d_50poses/")

In [None]:
# Read every top-level CSV in `datadir` into its own DataFrame.
# The paths are sorted so the load order (and therefore the row order after
# concatenation) is the same on every run; materialising them also gives tqdm
# a known total for its progress bar.
csvs = [pd.read_csv(csv) for csv in tqdm(sorted(datadir.glob("*.csv")))]

In [None]:
# Stack the per-file frames row-wise. `ignore_index` is left at its default,
# so each frame keeps its own row labels and index values repeat across files.
df = pd.concat(csvs)

In [None]:
# Display the combined frame.
df

In [None]:
# Count of non-null `docked_file` entries for each (ligand_id, du_structure) pair.
results_count = df.groupby(["ligand_id", "du_structure"])["docked_file"].count()

# Get unique structures

In [None]:
# Distinct values of the `du_structure` column, in order of first appearance.
structs = df["du_structure"].unique()

In [None]:
# Number of distinct `du_structure` values.
len(structs)

In [None]:
# Directory globbed for *.sdf ligand files in the next cell.
# Absolute, machine-specific path -- edit before running on another machine.
sdf_dir = Path("/Users/alexpayne/Scientific_Projects/mers-drug-discovery/sars2-retrospective-analysis/full_frag_prepped_mpro_20230603/sdf_lsf_array_p_only_by_name/")

In [None]:
# Build one Ligand per *.sdf file found directly inside `sdf_dir`.
ligs = list(map(Ligand.from_sdf, sdf_dir.glob("*.sdf")))

In [None]:
# Index the ligands by their "Dataset" tag; a later ligand with the same tag
# value replaces an earlier one.
lig_dict = dict((lig.tags["Dataset"], lig) for lig in ligs)

## Load docked molecules

In [None]:
# Collect the SDF files located one directory below `datadir`.
# `datadir` is reused instead of repeating the same literal path, and the
# result is sorted so that `docked_sdfs[0]` (used by later cells) refers to
# the same file on every run rather than depending on filesystem order.
docked_sdfs = sorted(datadir.glob("*/*.sdf"))

# Test loading multi-conformer molecules (MCMols)

In [None]:
from asapdiscovery.data.openeye import oechem

In [None]:
import copy
from asapdiscovery.data.openeye import (
    _set_SD_data_repr,
    clear_SD_data,
    get_SD_data,
    load_openeye_sdf,
    oechem,
    oemol_to_inchi,
    oemol_to_inchikey,
    oemol_to_sdf_string,
    oemol_to_smiles,
    oequacpac,
    sdf_string_to_oemol,
    smiles_to_oemol,
)
from asapdiscovery.data.schema_v2.identifiers import LigandIdentifiers, LigandProvenance
from asapdiscovery.data.schema_v2.schema_base import DataStorageType
from asapdiscovery.data.state_expanders.expansion_tag import StateExpansionTag
from pydantic import Field, root_validator, validator
def from_oemol(mol: oechem.OEMol, **kwargs) -> "Ligand":
    """
    Create a Ligand from an OEMol extracting all SD tags into the internal model

    Parameters
    ----------
    mol
        Molecule to convert. It is deep-copied before any SD data is cleared,
        so the object passed in is not modified by this function.
    **kwargs
        Extra values forwarded to ``Ligand``. A ``data`` entry is discarded
        because the SDF string is rebuilt from ``mol``; a ``tags`` entry is
        replaced by the tags collected here. SD tags found on the molecule
        overwrite entries that share the same key.

    Returns
    -------
    Ligand
        Built from the cleaned SDF string plus every collected value.
    """
    # Local import: this notebook-level copy of the method has no module-level
    # `json` import to rely on.
    import json

    # work with a copy as we change the state of the molecule
    input_mol = copy.deepcopy(mol)
    kwargs.pop("data", None)
    sd_tags = get_SD_data(input_mol)
    for key, value in sd_tags.items():
        try:
            # check to see if we have JSON of a model field
            kwargs[key] = json.loads(value)
        except json.JSONDecodeError:
            kwargs[key] = value

    # extract all info as a tag if it has no field on the model
    # (this is a free function, so the model is named explicitly; a dict is
    # built rather than a set of pairs so that unhashable JSON values such as
    # lists or dicts cannot raise TypeError)
    model_fields = Ligand.__fields__
    tags = {
        key: value for key, value in kwargs.items() if key not in model_fields
    }
    kwargs["tags"] = tags
    # clean the sdf data for the internal model
    sdf_str = oemol_to_sdf_string(clear_SD_data(input_mol))
    # create a smiles which does not have nitrogen stereo
    smiles = oemol_to_smiles(input_mol)
    # create the internal LigandProvenance model
    if "provenance" not in kwargs:
        provenance = LigandProvenance(
            isomeric_smiles=smiles,
            inchi=oemol_to_inchi(input_mol),
            inchi_key=oemol_to_inchikey(input_mol),
            fixed_inchi=oemol_to_inchi(input_mol, fixed_hydrogens=True),
            fixed_inchikey=oemol_to_inchikey(input_mol, fixed_hydrogens=True),
        )
        kwargs["provenance"] = provenance
    # check for an openeye title which could be used as a compound name
    if mol.GetTitle() != "" and kwargs.get("compound_name") is None:
        kwargs["compound_name"] = mol.GetTitle()

    return Ligand(data=sdf_str, **kwargs)

In [None]:
# Read the first docked SDF as multi-conformer molecules.
ifs = oechem.oemolistream()
# The conformer test tells the reader how to decide whether consecutive records
# are conformers of one molecule (see the OpenEye OEConfTest documentation).
ifs.SetConfTest(oechem.OEOmegaConfTest())
# Fail loudly if the file cannot be opened instead of silently reading nothing.
if not ifs.open(str(docked_sdfs[0])):
    oechem.OEThrow.Fatal(f"Unable to open {docked_sdfs[0]}")
try:
    for mol in ifs.GetOEMols():
        print(mol.GetTitle(), "has", mol.NumConfs(), "conformers")
        # lig = from_oemol(mol)
        # Rebind `mol` to a deep copy; the cells below keep using `mol` after
        # the loop has finished.
        mol = copy.deepcopy(mol)
        # Progress marker printed once the copy has completed.
        print("hi")
finally:
    # Always release the file handle, even if reading or copying fails.
    ifs.close()

In [None]:
# SD data of the molecule kept from the loop above (the deep copy of the last
# molecule read).
get_SD_data(mol)

In [None]:
# Map each SD tag on `mol` to its value, reading the data pairs directly
# through oechem; a repeated tag keeps the last value seen.
pairs = dict((dp.GetTag(), dp.GetValue()) for dp in oechem.OEGetSDDataPairs(mol))

In [None]:
# Materialise the molecule's conformers into a list so they can be indexed.
confs = list(mol.GetConfIter())

In [None]:
# SD data of the first conformer in the list built above.
get_SD_data(confs[0])

In [None]:
# SD data of whatever `mol.GetActive()` returns (the active conformer in
# OpenEye's API).
get_SD_data(mol.GetActive())

In [None]:
# Display the molecule object itself.
mol

In [None]:
# Number of conformers held by the kept molecule.
mol.NumConfs()

In [None]:
# Bind the project's OpenEye helper module to `oe` so it can be reloaded below.
from asapdiscovery.data import openeye as oe

In [None]:
# Re-execute the helper module; presumably to pick up a newly added or edited
# `load_openeye_multiconf_sdf` without restarting the kernel.
reload(oe)

In [None]:
# Load the first docked SDF through the library's multi-conformer loader.
multiconf = oe.load_openeye_multiconf_sdf(docked_sdfs[0])

In [None]:
# Number of conformers in the loaded molecule.
multiconf.NumConfs()

In [None]:
def load_openeye_multiconf_sdf(sdf_fn):
    """
    Load the first molecule of an SDF file as a multi-conformer OEMol.

    Parameters
    ----------
    sdf_fn
        Path (str or Path) of the SDF file to read.

    Returns
    -------
    The first molecule yielded by the stream, or ``None`` when the file
    contains no molecules.

    Raises
    ------
    FileNotFoundError
        If ``sdf_fn`` does not exist.

    Notes
    -----
    A file that exists but cannot be opened is reported through
    ``oechem.OEThrow.Fatal``.
    """
    if not Path(sdf_fn).exists():
        raise FileNotFoundError(f"{sdf_fn} does not exist!")

    ifs = oechem.oemolistream()
    ifs.SetFlavor(
        oechem.OEFormat_SDF,
        oechem.OEIFlavor_SDF_Default,
    )
    # The conformer test must be set before reading so that the reader can
    # group records into conformers (see the OpenEye OEConfTest documentation).
    ifs.SetConfTest(oechem.OEOmegaConfTest())
    if not ifs.open(str(sdf_fn)):
        oechem.OEThrow.Fatal(f"Unable to open {sdf_fn}")

    try:
        # Only the first molecule is wanted; None signals an empty file.
        return next(iter(ifs.GetOEMols()), None)
    finally:
        # Runs on every exit path, so the stream is closed even though we
        # return as soon as the first molecule has been read.
        ifs.close()

In [None]:
# Load the same file again, this time through the notebook-local
# `load_openeye_multiconf_sdf` defined in the previous cell.
multiconf = load_openeye_multiconf_sdf(docked_sdfs[0])

In [None]:
# Print the SD data of each conformer in turn.
for conf in multiconf.GetConfIter():
    print(get_SD_data(conf))