In [3]:
from asapdiscovery.modeling.protein_prep import PreppedComplex, ProteinPrepperBase
from asapdiscovery.data.schema.complex import Complex, PreppedComplex
from asapdiscovery.data.readers.meta_structure_factory import MetaStructureFactory
from pathlib import Path

In [4]:
# Build the structure factory for the single PDB file under investigation.
# NOTE(review): the path is absolute and user-specific — adjust it before running
# this notebook on another machine.
structure_factory = MetaStructureFactory(
    structure_dir=None,
    fragalysis_dir=None,
    pdb_file=Path('/lila/home/pengs/fold_zika/test_dock/input/NP_722463.1_lig_0.pdb'),
    # Pass a real boolean: the string 'false' is non-empty and therefore truthy, so it
    # would only disable dask if the factory happens to coerce it back to a bool.
    use_dask=False,
    dask_failure_mode='skip',
    dask_client=None,
)

# Load the complexes described by the factory; the cache-matching cell reads `inputs`.
inputs = structure_factory.load()
print(inputs)

[Complex(target=Target(target_name='NP_722463.1_lig_0', ids=None, data_format=<DataStorageType.pdb: 'pdb'>), ligand=Ligand(compound_name='NP_722463.1_lig_0_ligand', ids=None, provenance=LigandProvenance(isomeric_smiles='c1ccc2c(c1)c(ccn2)N3CC[NH2+]CC3', inchi='InChI=1S/C13H15N3/c1-2-4-12-11(3-1)13(5-6-15-12)16-9-7-14-8-10-16/h1-6,14H,7-10H2/p+1', inchi_key='CGSWRHKBLLDUHO-UHFFFAOYSA-O', fixed_inchi='InChI=1/C13H15N3/c1-2-4-12-11(3-1)13(5-6-15-12)16-9-7-14-8-10-16/h1-6,14H,7-10H2/p+1/fC13H16N3/h14H/q+1', fixed_inchikey='CGSWRHKBLLDUHO-FOEMDWODNA-O'), experimental_data=None, expansion_tag=None, tags={}, data_format=<DataStorageType.sdf: 'sdf'>), ligand_chain=' ')]


In [5]:
def gather_new_tasks(
    complex_to_prep: list[Complex], cached_complexs: list[PreppedComplex]
) -> tuple[list[Complex], list[PreppedComplex]]:
    """
    Split the requested complexes into those that still need prepping and those
    that can be taken from the cache, matching on the ``hash`` attribute.

    The cache lookup table and the hash of every requested complex are printed
    so that mismatches between the two can be inspected.

    Parameters
    ----------
    complex_to_prep: The list of complexes we want to perform prep on.
    cached_complexs: The list of PreppedComplex objects found in the cache which can be reused.

    Returns
    -------
        A tuple of two lists: the first holds the complexes which still need to be
        prepped, the second holds the cached PreppedComplex objects to reuse (one per
        matching input, in input order). When nothing matches, the first element is
        the ``complex_to_prep`` list object itself rather than a copy.
    """
    # index the cache by hash; a later entry replaces an earlier one with the same hash
    cache_lookup = {}
    for prepped in cached_complexs:
        cache_lookup[prepped.hash] = prepped
    print('cached_by_hash')
    print(cache_lookup)

    # collect the cached results that correspond to a requested complex
    reusable = []
    for candidate in complex_to_prep:
        candidate_hash = candidate.hash
        print('inp.hash')
        print(candidate_hash)
        if candidate_hash not in cache_lookup:
            continue
        reusable.append(cache_lookup[candidate_hash])

    # nothing matched: every input still needs prepping, hand the list back as-is
    if not reusable:
        return complex_to_prep, reusable

    remaining = [
        candidate for candidate in complex_to_prep if candidate.hash not in cache_lookup
    ]
    return remaining, reusable

In [7]:
# Directory holding previously prepped complexes serialised as JSON.
# NOTE(review): the path is absolute and user-specific — adjust it before running
# this notebook on another machine.
cache_dir = Path('/home/pengs/fold_zika/test_dock/temp_dir/NP_722463.1_lig_0-1a2bb0eb26cbb8705ba5b6ed9060f89ca9828fe333abdc83d3b7867ed1b3d458+CGSWRHKBLLDUHO-BXBSGAQWNA-P')

if not cache_dir.exists():
    raise ValueError(f"Cache directory {cache_dir} does not exist.")

# Load every cached PreppedComplex found below the cache directory. Sorting makes the
# load order deterministic (rglob yields files in an unspecified order).
prepped_complexes = []
for complex_file in sorted(cache_dir.rglob("*.json")):
    print(complex_file)
    prepped_complexes.append(PreppedComplex.from_json_file(complex_file))

cached_complexs = prepped_complexes

# Bind the result containers up front: `all_outputs` is not defined anywhere else in
# the visible notebook, and `cached_outputs` would otherwise be unbound when the cache
# is empty. Resetting them here also keeps a re-run from extending `all_outputs` twice.
all_outputs = []
cached_outputs = []

# work out what we can reuse
# NOTE: `inputs` is rebound below (and set to None when uncached structures remain),
# so the loading cell must be re-run before this cell is executed again.
if cached_complexs:
    # reduce the number of tasks using any possible cached structures
    inputs, cached_outputs = gather_new_tasks(
        complex_to_prep=inputs, cached_complexs=cached_complexs
    )
    if inputs:
        print(f"Disregarding {len(inputs)} structures which could not be found in the cache.")
        inputs = None

    if cached_outputs:
        print(f"Matched {len(cached_outputs)} cached structures which will be reused.")
        all_outputs.extend(cached_outputs)

/home/pengs/fold_zika/test_dock/temp_dir/NP_722463.1_lig_0-1a2bb0eb26cbb8705ba5b6ed9060f89ca9828fe333abdc83d3b7867ed1b3d458+CGSWRHKBLLDUHO-BXBSGAQWNA-P/NP_722463.1_lig_0.json
cached_by_hash
{'1a2bb0eb26cbb8705ba5b6ed9060f89ca9828fe333abdc83d3b7867ed1b3d458+CGSWRHKBLLDUHO-BXBSGAQWNA-P': PreppedComplex(target=PreppedTarget(target_name='NP_722463.1_lig_0', ids=None, data_format=<DataStorageType.b64oedu: 'b64oedu'>, target_hash='1a2bb0eb26cbb8705ba5b6ed9060f89ca9828fe333abdc83d3b7867ed1b3d458'), ligand=Ligand(compound_name='NP_722463.1_lig_0_ligand', ids=None, provenance=LigandProvenance(isomeric_smiles='c1ccc2c(c1)c(ccn2)N3CC[NH2+]CC3', inchi='InChI=1S/C13H15N3/c1-2-4-12-11(3-1)13(5-6-15-12)16-9-7-14-8-10-16/h1-6,14H,7-10H2/p+1', inchi_key='CGSWRHKBLLDUHO-UHFFFAOYSA-O', fixed_inchi='InChI=1/C13H15N3/c1-2-4-12-11(3-1)13(5-6-15-12)16-9-7-14-8-10-16/h1-6,14H,7-10H2/p+1/fC13H16N3/h14H/q+1', fixed_inchikey='CGSWRHKBLLDUHO-FOEMDWODNA-O'), experimental_data=None, expansion_tag=None, tags={}, dat

In [9]:
# Show what is left to prep and what was matched from the cache, one value per line.
print(inputs, cached_outputs, sep="\n")

None
[]
