# Rework DataInstances and DataInstanceLibraries (DILs)
* DILs should be linked to an actual local directory
* Transform libraries can use DILs
* DILs know how to move themselves to remote Sources

In [1]:
import os
from pathlib import Path

from metasmith.agents.presets import Agent
from metasmith.models.libraries import *

from local.constants import WORKSPACE_ROOT
from local.utils import LinkifyPath

In [2]:
# Scratch directory used by the rest of this notebook. It is created up front
# (including missing parents); exist_ok=True makes the cell safe to re-run.
CACHE = WORKSPACE_ROOT/"main/local_mock/cache/xgdb_tests"
CACHE.mkdir(parents=True, exist_ok=True)

In [3]:
# Load the data type definitions from the prototype YAML file, then display
# the length of the loaded library as a quick sanity check.
types = DataTypeLibrary.Load(WORKSPACE_ROOT/"main/local_mock/prototypes/metagenomics.dev3.yml")
len(types)

6

In [4]:
# Build two DataInstanceLibraries under CACHE (a sample library and a reference
# library), register the "metagenomics" types with each, add files, save both,
# and finally re-load the sample library from disk.
xgdb_path, refdb_path = CACHE/"test.xgdb", CACHE/"ref.xgdb"
_xgdb, refdb = DataInstanceLibrary(xgdb_path), DataInstanceLibrary(refdb_path)

# Each entry is (path on disk, name used as the manifest key, "namespace::type" key).
# NOTE(review): the first element is presumably the source file to ingest; confirm against Add().
_xgdb.AddTypeLibrary("metagenomics", types)
_sample_entries = [
    (Path(WORKSPACE_ROOT/"scratch/test_ws/data/local/example.fna"), "contigs.fna", "metagenomics::contigs"),
]
added = _xgdb.Add(_sample_entries)
print(added)

refdb.AddTypeLibrary("metagenomics", types)
_reference_entries = [
    (WORKSPACE_ROOT/"scratch/test_ws/data/local/uniprot_sprot.dmnd", "reference.uniprot_sprot.dmnd", "metagenomics::protein_reference_diamond"),
    (WORKSPACE_ROOT/"scratch/test_ws/data/local/diamond.oci.uri", "container.diamond.oci.uri", "metagenomics::oci_image_diamond"),
    (WORKSPACE_ROOT/"scratch/test_ws/data/local/pprodigal.oci.uri", "container.pprodigal.oci.uri", "metagenomics::oci_image_pprodigal"),
]
added = refdb.Add(_reference_entries)
print(added)

# Persist both libraries, sample library first.
for _lib in (_xgdb, refdb):
    _lib.Save()

# Link to the reference library's index file so it can be opened for inspection.
_index_file = refdb._index_name + refdb._metadata_ext
LinkifyPath(refdb_path/refdb._path_to_meta/_index_file)
# LinkifyPath(refdb_path/refdb._path_to_types)

# Re-open the sample library from disk; later cells use this handle.
xgdb_local = DataInstanceLibrary.Load(xgdb_path)

[PosixPath('contigs.fna')]
[PosixPath('reference.uniprot_sprot.dmnd'), PosixPath('container.diamond.oci.uri'), PosixPath('container.pprodigal.oci.uri')]
./../../main/local_mock/cache/xgdb_tests/ref.xgdb/_metadata/index.yml


In [5]:
# Manifest of the sample library as re-loaded from disk via DataInstanceLibrary.Load.
xgdb_local.manifest

{PosixPath('contigs.fna'): 'metagenomics::contigs'}

In [6]:
# Manifest of the reference library (the in-memory object, not a re-loaded copy).
refdb.manifest

{PosixPath('reference.uniprot_sprot.dmnd'): 'metagenomics::protein_reference_diamond',
 PosixPath('container.diamond.oci.uri'): 'metagenomics::oci_image_diamond',
 PosixPath('container.pprodigal.oci.uri'): 'metagenomics::oci_image_pprodigal'}

In [7]:
# NOTE(review): Globus source setup is disabled (all lines commented out); the
# Source(...) output shown for this cell is left over from an earlier run.
# REMOTE = GlobusSource.Parse("https://app.globus.org/file-manager?origin_id=2602486c-1e0f-47a0-be15-eec1b0ff0f96&origin_path=%2FMetasmith%2Fglobus_test5%2F")
# REMOTE.path = REMOTE.path.parent/"globus_test6"
# REMOTE = REMOTE.AsSource()
# REMOTE

Source(address='globus://2602486c-1e0f-47a0-be15-eec1b0ff0f96:/Metasmith/globus_test6', type=SourceType.GLOBUS)

In [8]:
# NOTE(review): upload is disabled; the output shown for this cell is left over from an earlier run.
# res = xgdb_local.SaveAs(REMOTE, label="msm.dev8")

[(Source(address='/home/tony/workspace/tools/Metasmith/main/local_mock/cache/xgdb_tests/test.xgdb', type=SourceType.DIRECT), Source(address='globus://2602486c-1e0f-47a0-be15-eec1b0ff0f96:/Metasmith/globus_test6', type=SourceType.GLOBUS))]
>[(Source(address='/home/tony/workspace/tools/Metasmith/main/local_mock/cache/xgdb_tests/test.xgdb', type=SourceType.DIRECT), Source(address='globus://2602486c-1e0f-47a0-be15-eec1b0ff0f96:/Metasmith/globus_test6', type=SourceType.GLOBUS))]


In [9]:
# NOTE(review): `res` is only assigned by a commented-out SaveAs call in this
# notebook, so the output shown for this cell is from an earlier run.
# res

LogiscsResult(completed=[(Source(address='/home/tony/workspace/tools/Metasmith/main/local_mock/cache/xgdb_tests/test.xgdb', type=SourceType.DIRECT), Source(address='globus://2602486c-1e0f-47a0-be15-eec1b0ff0f96:/Metasmith/globus_test6', type=SourceType.GLOBUS))], errors=[])

In [9]:
# NOTE(review): disabled; depends on REMOTE, whose definition is also commented
# out in this notebook.
# remotedb_path = CACHE/"remote.xgdb"

# xgdb_remote = DataInstanceLibrary.DownloadFrom(REMOTE, remotedb_path)
# xgdb_remote

In [10]:
# Create a transform library under CACHE and add a stub transform named "test".
tr_path = CACHE/"transforms"
tr_lib = TransformInstanceLibrary(tr_path)
# Remove a test.py left by an earlier run before calling AddStub
# (presumably so the stub is regenerated rather than reused; confirm against AddStub).
_path = tr_path/"test.py"
if _path.exists(): _path.unlink()
added = tr_lib.AddStub("test", exist_ok=True)
# Link to whatever AddStub returned so it can be opened from the notebook.
LinkifyPath(added)

./../../main/local_mock/cache/xgdb_tests/transforms/test.py


In [11]:
# Re-open the transform library from disk (rebinding `tr_lib`) and display the
# type libraries it exposes, keyed by namespace.
tr_lib = TransformInstanceLibrary.Load(tr_path)
tr_lib.lib.types

{'transforms': DataTypeLibrary(types={'example_input': <[example_input,metasmith]:Z0ETnMgP>, 'example_output': <[example_output,metasmith]:dBmYCKSX>, 'transform': <[metasmith,transform]:yi7vyC4a>})}

In [12]:
# Fetch the transform named "test" from the re-loaded library and display it.
inst = tr_lib.GetTransform("test")
inst

TransformInstance(protocol=<function protocol at 0x7f3c643ca840>, model={["example_input"]-["metasmith"]}->{["example_output"]-["metasmith"]}, output_signature={(D:["example_output"]-["metasmith"]): PosixPath('output.txt')})

In [13]:
# Look up one type in the data type library by its bare name (no namespace prefix).
types.types["contigs"]

<{data:DNA sequence,format:FASTA}:4M4PqXwA>

In [14]:
# Look up a type through the transform library using a "namespace::name" key.
tr_lib.GetType("transforms::example_input")

<[example_input,metasmith]:Z0ETnMgP>

In [15]:
# Run the locally loaded "test" transform on the sample contigs, then report
# which of its declared outputs exist in the work directory.
_work_dir = WORKSPACE_ROOT/"main/local_mock/cache/ws1/run_dev8"
_work_dir.mkdir(parents=True, exist_ok=True)
# Resolve the declared output paths once; they are used both for the pre-run
# cleanup and for the post-run existence report.
_expected_outputs = [_work_dir/rel for rel in inst.output_signature.values()]

HERE = os.getcwd()
os.chdir(_work_dir)
# The try/finally covers everything that runs after chdir, including LiveShell
# start-up and callback registration, so the kernel's working directory is
# restored even when the shell fails before the protocol is invoked.
try:
    with LiveShell() as shell:
        shell.RegisterOnOut(lambda x: print(f" |{x}"))
        shell.RegisterOnErr(lambda x: print(f"E |{x}"))
        # Remove outputs from earlier runs so the report below reflects this run only.
        for out_path in _expected_outputs:
            if out_path.exists():
                out_path.unlink()
        res = inst.protocol(ExecutionContext(
            inputs={
                tr_lib.GetType("transforms::example_input"): xgdb_local.location/"contigs.fna",
            },
            work_dir=_work_dir,
            shell=shell,
        ))
        print(res)
finally:
    os.chdir(HERE)

for out_path in _expected_outputs:
    print(out_path, out_path.exists())

ExecutionResult(success=True)
/home/tony/workspace/tools/Metasmith/main/local_mock/cache/ws1/run_dev8/output.txt True


In [16]:
from metasmith.models.remote import SshSource

# Describe the SSH destination, convert it to a generic Source, and push the
# transform library's underlying data library to it.
_ssh_target = SshSource(host="cosmos", path="~/downloads/metasmith_transformlib_test")
remote_source = _ssh_target.AsSource()

tr_lib.lib.SaveAs(dest=remote_source)

In [17]:
# Load the transform library back from the remote source and display its type
# libraries for comparison with the local copy. The destination reuses CACHE
# instead of re-spelling the same directory, so it stays next to
# CACHE/"transforms" if CACHE ever moves.
# NOTE(review): `dest` is presumably the local directory the remote files are
# materialised into; confirm against LoadFrom.
tr_remote = TransformInstanceLibrary.LoadFrom(
    src=remote_source,
    dest=CACHE/"transforms.remote",
)
tr_remote.lib.types

{'transforms': DataTypeLibrary(types={'example_input': <[example_input,metasmith]:Z0ETnMgP>, 'example_output': <[example_output,metasmith]:dBmYCKSX>, 'transform': <[metasmith,transform]:yi7vyC4a>})}

In [20]:
# Fetch the "test" transform from the remotely loaded library and display the
# output paths it declares.
inst_remote = tr_remote.GetTransform("test")
inst_remote.output_signature

{(D:["example_output"]-["metasmith"]): PosixPath('output.txt')}

In [21]:
# Run the "test" transform obtained from the remotely loaded library on the
# sample contigs, then report which of its declared outputs exist.
_work_dir = WORKSPACE_ROOT/"main/local_mock/cache/ws1/run_dev8.remote"
_work_dir.mkdir(parents=True, exist_ok=True)
# Resolve the declared output paths once; they are used both for the pre-run
# cleanup and for the post-run existence report.
_expected_outputs = [_work_dir/rel for rel in inst_remote.output_signature.values()]

HERE = os.getcwd()
os.chdir(_work_dir)
# The try/finally covers everything that runs after chdir, including LiveShell
# start-up and callback registration, so the kernel's working directory is
# restored even when the shell fails before the protocol is invoked.
try:
    with LiveShell() as shell:
        shell.RegisterOnOut(lambda x: print(f" |{x}"))
        shell.RegisterOnErr(lambda x: print(f"E |{x}"))
        # Remove outputs from earlier runs so the report below reflects this run only.
        for out_path in _expected_outputs:
            if out_path.exists():
                out_path.unlink()
        res = inst_remote.protocol(ExecutionContext(
            inputs={
                # Take the input type from the remotely loaded library so this
                # round-trip check does not depend on the local `tr_lib`.
                tr_remote.GetType("transforms::example_input"): xgdb_local.location/"contigs.fna",
            },
            work_dir=_work_dir,
            shell=shell,
        ))
        print(res)
finally:
    os.chdir(HERE)

for out_path in _expected_outputs:
    print(out_path, out_path.exists())

ExecutionResult(success=True)
/home/tony/workspace/tools/Metasmith/main/local_mock/cache/ws1/run_dev8.remote/output.txt True
