In [1]:
from pathlib import Path
from metasmith.agents.presets import Agent
from metasmith.models.libraries import *
from metasmith.models.remote import *

from local.constants import WORKSPACE_ROOT

In [2]:
# Build the agent used throughout this notebook. Its home is a directory inside the
# workspace's local_mock cache, and it has no extra setup or cleanup commands.
local_home = Source.FromLocal(WORKSPACE_ROOT / "main/local_mock/cache/local_home")
agent = Agent(home=local_home, setup_commands=[], cleanup_commands=[])

# Deploy into the agent home. Per the captured log this creates directories, pulls the
# metasmith container image if it is missing, and copies the msm launchers into place.
agent.Deploy()

2025-03-09_22-12-41  | /home/tony/workspace/tools/Metasmith/main/local_mock/cache/local_home
2025-03-09_22-12-41  | /home/tony
2025-03-09_22-12-41  | >>> mkdir -p /home/tony/workspace/tools/Metasmith/main/local_mock/cache/local_home
2025-03-09_22-12-41  | >>> mkdir -p /home/tony/.globus
2025-03-09_22-12-41  | >>> mkdir -p /home/tony/.globusonline
2025-03-09_22-12-42  | >>> [ -e /home/tony/workspace/tools/Metasmith/main/local_mock/cache/local_home/metasmith.sif ] || apptainer pull /home/tony/workspace/tools/Metasmith/main/local_mock/cache/local_home/metasmith.sif docker://quay.io/hallamlab/metasmith:latest
2025-03-09_22-12-42  | >>> deploying file [/home/tony/workspace/tools/Metasmith/main/local_mock/cache/local_home/msm_stub]
2025-03-09_22-12-42  | >>> deploying file [/home/tony/workspace/tools/Metasmith/main/local_mock/cache/local_home/msm]
2025-03-09_22-12-42  | >>> cd /home/tony/workspace/tools/Metasmith/main/local_mock/cache/local_home && ./msm api deploy_from_container
2025-03-09_

In [3]:
# Directory holding the data libraries and task files used by the cells below.
CACHE = WORKSPACE_ROOT / "main/local_mock/cache/xgdb_tests"

# Load the prototype data-type definitions and print each one.
# Iterating the library yields (name, type) pairs; only the type is printed.
types = DataTypeLibrary.Load(
    WORKSPACE_ROOT / "main/local_mock/prototypes/metagenomics.dev3.yml"
)
for _type_name, dtype in types:
    print(dtype)

<{data:DNA sequence,format:FASTA}:4M4PqXwA>
<{data:software container,format:OCI,provides:diamond}:iGL288Xm>
<{data:software container,format:OCI,provides:pprodigal}:90LdbjQO>
<{data:Protein features,format:CSV}:kImyYZjD>
<{data:Amino acid sequence,format:FASTA}:oF3YSVYQ>
<{data:database reference,format:.dmnd}:VpPqsgy1>


In [4]:
# Load the transform library. The path is relative, so it resolves against the
# kernel's current working directory.
trlib = TransformInstanceLibrary.Load("./transforms/simple_1")

# List every transform as "<name> <transform repr>".
for _tr_path, tr_name, transform in trlib.IterateTransforms():
    print(tr_name, transform)

transforms::transform TransformInstance(protocol=<function protocol at 0x7fb4bf1f65c0>, model={{"data":"software container"}-{"format":"OCI"}-{"provides":"diamond"}},{{"data":"Amino acid sequence"}-{"format":"FASTA"}},{{"data":"database reference"}-{"format":".dmnd"}}->{{"data":"Protein features"}-{"format":"CSV"}}, output_signature={(D:{"data":"Protein features"}-{"format":"CSV"}): PosixPath('annotations.csv')}, name='diamond')
transforms::transform TransformInstance(protocol=<function protocol at 0x7fb4b4dc6d40>, model={{"data":"software container"}-{"format":"OCI"}-{"provides":"pprodigal"}},{{"data":"DNA sequence"}-{"format":"FASTA"}}->{{"data":"Amino acid sequence"}-{"format":"FASTA"}}, output_signature={(D:{"data":"Amino acid sequence"}-{"format":"FASTA"}): PosixPath('orfs.faa')}, name='pprodigal')


In [5]:
def _print_instances(library):
    """Print ``<name> <data>`` for every entry yielded by ``library.Iterate()``."""
    for _inst_path, inst_name, inst in library.Iterate():
        print(inst_name, inst)


xgdb_path = CACHE / "test.xgdb"
refdb_path = CACHE / "ref.xgdb"

# Input data library (the captured output lists the contigs).
xgdb = DataInstanceLibrary.Load(xgdb_path)
_print_instances(xgdb)

print()  # blank line between the two listings

# Reference library (the captured output lists container images and the diamond reference).
refdb = DataInstanceLibrary.Load(refdb_path)
_print_instances(refdb)

metagenomics::contigs <{data:DNA sequence,format:FASTA}:4M4PqXwA>

metagenomics::oci_image_diamond <{data:software container,format:OCI,provides:diamond}:iGL288Xm>
metagenomics::oci_image_pprodigal <{data:software container,format:OCI,provides:pprodigal}:90LdbjQO>
metagenomics::protein_reference_diamond <{data:database reference,format:.dmnd}:VpPqsgy1>


In [6]:
from metasmith.models.workflow import WorkflowPlan, WorkflowTask

# Target: ORF annotations whose lineage is restricted to the "contigs" type.
annotation_target = types["orf_annotations"].WithLineage([
    types["contigs"],
    # xgdb["example.fna"].type,
])

# Generate a plan that reaches the target from the two data libraries
# using the transforms in trlib.
plan = WorkflowPlan.Generate(
    given=[xgdb, refdb],
    transforms=[trlib],
    targets=[annotation_target],
)

# Print each step as: transform name, required inputs, a "v" separator, produced outputs.
for step in plan.steps:
    print(Path(step.transform.name).stem)
    for required in step.transform.model.requires:
        print(f"    {required}")
    print("    v")
    for produced in step.transform.model.produces:
        print(f"    {produced}")
    # print([f"{x.source}" for x in step.uses], [f"{x.source}" for x in step.produces], sep="->")
    # LinkifyPath(step.transform._source.address)

metagenomics::orfs_faa
metagenomics::orf_annotations
pprodigal
    (D:{"data":"software container"}-{"format":"OCI"}-{"provides":"pprodigal"})
    (D:{"data":"DNA sequence"}-{"format":"FASTA"})
    v
    (D:{"data":"Amino acid sequence"}-{"format":"FASTA"})
diamond
    (D:{"data":"software container"}-{"format":"OCI"}-{"provides":"diamond"})
    (D:{"data":"Amino acid sequence"}-{"format":"FASTA"})
    (D:{"data":"database reference"}-{"format":".dmnd"})
    v
    (D:{"data":"Protein features"}-{"format":"CSV"})


In [7]:
# Read the SLURM account name from the workspace secrets file (whitespace stripped).
# It is only referenced by the disabled config entry below.
with open(WORKSPACE_ROOT/"secrets/slurm_account") as account_file:
    slurm_account = account_file.read().strip()

local_task_path = WORKSPACE_ROOT/"main/local_mock/cache/xgdb_tests/task"

# Nextflow settings passed to the task; the SLURM account entry is currently disabled.
nextflow_config = {
    "preset": "default",
    # "slurm_account": slurm_account,
}

# send task & plan
# Bundle the plan, the agent, and the libraries the plan was generated from.
task = WorkflowTask(
    plan=plan,
    agent=agent,
    data_libraries=[xgdb, refdb],
    transform_libraries=[trlib],
    config={"nextflow": nextflow_config},
)
# task.SaveAs(Source.FromLocal(local_task_path))

In [8]:
# Load a task from the local task directory.
# NOTE(review): the `task.SaveAs(Source.FromLocal(local_task_path))` call in the cell that
# builds `task` is commented out, so on a fresh kernel this load depends on files left
# behind by an earlier run. Confirm the directory exists or re-enable the save.
local_task = WorkflowTask.Load(local_task_path)

In [9]:
# Display the plan's key. The same value names the remote cache directory and is passed
# as `key=` to `execute_workflow` later in this notebook. `_key` is an underscore-prefixed
# attribute, so this reaches into what looks like a private field.
task.plan._key

'0zGGH'

In [10]:
# Per-plan cache directory under the agent's home: <agent home>/tmp/<plan key>.
remote_cache = agent.home.GetPath()/f"tmp/{plan._key}"
# Disabled: save the task into that directory and inspect the result.
# NOTE(review): with this save commented out, the later `stage_workflow` call reads
# whatever an earlier run left in `remote_cache`. Confirm that is intended.
# res = task.SaveAs(agent.home.WithPath(remote_cache))
# res.completed

In [11]:
# Stage and execute the planned workflow on the agent through a live shell session.
#
# Steps: run the agent's setup commands, create the per-plan cache directory, start the
# relay service from the agent home, then call the `stage_workflow` and
# `execute_workflow` msm API endpoints. The agent's cleanup commands run in a `finally`
# so they are not skipped when a step raises or the cell is interrupted.
local_temp = Path("./cache/mock_temp/")  # NOTE(review): not referenced again in this cell
with LiveShell() as sh_local, LiveShell() as sh_remote:
    # Local shell output goes to the logger; remote shell output is printed with an
    # "R| " / "E| " prefix. The trailing "\x1b[0;m" is an ANSI reset, so colour codes in
    # a remote line do not bleed into the next one.
    # NOTE(review): sh_local only receives callbacks here; no command is run on it.
    sh_local.RegisterOnOut(lambda x: Log.Info(f"{x}"))
    sh_local.RegisterOnErr(lambda x: Log.Error(f"{x}"))
    sh_remote.RegisterOnOut(lambda x: print(f"R| {x}\x1b[0;m"))
    sh_remote.RegisterOnErr(lambda x: print(f"E| {x}"))
    # remote_cache = Path(f"/tmp/metasmith.{plan._key}")

    # connect and prepare remote
    agent.RunSetup(sh_remote)
    try:
        Log.Info(f"making destination cache at [{remote_cache}]")
        # sh_remote.Exec(f"[ -e {remote_cache} ] && rm -rf {remote_cache}")
        # The `cd` leaves the shell in the agent home, so the relative ./relay and ./msm
        # paths below resolve there.
        sh_remote.Exec(f"mkdir -p {remote_cache} && cd {agent.home.GetPath()}")
        Log.Info("starting relay service")
        sh_remote.Exec("./relay/msm_relay start")

        Log.Info("calling stage")
        # NOTE(review): the original comment here read "this is ok because /tmp is
        # mounted", which matches the disabled /tmp/metasmith.<key> location above.
        # remote_cache now lives under the agent home; confirm that path is visible
        # to the msm container.
        sh_remote.Exec(f'./msm api stage_workflow -a task_dir={remote_cache} -a force=True')
        Log.Info("calling execute")
        sh_remote.Exec(f'./msm api execute_workflow -a key={plan._key}')

        # sh_remote.Exec(f"rm -rf {remote_cache}")
        # sh_remote.Exec(f"{agent.home}/relay/msm_relay stop")
    finally:
        # Always give the agent a chance to clean up, even after a failure or interrupt.
        agent.RunCleanup(sh_remote)

2025-03-09_22-12-50  | making destination cache at [/home/tony/workspace/tools/Metasmith/main/local_mock/cache/local_home/tmp/0zGGH]
2025-03-09_22-12-50  | starting relay service
R| 2025-03-09_22-12-51  | relay server started with pid: [87325][0;m
2025-03-09_22-12-52  | calling stage
R| including dev binds[0;m
R| 2025-03-09_22-12-53  | api call to [stage_workflow] with [{'task_dir': '/home/tony/workspace/tools/Metasmith/main/local_mock/cache/local_home/tmp/0zGGH', 'force': 'True'}][0;m
R| 2025-03-09_22-12-53  | staging workflow [0zGGH] with [4] given data instances[0;m
R| 2025-03-09_22-12-53  | already staged at [/ws/runs/0zGGH][0;m
R| 2025-03-09_22-12-53  | removing previous[0;m
R| 2025-03-09_22-12-53  | ex| /home/tony/workspace/tools/Metasmith/main/local_mock/cache/local_home[0;m
R| 2025-03-09_22-12-53  | work [/ws/runs/0zGGH][0;m
R| 2025-03-09_22-12-53  | data [/ws/data][0;m
R| 2025-03-09_22-12-53  | external work [/home/tony/workspace/tools/Metasmith/main/local_mock/cache/