Enable the lineage constraint: request `orf_annotations` with `contigs` in its lineage when generating the workflow plan.

In [1]:
from __future__ import annotations
from dataclasses import dataclass, field

from metasmith.models.solver import Endpoint, Namespace, _set_default_namespace
from metasmith.models.libraries import *

from local.utils import LinkifyPath
from local.constants import WORKSPACE_ROOT
# Directory under the workspace root from which the data-instance libraries
# (test.xgdb, ref.xgdb) are loaded further down.
CACHE = WORKSPACE_ROOT/"main/local_mock/cache"

# Register a Namespace constructed with a fixed seed as the default namespace.
# NOTE(review): presumably the fixed seed keeps generated keys reproducible
# between kernel restarts — confirm against metasmith.models.solver.
_set_default_namespace(Namespace(seed=42))

In [2]:
# Load the data-type prototypes and the two cached data-instance libraries,
# then display how many entries each library holds.
types = DataTypeLibrary.Load(WORKSPACE_ROOT/"main/local_mock/prototypes/metagenomics.dev3.yml")
xgdb, refdb = (
    DataInstanceLibrary.Load(CACHE/library_name)
    for library_name in ("test.xgdb", "ref.xgdb")
)
tuple(len(library) for library in (types, xgdb, refdb))

(6, 1, 3)

In [3]:
# Directories scanned for transform definitions.
transform_dirs = [
    Path("./transforms/simple_1"),
    # Path("./transforms/dupe_test"),
]
trlib = TransformInstanceLibrary.Load(transform_dirs)

# List every loaded transform: file name of its key and the instance class.
for transform_key, transform in trlib:
    print(transform_key.name, type(transform))

diamond.py <class 'metasmith.models.libraries.TransformInstance'>
pprodigal.py <class 'metasmith.models.libraries.TransformInstance'>


In [4]:
from metasmith.models.workflow import WorkflowPlan

# The single target: orf_annotations, constrained via WithLineage to the
# listed types (currently only contigs).
required_lineage = [
    types["contigs"],
    # xgdb["example.fna"].type,
]
annotations_target = types["orf_annotations"].WithLineage(required_lineage)

plan = WorkflowPlan.Generate(
    given=[xgdb, refdb],
    transforms=trlib,
    targets=[annotations_target],
)

# For every planned step, show "inputs->outputs" by source, followed by a
# link to the transform definition that performs the step.
for step in plan.steps:
    consumed = [f"{endpoint.source}" for endpoint in step.uses]
    produced = [f"{endpoint.source}" for endpoint in step.produces]
    print(consumed, produced, sep="->")
    LinkifyPath(step.transform._source)

['/home/tony/workspace/tools/Metasmith/main/local_mock/cache/ref.xgdb/pprodigal.oci.uri', '/home/tony/workspace/tools/Metasmith/main/local_mock/cache/test.xgdb/example.fna']->['orfs.faa']
./../../main/local_mock/transforms/simple_1/pprodigal.py
['/home/tony/workspace/tools/Metasmith/main/local_mock/cache/ref.xgdb/diamond.oci.uri', 'orfs.faa', '/home/tony/workspace/tools/Metasmith/main/local_mock/cache/ref.xgdb/uniprot_sprot.dmnd']->['annotations.csv']
./../../main/local_mock/transforms/simple_1/diamond.py


In [5]:
from metasmith.models.workflow import Workspace

# base = Workspace(source=)

In [6]:
import shutil

# Workspace for this run and the bootstrap script that will be written into it.
WS = Path("./cache/ws1/run_dev4")
BOOTSTRAP_BASH = WS/"bootstrap.sh"

# Start every run from an empty workspace. shutil.rmtree replaces the previous
# `rm -r` shell call: no shell interpolation of the path, no error noise when
# the directory does not exist yet (first run), and a genuine failure to clean
# up raises instead of being silently ignored and leaving stale files behind.
if WS.exists():
    shutil.rmtree(WS)
WS.mkdir(exist_ok=True, parents=True)

In [7]:
from metasmith.agents.bootstrap import Container

# Container description: image plus (host path, container path) bind pairs.
_container_binds = [
    (WORKSPACE_ROOT/"main/relay_agent/dist", "/app"),
    (WORKSPACE_ROOT/"src/metasmith", "/opt/conda/envs/metasmith_env/lib/python3.12/site-packages/metasmith"),
]
CONTAINER = Container(image="quay.io/hallamlab/metasmith:0.3", binds=_container_binds)

_deployment = Path("./.msm")
_relay_path = _deployment/"relay"

# The four commands of the bootstrap script, in execution order:
# deploy, start the relay in the background, run the task, stop the relay.
# "$1" in the task command is the script's first argument (the context file).
cmd_deploy = CONTAINER.RunCommand("metasmith api deploy_from_container")
cmd_start_relay = f"nohup {_relay_path}/server --io {_relay_path}/connections start >{_deployment}/logs/relay.log 2>&1 &"
cmd_start_task = CONTAINER.RunCommand('metasmith api execute_transform --body "{\\"context\\": \\"$1\\"}"')
cmd_stop_relay = f"{_relay_path}/server --io {_relay_path}/connections stop"

# Write the shebang and the commands, one per line, each newline-terminated.
_bootstrap_lines = ["#!/bin/bash", cmd_deploy, cmd_start_relay, cmd_start_task, cmd_stop_relay]
with open(BOOTSTRAP_BASH, "w") as f:
    for _line in _bootstrap_lines:
        f.write(_line + "\n")

LinkifyPath(BOOTSTRAP_BASH)

./cache/ws1/run_dev4/bootstrap.sh


In [8]:
import yaml
from metasmith.models.libraries import ExecutionContext

# Generate the Nextflow script for `plan`:
#   * one `process` definition per distinct transform,
#   * one packed ExecutionContext YAML file per step,
#   * a `workflow` block wiring given inputs and step outputs together through
#     channel variables named after each endpoint's type key.
TAB = " "*4
TAB2 = TAB*2
wf_path = WS/"workflow.nf"
context_dir = WS/"contexts"
context_dir.mkdir(exist_ok=True, parents=True)

contexts: dict[str, Path] = {}
process_definitions = {}
workflow_definition = []
target_endpoints = set(plan.targets)
used_libraries = {
    library
    for planned_step in plan.steps
    for library in planned_step.transform._used_libraries
}

for step in plan.steps:
    name = f"{step.transform._source.stem}__{step.transform_key}"

    # Emit the process definition only the first time this name is seen.
    if name not in process_definitions:
        # Outputs that are workflow targets get a publishDir directive each.
        published = [produced for produced in step.produces if produced in target_endpoints]
        publish_lines = [
            TAB+f'publishDir "$params.output", mode: "copy", pattern: "{produced.source}"'
            for produced in published
        ]
        if published:
            publish_lines.append("") # blank line after the directives

        # Inputs are numbered from 01; the trailing comment is the type's
        # string form with its ":<key>" part removed.
        input_lines = []
        for position, used in enumerate(step.uses, start=1):
            type_label = str(used.type).replace(":"+used.type.key, "")
            input_lines.append(TAB2+f'path _{position:02} // {type_label}')

        output_lines = [TAB2+f'path "{produced.source}"' for produced in step.produces]

        process_definitions[name] = "\n".join([
            f"process {name}"+" {",
            *publish_lines,
            TAB+"input:",
            TAB2+f'path bootstrap',
            TAB2+f'path context',
            *input_lines,
            "",
            TAB+"output:",
            *output_lines,
            "",
            TAB+'script:',
            TAB+'"""',
            TAB+f'bash $bootstrap $context',
            TAB+'"""',
            "}",
        ])

    # Persist this step's execution context, keyed by its zero-padded order.
    step_key = f"{step.order:03}"
    context_path = context_dir/f"{step_key}.yml"
    context = ExecutionContext(
        input = list(step.uses),
        output = list(step.produces),
        transform_definition = step.transform._source,
        type_libraries = used_libraries,
    )
    with context_path.open("w") as f:
        yaml.dump(context.Pack(), f)
    contexts[step_key] = context_path

    # Workflow call line; several outputs are destructured as a tuple.
    call_outputs = ', '.join(f"_{produced.type.key}" for produced in step.produces)
    if len(step.produces) > 1:
        call_outputs = f"({call_outputs})"
    call_inputs = ', '.join(
        ['bootstrap', f'context_{step_key}', *(f"_{used.type.key}" for used in step.uses)]
    )
    workflow_definition.append(TAB+f'{call_outputs} = {name}({call_inputs})')

# Wrap the collected call lines in the workflow block, preceded by the channel
# declarations for the bootstrap script, the context files and the given data.
context_channels = [
    TAB+f'context_{key} = Channel.fromPath("{path.resolve()}")'
    for key, path in contexts.items()
]
given_channels = [
    TAB+f'_{given.type.key}'+f' = Channel.fromPath("{given.source.resolve()}") // {given.type}'
    for given in plan.given
]
workflow_definition = [
    "workflow {",
    TAB+f'bootstrap = Channel.fromPath("{BOOTSTRAP_BASH.resolve()}")',
    *context_channels,
    "",
    *given_channels,
    "",
    *workflow_definition,
    "}",
]

# Final file: process definitions separated by blank lines, a blank line, the
# workflow block, and a trailing newline.
wf_contents = (
    "\n\n".join(process_definitions.values())
    + "\n\n"
    + "\n".join(workflow_definition)
    + "\n"
)
with wf_path.open("w") as f:
    f.write(wf_contents)

LinkifyPath(wf_path)

./cache/ws1/run_dev4/workflow.nf


In [9]:
from metasmith.coms.ipc import LiveShell

# Run the generated workflow with Nextflow in a live shell. The mock directory
# is prepended to PATH, and the config file is addressed relative to the
# workspace directory that the script changes into.
with LiveShell() as shell:
    mock_bin = WORKSPACE_ROOT/"main/local_mock/mock"
    workspace_abs = WS.resolve()
    log_dir = (WS/"logs").resolve()
    workflow_abs = wf_path.resolve()
    work_dir = (WS/"work").resolve()
    # Backslash-newlines inside this non-raw string are line continuations, so
    # the nextflow invocation reaches the shell as a single line.
    launch_script = f"""\
        PATH={mock_bin}:$PATH
        cd {workspace_abs}
        nextflow -C ../../../config/nxf_slurm.nf \
            -log {log_dir}/log \
            run {workflow_abs} \
            -resume \
            -work-dir {work_dir} \
            --account dummy_slurm_account
        """
    shell.Exec(launch_script, timeout=None, silent=False)

E: [33mNextflow 24.10.4 is available - Please consider updating your version to it[m
I: [1m[38;5;232m[48;5;43m N E X T F L O W [0;2m  ~  [mversion 24.10.2[m
I: [K
I: WARN: It appears you have never run this project before -- Option `-resume` is ignored
I: Launching[35m `/home/tony/workspace/tools/Metasmith/main/local_mock/cache/ws1/run_dev4/workflow.nf` [0;2m[[0;1;36mjovial_swartz[0;2m] DSL2 - [36mrevision: [0;36m519e30a6da[m
I: [K
I: [2m[[0;34m-        [0;2m] [0;2m[mpprod[2m |[m 0 of 1[K
I: [3;2mPlus [1m1[0;3;2m more processes waiting for tasks…[m[K
I: [3A
I: [2mexecutor >  slurm (1)[m[K
I: [2m[[0;34m09/865251[0;2m] [0;2m[mpprod[2m |[m 0 of 1[K
I: [3;2mPlus [1m1[0;3;2m more processes waiting for tasks…[m[K
I: [4A
I: [2mexecutor >  slurm (1)[m[K
I: [2m[[0;34m09/865251[0;2m] [0;2m[mpprod[2m |[m 0 of 1[K
I: [3;2mPlus [1m1[0;3;2m more processes waiting for tasks…[m[K
I: [4A
I: [2mexecutor >  slurm (1)[m[K
I: [2m[[0;34m0