# Convert plan to Nextflow

In [1]:
import os
from pathlib import Path
from metasmith.solver import WorkflowSolver
from metasmith.models.libraries import DataInstanceLibrary, DataTypeLibrary, DataInstance, TransformInstanceLibrary

from local.constants import WORKSPACE_ROOT

In [2]:
# Data type definitions used to describe the requested targets.
lib = DataTypeLibrary.Load(WORKSPACE_ROOT / "main/local_mock/prototypes/metagenomics.yml")

# Directories of transform definitions handed to the solver.
transform_dirs = [
    Path("./transforms/simple_1"),
    # Path("./transforms/dupe_test"),
]
trlib = TransformInstanceLibrary.Load(transform_dirs)

# Data instance library stored at ./cache/test.yml.
ilib_path = Path("./cache/test.yml")
ilib = DataInstanceLibrary.Load(ilib_path)

In [3]:
solver = WorkflowSolver(trlib)

# Data instances the solver is given to start from.
given_instances = [
    ilib[key]
    for key in (
        "contigs",
        "diamond_reference.uniprot_sprot",
        "container.diamond",
        "container.pprodigal",
    )
]

# Requested product: the "orf_annotations" type, constrained via
# WithAncestors to the type of the given contigs.
requested_types = [
    lib["orf_annotations"].WithAncestors([ilib["contigs"].type]),
]

plan = solver.Solve(given_instances, requested_types, seed=42)

# Displayed as the cell result: True when the solver returned a plan.
plan is not None

True

In [4]:
# One line per planned step: the step key, then the file names of its inputs.
for step in plan.steps:
    input_names = [used.source.name for used, _endpoint in step.uses]
    print(step.key, input_names)

PZ4TH ['example.fna', 'pprodigal.oci.uri']
JBCpR ['diamond.oci.uri', 'uniprot_sprot.dmnd', 'orfs.faa']


In [5]:
# Workspace for this run; the bootstrap script is written inside it.
WS = Path("./cache", "ws1", "run_1")
BOOTSTRAP_BASH = WS.joinpath("bootstrap.sh")
# To start from a clean workspace, uncomment:
# os.system(f"rm -r {WS}")
WS.mkdir(parents=True, exist_ok=True)

In [6]:
from metasmith.agents.bootstrap import Container

# Container used to run the metasmith commands below.
# NOTE(review): binds look like (host path, container path) pairs that overlay
# the local relay build and metasmith sources onto the image -- confirm
# against Container.
CONTAINER = Container(
    image = "quay.io/hallamlab/metasmith:0.2.dev-e185f76",
    binds = [
        (WORKSPACE_ROOT/"main/relay_agent/dist", "/app"),
        (WORKSPACE_ROOT/"src/metasmith", "/opt/conda/envs/metasmith_env/lib/python3.12/site-packages/metasmith"),
    ]
)

# Paths are relative: they resolve against the directory the bootstrap script
# is executed from, not against this notebook.
_deployment = Path("./.msm")
_relay_path = _deployment/"relay"
cmd_deploy = CONTAINER.RunCommand("metasmith api deploy_from_container")
cmd_start_relay = f"nohup {_relay_path}/server --io {_relay_path}/connections start >{_deployment}/logs/relay.log 2>&1 &"
# "$1" is the first argument given to the bootstrap script; it is passed on as
# the "context" field of the JSON request body.
cmd_start_task = CONTAINER.RunCommand('metasmith api execute_transform --body "{\\"context\\": \\"$1\\"}"')
cmd_stop_relay = f"{_relay_path}/server --io {_relay_path}/connections stop"

bootstrap_lines = [
    "#!/bin/bash",
    cmd_deploy,
    cmd_start_relay,
    cmd_start_task,
    # A bash script exits with the status of its last command. Without this,
    # that would be the relay shutdown, hiding a failed transform from the
    # caller, so remember the task's status and re-emit it at the end.
    "task_status=$?",
    cmd_stop_relay,
    "exit $task_status",
]
with open(BOOTSTRAP_BASH, "w") as f:
    f.write("\n".join(bootstrap_lines) + "\n")

print(f"./{BOOTSTRAP_BASH}")

./cache/ws1/run_1/bootstrap.sh


In [7]:
import yaml
from metasmith.models.libraries import ExecutionContext

TAB = " "*4
wf_path = WS/"workflow.nf"
context_dir = WS/"contexts"
context_dir.mkdir(parents=True, exist_ok=True)


def _render_process(name, step, published_endpoints):
    """Return the Nextflow `process` block for one plan step as a string.

    Args:
        name: Identifier used for the Nextflow process.
        step: Plan step; `step.uses` and `step.produces` are iterated as
            (data instance, endpoint) pairs.
        published_endpoints: Endpoints of the plan targets. Outputs whose
            endpoint is in this collection get a `publishDir` directive.
    """
    lines = [f"process {name}" + " {"]

    to_publish = [x for x, e in step.produces if e in published_endpoints]
    for x in to_publish:
        lines.append(TAB+f'publishDir "$params.output", mode: "copy", pattern: "{x.source}"')
    if to_publish:
        lines.append("")  # blank line between directives and the input section

    lines += [
        TAB+"input:",
        TAB+TAB+"path bootstrap",
        TAB+TAB+"path context",
    ]
    # Inputs are named after their data type inside the process.
    lines += [TAB+TAB+f"path _{x.type.name}" for x, e in step.uses]

    lines += ["", TAB+"output:"]
    lines += [TAB+TAB+f'path "{x.source}"' for x, e in step.produces]

    lines += ["", TAB+"script:", TAB+'"""']
    # The task script first records one "<type name>,<staged path>" line per
    # input in inputs.manifest, then runs the bootstrap script on the context.
    lines += [
        TAB+f'echo "{x.type.name},'+'${_'+x.type.name+'}" >>inputs.manifest'
        for x, e in step.uses
    ]
    lines += [
        TAB+"bash $bootstrap $context",
        TAB+'"""',
        "}",
    ]
    return "\n".join(lines)


def _write_context(step, context_path):
    """Build the ExecutionContext for `step` and dump it as YAML to `context_path`."""
    # Keep one type library source per file stem; for duplicate stems the
    # last one seen wins.
    library_by_stem = {}
    for x, e in step.uses:
        library_source = x.type.library.source
        library_by_stem[library_source.stem] = library_source

    context = ExecutionContext(
        inputs = {e.key: x for x, e in step.uses},
        outputs = {e.key: x for x, e in step.produces},
        transform_definition = step.transform.source,
        type_libraries = list(library_by_stem.values()),
    )
    with open(context_path, "w") as f:
        yaml.dump(context.Pack(), f)


def _render_invocation(name, step):
    """Return the workflow-body line that calls process `name` for `step`.

    Channels are named `_<endpoint key>`. Several outputs are unpacked with a
    parenthesised assignment; a step without outputs is emitted as a bare
    call, because an empty left-hand side (` = name(...)`) is not valid
    Nextflow.
    """
    output_vars = [f"_{e.key}" for x, e in step.produces]
    input_vars = ["bootstrap", f"context_{step.key}"] + [f"_{e.key}" for x, e in step.uses]
    call = f"{name}({', '.join(input_vars)})"
    if not output_vars:
        return TAB+call
    assigned = ", ".join(output_vars)
    if len(output_vars) > 1:
        assigned = f"({assigned})"
    return TAB+f"{assigned} = {call}"


contexts = []             # (step key, context file path), in plan order
process_definitions = {}  # process name -> rendered process block
invocations = []          # one process call line per step, in plan order
target_endpoints = {e for x, e in plan.targets}
for step in plan.steps:
    name = f"{step.transform.source.stem}__{step.transform_key}"
    # A process is rendered only the first time its name is encountered.
    if name not in process_definitions:
        process_definitions[name] = _render_process(name, step, target_endpoints)

    context_path = context_dir/f"{step.key}.yml"
    _write_context(step, context_path)
    contexts.append((step.key, context_path))

    invocations.append(_render_invocation(name, step))

workflow_definition = [
    "workflow {",
    TAB+f'bootstrap = Channel.fromPath("{BOOTSTRAP_BASH.resolve()}")',
    *[TAB+f'context_{k} = Channel.fromPath("{p.resolve()}")' for k, p in contexts],
    "",
    # Given inputs enter the workflow as channels named after their endpoint key.
    *[TAB+f'_{e.key} = Channel.fromPath("{x.source.resolve()}") // {x.type.name}' for x, e in plan.given],
    "",
    *invocations,
    "}",
]

wf_contents = (
    "\n\n".join(process_definitions.values())
    + "\n\n"
    + "\n".join(workflow_definition)
    + "\n"
)
with open(wf_path, "w") as f:
    f.write(wf_contents)

print(f"./{wf_path}")

./cache/ws1/run_1/workflow.nf


In [8]:
import subprocess

# Prepend the mock tool directory to PATH for the Nextflow process only.
nxf_env = dict(os.environ)
nxf_env["PATH"] = f'{WORKSPACE_ROOT/"main/local_mock/mock"}:{nxf_env.get("PATH", "")}'

# The command is passed as an argument list (no shell), so paths containing
# spaces or shell metacharacters are handed to Nextflow unchanged.
nxf_cmd = [
    "nextflow",
    "-C", "../../../config/nxf_slurm.nf",  # relative to the run directory (cwd below)
    "-log", f'{(WS/"logs").resolve()}/log',
    "run", str(wf_path.resolve()),
    "-resume",
    "-work-dir", str((WS/"work").resolve()),
    "--account", "dummy_slurm_account",
]

# cwd= replaces the former `cd`: if the run directory is missing this raises
# instead of launching Nextflow from the wrong directory.
# The cell result is Nextflow's exit code (0 on success).
subprocess.run(nxf_cmd, cwd=WS.resolve(), env=nxf_env).returncode

[33mNextflow 24.10.4 is available - Please consider updating your version to it[m



 N E X T F L O W   ~  version 24.10.2

WARN: It appears you have never run this project before -- Option `-resume` is ignored
Launching `/home/tony/workspace/tools/Metasmith/main/local_mock/cache/ws1/run_1/workflow.nf` [irreverent_visvesvaraya] DSL2 - revision: 031f5e3c50

Plus 2 more processes waiting for tasks…

executor >  slurm (1)
[6f/8c0fce] pprod | 0 of 1
Plus 1 more processes waiting for tasks…
