In [None]:
from pathlib import Path
from metasmith.python_api import Agent, Source, DataInstanceLibrary, TransformInstanceLibrary, DataTypeLibrary
from metasmith.python_api import Resources, Size, Duration
from metasmith.python_api import ContainerRuntime

# Root directory for everything this notebook caches, relative to the
# notebook's working directory.
base_dir = Path("./cache")

# The agent's home lives under the cache directory; the path is made absolute
# before it is handed to metasmith.
local_home_path = (base_dir / "local_home").resolve()
agent_home = Source.FromLocal(local_home_path)

# Agent configured with the local home and the Docker container runtime.
smith = Agent(home=agent_home, runtime=ContainerRuntime.DOCKER)

# Deployment step, intentionally left disabled. Presumably only needed the
# first time the agent home is set up -- TODO confirm.
# smith.Deploy()

In [None]:
import ipynbname

# Name of the running notebook; used to give this notebook its own
# sub-directory under the cache.
notebook_name = ipynbname.name()

# Input library that will hold the NCBI accession list for this run.
in_dir = base_dir/f"{notebook_name}/accessions.xgdb"
inputs = DataInstanceLibrary(in_dir)
inputs.AddTypeLibrary("ncbi", DataTypeLibrary.Load("../data_types/ncbi.yml"))

# The file below is written directly into the library directory. Whether the
# DataInstanceLibrary constructor creates that directory is not visible here,
# so create it explicitly; this is a no-op when it already exists and keeps a
# fresh "Restart & Run All" from failing on a missing path.
in_dir.mkdir(parents=True, exist_ok=True)

# Genome assembly accessions to process, one per line in the output file.
accessions = [
    "GCF_052692645.1",  # EPI30
    "GCF_000019425.1",  # DH10b
    "GCF_000005845.2",  # K12 MG1655
]

fname = "acc_lst.csv"
with open(in_dir / fname, "w", encoding="utf-8") as f:
    f.writelines(f"{acc}\n" for acc in accessions)

# Register the file under the "ncbi::accessionList" type and persist the library.
inputs.AddItem(fname, "ncbi::accessionList")
inputs.Save()

In [None]:
# Load the pre-built container library that is later passed to the workflow
# generator as `resources`.
containers_path = "../resources/containers"
containers = DataInstanceLibrary.Load(containers_path)

# Cell output: number of entries in the library's manifest (quick sanity check).
len(containers.manifest)

In [None]:
# Transform libraries to load, by directory name under ../transforms.
transform_names = ["dataDownload", "pangenome"]
transforms = [
    TransformInstanceLibrary.Load(f"../transforms/{name}")
    for name in transform_names
]

# Cell output: total number of manifest entries across all loaded libraries.
sum(len(library.manifest) for library in transforms)

In [None]:
# Type library used to look up the workflow target.
# Alternative library, currently disabled:
# dtypes = DataTypeLibrary.Load("../data_types/sequences.yml")
dtypes_path = "../data_types/pangenome.yml"
dtypes = DataTypeLibrary.Load(dtypes_path)

# Cell output: number of types defined in the loaded library.
len(dtypes.types)

In [None]:
# Data type the generated workflow should produce.
# Alternative target, currently disabled: dtypes["genome-like"]
workflow_targets = [dtypes["ppanggolinRaw"]]

# Ask the agent to plan a workflow from the inputs, container resources and
# transform libraries loaded in the cells above.
task = smith.GenerateWorkflow(
    samples=[inputs],
    resources=[containers],
    transforms=transforms,
    targets=workflow_targets,
)

# Render the DAG of the first plan entry into this notebook's cache directory.
# Indexing plans[0][0] assumes at least one plan was produced.
dag_path = base_dir/f"{notebook_name}/dag"
task.plans[0][0].RenderDAG(dag_path)

In [None]:
# Stage the generated workflow. on_exist="clear" is the active mode; judging
# by the name it discards a previously staged copy -- TODO confirm.
smith.StageWorkflow(
    task,
    on_exist="clear",
)
# Alternative mode, currently disabled:
# smith.StageWorkflow(task, on_exist="update_workflow")

In [None]:
# Use the agent's "local" config preset.
nxf_config = smith.GetNxfConfigPresets()["local"]

# Resource override registered under the "all" key: 2 GB of memory.
overrides = {"all": Resources(memory=Size.GB(2))}

# Execute the staged workflow.
smith.RunWorkflow(
    task,
    config_file=nxf_config,
    resource_overrides=overrides,
)