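Enable lineage constraint: generate a workflow plan whose `orf_annotations` target is requested with a lineage that includes `contigs`.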

In [1]:
from __future__ import annotations
from dataclasses import dataclass, field

from metasmith.models.solver import Endpoint, Namespace, _set_default_namespace
from metasmith.models.libraries import *

from local.utils import LinkifyPath
from local.constants import WORKSPACE_ROOT
# Directory under the workspace root where this notebook keeps its instance libraries.
CACHE = WORKSPACE_ROOT/"main/local_mock/cache"

# Install a seeded namespace as the default before any endpoint is created.
# NOTE(review): presumably this keeps generated endpoint identifiers stable across runs — confirm.
SEED = 42
_set_default_namespace(Namespace(seed=SEED))

In [2]:
# Build a type library whose `source` is the metagenomics.dev3.yml prototype file.
_types = DataTypeLibrary(source=WORKSPACE_ROOT/"main/local_mock/prototypes/metagenomics.dev3.yml")

# Type name -> endpoint properties. Entries are registered in declaration order.
_endpoint_specs = {
    "contigs": {"format": "FASTA", "data": "DNA sequence"},
    "orfs_faa": {"format": "FASTA", "data": "Amino acid sequence"},
    "protein_reference_diamond": {"format": ".dmnd", "data": "database reference"},
    "oci_image_diamond": {"format": "OCI", "data": "software container", "provides": ["diamond"]},
    "oci_image_pprodigal": {"format": "OCI", "data": "software container", "provides": ["pprodigal"]},
    "orf_annotations": {"format": "CSV", "data": "Protein features"},
}
for _type_name, _spec in _endpoint_specs.items():
    # Hand FromDict its own dict so the spec table above is never shared with the library.
    _types[_type_name] = Endpoint.FromDict(dict(_spec))

_types.Save()
LinkifyPath(_types.source)

# Reload from the same source that was just saved; later cells use `types`, not `_types`.
types = DataTypeLibrary.Load(_types.source)
len(types)

./../../main/local_mock/prototypes/metagenomics.dev3.yml


6

In [3]:
# Show the name -> endpoint mapping held by the reloaded type library.
types.types

{'contigs': <{"data":"DNA sequence"},{"format":"FASTA"}:5leRQ>,
 'orfs_faa': <{"data":"Amino acid sequence"},{"format":"FASTA"}:r5hC5>,
 'protein_reference_diamond': <{"data":"database reference"},{"format":".dmnd"}:Wyjli>,
 'oci_image_diamond': <{"data":"software container"},{"format":"OCI"},{"provides":["diamond"]}:mV7qR>,
 'oci_image_pprodigal': <{"data":"software container"},{"format":"OCI"},{"provides":["pprodigal"]}:VMBvm>,
 'orf_annotations': <{"data":"Protein features"},{"format":"CSV"}:dOpXR>}

In [4]:
# Two instance libraries in the cache: one for the sample input, one for references and containers.
xgdb_path = CACHE/"test.xgdb"
_xgdb = DataInstanceLibrary(xgdb_path)
refdb = DataInstanceLibrary(CACHE/"ref.xgdb")

# (target library, path relative to WORKSPACE_ROOT, registered type name), imported in this order.
_pending_imports = (
    (_xgdb, "scratch/test_ws/data/local/example.fna", "contigs"),
    (refdb, "scratch/test_ws/data/local/uniprot_sprot.dmnd", "protein_reference_diamond"),
    (refdb, "scratch/test_ws/data/local/diamond.oci.uri", "oci_image_diamond"),
    (refdb, "scratch/test_ws/data/local/pprodigal.oci.uri", "oci_image_pprodigal"),
)
for _library, _relative_path, _type_name in _pending_imports:
    _library.ImportDataInstance(WORKSPACE_ROOT/_relative_path, types[_type_name], overwrite=True)

for _library in (_xgdb, refdb):
    _library.Save()
LinkifyPath(_xgdb._index_path())

# Reload the sample library from its source; later cells use `xgdb`, not `_xgdb`.
xgdb = DataInstanceLibrary.Load(_xgdb.source)

./../../main/local_mock/cache/test.xgdb/info.yml


In [5]:
# Locations handed to the transform loader.
# NOTE(review): `Path` is not imported explicitly in the visible cells; presumably it arrives via
# the star import from metasmith.models.libraries — confirm, or import it from pathlib directly.
transform_dirs = [
    Path("./transforms/simple_1"),
    # Path("./transforms/dupe_test"),
]
trlib = TransformInstanceLibrary.Load(transform_dirs)

# Iterating the library yields (key, transform) pairs; print each key's name and the value's class.
for transform_key, transform in trlib:
    print(transform_key.name, type(transform))

diamond.py <class 'metasmith.models.libraries.TransformInstance'>
pprodigal.py <class 'metasmith.models.libraries.TransformInstance'>


In [8]:
from metasmith.models.workflow import WorkflowPlan

# Target type with a lineage constraint attached: orf_annotations with `contigs` in its lineage.
lineage_target = types["orf_annotations"].WithLineage([
    types["contigs"],
    # xgdb["example.fna"].type,
])

plan = WorkflowPlan.Generate(
    given=[xgdb, refdb],
    transforms=trlib,
    targets=[lineage_target],
)

# One line per planned step: sources consumed -> sources produced.
for step in plan.steps:
    used = [f"{item.source}" for item in step.uses]
    produced = [f"{item.source}" for item in step.produces]
    print(used, produced, sep="->")

['pprodigal.oci.uri', 'example.fna']->['orfs.faa']
['diamond.oci.uri', 'orfs.faa', 'uniprot_sprot.dmnd']->['annotations.csv']
