In [78]:
from __future__ import annotations

import hashlib
import itertools
import os
import sys
from collections import deque
from dataclasses import dataclass, field
from typing import Any, Iterable, Literal

from limes_x.utils import KeyGenerator

class Namespace:
    """Shared registry that hands out keys and caches node hashes.

    ``node_hashes`` maps a node key to its cached hash value (it is filled in
    by ``Node.__hash__``); ``_keys`` holds every key issued by ``NewKey``.
    """

    def __init__(self) -> None:
        self.node_hashes: dict[str, int] = {}
        self._keygen = KeyGenerator()
        self._keys: set[str] = set()

    def NewKey(self):
        """Generate a key, passing all previously issued keys as the blacklist.

        Returns:
            The key produced by the underlying ``KeyGenerator``.
        """
        key = self._keygen.GenerateUID(blacklist=self._keys)
        # Remember the key so that later calls can exclude it. ``add`` is a
        # no-op if the generator already inserted it into the blacklist.
        self._keys.add(key)
        return key

class Node:
    """A keyed element with a set of property names and a set of parent nodes.

    Hashing is based only on the node's key, while equality is a directional
    "has at least these features" test (see ``__eq__``).
    """

    def __init__(
        self,
        namespace: Namespace,
        properties: set[str],
        parents: set[Node],
    ) -> None:
        """Store the given attributes and draw a new key from ``namespace``."""
        self.namespace = namespace
        self.properties = properties
        self.parents = parents
        self.key = namespace.NewKey()

    def __str__(self) -> str:
        """Render as ``<key:prop,prop,...>``."""
        joined_properties = ",".join(self.properties)
        return f"<{self.key}:{joined_properties}>"

    def __repr__(self) -> str:
        """Use the same text as ``__str__``."""
        return str(self)

    def __hash__(self) -> int:
        """Return the MD5 of the key as an integer, cached in the namespace."""
        cache = self.namespace.node_hashes
        try:
            return cache[self.key]
        except KeyError:
            digest = hashlib.md5(self.key.encode("latin1")).hexdigest()
            cache[self.key] = int(digest, 16)
            return cache[self.key]

    # x == y if x is a "subset" of y
    # that is, x has at least all features of y
    def __eq__(self, __value: object) -> bool:
        """Directional comparison; ``a == b`` does not imply ``b == a``.

        True when ``__value`` is a Node, every one of its properties is also
        a property of ``self``, and each of its parents compares equal
        (parent on the left-hand side) to at least one parent of ``self``.
        """
        if not isinstance(__value, Node):
            return False
        if any(prop not in self.properties for prop in __value.properties):
            return False
        return all(
            any(theirs == mine for mine in self.parents)
            for theirs in __value.parents
        )

    def MatchesMemberOf(self, collection: Iterable[Node]):
        """Return True if ``self == member`` holds for some member."""
        for member in collection:
            if self == member:
                return True
        return False

class Dependency(Node):
    """Node type stored in a ``Transform``'s ``requires`` and ``produces`` sets."""

    def __init__(
        self,
        namespace: Namespace,
        properties: set[str],
        parents: set[Node],
    ) -> None:
        """Forward all three arguments unchanged to ``Node.__init__``."""
        super().__init__(
            namespace=namespace,
            properties=properties,
            parents=parents,
        )

class Endpoint(Node):
    """Node type for concrete items: given inputs, targets, and produced outputs."""

    def __init__(
        self,
        namespace: Namespace,
        properties: set[str],
        parents: set[Node] | None = None,
    ) -> None:
        """Create an endpoint.

        Args:
            namespace: Namespace the endpoint draws its key from.
            properties: Property names carried by the endpoint.
            parents: Parent nodes; omitted or ``None`` means no parents.
        """
        # A ``set()`` literal as the default is evaluated once, so every
        # endpoint created without parents would share (and could mutate)
        # the same set object. Build a fresh one per instance instead.
        super().__init__(namespace, properties, set() if parents is None else parents)

@dataclass
class InputGrouping:
    """Constraint that ties several requirements of a transform together.

    ``Transform._valid_trail`` rejects an input combination when the endpoints
    matched to the members of ``group`` have no common parent that compares
    equal to ``parent_prototype``.
    """
    # requirements whose matched endpoints must share a parent
    group: Iterable[Dependency]
    # pattern each candidate parent is compared against (``parent == prototype``)
    parent_prototype: Node

@dataclass
class Application:
    """Record of one transform applied to specific endpoints.

    ``used`` are the endpoints consumed, ``produced`` the endpoints created.
    """
    transform: Transform
    used: Iterable[Endpoint]
    produced: Iterable[Endpoint]

    def Signature(self):
        """Return the signature of this application's transform and inputs."""
        return self.CalculateSignature(self.transform, self.used)

    @classmethod
    def CalculateSignature(cls, tr:Transform, trial: Iterable[Endpoint]):
        """Build the transform key followed by the "-"-joined endpoint keys."""
        joined_keys = "-".join(endpoint.key for endpoint in trial)
        return f"{tr._key}{joined_keys}"

class Transform:
    """A rule that consumes endpoints matching ``requires`` and emits ``produces``.

    ``requires`` and ``produces`` are sets of ``Dependency`` patterns. Optional
    input groupings further constrain which combinations of matched endpoints
    are acceptable (see ``AddInputGrouping``).
    """

    def __init__(self, ns: Namespace) -> None:
        self.requires: set[Dependency] = set()
        self.produces: set[Dependency] = set()
        self._ns = ns
        self._input_groupings: list[InputGrouping] = []
        self._key = ns.NewKey()

    def __str__(self) -> str:
        """Render as ``<{props},{props}->{props}>`` (requirements -> products)."""
        def _props(d: Dependency):
            return "{"+"-".join(d.properties)+"}"
        required = ",".join(_props(r) for r in self.requires)
        produced = ",".join(_props(p) for p in self.produces)
        return f"<{required}->{produced}>"

    def __repr__(self): return f"{self}"

    def AddInputGrouping(self, grp: Iterable[Dependency], parent_prototype: Node):
        """Require the inputs matched to ``grp`` to share a parent.

        Args:
            grp: Requirements of this transform that belong together.
            parent_prototype: Pattern the shared parent must compare equal to.

        Raises:
            AssertionError: If a member of ``grp`` is not in ``requires``.
        """
        # The group is iterated here and again on every validity check, so a
        # one-shot iterable must be materialized; real containers are stored
        # as given to keep their own membership semantics.
        if not isinstance(grp, (list, tuple, set, frozenset)):
            grp = list(grp)
        for d in grp:
            assert d in self.requires, f"{d} not in requirements"
        self._input_groupings.append(InputGrouping(group=grp, parent_prototype=parent_prototype))

    def AddRequirement(self, properties: Iterable[str], parents: set[Dependency] | None = None):
        """Add and return a new required ``Dependency``."""
        return self._add_dependency(self.requires, properties, parents)

    def AddProduct(self, properties: Iterable[str], parents: set[Dependency] | None = None):
        """Add and return a new produced ``Dependency``."""
        return self._add_dependency(self.produces, properties, parents)

    def _add_dependency(self, destination: set[Dependency], properties: Iterable[str], parents: set[Dependency] | None = None):
        """Create a ``Dependency``, insert it into ``destination`` and return it."""
        # A fresh set per call: a ``set()`` default would be a single object
        # shared by every dependency created without explicit parents.
        # Typed as Any so a set of Dependency is accepted where set[Node] is declared.
        _parents: Any = set() if parents is None else parents
        _dep = Dependency(properties=set(properties), parents=_parents, namespace=self._ns)
        destination.add(_dep)
        return _dep

    def _valid_trail(self, trial: Iterable[tuple[Dependency, Endpoint]]):
        """Check a (requirement, endpoint) assignment against all input groupings.

        For every grouping, the endpoints assigned to its member requirements
        must have at least one common parent that compares equal to the
        grouping's prototype. Returns False on the first violation.
        """
        # Iterated once per grouping; materialize so a one-shot iterable does
        # not leave later groupings with nothing to check.
        pairs = list(trial)
        for grouping in self._input_groupings:
            common_parents = None
            members = [e for d, e in pairs if d in grouping.group]
            for member in members:
                candidates = {p for p in member.parents if p == grouping.parent_prototype}
                if common_parents is None:
                    common_parents = candidates
                else:
                    common_parents = common_parents & candidates

                if not common_parents: return False
        return True

    def Apply(self, have: Iterable[Endpoint], blacklist: Iterable[Application]):
        """Enumerate every new application of this transform to ``have``.

        Args:
            have: Endpoints currently available.
            blacklist: Applications already performed; combinations with the
                same signature are skipped.

        Returns:
            A list of ``Application`` objects, one per valid, non-blacklisted
            combination of matching endpoints. Empty if some requirement has
            no match or the transform has no requirements.
        """
        have = list(have)  # scanned once per requirement
        _reqs = list(self.requires)
        matches: list[list[Endpoint]] = []
        for req in _reqs:
            _m = [m for m in have if m == req]
            if not _m: return []
            matches.append(_m)
        # product() of zero pools yields one empty combination; a transform
        # without requirements must yield no applications.
        if not matches: return []

        blacklist_signatures = {a.Signature() for a in blacklist}
        applications: list[Application] = []
        # The number of combinations is the product of the candidate counts
        # (exponential in the number of requirements); generate them lazily.
        for combo in itertools.product(*matches):
            trial = list(zip(_reqs, combo))
            if not self._valid_trail(trial): continue
            used = list(combo)
            sig = Application.CalculateSignature(self, used)
            if sig in blacklist_signatures: continue
            # Parents of an output: every input plus all of the inputs' parents.
            _parents = set()
            for cand in used:
                _parents |= cand.parents
                _parents.add(cand)

            produced = [
                Endpoint(
                    namespace=self._ns,
                    # copies, so outputs do not alias each other's sets or
                    # the property set of the product pattern
                    properties=set(out.properties),
                    parents=set(_parents),
                )
            for out in self.produces]
            applications.append(Application(
                transform=self,
                used=used,
                produced=produced,
            ))
        return applications
    
# Namespace shared by every endpoint and transform created below.
NS = Namespace()
def _set(s: str):
    return set(s.split(", "))

# Starting endpoints: both carry the "annable" and "taxable" properties.
# NOTE(review): `bin` shadows the builtin of the same name for the rest of the module.
asm = Endpoint(NS, _set("asm, annable, taxable"))
bin = Endpoint(NS, _set("bin, annable, taxable"))

# Targets: a "sum" endpoint whose parent is asm, and one whose parent is bin.
sum_asm = Endpoint(NS, _set("sum"), {asm})
sum_bin = Endpoint(NS, _set("sum"), {bin})

# annable -> ann
anner = Transform(NS)
anner.AddRequirement(_set("annable"))
anner.AddProduct(_set("ann"))

# taxable -> tax
taxer = Transform(NS)
taxer.AddRequirement(_set("taxable"))
taxer.AddProduct(_set("tax"))

# ann + tax -> sum; the grouping requires the ann and tax inputs to share a
# parent that has both the "annable" and "taxable" properties.
sumer = Transform(NS)
d_ann = sumer.AddRequirement(_set("ann"))
d_tax = sumer.AddRequirement(_set("tax"))
sumer.AddInputGrouping(
    [d_ann, d_tax],
    Endpoint(NS, _set("annable, taxable"))
)
sumer.AddProduct(_set("sum"))

def Solve(given: Iterable[Endpoint], targets: Iterable[Endpoint], transforms: Iterable[Transform]):
    """Breadth-first search for a sequence of applications that yields all targets.

    Args:
        given: Endpoints available at the start.
        targets: Patterns to satisfy; a target is met when some available
            endpoint ``e`` fulfils ``e == target``.
        transforms: Transforms that may be applied, in any order and number.

    Returns:
        The list of applications of the first state found that meets every
        target (empty if ``given`` already does), or ``False`` if the search
        space is exhausted.
    """
    # Both are re-scanned for every explored state; a one-shot iterable would
    # be empty after the first state and make every later state look solved.
    targets = list(targets)
    transforms = list(transforms)

    # FIFO queue of (available endpoints, applications taken so far).
    todo: deque[tuple[set[Endpoint], list[Application]]] = deque([
        (set(given), [])
    ])

    while todo:
        _have, _path = todo.popleft()  # O(1), unlike list.pop(0)
        if all(any(e == t for e in _have) for t in targets): return _path
        for tr in transforms:
            # the path doubles as the blacklist so no application is repeated
            for appl in tr.Apply(_have, _path):
                todo.append((_have | set(appl.produced), _path+[appl]))
    return False

# Search for a plan that reaches both "sum" targets from asm and bin;
# the bare `x` displays the result as the cell output.
x = Solve([asm, bin], [sum_asm, sum_bin], [anner, taxer, sumer])
x

[Application(transform=<{annable}->{ann}>, used=[<iBLp9efruEYo:taxable,annable,asm>], produced=[<WmMiKdZ21wbQ:ann>]),
 Application(transform=<{annable}->{ann}>, used=[<hCo6o52e7CrJ:taxable,annable,bin>], produced=[<T7DH2XGCR7hV:ann>]),
 Application(transform=<{taxable}->{tax}>, used=[<iBLp9efruEYo:taxable,annable,asm>], produced=[<UN0tLjl6Ze0V:tax>]),
 Application(transform=<{taxable}->{tax}>, used=[<hCo6o52e7CrJ:taxable,annable,bin>], produced=[<qOr6VNOo0WdX:tax>]),
 Application(transform=<{ann},{tax}->{sum}>, used=[<WmMiKdZ21wbQ:ann>, <UN0tLjl6Ze0V:tax>], produced=[<iNNxj69p5A3Q:sum>]),
 Application(transform=<{ann},{tax}->{sum}>, used=[<T7DH2XGCR7hV:ann>, <qOr6VNOo0WdX:tax>], produced=[<4p8riM5j6mGg:sum>])]

In [None]:
# _start = Endpoint(namespace=NS, properties=_settify("primeable"))
# _target = Endpoint(namespace=NS, properties=_settify("starred"), parents={_start})

# primer = Transform(NS)
# primer.AddDependency(
#     "req", _settify("primeable"),
# )
# primer.AddDependency(
#     "prod", _settify("primed"),
# )

# starer = Transform(NS)
# starer.AddDependency(
#     "req", _settify("primed"),
# )
# starer.AddDependency(
#     "prod", _settify("starred"),
# )

# have = {_start}
# targets = {_target}

# def Solve(have: Iterable[Endpoint], targets: Iterable[Endpoint]):
#     todo = [
#         (set(have), [])
#     ]
#     while len(todo)

# xs = primer.Apply([_start])
# ys = starer.Apply(xs)

# for x in ys:
#     print(x == _target, _target == x)
#     print(x.parents, x.properties)
#     print(_target.parents, _target.properties)

In [None]:
  
# NS = Namespace()
# anner = Transform("anner", NS)
# anner.AddDependency(
#     "req", "a_in",
#     "annable".split(", "),
# )
# anner.AddDependency(
#     "prod", "a_out",
#     "ann".split(", "),
# )

# taxer = Transform("taxer", NS)
# taxer.AddDependency(
#     "req", "t_in",
#     "taxable".split(", "),
# )
# taxer.AddDependency(
#     "prod", "t_out",
#     "tax".split(", "),
# )

# sumer = Transform("sumer", NS)
# sumer.AddDependency(
#     "req", "s_in_ann",
#     "ann".split(", "),
# )
# sumer.AddDependency(
#     "req", "s_in_tax",
#     "tax".split(", "),
# )
# sumer.AddDependency(
#     "prod", "s_out",
#     "sum".split(", "),
# )

# in_asm = Endpoint.New(NS, "in_asm", "asm, annable, taxable", have=True)
# in_bin = Endpoint.New(NS, "in_bin", "bin, annable, taxable", have=True)

# for tr in [anner, taxer, sumer]:
#     pass

In [None]:
# from __future__ import annotations
# from dataclasses import dataclass, field
# import os, sys
# from typing import Any, Iterable, Literal
# import networkx as nx
# import hashlib

# class Namespace:
#     def __init__(self) -> None:
#         self.node_hashes: dict[str, int] = {}
#         self.properties: dict[str, Property] = {}

#     def GetProperty(self, key: str):
#         if key not in self.properties:
#             new = Property(self, key)
#             new.back_links = set()
#             self.properties[key] = new
#         return self.properties[key]

# @dataclass
# class Node:
#     namespace: Namespace
#     key: str

#     def __hash__(self) -> int:
#         node_hashes = self.namespace.node_hashes
#         if self.key not in node_hashes:
#             node_hashes[self.key] = int(hashlib.md5(self.key.encode("latin1")).hexdigest(), 16)
#         return node_hashes[self.key]
    
#     def __eq__(self, __value: object) -> bool:
#         if not isinstance(__value, type(self)): return False
#         return self.key == __value.key

# class Linkable:
#     back_links: set[HasLinks]

# @dataclass
# class Haveable:
#     have: bool

# @dataclass
# class HasLinks:
#     links: set[Linkable]

#     def Enforce_backlinks(self):
#         for o in self.links:
#             o.back_links.add(self)

#     def Link(self, o: Linkable):
#         self.links.add(o)
#         o.back_links.add(self)

#     def Clear(self):
#         for o in self.links:
#             o.back_links.remove(self)
#         self.links.clear()

#     def Matches(self, other: HasLinks):
#         return all(l in other.links for l in self.links)

# @dataclass
# class Property(Node, Linkable):
#     def __hash__(self) -> int: return Node.__hash__(self)
#     def __eq__(self, __value: object) -> bool: return Node.__eq__(self, __value)
    
# @dataclass
# class Template(Node, HasLinks):
#     pass

# @dataclass
# class Dependency(Node, HasLinks, Haveable):
#     template: Template
#     def __hash__(self) -> int: return Node.__hash__(self)
#     def __eq__(self, __value: object) -> bool: return Node.__eq__(self, __value)

#     def Reset(self):
#         self.Clear()
#         self.links = self.template.links.copy()
#         self.Enforce_backlinks()

# @dataclass
# class Endpoint(Node, HasLinks, Linkable, Haveable):
#     def __hash__(self) -> int: return Node.__hash__(self)
#     def __eq__(self, __value: object) -> bool: return Node.__eq__(self, __value)

#     @classmethod
#     def New(cls, ns: Namespace, key: str, properties: Iterable[str], parents: set[Linkable]=set(), have=False):
#         _links: set[Linkable] = {ns.GetProperty(p) for p in properties}
#         _links = _links.union(parents)
#         return Endpoint(
#             key = key, links = _links,
#             have = have, namespace=ns,
#         )

# class Transform:
#     def __init__(self, name: str, namespace: Namespace) -> None:
#         self.requires: set[Dependency] = set()
#         self.produces: set[Dependency] = set()
#         self.raw: bool = True
#         self.name = name
#         self._ns = namespace

#     def __repr__(self) -> str:
#         return f"Tr:{self.name}"

#     def AddDependency(self, role: Literal["req"]|Literal["prod"], key: str, properties: Iterable[str], parents: set[Linkable]=set()):
#         _links: set[Linkable] = {self._ns.GetProperty(p) for p in properties}
#         _links = _links.union(parents)
#         _template = Template(key=f"T-{key}", links =_links, namespace=self._ns)
#         _dep = Dependency(key=key, links=_links, template=_template, have=False, namespace=self._ns)
#         _dep.Enforce_backlinks() # should be in init, but @_dep is dataclass!

#         if role == "req":
#             assert _dep not in self.produces
#             self.requires.add(_dep)
#         else:
#             assert _dep not in self.requires
#             self.produces.add(_dep)

#     def Reset(self):
#         self.raw = True
#         for d in self.requires | self.produces:
#             d.Reset()
        
# NS = Namespace()
# anner = Transform("anner", NS)
# anner.AddDependency(
#     "req", "a_in",
#     "annable".split(", "),
# )
# anner.AddDependency(
#     "prod", "a_out",
#     "ann".split(", "),
# )

# taxer = Transform("taxer", NS)
# taxer.AddDependency(
#     "req", "t_in",
#     "taxable".split(", "),
# )
# taxer.AddDependency(
#     "prod", "t_out",
#     "tax".split(", "),
# )

# sumer = Transform("sumer", NS)
# sumer.AddDependency(
#     "req", "s_in_ann",
#     "ann".split(", "),
# )
# sumer.AddDependency(
#     "req", "s_in_tax",
#     "tax".split(", "),
# )
# sumer.AddDependency(
#     "prod", "s_out",
#     "sum".split(", "),
# )

# in_asm = Endpoint.New(NS, "in_asm", "asm, annable, taxable", have=True)
# in_bin = Endpoint.New(NS, "in_bin", "bin, annable, taxable", have=True)

# for tr in [anner, taxer, sumer]:
#     pass

In [2]:
# from __future__ import annotations
# import os, sys
# import asyncio
# from typing import Iterable, Callable, Any
# from pathlib import Path

# from limes_x.solver import DependencySolver, Plan, Dependency
# from limes_x.persistence import ProjectState, Instance
# from limes_x.compute_module import ComputeModule

# mpath = Path("./test_solver/")
# modules = [
#     ComputeModule(mpath.joinpath(d)) for d in os.listdir(mpath)
# ]
# print(modules)

# given = [
#     ("a", "./test_data/a1"),
#     ("a", "./test_data/a2"),
#     ("b", "./test_data/b1"),
# ]

# prj_path = "./cache/man_test01/"
# state = ProjectState(prj_path, on_exist="overwrite")
# for dtype, val in given:
#     state.RegisterInstance(Instance.Str(dtype, val))
# for m in modules:
#     state.RegisterInstance(Instance.ComputeModule(m))

# deps = []
# for k, inst in state._instances.items():
#     if not inst.IsPyType(ComputeModule): continue
#     deps.append(Dependency(inst.val.requires, inst.val.produces, k))

# solver = DependencySolver(deps)
# # plan = solver.Solve({"a"}, {"reuse", "linear", "branched"})
# plan = solver.Solve({"a"}, {"branched"})
# assert plan != False
# [state.GetInstance(m.ref_key) for m in plan]

P:[Dep:7Fvc04hOSij0, Dep:CeaxX8t6TCPk]

In [3]:
# def make_dependency(module: ComputeModule):
#     return Dependency(module.requires, module.produces, module)

# modules = Path("./test_solver/")
# solver = Plan([
#     make_dependency(ComputeModule(p))
# for p in [
#     modules.joinpath(p) for p in os.listdir(modules)
# ]])
# plan = solver.Solve({"a"}, {"reuse", "linear", "branched"})
# plan

In [4]:
# from limes_x.compute_module import ComputeModule

# a = ComputeModule("./test_modules/copy/")
# b = ComputeModule("./test_modules/copy2/")

# a.requires, b.requires

In [5]:
# state = ProjectState("./cache/test_persist")
# ok = Instance("asdf", 1)
# ov = Instance("s", 2)
# state._lineage[ok] = [ov]
# state.Save()

# s2 = ProjectState.Load("./cache/test_persist")
# for k, v in s2._lineage.items():
#     _te = k.type, k.value, ok == k, [(i.type, i.value, i == ov) for i in v]
#     print(_te)

# ok._id

In [6]:
# test = []
# for i in range(100000):
#     x = Instance("asdf", ["x"*150, "y"*150])
#     # x = 1
#     test.append(x)