In [9]:
import time

from hypernodes import Pipeline, node
from hypernodes.cache import DiskCache
from hypernodes.integrations.daft.engine_v2 import DaftEngineV2
from hypernodes.telemetry import ProgressCallback


# Define simple nodes
@node(output_name="doubled")
def double(x: int) -> int:
    """Return twice ``x``.

    Prints an ``[EXECUTING]`` line on every call and then sleeps for
    0.3 seconds before producing the value, so each real invocation is
    visible in the cell output.

    Args:
        x: The value to double.

    Returns:
        ``x * 2``.
    """
    print(f"  [EXECUTING] double({x})")
    time.sleep(0.3)  # artificial delay
    twice = x * 2
    return twice


@node(output_name="result")
def add_ten(doubled: int) -> int:
    """Return ``doubled`` increased by 10.

    Prints an ``[EXECUTING]`` line on every call and then sleeps for
    0.3 seconds before producing the value, so each real invocation is
    visible in the cell output.

    Args:
        doubled: The value to add 10 to; its name matches the
            ``output_name`` declared on ``double``.

    Returns:
        ``doubled + 10``.
    """
    print(f"  [EXECUTING] add_ten({doubled})")
    time.sleep(0.3)  # artificial delay
    shifted = doubled + 10
    return shifted


"""Test 1: Basic .run() with caching."""
print("=" * 70)
print("TEST 1: Basic .run() with caching")
print("=" * 70)

# Create the on-disk cache and clear it up front so that, presumably, no
# entries from an earlier session remain when the first run starts.
cache = DiskCache(".daft_v2_test_cache")
cache.clear()

engine = DaftEngineV2()
pipeline = Pipeline(
    nodes=[double, add_ten],
    engine=engine,
    # The cache has to be handed to the pipeline: without it the
    # "should CACHE HIT" run below has no cache to consult and simply
    # re-executes both nodes.
    cache=cache,
    # No progress callback is attached in this cell.
)

# NOTE(review): the assertions below check returned values only. Whether a
# run was served from the cache is visible solely through the presence or
# absence of the "[EXECUTING]" lines printed by the nodes.

# First run - nothing cached yet, so both nodes should execute.
print("\n--- First run (x=5) - should EXECUTE ---")
result1 = pipeline.run(inputs={"x": 5})
print(f"Result: {result1}")
assert result1["result"] == 20, f"Expected 20, got {result1['result']}"

# Second run - identical input, expected to be served from the cache.
print("\n--- Second run (x=5) - should CACHE HIT ---")
result2 = pipeline.run(inputs={"x": 5})
print(f"Result: {result2}")
assert result2["result"] == 20, f"Expected 20, got {result2['result']}"

# Third run - new input, so both nodes should execute again.
print("\n--- Third run (x=10) - should EXECUTE ---")
result3 = pipeline.run(inputs={"x": 10})
print(f"Result: {result3}")
assert result3["result"] == 30, f"Expected 30, got {result3['result']}"

print("\n✓ Test 1 passed!\n")

TEST 1: Basic .run() with caching

--- First run (x=5) - should EXECUTE ---
  [EXECUTING] double(5)
  [EXECUTING] add_ten(10)
Result: {'doubled': 10, 'result': 20}

--- Second run (x=5) - should CACHE HIT ---
  [EXECUTING] double(5)
  [EXECUTING] add_ten(10)
Result: {'doubled': 10, 'result': 20}

--- Third run (x=10) - should EXECUTE ---
  [EXECUTING] double(10)
  [EXECUTING] add_ten(20)
Result: {'doubled': 20, 'result': 30}

✓ Test 1 passed!



In [10]:
"""Test 2: .map() with multiple inputs."""
# Banner: rule, title, rule - one line each.
print("=" * 70, "TEST 2: .map() with multiple inputs", "=" * 70, sep="\n")

# A DiskCache is created and cleared here, but it is not attached to the
# pipeline in this cell.
cache = DiskCache(".daft_v2_test_cache")
cache.clear()

# No cache or callbacks are passed to the pipeline in this cell.
engine = DaftEngineV2()
pipeline = Pipeline(nodes=[double, add_ten], engine=engine)

# Run the pipeline once per element of "x".
print("\n--- Mapping over [1, 2, 3] ---")
results = pipeline.map(
    inputs={"x": [1, 2, 3]},
    map_over="x",
)
print(f"Results: {results}")

# Each element goes through double() then add_ten(): 1 -> 12, 2 -> 14, 3 -> 16.
assert results["result"] == [12, 14, 16], f"Expected [12, 14, 16], got {results['result']}"

print("\n✓ Test 2 passed!\n")


TEST 2: .map() with multiple inputs

--- Mapping over [1, 2, 3] ---
  [EXECUTING] double(1)
  [EXECUTING] double(2)
  [EXECUTING] double(3)
  [EXECUTING] add_ten(2)
  [EXECUTING] add_ten(4)
  [EXECUTING] add_ten(6)
Results: {'doubled': [2, 4, 6], 'result': [12, 14, 16]}

✓ Test 2 passed!



In [3]:
"""Test 3: .map() with partial cache hits."""
print("=" * 70)
print("TEST 3: .map() with partial cache hits")
print("=" * 70)

# Create the on-disk cache and clear it before the first map.
cache = DiskCache(".daft_v2_test_cache")
cache.clear()

engine = DaftEngineV2()
pipeline = Pipeline(
    nodes=[double, add_ten],
    engine=engine,
    cache=cache,
    callbacks=[ProgressCallback()],
)

# NOTE(review): the assertions in this cell check returned values only. They
# do not verify that items shared between the two maps (x=2 and x=3) were
# skipped on the second map; that is visible solely through the
# "[EXECUTING]" lines printed by the nodes.

# First map - cache was just cleared, so every item should execute.
print("\n--- First map: [1, 2, 3] - should EXECUTE ---")
results1 = pipeline.map(inputs={"x": [1, 2, 3]}, map_over="x")
print(f"Results: {results1}")

# Validate the first map as well, so a wrong result here cannot pass
# unnoticed behind a correct second map.
assert results1["result"] == [12, 14, 16], (
    f"Expected [12, 14, 16], got {results1['result']}"
)

# Second map - x=2 and x=3 overlap with the first map, x=4 is new.
print("\n--- Second map: [2, 3, 4] - should have partial CACHE HITs ---")
results2 = pipeline.map(inputs={"x": [2, 3, 4]}, map_over="x")
print(f"Results: {results2}")

assert results2["result"] == [14, 16, 18], (
    f"Expected [14, 16, 18], got {results2['result']}"
)

print("\n✓ Test 3 passed!\n")

TEST 3: .map() with partial cache hits

--- First map: [1, 2, 3] - should EXECUTE ---


Running pipeline_4478148560 with 3 examples...   0%|          | 0/3 [00:00<?, ?it/s]

double   0%|          | 0/3 [00:00<?, ?it/s]

add_ten   0%|          | 0/3 [00:00<?, ?it/s]

  [EXECUTING] double(1)
  [EXECUTING] double(2)
  [EXECUTING] double(3)
  [EXECUTING] add_ten(2)
  [EXECUTING] add_ten(4)
  [EXECUTING] add_ten(6)
Results: {'doubled': [2, 4, 6], 'result': [12, 14, 16]}

--- Second map: [2, 3, 4] - should have partial CACHE HITs ---


Running pipeline_4478148560 with 3 examples...   0%|          | 0/3 [00:00<?, ?it/s]

double   0%|          | 0/3 [00:00<?, ?it/s]

add_ten   0%|          | 0/3 [00:00<?, ?it/s]

  [EXECUTING] double(2)
  [EXECUTING] double(3)
  [EXECUTING] double(4)
  [EXECUTING] add_ten(4)
  [EXECUTING] add_ten(6)
  [EXECUTING] add_ten(8)
Results: {'doubled': [4, 6, 8], 'result': [14, 16, 18]}

✓ Test 3 passed!



In [4]:
"""Test 4: Output filtering with output_name parameter."""
# Banner: rule, title, rule - one line each.
print("=" * 70, "TEST 4: Output filtering", "=" * 70, sep="\n")

# Create the on-disk cache and clear it before running.
cache = DiskCache(".daft_v2_test_cache")
cache.clear()

engine = DaftEngineV2()
pipeline = Pipeline(nodes=[double, add_ten], engine=engine, cache=cache)

# Ask the pipeline for a single named output.
print("\n--- Run with output_name='result' ---")
result = pipeline.run(
    inputs={"x": 5},
    output_name="result",
)
print(f"Result: {result}")

# The requested output must be present, the intermediate 'doubled' output
# must be absent, and the value must be 5 * 2 + 10.
assert "result" in result, "Expected 'result' in output"
assert "doubled" not in result, "Expected 'doubled' NOT in output"
assert result["result"] == 20, f"Expected 20, got {result['result']}"

print("\n✓ Test 4 passed!\n")

TEST 4: Output filtering

--- Run with output_name='result' ---
  [EXECUTING] double(5)
  [EXECUTING] add_ten(10)
Result: {'result': 20}

✓ Test 4 passed!

