In [1]:
"""Test ProgressCallback with basic pipeline."""
import time

from hypernodes import Pipeline, node
from hypernodes.telemetry import ProgressCallback


@node(output_name="doubled")
def double(x: int) -> int:
    """Return ``x`` multiplied by two.

    Pauses for 0.1 s first so that progress reporting has something to show.
    """
    time.sleep(0.1)  # artificial latency, only there to make progress visible
    twice = x * 2
    return twice


@node(output_name="result")
def add_one(doubled: int) -> int:
    """Return ``doubled`` incremented by one, after a 0.1 s pause."""
    time.sleep(0.1)
    incremented = doubled + 1
    return incremented


# Build the two-node pipeline and attach a progress callback to it.
progress = ProgressCallback(enable=True)
pipeline = Pipeline(
    name="double_and_add",
    nodes=[double, add_one],
    callbacks=[progress],
)

# Single run with x=5; the returned value is printed in full.
result = pipeline.run(inputs={"x": 5})
print(f"Result: {result}")

double_and_add   0%|          | 0/2 [00:00<?, ?it/s]

double   0%|          | 0/1 [00:00<?, ?it/s]

add_one   0%|          | 0/1 [00:00<?, ?it/s]

Result: {'doubled': 10, 'result': 11}


In [2]:
"""Test ProgressCallback with .map() operation."""


@node(output_name="squared")
def square(x: int) -> int:
    """Return ``x`` raised to the second power, after a 0.05 s pause."""
    time.sleep(0.05)
    squared_value = x**2
    return squared_value


# Single-node pipeline used to exercise the progress callback under .map().
progress_map = ProgressCallback(enable=True)
pipeline_map = Pipeline(
    name="map_example",
    nodes=[square],
    callbacks=[progress_map],
)

# Map the pipeline over five values of "x" and print everything returned.
results = pipeline_map.map(
    inputs={"x": [1, 2, 3, 4, 5]},
    map_over="x",
)
print(f"Results: {results}")


Running map_example with 5 examples...   0%|          | 0/5 [00:00<?, ?it/s]

square   0%|          | 0/5 [00:00<?, ?it/s]

Results: [{'squared': 1}, {'squared': 4}, {'squared': 9}, {'squared': 16}, {'squared': 25}]


In [3]:
# Run the existing `pipeline` object again with x=5; the bare expression on
# the last line displays the returned value as the cell output.
result = pipeline.run(
    inputs={"x": 5},
)
result

double_and_add   0%|          | 0/2 [00:00<?, ?it/s]

double   0%|          | 0/1 [00:00<?, ?it/s]

add_one   0%|          | 0/1 [00:00<?, ?it/s]

{'doubled': 10, 'result': 11}

In [4]:
# Map the existing `pipeline` over three values of "x"; the bare expression on
# the last line displays the returned list as the cell output.
result = pipeline.map(
    inputs={"x": [1, 2, 3]},
    map_over="x",
)
result

Running double_and_add with 3 examples...   0%|          | 0/3 [00:00<?, ?it/s]

double   0%|          | 0/3 [00:00<?, ?it/s]

add_one   0%|          | 0/3 [00:00<?, ?it/s]

[{'doubled': 2, 'result': 3},
 {'doubled': 4, 'result': 5},
 {'doubled': 6, 'result': 7}]

In [5]:
from hypernodes import Pipeline, node
from hypernodes.telemetry import ProgressCallback


@node(output_name="doubled")
def double(x: int) -> int:
    """Return twice the value of ``x`` after sleeping for 0.1 s.

    Same body as the ``double`` node defined in the first cell; running this
    cell rebinds the name.
    """
    time.sleep(0.1)  # brief pause so progress reporting is observable
    product = x * 2
    return product


@node(output_name="result")
def add_one(doubled: int) -> int:
    """Return ``doubled + 1`` after sleeping for 0.1 s.

    Same body as the ``add_one`` node defined in the first cell; running this
    cell rebinds the name.
    """
    time.sleep(0.1)
    successor = doubled + 1
    return successor


# Rebuild the two-node pipeline with a progress callback attached.
# The callback is created with enable=True, so progress output is ON here.
# NOTE(review): presumably enable=False is what silences it (e.g. in tests)
# — confirm against ProgressCallback.
progress = ProgressCallback(enable=True)
pipeline = Pipeline(
    name="double_and_add",
    nodes=[double, add_one],
    callbacks=[progress],
)

# Single run with x=5; the value is stored but not displayed by this cell.
result = pipeline.run(inputs={"x": 5})

double_and_add   0%|          | 0/2 [00:00<?, ?it/s]

double   0%|          | 0/1 [00:00<?, ?it/s]

add_one   0%|          | 0/1 [00:00<?, ?it/s]

In [6]:
import logfire

from hypernodes import Pipeline, node
from hypernodes.telemetry import TelemetryCallback

# Configure logfire with send_to_logfire=False (local only, no cloud export),
# then create the telemetry callback.
logfire.configure(send_to_logfire=False)
telemetry = TelemetryCallback()

# Note: this rebinds `pipeline` to a new Pipeline that carries only the
# telemetry callback.
pipeline = Pipeline(
    name="double_and_add",
    nodes=[double, add_one],
    callbacks=[telemetry],
)

# Single run with x=5.
result = pipeline.run(inputs={"x": 5})

15:59:13.481 pipeline:pipeline_4587878928
15:59:13.483   node:double
15:59:13.588   node:add_one


In [7]:
# Ask the telemetry callback for its waterfall chart; the bare expression on
# the last line displays the chart as the cell output.
chart = telemetry.get_waterfall_chart()
chart

In [8]:
from hypernodes import Pipeline, node
from hypernodes.telemetry import ProgressCallback


@node(output_name="encoded")
def encode_text(text: str) -> list:
    """Stand-in for a text encoder.

    Sleeps for 0.3 s and returns the fixed list ``[1, 2, 3]``; ``text`` itself
    is not inspected.
    """
    time.sleep(0.3)
    placeholder_encoding = [1, 2, 3]
    return placeholder_encoding


@node(output_name="embedded")
def embed_vectors(encoded: list) -> list:
    """Stand-in for a vector embedder.

    Sleeps for 0.3 s and returns the fixed list ``[0.1, 0.2, 0.3]``;
    ``encoded`` itself is not inspected.
    """
    time.sleep(0.3)
    placeholder_embedding = [0.1, 0.2, 0.3]
    return placeholder_embedding


@node(output_name="normalized")
def normalize(embedded: list) -> list:
    """Stand-in for a normalization step.

    Sleeps for 0.3 s, then divides every element of ``embedded`` by the sum
    of all elements and returns the quotients as a new list.
    """
    time.sleep(0.3)
    denominator = sum(embedded)
    scaled = []
    for value in embedded:
        scaled.append(value / denominator)
    return scaled


@node(output_name="scores")
def calculate_scores(normalized: list) -> float:
    """Stand-in for a scoring step.

    Sleeps for 0.3 s and returns the sum of the elements of ``normalized``.
    """
    time.sleep(0.3)
    score = sum(normalized)
    return score


# Four-node pipeline with a descriptive name and a progress callback attached.
progress = ProgressCallback(enable=True)
retrieval_pipeline = Pipeline(
    name="retrieval pipeline",
    nodes=[encode_text, embed_vectors, normalize, calculate_scores],
    callbacks=[progress],
)

# In the recorded output of this cell the progress display consists of:
#   - one overall bar labelled "Running retrieval pipeline with 5 examples..."
#   - one bar per node, labelled with the node name
#     (encode_text, embed_vectors, normalize, calculate_scores)

# Map the pipeline over five placeholder documents.
results = retrieval_pipeline.map(
    inputs={"text": ["doc1", "doc2", "doc3", "doc4", "doc5"]},
    map_over="text",
)

print(f"\nProcessed {len(results)} documents")

Running retrieval pipeline with 5 examples...   0%|          | 0/5 [00:00<?, ?it/s]

encode_text   0%|          | 0/5 [00:00<?, ?it/s]

embed_vectors   0%|          | 0/5 [00:00<?, ?it/s]

normalize   0%|          | 0/5 [00:00<?, ?it/s]

calculate_scores   0%|          | 0/5 [00:00<?, ?it/s]


Processed 5 documents


In [9]:
# Test the fix: reload the Daft engine code so edits made on disk are picked
# up without restarting the kernel.
import importlib

import hypernodes.engines
import hypernodes.integrations.daft.engine

# Reload the implementation module first, then the module DaftEngine is
# imported from below. importlib.reload() does not rebind names that another
# module obtained earlier via ``from ... import ...``, so reloading only
# hypernodes.integrations.daft.engine can leave hypernodes.engines.DaftEngine
# pointing at the pre-reload class. The saved output of this cell (same Daft
# "List casting not implemented" panic as before the change) is consistent
# with that stale class being used.
# NOTE(review): assumes hypernodes.engines re-exports DaftEngine from
# hypernodes.integrations.daft.engine — confirm against the package source.
importlib.reload(hypernodes.integrations.daft.engine)
importlib.reload(hypernodes.engines)
from hypernodes.engines import DaftEngine

# Re-run the same four nodes, this time executed by DaftEngine.
retrieval_pipeline_fixed = Pipeline(
    nodes=[encode_text, embed_vectors, normalize, calculate_scores],
    name="retrieval pipeline (fixed)",
    # Author's note: batch UDFs enabled, list returns auto-detected.
    # NOTE(review): not verifiable from this notebook — confirm in DaftEngine.
    engine=DaftEngine(),
)

results = retrieval_pipeline_fixed.map(
    inputs={"text": ["doc1", "doc2", "doc3"]}, map_over="text"
)

# Only reached when .map() returns without raising.
print(f"✓ Success! Processed {len(results)} documents")
print(f"Results: {results}")



thread 'DAFTCPU-9' (814211) panicked at src/daft-core/src/array/ops/cast.rs:1626:18:
not implemented: List casting not implemented for dtype: Python
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
Error when running pipeline node UDF batch_udf-ac132358-2c17-488c-9c65-921a0900806a


DaftCoreException: DaftError::External task 67 panicked with message "not implemented: List casting not implemented for dtype: Python"

In [None]:
from hypernodes import Pipeline, node
from hypernodes.engines import DaftEngine
from hypernodes.telemetry import ProgressCallback


@node(output_name="encoded")
def encode_text(text: str) -> list:
    """Simulated text encoder: wait 0.3 s, then return ``[1, 2, 3]``.

    ``text`` is accepted but not used. Same body as the ``encode_text`` node
    defined in an earlier cell; running this cell rebinds the name.
    """
    time.sleep(0.3)
    fake_tokens = [1, 2, 3]
    return fake_tokens


@node(output_name="embedded")
def embed_vectors(encoded: list) -> list:
    """Simulated embedder: wait 0.3 s, then return ``[0.1, 0.2, 0.3]``.

    ``encoded`` is accepted but not used. Same body as the ``embed_vectors``
    node defined in an earlier cell; running this cell rebinds the name.
    """
    time.sleep(0.3)
    fake_vector = [0.1, 0.2, 0.3]
    return fake_vector


@node(output_name="normalized")
def normalize(embedded: list) -> list:
    """Simulated normalization: wait 0.3 s, then divide each element by the sum.

    Returns a new list holding ``element / sum(embedded)`` for every element.
    Same body as the ``normalize`` node defined in an earlier cell; running
    this cell rebinds the name.
    """
    time.sleep(0.3)
    element_sum = sum(embedded)
    quotients = []
    for element in embedded:
        quotients.append(element / element_sum)
    return quotients


@node(output_name="scores")
def calculate_scores(normalized: list) -> float:
    """Simulated scoring: wait 0.3 s, then return ``sum(normalized)``.

    Same body as the ``calculate_scores`` node defined in an earlier cell;
    running this cell rebinds the name.
    """
    time.sleep(0.3)
    summed = sum(normalized)
    return summed


# Four-node pipeline with a descriptive name, executed by DaftEngine.
# A progress callback is constructed, but it is NOT attached to the pipeline:
# the callbacks argument below is commented out.
progress = ProgressCallback(enable=True)
retrieval_pipeline = Pipeline(
    name="retrieval pipeline",
    nodes=[encode_text, embed_vectors, normalize, calculate_scores],
    # callbacks=[progress],
    engine=DaftEngine(),
)

# The recorded output of this cell shows no progress bars; the run failed
# inside Daft with a panic:
# "not implemented: List casting not implemented for dtype: Python".

# Map the pipeline over five placeholder documents.
results = retrieval_pipeline.map(
    inputs={"text": ["doc1", "doc2", "doc3", "doc4", "doc5"]},
    map_over="text",
)

print(f"\nProcessed {len(results)} documents")


thread 'DAFTCPU-7' (804517) panicked at src/daft-core/src/array/ops/cast.rs:1626:18:
not implemented: List casting not implemented for dtype: Python
Error when running pipeline node UDF batch_udf-2df580e2-a031-4f4f-9587-e5676d67e622


DaftCoreException: DaftError::External task 125 panicked with message "not implemented: List casting not implemented for dtype: Python"