In [1]:
from typing import List

from daft_func import Pipeline, Runner, func
from examples.retrieval import (
    IdentityReranker,
    Query,
    RerankedHit,
    Reranker,
    RetrievalResult,
    Retriever,
    ToyRetriever,
)

In [2]:
from typing import Dict


@func(output="index")
def index(
    retriever: Retriever,
    corpus: Dict[str, str],
    test: bool = True,
) -> bool:
    """Index ``corpus`` with ``retriever`` and return a completion flag.

    Args:
        retriever: Object whose ``index`` method is called with ``corpus``.
        corpus: Mapping passed unchanged to ``retriever.index``.
        test: Accepted but never read by this function.

    Returns:
        Always ``True``. Presumably used as a "done" marker that later steps
        can depend on — TODO confirm.
    """
    retriever.index(corpus)
    return True


@func(output="hits", map_axis="query", key_attr="query_uuid")
def retrieve(
    retriever: Retriever,
    query: Query,
    top_k: int,
    index: bool,
) -> RetrievalResult:
    """Return the result of ``retriever.retrieve(query, top_k=top_k)``.

    ``index`` is not read in the body; it presumably exists only so this step
    runs after the ``index`` step — TODO confirm.
    """
    retrieval_result = retriever.retrieve(query, top_k=top_k)
    return retrieval_result


@func(output="reranked_hits", map_axis="query", key_attr="query_uuid")
def rerank(
    reranker: Reranker,
    query: Query,
    hits: RetrievalResult,
    top_k: int,
) -> List[RerankedHit]:
    """Return the result of ``reranker.rerank(query, hits, top_k=top_k)``.

    ``hits`` shares its name with the ``output="hits"`` declared on
    ``retrieve``.
    """
    reranked = reranker.rerank(query, hits, top_k=top_k)
    return reranked

In [3]:
# Build the pipeline from an explicit list of the three @func-decorated
# functions defined above: index, retrieve and rerank.
pipeline = Pipeline(functions=[index, retrieve, rerank])

In [4]:
# Call the pipeline's visualize() method; as the last expression in the cell,
# its return value (if any) is what the notebook displays.
pipeline.visualize()

In [5]:
# Toy corpus: document id -> document text.
corpus = {
    "d1": "a quick brown fox jumps",
    "d2": "brown dog sleeps",
    "d3": "five boxing wizards jump quickly",
}

# Inputs for a single-query run. The keys match parameter names of the
# index / retrieve / rerank functions (retriever, corpus, reranker, query, top_k).
single_inputs = {
    "retriever": ToyRetriever(),
    "corpus": corpus,
    "reranker": IdentityReranker(),
    "query": Query(query_uuid="q1", text="quick brown"),
    "top_k": 2,
}

In [6]:
# Create runner with auto mode (chooses based on batch size).
# batch_threshold=2 is presumably the batch size at which auto mode switches
# execution backend — TODO confirm against the daft_func documentation.
runner = Runner(pipeline=pipeline, mode="auto", batch_threshold=2)

In [7]:
# Run the pipeline once with the single-query inputs.
result = runner.run(inputs=single_inputs)

In [8]:
# Display the "reranked_hits" output (the output name declared on `rerank`).
result["reranked_hits"]

[RerankedHit(query_uuid='q1', doc_id='d1', score=2.0),
 RerankedHit(query_uuid='q1', doc_id='d2', score=1.0)]

In [None]:
# Same keys as the single-query inputs, but "query" is now a list of three
# Query objects. retrieve and rerank declare map_axis="query", so they are
# presumably applied once per query — TODO confirm.
multi_inputs = {
    "corpus": corpus,
    "retriever": ToyRetriever(),
    "reranker": IdentityReranker(),
    "query": [
        Query(query_uuid="q1", text="quick brown"),
        Query(query_uuid="q2", text="wizards jump"),
        Query(query_uuid="q3", text="brown dog"),
    ],
    "top_k": 2,
}

In [10]:
# Run the pipeline on the multi-query inputs (rebinds `result` from the
# single-query run).
result = runner.run(inputs=multi_inputs)

In [11]:
# Display the "reranked_hits" output for the multi-query run.
result["reranked_hits"]

[[RerankedHit(query_uuid='q1', doc_id='d1', score=2.0),
  RerankedHit(query_uuid='q1', doc_id='d2', score=1.0)],
 [RerankedHit(query_uuid='q2', doc_id='d3', score=2.0),
  RerankedHit(query_uuid='q2', doc_id='d1', score=1.0)],
 [RerankedHit(query_uuid='q3', doc_id='d2', score=2.0),
  RerankedHit(query_uuid='q3', doc_id='d1', score=1.0)]]

In [13]:
# Run the same multi-query inputs once per execution mode and report how many
# entries "reranked_hits" contains.
# Note: this rebinds the notebook-level names `runner` and `result`; after the
# loop they refer to the last ("auto") iteration.
for mode in ["local", "daft", "auto"]:
    runner = Runner(pipeline=pipeline, mode=mode, batch_threshold=2)
    result = runner.run(inputs=multi_inputs)
    print(
        f"✅ {mode.upper():5s} mode: {len(result['reranked_hits'])} queries processed"
    )

# Closing banner.
print("\n" + "=" * 70)
print("🎉 Demo complete!")
print("=" * 70)

✅ LOCAL mode: 3 queries processed
✅ DAFT  mode: 3 queries processed
✅ AUTO  mode: 3 queries processed

🎉 Demo complete!


In [None]:
from time import sleep

from daft_func import CacheConfig, func


@func(output="embeddings", cache=True, cache_key="model_v1")
def encode(text: str) -> list:
    """Simulate a slow encoder and return a toy embedding for ``text``.

    Args:
        text: String to encode.

    Returns:
        A list with one float per character of ``text`` (the character's code
        point). This is placeholder data, not a real embedding; it makes the
        return value match the declared ``list`` type and depend on the input.
    """
    # Stand-in for slow model inference, so that a cache hit is noticeable.
    sleep(6)
    # NOTE(review): if a `.cache` directory written by an older version of this
    # function exists, cached values may differ — confirm how daft_func
    # invalidates entries for a fixed cache_key.
    return [float(ord(char)) for char in text]


@func(output="result", cache=True)
def process(embeddings: list, threshold: float) -> dict:
    """Simulate a slow post-processing step over ``embeddings``.

    Args:
        embeddings: Sized value passed in under the ``embeddings`` name.
        threshold: Value echoed back in the result.

    Returns:
        A dict with ``"threshold"`` (the given threshold) and
        ``"num_embeddings"`` (``len(embeddings)``). This is placeholder data;
        it makes the return value match the declared ``dict`` type and change
        when ``threshold`` changes.
    """
    # Stand-in for slow work, so that a cache hit is noticeable.
    sleep(2)
    return {"threshold": threshold, "num_embeddings": len(embeddings)}


# Build a two-step pipeline (encode -> process) and a runner with caching
# enabled, storing cache data under ".cache".
# Note: this rebinds the notebook-level names `pipeline` and `runner`.
pipeline = Pipeline(functions=[encode, process])
cache_config = CacheConfig(enabled=True, cache_dir=".cache")
runner = Runner(pipeline=pipeline, cache_config=cache_config)

# First run: executes both (assuming no earlier ".cache" entries exist; the
# two steps sleep for 6 s and 2 s respectively).
result1 = runner.run(inputs={"text": "hello", "threshold": 0.5})

In [None]:
# Second run, change threshold: encode cached, process re-executes.
# Only "threshold" differs from the first run (0.5 -> 0.8); "text" is unchanged.
# The cached/re-executed behaviour is the expectation — no output here shows it.
result2 = runner.run(inputs={"text": "hello", "threshold": 0.8})

In [None]:
from daft_func import Pipeline, func


# Define a simple pipeline
@func(output="doubled")
def double(x: int) -> int:
    """Return ``x`` multiplied by two."""
    doubled_value = x * 2
    return doubled_value


@func(output="result")
def add_value(doubled: int, offset: int = 5) -> int:
    """Return ``doubled`` plus ``offset`` (``offset`` defaults to 5)."""
    total = doubled + offset
    return total


# Create pipeline with explicit functions (double, then add_value).
# Note: this rebinds the notebook-level name `pipeline`.
pipeline = Pipeline(functions=[double, add_value])

In [None]:
# Create and display the visualization of the double -> add_value pipeline.
pipeline.visualize()

In [None]:
from pydantic import BaseModel

from daft_func import Runner, func


# 1. Define your data models
# NOTE: this rebinds `Query`, shadowing the `Query` imported from
# examples.retrieval in the first cell.
class Query(BaseModel):
    """Query record for this example: a string id plus the query text."""

    id: str
    text: str


class Result(BaseModel):
    """Result record for this example: a string id plus a float score."""

    id: str
    score: float


# 2. Define the processing step
# NOTE: this rebinds `process`, which the caching example above also defines.
@func(output="results", map_axis="query", key_attr="id")
def process(query: Query, threshold: float) -> Result:
    """Score one query as ``len(query.text) * threshold``.

    Returns a ``Result`` carrying the query's ``id`` and that score.
    """
    text_length = len(query.text)
    return Result(id=query.id, score=text_length * threshold)


# 3. Create pipeline and runner
# `Pipeline` is not imported in this cell (only Runner and func are); it comes
# from an earlier `from daft_func import Pipeline` cell.
# Note: this rebinds the notebook-level names `pipeline` and `runner`.
pipeline = Pipeline(functions=[process])
runner = Runner(pipeline=pipeline)

In [None]:
# Run with a one-element query list.
outputs = runner.run(
    inputs={
        "query": [Query(id="q1", text="hello")],
        "threshold": 0.5,
    }
)

print(outputs["results"])
# Expected: [Result(id='q1', score=2.5)]  (len("hello") * 0.5 == 2.5)

[Result(id='q1', score=2.5)]


In [None]:
# Run with two queries; both texts have 5 characters, so both scores are
# 5 * 0.5 == 2.5.
outputs = runner.run(
    inputs={
        "query": [
            Query(id="q1", text="hello"),
            Query(id="q2", text="world"),
        ],
        "threshold": 0.5,
    }
)

print(outputs["results"])
# Expected: [Result(id='q1', score=2.5), Result(id='q2', score=2.5)]

[Result(id='q1', score=2.5), Result(id='q2', score=2.5)]
