From c245afeb26762da1a3205b593d2a54a21430f7a6 Mon Sep 17 00:00:00 2001 From: LJ Date: Sun, 23 Mar 2025 11:13:37 -0700 Subject: [PATCH] Update documentations for the evaluation and dump functionality --- docs/docs/core/cli.mdx | 1 + docs/docs/core/flow_methods.mdx | 22 +++++++++++++++++++++- python/cocoindex/__init__.py | 2 +- python/cocoindex/cli.py | 11 ++++++++--- python/cocoindex/flow.py | 2 +- 5 files changed, 32 insertions(+), 6 deletions(-) diff --git a/docs/docs/core/cli.mdx b/docs/docs/core/cli.mdx index b0a41433..a25ae531 100644 --- a/docs/docs/core/cli.mdx +++ b/docs/docs/core/cli.mdx @@ -65,6 +65,7 @@ The following subcommands are available: | `setup` | Check and apply setup changes for flows, including the internal and target storage (to export). | | `show` | Show the spec for a specific flow. | | `update` | Update the index defined by the flow. | +| `evaluate` | Evaluate the flow and dump flow outputs to files. Instead of updating the index, it dumps what should be indexed to files. Mainly used for evaluation purposes. | Use `--help` to see the full list of subcommands, and `subcommand --help` to see the usage of a specific one. diff --git a/docs/docs/core/flow_methods.mdx b/docs/docs/core/flow_methods.mdx index 6f9b092c..727a21db 100644 --- a/docs/docs/core/flow_methods.mdx +++ b/docs/docs/core/flow_methods.mdx @@ -12,7 +12,7 @@ After a flow is defined as discussed in [Flow Definition](/docs/core/flow_def), ## update -The `update()` method will update will update the index defined by the flow. +The `update()` method will update the index defined by the flow. Once the function returns, the indice is fresh up to the moment when the function is called. @@ -23,5 +23,25 @@ Once the function returns, the indice is fresh up to the moment when the functio flow.update() ``` + + + +## evaluate_and_dump + +The `evaluate_and_dump()` method evaluates the flow and dumps flow outputs to files. 
+ +It takes an `EvaluateAndDumpOptions` dataclass as input to configure, with the following fields: + +* `output_dir` (type: `str`, required): The directory to dump the result to. +* `use_cache` (type: `bool`, default: `True`): Use already-cached intermediate data if available. + Note that we only reuse existing cached data without updating the cache even if it's turned on. + + + + +```python +flow.evaluate_and_dump(EvaluateAndDumpOptions(output_dir="./eval_output")) +``` + \ No newline at end of file diff --git a/python/cocoindex/__init__.py b/python/cocoindex/__init__.py index 4dcc18a7..8c991677 100644 --- a/python/cocoindex/__init__.py +++ b/python/cocoindex/__init__.py @@ -2,7 +2,7 @@ Cocoindex is a framework for building and running indexing pipelines. """ from . import flow, functions, query, sources, storages, cli -from .flow import FlowBuilder, DataScope, DataSlice, Flow, flow_def +from .flow import FlowBuilder, DataScope, DataSlice, Flow, flow_def, EvaluateAndDumpOptions from .llm import LlmSpec, LlmApiType from .vector import VectorSimilarityMetric from .lib import * diff --git a/python/cocoindex/cli.py b/python/cocoindex/cli.py index c70e8948..e474fb1d 100644 --- a/python/cocoindex/cli.py +++ b/python/cocoindex/cli.py @@ -57,13 +57,18 @@ def update(flow_name: str | None): @click.argument("flow_name", type=str, required=False) @click.option( "-o", "--output-dir", type=str, required=False, - help="The directory to dump the evaluation output to.") + help="The directory to dump the output to.") @click.option( "-c", "--use-cache", is_flag=True, show_default=True, default=True, - help="Use cached evaluation results if available.") + help="Use already-cached intermediate data if available. " + "Note that we only reuse existing cached data without updating the cache " + "even if it's turned on.") def evaluate(flow_name: str | None, output_dir: str | None, use_cache: bool = True): """ - Evaluate and dump the flow. 
+ Evaluate the flow and dump flow outputs to files. + + Instead of updating the index, it dumps what should be indexed to files. + Mainly used for evaluation purposes. """ fl = _flow_by_name(flow_name) if output_dir is None: diff --git a/python/cocoindex/flow.py b/python/cocoindex/flow.py index 5af0a89d..23fc8872 100644 --- a/python/cocoindex/flow.py +++ b/python/cocoindex/flow.py @@ -372,7 +372,7 @@ def update(self): def evaluate_and_dump(self, options: EvaluateAndDumpOptions): """ - Evaluate and dump the flow. + Evaluate the flow and dump flow outputs to files. """ return self._lazy_engine_flow().evaluate_and_dump(_dump_engine_object(options))