1 change: 1 addition & 0 deletions Cargo.toml
@@ -37,3 +37,4 @@ itertools = "0.14.0"
derivative = "2.2.0"
async-lock = "3.4.0"
hex = "0.4.3"
+pythonize = "0.23.0"
13 changes: 6 additions & 7 deletions python/cocoindex/flow.py
@@ -5,7 +5,6 @@
from __future__ import annotations

import re
-import json
import inspect
from typing import Any, Callable, Sequence, TypeVar
from threading import Lock
@@ -61,8 +60,8 @@ def _create_data_slice(
def _spec_kind(spec: Any) -> str:
return spec.__class__.__name__

-def _spec_json_dump(spec: Any) -> str:
-return json.dumps(spec.__dict__)
+def _spec_dump(spec: Any) -> dict[str, Any]:
+return spec.__dict__

T = TypeVar('T')

@@ -162,7 +161,7 @@ def transform(self, fn_spec: op.FunctionSpec, /, name: str | None = None) -> Dat
lambda target_scope, name:
flow_builder_state.engine_flow_builder.transform(
_spec_kind(fn_spec),
-_spec_json_dump(fn_spec),
+_spec_dump(fn_spec),
args,
target_scope,
flow_builder_state.field_name_builder.build_name(
@@ -253,8 +252,8 @@ def export(self, name: str, target_spec: op.StorageSpec, /, *,
{"field_name": field_name, "metric": metric.value}
for field_name, metric in vector_index]
self._flow_builder_state.engine_flow_builder.export(
-name, _spec_kind(target_spec), _spec_json_dump(target_spec),
-json.dumps(index_options), self._engine_data_collector)
+name, _spec_kind(target_spec), _spec_dump(target_spec),
+index_options, self._engine_data_collector)


_flow_name_builder = _NameBuilder()
@@ -294,7 +293,7 @@ def add_source(self, spec: op.SourceSpec, /, name: str | None = None) -> DataSli
self._state,
lambda target_scope, name: self._state.engine_flow_builder.add_source(
_spec_kind(spec),
-_spec_json_dump(spec),
+_spec_dump(spec),
target_scope,
self._state.field_name_builder.build_name(
name, prefix=_to_snake_case(_spec_kind(spec))+'_'),
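These helpers now hand operation specs to the Rust engine as plain dicts rather than JSON strings. A minimal sketch of what `_spec_kind`/`_spec_dump` produce, using a made-up spec class (the real specs derive from `cocoindex.op` base classes):

```python
from dataclasses import dataclass
from typing import Any

@dataclass
class LocalFileSource:  # hypothetical spec class, for illustration only
    path: str
    included_patterns: list[str] | None = None

def _spec_kind(spec: Any) -> str:
    return spec.__class__.__name__

def _spec_dump(spec: Any) -> dict[str, Any]:
    return spec.__dict__

spec = LocalFileSource(path="docs/", included_patterns=["*.md"])
assert _spec_kind(spec) == "LocalFileSource"
# Previously this dict was serialized with json.dumps before crossing into the
# Rust engine; now it is passed as-is and converted there (via pythonize).
assert _spec_dump(spec) == {"path": "docs/", "included_patterns": ["*.md"]}
```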
5 changes: 2 additions & 3 deletions python/cocoindex/functions.py
@@ -1,7 +1,6 @@
"""All builtin functions."""
from typing import Annotated, Any

-import json
import sentence_transformers
from .typing import Float32, Vector, TypeAttr
from . import op
@@ -31,11 +30,11 @@ class SentenceTransformerEmbedExecutor:
spec: SentenceTransformerEmbed
_model: sentence_transformers.SentenceTransformer

-def analyze(self, text = None):
+def analyze(self, text):
args = self.spec.args or {}
self._model = sentence_transformers.SentenceTransformer(self.spec.model, **args)
dim = self._model.get_sentence_embedding_dimension()
-return Annotated[list[Float32], Vector(dim=dim), TypeAttr("cocoindex.io/vector_origin_text", json.loads(text.analyzed_value))]
+return Annotated[list[Float32], Vector(dim=dim), TypeAttr("cocoindex.io/vector_origin_text", text.analyzed_value)]

def __call__(self, text: str) -> list[Float32]:
return self._model.encode(text).tolist()
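Since the engine now delivers `text.analyzed_value` as a Python object, `analyze()` can attach it to the returned type directly. A small sketch of the shape of that return value, with a made-up dimension and origin-text mapping (the real payload is whatever the engine reports for the argument):

```python
from typing import Annotated

from cocoindex.typing import Float32, TypeAttr, Vector

# Hypothetical stand-ins: dim normally comes from the loaded model, and the
# attribute payload is the engine-provided analyzed value of the argument.
dim = 384
analyzed_value = {"kind": "field", "field": "text"}

result_type = Annotated[
    list[Float32],
    Vector(dim=dim),
    TypeAttr("cocoindex.io/vector_origin_text", analyzed_value),
]
```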
4 changes: 2 additions & 2 deletions python/cocoindex/lib.py
@@ -35,7 +35,7 @@ def from_env(cls) -> Self:

def init(settings: Settings):
"""Initialize the cocoindex library."""
-_engine.init(json.dumps(settings.__dict__))
+_engine.init(settings.__dict__)

@dataclass
class ServerSettings:
@@ -62,7 +62,7 @@ def start_server(settings: ServerSettings):
"""Start the cocoindex server."""
flow.ensure_all_flows_built()
query.ensure_all_handlers_built()
-_engine.start_server(json.dumps(settings.__dict__))
+_engine.start_server(settings.__dict__)

def stop():
"""Stop the cocoindex library."""
7 changes: 3 additions & 4 deletions python/cocoindex/op.py
@@ -2,9 +2,8 @@
Facilities for defining cocoindex operations.
"""
import inspect
-import json

-from typing import get_type_hints, Protocol, Callable, dataclass_transform
+from typing import get_type_hints, Protocol, Any, Callable, dataclass_transform
from dataclasses import dataclass
from enum import Enum
from threading import Lock
@@ -54,8 +53,8 @@ def __init__(self, spec_cls: type, executor_cls: type):
self._spec_cls = spec_cls
self._executor_cls = executor_cls

-def __call__(self, spec_json: str, *args, **kwargs):
-spec = self._spec_cls(**json.loads(spec_json))
+def __call__(self, spec: dict[str, Any], *args, **kwargs):
+spec = self._spec_cls(**spec)
executor = self._executor_cls(spec)
result_type = executor.analyze(*args, **kwargs)
return (dump_type(result_type), executor)
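A simplified, self-contained sketch of the `__call__` pattern above, using made-up spec and executor classes: the engine now supplies the spec as a plain dict, so the factory can unpack it straight into the spec class instead of going through `json.loads`.

```python
from dataclasses import dataclass
from typing import Any

@dataclass
class EmbedSpec:  # hypothetical spec class
    model: str

class EmbedExecutor:  # hypothetical executor class
    def __init__(self, spec: EmbedSpec):
        self.spec = spec

    def analyze(self) -> type:
        # The real executors return an enriched type that dump_type() turns
        # into a dict for the engine; a bare Python type keeps the sketch short.
        return list[float]

def make_executor(spec: dict[str, Any]) -> tuple[type, EmbedExecutor]:
    # was: EmbedSpec(**json.loads(spec_json))
    executor = EmbedExecutor(EmbedSpec(**spec))
    return executor.analyze(), executor

result_type, executor = make_executor({"model": "all-MiniLM-L6-v2"})
assert executor.spec.model == "all-MiniLM-L6-v2"
```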
7 changes: 2 additions & 5 deletions python/cocoindex/query.py
@@ -1,7 +1,6 @@
from typing import Callable, Any
from dataclasses import dataclass
from threading import Lock
-import json

from . import flow as fl
from . import vector
@@ -52,7 +51,7 @@ def _lazy_handler() -> _engine.SimpleSemanticsQueryHandler:
engine_handler = _engine.SimpleSemanticsQueryHandler(
flow.internal_flow(), target_name,
fl.TransientFlow(query_transform_flow, [str]).internal_flow(),
-json.dumps(default_similarity_metric.value))
+default_similarity_metric.value)
engine_handler.register_query_handler(name)
return engine_handler
self._lazy_query_handler = _lazy_handler
@@ -71,11 +70,9 @@ def search(self, query: str, limit: int, vector_field_name: str | None = None,
"""
Search the index with the given query, limit, vector field name, and similarity metric.
"""
-internal_results_json, internal_info_json = self.internal_handler().search(
+internal_results, internal_info = self.internal_handler().search(
query, limit, vector_field_name,
similarity_matric.value if similarity_matric is not None else None)
-internal_results = json.loads(internal_results_json)
-internal_info = json.loads(internal_info_json)
fields = [field['name'] for field in internal_results['fields']]
results = [QueryResult(data=dict(zip(fields, result['data'])), score=result['score']) for result in internal_results['results']]
info = SimpleSemanticsQueryInfo(
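The handler now returns its results as Python lists and dicts, so the reshaping above operates on them directly. A sketch with a hand-written payload of the shape the code expects (field names and scores are invented):

```python
# Hypothetical payload, mirroring the keys used above ('fields', 'results',
# 'name', 'data', 'score'); previously this arrived as a JSON string that
# first had to go through json.loads.
internal_results = {
    "fields": [{"name": "filename"}, {"name": "text"}],
    "results": [
        {"data": ["a.md", "chunk one"], "score": 0.92},
        {"data": ["b.md", "chunk two"], "score": 0.87},
    ],
}

fields = [field["name"] for field in internal_results["fields"]]
rows = [dict(zip(fields, result["data"])) for result in internal_results["results"]]
assert rows[0] == {"filename": "a.md", "text": "chunk one"}
```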
3 changes: 1 addition & 2 deletions python/cocoindex/setup.py
@@ -1,4 +1,3 @@
-import json
from typing import NamedTuple

from . import flow
@@ -9,7 +8,7 @@ class CheckSetupStatusOptions(NamedTuple):

def check_setup_status(options: CheckSetupStatusOptions) -> _engine.SetupStatusCheck:
flow.ensure_all_flows_built()
-return _engine.check_setup_status(json.dumps(options))
+return _engine.check_setup_status(options)

def apply_setup_changes(status_check: _engine.SetupStatusCheck):
_engine.apply_setup_changes(status_check)
7 changes: 3 additions & 4 deletions python/cocoindex/typing.py
@@ -1,4 +1,3 @@
-import json
import typing
import collections
from typing import Annotated, NamedTuple, Any
@@ -67,7 +66,7 @@ def _basic_type_to_json_value(t, metadata):

return type_json

-def _enriched_type_to_json_value(t):
+def _enriched_type_to_json_value(t) -> dict[str, Any] | None:
if t is None:
return None
t, metadata = _get_origin_type_and_metadata(t)
@@ -83,8 +82,8 @@ def _enriched_type_to_json_value(t):
return enriched_type_json


-def dump_type(t) -> str:
+def dump_type(t) -> dict[str, Any] | None:
"""
Convert a Python type to a CocoIndex's type in JSON.
"""
-return json.dumps(_enriched_type_to_json_value(t))
+return _enriched_type_to_json_value(t)
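`dump_type` now returns the type description as a dict (or `None`) instead of a JSON string, so the Rust side can consume it via pythonize. An illustrative check (the exact keys are whatever `_enriched_type_to_json_value` builds; the `dim` is made up):

```python
from typing import Annotated

from cocoindex.typing import Float32, Vector, dump_type

type_repr = dump_type(Annotated[list[Float32], Vector(dim=384)])
assert isinstance(type_repr, dict)  # was: a str produced by json.dumps
assert dump_type(None) is None      # was: the four-character string "null"
```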
10 changes: 5 additions & 5 deletions src/builder/flow_builder.rs
@@ -386,7 +386,7 @@ impl FlowBuilder {
pub fn add_source(
&mut self,
kind: String,
-op_spec: py::Json<serde_json::Map<String, serde_json::Value>>,
+op_spec: py::Pythonized<serde_json::Map<String, serde_json::Value>>,
target_scope: Option<DataScopeRef>,
name: String,
) -> PyResult<DataSlice> {
@@ -423,7 +423,7 @@ impl FlowBuilder {
pub fn add_direct_input(
&mut self,
name: String,
-value_type: py::Json<schema::EnrichedValueType>,
+value_type: py::Pythonized<schema::EnrichedValueType>,
) -> PyResult<DataSlice> {
let mut root_data_scope = self.root_data_scope.lock().unwrap();
root_data_scope
@@ -453,7 +453,7 @@ impl FlowBuilder {
pub fn transform(
&mut self,
kind: String,
-op_spec: py::Json<serde_json::Map<String, serde_json::Value>>,
+op_spec: py::Pythonized<serde_json::Map<String, serde_json::Value>>,
args: Vec<(DataSlice, Option<String>)>,
target_scope: Option<DataScopeRef>,
name: String,
@@ -555,8 +555,8 @@
&mut self,
name: String,
kind: String,
-op_spec: py::Json<serde_json::Map<String, serde_json::Value>>,
-index_options: py::Json<spec::IndexOptions>,
+op_spec: py::Pythonized<serde_json::Map<String, serde_json::Value>>,
+index_options: py::Pythonized<spec::IndexOptions>,
input: &DataCollector,
) -> PyResult<()> {
let spec = spec::OpSpec {
18 changes: 9 additions & 9 deletions src/ops/py_factory.rs
@@ -113,26 +113,26 @@ fn value_from_py_object<'py>(

#[pyclass(name = "OpArgSchema")]
pub struct PyOpArgSchema {
-value_type: crate::py::Json<schema::EnrichedValueType>,
-analyzed_value: crate::py::Json<plan::AnalyzedValueMapping>,
+value_type: crate::py::Pythonized<schema::EnrichedValueType>,
+analyzed_value: crate::py::Pythonized<plan::AnalyzedValueMapping>,
}

#[pymethods]
impl PyOpArgSchema {
#[getter]
-fn value_type(&self) -> &crate::py::Json<schema::EnrichedValueType> {
+fn value_type(&self) -> &crate::py::Pythonized<schema::EnrichedValueType> {
&self.value_type
}

#[getter]
-fn analyzed_value(&self) -> &crate::py::Json<plan::AnalyzedValueMapping> {
+fn analyzed_value(&self) -> &crate::py::Pythonized<plan::AnalyzedValueMapping> {
&self.analyzed_value
}

fn validate_arg(
&self,
name: &str,
-typ: crate::py::Json<schema::EnrichedValueType>,
+typ: crate::py::Pythonized<schema::EnrichedValueType>,
) -> PyResult<()> {
if self.value_type.0.typ != typ.0.typ {
return Err(PyException::new_err(format!(
@@ -222,13 +222,13 @@ impl SimpleFunctionFactory for PyFunctionFactory {
)> {
let (result_type, executor, kw_args_names, num_positional_args) =
Python::with_gil(|py| -> anyhow::Result<_> {
-let mut args = vec![crate::py::Json(spec).into_py_any(py)?];
+let mut args = vec![crate::py::Pythonized(spec).into_py_any(py)?];
let mut kwargs = vec![];
let mut num_positional_args = 0;
for arg in input_schema.into_iter() {
let py_arg_schema = PyOpArgSchema {
-value_type: crate::py::Json(arg.value_type.clone()),
-analyzed_value: crate::py::Json(arg.analyzed_value.clone()),
+value_type: crate::py::Pythonized(arg.value_type.clone()),
+analyzed_value: crate::py::Pythonized(arg.analyzed_value.clone()),
};
match arg.name.0 {
Some(name) => {
@@ -251,7 +251,7 @@ impl SimpleFunctionFactory for PyFunctionFactory {
Some(&kwargs.into_py_dict(py)?),
)?;
let (result_type, executor) = result
-.extract::<(crate::py::Json<schema::EnrichedValueType>, Py<PyAny>)>(py)?;
+.extract::<(crate::py::Pythonized<schema::EnrichedValueType>, Py<PyAny>)>(py)?;
Ok((
result_type.into_inner(),
executor,