From 18a2329afbd79a8ba0afec830df456d8793bd91d Mon Sep 17 00:00:00 2001 From: LJ Date: Fri, 7 Mar 2025 12:16:44 -0800 Subject: [PATCH 1/3] Clean up a few places that may panic. --- src/execution/evaluator.rs | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/execution/evaluator.rs b/src/execution/evaluator.rs index 507bf30c..f9920b48 100644 --- a/src/execution/evaluator.rs +++ b/src/execution/evaluator.rs @@ -115,7 +115,7 @@ fn augmented_value( .map(|v| ScopeValueBuilder::augmented_from(v, t)) .collect::>>()?, ), - (val, _) => panic!("Value kind doesn't match the type {val_type}: {val:?}"), + (val, _) => bail!("Value kind doesn't match the type {val_type}: {val:?}"), }; Ok(value) } @@ -157,18 +157,19 @@ impl<'a> ScopeEntry<'a> { fn get_local_field_schema<'b>( schema: &'b schema::StructSchema, indices: &[u32], - ) -> &'b schema::FieldSchema { + ) -> Result<&'b schema::FieldSchema> { let field_idx = indices[0] as usize; let field_schema = &schema.fields[field_idx]; - if indices.len() == 1 { + let result = if indices.len() == 1 { field_schema } else { let struct_field_schema = match &field_schema.value_type.typ { schema::ValueType::Struct(s) => s, - _ => panic!("Expect struct field"), + _ => bail!("Expect struct field"), }; - Self::get_local_field_schema(&struct_field_schema, &indices[1..]) - } + Self::get_local_field_schema(&struct_field_schema, &indices[1..])? + }; + Ok(result) } fn get_local_key_field<'b>( @@ -229,8 +230,14 @@ impl<'a> ScopeEntry<'a> { } } - fn get_field_schema(&self, field_ref: &AnalyzedLocalFieldReference) -> &schema::FieldSchema { - Self::get_local_field_schema(self.schema, &field_ref.fields_idx) + fn get_field_schema( + &self, + field_ref: &AnalyzedLocalFieldReference, + ) -> Result<&schema::FieldSchema> { + Ok(Self::get_local_field_schema( + self.schema, + &field_ref.fields_idx, + )?) } fn define_field_w_builder( @@ -329,7 +336,7 @@ async fn evaluate_op_scope( } AnalyzedReactiveOp::ForEach(op) => { - let target_field_schema = head_scope.get_field_schema(&op.local_field_ref); + let target_field_schema = head_scope.get_field_schema(&op.local_field_ref)?; let collection_schema = match &target_field_schema.value_type.typ { schema::ValueType::Collection(cs) => cs, _ => panic!("Expect target field to be a collection"), From 4d308283d17815b984fb1345ec6c2181af9a66e0 Mon Sep 17 00:00:00 2001 From: LJ Date: Fri, 7 Mar 2025 14:22:34 -0800 Subject: [PATCH 2/3] Simplify handling for `List` type. --- src/base/schema.rs | 100 +++++-------------------- src/ops/functions/split_recursively.rs | 11 +-- src/ops/py_factory.rs | 9 +-- src/ops/sources/local_file.rs | 22 +++--- 4 files changed, 38 insertions(+), 104 deletions(-) diff --git a/src/base/schema.rs b/src/base/schema.rs index c5d94e88..5ae2a3e9 100644 --- a/src/base/schema.rs +++ b/src/base/schema.rs @@ -3,11 +3,7 @@ use crate::builder::plan::AnalyzedValueMapping; use super::spec::*; use anyhow::Result; use serde::{Deserialize, Serialize}; -use std::{ - collections::BTreeMap, - ops::Deref, - sync::{Arc, LazyLock}, -}; +use std::{collections::BTreeMap, ops::Deref, sync::Arc}; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct VectorTypeSchema { @@ -127,21 +123,20 @@ pub struct CollectionSchema { impl CollectionSchema { pub fn has_key(&self) -> bool { match self.kind { - CollectionKind::Collection => false, - CollectionKind::Table | CollectionKind::List => true, + CollectionKind::Table => true, + CollectionKind::Collection | CollectionKind::List => false, } } pub fn key_type(&self) -> Option<&EnrichedValueType> { match self.kind { - CollectionKind::Collection => None, CollectionKind::Table => self .row .fields .first() .as_ref() .map(|field| &field.value_type), - CollectionKind::List => Some(&LIST_INDEX_FIELD.value_type), + CollectionKind::Collection | CollectionKind::List => None, } } @@ -172,89 +167,21 @@ impl std::fmt::Display for CollectionSchema { } } -pub const KEY_FIELD_NAME: &'static str = "__key"; -pub const VALUE_FIELD_NAME: &'static str = "__value"; -pub const LIST_INDEX_FIELD_NAME: &'static str = "__index"; - -pub static LIST_INDEX_FIELD: LazyLock = LazyLock::new(|| FieldSchema { - name: LIST_INDEX_FIELD_NAME.to_string(), - value_type: EnrichedValueType { - typ: ValueType::Basic(BasicValueType::Int64), - nullable: false, - attrs: Default::default(), - }, -}); - impl CollectionSchema { - pub fn new_collection(value_name: Option, value: EnrichedValueType) -> Self { - Self { - kind: CollectionKind::Collection, - row: StructSchema { - fields: Arc::new(vec![FieldSchema { - name: value_name.unwrap_or_else(|| VALUE_FIELD_NAME.to_string()), - value_type: value, - }]), - }, - collectors: Default::default(), - } - } - - pub fn new_table( - key_name: Option, - key: EnrichedValueType, - value_name: Option, - value: EnrichedValueType, - ) -> Self { + pub fn new(kind: CollectionKind, fields: Vec) -> Self { Self { - kind: CollectionKind::Table, + kind, row: StructSchema { - fields: Arc::new(vec![ - FieldSchema { - name: key_name.unwrap_or_else(|| KEY_FIELD_NAME.to_string()), - value_type: key, - }, - FieldSchema { - name: value_name.unwrap_or_else(|| VALUE_FIELD_NAME.to_string()), - value_type: value, - }, - ]), + fields: Arc::new(fields), }, collectors: Default::default(), } } - pub fn new_list(value_name: Option, value: EnrichedValueType) -> Self { - Self { - kind: CollectionKind::List, - row: StructSchema { - fields: Arc::new(vec![ - LIST_INDEX_FIELD.clone(), - FieldSchema { - name: value_name.unwrap_or_else(|| VALUE_FIELD_NAME.to_string()), - value_type: value, - }, - ]), - }, - collectors: Default::default(), - } - } - - pub fn is_table(&self) -> bool { - match self.kind { - CollectionKind::Collection => false, - CollectionKind::Table | CollectionKind::List => true, - } - } - - pub fn is_list(&self) -> bool { - self.kind == CollectionKind::List - } - pub fn key_field<'a>(&'a self) -> Option<&'a FieldSchema> { - if self.is_table() { - Some(self.row.fields.first().unwrap()) - } else { - None + match self.kind { + CollectionKind::Table => Some(self.row.fields.first().unwrap()), + CollectionKind::Collection | CollectionKind::List => None, } } } @@ -373,6 +300,13 @@ pub struct FieldSchema { } impl FieldSchema { + pub fn new(name: impl ToString, value_type: EnrichedValueType) -> Self { + Self { + name: name.to_string(), + value_type, + } + } + pub fn without_attrs(&self) -> Self { Self { name: self.name.clone(), diff --git a/src/ops/functions/split_recursively.rs b/src/ops/functions/split_recursively.rs index 2032f926..9dbdbf58 100644 --- a/src/ops/functions/split_recursively.rs +++ b/src/ops/functions/split_recursively.rs @@ -265,11 +265,12 @@ impl SimpleFunctionFactoryBase for Factory { api_bail!("Expect String as input type, got {}", t) } } - Ok(make_output_type(CollectionSchema::new_table( - Some("location".to_string()), - make_output_type(BasicValueType::Range), - Some("text".to_string()), - make_output_type(BasicValueType::Str), + Ok(make_output_type(CollectionSchema::new( + CollectionKind::Table, + vec![ + FieldSchema::new("location", make_output_type(BasicValueType::Range)), + FieldSchema::new("text", make_output_type(BasicValueType::Str)), + ], )) .with_attr( field_attrs::CHUNK_BASE_TEXT, diff --git a/src/ops/py_factory.rs b/src/ops/py_factory.rs index db820934..5905640f 100644 --- a/src/ops/py_factory.rs +++ b/src/ops/py_factory.rs @@ -3,10 +3,11 @@ use std::{collections::BTreeMap, sync::Arc}; use axum::async_trait; use blocking::unblock; use futures::FutureExt; +use log::warn; use pyo3::{ exceptions::PyException, pyclass, pymethods, - types::{IntoPyDict, PyAnyMethods, PyList, PyString, PyTuple}, + types::{IntoPyDict, PyAnyMethods, PyString, PyTuple}, Bound, IntoPyObjectExt, Py, PyAny, PyResult, Python, }; @@ -156,12 +157,6 @@ fn value_from_py_object<'py>( ), } } - _ => { - return Err(PyException::new_err(format!( - "unsupported value type: {}", - typ - ))) - } } }; Ok(result) diff --git a/src/ops/sources/local_file.rs b/src/ops/sources/local_file.rs index 3ebc65c1..6918639b 100644 --- a/src/ops/sources/local_file.rs +++ b/src/ops/sources/local_file.rs @@ -77,15 +77,19 @@ impl SourceFactoryBase for Factory { spec: &Spec, _context: &FlowInstanceContext, ) -> Result { - Ok(make_output_type(CollectionSchema::new_table( - Some("filename".to_string()), - make_output_type(BasicValueType::Str), - Some("content".to_string()), - make_output_type(if spec.binary { - BasicValueType::Bytes - } else { - BasicValueType::Str - }), + Ok(make_output_type(CollectionSchema::new( + CollectionKind::Table, + vec![ + FieldSchema::new("filename", make_output_type(BasicValueType::Str)), + FieldSchema::new( + "content", + make_output_type(if spec.binary { + BasicValueType::Bytes + } else { + BasicValueType::Str + }), + ), + ], ))) } From 4bd88a6af3a13cfd3e2c1f225af017e0210d912e Mon Sep 17 00:00:00 2001 From: LJ Date: Fri, 7 Mar 2025 14:22:54 -0800 Subject: [PATCH 3/3] Use `List` instead of `Table` for python `list` type. --- python/cocoindex/typing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cocoindex/typing.py b/python/cocoindex/typing.py index d75f2920..bfd526ac 100644 --- a/python/cocoindex/typing.py +++ b/python/cocoindex/typing.py @@ -55,7 +55,7 @@ def _dump_type(t, metadata): } elif dataclasses.is_dataclass(elem_type): encoded_type = { - 'kind': 'Table', + 'kind': 'List', 'row': { 'fields': _dump_fields_schema(elem_type) }, } else: