From baa69f65a213ad271180f6c3fb3fc669124d4b5e Mon Sep 17 00:00:00 2001 From: LJ Date: Mon, 28 Apr 2025 10:33:12 -0700 Subject: [PATCH 1/7] feat(types): rename table types List->LTable, Table->KTable, etc. --- Cargo.lock | 521 ++++++++++++++---------- examples/manuals_llm_extraction/main.py | 8 +- python/cocoindex/convert.py | 6 +- python/cocoindex/flow.py | 4 +- python/cocoindex/typing.py | 26 +- src/base/json_schema.rs | 2 +- src/base/schema.rs | 52 +-- src/base/spec.rs | 2 +- src/base/value.rs | 69 ++-- src/builder/analyzer.rs | 52 ++- src/builder/flow_builder.rs | 8 +- src/execution/evaluator.rs | 52 ++- src/ops/functions/split_recursively.rs | 13 +- src/ops/sdk.rs | 4 +- src/ops/sources/google_drive.rs | 4 +- src/ops/sources/local_file.rs | 4 +- src/ops/storages/neo4j.rs | 10 +- src/py/convert.rs | 16 +- src/service/flows.rs | 6 +- 19 files changed, 470 insertions(+), 389 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7257834e..e8022c1f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,7 +26,7 @@ dependencies = [ "cfg-if", "once_cell", "version_check", - "zerocopy", + "zerocopy 0.7.35", ] [[package]] @@ -111,9 +111,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.97" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" [[package]] name = "arraydeque" @@ -123,9 +123,9 @@ checksum = "7d902e3d592a523def97af8f317b08ce16b7ab854c1985a0c671e6f15cebc236" [[package]] name = "async-openai" -version = "0.28.0" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36c566b15aa847e60a9e6c9b9b4b9d4be94bbf776804624279afa69559fea7e1" +checksum = "14d76e2f5af19477d6254415acc95ba97c6cc6f3b1e3cb4676b7f0fab8194298" dependencies = [ "async-openai-macros", "backoff", @@ -155,7 +155,7 @@ checksum = 
"0289cba6d5143bfe8251d57b4a8cac036adf158525a76533a7082ba65ec76398" dependencies = [ "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] @@ -177,7 +177,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] @@ -188,7 +188,7 @@ checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] @@ -224,9 +224,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.28.0" +version = "0.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9f7720b74ed28ca77f90769a71fd8c637a0137f6fae4ae947e1050229cff57f" +checksum = "bfa9b6986f250236c27e5a204062434a773a13243d2ffc2955f37bdba4c5c6a1" dependencies = [ "bindgen", "cc", @@ -321,7 +321,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1" dependencies = [ "futures-core", - "getrandom 0.2.15", + "getrandom 0.2.16", "instant", "pin-project-lite", "rand 0.8.5", @@ -357,9 +357,9 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "base64ct" -version = "1.6.0" +version = "1.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" +checksum = "89e25b6adfb930f02d1981565a6e5d9c547ac15a96606256d3b59040e5cd4ca3" [[package]] name = "bindgen" @@ -380,7 +380,7 @@ dependencies = [ "regex", "rustc-hash 1.1.0", "shlex", - "syn 2.0.99", + "syn 2.0.101", "which", ] @@ -413,9 +413,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.11.3" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "531a9155a481e2ee699d4f98f43c0ca4ff8ee1bfd55c31e9e98fb29d2b176fe0" +checksum = 
"234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" dependencies = [ "memchr", "serde", @@ -444,9 +444,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.16" +version = "1.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c" +checksum = "04da6a0d40b948dfc4fa8f5bbf402b0fc1a64a28dbf7d12ffd683550f2c1b63a" dependencies = [ "jobserver", "libc", @@ -558,7 +558,7 @@ dependencies = [ "hyper-rustls", "hyper-util", "indenter", - "indexmap 2.8.0", + "indexmap 2.9.0", "indoc", "itertools 0.14.0", "json5", @@ -709,7 +709,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.16", "once_cell", "tiny-keccak", ] @@ -804,9 +804,9 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.14" +version = "0.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" dependencies = [ "crossbeam-utils", ] @@ -844,9 +844,9 @@ dependencies = [ [[package]] name = "darling" -version = "0.20.10" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" dependencies = [ "darling_core", "darling_macro", @@ -854,27 +854,27 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.20.10" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" 
dependencies = [ "fnv", "ident_case", "proc-macro2", "quote", "strsim", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] name = "darling_macro" -version = "0.20.10" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core", "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] @@ -909,9 +909,9 @@ dependencies = [ [[package]] name = "der" -version = "0.7.9" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f55bf8e7b65898637379c1b74eb1551107c8294ed26d855ceb9fd1a09cfc9bc0" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" dependencies = [ "const-oid", "pem-rfc7468", @@ -957,7 +957,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] @@ -967,7 +967,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] @@ -990,7 +990,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] @@ -1022,9 +1022,9 @@ checksum = "1c7a8fb8a9fbf66c1f703fe16184d10ca0ee9d23be5b4436400408ba54a95005" [[package]] name = "either" -version = "1.14.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7914353092ddf589ad78f25c5c1c21b7f80b0ff8621e7c814c3485b5306da9d" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" dependencies = [ "serde", ] @@ -1050,9 +1050,9 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.11.7" +version = "0.11.8" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3716d7a920fb4fac5d84e9d4bce8ceb321e9414b4409da61b07b75c1e3d0697" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" dependencies = [ "anstream", "anstyle", @@ -1069,9 +1069,9 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.10" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +checksum = "976dd42dc7e85965fe702eb8164f21f450704bdde31faefd6471dba214cb594e" dependencies = [ "libc", "windows-sys 0.59.0", @@ -1118,9 +1118,9 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "flate2" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11faaf5a5236997af9848be0bef4db95824b1d534ebc64d0f0c6cf3e67bd38dc" +checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" dependencies = [ "crc32fast", "miniz_oxide", @@ -1145,9 +1145,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "foldhash" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" [[package]] name = "form_urlencoded" @@ -1231,7 +1231,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] @@ -1282,9 +1282,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" 
+checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "js-sys", @@ -1295,14 +1295,16 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0" dependencies = [ "cfg-if", + "js-sys", "libc", - "wasi 0.13.3+wasi-0.2.2", - "windows-targets 0.52.6", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", + "wasm-bindgen", ] [[package]] @@ -1375,9 +1377,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5017294ff4bb30944501348f6f8e42e6ad28f42c8bbef7a74029aff064a4e3c2" +checksum = "75249d144030531f8dee69fe9cea04d3edf809a017ae445e2abdff6629e86633" dependencies = [ "atomic-waker", "bytes", @@ -1385,7 +1387,7 @@ dependencies = [ "futures-core", "futures-sink", "http", - "indexmap 2.8.0", + "indexmap 2.9.0", "slab", "tokio", "tokio-util", @@ -1497,9 +1499,9 @@ dependencies = [ [[package]] name = "http" -version = "1.2.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" +checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" dependencies = [ "bytes", "fnv", @@ -1623,14 +1625,15 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.61" +version = "0.1.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", + "log", "wasm-bindgen", "windows-core", ] @@ -1685,9 
+1688,9 @@ dependencies = [ [[package]] name = "icu_locid_transform_data" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" +checksum = "7515e6d781098bf9f7205ab3fc7e9709d34554ae0b21ddbcb5febfa4bc7df11d" [[package]] name = "icu_normalizer" @@ -1709,9 +1712,9 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" +checksum = "c5e8338228bdc8ab83303f16b797e177953730f601a96c25d10cb3ab0daa0cb7" [[package]] name = "icu_properties" @@ -1730,9 +1733,9 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" +checksum = "85fb8799753b75aee8d2a21d7c14d9f38921b54b3dbda10f5a3c7a7b82dba5e2" [[package]] name = "icu_provider" @@ -1759,7 +1762,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] @@ -1808,9 +1811,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.8.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3954d50fe15b02142bf25d3b8bdadb634ec3948f103d04ffe3031bc8fe9d7058" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", "hashbrown 0.15.2", @@ -1879,9 +1882,9 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "jiff" -version = "0.2.4" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d699bc6dfc879fb1bf9bdff0d4c56f0884fc6f0d0eb0fba397a6d00cd9a6b85e" +checksum = "5a064218214dc6a10fbae5ec5fa888d80c45d611aba169222fc272072bf7aef6" dependencies = [ "jiff-static", "log", @@ -1892,21 +1895,22 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.4" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d16e75759ee0aa64c57a56acbf43916987b20c77373cb7e808979e02b93c9f9" +checksum = "199b7932d97e325aff3a7030e141eafe7f2c6268e1d1b24859b753a627f45254" dependencies = [ "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] name = "jobserver" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" dependencies = [ + "getrandom 0.3.2", "libc", ] @@ -1948,9 +1952,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.171" +version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "libloading" @@ -1964,9 +1968,9 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.11" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" +checksum = "c9627da5196e5d8ed0b0495e61e518847578da83483c37288316d9b2e03a7f72" [[package]] name = "libsqlite3-sys" @@ -1984,6 +1988,12 @@ version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +[[package]] +name = "linux-raw-sys" +version = "0.9.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" + [[package]] name = "litemap" version = "0.7.5" @@ -2070,9 +2080,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.8.5" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5" +checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" dependencies = [ "adler2", ] @@ -2140,7 +2150,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53a0d57c55d2d1dc62a2b1d16a0a1079eb78d67c36bdf468d582ab4482ec7002" dependencies = [ "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] @@ -2236,9 +2246,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.20.3" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "openssl-probe" @@ -2329,9 +2339,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pest" -version = "2.7.15" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b7cafe60d6cf8e62e1b9b2ea516a089c008945bb5a275416789e7db0bc199dc" +checksum = "198db74531d58c70a361c42201efde7e2591e976d518caf7662a47dc5720e7b6" dependencies = [ "memchr", "thiserror 2.0.12", @@ -2340,9 +2350,9 @@ dependencies = [ [[package]] name = "pest_derive" -version = "2.7.15" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "816518421cfc6887a0d62bf441b6ffb4536fcc926395a69e1a85852d4363f57e" +checksum = "d725d9cfd79e87dccc9341a2ef39d1b6f6353d68c4b33c177febbe1a402c97c5" dependencies = [ 
"pest", "pest_generator", @@ -2350,22 +2360,22 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.7.15" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d1396fd3a870fc7838768d171b4616d5c91f6cc25e377b673d714567d99377b" +checksum = "db7d01726be8ab66ab32f9df467ae8b1148906685bbe75c82d1e65d7f5b3f841" dependencies = [ "pest", "pest_meta", "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] name = "pest_meta" -version = "2.7.15" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1e58089ea25d717bfd31fb534e4f3afcc2cc569c70de3e239778991ea3b7dea" +checksum = "7f9f832470494906d1fca5329f8ab5791cc60beb230c74815dff541cbd2b5ca0" dependencies = [ "once_cell", "pest", @@ -2436,7 +2446,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] @@ -2501,11 +2511,11 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" [[package]] name = "ppv-lite86" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ - "zerocopy", + "zerocopy 0.8.25", ] [[package]] @@ -2515,14 +2525,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "664ec5419c51e34154eec046ebcba56312d5a2fc3b09a06da188e1ad21afadf6" dependencies = [ "proc-macro2", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] name = "proc-macro2" -version = "1.0.94" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ 
"unicode-ident", ] @@ -2547,7 +2557,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] @@ -2561,9 +2571,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.24.1" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17da310086b068fbdcefbba30aeb3721d5bb9af8db4987d6735b2183ca567229" +checksum = "e5203598f366b11a02b13aa20cab591229ff0a89fd121a308a5df751d5fc9219" dependencies = [ "cfg-if", "chrono", @@ -2593,9 +2603,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.24.1" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e27165889bd793000a098bb966adc4300c312497ea25cf7a690a9f0ac5aa5fc1" +checksum = "99636d423fa2ca130fa5acde3059308006d46f98caac629418e53f7ebb1e9999" dependencies = [ "once_cell", "target-lexicon", @@ -2603,9 +2613,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.24.1" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05280526e1dbf6b420062f3ef228b78c0c54ba94e157f5cb724a609d0f2faabc" +checksum = "78f9cf92ba9c409279bc3305b5409d90db2d2c22392d443a87df3a1adad59e33" dependencies = [ "libc", "pyo3-build-config", @@ -2613,27 +2623,27 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.24.1" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c3ce5686aa4d3f63359a5100c62a127c9f15e8398e5fdeb5deef1fed5cd5f44" +checksum = "0b999cb1a6ce21f9a6b147dcf1be9ffedf02e0043aec74dc390f3007047cecd9" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] name = "pyo3-macros-backend" -version = "0.24.1" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4cf6faa0cbfb0ed08e89beb8103ae9724eb4750e3a78084ba4017cbe94f3855" +checksum = 
"822ece1c7e1012745607d5cf0bcb2874769f0f7cb34c4cde03b9358eb9ef911a" dependencies = [ "heck", "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] @@ -2669,11 +2679,12 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.6" +version = "0.11.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef" +checksum = "c3bd15a6f2967aef83887dcb9fec0014580467e33720d073560cf015a5683012" dependencies = [ "bytes", + "cfg_aliases", "pin-project-lite", "quinn-proto", "quinn-udp", @@ -2683,17 +2694,18 @@ dependencies = [ "thiserror 2.0.12", "tokio", "tracing", + "web-time", ] [[package]] name = "quinn-proto" -version = "0.11.9" +version = "0.11.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" +checksum = "bcbafbbdbb0f638fe3f35f3c56739f77a8a1d070cb25603226c83339b391472b" dependencies = [ "bytes", - "getrandom 0.2.15", - "rand 0.8.5", + "getrandom 0.3.2", + "rand 0.9.1", "ring", "rustc-hash 2.1.1", "rustls", @@ -2707,9 +2719,9 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.10" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e46f3055866785f6b92bc6164b76be02ca8f2eb4b002c0354b28cf4c119e5944" +checksum = "541d0f57c6ec747a90738a52741d3221f7960e8ac2f0ff4b1a63680e033b4ab5" dependencies = [ "cfg_aliases", "libc", @@ -2721,13 +2733,19 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.39" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1f1914ce909e1658d9907913b4b91947430c7d9be598b15a1912935b8c04801" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + [[package]] name = "rand" version = "0.8.5" @@ -2775,7 +2793,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.16", ] [[package]] @@ -2784,14 +2802,14 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom 0.3.1", + "getrandom 0.3.2", ] [[package]] name = "redox_syscall" -version = "0.5.10" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b8c0c260b63a8219631167be35e6a988e9554dbd323f8bd08439c8ed1302bd1" +checksum = "d2f103c6d277498fbceb16e84d317e2a400f160f46904d5f5410848c829511a3" dependencies = [ "bitflags", ] @@ -2918,7 +2936,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.15", + "getrandom 0.2.16", "libc", "untrusted", "windows-sys 0.52.0", @@ -2938,9 +2956,9 @@ dependencies = [ [[package]] name = "rsa" -version = "0.9.7" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47c75d7c5c6b673e58bf54d8544a9f432e3a925b0e80f7cd3602ab5c50c55519" +checksum = "78928ac1ed176a5ca1d17e578a1825f3d81ca54cf41053a592584b020cfd691b" dependencies = [ "const-oid", "digest", @@ -2993,15 +3011,28 @@ dependencies = [ "bitflags", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + +[[package]] +name = "rustix" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d97817398dd4bb2e6da002002db259209759911da105da92bec29ccb12cf58bf" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys 0.9.4", "windows-sys 0.59.0", ] [[package]] name 
= "rustls" -version = "0.23.25" +version = "0.23.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "822ee9188ac4ec04a2f0531e55d035fb2de73f18b41a63c70c2712503b6fb13c" +checksum = "df51b5869f3a441595eac5e8ff14d486ff285f7b8c0df8770e49c3b56351f0f0" dependencies = [ "aws-lc-rs", "log", @@ -3058,9 +3089,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.0" +version = "0.103.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0aa4eeac2588ffff23e9d7a7e9b3f971c5fb5b7ebc9452745e0c232c64f83b2f" +checksum = "fef8b8769aaccf73098557a87cd1816b4f9c7c16811c9c77142aa695c16f2c03" dependencies = [ "aws-lc-rs", "ring", @@ -3110,7 +3141,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] @@ -3194,7 +3225,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] @@ -3205,7 +3236,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] @@ -3215,7 +3246,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d2de91cf02bbc07cde38891769ccd5d4f073d22a40683aa4bc7a95781aaa2c4" dependencies = [ "form_urlencoded", - "indexmap 2.8.0", + "indexmap 2.9.0", "itoa", "ryu", "serde", @@ -3227,7 +3258,7 @@ version = "1.0.140" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" dependencies = [ - "indexmap 2.8.0", + "indexmap 2.9.0", "itoa", "memchr", "ryu", @@ -3275,7 +3306,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.8.0", + "indexmap 2.9.0", "serde", "serde_derive", "serde_json", @@ -3292,7 +3323,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", ] 
[[package]] @@ -3334,9 +3365,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook-registry" -version = "1.4.2" +version = "1.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" +checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410" dependencies = [ "libc", ] @@ -3368,9 +3399,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.14.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" +checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" dependencies = [ "serde", ] @@ -3406,9 +3437,9 @@ dependencies = [ [[package]] name = "sqlx" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4410e73b3c0d8442c5f99b425d7a435b5ee0ae4167b3196771dd3f7a01be745f" +checksum = "f3c3a85280daca669cfd3bcb68a337882a8bc57ec882f72c5d13a430613a738e" dependencies = [ "sqlx-core", "sqlx-macros", @@ -3419,10 +3450,11 @@ dependencies = [ [[package]] name = "sqlx-core" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a007b6936676aa9ab40207cde35daab0a04b823be8ae004368c0793b96a61e0" +checksum = "f743f2a3cea30a58cd479013f75550e879009e3a02f616f18ca699335aa248c3" dependencies = [ + "base64 0.22.1", "bytes", "chrono", "crc", @@ -3435,7 +3467,7 @@ dependencies = [ "futures-util", "hashbrown 0.15.2", "hashlink 0.10.0", - "indexmap 2.8.0", + "indexmap 2.9.0", "log", "memchr", "once_cell", @@ -3454,22 +3486,22 @@ dependencies = [ [[package]] name = "sqlx-macros" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3112e2ad78643fef903618d78cf0aec1cb3134b019730edb039b69eaf531f310" +checksum = 
"7f4200e0fde19834956d4252347c12a083bdcb237d7a1a1446bffd8768417dce" dependencies = [ "proc-macro2", "quote", "sqlx-core", "sqlx-macros-core", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] name = "sqlx-macros-core" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e9f90acc5ab146a99bf5061a7eb4976b573f560bc898ef3bf8435448dd5e7ad" +checksum = "882ceaa29cade31beca7129b6beeb05737f44f82dbe2a9806ecea5a7093d00b7" dependencies = [ "dotenvy", "either", @@ -3485,7 +3517,7 @@ dependencies = [ "sqlx-mysql", "sqlx-postgres", "sqlx-sqlite", - "syn 2.0.99", + "syn 2.0.101", "tempfile", "tokio", "url", @@ -3493,9 +3525,9 @@ dependencies = [ [[package]] name = "sqlx-mysql" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4560278f0e00ce64938540546f59f590d60beee33fffbd3b9cd47851e5fff233" +checksum = "0afdd3aa7a629683c2d750c2df343025545087081ab5942593a5288855b1b7a7" dependencies = [ "atoi", "base64 0.22.1", @@ -3537,9 +3569,9 @@ dependencies = [ [[package]] name = "sqlx-postgres" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5b98a57f363ed6764d5b3a12bfedf62f07aa16e1856a7ddc2a0bb190a959613" +checksum = "a0bedbe1bbb5e2615ef347a5e9d8cd7680fb63e77d9dafc0f29be15e53f1ebe6" dependencies = [ "atoi", "base64 0.22.1", @@ -3576,9 +3608,9 @@ dependencies = [ [[package]] name = "sqlx-sqlite" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f85ca71d3a5b24e64e1d08dd8fe36c6c95c339a896cc33068148906784620540" +checksum = "c26083e9a520e8eb87a06b12347679b142dc2ea29e6e409f805644a7a979a5bc" dependencies = [ "atoi", "chrono", @@ -3594,6 +3626,7 @@ dependencies = [ "serde", "serde_urlencoded", "sqlx-core", + "thiserror 2.0.12", "tracing", "url", "uuid", @@ -3647,9 +3680,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.99" +version 
= "2.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e02e925281e18ffd9d640e234264753c43edc62d64b2d4cf898f1bc5e75f3fc2" +checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" dependencies = [ "proc-macro2", "quote", @@ -3673,7 +3706,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] @@ -3684,15 +3717,14 @@ checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" [[package]] name = "tempfile" -version = "3.17.1" +version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e5a0acb1f3f55f65cc4a866c361b2fb2a0ff6366785ae6fbb5f85df07ba230" +checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf" dependencies = [ - "cfg-if", "fastrand", - "getrandom 0.3.1", + "getrandom 0.3.2", "once_cell", - "rustix", + "rustix 1.0.5", "windows-sys 0.59.0", ] @@ -3722,7 +3754,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] @@ -3733,7 +3765,7 @@ checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] @@ -3815,9 +3847,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.44.1" +version = "1.44.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f382da615b842244d4b8738c82ed1275e6c5dd90c459a30941cd07080b06c91a" +checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48" dependencies = [ "backtrace", "bytes", @@ -3840,7 +3872,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", ] 
[[package]] @@ -3866,9 +3898,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.14" +version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b9590b93e6fcc1739458317cccd391ad3955e2bde8913edf6f95f9e65a8f034" +checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df" dependencies = [ "bytes", "futures-core", @@ -3879,9 +3911,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.20" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd87a5cdd6ffab733b2f74bc4fd7ee5fff6634124999ac278c35fc78c6120148" +checksum = "900f6c86a685850b1bc9f6223b20125115ee3f31e01207d81655bbcc0aea9231" dependencies = [ "serde", "serde_spanned", @@ -3891,26 +3923,33 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.8" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" +checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.22.24" +version = "0.22.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474" +checksum = "10558ed0bd2a1562e630926a2d1f0b98c827da99fabd3fe20920a59642504485" dependencies = [ - "indexmap 2.8.0", + "indexmap 2.9.0", "serde", "serde_spanned", "toml_datetime", + "toml_write", "winnow", ] +[[package]] +name = "toml_write" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28391a4201ba7eb1984cfeb6862c0b3ea2cfe23332298967c749dddc0d6cd976" + [[package]] name = "tonic" version = "0.12.3" @@ -4029,7 +4068,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", ] [[package]] @@ -4433,7 
+4472,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" dependencies = [ - "getrandom 0.3.1", + "getrandom 0.3.2", "serde", ] @@ -4472,9 +4511,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasi" -version = "0.13.3+wasi-0.2.2" +version = "0.14.2+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" dependencies = [ "wit-bindgen-rt", ] @@ -4507,7 +4546,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", "wasm-bindgen-shared", ] @@ -4542,7 +4581,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4591,9 +4630,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.26.8" +version = "0.26.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2210b291f7ea53617fbafcc4939f10914214ec15aace5ba62293a668f322c5c9" +checksum = "29aad86cec885cafd03e8305fd727c418e970a521322c91688414d5b8efba16b" dependencies = [ "rustls-pki-types", ] @@ -4607,14 +4646,14 @@ dependencies = [ "either", "home", "once_cell", - "rustix", + "rustix 0.38.44", ] [[package]] name = "whoami" -version = "1.5.2" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "372d5b87f58ec45c384ba03563b03544dc5fadc3983e434b286913f5b4a9bb6d" +checksum = "6994d13118ab492c3c80c1f81928718159254c53c472bf9ce36f8dae4add02a7" dependencies = [ "redox_syscall", "wasite", @@ -4622,18 +4661,44 @@ dependencies = [ [[package]] name = "windows-core" -version = "0.52.0" +version = "0.61.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +checksum = "4763c1de310c86d75a878046489e2e5ba02c649d185f21c67d4cf8a56d098980" dependencies = [ - "windows-targets 0.52.6", + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings 0.4.0", +] + +[[package]] +name = "windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", ] [[package]] name = "windows-link" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dccfd733ce2b1753b03b6d3c65edf020262ea35e20ccdf3e288043e6dd620e3" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" [[package]] name = "windows-registry" @@ -4642,15 +4707,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4286ad90ddb45071efd1a66dfa43eb02dd0dfbae1545ad6cc3c51cf34d7e8ba3" dependencies = [ "windows-result", - "windows-strings", + "windows-strings 0.3.1", "windows-targets 0.53.0", ] [[package]] name = "windows-result" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06374efe858fab7e4f881500e6e86ec8bc28f9462c47e5a9941a0142ad86b189" +checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252" dependencies = [ "windows-link", ] @@ -4664,6 +4729,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-strings" +version = "0.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ba9642430ee452d5a7aa78d72907ebe8cfda358e8cb7918a2050581322f97" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -4878,18 +4952,18 @@ checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" [[package]] name = "winnow" -version = "0.7.3" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e7f4ea97f6f78012141bcdb6a216b2609f0979ada50b20ca5b52dde2eac2bb1" +checksum = "6cb8234a863ea0e8cd7284fcdd4f145233eb00fee02bbdd9861aec44e6477bc5" dependencies = [ "memchr", ] [[package]] name = "wit-bindgen-rt" -version = "0.33.0" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ "bitflags", ] @@ -4948,7 +5022,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", "synstructure", ] @@ -5011,8 +5085,16 @@ version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ - "byteorder", - "zerocopy-derive", + "zerocopy-derive 0.7.35", +] + +[[package]] +name = "zerocopy" +version = "0.8.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" +dependencies = [ + "zerocopy-derive 0.8.25", ] [[package]] @@ -5023,7 +5105,18 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.25" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", ] [[package]] @@ -5043,7 +5136,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", "synstructure", ] @@ -5072,5 +5165,5 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.99", + "syn 2.0.101", ] diff --git a/examples/manuals_llm_extraction/main.py b/examples/manuals_llm_extraction/main.py index 2115731f..94e816de 100644 --- a/examples/manuals_llm_extraction/main.py +++ b/examples/manuals_llm_extraction/main.py @@ -40,7 +40,7 @@ class ArgInfo: class MethodInfo: """Information about a method.""" name: str - args: cocoindex.typing.List[ArgInfo] + args: list[ArgInfo] description: str @dataclasses.dataclass @@ -48,15 +48,15 @@ class ClassInfo: """Information about a class.""" name: str description: str - methods: cocoindex.typing.List[MethodInfo] + methods: list[MethodInfo] @dataclasses.dataclass class ModuleInfo: """Information about a Python module.""" title: str description: str - classes: cocoindex.typing.Table[ClassInfo] - methods: cocoindex.typing.Table[MethodInfo] + classes: list[ClassInfo] + methods: list[MethodInfo] @dataclasses.dataclass class ModuleSummary: diff --git a/python/cocoindex/convert.py b/python/cocoindex/convert.py index 056e6cd4..acc170ad 100644 --- a/python/cocoindex/convert.py +++ b/python/cocoindex/convert.py @@ -8,7 +8,7 @@ from enum import Enum from typing import Any, Callable, get_origin -from .typing import analyze_type_info, encode_enriched_type, COLLECTION_TYPES +from .typing import analyze_type_info, encode_enriched_type, TABLE_TYPES def to_engine_value(value: Any) -> Any: """Convert a Python value to an engine value.""" @@ -40,7 +40,7 @@ def 
make_engine_value_converter( src_type_kind = src_type['kind'] if dst_annotation is inspect.Parameter.empty: - if src_type_kind == 'Struct' or src_type_kind in COLLECTION_TYPES: + if src_type_kind == 'Struct' or src_type_kind in TABLE_TYPES: raise ValueError(f"Missing type annotation for `{''.join(field_path)}`." f"It's required for {src_type_kind} type.") return lambda value: value @@ -56,7 +56,7 @@ def make_engine_value_converter( return _make_engine_struct_value_converter( field_path, src_type['fields'], dst_type_info.dataclass_type) - if src_type_kind in COLLECTION_TYPES: + if src_type_kind in TABLE_TYPES: field_path.append('[*]') elem_type_info = analyze_type_info(dst_type_info.elem_type) if elem_type_info.dataclass_type is None: diff --git a/python/cocoindex/flow.py b/python/cocoindex/flow.py index 064a1c52..a30160fc 100644 --- a/python/cocoindex/flow.py +++ b/python/cocoindex/flow.py @@ -142,9 +142,9 @@ def __getitem__(self, field_name: str) -> DataSlice: def row(self) -> DataScope: """ - Return a scope representing each entry of the collection. + Return a scope representing each row of the table. 
""" - row_scope = self._state.engine_data_slice.collection_entry_scope() + row_scope = self._state.engine_data_slice.table_row_scope() return DataScope(self._state.flow_builder_state, row_scope) def for_each(self, f: Callable[[DataScope], None]) -> None: diff --git a/python/cocoindex/typing.py b/python/cocoindex/typing.py index 1c6382ab..24e494b7 100644 --- a/python/cocoindex/typing.py +++ b/python/cocoindex/typing.py @@ -30,29 +30,29 @@ def __init__(self, key: str, value: Any): LocalDateTime = Annotated[datetime.datetime, TypeKind('LocalDateTime')] OffsetDateTime = Annotated[datetime.datetime, TypeKind('OffsetDateTime')] -COLLECTION_TYPES = ('Table', 'List') +TABLE_TYPES = ('KTable', 'LTable') R = TypeVar("R") if TYPE_CHECKING: - Table = Annotated[list[R], TypeKind('Table')] - List = Annotated[list[R], TypeKind('List')] + KTable = Annotated[list[R], TypeKind('KTable')] + LTable = Annotated[list[R], TypeKind('LTable')] else: # pylint: disable=too-few-public-methods - class Table: # type: ignore[unreachable] + class KTable: # type: ignore[unreachable] """ - A Table type, which has a list of rows. The first field of each row is the key. + A KTable type, which is a table that the first field is the key. """ def __class_getitem__(cls, item: type[R]): - return Annotated[list[item], TypeKind('Table')] + return Annotated[list[item], TypeKind('KTable')] # pylint: disable=too-few-public-methods - class List: # type: ignore[unreachable] + class LTable: # type: ignore[unreachable] """ - A List type, which has a list of ordered rows. + A LTable type, which is a table that has a list of ordered rows. 
""" def __class_getitem__(cls, item: type[R]): - return Annotated[list[item], TypeKind('List')] + return Annotated[list[item], TypeKind('LTable')] @dataclasses.dataclass class AnalyzedTypeInfo: @@ -113,8 +113,8 @@ def analyze_type_info(t) -> AnalyzedTypeInfo: dataclass_type = t elif base_type is collections.abc.Sequence or base_type is list: if kind is None: - kind = 'Vector' if vector_info is not None else 'List' - elif not (kind == 'Vector' or kind in COLLECTION_TYPES): + kind = 'Vector' if vector_info is not None else 'LTable' + elif not (kind == 'Vector' or kind in TABLE_TYPES): raise ValueError(f"Unexpected type kind for list: {kind}") args = typing.get_args(t) @@ -123,7 +123,7 @@ def analyze_type_info(t) -> AnalyzedTypeInfo: elem_type = args[0] elif kind is None: if base_type is collections.abc.Sequence or base_type is list: - kind = 'Vector' if vector_info is not None else 'List' + kind = 'Vector' if vector_info is not None else 'LTable' elif t is bytes: kind = 'Bytes' elif t is str: @@ -179,7 +179,7 @@ def _encode_type(type_info: AnalyzedTypeInfo) -> dict[str, Any]: encoded_type['element_type'] = _encode_type(analyze_type_info(type_info.elem_type)) encoded_type['dimension'] = type_info.vector_info.dim - elif type_info.kind in COLLECTION_TYPES: + elif type_info.kind in TABLE_TYPES: if type_info.elem_type is None: raise ValueError(f"{type_info.kind} type must have an element type") row_type_info = analyze_type_info(type_info.elem_type) diff --git a/src/base/json_schema.rs b/src/base/json_schema.rs index 4e811210..2e787199 100644 --- a/src/base/json_schema.rs +++ b/src/base/json_schema.rs @@ -219,7 +219,7 @@ impl JsonSchemaBuilder { match value_type { schema::ValueType::Basic(b) => self.for_basic_value_type(b, field_path), schema::ValueType::Struct(s) => self.for_struct_schema(s, field_path), - schema::ValueType::Collection(c) => SchemaObject { + schema::ValueType::Table(c) => SchemaObject { instance_type: 
Some(SingleOrVec::Single(Box::new(InstanceType::Array))), array: Some(Box::new(ArrayValidation { items: Some(SingleOrVec::Single(Box::new( diff --git a/src/base/schema.rs b/src/base/schema.rs index b4588798..39463b41 100644 --- a/src/base/schema.rs +++ b/src/base/schema.rs @@ -116,51 +116,51 @@ impl std::fmt::Display for StructSchema { } #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] -pub enum CollectionKind { - /// A generic collection can have any row type. - Collection, +pub enum TableKind { + /// An table with unordered rows, without key. + UTable, /// A table's first field is the key. - Table, - /// A list is a table whose key type is int64 starting from 0 continuously.. - List, + KTable, + /// A table whose rows orders are preserved. + LTable, } -impl std::fmt::Display for CollectionKind { +impl std::fmt::Display for TableKind { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - CollectionKind::Collection => write!(f, "Collection"), - CollectionKind::Table => write!(f, "Table"), - CollectionKind::List => write!(f, "List"), + TableKind::UTable => write!(f, "Table"), + TableKind::KTable => write!(f, "KTable"), + TableKind::LTable => write!(f, "LTable"), } } } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -pub struct CollectionSchema { - pub kind: CollectionKind, +pub struct TableSchema { + pub kind: TableKind, pub row: StructSchema, #[serde(default = "Vec::new", skip_serializing_if = "Vec::is_empty")] pub collectors: Vec>>, } -impl CollectionSchema { +impl TableSchema { pub fn has_key(&self) -> bool { match self.kind { - CollectionKind::Table => true, - CollectionKind::Collection | CollectionKind::List => false, + TableKind::KTable => true, + TableKind::UTable | TableKind::LTable => false, } } pub fn key_type(&self) -> Option<&EnrichedValueType> { match self.kind { - CollectionKind::Table => self + TableKind::KTable => self .row .fields .first() .as_ref() .map(|field| &field.value_type), - 
CollectionKind::Collection | CollectionKind::List => None, + TableKind::UTable | TableKind::LTable => None, } } @@ -180,7 +180,7 @@ impl CollectionSchema { } } -impl std::fmt::Display for CollectionSchema { +impl std::fmt::Display for TableSchema { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}({}", self.kind, self.row)?; for collector in self.collectors.iter() { @@ -191,8 +191,8 @@ impl std::fmt::Display for CollectionSchema { } } -impl CollectionSchema { - pub fn new(kind: CollectionKind, row: StructSchema) -> Self { +impl TableSchema { + pub fn new(kind: TableKind, row: StructSchema) -> Self { Self { kind, row, @@ -202,8 +202,8 @@ impl CollectionSchema { pub fn key_field(&self) -> Option<&FieldSchema> { match self.kind { - CollectionKind::Table => Some(self.row.fields.first().unwrap()), - CollectionKind::Collection | CollectionKind::List => None, + TableKind::KTable => Some(self.row.fields.first().unwrap()), + TableKind::UTable | TableKind::LTable => None, } } } @@ -217,7 +217,7 @@ pub enum ValueType { Basic(BasicValueType), #[serde(untagged)] - Collection(CollectionSchema), + Table(TableSchema), } impl ValueType { @@ -225,7 +225,7 @@ impl ValueType { match self { ValueType::Basic(_) => None, ValueType::Struct(_) => None, - ValueType::Collection(c) => c.key_type(), + ValueType::Table(c) => c.key_type(), } } @@ -234,7 +234,7 @@ impl ValueType { match self { ValueType::Basic(a) => ValueType::Basic(a.clone()), ValueType::Struct(a) => ValueType::Struct(a.without_attrs()), - ValueType::Collection(a) => ValueType::Collection(a.without_attrs()), + ValueType::Table(a) => ValueType::Table(a.without_attrs()), } } } @@ -307,7 +307,7 @@ impl std::fmt::Display for ValueType { match self { ValueType::Basic(b) => write!(f, "{}", b), ValueType::Struct(s) => write!(f, "{}", s), - ValueType::Collection(c) => write!(f, "{}", c), + ValueType::Table(c) => write!(f, "{}", c), } } } diff --git a/src/base/spec.rs b/src/base/spec.rs index 
09c229c6..0b61f0b2 100644 --- a/src/base/spec.rs +++ b/src/base/spec.rs @@ -185,7 +185,7 @@ pub struct TransformOpSpec { /// Apply reactive operations to each row of the input field. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ForEachOpSpec { - /// Mapping that provides a collection of rows to apply reactive operations to. + /// Mapping that provides a table to apply reactive operations to. pub field_path: FieldPath, pub op_scope: ReactiveOpScope, } diff --git a/src/base/value.rs b/src/base/value.rs index 98339709..c3266fb7 100644 --- a/src/base/value.rs +++ b/src/base/value.rs @@ -1,8 +1,8 @@ -use crate::{api_bail, api_error}; -use bytes::Bytes; use super::schema::*; +use crate::{api_bail, api_error}; use anyhow::Result; use base64::prelude::*; +use bytes::Bytes; use chrono::Offset; use log::warn; use serde::{ @@ -517,9 +517,9 @@ pub enum Value { Null, Basic(BasicValue), Struct(FieldValues), - Collection(Vec), - Table(BTreeMap), - List(Vec), + UTable(Vec), + KTable(BTreeMap), + LTable(Vec), } impl> From for Value { @@ -592,11 +592,11 @@ impl Value { .map(|v| Value::::from_alternative(v)) .collect(), }), - Value::Collection(v) => Value::Collection(v.into_iter().map(|v| v.into()).collect()), - Value::Table(v) => { - Value::Table(v.into_iter().map(|(k, v)| (k.clone(), v.into())).collect()) + Value::UTable(v) => Value::UTable(v.into_iter().map(|v| v.into()).collect()), + Value::KTable(v) => { + Value::KTable(v.into_iter().map(|(k, v)| (k.clone(), v.into())).collect()) } - Value::List(v) => Value::List(v.into_iter().map(|v| v.into()).collect()), + Value::LTable(v) => Value::LTable(v.into_iter().map(|v| v.into()).collect()), } } @@ -614,9 +614,11 @@ impl Value { .map(|v| Value::::from_alternative_ref(v)) .collect(), }), - Value::Collection(v) => Value::Collection(v.iter().map(|v| v.into()).collect()), - Value::Table(v) => Value::Table(v.iter().map(|(k, v)| (k.clone(), v.into())).collect()), - Value::List(v) => Value::List(v.iter().map(|v| 
v.into()).collect()), + Value::UTable(v) => Value::UTable(v.iter().map(|v| v.into()).collect()), + Value::KTable(v) => { + Value::KTable(v.iter().map(|(k, v)| (k.clone(), v.into())).collect()) + } + Value::LTable(v) => Value::LTable(v.iter().map(|v| v.into()).collect()), } } @@ -633,7 +635,7 @@ impl Value { .map(|v| v.into_key()) .collect::>>()?, ), - Value::Null | Value::Collection(_) | Value::Table(_) | Value::List(_) => { + Value::Null | Value::UTable(_) | Value::KTable(_) | Value::LTable(_) => { anyhow::bail!("invalid key value type") } }; @@ -649,7 +651,7 @@ impl Value { .map(|v| v.as_key()) .collect::>>()?, ), - Value::Null | Value::Collection(_) | Value::Table(_) | Value::List(_) => { + Value::Null | Value::UTable(_) | Value::KTable(_) | Value::LTable(_) => { anyhow::bail!("invalid key value type") } }; @@ -660,10 +662,10 @@ impl Value { match self { Value::Null => "null", Value::Basic(v) => v.kind(), - Value::Struct(_) => "struct", - Value::Collection(_) => "collection", - Value::Table(_) => "table", - Value::List(_) => "list", + Value::Struct(_) => "Struct", + Value::UTable(_) => "UTable", + Value::KTable(_) => "KTable", + Value::LTable(_) => "LTable", } } @@ -743,13 +745,6 @@ impl Value { _ => anyhow::bail!("expected struct value, but got {}", self.kind()), } } - - pub fn as_collection(&self) -> Result<&Vec> { - match self { - Value::Collection(v) => Ok(v), - _ => anyhow::bail!("expected collection value, but got {}", self.kind()), - } - } } #[derive(Debug, Clone)] @@ -939,15 +934,15 @@ impl serde::Serialize for Value { Value::Null => serializer.serialize_none(), Value::Basic(v) => v.serialize(serializer), Value::Struct(v) => v.serialize(serializer), - Value::Collection(v) => v.serialize(serializer), - Value::Table(m) => { + Value::UTable(v) => v.serialize(serializer), + Value::KTable(m) => { let mut seq = serializer.serialize_seq(Some(m.len()))?; for (k, v) in m.iter() { seq.serialize_element(&TableEntry(k, v))?; } seq.end() } - Value::List(v) => 
v.serialize(serializer), + Value::LTable(v) => v.serialize(serializer), } } } @@ -975,15 +970,15 @@ where (v, ValueType::Struct(s)) => { Value::::Struct(FieldValues::::from_json(v, &s.fields)?) } - (serde_json::Value::Array(v), ValueType::Collection(s)) => match s.kind { - CollectionKind::Collection => { + (serde_json::Value::Array(v), ValueType::Table(s)) => match s.kind { + TableKind::UTable => { let rows = v .into_iter() .map(|v| Ok(FieldValues::from_json(v, &s.row.fields)?.into())) .collect::>>()?; - Value::List(rows) + Value::LTable(rows) } - CollectionKind::Table => { + TableKind::KTable => { let rows = v .into_iter() .map(|v| { @@ -1027,14 +1022,14 @@ where } }) .collect::>>()?; - Value::Table(rows) + Value::KTable(rows) } - CollectionKind::List => { + TableKind::LTable => { let rows = v .into_iter() .map(|v| Ok(FieldValues::from_json(v, &s.row.fields)?.into())) .collect::>>()?; - Value::List(rows) + Value::LTable(rows) } }, (v, t) => { @@ -1061,7 +1056,7 @@ impl Serialize for TypedValue<'_> { values_iter: field_values.fields.iter(), } .serialize(serializer), - (ValueType::Collection(c), Value::Collection(rows) | Value::List(rows)) => { + (ValueType::Table(c), Value::UTable(rows) | Value::LTable(rows)) => { let mut seq = serializer.serialize_seq(Some(rows.len()))?; for row in rows { seq.serialize_element(&TypedFieldsValue { @@ -1071,7 +1066,7 @@ impl Serialize for TypedValue<'_> { } seq.end() } - (ValueType::Collection(c), Value::Table(rows)) => { + (ValueType::Table(c), Value::KTable(rows)) => { let mut seq = serializer.serialize_seq(Some(rows.len()))?; for (k, v) in rows { seq.serialize_element(&TypedFieldsValue { diff --git a/src/builder/analyzer.rs b/src/builder/analyzer.rs index 3d047518..d9d3289a 100644 --- a/src/builder/analyzer.rs +++ b/src/builder/analyzer.rs @@ -20,7 +20,7 @@ use futures::{future::try_join_all, FutureExt}; pub(super) enum ValueTypeBuilder { Basic(BasicValueType), Struct(StructSchemaBuilder), - Collection(CollectionSchemaBuilder), + 
Table(TableSchemaBuilder), } impl TryFrom<&ValueType> for ValueTypeBuilder { @@ -30,9 +30,7 @@ impl TryFrom<&ValueType> for ValueTypeBuilder { match value_type { ValueType::Basic(basic_type) => Ok(ValueTypeBuilder::Basic(basic_type.clone())), ValueType::Struct(struct_type) => Ok(ValueTypeBuilder::Struct(struct_type.try_into()?)), - ValueType::Collection(collection_type) => { - Ok(ValueTypeBuilder::Collection(collection_type.try_into()?)) - } + ValueType::Table(table_type) => Ok(ValueTypeBuilder::Table(table_type.try_into()?)), } } } @@ -44,9 +42,7 @@ impl TryInto for &ValueTypeBuilder { match self { ValueTypeBuilder::Basic(basic_type) => Ok(ValueType::Basic(basic_type.clone())), ValueTypeBuilder::Struct(struct_type) => Ok(ValueType::Struct(struct_type.try_into()?)), - ValueTypeBuilder::Collection(collection_type) => { - Ok(ValueType::Collection(collection_type.try_into()?)) - } + ValueTypeBuilder::Table(table_type) => Ok(ValueType::Table(table_type.try_into()?)), } } } @@ -113,15 +109,15 @@ impl TryInto for &StructSchemaBuilder { } #[derive(Debug)] -pub(super) struct CollectionSchemaBuilder { - pub kind: CollectionKind, +pub(super) struct TableSchemaBuilder { + pub kind: TableKind, pub sub_scope: Arc>, } -impl TryFrom<&CollectionSchema> for CollectionSchemaBuilder { +impl TryFrom<&TableSchema> for TableSchemaBuilder { type Error = anyhow::Error; - fn try_from(schema: &CollectionSchema) -> Result { + fn try_from(schema: &TableSchema) -> Result { Ok(Self { kind: schema.kind, sub_scope: Arc::new(Mutex::new(DataScopeBuilder { @@ -138,10 +134,10 @@ impl TryFrom<&CollectionSchema> for CollectionSchemaBuilder { } } -impl TryInto for &CollectionSchemaBuilder { +impl TryInto for &TableSchemaBuilder { type Error = anyhow::Error; - fn try_into(self) -> Result { + fn try_into(self) -> Result { let sub_scope = self.sub_scope.lock().unwrap(); let row = (&sub_scope.data).try_into()?; let collectors = sub_scope @@ -154,7 +150,7 @@ impl TryInto for &CollectionSchemaBuilder { spec: 
schema.schema.clone(), }) .collect(); - Ok(CollectionSchema { + Ok(TableSchema { kind: self.kind, row, collectors, @@ -177,27 +173,27 @@ fn try_make_common_value_type( let common_schema = try_merge_struct_schemas(struct_type1, struct_type2)?; ValueType::Struct(common_schema) } - (ValueType::Collection(collection_type1), ValueType::Collection(collection_type2)) => { - if collection_type1.kind != collection_type2.kind { + (ValueType::Table(table_type1), ValueType::Table(table_type2)) => { + if table_type1.kind != table_type2.kind { api_bail!( "Collection types are not compatible: {} vs {}", - collection_type1, - collection_type2 + table_type1, + table_type2 ); } - let row = try_merge_struct_schemas(&collection_type1.row, &collection_type2.row)?; + let row = try_merge_struct_schemas(&table_type1.row, &table_type2.row)?; - if collection_type1.collectors.len() != collection_type2.collectors.len() { + if table_type1.collectors.len() != table_type2.collectors.len() { api_bail!( "Collection types are not compatible as they have different collectors count: {} vs {}", - collection_type1, - collection_type2 + table_type1, + table_type2 ); } - let collectors = collection_type1 + let collectors = table_type1 .collectors .iter() - .zip(collection_type2.collectors.iter()) + .zip(table_type2.collectors.iter()) .map(|(c1, c2)| -> Result<_> { if c1.name != c2.name { api_bail!( @@ -214,13 +210,13 @@ fn try_make_common_value_type( }) .collect::>()?; - ValueType::Collection(CollectionSchema { - kind: collection_type1.kind, + ValueType::Table(TableSchema { + kind: table_type1.kind, row, collectors, }) } - (t1 @ (ValueType::Basic(_) | ValueType::Struct(_) | ValueType::Collection(_)), t2) => { + (t1 @ (ValueType::Basic(_) | ValueType::Struct(_) | ValueType::Table(_)), t2) => { api_bail!("Unmatched types:\n {t1}\n {t2}\n",) } }; @@ -764,7 +760,7 @@ impl AnalyzerContext<'_> { let (local_field_ref, value_type) = scope.data.analyze_field_path(&op.field_path)?; let sub_scope = match 
&value_type.typ { - ValueTypeBuilder::Collection(collection_type) => &collection_type.sub_scope, + ValueTypeBuilder::Table(table_type) => &table_type.sub_scope, _ => api_bail!( "ForEach only works on collection, field {} is not", op.field_path diff --git a/src/builder/flow_builder.rs b/src/builder/flow_builder.rs index c52a788b..b69160a2 100644 --- a/src/builder/flow_builder.rs +++ b/src/builder/flow_builder.rs @@ -129,7 +129,7 @@ impl DataScopeRef { .typ; } let scope_builder = match field_typ { - ValueTypeBuilder::Collection(collection_type) => collection_type.sub_scope.clone(), + ValueTypeBuilder::Table(table_type) => table_type.sub_scope.clone(), _ => api_bail!("expect collection type"), }; @@ -243,7 +243,7 @@ impl DataSlice { })) } - pub fn collection_entry_scope(&self) -> PyResult { + pub fn table_row_scope(&self) -> PyResult { let field_path = match self.value.as_ref() { spec::ValueMapping::Field(v) => &v.field_path, _ => return Err(PyException::new_err("expect field path")), @@ -902,9 +902,9 @@ impl FlowBuilder { let (_, field_type) = scope.data.analyze_field_path(field_path)?; let sub_scope = match &field_type.typ { - ValueTypeBuilder::Collection(collection_type) => &collection_type.sub_scope, + ValueTypeBuilder::Table(table_type) => &table_type.sub_scope, t => api_bail!( - "expect collection type, got {}", + "expect table type, got {}", TryInto::::try_into(t)? 
), }; diff --git a/src/execution/evaluator.rs b/src/execution/evaluator.rs index 71c8e4c5..5ec20b02 100644 --- a/src/execution/evaluator.rs +++ b/src/execution/evaluator.rs @@ -60,7 +60,7 @@ impl ScopeValueBuilder { fn augmented_from( source: &value::ScopeValue, - schema: &schema::CollectionSchema, + schema: &schema::TableSchema, ) -> Result { let val_index_base = if schema.has_key() { 1 } else { 0 }; let len = schema.row.fields.len() - val_index_base; @@ -98,19 +98,17 @@ fn augmented_value( .collect::>>()?, }) } - (value::Value::Collection(v), schema::ValueType::Collection(t)) => { - value::Value::Collection( - v.iter() - .map(|v| ScopeValueBuilder::augmented_from(v, t)) - .collect::>>()?, - ) - } - (value::Value::Table(v), schema::ValueType::Collection(t)) => value::Value::Table( + (value::Value::UTable(v), schema::ValueType::Table(t)) => value::Value::UTable( + v.iter() + .map(|v| ScopeValueBuilder::augmented_from(v, t)) + .collect::>>()?, + ), + (value::Value::KTable(v), schema::ValueType::Table(t)) => value::Value::KTable( v.iter() .map(|(k, v)| Ok((k.clone(), ScopeValueBuilder::augmented_from(v, t)?))) .collect::>>()?, ), - (value::Value::List(v), schema::ValueType::Collection(t)) => value::Value::List( + (value::Value::LTable(v), schema::ValueType::Table(t)) => value::Value::LTable( v.iter() .map(|v| ScopeValueBuilder::augmented_from(v, t)) .collect::>>()?, @@ -121,11 +119,11 @@ fn augmented_value( } enum ScopeKey<'a> { - /// For root struct and generic collection. + /// For root struct and UTable. None, - /// For table row. + /// For KTable row. MapKey(&'a value::KeyValue), - /// For list item. + /// For LTable row. 
ListIndex(usize), } @@ -340,14 +338,14 @@ async fn evaluate_op_scope( AnalyzedReactiveOp::ForEach(op) => { let target_field_schema = head_scope.get_field_schema(&op.local_field_ref)?; - let collection_schema = match &target_field_schema.value_type.typ { - schema::ValueType::Collection(cs) => cs, - _ => bail!("Expect target field to be a collection"), + let table_schema = match &target_field_schema.value_type.typ { + schema::ValueType::Table(cs) => cs, + _ => bail!("Expect target field to be a table"), }; let target_field = head_scope.get_value_field_builder(&op.local_field_ref); let task_futs = match target_field { - value::Value::Collection(v) => v + value::Value::UTable(v) => v .iter() .map(|item| { evaluate_child_op_scope( @@ -356,13 +354,13 @@ async fn evaluate_op_scope( ScopeEntry { key: ScopeKey::None, value: item, - schema: &collection_schema.row, + schema: &table_schema.row, }, memory, ) }) .collect::>(), - value::Value::Table(v) => v + value::Value::KTable(v) => v .iter() .map(|(k, v)| { evaluate_child_op_scope( @@ -371,13 +369,13 @@ async fn evaluate_op_scope( ScopeEntry { key: ScopeKey::MapKey(k), value: v, - schema: &collection_schema.row, + schema: &table_schema.row, }, memory, ) }) .collect::>(), - value::Value::List(v) => v + value::Value::LTable(v) => v .iter() .enumerate() .map(|(i, item)| { @@ -387,14 +385,14 @@ async fn evaluate_op_scope( ScopeEntry { key: ScopeKey::ListIndex(i), value: item, - schema: &collection_schema.row, + schema: &table_schema.row, }, memory, ) }) .collect::>(), _ => { - bail!("Target field type is expected to be a collection"); + bail!("Target field type is expected to be a table"); } }; try_join_all(task_futs) @@ -455,21 +453,21 @@ pub async fn evaluate_source_entry( schema: root_schema, }; - let collection_schema = match &root_schema.fields[import_op.output.field_idx as usize] + let table_schema = match &root_schema.fields[import_op.output.field_idx as usize] .value_type .typ { - schema::ValueType::Collection(cs) => cs, 
+ schema::ValueType::Table(cs) => cs, _ => { bail!("Expect source output to be a table") } }; let scope_value = - ScopeValueBuilder::augmented_from(&value::ScopeValue(source_value), collection_schema)?; + ScopeValueBuilder::augmented_from(&value::ScopeValue(source_value), table_schema)?; root_scope_entry.define_field_w_builder( &import_op.output, - value::Value::Table(BTreeMap::from([(key.clone(), scope_value)])), + value::Value::KTable(BTreeMap::from([(key.clone(), scope_value)])), ); evaluate_op_scope( diff --git a/src/ops/functions/split_recursively.rs b/src/ops/functions/split_recursively.rs index 3a3578ad..f90ebbfb 100644 --- a/src/ops/functions/split_recursively.rs +++ b/src/ops/functions/split_recursively.rs @@ -550,7 +550,7 @@ impl SimpleFunctionExecutor for Executor { .map(|(range, text)| (range.into(), fields_value!(Arc::::from(text)).into())) .collect(); - Ok(Value::Table(table)) + Ok(Value::KTable(table)) } } @@ -596,12 +596,11 @@ impl SimpleFunctionFactoryBase for Factory { "text", make_output_type(BasicValueType::Str), )); - let output_schema = - make_output_type(CollectionSchema::new(CollectionKind::Table, struct_schema)) - .with_attr( - field_attrs::CHUNK_BASE_TEXT, - serde_json::to_value(args_resolver.get_analyze_value(&args.text))?, - ); + let output_schema = make_output_type(TableSchema::new(TableKind::KTable, struct_schema)) + .with_attr( + field_attrs::CHUNK_BASE_TEXT, + serde_json::to_value(args_resolver.get_analyze_value(&args.text))?, + ); Ok((args, output_schema)) } diff --git a/src/ops/sdk.rs b/src/ops/sdk.rs index f0a36b9f..b3332ae9 100644 --- a/src/ops/sdk.rs +++ b/src/ops/sdk.rs @@ -29,9 +29,9 @@ impl TypeCore for StructSchema { } } -impl TypeCore for CollectionSchema { +impl TypeCore for TableSchema { fn into_type(self) -> ValueType { - ValueType::Collection(self) + ValueType::Table(self) } } diff --git a/src/ops/sources/google_drive.rs b/src/ops/sources/google_drive.rs index da41f5d9..06786ddf 100644 --- 
a/src/ops/sources/google_drive.rs +++ b/src/ops/sources/google_drive.rs @@ -473,8 +473,8 @@ impl SourceFactoryBase for Factory { serde_json::to_value(mime_type_field.to_field_ref())?, ), )); - Ok(make_output_type(CollectionSchema::new( - CollectionKind::Table, + Ok(make_output_type(TableSchema::new( + TableKind::KTable, struct_schema, ))) } diff --git a/src/ops/sources/local_file.rs b/src/ops/sources/local_file.rs index 224455b8..fbf7157a 100644 --- a/src/ops/sources/local_file.rs +++ b/src/ops/sources/local_file.rs @@ -139,8 +139,8 @@ impl SourceFactoryBase for Factory { ), )); - Ok(make_output_type(CollectionSchema::new( - CollectionKind::Table, + Ok(make_output_type(TableSchema::new( + TableKind::KTable, struct_schema, ))) } diff --git a/src/ops/storages/neo4j.rs b/src/ops/storages/neo4j.rs index 70b62caa..fd8a3b2a 100644 --- a/src/ops/storages/neo4j.rs +++ b/src/ops/storages/neo4j.rs @@ -293,17 +293,17 @@ fn value_to_bolt(value: &Value, schema: &schema::ValueType) -> Result ValueType::Struct(t) => field_values_to_bolt(v.fields.iter(), t.fields.iter())?, _ => anyhow::bail!("Non-struct type got struct value: {}", schema), }, - Value::Collection(v) | Value::List(v) => match schema { - ValueType::Collection(t) => BoltType::List(neo4rs::BoltList { + Value::UTable(v) | Value::LTable(v) => match schema { + ValueType::Table(t) => BoltType::List(neo4rs::BoltList { value: v .iter() .map(|v| field_values_to_bolt(v.0.fields.iter(), t.row.fields.iter())) .collect::>()?, }), - _ => anyhow::bail!("Non-collection type got collection value: {}", schema), + _ => anyhow::bail!("Non-table type got table value: {}", schema), }, - Value::Table(v) => match schema { - ValueType::Collection(t) => BoltType::List(neo4rs::BoltList { + Value::KTable(v) => match schema { + ValueType::Table(t) => BoltType::List(neo4rs::BoltList { value: v .iter() .map(|(k, v)| { diff --git a/src/py/convert.rs b/src/py/convert.rs index 6410f1a1..327ba828 100644 --- a/src/py/convert.rs +++ b/src/py/convert.rs 
@@ -95,14 +95,14 @@ pub fn value_to_py_object<'py>(py: Python<'py>, v: &value::Value) -> PyResult py.None().into_bound(py), value::Value::Basic(v) => basic_value_to_py_object(py, v)?, value::Value::Struct(v) => field_values_to_py_object(py, v.fields.iter())?, - value::Value::Collection(v) | value::Value::List(v) => { + value::Value::UTable(v) | value::Value::LTable(v) => { let rows = v .iter() .map(|v| field_values_to_py_object(py, v.0.fields.iter())) .collect::>>()?; PyList::new(py, rows)?.into_any() } - value::Value::Table(v) => { + value::Value::KTable(v) => { let rows = v .iter() .map(|(k, v)| { @@ -192,20 +192,20 @@ pub fn value_from_py_object<'py>( schema::ValueType::Struct(schema) => { value::Value::Struct(field_values_from_py_object(schema, v)?) } - schema::ValueType::Collection(schema) => { + schema::ValueType::Table(schema) => { let list = v.extract::>>()?; let values = list .into_iter() .map(|v| field_values_from_py_object(&schema.row, &v)) .collect::>>()?; match schema.kind { - schema::CollectionKind::Collection => { - value::Value::Collection(values.into_iter().map(|v| v.into()).collect()) + schema::TableKind::UTable => { + value::Value::UTable(values.into_iter().map(|v| v.into()).collect()) } - schema::CollectionKind::List => { - value::Value::List(values.into_iter().map(|v| v.into()).collect()) + schema::TableKind::LTable => { + value::Value::LTable(values.into_iter().map(|v| v.into()).collect()) } - schema::CollectionKind::Table => value::Value::Table( + schema::TableKind::KTable => value::Value::KTable( values .into_iter() .map(|v| { diff --git a/src/service/flows.rs b/src/service/flows.rs index 18816bc2..84d21029 100644 --- a/src/service/flows.rs +++ b/src/service/flows.rs @@ -137,11 +137,11 @@ pub async fn evaluate_data( let plan = flow_ctx.flow.get_execution_plan().await?; let import_op = &plan.import_ops[import_op_idx]; let field_schema = &schema.fields[import_op.output.field_idx as usize]; - let collection_schema = match 
&field_schema.value_type.typ { - schema::ValueType::Collection(collection) => collection, + let table_schema = match &field_schema.value_type.typ { + schema::ValueType::Table(table) => table, _ => api_bail!("field is not a table: {}", query.field), }; - let key_field = collection_schema + let key_field = table_schema .key_field() .ok_or_else(|| api_error!("field {} does not have a key", query.field))?; let key = value::KeyValue::from_strs(query.key, &key_field.value_type.typ)?; From 3b313e933e04285c5c60fb7ab253d436d03ac1a2 Mon Sep 17 00:00:00 2001 From: LJ Date: Mon, 28 Apr 2025 12:29:02 -0700 Subject: [PATCH 2/7] feat(types): update Python SDK to support using `dict` for `KTable` --- python/cocoindex/convert.py | 59 +++++++++---- python/cocoindex/op.py | 8 +- python/cocoindex/tests/test_convert.py | 118 ++++++++++++++----------- python/cocoindex/typing.py | 65 +++++++------- 4 files changed, 143 insertions(+), 107 deletions(-) diff --git a/python/cocoindex/convert.py b/python/cocoindex/convert.py index acc170ad..1f947323 100644 --- a/python/cocoindex/convert.py +++ b/python/cocoindex/convert.py @@ -8,25 +8,28 @@ from enum import Enum from typing import Any, Callable, get_origin -from .typing import analyze_type_info, encode_enriched_type, TABLE_TYPES +from .typing import analyze_type_info, encode_enriched_type, TABLE_TYPES, KEY_FIELD_NAME -def to_engine_value(value: Any) -> Any: - """Convert a Python value to an engine value.""" + +def encode_engine_value(value: Any) -> Any: + """Encode a Python value to an engine value.""" if dataclasses.is_dataclass(value): - return [to_engine_value(getattr(value, f.name)) for f in dataclasses.fields(value)] + return [encode_engine_value(getattr(value, f.name)) for f in dataclasses.fields(value)] if isinstance(value, (list, tuple)): - return [to_engine_value(v) for v in value] + return [encode_engine_value(v) for v in value] + if isinstance(value, dict): + return [[encode_engine_value(k)] + encode_engine_value(v) for k, v in 
value.items()] if isinstance(value, uuid.UUID): return value.bytes return value -def make_engine_value_converter( +def make_engine_value_decoder( field_path: list[str], src_type: dict[str, Any], dst_annotation, ) -> Callable[[Any], Any]: """ - Make a converter from an engine value to a Python value. + Make a decoder from an engine value to a Python value. Args: field_path: The path to the field in the engine value. For error messages. @@ -34,7 +37,7 @@ def make_engine_value_converter( dst_annotation: The type annotation of the Python value. Returns: - A converter from an engine value to a Python value. + A decoder from an engine value to a Python value. """ src_type_kind = src_type['kind'] @@ -53,7 +56,7 @@ def make_engine_value_converter( f"passed in {src_type_kind}, declared {dst_annotation} ({dst_type_info.kind})") if dst_type_info.dataclass_type is not None: - return _make_engine_struct_value_converter( + return _make_engine_struct_value_decoder( field_path, src_type['fields'], dst_type_info.dataclass_type) if src_type_kind in TABLE_TYPES: @@ -61,33 +64,51 @@ def make_engine_value_converter( elem_type_info = analyze_type_info(dst_type_info.elem_type) if elem_type_info.dataclass_type is None: raise ValueError(f"Type mismatch for `{''.join(field_path)}`: " - f"declared `{dst_type_info.kind}`, a dataclass type expected") - elem_converter = _make_engine_struct_value_converter( - field_path, src_type['row']['fields'], elem_type_info.dataclass_type) + f"declared `{dst_type_info.kind}`, a dataclass type expected") + engine_fields_schema = src_type['row']['fields'] + if elem_type_info.key_type is not None: + key_field_schema = engine_fields_schema[0] + field_path.append(f".{key_field_schema.get('name', KEY_FIELD_NAME)}") + key_decoder = make_engine_value_decoder( + field_path, key_field_schema['type'], elem_type_info.key_type) + field_path.pop() + value_decoder = _make_engine_struct_value_decoder( + field_path, engine_fields_schema[1:], elem_type_info.dataclass_type) + 
def decode(value): + if value is None: + return None + return {key_decoder(v[0]): value_decoder(v[1:]) for v in value} + else: + elem_decoder = _make_engine_struct_value_decoder( + field_path, engine_fields_schema, elem_type_info.dataclass_type) + def decode(value): + if value is None: + return None + return [elem_decoder(v) for v in value] field_path.pop() - return lambda value: [elem_converter(v) for v in value] if value is not None else None + return decode if src_type_kind == 'Uuid': return lambda value: uuid.UUID(bytes=value) return lambda value: value -def _make_engine_struct_value_converter( +def _make_engine_struct_value_decoder( field_path: list[str], src_fields: list[dict[str, Any]], dst_dataclass_type: type, ) -> Callable[[list], Any]: - """Make a converter from an engine field values to a Python value.""" + """Make a decoder from an engine field values to a Python value.""" src_name_to_idx = {f['name']: i for i, f in enumerate(src_fields)} def make_closure_for_value(name: str, param: inspect.Parameter) -> Callable[[list], Any]: src_idx = src_name_to_idx.get(name) if src_idx is not None: field_path.append(f'.{name}') - field_converter = make_engine_value_converter( + field_decoder = make_engine_value_decoder( field_path, src_fields[src_idx]['type'], param.annotation) field_path.pop() - return lambda values: field_converter(values[src_idx]) + return lambda values: field_decoder(values[src_idx]) default_value = param.default if default_value is inspect.Parameter.empty: @@ -96,12 +117,12 @@ def make_closure_for_value(name: str, param: inspect.Parameter) -> Callable[[lis return lambda _: default_value - field_value_converters = [ + field_value_decoder = [ make_closure_for_value(name, param) for (name, param) in inspect.signature(dst_dataclass_type).parameters.items()] return lambda values: dst_dataclass_type( - *(converter(values) for converter in field_value_converters)) + *(decoder(values) for decoder in field_value_decoder)) def dump_engine_object(v: Any) 
-> Any: """Recursively dump an object for engine. Engine side uses `Pythonized` to catch.""" diff --git a/python/cocoindex/op.py b/python/cocoindex/op.py index e524595b..bf211b01 100644 --- a/python/cocoindex/op.py +++ b/python/cocoindex/op.py @@ -9,7 +9,7 @@ from enum import Enum from .typing import encode_enriched_type -from .convert import to_engine_value, make_engine_value_converter +from .convert import encode_engine_value, make_engine_value_decoder from . import _engine class OpCategory(Enum): @@ -129,7 +129,7 @@ def analyze(self, *args, **kwargs): raise ValueError( f"Too many positional arguments passed in: {len(args)} > {next_param_idx}") self._args_converters.append( - make_engine_value_converter( + make_engine_value_decoder( [arg_name], arg.value_type['type'], arg_param.annotation)) if arg_param.kind != inspect.Parameter.VAR_POSITIONAL: next_param_idx += 1 @@ -146,7 +146,7 @@ def analyze(self, *args, **kwargs): if expected_arg is None: raise ValueError(f"Unexpected keyword argument passed in: {kwarg_name}") arg_param = expected_arg[1] - self._kwargs_converters[kwarg_name] = make_engine_value_converter( + self._kwargs_converters[kwarg_name] = make_engine_value_decoder( [kwarg_name], kwarg.value_type['type'], arg_param.annotation) missing_args = [name for (name, arg) in expected_kwargs @@ -188,7 +188,7 @@ async def __call__(self, *args, **kwargs): output = await self._acall(*converted_args, **converted_kwargs) else: output = await self._acall(*converted_args, **converted_kwargs) - return to_engine_value(output) + return encode_engine_value(output) _WrappedClass.__name__ = executor_cls.__name__ _WrappedClass.__doc__ = executor_cls.__doc__ diff --git a/python/cocoindex/tests/test_convert.py b/python/cocoindex/tests/test_convert.py index a3f5684d..9cf725d2 100644 --- a/python/cocoindex/tests/test_convert.py +++ b/python/cocoindex/tests/test_convert.py @@ -1,11 +1,9 @@ -import dataclasses import uuid import datetime from dataclasses import dataclass, 
make_dataclass import pytest from cocoindex.typing import encode_enriched_type -from cocoindex.convert import to_engine_value -from cocoindex.convert import make_engine_value_converter +from cocoindex.convert import encode_engine_value, make_engine_value_decoder @dataclass class Order: @@ -26,7 +24,7 @@ class Basket: class Customer: name: str order: Order - tags: list[Tag] = None + tags: list[Tag] | None = None @dataclass class NestedStruct: @@ -34,63 +32,63 @@ class NestedStruct: orders: list[Order] count: int = 0 -def build_engine_value_converter(engine_type_in_py, python_type=None): +def build_engine_value_decoder(engine_type_in_py, python_type=None): """ Helper to build a converter for the given engine-side type (as represented in Python). If python_type is not specified, uses engine_type_in_py as the target. """ engine_type = encode_enriched_type(engine_type_in_py)["type"] - return make_engine_value_converter([], engine_type, python_type or engine_type_in_py) + return make_engine_value_decoder([], engine_type, python_type or engine_type_in_py) -def test_to_engine_value_basic_types(): - assert to_engine_value(123) == 123 - assert to_engine_value(3.14) == 3.14 - assert to_engine_value("hello") == "hello" - assert to_engine_value(True) is True +def test_encode_engine_value_basic_types(): + assert encode_engine_value(123) == 123 + assert encode_engine_value(3.14) == 3.14 + assert encode_engine_value("hello") == "hello" + assert encode_engine_value(True) is True -def test_to_engine_value_uuid(): +def test_encode_engine_value_uuid(): u = uuid.uuid4() - assert to_engine_value(u) == u.bytes + assert encode_engine_value(u) == u.bytes -def test_to_engine_value_date_time_types(): +def test_encode_engine_value_date_time_types(): d = datetime.date(2024, 1, 1) - assert to_engine_value(d) == d + assert encode_engine_value(d) == d t = datetime.time(12, 30) - assert to_engine_value(t) == t + assert encode_engine_value(t) == t dt = datetime.datetime(2024, 1, 1, 12, 30) - assert 
to_engine_value(dt) == dt + assert encode_engine_value(dt) == dt -def test_to_engine_value_struct(): +def test_encode_engine_value_struct(): order = Order(order_id="O123", name="mixed nuts", price=25.0) - assert to_engine_value(order) == ["O123", "mixed nuts", 25.0, "default_extra"] + assert encode_engine_value(order) == ["O123", "mixed nuts", 25.0, "default_extra"] -def test_to_engine_value_list_of_structs(): +def test_encode_engine_value_list_of_structs(): orders = [Order("O1", "item1", 10.0), Order("O2", "item2", 20.0)] - assert to_engine_value(orders) == [["O1", "item1", 10.0, "default_extra"], ["O2", "item2", 20.0, "default_extra"]] + assert encode_engine_value(orders) == [["O1", "item1", 10.0, "default_extra"], ["O2", "item2", 20.0, "default_extra"]] -def test_to_engine_value_struct_with_list(): +def test_encode_engine_value_struct_with_list(): basket = Basket(items=["apple", "banana"]) - assert to_engine_value(basket) == [["apple", "banana"]] + assert encode_engine_value(basket) == [["apple", "banana"]] -def test_to_engine_value_nested_struct(): +def test_encode_engine_value_nested_struct(): customer = Customer(name="Alice", order=Order("O1", "item1", 10.0)) - assert to_engine_value(customer) == ["Alice", ["O1", "item1", 10.0, "default_extra"], None] + assert encode_engine_value(customer) == ["Alice", ["O1", "item1", 10.0, "default_extra"], None] -def test_to_engine_value_empty_list(): - assert to_engine_value([]) == [] - assert to_engine_value([[]]) == [[]] +def test_encode_engine_value_empty_list(): + assert encode_engine_value([]) == [] + assert encode_engine_value([[]]) == [[]] -def test_to_engine_value_tuple(): - assert to_engine_value(()) == [] - assert to_engine_value((1, 2, 3)) == [1, 2, 3] - assert to_engine_value(((1, 2), (3, 4))) == [[1, 2], [3, 4]] - assert to_engine_value(([],)) == [[]] - assert to_engine_value(((),)) == [[]] +def test_encode_engine_value_tuple(): + assert encode_engine_value(()) == [] + assert encode_engine_value((1, 2, 3)) == 
[1, 2, 3] + assert encode_engine_value(((1, 2), (3, 4))) == [[1, 2], [3, 4]] + assert encode_engine_value(([],)) == [[]] + assert encode_engine_value(((),)) == [[]] -def test_to_engine_value_none(): - assert to_engine_value(None) is None +def test_encode_engine_value_none(): + assert encode_engine_value(None) is None -def test_make_engine_value_converter_basic_types(): +def test_make_engine_value_decoder_basic_types(): for engine_type_in_py, value in [ (int, 42), (float, 3.14), @@ -98,11 +96,11 @@ def test_make_engine_value_converter_basic_types(): (bool, True), # (type(None), None), # Removed unsupported NoneType ]: - converter = build_engine_value_converter(engine_type_in_py) - assert converter(value) == value + decoder = build_engine_value_decoder(engine_type_in_py) + assert decoder(value) == value @pytest.mark.parametrize( - "converter_type, engine_val, expected", + "data_type, engine_val, expected", [ # All fields match (Order, ["O123", "mixed nuts", 25.0, "default_extra"], Order("O123", "mixed nuts", 25.0, "default_extra")), @@ -120,30 +118,30 @@ def test_make_engine_value_converter_basic_types(): (Customer, ["Alice", ["O1", "item1", 10.0, "default_extra"], [["vip"]], "extra"], Customer("Alice", Order("O1", "item1", 10.0, "default_extra"), [Tag("vip")])), ] ) -def test_struct_conversion_cases(converter_type, engine_val, expected): - converter = build_engine_value_converter(converter_type) - assert converter(engine_val) == expected +def test_struct_decoder_cases(data_type, engine_val, expected): + decoder = build_engine_value_decoder(data_type) + assert decoder(engine_val) == expected -def test_make_engine_value_converter_collections(): +def test_make_engine_value_decoder_collections(): # List of structs - converter = build_engine_value_converter(list[Order]) + decoder = build_engine_value_decoder(list[Order]) engine_val = [ ["O1", "item1", 10.0, "default_extra"], ["O2", "item2", 20.0, "default_extra"] ] - assert converter(engine_val) == [Order("O1", "item1", 
10.0, "default_extra"), Order("O2", "item2", 20.0, "default_extra")] + assert decoder(engine_val) == [Order("O1", "item1", 10.0, "default_extra"), Order("O2", "item2", 20.0, "default_extra")] # Struct with list field - converter = build_engine_value_converter(Customer) + decoder = build_engine_value_decoder(Customer) engine_val = ["Alice", ["O1", "item1", 10.0, "default_extra"], [["vip"], ["premium"]]] - assert converter(engine_val) == Customer("Alice", Order("O1", "item1", 10.0, "default_extra"), [Tag("vip"), Tag("premium")]) + assert decoder(engine_val) == Customer("Alice", Order("O1", "item1", 10.0, "default_extra"), [Tag("vip"), Tag("premium")]) # Struct with struct field - converter = build_engine_value_converter(NestedStruct) + decoder = build_engine_value_decoder(NestedStruct) engine_val = [ ["Alice", ["O1", "item1", 10.0, "default_extra"], [["vip"]]], [["O1", "item1", 10.0, "default_extra"], ["O2", "item2", 20.0, "default_extra"]], 2 ] - assert converter(engine_val) == NestedStruct( + assert decoder(engine_val) == NestedStruct( Customer("Alice", Order("O1", "item1", 10.0, "default_extra"), [Tag("vip")]), [Order("O1", "item1", 10.0, "default_extra"), Order("O2", "item2", 20.0, "default_extra")], 2 @@ -227,8 +225,24 @@ def make_python_order(fields, defaults=None): def test_field_position_cases(engine_fields, python_fields, python_defaults, engine_val, expected_python_val): EngineOrder = make_engine_order(engine_fields) PythonOrder = make_python_order(python_fields, python_defaults) - converter = build_engine_value_converter(EngineOrder, PythonOrder) + decoder = build_engine_value_decoder(EngineOrder, PythonOrder) # Map field names to expected values expected_dict = dict(zip([f[0] for f in python_fields], expected_python_val)) # Instantiate using keyword arguments (order doesn't matter) - assert converter(engine_val) == PythonOrder(**expected_dict) + assert decoder(engine_val) == PythonOrder(**expected_dict) + +def test_roundtrip_ltable(): + t = list[Order] + 
value = [Order("O1", "item1", 10.0), Order("O2", "item2", 20.0)] + encoded = encode_engine_value(value) + assert encoded == [["O1", "item1", 10.0, "default_extra"], ["O2", "item2", 20.0, "default_extra"]] + decoded = build_engine_value_decoder(t)(encoded) + assert decoded == value + +def test_roundtrip_ktable(): + t = dict[str, Order] + value = {"K1": Order("O1", "item1", 10.0), "K2": Order("O2", "item2", 20.0)} + encoded = encode_engine_value(value) + assert encoded == [["K1", "O1", "item1", 10.0, "default_extra"], ["K2", "O2", "item2", 20.0, "default_extra"]] + decoded = build_engine_value_decoder(t)(encoded) + assert decoded == value diff --git a/python/cocoindex/typing.py b/python/cocoindex/typing.py index 24e494b7..540b52d0 100644 --- a/python/cocoindex/typing.py +++ b/python/cocoindex/typing.py @@ -31,38 +31,22 @@ def __init__(self, key: str, value: Any): OffsetDateTime = Annotated[datetime.datetime, TypeKind('OffsetDateTime')] TABLE_TYPES = ('KTable', 'LTable') +KEY_FIELD_NAME = '_key' -R = TypeVar("R") - -if TYPE_CHECKING: - KTable = Annotated[list[R], TypeKind('KTable')] - LTable = Annotated[list[R], TypeKind('LTable')] -else: - # pylint: disable=too-few-public-methods - class KTable: # type: ignore[unreachable] - """ - A KTable type, which is a table that the first field is the key. - """ - def __class_getitem__(cls, item: type[R]): - return Annotated[list[item], TypeKind('KTable')] - - # pylint: disable=too-few-public-methods - class LTable: # type: ignore[unreachable] - """ - A LTable type, which is a table that has a list of ordered rows. - """ - def __class_getitem__(cls, item: type[R]): - return Annotated[list[item], TypeKind('LTable')] +ElementType = type | tuple[type, type] @dataclasses.dataclass class AnalyzedTypeInfo: """ Analyzed info of a Python type. 
""" kind: str - vector_info: Vector | None - elem_type: type | None - dataclass_type: type | None + vector_info: Vector | None # For Vector + elem_type: ElementType | None # For Vector and Table + + key_type: type | None # For element of KTable + dataclass_type: type | None # For Struct + attrs: dict[str, Any] | None nullable: bool = False @@ -70,6 +54,12 @@ def analyze_type_info(t) -> AnalyzedTypeInfo: """ Analyze a Python type and return the analyzed info. """ + if isinstance(t, tuple) and len(t) == 2: + key_type, value_type = t + result = analyze_type_info(value_type) + result.key_type = key_type + return result + annotations: tuple[Annotation, ...] = () base_type = None nullable = False @@ -105,6 +95,7 @@ def analyze_type_info(t) -> AnalyzedTypeInfo: dataclass_type = None elem_type = None + key_type = None if isinstance(t, type) and dataclasses.is_dataclass(t): if kind is None: kind = 'Struct' @@ -121,6 +112,10 @@ def analyze_type_info(t) -> AnalyzedTypeInfo: if len(args) != 1: raise ValueError(f"{kind} must have exactly one type argument") elem_type = args[0] + elif base_type is collections.abc.Mapping or base_type is dict: + kind = 'KTable' + args = typing.get_args(t) + elem_type = (args[0], args[1]) elif kind is None: if base_type is collections.abc.Sequence or base_type is list: kind = 'Vector' if vector_info is not None else 'LTable' @@ -145,20 +140,26 @@ def analyze_type_info(t) -> AnalyzedTypeInfo: else: raise ValueError(f"type unsupported yet: {t}") - return AnalyzedTypeInfo(kind=kind, vector_info=vector_info, elem_type=elem_type, - dataclass_type=dataclass_type, attrs=attrs, nullable=nullable) + return AnalyzedTypeInfo(kind=kind, vector_info=vector_info, + elem_type=elem_type, key_type=key_type, dataclass_type=dataclass_type, + attrs=attrs, nullable=nullable) -def _encode_fields_schema(dataclass_type: type) -> list[dict[str, Any]]: +def _encode_fields_schema(dataclass_type: type, key_type: type | None = None) -> list[dict[str, Any]]: result = [] - for 
field in dataclasses.fields(dataclass_type): + def add_field(name: str, t) -> None: try: - type_info = encode_enriched_type_info(analyze_type_info(field.type)) + type_info = encode_enriched_type_info(analyze_type_info(t)) except ValueError as e: e.add_note(f"Failed to encode annotation for field - " - f"{dataclass_type.__name__}.{field.name}: {field.type}") + f"{dataclass_type.__name__}.{name}: {t}") raise - type_info['name'] = field.name + type_info['name'] = name result.append(type_info) + + if key_type is not None: + add_field(KEY_FIELD_NAME, key_type) + for field in dataclasses.fields(dataclass_type): + add_field(field.name, field.type) return result def _encode_type(type_info: AnalyzedTypeInfo) -> dict[str, Any]: @@ -167,7 +168,7 @@ def _encode_type(type_info: AnalyzedTypeInfo) -> dict[str, Any]: if type_info.kind == 'Struct': if type_info.dataclass_type is None: raise ValueError("Struct type must have a dataclass type") - encoded_type['fields'] = _encode_fields_schema(type_info.dataclass_type) + encoded_type['fields'] = _encode_fields_schema(type_info.dataclass_type, type_info.key_type) if doc := inspect.getdoc(type_info.dataclass_type): encoded_type['description'] = doc From 7b999cfa07b76831b4e422524048a2ccfd7a4979 Mon Sep 17 00:00:00 2001 From: LJ Date: Mon, 28 Apr 2025 15:18:01 -0700 Subject: [PATCH 3/7] test: add a test case for struct-typed KTable key --- python/cocoindex/tests/test_convert.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/python/cocoindex/tests/test_convert.py b/python/cocoindex/tests/test_convert.py index 9cf725d2..b20e035e 100644 --- a/python/cocoindex/tests/test_convert.py +++ b/python/cocoindex/tests/test_convert.py @@ -239,10 +239,24 @@ def test_roundtrip_ltable(): decoded = build_engine_value_decoder(t)(encoded) assert decoded == value -def test_roundtrip_ktable(): +def test_roundtrip_ktable_str_key(): t = dict[str, Order] value = {"K1": Order("O1", "item1", 10.0), "K2": Order("O2", "item2", 
20.0)} encoded = encode_engine_value(value) assert encoded == [["K1", "O1", "item1", 10.0, "default_extra"], ["K2", "O2", "item2", 20.0, "default_extra"]] decoded = build_engine_value_decoder(t)(encoded) assert decoded == value + +def test_roundtrip_ktable_struct_key(): + @dataclass(frozen=True) + class OrderKey: + shop_id: str + version: int + + t = dict[OrderKey, Order] + value = {OrderKey("A", 3): Order("O1", "item1", 10.0), OrderKey("B", 4): Order("O2", "item2", 20.0)} + encoded = encode_engine_value(value) + assert encoded == [[["A", 3], "O1", "item1", 10.0, "default_extra"], + [["B", 4], "O2", "item2", 20.0, "default_extra"]] + decoded = build_engine_value_decoder(t)(encoded) + assert decoded == value From 4e2edeaaedaf87bf624682bd6a9f77bcb6918c4c Mon Sep 17 00:00:00 2001 From: LJ Date: Mon, 28 Apr 2025 15:38:22 -0700 Subject: [PATCH 4/7] docs(types): revise documents regarding `KTable` and `LTable` --- docs/docs/core/basics.md | 6 +- docs/docs/core/data_types.mdx | 76 ++++++++++++++++--------- docs/docs/core/flow_def.mdx | 2 +- docs/docs/getting_started/quickstart.md | 7 ++- docs/docs/ops/functions.md | 2 +- docs/docs/ops/sources.md | 4 +- 6 files changed, 61 insertions(+), 36 deletions(-) diff --git a/docs/docs/core/basics.md b/docs/docs/core/basics.md index 4eec0ae5..45464cb1 100644 --- a/docs/docs/core/basics.md +++ b/docs/docs/core/basics.md @@ -21,9 +21,9 @@ An indexing flow involves source data and transformed data (either as an interme Each piece of data has a **data type**, falling into one of the following categories: -* Basic type. -* Struct type: a collection of **fields**, each with a name and a type. -* Collection type: a collection of **rows**, each of which is a struct with specified schema. A collection type can be a table (which has a key field) or a list (ordered but without key field). +* *Basic type*. +* *Struct type*: a collection of **fields**, each with a name and a type. 
+* *Table type*: a collection of **rows**, each of which is a struct with specified schema. A table type can be a *KTable* (which has a key field) or a *LTable* (ordered but without key field). An indexing flow always has a top-level struct, containing all data within and managed by the flow. diff --git a/docs/docs/core/data_types.mdx b/docs/docs/core/data_types.mdx index 2b38a84d..2efefe6c 100644 --- a/docs/docs/core/data_types.mdx +++ b/docs/docs/core/data_types.mdx @@ -42,47 +42,71 @@ So CocoIndex will have information about the specific type. ### Struct Type -A struct has a bunch of fields, each with a name and a type. +A Struct has a bunch of fields, each with a name and a type. -In Python, a struct type is represented by a [dataclass](https://docs.python.org/3/library/dataclasses.html), +In Python, a Struct type is represented by a [dataclass](https://docs.python.org/3/library/dataclasses.html), and all fields must be annotated with a specific type. For example: ```python from dataclasses import dataclass @dataclass -class Order: - order_id: str - name: str - price: float +class Person: + first_name: str + last_name: str + dob: datetime.date ``` -### Collection Types +### Table Types -A collection type models a collection of rows, each of which is a struct with specific schema. +A Table type models a collection of rows, each with multiple columns. +Each column of a table has a specific type. -We have two specific types of collection: +We have two specific Table types: KTable and LTable. -| Type | Description |Type in Python | Original Type in Python | -|------|-------------|---------------|-------------------------| -| Table[*type*] | The first field is the key, and CocoIndex enforces its uniqueness | `cocoindex.typing.Table[type]` | `list[type]` | -| List[*type*] | No key field; row order is preserved | `cocoindex.typing.List[type]` | `list[type]` | +#### KTable + +KTable is a Table type whose first column serves as the key.
+The row order of a KTable is not preserved. +The type of the first column (key column) must be a [key type](#key-types). + +In Python, a KTable type is represented by `dict[K, V]`. +The `V` should be a dataclass, representing the value fields of each row. +For example, you can use `dict[str, Person]` to represent a KTable, with 4 columns: key (Str), `first_name` (Str), `last_name` (Str), `dob` (Date). + +Note that if you want to use a struct as the key, you need to annotate the struct with `@dataclass(frozen=True)`, so the values are immutable. +For example: + +```python +@dataclass(frozen=True) +class PersonKey: + id_kind: str + id: str +``` + +Then you can use `dict[PersonKey, Person]` to represent a KTable keyed by `PersonKey`. + + +#### LTable + +LTable is a Table type whose row order is preserved. LTable has no key column. -For example, we can use `cocoindex.typing.Table[Order]` to represent a table of orders, and the first field `order_id` will be taken as the key field. +In Python, a LTable type is represented by `list[R]`, where `R` is a dataclass representing a row. +For example, you can use `list[Person]` to represent a LTable with 3 columns: `first_name` (Str), `last_name` (Str), `dob` (Date). -## Types to Create Indexes +## Index Types ### Key Types -Currently, the following types are supported as types for key fields: +Currently, the following types are key types: -- `bytes` -- `str` -- `bool` -- `int64` -- `range` -- `uuid` -- `date` +- Bytes +- Str +- Bool +- Int64 +- Range +- Uuid +- Date - Struct with all fields being key types ### Vector Type @@ -94,6 +118,6 @@ Following metrics are supported: | Metric Name | Description | Similarity Order | |-------------|-------------|------------------| -| `CosineSimilarity` | [Cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity) | Larger is more similar | -| `L2Distance` | [L2 distance (a.k.a.
Euclidean distance)](https://en.wikipedia.org/wiki/Euclidean_distance) | Smaller is more similar | -| `InnerProduct` | [Inner product](https://en.wikipedia.org/wiki/Inner_product_space) | Larger is more similar | +| CosineSimilarity | [Cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity) | Larger is more similar | +| L2Distance | [L2 distance (a.k.a. Euclidean distance)](https://en.wikipedia.org/wiki/Euclidean_distance) | Smaller is more similar | +| InnerProduct | [Inner product](https://en.wikipedia.org/wiki/Inner_product_space) | Larger is more similar | diff --git a/docs/docs/core/flow_def.mdx b/docs/docs/core/flow_def.mdx index 15c07f6c..2e5f652c 100644 --- a/docs/docs/core/flow_def.mdx +++ b/docs/docs/core/flow_def.mdx @@ -178,7 +178,7 @@ def demo_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataSco ### For each row -If the data slice has `Table` type, you can call `row()` method to obtain a child scope representing each row, to apply operations on each row. +If the data slice has [table type](/docs/core/data_types#table-types), you can call `row()` method to obtain a child scope representing each row, to apply operations on each row. diff --git a/docs/docs/getting_started/quickstart.md b/docs/docs/getting_started/quickstart.md index be635adb..ec400dfd 100644 --- a/docs/docs/getting_started/quickstart.md +++ b/docs/docs/getting_started/quickstart.md @@ -112,11 +112,12 @@ Notes: * `doc`, representing each row of `documents`. * `chunk`, representing each row of `chunks`. -3. A *data source* extracts data from an external source. In this example, the `LocalFile` data source defines a table, each row has `"filename"` and `"content"` fields. +3. A *data source* extracts data from an external source. + In this example, the `LocalFile` data source imports local files as a KTable (table with a key field, see [KTable](../core/data_types#ktable) for details), each row has `"filename"` and `"content"` fields. -4. 
After defining the table, we extended a new field `"chunks"` to each row by *transforming* the `"content"` field using `SplitRecursively`. The output of the `SplitRecursively` is also a table representing each chunk of the document, with `"location"` and `"text"` fields. +4. After defining the KTable, we extended a new field `"chunks"` to each row by *transforming* the `"content"` field using `SplitRecursively`. The output of the `SplitRecursively` is also a KTable representing each chunk of the document, with `"location"` and `"text"` fields. -5. After defining the table, we extended a new field `"embedding"` to each row by *transforming* the `"text"` field using `SentenceTransformerEmbed`. +5. After defining the KTable, we extended a new field `"embedding"` to each row by *transforming* the `"text"` field using `SentenceTransformerEmbed`. 6. In CocoIndex, a *collector* collects multiple entries of data together. In this example, the `doc_embeddings` collector collects data from all `chunk`s across all `doc`s, and using the collected data to build a vector index `"doc_embeddings"`, using `Postgres`. diff --git a/docs/docs/ops/functions.md b/docs/docs/ops/functions.md index fa73805e..769017d3 100644 --- a/docs/docs/ops/functions.md +++ b/docs/docs/ops/functions.md @@ -32,7 +32,7 @@ Input data: To see all supported language names and extensions, see [the code](https://github.com/search?q=org%3Acocoindex-io+lang%3Arust++%22static+TREE_SITTER_LANGUAGE_BY_LANG%22&type=code). If it's unspecified or the specified language is not supported, it will be treated as plain text. -Return type: `Table`, each row represents a chunk, with the following sub fields: +Return type: [KTable](/docs/core/data_types#ktable), each row represents a chunk, with the following sub fields: * `location` (type: `range`): The location of the chunk. * `text` (type: `str`): The text of the chunk. 
diff --git a/docs/docs/ops/sources.md b/docs/docs/ops/sources.md index df890ab6..1ae614cc 100644 --- a/docs/docs/ops/sources.md +++ b/docs/docs/ops/sources.md @@ -28,7 +28,7 @@ The spec takes the following fields: ### Schema -The output is a table with the following sub fields: +The output is a [KTable](/docs/core/data_types#ktable) with the following sub fields: * `filename` (key, type: `str`): the filename of the file, including the path, relative to the root directory, e.g. `"dir1/file1.md"` * `content` (type: `str` if `binary` is `False`, otherwise `bytes`): the content of the file @@ -78,7 +78,7 @@ The spec takes the following fields: ### Schema -The output is a table with the following sub fields: +The output is a [KTable](/docs/core/data_types#ktable) with the following sub fields: * `file_id` (key, type: `str`): the ID of the file in Google Drive. * `filename` (type: `str`): the filename of the file, without the path, e.g. `"file1.md"` From 62d451849411c13d37d599676b89add34d1196c7 Mon Sep 17 00:00:00 2001 From: LJ Date: Mon, 28 Apr 2025 17:58:54 -0700 Subject: [PATCH 5/7] feat(types): revise representation for `Vector` type --- python/cocoindex/__init__.py | 3 +- python/cocoindex/functions.py | 2 +- python/cocoindex/tests/test_convert.py | 20 ++++++++- python/cocoindex/typing.py | 61 +++++++++++++++++++------- 4 files changed, 66 insertions(+), 20 deletions(-) diff --git a/python/cocoindex/__init__.py b/python/cocoindex/__init__.py index b3c5b010..cf227256 100644 --- a/python/cocoindex/__init__.py +++ b/python/cocoindex/__init__.py @@ -9,4 +9,5 @@ from .index import VectorSimilarityMetric, VectorIndexDef, IndexOptions from .auth_registry import AuthEntryReference, add_auth_entry, ref_auth_entry from .lib import * -from ._engine import OpArgSchema \ No newline at end of file +from ._engine import OpArgSchema +from .typing import Vector \ No newline at end of file diff --git a/python/cocoindex/functions.py b/python/cocoindex/functions.py index 
e0e77457..7f12ca12 100644 --- a/python/cocoindex/functions.py +++ b/python/cocoindex/functions.py @@ -41,7 +41,7 @@ def analyze(self, text): args = self.spec.args or {} self._model = sentence_transformers.SentenceTransformer(self.spec.model, **args) dim = self._model.get_sentence_embedding_dimension() - return Annotated[list[Float32], Vector(dim=dim), TypeAttr("cocoindex.io/vector_origin_text", text.analyzed_value)] + return Annotated[Vector[Float32, dim], TypeAttr("cocoindex.io/vector_origin_text", text.analyzed_value)] def __call__(self, text: str) -> list[Float32]: return self._model.encode(text).tolist() diff --git a/python/cocoindex/tests/test_convert.py b/python/cocoindex/tests/test_convert.py index b20e035e..9019653b 100644 --- a/python/cocoindex/tests/test_convert.py +++ b/python/cocoindex/tests/test_convert.py @@ -2,9 +2,10 @@ import datetime from dataclasses import dataclass, make_dataclass import pytest +import cocoindex from cocoindex.typing import encode_enriched_type from cocoindex.convert import encode_engine_value, make_engine_value_decoder - +from typing import Literal @dataclass class Order: order_id: str @@ -260,3 +261,20 @@ class OrderKey: [["B", 4], "O2", "item2", 20.0, "default_extra"]] decoded = build_engine_value_decoder(t)(encoded) assert decoded == value + +IntVectorType = cocoindex.Vector[int, Literal[5]] +def test_vector_as_vector() -> None: + value: IntVectorType = [1, 2, 3, 4, 5] + encoded = encode_engine_value(value) + assert encoded == [1, 2, 3, 4, 5] + decoded = build_engine_value_decoder(IntVectorType)(encoded) + assert decoded == value + +ListIntType = list[int] +def test_vector_as_list() -> None: + value: ListIntType = [1, 2, 3, 4, 5] + encoded = encode_engine_value(value) + assert encoded == [1, 2, 3, 4, 5] + decoded = build_engine_value_decoder(ListIntType)(encoded) + assert decoded == value + diff --git a/python/cocoindex/typing.py b/python/cocoindex/typing.py index 540b52d0..accb1c36 100644 --- a/python/cocoindex/typing.py 
+++ b/python/cocoindex/typing.py @@ -5,9 +5,9 @@ import types import inspect import uuid -from typing import Annotated, NamedTuple, Any, TypeVar, TYPE_CHECKING, overload +from typing import Annotated, NamedTuple, Any, TypeVar, TYPE_CHECKING, overload, Sequence, Protocol, Generic, Literal -class Vector(NamedTuple): +class VectorInfo(NamedTuple): dim: int | None class TypeKind(NamedTuple): @@ -21,7 +21,7 @@ def __init__(self, key: str, value: Any): self.key = key self.value = value -Annotation = Vector | TypeKind | TypeAttr +Annotation = TypeKind | TypeAttr | VectorInfo Float32 = Annotated[float, TypeKind('Float32')] Float64 = Annotated[float, TypeKind('Float64')] @@ -30,18 +30,42 @@ def __init__(self, key: str, value: Any): LocalDateTime = Annotated[datetime.datetime, TypeKind('LocalDateTime')] OffsetDateTime = Annotated[datetime.datetime, TypeKind('OffsetDateTime')] +if TYPE_CHECKING: + T_co = TypeVar('T_co', covariant=True) + Dim_co = TypeVar('Dim_co', bound=int, covariant=True) + + class Vector(Sequence[T_co], Generic[T_co, Dim_co], Protocol): + """Vector[T, Dim] is a special typing alias for a list[T] with optional dimension info""" +else: + class Vector: # type: ignore[unreachable] + """ A special typing alias for a list[T] with optional dimension info """ + def __class_getitem__(self, params): + if not isinstance(params, tuple): + # Only element type provided + elem_type = params + return Annotated[list[elem_type], VectorInfo(dim=None)] + else: + # Element type and dimension provided + elem_type, dim = params + if typing.get_origin(dim) is Literal: + dim = typing.get_args(dim)[0] # Extract the literal value + return Annotated[list[elem_type], VectorInfo(dim=dim)] + TABLE_TYPES = ('KTable', 'LTable') KEY_FIELD_NAME = '_key' - ElementType = type | tuple[type, type] + +def _is_struct_type(t) -> bool: + return isinstance(t, type) and dataclasses.is_dataclass(t) + @dataclasses.dataclass class AnalyzedTypeInfo: """ Analyzed info of a Python type. 
""" kind: str - vector_info: Vector | None # For Vector + vector_info: VectorInfo | None # For Vector elem_type: ElementType | None # For Vector and Table key_type: type | None # For element of KTable @@ -88,7 +112,7 @@ def analyze_type_info(t) -> AnalyzedTypeInfo: if attrs is None: attrs = dict() attrs[attr.key] = attr.value - elif isinstance(attr, Vector): + elif isinstance(attr, VectorInfo): vector_info = attr elif isinstance(attr, TypeKind): kind = attr.kind @@ -96,30 +120,33 @@ def analyze_type_info(t) -> AnalyzedTypeInfo: dataclass_type = None elem_type = None key_type = None - if isinstance(t, type) and dataclasses.is_dataclass(t): + if _is_struct_type(t): if kind is None: kind = 'Struct' elif kind != 'Struct': raise ValueError(f"Unexpected type kind for struct: {kind}") dataclass_type = t elif base_type is collections.abc.Sequence or base_type is list: + args = typing.get_args(t) + elem_type = args[0] + if kind is None: - kind = 'Vector' if vector_info is not None else 'LTable' + if _is_struct_type(elem_type): + kind = 'LTable' + if vector_info is not None: + raise ValueError("Vector element must be a simple type, not a struct") + else: + kind = 'Vector' + if vector_info is None: + vector_info = VectorInfo(dim=None) elif not (kind == 'Vector' or kind in TABLE_TYPES): raise ValueError(f"Unexpected type kind for list: {kind}") - - args = typing.get_args(t) - if len(args) != 1: - raise ValueError(f"{kind} must have exactly one type argument") - elem_type = args[0] elif base_type is collections.abc.Mapping or base_type is dict: - kind = 'KTable' args = typing.get_args(t) elem_type = (args[0], args[1]) + kind = 'KTable' elif kind is None: - if base_type is collections.abc.Sequence or base_type is list: - kind = 'Vector' if vector_info is not None else 'LTable' - elif t is bytes: + if t is bytes: kind = 'Bytes' elif t is str: kind = 'Str' From 7f2bd1830aa93b4855648dc5f6f9154963faed24 Mon Sep 17 00:00:00 2001 From: LJ Date: Mon, 28 Apr 2025 18:03:10 -0700 Subject: 
[PATCH 6/7] style(types): export types to the `cocoindex.` level --- python/cocoindex/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cocoindex/__init__.py b/python/cocoindex/__init__.py index cf227256..8c77fc7b 100644 --- a/python/cocoindex/__init__.py +++ b/python/cocoindex/__init__.py @@ -10,4 +10,4 @@ from .auth_registry import AuthEntryReference, add_auth_entry, ref_auth_entry from .lib import * from ._engine import OpArgSchema -from .typing import Vector \ No newline at end of file +from .typing import Float32, Float64, LocalDateTime, OffsetDateTime, Range, Vector, Json \ No newline at end of file From 853bb91615eee633c25ebc7595c8ab06512c642c Mon Sep 17 00:00:00 2001 From: LJ Date: Mon, 28 Apr 2025 18:46:13 -0700 Subject: [PATCH 7/7] docs(types): update docs for data type --- docs/docs/core/custom_function.mdx | 2 +- docs/docs/core/data_types.mdx | 56 ++++++++++++++---------------- docs/docs/core/flow_def.mdx | 25 ++++++++++--- 3 files changed, 49 insertions(+), 34 deletions(-) diff --git a/docs/docs/core/custom_function.mdx b/docs/docs/core/custom_function.mdx index cbca3456..1fe35986 100644 --- a/docs/docs/core/custom_function.mdx +++ b/docs/docs/core/custom_function.mdx @@ -33,7 +33,7 @@ Notes: * The `cocoindex.op.function()` function decorator also takes optional parameters. See [Parameters for custom functions](#parameters-for-custom-functions) for details. -* Types of arugments and the return value must be annotated, so that CocoIndex will have information about data types of the operation's output fields. +* Types of arguments and the return value must be annotated, so that CocoIndex will have information about data types of the operation's output fields. See [Data Types](/docs/core/data_types) for supported types. 
diff --git a/docs/docs/core/data_types.mdx b/docs/docs/core/data_types.mdx index 2efefe6c..ffbeb6f1 100644 --- a/docs/docs/core/data_types.mdx +++ b/docs/docs/core/data_types.mdx @@ -9,36 +9,49 @@ In CocoIndex, all data processed by the flow have a type determined when the flo This makes schema of data processed by CocoIndex clear, and easily determine the schema of your index. -## Data Types +## Data Types + +You don't need to spell out data types in CocoIndex, when you define the flow using existing operations (source, function, etc). +These operations decide data types of fields produced by them based on the spec and input data types. +All you need to do is to make sure the data passed to functions and storage targets are accepted by them. + +When you define [custom functions](/docs/core/custom_function), you need to specify the data types of arguments and return values. ### Basic Types This is the list of all basic types supported by CocoIndex: -| Type | Description |Type in Python | Original Type in Python | +| Type | Description | Specific Python Type | Native Python Type | |------|-------------|---------------|-------------------------| | Bytes | | `bytes` | `bytes` | | Str | | `str` | `str` | | Bool | | `bool` | `bool` | | Int64 | | `int` | `int` | -| Float32 | | `cocoindex.typing.Float32` |`float` | -| Float64 | | `cocoindex.typing.Float64` |`float` | -| Range | | `cocoindex.typing.Range` | `tuple[int, int]` | +| Float32 | | `cocoindex.Float32` |`float` | +| Float64 | | `cocoindex.Float64` |`float` | +| Range | | `cocoindex.Range` | `tuple[int, int]` | | Uuid | | `uuid.UUId` | `uuid.UUID` | | Date | | `datetime.date` | `datetime.date` | | Time | | `datetime.time` | `datetime.time` | -| LocalDatetime | Date and time without timezone | `cocoindex.typing.LocalDateTime` | `datetime.datetime` | -| OffsetDatetime | Date and time with a timezone offset | `cocoindex.typing.OffsetDateTime` | `datetime.datetime` | -| Vector[*type*, *N*?] 
| |`Annotated[list[type], cocoindex.typing.Vector(dim=N)]` | `list[type]` | -| Json | | `cocoindex.typing.Json` | Any type convertible to JSON by `json` package | +| LocalDatetime | Date and time without timezone | `cocoindex.LocalDateTime` | `datetime.datetime` | +| OffsetDatetime | Date and time with a timezone offset | `cocoindex.OffsetDateTime` | `datetime.datetime` | +| Vector[*T*, *Dim*?] | *T* must be a basic type. *Dim* is a positive integer and optional. |`cocoindex.Vector[T]` or `cocoindex.Vector[T, Dim]` | `list[T]` | +| Json | | `cocoindex.Json` | Any data convertible to JSON by `json` package | + +Values of all data types can be represented by values in Python's native types (as described under the Native Python Type column). +However, the underlying execution engine and some storage systems (like Postgres) have finer distinctions for some types, specifically: -For some types, CocoIndex Python SDK provides annotated types with finer granularity than Python's original type, e.g. * *Float32* and *Float64* for `float`, with different precision. * *LocalDateTime* and *OffsetDateTime* for `datetime.datetime`, with different timezone awareness. -* *Vector* has dimension information. +* *Vector* has optional dimension information. +* *Range* and *Json* provide a clear tag for the type, to clearly distinguish the type in CocoIndex. -When defining [custom functions](/docs/core/custom_function), use the specific types as type annotations for arguments and return values. -So CocoIndex will have information about the specific type. +The native Python type is always more permissive and can represent a superset of possible values. +* You only need to use the specific type when you annotate the return type of a custom function, + so that CocoIndex will have information about the precise type to be used in the execution engine and storage system. +* For all other purposes, e.g.
to annotate argument types of a custom function, or for use inside your custom function, + you can use either the specific type or the native Python type. + The native Python type is usually simpler. ### Struct Type @@ -94,9 +107,7 @@ LTable is a Table type whose row order is preserved. LTable has no key column. In Python, a LTable type is represented by `list[R]`, where `R` is a dataclass representing a row. For example, you can use `list[Person]` to represent a LTable with 3 columns: `first_name` (Str), `last_name` (Str), `dob` (Date). -## Index Types - -### Key Types +## Key Types Currently, the following types are key types @@ -108,16 +119,3 @@ Currently, the following types are key types - Uuid - Date - Struct with all fields being key types - -### Vector Type - -Users can create vector index on fields with `vector` types. -A vector index also needs to be configured with a similarity metric, and the index is only effective when this metric is used during retrieval. - -Following metrics are supported: - -| Metric Name | Description | Similarity Order | -|-------------|-------------|------------------| -| CosineSimilarity | [Cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity) | Larger is more similar | -| L2Distance | [L2 distance (a.k.a. Euclidean distance)](https://en.wikipedia.org/wiki/Euclidean_distance) | Smaller is more similar | -| InnerProduct | [Inner product](https://en.wikipedia.org/wiki/Inner_product_space) | Larger is more similar | diff --git a/docs/docs/core/flow_def.mdx b/docs/docs/core/flow_def.mdx index 2e5f652c..60a2b551 100644 --- a/docs/docs/core/flow_def.mdx +++ b/docs/docs/core/flow_def.mdx @@ -1,6 +1,7 @@ --- title: Flow Definition description: Define a CocoIndex flow, by specifying source, transformations and storages, and connect input/output data of them. +toc_max_heading_level: 4 --- import Tabs from '@theme/Tabs'; @@ -281,16 +282,32 @@ The target storage is managed by CocoIndex, i.e.
it'll be created by [CocoIndex The `name` for the same storage should remain stable across different runs. If it changes, CocoIndex will treat it as an old storage removed and a new one created, and perform setup changes and reindexing accordingly. -#### Storage Indexes +## Storage Indexes Many storage supports indexes, to boost efficiency in retrieving data. CocoIndex provides a common way to configure indexes for various storages. -* *Primary key*. `primary_key_fields` (`Sequence[str]`): the fields to be used as primary key. Types of the fields must be supported as key fields. See [Key Types](data_types#key-types) for more details. -* *Vector index*. `vector_indexes` (`Sequence[VectorIndexDef]`): the fields to create vector index. `VectorIndexDef` has the following fields: +### Primary Key + +*Primary key* is specified by `primary_key_fields` (`Sequence[str]`). +Types of the fields must be key types. See [Key Types](data_types#key-types) for more details. + +### Vector Index + +*Vector index* is specified by `vector_indexes` (`Sequence[VectorIndexDef]`). `VectorIndexDef` has the following fields: + * `field_name`: the field to create vector index. - * `metric`: the similarity metric to use. See [Vector Type](data_types#vector-type) for more details about supported similarity metrics. + * `metric`: the similarity metric to use. + +#### Similarity Metrics + +The following metrics are supported: +| Metric Name | Description | Similarity Order | +|-------------|-------------|------------------| +| CosineSimilarity | [Cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity) | Larger is more similar | +| L2Distance | [L2 distance (a.k.a. Euclidean distance)](https://en.wikipedia.org/wiki/Euclidean_distance) | Smaller is more similar | +| InnerProduct | [Inner product](https://en.wikipedia.org/wiki/Inner_product_space) | Larger is more similar | ## Miscellaneous