From 2c5490652c21e59489b00e124b15c659df1a339a Mon Sep 17 00:00:00 2001 From: Vaibhav Prabhu Date: Mon, 12 Feb 2024 12:54:14 +0530 Subject: [PATCH 1/2] refactor: remove uses of dozer-cache --- Cargo.lock | 27 +--------------------- Cargo.toml | 1 - dozer-cli/Cargo.toml | 2 +- dozer-cli/src/cli/helper.rs | 2 +- dozer-cli/src/errors.rs | 10 -------- dozer-cli/src/pipeline/builder.rs | 3 +-- dozer-cli/src/pipeline/dummy_sink.rs | 2 +- dozer-cli/src/pipeline/log_sink.rs | 10 ++++---- dozer-cli/src/simple/build/contract/mod.rs | 2 +- dozer-cli/src/simple/executor.rs | 2 +- dozer-cli/src/simple/orchestrator.rs | 4 ++-- dozer-cli/src/ui/app/state.rs | 2 +- dozer-cli/src/ui/live/state.rs | 2 +- 13 files changed, 16 insertions(+), 53 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4832b7f8ae..228a045389 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3542,31 +3542,6 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" -[[package]] -name = "dozer-cache" -version = "0.3.0" -dependencies = [ - "ahash 0.8.6", - "bincode", - "clap 4.4.8", - "criterion", - "dozer-log", - "dozer-storage", - "dozer-tracing", - "dozer-types", - "env_logger 0.10.1", - "futures", - "itertools 0.10.5", - "metrics", - "rand 0.8.5", - "rayon", - "roaring", - "tempdir", - "tokio", - "unicode-segmentation", - "uuid", -] - [[package]] name = "dozer-cli" version = "0.3.0" @@ -3576,9 +3551,9 @@ dependencies = [ "async-trait", "atty", "clap 4.4.8", - "dozer-cache", "dozer-core", "dozer-ingestion", + "dozer-log", "dozer-sink-aerospike", "dozer-sink-clickhouse", "dozer-sink-oracle", diff --git a/Cargo.toml b/Cargo.toml index 4fbcdc1bc5..b164b11dbc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,6 @@ members = [ "dozer-core", "dozer-cli", "dozer-sql", - "dozer-cache", "dozer-tracing", "dozer-tests", "dozer-log", diff --git a/dozer-cli/Cargo.toml b/dozer-cli/Cargo.toml index 7f8d92da68..21cd4f173c 100644 --- a/dozer-cli/Cargo.toml +++ b/dozer-cli/Cargo.toml @@ -12,7 +12,7 @@ name = "dozer" [dependencies] dozer-ingestion = { path = "../dozer-ingestion" } dozer-core = { path = "../dozer-core" } -dozer-cache = { path = "../dozer-cache" } +dozer-log = { path = "../dozer-log" } dozer-sql = { path = "../dozer-sql" } dozer-types = { path = "../dozer-types" } dozer-tracing = { path = "../dozer-tracing" } diff --git a/dozer-cli/src/cli/helper.rs b/dozer-cli/src/cli/helper.rs index e7fd2abc0e..bc834dcba9 100644 --- a/dozer-cli/src/cli/helper.rs +++ b/dozer-cli/src/cli/helper.rs @@ -6,7 +6,7 @@ use crate::errors::OrchestrationError; use crate::simple::SimpleOrchestrator as Dozer; use atty::Stream; -use dozer_cache::dozer_log::camino::Utf8PathBuf; +use dozer_log::camino::Utf8PathBuf; use dozer_tracing::LabelsAndProgress; use dozer_types::models::config::default_cache_max_map_size; use dozer_types::prettytable::{row, Table}; diff --git a/dozer-cli/src/errors.rs b/dozer-cli/src/errors.rs index 76ae71d130..5656304a21 100644 --- a/dozer-cli/src/errors.rs +++ b/dozer-cli/src/errors.rs @@ -12,8 +12,6 @@ use crate::{ ui::{app::AppUIError, live::LiveError}, }; -use dozer_cache::dozer_log::storage; -use dozer_cache::errors::CacheError; use dozer_core::errors::ExecutionError; use dozer_sql::errors::PipelineError; use dozer_types::{constants::LOCK_FILE, thiserror::Error}; @@ -38,8 +36,6 @@ pub enum OrchestrationError { FileSystem(PathBuf, std::io::Error), #[error("Failed to find any build")] NoBuildFound, - #[error("Failed to create log: {0}")] 
- CreateLog(#[from] dozer_cache::dozer_log::replication::Error), #[error("Failed to login: {0}")] CloudLoginFailed(#[from] CloudLoginError), #[error("Credential Error: {0}")] @@ -56,10 +52,6 @@ pub enum OrchestrationError { GrpcServeFailed(#[source] tonic::transport::Error), #[error("Failed to server pgwire: {0}")] PGWireServerFailed(#[source] std::io::Error), - #[error("{0}: Failed to initialize cache. Have you run `dozer build`?")] - CacheInitFailed(#[source] CacheError), - #[error("Failed to build cache {0} from log: {1}")] - CacheBuildFailed(String, #[source] CacheError), #[error("Cache {0} has reached its maximum size. Try to increase `cache_max_map_size` in the config.")] CacheFull(String), #[error("Internal thread panic: {0}")] @@ -218,8 +210,6 @@ pub enum BuildError { FailedToLoadExistingContract(#[source] serde_json::Error), #[error("Serde json error: {0}")] SerdeJson(#[source] serde_json::Error), - #[error("Storage error: {0}")] - Storage(#[from] storage::Error), } #[derive(Debug, Error)] diff --git a/dozer-cli/src/pipeline/builder.rs b/dozer-cli/src/pipeline/builder.rs index 28b083b20b..e168bb2631 100644 --- a/dozer-cli/src/pipeline/builder.rs +++ b/dozer-cli/src/pipeline/builder.rs @@ -2,14 +2,13 @@ use std::collections::HashMap; use std::collections::HashSet; use std::sync::Arc; -use dozer_cache::dozer_log::replication::Log; use dozer_core::app::App; use dozer_core::app::AppPipeline; use dozer_core::app::PipelineEntryPoint; use dozer_core::node::SinkFactory; use dozer_core::shutdown::ShutdownReceiver; use dozer_core::DEFAULT_PORT_HANDLE; - +use dozer_log::replication::Log; use dozer_sql::builder::statement_to_pipeline; use dozer_sql::builder::{OutputNodeInfo, QueryContext}; use dozer_tracing::LabelsAndProgress; diff --git a/dozer-cli/src/pipeline/dummy_sink.rs b/dozer-cli/src/pipeline/dummy_sink.rs index eae1da7d72..c67d6048c8 100644 --- a/dozer-cli/src/pipeline/dummy_sink.rs +++ b/dozer-cli/src/pipeline/dummy_sink.rs @@ -1,11 +1,11 @@ use std::{collections::HashMap, time::Instant}; -use dozer_cache::dozer_log::storage::Queue; use dozer_core::{ epoch::Epoch, node::{PortHandle, Sink, SinkFactory}, DEFAULT_PORT_HANDLE, }; +use dozer_log::storage::Queue; use dozer_types::{ chrono::Local, errors::internal::BoxedError, diff --git a/dozer-cli/src/pipeline/log_sink.rs b/dozer-cli/src/pipeline/log_sink.rs index 1a5b1855b9..c5019b87aa 100644 --- a/dozer-cli/src/pipeline/log_sink.rs +++ b/dozer-cli/src/pipeline/log_sink.rs @@ -1,14 +1,14 @@ use std::{collections::HashMap, fmt::Debug, ops::Deref, sync::Arc}; -use dozer_cache::dozer_log::{ - replication::{Log, LogOperation}, - storage::Queue, -}; use dozer_core::{ epoch::Epoch, node::{PortHandle, Sink, SinkFactory}, DEFAULT_PORT_HANDLE, }; +use dozer_log::{ + replication::{Log, LogOperation}, + storage::Queue, +}; use dozer_tracing::LabelsAndProgress; use dozer_types::types::Schema; use dozer_types::{errors::internal::BoxedError, node::OpIdentifier}; @@ -93,7 +93,7 @@ impl Sink for LogSink { let end = self .runtime .block_on(self.log.lock()) - .write(dozer_cache::dozer_log::replication::LogOperation::Op { op: op.op }); + .write(dozer_log::replication::LogOperation::Op { op: op.op }); self.pb.set_position(end as u64); Ok(()) } diff --git a/dozer-cli/src/simple/build/contract/mod.rs b/dozer-cli/src/simple/build/contract/mod.rs index 85a1cd1d9a..b9507bf8e6 100644 --- a/dozer-cli/src/simple/build/contract/mod.rs +++ b/dozer-cli/src/simple/build/contract/mod.rs @@ -4,7 +4,6 @@ use std::{ path::Path, }; -use 
dozer_cache::dozer_log::schemas::EndpointSchema; use dozer_core::{ dag_schemas::DagSchemas, daggy::{self, NodeIndex}, @@ -15,6 +14,7 @@ use dozer_core::{ Direction, }, }; +use dozer_log::schemas::EndpointSchema; use dozer_types::{ models::{ connection::Connection, diff --git a/dozer-cli/src/simple/executor.rs b/dozer-cli/src/simple/executor.rs index 15019839dc..d318d60bb9 100644 --- a/dozer-cli/src/simple/executor.rs +++ b/dozer-cli/src/simple/executor.rs @@ -1,6 +1,6 @@ -use dozer_cache::dozer_log::home_dir::HomeDir; use dozer_core::checkpoint::{CheckpointOptions, OptionCheckpoint}; use dozer_core::shutdown::ShutdownReceiver; +use dozer_log::home_dir::HomeDir; use dozer_tracing::LabelsAndProgress; use dozer_types::models::endpoint::{ AerospikeSinkConfig, ClickhouseSinkConfig, Endpoint, EndpointKind, OracleSinkConfig, diff --git a/dozer-cli/src/simple/orchestrator.rs b/dozer-cli/src/simple/orchestrator.rs index 8b66f1bb42..602a3d95a5 100644 --- a/dozer-cli/src/simple/orchestrator.rs +++ b/dozer-cli/src/simple/orchestrator.rs @@ -8,11 +8,11 @@ use crate::simple::helper::validate_config; use crate::utils::{get_checkpoint_options, get_executor_options}; use crate::flatten_join_handle; -use dozer_cache::dozer_log::camino::Utf8PathBuf; -use dozer_cache::dozer_log::home_dir::{BuildId, HomeDir}; use dozer_core::app::AppPipeline; use dozer_core::dag_schemas::DagSchemas; use dozer_core::shutdown::ShutdownReceiver; +use dozer_log::camino::Utf8PathBuf; +use dozer_log::home_dir::{BuildId, HomeDir}; use dozer_tracing::LabelsAndProgress; use dozer_types::constants::LOCK_FILE; use dozer_types::models::endpoint::EndpointKind; diff --git a/dozer-cli/src/ui/app/state.rs b/dozer-cli/src/ui/app/state.rs index 8656d92508..6ff027db74 100644 --- a/dozer-cli/src/ui/app/state.rs +++ b/dozer-cli/src/ui/app/state.rs @@ -2,9 +2,9 @@ use std::{collections::HashMap, sync::Arc, thread::JoinHandle}; use clap::Parser; -use dozer_cache::dozer_log::camino::Utf8Path; use dozer_core::shutdown::{self, ShutdownReceiver, ShutdownSender}; use dozer_core::{dag_schemas::DagSchemas, Dag}; +use dozer_log::camino::Utf8Path; use dozer_tracing::{Labels, LabelsAndProgress}; use dozer_types::{ grpc_types::{ diff --git a/dozer-cli/src/ui/live/state.rs b/dozer-cli/src/ui/live/state.rs index 95b95ca3a5..dcc032ade7 100644 --- a/dozer-cli/src/ui/live/state.rs +++ b/dozer-cli/src/ui/live/state.rs @@ -2,9 +2,9 @@ use std::{collections::HashMap, sync::Arc, thread::JoinHandle}; use clap::Parser; -use dozer_cache::dozer_log::camino::Utf8Path; use dozer_core::shutdown::{self, ShutdownReceiver, ShutdownSender}; use dozer_core::{dag_schemas::DagSchemas, Dag}; +use dozer_log::camino::Utf8Path; use dozer_tracing::{Labels, LabelsAndProgress}; use dozer_types::{ grpc_types::{ From 468b62b3a134a7a2fe1912640d99b62fa53bdbb2 Mon Sep 17 00:00:00 2001 From: Vaibhav Prabhu Date: Mon, 12 Feb 2024 13:09:32 +0530 Subject: [PATCH 2/2] chore: delete dozer-cache --- dozer-cache/Cargo.toml | 35 - dozer-cache/benches/cache.rs | 93 --- dozer-cache/src/cache/expression/mod.rs | 153 ---- .../src/cache/expression/query_helper.rs | 238 ------- .../src/cache/expression/query_serde.rs | 246 ------- .../src/cache/expression/tests/deserialize.rs | 251 ------- dozer-cache/src/cache/expression/tests/mod.rs | 2 - .../src/cache/expression/tests/serialize.rs | 221 ------ dozer-cache/src/cache/index/mod.rs | 148 ---- dozer-cache/src/cache/index/tests.rs | 43 -- .../src/cache/lmdb/cache/dump_restore.rs | 180 ----- .../conflict_resolution_tests.rs | 258 ------- 
.../cache/main_environment/dump_restore.rs | 178 ----- .../lmdb/cache/main_environment/hash_tests.rs | 30 - .../cache/lmdb/cache/main_environment/mod.rs | 665 ------------------ .../operation_log/hash_metadata.rs | 80 --- .../operation_log/lmdb_val_impl.rs | 106 --- .../operation_log/metadata.rs | 143 ---- .../main_environment/operation_log/mod.rs | 555 --------------- .../operation_log/primary_key_metadata.rs | 82 --- .../main_environment/operation_log/tests.rs | 353 ---------- dozer-cache/src/cache/lmdb/cache/mod.rs | 221 ------ .../src/cache/lmdb/cache/query/handler.rs | 241 ------- .../cache/lmdb/cache/query/intersection.rs | 101 --- .../src/cache/lmdb/cache/query/lmdb_cmp.rs | 16 - dozer-cache/src/cache/lmdb/cache/query/mod.rs | 9 - .../src/cache/lmdb/cache/query/secondary.rs | 282 -------- .../src/cache/lmdb/cache/query/tests.rs | 271 ------- .../cache/secondary_environment/comparator.rs | 253 ------- .../secondary_environment/dump_restore.rs | 194 ----- .../cache/secondary_environment/indexer.rs | 201 ------ .../lmdb/cache/secondary_environment/mod.rs | 259 ------- dozer-cache/src/cache/lmdb/cache_manager.rs | 380 ---------- dozer-cache/src/cache/lmdb/indexing.rs | 264 ------- dozer-cache/src/cache/lmdb/mod.rs | 7 - dozer-cache/src/cache/lmdb/tests/basic.rs | 188 ----- dozer-cache/src/cache/lmdb/tests/mod.rs | 3 - .../src/cache/lmdb/tests/read_write.rs | 76 -- dozer-cache/src/cache/lmdb/tests/utils.rs | 56 -- dozer-cache/src/cache/lmdb/utils.rs | 133 ---- dozer-cache/src/cache/mod.rs | 167 ----- dozer-cache/src/cache/plan/helper.rs | 213 ------ dozer-cache/src/cache/plan/mod.rs | 61 -- dozer-cache/src/cache/plan/planner.rs | 347 --------- dozer-cache/src/cache/plan/tests.rs | 135 ---- dozer-cache/src/cache/test_utils.rs | 124 ---- dozer-cache/src/errors.rs | 165 ----- dozer-cache/src/lib.rs | 6 - dozer-cache/src/main.rs | 110 --- dozer-cache/src/reader.rs | 105 --- 50 files changed, 8648 deletions(-) delete mode 100644 dozer-cache/Cargo.toml delete mode 100644 dozer-cache/benches/cache.rs delete mode 100644 dozer-cache/src/cache/expression/mod.rs delete mode 100644 dozer-cache/src/cache/expression/query_helper.rs delete mode 100644 dozer-cache/src/cache/expression/query_serde.rs delete mode 100644 dozer-cache/src/cache/expression/tests/deserialize.rs delete mode 100644 dozer-cache/src/cache/expression/tests/mod.rs delete mode 100644 dozer-cache/src/cache/expression/tests/serialize.rs delete mode 100644 dozer-cache/src/cache/index/mod.rs delete mode 100644 dozer-cache/src/cache/index/tests.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/dump_restore.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/main_environment/conflict_resolution_tests.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/main_environment/dump_restore.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/main_environment/hash_tests.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/main_environment/mod.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/hash_metadata.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/lmdb_val_impl.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/metadata.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/mod.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/primary_key_metadata.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/tests.rs delete mode 
100644 dozer-cache/src/cache/lmdb/cache/mod.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/query/handler.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/query/intersection.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/query/lmdb_cmp.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/query/mod.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/query/secondary.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/query/tests.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/secondary_environment/comparator.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/secondary_environment/dump_restore.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/secondary_environment/indexer.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/secondary_environment/mod.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache_manager.rs delete mode 100644 dozer-cache/src/cache/lmdb/indexing.rs delete mode 100644 dozer-cache/src/cache/lmdb/mod.rs delete mode 100644 dozer-cache/src/cache/lmdb/tests/basic.rs delete mode 100644 dozer-cache/src/cache/lmdb/tests/mod.rs delete mode 100644 dozer-cache/src/cache/lmdb/tests/read_write.rs delete mode 100644 dozer-cache/src/cache/lmdb/tests/utils.rs delete mode 100644 dozer-cache/src/cache/lmdb/utils.rs delete mode 100644 dozer-cache/src/cache/mod.rs delete mode 100644 dozer-cache/src/cache/plan/helper.rs delete mode 100644 dozer-cache/src/cache/plan/mod.rs delete mode 100644 dozer-cache/src/cache/plan/planner.rs delete mode 100644 dozer-cache/src/cache/plan/tests.rs delete mode 100644 dozer-cache/src/cache/test_utils.rs delete mode 100644 dozer-cache/src/errors.rs delete mode 100644 dozer-cache/src/lib.rs delete mode 100644 dozer-cache/src/main.rs delete mode 100644 dozer-cache/src/reader.rs diff --git a/dozer-cache/Cargo.toml b/dozer-cache/Cargo.toml deleted file mode 100644 index 2845e2f7f6..0000000000 --- a/dozer-cache/Cargo.toml +++ /dev/null @@ -1,35 +0,0 @@ -[package] -name = "dozer-cache" -version = "0.3.0" -edition = "2021" -authors = ["getdozer/dozer-dev"] - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -dozer-types = { path = "../dozer-types" } -dozer-storage = { path = "../dozer-storage" } -dozer-log = { path = "../dozer-log" } -dozer-tracing = { path = "../dozer-tracing" } - -tokio = { version = "1", features = ["macros", "net", "rt-multi-thread"] } -tempdir = "0.3.7" -futures = "0.3.28" -unicode-segmentation = "1.10.1" -itertools = "0.10.5" -roaring = "0.10.2" -uuid = { version = "1.6.1", features = ["v4"] } -rayon = "1.8.0" -ahash = "0.8.3" -metrics = "0.21.0" -clap = { version = "4.4.1", features = ["derive"] } -env_logger = "0.10.0" -bincode = { workspace = true } - -[dev-dependencies] -criterion = "0.4" -rand = "0.8.5" - -[[bench]] -name = "cache" -harness = false diff --git a/dozer-cache/benches/cache.rs b/dozer-cache/benches/cache.rs deleted file mode 100644 index 7fb773e83c..0000000000 --- a/dozer-cache/benches/cache.rs +++ /dev/null @@ -1,93 +0,0 @@ -use std::path::Path; - -use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; -use dozer_cache::cache::expression::{self, FilterExpression, QueryExpression, Skip}; -use dozer_cache::cache::{ - test_utils, CacheManagerOptions, LmdbRwCacheManager, RwCache, RwCacheManager, -}; -use dozer_types::parking_lot::Mutex; -use dozer_types::serde_json::Value; -use dozer_types::types::{Field, Record}; - -fn insert(cache: &Mutex>, n: usize, commit_size: usize) { - let mut cache = 
cache.lock(); - - let val = format!("bar_{n}"); - let record = Record::new(vec![Field::String(val)]); - - cache.insert(&record).unwrap(); - - if n % commit_size == 0 { - cache.commit(&Default::default()).unwrap(); - } -} - -fn query(cache: &Mutex>, _n: usize) { - let cache = cache.lock(); - let exp = QueryExpression::new( - Some(FilterExpression::Simple( - "foo".to_string(), - expression::Operator::EQ, - Value::from("bar".to_string()), - )), - vec![], - Some(10), - Skip::Skip(0), - ); - - let _get_record = cache.query(&exp).unwrap(); -} - -fn cache(c: &mut Criterion) { - let (schema, secondary_indexes) = test_utils::schema_0(); - - let path = std::env::var("CACHE_BENCH_PATH").unwrap_or(".dozer".to_string()); - let commit_size = std::env::var("CACHE_BENCH_COMMIT_SIZE").unwrap_or("".to_string()); - let commit_size: usize = commit_size.parse().unwrap_or(1000); - - let max_size = std::env::var("CACHE_BENCH_MAP_SIZE").unwrap_or("".to_string()); - let max_size: usize = max_size.parse().unwrap_or(49999872000); - - let cache_manager = LmdbRwCacheManager::new(CacheManagerOptions { - max_db_size: 1000, - max_size, - path: Some(Path::new(&path).to_path_buf()), - ..Default::default() - }) - .unwrap(); - let cache = Mutex::new( - cache_manager - .create_cache( - "temp".to_string(), - Default::default(), - (schema, secondary_indexes), - &Default::default(), - Default::default(), - ) - .unwrap(), - ); - - let iterations = std::env::var("CACHE_BENCH_ITERATIONS").unwrap_or("".to_string()); - let iterations: usize = iterations.parse().unwrap_or(1000000); - - let mut idx = 0; - c.bench_with_input( - BenchmarkId::new("cache_insert", iterations), - &iterations, - |b, &_s| { - b.iter(|| { - insert(&cache, idx, commit_size); - idx += 1; - }) - }, - ); - - c.bench_with_input( - BenchmarkId::new("cache_query", iterations), - &iterations, - |b, &s| b.iter(|| query(&cache, s)), - ); -} - -criterion_group!(benches, cache); -criterion_main!(benches); diff --git a/dozer-cache/src/cache/expression/mod.rs b/dozer-cache/src/cache/expression/mod.rs deleted file mode 100644 index 1554f352b7..0000000000 --- a/dozer-cache/src/cache/expression/mod.rs +++ /dev/null @@ -1,153 +0,0 @@ -use dozer_types::serde::{Deserialize, Serialize}; -use dozer_types::serde_json::Value; -mod query_helper; -mod query_serde; -use dozer_types::constants::DEFAULT_DEFAULT_MAX_NUM_RECORDS; -#[cfg(test)] -mod tests; - -#[derive(Clone, Debug, Copy, PartialEq)] -pub enum Skip { - Skip(usize), - After(u64), -} - -impl Default for Skip { - fn default() -> Self { - Skip::Skip(0) - } -} - -#[derive(Clone, Debug, PartialEq)] -pub struct QueryExpression { - pub filter: Option, - pub order_by: SortOptions, - pub limit: Option, - pub skip: Skip, -} - -impl QueryExpression { - pub fn with_limit(limit: usize) -> Self { - Self { - filter: None, - order_by: Default::default(), - limit: Some(limit), - skip: Default::default(), - } - } - - pub fn with_no_limit() -> Self { - Self { - filter: None, - order_by: Default::default(), - limit: None, - skip: Default::default(), - } - } -} - -impl Default for QueryExpression { - fn default() -> Self { - Self::with_limit(DEFAULT_DEFAULT_MAX_NUM_RECORDS) - } -} - -impl QueryExpression { - pub fn new( - filter: Option, - order_by: Vec, - limit: Option, - skip: Skip, - ) -> Self { - Self { - filter, - order_by: SortOptions(order_by), - limit, - skip, - } - } -} - -#[derive(Clone, Debug, PartialEq)] -pub enum FilterExpression { - // a = 1, a containts "s", a > 4 - Simple(String, Operator, Value), - And(Vec), -} - -#[derive(Clone, 
Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] -#[serde(crate = "dozer_types::serde")] -pub enum Operator { - #[serde(rename = "$lt")] - LT, - #[serde(rename = "$lte")] - LTE, - #[serde(rename = "$eq")] - EQ, - #[serde(rename = "$gt")] - GT, - #[serde(rename = "$gte")] - GTE, - #[serde(rename = "$contains")] - Contains, - #[serde(rename = "$matches_any")] - MatchesAny, - #[serde(rename = "$matches_all")] - MatchesAll, -} - -impl Operator { - pub fn supported_by_sorted_inverted(&self) -> bool { - match self { - Operator::LT | Operator::LTE | Operator::EQ | Operator::GT | Operator::GTE => true, - Operator::Contains | Operator::MatchesAny | Operator::MatchesAll => false, - } - } - - pub fn supported_by_full_text(&self) -> bool { - match self { - Operator::LT | Operator::LTE | Operator::EQ | Operator::GT | Operator::GTE => false, - Operator::Contains | Operator::MatchesAny | Operator::MatchesAll => true, - } - } - - pub fn is_range_operator(&self) -> bool { - match self { - Operator::LT | Operator::LTE | Operator::GT | Operator::GTE => true, - Operator::EQ | Operator::Contains | Operator::MatchesAny | Operator::MatchesAll => { - false - } - } - } -} - -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct SortOption { - pub field_name: String, - pub direction: SortDirection, -} - -impl SortOption { - pub fn new(field_name: String, direction: SortDirection) -> Self { - Self { - field_name, - direction, - } - } -} - -/// A wrapper of `Vec`, for customizing the `Serialize` and `Deserialize` implementation. -#[derive(Clone, Debug, PartialEq, Eq, Default)] -pub struct SortOptions(pub Vec); - -#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize, Serialize)] -#[serde(crate = "dozer_types::serde")] -pub enum SortDirection { - #[serde(rename = "asc")] - Ascending, - #[serde(rename = "desc")] - Descending, -} - -#[derive(Debug, Clone)] -pub struct SQLQuery(pub String); diff --git a/dozer-cache/src/cache/expression/query_helper.rs b/dozer-cache/src/cache/expression/query_helper.rs deleted file mode 100644 index fb6106c240..0000000000 --- a/dozer-cache/src/cache/expression/query_helper.rs +++ /dev/null @@ -1,238 +0,0 @@ -use dozer_types::serde::{ - de::{self, Visitor}, - ser::SerializeMap, - Deserialize, Deserializer, Serialize, -}; -use dozer_types::serde_json::Value; - -use super::super::expression::Operator; - -pub struct OperatorAndValue { - pub operator: Operator, - pub value: Value, -} - -impl<'de> Deserialize<'de> for OperatorAndValue { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - struct OperatorAndValueVisitor {} - impl<'de> Visitor<'de> for OperatorAndValueVisitor { - type Value = OperatorAndValue; - - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { - formatter.write_str("value or map from operator to value") - } - - fn visit_bool(self, v: bool) -> Result - where - E: de::Error, - { - Ok(OperatorAndValue { - operator: Operator::EQ, - value: Value::Bool(v), - }) - } - - fn visit_borrowed_str(self, v: &'de str) -> Result - where - E: de::Error, - { - Ok(OperatorAndValue { - operator: Operator::EQ, - value: Value::String(v.to_string()), - }) - } - - fn visit_char(self, v: char) -> Result - where - E: de::Error, - { - Ok(OperatorAndValue { - operator: Operator::EQ, - value: Value::String(v.to_string()), - }) - } - - fn visit_f32(self, v: f32) -> Result - where - E: de::Error, - { - Ok(OperatorAndValue { - operator: Operator::EQ, - value: Value::from(v), - }) - } - - fn visit_f64(self, v: f64) -> Result - where - E: de::Error, 
- { - Ok(OperatorAndValue { - operator: Operator::EQ, - value: Value::from(v), - }) - } - - fn visit_i16(self, v: i16) -> Result - where - E: de::Error, - { - Ok(OperatorAndValue { - operator: Operator::EQ, - value: Value::from(v), - }) - } - - fn visit_i32(self, v: i32) -> Result - where - E: de::Error, - { - Ok(OperatorAndValue { - operator: Operator::EQ, - value: Value::from(v), - }) - } - - fn visit_i64(self, v: i64) -> Result - where - E: de::Error, - { - Ok(OperatorAndValue { - operator: Operator::EQ, - value: Value::from(v), - }) - } - - fn visit_i8(self, v: i8) -> Result - where - E: de::Error, - { - Ok(OperatorAndValue { - operator: Operator::EQ, - value: Value::from(v), - }) - } - - fn visit_map(self, mut map: A) -> Result - where - A: de::MapAccess<'de>, - { - if let Some((operator, value)) = map.next_entry()? { - if map.next_entry::()?.is_some() { - Err(de::Error::custom( - "More than one statement passed in Simple Expression", - )) - } else { - Ok(OperatorAndValue { operator, value }) - } - } else { - Err(de::Error::custom("empty object passed as value")) - } - } - - fn visit_none(self) -> Result - where - E: de::Error, - { - Ok(OperatorAndValue { - operator: Operator::EQ, - value: Value::Null, - }) - } - - fn visit_str(self, v: &str) -> Result - where - E: de::Error, - { - Ok(OperatorAndValue { - operator: Operator::EQ, - value: Value::String(v.to_string()), - }) - } - - fn visit_string(self, v: String) -> Result - where - E: de::Error, - { - Ok(OperatorAndValue { - operator: Operator::EQ, - value: Value::String(v), - }) - } - - fn visit_u16(self, v: u16) -> Result - where - E: de::Error, - { - Ok(OperatorAndValue { - operator: Operator::EQ, - value: Value::from(v), - }) - } - - fn visit_u32(self, v: u32) -> Result - where - E: de::Error, - { - Ok(OperatorAndValue { - operator: Operator::EQ, - value: Value::from(v), - }) - } - - fn visit_u64(self, v: u64) -> Result - where - E: de::Error, - { - Ok(OperatorAndValue { - operator: Operator::EQ, - value: Value::from(v), - }) - } - - fn visit_u8(self, v: u8) -> Result - where - E: de::Error, - { - Ok(OperatorAndValue { - operator: Operator::EQ, - value: Value::from(v), - }) - } - - fn visit_unit(self) -> Result - where - E: de::Error, - { - Ok(OperatorAndValue { - operator: Operator::EQ, - value: Value::Null, - }) - } - } - deserializer.deserialize_any(OperatorAndValueVisitor {}) - } -} - -pub struct OperatorAndValueBorrow<'a> { - pub operator: &'a Operator, - pub value: &'a Value, -} - -impl<'a> Serialize for OperatorAndValueBorrow<'a> { - fn serialize(&self, serializer: S) -> Result - where - S: dozer_types::serde::Serializer, - { - match self.operator { - Operator::EQ => self.value.serialize(serializer), - _ => { - let mut map = serializer.serialize_map(Some(1))?; - map.serialize_entry(self.operator, self.value)?; - map.end() - } - } - } -} diff --git a/dozer-cache/src/cache/expression/query_serde.rs b/dozer-cache/src/cache/expression/query_serde.rs deleted file mode 100644 index 40c8f126bf..0000000000 --- a/dozer-cache/src/cache/expression/query_serde.rs +++ /dev/null @@ -1,246 +0,0 @@ -use dozer_types::serde::{ - de::{self, Deserialize, Deserializer, Error, MapAccess, Visitor}, - ser::{Serialize, SerializeMap, Serializer}, -}; - -use super::{ - super::expression::{FilterExpression, Skip, SortOption}, - query_helper::{OperatorAndValue, OperatorAndValueBorrow}, - QueryExpression, SQLQuery, SortOptions, -}; - -impl<'de> Deserialize<'de> for FilterExpression { - fn deserialize(deserializer: D) -> Result - where - D: 
Deserializer<'de>, - { - struct FilterExpressionVisitor {} - impl<'de> Visitor<'de> for FilterExpressionVisitor { - type Value = FilterExpression; - - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { - formatter.write_str("map from field name to value or operator value map") - } - fn visit_map(self, mut map: A) -> Result - where - A: MapAccess<'de>, - { - let mut expressions = vec![]; - while let Some(key) = map.next_key::()? { - if key == "$and" { - expressions.push(FilterExpression::And(map.next_value()?)); - } else { - let operator_and_value = map.next_value::()?; - expressions.push(FilterExpression::Simple( - key, - operator_and_value.operator, - operator_and_value.value, - )); - } - } - if expressions.len() == 1 { - Ok(expressions.remove(0)) - } else { - Ok(FilterExpression::And(expressions)) - } - } - } - deserializer.deserialize_map(FilterExpressionVisitor {}) - } -} - -impl Serialize for FilterExpression { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - match self { - FilterExpression::Simple(name, operator, value) => { - let mut state = serializer.serialize_map(Some(1))?; - state.serialize_entry(name, &OperatorAndValueBorrow { operator, value })?; - state.end() - } - FilterExpression::And(expressions) => { - let mut state = serializer.serialize_map(Some(1))?; - state.serialize_entry("$and", &expressions)?; - state.end() - } - } - } -} - -impl<'de> Deserialize<'de> for SortOptions { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - struct SortOptionsVisitor {} - impl<'de> Visitor<'de> for SortOptionsVisitor { - type Value = SortOptions; - - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { - formatter.write_str("map from field name to sort direction") - } - - fn visit_map(self, mut map: A) -> Result - where - A: MapAccess<'de>, - { - let mut sort_options = vec![]; - while let Some((field_name, direction)) = map.next_entry()? { - sort_options.push(SortOption { - field_name, - direction, - }); - } - Ok(SortOptions(sort_options)) - } - } - deserializer.deserialize_map(SortOptionsVisitor {}) - } -} - -impl Serialize for SortOptions { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - let mut state = serializer.serialize_map(Some(self.0.len()))?; - for sort_option in &self.0 { - state.serialize_entry(&sort_option.field_name, &sort_option.direction)?; - } - state.end() - } -} - -impl<'de> Deserialize<'de> for QueryExpression { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - struct QueryExpressionVisitor {} - impl<'de> Visitor<'de> for QueryExpressionVisitor { - type Value = QueryExpression; - - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { - formatter.write_str("map of dozer query options") - } - fn visit_map(self, mut map: A) -> Result - where - A: MapAccess<'de>, - { - let mut filter = None; - let mut order_by = None; - let mut limit = None; - let mut skip = None; - while let Some(key) = map.next_key::()? 
{ - match key.as_str() { - "$filter" => { - filter = Some(map.next_value()?); - } - "$order_by" => { - order_by = Some(map.next_value()?); - } - "$limit" => { - limit = Some(map.next_value()?); - } - "$skip" => { - if skip.is_some() { - return Err(Error::custom("$skip cannot be used with $after")); - } - skip = Some(Skip::Skip(map.next_value()?)); - } - "$after" => { - if skip.is_some() { - return Err(Error::custom("$after cannot be used with $skip")); - } - skip = Some(Skip::After(map.next_value()?)); - } - _ => {} - } - } - Ok(QueryExpression { - filter, - order_by: order_by.unwrap_or_default(), - limit, - skip: skip.unwrap_or_default(), - }) - } - } - deserializer.deserialize_map(QueryExpressionVisitor {}) - } -} - -impl Serialize for QueryExpression { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - let mut state = serializer.serialize_map(Some(4))?; - if let Some(filter) = &self.filter { - state.serialize_entry("$filter", filter)?; - } - if !self.order_by.0.is_empty() { - state.serialize_entry("$order_by", &self.order_by)?; - } - if let Some(limit) = self.limit { - state.serialize_entry("$limit", &limit)?; - } - match self.skip { - Skip::Skip(skip) => { - if skip > 0 { - state.serialize_entry("$skip", &skip)?; - } - } - Skip::After(after) => { - state.serialize_entry("$after", &after)?; - } - } - state.end() - } -} - -impl<'de> Deserialize<'de> for SQLQuery { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - struct SQLQueryVisitor; - impl<'de> Visitor<'de> for SQLQueryVisitor { - type Value = SQLQuery; - - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { - formatter.write_str("map containing a single key 'query'") - } - fn visit_map(self, mut map: A) -> Result - where - A: MapAccess<'de>, - { - let mut query = None; - while let Some(key) = map.next_key::()? 
{ - if key.as_str() == "query" { - query = Some(map.next_value()?); - break; - } - } - if let Some(query) = query { - Ok(SQLQuery(query)) - } else { - Err(de::Error::missing_field("query")) - } - } - } - deserializer.deserialize_map(SQLQueryVisitor) - } -} - -impl Serialize for SQLQuery { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - let mut state = serializer.serialize_map(Some(1))?; - state.serialize_entry("query", &self.0)?; - state.end() - } -} diff --git a/dozer-cache/src/cache/expression/tests/deserialize.rs b/dozer-cache/src/cache/expression/tests/deserialize.rs deleted file mode 100644 index 065cf0f69d..0000000000 --- a/dozer-cache/src/cache/expression/tests/deserialize.rs +++ /dev/null @@ -1,251 +0,0 @@ -use crate::cache::expression::FilterExpression; -use crate::cache::expression::Operator; -use crate::cache::expression::Skip; -use crate::cache::expression::SortOptions; -use crate::cache::expression::{ - QueryExpression, - SortDirection::{Ascending, Descending}, - SortOption, -}; -use dozer_types::serde_json; -use dozer_types::serde_json::json; -use dozer_types::serde_json::Value; - -#[test] -fn test_operators() { - let operators = vec![ - (Operator::GT, "$gt"), - (Operator::GTE, "$gte"), - (Operator::LT, "$lt"), - (Operator::LTE, "$lte"), - (Operator::EQ, "$eq"), - (Operator::Contains, "$contains"), - (Operator::MatchesAny, "$matches_any"), - (Operator::MatchesAll, "$matches_all"), - ]; - for (op, op_str) in operators { - let fetched = serde_json::from_value(Value::String(op_str.to_string())).unwrap(); - - assert_eq!(op, fetched, "are equal"); - } -} - -#[test] -fn test_filter_query_deserialize_simple() { - test_deserialize_filter( - json!({"a": 1}), - FilterExpression::Simple("a".to_string(), Operator::EQ, Value::from(1)), - ); - test_deserialize_filter( - json!({"ab_c": 1}), - FilterExpression::Simple("ab_c".to_string(), Operator::EQ, Value::from(1)), - ); - - test_deserialize_filter( - json!({"a": {"$eq": 1}}), - FilterExpression::Simple("a".to_string(), Operator::EQ, Value::from(1)), - ); - - test_deserialize_filter( - json!({"a": {"$gt": 1}}), - FilterExpression::Simple("a".to_string(), Operator::GT, Value::from(1)), - ); - - test_deserialize_filter( - json!({"a": {"$lt": 1}}), - FilterExpression::Simple("a".to_string(), Operator::LT, Value::from(1)), - ); - - test_deserialize_filter( - json!({"a": {"$lte": 1}}), - FilterExpression::Simple("a".to_string(), Operator::LTE, Value::from(1)), - ); - test_deserialize_filter( - json!({"a": -64}), - FilterExpression::Simple("a".to_string(), Operator::EQ, Value::from(-64)), - ); - test_deserialize_filter( - json!({"a": 256.0}), - FilterExpression::Simple("a".to_string(), Operator::EQ, Value::from(256.0)), - ); - test_deserialize_filter( - json!({"a": -256.88393}), - FilterExpression::Simple("a".to_string(), Operator::EQ, Value::from(-256.88393)), - ); - test_deserialize_filter( - json!({"a": 98_222}), - FilterExpression::Simple("a".to_string(), Operator::EQ, Value::from(98222)), - ); - test_deserialize_filter( - json!({"a": true}), - FilterExpression::Simple("a".to_string(), Operator::EQ, Value::from(true)), - ); - test_deserialize_filter( - json!({ "a": null }), - FilterExpression::Simple("a".to_string(), Operator::EQ, Value::Null), - ); - - test_deserialize_filter_error(json!({"a": []})); - test_deserialize_filter_error(json!({"a": {}})); - test_deserialize_filter_error(json!({"a": {"lte": 1}})); - test_deserialize_filter_error(json!({"$lte": {"lte": 1}})); - 
test_deserialize_filter_error(json!([])); - test_deserialize_filter_error(json!(2)); - test_deserialize_filter_error(json!(true)); - test_deserialize_filter_error(json!("abc")); - test_deserialize_filter_error(json!(2.3)); -} -#[test] -fn test_filter_query_deserialize_complex() { - test_deserialize_filter( - json!({"a": {"$lt": 1}, "b": {"$gte": 3}}), - FilterExpression::And(vec![ - FilterExpression::Simple("a".to_string(), Operator::LT, Value::from(1)), - FilterExpression::Simple("b".to_string(), Operator::GTE, Value::from(3)), - ]), - ); - // AND with 3 expression - let three_fields = FilterExpression::And(vec![ - FilterExpression::Simple("a".to_string(), Operator::LT, Value::from(1)), - FilterExpression::Simple("b".to_string(), Operator::GTE, Value::from(3)), - FilterExpression::Simple("c".to_string(), Operator::EQ, Value::from(3)), - ]); - test_deserialize_filter( - json!({"a": {"$lt": 1}, "b": {"$gte": 3}, "c": 3}), - three_fields, - ); - - // Same expression with different conditions - test_deserialize_filter( - json!({ "$and": [{"film_id": {"$lt": 500}}, {"film_id": {"$gte": 2}}]}), - FilterExpression::And(vec![ - FilterExpression::Simple("film_id".to_string(), Operator::LT, Value::from(500)), - FilterExpression::Simple("film_id".to_string(), Operator::GTE, Value::from(2)), - ]), - ); - - test_deserialize_filter_error(json!({"$and": {}})); - test_deserialize_filter_error(json!({"$and": [{"a": {"lt": 1}}, {"b": {"$gt": 1}}]})); - test_deserialize_filter_error(json!({"and": [{"a": {"$lt": 1}}]})); -} - -#[test] -fn test_sort_options_query_deserialize() { - test_deserialize_sort_options(json!({}), vec![]); - test_deserialize_sort_options( - json!({"a": "asc", "b": "desc"}), - vec![ - SortOption::new("a".into(), Ascending), - SortOption::new("b".into(), Descending), - ], - ); - - test_deserialize_sort_options_error(json!("")); - test_deserialize_sort_options_error(json!(1)); - test_deserialize_sort_options_error(json!(1.2)); - test_deserialize_sort_options_error(json!(true)); - test_deserialize_sort_options_error(json!(false)); - test_deserialize_sort_options_error(json!(null)); - test_deserialize_filter_error(json!([])); - test_deserialize_sort_options_error(json!({"a": "string"})); - test_deserialize_sort_options_error(json!({"a": 1})); - test_deserialize_sort_options_error(json!({"a": 1.2})); - test_deserialize_sort_options_error(json!({"a": true})); - test_deserialize_sort_options_error(json!({"a": false})); - test_deserialize_sort_options_error(json!({ "a": null })); - test_deserialize_sort_options_error(json!({"a": []})); - test_deserialize_sort_options_error(json!({"a": {}})); -} - -#[test] -fn test_query_expression_deserialize() { - test_deserialize_query( - json!({}), - QueryExpression::new(None, vec![], None, Skip::Skip(0)), - ); - test_deserialize_query( - json!({"$filter": {}}), - QueryExpression::new( - Some(FilterExpression::And(vec![])), - vec![], - None, - Skip::Skip(0), - ), - ); - test_deserialize_query( - json!({"$order_by": {"abc": "asc"}}), - QueryExpression::new( - None, - vec![SortOption { - field_name: "abc".to_owned(), - direction: Ascending, - }], - None, - Skip::Skip(0), - ), - ); - test_deserialize_query( - json!({"$order_by": {"abc": "asc"}, "$limit": 100, "$skip": 20}), - QueryExpression::new( - None, - vec![SortOption { - field_name: "abc".to_owned(), - direction: Ascending, - }], - Some(100), - Skip::Skip(20), - ), - ); - test_deserialize_query( - json!({ "$after": 30 }), - QueryExpression::new(None, vec![], None, Skip::After(30)), - ); - 
test_deserialize_query( - json!({"$filter": {"a": {"$lt": 1}, "b": {"$gte": 3}, "c": 3}}), - QueryExpression::new( - Some(FilterExpression::And(vec![ - FilterExpression::Simple("a".to_string(), Operator::LT, Value::from(1)), - FilterExpression::Simple("b".to_string(), Operator::GTE, Value::from(3)), - FilterExpression::Simple("c".to_string(), Operator::EQ, Value::from(3)), - ])), - vec![], - None, - Skip::Skip(0), - ), - ); -} - -#[test] -fn test_query_expression_deserialize_error() { - test_deserialize_query_error(json!({ "$skip": 20, "$after": 30 })); -} - -fn test_deserialize_query(a: Value, b: QueryExpression) { - let parsed_result = serde_json::from_value::(a).unwrap(); - assert_eq!(parsed_result, b, "must be equal"); -} - -fn test_deserialize_query_error(a: Value) { - let parsed_result = serde_json::from_value::(a); - assert!(parsed_result.is_err()); -} - -fn test_deserialize_filter(a: Value, b: FilterExpression) { - let parsed_result = serde_json::from_value::(a).unwrap(); - assert_eq!(parsed_result, b, "must be equal"); -} -fn test_deserialize_filter_error(a: Value) { - let parsed_result = serde_json::from_value::(a); - assert!(parsed_result.is_err()); -} - -fn test_deserialize_sort_options(json: Value, expected: Vec) { - assert_eq!( - serde_json::from_value::(json).unwrap(), - SortOptions(expected) - ); -} - -fn test_deserialize_sort_options_error(json: Value) { - assert!(serde_json::from_value::(json).is_err()); -} diff --git a/dozer-cache/src/cache/expression/tests/mod.rs b/dozer-cache/src/cache/expression/tests/mod.rs deleted file mode 100644 index 5f582ffa67..0000000000 --- a/dozer-cache/src/cache/expression/tests/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod deserialize; -mod serialize; diff --git a/dozer-cache/src/cache/expression/tests/serialize.rs b/dozer-cache/src/cache/expression/tests/serialize.rs deleted file mode 100644 index bf753ef74a..0000000000 --- a/dozer-cache/src/cache/expression/tests/serialize.rs +++ /dev/null @@ -1,221 +0,0 @@ -use crate::cache::expression::FilterExpression; -use crate::cache::expression::Operator; -use crate::cache::expression::QueryExpression; -use crate::cache::expression::Skip; -use crate::cache::expression::SortDirection::{Ascending, Descending}; -use crate::cache::expression::SortOption; -use crate::cache::expression::SortOptions; -use dozer_types::serde_json; -use dozer_types::serde_json::json; -use dozer_types::serde_json::Value; - -#[test] -fn test_serialize_filter_simple() { - test_serialize_filter( - json!({"a": 1}), - FilterExpression::Simple("a".to_string(), Operator::EQ, Value::from(1)), - ); - test_serialize_filter( - json!({"ab_c": 1}), - FilterExpression::Simple("ab_c".to_string(), Operator::EQ, Value::from(1)), - ); - - test_serialize_filter( - json!({"a": {"$gt": 1}}), - FilterExpression::Simple("a".to_string(), Operator::GT, Value::from(1)), - ); - - test_serialize_filter( - json!({"a": {"$lt": 1}}), - FilterExpression::Simple("a".to_string(), Operator::LT, Value::from(1)), - ); - - test_serialize_filter( - json!({"a": {"$lte": 1}}), - FilterExpression::Simple("a".to_string(), Operator::LTE, Value::from(1)), - ); - test_serialize_filter( - json!({"a": -64}), - FilterExpression::Simple("a".to_string(), Operator::EQ, Value::from(-64)), - ); - test_serialize_filter( - json!({"a": 256.0}), - FilterExpression::Simple("a".to_string(), Operator::EQ, Value::from(256.0)), - ); - test_serialize_filter( - json!({"a": -256.88393}), - FilterExpression::Simple("a".to_string(), Operator::EQ, Value::from(-256.88393)), - ); - 
test_serialize_filter( - json!({"a": 98_222}), - FilterExpression::Simple("a".to_string(), Operator::EQ, Value::from(98222)), - ); - test_serialize_filter( - json!({"a": true}), - FilterExpression::Simple("a".to_string(), Operator::EQ, Value::from(true)), - ); - test_serialize_filter( - json!({ "a": null }), - FilterExpression::Simple("a".to_string(), Operator::EQ, Value::Null), - ); -} -#[test] -fn test_serialize_filter_complex() { - test_serialize_filter( - json!({"$and": [{"a": {"$lt": 1}}, {"b": {"$gte": 3}}]}), - FilterExpression::And(vec![ - FilterExpression::Simple("a".to_string(), Operator::LT, Value::from(1)), - FilterExpression::Simple("b".to_string(), Operator::GTE, Value::from(3)), - ]), - ); - // AND with 3 expression - let three_fields = FilterExpression::And(vec![ - FilterExpression::Simple("a".to_string(), Operator::LT, Value::from(1)), - FilterExpression::Simple("b".to_string(), Operator::GTE, Value::from(3)), - FilterExpression::Simple("c".to_string(), Operator::EQ, Value::from(3)), - ]); - test_serialize_filter( - json!({"$and":[{"a": {"$lt": 1}}, {"b": {"$gte": 3}}, {"c": 3}]}), - three_fields, - ); -} - -#[test] -fn test_serialize_sort_options() { - test_serialize_sort_options_impl(vec![], json!({})); - test_serialize_sort_options_impl( - vec![SortOption::new("a".into(), Ascending)], - json!({"a": "asc"}), - ); - test_serialize_sort_options_impl( - vec![SortOption::new("b".into(), Descending)], - json!({"b": "desc"}), - ); - test_serialize_sort_options_impl( - vec![ - SortOption::new("a".into(), Ascending), - SortOption::new("b".into(), Descending), - ], - json!({"a": "asc", "b": "desc"}), - ); - test_serialize_sort_options_impl( - vec![ - SortOption::new("b".into(), Ascending), - SortOption::new("a".into(), Descending), - ], - json!({"b": "asc", "a": "desc"}), - ); -} - -fn test_serialize_filter(a: Value, b: FilterExpression) { - let serialized = serde_json::to_value(b).unwrap(); - assert_eq!(a, serialized, "must be equal"); -} - -fn test_serialize_sort_options_impl(sort_options: Vec, json: Value) { - assert_eq!( - serde_json::to_value(SortOptions(sort_options)).unwrap(), - json, - ); -} - -#[test] -fn test_serialize_skip() { - test_serialize_skip_impl(Skip::Skip(0), json!({})); - test_serialize_skip_impl(Skip::Skip(1), json!({"$skip": 1})); - test_serialize_skip_impl(Skip::After(10), json!({"$after": 10})); -} - -fn test_serialize_skip_impl(skip: Skip, json: Value) { - let query = QueryExpression { - skip, - limit: None, - ..Default::default() - }; - assert_eq!(serde_json::to_value(query).unwrap(), json); -} - -#[test] -fn test_serialize_query_expression() { - test_serialize_query_expression_impl( - QueryExpression { - filter: None, - limit: None, - ..Default::default() - }, - json!({}), - ); - test_serialize_query_expression_impl( - QueryExpression { - filter: Some(FilterExpression::Simple( - "a".to_string(), - Operator::EQ, - Value::from(1), - )), - limit: None, - ..Default::default() - }, - json!({"$filter": { "a": 1 }}), - ); - - test_serialize_query_expression_impl( - QueryExpression { - order_by: Default::default(), - limit: None, - ..Default::default() - }, - json!({}), - ); - test_serialize_query_expression_impl( - QueryExpression { - order_by: SortOptions(vec![SortOption::new("a".into(), Ascending)]), - limit: None, - ..Default::default() - }, - json!({"$order_by": {"a": "asc"}}), - ); - - test_serialize_query_expression_impl( - QueryExpression { - limit: None, - ..Default::default() - }, - json!({}), - ); - test_serialize_query_expression_impl( - 
QueryExpression { - limit: Some(1), - ..Default::default() - }, - json!({"$limit": 1}), - ); - - test_serialize_query_expression_impl( - QueryExpression { - skip: Skip::Skip(0), - limit: None, - ..Default::default() - }, - json!({}), - ); - test_serialize_query_expression_impl( - QueryExpression { - skip: Skip::Skip(1), - limit: None, - ..Default::default() - }, - json!({"$skip": 1}), - ); - test_serialize_query_expression_impl( - QueryExpression { - skip: Skip::After(10), - limit: None, - ..Default::default() - }, - json!({"$after": 10}), - ); -} - -fn test_serialize_query_expression_impl(query: QueryExpression, json: Value) { - assert_eq!(serde_json::to_value(query).unwrap(), json); -} diff --git a/dozer-cache/src/cache/index/mod.rs b/dozer-cache/src/cache/index/mod.rs deleted file mode 100644 index f31208c3c2..0000000000 --- a/dozer-cache/src/cache/index/mod.rs +++ /dev/null @@ -1,148 +0,0 @@ -use std::cmp::Ordering; - -use dozer_types::types::{IndexDefinition, Record}; - -pub trait CacheIndex { - // Builds one index based on index definition and record - fn build(index: &IndexDefinition, rec: &Record) -> Vec>; - - fn get_key(schema_id: u32, field_idx: &usize, field_val: &[u8]) -> Vec; -} - -use dozer_types::types::Field; - -use crate::errors::CompareError; - -pub fn get_primary_key(primary_index: &[usize], values: &[Field]) -> Vec { - debug_assert!( - !primary_index.is_empty(), - "Primary key indexes cannot be empty" - ); - - let key: Vec> = primary_index - .iter() - .map(|idx| values[*idx].encode()) - .collect(); - - key.join("#".as_bytes()) -} - -/// Returns the secondary index key for a given set of fields. -/// -/// We allow computing the secondary index key of "prefix" fields, so the user can filter the "prefix" fields using `Eq` filters, -/// and sort the filtering result using other fields. -/// -/// In the meantime, we compute the key differently for single field indexes and compound indexes. -/// We'are not able to tell if certain `fields` belong to a single field index or compound index if its length is 1, hence the second parameter. -/// -/// # Parameters -/// - `fields`: The fields to index. -/// - `is_single_field_index`: Whether the `fields` belong to a single field index. If `true`, `fields` must have length 1. 
-pub fn get_secondary_index(fields: &[&Field], is_single_field_index: bool) -> Vec<u8> {
-    debug_assert!(!is_single_field_index || fields.len() == 1);
-    if is_single_field_index {
-        fields[0].encode()
-    } else {
-        get_composite_secondary_index(fields)
-    }
-}
-
-pub fn compare_single_secondary_index(a: &[u8], b: &[u8]) -> Result<Ordering, CompareError> {
-    let a = Field::decode(a)?;
-    let b = Field::decode(b)?;
-    Ok(a.cmp(&b))
-}
-
-pub fn compare_composite_secondary_index(a: &[u8], b: &[u8]) -> Result<Ordering, CompareError> {
-    let mut a = CompositeSecondaryIndexKey::new(a);
-    let mut b = CompositeSecondaryIndexKey::new(b);
-    Ok(loop {
-        match (a.next(), b.next()) {
-            (Some(a), Some(b)) => {
-                let a = a?;
-                let b = b?;
-                match a.cmp(&b) {
-                    Ordering::Equal => continue,
-                    ordering => break ordering,
-                }
-            }
-            (Some(_), None) => break Ordering::Greater,
-            (None, Some(_)) => break Ordering::Less,
-            (None, None) => break Ordering::Equal,
-        }
-    })
-}
-
-pub fn get_full_text_secondary_index(token: &str) -> Vec<u8> {
-    token.as_bytes().to_vec()
-}
-
-fn get_composite_secondary_index(fields: &[&Field]) -> Vec<u8> {
-    fn get_field_encoding_len(field: &Field) -> usize {
-        8 + field.encoding_len()
-    }
-
-    let total_len = fields
-        .iter()
-        .map(|field| get_field_encoding_len(field))
-        .sum::<usize>();
-    let mut buf = vec![0; total_len];
-    let mut offset = 0;
-    for field in fields {
-        let field_len = get_field_encoding_len(field);
-        buf[offset..offset + 8].copy_from_slice(&(field_len as u64).to_be_bytes());
-        field.encode_buf(&mut buf[offset + 8..offset + field_len]);
-        offset += field_len;
-    }
-    buf
-}
-
-struct CompositeSecondaryIndexKey<'a> {
-    buf: &'a [u8],
-    offset: usize,
-}
-
-impl<'a> CompositeSecondaryIndexKey<'a> {
-    fn new(buf: &'a [u8]) -> Self {
-        Self { buf, offset: 0 }
-    }
-
-    fn decode_one(&mut self) -> Result<Field, CompareError> {
-        if self.offset + 8 > self.buf.len() {
-            return Err(CompareError::CannotReadFieldLength);
-        }
-
-        let field_len = u64::from_be_bytes(
-            self.buf[self.offset..self.offset + 8]
-                .try_into()
-                .expect("We have checked `buf` is long enough"),
-        ) as usize;
-        if self.offset + field_len > self.buf.len() {
-            return Err(CompareError::CannotReadField);
-        }
-
-        let field = Field::decode(&self.buf[self.offset + 8..self.offset + field_len])?;
-        self.offset += field_len;
-        Ok(field)
-    }
-}
-
-impl<'a> Iterator for CompositeSecondaryIndexKey<'a> {
-    type Item = Result<Field, CompareError>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        if self.offset >= self.buf.len() {
-            return None;
-        }
-
-        let result = self.decode_one();
-        if result.is_err() {
-            // Once an error happens, we stop decoding the rest of the buffer.
-            self.offset = self.buf.len();
-        }
-        Some(result)
-    }
-}
-
-#[cfg(test)]
-mod tests;
diff --git a/dozer-cache/src/cache/index/tests.rs b/dozer-cache/src/cache/index/tests.rs
deleted file mode 100644
index 9e8030a1ae..0000000000
--- a/dozer-cache/src/cache/index/tests.rs
+++ /dev/null
@@ -1,43 +0,0 @@
-use dozer_types::types::field_test_cases;
-
-use crate::cache::index::{get_composite_secondary_index, CompositeSecondaryIndexKey};
-
-use super::get_full_text_secondary_index;
-
-#[test]
-fn test_get_full_text_secondary_index() {
-    assert_eq!(get_full_text_secondary_index("foo"), b"foo",);
-}
-
-#[test]
-fn test_composite_key_encode_roundtrip() {
-    // Single field
-    for field in field_test_cases() {
-        let key = get_composite_secondary_index(&[&field]);
-        let mut key = CompositeSecondaryIndexKey::new(&key);
-        assert_eq!(key.next().unwrap().unwrap(), field);
-        assert!(key.next().is_none());
-    }
-
-    // Two fields
-    for field1 in field_test_cases() {
-        for field2 in field_test_cases() {
-            let key = get_composite_secondary_index(&[&field1, &field2]);
-            let mut key = CompositeSecondaryIndexKey::new(&key);
-            assert_eq!(key.next().unwrap().unwrap(), field1);
-            assert_eq!(key.next().unwrap().unwrap(), field2);
-            assert!(key.next().is_none());
-        }
-    }
-}
-
-#[test]
-fn composite_key_encode_preserves_prefix() {
-    for field1 in field_test_cases() {
-        for field2 in field_test_cases() {
-            let key1 = get_composite_secondary_index(&[&field1]);
-            let key2 = get_composite_secondary_index(&[&field1, &field2]);
-            assert_eq!(key2[..key1.len()], key1);
-        }
-    }
-}
diff --git a/dozer-cache/src/cache/lmdb/cache/dump_restore.rs b/dozer-cache/src/cache/lmdb/cache/dump_restore.rs
deleted file mode 100644
index 52ca4cb917..0000000000
--- a/dozer-cache/src/cache/lmdb/cache/dump_restore.rs
+++ /dev/null
@@ -1,180 +0,0 @@
-use std::sync::Arc;
-
-use dozer_storage::{
-    errors::StorageError,
-    generator::FutureGeneratorContext,
-    lmdb::{RoTransaction, Transaction},
-    DumpItem, LmdbEnvironment,
-};
-use dozer_types::{log::info, parking_lot::Mutex};
-use tokio::io::AsyncRead;
-
-use crate::{
-    cache::{lmdb::indexing::IndexingThreadPool, CacheWriteOptions, RoCache},
-    errors::CacheError,
-};
-
-use super::{
-    main_environment, secondary_environment, secondary_environment_name, CacheOptions, LmdbCache,
-    LmdbRwCache, MainEnvironment, SecondaryEnvironment,
-};
-
-pub struct DumpTransaction<T: Transaction> {
-    main_txn: T,
-    main_env_metadata: Option<u64>,
-    secondary_txns: Vec<T>,
-    secondary_metadata: Vec<u64>,
-}
-
-impl<T: Transaction> DumpTransaction<T> {
-    pub fn main_env_metadata(&self) -> Option<u64> {
-        self.main_env_metadata
-    }
-
-    pub fn secondary_metadata(&self) -> &[u64] {
-        &self.secondary_metadata
-    }
-}
-
-pub fn begin_dump_txn<C: LmdbCache>(
-    cache: &C,
-) -> Result<DumpTransaction<RoTransaction>, CacheError> {
-    let main_env = cache.main_env();
-    let main_txn = main_env.begin_txn()?;
-    let main_env_metadata = main_env
-        .commit_state_with_txn(&main_txn)?
-        .map(|commit_state| commit_state.log_position);
-
-    let mut secondary_txns = vec![];
-    let mut secondary_metadata = vec![];
-    for index in 0..cache.get_schema().1.len() {
-        let secondary_env = cache.secondary_env(index);
-        let txn = secondary_env.begin_txn()?;
-        let metadata = secondary_env.next_operation_id(&txn)?;
-        secondary_txns.push(txn);
-        secondary_metadata.push(metadata);
-    }
-
-    Ok(DumpTransaction {
-        main_txn,
-        main_env_metadata,
-        secondary_txns,
-        secondary_metadata,
-    })
-}
-
-pub async fn dump<'txn, T: Transaction, C: LmdbCache>(
-    cache: &C,
-    txn: &'txn DumpTransaction<T>,
-    context: &FutureGeneratorContext<Result<DumpItem<'txn>, StorageError>>,
-) -> Result<(), ()> {
-    main_environment::dump_restore::dump(cache.main_env(), &txn.main_txn, context).await?;
-
-    for index in 0..cache.get_schema().1.len() {
-        let secondary_env = cache.secondary_env(index);
-        let txn = &txn.secondary_txns[index];
-        secondary_environment::dump_restore::dump(secondary_env, txn, context).await?;
-    }
-
-    Ok(())
-}
-
-pub async fn restore(
-    options: CacheOptions,
-    write_options: CacheWriteOptions,
-    indexing_thread_pool: Arc<Mutex<IndexingThreadPool>>,
-    reader: &mut (impl AsyncRead + Unpin),
-) -> Result<LmdbRwCache, CacheError> {
-    info!("Restoring cache with options {options:?}");
-    let rw_main_env =
-        main_environment::dump_restore::restore(&options, write_options, reader).await?;
-
-    let options = CacheOptions {
-        path: Some((
-            rw_main_env.base_path().to_path_buf(),
-            rw_main_env.name().to_string(),
-        )),
-        ..options
-    };
-    let ro_main_env = rw_main_env.share();
-
-    let mut rw_secondary_envs = vec![];
-    let mut ro_secondary_envs = vec![];
-    for index in 0..ro_main_env.schema().1.len() {
-        let name = secondary_environment_name(index);
-        let rw_secondary_env =
-            secondary_environment::dump_restore::restore(name, options.clone(), reader).await?;
-        let ro_secondary_env = rw_secondary_env.share();
-
-        rw_secondary_envs.push(rw_secondary_env);
-        ro_secondary_envs.push(ro_secondary_env);
-    }
-
-    indexing_thread_pool
-        .lock()
-        .add_cache_unsafe(ro_main_env, rw_secondary_envs);
-
-    Ok(LmdbRwCache {
-        main_env: rw_main_env,
-        secondary_envs: ro_secondary_envs,
-        indexing_thread_pool,
-    })
-}
-
-#[cfg(test)]
-mod tests {
-    use std::pin::pin;
-
-    use dozer_storage::generator::{Generator, IntoGenerator};
-
-    use crate::cache::{
-        lmdb::tests::utils::{create_cache, insert_rec_1},
-        test_utils, RwCache,
-    };
-
-    use super::*;
-
-    #[tokio::test]
-    async fn test_dump_restore() {
-        let (mut cache, indexing_thread_pool, _, _) = create_cache(test_utils::schema_1);
-
-        insert_rec_1(&mut cache, (0, Some("a".to_string()), None));
-        insert_rec_1(&mut cache, (1, None, Some(2)));
-        insert_rec_1(&mut cache, (2, Some("b".to_string()), Some(3)));
-        cache.commit(&Default::default()).unwrap();
-        indexing_thread_pool.lock().wait_until_catchup();
-
-        let mut data = vec![];
-        {
-            let cache = &cache;
-            let txn = &begin_dump_txn(cache).unwrap();
-            let generator = |context| async move { dump(cache, txn, &context).await.unwrap() };
-            let generator = generator.into_generator();
-            for item in pin!(generator).into_iter() {
-                data.extend_from_slice(&item.unwrap());
-            }
-        }
-
-        let indexing_thread_pool = Arc::new(Mutex::new(IndexingThreadPool::new(1)));
-        let restored_cache = restore(
-            Default::default(),
-            Default::default(),
-            indexing_thread_pool,
-            &mut data.as_slice(),
-        )
-        .await
-        .unwrap();
-
-        super::super::main_environment::dump_restore::tests::assert_main_env_equal(
-            cache.main_env(),
-            restored_cache.main_env(),
-        );
-
-        for index in 0..cache.main_env().schema().1.len() {
-
super::super::secondary_environment::dump_restore::tests::assert_secondary_env_equal( - cache.secondary_env(index), - restored_cache.secondary_env(index), - ); - } - } -} diff --git a/dozer-cache/src/cache/lmdb/cache/main_environment/conflict_resolution_tests.rs b/dozer-cache/src/cache/lmdb/cache/main_environment/conflict_resolution_tests.rs deleted file mode 100644 index 84a3f9cf89..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/main_environment/conflict_resolution_tests.rs +++ /dev/null @@ -1,258 +0,0 @@ -use crate::cache::index; -use crate::cache::lmdb::cache::{CacheWriteOptions, MainEnvironment}; -use crate::cache::test_utils::schema_multi_indices; -use crate::errors::CacheError; -use dozer_types::models::endpoint::{ - ConflictResolution, OnDeleteResolutionTypes, OnInsertResolutionTypes, OnUpdateResolutionTypes, -}; -use dozer_types::types::{Field, Record, Schema}; - -use super::RwMainEnvironment; - -fn init_env(conflict_resolution: ConflictResolution) -> (RwMainEnvironment, Schema) { - let schema = schema_multi_indices(); - let write_options = CacheWriteOptions { - insert_resolution: conflict_resolution.on_insert, - delete_resolution: conflict_resolution.on_delete, - update_resolution: conflict_resolution.on_update, - ..Default::default() - }; - let main_env = - RwMainEnvironment::new(Some(&schema), None, Default::default(), write_options).unwrap(); - (main_env, schema.0) -} - -#[test] -fn ignore_insert_error_when_type_nothing() { - let (mut env, schema) = init_env(ConflictResolution { - on_insert: OnInsertResolutionTypes::Nothing, - on_update: Default::default(), - on_delete: Default::default(), - }); - - let initial_values = vec![Field::Int(1), Field::String("Film name old".to_string())]; - let record = Record { - values: initial_values.clone(), - lifetime: None, - }; - env.insert(&record).unwrap(); - env.commit(&Default::default()).unwrap(); - - let key = index::get_primary_key(&schema.primary_index, &initial_values); - let record = env.get(&key).unwrap(); - - assert_eq!(initial_values, record.record.values); - assert_eq!(1, record.version); - - env.insert(&record.record).unwrap(); - - let record = env.get(&key).unwrap(); - - // version should remain unchanged, because insert should be ignored - assert_eq!(initial_values, record.record.values); - assert_eq!(1, record.version); -} - -#[test] -fn update_after_insert_error_when_type_update() { - let (mut env, schema) = init_env(ConflictResolution { - on_insert: OnInsertResolutionTypes::Update, - on_update: Default::default(), - on_delete: Default::default(), - }); - - let initial_values = vec![Field::Int(1), Field::String("Film name old".to_string())]; - let record = Record { - values: initial_values.clone(), - lifetime: None, - }; - env.insert(&record).unwrap(); - env.commit(&Default::default()).unwrap(); - - let key = index::get_primary_key(&schema.primary_index, &initial_values); - let record = env.get(&key).unwrap(); - - assert_eq!(initial_values, record.record.values); - assert_eq!(1, record.version); - - let second_insert_values = vec![ - Field::Int(1), - Field::String("Second insert name".to_string()), - ]; - let second_record = Record { - values: second_insert_values.clone(), - lifetime: None, - }; - - env.insert(&second_record).unwrap(); - env.commit(&Default::default()).unwrap(); - - let key = index::get_primary_key(&schema.primary_index, &initial_values); - let record = env.get(&key).unwrap(); - - // version should increase, because record should be updated - assert_eq!(second_insert_values, record.record.values); - 
assert_eq!(2, record.version); - - // Check cache size. It should have only one record - let current_count = env.count().unwrap(); - assert_eq!(current_count, 1_usize); -} - -#[test] -fn return_insert_error_when_type_panic() { - let (mut env, schema) = init_env(ConflictResolution { - on_insert: OnInsertResolutionTypes::Panic, - on_update: Default::default(), - on_delete: Default::default(), - }); - - let initial_values = vec![Field::Int(1), Field::String("Film name old".to_string())]; - let record = Record { - values: initial_values.clone(), - lifetime: None, - }; - env.insert(&record).unwrap(); - env.commit(&Default::default()).unwrap(); - - let key = index::get_primary_key(&schema.primary_index, &initial_values); - let record = env.get(&key).unwrap(); - - assert_eq!(initial_values, record.record.values); - assert_eq!(1, record.version); - - // Try insert same data again - let result = env.insert(&record.record); - assert!(matches!(result, Err(CacheError::PrimaryKeyExists { .. }))); -} - -#[test] -fn ignore_update_error_when_type_nothing() { - let (mut env, schema) = init_env(ConflictResolution { - on_insert: Default::default(), - on_update: OnUpdateResolutionTypes::Nothing, - on_delete: Default::default(), - }); - - let initial_values = vec![Field::Int(1), Field::Null]; - let update_values = vec![ - Field::Int(1), - Field::String("Film name updated".to_string()), - ]; - - let initial_record = Record { - values: initial_values.clone(), - lifetime: None, - }; - let update_record = Record { - values: update_values, - lifetime: None, - }; - env.update(&initial_record, &update_record).unwrap(); - - let key = index::get_primary_key(&schema.primary_index, &initial_values); - let record = env.get(&key); - - assert!(matches!(record, Err(CacheError::PrimaryKeyNotFound))); -} - -#[test] -fn update_after_update_error_when_type_upsert() { - let (mut env, schema) = init_env(ConflictResolution { - on_insert: Default::default(), - on_update: OnUpdateResolutionTypes::Upsert, - on_delete: Default::default(), - }); - - let initial_values = vec![Field::Int(1), Field::Null]; - let update_values = vec![ - Field::Int(1), - Field::String("Film name updated".to_string()), - ]; - - let initial_record = Record { - values: initial_values.clone(), - lifetime: None, - }; - let update_record = Record { - values: update_values.clone(), - lifetime: None, - }; - env.update(&initial_record, &update_record).unwrap(); - env.commit(&Default::default()).unwrap(); - - let key = index::get_primary_key(&schema.primary_index, &initial_values); - let record = env.get(&key).unwrap(); - - assert_eq!(update_values, record.record.values); - assert_eq!(1, record.version); -} - -#[test] -fn return_update_error_when_type_panic() { - let (mut env, _) = init_env(ConflictResolution { - on_insert: Default::default(), - on_update: OnUpdateResolutionTypes::Panic, - on_delete: Default::default(), - }); - - let initial_values = vec![Field::Int(1), Field::Null]; - let update_values = vec![ - Field::Int(1), - Field::String("Film name updated".to_string()), - ]; - - let initial_record = Record { - values: initial_values, - lifetime: None, - }; - let update_record = Record { - values: update_values, - lifetime: None, - }; - - let result = env.update(&initial_record, &update_record); - - assert!(matches!(result, Err(CacheError::PrimaryKeyNotFound))); -} - -#[test] -fn ignore_delete_error_when_type_nothing() { - let (mut env, _) = init_env(ConflictResolution { - on_insert: Default::default(), - on_update: Default::default(), - on_delete: 
OnDeleteResolutionTypes::Nothing, - }); - - let initial_values = vec![Field::Int(1), Field::Null]; - let initial_record = Record { - values: initial_values, - lifetime: None, - }; - - // Check is cache empty - let current_count = env.count().unwrap(); - assert_eq!(current_count, 0_usize); - - // Trying delete not existing record should be ignored - let result = env.delete(&initial_record); - assert!(result.is_ok()); -} - -#[test] -fn return_delete_error_when_type_panic() { - let (mut env, _) = init_env(ConflictResolution { - on_insert: Default::default(), - on_update: Default::default(), - on_delete: OnDeleteResolutionTypes::Panic, - }); - - let initial_values = vec![Field::Int(1), Field::Null]; - let initial_record = Record { - values: initial_values, - lifetime: None, - }; - - let result = env.delete(&initial_record); - assert!(matches!(result, Err(CacheError::PrimaryKeyNotFound))); -} diff --git a/dozer-cache/src/cache/lmdb/cache/main_environment/dump_restore.rs b/dozer-cache/src/cache/lmdb/cache/main_environment/dump_restore.rs deleted file mode 100644 index 643e5caf32..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/main_environment/dump_restore.rs +++ /dev/null @@ -1,178 +0,0 @@ -use dozer_storage::{ - errors::StorageError, generator::FutureGeneratorContext, lmdb::Transaction, DumpItem, - LmdbEnvironment, LmdbMap, LmdbOption, -}; -use dozer_types::{borrow::IntoOwned, log::info}; -use tokio::io::AsyncRead; - -use crate::{ - cache::{ - lmdb::{cache::CacheOptions, utils::create_env}, - CacheWriteOptions, - }, - errors::CacheError, -}; - -use super::{ - MainEnvironment, MainEnvironmentCommon, OperationLog, RwMainEnvironment, COMMIT_STATE_DB_NAME, - CONNECTION_SNAPSHOTTING_DONE_DB_NAME, SCHEMA_DB_NAME, -}; - -pub async fn dump<'txn, E: MainEnvironment, T: Transaction>( - env: &E, - txn: &'txn T, - context: &FutureGeneratorContext, StorageError>>, -) -> Result<(), ()> { - dozer_storage::dump( - txn, - SCHEMA_DB_NAME, - env.common().schema_option.database(), - context, - ) - .await?; - dozer_storage::dump( - txn, - COMMIT_STATE_DB_NAME, - env.common().commit_state.database(), - context, - ) - .await?; - dozer_storage::dump( - txn, - CONNECTION_SNAPSHOTTING_DONE_DB_NAME, - env.common().connection_snapshotting_done.database(), - context, - ) - .await?; - env.common().operation_log.dump(txn, context).await -} - -pub async fn restore( - options: &CacheOptions, - write_options: CacheWriteOptions, - reader: &mut (impl AsyncRead + Unpin), -) -> Result { - info!("Restoring main environment with options {options:?}"); - let (mut env, (base_path, name), temp_dir) = create_env(options)?; - - info!("Restoring schema"); - dozer_storage::restore(&mut env, reader).await?; - info!("Restoring commit state"); - dozer_storage::restore(&mut env, reader).await?; - info!("Restoring connection snapshotting done"); - dozer_storage::restore(&mut env, reader).await?; - info!("Restoring operation log"); - let operation_log = - OperationLog::restore(&mut env, reader, name, options.labels.clone()).await?; - - let schema_option = LmdbOption::open(&env, Some(SCHEMA_DB_NAME))?; - let commit_state = LmdbOption::open(&env, Some(COMMIT_STATE_DB_NAME))?; - let connection_snapshotting_done = - LmdbMap::open(&env, Some(CONNECTION_SNAPSHOTTING_DONE_DB_NAME))?; - - let schema = schema_option - .load(&env.begin_txn()?)? 
- .map(IntoOwned::into_owned) - .ok_or(CacheError::SchemaNotFound)?; - - Ok(RwMainEnvironment { - env, - common: MainEnvironmentCommon { - base_path, - schema, - schema_option, - commit_state, - connection_snapshotting_done, - operation_log, - intersection_chunk_size: options.intersection_chunk_size, - }, - _temp_dir: temp_dir, - write_options, - }) -} - -#[cfg(test)] -pub mod tests { - use std::pin::pin; - - use super::*; - - use dozer_storage::{ - assert_database_equal, - generator::{Generator, IntoGenerator}, - }; - use dozer_types::types::{Record, Schema}; - - use crate::cache::lmdb::cache::{ - main_environment::operation_log::tests::assert_operation_log_equal, RwMainEnvironment, - }; - - pub fn assert_main_env_equal(env1: &E1, env2: &E2) { - assert_eq!(env1.common().schema, env2.common().schema); - let txn1 = env1.begin_txn().unwrap(); - let txn2 = env2.begin_txn().unwrap(); - assert_database_equal( - &txn1, - env1.common().schema_option.database(), - &txn2, - env2.common().schema_option.database(), - ); - assert_database_equal( - &txn1, - env1.common().commit_state.database(), - &txn2, - env2.common().commit_state.database(), - ); - assert_database_equal( - &txn1, - env1.common().connection_snapshotting_done.database(), - &txn2, - env2.common().connection_snapshotting_done.database(), - ); - assert_operation_log_equal( - &env1.common().operation_log, - &txn1, - &env2.common().operation_log, - &txn2, - ); - } - - #[tokio::test] - async fn test_dump_restore() { - let schema = Schema::default(); - let mut env = RwMainEnvironment::new( - Some(&(schema, vec![])), - None, - Default::default(), - Default::default(), - ) - .unwrap(); - - let record = Record::new(vec![]); - env.insert(&record).unwrap(); - env.insert(&record).unwrap(); - env.delete(&record).unwrap(); - env.commit(&Default::default()).unwrap(); - - let mut data = vec![]; - { - let env = &env; - let txn = &env.begin_txn().unwrap(); - let generator = |context| async move { dump(env, txn, &context).await.unwrap() }; - let generator = generator.into_generator(); - for item in pin!(generator).into_iter() { - data.extend_from_slice(&item.unwrap()); - } - } - - let restored_env = restore( - &Default::default(), - Default::default(), - &mut data.as_slice(), - ) - .await - .unwrap(); - - assert_main_env_equal(&env, &restored_env); - } -} diff --git a/dozer-cache/src/cache/lmdb/cache/main_environment/hash_tests.rs b/dozer-cache/src/cache/lmdb/cache/main_environment/hash_tests.rs deleted file mode 100644 index c0caf737d7..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/main_environment/hash_tests.rs +++ /dev/null @@ -1,30 +0,0 @@ -use dozer_types::types::{Record, Schema}; - -use crate::cache::UpsertResult; - -use super::RwMainEnvironment; - -#[test] -fn test_hash_insert_delete_insert() { - let schema = Schema::default(); - let mut env = RwMainEnvironment::new( - Some(&(schema, vec![])), - None, - Default::default(), - Default::default(), - ) - .unwrap(); - - let record = Record::new(vec![]); - - let UpsertResult::Inserted { meta } = env.insert(&record).unwrap() else { - panic!("Expected UpsertResult::Inserted"); - }; - let deleted_meta = env.delete(&record).unwrap().unwrap(); - assert_eq!(meta, deleted_meta); - let UpsertResult::Inserted { meta } = env.insert(&record).unwrap() else { - panic!("Expected UpsertResult::Inserted"); - }; - assert_eq!(meta.id, deleted_meta.id); - assert_eq!(meta.version, deleted_meta.version + 1); -} diff --git a/dozer-cache/src/cache/lmdb/cache/main_environment/mod.rs 
b/dozer-cache/src/cache/lmdb/cache/main_environment/mod.rs deleted file mode 100644 index df50684b37..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/main_environment/mod.rs +++ /dev/null @@ -1,665 +0,0 @@ -use std::{ - collections::HashSet, - hash::{Hash, Hasher}, - path::{Path, PathBuf}, -}; - -use dozer_storage::{ - errors::StorageError, - lmdb::{RwTransaction, Transaction}, - lmdb_storage::{RoLmdbEnvironment, RwLmdbEnvironment}, - LmdbEnvironment, LmdbMap, LmdbOption, -}; -use dozer_tracing::Labels; -use dozer_types::{ - bincode, - borrow::{Borrow, IntoOwned}, - types::{Field, FieldType, Record, Schema, SchemaWithIndex}, -}; -use dozer_types::{ - log::warn, - models::endpoint::{OnDeleteResolutionTypes, OnInsertResolutionTypes, OnUpdateResolutionTypes}, -}; -use tempdir::TempDir; - -use crate::{ - cache::{ - index, - lmdb::utils::{create_env, open_env}, - CacheRecord, CommitState, RecordMeta, UpsertResult, - }, - errors::{CacheError, ConnectionMismatch}, -}; - -mod operation_log; - -use operation_log::RecordMetadata; -pub use operation_log::{Operation, OperationLog}; - -use self::operation_log::MetadataKey; - -use super::{CacheOptions, CacheWriteOptions}; - -pub trait MainEnvironment: LmdbEnvironment { - fn common(&self) -> &MainEnvironmentCommon; - - fn schema(&self) -> &SchemaWithIndex { - &self.common().schema - } - - fn base_path(&self) -> &Path { - &self.common().base_path - } - - fn name(&self) -> &str { - self.common().operation_log.name() - } - - fn labels(&self) -> &Labels { - self.common().operation_log.labels() - } - - fn operation_log(&self) -> &OperationLog { - &self.common().operation_log - } - - fn intersection_chunk_size(&self) -> usize { - self.common().intersection_chunk_size - } - - fn count(&self) -> Result { - let txn = self.begin_txn()?; - self.operation_log() - .count_present_records(&txn, self.schema().0.is_append_only()) - .map_err(Into::into) - } - - fn get(&self, key: &[u8]) -> Result { - let txn = self.begin_txn()?; - self.operation_log() - .get_record(&txn, key)? - .ok_or(CacheError::PrimaryKeyNotFound) - } - - fn commit_state(&self) -> Result, CacheError> { - let txn = self.begin_txn()?; - self.commit_state_with_txn(&txn) - } - - fn is_snapshotting_done(&self) -> Result { - let txn = self.begin_txn()?; - for value in self.common().connection_snapshotting_done.values(&txn)? { - if !value?.into_owned() { - return Ok(false); - } - } - Ok(true) - } - - fn commit_state_with_txn( - &self, - txn: &T, - ) -> Result, CacheError> { - self.common() - .commit_state - .load(txn)? - .map(|commit_state| { - bincode::decode_from_slice(commit_state.borrow(), bincode::config::legacy()) - .map(|v| v.0) - .map_err(CacheError::map_deserialization_error) - }) - .transpose() - } -} - -const SCHEMA_DB_NAME: &str = "schema"; -const COMMIT_STATE_DB_NAME: &str = "commit_state"; -const CONNECTION_SNAPSHOTTING_DONE_DB_NAME: &str = "connection_snapshotting_done"; - -#[derive(Debug, Clone)] -pub struct MainEnvironmentCommon { - /// The environment base path. - base_path: PathBuf, - /// The schema. - schema: SchemaWithIndex, - /// The schema database, used for dumping. - schema_option: LmdbOption, - /// The serialized commit state. - commit_state: LmdbOption>, - /// The source status. - connection_snapshotting_done: LmdbMap, - /// The operation log. 
- operation_log: OperationLog, - intersection_chunk_size: usize, -} - -#[derive(Debug)] -pub struct RwMainEnvironment { - env: RwLmdbEnvironment, - common: MainEnvironmentCommon, - _temp_dir: Option, - write_options: CacheWriteOptions, -} - -impl LmdbEnvironment for RwMainEnvironment { - fn env(&self) -> &dozer_storage::lmdb::Environment { - self.env.env() - } -} - -impl MainEnvironment for RwMainEnvironment { - fn common(&self) -> &MainEnvironmentCommon { - &self.common - } -} - -impl RwMainEnvironment { - pub fn new( - schema: Option<&SchemaWithIndex>, - connections: Option<&HashSet>, - options: CacheOptions, - write_options: CacheWriteOptions, - ) -> Result { - let (mut env, (base_path, name), temp_dir) = create_env(&options)?; - - let operation_log = OperationLog::create(&mut env, name.clone(), options.labels)?; - let schema_option = LmdbOption::create(&mut env, Some(SCHEMA_DB_NAME))?; - let commit_state = LmdbOption::create(&mut env, Some(COMMIT_STATE_DB_NAME))?; - let connection_snapshotting_done = - LmdbMap::create(&mut env, Some(CONNECTION_SNAPSHOTTING_DONE_DB_NAME))?; - - let old_schema = schema_option - .load(&env.begin_txn()?)? - .map(IntoOwned::into_owned); - - let schema = match (schema, old_schema) { - (Some(schema), Some(old_schema)) => { - if &old_schema != schema { - return Err(CacheError::SchemaMismatch { - name: name.clone(), - given: Box::new(schema.clone()), - stored: Box::new(old_schema), - }); - } - old_schema - } - (Some(schema), None) => { - schema_option.store(env.txn_mut()?, schema)?; - env.commit()?; - schema.clone() - } - (None, Some(schema)) => schema, - (None, None) => return Err(CacheError::SchemaNotFound), - }; - - if let Some(connections) = connections { - if connection_snapshotting_done.count(&env.begin_txn()?)? == 0 { - // A new environment, set all connections to false. - let txn = env.txn_mut()?; - for connection in connections { - connection_snapshotting_done.insert(txn, connection.as_str(), &false)?; - } - env.commit()?; - } else { - // Check if the connections match. - let mut existing_connections = HashSet::::default(); - for connection in connection_snapshotting_done.iter(&env.begin_txn()?)? { - existing_connections.insert(connection?.0.into_owned()); - } - if &existing_connections != connections { - return Err(CacheError::ConnectionsMismatch(Box::new( - ConnectionMismatch { - name, - given: connections.clone(), - stored: existing_connections, - }, - ))); - } - } - } - - Ok(Self { - env, - common: MainEnvironmentCommon { - base_path, - schema, - schema_option, - commit_state, - connection_snapshotting_done, - operation_log, - intersection_chunk_size: options.intersection_chunk_size, - }, - _temp_dir: temp_dir, - write_options, - }) - } - - pub fn share(&self) -> RoMainEnvironment { - RoMainEnvironment { - env: self.env.share(), - common: self.common.clone(), - } - } - - pub fn insert(&mut self, record: &Record) -> Result { - let txn = self.env.txn_mut()?; - insert_impl( - &self.common.operation_log, - txn, - &self.common.schema.0, - record, - self.write_options.insert_resolution, - ) - } - - pub fn delete(&mut self, record: &Record) -> Result, CacheError> { - if self.common.schema.0.is_append_only() { - return Err(CacheError::AppendOnlySchema); - } - - let txn = self.env.txn_mut()?; - let operation_log = &self.common.operation_log; - let key = calculate_key(&self.common.schema.0, record); - - if let Some((meta, insert_operation_id)) = - get_existing_record_metadata(operation_log, txn, &key)? - { - // The record exists. 
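            // Appending a Delete that references the original insert's operation id removes the
            // record from the present set, while the metadata entry keeps the record id and
            // version (with `insert_operation_id` cleared) so a later insert can reuse them.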
- operation_log.delete(txn, key.as_ref(), meta, insert_operation_id)?; - Ok(Some(meta)) - } else { - // The record does not exist. Resolve the conflict. - match self.write_options.delete_resolution { - OnDeleteResolutionTypes::Nothing => { - warn!("Record (Key: {:?}) not found, ignoring delete", key); - Ok(None) - } - OnDeleteResolutionTypes::Panic => Err(CacheError::PrimaryKeyNotFound), - } - } - } - - pub fn update(&mut self, old: &Record, new: &Record) -> Result { - // if old_key == new_key { - // match (key_exist, conflict_resolution) { - // (true, _) => Updated, // Case 1 - // (false, Nothing) => Ignored, // Case 2 - // (false, Upsert) => Inserted, // Case 3 - // (false, Panic) => Err, // Case 4 - // } - // } else { - // match (old_key_exist, new_key_exist, conflict_resolution) { - // (true, true, Nothing) => Ignored, // Case 5 - // (true, true, Upsert) => Err, // Case 6 - // (true, true, Panic) => Err, // Case 7 - // (true, false, _) => Updated, // Case 8 - // (false, true, Nothing) => Ignored, // Case 9 - // (false, true, Upsert) => Err, // Case 10 - // (false, true, Panic) => Err, // Case 11 - // (false, false, Nothing) => Ignored, // Case 12 - // (false, false, Upsert) => Inserted, // Case 13 - // (false, false, Panic) => Err, // Case 14 - // } - // } - - let txn = self.env.txn_mut()?; - let operation_log = &self.common.operation_log; - let schema = &self.common.schema.0; - let old_key = calculate_key(schema, old); - - if let Some((old_meta, insert_operation_id)) = - get_existing_record_metadata(operation_log, txn, &old_key)? - { - // Case 1, 5, 6, 7, 8. - let new_key = calculate_key(schema, new); - if new_key.equal(&old_key) { - // Case 1. - let new_meta = operation_log.update( - txn, - old_key.as_ref(), - new, - old_meta, - insert_operation_id, - )?; - Ok(UpsertResult::Updated { old_meta, new_meta }) - } else { - // Case 5, 6, 7, 8. - let new_metadata = operation_log.get_deleted_metadata(txn, new_key.as_ref())?; - match new_metadata { - Some(RecordMetadata { - insert_operation_id: Some(insert_operation_id), - meta, - }) => { - // Case 5, 6, 7. - if self.write_options.update_resolution == OnUpdateResolutionTypes::Nothing - { - // Case 5. - warn!("Old record (Key: {:?}) and new record (Key: {:?}) both exist, ignoring update", get_key_fields(old, schema), get_key_fields(new, schema)); - Ok(UpsertResult::Ignored) - } else { - // Case 6, 7. - Err(CacheError::PrimaryKeyExists { - key: get_key_fields(new, schema), - meta, - insert_operation_id, - }) - } - } - Some(RecordMetadata { - meta, - insert_operation_id: None, - }) => { - // Case 8. Meta from deleted record. - operation_log.delete( - txn, - old_key.as_ref(), - old_meta, - insert_operation_id, - )?; - let new_meta = - operation_log.insert_deleted(txn, new_key.as_ref(), new, meta)?; - Ok(UpsertResult::Updated { old_meta, new_meta }) - } - None => { - // Case 8. Meta from `insert_new`. - operation_log.delete( - txn, - old_key.as_ref(), - old_meta, - insert_operation_id, - )?; - let new_meta = - operation_log.insert_new(txn, Some(new_key.as_ref()), new)?; - Ok(UpsertResult::Updated { old_meta, new_meta }) - } - } - } - } else { - // Case 2, 3, 4, 9, 10, 11, 12, 13. - match self.write_options.update_resolution { - OnUpdateResolutionTypes::Nothing => { - // Case 2, 9, 12. - warn!("Old record (Key: {:?}) not found, ignoring update", old_key); - Ok(UpsertResult::Ignored) - } - OnUpdateResolutionTypes::Upsert => { - // Case 3, 10, 13. 
- insert_impl( - operation_log, - txn, - &self.common.schema.0, - new, - OnInsertResolutionTypes::Panic, - ) - } - OnUpdateResolutionTypes::Panic => { - // Case 4, 11, 14. - Err(CacheError::PrimaryKeyNotFound) - } - } - } - } - - pub fn set_connection_snapshotting_done( - &mut self, - connection_name: &str, - ) -> Result<(), CacheError> { - let txn = self.env.txn_mut()?; - self.common - .connection_snapshotting_done - .insert_overwrite(txn, connection_name, &true) - .map_err(Into::into) - } - - pub fn commit(&mut self, state: &CommitState) -> Result<(), CacheError> { - let txn = self.env.txn_mut()?; - self.common.commit_state.store( - txn, - bincode::encode_to_vec(state, bincode::config::legacy()) - .map_err(CacheError::map_serialization_error)? - .as_slice(), - )?; - self.env.commit().map_err(Into::into) - } -} - -#[derive(Debug)] -enum OwnedMetadataKey<'a> { - PrimaryKey(Vec), - Hash(&'a Record, u64), -} - -impl<'a> OwnedMetadataKey<'a> { - fn as_ref(&self) -> MetadataKey<'_> { - match self { - OwnedMetadataKey::PrimaryKey(key) => MetadataKey::PrimaryKey(key), - OwnedMetadataKey::Hash(record, hash) => MetadataKey::Hash(record, *hash), - } - } - - fn equal(&self, other: &OwnedMetadataKey) -> bool { - match (self, other) { - (OwnedMetadataKey::PrimaryKey(key1), OwnedMetadataKey::PrimaryKey(key2)) => { - key1 == key2 - } - (OwnedMetadataKey::Hash(_, hash1), OwnedMetadataKey::Hash(_, hash2)) => hash1 == hash2, - _ => false, - } - } -} - -fn calculate_key<'a>(schema: &Schema, record: &'a Record) -> OwnedMetadataKey<'a> { - if schema.primary_index.is_empty() { - let mut hasher = ahash::AHasher::default(); - record.hash(&mut hasher); - let hash = hasher.finish(); - OwnedMetadataKey::Hash(record, hash) - } else { - let key = index::get_primary_key(&schema.primary_index, &record.values); - OwnedMetadataKey::PrimaryKey(key) - } -} - -fn get_key_fields(record: &Record, schema: &Schema) -> Vec<(String, Field)> { - if schema.primary_index.is_empty() { - schema - .fields - .iter() - .zip(record.values.iter()) - .map(|(field, value)| (field.name.clone(), value.clone())) - .collect() - } else { - schema - .primary_index - .iter() - .map(|idx| { - ( - schema.fields[*idx].name.clone(), - record.values[*idx].clone(), - ) - }) - .collect() - } -} - -fn insert_impl( - operation_log: &OperationLog, - txn: &mut RwTransaction, - schema: &Schema, - record: &Record, - insert_resolution: OnInsertResolutionTypes, -) -> Result { - debug_check_schema_record_consistency(schema, record); - - if schema.is_append_only() { - let meta = operation_log.insert_new(txn, None, record)?; - Ok(UpsertResult::Inserted { meta }) - } else { - let key = calculate_key(schema, record); - let metadata = operation_log.get_deleted_metadata(txn, key.as_ref())?; - match metadata { - Some(RecordMetadata { - meta, - insert_operation_id: Some(insert_operation_id), - }) => { - // The record already exists. - if schema.primary_index.is_empty() { - // Insert anyway. - let meta = operation_log.insert_new(txn, Some(key.as_ref()), record)?; - Ok(UpsertResult::Inserted { meta }) - } else { - // Resolve the conflict. 
- match insert_resolution { - OnInsertResolutionTypes::Nothing => { - warn!( - "Record (Key: {:?}) already exist, ignoring insert", - get_key_fields(record, schema) - ); - Ok(UpsertResult::Ignored) - } - OnInsertResolutionTypes::Panic => Err(CacheError::PrimaryKeyExists { - key: get_key_fields(record, schema), - meta, - insert_operation_id, - }), - OnInsertResolutionTypes::Update => { - let new_meta = operation_log.update( - txn, - key.as_ref(), - record, - meta, - insert_operation_id, - )?; - Ok(UpsertResult::Updated { - old_meta: meta, - new_meta, - }) - } - } - } - } - Some(RecordMetadata { - meta, - insert_operation_id: None, - }) => { - // The record has an id but was deleted. - let new_meta = operation_log.insert_deleted(txn, key.as_ref(), record, meta)?; - Ok(UpsertResult::Inserted { meta: new_meta }) - } - None => { - // The record does not exist. - let meta = operation_log.insert_new(txn, Some(key.as_ref()), record)?; - Ok(UpsertResult::Inserted { meta }) - } - } - } -} - -fn get_existing_record_metadata( - operation_log: &OperationLog, - txn: &T, - key: &OwnedMetadataKey, -) -> Result, StorageError> { - if let Some(RecordMetadata { - meta, - insert_operation_id: Some(insert_operation_id), - }) = operation_log.get_present_metadata(txn, key.as_ref())? - { - Ok(Some((meta, insert_operation_id))) - } else { - Ok(None) - } -} - -#[derive(Debug, Clone)] -pub struct RoMainEnvironment { - env: RoLmdbEnvironment, - common: MainEnvironmentCommon, -} - -impl LmdbEnvironment for RoMainEnvironment { - fn env(&self) -> &dozer_storage::lmdb::Environment { - self.env.env() - } -} - -impl MainEnvironment for RoMainEnvironment { - fn common(&self) -> &MainEnvironmentCommon { - &self.common - } -} - -impl RoMainEnvironment { - pub fn new(options: CacheOptions) -> Result { - let (env, (base_path, name)) = open_env(&options)?; - let base_path = base_path.to_path_buf(); - - let operation_log = OperationLog::open(&env, name.to_string(), options.labels)?; - let schema_option = LmdbOption::open(&env, Some(SCHEMA_DB_NAME))?; - let commit_state = LmdbOption::open(&env, Some(COMMIT_STATE_DB_NAME))?; - let connection_snapshotting_done = - LmdbMap::open(&env, Some(CONNECTION_SNAPSHOTTING_DONE_DB_NAME))?; - - let schema = schema_option - .load(&env.begin_txn()?)? 
- .map(IntoOwned::into_owned) - .ok_or(CacheError::SchemaNotFound)?; - - Ok(Self { - env, - common: MainEnvironmentCommon { - base_path, - schema, - schema_option, - commit_state, - connection_snapshotting_done, - operation_log, - intersection_chunk_size: options.intersection_chunk_size, - }, - }) - } -} - -pub mod dump_restore; - -fn debug_check_schema_record_consistency(schema: &Schema, record: &Record) { - debug_assert_eq!(schema.fields.len(), record.values.len()); - for (field, value) in schema.fields.iter().zip(record.values.iter()) { - if field.nullable && value == &Field::Null { - continue; - } - match field.typ { - FieldType::UInt => { - debug_assert!(value.as_uint().is_some()) - } - FieldType::U128 => { - debug_assert!(value.as_u128().is_some()) - } - FieldType::Int => { - debug_assert!(value.as_int().is_some()) - } - FieldType::I128 => { - debug_assert!(value.as_i128().is_some()) - } - FieldType::Float => { - debug_assert!(value.as_float().is_some()) - } - FieldType::Boolean => debug_assert!(value.as_boolean().is_some()), - FieldType::String => debug_assert!(value.as_string().is_some()), - FieldType::Text => debug_assert!(value.as_text().is_some()), - FieldType::Binary => debug_assert!(value.as_binary().is_some()), - FieldType::Decimal => debug_assert!(value.as_decimal().is_some()), - FieldType::Timestamp => debug_assert!(value.as_timestamp().is_some()), - FieldType::Date => debug_assert!(value.as_date().is_some()), - FieldType::Json => debug_assert!(value.as_json().is_some()), - FieldType::Point => debug_assert!(value.as_point().is_some()), - FieldType::Duration => debug_assert!(value.as_duration().is_some()), - } - } -} - -#[cfg(test)] -mod conflict_resolution_tests; - -#[cfg(test)] -mod hash_tests; diff --git a/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/hash_metadata.rs b/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/hash_metadata.rs deleted file mode 100644 index 5263432bd3..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/hash_metadata.rs +++ /dev/null @@ -1,80 +0,0 @@ -use dozer_storage::{ - errors::StorageError, - lmdb::{Database, RwTransaction, Transaction}, - LmdbEnvironment, LmdbMultimap, RwLmdbEnvironment, -}; -use dozer_types::{borrow::IntoOwned, types::Record}; - -use super::{metadata::Metadata, RecordMetadata}; - -#[derive(Debug, Clone, Copy)] -pub struct HashMetadata(LmdbMultimap); - -impl HashMetadata { - pub fn create(env: &mut RwLmdbEnvironment) -> Result { - LmdbMultimap::create(env, Some(Self::DATABASE_NAME)).map(Self) - } - - pub fn open(env: &E) -> Result { - LmdbMultimap::open(env, Some(Self::DATABASE_NAME)).map(Self) - } - - pub const DATABASE_NAME: &'static str = "hash_metadata"; - - pub fn database(&self) -> Database { - self.0.database() - } -} - -impl Metadata for HashMetadata { - type Key<'a> = (&'a Record, u64); - - fn count_data(&self, txn: &T) -> Result { - self.0.count_data(txn) - } - - fn get_present( - &self, - txn: &T, - key: (&Record, u64), - ) -> Result, StorageError> { - self.0 - .get_last(txn, &key.1) - .map(|metadata| metadata.map(|metadata| metadata.into_owned())) - } - - fn get_deleted( - &self, - txn: &T, - key: (&Record, u64), - ) -> Result, StorageError> { - self.0 - .get_first(txn, &key.1) - .map(|metadata| metadata.map(|metadata| metadata.into_owned())) - } - - fn insert( - &self, - txn: &mut RwTransaction, - key: (&Record, u64), - value: &RecordMetadata, - ) -> Result<(), StorageError> { - let inserted = self.0.insert(txn, &key.1, value)?; - 
debug_assert!(inserted); - Ok(()) - } - - fn insert_overwrite( - &self, - txn: &mut RwTransaction, - key: (&Record, u64), - old: &RecordMetadata, - new: &RecordMetadata, - ) -> Result<(), StorageError> { - let removed = self.0.remove(txn, &key.1, old)?; - debug_assert!(removed); - let inserted = self.0.insert(txn, &key.1, new)?; - debug_assert!(inserted); - Ok(()) - } -} diff --git a/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/lmdb_val_impl.rs b/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/lmdb_val_impl.rs deleted file mode 100644 index d1f54ae42f..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/lmdb_val_impl.rs +++ /dev/null @@ -1,106 +0,0 @@ -use dozer_storage::{errors::StorageError, BorrowEncode, Decode, Encode, Encoded, LmdbVal}; -use dozer_types::borrow::{Borrow, Cow, IntoOwned}; - -use super::{Operation, OperationBorrow}; - -impl<'a> IntoOwned for OperationBorrow<'a> { - fn into_owned(self) -> Operation { - match self { - Self::Delete { operation_id } => Operation::Delete { operation_id }, - Self::Insert { - record_meta, - record, - } => Operation::Insert { - record_meta, - record: record.clone(), - }, - } - } -} - -impl Borrow for Operation { - type Borrowed<'a> = OperationBorrow<'a>; - - fn borrow(&self) -> Self::Borrowed<'_> { - match self { - Self::Delete { operation_id } => OperationBorrow::Delete { - operation_id: *operation_id, - }, - Self::Insert { - record_meta, - record, - } => OperationBorrow::Insert { - record_meta: *record_meta, - record, - }, - } - } - - fn upcast<'b, 'a: 'b>(borrow: Self::Borrowed<'a>) -> Self::Borrowed<'b> { - match borrow { - OperationBorrow::Delete { operation_id } => OperationBorrow::Delete { operation_id }, - OperationBorrow::Insert { - record_meta, - record, - } => OperationBorrow::Insert { - record_meta, - record, - }, - } - } -} - -impl BorrowEncode for Operation { - type Encode<'a> = OperationBorrow<'a>; -} - -impl<'a> Encode<'a> for OperationBorrow<'a> { - fn encode(self) -> Result, StorageError> { - let encoded = bincode::encode_to_vec(self, bincode::config::legacy()).map_err(|e| { - StorageError::SerializationError { - typ: "Operation", - reason: Box::new(e), - } - })?; - Ok(Encoded::Vec(encoded)) - } -} - -impl Decode for Operation { - fn decode(bytes: &[u8]) -> Result, StorageError> { - let decoded = dozer_types::bincode::decode_from_slice(bytes, bincode::config::legacy()) - .map_err(|e| StorageError::DeserializationError { - typ: "Operation", - reason: Box::new(e), - })? 
- .0; - Ok(Cow::Owned(decoded)) - } -} - -unsafe impl LmdbVal for Operation {} - -#[cfg(test)] -mod tests { - use dozer_types::types::Record; - - use crate::cache::RecordMeta; - - use super::*; - - #[test] - fn test_operation_encode_decode() { - let operation = Operation::Delete { operation_id: 1 }; - let encoded = operation.borrow().encode().unwrap(); - let decoded = Operation::decode(encoded.as_ref()).unwrap().into_owned(); - assert_eq!(operation, decoded); - - let operation = Operation::Insert { - record_meta: RecordMeta::new(1, 1), - record: Record::new(vec![1.into(), 2.into(), 3.into()]), - }; - let encoded = operation.borrow().encode().unwrap(); - let decoded = Operation::decode(encoded.as_ref()).unwrap().into_owned(); - assert_eq!(operation, decoded); - } -} diff --git a/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/metadata.rs b/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/metadata.rs deleted file mode 100644 index d9e22b1124..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/metadata.rs +++ /dev/null @@ -1,143 +0,0 @@ -use dozer_storage::{ - errors::StorageError, - lmdb::{RwTransaction, Transaction}, - BorrowEncode, Decode, Encode, Encoded, LmdbKey, LmdbKeyType, LmdbVal, -}; -use dozer_types::{ - borrow::{Borrow, Cow, IntoOwned}, - impl_borrow_for_clone_type, -}; - -use crate::cache::RecordMeta; - -#[derive(Debug, Clone, Copy, PartialEq)] -pub struct RecordMetadata { - /// The record metadata. `id` is consistent across `insert`s and `delete`s. `version` gets updated on every `insert` or `update`. - pub meta: RecordMeta, - /// The operation id of the latest `Insert` operation. `None` if the record is deleted. - pub insert_operation_id: Option, -} - -pub trait Metadata: Copy { - type Key<'a>; - - /// Returns total number of metadata. - fn count_data(&self, txn: &T) -> Result; - - /// Tries to get metadata using `key`, returning metadata with `insert_operation_id: Some(_)` if it exists. - fn get_present( - &self, - txn: &T, - key: Self::Key<'_>, - ) -> Result, StorageError>; - - /// Tries to get metadata using `key`, returning metadata with `insert_operation_id: None` if it exists. - fn get_deleted( - &self, - txn: &T, - key: Self::Key<'_>, - ) -> Result, StorageError>; - - /// Inserts the key value entry `(key, value)`. Caller must ensure (key, value) does not exist. - fn insert( - &self, - txn: &mut RwTransaction, - key: Self::Key<'_>, - value: &RecordMetadata, - ) -> Result<(), StorageError>; - - /// Overrides the key value entry `(key, old)` with `(key, new)`. Caller must ensure (key, old) exists. 
- fn insert_overwrite( - &self, - txn: &mut RwTransaction, - key: Self::Key<'_>, - old: &RecordMetadata, - new: &RecordMetadata, - ) -> Result<(), StorageError>; -} - -impl_borrow_for_clone_type!(RecordMetadata); - -impl BorrowEncode for RecordMetadata { - type Encode<'a> = &'a RecordMetadata; -} - -impl<'a> Encode<'a> for &'a RecordMetadata { - fn encode(self) -> Result, StorageError> { - let mut result = [0; 21]; - if let Some(insert_operation_id) = self.insert_operation_id { - result[0] = 1; - result[1..9].copy_from_slice(&insert_operation_id.to_be_bytes()); - } else { - result[0] = 0; - } - result[9..17].copy_from_slice(&self.meta.id.to_be_bytes()); - result[17..21].copy_from_slice(&self.meta.version.to_be_bytes()); - Ok(Encoded::U8x21(result)) - } -} - -impl Decode for RecordMetadata { - fn decode(bytes: &[u8]) -> Result, StorageError> { - let insert_operation_id = if bytes[0] == 1 { - Some(u64::from_be_bytes(bytes[1..9].try_into().unwrap())) - } else { - None - }; - let id = u64::from_be_bytes(bytes[9..17].try_into().unwrap()); - let version = u32::from_be_bytes(bytes[17..21].try_into().unwrap()); - Ok(Cow::Owned(RecordMetadata { - meta: RecordMeta::new(id, version), - insert_operation_id, - })) - } -} - -unsafe impl LmdbVal for RecordMetadata {} - -unsafe impl LmdbKey for RecordMetadata { - const TYPE: LmdbKeyType = LmdbKeyType::FixedSizeOtherThanU32OrUsize; -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_record_metadata_encode_decode() { - let record_metadata = RecordMetadata { - meta: RecordMeta::new(1, 2), - insert_operation_id: Some(3), - }; - let encoded = record_metadata.encode().unwrap(); - let decoded = RecordMetadata::decode(encoded.as_ref()) - .unwrap() - .into_owned(); - assert_eq!(record_metadata, decoded); - - let record_metadata = RecordMetadata { - meta: RecordMeta::new(1, 2), - insert_operation_id: None, - }; - let encoded = record_metadata.encode().unwrap(); - let decoded = RecordMetadata::decode(encoded.as_ref()) - .unwrap() - .into_owned(); - assert_eq!(record_metadata, decoded); - } - - #[test] - fn test_metadata_order() { - let metadata1 = RecordMetadata { - meta: RecordMeta::new(2, 2), - insert_operation_id: None, - }; - let metadata2 = RecordMetadata { - meta: RecordMeta::new(1, 1), - insert_operation_id: Some(0), - }; - let encoded1 = metadata1.encode().unwrap(); - let encoded2 = metadata2.encode().unwrap(); - assert!(encoded1 < encoded2); - } -} diff --git a/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/mod.rs b/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/mod.rs deleted file mode 100644 index 7cee17734f..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/mod.rs +++ /dev/null @@ -1,555 +0,0 @@ -use dozer_storage::{ - errors::StorageError, - lmdb::{RoCursor, RwTransaction, Transaction}, - KeyIterator, LmdbCounter, LmdbEnvironment, LmdbMap, LmdbSet, RwLmdbEnvironment, -}; -use dozer_tracing::Labels; -use dozer_types::{ - borrow::{Borrow, Cow, IntoOwned}, - log::info, - types::Record, -}; -use metrics::{describe_counter, increment_counter}; - -use crate::cache::{CacheRecord, RecordMeta}; - -#[derive(Debug, Clone, PartialEq, bincode::Decode)] -pub enum Operation { - Delete { - /// The operation id of an `Insert` operation, which must exist. 
- operation_id: u64, - }, - Insert { - #[bincode(with_serde)] - record_meta: RecordMeta, - record: Record, - }, -} - -#[derive(Debug, Clone, Copy, bincode::Encode)] -pub enum OperationBorrow<'a> { - Delete { - /// The operation id of an `Insert` operation, which must exist. - operation_id: u64, - }, - Insert { - #[bincode(with_serde)] - record_meta: RecordMeta, - record: &'a Record, - }, -} - -#[derive(Debug, Clone)] -pub struct OperationLog { - /// Record primary key -> RecordMetadata, empty if schema is append only or has no primary index. - primary_key_metadata: PrimaryKeyMetadata, - /// Record hash -> RecordMetadata, empty if schema is append only or has primary index. - hash_metadata: HashMetadata, - /// Operation ids of latest `Insert`s. Used to filter out deleted records in query. Empty if schema is append only. - present_operation_ids: LmdbSet, - /// The next operation id. Monotonically increasing. - next_operation_id: LmdbCounter, - /// Operation_id -> operation. - operation_id_to_operation: LmdbMap, - /// The cache name. - name: String, - /// The cache labels. - labels: Labels, -} - -#[derive(Debug, Clone, Copy)] -pub enum MetadataKey<'a> { - PrimaryKey(&'a [u8]), - Hash(&'a Record, u64), -} - -const PRESENT_OPERATION_IDS_DB_NAME: &str = "present_operation_ids"; -const NEXT_OPERATION_ID_DB_NAME: &str = "next_operation_id"; -const OPERATION_ID_TO_OPERATION_DB_NAME: &str = "operation_id_to_operation"; - -const CACHE_OPERATION_LOG_COUNTER_NAME: &str = "cache_operation_log"; - -impl OperationLog { - pub fn create( - env: &mut RwLmdbEnvironment, - name: String, - labels: Labels, - ) -> Result { - describe_counter!( - CACHE_OPERATION_LOG_COUNTER_NAME, - "Number of operations stored in the cache" - ); - - let primary_key_metadata = PrimaryKeyMetadata::create(env)?; - let hash_metadata = HashMetadata::create(env)?; - let present_operation_ids = LmdbSet::create(env, Some(PRESENT_OPERATION_IDS_DB_NAME))?; - let next_operation_id = LmdbCounter::create(env, Some(NEXT_OPERATION_ID_DB_NAME))?; - let operation_id_to_operation = - LmdbMap::create(env, Some(OPERATION_ID_TO_OPERATION_DB_NAME))?; - Ok(Self { - primary_key_metadata, - hash_metadata, - present_operation_ids, - next_operation_id, - operation_id_to_operation, - name, - labels, - }) - } - - pub fn open( - env: &E, - name: String, - labels: Labels, - ) -> Result { - let primary_key_metadata = PrimaryKeyMetadata::open(env)?; - let hash_metadata = HashMetadata::open(env)?; - let present_operation_ids = LmdbSet::open(env, Some(PRESENT_OPERATION_IDS_DB_NAME))?; - let next_operation_id = LmdbCounter::open(env, Some(NEXT_OPERATION_ID_DB_NAME))?; - let operation_id_to_operation = - LmdbMap::open(env, Some(OPERATION_ID_TO_OPERATION_DB_NAME))?; - Ok(Self { - primary_key_metadata, - hash_metadata, - present_operation_ids, - next_operation_id, - operation_id_to_operation, - name, - labels, - }) - } - - pub fn name(&self) -> &str { - &self.name - } - - pub fn labels(&self) -> &Labels { - &self.labels - } - - pub fn count_present_records( - &self, - txn: &T, - schema_is_append_only: bool, - ) -> Result { - if schema_is_append_only { - self.operation_id_to_operation.count(txn) - } else { - self.present_operation_ids.count(txn) - } - .map_err(Into::into) - } - - pub fn get_present_metadata( - &self, - txn: &T, - key: MetadataKey, - ) -> Result, StorageError> { - match key { - MetadataKey::PrimaryKey(key) => self.primary_key_metadata.get_present(txn, key), - MetadataKey::Hash(record, hash) => self.hash_metadata.get_present(txn, (record, hash)), - } - } 
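// The OperationLog above is an append-only log over LMDB: every insert, update or
// delete appends an operation under a fresh, monotonically increasing operation id,
// `present_operation_ids` tracks which Insert operations are still live, and the
// metadata tables map a primary key (or record hash) to the record's id, version and
// latest insert operation id. What follows is a minimal in-memory sketch of that
// bookkeeping, kept only for the primary-key path and using std collections instead
// of LMDB; `MemOperationLog`, `MemOperation` and their methods are illustrative names
// that do not exist in dozer-cache, and error handling is reduced to assertions.

use std::collections::{BTreeMap, HashSet};

#[derive(Clone, Debug, PartialEq)]
enum MemOperation {
    Insert { record_id: u64, version: u32 },
    Delete { insert_operation_id: u64 },
}

#[derive(Default)]
struct MemOperationLog {
    next_operation_id: u64,
    present_operation_ids: HashSet<u64>,
    operations: BTreeMap<u64, MemOperation>,
    /// primary key -> (record id, version, operation id of the live Insert, if any)
    metadata: BTreeMap<Vec<u8>, (u64, u32, Option<u64>)>,
}

impl MemOperationLog {
    fn next_id(&mut self) -> u64 {
        let id = self.next_operation_id;
        self.next_operation_id += 1;
        id
    }

    /// Rough analogue of `insert_new` for a fresh primary key: record ids are handed
    /// out from the metadata count and versions start at 1.
    fn insert_new(&mut self, key: &[u8]) -> (u64, u32) {
        assert!(!self.metadata.contains_key(key), "key must not exist yet");
        let record_id = self.metadata.len() as u64;
        let op_id = self.next_id();
        self.operations
            .insert(op_id, MemOperation::Insert { record_id, version: 1 });
        self.present_operation_ids.insert(op_id);
        self.metadata.insert(key.to_vec(), (record_id, 1, Some(op_id)));
        (record_id, 1)
    }

    /// Rough analogue of `delete`: appends a Delete operation that points at the live
    /// Insert, and keeps the record id and version so a later re-insert can bump the
    /// version instead of allocating a new id.
    fn delete(&mut self, key: &[u8]) {
        let meta = self.metadata.get_mut(key).expect("record must exist");
        let insert_op = meta.2.take().expect("record must be live");
        assert!(self.present_operation_ids.remove(&insert_op));
        let op_id = self.next_id();
        self.operations
            .insert(op_id, MemOperation::Delete { insert_operation_id: insert_op });
    }
}

fn main() {
    let mut log = MemOperationLog::default();
    let (record_id, version) = log.insert_new(b"pk-1");
    assert_eq!((record_id, version), (0, 1));
    log.delete(b"pk-1");
    // One Insert and one Delete were appended, and nothing is live any more.
    assert_eq!(log.next_operation_id, 2);
    assert!(log.present_operation_ids.is_empty());
    assert_eq!(
        log.operations.get(&1),
        Some(&MemOperation::Delete { insert_operation_id: 0 })
    );
}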
- - pub fn get_deleted_metadata( - &self, - txn: &T, - key: MetadataKey, - ) -> Result, StorageError> { - match key { - MetadataKey::PrimaryKey(key) => self.primary_key_metadata.get_deleted(txn, key), - MetadataKey::Hash(record, hash) => self.hash_metadata.get_deleted(txn, (record, hash)), - } - } - - pub fn get_record( - &self, - txn: &T, - key: &[u8], - ) -> Result, StorageError> { - let Some(metadata) = self.get_present_metadata(txn, MetadataKey::PrimaryKey(key))? else { - return Ok(None); - }; - let Some(insert_operation_id) = metadata.insert_operation_id else { - return Ok(None); - }; - self.get_record_by_operation_id_unchecked(txn, insert_operation_id) - .map(Some) - } - - pub fn next_operation_id(&self, txn: &T) -> Result { - self.next_operation_id.load(txn).map_err(Into::into) - } - - pub fn present_operation_ids<'txn, T: Transaction>( - &self, - txn: &'txn T, - schema_is_append_only: bool, - ) -> Result, u64>, StorageError> { - // If schema is append only, then all operation ids are latest `Insert`s. - if schema_is_append_only { - self.operation_id_to_operation.keys(txn) - } else { - self.present_operation_ids.iter(txn) - } - } - - pub fn contains_operation_id( - &self, - txn: &T, - schema_is_append_only: bool, - operation_id: u64, - ) -> Result { - // If schema is append only, then all operation ids are latest `Insert`s. - if schema_is_append_only { - Ok(true) - } else { - self.present_operation_ids.contains(txn, &operation_id) - } - .map_err(Into::into) - } - - pub fn get_record_by_operation_id_unchecked( - &self, - txn: &T, - operation_id: u64, - ) -> Result { - let Some(Cow::Owned(Operation::Insert { - record_meta, - record, - })) = self.operation_id_to_operation.get(txn, &operation_id)? - else { - panic!( - "Inconsistent state: primary_key_metadata, hash_metadata or present_operation_ids contains an insert operation id that is not an Insert operation" - ); - }; - Ok(CacheRecord::new( - record_meta.id, - record_meta.version, - record, - )) - } - - pub fn get_operation( - &self, - txn: &T, - operation_id: u64, - ) -> Result, StorageError> { - Ok(self - .operation_id_to_operation - .get(txn, &operation_id)? - .map(IntoOwned::into_owned)) - } - - /// Inserts a new record and returns the new record id and version. If key is primary key, it must not exist. - pub fn insert_new( - &self, - txn: &mut RwTransaction, - key: Option, - record: &Record, - ) -> Result { - if let Some(key) = key { - if let MetadataKey::PrimaryKey(key) = key { - debug_assert!(!key.is_empty()); - debug_assert!(self.primary_key_metadata.get_present(txn, key)?.is_none()) - } - - // Generate record id from metadata. - let record_id = match key { - MetadataKey::PrimaryKey(_) => self.primary_key_metadata.count_data(txn)? as u64, - MetadataKey::Hash(_, _) => self.hash_metadata.count_data(txn)? as u64, - }; - let record_meta = RecordMeta::new(record_id, INITIAL_RECORD_VERSION); - self.insert_overwrite(txn, key, record, None, record_meta)?; - Ok(record_meta) - } else { - // Generate operation id. Record id is operation id. - let operation_id = self.next_operation_id.fetch_add(txn, 1)?; - let record_meta = RecordMeta::new(operation_id, INITIAL_RECORD_VERSION); - // Record operation. The operation id must not exist. - self.operation_id_to_operation.append( - txn, - &operation_id, - OperationBorrow::Insert { - record_meta, - record, - }, - )?; - increment_counter!(CACHE_OPERATION_LOG_COUNTER_NAME, self.labels.clone()); - Ok(record_meta) - } - } - - /// Inserts a record that was deleted before. 
The given `record_meta` must match what is stored in metadata. - /// Meaning there exists `(key, meta)` pair in metadata and `meta == record_meta`. - pub fn insert_deleted( - &self, - txn: &mut RwTransaction, - key: MetadataKey, - record: &Record, - record_meta: RecordMeta, - ) -> Result { - let check = || { - if let MetadataKey::PrimaryKey(key) = key { - let Some(metadata) = self.primary_key_metadata.get_deleted(txn, key)? else { - return Ok::<_, StorageError>(false); - }; - let metadata = metadata.borrow(); - Ok(metadata.meta == record_meta && metadata.insert_operation_id.is_none()) - } else { - Ok(true) - } - }; - debug_assert!(check()?); - - self.insert_deleted_impl(txn, key, record, record_meta, None) - } - - /// Inserts a record that was deleted before, without checking invariants. - fn insert_deleted_impl( - &self, - txn: &mut RwTransaction, - key: MetadataKey, - record: &Record, - record_meta: RecordMeta, - insert_operation_id: Option, - ) -> Result { - let old = RecordMetadata { - meta: record_meta, - insert_operation_id, - }; - let new_meta = RecordMeta::new(record_meta.id, record_meta.version + 1); - self.insert_overwrite(txn, key, record, Some(old), new_meta)?; - Ok(new_meta) - } - - /// Inserts an record and overwrites its metadata. This function breaks variants of `OperationLog` and should be used with caution. - fn insert_overwrite( - &self, - txn: &mut RwTransaction, - key: MetadataKey, - record: &Record, - old: Option, - new_meta: RecordMeta, - ) -> Result<(), StorageError> { - // Generation operation id. - let operation_id = self.next_operation_id.fetch_add(txn, 1)?; - - // Update `primary_key_metadata`. - let new = RecordMetadata { - meta: new_meta, - insert_operation_id: Some(operation_id), - }; - if let Some(old) = old { - match key { - MetadataKey::PrimaryKey(key) => self - .primary_key_metadata - .insert_overwrite(txn, key, &old, &new)?, - MetadataKey::Hash(record, hash) => { - self.hash_metadata - .insert_overwrite(txn, (record, hash), &old, &new)? - } - } - } else { - match key { - MetadataKey::PrimaryKey(key) => self.primary_key_metadata.insert(txn, key, &new)?, - MetadataKey::Hash(record, hash) => { - self.hash_metadata.insert(txn, (record, hash), &new)? - } - } - } - - // Update `present_operation_ids`. - if !self.present_operation_ids.insert(txn, &operation_id)? { - panic!("Inconsistent state: operation id already exists"); - } - - // Record operation. The operation id must not exist. - self.operation_id_to_operation.append( - txn, - &operation_id, - OperationBorrow::Insert { - record_meta: new_meta, - record, - }, - )?; - increment_counter!(CACHE_OPERATION_LOG_COUNTER_NAME, self.labels.clone()); - Ok(()) - } - - /// Updates an existing record. The given `record_meta` and `insert_operation_id` must match what is stored in metadata. - /// Meaning there exists `(key, meta)` pair in metadata and `meta == record_meta` and `meta.insert_operation_id == Some(insert_operation_id)`. - pub fn update( - &self, - txn: &mut RwTransaction, - key: MetadataKey, - record: &Record, - record_meta: RecordMeta, - insert_operation_id: u64, - ) -> Result { - self.debug_check_record_existence(txn, key, record_meta, insert_operation_id)?; - self.delete_without_updating_metadata(txn, insert_operation_id)?; - self.insert_deleted_impl(txn, key, record, record_meta, Some(insert_operation_id)) - } - - // Only checks `primary_key_metadata` because `hash_metadata` will check existence in `insert_overwrite`. 
- fn debug_check_record_existence( - &self, - txn: &T, - key: MetadataKey, - record_meta: RecordMeta, - insert_operation_id: u64, - ) -> Result<(), StorageError> { - let check = || { - if let MetadataKey::PrimaryKey(key) = key { - let Some(metadata) = self.primary_key_metadata.get_present(txn, key)? else { - return Ok::<_, StorageError>(false); - }; - let metadata = metadata.borrow(); - Ok(metadata.meta == record_meta - && metadata.insert_operation_id == Some(insert_operation_id)) - } else { - Ok(true) - } - }; - debug_assert!(check()?); - Ok(()) - } - - /// Deletes an operation without updating the record metadata. This function breaks variants of `OperationLog` and should be used with caution. - fn delete_without_updating_metadata( - &self, - txn: &mut RwTransaction, - insert_operation_id: u64, - ) -> Result<(), StorageError> { - // The operation id must be present. - if !self - .present_operation_ids - .remove(txn, &insert_operation_id)? - { - panic!("Inconsistent state: insert operation id not found") - } - // Generate new operation id. - let operation_id = self.next_operation_id.fetch_add(txn, 1)?; - // Record delete operation. The operation id must not exist. - self.operation_id_to_operation.append( - txn, - &operation_id, - OperationBorrow::Delete { - operation_id: insert_operation_id, - }, - )?; - increment_counter!(CACHE_OPERATION_LOG_COUNTER_NAME, self.labels.clone()); - Ok(()) - } - - /// Deletes an existing record. The given `record_meta` and `insert_operation_id` must match what is stored in metadata. - /// Meaning there exists `(key, meta)` pair in metadata and `meta == record_meta` and `meta.insert_operation_id == Some(insert_operation_id)`. - pub fn delete( - &self, - txn: &mut RwTransaction, - key: MetadataKey, - record_meta: RecordMeta, - insert_operation_id: u64, - ) -> Result<(), StorageError> { - self.debug_check_record_existence(txn, key, record_meta, insert_operation_id)?; - self.delete_without_updating_metadata(txn, insert_operation_id)?; - - let old = RecordMetadata { - meta: record_meta, - insert_operation_id: Some(insert_operation_id), - }; - let new = RecordMetadata { - meta: record_meta, - insert_operation_id: None, - }; - match key { - MetadataKey::PrimaryKey(key) => self - .primary_key_metadata - .insert_overwrite(txn, key, &old, &new), - MetadataKey::Hash(record, hash) => { - self.hash_metadata - .insert_overwrite(txn, (record, hash), &old, &new) - } - } - } - - pub async fn dump<'txn, T: Transaction>( - &self, - txn: &'txn T, - context: &dozer_storage::generator::FutureGeneratorContext< - Result, StorageError>, - >, - ) -> Result<(), ()> { - dozer_storage::dump( - txn, - PrimaryKeyMetadata::DATABASE_NAME, - self.primary_key_metadata.database(), - context, - ) - .await?; - dozer_storage::dump( - txn, - HashMetadata::DATABASE_NAME, - self.hash_metadata.database(), - context, - ) - .await?; - dozer_storage::dump( - txn, - PRESENT_OPERATION_IDS_DB_NAME, - self.present_operation_ids.database(), - context, - ) - .await?; - dozer_storage::dump( - txn, - NEXT_OPERATION_ID_DB_NAME, - self.next_operation_id.database(), - context, - ) - .await?; - dozer_storage::dump( - txn, - OPERATION_ID_TO_OPERATION_DB_NAME, - self.operation_id_to_operation.database(), - context, - ) - .await - } - - pub async fn restore<'txn, R: tokio::io::AsyncRead + Unpin>( - env: &mut RwLmdbEnvironment, - reader: &mut R, - name: String, - labels: Labels, - ) -> Result { - info!("Restoring primary key metadata"); - dozer_storage::restore(env, reader).await?; - info!("Restoring hash metadata"); - 
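        // The restore sequence below has to mirror the dump order in `dump` above exactly
        // (primary key metadata, hash metadata, present operation ids, next operation id,
        // operation id to operation), because `reader` is a single sequential stream.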
dozer_storage::restore(env, reader).await?; - info!("Restoring present operation ids"); - dozer_storage::restore(env, reader).await?; - info!("Restoring next operation id"); - dozer_storage::restore(env, reader).await?; - info!("Restoring operation id to operation"); - dozer_storage::restore(env, reader).await?; - Self::open(env, name, labels).map_err(Into::into) - } -} - -const INITIAL_RECORD_VERSION: u32 = 1_u32; - -mod hash_metadata; -mod lmdb_val_impl; -mod metadata; -mod primary_key_metadata; - -pub use metadata::RecordMetadata; - -use hash_metadata::HashMetadata; -use metadata::Metadata; -use primary_key_metadata::PrimaryKeyMetadata; - -#[cfg(test)] -pub mod tests; diff --git a/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/primary_key_metadata.rs b/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/primary_key_metadata.rs deleted file mode 100644 index 12a74b2a24..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/primary_key_metadata.rs +++ /dev/null @@ -1,82 +0,0 @@ -use dozer_storage::{ - errors::StorageError, - lmdb::{Database, Transaction}, - LmdbEnvironment, LmdbMap, RwLmdbEnvironment, -}; -use dozer_types::borrow::IntoOwned; - -use super::{metadata::Metadata, RecordMetadata}; - -#[derive(Debug, Clone, Copy)] -pub struct PrimaryKeyMetadata(LmdbMap, RecordMetadata>); - -impl PrimaryKeyMetadata { - pub fn create(env: &mut RwLmdbEnvironment) -> Result { - LmdbMap::create(env, Some(Self::DATABASE_NAME)).map(Self) - } - - pub fn open(env: &E) -> Result { - LmdbMap::open(env, Some(Self::DATABASE_NAME)).map(Self) - } - - fn get( - &self, - txn: &T, - key: &[u8], - ) -> Result, StorageError> { - self.0 - .get(txn, key) - .map(|metadata| metadata.map(|metadata| metadata.into_owned())) - } - - pub const DATABASE_NAME: &'static str = "primary_key_metadata"; - - pub fn database(&self) -> Database { - self.0.database() - } -} - -impl Metadata for PrimaryKeyMetadata { - type Key<'a> = &'a [u8]; - - fn count_data(&self, txn: &T) -> Result { - self.0.count(txn) - } - - fn get_present( - &self, - txn: &T, - key: &[u8], - ) -> Result, StorageError> { - self.get(txn, key) - } - - fn get_deleted( - &self, - txn: &T, - key: &[u8], - ) -> Result, StorageError> { - self.get(txn, key) - } - - fn insert( - &self, - txn: &mut dozer_storage::lmdb::RwTransaction, - key: &[u8], - value: &RecordMetadata, - ) -> Result<(), StorageError> { - let inserted = self.0.insert(txn, key, value)?; - debug_assert!(inserted); - Ok(()) - } - - fn insert_overwrite( - &self, - txn: &mut dozer_storage::lmdb::RwTransaction, - key: &[u8], - _old: &RecordMetadata, - new: &RecordMetadata, - ) -> Result<(), StorageError> { - self.0.insert_overwrite(txn, key, new) - } -} diff --git a/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/tests.rs b/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/tests.rs deleted file mode 100644 index 0d40d00982..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/tests.rs +++ /dev/null @@ -1,353 +0,0 @@ -use dozer_storage::{assert_database_equal, lmdb::Transaction}; -use dozer_types::{borrow::IntoOwned, types::Record}; - -use crate::cache::{ - lmdb::{ - cache::main_environment::{ - operation_log::{MetadataKey, OperationLog, INITIAL_RECORD_VERSION}, - Operation, - }, - utils::create_env, - }, - CacheRecord, -}; - -pub fn assert_operation_log_equal( - log1: &OperationLog, - txn1: &T1, - log2: &OperationLog, - txn2: &T2, -) { - assert_database_equal( - txn1, - 
log1.primary_key_metadata.database(), - txn2, - log2.primary_key_metadata.database(), - ); - assert_database_equal( - txn1, - log1.hash_metadata.database(), - txn2, - log2.hash_metadata.database(), - ); - assert_database_equal( - txn1, - log1.present_operation_ids.database(), - txn2, - log2.present_operation_ids.database(), - ); - assert_database_equal( - txn1, - log1.next_operation_id.database(), - txn2, - log2.next_operation_id.database(), - ); - assert_database_equal( - txn1, - log1.operation_id_to_operation.database(), - txn2, - log2.operation_id_to_operation.database(), - ); -} - -#[test] -fn test_operation_log_append_only() { - let mut env = create_env(&Default::default()).unwrap().0; - let log = OperationLog::create(&mut env, "temp".to_string(), Default::default()).unwrap(); - let txn = env.txn_mut().unwrap(); - let append_only = true; - - let records = vec![Record::new(vec![]); 10]; - for (index, record) in records.iter().enumerate() { - let record_meta = log.insert_new(txn, None, record).unwrap(); - assert_eq!(record_meta.id, index as u64); - assert_eq!(record_meta.version, INITIAL_RECORD_VERSION); - assert_eq!( - log.count_present_records(txn, append_only).unwrap(), - index + 1 - ); - assert_eq!(log.next_operation_id(txn).unwrap(), index as u64 + 1); - assert_eq!( - log.present_operation_ids(txn, append_only) - .unwrap() - .map(|result| result.map(IntoOwned::into_owned)) - .collect::, _>>() - .unwrap(), - (0..=index as u64).collect::>() - ); - assert!(log - .contains_operation_id(txn, append_only, index as _) - .unwrap()); - assert_eq!( - log.get_record_by_operation_id_unchecked(txn, index as _) - .unwrap(), - CacheRecord::new(record_meta.id, record_meta.version, record.clone()), - ); - assert_eq!( - log.get_operation(txn, index as _).unwrap().unwrap(), - Operation::Insert { - record_meta, - record: record.clone(), - } - ); - } -} - -#[test] -fn test_operation_log_with_primary_key() { - let mut env = create_env(&Default::default()).unwrap().0; - let log = OperationLog::create(&mut env, "temp".to_string(), Default::default()).unwrap(); - let txn = env.txn_mut().unwrap(); - let append_only = false; - - // Insert a record. - let record = Record::new(vec![]); - let primary_key = b"primary_key"; - let key = MetadataKey::PrimaryKey(primary_key); - let mut record_meta = log.insert_new(txn, Some(key), &record).unwrap(); - assert_eq!(record_meta.id, 0); - assert_eq!(record_meta.version, INITIAL_RECORD_VERSION); - assert_eq!(log.count_present_records(txn, append_only).unwrap(), 1); - assert_eq!( - log.get_record(txn, primary_key).unwrap().unwrap(), - CacheRecord::new(record_meta.id, record_meta.version, record.clone()), - ); - assert_eq!(log.next_operation_id(txn).unwrap(), 1); - assert_eq!( - log.present_operation_ids(txn, append_only) - .unwrap() - .map(|result| result.map(IntoOwned::into_owned)) - .collect::, _>>() - .unwrap(), - vec![0] - ); - assert!(log.contains_operation_id(txn, append_only, 0).unwrap()); - assert_eq!( - log.get_record_by_operation_id_unchecked(txn, 0).unwrap(), - CacheRecord::new(record_meta.id, record_meta.version, record.clone()), - ); - assert_eq!( - log.get_operation(txn, 0).unwrap().unwrap(), - Operation::Insert { - record_meta, - record: record.clone(), - } - ); - - // Update the record. 
- record_meta = log.update(txn, key, &record, record_meta, 0).unwrap(); - assert_eq!(log.count_present_records(txn, append_only).unwrap(), 1); - assert_eq!( - log.get_record(txn, primary_key).unwrap().unwrap(), - CacheRecord::new(record_meta.id, record_meta.version, record.clone()), - ); - assert_eq!(log.next_operation_id(txn).unwrap(), 3); - assert_eq!( - log.present_operation_ids(txn, append_only) - .unwrap() - .map(|result| result.map(IntoOwned::into_owned)) - .collect::, _>>() - .unwrap(), - vec![2] - ); - assert!(log.contains_operation_id(txn, append_only, 2).unwrap()); - assert_eq!( - log.get_record_by_operation_id_unchecked(txn, 2).unwrap(), - CacheRecord::new(record_meta.id, record_meta.version, record.clone()), - ); - assert_eq!( - log.get_operation(txn, 1).unwrap().unwrap(), - Operation::Delete { operation_id: 0 } - ); - assert_eq!( - log.get_operation(txn, 2).unwrap().unwrap(), - Operation::Insert { - record_meta, - record: record.clone() - } - ); - - // Delete the record. - log.delete(txn, key, record_meta, 2).unwrap(); - assert_eq!(log.count_present_records(txn, append_only).unwrap(), 0); - assert_eq!(log.get_record(txn, primary_key).unwrap(), None); - assert_eq!(log.next_operation_id(txn).unwrap(), 4); - assert_eq!( - log.present_operation_ids(txn, append_only) - .unwrap() - .map(|result| result.map(IntoOwned::into_owned)) - .collect::, _>>() - .unwrap(), - Vec::::new(), - ); - assert!(!log.contains_operation_id(txn, append_only, 2).unwrap()); - assert_eq!( - log.get_operation(txn, 3).unwrap().unwrap(), - Operation::Delete { operation_id: 2 } - ); - - // Insert with that primary key again. - record_meta = log.insert_deleted(txn, key, &record, record_meta).unwrap(); - assert_eq!(log.count_present_records(txn, append_only).unwrap(), 1); - assert_eq!( - log.get_record(txn, primary_key).unwrap().unwrap(), - CacheRecord::new(record_meta.id, record_meta.version, record.clone()), - ); - assert_eq!(log.next_operation_id(txn).unwrap(), 5); - assert_eq!( - log.present_operation_ids(txn, append_only) - .unwrap() - .map(|result| result.map(IntoOwned::into_owned)) - .collect::, _>>() - .unwrap(), - vec![4] - ); - assert!(log.contains_operation_id(txn, append_only, 4).unwrap()); - assert_eq!( - log.get_record_by_operation_id_unchecked(txn, 4).unwrap(), - CacheRecord::new(record_meta.id, record_meta.version, record.clone()), - ); - assert_eq!( - log.get_operation(txn, 4).unwrap().unwrap(), - Operation::Insert { - record_meta, - record: record.clone(), - } - ); -} - -#[test] -fn test_operation_log_without_primary_key() { - let mut env = create_env(&Default::default()).unwrap().0; - let log = OperationLog::create(&mut env, "temp".to_string(), Default::default()).unwrap(); - let txn = env.txn_mut().unwrap(); - let append_only = false; - - // Insert a record. 
- let record = Record::new(vec![]); - let key = MetadataKey::Hash(&record, 0); - let mut record_meta = log.insert_new(txn, Some(key), &record).unwrap(); - assert_eq!(record_meta.id, 0); - assert_eq!(record_meta.version, INITIAL_RECORD_VERSION); - assert_eq!(log.count_present_records(txn, append_only).unwrap(), 1); - assert_eq!(log.next_operation_id(txn).unwrap(), 1); - assert_eq!( - log.present_operation_ids(txn, append_only) - .unwrap() - .map(|result| result.map(IntoOwned::into_owned)) - .collect::, _>>() - .unwrap(), - vec![0] - ); - assert!(log.contains_operation_id(txn, append_only, 0).unwrap()); - assert_eq!( - log.get_record_by_operation_id_unchecked(txn, 0).unwrap(), - CacheRecord::new(record_meta.id, record_meta.version, record.clone()), - ); - assert_eq!( - log.get_operation(txn, 0).unwrap().unwrap(), - Operation::Insert { - record_meta, - record: record.clone(), - } - ); - - // Insert the same record again. - record_meta = log.insert_new(txn, Some(key), &record).unwrap(); - assert_eq!(record_meta.id, 1); - assert_eq!(record_meta.version, INITIAL_RECORD_VERSION); - assert_eq!(log.count_present_records(txn, append_only).unwrap(), 2); - assert_eq!(log.next_operation_id(txn).unwrap(), 2); - assert_eq!( - log.present_operation_ids(txn, append_only) - .unwrap() - .map(|result| result.map(IntoOwned::into_owned)) - .collect::, _>>() - .unwrap(), - vec![0, 1] - ); - assert!(log.contains_operation_id(txn, append_only, 1).unwrap()); - assert_eq!( - log.get_record_by_operation_id_unchecked(txn, 1).unwrap(), - CacheRecord::new(record_meta.id, record_meta.version, record.clone()), - ); - assert_eq!( - log.get_operation(txn, 1).unwrap().unwrap(), - Operation::Insert { - record_meta, - record: record.clone(), - } - ); - - // Update the record. - record_meta = log.update(txn, key, &record, record_meta, 1).unwrap(); - assert_eq!(log.count_present_records(txn, append_only).unwrap(), 2); - assert_eq!(log.next_operation_id(txn).unwrap(), 4); - assert_eq!( - log.present_operation_ids(txn, append_only) - .unwrap() - .map(|result| result.map(IntoOwned::into_owned)) - .collect::, _>>() - .unwrap(), - vec![0, 3] - ); - assert!(log.contains_operation_id(txn, append_only, 3).unwrap()); - assert_eq!( - log.get_record_by_operation_id_unchecked(txn, 3).unwrap(), - CacheRecord::new(record_meta.id, record_meta.version, record.clone()), - ); - assert_eq!( - log.get_operation(txn, 2).unwrap().unwrap(), - Operation::Delete { operation_id: 1 } - ); - assert_eq!( - log.get_operation(txn, 3).unwrap().unwrap(), - Operation::Insert { - record_meta, - record: record.clone() - } - ); - - // Delete the record. - log.delete(txn, key, record_meta, 3).unwrap(); - assert_eq!(log.count_present_records(txn, append_only).unwrap(), 1); - assert_eq!(log.next_operation_id(txn).unwrap(), 5); - assert_eq!( - log.present_operation_ids(txn, append_only) - .unwrap() - .map(|result| result.map(IntoOwned::into_owned)) - .collect::, _>>() - .unwrap(), - vec![0], - ); - assert!(!log.contains_operation_id(txn, append_only, 3).unwrap()); - assert_eq!( - log.get_operation(txn, 4).unwrap().unwrap(), - Operation::Delete { operation_id: 3 } - ); - - // Insert with that record id again. 
- record_meta = log.insert_deleted(txn, key, &record, record_meta).unwrap(); - assert_eq!(log.count_present_records(txn, append_only).unwrap(), 2); - assert_eq!(log.next_operation_id(txn).unwrap(), 6); - assert_eq!( - log.present_operation_ids(txn, append_only) - .unwrap() - .map(|result| result.map(IntoOwned::into_owned)) - .collect::, _>>() - .unwrap(), - vec![0, 5] - ); - assert!(log.contains_operation_id(txn, append_only, 5).unwrap()); - assert_eq!( - log.get_record_by_operation_id_unchecked(txn, 5).unwrap(), - CacheRecord::new(record_meta.id, record_meta.version, record.clone()), - ); - assert_eq!( - log.get_operation(txn, 5).unwrap().unwrap(), - Operation::Insert { - record_meta, - record: record.clone(), - } - ); -} diff --git a/dozer-cache/src/cache/lmdb/cache/mod.rs b/dozer-cache/src/cache/lmdb/cache/mod.rs deleted file mode 100644 index 6fae2b05c9..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/mod.rs +++ /dev/null @@ -1,221 +0,0 @@ -use dozer_tracing::Labels; -use dozer_types::parking_lot::Mutex; -use std::collections::HashSet; -use std::path::PathBuf; -use std::{fmt::Debug, sync::Arc}; - -use dozer_types::types::{Record, SchemaWithIndex}; - -use super::{ - super::{RoCache, RwCache}, - indexing::{secondary_environment_name, IndexingThreadPool}, -}; -use crate::cache::expression::QueryExpression; -use crate::cache::{CacheRecord, CacheWriteOptions, CommitState, RecordMeta, UpsertResult}; -use crate::errors::CacheError; - -pub mod dump_restore; -mod main_environment; -mod query; -mod secondary_environment; - -pub use main_environment::{MainEnvironment, RoMainEnvironment, RwMainEnvironment}; -use query::LmdbQueryHandler; -pub use secondary_environment::{ - RoSecondaryEnvironment, RwSecondaryEnvironment, SecondaryEnvironment, -}; - -#[derive(Clone, Debug)] -pub struct CacheOptions { - // Total number of readers allowed - pub max_readers: u32, - // Max no of dbs - pub max_db_size: u32, - // Total size allocated for data in a memory mapped file. - // This size is allocated at initialization. - pub max_size: usize, - - /// The chunk size when calculating intersection of index queries. - pub intersection_chunk_size: usize, - - /// Provide a path where db will be created. If nothing is provided, will default to a temp location. - /// Db path will be `PathBuf.join(name)`. - pub path: Option<(PathBuf, String)>, - - /// The labels to attach to the cache. 
- pub labels: Labels, -} - -impl Default for CacheOptions { - fn default() -> Self { - Self { - max_readers: 1000, - max_db_size: 1000, - max_size: 1024 * 1024 * 1024, - intersection_chunk_size: 100, - path: None, - labels: Labels::default(), - } - } -} - -#[derive(Debug, Clone)] -pub struct LmdbRoCache { - pub main_env: RoMainEnvironment, - pub secondary_envs: Vec, -} - -impl LmdbRoCache { - pub fn new(options: CacheOptions) -> Result { - let main_env = RoMainEnvironment::new(options.clone())?; - let secondary_envs = (0..main_env.schema().1.len()) - .map(|index| { - RoSecondaryEnvironment::new(secondary_environment_name(index), options.clone()) - }) - .collect::>()?; - Ok(Self { - main_env, - secondary_envs, - }) - } -} - -#[derive(Debug)] -pub struct LmdbRwCache { - main_env: RwMainEnvironment, - secondary_envs: Vec, - indexing_thread_pool: Arc>, -} - -impl LmdbRwCache { - pub fn new( - schema: Option<&SchemaWithIndex>, - connections: Option<&HashSet>, - options: CacheOptions, - write_options: CacheWriteOptions, - indexing_thread_pool: Arc>, - ) -> Result { - let rw_main_env = - RwMainEnvironment::new(schema, connections, options.clone(), write_options)?; - - let options = CacheOptions { - path: Some(( - rw_main_env.base_path().to_path_buf(), - rw_main_env.name().to_string(), - )), - ..options - }; - let ro_main_env = rw_main_env.share(); - let secondary_envs = indexing_thread_pool - .lock() - .add_cache(ro_main_env, &options)?; - - Ok(Self { - main_env: rw_main_env, - secondary_envs, - indexing_thread_pool, - }) - } -} - -impl RoCache for C { - fn name(&self) -> &str { - self.main_env().name() - } - - fn labels(&self) -> &Labels { - self.main_env().labels() - } - - fn get(&self, key: &[u8]) -> Result { - self.main_env().get(key) - } - - fn count(&self, query: &QueryExpression) -> Result { - LmdbQueryHandler::new(self, query).count() - } - - fn query(&self, query: &QueryExpression) -> Result, CacheError> { - LmdbQueryHandler::new(self, query).query() - } - - fn get_schema(&self) -> &SchemaWithIndex { - self.main_env().schema() - } - - fn get_commit_state(&self) -> Result, CacheError> { - self.main_env().commit_state() - } - - fn is_snapshotting_done(&self) -> Result { - self.main_env().is_snapshotting_done() - } -} - -impl RwCache for LmdbRwCache { - fn insert(&mut self, record: &Record) -> Result { - let span = dozer_types::tracing::span!(dozer_types::tracing::Level::TRACE, "insert_cache"); - let _enter = span.enter(); - self.main_env.insert(record) - } - - fn delete(&mut self, record: &Record) -> Result, CacheError> { - self.main_env.delete(record) - } - - fn update(&mut self, old: &Record, new: &Record) -> Result { - self.main_env.update(old, new) - } - - fn set_connection_snapshotting_done( - &mut self, - connection_name: &str, - ) -> Result<(), CacheError> { - self.main_env - .set_connection_snapshotting_done(connection_name) - } - - fn commit(&mut self, state: &CommitState) -> Result<(), CacheError> { - self.main_env.commit(state)?; - self.indexing_thread_pool.lock().wake(self.labels()); - Ok(()) - } - - fn as_ro(&self) -> &dyn RoCache { - self - } -} - -pub trait LmdbCache: Send + Sync + Debug { - type MainEnvironment: MainEnvironment; - - fn main_env(&self) -> &Self::MainEnvironment; - - type SecondaryEnvironment: SecondaryEnvironment; - - fn secondary_env(&self, index: usize) -> &Self::SecondaryEnvironment; -} - -impl LmdbCache for LmdbRoCache { - type MainEnvironment = RoMainEnvironment; - fn main_env(&self) -> &Self::MainEnvironment { - &self.main_env - } - - type 
SecondaryEnvironment = RoSecondaryEnvironment; - fn secondary_env(&self, index: usize) -> &Self::SecondaryEnvironment { - &self.secondary_envs[index] - } -} - -impl LmdbCache for LmdbRwCache { - type MainEnvironment = RwMainEnvironment; - fn main_env(&self) -> &Self::MainEnvironment { - &self.main_env - } - - type SecondaryEnvironment = RoSecondaryEnvironment; - fn secondary_env(&self, index: usize) -> &Self::SecondaryEnvironment { - &self.secondary_envs[index] - } -} diff --git a/dozer-cache/src/cache/lmdb/cache/query/handler.rs b/dozer-cache/src/cache/lmdb/cache/query/handler.rs deleted file mode 100644 index ac1afa25fa..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/query/handler.rs +++ /dev/null @@ -1,241 +0,0 @@ -use super::intersection::intersection; -use crate::cache::expression::Skip; -use crate::cache::lmdb::cache::main_environment::MainEnvironment; -use crate::cache::lmdb::cache::query::secondary::build_index_scan; -use crate::cache::lmdb::cache::LmdbCache; -use crate::cache::CacheRecord; -use crate::cache::{ - expression::QueryExpression, - plan::{IndexScan, Plan, QueryPlanner}, -}; -use crate::errors::{CacheError, PlanError}; -use dozer_storage::errors::StorageError; -use dozer_storage::lmdb::{RoTransaction, Transaction}; -use dozer_storage::LmdbEnvironment; -use dozer_types::borrow::IntoOwned; -use itertools::Either; - -pub struct LmdbQueryHandler<'a, C: LmdbCache> { - cache: &'a C, - query: &'a QueryExpression, -} - -impl<'a, C: LmdbCache> LmdbQueryHandler<'a, C> { - pub fn new(cache: &'a C, query: &'a QueryExpression) -> Self { - Self { cache, query } - } - - pub fn count(&self) -> Result { - match self.plan()? { - Plan::IndexScans(index_scans) => { - let secondary_txns = self.create_secondary_txns(&index_scans)?; - let ids = self.combine_secondary_queries(&index_scans, &secondary_txns)?; - self.count_secondary_queries(ids) - } - Plan::SeqScan(_) => Ok(match self.query.skip { - Skip::Skip(skip) => self - .cache - .main_env() - .count()? - .saturating_sub(skip) - .min(self.query.limit.unwrap_or(usize::MAX)), - Skip::After(_) => self.all_ids(&self.cache.main_env().begin_txn()?)?.count(), - }), - Plan::ReturnEmpty => Ok(0), - } - } - - pub fn query(&self) -> Result, CacheError> { - match self.plan()? { - Plan::IndexScans(index_scans) => { - let secondary_txns = self.create_secondary_txns(&index_scans)?; - let main_txn = self.cache.main_env().begin_txn()?; - #[allow(clippy::let_and_return)] // Must do let binding unless won't compile - let result = self.collect_records( - &main_txn, - self.combine_secondary_queries(&index_scans, &secondary_txns)?, - ); - result - } - Plan::SeqScan(_seq_scan) => { - let main_txn = self.cache.main_env().begin_txn()?; - #[allow(clippy::let_and_return)] // Must do let binding unless won't compile - let result = self.collect_records(&main_txn, self.all_ids(&main_txn)?); - result - } - Plan::ReturnEmpty => Ok(vec![]), - } - } - - fn plan(&self) -> Result { - let (schema, secondary_indexes) = self.cache.main_env().schema(); - let planner = QueryPlanner::new( - schema, - secondary_indexes, - self.query.filter.as_ref(), - &self.query.order_by, - ); - planner.plan() - } - - fn all_ids<'txn, T: Transaction>( - &self, - main_txn: &'txn T, - ) -> Result> + 'txn, CacheError> { - let schema_is_append_only = self.cache.main_env().schema().0.is_append_only(); - let all_ids = self - .cache - .main_env() - .operation_log() - .present_operation_ids(main_txn, schema_is_append_only)? 
- .map(|result| { - result - .map(|id| id.into_owned()) - .map_err(CacheError::Storage) - }); - Ok(skip(all_ids, self.query.skip).take(self.query.limit.unwrap_or(usize::MAX))) - } - - fn create_secondary_txns( - &self, - index_scans: &[IndexScan], - ) -> Result>, StorageError> { - index_scans - .iter() - .map(|index_scan| self.cache.secondary_env(index_scan.index_id).begin_txn()) - .collect() - } - - fn combine_secondary_queries<'txn, T: Transaction>( - &self, - index_scans: &[IndexScan], - secondary_txns: &'txn [T], - ) -> Result> + 'txn, CacheError> { - debug_assert!( - !index_scans.is_empty(), - "Planner should not generate empty index scan" - ); - let combined = if index_scans.len() == 1 { - // The fast path, without intersection calculation. - Either::Left(build_index_scan( - &secondary_txns[0], - self.cache.secondary_env(index_scans[0].index_id), - &index_scans[0].kind, - )?) - } else { - // Intersection of multiple index scans. - let iterators = index_scans - .iter() - .zip(secondary_txns) - .map(|(index_scan, secondary_txn)| { - build_index_scan( - secondary_txn, - self.cache.secondary_env(index_scan.index_id), - &index_scan.kind, - ) - }) - .collect::, CacheError>>()?; - Either::Right(intersection( - iterators, - self.cache.main_env().intersection_chunk_size(), - )) - }; - Ok(skip(combined, self.query.skip).take(self.query.limit.unwrap_or(usize::MAX))) - } - - fn filter_secondary_queries<'txn, T: Transaction>( - &'txn self, - main_txn: &'txn T, - ids: impl Iterator> + 'txn, - ) -> impl Iterator> + 'txn { - let schema_is_append_only = self.cache.main_env().schema().0.is_append_only(); - ids.filter_map(move |id| match id { - Ok(id) => match self.cache.main_env().operation_log().contains_operation_id( - main_txn, - schema_is_append_only, - id, - ) { - Ok(true) => Some(Ok(id)), - Ok(false) => None, - Err(err) => Some(Err(err.into())), - }, - Err(err) => Some(Err(err)), - }) - } - - fn count_secondary_queries( - &self, - ids: impl Iterator>, - ) -> Result { - let main_txn = self.cache.main_env().begin_txn()?; - - let mut result = 0; - for maybe_id in self.filter_secondary_queries(&main_txn, ids) { - maybe_id?; - result += 1; - } - Ok(result) - } - - fn collect_records<'txn, T: Transaction>( - &'txn self, - main_txn: &'txn T, - ids: impl Iterator> + 'txn, - ) -> Result, CacheError> { - self.filter_secondary_queries(main_txn, ids) - .map(|id| { - id.and_then(|id| { - self.cache - .main_env() - .operation_log() - .get_record_by_operation_id_unchecked(main_txn, id) - .map_err(Into::into) - }) - }) - .collect() - } -} - -fn skip( - iter: impl Iterator>, - skip: Skip, -) -> impl Iterator> { - match skip { - Skip::Skip(n) => Either::Left(iter.skip(n)), - Skip::After(after) => Either::Right(skip_after(iter, after)), - } -} - -struct SkipAfter>> { - inner: T, - after: Option, -} - -impl>> Iterator for SkipAfter { - type Item = Result; - - fn next(&mut self) -> Option { - loop { - if let Some(after) = self.after { - match self.inner.next() { - Some(Ok(id)) => { - if id == after { - self.after = None; - } - } - Some(Err(e)) => return Some(Err(e)), - None => return None, - } - } else { - return self.inner.next(); - } - } - } -} - -fn skip_after>>(iter: T, after: u64) -> SkipAfter { - SkipAfter { - inner: iter, - after: Some(after), - } -} diff --git a/dozer-cache/src/cache/lmdb/cache/query/intersection.rs b/dozer-cache/src/cache/lmdb/cache/query/intersection.rs deleted file mode 100644 index bc087f0b67..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/query/intersection.rs +++ /dev/null @@ -1,101 
+0,0 @@ -use roaring::{MultiOps, RoaringTreemap}; - -pub fn intersection>>( - iterators: Vec, - chunk_size: usize, -) -> Intersection { - let all_iterated_ids = vec![RoaringTreemap::new(); iterators.len()]; - Intersection { - intersection: None, - iterators, - all_iterated_ids, - chunk_size, - } -} - -pub struct Intersection>> { - intersection: Option, - iterators: Vec, - all_iterated_ids: Vec, - chunk_size: usize, -} - -impl>> Iterator for Intersection { - type Item = Result; - - fn next(&mut self) -> Option { - loop { - if let Some(intersection) = &mut self.intersection { - if let Some(id) = intersection.next() { - return Some(Ok(id)); - } else { - self.intersection = None; - } - } - - let mut exhaused = true; - - // Get the next chunk of each iterator. - for (iterated_ids, iterator) in self - .all_iterated_ids - .iter_mut() - .zip(self.iterators.iter_mut()) - { - for _ in 0..self.chunk_size { - if let Some(id) = iterator.next() { - exhaused = false; - match id { - Ok(id) => { - iterated_ids.insert(id); - } - Err(e) => { - return Some(Err(e)); - } - } - } else { - break; - } - } - } - - if exhaused { - return None; - } - - // Emit the intersection of all ids. - let intersection = self.all_iterated_ids.iter().intersection(); - for iterated_ids in self.all_iterated_ids.iter_mut() { - *iterated_ids -= &intersection; - } - self.intersection = Some(intersection.into_iter()); - } - } -} - -#[cfg(test)] -mod tests { - use std::convert::Infallible; - - use super::*; - - #[test] - fn test_intersection() { - let a = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; - let b = vec![1, 3, 5, 7, 9]; - let c = vec![1, 2, 3, 5, 8]; - let intersection = intersection( - vec![ - a.into_iter().map(Ok), - b.into_iter().map(Ok), - c.into_iter().map(Ok), - ], - 2, - ); - assert_eq!( - intersection - .collect::, Infallible>>() - .unwrap(), - vec![1, 3, 5] - ); - } -} diff --git a/dozer-cache/src/cache/lmdb/cache/query/lmdb_cmp.rs b/dozer-cache/src/cache/lmdb/cache/query/lmdb_cmp.rs deleted file mode 100644 index 1b50890cc3..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/query/lmdb_cmp.rs +++ /dev/null @@ -1,16 +0,0 @@ -use dozer_storage::lmdb::{Database, Transaction}; -use dozer_storage::lmdb_sys as ffi; -use std::{cmp::Ordering, ffi::c_void}; - -pub fn lmdb_cmp(txn: &T, db: Database, a: &[u8], b: &[u8]) -> Ordering { - let a: ffi::MDB_val = ffi::MDB_val { - mv_size: a.len(), - mv_data: a.as_ptr() as *mut c_void, - }; - let b: ffi::MDB_val = ffi::MDB_val { - mv_size: b.len(), - mv_data: b.as_ptr() as *mut c_void, - }; - let result = unsafe { dozer_storage::lmdb_sys::mdb_cmp(txn.txn(), db.dbi(), &a, &b) }; - result.cmp(&0) -} diff --git a/dozer-cache/src/cache/lmdb/cache/query/mod.rs b/dozer-cache/src/cache/lmdb/cache/query/mod.rs deleted file mode 100644 index 8179decbdf..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/query/mod.rs +++ /dev/null @@ -1,9 +0,0 @@ -mod handler; -mod intersection; -mod lmdb_cmp; -mod secondary; - -pub use handler::LmdbQueryHandler; - -#[cfg(test)] -mod tests; diff --git a/dozer-cache/src/cache/lmdb/cache/query/secondary.rs b/dozer-cache/src/cache/lmdb/cache/query/secondary.rs deleted file mode 100644 index 51e06e5c47..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/query/secondary.rs +++ /dev/null @@ -1,282 +0,0 @@ -use std::{cmp::Ordering, ops::Bound}; - -use dozer_storage::lmdb::Transaction; -use dozer_types::{ - borrow::{Borrow, IntoOwned}, - types::{Field, IndexDefinition}, -}; - -use crate::{ - cache::{ - expression::{Operator, SortDirection}, - index, - 
lmdb::cache::secondary_environment::SecondaryEnvironment, - plan::{IndexScanKind, SortedInvertedRangeQuery}, - }, - errors::{CacheError, IndexError}, -}; - -use super::lmdb_cmp::lmdb_cmp; - -pub fn build_index_scan<'txn, T: Transaction, S: SecondaryEnvironment>( - secondary_txn: &'txn T, - secondary_env: &S, - index_scan_kind: &IndexScanKind, -) -> Result> + 'txn, CacheError> { - let is_single_field_sorted_inverted = - is_single_field_sorted_inverted(secondary_env.index_definition()); - let range = get_range_spec(index_scan_kind, is_single_field_sorted_inverted)?; - - let start = match &range.start { - Some(KeyEndpoint::Including(key)) => Bound::Included(key.as_slice()), - Some(KeyEndpoint::Excluding(key)) => Bound::Excluded(key.as_slice()), - None => Bound::Unbounded, - }; - - let database = secondary_env.database().database(); - Ok(secondary_env - .database() - .range( - secondary_txn, - start, - range.direction == SortDirection::Ascending, - )? - .take_while(move |result| match result { - Ok((key, _)) => { - if let Some(end_key) = &range.end { - match lmdb_cmp(secondary_txn, database, key.borrow(), end_key.key()) { - Ordering::Less => { - matches!(range.direction, SortDirection::Ascending) - } - Ordering::Equal => matches!(end_key, KeyEndpoint::Including(_)), - Ordering::Greater => { - matches!(range.direction, SortDirection::Descending) - } - } - } else { - true - } - } - Err(_) => true, - }) - .map(|result| { - result - .map(|(_, id)| id.into_owned()) - .map_err(CacheError::Storage) - })) -} - -fn is_single_field_sorted_inverted(index: &IndexDefinition) -> bool { - match index { - // `fields.len() == 1` criteria must be kept the same with `comparator.rs`. - IndexDefinition::SortedInverted(fields) => fields.len() == 1, - _ => false, - } -} - -#[derive(Debug, Clone)] -pub enum KeyEndpoint { - Including(Vec), - Excluding(Vec), -} - -impl KeyEndpoint { - pub fn key(&self) -> &[u8] { - match self { - KeyEndpoint::Including(key) => key, - KeyEndpoint::Excluding(key) => key, - } - } -} - -#[derive(Debug)] -struct RangeSpec { - start: Option, - end: Option, - direction: SortDirection, -} - -fn get_range_spec( - index_scan_kind: &IndexScanKind, - is_single_field_sorted_inverted: bool, -) -> Result { - match &index_scan_kind { - IndexScanKind::SortedInverted { - eq_filters, - range_query, - } => { - let comparison_key = build_sorted_inverted_comparison_key( - eq_filters, - range_query.as_ref(), - is_single_field_sorted_inverted, - ); - // There're 3 cases: - // 1. Range query with operator. - // 2. Range query without operator (only order by). - // 3. No range query. - Ok(if let Some(range_query) = range_query { - match range_query.operator_and_value { - Some((operator, _)) => { - // Here we respond to case 1, examples are `a = 1 && b > 2` or `b < 2`. - let comparison_key = comparison_key.expect("here's at least a range query"); - let null_key = build_sorted_inverted_comparison_key( - eq_filters, - Some(&SortedInvertedRangeQuery { - field_index: range_query.field_index, - operator_and_value: Some((operator, Field::Null)), - sort_direction: range_query.sort_direction, - }), - is_single_field_sorted_inverted, - ) - .expect("we provided a range query"); - get_key_interval_from_range_query( - comparison_key, - null_key, - operator, - range_query.sort_direction, - ) - } - None => { - // Here we respond to case 2, examples are `a = 1 && b asc` or `b desc`. - if let Some(comparison_key) = comparison_key { - // This is the case like `a = 1 && b asc`. The comparison key is only built from `a = 1`. 
- // We use `a = 1 && b = null` as a sentinel, using the invariant that `null` is greater than anything. - let null_key = build_sorted_inverted_comparison_key( - eq_filters, - Some(&SortedInvertedRangeQuery { - field_index: range_query.field_index, - operator_and_value: Some((Operator::LT, Field::Null)), - sort_direction: range_query.sort_direction, - }), - is_single_field_sorted_inverted, - ) - .expect("we provided a range query"); - match range_query.sort_direction { - SortDirection::Ascending => RangeSpec { - start: Some(KeyEndpoint::Excluding(comparison_key)), - end: Some(KeyEndpoint::Including(null_key)), - direction: SortDirection::Ascending, - }, - SortDirection::Descending => RangeSpec { - start: Some(KeyEndpoint::Including(null_key)), - end: Some(KeyEndpoint::Excluding(comparison_key)), - direction: SortDirection::Descending, - }, - } - } else { - // Just all of them. - RangeSpec { - start: None, - end: None, - direction: range_query.sort_direction, - } - } - } - } - } else { - // Here we respond to case 3, examples are `a = 1` or `a = 1 && b = 2`. - let comparison_key = comparison_key - .expect("here's at least a eq filter because there's no range query"); - RangeSpec { - start: Some(KeyEndpoint::Including(comparison_key.clone())), - end: Some(KeyEndpoint::Including(comparison_key)), - direction: SortDirection::Ascending, // doesn't matter - } - }) - } - IndexScanKind::FullText { filter } => match filter.op { - Operator::Contains => { - let token = match &filter.val { - Field::String(token) => token, - Field::Text(token) => token, - _ => return Err(CacheError::Index(IndexError::ExpectedStringFullText)), - }; - let key = index::get_full_text_secondary_index(token); - Ok(RangeSpec { - start: Some(KeyEndpoint::Including(key.clone())), - end: Some(KeyEndpoint::Including(key)), - direction: SortDirection::Ascending, // doesn't matter - }) - } - Operator::MatchesAll | Operator::MatchesAny => { - unimplemented!("matches all and matches any are not implemented") - } - other => panic!("operator {other:?} is not supported by full text index"), - }, - } -} - -fn build_sorted_inverted_comparison_key( - eq_filters: &[(usize, Field)], - range_query: Option<&SortedInvertedRangeQuery>, - is_single_field_index: bool, -) -> Option> { - let mut fields = vec![]; - eq_filters.iter().for_each(|filter| { - fields.push(&filter.1); - }); - if let Some(range_query) = range_query { - if let Some((_, val)) = &range_query.operator_and_value { - fields.push(val); - } - } - if fields.is_empty() { - None - } else { - Some(index::get_secondary_index(&fields, is_single_field_index)) - } -} - -/// Here we use the invariant that `null` is greater than anything. 
-fn get_key_interval_from_range_query( - comparison_key: Vec, - null_key: Vec, - operator: Operator, - sort_direction: SortDirection, -) -> RangeSpec { - match (operator, sort_direction) { - (Operator::LT, SortDirection::Ascending) => RangeSpec { - start: None, - end: Some(KeyEndpoint::Excluding(comparison_key)), - direction: SortDirection::Ascending, - }, - (Operator::LT, SortDirection::Descending) => RangeSpec { - start: Some(KeyEndpoint::Excluding(comparison_key)), - end: None, - direction: SortDirection::Descending, - }, - (Operator::LTE, SortDirection::Ascending) => RangeSpec { - start: None, - end: Some(KeyEndpoint::Including(comparison_key)), - direction: SortDirection::Ascending, - }, - (Operator::LTE, SortDirection::Descending) => RangeSpec { - start: Some(KeyEndpoint::Including(comparison_key)), - end: None, - direction: SortDirection::Descending, - }, - (Operator::GT, SortDirection::Ascending) => RangeSpec { - start: Some(KeyEndpoint::Excluding(comparison_key)), - end: Some(KeyEndpoint::Excluding(null_key)), - direction: SortDirection::Ascending, - }, - (Operator::GT, SortDirection::Descending) => RangeSpec { - start: Some(KeyEndpoint::Excluding(null_key)), - end: Some(KeyEndpoint::Excluding(comparison_key)), - direction: SortDirection::Descending, - }, - (Operator::GTE, SortDirection::Ascending) => RangeSpec { - start: Some(KeyEndpoint::Including(comparison_key)), - end: Some(KeyEndpoint::Excluding(null_key)), - direction: SortDirection::Ascending, - }, - (Operator::GTE, SortDirection::Descending) => RangeSpec { - start: Some(KeyEndpoint::Excluding(null_key)), - end: Some(KeyEndpoint::Including(comparison_key)), - direction: SortDirection::Descending, - }, - (other, _) => { - panic!("operator {other:?} is not supported by sorted inverted index range query") - } - } -} diff --git a/dozer-cache/src/cache/lmdb/cache/query/tests.rs b/dozer-cache/src/cache/lmdb/cache/query/tests.rs deleted file mode 100644 index 21a34d8884..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/query/tests.rs +++ /dev/null @@ -1,271 +0,0 @@ -use crate::cache::{ - expression::{FilterExpression, Operator, QueryExpression}, - lmdb::tests::utils::{create_cache, insert_rec_1}, - test_utils::{query_from_filter, schema_1, schema_full_text, schema_multi_indices}, - CacheRecord, RoCache, RwCache, -}; -use dozer_types::{ - serde_json::{from_value, json, Value}, - types::{Field, Record}, -}; - -#[test] -fn query_secondary_sorted_inverted() { - let (mut cache, indexing_thread_pool, _, _) = create_cache(schema_1); - - let record = Record::new(vec![ - Field::Int(1), - Field::String("test".to_string()), - Field::Int(2), - ]); - - cache.insert(&record).unwrap(); - cache.commit(&Default::default()).unwrap(); - indexing_thread_pool.lock().wait_until_catchup(); - - let filter = FilterExpression::And(vec![ - FilterExpression::Simple("a".to_string(), Operator::EQ, Value::from(1)), - FilterExpression::Simple( - "b".to_string(), - Operator::EQ, - Value::from("test".to_string()), - ), - ]); - - // Query with an expression - let query = query_from_filter(filter); - - let records = cache.query(&query).unwrap(); - assert_eq!(cache.count(&query).unwrap(), 1); - assert_eq!(records.len(), 1, "must be equal"); - assert_eq!(records[0].record, record, "must be equal"); -} - -#[test] -fn query_secondary_full_text() { - let (mut cache, indexing_thread_pool, _, _) = create_cache(schema_full_text); - - let record = Record::new(vec![ - Field::String("today is a good day".into()), - Field::Text("marry has a little lamb".into()), - ]); - - 
cache.insert(&record).unwrap(); - cache.commit(&Default::default()).unwrap(); - indexing_thread_pool.lock().wait_until_catchup(); - - let filter = FilterExpression::Simple("foo".into(), Operator::Contains, "good".into()); - - let query = query_from_filter(filter); - - let records = cache.query(&query).unwrap(); - assert_eq!(cache.count(&query).unwrap(), 1); - assert_eq!(records.len(), 1); - assert_eq!(records[0].record, record); - - let filter = FilterExpression::Simple("bar".into(), Operator::Contains, "lamb".into()); - let query = query_from_filter(filter); - let records = cache.query(&query).unwrap(); - assert_eq!(cache.count(&query).unwrap(), 1); - assert_eq!(records.len(), 1); - assert_eq!(records[0].record, record); -} - -#[test] -fn query_secondary_vars() { - let (mut cache, indexing_thread_pool, _, _) = create_cache(schema_1); - - let items = vec![ - (1, Some("yuri".to_string()), Some(521)), - (2, Some("mega".to_string()), Some(521)), - (3, Some("james".to_string()), Some(523)), - (4, Some("james".to_string()), Some(524)), - (5, Some("steff".to_string()), Some(526)), - (6, Some("mega".to_string()), Some(527)), - (7, Some("james".to_string()), Some(528)), - (8, Some("ava".to_string()), None), - ]; - // 26 alphabets - for val in items { - insert_rec_1(&mut cache, val); - } - cache.commit(&Default::default()).unwrap(); - indexing_thread_pool.lock().wait_until_catchup(); - - test_query(json!({}), 8, &cache); - - test_query( - json!({ - "$order_by": { "c": "desc" } - }), - 8, - &cache, - ); - - test_query(json!({"$filter":{ "a": {"$eq": 1}}}), 1, &cache); - - test_query(json!({"$filter":{ "a": {"$eq": null}}}), 0, &cache); - - test_query(json!({"$filter":{ "c": {"$eq": 521}}}), 2, &cache); - - test_query(json!({"$filter":{ "c": {"$eq": null}}}), 1, &cache); - - test_query( - json!({"$filter":{ "a": 1, "b": "yuri".to_string()}}), - 1, - &cache, - ); - - // No compound index for a,c - test_query_err(json!({"$filter":{ "a": 1, "c": 521}}), &cache); - - test_query( - json!({ - "$filter":{ "c": {"$eq": 521}}, - "$order_by": { "c": "asc" } - }), - 2, - &cache, - ); - - test_query_record( - json!({ - "$filter":{ "a": {"$eq": 1}}, - "$order_by": { "b": "asc" } - }), - vec![(0, 1, "yuri".to_string(), 521)], - &cache, - ); - - // Range tests - test_query(json!({"$filter":{ "c": {"$lte": null}}}), 0, &cache); - - test_query(json!({"$filter":{ "c": {"$lte": 521}}}), 2, &cache); - - test_query(json!({"$filter":{ "c": {"$gte": 521}}}), 7, &cache); - - test_query(json!({"$filter":{ "c": {"$gt": 521}}}), 5, &cache); - - test_query(json!({"$filter":{ "c": {"$lte": 524}}}), 4, &cache); - - test_query(json!({"$filter":{ "c": {"$lt": 524}}}), 3, &cache); - - test_query(json!({"$filter":{ "c": {"$lt": 600}}}), 7, &cache); - - test_query(json!({"$filter":{ "c": {"$gt": 200}}}), 7, &cache); - - test_query_record( - json!({ - "$filter":{ "c": {"$gt": 526}}, - "$order_by": { "c": "asc" } - }), - vec![ - (5, 6, "mega".to_string(), 527), - (6, 7, "james".to_string(), 528), - ], - &cache, - ); - - test_query_record( - json!({ - "$filter":{ "c": {"$gt": 526}}, - "$order_by": { "c": "desc" } - }), - vec![ - (6, 7, "james".to_string(), 528), - (5, 6, "mega".to_string(), 527), - ], - &cache, - ); -} - -#[test] -fn query_secondary_multi_indices() { - let (mut cache, indexing_thread_pool, _, _) = create_cache(schema_multi_indices); - - for (id, text) in [ - (1, "apple ball cake dance"), - (2, "ball cake dance egg"), - (3, "cake dance egg fish"), - (4, "dance egg fish glove"), - (5, "egg fish glove heart"), - (6, "fish 
glove heart igloo"), - (7, "glove heart igloo jump"), - ] { - let record = Record { - values: vec![Field::Int(id), Field::String(text.into())], - lifetime: None, - }; - cache.insert(&record).unwrap(); - } - cache.commit(&Default::default()).unwrap(); - indexing_thread_pool.lock().wait_until_catchup(); - - let query = query_from_filter(FilterExpression::And(vec![ - FilterExpression::Simple("id".into(), Operator::GT, Value::from(2)), - FilterExpression::Simple("text".into(), Operator::Contains, Value::from("dance")), - ])); - - let records = cache.query(&query).unwrap(); - assert_eq!(cache.count(&query).unwrap(), 2); - assert_eq!( - records, - vec![ - CacheRecord::new( - 2, - 1, - Record { - values: vec![Field::Int(3), Field::String("cake dance egg fish".into())], - lifetime: None, - } - ), - CacheRecord::new( - 3, - 1, - Record { - values: vec![Field::Int(4), Field::String("dance egg fish glove".into())], - lifetime: None, - } - ), - ] - ); -} - -fn test_query_err(query: Value, cache: &dyn RwCache) { - let query = from_value::(query).unwrap(); - let count_result = cache.count(&query); - let result = cache.query(&query); - - assert!(matches!( - count_result.unwrap_err(), - crate::errors::CacheError::Plan(_) - ),); - assert!(matches!( - result.unwrap_err(), - crate::errors::CacheError::Plan(_) - ),); -} -fn test_query(query: Value, count: usize, cache: &dyn RwCache) { - let query = from_value::(query).unwrap(); - assert_eq!(cache.count(&query).unwrap(), count); - let records = cache.query(&query).unwrap(); - - assert_eq!(records.len(), count, "Count must be equal : {query:?}"); -} - -fn test_query_record(query: Value, expected: Vec<(u64, i64, String, i64)>, cache: &dyn RwCache) { - let query = from_value::(query).unwrap(); - assert_eq!(cache.count(&query).unwrap(), expected.len()); - let records = cache.query(&query).unwrap(); - let expected = expected - .into_iter() - .map(|(id, a, b, c)| { - CacheRecord::new( - id, - 1, - Record::new(vec![Field::Int(a), Field::String(b), Field::Int(c)]), - ) - }) - .collect::>(); - assert_eq!(records, expected); -} diff --git a/dozer-cache/src/cache/lmdb/cache/secondary_environment/comparator.rs b/dozer-cache/src/cache/lmdb/cache/secondary_environment/comparator.rs deleted file mode 100644 index bda517076e..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/secondary_environment/comparator.rs +++ /dev/null @@ -1,253 +0,0 @@ -use dozer_storage::errors::StorageError; -use dozer_storage::lmdb::{Database, Transaction}; -use dozer_storage::lmdb_sys::{mdb_set_compare, MDB_val, MDB_SUCCESS}; - -use crate::cache::index::{compare_composite_secondary_index, compare_single_secondary_index}; - -pub fn set_sorted_inverted_comparator( - txn: &T, - db: Database, - fields: &[usize], -) -> Result<(), StorageError> { - let comparator = if fields.len() == 1 { - compare_single_key - } else { - compare_composite_key - }; - - unsafe { - assert_eq!( - mdb_set_compare(txn.txn(), db.dbi(), Some(comparator)), - MDB_SUCCESS - ); - } - Ok(()) -} - -unsafe fn mdb_val_to_slice(val: &MDB_val) -> &[u8] { - std::slice::from_raw_parts(val.mv_data as *const u8, val.mv_size) -} - -unsafe extern "C" fn compare_single_key(a: *const MDB_val, b: *const MDB_val) -> std::ffi::c_int { - match compare_single_secondary_index(mdb_val_to_slice(&*a), mdb_val_to_slice(&*b)) { - Ok(ordering) => ordering as std::ffi::c_int, - Err(e) => { - dozer_types::log::error!("Error deserializing secondary index key: {}", e); - 0 - } - } -} - -unsafe extern "C" fn compare_composite_key( - a: *const MDB_val, - b: *const 
MDB_val, -) -> std::ffi::c_int { - match compare_composite_secondary_index(mdb_val_to_slice(&*a), mdb_val_to_slice(&*b)) { - Ok(ordering) => ordering as std::ffi::c_int, - Err(e) => { - dozer_types::log::error!("Error deserializing secondary index key: {}", e); - 0 - } - } -} - -#[cfg(test)] -mod tests { - use std::cmp::Ordering::{self, Equal, Greater, Less}; - - use dozer_storage::{ - lmdb::DatabaseFlags, lmdb_sys::mdb_cmp, LmdbEnvironment, RwLmdbEnvironment, - }; - - use dozer_types::{ - chrono::{DateTime, NaiveDate, TimeZone, Utc}, - ordered_float::OrderedFloat, - rust_decimal::Decimal, - types::Field, - }; - - use crate::cache::{index::get_secondary_index, lmdb::utils}; - - use super::*; - - fn check_test_cases(mut checker: impl FnMut(&[i64], &[i64], Ordering)) { - checker(&[1, 1], &[1, 1], Equal); - checker(&[1, 1], &[1, 2], Less); - checker(&[1, 1], &[2, 1], Less); - checker(&[1, 1], &[2, 2], Less); - checker(&[1, 1], &[1], Greater); - checker(&[1, 1], &[2], Less); - checker(&[1, 2], &[1, 1], Greater); - checker(&[1, 2], &[1, 2], Equal); - checker(&[1, 2], &[2, 1], Less); - checker(&[1, 2], &[2, 2], Less); - checker(&[1, 2], &[1], Greater); - checker(&[1, 2], &[2], Less); - checker(&[2, 1], &[1, 1], Greater); - checker(&[2, 1], &[1, 2], Greater); - checker(&[2, 1], &[2, 1], Equal); - checker(&[2, 1], &[2, 2], Less); - checker(&[2, 1], &[1], Greater); - checker(&[2, 1], &[2], Greater); - checker(&[2, 2], &[1, 1], Greater); - checker(&[2, 2], &[1, 2], Greater); - checker(&[2, 2], &[2, 1], Greater); - checker(&[2, 2], &[2, 2], Equal); - checker(&[2, 2], &[1], Greater); - checker(&[2, 2], &[2], Greater); - checker(&[1], &[1, 1], Less); - checker(&[1], &[1, 2], Less); - checker(&[1], &[2, 1], Less); - checker(&[1], &[2, 2], Less); - checker(&[1], &[1], Equal); - checker(&[1], &[2], Less); - checker(&[2], &[1, 1], Greater); - checker(&[2], &[1, 2], Greater); - checker(&[2], &[2, 1], Less); - checker(&[2], &[2, 2], Less); - checker(&[2], &[1], Greater); - checker(&[2], &[2], Equal); - } - - #[test] - fn test_compare_composite_key() { - let check = |a: &[i64], b: &[i64], expected: Ordering| { - let serialize = |a: &[i64]| { - let a = a.iter().map(|x| Field::Int(*x)).collect::>(); - let a = a.iter().collect::>(); - get_secondary_index(&a, false) - }; - let a = serialize(a); - let b = serialize(b); - let a = MDB_val { - mv_size: a.len() as _, - mv_data: a.as_ptr() as _, - }; - let b = MDB_val { - mv_size: b.len() as _, - mv_data: b.as_ptr() as _, - }; - assert_eq!(unsafe { compare_composite_key(&a, &b) }, expected as i32,); - }; - - check_test_cases(check); - } - - #[test] - fn test_set_sorted_inverted_comparator() { - let mut check_single = get_single_key_checker(); - check_single(Some(1), Some(1), Equal); - check_single(Some(1), Some(2), Less); - check_single(Some(2), Some(1), Greater); - check_single(Some(1), None, Less); - check_single(None, Some(1), Greater); - check_single(None, None, Equal); - - let check_composite = get_composite_key_checker(2); - check_test_cases(check_composite); - } - - fn setup(num_fields: usize) -> (RwLmdbEnvironment, Database) { - let mut env = utils::create_env(&Default::default()).unwrap().0; - let db = env - .create_database(Some("test"), DatabaseFlags::DUP_SORT) - .unwrap(); - let fields = (0..num_fields).collect::>(); - let txn = env.begin_txn().unwrap(); - set_sorted_inverted_comparator(&txn, db, &fields).unwrap(); - txn.commit().unwrap(); - (env, db) - } - - fn get_single_key_checker() -> impl FnMut(Option, Option, Ordering) { - let (env, db) = setup(1); - 
move |a: Option, b: Option, expected: Ordering| { - let orig_a = a; - let orig_b = b; - let serialize = - |a: Option| get_secondary_index(&[&a.map_or(Field::Null, Field::Int)], true); - let a = serialize(a); - let b = serialize(b); - let a = MDB_val { - mv_size: a.len() as _, - mv_data: a.as_ptr() as *mut _, - }; - let b = MDB_val { - mv_size: b.len() as _, - mv_data: b.as_ptr() as *mut _, - }; - let txn = env.begin_txn().unwrap(); - let result = unsafe { mdb_cmp(txn.txn(), db.dbi(), &a, &b) }.cmp(&0); - assert_eq!( - result, expected, - "Comparing {orig_a:?} and {orig_b:?}. Expected: {expected:?}, Actual: {result:?}" - ); - } - } - - fn get_composite_key_checker<'a>( - num_fields: usize, - ) -> impl FnMut(&[i64], &[i64], Ordering) + 'a { - let (env, db) = setup(num_fields); - move |a: &[i64], b: &[i64], expected: Ordering| { - let serialize = |a: &[i64]| { - let fields = a.iter().map(|a| Field::Int(*a)).collect::>(); - let fields = fields.iter().collect::>(); - get_secondary_index(&fields, false) - }; - let a = serialize(a); - let b = serialize(b); - let a = MDB_val { - mv_size: a.len() as _, - mv_data: a.as_ptr() as *mut _, - }; - let b = MDB_val { - mv_size: b.len() as _, - mv_data: b.as_ptr() as *mut _, - }; - let txn = env.begin_txn().unwrap(); - assert_eq!( - unsafe { mdb_cmp(txn.txn(), db.dbi(), &a, &b) }.cmp(&0), - expected - ); - } - } - - #[test] - fn null_is_greater_than_other_thing() { - let (env, db) = setup(1); - let txn = env.begin_txn().unwrap(); - let check = |field: &Field| { - let serialize = |a| get_secondary_index(&[a], true); - let a = serialize(field); - let b = serialize(&Field::Null); - let a = MDB_val { - mv_size: a.len() as _, - mv_data: a.as_ptr() as *mut _, - }; - let b = MDB_val { - mv_size: b.len() as _, - mv_data: b.as_ptr() as *mut _, - }; - assert_eq!(field.cmp(&Field::Null), Ordering::Less); - assert!(unsafe { mdb_cmp(txn.txn(), db.dbi(), &a, &b) } < 0); - }; - - let test_cases = [ - Field::UInt(u64::MAX), - Field::Int(i64::MAX), - Field::Float(OrderedFloat(f64::MAX)), - Field::Boolean(true), - Field::String("a".to_string()), - Field::Text("a".to_string()), - Field::Binary(vec![255]), - Field::Decimal(Decimal::new(i64::MAX, 0)), - Field::Timestamp(DateTime::from(Utc.timestamp_millis_opt(1).unwrap())), - Field::Date(NaiveDate::from_ymd_opt(2020, 1, 2).unwrap()), - Field::Json(vec![255_f64].into()), - ]; - for a in test_cases.iter() { - check(a); - } - } -} diff --git a/dozer-cache/src/cache/lmdb/cache/secondary_environment/dump_restore.rs b/dozer-cache/src/cache/lmdb/cache/secondary_environment/dump_restore.rs deleted file mode 100644 index d946058b83..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/secondary_environment/dump_restore.rs +++ /dev/null @@ -1,194 +0,0 @@ -use dozer_storage::{ - errors::StorageError, generator::FutureGeneratorContext, lmdb::Transaction, DumpItem, - LmdbCounter, LmdbEnvironment, LmdbMultimap, LmdbOption, -}; -use dozer_types::{borrow::IntoOwned, log::info}; -use tokio::io::AsyncRead; - -use crate::{cache::lmdb::utils::create_env, errors::CacheError}; - -use super::{ - get_cache_options, set_comparator, CacheOptions, RwSecondaryEnvironment, SecondaryEnvironment, - SecondaryEnvironmentCommon, DATABASE_DB_NAME, INDEX_DEFINITION_DB_NAME, - NEXT_OPERATION_ID_DB_NAME, -}; - -pub async fn dump<'txn, E: SecondaryEnvironment, T: Transaction>( - env: &E, - txn: &'txn T, - context: &FutureGeneratorContext, StorageError>>, -) -> Result<(), ()> { - dozer_storage::dump( - txn, - INDEX_DEFINITION_DB_NAME, - 
env.common().index_definition_option.database(), - context, - ) - .await?; - dozer_storage::dump( - txn, - DATABASE_DB_NAME, - env.common().database.database(), - context, - ) - .await?; - dozer_storage::dump( - txn, - NEXT_OPERATION_ID_DB_NAME, - env.common().next_operation_id.database(), - context, - ) - .await -} - -pub async fn restore( - name: String, - options: CacheOptions, - reader: &mut (impl AsyncRead + Unpin), -) -> Result { - info!("Restoring secondary environment {name} with options {options:?}"); - let mut env = create_env(&get_cache_options(name.clone(), options))?.0; - - info!("Restoring index definition"); - dozer_storage::restore(&mut env, reader).await?; - info!("Restoring database"); - dozer_storage::restore(&mut env, reader).await?; - info!("Restoring next operation id"); - dozer_storage::restore(&mut env, reader).await?; - - let index_definition_option = LmdbOption::open(&env, Some(INDEX_DEFINITION_DB_NAME))?; - let database = LmdbMultimap::open(&env, Some(DATABASE_DB_NAME))?; - let next_operation_id = LmdbCounter::open(&env, Some(NEXT_OPERATION_ID_DB_NAME))?; - - let index_definition = index_definition_option - .load(&env.begin_txn()?)? - .map(IntoOwned::into_owned) - .ok_or(CacheError::IndexDefinitionNotFound(name))?; - - set_comparator(&env, &index_definition, database)?; - - Ok(RwSecondaryEnvironment { - env, - common: SecondaryEnvironmentCommon { - index_definition, - index_definition_option, - database, - next_operation_id, - }, - }) -} - -#[cfg(test)] -pub mod tests { - use std::pin::pin; - - use super::*; - - use dozer_storage::{ - assert_database_equal, - generator::{Generator, IntoGenerator}, - LmdbEnvironment, - }; - use dozer_types::types::{ - Field, FieldDefinition, FieldType, IndexDefinition, Record, Schema, SourceDefinition, - }; - - use crate::cache::lmdb::cache::{MainEnvironment, RwMainEnvironment, RwSecondaryEnvironment}; - - pub fn assert_secondary_env_equal( - env1: &E1, - env2: &E2, - ) { - assert_eq!( - env1.common().index_definition, - env2.common().index_definition - ); - let txn1 = env1.begin_txn().unwrap(); - let txn2 = env2.begin_txn().unwrap(); - assert_database_equal( - &txn1, - env1.common().index_definition_option.database(), - &txn2, - env2.common().index_definition_option.database(), - ); - assert_database_equal( - &txn1, - env1.common().database.database(), - &txn2, - env2.common().database.database(), - ); - assert_database_equal( - &txn1, - env1.common().next_operation_id.database(), - &txn2, - env2.common().next_operation_id.database(), - ); - } - - #[tokio::test] - async fn test_dump_restore() { - let schema = Schema { - fields: vec![FieldDefinition { - name: "test".to_string(), - typ: FieldType::String, - nullable: true, - source: SourceDefinition::Dynamic, - }], - primary_index: vec![0], - }; - let mut main_env = RwMainEnvironment::new( - Some(&(schema, vec![])), - None, - Default::default(), - Default::default(), - ) - .unwrap(); - - let record_a = Record { - values: vec![Field::String("a".to_string())], - lifetime: None, - }; - let record_b = Record { - values: vec![Field::String("b".to_string())], - lifetime: None, - }; - main_env.insert(&record_a).unwrap(); - main_env.insert(&record_b).unwrap(); - main_env.delete(&record_a).unwrap(); - main_env.commit(&Default::default()).unwrap(); - - let mut env = RwSecondaryEnvironment::new( - &IndexDefinition::SortedInverted(vec![0]), - "0".to_string(), - Default::default(), - ) - .unwrap(); - { - let log_txn = main_env.begin_txn().unwrap(); - env.index( - &log_txn, - 
main_env.operation_log().clone(), - "TEMP", - &Default::default(), - ) - .unwrap(); - env.commit().unwrap(); - } - - let mut data = vec![]; - { - let env = &env; - let txn = &env.begin_txn().unwrap(); - let generator = |context| async move { dump(env, txn, &context).await.unwrap() }; - let generator = generator.into_generator(); - for item in pin!(generator).into_iter() { - data.extend_from_slice(&item.unwrap()); - } - } - - let restored_env = restore("0".to_string(), Default::default(), &mut data.as_slice()) - .await - .unwrap(); - assert_secondary_env_equal(&env, &restored_env); - } -} diff --git a/dozer-cache/src/cache/lmdb/cache/secondary_environment/indexer.rs b/dozer-cache/src/cache/lmdb/cache/secondary_environment/indexer.rs deleted file mode 100644 index a879b5ab4e..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/secondary_environment/indexer.rs +++ /dev/null @@ -1,201 +0,0 @@ -use crate::errors::{CacheError, IndexError}; - -use dozer_storage::lmdb::RwTransaction; -use dozer_types::types::{Field, IndexDefinition, Record}; - -use dozer_storage::LmdbMultimap; - -use itertools::Itertools; -use unicode_segmentation::UnicodeSegmentation; - -use crate::cache::index::{self, get_full_text_secondary_index}; - -pub fn build_index( - txn: &mut RwTransaction, - database: LmdbMultimap, u64>, - record: &Record, - index_definition: &IndexDefinition, - operation_id: u64, -) -> Result<(), CacheError> { - match index_definition { - IndexDefinition::SortedInverted(fields) => { - let secondary_key = build_index_sorted_inverted(fields, &record.values); - // Ignore existing pair. - database.insert(txn, &secondary_key, &operation_id)?; - } - IndexDefinition::FullText(field_index) => { - for secondary_key in build_indices_full_text(*field_index, &record.values)? { - // Ignore existing pair. - database.insert(txn, &secondary_key, &operation_id)?; - } - } - } - Ok(()) -} - -pub fn delete_index( - txn: &mut RwTransaction, - database: LmdbMultimap, u64>, - record: &Record, - index_definition: &IndexDefinition, - operation_id: u64, -) -> Result<(), CacheError> { - match index_definition { - IndexDefinition::SortedInverted(fields) => { - let secondary_key = build_index_sorted_inverted(fields, &record.values); - // Ignore if not found. - database.remove(txn, &secondary_key, &operation_id)?; - } - IndexDefinition::FullText(field_index) => { - for secondary_key in build_indices_full_text(*field_index, &record.values)? { - // Ignore if not found. - database.remove(txn, &secondary_key, &operation_id)?; - } - } - } - Ok(()) -} - -fn build_index_sorted_inverted(fields: &[usize], values: &[Field]) -> Vec { - let values = fields - .iter() - .copied() - .filter_map(|index| (values.get(index))) - .collect::>(); - // `values.len() == 1` criteria must be kept the same with `comparator.rs`. 
- index::get_secondary_index(&values, values.len() == 1) -} - -fn build_indices_full_text( - field_index: usize, - values: &[Field], -) -> Result>, CacheError> { - let Some(field) = values.get(field_index) else { - return Err(CacheError::Index(IndexError::FieldIndexOutOfRange)); - }; - - let string = match field { - Field::String(string) => string, - Field::Text(string) => string, - Field::Null => "", - _ => { - return Err(CacheError::Index(IndexError::FieldNotCompatibleIndex( - field_index, - ))) - } - }; - - Ok(string - .unicode_words() - .map(get_full_text_secondary_index) - .unique() - .collect()) -} - -#[cfg(test)] -mod tests { - use crate::cache::{ - lmdb::tests::utils::{self as lmdb_utils, create_cache}, - test_utils, RwCache, - }; - - use super::*; - - #[test] - fn test_secondary_indexes() { - let (mut cache, indexing_thread_pool, _, secondary_indexes) = - create_cache(test_utils::schema_1); - - let items = vec![ - (1, Some("a".to_string()), Some(521)), - (2, Some("a".to_string()), None), - (3, None, Some(521)), - (4, None, None), - ]; - - for val in items.clone() { - lmdb_utils::insert_rec_1(&mut cache, val); - } - cache.commit(&Default::default()).unwrap(); - indexing_thread_pool.lock().wait_until_catchup(); - - // No of index dbs - let index_counts = lmdb_utils::get_index_counts(&cache); - let expected_count = secondary_indexes.len(); - assert_eq!(index_counts.len(), expected_count); - - // 3 columns, 1 compound, 1 descending - assert_eq!( - index_counts.iter().sum::(), - items.len() * expected_count, - ); - - for a in [1i64, 2, 3, 4] { - let record = Record { - values: vec![Field::Int(a), Field::Null, Field::Null], - lifetime: None, - }; - cache.delete(&record).unwrap(); - } - cache.commit(&Default::default()).unwrap(); - indexing_thread_pool.lock().wait_until_catchup(); - - assert_eq!( - lmdb_utils::get_index_counts(&cache) - .into_iter() - .sum::(), - 0, - "Must delete every index" - ); - } - - #[test] - fn test_build_indices_full_text() { - let field_index = 0; - assert_eq!( - build_indices_full_text(field_index, &[Field::String("today is a good day".into())]) - .unwrap(), - vec![ - get_full_text_secondary_index("today"), - get_full_text_secondary_index("is"), - get_full_text_secondary_index("a"), - get_full_text_secondary_index("good"), - get_full_text_secondary_index("day"), - ] - ); - } - - #[test] - fn test_full_text_secondary_index_with_duplicated_words() { - let (mut cache, indexing_thread_pool, _, _) = create_cache(test_utils::schema_full_text); - - let items = vec![( - Some("another test".to_string()), - Some("regular test regular".to_string()), - )]; - - for val in items { - lmdb_utils::insert_full_text(&mut cache, val); - } - - { - let a = "another test".to_string(); - let record = Record { - values: vec![Field::String(a), Field::Null], - lifetime: None, - }; - cache.delete(&record).unwrap(); - } - - cache.commit(&Default::default()).unwrap(); - indexing_thread_pool.lock().wait_until_catchup(); - - assert_eq!( - lmdb_utils::get_index_counts(&cache) - .into_iter() - .sum::(), - 0, - "Must delete every index" - ); - } -} diff --git a/dozer-cache/src/cache/lmdb/cache/secondary_environment/mod.rs b/dozer-cache/src/cache/lmdb/cache/secondary_environment/mod.rs deleted file mode 100644 index 843ff30ae2..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/secondary_environment/mod.rs +++ /dev/null @@ -1,259 +0,0 @@ -use dozer_storage::{ - lmdb::Transaction, - lmdb_storage::{RoLmdbEnvironment, RwLmdbEnvironment}, - LmdbCounter, LmdbEnvironment, LmdbMultimap, LmdbOption, -}; 
-use dozer_tracing::Labels;
-use dozer_types::{borrow::IntoOwned, log::debug, types::IndexDefinition};
-use metrics::increment_counter;
-
-use crate::{
-    cache::lmdb::utils::{create_env, open_env},
-    errors::CacheError,
-};
-
-use super::{
-    main_environment::{Operation, OperationLog},
-    CacheOptions,
-};
-
-mod comparator;
-mod indexer;
-
-pub type SecondaryIndexDatabase = LmdbMultimap<Vec<u8>, u64>;
-
-#[derive(Debug, Clone)]
-pub struct SecondaryEnvironmentCommon {
-    pub index_definition: IndexDefinition,
-    pub index_definition_option: LmdbOption<IndexDefinition>,
-    pub database: SecondaryIndexDatabase,
-    pub next_operation_id: LmdbCounter,
-}
-
-const INDEX_DEFINITION_DB_NAME: &str = "index_definition";
-const DATABASE_DB_NAME: &str = "database";
-const NEXT_OPERATION_ID_DB_NAME: &str = "next_operation_id";
-
-pub trait SecondaryEnvironment: LmdbEnvironment {
-    fn common(&self) -> &SecondaryEnvironmentCommon;
-
-    fn index_definition(&self) -> &IndexDefinition {
-        &self.common().index_definition
-    }
-
-    fn database(&self) -> SecondaryIndexDatabase {
-        self.common().database
-    }
-
-    fn count_data(&self) -> Result<usize, CacheError> {
-        let txn = self.begin_txn()?;
-        self.database().count_data(&txn).map_err(Into::into)
-    }
-
-    fn next_operation_id<T: Transaction>(&self, txn: &T) -> Result<u64, CacheError> {
-        self.common()
-            .next_operation_id
-            .load(txn)
-            .map_err(Into::into)
-    }
-}
-
-#[derive(Debug)]
-pub struct RwSecondaryEnvironment {
-    env: RwLmdbEnvironment,
-    common: SecondaryEnvironmentCommon,
-}
-
-impl LmdbEnvironment for RwSecondaryEnvironment {
-    fn env(&self) -> &dozer_storage::lmdb::Environment {
-        self.env.env()
-    }
-}
-
-impl SecondaryEnvironment for RwSecondaryEnvironment {
-    fn common(&self) -> &SecondaryEnvironmentCommon {
-        &self.common
-    }
-}
-
-impl RwSecondaryEnvironment {
-    pub fn new(
-        index_definition: &IndexDefinition,
-        name: String,
-        options: CacheOptions,
-    ) -> Result<Self, CacheError> {
-        let mut env = create_env(&get_cache_options(name.clone(), options))?.0;
-
-        let database = LmdbMultimap::create(&mut env, Some(DATABASE_DB_NAME))?;
-        let next_operation_id = LmdbCounter::create(&mut env, Some(NEXT_OPERATION_ID_DB_NAME))?;
-        let index_definition_option = LmdbOption::create(&mut env, Some(INDEX_DEFINITION_DB_NAME))?;
-
-        let old_index_definition = index_definition_option
-            .load(&env.begin_txn()?)?
-            .map(IntoOwned::into_owned);
-
-        let index_definition = if let Some(old_index_definition) = old_index_definition {
-            if index_definition != &old_index_definition {
-                return Err(CacheError::IndexDefinitionMismatch {
-                    name,
-                    given: index_definition.clone(),
-                    stored: old_index_definition,
-                });
-            }
-            old_index_definition
-        } else {
-            index_definition_option.store(env.txn_mut()?, index_definition)?;
-            env.commit()?;
-            index_definition.clone()
-        };
-
-        set_comparator(&env, &index_definition, database)?;
-
-        Ok(Self {
-            env,
-            common: SecondaryEnvironmentCommon {
-                index_definition,
-                index_definition_option,
-                database,
-                next_operation_id,
-            },
-        })
-    }
-
-    pub fn share(&self) -> RoSecondaryEnvironment {
-        RoSecondaryEnvironment {
-            env: self.env.share(),
-            common: self.common.clone(),
-        }
-    }
-
-    /// Returns `true` if the secondary index is up to date.
-    pub fn index<T: Transaction>(
-        &mut self,
-        log_txn: &T,
-        operation_log: OperationLog,
-        counter_name: &'static str,
-        labels: &Labels,
-    ) -> Result<bool, CacheError> {
-        let main_env_next_operation_id = operation_log.next_operation_id(log_txn)?;
-
-        let txn = self.env.txn_mut()?;
-        loop {
-            // Start from `next_operation_id`.
- let operation_id = self.common.next_operation_id.load(txn)?; - if operation_id >= main_env_next_operation_id { - return Ok(true); - } - // Get operation by operation id. - let Some(operation) = operation_log.get_operation(log_txn, operation_id)? else { - // We're not able to read this operation yet, try again later. - debug!("Operation {} not found", operation_id); - return Ok(false); - }; - match operation { - Operation::Insert { record, .. } => { - // Build secondary index. - indexer::build_index( - txn, - self.common.database, - &record, - &self.common.index_definition, - operation_id, - )?; - } - Operation::Delete { operation_id } => { - // If the operation is a `Delete`, find the deleted record. - let Some(operation) = operation_log.get_operation(log_txn, operation_id)? - else { - // We're not able to read this operation yet, try again later. - debug!("Operation {} not found", operation_id); - return Ok(false); - }; - let Operation::Insert { record, .. } = operation else { - panic!("Insert operation {} not found", operation_id); - }; - // Delete secondary index. - indexer::delete_index( - txn, - self.common.database, - &record, - &self.common.index_definition, - operation_id, - )?; - } - } - self.common.next_operation_id.store(txn, operation_id + 1)?; - - increment_counter!(counter_name, labels.clone()); - } - } - - pub fn commit(&mut self) -> Result<(), CacheError> { - self.env.commit().map_err(Into::into) - } -} - -#[derive(Debug, Clone)] -pub struct RoSecondaryEnvironment { - env: RoLmdbEnvironment, - common: SecondaryEnvironmentCommon, -} - -impl LmdbEnvironment for RoSecondaryEnvironment { - fn env(&self) -> &dozer_storage::lmdb::Environment { - self.env.env() - } -} - -impl SecondaryEnvironment for RoSecondaryEnvironment { - fn common(&self) -> &SecondaryEnvironmentCommon { - &self.common - } -} - -impl RoSecondaryEnvironment { - pub fn new(name: String, options: CacheOptions) -> Result { - let env = open_env(&get_cache_options(name.clone(), options))?.0; - - let database = LmdbMultimap::open(&env, Some(DATABASE_DB_NAME))?; - let index_definition_option = LmdbOption::open(&env, Some(INDEX_DEFINITION_DB_NAME))?; - let next_operation_id = LmdbCounter::open(&env, Some(NEXT_OPERATION_ID_DB_NAME))?; - - let index_definition = index_definition_option - .load(&env.begin_txn()?)? 
- .map(IntoOwned::into_owned) - .ok_or(CacheError::IndexDefinitionNotFound(name))?; - - set_comparator(&env, &index_definition, database)?; - Ok(Self { - env, - common: SecondaryEnvironmentCommon { - index_definition, - index_definition_option, - database, - next_operation_id, - }, - }) - } -} - -pub mod dump_restore; - -fn get_cache_options(name: String, options: CacheOptions) -> CacheOptions { - let path = options.path.as_ref().map(|(main_base_path, main_name)| { - let base_path = main_base_path.join(format!("{}_index", main_name)); - (base_path, format!("secondary_index_{name}")) - }); - CacheOptions { path, ..options } -} - -fn set_comparator( - env: &E, - index_definition: &IndexDefinition, - database: SecondaryIndexDatabase, -) -> Result<(), CacheError> { - if let IndexDefinition::SortedInverted(fields) = index_definition { - comparator::set_sorted_inverted_comparator(&env.begin_txn()?, database.database(), fields)?; - } - Ok(()) -} diff --git a/dozer-cache/src/cache/lmdb/cache_manager.rs b/dozer-cache/src/cache/lmdb/cache_manager.rs deleted file mode 100644 index ee23a6fcc9..0000000000 --- a/dozer-cache/src/cache/lmdb/cache_manager.rs +++ /dev/null @@ -1,380 +0,0 @@ -use std::collections::HashSet; -use std::ops::Deref; -use std::{path::PathBuf, sync::Arc}; - -use dozer_storage::{lmdb_storage::LmdbEnvironmentManager, LmdbMap, RwLmdbEnvironment}; -use dozer_storage::{LmdbEnvironment, RoLmdbEnvironment}; -use dozer_tracing::Labels; -use dozer_types::borrow::IntoOwned; -use dozer_types::parking_lot::Mutex; -use dozer_types::parking_lot::RwLock; -use dozer_types::types::SchemaWithIndex; -use tempdir::TempDir; -use tokio::io::AsyncRead; - -use crate::cache::CacheWriteOptions; -use crate::{ - cache::{RoCache, RoCacheManager, RwCache, RwCacheManager}, - errors::CacheError, -}; - -use super::{ - cache::{dump_restore, CacheOptions, LmdbCache, LmdbRoCache, LmdbRwCache}, - indexing::IndexingThreadPool, -}; - -#[derive(Debug, Clone)] -pub struct CacheManagerOptions { - // Total number of readers allowed - pub max_readers: u32, - // Max no of dbs - pub max_db_size: u32, - - // Total size allocated for data in a memory mapped file. - // This size is allocated at initialization. - pub max_size: usize, - - /// The chunk size when calculating intersection of index queries. - pub intersection_chunk_size: usize, - - /// Provide a path where db will be created. If nothing is provided, will default to a temp directory. - pub path: Option, - - /// Number of threads in the indexing thread pool. 
- pub num_indexing_threads: usize, -} - -impl Default for CacheManagerOptions { - fn default() -> Self { - let cache_options = CacheOptions::default(); - Self { - max_readers: cache_options.max_readers, - max_db_size: cache_options.max_db_size, - intersection_chunk_size: cache_options.intersection_chunk_size, - max_size: cache_options.max_size, - path: None, - num_indexing_threads: 4, - } - } -} - -#[derive(Debug)] -pub struct LmdbRoCacheManager { - options: CacheManagerOptions, - base_path: PathBuf, - alias_to_real_name: LmdbMap, - env: RoLmdbEnvironment, -} - -impl LmdbRoCacheManager { - pub fn new(options: CacheManagerOptions) -> Result { - let base_path = options - .path - .as_deref() - .ok_or(CacheError::PathNotInitialized)?; - let base_path = base_path.to_path_buf(); - - let env = LmdbEnvironmentManager::create_ro( - &base_path, - LMDB_CACHE_MANAGER_ALIAS_ENV_NAME, - Default::default(), - )?; - let alias_to_real_name = LmdbMap::open(&env, None)?; - - Ok(Self { - options, - base_path, - alias_to_real_name, - env, - }) - } - - // HACK: We're leaking internal types here. - pub fn open_lmdb_cache( - &self, - name_or_alias: String, - labels: Labels, - ) -> Result, CacheError> { - let name = resolve_alias(&self.env, self.alias_to_real_name, name_or_alias)?; - open_ro_cache(self.base_path.clone(), name, labels, &self.options) - } -} - -// HACK: We're leaking internal types here. -pub use super::cache::dump_restore::{begin_dump_txn, dump}; - -impl RoCacheManager for LmdbRoCacheManager { - fn open_ro_cache( - &self, - name_or_alias: String, - labels: Labels, - ) -> Result>, CacheError> { - self.open_lmdb_cache(name_or_alias, labels) - .map(|cache| cache.map(|cache| Box::new(cache) as _)) - } -} - -#[derive(Debug)] -pub struct LmdbRwCacheManager { - options: CacheManagerOptions, - base_path: PathBuf, - alias_to_real_name: LmdbMap, - env: RwLock, - indexing_thread_pool: Arc>, - _temp_dir: Option, -} - -impl LmdbRwCacheManager { - pub fn new(options: CacheManagerOptions) -> Result { - let (temp_dir, base_path) = match &options.path { - Some(path) => { - std::fs::create_dir_all(path).map_err(|e| CacheError::Io(path.clone(), e))?; - (None, path.clone()) - } - None => { - let temp_dir = TempDir::new("dozer").expect("Unable to create temp dir"); - let base_path = temp_dir.path().to_path_buf(); - (Some(temp_dir), base_path) - } - }; - - let mut env = LmdbEnvironmentManager::create_rw( - &base_path, - LMDB_CACHE_MANAGER_ALIAS_ENV_NAME, - Default::default(), - )?; - let alias_to_real_name = LmdbMap::create(&mut env, None)?; - - let indexing_thread_pool = Arc::new(Mutex::new(IndexingThreadPool::new( - options.num_indexing_threads, - ))); - - Ok(Self { - options, - base_path, - alias_to_real_name, - env: RwLock::new(env), - indexing_thread_pool, - _temp_dir: temp_dir, - }) - } - - /// Blocks current thread until all secondary indexes are up to date with the last cache commit. - /// - /// If any cache commits during this call in another thread, those commits may or may not be indexed when this function returns. 
- pub fn wait_until_indexing_catchup(&self) { - self.indexing_thread_pool.lock().wait_until_catchup(); - } - - pub async fn restore_cache( - &self, - name: String, - labels: Labels, - write_options: CacheWriteOptions, - reader: &mut (impl AsyncRead + Unpin), - ) -> Result<(), CacheError> { - dump_restore::restore( - cache_options(&self.options, self.base_path.clone(), name, labels), - write_options, - self.indexing_thread_pool.clone(), - reader, - ) - .await?; - Ok(()) - } -} - -impl RoCacheManager for LmdbRwCacheManager { - fn open_ro_cache( - &self, - name_or_alias: String, - labels: Labels, - ) -> Result>, CacheError> { - let name = resolve_alias( - self.env.read().deref(), - self.alias_to_real_name, - name_or_alias, - )?; - - // Check if the cache is already opened. - if let Some(cache) = self.indexing_thread_pool.lock().find_cache(&name) { - return Ok(Some(Box::new(cache) as _)); - } - - open_ro_cache(self.base_path.clone(), name, labels, &self.options) - .map(|cache| cache.map(|cache| Box::new(cache) as _)) - } -} - -impl RwCacheManager for LmdbRwCacheManager { - fn open_rw_cache( - &self, - name_or_alias: String, - labels: Labels, - write_options: CacheWriteOptions, - ) -> Result>, CacheError> { - let name = resolve_alias( - self.env.read().deref(), - self.alias_to_real_name, - name_or_alias, - )?; - let cache: Option> = - if LmdbEnvironmentManager::exists(&self.base_path, &name) { - let cache = LmdbRwCache::new( - None, - None, - cache_options(&self.options, self.base_path.clone(), name, labels), - write_options, - self.indexing_thread_pool.clone(), - )?; - Some(Box::new(cache)) - } else { - None - }; - Ok(cache) - } - - fn create_cache( - &self, - name: String, - labels: Labels, - schema: SchemaWithIndex, - connections: &HashSet, - write_options: CacheWriteOptions, - ) -> Result, CacheError> { - if name.is_empty() { - return Err(CacheError::EmptyName); - } - - let cache = LmdbRwCache::new( - Some(&schema), - Some(connections), - cache_options(&self.options, self.base_path.clone(), name, labels), - write_options, - self.indexing_thread_pool.clone(), - )?; - Ok(Box::new(cache)) - } - - fn create_alias(&self, name: &str, alias: &str) -> Result<(), CacheError> { - let mut env = self.env.write(); - self.alias_to_real_name - .insert_overwrite(env.txn_mut()?, alias, name)?; - env.commit()?; - Ok(()) - } -} - -const LMDB_CACHE_MANAGER_ALIAS_ENV_NAME: &str = "__DOZER_CACHE_MANAGER_ALIAS__"; - -fn resolve_alias( - env: &E, - alias_to_real_name: LmdbMap, - name_or_alias: String, -) -> Result { - Ok( - if let Some(real_name) = alias_to_real_name.get(&env.begin_txn()?, &name_or_alias)? 
{ - real_name.into_owned() - } else { - name_or_alias - }, - ) -} - -fn cache_options( - options: &CacheManagerOptions, - base_path: PathBuf, - name: String, - labels: Labels, -) -> CacheOptions { - CacheOptions { - max_db_size: options.max_db_size, - max_readers: options.max_readers, - max_size: options.max_size, - intersection_chunk_size: options.intersection_chunk_size, - path: Some((base_path, name)), - labels, - } -} - -fn open_ro_cache( - base_path: PathBuf, - name: String, - labels: Labels, - options: &CacheManagerOptions, -) -> Result, CacheError> { - let cache = if LmdbEnvironmentManager::exists(&base_path, &name) { - let cache = LmdbRoCache::new(cache_options(options, base_path, name, labels))?; - Some(cache) - } else { - None - }; - Ok(cache) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_lmdb_cache_manager() { - let cache_manager = LmdbRwCacheManager::new(Default::default()).unwrap(); - let name = cache_manager - .create_cache( - "temp".to_string(), - Default::default(), - Default::default(), - &Default::default(), - Default::default(), - ) - .unwrap() - .name() - .to_string(); - // Test open with name. - assert_eq!( - cache_manager - .open_rw_cache(name.clone(), Default::default(), Default::default()) - .unwrap() - .unwrap() - .name(), - &name - ); - // Test open with alias. - let alias = "alias".to_string(); - cache_manager.create_alias(&name, &alias).unwrap(); - assert_eq!( - cache_manager - .open_rw_cache(alias.clone(), Default::default(), Default::default()) - .unwrap() - .unwrap() - .name(), - &name - ); - assert_eq!( - cache_manager - .open_ro_cache(alias.clone(), Default::default()) - .unwrap() - .unwrap() - .name(), - &name - ); - let LmdbRwCacheManager { - base_path, - _temp_dir, - .. - } = cache_manager; - let cache_manager = LmdbRoCacheManager::new(CacheManagerOptions { - path: Some(base_path), - ..Default::default() - }) - .unwrap(); - assert_eq!( - cache_manager - .open_ro_cache(alias, Default::default()) - .unwrap() - .unwrap() - .name(), - &name - ); - } -} diff --git a/dozer-cache/src/cache/lmdb/indexing.rs b/dozer-cache/src/cache/lmdb/indexing.rs deleted file mode 100644 index efce5fad10..0000000000 --- a/dozer-cache/src/cache/lmdb/indexing.rs +++ /dev/null @@ -1,264 +0,0 @@ -use std::sync::{ - mpsc::{Receiver, Sender}, - Arc, -}; - -use dozer_storage::LmdbEnvironment; -use dozer_tracing::Labels; -use dozer_types::{log::debug, parking_lot::Mutex, types::IndexDefinition}; -use metrics::describe_counter; - -use crate::{cache::lmdb::cache::SecondaryEnvironment, errors::CacheError}; - -use super::cache::{ - CacheOptions, LmdbRoCache, MainEnvironment, RoMainEnvironment, RoSecondaryEnvironment, - RwSecondaryEnvironment, -}; - -const BUILD_INDEX_COUNTER_NAME: &str = "build_index"; - -pub fn secondary_environment_name(index: usize) -> String { - format!("{index}") -} - -#[derive(Debug)] -pub struct IndexingThreadPool { - caches: Vec, - task_completion_sender: Sender<(usize, usize)>, - task_completion_receiver: Receiver<(usize, usize)>, - pool: rayon::ThreadPool, -} - -impl IndexingThreadPool { - pub fn new(num_threads: usize) -> Self { - describe_counter!( - BUILD_INDEX_COUNTER_NAME, - "Number of operations built into indexes" - ); - - let (sender, receiver) = std::sync::mpsc::channel(); - Self { - caches: Vec::new(), - task_completion_sender: sender, - task_completion_receiver: receiver, - pool: create_thread_pool(num_threads), - } - } - - pub fn add_cache( - &mut self, - main_env: RoMainEnvironment, - options: &CacheOptions, - ) -> Result, 
CacheError> { - if let Some(cache) = self.find_cache(main_env.name()) { - return Ok(cache.secondary_envs); - } - - let mut secondary_envs = vec![]; - for (index, index_definition) in main_env.schema().1.iter().enumerate() { - let name = secondary_environment_name(index); - let secondary_env = - RwSecondaryEnvironment::new(index_definition, name.clone(), options.clone())?; - secondary_envs.push(secondary_env); - } - - Ok(self.add_cache_unsafe(main_env, secondary_envs)) - } - - /// # Safety - /// - /// User must ensure: - /// 1. The cache was never added to indexing thread pool before. - /// 2. The `secondary_envs` must belong to the same cache as `main_env`. - pub fn add_cache_unsafe( - &mut self, - main_env: RoMainEnvironment, - secondary_envs: Vec, - ) -> Vec { - if self.find_cache(main_env.name()).is_some() { - panic!("Cache with name {} already exists", main_env.name()); - } - - let ro_secondary_envs = secondary_envs - .iter() - .map(|env| env.share()) - .collect::>(); - - let secondary_envs = secondary_envs - .into_iter() - .map(|env| (Arc::new(Mutex::new(env)), false)) - .collect(); - let cache = Cache { - main_env, - secondary_envs, - }; - self.caches.push(cache); - - let index = self.caches.len() - 1; - for secondary_index in 0..ro_secondary_envs.len() { - self.spawn_task_if_not_running(index, secondary_index); - } - - ro_secondary_envs - } - - pub fn find_cache(&self, name: &str) -> Option { - for cache in self.caches.iter() { - if cache.main_env.name() == name { - let secondary_envs = cache - .secondary_envs - .iter() - .map(|(env, _)| env.lock().share()) - .collect(); - return Some(LmdbRoCache { - main_env: cache.main_env.clone(), - secondary_envs, - }); - } - } - None - } - - pub fn wake(&mut self, labels: &Labels) { - self.refresh_task_state(); - for index in 0..self.caches.len() { - let cache = &self.caches[index]; - if cache.main_env.labels() == labels { - for secondary_index in 0..cache.secondary_envs.len() { - self.spawn_task_if_not_running(index, secondary_index); - } - } - } - } - - pub fn wait_until_catchup(&mut self) { - while self - .caches - .iter() - .any(|cache| cache.secondary_envs.iter().any(|(_, running)| *running)) - { - let (index, secondary_index) = self - .task_completion_receiver - .recv() - .expect("At least one sender is alive"); - self.mark_not_running(index, secondary_index); - } - } - - fn refresh_task_state(&mut self) { - while let Ok((index, secondary_index)) = self.task_completion_receiver.try_recv() { - self.mark_not_running(index, secondary_index); - } - } - - fn mark_not_running(&mut self, index: usize, secondary_index: usize) { - let running = &mut self.caches[index].secondary_envs[secondary_index].1; - debug_assert!(*running); - *running = false; - } - - fn spawn_task_if_not_running(&mut self, index: usize, secondary_index: usize) { - let cache = &mut self.caches[index]; - let (secondary_env, running) = &mut cache.secondary_envs[secondary_index]; - if !*running { - let main_env = cache.main_env.clone(); - let secondary_env = secondary_env.clone(); - let sender = self.task_completion_sender.clone(); - self.pool.spawn(move || { - index_and_log_error(index, secondary_index, main_env, secondary_env, sender); - }); - *running = true; - } - } -} - -fn create_thread_pool(num_threads: usize) -> rayon::ThreadPool { - rayon::ThreadPoolBuilder::new() - .num_threads(num_threads) - .thread_name(|index| format!("indexing-thread-{}", index)) - .build() - .unwrap() -} - -#[derive(Debug, Clone)] -struct Cache { - main_env: RoMainEnvironment, - secondary_envs: 
Vec<(Arc>, bool)>, -} - -fn index_and_log_error( - index: usize, - secondary_index: usize, - main_env: RoMainEnvironment, - secondary_env: Arc>, - task_completion_sender: Sender<(usize, usize)>, -) { - let mut labels = main_env.labels().clone(); - labels.push("secondary_index", secondary_index.to_string()); - labels.push("secondary_index_type", secondary_index_type(&secondary_env)); - - // Loop until map full or up to date. - loop { - let mut secondary_env = secondary_env.lock(); - - match run_indexing(&main_env, &mut secondary_env, &labels) { - Ok(true) => { - break; - } - Ok(false) => { - debug!( - "Some operation can't be read from {}: {:?}", - main_env.labels(), - secondary_env.index_definition() - ); - rayon::yield_local(); - continue; - } - Err(e) => { - debug!("Error while indexing {}: {e}", main_env.labels()); - if e.is_map_full() { - panic!( - "Cache {} has reached its maximum size. Try to increase `cache_max_map_size` in the config.", - main_env.labels() - ); - } - if e.is_key_size() { - panic!("Secondary index key is too long. This usually happens with `String` fields. Try to [skip](https://getdozer.io/docs/configuration/api-endpoints#indexes) creating secondary index {:?}.", secondary_env.index_definition()); - } - } - } - } - if task_completion_sender - .send((index, secondary_index)) - .is_err() - { - debug!("`IndexingThreadPool` dropped while indexing task is running"); - } -} - -fn run_indexing( - main_env: &RoMainEnvironment, - secondary_env: &mut RwSecondaryEnvironment, - labels: &Labels, -) -> Result { - let txn = main_env.begin_txn()?; - - let span = dozer_types::tracing::span!(dozer_types::tracing::Level::TRACE, "build_indexes",); - let _enter = span.enter(); - - let result = secondary_env.index( - &txn, - main_env.operation_log().clone(), - BUILD_INDEX_COUNTER_NAME, - labels, - )?; - secondary_env.commit()?; - Ok(result) -} - -fn secondary_index_type(secondary_env: &Mutex) -> &'static str { - match secondary_env.lock().index_definition() { - IndexDefinition::SortedInverted(_) => "SortedInverted", - IndexDefinition::FullText(_) => "FullText", - } -} diff --git a/dozer-cache/src/cache/lmdb/mod.rs b/dozer-cache/src/cache/lmdb/mod.rs deleted file mode 100644 index ba7b4a4a98..0000000000 --- a/dozer-cache/src/cache/lmdb/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -mod cache; -pub mod cache_manager; -mod indexing; -mod utils; - -#[cfg(test)] -mod tests; diff --git a/dozer-cache/src/cache/lmdb/tests/basic.rs b/dozer-cache/src/cache/lmdb/tests/basic.rs deleted file mode 100644 index eda2271f2d..0000000000 --- a/dozer-cache/src/cache/lmdb/tests/basic.rs +++ /dev/null @@ -1,188 +0,0 @@ -use std::sync::Arc; - -use crate::cache::{ - expression::{self, FilterExpression, QueryExpression, Skip}, - index, - lmdb::{cache::LmdbRwCache, indexing::IndexingThreadPool}, - test_utils::{self, query_from_filter}, - RoCache, RwCache, UpsertResult, -}; -use dozer_types::{ - parking_lot::Mutex, - serde_json::Value, - types::{Field, Record, Schema}, -}; - -use super::utils::create_cache; - -fn _setup() -> (LmdbRwCache, Arc>, Schema) { - let (cache, indexing_thread_pool, schema, _) = create_cache(test_utils::schema_0); - (cache, indexing_thread_pool, schema) -} - -fn _setup_empty_primary_index() -> (LmdbRwCache, Arc>, Schema) { - let (cache, indexing_thread_pool, schema, _) = - create_cache(test_utils::schema_empty_primary_index); - (cache, indexing_thread_pool, schema) -} - -fn query_and_test(cache: &dyn RwCache, inserted_record: &Record, exp: &QueryExpression) { - let records = 
cache.query(exp).unwrap(); - assert_eq!(records[0].record, inserted_record.clone(), "must be equal"); -} - -#[test] -fn get_schema() { - let (cache, _, schema) = _setup(); - - let get_schema = &cache.get_schema().0; - assert_eq!(get_schema, &schema, "must be equal"); -} - -#[test] -fn insert_get_and_delete_record() { - let val = "bar".to_string(); - let (mut cache, indexing_thread_pool, _) = _setup(); - - assert_eq!(cache.count(&QueryExpression::with_no_limit()).unwrap(), 0); - - let record = Record::new(vec![Field::String(val.clone())]); - let UpsertResult::Inserted { meta } = cache.insert(&record).unwrap() else { - panic!("Must be inserted") - }; - cache.commit(&Default::default()).unwrap(); - indexing_thread_pool.lock().wait_until_catchup(); - - assert_eq!(cache.count(&QueryExpression::with_no_limit()).unwrap(), 1); - - let key = index::get_primary_key(&[0], &[Field::String(val.clone())]); - - let get_record = cache.get(&key).unwrap().record; - assert_eq!(get_record, record, "must be equal"); - - assert_eq!( - cache - .delete(&Record { - values: vec![Field::String(val)], - lifetime: None, - }) - .unwrap() - .unwrap(), - meta - ); - cache.commit(&Default::default()).unwrap(); - indexing_thread_pool.lock().wait_until_catchup(); - - assert_eq!(cache.count(&QueryExpression::with_no_limit()).unwrap(), 0); - - cache.get(&key).expect_err("Must not find a record"); - - assert_eq!(cache.query(&QueryExpression::default()).unwrap(), vec![]); -} - -#[test] -fn insert_and_update_record() { - let (mut cache, _, _) = _setup(); - let foo = Record::new(vec![Field::String("foo".to_string())]); - let bar = Record::new(vec![Field::String("bar".to_string())]); - let UpsertResult::Inserted { meta } = cache.insert(&foo).unwrap() else { - panic!("Must be inserted") - }; - cache.insert(&bar).unwrap(); - - let UpsertResult::Updated { old_meta, new_meta } = cache.update(&foo, &foo).unwrap() else { - panic!("Must be updated") - }; - assert_eq!(old_meta, meta); - assert_eq!(old_meta.id, new_meta.id); - assert_eq!(old_meta.version + 1, new_meta.version); -} - -fn insert_and_query_record_impl( - mut cache: LmdbRwCache, - indexing_thread_pool: Arc>, -) { - let val = "bar".to_string(); - let record = Record::new(vec![Field::String(val)]); - - cache.insert(&record).unwrap(); - cache.commit(&Default::default()).unwrap(); - indexing_thread_pool.lock().wait_until_catchup(); - - // Query with an expression - let exp = query_from_filter(FilterExpression::Simple( - "foo".to_string(), - expression::Operator::EQ, - Value::from("bar".to_string()), - )); - - query_and_test(&cache, &record, &exp); - - // Query without an expression - query_and_test( - &cache, - &record, - &QueryExpression::new(None, vec![], Some(10), Skip::Skip(0)), - ); -} - -#[test] -fn insert_and_query_record() { - let (cache, indexing_thread_pool, _) = _setup(); - insert_and_query_record_impl(cache, indexing_thread_pool); - let (cache, indexing_thread_pool, _) = _setup_empty_primary_index(); - insert_and_query_record_impl(cache, indexing_thread_pool); -} - -#[test] -// This test cases covers update of records when primary key changes because of value change in primary_key -fn update_record_when_primary_changes() { - let (mut cache, _, schema) = _setup(); - - let initial_values = vec![Field::String("1".into())]; - let initial_record = Record { - values: initial_values.clone(), - lifetime: None, - }; - - let updated_values = vec![Field::String("2".into())]; - let updated_record = Record { - values: updated_values.clone(), - lifetime: None, - }; - - 
cache.insert(&initial_record).unwrap(); - cache.commit(&Default::default()).unwrap(); - - let key = index::get_primary_key(&schema.primary_index, &initial_values); - let record = cache.get(&key).unwrap().record; - - assert_eq!(initial_values, record.values); - - cache.update(&initial_record, &updated_record).unwrap(); - cache.commit(&Default::default()).unwrap(); - - // Primary key with old values - let key = index::get_primary_key(&schema.primary_index, &initial_values); - - let record = cache.get(&key); - - assert!(record.is_err()); - - // Primary key with updated values - let key = index::get_primary_key(&schema.primary_index, &updated_values); - let record = cache.get(&key).unwrap().record; - - assert_eq!(updated_values, record.values); -} - -#[test] -fn test_cache_commit_state() { - let (mut cache, _, _) = _setup(); - assert!(cache.get_commit_state().unwrap().is_none()); - cache.commit(&Default::default()).unwrap(); - assert_eq!( - cache.get_commit_state().unwrap().unwrap(), - Default::default() - ); -} diff --git a/dozer-cache/src/cache/lmdb/tests/mod.rs b/dozer-cache/src/cache/lmdb/tests/mod.rs deleted file mode 100644 index ff169572dc..0000000000 --- a/dozer-cache/src/cache/lmdb/tests/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -mod basic; -mod read_write; -pub mod utils; diff --git a/dozer-cache/src/cache/lmdb/tests/read_write.rs b/dozer-cache/src/cache/lmdb/tests/read_write.rs deleted file mode 100644 index 65a4fa6db1..0000000000 --- a/dozer-cache/src/cache/lmdb/tests/read_write.rs +++ /dev/null @@ -1,76 +0,0 @@ -use std::sync::Arc; - -use crate::cache::expression::{FilterExpression, Operator, QueryExpression}; -use crate::cache::lmdb::cache::{CacheOptions, LmdbRoCache, LmdbRwCache}; -use crate::cache::lmdb::indexing::IndexingThreadPool; -use crate::cache::{lmdb::tests::utils as lmdb_utils, test_utils, RoCache, RwCache}; -use dozer_types::parking_lot::Mutex; -use dozer_types::serde_json::Value; -use dozer_types::types::Field; -use tempdir::TempDir; - -#[test] -fn read_and_write() { - let path = TempDir::new("dozer").unwrap(); - let path = (path.path().to_path_buf(), "temp".to_string()); - - // write and read from cache from two different threads. 
- - let schema = test_utils::schema_1(); - let indexing_thread_pool = Arc::new(Mutex::new(IndexingThreadPool::new(1))); - let mut cache_writer = LmdbRwCache::new( - Some(&schema), - None, - CacheOptions { - max_readers: 2, - max_db_size: 100, - max_size: 1024 * 1024, - path: Some(path.clone()), - intersection_chunk_size: 1, - labels: Default::default(), - }, - Default::default(), - indexing_thread_pool.clone(), - ) - .unwrap(); - - let items = vec![ - (1, Some("a".to_string()), Some(521)), - (2, Some("a".to_string()), None), - (3, None, Some(521)), - (4, None, None), - ]; - - for val in items.clone() { - lmdb_utils::insert_rec_1(&mut cache_writer, val.clone()); - } - cache_writer.commit(&Default::default()).unwrap(); - - indexing_thread_pool.lock().wait_until_catchup(); - - let read_options = CacheOptions { - path: Some(path), - ..Default::default() - }; - let cache_reader = LmdbRoCache::new(read_options).unwrap(); - for (a, b, c) in items { - let rec = cache_reader.get(&Field::Int(a).encode()).unwrap(); - let values = vec![ - Field::Int(a), - b.map_or(Field::Null, Field::String), - c.map_or(Field::Null, Field::Int), - ]; - assert_eq!(rec.record.values, values, "should be equal"); - } - let records = cache_reader - .query(&QueryExpression { - filter: Some(FilterExpression::Simple( - "a".to_string(), - Operator::EQ, - Value::from(1), - )), - ..Default::default() - }) - .unwrap(); - assert_eq!(records.len(), 1); -} diff --git a/dozer-cache/src/cache/lmdb/tests/utils.rs b/dozer-cache/src/cache/lmdb/tests/utils.rs deleted file mode 100644 index 225bc9c1fb..0000000000 --- a/dozer-cache/src/cache/lmdb/tests/utils.rs +++ /dev/null @@ -1,56 +0,0 @@ -use std::sync::Arc; - -use dozer_types::parking_lot::Mutex; -use dozer_types::types::{Field, IndexDefinition, Record, Schema, SchemaWithIndex}; - -use crate::cache::{ - lmdb::{ - cache::{LmdbCache, LmdbRwCache, SecondaryEnvironment}, - indexing::IndexingThreadPool, - }, - RoCache, RwCache, -}; - -pub fn create_cache( - schema_gen: impl FnOnce() -> SchemaWithIndex, -) -> ( - LmdbRwCache, - Arc>, - Schema, - Vec, -) { - let schema = schema_gen(); - let indexing_thread_pool = Arc::new(Mutex::new(IndexingThreadPool::new(1))); - let cache = LmdbRwCache::new( - Some(&schema), - None, - Default::default(), - Default::default(), - indexing_thread_pool.clone(), - ) - .unwrap(); - (cache, indexing_thread_pool, schema.0, schema.1) -} - -pub fn insert_rec_1(cache: &mut LmdbRwCache, (a, b, c): (i64, Option, Option)) { - let record = Record::new(vec![ - Field::Int(a), - b.map_or(Field::Null, Field::String), - c.map_or(Field::Null, Field::Int), - ]); - cache.insert(&record).unwrap(); -} - -pub fn insert_full_text(cache: &mut LmdbRwCache, (a, b): (Option, Option)) { - let record = Record::new(vec![ - a.map_or(Field::Null, Field::String), - b.map_or(Field::Null, Field::Text), - ]); - cache.insert(&record).unwrap(); -} - -pub fn get_index_counts(cache: &C) -> Vec { - (0..cache.get_schema().1.len()) - .map(|index| cache.secondary_env(index).count_data().unwrap()) - .collect() -} diff --git a/dozer-cache/src/cache/lmdb/utils.rs b/dozer-cache/src/cache/lmdb/utils.rs deleted file mode 100644 index 0ea98059fa..0000000000 --- a/dozer-cache/src/cache/lmdb/utils.rs +++ /dev/null @@ -1,133 +0,0 @@ -use std::{ - fs, - path::{Path, PathBuf}, -}; - -use crate::errors::CacheError; -use dozer_storage::{ - lmdb::EnvironmentFlags, - lmdb_storage::{ - LmdbEnvironmentManager, LmdbEnvironmentOptions, RoLmdbEnvironment, RwLmdbEnvironment, - }, -}; -use tempdir::TempDir; - -use 
super::cache::CacheOptions; - -#[allow(clippy::type_complexity)] -pub fn create_env( - options: &CacheOptions, -) -> Result<(RwLmdbEnvironment, (PathBuf, String), Option), CacheError> { - let (base_path, name, temp_dir) = match &options.path { - None => { - let base_path = - TempDir::new("dozer").map_err(|e| CacheError::Io("tempdir".into(), e))?; - ( - base_path.path().to_path_buf(), - "temp".to_string(), - Some(base_path), - ) - } - Some((base_path, name)) => { - fs::create_dir_all(base_path).map_err(|e| CacheError::Io(base_path.clone(), e))?; - (base_path.clone(), name.clone(), None) - } - }; - - let options = LmdbEnvironmentOptions::new( - options.max_db_size, - options.max_readers, - options.max_size, - EnvironmentFlags::empty(), - ); - - Ok(( - LmdbEnvironmentManager::create_rw(&base_path, &name, options)?, - (base_path, name), - temp_dir, - )) -} - -#[allow(clippy::type_complexity)] -pub fn open_env(options: &CacheOptions) -> Result<(RoLmdbEnvironment, (&Path, &str)), CacheError> { - let (base_path, name) = options - .path - .as_ref() - .ok_or(CacheError::PathNotInitialized)?; - - let env_options = LmdbEnvironmentOptions::new( - options.max_db_size, - options.max_readers, - options.max_size, - EnvironmentFlags::empty(), - ); - - Ok(( - LmdbEnvironmentManager::create_ro(base_path, name, env_options)?, - (base_path, name), - )) -} - -#[cfg(test)] -mod tests { - use dozer_storage::lmdb::{Cursor, DatabaseFlags, RoCursor, Transaction, WriteFlags}; - use dozer_types::types::Field; - - use super::*; - - fn cursor_dump(mut cursor: RoCursor) -> Vec<(&[u8], &[u8])> { - cursor - .iter_dup() - .flatten() - .collect::>>() - .unwrap() - } - - #[test] - fn duplicate_test_nested() { - let mut env = create_env(&Default::default()).unwrap().0; - - let db = env - .create_database( - Some("test"), - DatabaseFlags::DUP_SORT | DatabaseFlags::INTEGER_KEY, - ) - .unwrap(); - - let txn = env.txn_mut().unwrap(); - - let mut c_txn = txn.begin_nested_txn().unwrap(); - - let items: Vec<(i64, &[u8])> = vec![ - (1, b"a"), - (2, b"a"), - (3, b"a"), - (1, b"b"), - (2, b"b"), - (3, b"b"), - (1, b"c"), - (2, b"c"), - (3, b"c"), - (1, b"e"), - (2, b"e"), - (3, b"e"), - ]; - for (key, data) in &items { - let key = [ - "idx".as_bytes().to_vec(), - Field::Int(*key).encode(), - key.to_be_bytes().to_vec(), - ] - .join("#".as_bytes()); - c_txn.put(db, &key, data, WriteFlags::empty()).unwrap(); - } - c_txn.commit().unwrap(); - env.commit().unwrap(); - - let rtxn = env.txn_mut().unwrap(); - - let cursor = rtxn.open_ro_cursor(db).unwrap(); - let vals = cursor_dump(cursor); - assert_eq!(vals.len(), items.len(), "must have duplicate records"); - } -} diff --git a/dozer-cache/src/cache/mod.rs b/dozer-cache/src/cache/mod.rs deleted file mode 100644 index 5364b21855..0000000000 --- a/dozer-cache/src/cache/mod.rs +++ /dev/null @@ -1,167 +0,0 @@ -mod lmdb; -use std::collections::HashSet; -use std::fmt::Debug; - -use self::expression::QueryExpression; -use crate::errors::CacheError; -use dozer_tracing::Labels; -use dozer_types::models::endpoint::{ - OnDeleteResolutionTypes, OnInsertResolutionTypes, OnUpdateResolutionTypes, -}; -use dozer_types::node::SourceStates; -use dozer_types::{ - serde::{Deserialize, Serialize}, - types::{Record, SchemaWithIndex}, -}; -pub use lmdb::cache_manager::{ - begin_dump_txn, dump, CacheManagerOptions, LmdbRoCacheManager, LmdbRwCacheManager, -}; -pub mod expression; -mod index; -mod plan; -pub mod test_utils; - -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(crate = 
"dozer_types::serde")] -pub struct CacheRecord { - pub id: u64, - pub version: u32, - pub record: Record, -} - -impl CacheRecord { - pub fn new(id: u64, version: u32, record: Record) -> Self { - Self { - id, - version, - record, - } - } -} - -pub trait RoCacheManager: Send + Sync + Debug { - /// Opens a cache in read-only mode, and attach given labels. - fn open_ro_cache( - &self, - name_or_alias: String, - labels: Labels, - ) -> Result>, CacheError>; -} - -#[derive(Debug, Clone, Copy, Default)] -pub struct CacheWriteOptions { - pub insert_resolution: OnInsertResolutionTypes, - pub delete_resolution: OnDeleteResolutionTypes, - pub update_resolution: OnUpdateResolutionTypes, - pub detect_hash_collision: bool, -} - -pub trait RwCacheManager: RoCacheManager { - /// Opens a cache in read-write mode, and attach given labels. - fn open_rw_cache( - &self, - name_or_alias: String, - labels: Labels, - write_options: CacheWriteOptions, - ) -> Result>, CacheError>; - - /// Creates a new cache with given `schema`s, which can also be opened in read-only mode using `open_ro_cache`. - /// - /// Schemas cannot be changed after the cache is created. - /// - /// The name must be unique and non-empty. - fn create_cache( - &self, - name: String, - labels: Labels, - schema: SchemaWithIndex, - connections: &HashSet, - write_options: CacheWriteOptions, - ) -> Result, CacheError>; - - /// Creates an alias `alias` for a cache with name `name`. - /// - /// If `alias` already exists, it's overwritten. If cache with name `name` doesn't exist, the alias is still recorded. - fn create_alias(&self, name: &str, alias: &str) -> Result<(), CacheError>; -} - -pub trait RoCache: Send + Sync + Debug { - /// Returns the name of the cache. - fn name(&self) -> &str; - /// Returns the labels of the cache. - fn labels(&self) -> &Labels; - - // Schema Operations - fn get_schema(&self) -> &SchemaWithIndex; - - // Record Operations - fn get(&self, key: &[u8]) -> Result; - fn count(&self, query: &QueryExpression) -> Result; - fn query(&self, query: &QueryExpression) -> Result, CacheError>; - - // Cache metadata - fn get_commit_state(&self) -> Result, CacheError>; - fn is_snapshotting_done(&self) -> Result; -} - -#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)] -#[serde(crate = "dozer_types::serde")] -pub struct RecordMeta { - pub id: u64, - pub version: u32, -} - -impl RecordMeta { - pub fn new(id: u64, version: u32) -> Self { - Self { id, version } - } -} - -#[derive(Debug)] -pub enum UpsertResult { - Updated { - old_meta: RecordMeta, - new_meta: RecordMeta, - }, - Inserted { - meta: RecordMeta, - }, - Ignored, -} - -#[derive(Debug, Clone, Default, PartialEq, Eq, bincode::Encode, bincode::Decode)] -pub struct CommitState { - pub source_states: SourceStates, - pub log_position: u64, -} - -pub trait RwCache: RoCache { - /// Inserts a record into the cache. Implicitly starts a transaction if there's no active transaction. - /// - /// Depending on the `ConflictResolution` strategy, it may or may not overwrite the existing record. - fn insert(&mut self, record: &Record) -> Result; - - /// Deletes a record. Implicitly starts a transaction if there's no active transaction. - /// - /// Returns the id and version of the deleted record if it existed. - /// - /// If the schema has primary index, only fields that are part of the primary index are used to identify the record. - fn delete(&mut self, record: &Record) -> Result, CacheError>; - - /// Updates a record in the cache. 
Implicitly starts a transaction if there's no active transaction. - /// - /// Depending on the `ConflictResolution` strategy, it may actually insert the record if it doesn't exist. - /// - /// If the schema has primary index, only fields that are part of the primary index are used to identify the old record. - fn update(&mut self, old: &Record, record: &Record) -> Result; - - /// Marks a connection as snapshotting done. Implicitly starts a transaction if there's no active transaction. - fn set_connection_snapshotting_done(&mut self, connection_name: &str) - -> Result<(), CacheError>; - - /// Commits the current transaction. - fn commit(&mut self, state: &CommitState) -> Result<(), CacheError>; - - /// Upcast. - fn as_ro(&self) -> &dyn RoCache; -} diff --git a/dozer-cache/src/cache/plan/helper.rs b/dozer-cache/src/cache/plan/helper.rs deleted file mode 100644 index 11aa559f72..0000000000 --- a/dozer-cache/src/cache/plan/helper.rs +++ /dev/null @@ -1,213 +0,0 @@ -use dozer_types::types::Field; -use itertools::{Either, Itertools}; - -use crate::cache::expression::{Operator, SortDirection}; - -use super::{IndexFilter, IndexScanKind, SortedInvertedRangeQuery}; - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct RangeQuery { - pub field_index: usize, - pub kind: RangeQueryKind, -} - -impl RangeQuery { - pub fn new(field_index: usize, kind: RangeQueryKind) -> Self { - Self { field_index, kind } - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum RangeQueryKind { - Filter { - operator: Operator, - value: Field, - sort_direction: Option, - }, - OrderBy { - sort_direction: SortDirection, - }, -} - -pub fn get_all_indexes( - filters: Vec<(IndexFilter, Option)>, - range_query: Option, -) -> impl Iterator> { - // Create a full text index for every full text filter, and collect `Eq` filters. - let mut full_text_scans = vec![]; - let mut eq_filters = vec![]; - for filter in filters { - if filter.0.op.supported_by_full_text() { - full_text_scans.push(IndexScanKind::FullText { filter: filter.0 }); - } else { - debug_assert!(filter.0.op == Operator::EQ); - eq_filters.push((filter.0.field_index, filter.0.val)); - } - } - - if eq_filters.is_empty() && range_query.is_none() { - // Only full text scans. - assert!( - !full_text_scans.is_empty(), - "Must have at least one filter or range query" - ); - Either::Left(std::iter::once(full_text_scans)) - } else { - Either::Right( - get_sorted_inverted_scans(eq_filters, range_query).map(move |scan| { - let mut scans = full_text_scans.clone(); - scans.push(scan); - scans - }), - ) - } -} - -fn get_sorted_inverted_scans( - eq_filters: Vec<(usize, Field)>, - range_query: Option, -) -> impl Iterator { - if eq_filters.is_empty() { - Either::Left( - get_sorted_inverted_range_queries( - range_query.expect("Range query must not be None if eq_filters is empty"), - ) - .map(|range_query| IndexScanKind::SortedInverted { - eq_filters: vec![], - range_query: Some(range_query), - }), - ) - } else { - Either::Right(get_sorted_inverted_scans_with_eq_filters( - eq_filters, - range_query, - )) - } -} - -fn get_sorted_inverted_scans_with_eq_filters( - eq_filters: Vec<(usize, Field)>, - range_query: Option, -) -> impl Iterator { - // The `Eq` filters can be of arbitary order. 
- let num_eq_filters = eq_filters.len(); - eq_filters - .into_iter() - .permutations(num_eq_filters) - .flat_map(move |eq_filters| { - get_option_sorted_inverted_range_queries(range_query.clone()).map(move |range_query| { - IndexScanKind::SortedInverted { - eq_filters: eq_filters.clone(), - range_query, - } - }) - }) -} - -fn get_option_sorted_inverted_range_queries( - range_query: Option, -) -> impl Iterator> { - if let Some(range_query) = range_query { - Either::Left(get_sorted_inverted_range_queries(range_query).map(Some)) - } else { - Either::Right(std::iter::once(None)) - } -} - -fn get_sorted_inverted_range_queries( - range_query: RangeQuery, -) -> impl Iterator { - match range_query.kind { - RangeQueryKind::Filter { - operator, - value, - sort_direction, - } => Either::Left( - get_sort_directions(sort_direction).map(move |sort_direction| { - SortedInvertedRangeQuery { - field_index: range_query.field_index, - operator_and_value: Some((operator, value.clone())), - sort_direction, - } - }), - ), - RangeQueryKind::OrderBy { sort_direction } => { - Either::Right(std::iter::once(SortedInvertedRangeQuery { - field_index: range_query.field_index, - operator_and_value: None, - sort_direction, - })) - } - } -} - -fn get_sort_directions( - sort_direction: Option, -) -> impl Iterator + Clone { - if let Some(direction) = sort_direction { - Either::Left(std::iter::once(direction)) - } else { - Either::Right( - std::iter::once(SortDirection::Ascending) - .chain(std::iter::once(SortDirection::Descending)), - ) - } -} - -#[test] -#[should_panic] -fn get_all_indexes_from_empty_query_should_panic() { - get_all_indexes(vec![], None).collect_vec(); -} - -#[test] -fn test_get_all_indexes() { - fn check( - filters: Vec<(IndexFilter, Option)>, - range_query: Option, - expcected: Vec>, - ) { - let actual = get_all_indexes(filters, range_query).collect::>(); - assert_eq!(actual, expcected); - } - - // Only full text. - let filter = IndexFilter::new(0, Operator::Contains, Field::String("a".into())); - check( - vec![(filter.clone(), None)], - None, - vec![vec![IndexScanKind::FullText { filter }]], - ); - - // Only `Eq`. - let filter = IndexFilter::new(0, Operator::EQ, Field::String("a".into())); - check( - vec![(filter.clone(), None)], - None, - vec![vec![IndexScanKind::SortedInverted { - eq_filters: vec![(filter.field_index, filter.val)], - range_query: None, - }]], - ); - - // Only order by. 
- let direction = SortDirection::Ascending; - let range_query = RangeQuery::new( - 0, - RangeQueryKind::OrderBy { - sort_direction: direction, - }, - ); - check( - vec![], - Some(range_query.clone()), - vec![vec![IndexScanKind::SortedInverted { - eq_filters: vec![], - range_query: Some(SortedInvertedRangeQuery { - field_index: range_query.field_index, - operator_and_value: None, - sort_direction: direction, - }), - }]], - ); -} diff --git a/dozer-cache/src/cache/plan/mod.rs b/dozer-cache/src/cache/plan/mod.rs deleted file mode 100644 index 2d178d29d0..0000000000 --- a/dozer-cache/src/cache/plan/mod.rs +++ /dev/null @@ -1,61 +0,0 @@ -mod helper; -mod planner; -use dozer_types::types::Field; -pub use planner::QueryPlanner; - -use super::expression::{Operator, SortDirection}; - -#[cfg(test)] -mod tests; - -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum Plan { - IndexScans(Vec), - SeqScan(SeqScan), - ReturnEmpty, -} -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct IndexScan { - pub index_id: usize, - pub kind: IndexScanKind, -} - -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum IndexScanKind { - SortedInverted { - eq_filters: Vec<(usize, Field)>, - range_query: Option, - }, - FullText { - filter: IndexFilter, - }, -} - -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct SortedInvertedRangeQuery { - pub field_index: usize, - pub sort_direction: SortDirection, - pub operator_and_value: Option<(Operator, Field)>, -} - -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct SeqScan { - pub direction: SortDirection, -} - -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct IndexFilter { - pub field_index: usize, - pub op: Operator, - pub val: Field, -} - -impl IndexFilter { - pub fn new(field_index: usize, op: Operator, val: Field) -> Self { - Self { - field_index, - op, - val, - } - } -} diff --git a/dozer-cache/src/cache/plan/planner.rs b/dozer-cache/src/cache/plan/planner.rs deleted file mode 100644 index 1d40d461c2..0000000000 --- a/dozer-cache/src/cache/plan/planner.rs +++ /dev/null @@ -1,347 +0,0 @@ -use crate::cache::expression::{FilterExpression, Operator, SortDirection, SortOptions}; -use crate::errors::PlanError; -use dozer_types::models::endpoint::{FullText, SecondaryIndex, SortedInverted}; -use dozer_types::types::{Field, FieldDefinition, Schema}; -use dozer_types::types::{FieldType, IndexDefinition}; -use dozer_types::{json_value_to_field, serde_yaml}; - -use super::helper::{RangeQuery, RangeQueryKind}; -use super::{helper, IndexScan, Plan, SeqScan}; -use super::{IndexFilter, IndexScanKind}; - -pub struct QueryPlanner<'a> { - schema: &'a Schema, - secondary_indexes: &'a [IndexDefinition], - filter: Option<&'a FilterExpression>, - order_by: &'a SortOptions, -} -impl<'a> QueryPlanner<'a> { - pub fn new( - schema: &'a Schema, - secondary_indexes: &'a [IndexDefinition], - filter: Option<&'a FilterExpression>, - order_by: &'a SortOptions, - ) -> Self { - Self { - schema, - secondary_indexes, - filter, - order_by, - } - } - - pub fn plan(&self) -> Result { - // Collect all the filters. - // TODO: Handle filters like And([a > 0, a < 10]). - let mut filters = vec![]; - if let Some(expression) = &self.filter { - collect_filters(self.schema, expression, &mut filters)?; - } - - // Filter the sort options. - // TODO: Handle duplicate fields. - let mut order_by = vec![]; - for order in &self.order_by.0 { - // Find the field index. 
- let (field_index, _, _) = - get_field_index_and_type(&order.field_name, &self.schema.fields) - .ok_or_else(|| PlanError::FieldNotFound(order.field_name.clone()))?; - // If the field is already in a filter supported by `SortedInverted`, mark the corresponding filter. - if seen_in_sorted_inverted_filter(field_index, order.direction, &mut filters)? { - continue; - } - // This sort option needs to be in the plan. - order_by.push((field_index, order.direction)); - } - - // If no filter and sort is requested, return a SeqScan. - if filters.is_empty() && order_by.is_empty() { - return Ok(Plan::SeqScan(SeqScan { - direction: SortDirection::Ascending, - })); - } - - // If non-`Eq` filter is applied to `null` value, return empty result. - if filters - .iter() - .any(|f| matches!(f.0.val, Field::Null) && f.0.op != Operator::EQ) - { - return Ok(Plan::ReturnEmpty); - } - - // Find the range query, can be a range filter or a sort option. - let range_query = find_range_query(&mut filters, &order_by)?; - - // Generate some index scans that can answer this query, lazily. - let all_index_scans = helper::get_all_indexes(filters, range_query); - - // Check if existing secondary indexes can satisfy any of the scans. - let mut scans = None; - for index_scans in all_index_scans { - if scans.is_none() { - scans = Some(index_scans.clone()); - } - - if let Some(index_scans) = all_indexes_are_present(self.secondary_indexes, index_scans) - { - return Ok(Plan::IndexScans(index_scans)); - } - } - - Err(PlanError::MatchingIndexNotFound( - describe_index_configuration( - &self.schema.fields, - &scans.expect("Planner should always generate plan"), - ), - )) - } -} - -fn get_field_index_and_type( - field_name: &str, - fields: &[FieldDefinition], -) -> Option<(usize, FieldType, bool)> { - fields - .iter() - .enumerate() - .find(|(_, f)| f.name == field_name) - .map(|(i, f)| (i, f.typ, f.nullable)) -} - -fn collect_filters( - schema: &Schema, - expression: &FilterExpression, - filters: &mut Vec<(IndexFilter, Option)>, -) -> Result<(), PlanError> { - match expression { - FilterExpression::Simple(field_name, operator, value) => { - let (field_index, field_type, nullable) = - get_field_index_and_type(field_name, &schema.fields) - .ok_or_else(|| PlanError::FieldNotFound(field_name.clone()))?; - let field = json_value_to_field(value.clone(), field_type, nullable)?; - filters.push((IndexFilter::new(field_index, *operator, field), None)); - } - FilterExpression::And(expressions) => { - for expression in expressions { - collect_filters(schema, expression, filters)?; - } - } - } - Ok(()) -} - -fn seen_in_sorted_inverted_filter( - field_index: usize, - sort_direction: SortDirection, - filters: &mut [(IndexFilter, Option)], -) -> Result { - for filter in filters { - if filter.0.field_index == field_index { - return if !filter.0.op.supported_by_sorted_inverted() { - Err(PlanError::CannotSortFullTextFilter) - } else if let Some(direction) = filter.1 { - if direction == sort_direction { - Ok(true) - } else { - Err(PlanError::ConflictingSortOptions) - } - } else { - filter.1 = Some(sort_direction); - Ok(true) - }; - } - } - - Ok(false) -} - -fn find_range_query( - filters: &mut Vec<(IndexFilter, Option)>, - order_by: &[(usize, SortDirection)], -) -> Result, PlanError> { - let mut num_range_ops = 0; - let mut range_filter_index = None; - for (i, filter) in filters.iter().enumerate() { - if filter.0.op.is_range_operator() { - num_range_ops += 1; - range_filter_index = Some(i); - } - } - num_range_ops += order_by.len(); - if num_range_ops > 
1 { - return Err(PlanError::RangeQueryLimit); - } - Ok(if let Some(range_filter_index) = range_filter_index { - let filter = filters.remove(range_filter_index); - Some(RangeQuery::new( - filter.0.field_index, - RangeQueryKind::Filter { - operator: filter.0.op, - value: filter.0.val, - sort_direction: filter.1, - }, - )) - } else if let Some((field_index, sort_direction)) = order_by.first() { - Some(RangeQuery::new( - *field_index, - RangeQueryKind::OrderBy { - sort_direction: *sort_direction, - }, - )) - } else { - None - }) -} - -impl IndexScanKind { - fn is_supported_by_index(&self, index: &IndexDefinition) -> bool { - match (self, index) { - ( - IndexScanKind::SortedInverted { - eq_filters, - range_query, - }, - IndexDefinition::SortedInverted(fields), - ) => { - if fields.len() < eq_filters.len() { - return false; - } - if !eq_filters - .iter() - .zip(fields) - .all(|(filter, field)| filter.0 == *field) - { - return false; - } - if let Some(range_query) = range_query { - if fields.len() != eq_filters.len() + 1 { - return false; - } - let last_field = fields - .last() - .expect("We've checked `fields.len()` is at least 1"); - range_query.field_index == *last_field - } else { - fields.len() == eq_filters.len() - } - } - (IndexScanKind::FullText { filter }, IndexDefinition::FullText(field_index)) => { - filter.field_index == *field_index - } - _ => false, - } - } -} - -fn all_indexes_are_present( - indexes: &[IndexDefinition], - index_scan_kinds: Vec, -) -> Option> { - let mut scans = vec![]; - for index_scan_kind in index_scan_kinds { - let found: Option<(usize, &IndexDefinition)> = indexes - .iter() - .enumerate() - .find(|(_, i)| index_scan_kind.is_supported_by_index(i)); - - match found { - Some((idx, _)) => { - scans.push(IndexScan { - index_id: idx, - kind: index_scan_kind, - }); - } - None => return None, - } - } - Some(scans) -} - -fn describe_index_configuration( - field_definitions: &[FieldDefinition], - indexes: &[IndexScanKind], -) -> String { - let mut creates = vec![]; - for index in indexes { - match index { - IndexScanKind::FullText { filter } => { - let field = field_definitions[filter.field_index].name.clone(); - creates.push(SecondaryIndex::FullText(FullText { field })); - } - IndexScanKind::SortedInverted { - eq_filters, - range_query, - } => { - let mut fields = vec![]; - for (field_index, _) in eq_filters { - let field = field_definitions[*field_index].name.clone(); - fields.push(field); - } - if let Some(range_query) = range_query { - let field = field_definitions[range_query.field_index].name.clone(); - fields.push(field); - } - creates.push(SecondaryIndex::SortedInverted(SortedInverted { fields })); - } - } - } - - serde_yaml::to_string(&creates).expect("This serialization should never fail.") -} - -#[cfg(test)] -mod tests { - use crate::cache::plan::SortedInvertedRangeQuery; - - use super::*; - - #[test] - fn test_is_supported_by_index() { - let check_sorted_inverted = - |eq_filters: Vec, range_query: Option, index, expected: bool| { - assert_eq!( - IndexScanKind::SortedInverted { - eq_filters: eq_filters - .into_iter() - .map(|index| (index, Field::Null)) - .collect(), - range_query: range_query.map(|index| SortedInvertedRangeQuery { - field_index: index, - sort_direction: SortDirection::Ascending, - operator_and_value: None, - }) - } - .is_supported_by_index(&IndexDefinition::SortedInverted(index)), - expected - ); - }; - - check_sorted_inverted(vec![0], None, vec![0], true); - check_sorted_inverted(vec![0], None, vec![1], false); - 
check_sorted_inverted(vec![0], None, vec![0, 1], false); - check_sorted_inverted(vec![0, 1], None, vec![0], false); - check_sorted_inverted(vec![0, 1], None, vec![0, 1], true); - check_sorted_inverted(vec![], Some(0), vec![0], true); - check_sorted_inverted(vec![0], Some(1), vec![0, 1], true); - check_sorted_inverted(vec![0], Some(1), vec![0, 1, 2], false); - check_sorted_inverted(vec![0], Some(1), vec![0, 2], false); - check_sorted_inverted(vec![0], Some(1), vec![0], false); - - let full_text_scan = IndexScanKind::FullText { - filter: IndexFilter { - field_index: 0, - op: Operator::Contains, - val: Field::Null, - }, - }; - assert!(full_text_scan.is_supported_by_index(&IndexDefinition::FullText(0)),); - assert!(!full_text_scan.is_supported_by_index(&IndexDefinition::FullText(1))); - - assert!(!full_text_scan.is_supported_by_index(&IndexDefinition::SortedInverted(vec![0])),); - assert!(!IndexScanKind::SortedInverted { - eq_filters: vec![(0, Field::Null)], - range_query: None - } - .is_supported_by_index(&IndexDefinition::FullText(0)),); - } -} diff --git a/dozer-cache/src/cache/plan/tests.rs b/dozer-cache/src/cache/plan/tests.rs deleted file mode 100644 index 68176e6404..0000000000 --- a/dozer-cache/src/cache/plan/tests.rs +++ /dev/null @@ -1,135 +0,0 @@ -use super::{Plan, QueryPlanner}; -use crate::cache::{ - expression::{self, FilterExpression, Operator, SortDirection, SortOption, SortOptions}, - plan::{IndexScanKind, SortedInvertedRangeQuery}, - test_utils, -}; - -use dozer_types::{serde_json::Value, types::Field}; - -#[test] -fn test_generate_plan_simple() { - let (schema, secondary_indexes) = test_utils::schema_0(); - - let filter = FilterExpression::Simple( - "foo".to_string(), - expression::Operator::EQ, - Value::from("bar".to_string()), - ); - let plan = QueryPlanner::new( - &schema, - &secondary_indexes, - Some(&filter), - &Default::default(), - ) - .plan() - .unwrap(); - if let Plan::IndexScans(index_scans) = plan { - assert_eq!(index_scans.len(), 1); - assert_eq!(index_scans[0].index_id, 0); - match &index_scans[0].kind { - IndexScanKind::SortedInverted { - eq_filters, - range_query, - } => { - assert_eq!(eq_filters.len(), 1); - assert_eq!(eq_filters[0], (0, Field::String("bar".to_string()))); - assert_eq!(range_query, &None); - } - _ => panic!("Must be sorted inverted"), - } - } else { - panic!("IndexScan expected") - } -} - -#[test] -fn test_generate_plan_and() { - let (schema, secondary_indexes) = test_utils::schema_1(); - - let filter = FilterExpression::And(vec![ - FilterExpression::Simple("a".to_string(), expression::Operator::EQ, Value::from(1)), - FilterExpression::Simple( - "b".to_string(), - expression::Operator::EQ, - Value::from("test".to_string()), - ), - ]); - let plan = QueryPlanner::new( - &schema, - &secondary_indexes, - Some(&filter), - &Default::default(), - ) - .plan() - .unwrap(); - // Pick the 3rd index - if let Plan::IndexScans(index_scans) = plan { - assert_eq!(index_scans.len(), 1); - assert_eq!(index_scans[0].index_id, 3); - match &index_scans[0].kind { - IndexScanKind::SortedInverted { - eq_filters, - range_query, - } => { - assert_eq!(eq_filters.len(), 2); - assert_eq!(eq_filters[0], (0, Field::Int(1))); - assert_eq!(eq_filters[1], (1, Field::String("test".to_string()))); - assert_eq!(range_query, &None); - } - _ => panic!("Must be sorted inverted"), - } - } else { - panic!("IndexScan expected") - } -} - -#[test] -fn test_generate_plan_range_query_and_order_by() { - let (schema, secondary_indexes) = test_utils::schema_1(); - let filter = 
FilterExpression::Simple("c".into(), expression::Operator::GT, 1.into()); - let order_by = SortOptions(vec![SortOption { - field_name: "c".into(), - direction: SortDirection::Descending, - }]); - let planner = QueryPlanner::new(&schema, &secondary_indexes, Some(&filter), &order_by); - if let Plan::IndexScans(index_scans) = planner.plan().unwrap() { - assert_eq!(index_scans.len(), 1); - assert_eq!(index_scans[0].index_id, 2); - match &index_scans[0].kind { - IndexScanKind::SortedInverted { - eq_filters, - range_query, - } => { - assert_eq!(eq_filters.len(), 0); - assert_eq!( - range_query, - &Some(SortedInvertedRangeQuery { - field_index: 2, - sort_direction: SortDirection::Descending, - operator_and_value: Some((expression::Operator::GT, 1.into())), - }) - ); - } - _ => panic!("Must be sorted inverted"), - } - } else { - panic!("IndexScan expected") - } -} - -#[test] -fn test_generate_plan_empty() { - let (schema, secondary_indexes) = test_utils::schema_1(); - - let filter = FilterExpression::Simple("c".into(), Operator::LT, Value::Null); - let plan = QueryPlanner::new( - &schema, - &secondary_indexes, - Some(&filter), - &Default::default(), - ) - .plan() - .unwrap(); - assert!(matches!(plan, Plan::ReturnEmpty)); -} diff --git a/dozer-cache/src/cache/test_utils.rs b/dozer-cache/src/cache/test_utils.rs deleted file mode 100644 index e2acd7e62c..0000000000 --- a/dozer-cache/src/cache/test_utils.rs +++ /dev/null @@ -1,124 +0,0 @@ -use dozer_types::types::{ - FieldDefinition, IndexDefinition, Schema, SchemaWithIndex, SourceDefinition, -}; - -use super::expression::{FilterExpression, QueryExpression, Skip}; - -pub fn schema_0() -> SchemaWithIndex { - ( - Schema { - fields: vec![FieldDefinition { - name: "foo".to_string(), - typ: dozer_types::types::FieldType::String, - nullable: true, - source: SourceDefinition::Dynamic, - }], - primary_index: vec![0], - }, - vec![IndexDefinition::SortedInverted(vec![0])], - ) -} - -pub fn schema_1() -> SchemaWithIndex { - ( - Schema { - fields: vec![ - FieldDefinition { - name: "a".to_string(), - typ: dozer_types::types::FieldType::Int, - nullable: true, - source: SourceDefinition::Dynamic, - }, - FieldDefinition { - name: "b".to_string(), - typ: dozer_types::types::FieldType::String, - nullable: true, - source: SourceDefinition::Dynamic, - }, - FieldDefinition { - name: "c".to_string(), - typ: dozer_types::types::FieldType::Int, - nullable: true, - source: SourceDefinition::Dynamic, - }, - ], - primary_index: vec![0], - }, - vec![ - IndexDefinition::SortedInverted(vec![0]), - IndexDefinition::SortedInverted(vec![1]), - IndexDefinition::SortedInverted(vec![2]), - // composite index - IndexDefinition::SortedInverted(vec![0, 1]), - ], - ) -} - -pub fn schema_full_text() -> SchemaWithIndex { - ( - Schema { - fields: vec![ - FieldDefinition { - name: "foo".to_string(), - typ: dozer_types::types::FieldType::String, - nullable: false, - source: SourceDefinition::Dynamic, - }, - FieldDefinition { - name: "bar".to_string(), - typ: dozer_types::types::FieldType::Text, - nullable: false, - source: SourceDefinition::Dynamic, - }, - ], - primary_index: vec![0], - }, - vec![IndexDefinition::FullText(0), IndexDefinition::FullText(1)], - ) -} - -// This is for testing appending only schema, which doesn't need a primary index, for example, eth logs. 
-pub fn schema_empty_primary_index() -> SchemaWithIndex {
-    (
-        Schema {
-            fields: vec![FieldDefinition {
-                name: "foo".to_string(),
-                typ: dozer_types::types::FieldType::String,
-                nullable: false,
-                source: SourceDefinition::Dynamic,
-            }],
-            primary_index: vec![],
-        },
-        vec![IndexDefinition::SortedInverted(vec![0])],
-    )
-}
-
-pub fn schema_multi_indices() -> SchemaWithIndex {
-    (
-        Schema {
-            fields: vec![
-                FieldDefinition {
-                    name: "id".to_string(),
-                    typ: dozer_types::types::FieldType::Int,
-                    nullable: false,
-                    source: SourceDefinition::Dynamic,
-                },
-                FieldDefinition {
-                    name: "text".to_string(),
-                    typ: dozer_types::types::FieldType::String,
-                    nullable: false,
-                    source: SourceDefinition::Dynamic,
-                },
-            ],
-            primary_index: vec![0],
-        },
-        vec![
-            IndexDefinition::SortedInverted(vec![0]),
-            IndexDefinition::FullText(1),
-        ],
-    )
-}
-
-pub fn query_from_filter(filter: FilterExpression) -> QueryExpression {
-    QueryExpression::new(Some(filter), vec![], Some(10), Skip::Skip(0))
-}
diff --git a/dozer-cache/src/errors.rs b/dozer-cache/src/errors.rs
deleted file mode 100644
index de31bdf9af..0000000000
--- a/dozer-cache/src/errors.rs
+++ /dev/null
@@ -1,165 +0,0 @@
-use std::collections::HashSet;
-use std::path::PathBuf;
-
-use dozer_storage::errors::StorageError;
-use dozer_storage::RestoreError;
-use dozer_types::thiserror;
-use dozer_types::thiserror::Error;
-
-use dozer_log::errors::ReaderError;
-use dozer_types::errors::types::{DeserializationError, SerializationError, TypeError};
-use dozer_types::types::{Field, IndexDefinition, SchemaWithIndex};
-
-use crate::cache::RecordMeta;
-
-#[derive(Debug)]
-pub struct ConnectionMismatch {
-    pub name: String,
-    pub given: HashSet<String>,
-    pub stored: HashSet<String>,
-}
-
-#[derive(Error, Debug)]
-pub enum CacheError {
-    #[error("Io error on {0:?}: {1}")]
-    Io(PathBuf, #[source] std::io::Error),
-    #[error("Query error: {0}")]
-    Query(#[from] QueryError),
-    #[error("Index error: {0}")]
-    Index(#[from] IndexError),
-    #[error("Plan error: {0}")]
-    Plan(#[from] PlanError),
-    #[error("Type error: {0}")]
-    Type(#[from] TypeError),
-    #[error("Restore error: {0}")]
-    Restore(#[from] RestoreError),
-
-    #[error("Log error: {0}")]
-    ReaderError(#[from] ReaderError),
-
-    #[error("Storage error: {0}")]
-    Storage(#[from] dozer_storage::errors::StorageError),
-    #[error("Cache name is empty")]
-    EmptyName,
-    #[error("Schema is not found")]
-    SchemaNotFound,
-    #[error("Schema for {name} mismatch: given {given:?}, stored {stored:?}")]
-    SchemaMismatch {
-        name: String,
-        given: Box<SchemaWithIndex>,
-        stored: Box<SchemaWithIndex>,
-    },
-    #[error("Connections for {} mismatch, give: {:?}, stored: {:?})", .0.name, .0.given, .0.stored)]
-    ConnectionsMismatch(Box<ConnectionMismatch>),
-    #[error("Index definition {0} is not found")]
-    IndexDefinitionNotFound(String),
-    #[error("Index definition {name} mismatch: given {given:?}, stored {stored:?}")]
-    IndexDefinitionMismatch {
-        name: String,
-        given: IndexDefinition,
-        stored: IndexDefinition,
-    },
-    #[error("Path not initialized for Cache Reader")]
-    PathNotInitialized,
-    #[error("Attempt to delete or update a cache with append-only schema")]
-    AppendOnlySchema,
-    #[error("Primary key is not found")]
-    PrimaryKeyNotFound,
-    #[error("Primary key {key:?} already exists: record id {}, version {}, insert operation id {insert_operation_id}", .meta.id, .meta.version)]
-    PrimaryKeyExists {
-        key: Vec<(String, Field)>,
-        meta: RecordMeta,
-        insert_operation_id: u64,
-    },
-    #[error("Internal thread panic: {0}")]
-    InternalThreadPanic(#[source] tokio::task::JoinError),
-}
-
-impl CacheError {
- pub fn map_serialization_error(e: dozer_types::bincode::error::EncodeError) -> CacheError { - CacheError::Type(TypeError::SerializationError(SerializationError::Bincode( - e, - ))) - } - pub fn map_deserialization_error(e: dozer_types::bincode::error::DecodeError) -> CacheError { - CacheError::Type(TypeError::DeserializationError( - DeserializationError::Bincode(e), - )) - } - - pub fn is_map_full(&self) -> bool { - matches!( - self, - CacheError::Storage(StorageError::Lmdb(dozer_storage::lmdb::Error::MapFull)) - ) - } - - pub fn is_key_size(&self) -> bool { - matches!( - self, - CacheError::Storage(StorageError::Lmdb(dozer_storage::lmdb::Error::BadValSize)) - ) - } -} - -#[derive(Error, Debug)] -pub enum QueryError { - #[error("Failed to get a record by id - {0:?}")] - GetValue(#[source] dozer_storage::lmdb::Error), - #[error("Failed to get a schema by id - {0:?}")] - GetSchema(#[source] dozer_storage::lmdb::Error), - #[error("Failed to insert a record - {0:?}")] - InsertValue(#[source] dozer_storage::lmdb::Error), - #[error("Failed to delete a record - {0:?}")] - DeleteValue(#[source] dozer_storage::lmdb::Error), -} - -#[derive(Error, Debug)] -pub enum CompareError { - #[error("cannot read field length")] - CannotReadFieldLength, - #[error("cannot read field")] - CannotReadField, - #[error("invalid sort direction")] - InvalidSortDirection(u8), - #[error("deserialization error: {0:?}")] - DeserializationError(#[from] DeserializationError), -} - -#[derive(Error, Debug)] -pub enum IndexError { - #[error("field indexes dont match with index_scan")] - MismatchedIndexAndValues, - #[error("Expected strings for full text search")] - ExpectedStringFullText, - #[error("Field index out of range")] - FieldIndexOutOfRange, - #[error("Full text index generates one key for each field")] - IndexSingleField, - #[error("Field {0} cannot be indexed using full text")] - FieldNotCompatibleIndex(usize), - #[error("No secondary indexes defined")] - MissingSecondaryIndexes, - #[error("Unsupported Index: {0}")] - UnsupportedIndex(String), - #[error("range queries on multiple fields are not supported ")] - UnsupportedMultiRangeIndex, - #[error("Compound_index is required for fields: {0}")] - MissingCompoundIndex(String), -} - -#[derive(Error, Debug)] -pub enum PlanError { - #[error("Field {0:?} not found in query")] - FieldNotFound(String), - #[error("Type error: {0}")] - TypeError(#[from] TypeError), - #[error("Cannot sort full text filter")] - CannotSortFullTextFilter, - #[error("Conflicting sort options")] - ConflictingSortOptions, - #[error("Cannot have more than one range query")] - RangeQueryLimit, - #[error("Matching index not found. 
Try to add following secondary index configuration:\n{0}")] - MatchingIndexNotFound(String), -} diff --git a/dozer-cache/src/lib.rs b/dozer-cache/src/lib.rs deleted file mode 100644 index 1ca841385c..0000000000 --- a/dozer-cache/src/lib.rs +++ /dev/null @@ -1,6 +0,0 @@ -pub mod cache; -pub mod errors; -mod reader; - -pub use dozer_log; -pub use reader::{AccessFilter, CacheReader, Phase}; diff --git a/dozer-cache/src/main.rs b/dozer-cache/src/main.rs deleted file mode 100644 index 9309a60821..0000000000 --- a/dozer-cache/src/main.rs +++ /dev/null @@ -1,110 +0,0 @@ -use std::pin::pin; - -use clap::{Parser, Subcommand}; -use dozer_cache::cache::{ - begin_dump_txn, dump, expression::QueryExpression, CacheManagerOptions, LmdbRoCacheManager, - LmdbRwCacheManager, RoCache, -}; -use dozer_storage::generator::{Generator, IntoGenerator}; -use dozer_tracing::Labels; -use tokio::io::AsyncWriteExt; - -#[derive(Debug, Parser)] -struct Cli { - /// Dozer cache directory. - cache_dir: String, - /// The endpoint name. - endpoint: String, - /// The build name. - build: String, - #[clap(subcommand)] - command: CacheCommand, -} - -#[derive(Debug, Subcommand)] -enum CacheCommand { - /// Counts the number of records in the cache. - Count, - /// Dumps the cache to a file. - Dump { - /// The path to the file to dump to. - path: String, - }, - /// Restores the cache from a file. - Restore { - /// The path to the file to restore from. - path: String, - }, -} - -#[tokio::main] -async fn main() { - env_logger::init(); - - let cli = Cli::parse(); - let labels = labels(cli.endpoint.clone(), cli.build.clone()); - - match cli.command { - CacheCommand::Count => { - let cache_manager = LmdbRoCacheManager::new(CacheManagerOptions { - path: Some(cli.cache_dir.into()), - ..Default::default() - }) - .unwrap(); - let cache = cache_manager - .open_lmdb_cache(labels.to_non_empty_string().into_owned(), labels) - .unwrap() - .unwrap(); - let count = cache.count(&QueryExpression::with_no_limit()).unwrap(); - println!("Count: {}", count); - } - CacheCommand::Dump { path } => { - let cache_manager = LmdbRoCacheManager::new(CacheManagerOptions { - path: Some(cli.cache_dir.into()), - ..Default::default() - }) - .unwrap(); - let cache = &cache_manager - .open_lmdb_cache(labels.to_non_empty_string().into_owned(), labels) - .unwrap() - .unwrap(); - let file = tokio::fs::File::create(path).await.unwrap(); - let mut writer = tokio::io::BufWriter::new(file); - - let txn = &begin_dump_txn(cache).unwrap(); - let generator = |context| async move { dump(cache, txn, &context).await.unwrap() }; - let generator = generator.into_generator(); - for item in pin!(generator).into_iter() { - let item = item.unwrap(); - writer.write_all(&item).await.unwrap(); - } - - writer.flush().await.unwrap(); - } - CacheCommand::Restore { path } => { - let cache_manager = LmdbRwCacheManager::new(CacheManagerOptions { - path: Some(cli.cache_dir.into()), - ..Default::default() - }) - .unwrap(); - let file = tokio::fs::File::open(path).await.unwrap(); - let mut reader = tokio::io::BufReader::new(file); - cache_manager - .restore_cache( - labels.to_non_empty_string().into_owned(), - labels, - Default::default(), - &mut reader, - ) - .await - .unwrap(); - } - } -} - -fn labels(endpoint: String, build: String) -> Labels { - let mut labels = Labels::default(); - labels.push("endpoint", endpoint); - labels.push("build", build); - labels -} diff --git a/dozer-cache/src/reader.rs b/dozer-cache/src/reader.rs deleted file mode 100644 index d1e7c645f1..0000000000 --- 
a/dozer-cache/src/reader.rs
+++ /dev/null
@@ -1,105 +0,0 @@
-use crate::cache::{expression::QueryExpression, CacheRecord, CommitState, RoCache};
-
-use super::cache::expression::FilterExpression;
-use crate::errors::CacheError;
-use dozer_types::{
-    serde,
-    types::{Record, SchemaWithIndex},
-};
-use serde::{Deserialize, Serialize};
-
-#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
-#[serde(crate = "self::serde")]
-
-/// This filter gets dynamically added to the query.
-pub struct AccessFilter {
-    /// FilterExpression to evaluate access
-    pub filter: Option<FilterExpression>,
-
-    /// Fields to be restricted
-    #[serde(default)]
-    pub fields: Vec<String>,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
-#[serde(crate = "dozer_types::serde")]
-pub enum Phase {
-    Snapshotting,
-    Streaming,
-}
-
-#[derive(Debug)]
-/// CacheReader dynamically attaches permissions on top of queries
-pub struct CacheReader {
-    cache: Box<dyn RoCache>,
-}
-
-impl CacheReader {
-    pub fn new(cache: Box<dyn RoCache>) -> Self {
-        Self { cache }
-    }
-
-    // TODO: Implement check_access
-    fn check_access(&self, _rec: &Record, _access_filter: &AccessFilter) -> Result<(), CacheError> {
-        Ok(())
-    }
-
-    pub fn cache_name(&self) -> &str {
-        self.cache.name()
-    }
-
-    pub fn get_commit_state(&self) -> Result<Option<CommitState>, CacheError> {
-        self.cache.get_commit_state()
-    }
-
-    pub fn get_schema(&self) -> &SchemaWithIndex {
-        self.cache.get_schema()
-    }
-
-    pub fn get(&self, key: &[u8], access_filter: &AccessFilter) -> Result<CacheRecord, CacheError> {
-        let record = self.cache.get(key)?;
-        match self.check_access(&record.record, access_filter) {
-            Ok(_) => Ok(record),
-            Err(e) => Err(e),
-        }
-    }
-
-    pub fn query(
-        &self,
-        query: &mut QueryExpression,
-        access_filter: AccessFilter,
-    ) -> Result<Vec<CacheRecord>, CacheError> {
-        self.apply_access_filter(query, access_filter);
-        self.cache.query(query)
-    }
-
-    pub fn count(
-        &self,
-        query: &mut QueryExpression,
-        access_filter: AccessFilter,
-    ) -> Result<usize, CacheError> {
-        self.apply_access_filter(query, access_filter);
-        self.cache.count(query)
-    }
-
-    pub fn get_phase(&self) -> Result<Phase, CacheError> {
-        if self.cache.is_snapshotting_done()? {
-            Ok(Phase::Streaming)
-        } else {
-            Ok(Phase::Snapshotting)
-        }
-    }
-
-    // Apply filter if specified in access
-    fn apply_access_filter(&self, query: &mut QueryExpression, access_filter: AccessFilter) {
-        // TODO: Use `fields` in `access_filter`.
-        if let Some(access_filter) = access_filter.filter {
-            let filter = match query.filter.take() {
-                Some(query_filter) => FilterExpression::And(vec![access_filter, query_filter]),
-                None => access_filter,
-            };
-
-            query.filter = Some(filter);
-        }
-    }
-}