diff --git a/Cargo.lock b/Cargo.lock index 758356029b890..89ebbc1ca7312 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3552,12 +3552,10 @@ dependencies = [ "databend-common-storage", "databend-common-tracing", "log", - "pretty_assertions", "serde", "serde_ignored", "serde_with", "serfig", - "tempfile", "toml 0.8.22", ] @@ -3615,7 +3613,6 @@ dependencies = [ "arrow-data 56.2.0", "arrow-flight", "arrow-ipc 56.2.0", - "arrow-ord 56.2.0", "arrow-schema 56.2.0", "arrow-select 56.2.0", "async-backtrace", @@ -3727,7 +3724,6 @@ dependencies = [ "bstr", "bumpalo", "comfy-table", - "crc32fast", "ctor", "databend-common-ast", "databend-common-base", @@ -3747,11 +3743,9 @@ dependencies = [ "databend-functions-scalar-numeric-basic-arithmetic", "divan", "geo", - "geohash", "geozero", "glob", "goldenfile", - "h3o", "hex", "itertools 0.13.0", "jaq-core", @@ -3760,15 +3754,12 @@ dependencies = [ "jaq-std", "jiff", "jsonb", - "lexical-core", "libm", "log", "match-template", "md-5", "naive-cityhash", "num-traits", - "once_cell", - "proj4rs", "proptest", "rand 0.8.5", "regex", @@ -3779,7 +3770,6 @@ dependencies = [ "simdutf8", "simple_hll", "siphasher 0.3.11", - "strength_reduce", "stringslice", "twox-hash 1.6.3", "unicase", @@ -3989,7 +3979,6 @@ dependencies = [ name = "databend-common-meta-app-types" version = "0.1.0" dependencies = [ - "anyhow", "num-derive", "prost", "serde", @@ -3999,14 +3988,12 @@ dependencies = [ name = "databend-common-meta-cache" version = "0.1.0" dependencies = [ - "anyhow", "async-trait", "databend-common-base", "databend-common-meta-client", "databend-common-meta-types", "futures", "log", - "pretty_assertions", "sub-cache", "tonic 0.13.1", ] @@ -4021,7 +4008,6 @@ dependencies = [ "async-backtrace", "chrono", "databend-common-base", - "databend-common-exception", "databend-common-grpc", "databend-common-meta-api", "databend-common-meta-kvapi", @@ -4038,7 +4024,6 @@ dependencies = [ "logcall", "once_cell", "parking_lot 0.12.3", - "pretty_assertions", "prost", "rand 0.8.5", "semver", @@ -4496,7 +4481,6 @@ dependencies = [ "num-traits", "opendal", "parking_lot 0.12.3", - "pretty_assertions", "prqlc", "recursive", "regex", @@ -4693,7 +4677,6 @@ dependencies = [ "tantivy-common", "tantivy-fst", "tantivy-jieba", - "tempfile", "thrift", "typetag", "uuid", @@ -4975,7 +4958,6 @@ name = "databend-common-timezone" version = "0.1.0" dependencies = [ "jiff", - "rand 0.8.5", ] [[package]] @@ -5137,7 +5119,6 @@ dependencies = [ "databend-common-config", "databend-common-exception", "databend-common-expression", - "databend-common-functions", "databend-common-io", "databend-common-license", "databend-common-management", @@ -5163,6 +5144,7 @@ dependencies = [ "databend-enterprise-row-access-policy-feature", "databend-enterprise-storage-encryption", "databend-enterprise-stream-handler", + "databend-enterprise-table-ref-handler", "databend-enterprise-vacuum-handler", "databend-enterprise-virtual-column", "databend-query", @@ -5176,10 +5158,8 @@ dependencies = [ "log", "opendal", "serde", - "tantivy", "tempfile", "typetag", - "uuid", "walkdir", ] @@ -5229,6 +5209,18 @@ dependencies = [ "databend-common-sql", ] +[[package]] +name = "databend-enterprise-table-ref-handler" +version = "0.1.0" +dependencies = [ + "async-backtrace", + "async-trait", + "databend-common-base", + "databend-common-catalog", + "databend-common-exception", + "databend-common-sql", +] + [[package]] name = "databend-enterprise-vacuum-handler" version = "0.1.0" @@ -5483,7 +5475,6 @@ dependencies = [ "dashmap 6.1.0", 
"databend-common-ast", "databend-common-base", - "databend-common-building", "databend-common-cache", "databend-common-catalog", "databend-common-cloud-control", @@ -5537,6 +5528,7 @@ dependencies = [ "databend-enterprise-resources-management", "databend-enterprise-row-access-policy-feature", "databend-enterprise-stream-handler", + "databend-enterprise-table-ref-handler", "databend-enterprise-vacuum-handler", "databend-enterprise-virtual-column", "databend-storages-common-blocks", diff --git a/Cargo.toml b/Cargo.toml index 8143f848e0d7e..d54a9501f6ea8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -191,6 +191,7 @@ databend-enterprise-resources-management = { path = "src/query/ee_features/resou databend-enterprise-row-access-policy-feature = { path = "src/query/ee_features/row_access_policy" } databend-enterprise-storage-encryption = { path = "src/query/ee_features/storage_encryption" } databend-enterprise-stream-handler = { path = "src/query/ee_features/stream_handler" } +databend-enterprise-table-ref-handler = { path = "src/query/ee_features/table_ref_handler" } databend-enterprise-vacuum-handler = { path = "src/query/ee_features/vacuum_handler" } databend-enterprise-virtual-column = { path = "src/query/ee_features/virtual_column" } databend-functions-scalar-arithmetic = { path = "src/query/functions/src/scalars/arithmetic" } diff --git a/src/common/cloud_control/Cargo.toml b/src/common/cloud_control/Cargo.toml index 76db8de4f8ef3..a54867e1e981d 100644 --- a/src/common/cloud_control/Cargo.toml +++ b/src/common/cloud_control/Cargo.toml @@ -16,7 +16,6 @@ hyper-util = { workspace = true } prost = { workspace = true } serde = { workspace = true } tonic = { workspace = true } -#tonic-prost = { workspace = true } [build-dependencies] lenient_semver = { workspace = true } diff --git a/src/common/exception/src/exception_code.rs b/src/common/exception/src/exception_code.rs index 1470be0d0597f..7ecccbe15a6a3 100644 --- a/src/common/exception/src/exception_code.rs +++ b/src/common/exception/src/exception_code.rs @@ -333,6 +333,20 @@ build_exceptions! { UnsupportedEngineParams(2703), } +// Table reference Errors [2745-2749] +build_exceptions! { + /// Unknown reference + UnknownReference(2745), + /// Reference already exists + ReferenceAlreadyExists(2746), + /// Illegal reference + IllegalReference(2747), + /// Mismatched reference type + MismatchedReferenceType(2748), + /// Reference expired + ReferenceExpired(2749), +} + // License Errors [1401-1404] build_exceptions! 
{ /// License key parse error diff --git a/src/common/license/src/license.rs b/src/common/license/src/license.rs index f5ca9d1580ad2..58dab2edc9b23 100644 --- a/src/common/license/src/license.rs +++ b/src/common/license/src/license.rs @@ -39,6 +39,8 @@ pub enum Feature { StorageEncryption, #[serde(alias = "stream", alias = "STREAM")] Stream, + #[serde(alias = "table_ref", alias = "TABLE_REF")] + TableRef, #[serde(alias = "attach_table", alias = "ATTACH_TABLE")] AttacheTable, #[serde(alias = "amend_table", alias = "AMEND_TABLE")] @@ -80,6 +82,7 @@ impl fmt::Display for Feature { Feature::ComputedColumn => write!(f, "computed_column"), Feature::StorageEncryption => write!(f, "storage_encryption"), Feature::Stream => write!(f, "stream"), + Feature::TableRef => write!(f, "table_ref"), Feature::AttacheTable => write!(f, "attach_table"), Feature::AmendTable => write!(f, "amend_table"), Feature::SystemManagement => write!(f, "system_management"), @@ -118,6 +121,7 @@ impl Feature { | (Feature::Vacuum, Feature::Vacuum) | (Feature::LicenseInfo, Feature::LicenseInfo) | (Feature::Stream, Feature::Stream) + | (Feature::TableRef, Feature::TableRef) | (Feature::DataMask, Feature::DataMask) | (Feature::RowAccessPolicy, Feature::RowAccessPolicy) | (Feature::VirtualColumn, Feature::VirtualColumn) @@ -226,6 +230,10 @@ mod tests { Feature::Stream, serde_json::from_str::("\"Stream\"").unwrap() ); + assert_eq!( + Feature::TableRef, + serde_json::from_str::("\"TableRef\"").unwrap() + ); assert_eq!( Feature::AttacheTable, serde_json::from_str::("\"ATTACH_TABLE\"").unwrap() @@ -287,6 +295,7 @@ mod tests { Feature::ComputedColumn, Feature::StorageEncryption, Feature::Stream, + Feature::TableRef, Feature::AttacheTable, Feature::AmendTable, Feature::HilbertClustering, @@ -298,7 +307,7 @@ mod tests { }; assert_eq!( - "LicenseInfo{ type: enterprise, org: databend, tenants: [databend_tenant,foo], features: [amend_table,attach_table,computed_column,data_mask,hilbert_clustering,license_info,private_task,row_access_policy,storage_encryption,stream,system_history,vacuum,virtual_column,workload_group] }", + "LicenseInfo{ type: enterprise, org: databend, tenants: [databend_tenant,foo], features: [amend_table,attach_table,computed_column,data_mask,hilbert_clustering,license_info,private_task,row_access_policy,storage_encryption,stream,system_history,table_ref,vacuum,virtual_column,workload_group] }", license_info.to_string() ); } diff --git a/src/common/timezone/Cargo.toml b/src/common/timezone/Cargo.toml index eb71aec84d983..6d8dc7029fadb 100644 --- a/src/common/timezone/Cargo.toml +++ b/src/common/timezone/Cargo.toml @@ -10,7 +10,6 @@ edition = { workspace = true } jiff = { workspace = true } [dev-dependencies] -rand = { workspace = true } [lints] workspace = true diff --git a/src/meta/app-types/Cargo.toml b/src/meta/app-types/Cargo.toml index fedc6d1dfa00d..94b0dee5ad3d4 100644 --- a/src/meta/app-types/Cargo.toml +++ b/src/meta/app-types/Cargo.toml @@ -12,7 +12,6 @@ prost = { workspace = true } serde = { workspace = true } [dev-dependencies] -anyhow = { workspace = true } [package.metadata.cargo-machete] ignored = ["num-derive", "prost"] diff --git a/src/meta/app/src/schema/mod.rs b/src/meta/app/src/schema/mod.rs index 12597512975ef..a76d59e4e48e2 100644 --- a/src/meta/app/src/schema/mod.rs +++ b/src/meta/app/src/schema/mod.rs @@ -125,6 +125,8 @@ pub use table::SetTableColumnMaskPolicyReply; pub use table::SetTableColumnMaskPolicyReq; pub use table::SetTableRowAccessPolicyReply; pub use table::SetTableRowAccessPolicyReq; 
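
Ahead of the new `SnapshotRef`/`SnapshotRefType` exports just below, note that
access to table references is gated by the `Feature::TableRef` variant added
to license.rs above. A minimal sketch of the alias handling it declares
(hedged: the crate path is assumed here, and this only mirrors the existing
round-trip tests rather than adding behavior):

    use databend_common_license::license::Feature;

    fn main() {
        // The variant name plus both declared serde aliases all deserialize
        // to Feature::TableRef.
        for s in ["\"TableRef\"", "\"table_ref\"", "\"TABLE_REF\""] {
            assert_eq!(Feature::TableRef, serde_json::from_str::<Feature>(s).unwrap());
        }
    }
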
+pub use table::SnapshotRef;
+pub use table::SnapshotRefType;
 pub use table::SwapTableReply;
 pub use table::SwapTableReq;
 pub use table::TableCopiedFileInfo;
diff --git a/src/meta/app/src/schema/table/mod.rs b/src/meta/app/src/schema/table/mod.rs
index 4b74c18e10642..f8b47260501f2 100644
--- a/src/meta/app/src/schema/table/mod.rs
+++ b/src/meta/app/src/schema/table/mod.rs
@@ -26,6 +26,7 @@ use std::time::Duration;
 use anyerror::func_name;
 use chrono::DateTime;
 use chrono::Utc;
+use databend_common_ast::ast::SnapshotRefType as AstSnapshotRefType;
 use databend_common_exception::ErrorCode;
 use databend_common_exception::Result;
 use databend_common_expression::ColumnId;
@@ -184,6 +185,54 @@ pub struct TableMeta {
     pub row_access_policy_columns_ids: Option<RowAccessPolicyColumnMap>,
     pub indexes: BTreeMap<String, TableIndex>,
     pub constraints: BTreeMap<String, Constraint>,
+
+    pub refs: BTreeMap<String, SnapshotRef>,
+}
+
+// Inspired by iceberg(https://github.com/apache/iceberg-rust/blob/main/crates/iceberg/src/spec/snapshot.rs#L443-L449)
+#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, Eq, PartialEq)]
+pub struct SnapshotRef {
+    /// The unique id of the reference.
+    pub id: u64,
+    /// After this timestamp, the reference becomes inactive.
+    pub expire_at: Option<DateTime<Utc>>,
+    /// The type of the reference.
+    pub typ: SnapshotRefType,
+    /// The location of the snapshot that this reference points to.
+    pub loc: String,
+}
+
+#[derive(
+    serde::Serialize,
+    serde::Deserialize,
+    Clone,
+    Debug,
+    Eq,
+    PartialEq,
+    num_derive::FromPrimitive,
+    Hash,
+)]
+pub enum SnapshotRefType {
+    Branch = 0,
+    Tag = 1,
+}
+
+impl From<&AstSnapshotRefType> for SnapshotRefType {
+    fn from(v: &AstSnapshotRefType) -> Self {
+        match v {
+            AstSnapshotRefType::Branch => SnapshotRefType::Branch,
+            AstSnapshotRefType::Tag => SnapshotRefType::Tag,
+        }
+    }
+}
+
+impl Display for SnapshotRefType {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            SnapshotRefType::Branch => write!(f, "BRANCH"),
+            SnapshotRefType::Tag => write!(f, "TAG"),
+        }
+    }
 }
 
 #[derive(serde::Serialize, serde::Deserialize, Clone, Debug, Eq, PartialEq)]
@@ -316,6 +365,31 @@ impl TableInfo {
             .clone()
             .map(|k| (self.meta.cluster_key_seq, k))
     }
+
+    pub fn get_table_ref(&self, typ: Option<&SnapshotRefType>, name: &str) -> Result<&SnapshotRef> {
+        let Some(table_ref) = self.meta.refs.get(name) else {
+            return Err(ErrorCode::UnknownReference(format!(
+                "Unknown reference '{}' in table {}",
+                name, self.desc
+            )));
+        };
+        let ref_type = &table_ref.typ;
+        if let Some(typ) = typ {
+            if ref_type != typ {
+                return Err(ErrorCode::MismatchedReferenceType(format!(
+                    "'{}' is a {} reference, please use 'AT({} => {})' instead.",
+                    name, ref_type, ref_type, name,
+                )));
+            }
+        }
+        if table_ref.expire_at.is_some_and(|v| v < Utc::now()) {
+            return Err(ErrorCode::ReferenceExpired(format!(
+                "{} '{}' in table {} is expired",
+                ref_type, name, self.desc,
+            )));
+        }
+        Ok(table_ref)
+    }
 }
 
 impl Default for TablePartition {
@@ -359,6 +433,7 @@ impl Default for TableMeta {
             row_access_policy_columns_ids: None,
             indexes: BTreeMap::new(),
             constraints: BTreeMap::new(),
+            refs: BTreeMap::new(),
         }
     }
 }
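
A note on the expiry rule above, with a minimal self-contained sketch (only
`chrono` assumed; `is_expired` and `main` are illustrative names, not part of
the patch). `get_table_ref` treats `expire_at == None` as "never expires" and
rejects a reference only once its timestamp is strictly in the past:

    use chrono::{DateTime, Duration, Utc};

    // Same predicate as `expire_at.is_some_and(|v| v < Utc::now())` in
    // `TableInfo::get_table_ref`, with the clock passed in for testability.
    fn is_expired(expire_at: Option<DateTime<Utc>>, now: DateTime<Utc>) -> bool {
        expire_at.is_some_and(|v| v < now)
    }

    fn main() {
        let now = Utc::now();
        assert!(!is_expired(None, now)); // no expiry recorded: always usable
        assert!(!is_expired(Some(now + Duration::days(1)), now)); // still valid
        assert!(is_expired(Some(now - Duration::days(1)), now)); // ReferenceExpired
    }
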
diff --git a/src/meta/cache/Cargo.toml b/src/meta/cache/Cargo.toml
index 53e4617bc1dac..6f7e46f59c60f 100644
--- a/src/meta/cache/Cargo.toml
+++ b/src/meta/cache/Cargo.toml
@@ -31,8 +31,6 @@ sub-cache = { workspace = true }
 tonic = { workspace = true }
 
 [dev-dependencies]
-anyhow = { workspace = true }
-pretty_assertions = { workspace = true }
 
 [lints]
 workspace = true
diff --git a/src/meta/client/Cargo.toml b/src/meta/client/Cargo.toml
index 30ba89b711c99..20bd4567f92c2 100644
--- a/src/meta/client/Cargo.toml
+++ b/src/meta/client/Cargo.toml
@@ -39,9 +39,7 @@ tonic = { workspace = true }
 
 [dev-dependencies]
 anyhow = { workspace = true }
-databend-common-exception = { workspace = true }
 databend-common-version = { workspace = true }
-pretty_assertions = { workspace = true }
 rand = { workspace = true }
 
 [lints]
diff --git a/src/meta/proto-conv/src/table_from_to_protobuf_impl.rs b/src/meta/proto-conv/src/table_from_to_protobuf_impl.rs
index 0ff39f0904e7a..b5029e6f930e9 100644
--- a/src/meta/proto-conv/src/table_from_to_protobuf_impl.rs
+++ b/src/meta/proto-conv/src/table_from_to_protobuf_impl.rs
@@ -210,6 +210,10 @@ impl FromToProto for mt::TableMeta {
         for (constraint_name, constraint) in p.constraints {
             constraints.insert(constraint_name, mt::Constraint::from_pb(constraint)?);
         }
+        let mut refs = BTreeMap::new();
+        for (ref_name, snapshot_ref) in p.refs {
+            refs.insert(ref_name, mt::SnapshotRef::from_pb(snapshot_ref)?);
+        }
         let v = Self {
             schema: Arc::new(ex::TableSchema::from_pb(schema)?),
             engine: p.engine,
@@ -254,6 +258,7 @@
             indexes,
             virtual_schema,
             constraints,
+            refs,
         };
         Ok(v)
     }
@@ -267,6 +272,10 @@
         for (constraint_name, constraint) in &self.constraints {
             constraints.insert(constraint_name.clone(), constraint.to_pb()?);
         }
+        let mut refs = BTreeMap::new();
+        for (ref_name, snapshot_ref) in &self.refs {
+            refs.insert(ref_name.clone(), snapshot_ref.to_pb()?);
+        }
         let p = pb::TableMeta {
             ver: VER,
             min_reader_ver: MIN_READER_VER,
@@ -315,6 +324,7 @@
                 .map(VirtualDataSchema::to_pb)
                 .transpose()?,
             constraints,
+            refs,
         };
         Ok(p)
     }
@@ -481,3 +491,33 @@ impl FromToProto for mt::TableIndex {
         Ok(p)
     }
 }
+
+impl FromToProto for mt::SnapshotRef {
+    type PB = pb::SnapshotRef;
+    fn get_pb_ver(p: &Self::PB) -> u64 {
+        p.ver
+    }
+    fn from_pb(p: pb::SnapshotRef) -> Result<Self, Incompatible> {
+        reader_check_msg(p.ver, p.min_reader_ver)?;
+        let v = Self {
+            id: p.id,
+            expire_at: p.expire_at.map(DateTime::<Utc>::from_pb).transpose()?,
+            typ: FromPrimitive::from_i32(p.typ)
+                .ok_or_else(|| Incompatible::new(format!("invalid RefType: {}", p.typ)))?,
+            loc: p.loc,
+        };
+        Ok(v)
+    }
+
+    fn to_pb(&self) -> Result<Self::PB, Incompatible> {
+        let p = pb::SnapshotRef {
+            ver: VER,
+            min_reader_ver: MIN_READER_VER,
+            id: self.id,
+            expire_at: self.expire_at.map(|x| x.to_pb()).transpose()?,
+            typ: self.typ.clone() as i32,
+            loc: self.loc.clone(),
+        };
+        Ok(p)
+    }
+}
diff --git a/src/meta/proto-conv/src/util.rs b/src/meta/proto-conv/src/util.rs
index 3deca046806b6..fdf91a1f23389 100644
--- a/src/meta/proto-conv/src/util.rs
+++ b/src/meta/proto-conv/src/util.rs
@@ -191,6 +191,7 @@ const META_CHANGE_LOG: &[(u64, &str)] = &[
     (159, "2025-11-18: Add: Grant/OwnershipMaskingPolicyObject and masking policy privileges"),
     (160, "2025-11-27: Add: udf.proto/UserDefinedFunction add update_on field"),
     (161, "2025-12-04: Add: Grant/OwnershipRowAccessPolicyObject and row access policy privileges"),
+    (162, "2025-12-09: Add: SnapshotRef"),
     // Dear developer:
     // If you're gonna add a new metadata version, you'll have to add a test for it.
// You could just copy an existing test file(e.g., `../tests/it/v024_table_meta.rs`) diff --git a/src/meta/proto-conv/tests/it/main.rs b/src/meta/proto-conv/tests/it/main.rs index 9f73f286ad259..9e322fb02616a 100644 --- a/src/meta/proto-conv/tests/it/main.rs +++ b/src/meta/proto-conv/tests/it/main.rs @@ -153,3 +153,4 @@ mod v158_udtf_server; mod v159_grant_object_masking_policy; mod v160_udf_update_on; mod v161_grant_object_row_access_policy; +mod v162_snapshot_ref; diff --git a/src/meta/proto-conv/tests/it/proto_conv.rs b/src/meta/proto-conv/tests/it/proto_conv.rs index 4a56759c2c70c..91db82d4e9612 100644 --- a/src/meta/proto-conv/tests/it/proto_conv.rs +++ b/src/meta/proto-conv/tests/it/proto_conv.rs @@ -173,6 +173,7 @@ fn new_table_meta() -> mt::TableMeta { row_access_policy_columns_ids: None, indexes: btreemap! {}, constraints: btreemap! {}, + refs: btreemap! {}, } } diff --git a/src/meta/proto-conv/tests/it/v002_table_meta.rs b/src/meta/proto-conv/tests/it/v002_table_meta.rs index de78b0937c73a..9bdb44dafb012 100644 --- a/src/meta/proto-conv/tests/it/v002_table_meta.rs +++ b/src/meta/proto-conv/tests/it/v002_table_meta.rs @@ -148,6 +148,7 @@ fn test_decode_v2_table_meta() -> anyhow::Result<()> { row_access_policy_columns_ids: None, indexes: btreemap! {}, constraints: btreemap! {}, + refs: btreemap! {}, }; common::test_pb_from_to(func_name!(), want())?; diff --git a/src/meta/proto-conv/tests/it/v010_table_meta.rs b/src/meta/proto-conv/tests/it/v010_table_meta.rs index d1dac823d276d..f81ec44901ea0 100644 --- a/src/meta/proto-conv/tests/it/v010_table_meta.rs +++ b/src/meta/proto-conv/tests/it/v010_table_meta.rs @@ -150,6 +150,7 @@ fn test_decode_v10_table_meta() -> anyhow::Result<()> { row_access_policy_columns_ids: None, indexes: btreemap! {}, constraints: btreemap! {}, + refs: btreemap! {}, }; common::test_pb_from_to(func_name!(), want())?; diff --git a/src/meta/proto-conv/tests/it/v012_table_meta.rs b/src/meta/proto-conv/tests/it/v012_table_meta.rs index 30e6d3dd2f06f..46b411e141bb0 100644 --- a/src/meta/proto-conv/tests/it/v012_table_meta.rs +++ b/src/meta/proto-conv/tests/it/v012_table_meta.rs @@ -152,6 +152,7 @@ fn test_decode_v12_table_meta() -> anyhow::Result<()> { row_access_policy_columns_ids: None, indexes: btreemap! {}, constraints: btreemap! {}, + refs: btreemap! {}, }; common::test_pb_from_to(func_name!(), want())?; diff --git a/src/meta/proto-conv/tests/it/v023_table_meta.rs b/src/meta/proto-conv/tests/it/v023_table_meta.rs index 9a7c1737bc668..9cacb1cc774da 100644 --- a/src/meta/proto-conv/tests/it/v023_table_meta.rs +++ b/src/meta/proto-conv/tests/it/v023_table_meta.rs @@ -152,6 +152,7 @@ fn test_decode_v23_table_meta() -> anyhow::Result<()> { row_access_policy_columns_ids: None, indexes: btreemap! {}, constraints: btreemap! {}, + refs: btreemap! {}, }; common::test_pb_from_to(func_name!(), want())?; diff --git a/src/meta/proto-conv/tests/it/v024_table_meta.rs b/src/meta/proto-conv/tests/it/v024_table_meta.rs index 4eb06f4cd066b..40fdf5ca79261 100644 --- a/src/meta/proto-conv/tests/it/v024_table_meta.rs +++ b/src/meta/proto-conv/tests/it/v024_table_meta.rs @@ -152,6 +152,7 @@ fn test_decode_v24_table_meta() -> anyhow::Result<()> { row_access_policy_columns_ids: None, indexes: btreemap! {}, constraints: btreemap! {}, + refs: btreemap! 
{}, }; common::test_pb_from_to(func_name!(), want())?; diff --git a/src/meta/proto-conv/tests/it/v033_table_meta.rs b/src/meta/proto-conv/tests/it/v033_table_meta.rs index 1c03c31ee3bdc..4540627f8f9db 100644 --- a/src/meta/proto-conv/tests/it/v033_table_meta.rs +++ b/src/meta/proto-conv/tests/it/v033_table_meta.rs @@ -154,6 +154,7 @@ fn test_decode_v33_table_meta() -> anyhow::Result<()> { row_access_policy_columns_ids: None, indexes: btreemap! {}, constraints: btreemap! {}, + refs: btreemap! {}, }; common::test_pb_from_to(func_name!(), want())?; diff --git a/src/meta/proto-conv/tests/it/v040_table_meta.rs b/src/meta/proto-conv/tests/it/v040_table_meta.rs index d1a5514971dc1..d63ecf9bceb3a 100644 --- a/src/meta/proto-conv/tests/it/v040_table_meta.rs +++ b/src/meta/proto-conv/tests/it/v040_table_meta.rs @@ -154,6 +154,7 @@ fn test_decode_v40_table_meta() -> anyhow::Result<()> { row_access_policy_columns_ids: None, indexes: btreemap! {}, constraints: btreemap! {}, + refs: btreemap! {}, }; common::test_pb_from_to(func_name!(), want())?; diff --git a/src/meta/proto-conv/tests/it/v044_table_meta.rs b/src/meta/proto-conv/tests/it/v044_table_meta.rs index 13e37821b6f24..43b682eeafe26 100644 --- a/src/meta/proto-conv/tests/it/v044_table_meta.rs +++ b/src/meta/proto-conv/tests/it/v044_table_meta.rs @@ -105,6 +105,7 @@ fn test_decode_v44_table_meta() -> anyhow::Result<()> { row_access_policy_columns_ids: None, indexes: btreemap! {}, constraints: btreemap! {}, + refs: btreemap! {}, }; common::test_load_old(func_name!(), bytes.as_slice(), 44, want())?; diff --git a/src/meta/proto-conv/tests/it/v055_table_meta.rs b/src/meta/proto-conv/tests/it/v055_table_meta.rs index e85d29b373c51..34cd26e0819f5 100644 --- a/src/meta/proto-conv/tests/it/v055_table_meta.rs +++ b/src/meta/proto-conv/tests/it/v055_table_meta.rs @@ -96,6 +96,7 @@ fn test_decode_v55_table_meta() -> anyhow::Result<()> { indexes: btreemap! {}, row_access_policy_columns_ids: None, constraints: btreemap! {}, + refs: btreemap! {}, }; common::test_pb_from_to(func_name!(), want())?; common::test_load_old(func_name!(), bytes.as_slice(), 55, want())?; diff --git a/src/meta/proto-conv/tests/it/v074_table_db_meta.rs b/src/meta/proto-conv/tests/it/v074_table_db_meta.rs index 7f421ffd91d1c..68e55072cc58f 100644 --- a/src/meta/proto-conv/tests/it/v074_table_db_meta.rs +++ b/src/meta/proto-conv/tests/it/v074_table_db_meta.rs @@ -95,6 +95,7 @@ fn test_decode_v74_table_meta() -> anyhow::Result<()> { indexes: btreemap! {}, row_access_policy_columns_ids: None, constraints: btreemap! {}, + refs: btreemap! {}, }; common::test_pb_from_to(func_name!(), want())?; common::test_load_old(func_name!(), table_meta_v74.as_slice(), 74, want())?; diff --git a/src/meta/proto-conv/tests/it/v080_geometry_datatype.rs b/src/meta/proto-conv/tests/it/v080_geometry_datatype.rs index 41b0e1a0946c7..b52d03839b26e 100644 --- a/src/meta/proto-conv/tests/it/v080_geometry_datatype.rs +++ b/src/meta/proto-conv/tests/it/v080_geometry_datatype.rs @@ -220,6 +220,7 @@ fn test_decode_v80_table_meta() -> anyhow::Result<()> { indexes: btreemap! {}, row_access_policy_columns_ids: None, constraints: btreemap! {}, + refs: btreemap! 
{}, }; common::test_pb_from_to(func_name!(), want())?; common::test_load_old(func_name!(), table_meta_v80.as_slice(), 80, want())?; diff --git a/src/meta/proto-conv/tests/it/v082_table_index.rs b/src/meta/proto-conv/tests/it/v082_table_index.rs index 83736d464f7e1..43c87728b9df2 100644 --- a/src/meta/proto-conv/tests/it/v082_table_index.rs +++ b/src/meta/proto-conv/tests/it/v082_table_index.rs @@ -104,6 +104,7 @@ fn test_decode_v82_table_meta() -> anyhow::Result<()> { }}, row_access_policy_columns_ids: None, constraints: btreemap! {}, + refs: btreemap! {}, }; common::test_pb_from_to(func_name!(), want())?; common::test_load_old(func_name!(), table_meta_v82.as_slice(), 82, want())?; diff --git a/src/meta/proto-conv/tests/it/v085_table_index.rs b/src/meta/proto-conv/tests/it/v085_table_index.rs index f5b73f21ec99d..b905215547db8 100644 --- a/src/meta/proto-conv/tests/it/v085_table_index.rs +++ b/src/meta/proto-conv/tests/it/v085_table_index.rs @@ -104,6 +104,7 @@ fn test_decode_v85_table_meta() -> anyhow::Result<()> { }}, row_access_policy_columns_ids: None, constraints: btreemap! {}, + refs: btreemap! {}, }; common::test_pb_from_to(func_name!(), want())?; common::test_load_old(func_name!(), table_meta_v85.as_slice(), 85, want())?; diff --git a/src/meta/proto-conv/tests/it/v086_table_index.rs b/src/meta/proto-conv/tests/it/v086_table_index.rs index 819703a23668a..2cb1b24993b68 100644 --- a/src/meta/proto-conv/tests/it/v086_table_index.rs +++ b/src/meta/proto-conv/tests/it/v086_table_index.rs @@ -107,6 +107,7 @@ fn test_decode_v86_table_meta() -> anyhow::Result<()> { }}, row_access_policy_columns_ids: None, constraints: btreemap! {}, + refs: btreemap! {}, }; common::test_pb_from_to(func_name!(), want())?; common::test_load_old(func_name!(), table_meta_v86.as_slice(), 86, want())?; diff --git a/src/meta/proto-conv/tests/it/v094_table_meta.rs b/src/meta/proto-conv/tests/it/v094_table_meta.rs index 8b1c44ac82a4b..1ba73c1c52e60 100644 --- a/src/meta/proto-conv/tests/it/v094_table_meta.rs +++ b/src/meta/proto-conv/tests/it/v094_table_meta.rs @@ -94,6 +94,7 @@ fn test_decode_v94_table_meta() -> anyhow::Result<()> { row_access_policy_columns_ids: None, indexes: btreemap! {}, constraints: btreemap! {}, + refs: btreemap! {}, }; common::test_pb_from_to(func_name!(), want())?; common::test_load_old(func_name!(), bytes.as_slice(), 94, want())?; diff --git a/src/meta/proto-conv/tests/it/v107_geography_datatype.rs b/src/meta/proto-conv/tests/it/v107_geography_datatype.rs index 43ddd7010f0a4..3577c43b5ed10 100644 --- a/src/meta/proto-conv/tests/it/v107_geography_datatype.rs +++ b/src/meta/proto-conv/tests/it/v107_geography_datatype.rs @@ -225,6 +225,7 @@ fn test_decode_v107_table_meta() -> anyhow::Result<()> { row_access_policy_columns_ids: None, indexes: btreemap! {}, constraints: btreemap! {}, + refs: btreemap! {}, }; common::test_pb_from_to(func_name!(), want())?; common::test_load_old(func_name!(), table_meta_v107.as_slice(), 107, want())?; diff --git a/src/meta/proto-conv/tests/it/v114_interval_datatype.rs b/src/meta/proto-conv/tests/it/v114_interval_datatype.rs index 029df20198205..d064de51916ab 100644 --- a/src/meta/proto-conv/tests/it/v114_interval_datatype.rs +++ b/src/meta/proto-conv/tests/it/v114_interval_datatype.rs @@ -216,6 +216,7 @@ fn test_decode_v114_table_meta() -> anyhow::Result<()> { row_access_policy_columns_ids: None, indexes: btreemap! {}, constraints: btreemap! {}, + refs: btreemap! 
{}, }; common::test_pb_from_to(func_name!(), want())?; common::test_load_old(func_name!(), table_meta_v114.as_slice(), 114, want())?; diff --git a/src/meta/proto-conv/tests/it/v122_virtual_schema.rs b/src/meta/proto-conv/tests/it/v122_virtual_schema.rs index 30f93fc677df3..e7e420b1f2d9b 100644 --- a/src/meta/proto-conv/tests/it/v122_virtual_schema.rs +++ b/src/meta/proto-conv/tests/it/v122_virtual_schema.rs @@ -175,6 +175,7 @@ fn test_decode_v122_table_meta() -> anyhow::Result<()> { row_access_policy_columns_ids: None, indexes: btreemap! {}, constraints: btreemap! {}, + refs: btreemap! {}, }; common::test_load_old(func_name!(), table_meta_v122.as_slice(), 122, want())?; common::test_pb_from_to(func_name!(), want())?; diff --git a/src/meta/proto-conv/tests/it/v129_vector_datatype.rs b/src/meta/proto-conv/tests/it/v129_vector_datatype.rs index b6fec43c8b2a9..c0d3b8471f3b0 100644 --- a/src/meta/proto-conv/tests/it/v129_vector_datatype.rs +++ b/src/meta/proto-conv/tests/it/v129_vector_datatype.rs @@ -266,6 +266,7 @@ fn test_decode_v129_table_meta() -> anyhow::Result<()> { options: btreemap! {s("type") => s("hnsw")}, }}, constraints: btreemap! {}, + refs: btreemap! {}, }; common::test_pb_from_to(func_name!(), want())?; common::test_load_old(func_name!(), table_meta_v129.as_slice(), 129, want())?; diff --git a/src/meta/proto-conv/tests/it/v142_table_row_access_policy.rs b/src/meta/proto-conv/tests/it/v142_table_row_access_policy.rs index 542d41d15efd2..3f0c097e96b5c 100644 --- a/src/meta/proto-conv/tests/it/v142_table_row_access_policy.rs +++ b/src/meta/proto-conv/tests/it/v142_table_row_access_policy.rs @@ -56,6 +56,7 @@ fn test_decode_v142_table_meta() -> anyhow::Result<()> { row_access_policy_columns_ids: None, indexes: BTreeMap::default(), constraints: BTreeMap::default(), + refs: BTreeMap::default(), }; common::test_pb_from_to(func_name!(), want())?; common::test_load_old(func_name!(), table_meta_v142.as_slice(), 142, want())?; diff --git a/src/meta/proto-conv/tests/it/v146_constraint.rs b/src/meta/proto-conv/tests/it/v146_constraint.rs index d6a829c0ad80e..d2afb8d9e390c 100644 --- a/src/meta/proto-conv/tests/it/v146_constraint.rs +++ b/src/meta/proto-conv/tests/it/v146_constraint.rs @@ -74,6 +74,7 @@ fn test_decode_v146_constraint() -> anyhow::Result<()> { "constraint_1".to_string() => Constraint::Check("c1 > 10".to_string()), "constraint_2".to_string() => Constraint::Check("c1 != 0".to_string()), }, + refs: btreemap! {}, }; common::test_pb_from_to(func_name!(), want())?; common::test_load_old(func_name!(), table_meta_v142.as_slice(), 146, want())?; diff --git a/src/meta/proto-conv/tests/it/v148_virtual_schema.rs b/src/meta/proto-conv/tests/it/v148_virtual_schema.rs index 5a0dd1b6414f9..7d9294fdfcea7 100644 --- a/src/meta/proto-conv/tests/it/v148_virtual_schema.rs +++ b/src/meta/proto-conv/tests/it/v148_virtual_schema.rs @@ -188,6 +188,7 @@ fn test_decode_v148_table_meta() -> anyhow::Result<()> { row_access_policy_columns_ids: None, indexes: btreemap! {}, constraints: btreemap! {}, + refs: btreemap! 
{}, }; common::test_load_old(func_name!(), table_meta_v148.as_slice(), 148, want())?; common::test_pb_from_to(func_name!(), want())?; diff --git a/src/meta/proto-conv/tests/it/v151_row_access_column_map.rs b/src/meta/proto-conv/tests/it/v151_row_access_column_map.rs index 890a30ea4d001..593ab56c5439b 100644 --- a/src/meta/proto-conv/tests/it/v151_row_access_column_map.rs +++ b/src/meta/proto-conv/tests/it/v151_row_access_column_map.rs @@ -60,6 +60,7 @@ fn test_decode_v151_table_meta() -> anyhow::Result<()> { }), indexes: BTreeMap::default(), constraints: BTreeMap::default(), + refs: BTreeMap::default(), }; common::test_pb_from_to(func_name!(), want())?; common::test_load_old(func_name!(), table_meta_v151.as_slice(), 151, want())?; diff --git a/src/meta/proto-conv/tests/it/v153_security_column_map.rs b/src/meta/proto-conv/tests/it/v153_security_column_map.rs index fbd6eba20f2bb..5b24e47840c7e 100644 --- a/src/meta/proto-conv/tests/it/v153_security_column_map.rs +++ b/src/meta/proto-conv/tests/it/v153_security_column_map.rs @@ -68,6 +68,7 @@ fn test_decode_v152_table_meta() -> anyhow::Result<()> { }), indexes: BTreeMap::default(), constraints: BTreeMap::default(), + refs: BTreeMap::default(), }; common::test_pb_from_to(func_name!(), want())?; common::test_load_old(func_name!(), table_meta_v153.as_slice(), 153, want())?; diff --git a/src/meta/proto-conv/tests/it/v162_snapshot_ref.rs b/src/meta/proto-conv/tests/it/v162_snapshot_ref.rs new file mode 100644 index 0000000000000..f6b26961035ba --- /dev/null +++ b/src/meta/proto-conv/tests/it/v162_snapshot_ref.rs @@ -0,0 +1,98 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use chrono::TimeZone; +use chrono::Utc; +use databend_common_expression as ce; +use databend_common_meta_app::schema as mt; +use databend_common_meta_app::schema::SnapshotRef; +use databend_common_meta_app::schema::SnapshotRefType; +use fastrace::func_name; +use maplit::btreemap; +use maplit::btreeset; + +use crate::common; + +// These bytes are built when a new version in introduced, +// and are kept for backward compatibility test. +// +// ************************************************************* +// * These messages should never be updated, * +// * only be added when a new version is added, * +// * or be removed when an old version is no longer supported. 
* +// ************************************************************* +// +// The message bytes are built from the output of `test_pb_from_to()` +#[test] +fn test_decode_v162_snapshot_ref() -> anyhow::Result<()> { + let table_meta_v162 = vec![ + 10, 7, 160, 6, 162, 1, 168, 6, 24, 64, 0, 162, 1, 23, 50, 48, 49, 52, 45, 49, 49, 45, 50, + 56, 32, 49, 50, 58, 48, 48, 58, 48, 57, 32, 85, 84, 67, 170, 1, 23, 50, 48, 49, 52, 45, 49, + 49, 45, 50, 57, 32, 49, 50, 58, 48, 48, 58, 49, 48, 32, 85, 84, 67, 186, 1, 7, 160, 6, 162, + 1, 168, 6, 24, 226, 1, 1, 1, 170, 2, 49, 10, 8, 98, 114, 97, 110, 99, 104, 95, 49, 18, 37, + 8, 1, 18, 23, 50, 48, 49, 52, 45, 49, 49, 45, 50, 56, 32, 49, 50, 58, 48, 48, 58, 48, 57, + 32, 85, 84, 67, 34, 1, 97, 160, 6, 162, 1, 168, 6, 24, 170, 2, 23, 10, 5, 116, 97, 103, 95, + 49, 18, 14, 8, 2, 24, 1, 34, 1, 99, 160, 6, 162, 1, 168, 6, 24, 160, 6, 162, 1, 168, 6, 24, + ]; + + let want = || mt::TableMeta { + schema: Arc::new(ce::TableSchema::default()), + engine: s(""), + storage_params: None, + part_prefix: s(""), + engine_options: btreemap! {}, + options: btreemap! {}, + cluster_key: None, + cluster_key_seq: 0, + created_on: Utc.with_ymd_and_hms(2014, 11, 28, 12, 0, 9).unwrap(), + updated_on: Utc.with_ymd_and_hms(2014, 11, 29, 12, 0, 10).unwrap(), + comment: s(""), + field_comments: vec![], + virtual_schema: None, + drop_on: None, + statistics: Default::default(), + shared_by: btreeset! {1}, + column_mask_policy: None, + column_mask_policy_columns_ids: btreemap! {}, + row_access_policy: None, + row_access_policy_columns_ids: None, + indexes: btreemap! {}, + constraints: btreemap! {}, + refs: btreemap! { + "branch_1".to_string() => SnapshotRef { + id: 1, + expire_at: Some(Utc.with_ymd_and_hms(2014, 11, 28, 12, 0, 9).unwrap()), + typ: SnapshotRefType::Branch, + loc: "a".to_string(), + }, + "tag_1".to_string() => SnapshotRef { + id: 2, + expire_at: None, + typ: SnapshotRefType::Tag, + loc: "c".to_string(), + } + }, + }; + common::test_pb_from_to(func_name!(), want())?; + + common::test_load_old(func_name!(), table_meta_v162.as_slice(), 162, want())?; + + Ok(()) +} + +fn s(ss: impl ToString) -> String { + ss.to_string() +} diff --git a/src/meta/protos/proto/table.proto b/src/meta/protos/proto/table.proto index d6b3fb33a6342..3dca6770f8d60 100644 --- a/src/meta/protos/proto/table.proto +++ b/src/meta/protos/proto/table.proto @@ -141,6 +141,23 @@ message TableMeta { map constraints = 34; optional RowAccessPolicyColumnMap row_access_policy_columns_ids = 35; map column_mask_policy_columns_ids = 36; + + map refs = 37; +} + +message SnapshotRef { + uint64 ver = 100; + uint64 min_reader_ver = 101; + + enum RefType { + Branch = 0; + Tag = 1; + } + + uint64 id = 1; + optional string expire_at = 2; + RefType typ = 3; + string loc = 4; } message RowAccessPolicyColumnMap { diff --git a/src/query/ast/src/ast/query.rs b/src/query/ast/src/ast/query.rs index ccab4ded2c831..d7c588370cad2 100644 --- a/src/query/ast/src/ast/query.rs +++ b/src/query/ast/src/ast/query.rs @@ -30,6 +30,7 @@ use crate::ast::Hint; use crate::ast::Identifier; use crate::ast::Lambda; use crate::ast::SelectStageOptions; +use crate::ast::SnapshotRefType; use crate::ast::WindowDefinition; use crate::ParseError; use crate::Result; @@ -571,6 +572,10 @@ pub enum TimeTravelPoint { database: Option, name: Identifier, }, + TableRef { + typ: SnapshotRefType, + name: Identifier, + }, } impl Display for TimeTravelPoint { @@ -597,6 +602,9 @@ impl Display for TimeTravelPoint { )?; write!(f, ")")?; } + TimeTravelPoint::TableRef { typ, 
name } => { + write!(f, "({} => {})", typ, name)?; + } } Ok(()) @@ -845,6 +853,7 @@ pub enum TableReference { catalog: Option, database: Option, table: Identifier, + ref_name: Option, alias: Option, temporal: Option, with_options: Option, @@ -926,6 +935,7 @@ impl Display for TableReference { catalog, database, table, + ref_name, alias, temporal, with_options, @@ -937,6 +947,9 @@ impl Display for TableReference { f, catalog.iter().chain(database.iter()).chain(Some(table)), )?; + if let Some(ref_name) = ref_name { + write!(f, "/{ref_name}")?; + } if let Some(temporal) = temporal { write!(f, " {temporal}")?; diff --git a/src/query/ast/src/ast/statements/delete.rs b/src/query/ast/src/ast/statements/delete.rs index d8112e402c4f7..cbf9bf5f04e7f 100644 --- a/src/query/ast/src/ast/statements/delete.rs +++ b/src/query/ast/src/ast/statements/delete.rs @@ -18,15 +18,20 @@ use std::fmt::Formatter; use derive_visitor::Drive; use derive_visitor::DriveMut; +use crate::ast::write_dot_separated_list; use crate::ast::Expr; use crate::ast::Hint; -use crate::ast::TableReference; +use crate::ast::Identifier; +use crate::ast::TableAlias; use crate::ast::With; #[derive(Debug, Clone, PartialEq, Drive, DriveMut)] pub struct DeleteStmt { pub hints: Option, - pub table: TableReference, + pub catalog: Option, + pub database: Option, + pub table: Identifier, + pub table_alias: Option, pub selection: Option, // With clause, common table expression pub with: Option, @@ -41,7 +46,17 @@ impl Display for DeleteStmt { if let Some(hints) = &self.hints { write!(f, "{} ", hints)?; } - write!(f, "FROM {}", self.table)?; + write!(f, "FROM ")?; + write_dot_separated_list( + f, + self.catalog + .iter() + .chain(&self.database) + .chain(Some(&self.table)), + )?; + if let Some(alias) = &self.table_alias { + write!(f, " AS {}", alias)?; + } if let Some(conditions) = &self.selection { write!(f, " WHERE {conditions}")?; } diff --git a/src/query/ast/src/ast/statements/merge_into.rs b/src/query/ast/src/ast/statements/merge_into.rs index 19551c9baa337..946e3ef281ac8 100644 --- a/src/query/ast/src/ast/statements/merge_into.rs +++ b/src/query/ast/src/ast/statements/merge_into.rs @@ -205,6 +205,7 @@ impl MutationSource { catalog: catalog.clone(), database: database.clone(), table: table.clone(), + ref_name: None, alias: alias.clone(), temporal: None, with_options: with_options.clone(), diff --git a/src/query/ast/src/ast/statements/table.rs b/src/query/ast/src/ast/statements/table.rs index abe58053be3be..8148f55f7f527 100644 --- a/src/query/ast/src/ast/statements/table.rs +++ b/src/query/ast/src/ast/statements/table.rs @@ -434,6 +434,21 @@ impl Display for AlterTableStmt { } } +#[derive(Debug, Clone, PartialEq, Drive, DriveMut)] +pub enum SnapshotRefType { + Branch, + Tag, +} + +impl Display for SnapshotRefType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + SnapshotRefType::Branch => write!(f, "BRANCH"), + SnapshotRefType::Tag => write!(f, "TAG"), + } + } +} + #[derive(Debug, Clone, PartialEq, Drive, DriveMut)] pub enum AlterTableAction { RenameTable { @@ -497,6 +512,17 @@ pub enum AlterTableAction { ModifyConnection { new_connection: BTreeMap, }, + CreateTableRef { + ref_type: SnapshotRefType, + ref_name: Identifier, + travel_point: Option, + #[drive(skip)] + retain: Option, + }, + DropTableRef { + ref_type: SnapshotRefType, + ref_name: Identifier, + }, } impl Display for AlterTableAction { @@ -594,6 +620,30 @@ impl Display for AlterTableAction { AlterTableAction::DropAllRowAccessPolicies => { write!(f, "DROP 
ALL ROW ACCESS POLICIES")? } + AlterTableAction::CreateTableRef { + ref_type, + ref_name, + travel_point, + retain, + } => { + write!(f, "CREATE {ref_type} {ref_name}")?; + if let Some(travel_point) = travel_point { + write!(f, " AT {travel_point}")?; + } + if let Some(retain) = retain { + let days = Duration::from_secs(60 * 60 * 24); + if retain >= &days { + let days = retain.as_secs() / (60 * 60 * 24); + write!(f, " RETAIN {days} DAYS ")?; + } else { + let seconds = retain.as_secs(); + write!(f, " RETAIN {seconds} SECONDS ")?; + } + } + } + AlterTableAction::DropTableRef { ref_type, ref_name } => { + write!(f, "DROP {ref_type} {ref_name}")?; + } }; Ok(()) } @@ -726,15 +776,15 @@ pub struct VacuumTemporaryFiles { impl Display for crate::ast::VacuumTemporaryFiles { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "VACUUM TEMPORARY FILES ")?; + write!(f, "VACUUM TEMPORARY FILES")?; if let Some(retain) = &self.retain { let days = Duration::from_secs(60 * 60 * 24); if retain >= &days { let days = retain.as_secs() / (60 * 60 * 24); - write!(f, "RETAIN {days} DAYS ")?; + write!(f, " RETAIN {days} DAYS")?; } else { let seconds = retain.as_secs(); - write!(f, "RETAIN {seconds} SECONDS ")?; + write!(f, " RETAIN {seconds} SECONDS")?; } } diff --git a/src/query/ast/src/ast/statements/update.rs b/src/query/ast/src/ast/statements/update.rs index 2dbf27d33b18f..1f182a834f724 100644 --- a/src/query/ast/src/ast/statements/update.rs +++ b/src/query/ast/src/ast/statements/update.rs @@ -59,7 +59,7 @@ impl Display for UpdateStmt { .chain(Some(&self.table)), )?; if let Some(alias) = &self.table_alias { - write!(f, " AS {}", alias.name)?; + write!(f, " AS {}", alias)?; } write!(f, " SET ")?; write_comma_separated_list(f, &self.update_list)?; diff --git a/src/query/ast/src/parser/common.rs b/src/query/ast/src/parser/common.rs index 5db06eeef6eac..b4a47976a5b48 100644 --- a/src/query/ast/src/parser/common.rs +++ b/src/query/ast/src/parser/common.rs @@ -288,6 +288,7 @@ pub fn table_reference_only(i: Input) -> IResult { catalog, database, table, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -311,6 +312,7 @@ pub fn column_reference_only(i: Input) -> IResult<(TableReference, Identifier)> catalog, database, table, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -384,6 +386,49 @@ pub fn dot_separated_idents_1_to_2(i: Input) -> IResult<(Option, Ide .parse(i) } +pub type DotSeparatedIdentifiersWithRef = ( + Option, + Option, + Identifier, + Option, +); + +/// Parse one to three idents separated by dots, with optional "/ref" at the end. +/// +/// Compatible with `dot_separated_idents_1_to_3`. +/// +/// Examples: +/// - `table` +/// - `db.table` +/// - `catalog.db.table` +/// - `table/branch` +/// - `db.table/branch` +/// - `catalog.db.table/tag_v1` +/// +/// Returns (catalog, database, table, ref_name) +pub fn dot_separated_idents_with_ref(i: Input) -> IResult { + map( + rule! { + // 1~3 dot-separated identifiers + #ident ~ ( "." ~ #ident ~ ( "." ~ #ident )? )? ~ ( "/" ~ #ident )? 
+ }, + |res| match res { + (ident2, None, opt_ref) => (None, None, ident2, opt_ref.map(|(_, r)| r)), + + // db.table / db.table@ref + (ident1, Some((_, ident2, None)), opt_ref) => { + (None, Some(ident1), ident2, opt_ref.map(|(_, r)| r)) + } + + // catalog.db.table / catalog.db.table@ref + (ident0, Some((_, ident1, Some((_, ident2)))), opt_ref) => { + (Some(ident0), Some(ident1), ident2, opt_ref.map(|(_, r)| r)) + } + }, + ) + .parse(i) +} + /// Parse one to three idents separated by a dot, fulfilling from the right. /// /// Example: `db.table.column` diff --git a/src/query/ast/src/parser/query.rs b/src/query/ast/src/parser/query.rs index 3931829f0a238..ba70dc1a002a1 100644 --- a/src/query/ast/src/parser/query.rs +++ b/src/query/ast/src/parser/query.rs @@ -566,6 +566,21 @@ pub fn travel_point(i: Input) -> IResult { .parse(i) } +pub fn at_table_ref(i: Input) -> IResult { + map( + rule! { "(" ~ ( BRANCH | TAG ) ~ "=>" ~ #ident ~ ")" }, + |(_, token, _, name, _)| { + let typ = match token.kind { + TokenKind::BRANCH => SnapshotRefType::Branch, + TokenKind::TAG => SnapshotRefType::Tag, + _ => unreachable!(), + }; + TimeTravelPoint::TableRef { typ, name } + }, + ) + .parse(i) +} + pub fn at_snapshot_or_ts(i: Input) -> IResult { let at_snapshot = map( rule! { "(" ~ SNAPSHOT ~ "=>" ~ #literal_string ~ ")" }, @@ -753,6 +768,7 @@ pub enum TableReferenceElement { catalog: Option, database: Option, table: Identifier, + ref_name: Option, alias: Option, temporal: Option, with_options: Option, @@ -796,10 +812,10 @@ pub enum TableReferenceElement { pub fn table_reference_element(i: Input) -> IResult> { let aliased_table = map( rule! { - #dot_separated_idents_1_to_3 ~ #temporal_clause? ~ #with_options? ~ #table_alias? ~ #pivot? ~ #unpivot? ~ SAMPLE? ~ (BLOCK ~ "(" ~ #expr ~ ")")? ~ (ROW ~ "(" ~ #expr ~ ROWS? ~ ")")? + #dot_separated_idents_with_ref ~ #temporal_clause? ~ #with_options? ~ #table_alias? ~ #pivot? ~ #unpivot? ~ SAMPLE? ~ (BLOCK ~ "(" ~ #expr ~ ")")? ~ (ROW ~ "(" ~ #expr ~ ROWS? ~ ")")? }, |( - (catalog, database, table), + (catalog, database, table, ref_name), temporal, with_options, alias, @@ -814,6 +830,7 @@ pub fn table_reference_element(i: Input) -> IResult>> PrattParser catalog, database, table, + ref_name, alias, temporal, with_options, @@ -1037,6 +1055,7 @@ impl<'a, I: Iterator>> PrattParser catalog, database, table, + ref_name, alias, temporal, with_options, diff --git a/src/query/ast/src/parser/statement.rs b/src/query/ast/src/parser/statement.rs index 869d7b008ac3c..dabfb9d50dbab 100644 --- a/src/query/ast/src/parser/statement.rs +++ b/src/query/ast/src/parser/statement.rs @@ -273,12 +273,15 @@ pub fn statement_body(i: Input) -> IResult { let delete = map( rule! { - #with? ~ DELETE ~ #hint? ~ FROM ~ #table_reference_with_alias ~ ( WHERE ~ ^#expr )? + #with? ~ DELETE ~ #hint? ~ FROM ~ #dot_separated_idents_1_to_3 ~ #table_alias? ~ ( WHERE ~ ^#expr )? 
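
// Note: DELETE now takes plain dot-separated idents plus an optional alias
// rather than a full table reference, so the new `/ref` suffix does not
// apply to mutation targets:
//
//     DELETE FROM db.t AS a WHERE a.c > 0;   -- parses
//     DELETE FROM db.t/b1;                   -- rejected by this rule
//
// The bespoke `table_reference_with_alias` helper this replaces is removed
// further down in this file.
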
}, - |(with, _, hints, _, table, opt_selection)| { + |(with, _, hints, _, (catalog, database, table), table_alias, opt_selection)| { Statement::Delete(DeleteStmt { hints, + catalog, + database, table, + table_alias, selection: opt_selection.map(|(_, selection)| selection), with, }) @@ -4566,28 +4569,69 @@ pub fn alter_table_action(i: Input) -> IResult { }, ); - rule!( - #alter_table_cluster_key - | #drop_table_cluster_key - | #drop_constraint - | #rename_table - | #swap_with - | #rename_column - | #modify_table_comment - | #add_column - | #drop_column - | #modify_column - | #recluster_table - | #revert_table - | #set_table_options - | #unset_table_options - | #refresh_cache - | #modify_table_connection - | #drop_all_row_access_polices - | #drop_row_access_policy - | #add_row_access_policy - | #add_constraint - ) + let create_snapshot_ref = map( + rule! { + CREATE ~ ( BRANCH | TAG ) ~ #ident ~ ( AT ~ ^(#travel_point | #at_table_ref) )? ~ (RETAIN ~ #literal_duration)? + }, + |(_, token, ref_name, opt_travel_point, retain)| { + let ref_type = match token.kind { + TokenKind::BRANCH => SnapshotRefType::Branch, + TokenKind::TAG => SnapshotRefType::Tag, + _ => unreachable!(), + }; + + AlterTableAction::CreateTableRef { + ref_type, + ref_name, + travel_point: opt_travel_point.map(|(_, point)| point), + retain: retain.map(|(_, reatin)| reatin), + } + }, + ); + + let drop_snapshot_ref = map( + rule! { + DROP ~ ( BRANCH | TAG ) ~ #ident + }, + |(_, token, ref_name)| { + let ref_type = match token.kind { + TokenKind::BRANCH => SnapshotRefType::Branch, + TokenKind::TAG => SnapshotRefType::Tag, + _ => unreachable!(), + }; + + AlterTableAction::DropTableRef { ref_type, ref_name } + }, + ); + + alt(( + rule!( + #create_snapshot_ref + | #drop_snapshot_ref + ), + rule!( + #alter_table_cluster_key + | #drop_table_cluster_key + | #drop_constraint + | #rename_table + | #swap_with + | #rename_column + | #modify_table_comment + | #add_column + | #drop_column + | #modify_column + | #recluster_table + | #revert_table + | #set_table_options + | #unset_table_options + | #refresh_cache + | #modify_table_connection + | #drop_all_row_access_polices + | #drop_row_access_policy + | #add_row_access_policy + | #add_constraint + ), + )) .parse(i) } @@ -5376,31 +5420,6 @@ pub fn presign_option(i: Input) -> IResult { .parse(i) } -pub fn table_reference_with_alias(i: Input) -> IResult { - map( - consumed(rule! { - #dot_separated_idents_1_to_3 ~ #alias_name? - }), - |(span, ((catalog, database, table), alias))| TableReference::Table { - span: transform_span(span.tokens), - catalog, - database, - table, - alias: alias.map(|v| TableAlias { - name: v, - columns: vec![], - keep_database_name: false, - }), - temporal: None, - with_options: None, - pivot: None, - unpivot: None, - sample: None, - }, - ) - .parse(i) -} - pub fn update_expr(i: Input) -> IResult { map(rule! 
{ ( #ident ~ "=" ~ ^#expr ) }, |(name, _, expr)| { UpdateExpr { name, expr } diff --git a/src/query/ast/src/parser/token.rs b/src/query/ast/src/parser/token.rs index 086d1c10f5e4f..65867b56e0cc1 100644 --- a/src/query/ast/src/parser/token.rs +++ b/src/query/ast/src/parser/token.rs @@ -419,6 +419,8 @@ pub enum TokenKind { BOOLEAN, #[token("BOTH", ignore(ascii_case))] BOTH, + #[token("BRANCH", ignore(ascii_case))] + BRANCH, #[token("BY", ignore(ascii_case))] BY, #[token("BROTLI", ignore(ascii_case))] @@ -1224,6 +1226,8 @@ pub enum TokenKind { TABLE, #[token("TABLES", ignore(ascii_case))] TABLES, + #[token("TAG", ignore(ascii_case))] + TAG, #[token("TARGET_LAG", ignore(ascii_case))] TARGET_LAG, #[token("TEXT", ignore(ascii_case))] diff --git a/src/query/ast/tests/it/testdata/query.txt b/src/query/ast/tests/it/testdata/query.txt index 952eb8c03ac06..670638bb630b5 100644 --- a/src/query/ast/tests/it/testdata/query.txt +++ b/src/query/ast/tests/it/testdata/query.txt @@ -200,6 +200,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -221,6 +222,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -425,6 +427,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -508,6 +511,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -575,6 +579,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: Some( TimeTravel( @@ -671,6 +676,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: Some( Changes( @@ -804,6 +810,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: Some( TableAlias { name: Identifier { @@ -890,6 +897,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: Some( TableAlias { name: Identifier { @@ -1026,6 +1034,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: Some( TableAlias { name: Identifier { @@ -1119,6 +1128,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -1140,6 +1150,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -1216,6 +1227,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -1237,6 +1249,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -1358,6 +1371,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -1379,6 +1393,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -1500,6 +1515,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -1521,6 +1537,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -1651,6 +1668,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -1672,6 +1690,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -1766,6 +1785,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ 
-1787,6 +1807,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -1881,6 +1902,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -1902,6 +1924,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -1925,6 +1948,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -2044,6 +2068,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -2124,6 +2149,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -2284,6 +2310,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -2364,6 +2391,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -2515,6 +2543,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -2595,6 +2624,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -2755,6 +2785,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -2834,6 +2865,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -2925,6 +2957,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -3101,6 +3134,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -3122,6 +3156,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -3143,6 +3178,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -3310,6 +3346,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -3372,6 +3409,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -3454,6 +3492,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -3685,6 +3724,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -3866,6 +3906,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -3887,6 +3928,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: Some( TableAlias { name: Identifier { @@ -4101,6 +4143,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -4122,6 +4165,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -4382,6 +4426,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -4432,6 +4477,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -4508,6 +4554,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -4558,6 +4605,7 @@ 
Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -4641,6 +4689,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -4691,6 +4740,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -4743,6 +4793,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -4826,6 +4877,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -4876,6 +4928,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -4928,6 +4981,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -5084,6 +5138,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -5168,6 +5223,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -5261,6 +5317,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -5345,6 +5402,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -5440,6 +5498,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -5497,6 +5556,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -5547,6 +5607,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -5632,6 +5693,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -5682,6 +5744,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -5734,6 +5797,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -5817,6 +5881,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -5867,6 +5932,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -5919,6 +5985,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -5995,6 +6062,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -6052,6 +6120,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -6102,6 +6171,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -6490,6 +6560,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -6654,6 +6725,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -6802,6 +6874,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -7012,6 +7085,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -7208,6 +7282,7 @@ Query { quote: 
None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -7315,6 +7390,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -7456,6 +7532,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -7581,6 +7658,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -7690,6 +7768,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -7883,6 +7962,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -8187,6 +8267,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -8509,6 +8590,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -8750,6 +8832,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -9171,6 +9254,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -9336,6 +9420,7 @@ Query { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, diff --git a/src/query/ast/tests/it/testdata/raw-insert.txt b/src/query/ast/tests/it/testdata/raw-insert.txt index 41f34434d9281..a3b5e4d91d671 100644 --- a/src/query/ast/tests/it/testdata/raw-insert.txt +++ b/src/query/ast/tests/it/testdata/raw-insert.txt @@ -151,6 +151,7 @@ Insert( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, diff --git a/src/query/ast/tests/it/testdata/script.txt b/src/query/ast/tests/it/testdata/script.txt index b2b2c2610268b..835a370edce96 100644 --- a/src/query/ast/tests/it/testdata/script.txt +++ b/src/query/ast/tests/it/testdata/script.txt @@ -510,6 +510,7 @@ Return { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -1831,6 +1832,7 @@ Loop { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, diff --git a/src/query/ast/tests/it/testdata/stmt.txt b/src/query/ast/tests/it/testdata/stmt.txt index 1a59d06352de6..1205d0086321a 100644 --- a/src/query/ast/tests/it/testdata/stmt.txt +++ b/src/query/ast/tests/it/testdata/stmt.txt @@ -923,6 +923,7 @@ Replace( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -1037,6 +1038,7 @@ Explain { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -1181,6 +1183,7 @@ Explain { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -1296,6 +1299,7 @@ Explain { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -1378,6 +1382,7 @@ Explain { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -1595,6 +1600,7 @@ CreateIndex( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -1791,6 +1797,7 @@ CreateIndex( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -2504,6 +2511,7 @@ CreateTable( quote: None, ident_type: None, }, + ref_name: None, alias: 
None, temporal: None, with_options: None, @@ -5759,6 +5767,7 @@ CreateTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -6325,6 +6334,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -6550,6 +6560,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -6621,6 +6632,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -6699,6 +6711,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -6777,6 +6790,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -6855,6 +6869,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -6924,6 +6939,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -6945,6 +6961,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -6966,6 +6983,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -7035,6 +7053,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -7056,6 +7075,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -7077,6 +7097,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -7264,6 +7285,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -7285,6 +7307,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -7426,6 +7449,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -7447,6 +7471,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -7588,6 +7613,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -7609,6 +7635,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -7750,6 +7777,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -7771,6 +7799,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -7912,6 +7941,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -7933,6 +7963,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -8074,6 +8105,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -8095,6 +8127,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -8236,6 +8269,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -8257,6 +8291,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: 
None, @@ -8519,6 +8554,7 @@ StatementWithSettings { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -8540,6 +8576,7 @@ StatementWithSettings { quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -8682,6 +8719,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -8703,6 +8741,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -8844,6 +8883,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -8865,6 +8905,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -9006,6 +9047,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -9027,6 +9069,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -9098,6 +9141,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -9237,6 +9281,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -9258,6 +9303,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -9347,6 +9393,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -9368,6 +9415,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -9457,6 +9505,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -9478,6 +9527,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -9567,6 +9617,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -9588,6 +9639,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -9677,6 +9729,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -9698,6 +9751,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -9769,6 +9823,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -9879,6 +9934,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -9962,6 +10018,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -10072,6 +10129,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -10155,6 +10213,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -10265,6 +10324,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -10348,6 +10408,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -10456,6 +10517,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: 
None, @@ -10994,6 +11056,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -11083,6 +11146,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -11666,6 +11730,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -11744,6 +11809,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -11820,6 +11886,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -12481,6 +12548,7 @@ Insert( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -12568,6 +12636,7 @@ Insert( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -12655,6 +12724,7 @@ Insert( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -12841,6 +12911,7 @@ InsertMultiTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -13025,6 +13096,7 @@ InsertMultiTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -13837,6 +13909,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -13896,6 +13969,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -13939,6 +14013,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -13973,6 +14048,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -14009,6 +14085,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -14043,6 +14120,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -14117,6 +14195,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -14169,6 +14248,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -14221,6 +14301,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -14286,6 +14367,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -14356,6 +14438,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -14407,6 +14490,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -14450,6 +14534,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -14493,6 +14578,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -14540,6 +14626,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -14606,6 +14693,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -14651,6 
+14739,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -14704,6 +14793,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -14773,6 +14863,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -14816,6 +14907,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -14850,6 +14942,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -14928,6 +15021,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -15010,6 +15104,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -15094,6 +15189,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -15180,6 +15276,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -15241,6 +15338,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -15294,6 +15392,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -15347,6 +15446,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -15392,6 +15492,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -15431,6 +15532,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -15513,6 +15615,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -15586,6 +15689,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -16013,6 +16117,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -16049,6 +16154,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -20734,6 +20840,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -20983,6 +21090,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -21054,6 +21162,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -21204,6 +21313,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -21362,6 +21472,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -21518,6 +21629,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -21693,6 +21805,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -21848,6 +21961,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -21979,6 +22093,7 @@ Query( quote: None, 
ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -22110,6 +22225,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -22249,6 +22365,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -22714,6 +22831,7 @@ CreateDynamicTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -22864,6 +22982,7 @@ CreateDynamicTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -23072,6 +23191,7 @@ CreateDynamicTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -23287,6 +23407,7 @@ CreateDynamicTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -23534,6 +23655,7 @@ CreateDynamicTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -25215,6 +25337,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -25339,6 +25462,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -25465,6 +25589,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -25591,6 +25716,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -25717,6 +25843,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -25836,6 +25963,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -27649,6 +27777,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -27881,6 +28010,7 @@ Query( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -28592,6 +28722,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -28630,6 +28761,7 @@ AlterTable( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None, @@ -28860,6 +28992,7 @@ CreateRowAccessPolicy( quote: None, ident_type: None, }, + ref_name: None, alias: None, temporal: None, with_options: None,
diff --git a/src/query/catalog/src/table.rs b/src/query/catalog/src/table.rs
index bc2f4302916eb..0958e7ebe6a57 100644
--- a/src/query/catalog/src/table.rs
+++ b/src/query/catalog/src/table.rs
@@ -30,6 +30,7 @@ use databend_common_expression::Scalar;
 use databend_common_expression::TableSchema;
 use databend_common_meta_app::app_error::AppError;
 use databend_common_meta_app::app_error::UnknownTableId;
+use databend_common_meta_app::schema::SnapshotRefType;
 use databend_common_meta_app::schema::TableIdent;
 use databend_common_meta_app::schema::TableInfo;
 use databend_common_meta_app::schema::TableMeta;
@@ -301,16 +302,9 @@ pub trait Table: Sync + Send {
         ctx: Arc<dyn TableContext>,
         instant: Option<NavigationPoint>,
         num_snapshot_limit: Option<usize>,
-        keep_last_snapshot: bool,
         dry_run: bool,
     ) -> Result<Option<Vec<String>>> {
-        let (_, _, _, _, _) = (
-            ctx,
-            instant,
-            num_snapshot_limit,
-            keep_last_snapshot,
-            dry_run,
-        );
+        let (_, _, _, _) = (ctx, instant, num_snapshot_limit, dry_run);
         Ok(None)
     }
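The hunk above drops `keep_last_snapshot` from the default `purge` implementation. A minimal call-site sketch of the new shape; the `databend_common_*` paths are taken from this diff, while the wrapper function itself is hypothetical:

```rust
use std::sync::Arc;

use databend_common_catalog::table::Table;
use databend_common_catalog::table_context::TableContext;
use databend_common_exception::Result;

// Illustrative helper: callers that previously passed `keep_last_snapshot: true`
// now simply drop that argument.
async fn dry_run_purge(table: Arc<dyn Table>, ctx: Arc<dyn TableContext>) -> Result<usize> {
    let purged = table
        .purge(
            ctx,
            None, // instant: no explicit navigation point
            None, // num_snapshot_limit
            true, // dry_run
        )
        .await?;
    // The default impl returns Ok(None) unless the engine overrides `purge`.
    Ok(purged.map(|files| files.len()).unwrap_or(0))
}
```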
@@ -364,6 +358,15 @@ pub trait Table: Sync + Send {
         )))
     }
 
+    fn with_branch(&self, branch_name: &str) -> Result<Arc<dyn Table>> {
+        let _ = branch_name;
+        Err(ErrorCode::Unimplemented(format!(
+            "Table branch is not supported for the table '{}', which uses the '{}' engine.",
+            self.name(),
+            self.get_table_info().engine(),
+        )))
+    }
+
     async fn generate_changes_query(
         &self,
         ctx: Arc<dyn TableContext>,
@@ -560,6 +563,7 @@ pub enum NavigationPoint {
     SnapshotID(String),
     TimePoint(DateTime<Utc>),
     StreamInfo(TableInfo),
+    TableRef { typ: SnapshotRefType, name: String },
 }
 
 #[derive(Debug, Copy, Clone, Default, serde::Serialize, serde::Deserialize)]
diff --git a/src/query/catalog/src/table_context.rs b/src/query/catalog/src/table_context.rs
index 06f28ab50669b..5e19340360aa5 100644
--- a/src/query/catalog/src/table_context.rs
+++ b/src/query/catalog/src/table_context.rs
@@ -301,6 +301,7 @@ pub trait TableContext: Send + Sync {
         catalog: &str,
         database: &str,
         table: &str,
+        branch: Option<&str>,
         max_batch_size: Option<u64>,
     ) -> Result<Arc<dyn Table>>;
 
diff --git a/src/query/config/Cargo.toml b/src/query/config/Cargo.toml
index 5a635acae3015..7ba66c4267c53 100644
--- a/src/query/config/Cargo.toml
+++ b/src/query/config/Cargo.toml
@@ -32,8 +32,6 @@ serfig = { workspace = true }
 toml = { workspace = true }
 
 [dev-dependencies]
-pretty_assertions = { workspace = true }
-tempfile = { workspace = true }
 
 [lints]
 workspace = true
diff --git a/src/query/ee/Cargo.toml b/src/query/ee/Cargo.toml
index f8c59454933cd..539e3624d1623 100644
--- a/src/query/ee/Cargo.toml
+++ b/src/query/ee/Cargo.toml
@@ -42,6 +42,7 @@ databend-enterprise-resources-management = { workspace = true }
 databend-enterprise-row-access-policy-feature = { workspace = true }
 databend-enterprise-storage-encryption = { workspace = true }
 databend-enterprise-stream-handler = { workspace = true }
+databend-enterprise-table-ref-handler = { workspace = true }
 databend-enterprise-vacuum-handler = { workspace = true }
 databend-enterprise-virtual-column = { workspace = true }
 databend-query = { workspace = true }
@@ -56,13 +57,10 @@ opendal = { workspace = true }
 serde = { workspace = true }
 tempfile = { workspace = true }
 typetag = { workspace = true }
-uuid = { workspace = true }
 
 [dev-dependencies]
-databend-common-functions = { workspace = true }
 databend-common-meta-kvapi = { workspace = true }
 jsonb = { workspace = true }
-tantivy = { workspace = true }
 walkdir = { workspace = true }
 
 [package.metadata.cargo-machete]
diff --git a/src/query/ee/src/enterprise_services.rs b/src/query/ee/src/enterprise_services.rs
index dec0ed25e3d71..4306bbe62caea 100644
--- a/src/query/ee/src/enterprise_services.rs
+++ b/src/query/ee/src/enterprise_services.rs
@@ -27,6 +27,7 @@ use crate::row_access_policy::row_access_policy_handler::RealRowAccessPolicyHand
 use crate::storage_encryption::RealStorageEncryptionHandler;
 use crate::storages::fuse::operations::RealVacuumHandler;
 use crate::stream::RealStreamHandler;
+use crate::table_ref::RealTableRefHandler;
 use crate::virtual_column::RealVirtualColumnHandler;
 
 pub struct EnterpriseServices;
@@ -40,6 +41,7 @@ impl EnterpriseServices {
         RealRowAccessPolicyHandler::init()?;
         RealVirtualColumnHandler::init()?;
         RealStreamHandler::init()?;
+        RealTableRefHandler::init()?;
         RealAttachTableHandler::init()?;
         RealFailSafeHandler::init()?;
         init_resources_management(&cfg, version).await?;
diff --git a/src/query/ee/src/lib.rs b/src/query/ee/src/lib.rs
index 00467de13c8c3..ae6de9d68103c 100644
--- a/src/query/ee/src/lib.rs
+++ b/src/query/ee/src/lib.rs
@@ -23,5 +23,6 @@ pub mod row_access_policy;
 pub mod storage_encryption;
 pub mod
storages; pub mod stream; +pub mod table_ref; pub mod test_kits; pub mod virtual_column; diff --git a/src/query/ee/src/storages/fuse/io/snapshots.rs b/src/query/ee/src/storages/fuse/io/snapshots.rs deleted file mode 100644 index 81f5a610d7731..0000000000000 --- a/src/query/ee/src/storages/fuse/io/snapshots.rs +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright 2023 Databend Cloud -// -// Licensed under the Elastic License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.elastic.co/licensing/elastic-license -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Read all the referenced segments by all the snapshot file. -// limit: limits the number of snapshot files listed - -use std::sync::Arc; -use std::time::Instant; - -use databend_common_exception::Result; -use databend_common_storages_fuse::io::SnapshotLiteExtended; -use databend_common_storages_fuse::io::SnapshotsIO; -use databend_storages_common_table_meta::meta::Location; -use log::info; - -#[async_backtrace::framed] -pub async fn get_snapshot_referenced_segments( - snapshots_io: &SnapshotsIO, - root_snapshot_location: String, - root_snapshot_lite: Arc, - status_callback: T, -) -> Result>> -where - T: Fn(String), -{ - let ctx = snapshots_io.get_ctx(); - - // List all the snapshot file paths - // note that snapshot file paths of ongoing txs might be included - let mut snapshot_files = vec![]; - if let Some(prefix) = SnapshotsIO::get_s3_prefix_from_file(&root_snapshot_location) { - snapshot_files = - SnapshotsIO::list_files(snapshots_io.get_operator(), &prefix, None).await?; - } - - if snapshot_files.is_empty() { - return Ok(None); - } - - // 1. Get all the snapshot by chunks, save all the segments location. - let max_threads = ctx.get_settings().get_max_threads()? as usize; - - let start = Instant::now(); - let mut count = 1; - // 2. Get all the referenced segments - let mut segments = vec![]; - // first save root snapshot segments - root_snapshot_lite.segments.iter().for_each(|location| { - segments.push(location.to_owned()); - }); - for chunk in snapshot_files.chunks(max_threads) { - // Since we want to get all the snapshot referenced files, so set `ignore_timestamp` true - let results = snapshots_io - .read_snapshot_lite_extends(chunk, root_snapshot_lite.clone(), true) - .await?; - - results - .into_iter() - .flatten() - .for_each(|snapshot_lite_extend| { - snapshot_lite_extend.segments.iter().for_each(|location| { - segments.push(location.to_owned()); - }); - }); - - // Refresh status. 
- { - count += chunk.len(); - let status = format!( - "gc orphan: read snapshot files:{}/{}, segment files: {}, cost:{:?}", - count, - snapshot_files.len(), - segments.len(), - start.elapsed() - ); - info!("{}", status); - (status_callback)(status); - } - } - - Ok(Some(segments)) -} - -#[async_backtrace::framed] -async fn get_files_by_prefix(snapshots_io: &SnapshotsIO, input_file: &str) -> Result> { - if let Some(prefix) = SnapshotsIO::get_s3_prefix_from_file(input_file) { - SnapshotsIO::list_files(snapshots_io.get_operator(), &prefix, None).await - } else { - Ok(vec![]) - } -} diff --git a/src/query/ee/src/storages/fuse/mod.rs b/src/query/ee/src/storages/fuse/mod.rs index 342c19d67ca16..38fd1128ae213 100644 --- a/src/query/ee/src/storages/fuse/mod.rs +++ b/src/query/ee/src/storages/fuse/mod.rs @@ -12,10 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -pub mod io; pub mod operations; -pub use io::snapshots::get_snapshot_referenced_segments; pub use operations::vacuum_drop_tables::vacuum_drop_tables; pub use operations::vacuum_table::do_vacuum; pub use operations::vacuum_table_v2::do_vacuum2; diff --git a/src/query/ee/src/storages/fuse/operations/vacuum_drop_tables.rs b/src/query/ee/src/storages/fuse/operations/vacuum_drop_tables.rs index 1c1079c156e2a..e916e8370fe37 100644 --- a/src/query/ee/src/storages/fuse/operations/vacuum_drop_tables.rs +++ b/src/query/ee/src/storages/fuse/operations/vacuum_drop_tables.rs @@ -72,11 +72,9 @@ async fn vacuum_drop_single_table( match dry_run_limit { None => { - let result = operator.remove_all(&dir).await; - if let Err(ref err) = result { + operator.remove_all(&dir).await.inspect_err(|err| { error!("failed to remove all in directory {}: {}", dir, err); - } - result?; + })?; } Some(dry_run_limit) => { let mut ds = operator.lister_with(&dir).recursive(true).await?; diff --git a/src/query/ee/src/storages/fuse/operations/vacuum_table.rs b/src/query/ee/src/storages/fuse/operations/vacuum_table.rs index 4e4f4114a16f0..1742cbd0d8699 100644 --- a/src/query/ee/src/storages/fuse/operations/vacuum_table.rs +++ b/src/query/ee/src/storages/fuse/operations/vacuum_table.rs @@ -17,22 +17,14 @@ use std::sync::Arc; use std::time::Instant; use chrono::DateTime; +use chrono::TimeDelta; use chrono::Utc; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; -use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_storages_fuse::io::MetaReaders; -use databend_common_storages_fuse::io::SnapshotLiteExtended; -use databend_common_storages_fuse::io::SnapshotsIO; -use databend_common_storages_fuse::io::TableMetaLocationGenerator; use databend_common_storages_fuse::FuseTable; -use databend_common_storages_fuse::RetentionPolicy; -use databend_storages_common_cache::LoadParams; use databend_storages_common_table_meta::meta::SegmentInfo; -use crate::storages::fuse::get_snapshot_referenced_segments; - const DRY_RUN_LIMIT: usize = 1000; #[derive(Debug, PartialEq, Eq)] @@ -68,72 +60,24 @@ pub async fn get_snapshot_referenced_files( fuse_table: &FuseTable, ctx: &Arc, ) -> Result> { - // 1. Read the root snapshot. 
- let root_snapshot_location_op = fuse_table.snapshot_loc(); - if root_snapshot_location_op.is_none() { - return Ok(None); - } - - let root_snapshot_location = root_snapshot_location_op.unwrap(); - let reader = MetaReaders::table_snapshot_reader(fuse_table.get_operator()); - let ver = TableMetaLocationGenerator::snapshot_version(root_snapshot_location.as_str()); - let params = LoadParams { - location: root_snapshot_location.clone(), - len_hint: None, - ver, - put_cache: true, - }; - let root_snapshot = match reader.read(¶ms).await { - Err(e) if e.code() == ErrorCode::STORAGE_NOT_FOUND => { - // concurrent gc: someone else has already collected this snapshot, ignore it - // warn!( - // "concurrent gc: snapshot {:?} already collected. table: {}, ident {}", - // root_snapshot_location, self.table_info.desc, self.table_info.ident, - //); - return Ok(None); - } - Err(e) => return Err(e), - Ok(v) => v, - }; - - let root_snapshot_lite = Arc::new(SnapshotLiteExtended { - format_version: ver, - snapshot_id: root_snapshot.snapshot_id, - timestamp: root_snapshot.timestamp, - segments: HashSet::from_iter(root_snapshot.segments.clone()), - table_statistics_location: root_snapshot.table_statistics_location(), - }); - drop(root_snapshot); - - // 2. Find all segments referenced by the current snapshots - let snapshots_io = SnapshotsIO::create(ctx.clone(), fuse_table.get_operator()); - let segments_opt = get_snapshot_referenced_segments( - &snapshots_io, - root_snapshot_location, - root_snapshot_lite, - |status| { + // 1. Find all segments referenced by the current snapshots (including branches and tags) + let segments_opt = fuse_table + .get_snapshot_referenced_segments(ctx.clone(), |status| { ctx.set_status_info(&status); - }, - ) - .await?; + }) + .await?; - let segments_vec = match segments_opt { - Some(segments) => segments, - None => { - return Ok(None); - } + let Some(segments) = segments_opt else { + return Ok(None); }; + let segment_refs: Vec<&_> = segments.iter().collect(); let locations_referenced = fuse_table - .get_block_locations(ctx.clone(), &segments_vec, false, false) + .get_block_locations(ctx.clone(), &segment_refs, false, false) .await?; - let mut segments = HashSet::with_capacity(segments_vec.len()); - segments_vec.into_iter().for_each(|(location, _)| { - segments.insert(location); - }); Ok(Some(SnapshotReferencedFiles { - segments, + segments: segments.into_iter().map(|(location, _)| location).collect(), blocks: locations_referenced.block_location, blocks_index: locations_referenced.bloom_location, segments_stats: locations_referenced.hll_location, @@ -164,9 +108,8 @@ pub async fn do_gc_orphan_files( start: Instant, ) -> Result<()> { // 1. Get all the files referenced by the current snapshot - let referenced_files = match get_snapshot_referenced_files(fuse_table, ctx).await? { - Some(referenced_files) => referenced_files, - None => return Ok(()), + let Some(referenced_files) = get_snapshot_referenced_files(fuse_table, ctx).await? else { + return Ok(()); }; let status = format!( "gc orphan: read referenced files:{},{},{},{}, cost:{:?}", @@ -317,9 +260,8 @@ pub async fn do_dry_run_orphan_files( dry_run_limit: usize, ) -> Result<()> { // 1. Get all the files referenced by the current snapshot - let referenced_files = match get_snapshot_referenced_files(fuse_table, ctx).await? { - Some(referenced_files) => referenced_files, - None => return Ok(()), + let Some(referenced_files) = get_snapshot_referenced_files(fuse_table, ctx).await? 
else { + return Ok(()); }; let status = format!( "dry_run orphan: read referenced files:{},{},{},{}, cost:{:?}", @@ -420,31 +362,21 @@ pub async fn do_vacuum( let dry_run_limit = if dry_run { Some(DRY_RUN_LIMIT) } else { None }; // Let the table navigate to the point according to the table's retention policy. let navigation_point = None; - let keep_last_snapshot = true; let purge_files_opt = fuse_table - .purge( - ctx.clone(), - navigation_point, - dry_run_limit, - keep_last_snapshot, - dry_run, - ) + .purge(ctx.clone(), navigation_point, dry_run_limit, dry_run) .await?; let status = format!("do_vacuum: purged table, cost:{:?}", start.elapsed()); ctx.set_status_info(&status); - let retention_policy = fuse_table.get_data_retention_policy(ctx.as_ref())?; - - let retention_period = match retention_policy { - RetentionPolicy::ByTimePeriod(retention_period) => retention_period, - RetentionPolicy::ByNumOfSnapshotsToKeep(_) => { - // Technically, we should derive a reasonable retention period from the ByNumOfSnapshotsToKeep policy, - // but it's not worth the effort since VACUUM2 will replace legacy purge and vacuum soon. - // Use the table retention period for now. - fuse_table.get_data_retention_period(ctx.as_ref())? - } + // Technically, we should derive a reasonable retention period from the ByNumOfSnapshotsToKeep policy, + // but it's not worth the effort since VACUUM2 will replace legacy purge and vacuum soon. + // Use the table retention period for now. + let retention_period = if fuse_table.is_transient() { + // For transient table, keep no history data + TimeDelta::zero() + } else { + fuse_table.get_data_retention_period(ctx.as_ref())? }; - - let retention_time = chrono::Utc::now() - retention_period; + let retention_time = Utc::now() - retention_period; if let Some(mut purge_files) = purge_files_opt { let dry_run_limit = dry_run_limit.unwrap(); if purge_files.len() < dry_run_limit { diff --git a/src/query/ee/src/storages/fuse/operations/vacuum_table_v2.rs b/src/query/ee/src/storages/fuse/operations/vacuum_table_v2.rs index 27ad1abe8fcc0..918909466ca63 100644 --- a/src/query/ee/src/storages/fuse/operations/vacuum_table_v2.rs +++ b/src/query/ee/src/storages/fuse/operations/vacuum_table_v2.rs @@ -19,10 +19,10 @@ use std::collections::HashSet; use std::sync::Arc; use chrono::DateTime; -use chrono::Duration; use chrono::TimeDelta; use chrono::Utc; use databend_common_base::base::uuid::Uuid; +use databend_common_base::base::uuid::Version; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; @@ -30,70 +30,35 @@ use databend_common_exception::Result; use databend_common_meta_app::schema::least_visible_time_ident::LeastVisibleTimeIdent; use databend_common_meta_app::schema::LeastVisibleTime; use databend_common_meta_app::schema::ListIndexesByIdReq; -use databend_common_storages_fuse::io::MetaReaders; +use databend_common_meta_app::schema::SnapshotRefType; use databend_common_storages_fuse::io::SegmentsIO; +use databend_common_storages_fuse::io::SnapshotsIO; use databend_common_storages_fuse::io::TableMetaLocationGenerator; +use databend_common_storages_fuse::operations::ASSUMPTION_MAX_TXN_DURATION; use databend_common_storages_fuse::FuseTable; use databend_common_storages_fuse::RetentionPolicy; use databend_storages_common_cache::CacheAccessor; use databend_storages_common_cache::CacheManager; -use databend_storages_common_cache::LoadParams; use databend_storages_common_io::Files; -use 
databend_storages_common_table_meta::meta::uuid_from_date_time;
 use databend_storages_common_table_meta::meta::CompactSegmentInfo;
 use databend_storages_common_table_meta::meta::TableSnapshot;
 use databend_storages_common_table_meta::meta::VACUUM2_OBJECT_KEY_PREFIX;
-use futures_util::TryStreamExt;
 use log::info;
 use opendal::Entry;
 use opendal::ErrorKind;
-use opendal::Operator;
-use opendal::Scheme;
-use uuid::Version;
 
-/// An assumption of the maximum duration from the time the first block is written to the time the
-/// snapshot is written.
-///
-/// To handle the situation during an upgrade where some nodes may not be able to upgrade in time to
-/// a version that includes the vacuum2 logic, we introduce this assumption. It is used in two places:
-///
-/// - When determining whether a snapshot object generated by an old version node can be cleaned up
-///
-/// Snapshots whose object key does not start with `VACUUM2_OBJECT_KEY_PREFIX` are all created by
-/// nodes of previous versions (do not support vacuum2). For such snapshot objects, if their
-/// timestamp is less than
-/// `GC_root's timestamp - ASSUMPTION_MAX_TXN_DURATION`
-/// we consider them safe to delete.
-///
-/// Generally speaking, if a snapshot from an old version was created a sufficiently long time
-/// before the gc root, it would not be successfully committed after the gc root; this way, we
-/// avoid deleting a snapshot object produced by an ongoing (not yet committed) transaction.
-///
-/// - When determining whether a segment/block object generated by an old version query node can be
-/// cleaned up
-///
-/// Similarly, if a segment/block was created at a time sufficiently long before the gc root and
-/// is not referenced by the gc root, then it will not be referenced by a snapshot that can be
-/// successfully committed after the gc root, and safe to delete.
-///
-/// NOTE:
-/// If this assumption does not hold, it may lead to table data becoming inaccessible:
-/// snapshots may become inaccessible, or some data may become unavailable.
-///
-/// If the entire cluster is upgraded to the new version that includes the vacuum2 logic,
-/// the above risks will not exist.
-const ASSUMPTION_MAX_TXN_DURATION: Duration = Duration::days(3);
 
 #[async_backtrace::framed]
 pub async fn do_vacuum2(
     table: &dyn Table,
     ctx: Arc<dyn TableContext>,
     respect_flash_back: bool,
 ) -> Result<Vec<String>> {
+    let table_info = table.get_table_info();
     {
         if ctx.txn_mgr().lock().is_active() {
             info!(
                 "Transaction is active, skipping vacuum, target table {}",
-                table.get_table_info().desc
+                table_info.desc
             );
             return Ok(vec![]);
         }
@@ -102,15 +67,14 @@ pub async fn do_vacuum2(
     let fuse_table = FuseTable::try_from_table(table)?;
 
     let Some(latest_snapshot) = fuse_table.read_table_snapshot().await? else {
-        info!(
-            "Table {} has no snapshot, stopping vacuum",
-            fuse_table.get_table_info().desc
-        );
+        info!("Table {} has no snapshot, stopping vacuum", table_info.desc);
         return Ok(vec![]);
     };
 
-    let start = std::time::Instant::now();
+    // Process snapshot refs (branches and tags) before main vacuum
+    let ref_info = process_snapshot_refs(fuse_table, &ctx).await?;
 
+    let start = std::time::Instant::now();
     let retention_policy = fuse_table.get_data_retention_policy(ctx.as_ref())?;
 
     // By default, do not vacuum all the historical snapshots.
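The long doc comment deleted above (it moves, together with the constant, into `databend_common_storages_fuse::operations`) reduces to a single predicate. A standalone restatement, mirroring the `is_gc_candidate_segment_block` check removed further below; only the free-function wrapper is illustrative:

```rust
use chrono::{DateTime, Duration, Utc};

// Mirrors the constant now imported from databend_common_storages_fuse::operations.
const ASSUMPTION_MAX_TXN_DURATION: Duration = Duration::days(3);

/// An object written by a pre-vacuum2 node (its key lacks the vacuum2 prefix)
/// is only a GC candidate if it was written long enough before the GC root
/// that no still-uncommitted transaction could have produced it.
fn is_legacy_object_gc_candidate(
    last_modified: DateTime<Utc>,
    gc_root_meta_ts: DateTime<Utc>,
) -> bool {
    last_modified + ASSUMPTION_MAX_TXN_DURATION < gc_root_meta_ts
}
```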
@@ -142,7 +106,7 @@ pub async fn do_vacuum2( ctx.set_status_info(&format!( "Set LVT for table {}, elapsed: {:?}, LVT: {:?}", - fuse_table.get_table_info().desc, + table_info.desc, start.elapsed(), lvt )); @@ -158,17 +122,17 @@ pub async fn do_vacuum2( ); // List the snapshot order by timestamp asc, till the current snapshot(inclusively). let need_one_more = true; - let mut snapshots = list_until_prefix( - fuse_table, - fuse_table - .meta_location_generator() - .snapshot_location_prefix(), - // Safe to unwrap here: we have checked that `fuse_table` has a snapshot - fuse_table.snapshot_loc().unwrap().as_str(), - need_one_more, - None, - ) - .await?; + let mut snapshots = fuse_table + .list_files_until_prefix( + fuse_table + .meta_location_generator() + .snapshot_location_prefix(), + // Safe to unwrap here: we have checked that `fuse_table` has a snapshot + fuse_table.snapshot_loc().unwrap().as_str(), + need_one_more, + None, + ) + .await?; let len = snapshots.len(); if len <= num_snapshots_to_keep { @@ -196,7 +160,7 @@ pub async fn do_vacuum2( let elapsed = start.elapsed(); ctx.set_status_info(&format!( "Listed snapshots for table {}, elapsed: {:?}, snapshots_dir: {:?}, snapshots: {:?}", - fuse_table.get_table_info().desc, + table_info.desc, elapsed, fuse_table .meta_location_generator() @@ -204,8 +168,7 @@ pub async fn do_vacuum2( slice_summary(&snapshots_before_lvt) )); - let start = std::time::Instant::now(); - let Some((gc_root, snapshots_to_gc, gc_root_meta_ts)) = select_gc_root( + let Some((gc_root, snapshots_to_gc, mut gc_root_meta_ts)) = select_gc_root( &ctx, fuse_table, &snapshots_before_lvt, @@ -216,38 +179,49 @@ pub async fn do_vacuum2( else { return Ok(vec![]); }; + + // Use the oldest gc_root_meta_ts between main branch and refs + if let Some(ref_ts) = ref_info.gc_root_meta_ts { + gc_root_meta_ts = gc_root_meta_ts.min(ref_ts); + } + let gc_root_timestamp = ref_info + .gc_root_timestamp + .into_iter() + .chain(gc_root.timestamp) + .min() + .unwrap(); ctx.set_status_info(&format!( "Selected gc_root for table {}, elapsed: {:?}, gc_root: {:?}, snapshots_to_gc: {:?}", - fuse_table.get_table_info().desc, + table_info.desc, start.elapsed(), gc_root, slice_summary(&snapshots_to_gc) )); let start = std::time::Instant::now(); - let gc_root_timestamp = gc_root.timestamp.unwrap(); - let gc_root_segments = gc_root + let gc_root_segments: HashSet<_> = gc_root .segments .iter() - .map(|(path, _)| path) - .collect::>(); - let segments_before_gc_root = list_until_timestamp( - fuse_table, - fuse_table - .meta_location_generator() - .segment_location_prefix(), - gc_root_timestamp, - false, - Some(gc_root_meta_ts), - ) - .await? - .into_iter() - .map(|v| v.path().to_owned()) - .collect::>(); + .chain(ref_info.ref_gc_roots.iter().flat_map(|r| r.segments.iter())) + .map(|(path, _)| path.clone()) + .collect(); + let segments_before_gc_root = fuse_table + .list_files_until_timestamp( + fuse_table + .meta_location_generator() + .segment_location_prefix(), + gc_root_timestamp, + false, + Some(gc_root_meta_ts), + ) + .await? 
+ .into_iter() + .map(|v| v.path().to_owned()) + .collect::>(); ctx.set_status_info(&format!( "Listed segments before gc_root for table {}, elapsed: {:?}, segment_dir: {:?}, gc_root_timestamp: {:?}, segments: {:?}", - fuse_table.get_table_info().desc, + table_info.desc, start.elapsed(), fuse_table.meta_location_generator().segment_location_prefix(), gc_root_timestamp, @@ -265,7 +239,7 @@ pub async fn do_vacuum2( .collect::>(); ctx.set_status_info(&format!( "Filtered segments_to_gc for table {}, elapsed: {:?}, segments_to_gc: {:?}, stats_to_gc: {:?}", - fuse_table.get_table_info().desc, + table_info.desc, start.elapsed(), slice_summary(&segments_to_gc), slice_summary(&stats_to_gc) @@ -274,6 +248,8 @@ pub async fn do_vacuum2( let start = std::time::Instant::now(); let segments_io = SegmentsIO::create(ctx.clone(), fuse_table.get_operator(), fuse_table.schema()); + + // Collect blocks from main gc_root let segments = segments_io .read_segments::>(&gc_root.segments, false) .await?; @@ -281,28 +257,40 @@ pub async fn do_vacuum2( for segment in segments { gc_root_blocks.extend(segment?.block_metas()?.iter().map(|b| b.location.0.clone())); } + + // Collect blocks from ref gc_roots + for ref_gc_root in ref_info.ref_gc_roots { + let ref_segments = segments_io + .read_segments::>(&ref_gc_root.segments, false) + .await?; + for segment in ref_segments { + gc_root_blocks.extend(segment?.block_metas()?.iter().map(|b| b.location.0.clone())); + } + } + ctx.set_status_info(&format!( - "Read segments for table {}, elapsed: {:?}", - fuse_table.get_table_info().desc, + "Read segments for table {}, elapsed: {:?}, total protected blocks: {}", + table_info.desc, start.elapsed(), + gc_root_blocks.len() )); let start = std::time::Instant::now(); - let blocks_before_gc_root = list_until_timestamp( - fuse_table, - fuse_table.meta_location_generator().block_location_prefix(), - gc_root_timestamp, - false, - Some(gc_root_meta_ts), - ) - .await? - .into_iter() - .map(|v| v.path().to_owned()) - .collect::>(); + let blocks_before_gc_root = fuse_table + .list_files_until_timestamp( + fuse_table.meta_location_generator().block_location_prefix(), + gc_root_timestamp, + false, + Some(gc_root_meta_ts), + ) + .await? 
+ .into_iter() + .map(|v| v.path().to_owned()) + .collect::>(); ctx.set_status_info(&format!( "Listed blocks before gc_root for table {}, elapsed: {:?}, block_dir: {:?}, gc_root_timestamp: {:?}, blocks: {:?}", - fuse_table.get_table_info().desc, + table_info.desc, start.elapsed(), fuse_table.meta_location_generator().block_location_prefix(), gc_root_timestamp, @@ -316,7 +304,7 @@ pub async fn do_vacuum2( .collect(); ctx.set_status_info(&format!( "Filtered blocks_to_gc for table {}, elapsed: {:?}, blocks_to_gc: {:?}", - fuse_table.get_table_info().desc, + table_info.desc, start.elapsed(), slice_summary(&blocks_to_gc) )); @@ -329,7 +317,7 @@ pub async fn do_vacuum2( fuse_table.get_id(), )) .await?; - let inverted_indexes = &fuse_table.get_table_info().meta.indexes; + let inverted_indexes = &table_info.meta.indexes; let mut indexes_to_gc = Vec::with_capacity( blocks_to_gc.len() * (table_agg_index_ids.len() + inverted_indexes.len() + 1), ); @@ -356,7 +344,7 @@ pub async fn do_vacuum2( ctx.set_status_info(&format!( "Collected indexes_to_gc for table {}, elapsed: {:?}, indexes_to_gc: {:?}", - fuse_table.get_table_info().desc, + table_info.desc, start.elapsed(), slice_summary(&indexes_to_gc) )); @@ -384,23 +372,23 @@ pub async fn do_vacuum2( // - TODO: To ensure correctness in such cases, the table's Least Visible Timestamp (LVT), // stored in the meta-server, should be utilized to determine snapshot visibility and // resolve potential issues. - if let Some(snapshot_cache) = CacheManager::instance().get_table_snapshot_cache() { - for path in &snapshots_to_gc { + for path in snapshots_to_gc.iter() { snapshot_cache.evict(path); } } - + let op = Files::create(ctx.clone(), fuse_table.get_operator()); op.remove_file_in_batch(&snapshots_to_gc).await?; let files_to_gc: Vec<_> = subject_files_to_gc .into_iter() .chain(snapshots_to_gc.into_iter()) .chain(indexes_to_gc.into_iter()) + .chain(ref_info.files_to_gc.into_iter()) .collect(); ctx.set_status_info(&format!( "Removed files for table {}, elapsed: {:?}, files_to_gc: {:?}", - fuse_table.get_table_info().desc, + table_info.desc, start.elapsed(), slice_summary(&files_to_gc) )); @@ -413,27 +401,27 @@ async fn collect_gc_candidates_by_retention_period( is_vacuum_all: bool, ) -> Result> { let snapshots_before_lvt = if is_vacuum_all { - list_until_prefix( - fuse_table, - fuse_table - .meta_location_generator() - .snapshot_location_prefix(), - fuse_table.snapshot_loc().unwrap().as_str(), - true, - None, - ) - .await? + fuse_table + .list_files_until_prefix( + fuse_table + .meta_location_generator() + .snapshot_location_prefix(), + fuse_table.snapshot_loc().unwrap().as_str(), + true, + None, + ) + .await? } else { - list_until_timestamp( - fuse_table, - fuse_table - .meta_location_generator() - .snapshot_location_prefix(), - lvt, - true, - None, - ) - .await? + fuse_table + .list_files_until_timestamp( + fuse_table + .meta_location_generator() + .snapshot_location_prefix(), + lvt, + true, + None, + ) + .await? }; Ok(snapshots_before_lvt) @@ -441,7 +429,7 @@ async fn collect_gc_candidates_by_retention_period( /// Try set lvt as min(latest_snapshot.timestamp, now - retention_time). /// -/// Return `None` means we stop vacuumming, but don't want to report error to user. +/// Return `None` means we stop vacuuming, but don't want to report error to user. 
async fn set_lvt( fuse_table: &FuseTable, latest_snapshot: Arc, @@ -456,7 +444,7 @@ async fn set_lvt( return Ok(None); } let cat = ctx.get_default_catalog()?; - // safe to unwrap, as we have checked the version is v5 + // safe to unwrap, as we have checked the version is v4 let latest_ts = latest_snapshot.timestamp.unwrap(); let lvt_point_candidate = std::cmp::min(Utc::now() - retention_period, latest_ts); @@ -475,156 +463,6 @@ fn is_uuid_v7(uuid: &Uuid) -> bool { version.is_some_and(|v| matches!(v, Version::SortRand)) } -async fn list_until_prefix( - fuse_table: &FuseTable, - path: &str, - until: &str, - need_one_more: bool, - gc_root_meta_ts: Option>, -) -> Result> { - info!("Listing files until prefix: {}", until); - let dal = fuse_table.get_operator_ref(); - - match dal.info().scheme() { - Scheme::Fs => fs_list_until_prefix(dal, path, until, need_one_more, gc_root_meta_ts).await, - _ => general_list_until_prefix(dal, path, until, need_one_more, gc_root_meta_ts).await, - } -} - -/// Object storage supported by Databend is expected to return entries sorted in ascending lexicographical -/// order by object key. Databend leverages this property to enhance the efficiency and thoroughness -/// of the vacuum process. -/// -/// The safety of the vacuum algorithm does not depend on this ordering. -async fn general_list_until_prefix( - dal: &Operator, - path: &str, - until: &str, - need_one_more: bool, - gc_root_meta_ts: Option>, -) -> Result> { - let mut lister = dal.lister(path).await?; - let mut paths = vec![]; - while let Some(entry) = lister.try_next().await? { - if entry.metadata().is_dir() { - continue; - } - if entry.path() >= until { - info!("entry path: {} >= until: {}", entry.path(), until); - if need_one_more { - paths.push(entry); - } - break; - } - if gc_root_meta_ts.is_none() - || is_gc_candidate_segment_block(&entry, dal, gc_root_meta_ts.unwrap()).await? - { - paths.push(entry); - } - } - Ok(paths) -} - -/// If storage is backed by FS, we prioritize thoroughness over efficiency (though efficiency loss -/// is usually no significant). All entries are fetched and sorted before extracting the prefix entries. -async fn fs_list_until_prefix( - dal: &Operator, - path: &str, - until: &str, - need_one_more: bool, - gc_root_meta_ts: Option>, -) -> Result> { - // Fetch ALL entries from the path and sort them by path in lexicographical order. - let mut lister = dal.lister(path).await?; - let mut entries = Vec::new(); - while let Some(item) = lister.try_next().await? { - if item.metadata().is_file() { - entries.push(item); - } - } - entries.sort_by(|l, r| l.path().cmp(r.path())); - - // Extract entries up to the `until` path, respecting lexicographical order. - let mut res = Vec::new(); - for entry in entries { - if entry.path() >= until { - info!("entry path: {} >= until: {}", entry.path(), until); - if need_one_more { - res.push(entry); - } - break; - } - if gc_root_meta_ts.is_none() - || is_gc_candidate_segment_block(&entry, dal, gc_root_meta_ts.unwrap()).await? 
- { - res.push(entry); - } - } - - Ok(res) -} - -async fn is_gc_candidate_segment_block( - entry: &Entry, - op: &Operator, - gc_root_meta_ts: DateTime, -) -> Result { - let path = entry.path(); - let last_part = path.rsplit('/').next().unwrap(); - if last_part.starts_with(VACUUM2_OBJECT_KEY_PREFIX) { - return Ok(true); - } - let last_modified = if let Some(v) = entry.metadata().last_modified() { - v - } else { - let path = entry.path(); - let meta = op.stat(path).await?; - meta.last_modified().ok_or_else(|| { - ErrorCode::StorageOther(format!( - "Failed to get `last_modified` metadata of the entry '{}'", - path - )) - })? - }; - - Ok(last_modified + ASSUMPTION_MAX_TXN_DURATION < gc_root_meta_ts) -} - -async fn list_until_timestamp( - fuse_table: &FuseTable, - path: &str, - until: DateTime, - need_one_more: bool, - gc_root_meta_ts: Option>, -) -> Result> { - let uuid = uuid_from_date_time(until); - let uuid_str = uuid.simple().to_string(); - - // extract the most significant 48 bits, which is 12 characters - let timestamp_component = &uuid_str[..12]; - let until = format!( - "{}{}{}", - path, VACUUM2_OBJECT_KEY_PREFIX, timestamp_component - ); - list_until_prefix(fuse_table, path, &until, need_one_more, gc_root_meta_ts).await -} - -async fn read_snapshot_from_location( - fuse_table: &FuseTable, - path: &str, -) -> Result> { - let reader = MetaReaders::table_snapshot_reader(fuse_table.get_operator()); - let ver = TableMetaLocationGenerator::snapshot_version(path); - info!("read snapshot from location: {}, version: {}", path, ver); - let params = LoadParams { - location: path.to_owned(), - len_hint: None, - ver, - put_cache: false, - }; - reader.read(¶ms).await -} - async fn select_gc_root( ctx: &Arc, fuse_table: &FuseTable, @@ -632,17 +470,18 @@ async fn select_gc_root( is_vacuum_all: bool, respect_flash_back: Option>, ) -> Result, Vec, DateTime)>> { + let op = fuse_table.get_operator(); let gc_root_path = if is_vacuum_all { // safe to unwrap, or we should have stopped vacuuming in set_lvt() fuse_table.snapshot_loc().unwrap() } else if let Some(lvt) = respect_flash_back { let latest_location = fuse_table.snapshot_loc().unwrap(); let gc_root = fuse_table - .find(ctx, latest_location, |snapshot| { + .find_location(ctx, latest_location, |snapshot| { snapshot.timestamp.is_some_and(|ts| ts <= lvt) }) - .await? 
-        .snapshot_loc();
+        .await
+        .ok();
         let Some(gc_root) = gc_root else {
             info!("no gc_root found, stop vacuuming");
             return Ok(None);
         };
@@ -653,9 +492,12 @@
         info!("no snapshots before lvt, stop vacuuming");
         return Ok(None);
     }
-    let anchor =
-        read_snapshot_from_location(fuse_table, snapshots_before_lvt.last().unwrap().path())
-            .await?;
+    let (anchor, _) = SnapshotsIO::read_snapshot(
+        snapshots_before_lvt.last().unwrap().path().to_owned(),
+        op.clone(),
+        false,
+    )
+    .await?;
     let Some((gc_root_id, gc_root_ver)) = anchor.prev_snapshot_id else {
         info!("anchor has no prev_snapshot_id, stop vacuuming");
         return Ok(None);
     };
@@ -671,7 +513,7 @@
     };
 
     let dal = fuse_table.get_operator_ref();
-    let gc_root = read_snapshot_from_location(fuse_table, &gc_root_path).await;
+    let gc_root = SnapshotsIO::read_snapshot(gc_root_path.clone(), op.clone(), false).await;
 
     let gc_root_meta_ts = match dal.stat(&gc_root_path).await {
         Ok(v) => v.last_modified().ok_or_else(|| {
@@ -691,7 +533,7 @@
     };
 
     match gc_root {
-        Ok(gc_root) => {
+        Ok((gc_root, _)) => {
             info!("gc_root found: {:?}", gc_root);
 
             let mut gc_candidates = Vec::with_capacity(snapshots_before_lvt.len());
@@ -750,3 +592,205 @@ fn slice_summary<T: Debug>(s: &[T]) -> String {
         format!("{:?}", s)
     }
 }
+
+/// Result of vacuum processing for table refs (branches and tags)
+struct RefVacuumInfo {
+    /// GC root snapshots from refs (to protect their segments and blocks)
+    ref_gc_roots: Vec<Arc<TableSnapshot>>,
+    /// The oldest gc_root_meta_ts among all refs
+    gc_root_meta_ts: Option<DateTime<Utc>>,
+    /// The oldest gc_root_ts among all refs
+    gc_root_timestamp: Option<DateTime<Utc>>,
+    /// The files (including ref snapshots and expired refs' directories) to be cleaned up
+    files_to_gc: Vec<String>,
+}
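`RefVacuumInfo` feeds back into `do_vacuum2` above, where the per-ref timestamps are folded into the main branch's GC boundary by taking minima. A standalone restatement of that fold; the function name is illustrative:

```rust
use chrono::{DateTime, Utc};

/// Fold an optional per-ref boundary into the main branch's boundary, as
/// do_vacuum2 does with `ref_info`: taking the older (minimum) timestamp
/// keeps every object that any branch or tag can still reach.
fn merge_gc_boundary(main_ts: DateTime<Utc>, ref_ts: Option<DateTime<Utc>>) -> DateTime<Utc> {
    ref_ts.map_or(main_ts, |t| t.min(main_ts))
}
```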
+                {
+                    Some((location, sn)) => (location, sn),
+                    None => {
+                        // No gc_root found, use snapshot history to find earliest
+                        let earliest_snap = fuse_table
+                            .find_earliest_snapshot_via_history(ref_name, snapshot_ref)
+                            .await?;
+                        (snapshot_ref.loc.clone(), earliest_snap)
+                    }
+                };
+
+                if snapshot_ref.loc != gc_root_location {
+                    // Only collect snapshot timestamps when the GC root is NOT the head location.
+                    // The head location serves as the current root and does not participate in
+                    // the minimum timestamp calculation.
+                    let meta = fuse_table
+                        .get_operator_ref()
+                        .stat(&gc_root_location)
+                        .await?;
+
+                    let meta_ts = meta.last_modified().ok_or_else(|| {
+                        ErrorCode::StorageOther(format!(
+                            "Failed to get `last_modified` metadata of snapshot '{}'",
+                            gc_root_location
+                        ))
+                    })?;
+
+                    // Update minimum metadata timestamp
+                    gc_root_meta_ts = Some(gc_root_meta_ts.map_or(meta_ts, |cur| cur.min(meta_ts)));
+
+                    // Update minimum snapshot timestamp (committed snapshots always carry one)
+                    let gc_root_ts = gc_root_snap.timestamp.unwrap();
+                    gc_root_timestamp =
+                        Some(gc_root_timestamp.map_or(gc_root_ts, |cur| cur.min(gc_root_ts)));
+                }
+                ref_gc_roots.push(gc_root_snap);
+            }
+        }
+    }
+
+    if !expired_refs.is_empty() {
+        let start_update = std::time::Instant::now();
+        files_to_gc = fuse_table
+            .update_table_refs_meta(ctx, &expired_refs)
+            .await?;
+        ctx.set_status_info(&format!(
+            "Updated table meta for table {}, elapsed: {:?}",
+            table_info.desc,
+            start_update.elapsed()
+        ));
+    }
+
+    // Purge ref snapshots that fell out of retention
+    if !ref_snapshots_to_gc.is_empty() {
+        let file_op = Files::create(ctx.clone(), op.clone());
+        file_op.remove_file_in_batch(&ref_snapshots_to_gc).await?;
+    }
+
+    let expired_vec = expired_refs.into_iter().collect::<Vec<_>>();
+    ctx.set_status_info(&format!(
+        "Processed snapshot refs for table {}, elapsed: {:?}, expired_refs: {}, ref_snapshots_to_gc: {}, ref_gc_root_meta_ts: {:?}",
+        table_info.desc,
+        start.elapsed(),
+        slice_summary(&expired_vec),
+        slice_summary(&ref_snapshots_to_gc),
+        gc_root_meta_ts
+    ));
+
+    files_to_gc.extend(ref_snapshots_to_gc.into_iter());
+    Ok(RefVacuumInfo {
+        ref_gc_roots,
+        gc_root_meta_ts,
+        gc_root_timestamp,
+        files_to_gc,
+    })
+}
+
+/// Process branch gc_root: find gc_root, collect snapshots to GC, and update collections
+#[async_backtrace::framed]
+async fn process_branch_gc_root(
+    fuse_table: &FuseTable,
+    branch_id: u64,
+    head: &str,
+    snapshots_before_retention: &[Entry],
+    ref_snapshots_to_gc: &mut Vec<String>,
+) -> Result<Option<(String, Arc<TableSnapshot>)>> {
+    if snapshots_before_retention.is_empty() {
+        return Ok(None);
+    }
+
+    let op = fuse_table.get_operator();
+    // Read the last snapshot (oldest one)
+    let last_snapshot_path = snapshots_before_retention.last().unwrap().path();
+    let (last_snapshot, _) =
+        SnapshotsIO::read_snapshot(last_snapshot_path.to_string(), op.clone(), false).await?;
+    // If last_snapshot_path is head, use head as gc_root and clean up all snapshots before retention
+    if last_snapshot_path == head {
+        // All snapshots before retention can be cleaned up, except the last one (head itself as gc_root)
+        let len = snapshots_before_retention.len();
+        for snapshot in snapshots_before_retention.iter().take(len - 1) {
+            ref_snapshots_to_gc.push(snapshot.path().to_owned());
+        }
+        return Ok(Some((head.to_string(), last_snapshot)));
+    }
+
+    // Get its prev_snapshot_id as gc_root
+    let Some((gc_root_id, gc_root_ver)) = last_snapshot.prev_snapshot_id else {
+        return Ok(None);
+    };
+    let gc_root_path = fuse_table
+        .meta_location_generator()
+        .ref_snapshot_location_from_uuid(branch_id, &gc_root_id, gc_root_ver)?;
+
+    // Try to read gc_root snapshot
+    match SnapshotsIO::read_snapshot(gc_root_path.clone(), op, false).await {
+        Ok((gc_root_snap, _)) => {
+            // Collect snapshots_to_gc
+            let mut gc_candidates = Vec::with_capacity(snapshots_before_retention.len());
+            for snapshot in snapshots_before_retention.iter() {
+                gc_candidates.push(snapshot.path().to_owned());
+            }
+
+            // Find gc_root position in candidates
+            let gc_root_idx = gc_candidates.binary_search(&gc_root_path).ok();
+            let snapshots_to_gc = if let Some(idx) = gc_root_idx {
+                gc_candidates[..idx].to_vec()
+            } else {
+                return Ok(None);
+            };
+            ref_snapshots_to_gc.extend(snapshots_to_gc);
+            Ok(Some((gc_root_path, gc_root_snap)))
+        }
+        Err(_) => Ok(None),
+    }
+}
diff --git a/src/query/ee/src/stream/handler.rs b/src/query/ee/src/stream/handler.rs
index a42e42e3500e8..7d80683430052 100644
--- a/src/query/ee/src/stream/handler.rs
+++ b/src/query/ee/src/stream/handler.rs
@@ -74,6 +74,14 @@ impl StreamHandler for RealStreamHandler {
                 plan.table_database, plan.table_name
             )));
         }
+        if table_info.engine() != "FUSE" {
+            return Err(ErrorCode::IllegalStream(format!(
+                "The table '{}.{}' uses engine '{}', only FUSE tables support stream creation",
+                plan.table_database,
+                plan.table_name,
+                table_info.engine()
+            )));
+        }
         let table_id = table_info.ident.table_id;
 
         if !table.change_tracking_enabled() {
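For orientation, here is a minimal standalone sketch of the candidate-selection step in `process_branch_gc_root` above. It assumes the listed snapshot paths sort lexicographically from oldest to newest (the path values below are made up for illustration); everything strictly before the GC root is eligible for removal, the root itself and anything newer is kept.

// Sketch of the GC-candidate selection, assuming `candidates` is sorted.
fn snapshots_before_root(candidates: &[String], gc_root: &str) -> Option<Vec<String>> {
    // binary_search requires the sorted order; if the root is missing,
    // the real code returns Ok(None) instead of guessing.
    let idx = candidates
        .binary_search_by(|c| c.as_str().cmp(gc_root))
        .ok()?;
    Some(candidates[..idx].to_vec())
}

fn main() {
    // Hypothetical paths, newest last; only entries older than the root qualify.
    let candidates = vec![
        "tbl/_sn/h0001".to_string(),
        "tbl/_sn/h0002".to_string(),
        "tbl/_sn/h0003".to_string(),
    ];
    assert_eq!(
        snapshots_before_root(&candidates, "tbl/_sn/h0002"),
        Some(vec!["tbl/_sn/h0001".to_string()])
    );
}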
diff --git a/src/query/ee/src/table_ref/handler.rs b/src/query/ee/src/table_ref/handler.rs
new file mode 100644
index 0000000000000..ea9b5f0418343
--- /dev/null
+++ b/src/query/ee/src/table_ref/handler.rs
@@ -0,0 +1,213 @@
+// Copyright 2023 Databend Cloud
+//
+// Licensed under the Elastic License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.elastic.co/licensing/elastic-license
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use chrono::Utc;
+use databend_common_base::base::GlobalInstance;
+use databend_common_catalog::table_context::TableContext;
+use databend_common_exception::ErrorCode;
+use databend_common_exception::Result;
+use databend_common_meta_app::schema::SnapshotRef;
+use databend_common_meta_app::schema::UpdateTableMetaReq;
+use databend_common_meta_types::MatchSeq;
+use databend_common_sql::plans::CreateTableRefPlan;
+use databend_common_sql::plans::DropTableRefPlan;
+use databend_common_storages_fuse::FuseTable;
+use databend_common_storages_fuse::FUSE_TBL_REF_PREFIX;
+use databend_enterprise_table_ref_handler::TableRefHandler;
+use databend_enterprise_table_ref_handler::TableRefHandlerWrapper;
+use databend_storages_common_table_meta::meta::TableSnapshot;
+use log::error;
+
+pub struct RealTableRefHandler {}
+
+#[async_trait::async_trait]
+impl TableRefHandler for RealTableRefHandler {
+    #[async_backtrace::framed]
+    async fn do_create_table_ref(
+        &self,
+        ctx: Arc<dyn TableContext>,
+        plan: &CreateTableRefPlan,
+    ) -> Result<()> {
+        let tenant = ctx.get_tenant();
+        let catalog = ctx.get_catalog(&plan.catalog).await?;
+
+        let table = catalog
+            .get_table(&tenant, &plan.database, &plan.table)
+            .await?;
+        let table_info = table.get_table_info();
+        if table.is_temp() {
+            return Err(ErrorCode::IllegalReference(format!(
+                "The table '{}.{}' is temporary, can't create {}",
+                plan.database, plan.table, plan.ref_type
+            )));
+        }
+        if table_info.engine() != "FUSE" {
+            return Err(ErrorCode::IllegalReference(format!(
+                "The table '{}.{}' uses engine '{}', only FUSE tables support {} creation",
+                plan.database,
+                plan.table,
+                table_info.engine(),
+                plan.ref_type
+            )));
+        }
+        let refs = &table_info.meta.refs;
+        if refs.contains_key(&plan.ref_name) {
+            return Err(ErrorCode::ReferenceAlreadyExists(format!(
+                "The table '{}.{}' already has a reference named '{}'",
+                plan.database, plan.table, plan.ref_name
+            )));
+        }
+
+        let fuse_table = FuseTable::try_from_table(table.as_ref())?;
+        if fuse_table.is_transient() {
+            return Err(ErrorCode::IllegalReference(format!(
+                "The table '{}.{}' is transient, can't create {}",
+                plan.database, plan.table, plan.ref_type
+            )));
+        }
+        let seq = table_info.ident.seq;
+        let base_loc = fuse_table.snapshot_loc();
+        let snapshot_loc = match &plan.navigation {
+            Some(navigation) => {
+                fuse_table
+                    .navigate_to_location(ctx.clone(), navigation)
+                    .await?
+            }
+            None => base_loc,
+        };
+
+        let new_snapshot = if let Some(snapshot) = fuse_table
+            .read_table_snapshot_with_location(snapshot_loc)
+            .await?
+        {
+            let mut new_snapshot = TableSnapshot::try_from_previous(
+                snapshot.clone(),
+                Some(seq),
+                ctx.get_table_meta_timestamps(fuse_table, Some(snapshot.clone()))?,
+            )?;
+            new_snapshot.prev_snapshot_id = None;
+            new_snapshot
+        } else {
+            TableSnapshot::try_new(
+                Some(seq),
+                None,
+                table_info.schema().as_ref().clone(),
+                Default::default(),
+                vec![],
+                None,
+                ctx.get_table_meta_timestamps(fuse_table, None)?,
+            )?
+        };
+        // write down new snapshot
+        let new_snapshot_location = fuse_table
+            .meta_location_generator()
+            .ref_snapshot_location_from_uuid(
+                seq,
+                &new_snapshot.snapshot_id,
+                new_snapshot.format_version,
+            )?;
+        let data = new_snapshot.to_bytes()?;
+        fuse_table
+            .get_operator_ref()
+            .write(&new_snapshot_location, data)
+            .await?;
+
+        let expire_at = plan.retain.map(|v| Utc::now() + v);
+        let mut new_table_meta = table_info.meta.clone();
+        new_table_meta
+            .refs
+            .insert(plan.ref_name.clone(), SnapshotRef {
+                id: seq,
+                expire_at,
+                typ: plan.ref_type.clone(),
+                loc: new_snapshot_location,
+            });
+        let req = UpdateTableMetaReq {
+            table_id: table_info.ident.table_id,
+            seq: MatchSeq::Exact(seq),
+            new_table_meta,
+            base_snapshot_location: fuse_table.snapshot_loc(),
+        };
+        catalog.update_single_table_meta(req, table_info).await?;
+        Ok(())
+    }
+
+    #[async_backtrace::framed]
+    async fn do_drop_table_ref(
+        &self,
+        ctx: Arc<dyn TableContext>,
+        plan: &DropTableRefPlan,
+    ) -> Result<()> {
+        let tenant = ctx.get_tenant();
+        let catalog = ctx.get_catalog(&plan.catalog).await?;
+
+        let table = catalog
+            .get_table(&tenant, &plan.database, &plan.table)
+            .await?;
+        let table_info = table.get_table_info();
+        let refs = &table_info.meta.refs;
+        let Some(table_ref) = refs.get(&plan.ref_name) else {
+            return Err(ErrorCode::UnknownReference(format!(
+                "Unknown {} '{}' in table '{}.{}'",
+                plan.ref_type, plan.ref_name, plan.database, plan.table
+            )));
+        };
+        if table_ref.typ != plan.ref_type {
+            return Err(ErrorCode::MismatchedReferenceType(format!(
+                "'{}' is a {} reference, please use 'ALTER TABLE {}.{} DROP {} {}' instead.",
+                plan.ref_name,
+                table_ref.typ,
+                plan.database,
+                plan.table,
+                table_ref.typ,
+                plan.ref_name,
+            )));
+        }
+
+        let fuse_table = FuseTable::try_from_table(table.as_ref())?;
+        let mut new_table_meta = table_info.meta.clone();
+        new_table_meta.refs.remove(&plan.ref_name);
+        let req = UpdateTableMetaReq {
+            table_id: table_info.ident.table_id,
+            seq: MatchSeq::Exact(table_info.ident.seq),
+            new_table_meta,
+            base_snapshot_location: fuse_table.snapshot_loc(),
+        };
+        catalog.update_single_table_meta(req, table_info).await?;
+
+        // clear the ref snapshot.
+        let operator = fuse_table.get_operator_ref();
+        let dir = format!(
+            "{}/{}/{}/",
+            FuseTable::parse_storage_prefix_from_table_info(table_info)?,
+            FUSE_TBL_REF_PREFIX,
+            plan.ref_name,
+        );
+        operator.remove_all(&dir).await.inspect_err(|err| {
+            error!("failed to remove all in directory {}: {}", dir, err);
+        })?;
+        Ok(())
+    }
+}
+
+impl RealTableRefHandler {
+    pub fn init() -> Result<()> {
+        let handler = RealTableRefHandler {};
+        let wrapper = TableRefHandlerWrapper::new(Box::new(handler));
+        GlobalInstance::set(Arc::new(wrapper));
+        Ok(())
+    }
+}
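The `init`/`get_table_ref_handler` pair above follows a process-global registry pattern: the enterprise build registers its implementation once at startup, and interpreters later fetch it by type. A rough standalone analogue, using `std::sync::OnceLock` in place of Databend's `GlobalInstance` (the trait and names here are illustrative stand-ins, not the real API):

use std::sync::{Arc, OnceLock};

trait TableRefHandler: Send + Sync {
    fn name(&self) -> &'static str;
}

struct RealHandler;

impl TableRefHandler for RealHandler {
    fn name(&self) -> &'static str {
        "real"
    }
}

// Stand-in for GlobalInstance: one write at startup, many reads afterwards.
static HANDLER: OnceLock<Arc<dyn TableRefHandler>> = OnceLock::new();

fn init() {
    // Mirrors RealTableRefHandler::init(); a second registration is ignored.
    let _ = HANDLER.set(Arc::new(RealHandler));
}

fn get_table_ref_handler() -> Arc<dyn TableRefHandler> {
    HANDLER.get().expect("handler not registered").clone()
}

fn main() {
    init();
    assert_eq!(get_table_ref_handler().name(), "real");
}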
diff --git a/src/query/ee/src/storages/fuse/io/mod.rs b/src/query/ee/src/table_ref/mod.rs
similarity index 92%
rename from src/query/ee/src/storages/fuse/io/mod.rs
rename to src/query/ee/src/table_ref/mod.rs
index 071c8cdd0c182..a719a581c520a 100644
--- a/src/query/ee/src/storages/fuse/io/mod.rs
+++ b/src/query/ee/src/table_ref/mod.rs
@@ -12,4 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-pub mod snapshots;
+mod handler;
+
+pub use handler::RealTableRefHandler;
diff --git a/src/query/ee_features/table_ref_handler/Cargo.toml b/src/query/ee_features/table_ref_handler/Cargo.toml
new file mode 100644
index 0000000000000..f0ffd67d966db
--- /dev/null
+++ b/src/query/ee_features/table_ref_handler/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "databend-enterprise-table-ref-handler"
+version = { workspace = true }
+authors = { workspace = true }
+license = { workspace = true }
+publish = { workspace = true }
+edition = { workspace = true }
+
+[dependencies]
+async-backtrace = { workspace = true }
+async-trait = { workspace = true }
+databend-common-base = { workspace = true }
+databend-common-catalog = { workspace = true }
+databend-common-exception = { workspace = true }
+databend-common-sql = { workspace = true }
+
+[build-dependencies]
+
+[lints]
+workspace = true
diff --git a/src/query/ee_features/table_ref_handler/src/handler.rs b/src/query/ee_features/table_ref_handler/src/handler.rs
new file mode 100644
index 0000000000000..bc1f0a39a3439
--- /dev/null
+++ b/src/query/ee_features/table_ref_handler/src/handler.rs
@@ -0,0 +1,68 @@
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use databend_common_base::base::GlobalInstance;
+use databend_common_catalog::table_context::TableContext;
+use databend_common_exception::Result;
+use databend_common_sql::plans::CreateTableRefPlan;
+use databend_common_sql::plans::DropTableRefPlan;
+
+#[async_trait::async_trait]
+pub trait TableRefHandler: Sync + Send {
+    async fn do_create_table_ref(
+        &self,
+        ctx: Arc<dyn TableContext>,
+        plan: &CreateTableRefPlan,
+    ) -> Result<()>;
+
+    async fn do_drop_table_ref(
+        &self,
+        ctx: Arc<dyn TableContext>,
+        plan: &DropTableRefPlan,
+    ) -> Result<()>;
+}
+
+pub struct TableRefHandlerWrapper {
+    handler: Box<dyn TableRefHandler>,
+}
+
+impl TableRefHandlerWrapper {
+    pub fn new(handler: Box<dyn TableRefHandler>) -> Self {
+        Self { handler }
+    }
+
+    #[async_backtrace::framed]
+    pub async fn do_create_table_ref(
+        &self,
+        ctx: Arc<dyn TableContext>,
+        plan: &CreateTableRefPlan,
+    ) -> Result<()> {
+        self.handler.do_create_table_ref(ctx, plan).await
+    }
+
+    #[async_backtrace::framed]
+    pub async fn do_drop_table_ref(
+        &self,
+        ctx: Arc<dyn TableContext>,
+        plan: &DropTableRefPlan,
+    ) -> Result<()> {
+        self.handler.do_drop_table_ref(ctx, plan).await
+    }
+}
+
+pub fn get_table_ref_handler() -> Arc<TableRefHandlerWrapper> {
+    GlobalInstance::get()
+}
diff --git a/src/query/ee_features/table_ref_handler/src/lib.rs b/src/query/ee_features/table_ref_handler/src/lib.rs
new file mode 100644
index 0000000000000..0a3a4990331a6
--- /dev/null
+++ b/src/query/ee_features/table_ref_handler/src/lib.rs
@@ -0,0 +1,19 @@
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod handler;
+
+pub use handler::get_table_ref_handler;
+pub use handler::TableRefHandler;
+pub use handler::TableRefHandlerWrapper;
diff --git a/src/query/expression/Cargo.toml b/src/query/expression/Cargo.toml
index 2825e58c73f4d..28a24c53c8cdf 100644
--- a/src/query/expression/Cargo.toml
+++ b/src/query/expression/Cargo.toml
@@ -64,7 +64,6 @@ typetag = { workspace = true }
 unicode-segmentation = { workspace = true }
 
 [dev-dependencies]
-arrow-ord = { workspace = true }
 divan = { workspace = true }
 goldenfile = { workspace = true }
 pretty_assertions = { workspace = true }
diff --git a/src/query/functions/Cargo.toml b/src/query/functions/Cargo.toml
index 150381fe59cb5..df8bd94679075 100644
--- a/src/query/functions/Cargo.toml
+++ b/src/query/functions/Cargo.toml
@@ -12,7 +12,6 @@ blake3 = { workspace = true }
 borsh = { workspace = true }
 bstr = { workspace = true }
 bumpalo = { workspace = true }
-crc32fast = { workspace = true }
 ctor = { workspace = true }
 databend-common-ast = { workspace = true }
 databend-common-base = { workspace = true }
@@ -30,10 +29,8 @@ databend-functions-scalar-integer-basic-arithmetic = { workspace = true }
 databend-functions-scalar-math = { workspace = true }
 databend-functions-scalar-numeric-basic-arithmetic = { workspace = true }
 geo = { workspace = true }
-geohash = { workspace = true }
 geozero = { workspace = true }
 glob = { workspace = true }
-h3o = { workspace = true }
 hex = { workspace = true }
 itertools = { workspace = true }
 jaq-core = { workspace = true }
@@ -42,15 +39,12 @@ jaq-parse = { workspace = true }
 jaq-std = { workspace = true }
 jiff = { workspace = true }
 jsonb = { workspace = true }
-lexical-core = { workspace = true }
 libm = { workspace = true }
 log = { workspace = true }
 match-template = { workspace = true }
 md-5 = { workspace = true }
 naive-cityhash = { workspace = true }
 num-traits = { workspace = true }
-once_cell = { workspace = true }
-proj4rs = { workspace = true }
 proptest = { workspace = true }
 rand = { workspace = true }
 regex = { workspace = true }
@@ -61,7 +55,6 @@ sha2 = { workspace = true }
 simdutf8 = { workspace = true }
 simple_hll = { workspace = true }
 siphasher = { workspace = true }
-strength_reduce = { workspace = true }
 stringslice = { workspace = true }
 twox-hash = { workspace = true }
 unicase = { workspace = true }
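The `table_ref_handler` feature crate above only defines the seam between the open-source and enterprise halves: a trait plus a wrapper that owns a boxed implementation and forwards calls to it. A compact synchronous sketch of that shape (the real methods are async via `async_trait` and take `Arc<dyn TableContext>` plus the plan structs; names here are illustrative):

// Sync stand-in showing only the delegation shape.
trait Handler: Send + Sync {
    fn create_ref(&self, ref_name: &str) -> Result<(), String>;
}

struct Wrapper {
    inner: Box<dyn Handler>,
}

impl Wrapper {
    fn new(inner: Box<dyn Handler>) -> Self {
        Self { inner }
    }

    // One stable call site; the real wrapper also adds
    // #[async_backtrace::framed] instrumentation here.
    fn create_ref(&self, ref_name: &str) -> Result<(), String> {
        self.inner.create_ref(ref_name)
    }
}

// A no-op implementation, standing in for the enterprise one.
struct Noop;

impl Handler for Noop {
    fn create_ref(&self, _ref_name: &str) -> Result<(), String> {
        Ok(())
    }
}

fn main() {
    let wrapper = Wrapper::new(Box::new(Noop));
    assert!(wrapper.create_ref("v1").is_ok());
}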
diff --git a/src/query/service/Cargo.toml b/src/query/service/Cargo.toml
index 33fb813df5289..ad4cd5251e9b0 100644
--- a/src/query/service/Cargo.toml
+++ b/src/query/service/Cargo.toml
@@ -105,6 +105,7 @@ databend-enterprise-hilbert-clustering = { workspace = true }
 databend-enterprise-resources-management = { workspace = true }
 databend-enterprise-row-access-policy-feature = { workspace = true }
 databend-enterprise-stream-handler = { workspace = true }
+databend-enterprise-table-ref-handler = { workspace = true }
 databend-enterprise-vacuum-handler = { workspace = true }
 databend-enterprise-virtual-column = { workspace = true }
 databend-storages-common-blocks = { workspace = true }
@@ -200,7 +201,6 @@ url = { workspace = true }
 wiremock = { workspace = true }
 
 [build-dependencies]
-databend-common-building = { workspace = true }
 
 [lints]
 workspace = true
diff --git a/src/query/service/src/interpreters/access/privilege_access.rs b/src/query/service/src/interpreters/access/privilege_access.rs
index 3c354b9e4d188..f69354370922a 100644
--- a/src/query/service/src/interpreters/access/privilege_access.rs
+++ b/src/query/service/src/interpreters/access/privilege_access.rs
@@ -1509,7 +1509,13 @@ impl AccessChecker for PrivilegeAccess {
                 self.validate_table_access(&plan.catalog, &plan.database, &plan.table, UserPrivilegeType::Alter, false, false).await?
             }
             Plan::DropTableClusterKey(plan) => {
-                self.validate_table_access(&plan.catalog, &plan.database, &plan.table, UserPrivilegeType::Drop, false, false).await?
+                self.validate_table_access(&plan.catalog, &plan.database, &plan.table, UserPrivilegeType::Alter, false, false).await?
+            }
+            Plan::CreateTableRef(plan) => {
+                self.validate_table_access(&plan.catalog, &plan.database, &plan.table, UserPrivilegeType::Alter, false, false).await?
+            }
+            Plan::DropTableRef(plan) => {
+                self.validate_table_access(&plan.catalog, &plan.database, &plan.table, UserPrivilegeType::Alter, false, false).await?
             }
             Plan::RefreshTableCache(_) | Plan::RefreshDatabaseCache(_) => {
                 // Only Iceberg support this plan
diff --git a/src/query/service/src/interpreters/interpreter_factory.rs b/src/query/service/src/interpreters/interpreter_factory.rs
index a43aca4c76fac..851f3b80b892a 100644
--- a/src/query/service/src/interpreters/interpreter_factory.rs
+++ b/src/query/service/src/interpreters/interpreter_factory.rs
@@ -87,6 +87,8 @@ use crate::interpreters::interpreter_system_action::SystemActionInterpreter;
 use crate::interpreters::interpreter_table_add_constraint::AddTableConstraintInterpreter;
 use crate::interpreters::interpreter_table_create::CreateTableInterpreter;
 use crate::interpreters::interpreter_table_drop_constraint::DropTableConstraintInterpreter;
+use crate::interpreters::interpreter_table_ref_create::CreateTableRefInterpreter;
+use crate::interpreters::interpreter_table_ref_drop::DropTableRefInterpreter;
 use crate::interpreters::interpreter_table_revert::RevertTableInterpreter;
 use crate::interpreters::interpreter_table_row_access_add::AddTableRowAccessPolicyInterpreter;
 use crate::interpreters::interpreter_table_unset_options::UnsetOptionsInterpreter;
@@ -438,6 +440,14 @@ impl InterpreterFactory {
             Plan::DropAllTableRowAccessPolicies(p) => Ok(Arc::new(
                 DropAllTableRowAccessPoliciesInterpreter::try_create(ctx, *p.clone())?,
             )),
+            Plan::CreateTableRef(p) => Ok(Arc::new(CreateTableRefInterpreter::try_create(
+                ctx,
+                *p.clone(),
+            )?)),
+            Plan::DropTableRef(p) => Ok(Arc::new(DropTableRefInterpreter::try_create(
+                ctx,
+                *p.clone(),
+            )?)),
 
             // Views
             Plan::CreateView(create_view) => Ok(Arc::new(CreateViewInterpreter::try_create(
diff --git a/src/query/service/src/interpreters/interpreter_optimize_purge.rs b/src/query/service/src/interpreters/interpreter_optimize_purge.rs
index a401147a272fa..4286e73964dad 100644
--- a/src/query/service/src/interpreters/interpreter_optimize_purge.rs
+++ b/src/query/service/src/interpreters/interpreter_optimize_purge.rs
@@ -77,10 +77,7 @@ pub(crate) async fn purge(
     // check mutability
     table.check_mutable()?;
 
-    let keep_latest = true;
-    let res = table
-        .purge(ctx, instant, num_snapshot_limit, keep_latest, false)
-        .await?;
+    let res = table.purge(ctx, instant, num_snapshot_limit, false).await?;
     assert!(res.is_none());
     Ok(())
 }
diff --git a/src/query/service/src/interpreters/interpreter_table_ref_create.rs b/src/query/service/src/interpreters/interpreter_table_ref_create.rs
new file mode 100644
index 0000000000000..391016e9c2c6b
--- /dev/null
+++ b/src/query/service/src/interpreters/interpreter_table_ref_create.rs
@@ -0,0 +1,61 @@
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use databend_common_exception::Result;
+use databend_common_license::license::Feature;
+use databend_common_license::license_manager::LicenseManagerSwitch;
+use databend_common_sql::plans::CreateTableRefPlan;
+use databend_common_storages_fuse::TableContext;
+use databend_enterprise_table_ref_handler::get_table_ref_handler;
+
+use crate::interpreters::Interpreter;
+use crate::pipelines::PipelineBuildResult;
+use crate::sessions::QueryContext;
+
+pub struct CreateTableRefInterpreter {
+    ctx: Arc<QueryContext>,
+    plan: CreateTableRefPlan,
+}
+
+impl CreateTableRefInterpreter {
+    pub fn try_create(ctx: Arc<QueryContext>, plan: CreateTableRefPlan) -> Result<Self> {
+        Ok(CreateTableRefInterpreter { ctx, plan })
+    }
+}
+
+#[async_trait::async_trait]
+impl Interpreter for CreateTableRefInterpreter {
+    fn name(&self) -> &str {
+        "CreateTableRefInterpreter"
+    }
+
+    fn is_ddl(&self) -> bool {
+        true
+    }
+
+    #[async_backtrace::framed]
+    async fn execute2(&self) -> Result<PipelineBuildResult> {
+        LicenseManagerSwitch::instance()
+            .check_enterprise_enabled(self.ctx.get_license_key(), Feature::TableRef)?;
+
+        let handler = get_table_ref_handler();
+        handler
+            .do_create_table_ref(self.ctx.clone(), &self.plan)
+            .await?;
+
+        Ok(PipelineBuildResult::create())
+    }
+}
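Both table-ref interpreters follow the same gate-then-delegate flow: verify the enterprise license first, then call the registered handler. A minimal sketch of that control flow, with stand-ins for the license check and handler (these are not the real Databend APIs):

// Stand-ins: the real gate is LicenseManagerSwitch::check_enterprise_enabled
// against Feature::TableRef; the delegate is do_create_table_ref.
#[derive(Debug)]
struct LicenseError;

fn check_enterprise_enabled(license_key: &str, _feature: &str) -> Result<(), LicenseError> {
    if license_key.is_empty() {
        Err(LicenseError)
    } else {
        Ok(())
    }
}

fn execute_create_table_ref(license_key: &str) -> Result<(), LicenseError> {
    // Gate first: without a valid license the handler is never reached.
    check_enterprise_enabled(license_key, "table_ref")?;
    // ... delegate to the registered table-ref handler here ...
    Ok(())
}

fn main() {
    assert!(execute_create_table_ref("").is_err());
    assert!(execute_create_table_ref("some-valid-key").is_ok());
}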
diff --git a/src/query/service/src/interpreters/interpreter_table_ref_drop.rs b/src/query/service/src/interpreters/interpreter_table_ref_drop.rs
new file mode 100644
index 0000000000000..50de312f2f9d2
--- /dev/null
+++ b/src/query/service/src/interpreters/interpreter_table_ref_drop.rs
@@ -0,0 +1,61 @@
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use databend_common_exception::Result;
+use databend_common_license::license::Feature;
+use databend_common_license::license_manager::LicenseManagerSwitch;
+use databend_common_sql::plans::DropTableRefPlan;
+use databend_common_storages_fuse::TableContext;
+use databend_enterprise_table_ref_handler::get_table_ref_handler;
+
+use crate::interpreters::Interpreter;
+use crate::pipelines::PipelineBuildResult;
+use crate::sessions::QueryContext;
+
+pub struct DropTableRefInterpreter {
+    ctx: Arc<QueryContext>,
+    plan: DropTableRefPlan,
+}
+
+impl DropTableRefInterpreter {
+    pub fn try_create(ctx: Arc<QueryContext>, plan: DropTableRefPlan) -> Result<Self> {
+        Ok(DropTableRefInterpreter { ctx, plan })
+    }
+}
+
+#[async_trait::async_trait]
+impl Interpreter for DropTableRefInterpreter {
+    fn name(&self) -> &str {
+        "DropTableRefInterpreter"
+    }
+
+    fn is_ddl(&self) -> bool {
+        true
+    }
+
+    #[async_backtrace::framed]
+    async fn execute2(&self) -> Result<PipelineBuildResult> {
+        LicenseManagerSwitch::instance()
+            .check_enterprise_enabled(self.ctx.get_license_key(), Feature::TableRef)?;
+
+        let handler = get_table_ref_handler();
+        handler
+            .do_drop_table_ref(self.ctx.clone(), &self.plan)
+            .await?;
+
+        Ok(PipelineBuildResult::create())
+    }
+}
diff --git a/src/query/service/src/interpreters/mod.rs b/src/query/service/src/interpreters/mod.rs
index 1d014c1965c99..3d8ca41171653 100644
--- a/src/query/service/src/interpreters/mod.rs
+++ b/src/query/service/src/interpreters/mod.rs
@@ -117,6 +117,8 @@ mod interpreter_show_online_nodes;
 mod interpreter_show_warehouses;
 mod interpreter_show_workload_groups;
 mod interpreter_stream_create;
+mod interpreter_table_ref_create;
+mod interpreter_table_ref_drop;
 pub use hook::vacuum_hook::hook_clear_m_cte_temp_table;
 mod interpreter_stream_drop;
 mod interpreter_suspend_warehouse;
diff --git a/src/query/service/src/sessions/query_ctx.rs b/src/query/service/src/sessions/query_ctx.rs
index c13af21deedc1..332423ffdb49e 100644
--- a/src/query/service/src/sessions/query_ctx.rs
+++ b/src/query/service/src/sessions/query_ctx.rs
@@ -510,6 +510,7 @@ impl QueryContext {
         catalog: &str,
         database: &str,
         table: &str,
+        branch: Option<&str>,
         max_batch_size: Option<u64>,
     ) -> Result<Arc<dyn Table>> {
         let table = self
@@ -533,6 +534,12 @@ impl QueryContext {
             }
             _ => table,
         };
+
+        let table = if let Some(branch) = branch {
+            table.with_branch(branch)?
+        } else {
+            table
+        };
         Ok(table)
     }
 
@@ -1441,7 +1448,7 @@ impl TableContext for QueryContext {
         }
 
         let batch_size = self.get_settings().get_stream_consume_batch_size_hint()?;
-        self.get_table_from_shared(catalog, database, table, batch_size)
+        self.get_table_from_shared(catalog, database, table, None, batch_size)
             .await
     }
 
@@ -1455,6 +1462,7 @@ impl TableContext for QueryContext {
         catalog: &str,
         database: &str,
         table: &str,
+        branch: Option<&str>,
         max_batch_size: Option<u64>,
     ) -> Result<Arc<dyn Table>> {
         let final_batch_size = match max_batch_size {
@@ -1473,8 +1481,9 @@ impl TableContext for QueryContext {
         };
 
         let table = self
-            .get_table_from_shared(catalog, database, table, final_batch_size)
+            .get_table_from_shared(catalog, database, table, branch, final_batch_size)
             .await?;
+
         if table.is_stream() {
             let stream = StreamTable::try_from_table(table.as_ref())?;
             let actual_batch_limit = stream.max_batch_size();
diff --git a/src/query/service/src/table_functions/system/table_statistics.rs b/src/query/service/src/table_functions/system/table_statistics.rs
index 67a809e56f9a4..d3f51ac1987eb 100644
--- a/src/query/service/src/table_functions/system/table_statistics.rs
+++ b/src/query/service/src/table_functions/system/table_statistics.rs
@@ -219,6 +219,7 @@ pub async fn get_fuse_table_snapshot(
         snapshot_location,
         format_version,
         meta_location_generator.clone(),
+        fuse_table.get_branch_id(),
     );
 
     if let Some(Ok((snapshot, _v))) = lite_snapshot_stream.take(1).next().await {
diff --git a/src/query/service/tests/it/sql/exec/get_table_bind_test.rs b/src/query/service/tests/it/sql/exec/get_table_bind_test.rs
index 61f09377b0d27..ab30038fde08e 100644
--- a/src/query/service/tests/it/sql/exec/get_table_bind_test.rs
+++ b/src/query/service/tests/it/sql/exec/get_table_bind_test.rs
@@ -872,6 +872,7 @@ impl TableContext for CtxDelegation {
         catalog: &str,
         database: &str,
         table: &str,
+        _branch: Option<&str>,
         _max_batch_size: Option<u64>,
     ) -> Result<Arc<dyn Table>> {
         self.get_table(catalog, database, table).await
diff --git a/src/query/service/tests/it/storages/fuse/conflict.rs b/src/query/service/tests/it/storages/fuse/conflict.rs
index 663b8ad3b7c9c..3838b2f5255f6 100644
--- a/src/query/service/tests/it/storages/fuse/conflict.rs
+++ b/src/query/service/tests/it/storages/fuse/conflict.rs
@@ -39,14 +39,14 @@ use crate::storages::fuse::utils::new_empty_snapshot;
 /// so the delete operation cannot be applied
 #[test]
 fn test_unresolvable_delete_conflict() {
-    let mut base_snapshot = new_empty_snapshot(TableSchema::default(), None);
+    let mut base_snapshot = new_empty_snapshot(TableSchema::default());
     base_snapshot.segments = vec![
         ("1".to_string(), 1),
         ("2".to_string(), 1),
         ("3".to_string(), 1),
     ];
 
-    let mut latest_snapshot = new_empty_snapshot(TableSchema::default(), None);
+    let mut latest_snapshot = new_empty_snapshot(TableSchema::default());
     latest_snapshot.segments = vec![("1".to_string(), 1), ("4".to_string(), 1)];
 
     let ctx = ConflictResolveContext::ModifiedSegmentExistsInLatest(SnapshotChanges {
@@ -81,7 +81,7 @@ fn test_unresolvable_delete_conflict() {
 ///
 /// the delete operation is merged into the latest snapshot, by removing segments 2, 3, and adding segment 8 in the latest snapshot
 fn test_resolvable_delete_conflict() {
-    let mut base_snapshot = new_empty_snapshot(TableSchema::default(), None);
+    let mut base_snapshot = new_empty_snapshot(TableSchema::default());
     base_snapshot.segments = vec![
         ("1".to_string(), 1),
         ("2".to_string(), 1),
@@ -107,7 +107,7 @@ fn test_resolvable_delete_conflict() {
         additional_stats_meta: None,
     };
 
-    let mut latest_snapshot = new_empty_snapshot(TableSchema::default(), None);
+    let mut latest_snapshot = new_empty_snapshot(TableSchema::default());
     latest_snapshot.segments = vec![
         ("2".to_string(), 1),
         ("3".to_string(), 1),
@@ -226,7 +226,7 @@ fn test_resolvable_delete_conflict() {
 ///
 /// the replace operation is merged into the latest snapshot, by removing segments 2, 3, and adding segment 6,5 in the latest snapshot
 fn test_resolvable_replace_conflict() {
-    let mut base_snapshot = new_empty_snapshot(TableSchema::default(), None);
+    let mut base_snapshot = new_empty_snapshot(TableSchema::default());
     base_snapshot.segments = vec![
         ("1".to_string(), 1),
         ("2".to_string(), 1),
@@ -252,7 +252,7 @@ fn test_resolvable_replace_conflict() {
         additional_stats_meta: None,
     };
 
-    let mut latest_snapshot = new_empty_snapshot(TableSchema::default(), None);
+    let mut latest_snapshot = new_empty_snapshot(TableSchema::default());
     latest_snapshot.segments = vec![
         ("2".to_string(), 1),
         ("3".to_string(), 1),
diff --git a/src/query/service/tests/it/storages/fuse/operations/analyze.rs b/src/query/service/tests/it/storages/fuse/operations/analyze.rs
index 62178934cf30f..567667f5f36c9 100644
--- a/src/query/service/tests/it/storages/fuse/operations/analyze.rs
+++ b/src/query/service/tests/it/storages/fuse/operations/analyze.rs
@@ -40,7 +40,7 @@ async fn test_fuse_snapshot_analyze() -> Result<()> {
         let snapshot_files = fuse_table.list_snapshot_files().await?;
         let table_ctx: Arc<dyn TableContext> = ctx.clone();
         fuse_table
-            .do_purge(&table_ctx, snapshot_files, None, true, false)
+            .do_purge(&table_ctx, snapshot_files, None, false)
             .await?;
 
         check_data_dir(&fixture, case_name, 1, 1, 1, 1, 1, 1, Some(()), Some(())).await?;
@@ -110,7 +110,7 @@ async fn test_fuse_snapshot_analyze_purge() -> Result<()> {
         let snapshot_files = fuse_table.list_snapshot_files().await?;
         let table_ctx: Arc<dyn TableContext> = ctx.clone();
         fuse_table
-            .do_purge(&table_ctx, snapshot_files, None, true, false)
+            .do_purge(&table_ctx, snapshot_files, None, false)
             .await?;
 
         check_data_dir(&fixture, case_name, 1, 1, 2, 2, 2, 2, Some(()), Some(())).await?;
diff --git a/src/query/service/tests/it/storages/fuse/operations/commit.rs b/src/query/service/tests/it/storages/fuse/operations/commit.rs
index 7c30b06b2f4bb..9ecf154edf28b 100644
--- a/src/query/service/tests/it/storages/fuse/operations/commit.rs
+++ b/src/query/service/tests/it/storages/fuse/operations/commit.rs
@@ -725,6 +725,7 @@ impl TableContext for CtxDelegation {
         _catalog: &str,
         _database: &str,
         _table: &str,
+        _branch: Option<&str>,
         _max_batch_size: Option<u64>,
     ) -> Result<Arc<dyn Table>> {
         todo!()
diff --git a/src/query/service/tests/it/storages/fuse/operations/gc.rs b/src/query/service/tests/it/storages/fuse/operations/gc.rs
index 4d73008d6f25d..e71e4445c63ae 100644
--- a/src/query/service/tests/it/storages/fuse/operations/gc.rs
+++ b/src/query/service/tests/it/storages/fuse/operations/gc.rs
@@ -44,10 +44,9 @@ async fn test_fuse_purge_normal_case() -> Result<()> {
     let table = fixture.latest_default_table().await?;
     let fuse_table = FuseTable::try_from_table(table.as_ref())?;
     let snapshot_files = fuse_table.list_snapshot_files().await?;
-    let keep_last_snapshot = true;
     let table_ctx: Arc<dyn TableContext> = ctx.clone();
     fuse_table
-        .do_purge(&table_ctx, snapshot_files, None, keep_last_snapshot, false)
+        .do_purge(&table_ctx, snapshot_files, None, false)
         .await?;
 
     let expected_num_of_snapshot = 1;
@@ -103,11 +102,10 @@ async fn test_fuse_purge_normal_orphan_snapshot() -> Result<()> {
     }
 
     // do_gc
-    let keep_last_snapshot = true;
    let table_ctx: Arc<dyn TableContext> = ctx.clone();
    let snapshot_files = fuse_table.list_snapshot_files().await?;
     fuse_table
-        .do_purge(&table_ctx, snapshot_files, None, keep_last_snapshot, false)
+        .do_purge(&table_ctx, snapshot_files, None, false)
         .await?;
 
     // expects two snapshot there
@@ -241,11 +239,10 @@ async fn test_fuse_purge_orphan_retention() -> Result<()> {
     }
 
     // do_gc
-    let keep_last_snapshot = true;
     let table_ctx: Arc<dyn TableContext> = ctx.clone();
     let snapshot_files = fuse_table.list_snapshot_files().await?;
     fuse_table
-        .do_purge(&table_ctx, snapshot_files, None, keep_last_snapshot, false)
+        .do_purge(&table_ctx, snapshot_files, None, false)
         .await?;
 
     let expected_num_of_snapshot = 2;
@@ -291,9 +288,8 @@ async fn test_fuse_purge_older_version() -> Result<()> {
     let table = fuse_table
         .navigate_to_time_point(&table_ctx, snapshot_loc, time_point)
         .await?;
-    let keep_last_snapshot = true;
     table
-        .do_purge(&table_ctx, snapshot_files, None, keep_last_snapshot, false)
+        .do_purge(&table_ctx, snapshot_files, None, false)
         .await?;
 
     let expected_num_of_snapshot = 2;
@@ -332,7 +328,7 @@ async fn test_fuse_purge_older_version() -> Result<()> {
     {
         let snapshot_files = fuse_table.list_snapshot_files().await?;
         fuse_table
-            .do_purge(&table_ctx, snapshot_files, None, true, false)
+            .do_purge(&table_ctx, snapshot_files, None, false)
             .await?;
 
         let expected_num_of_snapshot = 1;
@@ -355,31 +351,5 @@ async fn test_fuse_purge_older_version() -> Result<()> {
             .await?;
     }
 
-    // keep_last_snapshot is false. All of snapshots will be purged.
-    {
-        let snapshot_files = fuse_table.list_snapshot_files().await?;
-        fuse_table
-            .do_purge(&table_ctx, snapshot_files, None, false, false)
-            .await?;
-        let expected_num_of_snapshot = 0;
-        let expected_num_of_segment = 0;
-        let expected_num_of_blocks = 0;
-        let expected_num_of_index = expected_num_of_blocks;
-        let expected_num_of_segment_stats = expected_num_of_segment;
-        check_data_dir(
-            &fixture,
-            "do_gc: purge last snapshot",
-            expected_num_of_snapshot,
-            0,
-            expected_num_of_segment,
-            expected_num_of_blocks,
-            expected_num_of_index,
-            expected_num_of_segment_stats,
-            Some(()),
-            None,
-        )
-        .await?;
-    }
-
     Ok(())
 }
diff --git a/src/query/service/tests/it/storages/fuse/operations/mutation/recluster_mutator.rs b/src/query/service/tests/it/storages/fuse/operations/mutation/recluster_mutator.rs
index c70c37a235cb8..9bbc642c1893a 100644
--- a/src/query/service/tests/it/storages/fuse/operations/mutation/recluster_mutator.rs
+++ b/src/query/service/tests/it/storages/fuse/operations/mutation/recluster_mutator.rs
@@ -136,7 +136,7 @@ async fn test_recluster_mutator_block_select() -> Result<()> {
     test_segment_locations.push(segment_location);
     test_block_locations.push(block_location);
     // unused snapshot.
-    let snapshot = new_empty_snapshot(schema.as_ref().clone(), None);
+    let snapshot = new_empty_snapshot(schema.as_ref().clone());
     let ctx: Arc<dyn TableContext> = ctx.clone();
 
     let segment_locations = create_segment_location_vector(test_segment_locations, None);
diff --git a/src/query/service/tests/it/storages/fuse/operations/navigate.rs b/src/query/service/tests/it/storages/fuse/operations/navigate.rs
index 3cd8d1a90d551..f8eb7b1a11cb6 100644
--- a/src/query/service/tests/it/storages/fuse/operations/navigate.rs
+++ b/src/query/service/tests/it/storages/fuse/operations/navigate.rs
@@ -87,6 +87,7 @@ async fn test_fuse_navigate() -> Result<()> {
         loc.clone(),
         version,
         fuse_table.meta_location_generator().clone(),
+        None,
     )
     .try_collect()
     .await?;
@@ -214,6 +215,7 @@ async fn test_navigate_for_purge() -> Result<()> {
         loc.clone(),
         version,
         fuse_table.meta_location_generator().clone(),
+        None,
     )
     .try_collect()
     .await?;
diff --git a/src/query/service/tests/it/storages/fuse/utils.rs b/src/query/service/tests/it/storages/fuse/utils.rs
index 7ebb753ab17ab..9c142ae99a1aa 100644
--- a/src/query/service/tests/it/storages/fuse/utils.rs
+++ b/src/query/service/tests/it/storages/fuse/utils.rs
@@ -25,9 +25,9 @@ use databend_storages_common_table_meta::meta::Statistics;
 use databend_storages_common_table_meta::meta::TableMetaTimestamps;
 use databend_storages_common_table_meta::meta::TableSnapshot;
 
-pub fn new_empty_snapshot(schema: TableSchema, prev_table_seq: Option<u64>) -> TableSnapshot {
+pub fn new_empty_snapshot(schema: TableSchema) -> TableSnapshot {
     TableSnapshot::try_new(
-        prev_table_seq,
+        None,
         None,
         schema,
         Statistics::default(),
@@ -71,7 +71,7 @@ pub async fn do_purge_test(
     let snapshot_files = fuse_table.list_snapshot_files().await?;
     let table_ctx: Arc<dyn TableContext> = fixture.new_query_ctx().await?;
     fuse_table
-        .do_purge(&table_ctx, snapshot_files, None, true, false)
+        .do_purge(&table_ctx, snapshot_files, None, false)
         .await?;
 
     check_data_dir(
diff --git a/src/query/sql/Cargo.toml b/src/query/sql/Cargo.toml
index 0dd0511d2b83c..03317ba21d77e 100644
--- a/src/query/sql/Cargo.toml
+++ b/src/query/sql/Cargo.toml
@@ -75,7 +75,6 @@ unicase = { workspace = true }
 url = { workspace = true }
 
 [dev-dependencies]
-pretty_assertions = { workspace = true }
 
 [lints]
 workspace = true
diff --git a/src/query/sql/src/planner/binder/bind_mutation/delete.rs b/src/query/sql/src/planner/binder/bind_mutation/delete.rs
index 0e8fcb8d7d671..493ae3fdb554a 100644
--- a/src/query/sql/src/planner/binder/bind_mutation/delete.rs
+++ b/src/query/sql/src/planner/binder/bind_mutation/delete.rs
@@ -16,7 +16,6 @@ use databend_common_ast::ast::DeleteStmt;
 use databend_common_ast::ast::MatchOperation;
 use databend_common_ast::ast::MatchedClause;
 use databend_common_ast::ast::TableReference;
-use databend_common_exception::ErrorCode;
 use databend_common_exception::Result;
 
 use crate::binder::bind_mutation::bind::Mutation;
@@ -35,7 +34,10 @@ impl Binder {
         stamt: &DeleteStmt,
     ) -> Result<Plan> {
         let DeleteStmt {
+            catalog,
+            database,
             table,
+            table_alias,
             selection,
             with,
             ..
@@ -43,20 +45,21 @@ impl Binder {
 
         self.init_cte(bind_context, with)?;
 
-        let target_table_identifier = if let TableReference::Table {
-            catalog,
-            database,
-            table,
-            alias,
-            ..
-        } = table
-        {
-            TableIdentifier::new(self, catalog, database, table, alias)
-        } else {
-            // We do not support USING clause yet.
-            return Err(ErrorCode::Internal(
-                "should not happen, parser should have report error already",
-            ));
+        let target_table_identifier =
+            TableIdentifier::new(self, catalog, database, table, &None, table_alias);
+
+        let target_table_reference = TableReference::Table {
+            span: None,
+            catalog: catalog.clone(),
+            database: database.clone(),
+            table: table.clone(),
+            ref_name: None,
+            alias: table_alias.clone(),
+            temporal: None,
+            with_options: None,
+            pivot: None,
+            unpivot: None,
+            sample: None,
         };
 
         let matched_clause = MatchedClause {
@@ -67,7 +70,7 @@ impl Binder {
         let mutation = Mutation {
             target_table_identifier,
             expression: MutationExpression::Delete {
-                target: table.clone(),
+                target: target_table_reference,
                 from: None,
                 filter: selection.clone(),
             },
diff --git a/src/query/sql/src/planner/binder/bind_mutation/merge.rs b/src/query/sql/src/planner/binder/bind_mutation/merge.rs
index 593eab1837c5b..110139482177c 100644
--- a/src/query/sql/src/planner/binder/bind_mutation/merge.rs
+++ b/src/query/sql/src/planner/binder/bind_mutation/merge.rs
@@ -46,6 +46,7 @@ impl Binder {
             &stmt.catalog,
             &stmt.database,
             &stmt.table_ident,
+            &None,
             &stmt.target_alias,
         );
 
@@ -54,6 +55,7 @@ impl Binder {
             catalog: stmt.catalog.clone(),
             database: stmt.database.clone(),
             table: stmt.table_ident.clone(),
+            ref_name: None,
             alias: stmt.target_alias.clone(),
             temporal: None,
             with_options: None,
diff --git a/src/query/sql/src/planner/binder/bind_mutation/update.rs b/src/query/sql/src/planner/binder/bind_mutation/update.rs
index e18e303edc90b..d65b4462f6c22 100644
--- a/src/query/sql/src/planner/binder/bind_mutation/update.rs
+++ b/src/query/sql/src/planner/binder/bind_mutation/update.rs
@@ -66,13 +66,14 @@ impl Binder {
         self.init_cte(bind_context, with)?;
 
         let target_table_identifier =
-            TableIdentifier::new(self, catalog, database, table, table_alias);
+            TableIdentifier::new(self, catalog, database, table, &None, table_alias);
 
         let target_table_reference = TableReference::Table {
             span: None,
             catalog: catalog.clone(),
             database: database.clone(),
             table: table.clone(),
+            ref_name: None,
             alias: table_alias.clone(),
             temporal: None,
             with_options: None,
diff --git a/src/query/sql/src/planner/binder/bind_query/bind_select.rs b/src/query/sql/src/planner/binder/bind_query/bind_select.rs
index af63b3b93256f..feb8f6b0f0edc 100644
--- a/src/query/sql/src/planner/binder/bind_query/bind_select.rs
+++ b/src/query/sql/src/planner/binder/bind_query/bind_select.rs
@@ -716,6 +716,7 @@ impl SelectRewriter {
             catalog,
             database,
             table,
+            ref_name,
             alias,
             temporal,
             with_options,
@@ -732,6 +733,10 @@ impl SelectRewriter {
                 source_query.push('.');
             }
             source_query.push_str(&table.name);
+            if let Some(ref_name) = ref_name {
+                source_query.push('@');
+                source_query.push_str(&ref_name.name);
+            }
 
             if let Some(temporal) = temporal {
                 source_query.push(' ');
diff --git a/src/query/sql/src/planner/binder/bind_table_reference/bind.rs b/src/query/sql/src/planner/binder/bind_table_reference/bind.rs
index a3bfaa7c336ad..a777a38fd01b7 100644
--- a/src/query/sql/src/planner/binder/bind_table_reference/bind.rs
+++ b/src/query/sql/src/planner/binder/bind_table_reference/bind.rs
@@ -31,6 +31,7 @@ impl Binder {
                 catalog,
                 database,
                 table,
+                ref_name,
                 alias,
                 temporal,
                 pivot: _,
@@ -43,6 +44,7 @@ impl Binder {
                 catalog,
                 database,
                 table,
+                ref_name,
                 alias,
                 temporal,
                 with_options,
diff --git a/src/query/sql/src/planner/binder/bind_table_reference/bind_obfuscate.rs b/src/query/sql/src/planner/binder/bind_table_reference/bind_obfuscate.rs
index 9359906187c98..cb50ab2fe4b13 100644
--- a/src/query/sql/src/planner/binder/bind_table_reference/bind_obfuscate.rs
+++ b/src/query/sql/src/planner/binder/bind_table_reference/bind_obfuscate.rs
@@ -102,7 +102,7 @@ impl Binder {
         table: &Identifier,
         seed: u64,
     ) -> Result<(SExpr, BindContext)> {
-        let table_identifier = TableIdentifier::new(self, catalog, database, table, &None);
+        let table_identifier = TableIdentifier::new(self, catalog, database, table, &None, &None);
 
         let catalog_name = table_identifier.catalog_name();
         let database_name = table_identifier.database_name();
@@ -116,6 +116,7 @@ impl Binder {
                 &table_name,
                 None,
                 None,
+                None,
             )?
             .schema();
 
@@ -189,6 +190,7 @@ fn build_subquery(
         catalog: None,
         database: Some(database.clone()),
         table: table_name.clone(),
+        ref_name: None,
        alias: None,
        temporal: None,
        with_options: None,
diff --git a/src/query/sql/src/planner/binder/bind_table_reference/bind_table.rs b/src/query/sql/src/planner/binder/bind_table_reference/bind_table.rs
index bc3a3d6cd8fed..275d27e98ae10 100644
--- a/src/query/sql/src/planner/binder/bind_table_reference/bind_table.rs
+++ b/src/query/sql/src/planner/binder/bind_table_reference/bind_table.rs
@@ -45,16 +45,19 @@ impl Binder {
         catalog: &Option<Identifier>,
         database: &Option<Identifier>,
         table: &Identifier,
+        ref_name: &Option<Identifier>,
         alias: &Option<TableAlias>,
         temporal: &Option<TemporalClause>,
         with_options: &Option<WithOptions>,
         sample: &Option<SampleConfig>,
     ) -> Result<(SExpr, BindContext)> {
-        let table_identifier = TableIdentifier::new(self, catalog, database, table, alias);
-        let (catalog, database, table_name, table_name_alias) = (
+        let table_identifier =
+            TableIdentifier::new(self, catalog, database, table, ref_name, alias);
+        let (catalog, database, table_name, table_ref_name, table_name_alias) = (
             table_identifier.catalog_name(),
             table_identifier.database_name(),
             table_identifier.table_name(),
+            table_identifier.table_ref_name(),
             table_identifier.table_name_alias(),
         );
 
@@ -130,6 +133,7 @@ impl Binder {
             catalog.as_str(),
             database.as_str(),
             table_name.as_str(),
+            table_ref_name.as_deref(),
             navigation.as_ref(),
             max_batch_size,
         ) {
@@ -173,6 +177,7 @@ impl Binder {
             catalog,
             database.clone(),
             table_meta.clone(),
+            table_ref_name,
             table_name_alias,
             bind_context.view_info.is_some(),
             bind_context.planning_agg_index,
@@ -259,6 +264,7 @@ impl Binder {
             catalog,
             database.clone(),
             table_meta,
+            table_ref_name,
             table_name_alias,
             false,
             false,
@@ -292,6 +298,7 @@ impl Binder {
             catalog.clone(),
             database.clone(),
             table_meta.clone(),
+            table_ref_name,
             table_name_alias,
             bind_context.view_info.is_some(),
             bind_context.planning_agg_index,
diff --git a/src/query/sql/src/planner/binder/bind_table_reference/bind_table_function.rs b/src/query/sql/src/planner/binder/bind_table_reference/bind_table_function.rs
index e595074d60fdf..47d25761469fc 100644
--- a/src/query/sql/src/planner/binder/bind_table_reference/bind_table_function.rs
+++ b/src/query/sql/src/planner/binder/bind_table_reference/bind_table_function.rs
@@ -297,6 +297,7 @@ impl Binder {
             CATALOG_DEFAULT.to_string(),
             "system".to_string(),
             table.as_table(),
+            None,
             table_alias_name,
             false,
             false,
@@ -358,6 +359,7 @@ impl Binder {
             CATALOG_DEFAULT.to_string(),
             "system".to_string(),
             table.clone(),
+            None,
             table_alias_name,
             false,
             false,
@@ -571,8 +573,7 @@ impl Binder {
             Err(ErrorCode::InvalidArgument(format!(
                 "The function '{}' is not supported for lateral joins. Lateral joins currently support only Set Returning Functions (SRFs).",
                 func_name
-            ))
-            .set_span(*span))
+            )).set_span(*span))
         }
     }
     _ => unreachable!(),
diff --git a/src/query/sql/src/planner/binder/ddl/table.rs b/src/query/sql/src/planner/binder/ddl/table.rs
index cdaf63e1748b8..fd73c95c8f61f 100644
--- a/src/query/sql/src/planner/binder/ddl/table.rs
+++ b/src/query/sql/src/planner/binder/ddl/table.rs
@@ -127,12 +127,14 @@ use crate::plans::AddTableRowAccessPolicyPlan;
 use crate::plans::AlterTableClusterKeyPlan;
 use crate::plans::AnalyzeTablePlan;
 use crate::plans::CreateTablePlan;
+use crate::plans::CreateTableRefPlan;
 use crate::plans::DescribeTablePlan;
 use crate::plans::DropAllTableRowAccessPoliciesPlan;
 use crate::plans::DropTableClusterKeyPlan;
 use crate::plans::DropTableColumnPlan;
 use crate::plans::DropTableConstraintPlan;
 use crate::plans::DropTablePlan;
+use crate::plans::DropTableRefPlan;
 use crate::plans::DropTableRowAccessPolicyPlan;
 use crate::plans::ExistsTablePlan;
 use crate::plans::ModifyColumnAction as ModifyColumnActionInPlan;
@@ -1025,9 +1027,11 @@ impl Binder {
             catalog,
             database,
             table,
+            ref_name,
             ..
         } = table_reference
         {
+            debug_assert!(ref_name.is_none());
             self.normalize_object_identifier_triple(catalog, database, table)
         } else {
             return Err(ErrorCode::Internal(
@@ -1397,6 +1401,40 @@ impl Binder {
                 },
             )))
         }
+        AlterTableAction::CreateTableRef {
+            ref_type,
+            ref_name,
+            travel_point,
+            retain,
+        } => {
+            let navigation = if let Some(point) = travel_point {
+                Some(self.resolve_data_travel_point(bind_context, point)?)
+            } else {
+                None
+            };
+            let ref_name = self.normalize_identifier(ref_name).name;
+            Ok(Plan::CreateTableRef(Box::new(CreateTableRefPlan {
+                tenant,
+                catalog,
+                database,
+                table,
+                ref_type: ref_type.into(),
+                ref_name,
+                navigation,
+                retain: *retain,
+            })))
+        }
+        AlterTableAction::DropTableRef { ref_type, ref_name } => {
+            let ref_name = self.normalize_identifier(ref_name).name;
+            Ok(Plan::DropTableRef(Box::new(DropTableRefPlan {
+                tenant,
+                catalog,
+                database,
+                table,
+                ref_type: ref_type.into(),
+                ref_name,
+            })))
+        }
     }
 }
diff --git a/src/query/sql/src/planner/binder/insert.rs b/src/query/sql/src/planner/binder/insert.rs
index 0cc84ce8e91d6..d73c7270c6d27 100644
--- a/src/query/sql/src/planner/binder/insert.rs
+++ b/src/query/sql/src/planner/binder/insert.rs
@@ -113,7 +113,7 @@ impl Binder {
 
         self.init_cte(bind_context, with)?;
 
-        let table_identifier = TableIdentifier::new(self, catalog, database, table, &None);
+        let table_identifier = TableIdentifier::new(self, catalog, database, table, &None, &None);
         let (catalog_name, database_name, table_name) = (
             table_identifier.catalog_name(),
             table_identifier.database_name(),
diff --git a/src/query/sql/src/planner/binder/table.rs b/src/query/sql/src/planner/binder/table.rs
index 037f70d0af34c..0029aeb78b009 100644
--- a/src/query/sql/src/planner/binder/table.rs
+++ b/src/query/sql/src/planner/binder/table.rs
@@ -152,6 +152,7 @@ impl Binder {
             CATALOG_DEFAULT.to_string(),
             "system".to_string(),
             table.clone(),
+            None,
             table_alias_name,
             false,
             false,
@@ -582,6 +583,7 @@ impl Binder {
         catalog_name: &str,
         database_name: &str,
         table_name: &str,
+        branch: Option<&str>,
         navigation: Option<&TimeNavigation>,
         max_batch_size: Option<u64>,
     ) -> Result<Arc<dyn Table>> {
@@ -592,7 +594,13 @@ impl Binder {
         // newest snapshot, we can't get consistent snapshot
         let mut table_meta = self
             .ctx
-            .get_table_with_batch(catalog_name, database_name, table_name, max_batch_size)
+            .get_table_with_batch(
+                catalog_name,
+                database_name,
+                table_name,
+                branch,
+                max_batch_size,
+            )
             .await?;
 
         if let Some(desc) = navigation {
@@ -716,6 +724,13 @@ impl Binder {
                 database,
                 name,
             } => self.resolve_stream_data_travel_point(catalog, database, name),
+            TimeTravelPoint::TableRef { typ, name } => {
+                let name = self.normalize_identifier(name).name;
+                Ok(NavigationPoint::TableRef {
+                    typ: typ.into(),
+                    name,
+                })
+            }
         }
     }
diff --git a/src/query/sql/src/planner/binder/util.rs b/src/query/sql/src/planner/binder/util.rs
index 6efdc80f99514..0e1fcb1182571 100644
--- a/src/query/sql/src/planner/binder/util.rs
+++ b/src/query/sql/src/planner/binder/util.rs
@@ -85,6 +85,7 @@ pub struct TableIdentifier {
     catalog: Identifier,
     database: Identifier,
     table: Identifier,
+    table_ref: Option<Identifier>,
     table_alias: Option<TableAlias>,
     dialect: Dialect,
     name_resolution_ctx: NameResolutionContext,
@@ -96,6 +97,7 @@ impl TableIdentifier {
         catalog: &Option<Identifier>,
         database: &Option<Identifier>,
         table: &Identifier,
+        table_ref: &Option<Identifier>,
         table_alias: &Option<TableAlias>,
     ) -> TableIdentifier {
         // Use the common normalization logic to handle MySQL-style identifiers.
@@ -152,6 +154,7 @@ impl TableIdentifier {
             catalog,
             database,
             table,
+            table_ref: table_ref.clone(),
             table_alias: table_alias.clone(),
             dialect: *dialect,
             name_resolution_ctx: name_resolution_ctx.clone(),
@@ -170,6 +173,12 @@ impl TableIdentifier {
         normalize_identifier(&self.table, &self.name_resolution_ctx).name
     }
 
+    pub fn table_ref_name(&self) -> Option<String> {
+        self.table_ref
+            .as_ref()
+            .map(|v| normalize_identifier(v, &self.name_resolution_ctx).name)
+    }
+
     pub fn table_name_alias(&self) -> Option<String> {
         self.table_alias.as_ref().map(|table_alias| {
             normalize_identifier(&table_alias.name, &self.name_resolution_ctx).name
diff --git a/src/query/sql/src/planner/dataframe.rs b/src/query/sql/src/planner/dataframe.rs
index 1fd54cbef93e2..7d3f9a675bc82 100644
--- a/src/query/sql/src/planner/dataframe.rs
+++ b/src/query/sql/src/planner/dataframe.rs
@@ -58,6 +58,7 @@ impl Dataframe {
         let table = TableReference::Table {
             database: db.map(|db| Identifier::from_name(None, db)),
             table: Identifier::from_name(None, table_name),
+            ref_name: None,
             span: None,
             catalog: None,
             alias: None,
@@ -84,14 +85,15 @@ impl Dataframe {
         let (s_expr, bind_context) = if db == Some("system") && table_name == "one" {
             let catalog = CATALOG_DEFAULT;
             let database = "system";
-            let table_meta: Arc<dyn Table> =
-                binder.resolve_data_source(&query_ctx, catalog, database, "one", None, None)?;
+            let table_meta: Arc<dyn Table> = binder
+                .resolve_data_source(&query_ctx, catalog, database, "one", None, None, None)?;
 
             let table_index = metadata.write().add_table(
                 CATALOG_DEFAULT.to_owned(),
                 database.to_string(),
                 table_meta,
                 None,
+                None,
                 false,
                 false,
                 false,
@@ -449,6 +451,7 @@ impl Dataframe {
         let table = TableReference::Table {
             database: db.map(|db| Identifier::from_name(None, db)),
             table: Identifier::from_name(None, table_name),
+            ref_name: None,
             span: None,
             catalog: None,
             alias: None,
diff --git a/src/query/sql/src/planner/expression/expression_parser.rs b/src/query/sql/src/planner/expression/expression_parser.rs
index 84952a4283b82..6102fb6dd1ff3 100644
--- a/src/query/sql/src/planner/expression/expression_parser.rs
+++ b/src/query/sql/src/planner/expression/expression_parser.rs
@@ -60,6 +60,7 @@ pub fn bind_table(table_meta: Arc<dyn Table>) -> Result<(BindContext, MetadataRef)> {
         "default".to_string(),
         table_meta,
         None,
+        None,
         false,
         false,
         false,
diff --git a/src/query/sql/src/planner/format/display_plan.rs b/src/query/sql/src/planner/format/display_plan.rs
index 596384f93e1f5..6f8dc67f9539c 100644
--- a/src/query/sql/src/planner/format/display_plan.rs
+++ b/src/query/sql/src/planner/format/display_plan.rs
@@ -110,6 +110,8 @@ impl Plan {
             Plan::DropAllTableRowAccessPolicies(_) => {
                 Ok("DropAllTableRowAccessPolicies".to_string())
             }
+            Plan::CreateTableRef(_) => Ok("CreateTableRef".to_string()),
+            Plan::DropTableRef(_) => Ok("DropTableRef".to_string()),
 
             // Views
             Plan::CreateView(_) => Ok("CreateView".to_string()),
diff --git a/src/query/sql/src/planner/metadata/metadata.rs b/src/query/sql/src/planner/metadata/metadata.rs
index 946e376180dc1..fcfde7b18e299 100644
--- a/src/query/sql/src/planner/metadata/metadata.rs
+++ b/src/query/sql/src/planner/metadata/metadata.rs
@@ -339,6 +339,7 @@ impl Metadata {
         catalog: String,
         database: String,
         table_meta: Arc<dyn Table>,
+        branch: Option<String>,
         table_alias_name: Option<String>,
         source_of_view: bool,
         source_of_index: bool,
@@ -357,6 +358,7 @@ impl Metadata {
             database,
             catalog,
             table: table_meta.clone(),
+            branch,
             alias_name: table_alias_name,
             source_of_view,
             source_of_index,
@@ -553,6 +555,7 @@ pub struct TableEntry {
     catalog: String,
     database: String,
     name: String,
+    branch: Option<String>,
     alias_name: Option<String>,
     index: IndexType,
     source_of_view: bool,
@@ -576,27 +579,6 @@ impl Debug for TableEntry {
 }
 
 impl TableEntry {
-    pub fn new(
-        index: IndexType,
-        name: String,
-        alias_name: Option<String>,
-        catalog: String,
-        database: String,
-        table: Arc<dyn Table>,
-    ) -> Self {
-        TableEntry {
-            index,
-            name,
-            catalog,
-            database,
-            table,
-            alias_name,
-            source_of_view: false,
-            source_of_index: false,
-            source_of_stage: false,
-        }
-    }
-
     /// Get the catalog name of this table entry.
     pub fn catalog(&self) -> &str {
         &self.catalog
@@ -612,6 +594,10 @@ impl TableEntry {
         &self.name
     }
 
+    pub fn branch(&self) -> &Option<String> {
+        &self.branch
+    }
+
     /// Get the alias name of this table entry.
     pub fn alias_name(&self) -> &Option<String> {
         &self.alias_name
diff --git a/src/query/sql/src/planner/planner_cache.rs b/src/query/sql/src/planner/planner_cache.rs
index cc3453cb432cd..a075f58af82dd 100644
--- a/src/query/sql/src/planner/planner_cache.rs
+++ b/src/query/sql/src/planner/planner_cache.rs
@@ -109,9 +109,16 @@ impl Planner {
         let metadata = metadata.read();
         if visitor.schema_snapshots.iter().all(|ss| {
             metadata.tables().iter().any(|table| {
-                !table.table().is_temp()
-                    && table.table().options().get(OPT_KEY_SNAPSHOT_LOCATION) == Some(&ss.1)
-                    && table.table().schema().eq(&ss.0)
+                let tbl = table.table();
+                if tbl.is_temp() || tbl.schema().ne(&ss.0) {
+                    return false;
+                }
+                let snapshot = if let Some(branch) = table.branch() {
+                    tbl.get_table_info().meta.refs.get(branch).map(|v| &v.loc)
+                } else {
+                    tbl.options().get(OPT_KEY_SNAPSHOT_LOCATION)
+                };
+                snapshot == Some(&ss.1)
             })
         }) {
            return (!visitor.cache_miss, Some(plan_item.as_ref().clone()));
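The cache-validity rule in the hunk above: a cached plan survives only if every table it read still resolves to the same snapshot location, taken from the branch's ref entry when the table was bound via a branch, and from the table options otherwise. A simplified model of that check (the types below are stand-ins, not Databend's):

use std::collections::HashMap;

// Simplified stand-in for a bound table's snapshot-bearing state.
struct TableState {
    options_snapshot: Option<String>,  // like OPT_KEY_SNAPSHOT_LOCATION
    refs: HashMap<String, String>,     // ref name -> ref snapshot location
}

fn current_snapshot<'a>(t: &'a TableState, branch: Option<&str>) -> Option<&'a str> {
    match branch {
        Some(b) => t.refs.get(b).map(String::as_str),
        None => t.options_snapshot.as_deref(),
    }
}

// A cached plan entry stays valid only while the location it recorded
// still matches what the table currently resolves to.
fn cache_still_valid(t: &TableState, branch: Option<&str>, cached_loc: &str) -> bool {
    current_snapshot(t, branch) == Some(cached_loc)
}

fn main() {
    let t = TableState {
        options_snapshot: Some("sn/h3".into()),
        refs: HashMap::from([("dev".into(), "sn/h1".into())]),
    };
    assert!(cache_still_valid(&t, None, "sn/h3"));
    assert!(cache_still_valid(&t, Some("dev"), "sn/h1"));
    assert!(!cache_still_valid(&t, Some("dev"), "sn/h3")); // branch points elsewhere
}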
@@ -201,21 +209,40 @@ impl TableRefVisitor { let catalog_name = normalize_identifier(&catalog, &self.name_resolution_ctx).name; let database_name = normalize_identifier(&database, &self.name_resolution_ctx).name; let table_name = normalize_identifier(table, &self.name_resolution_ctx).name; + let ref_name = ref_name + .as_ref() + .map(|v| normalize_identifier(v, &self.name_resolution_ctx).name); databend_common_base::runtime::block_on(async move { if let Ok(table_meta) = self .ctx - .get_table(&catalog_name, &database_name, &table_name) + .get_table_with_batch( + &catalog_name, + &database_name, + &table_name, + ref_name.as_deref(), + None, + ) .await { if !table_meta.is_temp() && !table_meta.is_stage_table() && !table_meta.is_stream() - && let Some(sn) = table_meta.options().get(OPT_KEY_SNAPSHOT_LOCATION) { - self.schema_snapshots - .push((table_meta.schema(), sn.clone())); - return; + let snapshot = if let Some(ref_name) = &ref_name { + table_meta + .get_table_info() + .meta + .refs + .get(ref_name) + .map(|v| v.loc.clone()) + } else { + table_meta.options().get(OPT_KEY_SNAPSHOT_LOCATION).cloned() + }; + if let Some(sn) = snapshot { + self.schema_snapshots.push((table_meta.schema(), sn)); + return; + } } } self.cache_miss = true; diff --git a/src/query/sql/src/planner/plans/ddl/table.rs b/src/query/sql/src/planner/plans/ddl/table.rs index 0edf049101f27..88a66f95e7ecd 100644 --- a/src/query/sql/src/planner/plans/ddl/table.rs +++ b/src/query/sql/src/planner/plans/ddl/table.rs @@ -18,6 +18,7 @@ use std::time::Duration; use databend_common_ast::ast::Engine; use databend_common_ast::ast::Identifier; +use databend_common_catalog::table::NavigationPoint; use databend_common_expression::types::DataType; use databend_common_expression::types::NumberDataType; use databend_common_expression::DataField; @@ -29,6 +30,7 @@ use databend_common_expression::TableSchema; use databend_common_expression::TableSchemaRef; use databend_common_meta_app::schema::Constraint; use databend_common_meta_app::schema::CreateOption; +use databend_common_meta_app::schema::SnapshotRefType; use databend_common_meta_app::schema::TableIndex; use databend_common_meta_app::schema::TableNameIdent; use databend_common_meta_app::schema::UndropTableReq; @@ -584,3 +586,27 @@ pub struct DropAllTableRowAccessPoliciesPlan { pub database: String, pub table: String, } + +#[derive(Clone, Debug)] +pub struct CreateTableRefPlan { + pub tenant: Tenant, + pub catalog: String, + pub database: String, + pub table: String, + + pub ref_type: SnapshotRefType, + pub ref_name: String, + pub navigation: Option, + pub retain: Option, +} + +#[derive(Clone, Debug)] +pub struct DropTableRefPlan { + pub tenant: Tenant, + pub catalog: String, + pub database: String, + pub table: String, + + pub ref_type: SnapshotRefType, + pub ref_name: String, +} diff --git a/src/query/sql/src/planner/plans/plan.rs b/src/query/sql/src/planner/plans/plan.rs index b0c84fdbd5931..4a41c87f17e20 100644 --- a/src/query/sql/src/planner/plans/plan.rs +++ b/src/query/sql/src/planner/plans/plan.rs @@ -69,6 +69,7 @@ use crate::plans::CreateStagePlan; use crate::plans::CreateStreamPlan; use crate::plans::CreateTableIndexPlan; use crate::plans::CreateTablePlan; +use crate::plans::CreateTableRefPlan; use crate::plans::CreateTaskPlan; use crate::plans::CreateUDFPlan; use crate::plans::CreateUserPlan; @@ -108,6 +109,7 @@ use crate::plans::DropTableColumnPlan; use crate::plans::DropTableConstraintPlan; use crate::plans::DropTableIndexPlan; use crate::plans::DropTablePlan; +use 
crate::plans::DropTableRefPlan; use crate::plans::DropTableRowAccessPolicyPlan; use crate::plans::DropTaskPlan; use crate::plans::DropUDFPlan; @@ -294,6 +296,8 @@ pub enum Plan { AddTableRowAccessPolicy(Box), DropTableRowAccessPolicy(Box), DropAllTableRowAccessPolicies(Box), + CreateTableRef(Box), + DropTableRef(Box), // Optimize OptimizePurge(Box), diff --git a/src/query/sql/src/planner/semantic/view_rewriter.rs b/src/query/sql/src/planner/semantic/view_rewriter.rs index 4e61dbb73325d..7a7cd9dac1e60 100644 --- a/src/query/sql/src/planner/semantic/view_rewriter.rs +++ b/src/query/sql/src/planner/semantic/view_rewriter.rs @@ -29,6 +29,7 @@ impl ViewRewriter { catalog, database, table, + ref_name, alias, temporal, with_options, @@ -49,6 +50,7 @@ impl ViewRewriter { catalog: catalog.clone(), database, table: table.clone(), + ref_name: ref_name.clone(), alias: alias.clone(), temporal: temporal.clone(), with_options: with_options.clone(), diff --git a/src/query/storages/common/table_meta/Cargo.toml b/src/query/storages/common/table_meta/Cargo.toml index 8d22c20b570cf..3cac3bdb701c5 100644 --- a/src/query/storages/common/table_meta/Cargo.toml +++ b/src/query/storages/common/table_meta/Cargo.toml @@ -10,7 +10,6 @@ edition = { workspace = true } dev = ["snap"] [dependencies] - arrow = { workspace = true } bincode_v1 = { workspace = true } bytes = { workspace = true } diff --git a/src/query/storages/common/table_meta/src/meta/mod.rs b/src/query/storages/common/table_meta/src/meta/mod.rs index e3e7c5b7491fe..c5c7717f40320 100644 --- a/src/query/storages/common/table_meta/src/meta/mod.rs +++ b/src/query/storages/common/table_meta/src/meta/mod.rs @@ -38,15 +38,16 @@ pub use statistics::*; // export legacy versioned table meta types locally, // currently, used by versioned readers only pub(crate) use testing::*; +pub(crate) use utils::monotonically_increased_timestamp; pub use utils::parse_storage_prefix; pub use utils::trim_object_prefix; +pub(crate) use utils::trim_timestamp_to_milli_second; pub use utils::try_extract_uuid_str_from_path; pub use utils::uuid_from_date_time; pub use utils::SnapshotTimestampValidationContext; pub use utils::TableMetaTimestamps; pub use utils::TEMP_TABLE_STORAGE_PREFIX; pub use utils::VACUUM2_OBJECT_KEY_PREFIX; -pub(crate) use utils::*; pub use v0::ColumnMeta as ColumnMetaV0; pub use versions::testify_version; pub use versions::SegmentInfoVersion; diff --git a/src/query/storages/common/table_meta/src/meta/utils.rs b/src/query/storages/common/table_meta/src/meta/utils.rs index dfba6e672c00d..38b06d05415a3 100644 --- a/src/query/storages/common/table_meta/src/meta/utils.rs +++ b/src/query/storages/common/table_meta/src/meta/utils.rs @@ -29,17 +29,17 @@ use databend_common_base::base::uuid::Uuid; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use crate::meta::TableSnapshot; +use crate::readers::snapshot_reader::TableSnapshotAccessor; use crate::table::table_storage_prefix; use crate::table::OPT_KEY_DATABASE_ID; use crate::table::OPT_KEY_STORAGE_PREFIX; use crate::table::OPT_KEY_TEMP_PREFIX; pub const TEMP_TABLE_STORAGE_PREFIX: &str = "_tmp_tbl"; -use crate::meta::TableSnapshot; -use crate::readers::snapshot_reader::TableSnapshotAccessor; pub const VACUUM2_OBJECT_KEY_PREFIX: &str = "h"; -pub fn trim_timestamp_to_milli_second(ts: DateTime) -> DateTime { +pub(crate) fn trim_timestamp_to_milli_second(ts: DateTime) -> DateTime { Utc.with_ymd_and_hms( ts.year(), ts.month(), @@ -53,7 +53,7 @@ pub fn trim_timestamp_to_milli_second(ts: DateTime) -> 
DateTime { .unwrap() } -pub fn monotonically_increased_timestamp( +pub(crate) fn monotonically_increased_timestamp( timestamp: DateTime, previous_timestamp: &Option>, ) -> DateTime { diff --git a/src/query/storages/common/table_meta/src/meta/v4/snapshot.rs b/src/query/storages/common/table_meta/src/meta/v4/snapshot.rs index 4f6ea9c3306c6..7aa836878d51d 100644 --- a/src/query/storages/common/table_meta/src/meta/v4/snapshot.rs +++ b/src/query/storages/common/table_meta/src/meta/v4/snapshot.rs @@ -160,21 +160,6 @@ impl TableSnapshot { }) } - /// used in ut - #[cfg(test)] - pub fn new_empty_snapshot(schema: TableSchema, prev_table_seq: Option) -> Self { - Self::try_new( - prev_table_seq, - None, - schema, - Statistics::default(), - vec![], - None, - Default::default(), - ) - .unwrap() - } - pub fn try_from_previous( previous: Arc, prev_table_seq: Option, diff --git a/src/query/storages/fuse/Cargo.toml b/src/query/storages/fuse/Cargo.toml index 198a114dd1b7d..a481d5d6fb492 100644 --- a/src/query/storages/fuse/Cargo.toml +++ b/src/query/storages/fuse/Cargo.toml @@ -79,7 +79,6 @@ uuid = { workspace = true } [dev-dependencies] divan = { workspace = true } -tempfile = { workspace = true } [[bench]] name = "bench" diff --git a/src/query/storages/fuse/src/constants.rs b/src/query/storages/fuse/src/constants.rs index 1e70e029e380b..d15a9559a1dde 100644 --- a/src/query/storages/fuse/src/constants.rs +++ b/src/query/storages/fuse/src/constants.rs @@ -39,6 +39,7 @@ pub const FUSE_TBL_VIRTUAL_BLOCK_PREFIX: &str = "_vb"; pub const FUSE_TBL_AGG_INDEX_PREFIX: &str = "_i_a"; pub const FUSE_TBL_INVERTED_INDEX_PREFIX: &str = "_i_i"; pub const FUSE_TBL_VECTOR_INDEX_PREFIX: &str = "_i_v"; +pub const FUSE_TBL_REF_PREFIX: &str = "_ref"; pub const DEFAULT_ROW_PER_PAGE: usize = 8192; pub const DEFAULT_ROW_PER_INDEX: usize = 100000; diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs index dc8c9ea11d75b..69abda6c4374c 100644 --- a/src/query/storages/fuse/src/fuse_table.rs +++ b/src/query/storages/fuse/src/fuse_table.rs @@ -61,6 +61,7 @@ use databend_common_io::constants::DEFAULT_BLOCK_COMPRESSED_SIZE; use databend_common_io::constants::DEFAULT_BLOCK_PER_SEGMENT; use databend_common_io::constants::DEFAULT_BLOCK_ROW_COUNT; use databend_common_meta_app::schema::DatabaseType; +use databend_common_meta_app::schema::SnapshotRef; use databend_common_meta_app::schema::TableIdent; use databend_common_meta_app::schema::TableInfo; use databend_common_meta_app::schema::TableMeta; @@ -157,6 +158,7 @@ pub struct FuseTable { // If this is set, reading from fuse_table should only return the increment blocks pub(crate) changes_desc: Option, + pub(crate) table_branch: Option, pub pruned_result_receiver: Arc>, } @@ -282,6 +284,7 @@ impl FuseTable { table_compression: table_compression.as_str().try_into()?, table_type, changes_desc: None, + table_branch: None, pruned_result_receiver: Arc::new(Mutex::new(None)), })) } @@ -432,6 +435,10 @@ impl FuseTable { } pub fn snapshot_loc(&self) -> Option { + if let Some(snapshot_ref) = self.table_branch.as_ref() { + return Some(snapshot_ref.loc.clone()); + } + let options = self.table_info.options(); options .get(OPT_KEY_SNAPSHOT_LOCATION) @@ -448,6 +455,10 @@ impl FuseTable { &self.operator } + pub fn get_branch_id(&self) -> Option { + self.table_branch.as_ref().map(|v| v.id) + } + pub fn try_from_table(tbl: &dyn Table) -> Result<&FuseTable> { tbl.as_any().downcast_ref::().ok_or_else(|| { ErrorCode::Internal(format!( @@ -942,13 +953,12 @@ impl Table 
for FuseTable { ctx: Arc, instant: Option, num_snapshot_limit: Option, - keep_last_snapshot: bool, dry_run: bool, ) -> Result>> { match self.navigate_for_purge(&ctx, instant).await { Ok((table, files)) => { table - .do_purge(&ctx, files, num_snapshot_limit, keep_last_snapshot, dry_run) + .do_purge(&ctx, files, num_snapshot_limit, dry_run) .await } Err(e) if e.code() == ErrorCode::TABLE_HISTORICAL_DATA_NOT_FOUND => { @@ -1004,19 +1014,39 @@ impl Table for FuseTable { } } _ => { - let s = &self.table_info.meta.statistics; - TableStatistics { - num_rows: Some(s.number_of_rows), - data_size: Some(s.data_bytes), - data_size_compressed: Some(s.compressed_data_bytes), - index_size: Some(s.index_data_bytes), - bloom_index_size: s.bloom_index_size, - ngram_index_size: s.ngram_index_size, - inverted_index_size: s.inverted_index_size, - vector_index_size: s.vector_index_size, - virtual_column_size: s.virtual_column_size, - number_of_blocks: s.number_of_blocks, - number_of_segments: s.number_of_segments, + if self.table_branch.is_some() { + let Some(ss) = self.read_table_snapshot().await? else { + return Ok(None); + }; + let stats = &ss.summary; + TableStatistics { + num_rows: Some(stats.row_count), + data_size: Some(stats.uncompressed_byte_size), + data_size_compressed: Some(stats.compressed_byte_size), + index_size: Some(stats.index_size), + bloom_index_size: stats.bloom_index_size, + ngram_index_size: stats.ngram_index_size, + inverted_index_size: stats.inverted_index_size, + vector_index_size: stats.vector_index_size, + virtual_column_size: stats.virtual_column_size, + number_of_blocks: Some(stats.block_count), + number_of_segments: Some(ss.segments.len() as u64), + } + } else { + let s = &self.table_info.meta.statistics; + TableStatistics { + num_rows: Some(s.number_of_rows), + data_size: Some(s.data_bytes), + data_size_compressed: Some(s.compressed_data_bytes), + index_size: Some(s.index_data_bytes), + bloom_index_size: s.bloom_index_size, + ngram_index_size: s.ngram_index_size, + inverted_index_size: s.inverted_index_size, + vector_index_size: s.vector_index_size, + virtual_column_size: s.virtual_column_size, + number_of_blocks: s.number_of_blocks, + number_of_segments: s.number_of_segments, + } } } }; @@ -1176,6 +1206,13 @@ impl Table for FuseTable { } } + fn with_branch(&self, branch_name: &str) -> Result> { + let snapshot_ref = self.table_info.get_table_ref(None, branch_name)?; + let mut new_table = self.clone(); + new_table.table_branch = Some(snapshot_ref.clone()); + Ok(Arc::new(new_table)) + } + #[async_backtrace::framed] async fn generate_changes_query( &self, diff --git a/src/query/storages/fuse/src/fuse_type.rs b/src/query/storages/fuse/src/fuse_type.rs index cf5cbfce288f5..294fe4d3cd10f 100644 --- a/src/query/storages/fuse/src/fuse_type.rs +++ b/src/query/storages/fuse/src/fuse_type.rs @@ -28,8 +28,6 @@ pub enum FuseTableType { External, // Table attached to the system. Attached, - // Shared table with read-only access. 
- SharedReadOnly, } impl FuseTableType { @@ -39,7 +37,6 @@ impl FuseTableType { FuseTableType::Standard => false, FuseTableType::External => false, FuseTableType::Attached => true, - FuseTableType::SharedReadOnly => true, } } } diff --git a/src/query/storages/fuse/src/io/locations.rs b/src/query/storages/fuse/src/io/locations.rs index a98de49cdb430..e5000991763af 100644 --- a/src/query/storages/fuse/src/io/locations.rs +++ b/src/query/storages/fuse/src/io/locations.rs @@ -39,6 +39,7 @@ use crate::index::InvertedIndexFile; use crate::FUSE_TBL_AGG_INDEX_PREFIX; use crate::FUSE_TBL_INVERTED_INDEX_PREFIX; use crate::FUSE_TBL_LAST_SNAPSHOT_HINT_V2; +use crate::FUSE_TBL_REF_PREFIX; use crate::FUSE_TBL_SEGMENT_STATISTICS_PREFIX; use crate::FUSE_TBL_VECTOR_INDEX_PREFIX; use crate::FUSE_TBL_XOR_BLOOM_INDEX_PREFIX; @@ -70,6 +71,7 @@ pub struct TableMetaLocationGenerator { inverted_index_location_prefix: String, vector_index_location_prefix: String, segment_statistics_location_prefix: String, + ref_snapshot_location_prefix: String, } impl TableMetaLocationGenerator { @@ -85,6 +87,7 @@ impl TableMetaLocationGenerator { let vector_index_location_prefix = format!("{}/{}/", &prefix, FUSE_TBL_VECTOR_INDEX_PREFIX); let segment_statistics_location_prefix = format!("{}/{}/", &prefix, FUSE_TBL_SEGMENT_STATISTICS_PREFIX); + let ref_snapshot_location_prefix = format!("{}/{}/", &prefix, FUSE_TBL_REF_PREFIX); Self { prefix, block_location_prefix, @@ -95,6 +98,7 @@ impl TableMetaLocationGenerator { inverted_index_location_prefix, vector_index_location_prefix, segment_statistics_location_prefix, + ref_snapshot_location_prefix, } } @@ -126,6 +130,10 @@ impl TableMetaLocationGenerator { &self.segment_statistics_location_prefix } + pub fn ref_snapshot_location_prefix(&self) -> &str { + &self.ref_snapshot_location_prefix + } + pub fn gen_block_location( &self, table_meta_timestamps: TableMetaTimestamps, @@ -195,6 +203,16 @@ impl TableMetaLocationGenerator { Ok(snapshot_version.create(id, &self.prefix)) } + pub fn ref_snapshot_location_from_uuid( + &self, + table_ref: u64, + id: &Uuid, + version: u64, + ) -> Result { + let snapshot_version = SnapshotVersion::try_from(version)?; + Ok(snapshot_version.create_ref(table_ref, id, &self.prefix)) + } + pub fn snapshot_version(location: impl AsRef) -> u64 { if location.as_ref().ends_with(SNAPSHOT_V4.suffix().as_str()) { SNAPSHOT_V4.version() @@ -338,6 +356,7 @@ impl TableMetaLocationGenerator { trait SnapshotLocationCreator { fn create(&self, id: &Uuid, prefix: impl AsRef) -> String; + fn create_ref(&self, table_ref: u64, id: &Uuid, prefix: impl AsRef) -> String; fn suffix(&self) -> String; } @@ -361,6 +380,18 @@ impl SnapshotLocationCreator for SnapshotVersion { ) } + fn create_ref(&self, table_ref: u64, id: &Uuid, prefix: impl AsRef) -> String { + format!( + "{}/{}/{}/{}{}{}", + prefix.as_ref(), + FUSE_TBL_REF_PREFIX, + table_ref, + VACUUM2_OBJECT_KEY_PREFIX, + id.simple(), + self.suffix(), + ) + } + fn suffix(&self) -> String { match self { SnapshotVersion::V0(_) => "".to_string(), @@ -383,6 +414,10 @@ impl SnapshotLocationCreator for TableSnapshotStatisticsVersion { ) } + fn create_ref(&self, _table_ref: u64, _id: &Uuid, _prefix: impl AsRef) -> String { + unimplemented!() + } + fn suffix(&self) -> String { match self { TableSnapshotStatisticsVersion::V0(_) => "_ts_v0.json".to_string(), diff --git a/src/query/storages/fuse/src/io/read/snapshot_history_reader.rs b/src/query/storages/fuse/src/io/read/snapshot_history_reader.rs index 79dd6382efbf2..1d1548ffa82d6 100644 --- 
a/src/query/storages/fuse/src/io/read/snapshot_history_reader.rs +++ b/src/query/storages/fuse/src/io/read/snapshot_history_reader.rs @@ -39,57 +39,60 @@ pub trait SnapshotHistoryReader { location: String, format_version: u64, location_gen: TableMetaLocationGenerator, + branch_id: Option, ) -> TableSnapshotStream; } + impl SnapshotHistoryReader for TableSnapshotReader { fn snapshot_history( self, location: String, format_version: u64, location_gen: TableMetaLocationGenerator, + branch_id: Option, ) -> TableSnapshotStream { let stream = stream::try_unfold( - (self, location_gen, Some((location, format_version))), - |(reader, gen, next)| async move { + ( + self, + location_gen, + branch_id, + Some((location, format_version)), + ), + |(reader, gen, branch_id, next)| async move { if let Some((loc, ver)) = next { - let load_params = LoadParams { + let params = LoadParams { location: loc, len_hint: None, ver, put_cache: true, }; - let snapshot = match reader.read(&load_params).await { - Ok(s) => Ok(Some(s)), - Err(e) => { - if e.code() == ErrorCode::STORAGE_NOT_FOUND { - info!( - "traverse snapshot history break at location ({}, {}), err detail {}", - load_params.location, load_params.ver, e - ); - Ok(None) - } else { - Err(e) - } + let snapshot = match reader.read(¶ms).await { + Ok(s) => s, + Err(e) if e.code() == ErrorCode::STORAGE_NOT_FOUND => { + info!( + "traverse snapshot history break at location ({}, {}), err {}", + params.location, params.ver, e + ); + return Ok(None); } + Err(e) => return Err(e), }; - match snapshot { - Ok(Some(snapshot)) => { - if let Some((prev_id, prev_version)) = snapshot.prev_snapshot_id { - let new_ver = prev_version; - let new_loc = - gen.snapshot_location_from_uuid(&prev_id, prev_version)?; - Ok(Some(( - (snapshot, ver), - (reader, gen, Some((new_loc, new_ver))), - ))) + + let next = snapshot + .prev_snapshot_id + .map(|(prev_id, prev_ver)| { + let next_loc = if let Some(id) = branch_id { + gen.ref_snapshot_location_from_uuid(id, &prev_id, prev_ver) } else { - Ok(Some(((snapshot, ver), (reader, gen, None)))) - } - } - Ok(None) => Ok(None), - Err(e) => Err(e), - } + gen.snapshot_location_from_uuid(&prev_id, prev_ver) + }; + + next_loc.map(|loc| (loc, prev_ver)) + }) + .transpose()?; + + Ok(Some(((snapshot, ver), (reader, gen, branch_id, next)))) } else { Ok(None) } diff --git a/src/query/storages/fuse/src/io/segments.rs b/src/query/storages/fuse/src/io/segments.rs index ab37e23c9df8a..c0248a41bbbf7 100644 --- a/src/query/storages/fuse/src/io/segments.rs +++ b/src/query/storages/fuse/src/io/segments.rs @@ -120,13 +120,13 @@ impl SegmentsIO { #[fastrace::trace] pub async fn generic_read_compact_segments( &self, - segment_locations: &[Location], + segment_locations: &[&Location], put_cache: bool, projection: &HashSet, ) -> Result>>> { let mut iter = segment_locations.iter(); let tasks = std::iter::from_fn(|| { - iter.next().map(|location| { + iter.next().map(|&location| { let dal = self.operator.clone(); let table_schema = self.schema.clone(); let segment_location = location.clone(); diff --git a/src/query/storages/fuse/src/io/snapshots.rs b/src/query/storages/fuse/src/io/snapshots.rs index 64dc1895505e4..d8a5d8bc2c7a8 100644 --- a/src/query/storages/fuse/src/io/snapshots.rs +++ b/src/query/storages/fuse/src/io/snapshots.rs @@ -78,6 +78,7 @@ impl SnapshotsIO { pub async fn read_snapshot( snapshot_location: String, data_accessor: Operator, + put_cache: bool, ) -> Result<(Arc, FormatVersion)> { let reader = MetaReaders::table_snapshot_reader(data_accessor); let ver: 
u64 = TableMetaLocationGenerator::snapshot_version(snapshot_location.as_str()); @@ -85,7 +86,7 @@ impl SnapshotsIO { location: snapshot_location, len_hint: None, ver, - put_cache: true, + put_cache, }; info!("Reading snapshot with parameters: {:?}", load_params); let snapshot = reader.read(&load_params).await?; @@ -206,7 +207,7 @@ impl SnapshotsIO { } let (root_snapshot, format_version) = - Self::read_snapshot(root_snapshot_file.clone(), data_accessor.clone()).await?; + Self::read_snapshot(root_snapshot_file.clone(), data_accessor.clone(), true).await?; Ok(Self::chain_snapshots( snapshot_lites, @@ -222,12 +223,13 @@ impl SnapshotsIO { dal: Operator, location_generator: TableMetaLocationGenerator, root_snapshot: String, + branch_id: Option, limit: Option, ) -> Result> { let table_snapshot_reader = MetaReaders::table_snapshot_reader(dal); let format_version = TableMetaLocationGenerator::snapshot_version(root_snapshot.as_str()); let lite_snapshot_stream = table_snapshot_reader - .snapshot_history(root_snapshot, format_version, location_generator) + .snapshot_history(root_snapshot, format_version, location_generator, branch_id) .map_ok(|(snapshot, format_version)| { TableSnapshotLite::from((snapshot.as_ref(), format_version)) }); @@ -415,4 +417,32 @@ impl SnapshotsIO { } None } + + /// Read a snapshot from a location for vacuum operations + /// + /// Returns Ok(None) if snapshot not found (concurrent GC case) + /// Returns Err for other errors + #[async_backtrace::framed] + pub async fn read_snapshot_for_vacuum( + operator: Operator, + location: &str, + ) -> Result>> { + let reader = MetaReaders::table_snapshot_reader(operator); + let ver = TableMetaLocationGenerator::snapshot_version(location); + let params = LoadParams { + location: location.to_string(), + len_hint: None, + ver, + put_cache: true, + }; + + match reader.read(¶ms).await { + Err(e) if e.code() == ErrorCode::STORAGE_NOT_FOUND => { + // Concurrent gc: someone else has already collected this snapshot + Ok(None) + } + Err(e) => Err(e), + Ok(v) => Ok(Some(v)), + } + } } diff --git a/src/query/storages/fuse/src/operations/changes.rs b/src/query/storages/fuse/src/operations/changes.rs index 819292a67e2c4..1b00c2d594427 100644 --- a/src/query/storages/fuse/src/operations/changes.rs +++ b/src/query/storages/fuse/src/operations/changes.rs @@ -81,7 +81,7 @@ impl FuseTable { Some(_) => { if let Some(snapshot_loc) = &location { let (snapshot, _) = - SnapshotsIO::read_snapshot(snapshot_loc.clone(), self.get_operator()) + SnapshotsIO::read_snapshot(snapshot_loc.clone(), self.get_operator(), true) .await?; let Some(prev_table_seq) = snapshot.prev_table_seq else { return Err(ErrorCode::IllegalStream( @@ -362,7 +362,7 @@ impl FuseTable { let latest_segments = if let Some(snapshot) = latest { let (sn, _) = - SnapshotsIO::read_snapshot(snapshot.to_string(), self.get_operator()).await?; + SnapshotsIO::read_snapshot(snapshot.to_string(), self.get_operator(), true).await?; HashSet::from_iter(sn.segments.clone()) } else { HashSet::new() @@ -568,7 +568,8 @@ impl FuseTable { &self, base_location: &String, ) -> Result> { - match SnapshotsIO::read_snapshot(base_location.to_string(), self.get_operator()).await { + match SnapshotsIO::read_snapshot(base_location.to_string(), self.get_operator(), true).await + { Ok((base_snapshot, _)) => Ok(base_snapshot), Err(_) => Err(ErrorCode::IllegalStream(format!( "Failed to read the offset snapshot: {:?}, maybe purged", diff --git a/src/query/storages/fuse/src/operations/commit.rs 
b/src/query/storages/fuse/src/operations/commit.rs index ac3b386254071..7a8b2413c36ff 100644 --- a/src/query/storages/fuse/src/operations/commit.rs +++ b/src/query/storages/fuse/src/operations/commit.rs @@ -15,7 +15,6 @@ use std::collections::BTreeMap; use std::collections::HashMap; use std::sync::Arc; -use std::time::Duration; use backoff::backoff::Backoff; use chrono::Utc; @@ -328,10 +327,9 @@ impl FuseTable { base_segments: &[Location], base_summary: Statistics, table_meta_timestamps: TableMetaTimestamps, - max_retry_elapsed: Option, ) -> Result<()> { let mut retries = 0; - let mut backoff = set_backoff(None, None, max_retry_elapsed); + let mut backoff = set_backoff(None, None, None); let mut latest_snapshot = base_snapshot.clone(); let mut latest_table_info = &self.table_info; diff --git a/src/query/storages/fuse/src/operations/common/processors/sink_commit.rs b/src/query/storages/fuse/src/operations/common/processors/sink_commit.rs index 59caafebef139..b24ae95729f08 100644 --- a/src/query/storages/fuse/src/operations/common/processors/sink_commit.rs +++ b/src/query/storages/fuse/src/operations/common/processors/sink_commit.rs @@ -59,7 +59,6 @@ use opendal::Operator; use crate::io::TableMetaLocationGenerator; use crate::operations::set_backoff; use crate::operations::set_compaction_num_block_hint; -use crate::operations::vacuum::vacuum_table; use crate::operations::AppendGenerator; use crate::operations::CommitMeta; use crate::operations::MutationGenerator; @@ -346,7 +345,8 @@ where F: SnapshotGenerator + Send + Sync + 'static if let Some(vacuum_handler) = &self.vacuum_handler { let respect_flash_back = true; - vacuum_table(tbl, self.ctx.clone(), vacuum_handler, respect_flash_back).await; + tbl.vacuum_table(self.ctx.clone(), vacuum_handler, respect_flash_back) + .await; } else { info!("No vacuum handler available for auto vacuuming, please verify your license"); } diff --git a/src/query/storages/fuse/src/operations/gc.rs b/src/query/storages/fuse/src/operations/gc.rs index d61456bf6fe18..500dae957c076 100644 --- a/src/query/storages/fuse/src/operations/gc.rs +++ b/src/query/storages/fuse/src/operations/gc.rs @@ -17,16 +17,19 @@ use std::collections::HashSet; use std::sync::Arc; use std::time::Instant; +use chrono::DateTime; +use chrono::Utc; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::ScalarRef; use databend_common_meta_app::schema::ListIndexesByIdReq; +use databend_common_meta_app::schema::SnapshotRef; +use databend_common_meta_app::schema::SnapshotRefType; use databend_common_meta_app::schema::TableIndex; use databend_storages_common_cache::CacheAccessor; use databend_storages_common_cache::CachedObject; -use databend_storages_common_cache::LoadParams; use databend_storages_common_index::BloomIndexMeta; use databend_storages_common_index::InvertedIndexMeta; use databend_storages_common_io::Files; @@ -38,13 +41,16 @@ use databend_storages_common_table_meta::meta::Location; use databend_storages_common_table_meta::meta::SegmentInfo; use databend_storages_common_table_meta::meta::TableSnapshot; use databend_storages_common_table_meta::meta::TableSnapshotStatistics; +use futures::TryStreamExt; use log::error; use log::info; use log::warn; +use opendal::Entry; use crate::index::InvertedIndexFile; use crate::io::read::ColumnOrientedSegmentReader; use crate::io::read::RowOrientedSegmentReader; +use 
crate::io::read::SnapshotHistoryReader; use crate::io::InvertedIndexReader; use crate::io::MetaReaders; use crate::io::SegmentsIO; @@ -52,26 +58,34 @@ use crate::io::SnapshotLiteExtended; use crate::io::SnapshotsIO; use crate::io::TableMetaLocationGenerator; use crate::FuseTable; +use crate::RetentionPolicy; use crate::FUSE_TBL_SNAPSHOT_PREFIX; +const DEFAULT_REF_NUM_SNAPSHOT_LIMIT: usize = 100; + impl FuseTable { pub async fn do_purge( &self, ctx: &Arc, snapshot_files: Vec, num_snapshot_limit: Option, - keep_last_snapshot: bool, dry_run: bool, ) -> Result>> { let mut counter = PurgeCounter::new(); + + // Step 1: Process snapshot refs (branches and tags) before main purge + let ref_protected_segments = self + .process_refs_for_purge(ctx, &mut counter, dry_run) + .await?; + let res = self .execute_purge( ctx, snapshot_files, num_snapshot_limit, - keep_last_snapshot, &mut counter, dry_run, + ref_protected_segments, ) .await; info!("purge counter {:?}", counter); @@ -84,18 +98,14 @@ impl FuseTable { ctx: &Arc, snapshot_files: Vec, num_snapshot_limit: Option, - keep_last_snapshot: bool, counter: &mut PurgeCounter, dry_run: bool, + ref_protected_segments: HashSet, ) -> Result>> { // 1. Read the root snapshot. - let root_snapshot_info_opt = self.read_root_snapshot(ctx, keep_last_snapshot).await?; + let root_snapshot_info_opt = self.read_root_snapshot(ctx, ref_protected_segments).await?; if root_snapshot_info_opt.is_none() { - if dry_run { - return Ok(Some(vec![])); - } else { - return Ok(None); - } + return if dry_run { Ok(Some(vec![])) } else { Ok(None) }; } let root_snapshot_info = root_snapshot_info_opt.unwrap(); @@ -282,27 +292,13 @@ impl FuseTable { return Ok(Some(dry_run_purge_files)); } - // 3. purge root snapshots - if !keep_last_snapshot { - self.purge_root_snapshot( - ctx, - counter, - root_snapshot_info.snapshot_lite, - root_snapshot_info.referenced_locations, - root_snapshot_info.snapshot_location, - &table_agg_index_ids, - inverted_indexes, - ) - .await?; - } - Ok(None) } async fn read_root_snapshot( &self, ctx: &Arc, - put_cache: bool, + ref_protected_segments: HashSet, ) -> Result> { let root_snapshot_location_op = self.snapshot_loc(); if root_snapshot_location_op.is_none() { @@ -310,36 +306,26 @@ impl FuseTable { } let snapshot_location = root_snapshot_location_op.unwrap(); - let reader = MetaReaders::table_snapshot_reader(self.get_operator()); - let ver = TableMetaLocationGenerator::snapshot_version(snapshot_location.as_str()); - let params = LoadParams { - location: snapshot_location.clone(), - len_hint: None, - ver, - put_cache, - }; - let root_snapshot = match reader.read(¶ms).await { - Err(e) if e.code() == ErrorCode::STORAGE_NOT_FOUND => { - // concurrent gc: someone else has already collected this snapshot, ignore it - warn!( - "concurrent gc: snapshot {:?} already collected. table: {}, ident {}", - snapshot_location, self.table_info.desc, self.table_info.ident, - ); - return Ok(None); - } - Err(e) => return Err(e), - Ok(v) => v, + let Some(root_snapshot) = + SnapshotsIO::read_snapshot_for_vacuum(self.get_operator(), &snapshot_location).await? + else { + return Ok(None); }; // root snapshot cannot ignore storage not find error. 
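The purge path above reads the root snapshot through `SnapshotsIO::read_snapshot_for_vacuum`, which maps the storage not-found error to `Ok(None)` so that a snapshot already collected by a concurrent GC is tolerated instead of failing the purge. The same shape can be expressed as a tiny generic helper; this is a sketch only, the helper and closure are illustrative and not part of this patch:

// Treat one specific error condition as "absent" and surface everything else.
fn absent_on<T, E>(
    res: Result<T, E>,
    is_absent: impl Fn(&E) -> bool,
) -> Result<Option<T>, E> {
    match res {
        Ok(v) => Ok(Some(v)),
        Err(e) if is_absent(&e) => Ok(None),
        Err(e) => Err(e),
    }
}

// Usage sketch: `read(...)` and `ErrorKind::NotFound` are illustrative stand-ins.
// let snapshot = absent_on(read(&params), |e| e.kind() == ErrorKind::NotFound)?;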
+ let mut segments = + HashSet::with_capacity(ref_protected_segments.len() + root_snapshot.segments.len()); + segments.extend(ref_protected_segments); + segments.extend(root_snapshot.segments.clone()); + let segment_refs: Vec<&Location> = segments.iter().collect(); let referenced_locations = self - .get_block_locations(ctx.clone(), &root_snapshot.segments, put_cache, false) + .get_block_locations(ctx.clone(), &segment_refs, true, false) .await?; let snapshot_lite = Arc::new(SnapshotLiteExtended { - format_version: ver, + format_version: root_snapshot.format_version, snapshot_id: root_snapshot.snapshot_id, timestamp: root_snapshot.timestamp, - segments: HashSet::from_iter(root_snapshot.segments.clone()), + segments, table_statistics_location: root_snapshot.table_statistics_location(), }); Ok(Some(RootSnapshotInfo { @@ -365,8 +351,9 @@ impl FuseTable { let segment_locations = Vec::from_iter(segments_to_be_purged); for chunk in segment_locations.chunks(chunk_size) { // since we are purging files, the ErrorCode::STORAGE_NOT_FOUND error can be safely ignored. + let chunk_refs: Vec<&Location> = chunk.iter().collect(); let locations = self - .get_block_locations(ctx.clone(), chunk, false, true) + .get_block_locations(ctx.clone(), &chunk_refs, false, true) .await?; for loc in &locations.block_location { @@ -416,8 +403,9 @@ impl FuseTable { let segment_locations = Vec::from_iter(segments_to_be_purged); for chunk in segment_locations.chunks(chunk_size) { // since we are purging files, the ErrorCode::STORAGE_NOT_FOUND error can be safely ignored. + let chunk_refs: Vec<&Location> = chunk.iter().collect(); let locations = self - .get_block_locations(ctx.clone(), chunk, false, true) + .get_block_locations(ctx.clone(), &chunk_refs, false, true) .await?; let mut blocks_to_be_purged = HashSet::new(); @@ -499,75 +487,6 @@ impl FuseTable { .await } - async fn purge_root_snapshot( - &self, - ctx: &Arc, - counter: &mut PurgeCounter, - root_snapshot: Arc, - root_location_tuple: LocationTuple, - root_snapshot_location: String, - table_agg_index_ids: &[u64], - inverted_indexes: &BTreeMap, - ) -> Result<()> { - let segment_locations_to_be_purged = HashSet::from_iter( - root_snapshot - .segments - .iter() - .map(|loc| loc.0.clone()) - .collect::>(), - ); - - let mut agg_indexes_to_be_purged = HashSet::new(); - let mut inverted_indexes_to_be_purged = HashSet::new(); - for index_id in table_agg_index_ids { - agg_indexes_to_be_purged.extend(root_location_tuple.block_location.iter().map(|loc| { - TableMetaLocationGenerator::gen_agg_index_location_from_block_location( - loc, *index_id, - ) - })); - } - - // Collect the inverted index files accompanying blocks - // NOTE: For a block, and one index of it, there might be multiple inverted index files, - // such as, different versions of same (in the sense of name) inverted index. - // we do not handle this one block multiple inverted indexes case now. 
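Several purge helpers above now accept `&[&Location]` rather than `&[Location]`, so the merged, deduplicated segment set is borrowed instead of cloned per chunk. A minimal sketch of that borrow-then-chunk shape (the `Location` alias and helper name are simplified assumptions):

use std::collections::HashSet;

type Location = (String, u64);

// Deduplicate owned locations once, then hand out cheap &Location chunks,
// mirroring the Vec<&Location> chunking in the purge hunks above.
// `chunk_size` must be non-zero.
fn chunked_refs(segments: &HashSet<Location>, chunk_size: usize) -> Vec<Vec<&Location>> {
    let refs: Vec<&Location> = segments.iter().collect();
    refs.chunks(chunk_size).map(|c| c.to_vec()).collect()
}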
- for idx in inverted_indexes.values() { - inverted_indexes_to_be_purged.extend(root_location_tuple.block_location.iter().map( - |loc| { - TableMetaLocationGenerator::gen_inverted_index_location_from_block_location( - loc, - idx.name.as_str(), - idx.version.as_str(), - ) - }, - )); - } - - self.purge_block_segments( - ctx, - counter, - root_location_tuple.block_location, - agg_indexes_to_be_purged, - inverted_indexes_to_be_purged, - root_location_tuple.bloom_location, - root_location_tuple.hll_location, - segment_locations_to_be_purged, - ) - .await?; - - let mut ts_to_be_purged = HashSet::new(); - if let Some(ts) = root_snapshot.table_statistics_location.clone() { - ts_to_be_purged.insert(ts); - } - self.purge_ts_snapshots( - ctx, - counter, - ts_to_be_purged, - HashSet::from([root_snapshot_location]), - ) - .await - } - async fn purge_block_segments( &self, ctx: &Arc, @@ -728,7 +647,7 @@ impl FuseTable { pub async fn get_block_locations( &self, ctx: Arc, - segment_locations: &[Location], + segment_locations: &[&Location], put_cache: bool, ignore_err: bool, ) -> Result { @@ -738,9 +657,10 @@ impl FuseTable { let fuse_segments = SegmentsIO::create(ctx.clone(), self.operator.clone(), self.schema()); let chunk_size = ctx.get_settings().get_max_threads()? as usize * 4; - let mut projection = HashSet::new(); - projection.insert(LOCATION.to_string()); - projection.insert(BLOOM_FILTER_INDEX_LOCATION.to_string()); + let projection = HashSet::from([ + LOCATION.to_string(), + BLOOM_FILTER_INDEX_LOCATION.to_string(), + ]); for chunk in segment_locations.chunks(chunk_size) { let results = match self.is_column_oriented() { true => { @@ -787,7 +707,7 @@ impl FuseTable { for (idx, location_tuple) in results.into_iter().enumerate() { let location_tuple = match location_tuple { Err(e) if e.code() == ErrorCode::STORAGE_NOT_FOUND && ignore_err => { - let location = &segment_locations[idx]; + let location = chunk[idx]; // concurrent gc: someone else has already collected this segment, ignore it warn!( "concurrent gc: segment of location {} already collected. table: {}, ident {}", @@ -819,6 +739,244 @@ impl FuseTable { ); SnapshotsIO::list_files(self.get_operator(), &prefix, None).await } + + /// Design note: + /// Branches are vacuumed using a timestamp-based retention policy to simplify data lifecycle management. + /// For inactive branches, the snapshot root timestamp may remain very old. + /// If we apply snapshot-count based cleanup, this old timestamp could unnecessarily retain data + pub fn get_refs_retention_policy( + &self, + ctx: &dyn TableContext, + now: DateTime, + ) -> Result<(DateTime, usize)> { + // referenced by the main branch and delay garbage collection. + let retention_policy = self.get_data_retention_policy(ctx)?; + let (delta_duration, num_snapshots_to_keep) = match retention_policy { + RetentionPolicy::ByNumOfSnapshotsToKeep(n) => { + let duration = self.get_data_retention_period(ctx)?; + (duration, n) + } + RetentionPolicy::ByTimePeriod(delta_duration) => { + (delta_duration, DEFAULT_REF_NUM_SNAPSHOT_LIMIT) + } + }; + let retention_time = now - delta_duration; + Ok((retention_time, num_snapshots_to_keep)) + } + + /// List snapshots for branch with fallback strategy: + /// 1. First try to list by timestamp (retention_time) + /// 2. 
If empty, fall back to listing all and truncating by num_snapshots_to_keep
+    #[async_backtrace::framed]
+    pub async fn list_branch_snapshots_with_fallback(
+        &self,
+        branch_id: u64,
+        head: &str,
+        retention_time: DateTime<Utc>,
+        num_snapshots_to_keep: usize,
+    ) -> Result<Vec<Entry>> {
+        let ref_snapshot_location_prefix = self
+            .meta_location_generator()
+            .ref_snapshot_location_prefix();
+        let ref_prefix = format!("{}{}/", ref_snapshot_location_prefix, branch_id);
+        // First attempt: list by timestamp
+        let mut snapshots = self
+            .list_files_until_timestamp(&ref_prefix, retention_time, true, None)
+            .await?;
+
+        // If no snapshots were found by timestamp, fall back to the count-based strategy
+        if snapshots.is_empty() {
+            snapshots = self
+                .list_files_until_prefix(&ref_prefix, head, true, None)
+                .await?;
+            let len = snapshots.len();
+            if len > num_snapshots_to_keep {
+                let num_candidates = len - num_snapshots_to_keep + 2;
+                snapshots.truncate(num_candidates);
+            } else {
+                snapshots.clear();
+            }
+        }
+        Ok(snapshots)
+    }
+
+    /// Find the earliest snapshot via snapshot history traversal
+    #[async_backtrace::framed]
+    pub async fn find_earliest_snapshot_via_history(
+        &self,
+        ref_name: &str,
+        snapshot_ref: &SnapshotRef,
+    ) -> Result<Arc<TableSnapshot>> {
+        let head_location = &snapshot_ref.loc;
+        let snapshot_version = TableMetaLocationGenerator::snapshot_version(head_location);
+        let reader = MetaReaders::table_snapshot_reader(self.get_operator());
+        let mut snapshot_stream = reader.snapshot_history(
+            head_location.to_string(),
+            snapshot_version,
+            self.meta_location_generator().clone(),
+            Some(snapshot_ref.id),
+        );
+
+        let mut last_snapshot = None;
+        while let Some((snapshot, _version)) = snapshot_stream.try_next().await? {
+            last_snapshot = Some(snapshot);
+        }
+
+        last_snapshot.ok_or_else(|| {
+            ErrorCode::Internal(format!(
+                "Failed to find any snapshot in history for branch {}",
+                ref_name
+            ))
+        })
+    }
+
+    /// Process the gc_root derived from the last snapshot and collect snapshots to purge.
+    ///
+    /// Returns the gc_root snapshot if found
+    async fn select_branch_gc_root(
+        &self,
+        branch_id: u64,
+        snapshots_before_retention: &[Entry],
+        ref_snapshots_to_purge: &mut Vec<String>,
+    ) -> Result<Option<Arc<TableSnapshot>>> {
+        if snapshots_before_retention.len() < 2 {
+            return Ok(None);
+        }
+
+        let last_snapshot_path = snapshots_before_retention.last().unwrap().path();
+        let op = self.get_operator();
+        let (last_snapshot, _) =
+            SnapshotsIO::read_snapshot(last_snapshot_path.to_string(), op.clone(), false).await?;
+
+        // Use its prev_snapshot_id as the gc_root
+        let Some((gc_root_id, gc_root_ver)) = last_snapshot.prev_snapshot_id else {
+            return Ok(None);
+        };
+        let gc_root_path = self
+            .meta_location_generator()
+            .ref_snapshot_location_from_uuid(branch_id, &gc_root_id, gc_root_ver)?;
+        // Try to read the gc_root snapshot
+        match SnapshotsIO::read_snapshot(gc_root_path.clone(), op.clone(), false).await {
+            Ok((gc_root_snap, _)) => {
+                // Collect snapshots_to_purge
+                let mut gc_candidates = Vec::with_capacity(snapshots_before_retention.len());
+                for snapshot in snapshots_before_retention.iter() {
+                    gc_candidates.push(snapshot.path().to_owned());
+                }
+
+                // Find the gc_root position among the candidates
+                let gc_root_idx = gc_candidates.binary_search(&gc_root_path).map_err(|_| {
+                    ErrorCode::Internal(format!(
+                        "gc root path {} should be one of the candidates, candidates: {:?}",
+                        gc_root_path, gc_candidates
+                    ))
+                })?;
+                ref_snapshots_to_purge.extend_from_slice(&gc_candidates[..gc_root_idx]);
+
+                Ok(Some(gc_root_snap))
+            }
+            Err(e) => {
+                // Log the error but continue processing
+                warn!(
+                    "Failed to read gc_root snapshot at {}: {}, falling back to the earliest snapshot in history",
+                    gc_root_path, e
+                );
+                Ok(None)
+            }
+        }
+    }
+
+    /// Process snapshot refs (branches and tags) for purge.
+    /// Returns the protected segments from ref gc roots (tags and branches).
+    #[async_backtrace::framed]
+    async fn process_refs_for_purge(
+        &self,
+        ctx: &Arc<dyn TableContext>,
+        counter: &mut PurgeCounter,
+        dry_run: bool,
+    ) -> Result<HashSet<Location>> {
+        let now = Utc::now();
+        let table_info = self.get_table_info();
+        let op = self.get_operator();
+        let (retention_time, num_snapshots_to_keep) =
+            self.get_refs_retention_policy(ctx.as_ref(), now)?;
+
+        let mut ref_protected_segments = HashSet::new();
+        let mut ref_snapshots_to_purge = Vec::new();
+        let mut expired_refs = HashSet::new();
+
+        // First pass: process refs and identify expired refs
+        for (ref_name, snapshot_ref) in table_info.meta.refs.iter() {
+            // Check if the ref is expired
+            if snapshot_ref.expire_at.is_some_and(|v| v < now) {
+                expired_refs.insert(ref_name);
+                continue;
+            }
+
+            match &snapshot_ref.typ {
+                SnapshotRefType::Tag => {
+                    // Tag: read the head snapshot as gc root to protect its segments
+                    let (tag_snapshot, _) =
+                        SnapshotsIO::read_snapshot(snapshot_ref.loc.clone(), op.clone(), true)
+                            .await?;
+
+                    // Collect segments from the tag
+                    for seg_loc in &tag_snapshot.segments {
+                        ref_protected_segments.insert(seg_loc.clone());
+                    }
+                }
+                SnapshotRefType::Branch => {
+                    let branch_id = snapshot_ref.id;
+                    let snapshots_before_lvt = self
+                        .list_branch_snapshots_with_fallback(
+                            branch_id,
+                            &snapshot_ref.loc,
+                            retention_time,
+                            num_snapshots_to_keep,
+                        )
+                        .await?;
+
+                    let gc_root_snap = if let Some(gc_root_snap) = self
+                        .select_branch_gc_root(
+                            branch_id,
+                            &snapshots_before_lvt,
+                            &mut ref_snapshots_to_purge,
+                        )
+                        .await?
+                    {
+                        gc_root_snap
+                    } else {
+                        self.find_earliest_snapshot_via_history(ref_name, snapshot_ref)
+                            .await?
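+                        // Note: this fallback path walks the branch history from
+                        // its head and protects the earliest reachable snapshot;
+                        // it is taken when no readable gc root exists among the
+                        // listed ref snapshots (fewer than two candidates, no
+                        // prev_snapshot_id, or the gc root object already gone).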
+ }; + // Collect segments from gc_root + for seg_loc in &gc_root_snap.segments { + ref_protected_segments.insert(seg_loc.clone()); + } + } + } + } + + if dry_run { + return Ok(ref_protected_segments); + } + + // Cleanup expired ref directories + if !expired_refs.is_empty() { + let _ = self.update_table_refs_meta(ctx, &expired_refs).await?; + } + + // Purge ref snapshots if not dry_run + if !ref_snapshots_to_purge.is_empty() { + counter.snapshots += ref_snapshots_to_purge.len(); + let fuse_file = Files::create(ctx.clone(), op); + fuse_file + .remove_file_in_batch(ref_snapshots_to_purge) + .await?; + } + + Ok(ref_protected_segments) + } } struct RootSnapshotInfo { diff --git a/src/query/storages/fuse/src/operations/mod.rs b/src/query/storages/fuse/src/operations/mod.rs index 8eec907a06df0..ead9a33a8a147 100644 --- a/src/query/storages/fuse/src/operations/mod.rs +++ b/src/query/storages/fuse/src/operations/mod.rs @@ -59,3 +59,4 @@ pub use util::column_parquet_metas; pub use util::read_block; pub use util::set_backoff; pub use vacuum::vacuum_tables_from_info; +pub use vacuum::ASSUMPTION_MAX_TXN_DURATION; diff --git a/src/query/storages/fuse/src/operations/mutation/mutator/segment_compact_mutator.rs b/src/query/storages/fuse/src/operations/mutation/mutator/segment_compact_mutator.rs index 6daa79bbe20d7..a069ff91431da 100644 --- a/src/query/storages/fuse/src/operations/mutation/mutator/segment_compact_mutator.rs +++ b/src/query/storages/fuse/src/operations/mutation/mutator/segment_compact_mutator.rs @@ -147,7 +147,6 @@ impl SegmentCompactMutator { &self.compaction.segments_locations, statistics, self.table_meta_timestamps, - None, ) .await } diff --git a/src/query/storages/fuse/src/operations/navigate.rs b/src/query/storages/fuse/src/operations/navigate.rs index 0eba172650d63..73b58af5f2cc5 100644 --- a/src/query/storages/fuse/src/operations/navigate.rs +++ b/src/query/storages/fuse/src/operations/navigate.rs @@ -62,6 +62,11 @@ impl FuseTable { .await } NavigationPoint::StreamInfo(info) => self.navigate_to_stream(ctx, info).await, + NavigationPoint::TableRef { typ, name } => { + let table_ref = self.table_info.get_table_ref(Some(typ), name)?; + self.load_table_by_location(ctx, Some(table_ref.loc.clone())) + .await + } } } @@ -82,8 +87,17 @@ impl FuseTable { stream_info.desc, self.table_info.desc ))); } + let location = options.get(OPT_KEY_SNAPSHOT_LOCATION).cloned(); + self.load_table_by_location(ctx, location).await + } - let Some(snapshot_loc) = options.get(OPT_KEY_SNAPSHOT_LOCATION) else { + #[async_backtrace::framed] + async fn load_table_by_location( + &self, + ctx: &Arc, + location: Option, + ) -> Result> { + let Some(snapshot_loc) = location else { let mut table_info = self.table_info.clone(); table_info.meta.options.remove(OPT_KEY_SNAPSHOT_LOCATION); table_info.meta.statistics = TableStatistics::default(); @@ -94,7 +108,7 @@ impl FuseTable { return Ok(table.into()); }; let (snapshot, format_version) = - SnapshotsIO::read_snapshot(snapshot_loc.clone(), self.get_operator()).await?; + SnapshotsIO::read_snapshot(snapshot_loc.clone(), self.get_operator(), true).await?; self.load_table_by_snapshot( snapshot.as_ref(), format_version, @@ -175,6 +189,7 @@ impl FuseTable { location, snapshot_version, self.meta_location_generator().clone(), + self.get_branch_id(), ); // Find the instant which matches the given `time_point`. @@ -222,9 +237,13 @@ impl FuseTable { table_info.meta.schema = Arc::new(snapshot.schema.clone()); // 2. 
the table option `snapshot_location`
-        let loc = self
-            .meta_location_generator
-            .snapshot_location_from_uuid(&snapshot.snapshot_id, format_version)?;
+        let loc = if let Some(id) = self.get_branch_id() {
+            self.meta_location_generator
+                .ref_snapshot_location_from_uuid(id, &snapshot.snapshot_id, format_version)?
+        } else {
+            self.meta_location_generator
+                .snapshot_location_from_uuid(&snapshot.snapshot_id, format_version)?
+        };
         table_info
             .meta
             .options
@@ -284,6 +303,7 @@ impl FuseTable {
             Some(NavigationPoint::StreamInfo(info)) => {
                 self.list_by_stream(info, time_point).await
             }
+            Some(NavigationPoint::TableRef { .. }) => unreachable!(),
             None => self.list_by_time_point(time_point).await,
         }?;
@@ -455,4 +475,111 @@ impl FuseTable {
         Ok(file_list.into_iter().map(|v| v.0).collect())
     }
+
+    #[fastrace::trace]
+    #[async_backtrace::framed]
+    pub async fn navigate_to_location(
+        &self,
+        ctx: Arc<dyn TableContext>,
+        point: &NavigationPoint,
+    ) -> Result<Option<String>> {
+        match point {
+            NavigationPoint::SnapshotID(snapshot_id) => {
+                // Because the user explicitly asked for a specific snapshot,
+                // we treat "not found" as an error instead of silently returning None.
+                let Some(location) = self.snapshot_loc() else {
+                    return Err(ErrorCode::TableHistoricalDataNotFound(
+                        "Empty Table has no historical data",
+                    ));
+                };
+                let loc = self
+                    .find_location(&ctx, location, |snapshot| {
+                        snapshot
+                            .snapshot_id
+                            .simple()
+                            .to_string()
+                            .as_str()
+                            .starts_with(snapshot_id)
+                    })
+                    .await?;
+                Ok(Some(loc))
+            }
+            NavigationPoint::TimePoint(time_point) => {
+                // This allows users to query historical states gracefully even if
+                // the table was created *after* the given time.
+                let Some(location) = self.snapshot_loc() else {
+                    return Ok(None);
+                };
+                let loc = self
+                    .find_location(&ctx, location, |snapshot| {
+                        if let Some(ts) = snapshot.timestamp {
+                            ts <= *time_point
+                        } else {
+                            false
+                        }
+                    })
+                    .await
+                    .ok();
+                Ok(loc)
+            }
+            NavigationPoint::StreamInfo(stream_info) => {
+                let options = stream_info.options();
+                let stream_table_id = options
+                    .get(OPT_KEY_SOURCE_TABLE_ID)
+                    .ok_or_else(|| ErrorCode::Internal("table id must be set"))?
+                    .parse::<u64>()?;
+                if stream_table_id != self.table_info.ident.table_id {
+                    return Err(ErrorCode::IllegalStream(format!(
+                        "The stream '{}' does not match the table '{}'",
+                        stream_info.desc, self.table_info.desc
+                    )));
+                }
+                Ok(options.get(OPT_KEY_SNAPSHOT_LOCATION).cloned())
+            }
+            NavigationPoint::TableRef { typ, name } => {
+                let table_ref = self.table_info.get_table_ref(Some(typ), name)?;
+                Ok(Some(table_ref.loc.clone()))
+            }
+        }
+    }
+
+    #[async_backtrace::framed]
+    pub async fn find_location<P>

( + &self, + ctx: &Arc, + location: String, + mut pred: P, + ) -> Result + where + P: FnMut(&TableSnapshot) -> bool, + { + let abort_checker = ctx.clone().get_abort_checker(); + let snapshot_version = TableMetaLocationGenerator::snapshot_version(location.as_str()); + let reader = MetaReaders::table_snapshot_reader(self.get_operator()); + // grab the table history as stream + // snapshots are order by timestamp DESC. + let mut snapshot_stream = reader.snapshot_history( + location, + snapshot_version, + self.meta_location_generator().clone(), + self.get_branch_id(), + ); + + // Find the snapshot which matches the given `time_point`. + while let Some((snapshot, format_version)) = snapshot_stream.try_next().await? { + abort_checker + .try_check_aborting() + .with_context(|| "failed to find snapshot")?; + if pred(snapshot.as_ref()) { + let snapshot_location = self + .meta_location_generator + .snapshot_location_from_uuid(&snapshot.snapshot_id, format_version)?; + return Ok(snapshot_location); + } + } + + Err(ErrorCode::TableHistoricalDataNotFound( + "No historical data found at given point", + )) + } } diff --git a/src/query/storages/fuse/src/operations/vacuum.rs b/src/query/storages/fuse/src/operations/vacuum.rs index c6a4ba62d3960..372b875278b84 100644 --- a/src/query/storages/fuse/src/operations/vacuum.rs +++ b/src/query/storages/fuse/src/operations/vacuum.rs @@ -15,39 +15,495 @@ // Logs from this module will show up as "[VACUUM] ...". databend_common_tracing::register_module_tag!("[VACUUM]"); -// src/query/storages/fuse/src/vacuum/mod.rs - +use std::collections::HashSet; use std::sync::Arc; +use backoff::backoff::Backoff; +use chrono::DateTime; +use chrono::Duration; +use chrono::Utc; +use databend_common_catalog::table::Table; use databend_common_catalog::table::TableExt; use databend_common_catalog::table_context::TableContext; +use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_meta_app::schema::SnapshotRefType; use databend_common_meta_app::schema::TableInfo; +use databend_common_meta_app::schema::UpdateTableMetaReq; +use databend_common_meta_types::MatchSeq; use databend_enterprise_vacuum_handler::VacuumHandlerWrapper; +use databend_storages_common_table_meta::meta::uuid_from_date_time; +use databend_storages_common_table_meta::meta::Location; +use databend_storages_common_table_meta::meta::VACUUM2_OBJECT_KEY_PREFIX; +use futures_util::TryStreamExt; +use log::error; use log::info; use log::warn; +use opendal::Entry; +use opendal::Operator; +use opendal::Scheme; +use crate::io::SnapshotLiteExtended; +use crate::io::SnapshotsIO; +use crate::io::TableMetaLocationGenerator; +use crate::operations::set_backoff; use crate::FuseTable; -pub async fn vacuum_table( - fuse_table: &FuseTable, - ctx: Arc, - vacuum_handler: &VacuumHandlerWrapper, - respect_flash_back: bool, -) { - warn!( - "Vacuuming table: {}, ident: {}", - fuse_table.table_info.name, fuse_table.table_info.ident - ); - - if let Err(e) = vacuum_handler - .do_vacuum2(fuse_table, ctx, respect_flash_back) - .await - { - // Vacuum in a best-effort manner, errors are ignored - warn!("Vacuum table {} failed : {}", fuse_table.table_info.name, e); +/// An assumption of the maximum duration from the time the first block is written to the time the +/// snapshot is written. +/// +/// To handle the situation during an upgrade where some nodes may not be able to upgrade in time to +/// a version that includes the vacuum2 logic, we introduce this assumption. 
It is used in two places: +/// +/// - When determining whether a snapshot object generated by an old version node can be cleaned up +/// +/// Snapshots whose object key does not start with `VACUUM2_OBJECT_KEY_PREFIX` are all created by +/// nodes of previous versions (do not support vacuum2). For such snapshot objects, if their +/// timestamp is less than +/// `GC_root's timestamp - ASSUMPTION_MAX_TXN_DURATION` +/// we consider them safe to delete. +/// +/// Generally speaking, if a snapshot from an old version was created a sufficiently long time +/// before the gc root, it would not be successfully committed after the gc root; this way, we +/// avoid deleting a snapshot object produced by an ongoing (not yet committed) transaction. +/// +/// - When determining whether a segment/block object generated by an old version query node can be +/// cleaned up +/// +/// Similarly, if a segment/block was created at a time sufficiently long before the gc root and +/// is not referenced by the gc root, then it will not be referenced by a snapshot that can be +/// successfully committed after the gc root, and safe to delete. +/// +/// NOTE: +/// If this assumption does not hold, it may lead to table data becoming inaccessible: +/// snapshots may become inaccessible, or some data may become unavailable. +/// +/// If the entire cluster is upgraded to the new version that includes the vacuum2 logic, +/// the above risks will not exist. +pub const ASSUMPTION_MAX_TXN_DURATION: Duration = Duration::days(3); + +/// Object storage supported by Databend is expected to return entries sorted in ascending lexicographical +/// order by object key. Databend leverages this property to enhance the efficiency and thoroughness +/// of the vacuum process. +/// +/// The safety of the vacuum algorithm does not depend on this ordering. +async fn general_list_until_prefix( + dal: &Operator, + path: &str, + until: &str, + need_one_more: bool, + gc_root_meta_ts: Option>, +) -> Result> { + let mut lister = dal.lister(path).await?; + let mut paths = vec![]; + while let Some(entry) = lister.try_next().await? { + if entry.metadata().is_dir() { + continue; + } + if entry.path() >= until { + info!("entry path: {} >= until: {}", entry.path(), until); + if need_one_more { + paths.push(entry); + } + break; + } + if gc_root_meta_ts.is_none() + || is_gc_candidate_segment_block(&entry, dal, gc_root_meta_ts.unwrap()).await? + { + paths.push(entry); + } + } + Ok(paths) +} + +/// If storage is backed by FS, we prioritize thoroughness over efficiency (though efficiency loss +/// is usually not significant). All entries are fetched and sorted before extracting the prefix entries. +async fn fs_list_until_prefix( + dal: &Operator, + path: &str, + until: &str, + need_one_more: bool, + gc_root_meta_ts: Option>, +) -> Result> { + // Fetch ALL entries from the path and sort them by path in lexicographical order. + let mut lister = dal.lister(path).await?; + let mut entries = Vec::new(); + while let Some(item) = lister.try_next().await? { + if item.metadata().is_file() { + entries.push(item); + } + } + entries.sort_by(|l, r| l.path().cmp(r.path())); + + // Extract entries up to the `until` path, respecting lexicographical order. + let mut res = Vec::new(); + for entry in entries { + if entry.path() >= until { + info!("entry path: {} >= until: {}", entry.path(), until); + if need_one_more { + res.push(entry); + } + break; + } + if gc_root_meta_ts.is_none() + || is_gc_candidate_segment_block(&entry, dal, gc_root_meta_ts.unwrap()).await? 
+ { + res.push(entry); + } + } + + Ok(res) +} + +/// Check if an entry is a candidate for garbage collection +async fn is_gc_candidate_segment_block( + entry: &Entry, + op: &Operator, + gc_root_meta_ts: DateTime, +) -> Result { + let path = entry.path(); + let last_part = path.rsplit('/').next().unwrap(); + if last_part.starts_with(VACUUM2_OBJECT_KEY_PREFIX) { + return Ok(true); + } + let last_modified = if let Some(v) = entry.metadata().last_modified() { + v } else { - info!("Vacuum table {} done", fuse_table.table_info.name); + let path = entry.path(); + let meta = op.stat(path).await?; + meta.last_modified().ok_or_else(|| { + ErrorCode::StorageOther(format!( + "Failed to get `last_modified` metadata of the entry '{}'", + path + )) + })? + }; + + Ok(last_modified + ASSUMPTION_MAX_TXN_DURATION < gc_root_meta_ts) +} + +impl FuseTable { + pub async fn vacuum_table( + &self, + ctx: Arc, + vacuum_handler: &VacuumHandlerWrapper, + respect_flash_back: bool, + ) { + warn!( + "Vacuuming table: {}, ident: {}", + self.table_info.name, self.table_info.ident + ); + + if let Err(e) = vacuum_handler + .do_vacuum2(self, ctx, respect_flash_back) + .await + { + // Vacuum in a best-effort manner, errors are ignored + warn!("Vacuum table {} failed : {}", self.table_info.name, e); + } else { + info!("Vacuum table {} done", self.table_info.name); + } + } + + /// List files until a specific timestamp + /// + /// This implementation uses UUID v7 timestamp extraction for precise filtering. + /// Used by both do_vacuum and do_vacuum2. + pub async fn list_files_until_timestamp( + &self, + path: &str, + until: DateTime, + need_one_more: bool, + gc_root_meta_ts: Option>, + ) -> Result> { + let uuid = uuid_from_date_time(until); + let uuid_str = uuid.simple().to_string(); + + // extract the most significant 48 bits, which is 12 characters + let timestamp_component = &uuid_str[..12]; + let until = format!( + "{}{}{}", + path, VACUUM2_OBJECT_KEY_PREFIX, timestamp_component + ); + self.list_files_until_prefix(path, &until, need_one_more, gc_root_meta_ts) + .await + } + + /// List files until a specific location/prefix + /// + /// This implementation handles different storage schemes (FS vs object storage) and + /// includes gc_root_meta_ts checking for safe vacuum operations. 
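+    ///
+    /// A usage sketch (the prefix and boundary key are illustrative values, not
+    /// real paths):
+    ///
+    /// ```ignore
+    /// // Collect ref snapshot objects whose keys sort strictly below `until`,
+    /// // keeping one extra entry at the boundary (`need_one_more = true`).
+    /// let entries = fuse_table
+    ///     .list_files_until_prefix("prefix/_ref/1/", "prefix/_ref/1/h018f", true, None)
+    ///     .await?;
+    /// ```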
+ + /// List files until a specific location/prefix + /// + /// This implementation handles different storage schemes (FS vs object storage) and + /// includes gc_root_meta_ts checking for safe vacuum operations. + pub async fn list_files_until_prefix( + &self, + path: &str, + until: &str, + need_one_more: bool, + gc_root_meta_ts: Option<DateTime<Utc>>, + ) -> Result<Vec<Entry>> { + info!("Listing files until prefix: {}", until); + let dal = self.get_operator_ref(); + + match dal.info().scheme() { + Scheme::Fs => { + fs_list_until_prefix(dal, path, until, need_one_more, gc_root_meta_ts).await + } + _ => general_list_until_prefix(dal, path, until, need_one_more, gc_root_meta_ts).await, + } + } + + /// Collect segments from snapshots in a given prefix + /// + /// This is a helper function used for both the main branch and branch refs + async fn collect_snapshots_segments<T>( + snapshots_io: &SnapshotsIO, + operator: &Operator, + snapshot_location: &str, + root_snapshot_lite: Arc<SnapshotLiteExtended>, + max_threads: usize, + segments: &mut HashSet<Location>, + status_callback: &T, + ) -> Result<()> + where + T: Fn(String), + { + // List all the snapshot file paths + let mut snapshot_files = vec![]; + if let Some(prefix) = SnapshotsIO::get_s3_prefix_from_file(snapshot_location) { + snapshot_files = SnapshotsIO::list_files(operator.clone(), &prefix, None).await?; + } + + if snapshot_files.is_empty() { + return Ok(()); + } + + let start = std::time::Instant::now(); + let mut count = 1; + + // First, save the root snapshot's segments + root_snapshot_lite.segments.iter().for_each(|location| { + segments.insert(location.to_owned()); + }); + + // Process snapshots in chunks + for chunk in snapshot_files.chunks(max_threads) { + // We want all the files referenced by snapshots, so set `ignore_timestamp` to true + let results = snapshots_io + .read_snapshot_lite_extends(chunk, root_snapshot_lite.clone(), true) + .await?; + + results + .into_iter() + .flatten() + .for_each(|snapshot_lite_extend| { + snapshot_lite_extend.segments.iter().for_each(|location| { + segments.insert(location.to_owned()); + }); + }); + + // Refresh status + count += chunk.len(); + let status = format!( + "gc orphan: read snapshot files:{}/{}, segment files: {}, cost:{:?}", + count, + snapshot_files.len(), + segments.len(), + start.elapsed() + ); + info!("{}", status); + (status_callback)(status); + } + + Ok(()) + }
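The chunked traversal in `collect_snapshots_segments` is the load-bearing pattern here: read snapshot metadata `max_threads` locations at a time, union every referenced segment location into one set, and surface progress through the caller-supplied callback. A simplified synchronous sketch under assumed types, with `read_segments_of` standing in for `read_snapshot_lite_extends`:

```rust
use std::collections::HashSet;
use std::time::Instant;

/// Sketch of the chunked collection: visit snapshot locations in chunks,
/// merge the segment locations they reference into one set, and report
/// progress through a callback after every chunk.
fn collect_segments<T: Fn(String)>(
    snapshot_files: &[String],
    read_segments_of: impl Fn(&str) -> Vec<String>,
    max_threads: usize,
    segments: &mut HashSet<String>,
    status_callback: &T,
) {
    let start = Instant::now();
    let mut count = 0;
    for chunk in snapshot_files.chunks(max_threads) {
        for loc in chunk {
            segments.extend(read_segments_of(loc));
        }
        count += chunk.len();
        status_callback(format!(
            "read snapshot files:{}/{}, segment files: {}, cost:{:?}",
            count,
            snapshot_files.len(),
            segments.len(),
            start.elapsed()
        ));
    }
}

fn main() {
    let snapshots: Vec<String> = (0..5).map(|i| format!("_ss/{i}")).collect();
    let mut segments = HashSet::new();
    collect_segments(
        &snapshots,
        |loc| vec![format!("{loc}/seg")], // pretend each snapshot references one segment
        2,
        &mut segments,
        &|status| println!("{status}"),
    );
    assert_eq!(segments.len(), 5);
}
```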
+ + /// Get all segments referenced by snapshots, including branches and tags + #[async_backtrace::framed] + pub async fn get_snapshot_referenced_segments<T>( + &self, + ctx: Arc<dyn TableContext>, + status_callback: T, + ) -> Result<Option<HashSet<Location>>> + where + T: Fn(String), + { + // 1. Read the root snapshot + let root_snapshot_location_op = self.snapshot_loc(); + if root_snapshot_location_op.is_none() { + return Ok(None); + } + + let root_snapshot_location = root_snapshot_location_op.unwrap(); + let root_snapshot = match SnapshotsIO::read_snapshot_for_vacuum( + self.get_operator(), + root_snapshot_location.as_str(), + ) + .await? + { + Some(snapshot) => snapshot, + None => return Ok(None), + }; + + let ver = TableMetaLocationGenerator::snapshot_version(root_snapshot_location.as_str()); + let root_snapshot_lite = Arc::new(SnapshotLiteExtended { + format_version: ver, + snapshot_id: root_snapshot.snapshot_id, + timestamp: root_snapshot.timestamp, + segments: HashSet::from_iter(root_snapshot.segments.clone()), + table_statistics_location: root_snapshot.table_statistics_location(), + }); + drop(root_snapshot); + + let snapshots_io = SnapshotsIO::create(ctx.clone(), self.get_operator()); + let operator = self.get_operator(); + let table_info = self.get_table_info(); + let max_threads = ctx.get_settings().get_max_threads()? as usize; + + // 2. Collect segments from the main branch + let mut segments = HashSet::new(); + Self::collect_snapshots_segments( + &snapshots_io, + &operator, + &root_snapshot_location, + root_snapshot_lite.clone(), + max_threads, + &mut segments, + &status_callback, + ) + .await?; + + // 3. Collect segments from branches and tags + for snapshot_ref in table_info.meta.refs.values() { + match snapshot_ref.typ { + SnapshotRefType::Tag => { + // Read the tag snapshot and collect its segments + match SnapshotsIO::read_snapshot_for_vacuum(operator.clone(), &snapshot_ref.loc) + .await? + { + Some(snapshot) => { + for seg_loc in &snapshot.segments { + segments.insert(seg_loc.clone()); + } + } + None => { + return Ok(None); + } + } + } + SnapshotRefType::Branch => { + // Read the branch head snapshot to create a branch-specific root_snapshot_lite + match SnapshotsIO::read_snapshot_for_vacuum(operator.clone(), &snapshot_ref.loc) + .await? + { + Some(snapshot) => { + let branch_root_snapshot_lite = Arc::new(SnapshotLiteExtended { + format_version: TableMetaLocationGenerator::snapshot_version( + &snapshot_ref.loc, + ), + snapshot_id: snapshot.snapshot_id, + timestamp: snapshot.timestamp, + segments: HashSet::from_iter(snapshot.segments.clone()), + table_statistics_location: snapshot.table_statistics_location(), + }); + + // Collect segments from all branch snapshots using the branch's own root_snapshot_lite + Self::collect_snapshots_segments( + &snapshots_io, + &operator, + &snapshot_ref.loc, + branch_root_snapshot_lite, + max_threads, + &mut segments, + &status_callback, + ) + .await?; + } + None => { + return Ok(None); + } + } + } + } + } + + info!( + "gc orphan: collected segments from {} refs, total segments: {}", + table_info.meta.refs.len(), + segments.len() + ); + + Ok(Some(segments)) + } + + /// Update the table metadata, removing the expired refs + #[async_backtrace::framed] + pub async fn update_table_refs_meta( + &self, + ctx: &Arc<dyn TableContext>, + expired_refs: &HashSet<&String>, + ) -> Result<Vec<String>> { + let catalog = ctx.get_default_catalog()?; + + let mut retries = 0; + let mut backoff = set_backoff(None, None, None); + let mut latest_table_info = self.get_table_info(); + // holds a reference to the latest table across retries + let mut latest_table_ref: Arc<dyn Table>; + // Step 1: update the table meta if refs changed + loop { + let mut new_table_meta = latest_table_info.meta.clone(); + new_table_meta + .refs + .retain(|ref_name, _| !expired_refs.contains(ref_name)); + let req = UpdateTableMetaReq { + table_id: latest_table_info.ident.table_id, + seq: MatchSeq::Exact(latest_table_info.ident.seq), + new_table_meta, + base_snapshot_location: self.snapshot_loc(), + }; + match catalog + .update_single_table_meta(req, latest_table_info) + .await + { + Err(e) if e.code() == ErrorCode::TABLE_VERSION_MISMATCHED => { + match backoff.next_backoff() { + Some(d) => { + databend_common_base::base::tokio::time::sleep(d).await; + latest_table_ref = self.refresh(ctx.as_ref()).await?; + latest_table_info = latest_table_ref.get_table_info(); + retries += 1; + continue; + } + None => { + return Err(ErrorCode::StorageOther(format!( + "update table meta failed after {} retries", + retries + ))); + } + } + } + Err(e) => { + return Err(e); + } + Ok(_) => { + break; + } + } + }
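The loop above is a standard optimistic-concurrency retry: attempt the seq-guarded meta update, and on `TABLE_VERSION_MISMATCHED` sleep per an exponential backoff, refresh the table, and try again until the backoff is exhausted. A minimal sketch of that shape using the `backoff` crate; the error enum and both closures are stand-ins for the catalog API, not Databend types:

```rust
use std::cell::Cell;
use std::thread::sleep;

use backoff::backoff::Backoff;
use backoff::ExponentialBackoff;

#[allow(dead_code)]
enum MetaError {
    VersionMismatch,
    Other(String),
}

/// Retry a compare-and-swap style update while the failure is a version
/// mismatch, refreshing the observed sequence number between attempts.
fn update_with_retries(
    mut try_update: impl FnMut(u64) -> Result<(), MetaError>,
    mut refresh: impl FnMut() -> u64,
) -> Result<(), String> {
    let mut backoff = ExponentialBackoff::default();
    let mut seq = refresh();
    let mut retries = 0;
    loop {
        match try_update(seq) {
            Ok(()) => return Ok(()),
            Err(MetaError::VersionMismatch) => match backoff.next_backoff() {
                Some(d) => {
                    sleep(d);
                    seq = refresh(); // pick up the concurrent writer's changes
                    retries += 1;
                }
                None => return Err(format!("update failed after {retries} retries")),
            },
            Err(MetaError::Other(e)) => return Err(e),
        }
    }
}

fn main() {
    // Simulate two concurrent-writer conflicts before the update succeeds.
    let attempts = Cell::new(0u64);
    let result = update_with_retries(
        |_seq| {
            attempts.set(attempts.get() + 1);
            if attempts.get() < 3 {
                Err(MetaError::VersionMismatch)
            } else {
                Ok(())
            }
        },
        || attempts.get(),
    );
    assert!(result.is_ok());
}
```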
+ + // Step 2: Cleanup expired ref directories + let mut dir_to_gc = Vec::with_capacity(expired_refs.len()); + let ref_snapshot_location_prefix = self + .meta_location_generator() + .ref_snapshot_location_prefix(); + let op = self.get_operator(); + for ref_name in expired_refs { + let dir = format!("{}{}/", ref_snapshot_location_prefix, *ref_name); + op.remove_all(&dir).await.inspect_err(|err| { + error!("Failed to remove expired ref directory {}: {}", dir, err); + })?; + dir_to_gc.push(dir); + } + Ok(dir_to_gc) } } @@ -64,7 +520,9 @@ pub async fn vacuum_tables_from_info( .refresh(ctx.as_ref()) .await?; let fuse_table = FuseTable::try_from_table(table.as_ref())?; - vacuum_table(fuse_table, ctx.clone(), &vacuum_handler, true).await; + fuse_table + .vacuum_table(ctx.clone(), &vacuum_handler, true) + .await; } Ok(()) diff --git a/src/query/storages/fuse/src/table_functions/function_template/fuse_table_meta_func.rs b/src/query/storages/fuse/src/table_functions/function_template/fuse_table_meta_func.rs index 9c5aac272a259..23dd1408b1bd0 100644 --- a/src/query/storages/fuse/src/table_functions/function_template/fuse_table_meta_func.rs +++ b/src/query/storages/fuse/src/table_functions/function_template/fuse_table_meta_func.rs @@ -86,6 +86,7 @@ async fn location_snapshot( snapshot_location, snapshot_version, tbl.meta_location_generator().clone(), + tbl.get_branch_id(), ); // find the element by snapshot_id in stream diff --git a/src/query/storages/fuse/src/table_functions/fuse_column.rs b/src/query/storages/fuse/src/table_functions/fuse_column.rs index c81fe93b5fd9b..019fa351932b3 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_column.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_column.rs @@ -123,8 +123,9 @@ impl FuseColumn { let projection = HashSet::new(); 'FOR: for chunk in snapshot.segments.chunks(chunk_size) { + let chunk_refs: Vec<&_> = chunk.iter().collect(); let segments = segments_io - .generic_read_compact_segments::(chunk, true, &projection) + .generic_read_compact_segments::(&chunk_refs, true, &projection) .await?; for segment in segments { let segment = segment?; diff --git a/src/query/storages/fuse/src/table_functions/fuse_dump_snapshot.rs b/src/query/storages/fuse/src/table_functions/fuse_dump_snapshot.rs index fb88d9e19f538..e15366a80a120 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_dump_snapshot.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_dump_snapshot.rs @@ -29,6 +29,7 @@ use databend_common_expression::TableSchema; use databend_common_expression::TableSchemaRefExt; use crate::io::MetaReaders; +use crate::io::SnapshotHistoryReader; use crate::io::TableMetaLocationGenerator; use crate::sessions::TableContext; use crate::table_functions::parse_db_tb_args; @@ -104,11 +105,11 @@ impl SimpleTableFunc for FuseDumpSnapshotsFunc { let format_version = TableMetaLocationGenerator::snapshot_version(snapshot_location.as_str()); - use crate::io::read::SnapshotHistoryReader; let lite_snapshot_stream = table_snapshot_reader.snapshot_history( snapshot_location, format_version, meta_location_generator.clone(), + table.get_branch_id(), ); let mut snapshot_ids: Vec = Vec::with_capacity(limit); diff --git a/src/query/storages/fuse/src/table_functions/fuse_snapshot.rs b/src/query/storages/fuse/src/table_functions/fuse_snapshot.rs index 80e5e78e9e639..bc198d9fc714e 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_snapshot.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_snapshot.rs @@ -241,6 +241,7 @@ impl SimpleTableFunc for FuseSnapshotFunc { table.operator.clone(), meta_location_generator.clone(), snapshot_location, + table.get_branch_id(), limit, ) .await?; diff --git a/src/query/storages/fuse/src/table_functions/fuse_time_travel_size.rs
b/src/query/storages/fuse/src/table_functions/fuse_time_travel_size.rs index 31c3d6fca4f67..116c4b6c3f0c2 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_time_travel_size.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_time_travel_size.rs @@ -254,7 +254,7 @@ async fn calc_tbl_size(tbl: &FuseTable) -> Result<(u64, Result<u64>)> { Some(snapshot_location) => { let start = std::time::Instant::now(); info!("fuse_time_travel_size will read: {}", snapshot_location); - let snapshot = SnapshotsIO::read_snapshot(snapshot_location, operator).await; + let snapshot = SnapshotsIO::read_snapshot(snapshot_location, operator, true).await; info!("read_snapshot cost: {:?}", start.elapsed()); snapshot.map(|(snapshot, _)| { snapshot.summary.compressed_byte_size + snapshot.summary.index_size diff --git a/src/query/storages/fuse/src/table_functions/fuse_virtual_column.rs b/src/query/storages/fuse/src/table_functions/fuse_virtual_column.rs index bbcb18583ebde..6c37c30003c5d 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_virtual_column.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_virtual_column.rs @@ -127,8 +127,9 @@ impl FuseVirtualColumn { let schema = tbl.schema(); let projection = HashSet::new(); 'FOR: for chunk in snapshot.segments.chunks(chunk_size) { + let chunk_refs: Vec<&_> = chunk.iter().collect(); let segments = segments_io - .generic_read_compact_segments::(chunk, true, &projection) + .generic_read_compact_segments::(&chunk_refs, true, &projection) .await?; for segment in segments { let segment = segment?; diff --git a/src/query/storages/hive/hive/src/hive_table.rs b/src/query/storages/hive/hive/src/hive_table.rs index 1603787f85fdc..4498953a9ccbf 100644 --- a/src/query/storages/hive/hive/src/hive_table.rs +++ b/src/query/storages/hive/hive/src/hive_table.rs @@ -475,7 +475,6 @@ impl Table for HiveTable { _ctx: Arc<dyn TableContext>, _instant: Option<NavigationPoint>, _limit: Option<usize>, - _keep_last_snapshot: bool, _dry_run: bool, ) -> Result<Option<Vec<String>>> { Ok(None) } diff --git a/src/query/storages/stream/src/stream_table.rs b/src/query/storages/stream/src/stream_table.rs index a43b7f6767472..c851ec6506061 100644 --- a/src/query/storages/stream/src/stream_table.rs +++ b/src/query/storages/stream/src/stream_table.rs @@ -189,6 +189,7 @@ impl StreamTable { location, snapshot_version, fuse_table.meta_location_generator().clone(), + fuse_table.get_branch_id(), ); let mut instant = None; diff --git a/src/tests/sqlsmith/src/sql_gen/dml.rs b/src/tests/sqlsmith/src/sql_gen/dml.rs index 2fcd3656ead4f..52fc59075a3ba 100644 --- a/src/tests/sqlsmith/src/sql_gen/dml.rs +++ b/src/tests/sqlsmith/src/sql_gen/dml.rs @@ -128,12 +128,17 @@ impl<'a, R: Rng + 'a> SqlGenerator<'a, R> { fn gen_delete(&mut self) -> DeleteStmt { let hints = self.gen_hints(); - let (_table, table_reference) = self.random_select_table(); + let (table, _) = self.random_select_table(); let selection = Some(self.gen_expr(&DataType::Boolean)); DeleteStmt { hints, - table: table_reference, + catalog: None, + database: table + .db_name + .map(|name| Identifier::from_name(None, name.name)), + table: Identifier::from_name(None, table.name.name.clone()), + table_alias: None, selection, with: None, } @@ -340,6 +345,7 @@ impl<'a, R: Rng + 'a> SqlGenerator<'a, R> { catalog: None, database: table.db_name.clone(), table: table.name.clone(), + ref_name: None, alias: None, temporal: None, with_options: None, @@ -544,6 +550,7 @@ impl<'a, R: Rng + 'a> SqlGenerator<'a, R> { catalog: None, database: table.db_name.clone(), table: table.name.clone(), + ref_name: None,
alias: None, temporal: None, with_options: None, diff --git a/src/tests/sqlsmith/src/sql_gen/query.rs b/src/tests/sqlsmith/src/sql_gen/query.rs index 694bf3a304023..d419d5aebeb27 100644 --- a/src/tests/sqlsmith/src/sql_gen/query.rs +++ b/src/tests/sqlsmith/src/sql_gen/query.rs @@ -632,6 +632,7 @@ impl SqlGenerator<'_, R> { // TODO database: table.db_name.clone(), table: table.name.clone(), + ref_name: None, // TODO alias: None, // TODO diff --git a/tests/sqllogictests/suites/ee/08_table_ref/08_0000_branch.test b/tests/sqllogictests/suites/ee/08_table_ref/08_0000_branch.test new file mode 100644 index 0000000000000..16c665a7c9a39 --- /dev/null +++ b/tests/sqllogictests/suites/ee/08_table_ref/08_0000_branch.test @@ -0,0 +1,144 @@ +## Copyright 2023 Databend Cloud +## +## Licensed under the Elastic License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## https://www.elastic.co/licensing/elastic-license +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. + +statement ok +DROP DATABASE IF EXISTS test_branch + +statement ok +CREATE DATABASE test_branch + +statement ok +USE test_branch + +statement ok +set data_retention_time_in_days = 0; + +## Test basic branch creation and query +statement ok +CREATE OR REPLACE TABLE t1(a INT, b STRING) + +statement ok +INSERT INTO t1 VALUES (1, 'a'), (2, 'b'), (3, 'c') + +query IT +SELECT * FROM t1 ORDER BY a +---- +1 a +2 b +3 c + +statement ok +optimize table t1 compact + +## Create branch from current state +statement ok +ALTER TABLE t1 CREATE BRANCH dev + +query IT +SELECT * FROM t1/dev ORDER BY a +---- +1 a +2 b +3 c + +statement ok +INSERT INTO t1 VALUES (10, 'x') + +## Test branch creation with AT clause - BRANCH +statement ok +ALTER TABLE t1 CREATE BRANCH from_dev AT (BRANCH => dev) + +query IT +SELECT * FROM t1/from_dev ORDER BY a +---- +1 a +2 b +3 c + +## Test branch with RETAIN +statement ok +ALTER TABLE t1 CREATE BRANCH temp_branch RETAIN 2 SECONDS + +query IT +SELECT * FROM t1/temp_branch ORDER BY a +---- +1 a +2 b +3 c +10 x + +## Test type mismatch error (drop tag when it's a branch) +statement error 2748 +ALTER TABLE t1 DROP TAG temp_branch + +## Wait for branch to expire +statement ok +SELECT SLEEP(2) + +## Query expired branch should fail +statement error 2749 +SELECT * FROM t1/temp_branch + +## Do compact and purge. +statement ok +optimize table t1 all; + +## temp_branch was purged. 
+statement error 2745 +SELECT * FROM t1/temp_branch + +query I +select count() from fuse_snapshot('test_branch','t1'); +---- +1 + +query IT +SELECT * FROM t1/from_dev ORDER BY a +---- +1 a +2 b +3 c + +## Test duplicate branch name +statement error 2746 +ALTER TABLE t1 CREATE BRANCH dev + +## Test drop branch +statement ok +ALTER TABLE t1 DROP BRANCH dev + +## Query dropped branch should fail +statement error 2745 +SELECT * FROM t1/dev + +## Test drop non-existent branch +statement error 2745 +ALTER TABLE t1 DROP BRANCH non_existent + +## Test branch on non-FUSE table (should fail) +statement ok +CREATE TABLE t_memory(a INT) ENGINE = Memory + +statement error 2747 +ALTER TABLE t_memory CREATE BRANCH test + +## Test branch on temporary table (should fail) +statement ok +CREATE TEMPORARY TABLE t_temp(a INT) + +statement error 2747 +ALTER TABLE t_temp CREATE BRANCH test + +statement ok +DROP DATABASE test_branch diff --git a/tests/sqllogictests/suites/ee/08_table_ref/08_0001_tag.test b/tests/sqllogictests/suites/ee/08_table_ref/08_0001_tag.test new file mode 100644 index 0000000000000..885fb9ac5f4eb --- /dev/null +++ b/tests/sqllogictests/suites/ee/08_table_ref/08_0001_tag.test @@ -0,0 +1,155 @@ +## Copyright 2023 Databend Cloud +## +## Licensed under the Elastic License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## https://www.elastic.co/licensing/elastic-license +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. + +statement ok +DROP DATABASE IF EXISTS test_tag + +statement ok +CREATE DATABASE test_tag + +statement ok +USE test_tag + +statement ok +set data_retention_time_in_days = 0; + +## Test basic tag creation and query +statement ok +CREATE OR REPLACE TABLE t1(a INT, b STRING) + +statement ok +INSERT INTO t1 VALUES (1, 'a'), (2, 'b'), (3, 'c') + +query IT +SELECT * FROM t1 ORDER BY a +---- +1 a +2 b +3 c + +## Create tag from current state +statement ok +ALTER TABLE t1 CREATE TAG v1_0 + +query IT +SELECT * FROM t1/v1_0 ORDER BY a +---- +1 a +2 b +3 c + +## Insert more data to main table +statement ok +INSERT INTO t1 VALUES (4, 'd'), (5, 'e') + +query I +SELECT COUNT(*) FROM t1 +---- +5 + +## Tag should still point to original snapshot +query I +SELECT COUNT(*) FROM t1/v1_0 +---- +3 + +## Test tag creation with AT clause - TAG +statement ok +ALTER TABLE t1 CREATE TAG v1_0_copy AT (TAG => v1_0) + +query IT +SELECT * FROM t1/v1_0_copy ORDER BY a +---- +1 a +2 b +3 c + +## Test tag with RETAIN +statement ok +ALTER TABLE t1 CREATE TAG temp_tag RETAIN 2 SECONDS + +query IT +SELECT * FROM t1/temp_tag ORDER BY a +---- +1 a +2 b +3 c +4 d +5 e + +## Wait for tag to expire +statement ok +SELECT SLEEP(2) + +## Query expired tag should fail +statement error 2749 +SELECT * FROM t1/temp_tag + +statement ok +optimize table t1 compact + +statement ok +select * from fuse_vacuum2('test_tag', 't1') ignore_result; + +## temp_tag was purged. 
+statement error 2745 +SELECT * FROM t1/temp_tag + +query IT +SELECT * FROM t1/v1_0 ORDER BY a +---- +1 a +2 b +3 c + +## Test duplicate tag name +statement error 2746 +ALTER TABLE t1 CREATE TAG v1_0 + +## Test drop tag +statement ok +ALTER TABLE t1 DROP TAG v1_0_copy + +## Query dropped tag should fail +statement error 2745 +SELECT * FROM t1/v1_0_copy + +## Test drop non-existent tag +statement error 2745 +ALTER TABLE t1 DROP TAG non_existent + +## Test tag on non-FUSE table (should fail) +statement ok +CREATE TABLE t_memory(a INT) ENGINE = Memory + +statement error 2747 +ALTER TABLE t_memory CREATE TAG test + +## Test tag on temporary table (should fail) +statement ok +CREATE TEMPORARY TABLE t_temp(a INT) + +statement error 2747 +ALTER TABLE t_temp CREATE TAG test + +## Test type mismatch error (drop branch when it's a tag) +statement error 2748 +ALTER TABLE t1 DROP BRANCH v1_0 + +## Cleanup +statement ok +ALTER TABLE t1 DROP TAG v1_0 + +statement ok +DROP DATABASE test_tag