diff --git a/crates/fluss/src/row/binary_map.rs b/crates/fluss/src/row/binary_map.rs index 57201b84..02425129 100644 --- a/crates/fluss/src/row/binary_map.rs +++ b/crates/fluss/src/row/binary_map.rs @@ -432,6 +432,7 @@ impl FlussMapWriter { } (DataType::Array(_), Datum::Array(v)) => writer.write_array(pos, v), (DataType::Map(_), Datum::Map(v)) => writer.write_map(pos, v), + (DataType::Row(_), Datum::Row(v)) => writer.write_row(pos, v.as_ref())?, _ => { return Err(IllegalArgument { message: format!("Type mismatch: expected {:?}, got {:?}", dt, datum), diff --git a/crates/fluss/src/row/column.rs b/crates/fluss/src/row/column.rs index f4da6b3f..9dbdd947 100644 --- a/crates/fluss/src/row/column.rs +++ b/crates/fluss/src/row/column.rs @@ -661,25 +661,19 @@ impl InternalRow for ColumnarRow { }; let column = self.column(pos)?; - let element_field = match column.data_type() { - ArrowDataType::List(field) => field, + match column.data_type() { + ArrowDataType::List(_) => {} other => { return Err(IllegalArgument { message: format!("expected List array at position {pos}, got {other:?}"), }); } - }; - - let actual_element_type = from_arrow_field(element_field)?; - if actual_element_type != *element_fluss_type { - return Err(IllegalArgument { - message: format!( - "Arrow list element type {:?} does not match expected Fluss type {:?}", - actual_element_type, element_fluss_type - ), - }); } + // `to_arrow_type` is lossy (e.g. TIMESTAMP_LTZ → plain Arrow Timestamp); + // trust the Fluss schema and let the per-element conversion below catch + // real shape mismatches. + let list_arr = column .as_any() .downcast_ref::() @@ -778,25 +772,9 @@ fn arrow_map_entry_to_fluss_map( }); } - let actual_key_type = from_arrow_field(&fields[0])?; - if actual_key_type != *key_type { - return Err(IllegalArgument { - message: format!( - "Arrow map key type {:?} does not match expected Fluss type {:?}", - actual_key_type, key_type - ), - }); - } - - let actual_value_type = from_arrow_field(&fields[1])?; - if actual_value_type != *value_type { - return Err(IllegalArgument { - message: format!( - "Arrow map value type {:?} does not match expected Fluss type {:?}", - actual_value_type, value_type - ), - }); - } + // `to_arrow_type` is lossy (e.g. TIMESTAMP_LTZ → plain Arrow Timestamp); + // trust the Fluss schema and let the per-element conversion below catch + // real shape mismatches. let keys_arrow = struct_arr.column(0); let values_arrow = struct_arr.column(1); @@ -1443,8 +1421,7 @@ mod tests { let err = row.get_array(0).unwrap_err(); assert!( - err.to_string() - .contains("Cannot convert Arrow type to Fluss type"), + err.to_string().contains("expected Int32Type"), "unexpected error: {err}" ); } @@ -1735,7 +1712,7 @@ mod tests { let err = row.get_map(0).expect_err("type mismatch must error"); let msg = err.to_string(); assert!( - msg.contains("does not match expected Fluss type"), + msg.contains("expected StringArray"), "unexpected error: {msg}" ); } diff --git a/crates/fluss/src/row/column_writer.rs b/crates/fluss/src/row/column_writer.rs index 94777faf..85776bc5 100644 --- a/crates/fluss/src/row/column_writer.rs +++ b/crates/fluss/src/row/column_writer.rs @@ -22,12 +22,11 @@ use crate::error::Error::RowConvertError; use crate::error::{Error, Result}; use crate::metadata::{DataType, RowType}; -use crate::row::FlussMap; -use crate::row::InternalRow; use crate::row::datum::{ MICROS_PER_MILLI, MILLIS_PER_SECOND, NANOS_PER_MILLI, append_decimal_to_builder, millis_nanos_to_micros, millis_nanos_to_nanos, }; +use crate::row::{FlussArray, FlussMap, InternalRow}; use arrow::array::{ ArrayBuilder, ArrayRef, BinaryBuilder, BooleanBuilder, Date32Builder, Decimal128Builder, FixedSizeBinaryBuilder, Float32Builder, Float64Builder, Int8Builder, Int16Builder, @@ -928,8 +927,8 @@ fn write_map_into( let key_array = map.key_array(); let value_array = map.value_array(); for i in 0..map.size() { - key_writer.write_field_at(key_array, i)?; - value_writer.write_field_at(value_array, i)?; + write_array_element_into_column(key_writer, key_array, i)?; + write_array_element_into_column(value_writer, value_array, i)?; } let last = *offsets.last().unwrap(); offsets.push( @@ -940,6 +939,57 @@ fn write_map_into( Ok(()) } +// FlussArray carries no schema; nested row/map elements need the typed +// inherent accessors (get_row/get_map with explicit types). +fn write_array_element_into_column( + writer: &mut ColumnWriter, + array: &FlussArray, + index: usize, +) -> Result<()> { + match &mut writer.inner { + TypedWriter::Struct { + field_writers, + validity, + row_type, + .. + } => { + if array.is_null_at(index) { + for child in field_writers.iter_mut() { + child.append_null(); + } + validity.push(false); + } else { + let nested = array.get_row(index, row_type)?; + for (j, child) in field_writers.iter_mut().enumerate() { + child.write_field_at(&nested, j)?; + } + validity.push(true); + } + Ok(()) + } + TypedWriter::Map { + key_writer, + value_writer, + key_type, + value_type, + offsets, + validity, + } => { + if array.is_null_at(index) { + validity.push(false); + let last = *offsets.last().unwrap(); + offsets.push(last); + } else { + let nested = array.get_map(index, key_type, value_type)?; + write_map_into(nested, key_writer, value_writer, offsets)?; + validity.push(true); + } + Ok(()) + } + _ => writer.write_field_at(array, index), + } +} + fn finish_struct_array( fields: arrow_schema::Fields, child_arrays: Vec, diff --git a/crates/fluss/tests/integration/kv_table.rs b/crates/fluss/tests/integration/kv_table.rs index ee496a5a..4da7c75d 100644 --- a/crates/fluss/tests/integration/kv_table.rs +++ b/crates/fluss/tests/integration/kv_table.rs @@ -19,13 +19,18 @@ #[cfg(test)] mod kv_table_test { use crate::integration::utils::{ - create_partitions, create_table, get_shared_cluster, make_int_array, make_string_array, + ColumnPlan, array_dt_basics_columns, as_row_type, create_partitions, create_table, + dt_array_int, dt_map_string_int, dt_row_seq_label, get_shared_cluster, make_int_array, + make_string_array, map_dt_basics_columns, row_dt_basics_columns, scalar_dt_columns, }; + use fluss::client::TableUpsert; use fluss::metadata::{DataField, DataTypes, Schema, TableDescriptor, TablePath}; use fluss::row::binary_array::FlussArrayWriter; + use fluss::row::binary_map::FlussMapWriter; use fluss::row::{ - Date, Datum, Decimal, FlussArray, GenericRow, InternalRow, Time, TimestampLtz, TimestampNtz, + Date, Datum, Decimal, GenericRow, InternalRow, Time, TimestampLtz, TimestampNtz, }; + use futures::stream::{FuturesUnordered, StreamExt}; fn make_key(id: i32) -> GenericRow<'static> { make_key_with_field_count(id, 3) @@ -284,11 +289,11 @@ mod kv_table_test { .expect("Failed to drop table"); } + /// Partial-update preserves columns absent from the partial-write set. #[tokio::test] async fn partial_update() { let cluster = get_shared_cluster(); let connection = cluster.get_fluss_connection().await; - let admin = connection.get_admin().expect("Failed to get admin"); let table_path = TablePath::new("fluss", "test_partial_update"); @@ -303,1295 +308,368 @@ mod kv_table_test { Schema::builder() .column("id", DataTypes::int()) .column("name", DataTypes::string()) - .column("age", DataTypes::bigint()) .column("score", DataTypes::bigint()) .column("nested", nested_type) + .column( + "attrs", + DataTypes::map(DataTypes::string(), DataTypes::int()), + ) + .column("tags", DataTypes::array(DataTypes::string())) .primary_key(vec!["id"]) .build() - .expect("Failed to build schema"), + .expect("schema"), ) .build() - .expect("Failed to build table"); + .expect("table descriptor"); create_table(&admin, &table_path, &table_descriptor).await; - let table = connection - .get_table(&table_path) - .await - .expect("Failed to get table"); - - let table_upsert = table.new_upsert().expect("Failed to create upsert"); - let upsert_writer = table_upsert - .create_writer() - .expect("Failed to create writer"); + let table = connection.get_table(&table_path).await.expect("table"); + let table_upsert = table.new_upsert().expect("upsert"); + let upsert_writer = table_upsert.create_writer().expect("writer"); let mut nested0 = GenericRow::new(2); nested0.set_field(0, 10_i32); nested0.set_field(1, "alpha"); - let mut row = GenericRow::new(5); + let attrs0 = { + let mut w = FlussMapWriter::new(2, &DataTypes::string(), &DataTypes::int()); + w.write_entry("a".into(), 1.into()).unwrap(); + w.write_entry("b".into(), 2.into()).unwrap(); + w.complete().expect("attrs0") + }; + let tags0 = make_string_array(&[Some("alpha-tag"), Some("beta-tag")]); + + let mut row = GenericRow::new(6); row.set_field(0, 1); row.set_field(1, "Verso"); - row.set_field(2, 32i64); - row.set_field(3, 6942i64); - row.set_field(4, Datum::Row(Box::new(nested0))); + row.set_field(2, 100i64); + row.set_field(3, Datum::Row(Box::new(nested0))); + row.set_field(4, Datum::Map(attrs0)); + row.set_field(5, tags0); upsert_writer .upsert(&row) - .expect("Failed to upsert initial row") + .expect("upsert initial") .await - .expect("Failed to wait for upsert acknowledgment"); + .expect("ack initial"); let mut lookuper = table .new_lookup() - .expect("Failed to create lookup") + .expect("lookup") .create_lookuper() - .expect("Failed to create lookuper"); - - let result = lookuper - .lookup(&make_key(1)) - .await - .expect("Failed to lookup"); - let found_row = result - .get_single_row() - .expect("Failed to get row") - .expect("Row should exist"); - - assert_eq!(found_row.get_int(0).unwrap(), 1); - assert_eq!(found_row.get_string(1).unwrap(), "Verso"); - assert_eq!(found_row.get_long(2).unwrap(), 32i64); - assert_eq!(found_row.get_long(3).unwrap(), 6942i64); - let nested = found_row.get_row(4).unwrap(); - assert_eq!(nested.get_int(0).unwrap(), 10); - assert_eq!(nested.get_string(1).unwrap(), "alpha"); - - let partial_upsert = table_upsert - .partial_update_with_column_names(&["id", "score"]) - .expect("Failed to create TableUpsert with partial update"); - let partial_writer = partial_upsert - .create_writer() - .expect("Failed to create UpsertWriter with partial write"); - - let mut partial_row = GenericRow::new(5); - partial_row.set_field(0, 1); - partial_row.set_field(1, Datum::Null); - partial_row.set_field(2, Datum::Null); - partial_row.set_field(3, 420i64); - partial_row.set_field(4, Datum::Null); - partial_writer - .upsert(&partial_row) - .expect("Failed to upsert") - .await - .expect("Failed to wait for upsert acknowledgment"); + .expect("lookuper"); + + // Helper to issue a partial upsert against a specific column set. + async fn partial_upsert(table_upsert: &TableUpsert, cols: &[&str], row: GenericRow<'_>) { + let pu = table_upsert + .partial_update_with_column_names(cols) + .expect("partial upsert"); + let pw = pu.create_writer().expect("partial writer"); + pw.upsert(&row) + .expect("partial upsert") + .await + .expect("partial ack"); + } - let result = lookuper - .lookup(&make_key(1)) - .await - .expect("Failed to lookup after partial update"); - let found_row = result + // === Partial update on a scalar column — compound columns preserved === + let mut p1 = GenericRow::new(6); + p1.set_field(0, 1); + p1.set_field(1, Datum::Null); + p1.set_field(2, 420i64); + p1.set_field(3, Datum::Null); + p1.set_field(4, Datum::Null); + p1.set_field(5, Datum::Null); + partial_upsert(&table_upsert, &["id", "score"], p1).await; + + let result = lookuper.lookup(&make_key(1)).await.expect("lookup"); + let r = result .get_single_row() - .expect("Failed to get row") - .expect("Row should exist"); - - assert_eq!(found_row.get_int(0).unwrap(), 1, "id should remain 1"); - assert_eq!( - found_row.get_string(1).unwrap(), - "Verso", - "name should remain unchanged" - ); - assert_eq!( - found_row.get_long(2).unwrap(), - 32, - "age should remain unchanged" - ); - assert_eq!( - found_row.get_long(3).unwrap(), - 420, - "score should be updated to 420" - ); - let nested = found_row.get_row(4).unwrap(); - assert_eq!( - nested.get_int(0).unwrap(), - 10, - "ROW preserved across non-ROW partial update" - ); - assert_eq!(nested.get_string(1).unwrap(), "alpha"); - - let partial_nested_upsert = table_upsert - .partial_update_with_column_names(&["id", "nested"]) - .expect("partial_update_with_column_names"); - let partial_nested_writer = partial_nested_upsert - .create_writer() - .expect("partial writer"); + .expect("get row") + .expect("row exists"); + assert_eq!(r.get_string(1).unwrap(), "Verso", "name preserved"); + assert_eq!(r.get_long(2).unwrap(), 420, "score updated"); + let n = r.get_row(3).unwrap(); + assert_eq!(n.get_int(0).unwrap(), 10, "ROW preserved"); + assert_eq!(r.get_map(4).unwrap().size(), 2, "MAP preserved"); + assert_eq!(r.get_array(5).unwrap().size(), 2, "ARRAY preserved"); + + // === Partial update on the ROW column === let mut new_nested = GenericRow::new(2); new_nested.set_field(0, 99_i32); new_nested.set_field(1, "omega"); - let mut partial_nested = GenericRow::new(5); - partial_nested.set_field(0, 1); - partial_nested.set_field(1, Datum::Null); - partial_nested.set_field(2, Datum::Null); - partial_nested.set_field(3, Datum::Null); - partial_nested.set_field(4, Datum::Row(Box::new(new_nested))); - partial_nested_writer - .upsert(&partial_nested) - .expect("partial upsert") - .await - .expect("partial ack"); - - let result = lookuper - .lookup(&make_key(1)) - .await - .expect("Failed to lookup after nested partial"); - let found_row = result - .get_single_row() - .expect("Failed to get row") - .expect("Row should exist"); - assert_eq!( - found_row.get_string(1).unwrap(), - "Verso", - "name preserved when ROW updated" - ); - assert_eq!( - found_row.get_long(3).unwrap(), - 420, - "score preserved when ROW updated" - ); - let nested = found_row.get_row(4).unwrap(); - assert_eq!(nested.get_int(0).unwrap(), 99); - assert_eq!(nested.get_string(1).unwrap(), "omega"); - - admin - .drop_table(&table_path, false) - .await - .expect("Failed to drop table"); - } - - #[tokio::test] - async fn partitioned_table_upsert_and_lookup() { - let cluster = get_shared_cluster(); - let connection = cluster.get_fluss_connection().await; - - let admin = connection.get_admin().expect("Failed to get admin"); - - let table_path = TablePath::new("fluss", "test_partitioned_kv_table"); - - let nested_type = DataTypes::row(vec![ - DataField::new("seq", DataTypes::int(), None), - DataField::new("label", DataTypes::string(), None), - ]); - - let table_descriptor = TableDescriptor::builder() - .schema( - Schema::builder() - .column("region", DataTypes::string()) - .column("user_id", DataTypes::int()) - .column("name", DataTypes::string()) - .column("score", DataTypes::bigint()) - .column("nested", nested_type) - .primary_key(vec!["region", "user_id"]) - .build() - .expect("Failed to build schema"), - ) - .partitioned_by(vec!["region"]) - .build() - .expect("Failed to build table"); - - create_table(&admin, &table_path, &table_descriptor).await; - - create_partitions(&admin, &table_path, "region", &["US", "EU", "APAC"]).await; - - let connection = cluster.get_fluss_connection().await; - - let table = connection - .get_table(&table_path) - .await - .expect("Failed to get table"); - - let table_upsert = table.new_upsert().expect("Failed to create upsert"); - - let upsert_writer = table_upsert - .create_writer() - .expect("Failed to create writer"); - - let test_data = [ - ("US", 1, "Gustave", 100i64, 11_i32, "a"), - ("US", 2, "Lune", 200i64, 22, "b"), - ("EU", 1, "Sciel", 150i64, 33, "c"), - ("EU", 2, "Maelle", 250i64, 44, "d"), - ("APAC", 1, "Noco", 300i64, 55, "e"), - ]; - - for (region, user_id, name, score, seq, label) in &test_data { - let mut nested = GenericRow::new(2); - nested.set_field(0, *seq); - nested.set_field(1, *label); - let mut row = GenericRow::new(5); - row.set_field(0, *region); - row.set_field(1, *user_id); - row.set_field(2, *name); - row.set_field(3, *score); - row.set_field(4, Datum::Row(Box::new(nested))); - upsert_writer.upsert(&row).expect("Failed to upsert"); - } - upsert_writer.flush().await.expect("Failed to flush"); - - let mut lookuper = table - .new_lookup() - .expect("Failed to create lookup") - .create_lookuper() - .expect("Failed to create lookuper"); - - for (region, user_id, expected_name, expected_score, expected_seq, expected_label) in - &test_data - { - let mut key = GenericRow::new(5); - key.set_field(0, *region); - key.set_field(1, *user_id); - - let result = lookuper.lookup(&key).await.expect("Failed to lookup"); - let row = result - .get_single_row() - .expect("Failed to get row") - .expect("Row should exist"); - - assert_eq!(row.get_string(0).unwrap(), *region, "region mismatch"); - assert_eq!(row.get_int(1).unwrap(), *user_id, "user_id mismatch"); - assert_eq!(row.get_string(2).unwrap(), *expected_name, "name mismatch"); - assert_eq!(row.get_long(3).unwrap(), *expected_score, "score mismatch"); - let nested = row.get_row(4).unwrap(); - assert_eq!( - nested.get_int(0).unwrap(), - *expected_seq, - "ROW seq mismatch" - ); - assert_eq!( - nested.get_string(1).unwrap(), - *expected_label, - "ROW label mismatch" - ); - } - - let mut updated_nested = GenericRow::new(2); - updated_nested.set_field(0, 999_i32); - updated_nested.set_field(1, "updated"); - let mut updated_row = GenericRow::new(5); - updated_row.set_field(0, "US"); - updated_row.set_field(1, 1); - updated_row.set_field(2, "Gustave Updated"); - updated_row.set_field(3, 999i64); - updated_row.set_field(4, Datum::Row(Box::new(updated_nested))); - upsert_writer - .upsert(&updated_row) - .expect("Failed to upsert updated row") - .await - .expect("Failed to wait for upsert acknowledgment"); - - // Verify the update - let mut key = GenericRow::new(5); - key.set_field(0, "US"); - key.set_field(1, 1); - let result = lookuper.lookup(&key).await.expect("Failed to lookup"); - let row = result + let mut p2 = GenericRow::new(6); + p2.set_field(0, 1); + p2.set_field(1, Datum::Null); + p2.set_field(2, Datum::Null); + p2.set_field(3, Datum::Row(Box::new(new_nested))); + p2.set_field(4, Datum::Null); + p2.set_field(5, Datum::Null); + partial_upsert(&table_upsert, &["id", "nested"], p2).await; + + let result = lookuper.lookup(&make_key(1)).await.expect("lookup"); + let r = result .get_single_row() - .expect("Failed to get row") - .expect("Row should exist"); - assert_eq!(row.get_string(2).unwrap(), "Gustave Updated"); - assert_eq!(row.get_long(3).unwrap(), 999); - let nested = row.get_row(4).unwrap(); - assert_eq!(nested.get_int(0).unwrap(), 999); - assert_eq!(nested.get_string(1).unwrap(), "updated"); - - // Lookup in non-existent partition should return empty result - let mut non_existent_key = GenericRow::new(5); - non_existent_key.set_field(0, "UNKNOWN_REGION"); - non_existent_key.set_field(1, 1); - let result = lookuper - .lookup(&non_existent_key) - .await - .expect("Failed to lookup non-existent partition"); - assert!( - result - .get_single_row() - .expect("Failed to get row") - .is_none(), - "Lookup in non-existent partition should return None" - ); - - // Delete a record within a partition (await acknowledgment) - let mut delete_key = GenericRow::new(5); - delete_key.set_field(0, "EU"); - delete_key.set_field(1, 1); - upsert_writer - .delete(&delete_key) - .expect("Failed to delete") - .await - .expect("Failed to wait for delete acknowledgment"); - - // Verify deletion - let mut key = GenericRow::new(5); - key.set_field(0, "EU"); - key.set_field(1, 1); - let result = lookuper.lookup(&key).await.expect("Failed to lookup"); - assert!( - result - .get_single_row() - .expect("Failed to get row") - .is_none(), - "Deleted record should not exist" - ); - - // Verify other records in the same partition still exist - let mut key = GenericRow::new(5); - key.set_field(0, "EU"); - key.set_field(1, 2); - let result = lookuper.lookup(&key).await.expect("Failed to lookup"); - let row = result + .expect("get row") + .expect("row exists"); + assert_eq!(r.get_string(1).unwrap(), "Verso", "name preserved"); + assert_eq!(r.get_long(2).unwrap(), 420, "score preserved"); + let n = r.get_row(3).unwrap(); + assert_eq!(n.get_int(0).unwrap(), 99); + assert_eq!(n.get_string(1).unwrap(), "omega"); + assert_eq!(r.get_map(4).unwrap().size(), 2, "MAP preserved"); + assert_eq!(r.get_array(5).unwrap().size(), 2, "ARRAY preserved"); + + // === Partial update on the MAP column === + let new_attrs = { + let mut w = FlussMapWriter::new(1, &DataTypes::string(), &DataTypes::int()); + w.write_entry("z".into(), 99.into()).unwrap(); + w.complete().expect("new_attrs") + }; + let mut p3 = GenericRow::new(6); + p3.set_field(0, 1); + p3.set_field(1, Datum::Null); + p3.set_field(2, Datum::Null); + p3.set_field(3, Datum::Null); + p3.set_field(4, Datum::Map(new_attrs)); + p3.set_field(5, Datum::Null); + partial_upsert(&table_upsert, &["id", "attrs"], p3).await; + + let result = lookuper.lookup(&make_key(1)).await.expect("lookup"); + let r = result .get_single_row() - .expect("Failed to get row") - .expect("Row should exist"); - assert_eq!(row.get_string(2).unwrap(), "Maelle"); - - admin - .drop_table(&table_path, false) - .await - .expect("Failed to drop table"); - } - - #[tokio::test] - async fn upsert_and_lookup_with_row_rich_types() { - let cluster = get_shared_cluster(); - let connection = cluster.get_fluss_connection().await; - let admin = connection.get_admin().expect("Failed to get admin"); - - let table_path = TablePath::new("fluss", "test_kv_row_rich_types"); - - let row_type_owned = DataTypes::row(vec![ - DataField::new("f_bool", DataTypes::boolean(), None), - DataField::new("f_long", DataTypes::bigint(), None), - DataField::new("f_float", DataTypes::float(), None), - DataField::new("f_double", DataTypes::double(), None), - DataField::new("f_str", DataTypes::string(), None), - DataField::new("f_bytes", DataTypes::bytes(), None), - DataField::new("f_decimal", DataTypes::decimal(10, 2), None), - DataField::new("f_date", DataTypes::date(), None), - DataField::new("f_time", DataTypes::time_with_precision(3), None), - DataField::new("f_ts_ntz", DataTypes::timestamp_with_precision(6), None), - DataField::new("f_ts_ltz", DataTypes::timestamp_ltz_with_precision(6), None), - ]); - - let table_descriptor = TableDescriptor::builder() - .schema( - Schema::builder() - .column("id", DataTypes::int()) - .column("nested", row_type_owned) - .primary_key(vec!["id"]) - .build() - .expect("Failed to build schema"), - ) - .build() - .expect("Failed to build table descriptor"); - - create_table(&admin, &table_path, &table_descriptor).await; - - let table = connection - .get_table(&table_path) - .await - .expect("Failed to get table"); - let upsert = table.new_upsert().expect("Failed to create upsert"); - let upsert_writer = upsert.create_writer().expect("Failed to create writer"); - - let mut nested = GenericRow::new(11); - nested.set_field(0, true); - nested.set_field(1, 9_876_543_210_i64); - nested.set_field(2, f32::NEG_INFINITY); - nested.set_field(3, f64::NAN); - nested.set_field(4, "rich types here"); - nested.set_field(5, b"opaque".as_slice()); - nested.set_field(6, Decimal::from_unscaled_long(54321, 10, 2).unwrap()); - nested.set_field(7, Datum::Date(Date::new(20476))); - nested.set_field(8, Datum::Time(Time::new(36_827_123))); - nested.set_field(9, Datum::TimestampNtz(TimestampNtz::new(1_769_163_227_123))); - nested.set_field( - 10, - Datum::TimestampLtz(TimestampLtz::new(1_769_163_227_123)), - ); - - let mut row = GenericRow::new(2); - row.set_field(0, 1_i32); - row.set_field(1, Datum::Row(Box::new(nested))); - - upsert_writer - .upsert(&row) - .expect("upsert") - .await - .expect("ack"); - - let mut lookuper = table - .new_lookup() - .expect("Failed to create lookup") - .create_lookuper() - .expect("Failed to create lookuper"); - - let result = lookuper - .lookup(&make_key_with_field_count(1, 2)) - .await - .expect("lookup"); + .expect("get row") + .expect("row exists"); + assert_eq!(r.get_string(1).unwrap(), "Verso", "name preserved"); + let n = r.get_row(3).unwrap(); + assert_eq!(n.get_int(0).unwrap(), 99, "ROW preserved"); + let m = r.get_map(4).unwrap(); + assert_eq!(m.size(), 1); + assert_eq!(m.get(&Datum::from("z")).unwrap(), Some(Datum::from(99_i32))); + assert_eq!(r.get_array(5).unwrap().size(), 2, "ARRAY preserved"); + + // === Partial update on the ARRAY column === + let new_tags = make_string_array(&[Some("gamma-tag")]); + let mut p4 = GenericRow::new(6); + p4.set_field(0, 1); + p4.set_field(1, Datum::Null); + p4.set_field(2, Datum::Null); + p4.set_field(3, Datum::Null); + p4.set_field(4, Datum::Null); + p4.set_field(5, new_tags); + partial_upsert(&table_upsert, &["id", "tags"], p4).await; + + let result = lookuper.lookup(&make_key(1)).await.expect("lookup"); let r = result .get_single_row() .expect("get row") - .expect("row should exist"); - - let n = r.get_row(1).unwrap(); - assert!(n.get_boolean(0).unwrap()); - assert_eq!(n.get_long(1).unwrap(), 9_876_543_210); - assert!(n.get_float(2).unwrap().is_infinite()); - assert!(n.get_float(2).unwrap().is_sign_negative()); - assert!(n.get_double(3).unwrap().is_nan()); - assert_eq!(n.get_string(4).unwrap(), "rich types here"); - assert_eq!(n.get_bytes(5).unwrap(), b"opaque"); - assert_eq!( - n.get_decimal(6, 10, 2).unwrap(), - Decimal::from_unscaled_long(54321, 10, 2).unwrap(), - ); - assert_eq!(n.get_date(7).unwrap().get_inner(), 20476); - assert_eq!(n.get_time(8).unwrap().get_inner(), 36_827_123); - assert_eq!( - n.get_timestamp_ntz(9, 6).unwrap().get_millisecond(), - 1_769_163_227_123, - ); - assert_eq!( - n.get_timestamp_ltz(10, 6).unwrap().get_epoch_millisecond(), - 1_769_163_227_123, - ); - - admin - .drop_table(&table_path, false) - .await - .expect("Failed to drop table"); - } - - /// Integration test covering put and get operations for all supported datatypes. - #[tokio::test] - async fn all_supported_datatypes() { - let cluster = get_shared_cluster(); - let connection = cluster.get_fluss_connection().await; - - let admin = connection.get_admin().expect("Failed to get admin"); - - let table_path = TablePath::new("fluss", "test_all_datatypes"); - - // Create a table with all supported primitive datatypes - let table_descriptor = TableDescriptor::builder() - .schema( - Schema::builder() - // Primary key column - .column("pk_int", DataTypes::int()) - // Boolean type - .column("col_boolean", DataTypes::boolean()) - // Integer types - .column("col_tinyint", DataTypes::tinyint()) - .column("col_smallint", DataTypes::smallint()) - .column("col_int", DataTypes::int()) - .column("col_bigint", DataTypes::bigint()) - // Floating point types - .column("col_float", DataTypes::float()) - .column("col_double", DataTypes::double()) - // String types - .column("col_char", DataTypes::char(10)) - .column("col_string", DataTypes::string()) - // Decimal type - .column("col_decimal", DataTypes::decimal(10, 2)) - // Date and time types - .column("col_date", DataTypes::date()) - .column("col_time", DataTypes::time()) - .column("col_timestamp", DataTypes::timestamp()) - .column("col_timestamp_ltz", DataTypes::timestamp_ltz()) - // Binary types - .column("col_bytes", DataTypes::bytes()) - .column("col_binary", DataTypes::binary(20)) - .column("col_array", DataTypes::array(DataTypes::string())) - .column( - "col_row", - DataTypes::row(vec![ - DataField::new("seq", DataTypes::int(), None), - DataField::new("label", DataTypes::string(), None), - ]), - ) - .primary_key(vec!["pk_int"]) - .build() - .expect("Failed to build schema"), - ) - .build() - .expect("Failed to build table"); - - create_table(&admin, &table_path, &table_descriptor).await; - - let table = connection - .get_table(&table_path) - .await - .expect("Failed to get table"); - - let table_upsert = table.new_upsert().expect("Failed to create upsert"); - let upsert_writer = table_upsert - .create_writer() - .expect("Failed to create writer"); - - // Test data for all datatypes - let pk_int = 1i32; - let col_boolean = true; - let col_tinyint = 127i8; - let col_smallint = 32767i16; - let col_int = 2147483647i32; - let col_bigint = 9223372036854775807i64; - let col_float = std::f32::consts::PI; - let col_double = std::f64::consts::E; - let col_char = "hello"; - let col_string = "world of fluss rust client"; - let col_decimal = Decimal::from_unscaled_long(12345, 10, 2).unwrap(); // 123.45 - let col_date = Date::new(20476); // 2026-01-23 - let col_time = Time::new(36827123); // 10:13:47.123 - let col_timestamp = TimestampNtz::new(1769163227123); // 2026-01-23 10:13:47.123 UTC - let col_timestamp_ltz = TimestampLtz::new(1769163227123); // 2026-01-23 10:13:47.123 UTC - let col_bytes: &[u8] = b"binary data"; - let col_binary: &[u8] = b"fixed binary data!!!"; - - let col_array = make_string_array(&[Some("fluss"), Some("rust")]); - - let mut col_row_inner = GenericRow::new(2); - col_row_inner.set_field(0, 7_i32); - col_row_inner.set_field(1, "lumiere"); - - // Upsert a row with all datatypes - let mut row = GenericRow::new(19); - row.set_field(0, pk_int); - row.set_field(1, col_boolean); - row.set_field(2, col_tinyint); - row.set_field(3, col_smallint); - row.set_field(4, col_int); - row.set_field(5, col_bigint); - row.set_field(6, col_float); - row.set_field(7, col_double); - row.set_field(8, col_char); - row.set_field(9, col_string); - row.set_field(10, col_decimal.clone()); - row.set_field(11, col_date); - row.set_field(12, col_time); - row.set_field(13, col_timestamp); - row.set_field(14, col_timestamp_ltz); - row.set_field(15, col_bytes); - row.set_field(16, col_binary); - row.set_field(17, col_array); - row.set_field(18, Datum::Row(Box::new(col_row_inner))); - - upsert_writer - .upsert(&row) - .expect("Failed to upsert row with all datatypes") - .await - .expect("Failed to wait for upsert acknowledgment"); - - // Lookup the record - let mut lookuper = table - .new_lookup() - .expect("Failed to create lookup") - .create_lookuper() - .expect("Failed to create lookuper"); - - let mut key = GenericRow::new(19); - key.set_field(0, pk_int); - - let result = lookuper.lookup(&key).await.expect("Failed to lookup"); - let found_row = result - .get_single_row() - .expect("Failed to get row") - .expect("Row should exist"); - - // Verify all datatypes - assert_eq!(found_row.get_int(0).unwrap(), pk_int, "pk_int mismatch"); - assert_eq!( - found_row.get_boolean(1).unwrap(), - col_boolean, - "col_boolean mismatch" - ); - assert_eq!( - found_row.get_byte(2).unwrap(), - col_tinyint, - "col_tinyint mismatch" - ); - assert_eq!( - found_row.get_short(3).unwrap(), - col_smallint, - "col_smallint mismatch" - ); - assert_eq!(found_row.get_int(4).unwrap(), col_int, "col_int mismatch"); - assert_eq!( - found_row.get_long(5).unwrap(), - col_bigint, - "col_bigint mismatch" - ); - assert!( - (found_row.get_float(6).unwrap() - col_float).abs() < f32::EPSILON, - "col_float mismatch: expected {}, got {}", - col_float, - found_row.get_float(6).unwrap() - ); - assert!( - (found_row.get_double(7).unwrap() - col_double).abs() < f64::EPSILON, - "col_double mismatch: expected {}, got {}", - col_double, - found_row.get_double(7).unwrap() - ); - assert_eq!( - found_row.get_char(8, 10).unwrap(), - col_char, - "col_char mismatch" - ); - assert_eq!( - found_row.get_string(9).unwrap(), - col_string, - "col_string mismatch" - ); - assert_eq!( - found_row.get_decimal(10, 10, 2).unwrap(), - col_decimal, - "col_decimal mismatch" - ); - assert_eq!( - found_row.get_date(11).unwrap().get_inner(), - col_date.get_inner(), - "col_date mismatch" - ); - assert_eq!( - found_row.get_time(12).unwrap().get_inner(), - col_time.get_inner(), - "col_time mismatch" - ); - assert_eq!( - found_row - .get_timestamp_ntz(13, 6) - .unwrap() - .get_millisecond(), - col_timestamp.get_millisecond(), - "col_timestamp mismatch" - ); - assert_eq!( - found_row - .get_timestamp_ltz(14, 6) - .unwrap() - .get_epoch_millisecond(), - col_timestamp_ltz.get_epoch_millisecond(), - "col_timestamp_ltz mismatch" - ); - assert_eq!( - found_row.get_bytes(15).unwrap(), - col_bytes, - "col_bytes mismatch" - ); - assert_eq!( - found_row.get_binary(16, 20).unwrap(), - col_binary, - "col_binary mismatch" - ); - let arr = found_row.get_array(17).unwrap(); - assert_eq!(arr.size(), 2, "col_array size mismatch"); - assert_eq!(arr.get_string(0).unwrap(), "fluss", "col_array[0] mismatch"); - assert_eq!(arr.get_string(1).unwrap(), "rust", "col_array[1] mismatch"); - let nested = found_row.get_row(18).unwrap(); - assert_eq!(nested.get_int(0).unwrap(), 7, "col_row.seq mismatch"); - assert_eq!( - nested.get_string(1).unwrap(), - "lumiere", - "col_row.label mismatch" - ); - - // Test with null values for nullable columns - let pk_int_2 = 2i32; - let mut row_with_nulls = GenericRow::new(19); - row_with_nulls.set_field(0, pk_int_2); - row_with_nulls.set_field(1, Datum::Null); // col_boolean - row_with_nulls.set_field(2, Datum::Null); // col_tinyint - row_with_nulls.set_field(3, Datum::Null); // col_smallint - row_with_nulls.set_field(4, Datum::Null); // col_int - row_with_nulls.set_field(5, Datum::Null); // col_bigint - row_with_nulls.set_field(6, Datum::Null); // col_float - row_with_nulls.set_field(7, Datum::Null); // col_double - row_with_nulls.set_field(8, Datum::Null); // col_char - row_with_nulls.set_field(9, Datum::Null); // col_string - row_with_nulls.set_field(10, Datum::Null); // col_decimal - row_with_nulls.set_field(11, Datum::Null); // col_date - row_with_nulls.set_field(12, Datum::Null); // col_time - row_with_nulls.set_field(13, Datum::Null); // col_timestamp - row_with_nulls.set_field(14, Datum::Null); // col_timestamp_ltz - row_with_nulls.set_field(15, Datum::Null); // col_bytes - row_with_nulls.set_field(16, Datum::Null); // col_binary - row_with_nulls.set_field(17, Datum::Null); // col_array - row_with_nulls.set_field(18, Datum::Null); // col_row - - upsert_writer - .upsert(&row_with_nulls) - .expect("Failed to upsert row with nulls") - .await - .expect("Failed to wait for upsert acknowledgment"); - - // Lookup row with nulls - let mut key2 = GenericRow::new(19); - key2.set_field(0, pk_int_2); - - let result = lookuper.lookup(&key2).await.expect("Failed to lookup"); - let found_row_nulls = result - .get_single_row() - .expect("Failed to get row") - .expect("Row should exist"); - - // Verify all nullable columns are null - assert_eq!( - found_row_nulls.get_int(0).unwrap(), - pk_int_2, - "pk_int mismatch" - ); - assert!( - found_row_nulls.is_null_at(1).unwrap(), - "col_boolean should be null" - ); - assert!( - found_row_nulls.is_null_at(2).unwrap(), - "col_tinyint should be null" - ); - assert!( - found_row_nulls.is_null_at(3).unwrap(), - "col_smallint should be null" - ); - assert!( - found_row_nulls.is_null_at(4).unwrap(), - "col_int should be null" - ); - assert!( - found_row_nulls.is_null_at(5).unwrap(), - "col_bigint should be null" - ); - assert!( - found_row_nulls.is_null_at(6).unwrap(), - "col_float should be null" - ); - assert!( - found_row_nulls.is_null_at(7).unwrap(), - "col_double should be null" - ); - assert!( - found_row_nulls.is_null_at(8).unwrap(), - "col_char should be null" - ); - assert!( - found_row_nulls.is_null_at(9).unwrap(), - "col_string should be null" - ); - assert!( - found_row_nulls.is_null_at(10).unwrap(), - "col_decimal should be null" - ); - assert!( - found_row_nulls.is_null_at(11).unwrap(), - "col_date should be null" - ); - assert!( - found_row_nulls.is_null_at(12).unwrap(), - "col_time should be null" - ); - assert!( - found_row_nulls.is_null_at(13).unwrap(), - "col_timestamp should be null" - ); - assert!( - found_row_nulls.is_null_at(14).unwrap(), - "col_timestamp_ltz should be null" - ); - assert!( - found_row_nulls.is_null_at(15).unwrap(), - "col_bytes should be null" - ); - assert!( - found_row_nulls.is_null_at(16).unwrap(), - "col_binary should be null" - ); - assert!( - found_row_nulls.is_null_at(17).unwrap(), - "col_array should be null" - ); - assert!( - found_row_nulls.is_null_at(18).unwrap(), - "col_row should be null" - ); - - admin - .drop_table(&table_path, false) - .await - .expect("Failed to drop table"); - } - - #[tokio::test] - async fn upsert_and_lookup_with_row() { - let cluster = get_shared_cluster(); - let connection = cluster.get_fluss_connection().await; - let admin = connection.get_admin().expect("Failed to get admin"); - - let table_path = TablePath::new("fluss", "test_kv_rows"); - let nested_row_type = DataTypes::row(vec![ - DataField::new("x", DataTypes::int(), None), - DataField::new("label", DataTypes::string(), None), - ]); - let deep_inner_row_type = DataTypes::row(vec![DataField::new("n", DataTypes::int(), None)]); - let deep_row_type = - DataTypes::row(vec![DataField::new("inner", deep_inner_row_type, None)]); - - let table_descriptor = TableDescriptor::builder() - .schema( - Schema::builder() - .column("id", DataTypes::int()) - .column("nested", nested_row_type) - .column("deep", deep_row_type) - .primary_key(vec!["id"]) - .build() - .expect("Failed to build schema"), - ) - .build() - .expect("Failed to build table descriptor"); - - create_table(&admin, &table_path, &table_descriptor).await; - - let table = connection - .get_table(&table_path) - .await - .expect("Failed to get table"); - - let upsert = table.new_upsert().expect("Failed to create upsert"); - let upsert_writer = upsert.create_writer().expect("Failed to create writer"); - - let mut nested1 = GenericRow::new(2); - nested1.set_field(0, 42_i32); - nested1.set_field(1, "hello"); - - let mut deep_inner1 = GenericRow::new(1); - deep_inner1.set_field(0, 99_i32); - let mut deep1 = GenericRow::new(1); - deep1.set_field(0, Datum::Row(Box::new(deep_inner1))); - - let mut row1 = GenericRow::new(3); - row1.set_field(0, 1_i32); - row1.set_field(1, Datum::Row(Box::new(nested1))); - row1.set_field(2, Datum::Row(Box::new(deep1))); - - upsert_writer - .upsert(&row1) - .expect("upsert row1") - .await - .expect("ack row1"); - - let mut nested2 = GenericRow::new(2); - nested2.set_field(0, 7_i32); - nested2.set_field(1, Datum::Null); - - let mut row2 = GenericRow::new(3); - row2.set_field(0, 2_i32); - row2.set_field(1, Datum::Row(Box::new(nested2))); - row2.set_field(2, Datum::Null); - - upsert_writer - .upsert(&row2) - .expect("upsert row2") - .await - .expect("ack row2"); - - let mut deep_inner3 = GenericRow::new(1); - deep_inner3.set_field(0, -1_i32); - let mut deep3 = GenericRow::new(1); - deep3.set_field(0, Datum::Row(Box::new(deep_inner3))); - - let mut row3 = GenericRow::new(3); - row3.set_field(0, 3_i32); - row3.set_field(1, Datum::Null); - row3.set_field(2, Datum::Row(Box::new(deep3))); - - upsert_writer - .upsert(&row3) - .expect("upsert row3") - .await - .expect("ack row3"); - - let mut lookuper = table - .new_lookup() - .expect("Failed to create lookup") - .create_lookuper() - .expect("Failed to create lookuper"); - - let result1 = lookuper - .lookup(&make_key_with_field_count(1, 3)) - .await - .expect("lookup row1"); - let r1 = result1 - .get_single_row() - .expect("get row1") - .expect("row1 should exist"); - assert_eq!(r1.get_int(0).unwrap(), 1); - let nested_r1 = r1.get_row(1).unwrap(); - assert_eq!(nested_r1.get_int(0).unwrap(), 42); - assert_eq!(nested_r1.get_string(1).unwrap(), "hello"); - let deep_r1 = r1.get_row(2).unwrap(); - let deep_inner_r1 = deep_r1.get_row(0).unwrap(); - assert_eq!(deep_inner_r1.get_int(0).unwrap(), 99); - - let result2 = lookuper - .lookup(&make_key_with_field_count(2, 3)) - .await - .expect("lookup row2"); - let r2 = result2 - .get_single_row() - .expect("get row2") - .expect("row2 should exist"); - assert_eq!(r2.get_int(0).unwrap(), 2); - let nested_r2 = r2.get_row(1).unwrap(); - assert_eq!(nested_r2.get_int(0).unwrap(), 7); - assert!(nested_r2.is_null_at(1).unwrap()); - assert!(r2.is_null_at(2).unwrap()); - - let result3 = lookuper - .lookup(&make_key_with_field_count(3, 3)) - .await - .expect("lookup row3"); - let r3 = result3 - .get_single_row() - .expect("get row3") - .expect("row3 should exist"); - assert_eq!(r3.get_int(0).unwrap(), 3); - assert!(r3.is_null_at(1).unwrap()); - let deep_r3 = r3.get_row(2).unwrap(); - let deep_inner_r3 = deep_r3.get_row(0).unwrap(); - assert_eq!(deep_inner_r3.get_int(0).unwrap(), -1); - - admin - .drop_table(&table_path, false) - .await - .expect("Failed to drop table"); - } - - #[tokio::test] - async fn upsert_and_lookup_with_array_of_row() { - use fluss::metadata::{DataField, DataType}; - - let cluster = get_shared_cluster(); - let connection = cluster.get_fluss_connection().await; - let admin = connection.get_admin().expect("Failed to get admin"); - - let table_path = TablePath::new("fluss", "test_kv_array_of_row"); - - let event_row_type_owned = DataTypes::row(vec![ - DataField::new("seq", DataTypes::int(), None), - DataField::new("label", DataTypes::string(), None), - ]); - let array_of_row_type = DataTypes::array(event_row_type_owned.clone()); - - let event_row_type = match &event_row_type_owned { - DataType::Row(rt) => rt.clone(), - _ => unreachable!(), - }; - - let table_descriptor = TableDescriptor::builder() - .schema( - Schema::builder() - .column("id", DataTypes::int()) - .column("events", array_of_row_type.clone()) - .primary_key(vec!["id"]) - .build() - .expect("Failed to build schema"), - ) - .build() - .expect("Failed to build table descriptor"); - - create_table(&admin, &table_path, &table_descriptor).await; - - let table = connection - .get_table(&table_path) - .await - .expect("Failed to get table"); - - let upsert = table.new_upsert().expect("Failed to create upsert"); - let upsert_writer = upsert.create_writer().expect("Failed to create writer"); - - let mut events1 = FlussArrayWriter::new(2, &event_row_type_owned); - let mut e0 = GenericRow::new(2); - e0.set_field(0, 1_i32); - e0.set_field(1, "open"); - events1.write_row(0, &e0).expect("write e0"); - let mut e1 = GenericRow::new(2); - e1.set_field(0, 2_i32); - e1.set_field(1, "close"); - events1.write_row(1, &e1).expect("write e1"); - let events1 = events1.complete().expect("events1"); - - let mut row1 = GenericRow::new(2); - row1.set_field(0, 1_i32); - row1.set_field(1, events1); - - upsert_writer - .upsert(&row1) - .expect("upsert row1") - .await - .expect("ack row1"); - - let mut events2 = FlussArrayWriter::new(3, &event_row_type_owned); - let mut e2 = GenericRow::new(2); - e2.set_field(0, 7_i32); - e2.set_field(1, "x"); - events2.write_row(0, &e2).expect("write e2"); - events2.set_null_at(1); - let mut e3 = GenericRow::new(2); - e3.set_field(0, 8_i32); - e3.set_field(1, "y"); - events2.write_row(2, &e3).expect("write e3"); - let events2 = events2.complete().expect("events2"); - - let mut row2 = GenericRow::new(2); - row2.set_field(0, 2_i32); - row2.set_field(1, events2); - - upsert_writer - .upsert(&row2) - .expect("upsert row2") - .await - .expect("ack row2"); - - let mut row3 = GenericRow::new(2); - row3.set_field(0, 3_i32); - row3.set_field(1, Datum::Null); - - upsert_writer - .upsert(&row3) - .expect("upsert row3") - .await - .expect("ack row3"); - - let mut lookuper = table - .new_lookup() - .expect("Failed to create lookup") - .create_lookuper() - .expect("Failed to create lookuper"); - - let result1 = lookuper - .lookup(&make_key_with_field_count(1, 2)) - .await - .expect("lookup row1"); - let r1 = result1 - .get_single_row() - .expect("get row1") - .expect("row1 should exist"); - assert_eq!(r1.get_int(0).unwrap(), 1); - let events_r1 = r1.get_array(1).unwrap(); - assert_eq!(events_r1.size(), 2); - let e0_r1 = events_r1.get_row(0, &event_row_type).unwrap(); - assert_eq!(e0_r1.get_int(0).unwrap(), 1); - assert_eq!(e0_r1.get_string(1).unwrap(), "open"); - let e1_r1 = events_r1.get_row(1, &event_row_type).unwrap(); - assert_eq!(e1_r1.get_int(0).unwrap(), 2); - assert_eq!(e1_r1.get_string(1).unwrap(), "close"); - - let result2 = lookuper - .lookup(&make_key_with_field_count(2, 2)) - .await - .expect("lookup row2"); - let r2 = result2 - .get_single_row() - .expect("get row2") - .expect("row2 should exist"); - let events_r2 = r2.get_array(1).unwrap(); - assert_eq!(events_r2.size(), 3); - let e0_r2 = events_r2.get_row(0, &event_row_type).unwrap(); - assert_eq!(e0_r2.get_int(0).unwrap(), 7); - assert_eq!(e0_r2.get_string(1).unwrap(), "x"); - assert!(events_r2.is_null_at(1)); - let e2_r2 = events_r2.get_row(2, &event_row_type).unwrap(); - assert_eq!(e2_r2.get_int(0).unwrap(), 8); - assert_eq!(e2_r2.get_string(1).unwrap(), "y"); - - let result3 = lookuper - .lookup(&make_key_with_field_count(3, 2)) - .await - .expect("lookup row3"); - let r3 = result3 - .get_single_row() - .expect("get row3") - .expect("row3 should exist"); - assert_eq!(r3.get_int(0).unwrap(), 3); - assert!(r3.is_null_at(1).unwrap()); - - admin - .drop_table(&table_path, false) - .await - .expect("Failed to drop table"); + .expect("row exists"); + assert_eq!(r.get_string(1).unwrap(), "Verso", "name preserved"); + let n = r.get_row(3).unwrap(); + assert_eq!(n.get_int(0).unwrap(), 99, "ROW preserved"); + assert_eq!(r.get_map(4).unwrap().size(), 1, "MAP preserved"); + let a = r.get_array(5).unwrap(); + assert_eq!(a.size(), 1); + assert_eq!(a.get_string(0).unwrap(), "gamma-tag"); + + admin.drop_table(&table_path, false).await.expect("drop"); } + /// Partitioned KV upsert + lookup against every compound type. #[tokio::test] - async fn upsert_and_lookup_with_array() { + async fn partitioned_table_upsert_and_lookup() { let cluster = get_shared_cluster(); let connection = cluster.get_fluss_connection().await; let admin = connection.get_admin().expect("Failed to get admin"); - let table_path = TablePath::new("fluss", "test_kv_arrays"); - let inner_array_type = DataTypes::array(DataTypes::int()); + let table_path = TablePath::new("fluss", "test_partitioned_kv_table"); + + let nested_type = DataTypes::row(vec![ + DataField::new("seq", DataTypes::int(), None), + DataField::new("label", DataTypes::string(), None), + ]); let table_descriptor = TableDescriptor::builder() .schema( Schema::builder() - .column("id", DataTypes::int()) + .column("region", DataTypes::string()) + .column("user_id", DataTypes::int()) + .column("name", DataTypes::string()) + .column("score", DataTypes::bigint()) + .column("nested", nested_type) + .column( + "attrs", + DataTypes::map(DataTypes::string(), DataTypes::int()), + ) .column("tags", DataTypes::array(DataTypes::string())) - .column("scores", DataTypes::array(DataTypes::int())) - .column("matrix", DataTypes::array(inner_array_type.clone())) - .primary_key(vec!["id"]) + .primary_key(vec!["region", "user_id"]) .build() - .expect("Failed to build schema"), + .expect("schema"), ) + .partitioned_by(vec!["region"]) .build() - .expect("Failed to build table descriptor"); + .expect("table descriptor"); create_table(&admin, &table_path, &table_descriptor).await; + create_partitions(&admin, &table_path, "region", &["US", "EU", "APAC"]).await; - let table = connection - .get_table(&table_path) - .await - .expect("Failed to get table"); + let table = connection.get_table(&table_path).await.expect("table"); + let table_upsert = table.new_upsert().expect("upsert"); + let upsert_writer = table_upsert.create_writer().expect("writer"); - let upsert = table.new_upsert().expect("Failed to create upsert"); - let upsert_writer = upsert.create_writer().expect("Failed to create writer"); + let test_data = [ + ("US", 1_i32, "Gustave", 100_i64, 11_i32, "a", 1_i32, "alpha"), + ("US", 2, "Lune", 200, 22, "b", 2, "beta"), + ("EU", 1, "Sciel", 150, 33, "c", 3, "gamma"), + ("EU", 2, "Maelle", 250, 44, "d", 4, "delta"), + ("APAC", 1, "Noco", 300, 55, "e", 5, "epsilon"), + ]; - let mut row1 = GenericRow::new(4); - row1.set_field(0, 1_i32); - row1.set_field(1, make_string_array(&[Some("hello"), Some("world")])); - row1.set_field(2, make_int_array(&[Some(10), Some(20), Some(30)])); - let m1 = { - let mut w = FlussArrayWriter::new(2, &inner_array_type); - w.write_array(0, &make_int_array(&[Some(1), Some(2)])); - w.write_array(1, &make_int_array(&[Some(3), Some(4)])); - w.complete().expect("matrix1") - }; - row1.set_field(3, m1); + for (region, user_id, name, score, seq, label, attr_v, tag) in &test_data { + let mut nested = GenericRow::new(2); + nested.set_field(0, *seq); + nested.set_field(1, *label); + let attrs = { + let mut w = FlussMapWriter::new(1, &DataTypes::string(), &DataTypes::int()); + w.write_entry((*label).into(), (*attr_v).into()).unwrap(); + w.complete().expect("attrs") + }; + let tags = make_string_array(&[Some(*tag)]); + + let mut row = GenericRow::new(7); + row.set_field(0, *region); + row.set_field(1, *user_id); + row.set_field(2, *name); + row.set_field(3, *score); + row.set_field(4, Datum::Row(Box::new(nested))); + row.set_field(5, Datum::Map(attrs)); + row.set_field(6, tags); + upsert_writer.upsert(&row).expect("upsert"); + } + upsert_writer.flush().await.expect("flush"); - upsert_writer - .upsert(&row1) - .expect("upsert row1") - .await - .expect("ack row1"); + let mut lookuper = table + .new_lookup() + .expect("lookup") + .create_lookuper() + .expect("lookuper"); - let mut row2 = GenericRow::new(4); - row2.set_field(0, 2_i32); - row2.set_field(1, make_string_array(&[None])); - row2.set_field(2, make_int_array(&[])); - row2.set_field(3, Datum::Null); + // === Per-partition lookup verifies all compound columns === + for (region, user_id, name, score, seq, label, attr_v, tag) in &test_data { + let mut key = GenericRow::new(7); + key.set_field(0, *region); + key.set_field(1, *user_id); - upsert_writer - .upsert(&row2) - .expect("upsert row2") - .await - .expect("ack row2"); + let result = lookuper.lookup(&key).await.expect("lookup"); + let row = result + .get_single_row() + .expect("get row") + .expect("row exists"); - let mut row3 = GenericRow::new(4); - row3.set_field(0, 3_i32); - row3.set_field(1, Datum::Null); - row3.set_field(2, make_int_array(&[Some(42)])); - let m3 = { - let mut w = FlussArrayWriter::new(3, &inner_array_type); - w.write_array(0, &make_int_array(&[Some(5)])); - w.set_null_at(1); - w.write_array(2, &make_int_array(&[])); - w.complete().expect("matrix3") - }; - row3.set_field(3, m3); + assert_eq!(row.get_string(0).unwrap(), *region); + assert_eq!(row.get_int(1).unwrap(), *user_id); + assert_eq!(row.get_string(2).unwrap(), *name); + assert_eq!(row.get_long(3).unwrap(), *score); + let nested = row.get_row(4).unwrap(); + assert_eq!(nested.get_int(0).unwrap(), *seq); + assert_eq!(nested.get_string(1).unwrap(), *label); + let attrs = row.get_map(5).unwrap(); + assert_eq!(attrs.size(), 1); + assert_eq!( + attrs.get(&Datum::from(*label)).unwrap(), + Some(Datum::from(*attr_v)) + ); + let tags = row.get_array(6).unwrap(); + assert_eq!(tags.size(), 1); + assert_eq!(tags.get_string(0).unwrap(), *tag); + } + // === Update a row in US partition === + let mut updated_nested = GenericRow::new(2); + updated_nested.set_field(0, 999_i32); + updated_nested.set_field(1, "updated"); + let updated_attrs = { + let mut w = FlussMapWriter::new(1, &DataTypes::string(), &DataTypes::int()); + w.write_entry("u".into(), 999.into()).unwrap(); + w.complete().expect("updated_attrs") + }; + let updated_tags = make_string_array(&[Some("renamed")]); + let mut updated_row = GenericRow::new(7); + updated_row.set_field(0, "US"); + updated_row.set_field(1, 1); + updated_row.set_field(2, "Gustave Updated"); + updated_row.set_field(3, 999_i64); + updated_row.set_field(4, Datum::Row(Box::new(updated_nested))); + updated_row.set_field(5, Datum::Map(updated_attrs)); + updated_row.set_field(6, updated_tags); upsert_writer - .upsert(&row3) - .expect("upsert row3") + .upsert(&updated_row) + .expect("upsert updated") .await - .expect("ack row3"); - - // Lookup and verify - let mut lookuper = table - .new_lookup() - .expect("Failed to create lookup") - .create_lookuper() - .expect("Failed to create lookuper"); + .expect("ack updated"); - let result1 = lookuper - .lookup(&make_key_with_field_count(1, 4)) - .await - .expect("lookup row1"); - let r1 = result1 + let mut key = GenericRow::new(7); + key.set_field(0, "US"); + key.set_field(1, 1); + let result = lookuper.lookup(&key).await.expect("lookup"); + let row = result .get_single_row() - .expect("get row1") - .expect("row1 should exist"); - assert_eq!(r1.get_int(0).unwrap(), 1); - let tags_r1 = r1.get_array(1).unwrap(); - assert_eq!(tags_r1.size(), 2); - assert_eq!(tags_r1.get_string(0).unwrap(), "hello"); - assert_eq!(tags_r1.get_string(1).unwrap(), "world"); - let scores_r1 = r1.get_array(2).unwrap(); - assert_eq!(scores_r1.size(), 3); - assert_eq!(scores_r1.get_int(0).unwrap(), 10); - assert_eq!(scores_r1.get_int(1).unwrap(), 20); - assert_eq!(scores_r1.get_int(2).unwrap(), 30); - let matrix_r1: FlussArray = r1.get_array(3).unwrap(); - assert_eq!(matrix_r1.size(), 2); - let mr1_0 = matrix_r1.get_array(0).unwrap(); - assert_eq!(mr1_0.size(), 2); - assert_eq!(mr1_0.get_int(0).unwrap(), 1); - assert_eq!(mr1_0.get_int(1).unwrap(), 2); - let mr1_1 = matrix_r1.get_array(1).unwrap(); - assert_eq!(mr1_1.size(), 2); - assert_eq!(mr1_1.get_int(0).unwrap(), 3); - assert_eq!(mr1_1.get_int(1).unwrap(), 4); - - let result2 = lookuper - .lookup(&make_key_with_field_count(2, 4)) + .expect("get row") + .expect("row exists"); + assert_eq!(row.get_string(2).unwrap(), "Gustave Updated"); + assert_eq!(row.get_long(3).unwrap(), 999); + let nested = row.get_row(4).unwrap(); + assert_eq!(nested.get_int(0).unwrap(), 999); + let attrs = row.get_map(5).unwrap(); + assert_eq!( + attrs.get(&Datum::from("u")).unwrap(), + Some(Datum::from(999_i32)) + ); + let tags = row.get_array(6).unwrap(); + assert_eq!(tags.get_string(0).unwrap(), "renamed"); + + // === Lookup in non-existent partition returns None === + let mut missing = GenericRow::new(7); + missing.set_field(0, "UNKNOWN_REGION"); + missing.set_field(1, 1); + let result = lookuper + .lookup(&missing) .await - .expect("lookup row2"); - let r2 = result2 - .get_single_row() - .expect("get row2") - .expect("row2 should exist"); - assert_eq!(r2.get_int(0).unwrap(), 2); - let tags_r2 = r2.get_array(1).unwrap(); - assert_eq!(tags_r2.size(), 1); - assert!(tags_r2.is_null_at(0)); - let scores_r2 = r2.get_array(2).unwrap(); - assert_eq!(scores_r2.size(), 0); - assert!(r2.is_null_at(3).unwrap()); - - let result3 = lookuper - .lookup(&make_key_with_field_count(3, 4)) + .expect("lookup unknown partition"); + assert!(result.get_single_row().expect("get").is_none()); + + // === Delete a row within a partition === + let mut delete_key = GenericRow::new(7); + delete_key.set_field(0, "EU"); + delete_key.set_field(1, 1); + upsert_writer + .delete(&delete_key) + .expect("delete") .await - .expect("lookup row3"); - let r3 = result3 + .expect("ack delete"); + let mut key = GenericRow::new(7); + key.set_field(0, "EU"); + key.set_field(1, 1); + let result = lookuper.lookup(&key).await.expect("lookup"); + assert!(result.get_single_row().expect("get").is_none()); + + // === Sibling row in same partition still exists === + let mut key = GenericRow::new(7); + key.set_field(0, "EU"); + key.set_field(1, 2); + let result = lookuper.lookup(&key).await.expect("lookup"); + let row = result .get_single_row() - .expect("get row3") - .expect("row3 should exist"); - assert_eq!(r3.get_int(0).unwrap(), 3); - assert!(r3.is_null_at(1).unwrap()); - let scores_r3 = r3.get_array(2).unwrap(); - assert_eq!(scores_r3.size(), 1); - assert_eq!(scores_r3.get_int(0).unwrap(), 42); - let matrix_r3 = r3.get_array(3).unwrap(); - assert_eq!(matrix_r3.size(), 3); - let mr3_0 = matrix_r3.get_array(0).unwrap(); - assert_eq!(mr3_0.size(), 1); - assert_eq!(mr3_0.get_int(0).unwrap(), 5); - assert!(matrix_r3.is_null_at(1)); - let mr3_2 = matrix_r3.get_array(2).unwrap(); - assert_eq!(mr3_2.size(), 0); + .expect("get row") + .expect("row exists"); + assert_eq!(row.get_string(2).unwrap(), "Maelle"); + assert_eq!(row.get_array(6).unwrap().get_string(0).unwrap(), "delta"); - admin - .drop_table(&table_path, false) - .await - .expect("Failed to drop table"); + admin.drop_table(&table_path, false).await.expect("drop"); } + /// Integration test covering put and get operations for all supported datatypes. /// Integration test for concurrent batched lookups across partitions. #[tokio::test] async fn batched_concurrent_lookups_partitioned() { - use futures::stream::{FuturesUnordered, StreamExt}; - let cluster = get_shared_cluster(); let connection = cluster.get_fluss_connection().await; @@ -1977,8 +1055,6 @@ mod kv_table_test { /// Integration test for concurrent batched lookups. #[tokio::test] async fn batched_concurrent_lookups() { - use futures::stream::{FuturesUnordered, StreamExt}; - let cluster = get_shared_cluster(); let connection = cluster.get_fluss_connection().await; @@ -2175,4 +1251,522 @@ mod kv_table_test { .await .expect("Failed to drop table"); } + + /// KV upsert + lookup against a schema covering every supported data type. + #[tokio::test] + async fn all_supported_datatypes() { + let cluster = get_shared_cluster(); + let connection = cluster.get_fluss_connection().await; + let admin = connection.get_admin().expect("Failed to get admin"); + + let table_path = TablePath::new("fluss", "test_kv_complex_types"); + + let row_seq_label_owned = dt_row_seq_label(); + let row_seq_label = as_row_type(&row_seq_label_owned); + let inner_array_int = dt_array_int(); + let inner_map_string_int = dt_map_string_int(); + + let plan = ColumnPlan::new() + .add("id", DataTypes::int()) + .start_section("array_basics") + .extend(array_dt_basics_columns()) + .start_section("row_basics") + .extend(row_dt_basics_columns()) + .start_section("map_basics") + .extend(map_dt_basics_columns()) + .start_section("scalars") + .extend(scalar_dt_columns()); + let table_descriptor = TableDescriptor::builder() + .schema(plan.build_schema(Some(&["id"]))) + .build() + .expect("table descriptor"); + + create_table(&admin, &table_path, &table_descriptor).await; + + let table = connection.get_table(&table_path).await.expect("table"); + let upsert_writer = table + .new_upsert() + .expect("upsert") + .create_writer() + .expect("writer"); + + // Row 1 (id=1) — comprehensive: every column populated. + let column_count = plan.len(); + let mut row1 = GenericRow::new(column_count); + row1.set_field(0, 1_i32); + row1.set_field(1, make_int_array(&[Some(10), Some(20), Some(30)])); + row1.set_field(2, make_string_array(&[Some("hello"), Some("world")])); + let arr_of_arr_1 = { + let mut w = FlussArrayWriter::new(2, &inner_array_int); + w.write_array(0, &make_int_array(&[Some(1), Some(2)])); + w.write_array(1, &make_int_array(&[Some(3), Some(4)])); + w.complete().expect("arr_of_arr_1") + }; + row1.set_field(3, arr_of_arr_1); + let arr_of_row_1 = { + let mut w = FlussArrayWriter::new(2, &row_seq_label_owned); + let mut e0 = GenericRow::new(2); + e0.set_field(0, 1_i32); + e0.set_field(1, "open"); + w.write_row(0, &e0).expect("e0"); + let mut e1 = GenericRow::new(2); + e1.set_field(0, 2_i32); + e1.set_field(1, "close"); + w.write_row(1, &e1).expect("e1"); + w.complete().expect("arr_of_row_1") + }; + row1.set_field(4, arr_of_row_1); + let mut row_basic_1 = GenericRow::new(2); + row_basic_1.set_field(0, 42_i32); + row_basic_1.set_field(1, "hello"); + row1.set_field(5, Datum::Row(Box::new(row_basic_1))); + let mut deep_inner_1 = GenericRow::new(1); + deep_inner_1.set_field(0, 99_i32); + let mut row_deep_1 = GenericRow::new(1); + row_deep_1.set_field(0, Datum::Row(Box::new(deep_inner_1))); + row1.set_field(6, Datum::Row(Box::new(row_deep_1))); + let mut row_rich_1 = GenericRow::new(14); + row_rich_1.set_field(0, true); + row_rich_1.set_field(1, 100_000_i32); + row_rich_1.set_field(2, 9_876_543_210_i64); + row_rich_1.set_field(3, f32::INFINITY); + row_rich_1.set_field(4, std::f64::consts::PI); + row_rich_1.set_field(5, "hello world"); + row_rich_1.set_field(6, b"binary".as_slice()); + row_rich_1.set_field(7, Decimal::from_unscaled_long(12345, 10, 2).unwrap()); + row_rich_1.set_field(8, Datum::Date(Date::new(20476))); + row_rich_1.set_field(9, Datum::Time(Time::new(36_827_123))); + row_rich_1.set_field( + 10, + Datum::TimestampNtz(TimestampNtz::new(1_769_163_227_123)), + ); + row_rich_1.set_field( + 11, + Datum::TimestampLtz(TimestampLtz::new(1_769_163_227_456)), + ); + row_rich_1.set_field(12, b"\x01\x02\x03\x04".as_slice()); + row_rich_1.set_field(13, make_int_array(&[Some(7), None, Some(11)])); + row1.set_field(7, Datum::Row(Box::new(row_rich_1))); + let map_string_int_1 = { + let mut w = FlussMapWriter::new(3, &DataTypes::string(), &DataTypes::int()); + w.write_entry("a".into(), 1.into()).unwrap(); + w.write_entry("b".into(), Datum::Null).unwrap(); + w.write_entry("c".into(), 3.into()).unwrap(); + w.complete().expect("map_string_int_1") + }; + row1.set_field(8, Datum::Map(map_string_int_1)); + let map_of_row_1 = { + let mut e0 = GenericRow::new(2); + e0.set_field(0, 1_i32); + e0.set_field(1, "open"); + let mut e1 = GenericRow::new(2); + e1.set_field(0, 2_i32); + e1.set_field(1, "close"); + let mut w = FlussMapWriter::new(2, &DataTypes::string(), &row_seq_label_owned); + w.write_entry("e0".into(), Datum::Row(Box::new(e0))) + .unwrap(); + w.write_entry("e1".into(), Datum::Row(Box::new(e1))) + .unwrap(); + w.complete().expect("map_of_row_1") + }; + row1.set_field(9, Datum::Map(map_of_row_1)); + let map_of_map_1 = { + let g1 = { + let mut w = FlussMapWriter::new(2, &DataTypes::string(), &DataTypes::int()); + w.write_entry("a".into(), 1.into()).unwrap(); + w.write_entry("b".into(), 2.into()).unwrap(); + w.complete().expect("g1") + }; + let g2 = { + let mut w = FlussMapWriter::new(1, &DataTypes::string(), &DataTypes::int()); + w.write_entry("c".into(), 3.into()).unwrap(); + w.complete().expect("g2") + }; + let mut w = FlussMapWriter::new(2, &DataTypes::string(), &inner_map_string_int); + w.write_entry("g1".into(), Datum::Map(g1)).unwrap(); + w.write_entry("g2".into(), Datum::Map(g2)).unwrap(); + w.complete().expect("map_of_map_1") + }; + row1.set_field(10, Datum::Map(map_of_map_1)); + let map_of_array_1 = { + let primes = make_int_array(&[Some(2), Some(3), Some(5)]); + let squares = make_int_array(&[Some(1), Some(4)]); + let mut w = FlussMapWriter::new(2, &DataTypes::string(), &inner_array_int); + w.write_entry("primes".into(), Datum::Array(primes)) + .unwrap(); + w.write_entry("squares".into(), Datum::Array(squares)) + .unwrap(); + w.complete().expect("map_of_array_1") + }; + row1.set_field(11, Datum::Map(map_of_array_1)); + let array_of_map_1 = { + let m0 = { + let mut w = FlussMapWriter::new(2, &DataTypes::string(), &DataTypes::int()); + w.write_entry("x".into(), 1.into()).unwrap(); + w.write_entry("y".into(), 2.into()).unwrap(); + w.complete().expect("m0") + }; + let m1 = { + let mut w = FlussMapWriter::new(1, &DataTypes::string(), &DataTypes::int()); + w.write_entry("z".into(), 9.into()).unwrap(); + w.complete().expect("m1") + }; + let mut w = FlussArrayWriter::new(2, &inner_map_string_int); + w.write_map(0, &m0); + w.write_map(1, &m1); + w.complete().expect("array_of_map_1") + }; + row1.set_field(12, array_of_map_1); + + // Scalar values for row 1. + let s_tinyint = 127_i8; + let s_smallint = 32_767_i16; + let s_bigint = 9_223_372_036_854_775_807_i64; + let s_float = std::f32::consts::PI; + let s_double = std::f64::consts::E; + let s_char = "hello"; + let s_string = "world of fluss rust client"; + let s_decimal = Decimal::from_unscaled_long(12345, 10, 2).unwrap(); + let s_date = Date::new(20476); + let s_time_s = Time::new(36_827_000); + let s_time_ms = Time::new(36_827_123); + let s_time_us = Time::new(86_399_999); + let s_time_ns = Time::new(1); + let s_ts_s = TimestampNtz::new(1_769_163_227_000); + let s_ts_ms = TimestampNtz::new(1_769_163_227_123); + let s_ts_us = TimestampNtz::from_millis_nanos(1_769_163_227_123, 456_000).unwrap(); + let s_ts_ns = TimestampNtz::from_millis_nanos(1_769_163_227_123, 999_999).unwrap(); + let s_ts_ltz_s = TimestampLtz::new(1_769_163_227_000); + let s_ts_ltz_ms = TimestampLtz::new(1_769_163_227_123); + let s_ts_ltz_us = TimestampLtz::from_millis_nanos(1_769_163_227_123, 456_000).unwrap(); + let s_ts_ltz_ns = TimestampLtz::from_millis_nanos(1_769_163_227_123, 999_999).unwrap(); + let s_bytes_top: Vec = b"binary data".to_vec(); + let s_binary_top: Vec = vec![0xDE, 0xAD, 0xBE, 0xEF]; + let s_ts_us_neg = TimestampNtz::from_millis_nanos(-301_234_154_877, 456_000).unwrap(); + let s_ts_ns_neg = TimestampNtz::from_millis_nanos(-301_234_154_877, 999_999).unwrap(); + let s_ts_ltz_us_neg = TimestampLtz::from_millis_nanos(-301_234_154_877, 456_000).unwrap(); + let s_ts_ltz_ns_neg = TimestampLtz::from_millis_nanos(-301_234_154_877, 999_999).unwrap(); + + row1.set_field(plan.idx("col_tinyint"), s_tinyint); + row1.set_field(plan.idx("col_smallint"), s_smallint); + row1.set_field(plan.idx("col_bigint"), s_bigint); + row1.set_field(plan.idx("col_float"), s_float); + row1.set_field(plan.idx("col_double"), s_double); + row1.set_field(plan.idx("col_boolean"), true); + row1.set_field(plan.idx("col_char"), s_char); + row1.set_field(plan.idx("col_string"), s_string); + row1.set_field(plan.idx("col_decimal"), s_decimal.clone()); + row1.set_field(plan.idx("col_date"), Datum::Date(s_date)); + row1.set_field(plan.idx("col_time_s"), s_time_s); + row1.set_field(plan.idx("col_time_ms"), s_time_ms); + row1.set_field(plan.idx("col_time_us"), s_time_us); + row1.set_field(plan.idx("col_time_ns"), s_time_ns); + row1.set_field(plan.idx("col_ts_s"), s_ts_s); + row1.set_field(plan.idx("col_ts_ms"), s_ts_ms); + row1.set_field(plan.idx("col_ts_us"), s_ts_us); + row1.set_field(plan.idx("col_ts_ns"), s_ts_ns); + row1.set_field(plan.idx("col_ts_ltz_s"), s_ts_ltz_s); + row1.set_field(plan.idx("col_ts_ltz_ms"), s_ts_ltz_ms); + row1.set_field(plan.idx("col_ts_ltz_us"), s_ts_ltz_us); + row1.set_field(plan.idx("col_ts_ltz_ns"), s_ts_ltz_ns); + row1.set_field(plan.idx("col_bytes_top"), s_bytes_top.as_slice()); + row1.set_field(plan.idx("col_binary_top"), s_binary_top.as_slice()); + row1.set_field(plan.idx("col_ts_us_neg"), s_ts_us_neg); + row1.set_field(plan.idx("col_ts_ns_neg"), s_ts_ns_neg); + row1.set_field(plan.idx("col_ts_ltz_us_neg"), s_ts_ltz_us_neg); + row1.set_field(plan.idx("col_ts_ltz_ns_neg"), s_ts_ltz_ns_neg); + + upsert_writer + .upsert(&row1) + .expect("upsert row1") + .await + .expect("ack row1"); + + // Row 2 (id=2) — empty MAP, all other compound + scalar columns NULL. + let mut row2 = GenericRow::new(column_count); + row2.set_field(0, 2_i32); + for i in 1..column_count { + row2.set_field(i, Datum::Null); + } + let empty_map = FlussMapWriter::new(0, &DataTypes::string(), &DataTypes::int()) + .complete() + .expect("empty_map"); + row2.set_field(plan.idx("map_string_int"), Datum::Map(empty_map)); + upsert_writer + .upsert(&row2) + .expect("upsert row2") + .await + .expect("ack row2"); + + // Row 3 (id=3) — every compound + scalar column NULL. + let mut row3 = GenericRow::new(column_count); + row3.set_field(0, 3_i32); + for i in 1..column_count { + row3.set_field(i, Datum::Null); + } + upsert_writer + .upsert(&row3) + .expect("upsert row3") + .await + .expect("ack row3"); + + let mut lookuper = table + .new_lookup() + .expect("lookup") + .create_lookuper() + .expect("lookuper"); + + let result1 = lookuper.lookup(&make_key(1)).await.expect("lookup row1"); + let r1 = result1 + .get_single_row() + .expect("row1") + .expect("row1 exists"); + assert_eq!(r1.get_int(0).unwrap(), 1); + + // === ARRAY: basic shapes === + let arr_int = r1.get_array(1).unwrap(); + assert_eq!(arr_int.size(), 3); + assert_eq!(arr_int.get_int(2).unwrap(), 30); + let arr_string = r1.get_array(2).unwrap(); + assert_eq!(arr_string.size(), 2); + assert_eq!(arr_string.get_string(0).unwrap(), "hello"); + let arr_of_arr = r1.get_array(3).unwrap(); + assert_eq!(arr_of_arr.size(), 2); + assert_eq!(arr_of_arr.get_array(1).unwrap().get_int(1).unwrap(), 4); + + // === ARRAY === + let aor = r1.get_array(4).unwrap(); + assert_eq!(aor.size(), 2); + let e0 = aor.get_row(0, &row_seq_label).unwrap(); + assert_eq!(e0.get_int(0).unwrap(), 1); + assert_eq!(e0.get_string(1).unwrap(), "open"); + + // === ROW: basic + deep + rich === + let rb = r1.get_row(5).unwrap(); + assert_eq!(rb.get_int(0).unwrap(), 42); + assert_eq!(rb.get_string(1).unwrap(), "hello"); + let rd = r1.get_row(6).unwrap(); + let rd_inner = rd.get_row(0).unwrap(); + assert_eq!(rd_inner.get_int(0).unwrap(), 99); + let rr = r1.get_row(7).unwrap(); + assert!(rr.get_boolean(0).unwrap()); + assert_eq!(rr.get_int(1).unwrap(), 100_000); + assert_eq!(rr.get_long(2).unwrap(), 9_876_543_210); + assert!(rr.get_float(3).unwrap().is_infinite()); + assert!((rr.get_double(4).unwrap() - std::f64::consts::PI).abs() < f64::EPSILON); + assert_eq!(rr.get_string(5).unwrap(), "hello world"); + assert_eq!(rr.get_bytes(6).unwrap(), b"binary"); + assert_eq!( + rr.get_decimal(7, 10, 2).unwrap(), + Decimal::from_unscaled_long(12345, 10, 2).unwrap() + ); + assert_eq!(rr.get_date(8).unwrap().get_inner(), 20476); + assert_eq!(rr.get_time(9).unwrap().get_inner(), 36_827_123); + assert_eq!( + rr.get_timestamp_ntz(10, 6).unwrap().get_millisecond(), + 1_769_163_227_123 + ); + assert_eq!( + rr.get_timestamp_ltz(11, 6).unwrap().get_epoch_millisecond(), + 1_769_163_227_456 + ); + assert_eq!(rr.get_binary(12, 4).unwrap(), b"\x01\x02\x03\x04"); + let f_arr = rr.get_array(13).unwrap(); + assert_eq!(f_arr.size(), 3); + assert!(f_arr.is_null_at(1)); + + // === MAP: basic === + let m = r1.get_map(8).unwrap(); + assert_eq!(m.size(), 3); + assert_eq!(m.get(&Datum::from("a")).unwrap(), Some(Datum::from(1_i32))); + assert_eq!(m.get(&Datum::from("b")).unwrap(), Some(Datum::Null)); + assert_eq!(m.get(&Datum::from("c")).unwrap(), Some(Datum::from(3_i32))); + + // === MAP === + let m = r1.get_map(9).unwrap(); + let v0 = m.value_array().get_row(0, &row_seq_label).unwrap(); + assert_eq!(v0.get_int(0).unwrap(), 1); + assert_eq!(v0.get_string(1).unwrap(), "open"); + + // === MAP === + let m = r1.get_map(10).unwrap(); + let g1 = m + .value_array() + .get_map(0, &DataTypes::string(), &DataTypes::int()) + .unwrap(); + assert_eq!(g1.size(), 2); + + // === MAP + ARRAY === + let m = r1.get_map(11).unwrap(); + assert_eq!(m.value_array().get_array(0).unwrap().size(), 3); + let am = r1.get_array(12).unwrap(); + assert_eq!(am.size(), 2); + let am0 = am + .get_map(0, &DataTypes::string(), &DataTypes::int()) + .unwrap(); + assert_eq!(am0.size(), 2); + + // === Scalars: integers + floating point === + assert_eq!(r1.get_byte(plan.idx("col_tinyint")).unwrap(), s_tinyint); + assert_eq!(r1.get_short(plan.idx("col_smallint")).unwrap(), s_smallint); + assert_eq!(r1.get_long(plan.idx("col_bigint")).unwrap(), s_bigint); + assert!((r1.get_float(plan.idx("col_float")).unwrap() - s_float).abs() < f32::EPSILON); + assert!((r1.get_double(plan.idx("col_double")).unwrap() - s_double).abs() < f64::EPSILON); + + // === Scalars: boolean / char / string / decimal / date === + assert!(r1.get_boolean(plan.idx("col_boolean")).unwrap()); + assert_eq!(r1.get_char(plan.idx("col_char"), 10).unwrap(), s_char); + assert_eq!(r1.get_string(plan.idx("col_string")).unwrap(), s_string); + assert_eq!( + r1.get_decimal(plan.idx("col_decimal"), 10, 2).unwrap(), + s_decimal + ); + assert_eq!( + r1.get_date(plan.idx("col_date")).unwrap().get_inner(), + s_date.get_inner() + ); + + // === Scalars: time across all four precisions === + assert_eq!( + r1.get_time(plan.idx("col_time_s")).unwrap().get_inner(), + s_time_s.get_inner() + ); + assert_eq!( + r1.get_time(plan.idx("col_time_ms")).unwrap().get_inner(), + s_time_ms.get_inner() + ); + assert_eq!( + r1.get_time(plan.idx("col_time_us")).unwrap().get_inner(), + s_time_us.get_inner() + ); + assert_eq!( + r1.get_time(plan.idx("col_time_ns")).unwrap().get_inner(), + s_time_ns.get_inner() + ); + + // === Scalars: timestamp across all four precisions === + assert_eq!( + r1.get_timestamp_ntz(plan.idx("col_ts_s"), 0) + .unwrap() + .get_millisecond(), + s_ts_s.get_millisecond() + ); + assert_eq!( + r1.get_timestamp_ntz(plan.idx("col_ts_ms"), 3) + .unwrap() + .get_millisecond(), + s_ts_ms.get_millisecond() + ); + let read_ts_us = r1.get_timestamp_ntz(plan.idx("col_ts_us"), 6).unwrap(); + assert_eq!(read_ts_us.get_millisecond(), s_ts_us.get_millisecond()); + assert_eq!( + read_ts_us.get_nano_of_millisecond(), + s_ts_us.get_nano_of_millisecond() + ); + let read_ts_ns = r1.get_timestamp_ntz(plan.idx("col_ts_ns"), 9).unwrap(); + assert_eq!(read_ts_ns.get_millisecond(), s_ts_ns.get_millisecond()); + assert_eq!( + read_ts_ns.get_nano_of_millisecond(), + s_ts_ns.get_nano_of_millisecond() + ); + + // === Scalars: timestamp_ltz across all four precisions === + assert_eq!( + r1.get_timestamp_ltz(plan.idx("col_ts_ltz_s"), 0) + .unwrap() + .get_epoch_millisecond(), + s_ts_ltz_s.get_epoch_millisecond() + ); + assert_eq!( + r1.get_timestamp_ltz(plan.idx("col_ts_ltz_ms"), 3) + .unwrap() + .get_epoch_millisecond(), + s_ts_ltz_ms.get_epoch_millisecond() + ); + let read_ltz_us = r1.get_timestamp_ltz(plan.idx("col_ts_ltz_us"), 6).unwrap(); + assert_eq!( + read_ltz_us.get_epoch_millisecond(), + s_ts_ltz_us.get_epoch_millisecond() + ); + assert_eq!( + read_ltz_us.get_nano_of_millisecond(), + s_ts_ltz_us.get_nano_of_millisecond() + ); + let read_ltz_ns = r1.get_timestamp_ltz(plan.idx("col_ts_ltz_ns"), 9).unwrap(); + assert_eq!( + read_ltz_ns.get_epoch_millisecond(), + s_ts_ltz_ns.get_epoch_millisecond() + ); + assert_eq!( + read_ltz_ns.get_nano_of_millisecond(), + s_ts_ltz_ns.get_nano_of_millisecond() + ); + + // === Scalars: bytes + fixed binary === + assert_eq!( + r1.get_bytes(plan.idx("col_bytes_top")).unwrap(), + s_bytes_top.as_slice() + ); + assert_eq!( + r1.get_binary(plan.idx("col_binary_top"), 4).unwrap(), + s_binary_top.as_slice() + ); + + // === Scalars: negative-epoch timestamps (pre-1970) === + let read_neg_us = r1.get_timestamp_ntz(plan.idx("col_ts_us_neg"), 6).unwrap(); + assert_eq!(read_neg_us.get_millisecond(), s_ts_us_neg.get_millisecond()); + assert_eq!( + read_neg_us.get_nano_of_millisecond(), + s_ts_us_neg.get_nano_of_millisecond() + ); + let read_neg_ns = r1.get_timestamp_ntz(plan.idx("col_ts_ns_neg"), 9).unwrap(); + assert_eq!(read_neg_ns.get_millisecond(), s_ts_ns_neg.get_millisecond()); + assert_eq!( + read_neg_ns.get_nano_of_millisecond(), + s_ts_ns_neg.get_nano_of_millisecond() + ); + let read_neg_ltz_us = r1 + .get_timestamp_ltz(plan.idx("col_ts_ltz_us_neg"), 6) + .unwrap(); + assert_eq!( + read_neg_ltz_us.get_epoch_millisecond(), + s_ts_ltz_us_neg.get_epoch_millisecond() + ); + let read_neg_ltz_ns = r1 + .get_timestamp_ltz(plan.idx("col_ts_ltz_ns_neg"), 9) + .unwrap(); + assert_eq!( + read_neg_ltz_ns.get_epoch_millisecond(), + s_ts_ltz_ns_neg.get_epoch_millisecond() + ); + + // === Row 2 lookup — empty map, all other columns NULL === + let result2 = lookuper.lookup(&make_key(2)).await.expect("lookup row2"); + let r2 = result2 + .get_single_row() + .expect("row2") + .expect("row2 exists"); + assert_eq!(r2.get_int(0).unwrap(), 2); + let map_idx = plan.idx("map_string_int"); + for i in 1..column_count { + if i == map_idx { + assert_eq!(r2.get_map(map_idx).unwrap().size(), 0); + } else { + assert!(r2.is_null_at(i).unwrap(), "field {i} should be null"); + } + } + + // === Row 3 lookup — every compound + scalar field NULL === + let result3 = lookuper.lookup(&make_key(3)).await.expect("lookup row3"); + let r3 = result3 + .get_single_row() + .expect("row3") + .expect("row3 exists"); + assert_eq!(r3.get_int(0).unwrap(), 3); + for i in 1..column_count { + assert!(r3.is_null_at(i).unwrap(), "field {i} should be null"); + } + + admin.drop_table(&table_path, false).await.expect("drop"); + } } diff --git a/crates/fluss/tests/integration/log_table.rs b/crates/fluss/tests/integration/log_table.rs index fc8f8376..5e61ab07 100644 --- a/crates/fluss/tests/integration/log_table.rs +++ b/crates/fluss/tests/integration/log_table.rs @@ -19,13 +19,16 @@ #[cfg(test)] mod table_test { use crate::integration::utils::{ - create_partitions, create_table, get_shared_cluster, make_int_array, make_string_array, + ColumnPlan, array_dt_basics_columns, as_row_type, create_partitions, create_table, + dt_array_int, dt_map_string_int, dt_row_seq_label, get_shared_cluster, make_int_array, + make_string_array, map_dt_basics_columns, row_dt_basics_columns, scalar_dt_columns, }; - use arrow::array::record_batch; + use arrow::array::{Int32Array, record_batch}; use fluss::client::{EARLIEST_OFFSET, FlussTable, TableScan}; use fluss::metadata::{DataField, DataTypes, Schema, TableDescriptor, TablePath}; - use fluss::record::ScanRecord; + use fluss::record::{ScanBatch, ScanRecord}; use fluss::row::binary_array::FlussArrayWriter; + use fluss::row::binary_map::FlussMapWriter; use fluss::row::{ Date, Datum, Decimal, FlussArray, GenericRow, InternalRow, Time, TimestampLtz, TimestampNtz, }; @@ -504,9 +507,7 @@ mod table_test { .unwrap(); writer.flush().await.unwrap(); - use arrow::array::Int32Array; - - fn extract_ids(batches: &[fluss::record::ScanBatch]) -> Vec { + fn extract_ids(batches: &[ScanBatch]) -> Vec { batches .iter() .flat_map(|b| { @@ -582,547 +583,6 @@ mod table_test { /// Integration test covering produce and scan operations for all supported datatypes /// in log tables. - #[tokio::test] - async fn all_supported_datatypes() { - let cluster = get_shared_cluster(); - let connection = cluster.get_fluss_connection().await; - - let admin = connection.get_admin().expect("Failed to get admin"); - - let table_path = TablePath::new("fluss", "test_log_all_datatypes"); - - // Create a log table with all supported datatypes for append/scan - let table_descriptor = TableDescriptor::builder() - .schema( - Schema::builder() - // Integer types - .column("col_tinyint", DataTypes::tinyint()) - .column("col_smallint", DataTypes::smallint()) - .column("col_int", DataTypes::int()) - .column("col_bigint", DataTypes::bigint()) - // Floating point types - .column("col_float", DataTypes::float()) - .column("col_double", DataTypes::double()) - // Boolean type - .column("col_boolean", DataTypes::boolean()) - // Char type - .column("col_char", DataTypes::char(10)) - // String type - .column("col_string", DataTypes::string()) - // Decimal type - .column("col_decimal", DataTypes::decimal(10, 2)) - // Date type - .column("col_date", DataTypes::date()) - // Time types - .column("col_time_s", DataTypes::time_with_precision(0)) - .column("col_time_ms", DataTypes::time_with_precision(3)) - .column("col_time_us", DataTypes::time_with_precision(6)) - .column("col_time_ns", DataTypes::time_with_precision(9)) - // Timestamp types - .column("col_timestamp_s", DataTypes::timestamp_with_precision(0)) - .column("col_timestamp_ms", DataTypes::timestamp_with_precision(3)) - .column("col_timestamp_us", DataTypes::timestamp_with_precision(6)) - .column("col_timestamp_ns", DataTypes::timestamp_with_precision(9)) - // Timestamp_ltz types - .column( - "col_timestamp_ltz_s", - DataTypes::timestamp_ltz_with_precision(0), - ) - .column( - "col_timestamp_ltz_ms", - DataTypes::timestamp_ltz_with_precision(3), - ) - .column( - "col_timestamp_ltz_us", - DataTypes::timestamp_ltz_with_precision(6), - ) - .column( - "col_timestamp_ltz_ns", - DataTypes::timestamp_ltz_with_precision(9), - ) - // Bytes type - .column("col_bytes", DataTypes::bytes()) - // Fixed-size binary type - .column("col_binary", DataTypes::binary(4)) - // Timestamp types with negative values (before Unix epoch) - .column( - "col_timestamp_us_neg", - DataTypes::timestamp_with_precision(6), - ) - .column( - "col_timestamp_ns_neg", - DataTypes::timestamp_with_precision(9), - ) - .column( - "col_timestamp_ltz_us_neg", - DataTypes::timestamp_ltz_with_precision(6), - ) - .column( - "col_timestamp_ltz_ns_neg", - DataTypes::timestamp_ltz_with_precision(9), - ) - .column("col_array", DataTypes::array(DataTypes::string())) - .column( - "col_row", - DataTypes::row(vec![ - DataField::new("seq", DataTypes::int(), None), - DataField::new("label", DataTypes::string(), None), - ]), - ) - .build() - .expect("Failed to build schema"), - ) - .build() - .expect("Failed to build table"); - - create_table(&admin, &table_path, &table_descriptor).await; - - let table = connection - .get_table(&table_path) - .await - .expect("Failed to get table"); - - let field_count = table.get_table_info().schema.columns().len(); - - let append_writer = table - .new_append() - .expect("Failed to create append") - .create_writer() - .expect("Failed to create writer"); - - // Test data for all datatypes - let col_tinyint = 127i8; - let col_smallint = 32767i16; - let col_int = 2147483647i32; - let col_bigint = 9223372036854775807i64; - let col_float = std::f32::consts::PI; - let col_double = std::f64::consts::E; - let col_boolean = true; - let col_char = "hello"; - let col_string = "world of fluss rust client"; - let col_decimal = Decimal::from_unscaled_long(12345, 10, 2).unwrap(); // 123.45 - let col_date = Date::new(20476); // 2026-01-23 - let col_time_s = Time::new(36827000); // 10:13:47 - let col_time_ms = Time::new(36827123); // 10:13:47.123 - let col_time_us = Time::new(86399999); // 23:59:59.999 - let col_time_ns = Time::new(1); // 00:00:00.001 - // 2026-01-23 10:13:47 UTC - let col_timestamp_s = TimestampNtz::new(1769163227000); - // 2026-01-23 10:13:47.123 UTC - let col_timestamp_ms = TimestampNtz::new(1769163227123); - // 2026-01-23 10:13:47.123456 UTC - let col_timestamp_us = TimestampNtz::from_millis_nanos(1769163227123, 456000).unwrap(); - // 2026-01-23 10:13:47.123999999 UTC - let col_timestamp_ns = TimestampNtz::from_millis_nanos(1769163227123, 999_999).unwrap(); - let col_timestamp_ltz_s = TimestampLtz::new(1769163227000); - let col_timestamp_ltz_ms = TimestampLtz::new(1769163227123); - let col_timestamp_ltz_us = TimestampLtz::from_millis_nanos(1769163227123, 456000).unwrap(); - let col_timestamp_ltz_ns = TimestampLtz::from_millis_nanos(1769163227123, 999_999).unwrap(); - let col_bytes: Vec = b"binary data".to_vec(); - let col_binary: Vec = vec![0xDE, 0xAD, 0xBE, 0xEF]; - - // 1960-06-15 08:30:45.123456 UTC (before 1970) - let col_timestamp_us_neg = TimestampNtz::from_millis_nanos(-301234154877, 456000).unwrap(); - // 1960-06-15 08:30:45.123999999 UTC (before 1970) - let col_timestamp_ns_neg = TimestampNtz::from_millis_nanos(-301234154877, 999_999).unwrap(); - let col_timestamp_ltz_us_neg = - TimestampLtz::from_millis_nanos(-301234154877, 456000).unwrap(); - let col_timestamp_ltz_ns_neg = - TimestampLtz::from_millis_nanos(-301234154877, 999_999).unwrap(); - - let col_array = make_string_array(&[Some("fluss"), Some("rust")]); - - let mut col_row_inner = GenericRow::new(2); - col_row_inner.set_field(0, 7_i32); - col_row_inner.set_field(1, "lumiere"); - - let mut row = GenericRow::new(field_count); - row.set_field(0, col_tinyint); - row.set_field(1, col_smallint); - row.set_field(2, col_int); - row.set_field(3, col_bigint); - row.set_field(4, col_float); - row.set_field(5, col_double); - row.set_field(6, col_boolean); - row.set_field(7, col_char); - row.set_field(8, col_string); - row.set_field(9, col_decimal.clone()); - row.set_field(10, col_date); - row.set_field(11, col_time_s); - row.set_field(12, col_time_ms); - row.set_field(13, col_time_us); - row.set_field(14, col_time_ns); - row.set_field(15, col_timestamp_s); - row.set_field(16, col_timestamp_ms); - row.set_field(17, col_timestamp_us); - row.set_field(18, col_timestamp_ns); - row.set_field(19, col_timestamp_ltz_s); - row.set_field(20, col_timestamp_ltz_ms); - row.set_field(21, col_timestamp_ltz_us); - row.set_field(22, col_timestamp_ltz_ns); - row.set_field(23, col_bytes.as_slice()); - row.set_field(24, col_binary.as_slice()); - row.set_field(25, col_timestamp_us_neg); - row.set_field(26, col_timestamp_ns_neg); - row.set_field(27, col_timestamp_ltz_us_neg); - row.set_field(28, col_timestamp_ltz_ns_neg); - row.set_field(29, col_array); - row.set_field(30, Datum::Row(Box::new(col_row_inner))); - - append_writer - .append(&row) - .expect("Failed to append row with all datatypes"); - - // Append a row with null values for all columns - let mut row_with_nulls = GenericRow::new(field_count); - for i in 0..field_count { - row_with_nulls.set_field(i, Datum::Null); - } - - append_writer - .append(&row_with_nulls) - .expect("Failed to append row with nulls"); - - append_writer.flush().await.expect("Failed to flush"); - - // Scan the records - let records = scan_table(&table, |scan| scan).await; - - assert_eq!(records.len(), 2, "Expected 2 records"); - - let found_row = records[0].row(); - assert_eq!( - found_row.get_byte(0).unwrap(), - col_tinyint, - "col_tinyint mismatch" - ); - assert_eq!( - found_row.get_short(1).unwrap(), - col_smallint, - "col_smallint mismatch" - ); - assert_eq!(found_row.get_int(2).unwrap(), col_int, "col_int mismatch"); - assert_eq!( - found_row.get_long(3).unwrap(), - col_bigint, - "col_bigint mismatch" - ); - assert!( - (found_row.get_float(4).unwrap() - col_float).abs() < f32::EPSILON, - "col_float mismatch: expected {}, got {}", - col_float, - found_row.get_float(4).unwrap() - ); - assert!( - (found_row.get_double(5).unwrap() - col_double).abs() < f64::EPSILON, - "col_double mismatch: expected {}, got {}", - col_double, - found_row.get_double(5).unwrap() - ); - assert_eq!( - found_row.get_boolean(6).unwrap(), - col_boolean, - "col_boolean mismatch" - ); - assert_eq!( - found_row.get_char(7, 10).unwrap(), - col_char, - "col_char mismatch" - ); - assert_eq!( - found_row.get_string(8).unwrap(), - col_string, - "col_string mismatch" - ); - assert_eq!( - found_row.get_decimal(9, 10, 2).unwrap(), - col_decimal, - "col_decimal mismatch" - ); - assert_eq!( - found_row.get_date(10).unwrap().get_inner(), - col_date.get_inner(), - "col_date mismatch" - ); - - assert_eq!( - found_row.get_time(11).unwrap().get_inner(), - col_time_s.get_inner(), - "col_time_s mismatch" - ); - - assert_eq!( - found_row.get_time(12).unwrap().get_inner(), - col_time_ms.get_inner(), - "col_time_ms mismatch" - ); - - assert_eq!( - found_row.get_time(13).unwrap().get_inner(), - col_time_us.get_inner(), - "col_time_us mismatch" - ); - - assert_eq!( - found_row.get_time(14).unwrap().get_inner(), - col_time_ns.get_inner(), - "col_time_ns mismatch" - ); - - assert_eq!( - found_row - .get_timestamp_ntz(15, 0) - .unwrap() - .get_millisecond(), - col_timestamp_s.get_millisecond(), - "col_timestamp_s mismatch" - ); - - assert_eq!( - found_row - .get_timestamp_ntz(16, 3) - .unwrap() - .get_millisecond(), - col_timestamp_ms.get_millisecond(), - "col_timestamp_ms mismatch" - ); - - let read_ts_us = found_row.get_timestamp_ntz(17, 6).unwrap(); - assert_eq!( - read_ts_us.get_millisecond(), - col_timestamp_us.get_millisecond(), - "col_timestamp_us millis mismatch" - ); - assert_eq!( - read_ts_us.get_nano_of_millisecond(), - col_timestamp_us.get_nano_of_millisecond(), - "col_timestamp_us nanos mismatch" - ); - - let read_ts_ns = found_row.get_timestamp_ntz(18, 9).unwrap(); - assert_eq!( - read_ts_ns.get_millisecond(), - col_timestamp_ns.get_millisecond(), - "col_timestamp_ns millis mismatch" - ); - assert_eq!( - read_ts_ns.get_nano_of_millisecond(), - col_timestamp_ns.get_nano_of_millisecond(), - "col_timestamp_ns nanos mismatch" - ); - - assert_eq!( - found_row - .get_timestamp_ltz(19, 0) - .unwrap() - .get_epoch_millisecond(), - col_timestamp_ltz_s.get_epoch_millisecond(), - "col_timestamp_ltz_s mismatch" - ); - - assert_eq!( - found_row - .get_timestamp_ltz(20, 3) - .unwrap() - .get_epoch_millisecond(), - col_timestamp_ltz_ms.get_epoch_millisecond(), - "col_timestamp_ltz_ms mismatch" - ); - - let read_ts_ltz_us = found_row.get_timestamp_ltz(21, 6).unwrap(); - assert_eq!( - read_ts_ltz_us.get_epoch_millisecond(), - col_timestamp_ltz_us.get_epoch_millisecond(), - "col_timestamp_ltz_us millis mismatch" - ); - assert_eq!( - read_ts_ltz_us.get_nano_of_millisecond(), - col_timestamp_ltz_us.get_nano_of_millisecond(), - "col_timestamp_ltz_us nanos mismatch" - ); - - let read_ts_ltz_ns = found_row.get_timestamp_ltz(22, 9).unwrap(); - assert_eq!( - read_ts_ltz_ns.get_epoch_millisecond(), - col_timestamp_ltz_ns.get_epoch_millisecond(), - "col_timestamp_ltz_ns millis mismatch" - ); - assert_eq!( - read_ts_ltz_ns.get_nano_of_millisecond(), - col_timestamp_ltz_ns.get_nano_of_millisecond(), - "col_timestamp_ltz_ns nanos mismatch" - ); - assert_eq!( - found_row.get_bytes(23).unwrap(), - col_bytes, - "col_bytes mismatch" - ); - assert_eq!( - found_row.get_binary(24, 4).unwrap(), - col_binary, - "col_binary mismatch" - ); - - // Verify timestamps before Unix epoch (negative timestamps) - let read_ts_us_neg = found_row.get_timestamp_ntz(25, 6).unwrap(); - assert_eq!( - read_ts_us_neg.get_millisecond(), - col_timestamp_us_neg.get_millisecond(), - "col_timestamp_us_neg millis mismatch" - ); - assert_eq!( - read_ts_us_neg.get_nano_of_millisecond(), - col_timestamp_us_neg.get_nano_of_millisecond(), - "col_timestamp_us_neg nanos mismatch" - ); - - let read_ts_ns_neg = found_row.get_timestamp_ntz(26, 9).unwrap(); - assert_eq!( - read_ts_ns_neg.get_millisecond(), - col_timestamp_ns_neg.get_millisecond(), - "col_timestamp_ns_neg millis mismatch" - ); - assert_eq!( - read_ts_ns_neg.get_nano_of_millisecond(), - col_timestamp_ns_neg.get_nano_of_millisecond(), - "col_timestamp_ns_neg nanos mismatch" - ); - - let read_ts_ltz_us_neg = found_row.get_timestamp_ltz(27, 6).unwrap(); - assert_eq!( - read_ts_ltz_us_neg.get_epoch_millisecond(), - col_timestamp_ltz_us_neg.get_epoch_millisecond(), - "col_timestamp_ltz_us_neg millis mismatch" - ); - assert_eq!( - read_ts_ltz_us_neg.get_nano_of_millisecond(), - col_timestamp_ltz_us_neg.get_nano_of_millisecond(), - "col_timestamp_ltz_us_neg nanos mismatch" - ); - - let read_ts_ltz_ns_neg = found_row.get_timestamp_ltz(28, 9).unwrap(); - assert_eq!( - read_ts_ltz_ns_neg.get_epoch_millisecond(), - col_timestamp_ltz_ns_neg.get_epoch_millisecond(), - "col_timestamp_ltz_ns_neg millis mismatch" - ); - assert_eq!( - read_ts_ltz_ns_neg.get_nano_of_millisecond(), - col_timestamp_ltz_ns_neg.get_nano_of_millisecond(), - "col_timestamp_ltz_ns_neg nanos mismatch" - ); - - let arr = found_row.get_array(29).unwrap(); - assert_eq!(arr.size(), 2, "col_array size mismatch"); - assert_eq!(arr.get_string(0).unwrap(), "fluss", "col_array[0] mismatch"); - assert_eq!(arr.get_string(1).unwrap(), "rust", "col_array[1] mismatch"); - - let nested = found_row.get_row(30).unwrap(); - assert_eq!(nested.get_int(0).unwrap(), 7, "col_row.seq mismatch"); - assert_eq!( - nested.get_string(1).unwrap(), - "lumiere", - "col_row.label mismatch" - ); - - // Verify row with all nulls (record index 1) - let found_row_nulls = records[1].row(); - for i in 0..field_count { - assert!( - found_row_nulls.is_null_at(i).unwrap(), - "column {} should be null", - i - ); - } - - admin - .drop_table(&table_path, false) - .await - .expect("Failed to drop table"); - } - - #[tokio::test] - async fn test_map_datatype_roundtrip() { - use fluss::row::binary_map::FlussMapWriter; - use fluss::row::{Datum, GenericRow}; - - let cluster = get_shared_cluster(); - let connection = cluster.get_fluss_connection().await; - let admin = connection.get_admin().expect("Failed to get admin"); - - let table_path = TablePath::new("fluss", "test_map_datatype_roundtrip"); - - let key_type = DataTypes::string(); - let value_type = DataTypes::int(); - let map_type = DataTypes::map(key_type.clone(), value_type.clone()); - - let table_descriptor = TableDescriptor::builder() - .schema( - Schema::builder() - .column("id", DataTypes::int()) - .column("map_col", map_type.clone()) - .build() - .expect("Failed to build schema"), - ) - .build() - .expect("Failed to build table"); - - create_table(&admin, &table_path, &table_descriptor).await; - - let table = connection - .get_table(&table_path) - .await - .expect("Failed to get table"); - - // 1. Construct FlussMap - let mut map_writer = FlussMapWriter::new(3, &key_type, &value_type); - map_writer.write_entry("k1".into(), 10.into()).unwrap(); - map_writer.write_entry("k2".into(), 20.into()).unwrap(); - map_writer.write_entry("k3".into(), 30.into()).unwrap(); - let fluss_map = map_writer.complete().unwrap(); - - // 2. Insert Row - let mut row = GenericRow::new(2); - row.set_field(0, 1i32); - row.set_field(1, Datum::Map(fluss_map)); - - let append_writer = table - .new_append() - .expect("Failed to create append") - .create_writer() - .expect("Failed to create writer"); - - append_writer.append(&row).expect("Failed to append row"); - append_writer.flush().await.expect("Failed to flush"); - - // 3. Fetch Row - let records = scan_table(&table, |scan| scan).await; - assert_eq!(records.len(), 1, "Expected 1 record"); - - let found_row = records[0].row(); - assert_eq!(found_row.get_int(0).unwrap(), 1); - - // 4. Assert Map - let decoded_map = found_row.get_map(1).expect("Failed to get map"); - assert_eq!(decoded_map.size(), 3); - - let decoded_keys = decoded_map.key_array(); - let decoded_values = decoded_map.value_array(); - - assert_eq!(decoded_keys.get_string(0).unwrap(), "k1"); - assert_eq!(decoded_keys.get_string(1).unwrap(), "k2"); - assert_eq!(decoded_keys.get_string(2).unwrap(), "k3"); - - assert_eq!(decoded_values.get_int(0).unwrap(), 10); - assert_eq!(decoded_values.get_int(1).unwrap(), 20); - assert_eq!(decoded_values.get_int(2).unwrap(), 30); - - admin - .drop_table(&table_path, false) - .await - .expect("Failed to drop table"); - } - #[tokio::test] async fn partitioned_table_append_scan() { let cluster = get_shared_cluster(); @@ -1175,7 +635,7 @@ mod table_test { ]; for (id, region, value) in &test_data { - let mut row = fluss::row::GenericRow::new(3); + let mut row = GenericRow::new(3); row.set_field(0, *id); row.set_field(1, *region); row.set_field(2, *value); @@ -1418,650 +878,104 @@ mod table_test { .expect("Failed to drop table"); } + /// Projection over a log table containing every compound type. #[tokio::test] - async fn undersized_row_returns_error() { + async fn projection_with_compound_types() { let cluster = get_shared_cluster(); let connection = cluster.get_fluss_connection().await; let admin = connection.get_admin().expect("Failed to get admin"); - let table_path = TablePath::new("fluss", "test_log_undersized_row"); + let table_path = TablePath::new("fluss", "test_log_projection_compound"); - let table_descriptor = TableDescriptor::builder() - .schema( - Schema::builder() - .column("col_bool", DataTypes::boolean()) - .column("col_int", DataTypes::int()) - .column("col_string", DataTypes::string()) - .column("col_bigint", DataTypes::bigint()) - .build() - .expect("Failed to build schema"), + let row_type = DataTypes::row(vec![ + DataField::new("seq", DataTypes::int(), None), + DataField::new("label", DataTypes::string(), None), + ]); + + let schema = Schema::builder() + .column("id", DataTypes::int()) + .column("nested", row_type) + .column( + "attrs", + DataTypes::map(DataTypes::string(), DataTypes::int()), ) + .column("tags", DataTypes::array(DataTypes::string())) + .column("extra", DataTypes::string()) .build() - .expect("Failed to build table"); - - create_table(&admin, &table_path, &table_descriptor).await; + .expect("schema"); - let table = connection - .get_table(&table_path) - .await - .expect("Failed to get table"); + create_table( + &admin, + &table_path, + &TableDescriptor::builder() + .schema(schema) + .build() + .expect("table descriptor"), + ) + .await; - let append_writer = table + let table = connection.get_table(&table_path).await.expect("table"); + let writer = table .new_append() - .expect("Failed to create table append") + .expect("append") .create_writer() - .expect("Failed to create writer"); + .expect("writer"); - // Scenario 1b: GenericRow with only 2 fields for a 4-column table - let mut row = fluss::row::GenericRow::new(2); - row.set_field(0, true); - row.set_field(1, 42_i32); + let mut nested = GenericRow::new(2); + nested.set_field(0, 42_i32); + nested.set_field(1, "hello"); + let attrs = { + let mut w = FlussMapWriter::new(2, &DataTypes::string(), &DataTypes::int()); + w.write_entry("x".into(), 1.into()).unwrap(); + w.write_entry("y".into(), 2.into()).unwrap(); + w.complete().expect("attrs") + }; + let tags = make_string_array(&[Some("alpha"), Some("beta")]); - let result = append_writer.append(&row); - assert!(result.is_err(), "Undersized row should be rejected"); - let err_msg = result.unwrap_err().to_string(); - assert!( - err_msg.contains("Expected: 4") && err_msg.contains("Actual: 2"), - "Error should mention field count mismatch, got: {err_msg}" - ); + let mut row = GenericRow::new(5); + row.set_field(0, 7_i32); + row.set_field(1, Datum::Row(Box::new(nested))); + row.set_field(2, Datum::Map(attrs)); + row.set_field(3, tags); + row.set_field(4, "ignore-me"); + writer.append(&row).expect("append"); + writer.flush().await.expect("flush"); - // Correct column count but wrong types: - // Schema is (Boolean, Int, String, BigInt) but we put Int64 where String is expected. - // This should return an error, not panic. - let row_wrong_types = fluss::row::GenericRow::from_data(vec![ - fluss::row::Datum::Bool(true), - fluss::row::Datum::Int32(42), - fluss::row::Datum::Int64(999), // wrong: String column - fluss::row::Datum::Int64(100), - ]); + // Project columns in reordered form, dropping `extra`. + let records = scan_table(&table, |scan| { + scan.project_by_name(&["nested", "attrs", "tags", "id"]) + .expect("project failed") + }) + .await; + assert_eq!(records.len(), 1); + let r = records[0].row(); - let result = append_writer.append(&row_wrong_types); - assert!( - result.is_err(), - "Row with mismatched types should be rejected, not panic" - ); + // === Projection: ROW === + let projected_nested = r.get_row(0).expect("get_row over projection"); + assert_eq!(projected_nested.get_int(0).unwrap(), 42); + assert_eq!(projected_nested.get_string(1).unwrap(), "hello"); - admin - .drop_table(&table_path, false) - .await - .expect("Failed to drop table"); - } + // === Projection: MAP === + let m = r.get_map(1).expect("get_map over projection"); + assert_eq!(m.size(), 2); + assert_eq!(m.get(&Datum::from("x")).unwrap(), Some(Datum::from(1_i32))); + assert_eq!(m.get(&Datum::from("y")).unwrap(), Some(Datum::from(2_i32))); - #[tokio::test] - async fn append_and_scan_with_array() { - let cluster = get_shared_cluster(); - let connection = cluster.get_fluss_connection().await; - let admin = connection.get_admin().expect("Failed to get admin"); - - let table_path = TablePath::new("fluss", "test_log_arrays"); - let inner_array_type = DataTypes::array(DataTypes::int()); - - let schema = Schema::builder() - .column("id", DataTypes::int()) - .column("tags", DataTypes::array(DataTypes::string())) - .column("scores", DataTypes::array(DataTypes::int())) - .column("matrix", DataTypes::array(inner_array_type.clone())) - .build() - .expect("Failed to build schema"); - - let table_descriptor = TableDescriptor::builder() - .schema(schema) - .build() - .expect("Failed to build table descriptor"); - - create_table(&admin, &table_path, &table_descriptor).await; - - let table = connection - .get_table(&table_path) - .await - .expect("Failed to get table"); - - let append_writer = table - .new_append() - .expect("Failed to create append") - .create_writer() - .expect("Failed to create writer"); - - let mut row1 = GenericRow::new(4); - row1.set_field(0, 1_i32); - row1.set_field(1, make_string_array(&[Some("hello"), Some("world")])); - row1.set_field(2, make_int_array(&[Some(10), Some(20), Some(30)])); - let m1 = { - let mut w = FlussArrayWriter::new(2, &inner_array_type); - w.write_array(0, &make_int_array(&[Some(1), Some(2)])); - w.write_array(1, &make_int_array(&[Some(3), Some(4)])); - w.complete().expect("matrix1") - }; - row1.set_field(3, m1); - - let mut row2 = GenericRow::new(4); - row2.set_field(0, 2_i32); - row2.set_field(1, make_string_array(&[None])); - row2.set_field(2, make_int_array(&[])); - let m2 = { - let mut w = FlussArrayWriter::new(3, &inner_array_type); - w.write_array(0, &make_int_array(&[Some(5)])); - w.set_null_at(1); - w.write_array(2, &make_int_array(&[])); - w.complete().expect("matrix2") - }; - row2.set_field(3, m2); - - let mut row3 = GenericRow::new(4); - row3.set_field(0, 3_i32); - row3.set_field(1, Datum::Null); - row3.set_field(2, make_int_array(&[Some(42)])); - row3.set_field(3, Datum::Null); - - append_writer.append(&row1).expect("append row1"); - append_writer.append(&row2).expect("append row2"); - append_writer.append(&row3).expect("append row3"); - append_writer.flush().await.expect("Failed to flush"); - - let records = scan_table(&table, |scan| scan).await; - assert_eq!(records.len(), 3, "expected three log records"); - - let r0 = records[0].row(); - assert_eq!(r0.get_int(0).unwrap(), 1); - let tags_r0 = r0.get_array(1).unwrap(); - assert_eq!(tags_r0.size(), 2); - assert_eq!(tags_r0.get_string(0).unwrap(), "hello"); - assert_eq!(tags_r0.get_string(1).unwrap(), "world"); - let scores_r0 = r0.get_array(2).unwrap(); - assert_eq!(scores_r0.size(), 3); - assert_eq!(scores_r0.get_int(0).unwrap(), 10); - assert_eq!(scores_r0.get_int(1).unwrap(), 20); - assert_eq!(scores_r0.get_int(2).unwrap(), 30); - let matrix_r0: FlussArray = r0.get_array(3).unwrap(); - assert_eq!(matrix_r0.size(), 2); - let mr0_0 = matrix_r0.get_array(0).unwrap(); - assert_eq!(mr0_0.size(), 2); - assert_eq!(mr0_0.get_int(0).unwrap(), 1); - assert_eq!(mr0_0.get_int(1).unwrap(), 2); - let mr0_1 = matrix_r0.get_array(1).unwrap(); - assert_eq!(mr0_1.size(), 2); - assert_eq!(mr0_1.get_int(0).unwrap(), 3); - assert_eq!(mr0_1.get_int(1).unwrap(), 4); - - let r1 = records[1].row(); - assert_eq!(r1.get_int(0).unwrap(), 2); - let tags_r1 = r1.get_array(1).unwrap(); - assert_eq!(tags_r1.size(), 1); - assert!(tags_r1.is_null_at(0)); - let scores_r1 = r1.get_array(2).unwrap(); - assert_eq!(scores_r1.size(), 0); - let matrix_r1 = r1.get_array(3).unwrap(); - assert_eq!(matrix_r1.size(), 3); - let mr1_0 = matrix_r1.get_array(0).unwrap(); - assert_eq!(mr1_0.size(), 1); - assert_eq!(mr1_0.get_int(0).unwrap(), 5); - assert!(matrix_r1.is_null_at(1)); - let mr1_2 = matrix_r1.get_array(2).unwrap(); - assert_eq!(mr1_2.size(), 0); - - let r2 = records[2].row(); - assert_eq!(r2.get_int(0).unwrap(), 3); - assert!(r2.is_null_at(1).unwrap()); - let scores_r2 = r2.get_array(2).unwrap(); - assert_eq!(scores_r2.size(), 1); - assert_eq!(scores_r2.get_int(0).unwrap(), 42); - assert!(r2.is_null_at(3).unwrap()); - - admin - .drop_table(&table_path, false) - .await - .expect("Failed to drop table"); - } - - #[tokio::test] - async fn append_and_scan_with_array_of_row() { - use fluss::metadata::{DataField, DataType}; - - let cluster = get_shared_cluster(); - let connection = cluster.get_fluss_connection().await; - let admin = connection.get_admin().expect("Failed to get admin"); - - let table_path = TablePath::new("fluss", "test_log_array_of_row"); - - let event_row_type_owned = DataTypes::row(vec![ - DataField::new("seq", DataTypes::int(), None), - DataField::new("label", DataTypes::string(), None), - ]); - let array_of_row_type = DataTypes::array(event_row_type_owned.clone()); - - let event_row_type = match &event_row_type_owned { - DataType::Row(rt) => rt.clone(), - _ => unreachable!(), - }; - - let schema = Schema::builder() - .column("id", DataTypes::int()) - .column("events", array_of_row_type) - .build() - .expect("Failed to build schema"); - - let table_descriptor = TableDescriptor::builder() - .schema(schema) - .build() - .expect("Failed to build table descriptor"); - - create_table(&admin, &table_path, &table_descriptor).await; - - let table = connection - .get_table(&table_path) - .await - .expect("Failed to get table"); - - let append_writer = table - .new_append() - .expect("Failed to create append") - .create_writer() - .expect("Failed to create writer"); - - let mut events1 = FlussArrayWriter::new(2, &event_row_type_owned); - let mut e0 = GenericRow::new(2); - e0.set_field(0, 1_i32); - e0.set_field(1, "open"); - events1.write_row(0, &e0).expect("write e0"); - let mut e1 = GenericRow::new(2); - e1.set_field(0, 2_i32); - e1.set_field(1, "close"); - events1.write_row(1, &e1).expect("write e1"); - let events1 = events1.complete().expect("events1"); - - let mut row1 = GenericRow::new(2); - row1.set_field(0, 1_i32); - row1.set_field(1, events1); - - let mut events2 = FlussArrayWriter::new(3, &event_row_type_owned); - let mut e2 = GenericRow::new(2); - e2.set_field(0, 7_i32); - e2.set_field(1, "x"); - events2.write_row(0, &e2).expect("write e2"); - events2.set_null_at(1); - let mut e3 = GenericRow::new(2); - e3.set_field(0, 8_i32); - e3.set_field(1, "y"); - events2.write_row(2, &e3).expect("write e3"); - let events2 = events2.complete().expect("events2"); - - let mut row2 = GenericRow::new(2); - row2.set_field(0, 2_i32); - row2.set_field(1, events2); - - let mut row3 = GenericRow::new(2); - row3.set_field(0, 3_i32); - row3.set_field(1, Datum::Null); - - append_writer.append(&row1).expect("append row1"); - append_writer.append(&row2).expect("append row2"); - append_writer.append(&row3).expect("append row3"); - append_writer.flush().await.expect("Failed to flush"); - - let records = scan_table(&table, |scan| scan).await; - assert_eq!(records.len(), 3, "expected three log records"); - - let r0 = records[0].row(); - assert_eq!(r0.get_int(0).unwrap(), 1); - let events_r0 = r0.get_array(1).unwrap(); - assert_eq!(events_r0.size(), 2); - let e0_r0 = events_r0.get_row(0, &event_row_type).unwrap(); - assert_eq!(e0_r0.get_int(0).unwrap(), 1); - assert_eq!(e0_r0.get_string(1).unwrap(), "open"); - let e1_r0 = events_r0.get_row(1, &event_row_type).unwrap(); - assert_eq!(e1_r0.get_int(0).unwrap(), 2); - assert_eq!(e1_r0.get_string(1).unwrap(), "close"); - - let r1 = records[1].row(); - let events_r1 = r1.get_array(1).unwrap(); - assert_eq!(events_r1.size(), 3); - let e0_r1 = events_r1.get_row(0, &event_row_type).unwrap(); - assert_eq!(e0_r1.get_int(0).unwrap(), 7); - assert_eq!(e0_r1.get_string(1).unwrap(), "x"); - assert!(events_r1.is_null_at(1)); - let e2_r1 = events_r1.get_row(2, &event_row_type).unwrap(); - assert_eq!(e2_r1.get_int(0).unwrap(), 8); - assert_eq!(e2_r1.get_string(1).unwrap(), "y"); - - let r2 = records[2].row(); - assert_eq!(r2.get_int(0).unwrap(), 3); - assert!(r2.is_null_at(1).unwrap()); - - admin - .drop_table(&table_path, false) - .await - .expect("Failed to drop table"); - } - - #[tokio::test] - async fn append_and_scan_with_row() { - let cluster = get_shared_cluster(); - let connection = cluster.get_fluss_connection().await; - let admin = connection.get_admin().expect("Failed to get admin"); - - let table_path = TablePath::new("fluss", "test_log_rows"); - let nested_row_type = DataTypes::row(vec![ - DataField::new("x", DataTypes::int(), None), - DataField::new("label", DataTypes::string(), None), - ]); - let deep_inner_row_type = DataTypes::row(vec![DataField::new("n", DataTypes::int(), None)]); - let deep_row_type = - DataTypes::row(vec![DataField::new("inner", deep_inner_row_type, None)]); - - let schema = Schema::builder() - .column("id", DataTypes::int()) - .column("nested", nested_row_type) - .column("deep", deep_row_type) - .build() - .expect("Failed to build schema"); - - let table_descriptor = TableDescriptor::builder() - .schema(schema) - .build() - .expect("Failed to build table descriptor"); - - create_table(&admin, &table_path, &table_descriptor).await; - - let table = connection - .get_table(&table_path) - .await - .expect("Failed to get table"); - - let append_writer = table - .new_append() - .expect("Failed to create append") - .create_writer() - .expect("Failed to create writer"); - - let mut nested1 = GenericRow::new(2); - nested1.set_field(0, 42_i32); - nested1.set_field(1, "hello"); - let mut deep_inner1 = GenericRow::new(1); - deep_inner1.set_field(0, 99_i32); - let mut deep1 = GenericRow::new(1); - deep1.set_field(0, Datum::Row(Box::new(deep_inner1))); - - let mut row1 = GenericRow::new(3); - row1.set_field(0, 1_i32); - row1.set_field(1, Datum::Row(Box::new(nested1))); - row1.set_field(2, Datum::Row(Box::new(deep1))); - - let mut nested2 = GenericRow::new(2); - nested2.set_field(0, 7_i32); - nested2.set_field(1, Datum::Null); - - let mut row2 = GenericRow::new(3); - row2.set_field(0, 2_i32); - row2.set_field(1, Datum::Row(Box::new(nested2))); - row2.set_field(2, Datum::Null); - - let mut deep_inner3 = GenericRow::new(1); - deep_inner3.set_field(0, -1_i32); - let mut deep3 = GenericRow::new(1); - deep3.set_field(0, Datum::Row(Box::new(deep_inner3))); - - let mut row3 = GenericRow::new(3); - row3.set_field(0, 3_i32); - row3.set_field(1, Datum::Null); - row3.set_field(2, Datum::Row(Box::new(deep3))); - - append_writer.append(&row1).expect("append row1"); - append_writer.append(&row2).expect("append row2"); - append_writer.append(&row3).expect("append row3"); - append_writer.flush().await.expect("Failed to flush"); - - let records = scan_table(&table, |scan| scan).await; - assert_eq!(records.len(), 3, "expected three log records"); - - let r0 = records[0].row(); - assert_eq!(r0.get_int(0).unwrap(), 1); - let nested_r0 = r0.get_row(1).unwrap(); - assert_eq!(nested_r0.get_int(0).unwrap(), 42); - assert_eq!(nested_r0.get_string(1).unwrap(), "hello"); - let deep_r0 = r0.get_row(2).unwrap(); - let deep_inner_r0 = deep_r0.get_row(0).unwrap(); - assert_eq!(deep_inner_r0.get_int(0).unwrap(), 99); - - let r1 = records[1].row(); - assert_eq!(r1.get_int(0).unwrap(), 2); - let nested_r1 = r1.get_row(1).unwrap(); - assert_eq!(nested_r1.get_int(0).unwrap(), 7); - assert!(nested_r1.is_null_at(1).unwrap()); - assert!(r1.is_null_at(2).unwrap()); - - let r2 = records[2].row(); - assert_eq!(r2.get_int(0).unwrap(), 3); - assert!(r2.is_null_at(1).unwrap()); - let deep_r2 = r2.get_row(2).unwrap(); - let deep_inner_r2 = deep_r2.get_row(0).unwrap(); - assert_eq!(deep_inner_r2.get_int(0).unwrap(), -1); - - admin - .drop_table(&table_path, false) - .await - .expect("Failed to drop table"); - } - - /// Partitioned log table with a ROW column. Confirms partition routing - /// + ROW column encoding compose correctly across partitions. - /// ROW column with all rich element types (decimal, date, time, timestamps, - /// bytes, binary, float NaN/Inf, long strings) round-tripped through the - /// log path. Confirms the wire-level encoding of `ROW` matches - /// what the server expects — the unit-level `test_row_all_primitives_round_trip` - /// proves Rust↔Rust round-trip; this test proves Rust→server→Rust. - #[tokio::test] - async fn append_and_scan_with_row_rich_types() { - fn assert_f32_special(actual: f32, expected: f32) { - if expected.is_nan() { - assert!(actual.is_nan(), "expected NaN"); - } else if expected.is_infinite() { - assert!(actual.is_infinite()); - assert_eq!(actual.signum(), expected.signum()); - } else { - assert!((actual - expected).abs() < f32::EPSILON); - } - } - - let cluster = get_shared_cluster(); - let connection = cluster.get_fluss_connection().await; - let admin = connection.get_admin().expect("Failed to get admin"); - - let table_path = TablePath::new("fluss", "test_log_row_rich_types"); - - let row_type_owned = DataTypes::row(vec![ - DataField::new("f_bool", DataTypes::boolean(), None), - DataField::new("f_int", DataTypes::int(), None), - DataField::new("f_long", DataTypes::bigint(), None), - DataField::new("f_float", DataTypes::float(), None), - DataField::new("f_double", DataTypes::double(), None), - DataField::new("f_str", DataTypes::string(), None), - DataField::new("f_bytes", DataTypes::bytes(), None), - DataField::new("f_decimal", DataTypes::decimal(10, 2), None), - DataField::new("f_date", DataTypes::date(), None), - DataField::new("f_time", DataTypes::time_with_precision(3), None), - DataField::new("f_ts_ntz", DataTypes::timestamp_with_precision(6), None), - DataField::new("f_ts_ltz", DataTypes::timestamp_ltz_with_precision(6), None), - DataField::new("f_binary_fixed", DataTypes::binary(4), None), - DataField::new("f_array_int", DataTypes::array(DataTypes::int()), None), - ]); - - let schema = Schema::builder() - .column("id", DataTypes::int()) - .column("nested", row_type_owned) - .build() - .expect("Failed to build schema"); - - let table_descriptor = TableDescriptor::builder() - .schema(schema) - .build() - .expect("Failed to build table descriptor"); - - create_table(&admin, &table_path, &table_descriptor).await; - - let table = connection - .get_table(&table_path) - .await - .expect("Failed to get table"); - let append_writer = table - .new_append() - .expect("Failed to create append") - .create_writer() - .expect("Failed to create writer"); - - let mut nested1 = GenericRow::new(14); - nested1.set_field(0, true); - nested1.set_field(1, 100_000_i32); - nested1.set_field(2, 9_876_543_210_i64); - nested1.set_field(3, f32::INFINITY); - nested1.set_field(4, f64::NAN); - nested1.set_field(5, "hello world"); - nested1.set_field(6, b"binary".as_slice()); - nested1.set_field(7, Decimal::from_unscaled_long(12345, 10, 2).unwrap()); - nested1.set_field(8, Datum::Date(Date::new(20476))); - nested1.set_field(9, Datum::Time(Time::new(36_827_123))); - nested1.set_field( - 10, - Datum::TimestampNtz(TimestampNtz::new(1_769_163_227_123)), - ); - nested1.set_field( - 11, - Datum::TimestampLtz(TimestampLtz::new(1_769_163_227_456)), - ); - nested1.set_field(12, b"\x01\x02\x03\x04".as_slice()); - nested1.set_field(13, make_int_array(&[Some(7), None, Some(11)])); - - let mut row1 = GenericRow::new(2); - row1.set_field(0, 1_i32); - row1.set_field(1, Datum::Row(Box::new(nested1))); - - let mut row2 = GenericRow::new(2); - row2.set_field(0, 2_i32); - row2.set_field(1, Datum::Null); - - append_writer.append(&row1).expect("append row1"); - append_writer.append(&row2).expect("append row2"); - append_writer.flush().await.expect("Failed to flush"); - - let records = scan_table(&table, |scan| scan).await; - assert_eq!(records.len(), 2); - - let r0 = records[0].row(); - assert_eq!(r0.get_int(0).unwrap(), 1); - let nested = r0.get_row(1).unwrap(); - assert!(nested.get_boolean(0).unwrap()); - assert_eq!(nested.get_int(1).unwrap(), 100_000); - assert_eq!(nested.get_long(2).unwrap(), 9_876_543_210); - assert_f32_special(nested.get_float(3).unwrap(), f32::INFINITY); - assert!(nested.get_double(4).unwrap().is_nan()); - assert_eq!(nested.get_string(5).unwrap(), "hello world"); - assert_eq!(nested.get_bytes(6).unwrap(), b"binary"); - assert_eq!( - nested.get_decimal(7, 10, 2).unwrap(), - Decimal::from_unscaled_long(12345, 10, 2).unwrap(), - ); - assert_eq!(nested.get_date(8).unwrap().get_inner(), 20476); - assert_eq!(nested.get_time(9).unwrap().get_inner(), 36_827_123); - assert_eq!( - nested.get_timestamp_ntz(10, 6).unwrap().get_millisecond(), - 1_769_163_227_123, - ); - assert_eq!( - nested - .get_timestamp_ltz(11, 6) - .unwrap() - .get_epoch_millisecond(), - 1_769_163_227_456, - ); - assert_eq!(nested.get_binary(12, 4).unwrap(), b"\x01\x02\x03\x04"); - let arr = nested.get_array(13).unwrap(); - assert_eq!(arr.size(), 3); - assert_eq!(arr.get_int(0).unwrap(), 7); - assert!(arr.is_null_at(1)); - assert_eq!(arr.get_int(2).unwrap(), 11); - - let r1 = records[1].row(); - assert_eq!(r1.get_int(0).unwrap(), 2); - assert!(r1.is_null_at(1).unwrap()); - - admin - .drop_table(&table_path, false) - .await - .expect("Failed to drop table"); - } - - /// Projection over a log table with ROW columns. Specifically tests that - /// `ProjectedRow::get_row` (added by this PR) works end-to-end against the - /// server — without this, the projection code path for ROW would have zero - /// integration coverage. - #[tokio::test] - async fn append_and_scan_with_row_projection() { - let cluster = get_shared_cluster(); - let connection = cluster.get_fluss_connection().await; - let admin = connection.get_admin().expect("Failed to get admin"); - - let table_path = TablePath::new("fluss", "test_log_row_projection"); - - let row_type = DataTypes::row(vec![ - DataField::new("seq", DataTypes::int(), None), - DataField::new("label", DataTypes::string(), None), - ]); - - let schema = Schema::builder() - .column("id", DataTypes::int()) - .column("nested", row_type) - .column("extra", DataTypes::string()) - .build() - .expect("Failed to build schema"); + // === Projection: ARRAY === + let a = r.get_array(2).expect("get_array over projection"); + assert_eq!(a.size(), 2); + assert_eq!(a.get_string(0).unwrap(), "alpha"); + assert_eq!(a.get_string(1).unwrap(), "beta"); - let table_descriptor = TableDescriptor::builder() - .schema(schema) - .build() - .expect("Failed to build table descriptor"); - - create_table(&admin, &table_path, &table_descriptor).await; - - let table = connection - .get_table(&table_path) - .await - .expect("Failed to get table"); - let append_writer = table - .new_append() - .expect("Failed to create append") - .create_writer() - .expect("Failed to create writer"); + // === Projection: scalar reordered to position 3 === + assert_eq!(r.get_int(3).unwrap(), 7); - let mut nested = GenericRow::new(2); - nested.set_field(0, 42_i32); - nested.set_field(1, "hello"); - - let mut row = GenericRow::new(3); - row.set_field(0, 7_i32); - row.set_field(1, Datum::Row(Box::new(nested))); - row.set_field(2, "ignore-me"); - append_writer.append(&row).expect("append"); - append_writer.flush().await.expect("Failed to flush"); - - let records = scan_table(&table, |scan| { - scan.project_by_name(&["nested", "id"]) - .expect("project failed") - }) - .await; - assert_eq!(records.len(), 1); - - let r0 = records[0].row(); - let projected_nested = r0.get_row(0).expect("get_row over projection"); - assert_eq!(projected_nested.get_int(0).unwrap(), 42); - assert_eq!(projected_nested.get_string(1).unwrap(), "hello"); - assert_eq!(r0.get_int(1).unwrap(), 7); - - admin - .drop_table(&table_path, false) - .await - .expect("Failed to drop table"); + admin.drop_table(&table_path, false).await.expect("drop"); } + /// Log append + scan against a schema covering every supported data type. #[tokio::test] - async fn append_and_scan_with_array_rich_types() { + async fn all_supported_datatypes() { fn assert_f32_special(actual: f32, expected: f32) { if expected.is_nan() { assert!(actual.is_nan(), "expected NaN"); @@ -2072,7 +986,6 @@ mod table_test { assert!((actual - expected).abs() < f32::EPSILON); } } - fn assert_f64_special(actual: f64, expected: f64) { if expected.is_nan() { assert!(actual.is_nan(), "expected NaN"); @@ -2088,263 +1001,967 @@ mod table_test { let connection = cluster.get_fluss_connection().await; let admin = connection.get_admin().expect("Failed to get admin"); - let table_path = TablePath::new("fluss", "test_log_arrays_rich_types"); - - // Compact types: DECIMAL(10,2) precision<=18, TIMESTAMP(6) precision<=3 for millis - let dec_compact = Decimal::from_unscaled_long(12345, 10, 2).unwrap(); - let ts_compact = TimestampNtz::from_millis_nanos(1769163227123, 456000).unwrap(); - - // Non-compact types: DECIMAL(22,5) precision>18, TIMESTAMP(9) precision>3 - let dec_big = Decimal::from_unscaled_bytes(&[66, 237, 18, 59, 11, 216, 31, 4, 244], 22, 5) - .expect("big decimal"); - let ts_nano = TimestampNtz::from_millis_nanos(1769163227123, 999_999).unwrap(); - - let d = Date::new(20476); - let t = Time::new(36827123); - let elem_bytes = &[0_u8, 1, 2, 255]; - let fixed_a: Vec = vec![0xDE, 0xAD, 0xBE, 0xEF]; - let fixed_b: Vec = vec![0x01, 0x02, 0x03, 0x04]; - - let schema = Schema::builder() - .column("id", DataTypes::int()) - .column("arr_bytes", DataTypes::array(DataTypes::bytes())) - .column("arr_date", DataTypes::array(DataTypes::date())) - .column( + let table_path = TablePath::new("fluss", "test_log_complex_types"); + + let row_seq_label_owned = dt_row_seq_label(); + let row_seq_label = as_row_type(&row_seq_label_owned); + let inner_array_int = dt_array_int(); + let inner_map_string_int = dt_map_string_int(); + + let plan = ColumnPlan::new() + .add("id", DataTypes::int()) + .start_section("array_basics") + .extend(array_dt_basics_columns()) + .start_section("row_basics") + .extend(row_dt_basics_columns()) + .start_section("map_basics") + .extend(map_dt_basics_columns()) + // ARRAY rich types + .start_section("array_rich") + .add("arr_bytes", DataTypes::array(DataTypes::bytes())) + .add("arr_date", DataTypes::array(DataTypes::date())) + .add( "arr_time", DataTypes::array(DataTypes::time_with_precision(3)), ) - .column( - "arr_ts_compact", + .add( + "arr_ts", DataTypes::array(DataTypes::timestamp_with_precision(6)), ) - .column( - "arr_ts_nano", - DataTypes::array(DataTypes::timestamp_with_precision(9)), - ) - .column( - "arr_decimal_compact", - DataTypes::array(DataTypes::decimal(10, 2)), + .add( + "arr_ts_ltz", + DataTypes::array(DataTypes::timestamp_ltz_with_precision(3)), ) - .column( + .add("arr_decimal", DataTypes::array(DataTypes::decimal(10, 2))) + .add( "arr_decimal_big", DataTypes::array(DataTypes::decimal(22, 5)), ) - .column("arr_long_str", DataTypes::array(DataTypes::string())) - .column("arr_float", DataTypes::array(DataTypes::float())) - .column("arr_double", DataTypes::array(DataTypes::double())) - .column("arr_binary", DataTypes::array(DataTypes::binary(4))) - .build() - .expect("Failed to build schema"); - - let table_descriptor = TableDescriptor::builder() - .schema(schema) - .build() - .expect("Failed to build table descriptor"); - - create_table(&admin, &table_path, &table_descriptor).await; + .add("arr_float", DataTypes::array(DataTypes::float())) + .add("arr_double", DataTypes::array(DataTypes::double())) + .add("arr_binary", DataTypes::array(DataTypes::binary(4))) + // MAP rich types + .start_section("map_rich") + .add( + "map_bytes", + DataTypes::map(DataTypes::string(), DataTypes::bytes()), + ) + .add( + "map_decimal", + DataTypes::map(DataTypes::string(), DataTypes::decimal(10, 2)), + ) + .add( + "map_date", + DataTypes::map(DataTypes::string(), DataTypes::date()), + ) + .add( + "map_time", + DataTypes::map(DataTypes::string(), DataTypes::time_with_precision(3)), + ) + .add( + "map_ts", + DataTypes::map(DataTypes::string(), DataTypes::timestamp_with_precision(6)), + ) + .add( + "map_ts_ltz", + DataTypes::map( + DataTypes::string(), + DataTypes::timestamp_ltz_with_precision(3), + ), + ) + .add( + "map_float", + DataTypes::map(DataTypes::string(), DataTypes::float()), + ) + .add( + "map_double", + DataTypes::map(DataTypes::string(), DataTypes::double()), + ) + .add( + "map_bool", + DataTypes::map(DataTypes::string(), DataTypes::boolean()), + ) + .add( + "map_binary", + DataTypes::map(DataTypes::string(), DataTypes::binary(4)), + ) + .add( + "map_int_key", + DataTypes::map(DataTypes::int(), DataTypes::string()), + ) + .start_section("scalars") + .extend(scalar_dt_columns()); + let column_count = plan.len(); - let table = connection - .get_table(&table_path) - .await - .expect("Failed to get table"); + create_table( + &admin, + &table_path, + &TableDescriptor::builder() + .schema(plan.build_schema(None)) + .build() + .expect("table descriptor"), + ) + .await; - let append_writer = table + let table = connection.get_table(&table_path).await.expect("table"); + let writer = table .new_append() - .expect("Failed to create append") + .expect("append") .create_writer() - .expect("Failed to create writer"); + .expect("writer"); - let mut row = GenericRow::new(12); - row.set_field(0, 1_i32); + // Shared scalar values + let dec = Decimal::from_unscaled_long(12345, 10, 2).unwrap(); + let dec_big = Decimal::from_unscaled_bytes(&[66, 237, 18, 59, 11, 216, 31, 4, 244], 22, 5) + .expect("big decimal"); + let date_v = Date::new(20476); + let time_v = Time::new(36_827_123); + let ts_v = TimestampNtz::from_millis_nanos(1_769_163_227_123, 456_000).unwrap(); + let ts_ltz_v = TimestampLtz::new(1_769_163_227_123); + let bytes_v = vec![0xDE_u8, 0xAD, 0xBE, 0xEF]; + let fixed_a = vec![0x01_u8, 0x02, 0x03, 0x04]; + let fixed_b = vec![0xAA_u8, 0xBB, 0xCC, 0xDD]; + + // Row 0 — every column populated. + let mut row0 = GenericRow::new(column_count); + row0.set_field(0, 1_i32); + + // ARRAY basics + row0.set_field(1, make_int_array(&[Some(10), Some(20), Some(30)])); + row0.set_field(2, make_string_array(&[Some("hello"), Some("world")])); + let arr_of_arr_0 = { + let mut w = FlussArrayWriter::new(2, &inner_array_int); + w.write_array(0, &make_int_array(&[Some(1), Some(2)])); + w.write_array(1, &make_int_array(&[Some(3), Some(4)])); + w.complete().expect("arr_of_arr_0") + }; + row0.set_field(3, arr_of_arr_0); + let arr_of_row_0 = { + let mut w = FlussArrayWriter::new(2, &row_seq_label_owned); + let mut e0 = GenericRow::new(2); + e0.set_field(0, 1_i32); + e0.set_field(1, "open"); + w.write_row(0, &e0).expect("e0"); + let mut e1 = GenericRow::new(2); + e1.set_field(0, 2_i32); + e1.set_field(1, "close"); + w.write_row(1, &e1).expect("e1"); + w.complete().expect("arr_of_row_0") + }; + row0.set_field(4, arr_of_row_0); + + // ROW basics + let mut row_basic_0 = GenericRow::new(2); + row_basic_0.set_field(0, 42_i32); + row_basic_0.set_field(1, "hello"); + row0.set_field(5, Datum::Row(Box::new(row_basic_0))); + + let mut row_deep_inner_0 = GenericRow::new(1); + row_deep_inner_0.set_field(0, 99_i32); + let mut row_deep_0 = GenericRow::new(1); + row_deep_0.set_field(0, Datum::Row(Box::new(row_deep_inner_0))); + row0.set_field(6, Datum::Row(Box::new(row_deep_0))); + + let mut row_rich_0 = GenericRow::new(14); + row_rich_0.set_field(0, true); + row_rich_0.set_field(1, 100_000_i32); + row_rich_0.set_field(2, 9_876_543_210_i64); + row_rich_0.set_field(3, f32::INFINITY); + row_rich_0.set_field(4, f64::NAN); + row_rich_0.set_field(5, "hello world"); + row_rich_0.set_field(6, b"binary".as_slice()); + row_rich_0.set_field(7, dec.clone()); + row_rich_0.set_field(8, Datum::Date(Date::new(20476))); + row_rich_0.set_field(9, Datum::Time(Time::new(36_827_123))); + row_rich_0.set_field( + 10, + Datum::TimestampNtz(TimestampNtz::new(1_769_163_227_123)), + ); + row_rich_0.set_field( + 11, + Datum::TimestampLtz(TimestampLtz::new(1_769_163_227_456)), + ); + row_rich_0.set_field(12, b"\x01\x02\x03\x04".as_slice()); + row_rich_0.set_field(13, make_int_array(&[Some(7), None, Some(11)])); + row0.set_field(7, Datum::Row(Box::new(row_rich_0))); + + // MAP basics + let map_string_int_0 = { + let mut w = FlussMapWriter::new(3, &DataTypes::string(), &DataTypes::int()); + w.write_entry("a".into(), 1.into()).unwrap(); + w.write_entry("b".into(), Datum::Null).unwrap(); + w.write_entry("c".into(), 3.into()).unwrap(); + w.complete().expect("map_string_int_0") + }; + row0.set_field(8, Datum::Map(map_string_int_0)); + + let map_of_row_0 = { + let mut e0 = GenericRow::new(2); + e0.set_field(0, 1_i32); + e0.set_field(1, "open"); + let mut e1 = GenericRow::new(2); + e1.set_field(0, 2_i32); + e1.set_field(1, "close"); + let mut w = FlussMapWriter::new(2, &DataTypes::string(), &row_seq_label_owned); + w.write_entry("e0".into(), Datum::Row(Box::new(e0))) + .unwrap(); + w.write_entry("e1".into(), Datum::Row(Box::new(e1))) + .unwrap(); + w.complete().expect("map_of_row_0") + }; + row0.set_field(9, Datum::Map(map_of_row_0)); + + let map_of_map_0 = { + let g1 = { + let mut w = FlussMapWriter::new(2, &DataTypes::string(), &DataTypes::int()); + w.write_entry("a".into(), 1.into()).unwrap(); + w.write_entry("b".into(), 2.into()).unwrap(); + w.complete().expect("g1") + }; + let g2 = { + let mut w = FlussMapWriter::new(1, &DataTypes::string(), &DataTypes::int()); + w.write_entry("c".into(), 3.into()).unwrap(); + w.complete().expect("g2") + }; + let mut w = FlussMapWriter::new(2, &DataTypes::string(), &inner_map_string_int); + w.write_entry("g1".into(), Datum::Map(g1)).unwrap(); + w.write_entry("g2".into(), Datum::Map(g2)).unwrap(); + w.complete().expect("map_of_map_0") + }; + row0.set_field(10, Datum::Map(map_of_map_0)); + + let map_of_array_0 = { + let primes = make_int_array(&[Some(2), Some(3), Some(5)]); + let squares = make_int_array(&[Some(1), Some(4)]); + let mut w = FlussMapWriter::new(2, &DataTypes::string(), &inner_array_int); + w.write_entry("primes".into(), Datum::Array(primes)) + .unwrap(); + w.write_entry("squares".into(), Datum::Array(squares)) + .unwrap(); + w.complete().expect("map_of_array_0") + }; + row0.set_field(11, Datum::Map(map_of_array_0)); + + let array_of_map_0 = { + let m0 = { + let mut w = FlussMapWriter::new(2, &DataTypes::string(), &DataTypes::int()); + w.write_entry("x".into(), 1.into()).unwrap(); + w.write_entry("y".into(), 2.into()).unwrap(); + w.complete().expect("m0") + }; + let m1 = { + let mut w = FlussMapWriter::new(1, &DataTypes::string(), &DataTypes::int()); + w.write_entry("z".into(), 9.into()).unwrap(); + w.complete().expect("m1") + }; + let mut w = FlussArrayWriter::new(2, &inner_map_string_int); + w.write_map(0, &m0); + w.write_map(1, &m1); + w.complete().expect("array_of_map_0") + }; + row0.set_field(12, array_of_map_0); - // col 1: arr_bytes — binary with null element - let arr_bytes = { + // ARRAY rich types + let arr_bytes_0 = { let mut w = FlussArrayWriter::new(2, &DataTypes::bytes()); - w.write_binary_bytes(0, elem_bytes); + w.write_binary_bytes(0, &bytes_v); w.set_null_at(1); - w.complete().expect("arr_bytes") + w.complete().expect("arr_bytes_0") }; - row.set_field(1, arr_bytes); - - // col 2: arr_date - let arr_date = { + row0.set_field(13, arr_bytes_0); + let arr_date_0 = { let mut w = FlussArrayWriter::new(2, &DataTypes::date()); - w.write_date(0, d); + w.write_date(0, date_v); w.set_null_at(1); - w.complete().expect("arr_date") + w.complete().expect("arr_date_0") }; - row.set_field(2, arr_date); - - // col 3: arr_time - let arr_time = { + row0.set_field(14, arr_date_0); + let arr_time_0 = { let mut w = FlussArrayWriter::new(2, &DataTypes::time_with_precision(3)); - w.write_time(0, t); + w.write_time(0, time_v); w.set_null_at(1); - w.complete().expect("arr_time") + w.complete().expect("arr_time_0") }; - row.set_field(3, arr_time); - - // col 4: arr_ts_compact — compact timestamp (precision 6, millis+nanos) - let arr_ts_compact = { + row0.set_field(15, arr_time_0); + let arr_ts_0 = { let mut w = FlussArrayWriter::new(2, &DataTypes::timestamp_with_precision(6)); - w.write_timestamp_ntz(0, &ts_compact, 6); + w.write_timestamp_ntz(0, &ts_v, 6); w.set_null_at(1); - w.complete().expect("arr_ts_compact") + w.complete().expect("arr_ts_0") }; - row.set_field(4, arr_ts_compact); - - // col 5: arr_ts_nano — non-compact timestamp (precision 9) - let arr_ts_nano = { - let mut w = FlussArrayWriter::new(1, &DataTypes::timestamp_with_precision(9)); - w.write_timestamp_ntz(0, &ts_nano, 9); - w.complete().expect("arr_ts_nano") + row0.set_field(16, arr_ts_0); + let arr_ts_ltz_0 = { + let mut w = FlussArrayWriter::new(2, &DataTypes::timestamp_ltz_with_precision(3)); + w.write_timestamp_ltz(0, &ts_ltz_v, 3); + w.set_null_at(1); + w.complete().expect("arr_ts_ltz_0") }; - row.set_field(5, arr_ts_nano); - - // col 6: arr_decimal_compact — compact decimal (precision 10) - let arr_decimal_compact = { + row0.set_field(17, arr_ts_ltz_0); + let arr_decimal_0 = { let mut w = FlussArrayWriter::new(2, &DataTypes::decimal(10, 2)); - w.write_decimal(0, &dec_compact, 10); + w.write_decimal(0, &dec, 10); w.set_null_at(1); - w.complete().expect("arr_decimal_compact") + w.complete().expect("arr_decimal_0") }; - row.set_field(6, arr_decimal_compact); - - // col 7: arr_decimal_big — non-compact decimal (precision 22) - let arr_decimal_big = { + row0.set_field(18, arr_decimal_0); + let arr_decimal_big_0 = { let mut w = FlussArrayWriter::new(1, &DataTypes::decimal(22, 5)); w.write_decimal(0, &dec_big, 22); - w.complete().expect("arr_decimal_big") - }; - row.set_field(7, arr_decimal_big); - - // col 8: arr_long_str — heap-backed strings (>= 8 bytes) - let arr_long_str = { - let mut w = FlussArrayWriter::new(2, &DataTypes::string()); - w.write_string(0, "abcdefghi"); - w.write_string(1, "longstring_here"); - w.complete().expect("arr_long_str") + w.complete().expect("arr_decimal_big_0") }; - row.set_field(8, arr_long_str); - - // col 9: arr_float — IEEE 754 specials - let arr_float = { + row0.set_field(19, arr_decimal_big_0); + let arr_float_0 = { let mut w = FlussArrayWriter::new(3, &DataTypes::float()); w.write_float(0, f32::NAN); w.write_float(1, f32::INFINITY); w.write_float(2, f32::NEG_INFINITY); - w.complete().expect("arr_float") + w.complete().expect("arr_float_0") }; - row.set_field(9, arr_float); - - // col 10: arr_double — IEEE 754 specials - let arr_double = { + row0.set_field(20, arr_float_0); + let arr_double_0 = { let mut w = FlussArrayWriter::new(3, &DataTypes::double()); w.write_double(0, f64::NAN); w.write_double(1, f64::INFINITY); w.write_double(2, f64::NEG_INFINITY); - w.complete().expect("arr_double") + w.complete().expect("arr_double_0") }; - row.set_field(10, arr_double); - - // col 11: arr_binary — fixed-size binary(4) - let arr_binary = { + row0.set_field(21, arr_double_0); + let arr_binary_0 = { let mut w = FlussArrayWriter::new(2, &DataTypes::binary(4)); w.write_binary_bytes(0, &fixed_a); w.write_binary_bytes(1, &fixed_b); - w.complete().expect("arr_binary") + w.complete().expect("arr_binary_0") + }; + row0.set_field(22, arr_binary_0); + + // MAP rich types + let map_bytes_0 = { + let mut w = FlussMapWriter::new(1, &DataTypes::string(), &DataTypes::bytes()); + w.write_entry("blob".into(), bytes_v.as_slice().into()) + .unwrap(); + w.complete().expect("map_bytes_0") + }; + row0.set_field(23, Datum::Map(map_bytes_0)); + let map_decimal_0 = { + let mut w = FlussMapWriter::new(1, &DataTypes::string(), &DataTypes::decimal(10, 2)); + w.write_entry("price".into(), Datum::Decimal(dec.clone())) + .unwrap(); + w.complete().expect("map_decimal_0") + }; + row0.set_field(24, Datum::Map(map_decimal_0)); + let map_date_0 = { + let mut w = FlussMapWriter::new(1, &DataTypes::string(), &DataTypes::date()); + w.write_entry("d".into(), Datum::Date(date_v)).unwrap(); + w.complete().expect("map_date_0") + }; + row0.set_field(25, Datum::Map(map_date_0)); + let map_time_0 = { + let mut w = + FlussMapWriter::new(1, &DataTypes::string(), &DataTypes::time_with_precision(3)); + w.write_entry("t".into(), Datum::Time(time_v)).unwrap(); + w.complete().expect("map_time_0") + }; + row0.set_field(26, Datum::Map(map_time_0)); + let map_ts_0 = { + let mut w = FlussMapWriter::new( + 1, + &DataTypes::string(), + &DataTypes::timestamp_with_precision(6), + ); + w.write_entry("ts".into(), Datum::TimestampNtz(ts_v)) + .unwrap(); + w.complete().expect("map_ts_0") + }; + row0.set_field(27, Datum::Map(map_ts_0)); + let map_ts_ltz_0 = { + let mut w = FlussMapWriter::new( + 1, + &DataTypes::string(), + &DataTypes::timestamp_ltz_with_precision(3), + ); + w.write_entry("ts".into(), Datum::TimestampLtz(ts_ltz_v)) + .unwrap(); + w.complete().expect("map_ts_ltz_0") + }; + row0.set_field(28, Datum::Map(map_ts_ltz_0)); + let map_float_0 = { + let mut w = FlussMapWriter::new(2, &DataTypes::string(), &DataTypes::float()); + w.write_entry("nan".into(), f32::NAN.into()).unwrap(); + w.write_entry("inf".into(), f32::INFINITY.into()).unwrap(); + w.complete().expect("map_float_0") + }; + row0.set_field(29, Datum::Map(map_float_0)); + let map_double_0 = { + let mut w = FlussMapWriter::new(1, &DataTypes::string(), &DataTypes::double()); + w.write_entry("pi".into(), std::f64::consts::PI.into()) + .unwrap(); + w.complete().expect("map_double_0") + }; + row0.set_field(30, Datum::Map(map_double_0)); + let map_bool_0 = { + let mut w = FlussMapWriter::new(2, &DataTypes::string(), &DataTypes::boolean()); + w.write_entry("t".into(), true.into()).unwrap(); + w.write_entry("f".into(), false.into()).unwrap(); + w.complete().expect("map_bool_0") + }; + row0.set_field(31, Datum::Map(map_bool_0)); + let map_binary_0 = { + let mut w = FlussMapWriter::new(1, &DataTypes::string(), &DataTypes::binary(4)); + w.write_entry("k".into(), fixed_a.as_slice().into()) + .unwrap(); + w.complete().expect("map_binary_0") }; - row.set_field(11, arr_binary); + row0.set_field(32, Datum::Map(map_binary_0)); + let map_int_key_0 = { + let mut w = FlussMapWriter::new(2, &DataTypes::int(), &DataTypes::string()); + w.write_entry(1.into(), "one".into()).unwrap(); + w.write_entry(2.into(), "two".into()).unwrap(); + w.complete().expect("map_int_key_0") + }; + row0.set_field(33, Datum::Map(map_int_key_0)); + + // Scalar values + let scalar_tinyint = 127_i8; + let scalar_smallint = 32_767_i16; + let scalar_bigint = 9_223_372_036_854_775_807_i64; + let scalar_float = std::f32::consts::PI; + let scalar_double = std::f64::consts::E; + let scalar_char = "hello"; + let scalar_string = "world of fluss rust client"; + let scalar_time_s = Time::new(36_827_000); + let scalar_time_ms = Time::new(36_827_123); + let scalar_time_us = Time::new(86_399_999); + let scalar_time_ns = Time::new(1); + let scalar_ts_s = TimestampNtz::new(1_769_163_227_000); + let scalar_ts_ms = TimestampNtz::new(1_769_163_227_123); + let scalar_ts_us = TimestampNtz::from_millis_nanos(1_769_163_227_123, 456_000).unwrap(); + let scalar_ts_ns = TimestampNtz::from_millis_nanos(1_769_163_227_123, 999_999).unwrap(); + let scalar_ts_ltz_s = TimestampLtz::new(1_769_163_227_000); + let scalar_ts_ltz_ms = TimestampLtz::new(1_769_163_227_123); + let scalar_ts_ltz_us = TimestampLtz::from_millis_nanos(1_769_163_227_123, 456_000).unwrap(); + let scalar_ts_ltz_ns = TimestampLtz::from_millis_nanos(1_769_163_227_123, 999_999).unwrap(); + let scalar_bytes_top: Vec = b"binary data".to_vec(); + let scalar_binary_top: Vec = vec![0xDE, 0xAD, 0xBE, 0xEF]; + let scalar_ts_us_neg = TimestampNtz::from_millis_nanos(-301_234_154_877, 456_000).unwrap(); + let scalar_ts_ns_neg = TimestampNtz::from_millis_nanos(-301_234_154_877, 999_999).unwrap(); + let scalar_ts_ltz_us_neg = + TimestampLtz::from_millis_nanos(-301_234_154_877, 456_000).unwrap(); + let scalar_ts_ltz_ns_neg = + TimestampLtz::from_millis_nanos(-301_234_154_877, 999_999).unwrap(); + + row0.set_field(34, scalar_tinyint); + row0.set_field(35, scalar_smallint); + row0.set_field(36, scalar_bigint); + row0.set_field(37, scalar_float); + row0.set_field(38, scalar_double); + row0.set_field(39, true); + row0.set_field(40, scalar_char); + row0.set_field(41, scalar_string); + row0.set_field(42, dec.clone()); + row0.set_field(43, Datum::Date(date_v)); + row0.set_field(44, scalar_time_s); + row0.set_field(45, scalar_time_ms); + row0.set_field(46, scalar_time_us); + row0.set_field(47, scalar_time_ns); + row0.set_field(48, scalar_ts_s); + row0.set_field(49, scalar_ts_ms); + row0.set_field(50, scalar_ts_us); + row0.set_field(51, scalar_ts_ns); + row0.set_field(52, scalar_ts_ltz_s); + row0.set_field(53, scalar_ts_ltz_ms); + row0.set_field(54, scalar_ts_ltz_us); + row0.set_field(55, scalar_ts_ltz_ns); + row0.set_field(56, scalar_bytes_top.as_slice()); + row0.set_field(57, scalar_binary_top.as_slice()); + row0.set_field(58, scalar_ts_us_neg); + row0.set_field(59, scalar_ts_ns_neg); + row0.set_field(60, scalar_ts_ltz_us_neg); + row0.set_field(61, scalar_ts_ltz_ns_neg); + + // Row 1 — ARRAY/MAP basic-shape edge cases (empty, null elements). + let mut row1 = GenericRow::new(column_count); + row1.set_field(0, 2_i32); + row1.set_field(1, make_int_array(&[])); + row1.set_field(2, make_string_array(&[None])); + let arr_of_arr_1 = { + let mut w = FlussArrayWriter::new(3, &inner_array_int); + w.write_array(0, &make_int_array(&[Some(5)])); + w.set_null_at(1); + w.write_array(2, &make_int_array(&[])); + w.complete().expect("arr_of_arr_1") + }; + row1.set_field(3, arr_of_arr_1); + let arr_of_row_1 = { + let mut w = FlussArrayWriter::new(3, &row_seq_label_owned); + let mut e0 = GenericRow::new(2); + e0.set_field(0, 7_i32); + e0.set_field(1, "x"); + w.write_row(0, &e0).expect("e0"); + w.set_null_at(1); + let mut e2 = GenericRow::new(2); + e2.set_field(0, 8_i32); + e2.set_field(1, "y"); + w.write_row(2, &e2).expect("e2"); + w.complete().expect("arr_of_row_1") + }; + row1.set_field(4, arr_of_row_1); + for i in plan.section_range("row_basics") { + row1.set_field(i, Datum::Null); + } + // Empty MAP + let empty_map = FlussMapWriter::new(0, &DataTypes::string(), &DataTypes::int()) + .complete() + .expect("empty_map"); + row1.set_field(8, Datum::Map(empty_map)); + for i in (plan.idx("map_string_int") + 1)..plan.len() { + row1.set_field(i, Datum::Null); + } - append_writer.append(&row).expect("append"); - append_writer.flush().await.expect("Failed to flush"); + // Row 2 — every column NULL. + let mut row2 = GenericRow::new(column_count); + row2.set_field(0, 3_i32); + for i in 1..column_count { + row2.set_field(i, Datum::Null); + } - let records = scan_table(&table, |scan| scan).await; - assert_eq!(records.len(), 1); - let r = records[0].row(); + writer.append(&row0).expect("append row0"); + writer.append(&row1).expect("append row1"); + writer.append(&row2).expect("append row2"); + writer.flush().await.expect("flush"); - // Verify arr_bytes - let ab = r.get_array(1).unwrap(); - assert_eq!(ab.size(), 2); - assert_eq!(ab.get_binary(0).unwrap(), elem_bytes); - assert!(ab.is_null_at(1)); + let records = scan_table(&table, |scan| scan).await; + assert_eq!(records.len(), 3); + let r0 = records[0].row(); + let r1 = records[1].row(); + let r2 = records[2].row(); - // Verify arr_date - let ad = r.get_array(2).unwrap(); - assert_eq!(ad.size(), 2); - assert_eq!(ad.get_date(0).unwrap().get_inner(), d.get_inner()); - assert!(ad.is_null_at(1)); + assert_eq!(r0.get_int(0).unwrap(), 1); + assert_eq!(r1.get_int(0).unwrap(), 2); + assert_eq!(r2.get_int(0).unwrap(), 3); - // Verify arr_time - let at = r.get_array(3).unwrap(); - assert_eq!(at.size(), 2); - assert_eq!(at.get_time(0).unwrap().get_inner(), t.get_inner()); - assert!(at.is_null_at(1)); + // === ARRAY: basic shapes === + let arr_int = r0.get_array(1).unwrap(); + assert_eq!(arr_int.size(), 3); + assert_eq!(arr_int.get_int(0).unwrap(), 10); + assert_eq!(arr_int.get_int(2).unwrap(), 30); + let arr_string = r0.get_array(2).unwrap(); + assert_eq!(arr_string.size(), 2); + assert_eq!(arr_string.get_string(0).unwrap(), "hello"); + assert_eq!(arr_string.get_string(1).unwrap(), "world"); + let arr_of_arr = r0.get_array(3).unwrap(); + assert_eq!(arr_of_arr.size(), 2); + let inner = arr_of_arr.get_array(0).unwrap(); + assert_eq!(inner.size(), 2); + assert_eq!(inner.get_int(0).unwrap(), 1); + assert_eq!(inner.get_int(1).unwrap(), 2); + let inner = arr_of_arr.get_array(1).unwrap(); + assert_eq!(inner.get_int(0).unwrap(), 3); + assert_eq!(inner.get_int(1).unwrap(), 4); + + // === ARRAY: edge cases on row 1 (empty + null elements + null inner) === + assert_eq!(r1.get_array(1).unwrap().size(), 0); + let arr_string_r1 = r1.get_array(2).unwrap(); + assert_eq!(arr_string_r1.size(), 1); + assert!(arr_string_r1.is_null_at(0)); + let arr_of_arr_r1 = r1.get_array(3).unwrap(); + assert_eq!(arr_of_arr_r1.size(), 3); + let aa0 = arr_of_arr_r1.get_array(0).unwrap(); + assert_eq!(aa0.size(), 1); + assert_eq!(aa0.get_int(0).unwrap(), 5); + assert!(arr_of_arr_r1.is_null_at(1)); + assert_eq!(arr_of_arr_r1.get_array(2).unwrap().size(), 0); + + // === ARRAY: null whole column on row 2 === + assert!(r2.is_null_at(1).unwrap()); + assert!(r2.is_null_at(2).unwrap()); + assert!(r2.is_null_at(3).unwrap()); - // Verify arr_ts_compact - let ats = r.get_array(4).unwrap(); - assert_eq!(ats.size(), 2); - let read_ts_compact = ats.get_timestamp_ntz(0, 6).unwrap(); + // === ARRAY: row 0 + row 1 with null element + row 2 null whole === + let aor0 = r0.get_array(4).unwrap(); + assert_eq!(aor0.size(), 2); + let e0 = aor0.get_row(0, &row_seq_label).unwrap(); + assert_eq!(e0.get_int(0).unwrap(), 1); + assert_eq!(e0.get_string(1).unwrap(), "open"); + let e1 = aor0.get_row(1, &row_seq_label).unwrap(); + assert_eq!(e1.get_int(0).unwrap(), 2); + assert_eq!(e1.get_string(1).unwrap(), "close"); + let aor1 = r1.get_array(4).unwrap(); + assert_eq!(aor1.size(), 3); + let e0 = aor1.get_row(0, &row_seq_label).unwrap(); + assert_eq!(e0.get_int(0).unwrap(), 7); + assert!(aor1.is_null_at(1)); + let e2 = aor1.get_row(2, &row_seq_label).unwrap(); + assert_eq!(e2.get_int(0).unwrap(), 8); + assert!(r2.is_null_at(4).unwrap()); + + // === ROW: basic + deep + rich types on row 0; row 2 null === + let rb = r0.get_row(5).unwrap(); + assert_eq!(rb.get_int(0).unwrap(), 42); + assert_eq!(rb.get_string(1).unwrap(), "hello"); + let rd = r0.get_row(6).unwrap(); + let rd_inner = rd.get_row(0).unwrap(); + assert_eq!(rd_inner.get_int(0).unwrap(), 99); + let rr = r0.get_row(7).unwrap(); + assert!(rr.get_boolean(0).unwrap()); + assert_eq!(rr.get_int(1).unwrap(), 100_000); + assert_eq!(rr.get_long(2).unwrap(), 9_876_543_210); + assert_f32_special(rr.get_float(3).unwrap(), f32::INFINITY); + assert!(rr.get_double(4).unwrap().is_nan()); + assert_eq!(rr.get_string(5).unwrap(), "hello world"); + assert_eq!(rr.get_bytes(6).unwrap(), b"binary"); + assert_eq!(rr.get_decimal(7, 10, 2).unwrap(), dec); + assert_eq!(rr.get_date(8).unwrap().get_inner(), 20476); + assert_eq!(rr.get_time(9).unwrap().get_inner(), 36_827_123); + assert_eq!( + rr.get_timestamp_ntz(10, 6).unwrap().get_millisecond(), + 1_769_163_227_123 + ); + assert_eq!( + rr.get_timestamp_ltz(11, 6).unwrap().get_epoch_millisecond(), + 1_769_163_227_456 + ); + assert_eq!(rr.get_binary(12, 4).unwrap(), b"\x01\x02\x03\x04"); + let f_arr = rr.get_array(13).unwrap(); + assert_eq!(f_arr.size(), 3); + assert_eq!(f_arr.get_int(0).unwrap(), 7); + assert!(f_arr.is_null_at(1)); + assert!(r2.is_null_at(5).unwrap()); + assert!(r2.is_null_at(6).unwrap()); + assert!(r2.is_null_at(7).unwrap()); + + // === MAP: basic (with null value) + empty (row 1) + null (row 2) === + let m = r0.get_map(8).unwrap(); + assert_eq!(m.size(), 3); + assert_eq!(m.get(&Datum::from("a")).unwrap(), Some(Datum::from(1_i32))); + assert_eq!(m.get(&Datum::from("b")).unwrap(), Some(Datum::Null)); + assert_eq!(m.get(&Datum::from("c")).unwrap(), Some(Datum::from(3_i32))); + assert_eq!(r1.get_map(8).unwrap().size(), 0); + assert!(r2.is_null_at(8).unwrap()); + + // === MAP === + let m = r0.get_map(9).unwrap(); + assert_eq!(m.size(), 2); + let keys = m.key_array(); + let values = m.value_array(); + assert_eq!(keys.get_string(0).unwrap(), "e0"); + let v0 = values.get_row(0, &row_seq_label).unwrap(); + assert_eq!(v0.get_int(0).unwrap(), 1); + assert_eq!(v0.get_string(1).unwrap(), "open"); + assert_eq!(keys.get_string(1).unwrap(), "e1"); + let v1 = values.get_row(1, &row_seq_label).unwrap(); + assert_eq!(v1.get_int(0).unwrap(), 2); + assert_eq!(v1.get_string(1).unwrap(), "close"); + + // === MAP === + let m = r0.get_map(10).unwrap(); + assert_eq!(m.size(), 2); + let g1 = m + .value_array() + .get_map(0, &DataTypes::string(), &DataTypes::int()) + .unwrap(); + assert_eq!(g1.size(), 2); + assert_eq!(g1.get(&Datum::from("a")).unwrap(), Some(Datum::from(1_i32))); + let g2 = m + .value_array() + .get_map(1, &DataTypes::string(), &DataTypes::int()) + .unwrap(); + assert_eq!(g2.size(), 1); + assert_eq!(g2.get(&Datum::from("c")).unwrap(), Some(Datum::from(3_i32))); + + // === MAP + ARRAY === + let m = r0.get_map(11).unwrap(); + assert_eq!(m.size(), 2); + let primes = m.value_array().get_array(0).unwrap(); + assert_eq!(primes.size(), 3); + assert_eq!(primes.get_int(2).unwrap(), 5); + let am = r0.get_array(12).unwrap(); + assert_eq!(am.size(), 2); + let am0 = am + .get_map(0, &DataTypes::string(), &DataTypes::int()) + .unwrap(); + assert_eq!(am0.size(), 2); + let am1 = am + .get_map(1, &DataTypes::string(), &DataTypes::int()) + .unwrap(); + assert_eq!(am1.size(), 1); assert_eq!( - read_ts_compact.get_millisecond(), - ts_compact.get_millisecond() + am1.get(&Datum::from("z")).unwrap(), + Some(Datum::from(9_i32)) ); + + // === ARRAY rich types === + let ab = r0.get_array(13).unwrap(); + assert_eq!(ab.size(), 2); + assert_eq!(ab.get_bytes(0).unwrap(), bytes_v.as_slice()); + assert!(ab.is_null_at(1)); + let ad = r0.get_array(14).unwrap(); + assert_eq!(ad.get_date(0).unwrap().get_inner(), date_v.get_inner()); + assert!(ad.is_null_at(1)); + let at = r0.get_array(15).unwrap(); + assert_eq!(at.get_time(0).unwrap().get_inner(), time_v.get_inner()); + assert!(at.is_null_at(1)); + let ats = r0.get_array(16).unwrap(); + let read_ts = ats.get_timestamp_ntz(0, 6).unwrap(); + assert_eq!(read_ts.get_millisecond(), ts_v.get_millisecond()); assert_eq!( - read_ts_compact.get_nano_of_millisecond(), - ts_compact.get_nano_of_millisecond() + read_ts.get_nano_of_millisecond(), + ts_v.get_nano_of_millisecond() ); assert!(ats.is_null_at(1)); - - // Verify arr_ts_nano - let ats_nano = r.get_array(5).unwrap(); - assert_eq!(ats_nano.size(), 1); - let read_ts_nano = ats_nano.get_timestamp_ntz(0, 9).unwrap(); - assert_eq!(read_ts_nano.get_millisecond(), ts_nano.get_millisecond()); + let atl = r0.get_array(17).unwrap(); assert_eq!( - read_ts_nano.get_nano_of_millisecond(), - ts_nano.get_nano_of_millisecond() + atl.get_timestamp_ltz(0, 3).unwrap().get_epoch_millisecond(), + ts_ltz_v.get_epoch_millisecond() ); - - // Verify arr_decimal_compact - let adc = r.get_array(6).unwrap(); - assert_eq!(adc.size(), 2); - assert_eq!(adc.get_decimal(0, 10, 2).unwrap(), dec_compact); + assert!(atl.is_null_at(1)); + let adc = r0.get_array(18).unwrap(); + assert_eq!(adc.get_decimal(0, 10, 2).unwrap(), dec); assert!(adc.is_null_at(1)); - - // Verify arr_decimal_big - let adb = r.get_array(7).unwrap(); - assert_eq!(adb.size(), 1); + let adb = r0.get_array(19).unwrap(); assert_eq!(adb.get_decimal(0, 22, 5).unwrap(), dec_big); - - // Verify arr_long_str - let als = r.get_array(8).unwrap(); - assert_eq!(als.size(), 2); - assert_eq!(als.get_string(0).unwrap(), "abcdefghi"); - assert_eq!(als.get_string(1).unwrap(), "longstring_here"); - - // Verify arr_float — IEEE 754 specials - let af = r.get_array(9).unwrap(); + let af = r0.get_array(20).unwrap(); assert_eq!(af.size(), 3); assert_f32_special(af.get_float(0).unwrap(), f32::NAN); assert_f32_special(af.get_float(1).unwrap(), f32::INFINITY); assert_f32_special(af.get_float(2).unwrap(), f32::NEG_INFINITY); - - // Verify arr_double — IEEE 754 specials - let adbl = r.get_array(10).unwrap(); - assert_eq!(adbl.size(), 3); + let adbl = r0.get_array(21).unwrap(); assert_f64_special(adbl.get_double(0).unwrap(), f64::NAN); assert_f64_special(adbl.get_double(1).unwrap(), f64::INFINITY); assert_f64_special(adbl.get_double(2).unwrap(), f64::NEG_INFINITY); - - // Verify arr_binary — fixed-size binary(4) - let fb: FlussArray = r.get_array(11).unwrap(); - assert_eq!(fb.size(), 2); + let fb: FlussArray = r0.get_array(22).unwrap(); assert_eq!(fb.get_binary(0).unwrap(), fixed_a.as_slice()); assert_eq!(fb.get_binary(1).unwrap(), fixed_b.as_slice()); - admin - .drop_table(&table_path, false) - .await - .expect("Failed to drop table"); + // === MAP rich types === + let m = r0.get_map(23).unwrap(); + assert_eq!(m.value_array().get_bytes(0).unwrap(), bytes_v.as_slice()); + let m = r0.get_map(24).unwrap(); + assert_eq!(m.value_array().get_decimal(0, 10, 2).unwrap(), dec); + let m = r0.get_map(25).unwrap(); + assert_eq!( + m.value_array().get_date(0).unwrap().get_inner(), + date_v.get_inner() + ); + let m = r0.get_map(26).unwrap(); + assert_eq!( + m.value_array().get_time(0).unwrap().get_inner(), + time_v.get_inner() + ); + let m = r0.get_map(27).unwrap(); + let read_ts = m.value_array().get_timestamp_ntz(0, 6).unwrap(); + assert_eq!(read_ts.get_millisecond(), ts_v.get_millisecond()); + let m = r0.get_map(28).unwrap(); + let read_ltz = m.value_array().get_timestamp_ltz(0, 3).unwrap(); + assert_eq!( + read_ltz.get_epoch_millisecond(), + ts_ltz_v.get_epoch_millisecond() + ); + let m = r0.get_map(29).unwrap(); + assert!(m.value_array().get_float(0).unwrap().is_nan()); + assert!(m.value_array().get_float(1).unwrap().is_infinite()); + let m = r0.get_map(30).unwrap(); + assert!( + (m.value_array().get_double(0).unwrap() - std::f64::consts::PI).abs() < f64::EPSILON + ); + let m = r0.get_map(31).unwrap(); + assert!(m.value_array().get_boolean(0).unwrap()); + assert!(!m.value_array().get_boolean(1).unwrap()); + let m = r0.get_map(32).unwrap(); + assert_eq!(m.value_array().get_binary(0).unwrap(), fixed_a.as_slice()); + let m = r0.get_map(33).unwrap(); + assert_eq!(m.size(), 2); + assert_eq!(m.key_array().get_int(0).unwrap(), 1); + assert_eq!(m.value_array().get_string(0).unwrap(), "one"); + + // === Convenience API: entries / get / key_type / value_type === + // (exercised on row 0's map_string_int at index 8) + let m = r0.get_map(8).unwrap(); + assert_eq!(m.key_type(), &DataTypes::string().as_non_nullable()); + assert_eq!(m.value_type(), &DataTypes::int()); + let mut got: HashMap> = HashMap::with_capacity(m.size()); + for entry in m.entries() { + let (k, v) = entry.expect("decode entry"); + let key = match k { + Datum::String(s) => s.into_owned(), + other => panic!("unexpected key variant: {other:?}"), + }; + let value = match v { + Datum::Int32(i) => Some(i), + Datum::Null => None, + other => panic!("unexpected value variant: {other:?}"), + }; + got.insert(key, value); + } + let expected: HashMap> = HashMap::from([ + ("a".to_string(), Some(1)), + ("b".to_string(), None), + ("c".to_string(), Some(3)), + ]); + assert_eq!(got, expected); + assert_eq!(m.get(&Datum::from("a")).unwrap(), Some(Datum::from(1_i32))); + assert!(m.get(&Datum::from("missing")).unwrap().is_none()); + + // === Bulk write via FlussMapWriter::extend (covered with a fresh map) === + let src: HashMap<&str, i32> = HashMap::from([("a", 1), ("b", 2), ("c", 3)]); + let extend_built = { + let mut w = FlussMapWriter::new(src.len(), &DataTypes::string(), &DataTypes::int()); + w.extend(src.clone()).expect("extend"); + w.complete().expect("extend-complete") + }; + assert_eq!(extend_built.size(), src.len()); + let extend_b = extend_built.get(&Datum::from("b")).unwrap(); + assert_eq!(extend_b, Some(Datum::from(2_i32))); + + // === Scalars: integer family === + assert_eq!(r0.get_byte(34).unwrap(), scalar_tinyint); + assert_eq!(r0.get_short(35).unwrap(), scalar_smallint); + assert_eq!(r0.get_long(36).unwrap(), scalar_bigint); + + // === Scalars: floating point === + assert!((r0.get_float(37).unwrap() - scalar_float).abs() < f32::EPSILON); + assert!((r0.get_double(38).unwrap() - scalar_double).abs() < f64::EPSILON); + + // === Scalars: boolean / char / string === + assert!(r0.get_boolean(39).unwrap()); + assert_eq!(r0.get_char(40, 10).unwrap(), scalar_char); + assert_eq!(r0.get_string(41).unwrap(), scalar_string); + + // === Scalars: decimal / date === + assert_eq!(r0.get_decimal(42, 10, 2).unwrap(), dec); + assert_eq!(r0.get_date(43).unwrap().get_inner(), date_v.get_inner()); + + // === Scalars: time across all four precisions === + assert_eq!( + r0.get_time(44).unwrap().get_inner(), + scalar_time_s.get_inner() + ); + assert_eq!( + r0.get_time(45).unwrap().get_inner(), + scalar_time_ms.get_inner() + ); + assert_eq!( + r0.get_time(46).unwrap().get_inner(), + scalar_time_us.get_inner() + ); + assert_eq!( + r0.get_time(47).unwrap().get_inner(), + scalar_time_ns.get_inner() + ); + + // === Scalars: timestamp across all four precisions === + assert_eq!( + r0.get_timestamp_ntz(48, 0).unwrap().get_millisecond(), + scalar_ts_s.get_millisecond() + ); + assert_eq!( + r0.get_timestamp_ntz(49, 3).unwrap().get_millisecond(), + scalar_ts_ms.get_millisecond() + ); + let read_us = r0.get_timestamp_ntz(50, 6).unwrap(); + assert_eq!(read_us.get_millisecond(), scalar_ts_us.get_millisecond()); + assert_eq!( + read_us.get_nano_of_millisecond(), + scalar_ts_us.get_nano_of_millisecond() + ); + let read_ns = r0.get_timestamp_ntz(51, 9).unwrap(); + assert_eq!(read_ns.get_millisecond(), scalar_ts_ns.get_millisecond()); + assert_eq!( + read_ns.get_nano_of_millisecond(), + scalar_ts_ns.get_nano_of_millisecond() + ); + + // === Scalars: timestamp_ltz across all four precisions === + assert_eq!( + r0.get_timestamp_ltz(52, 0).unwrap().get_epoch_millisecond(), + scalar_ts_ltz_s.get_epoch_millisecond() + ); + assert_eq!( + r0.get_timestamp_ltz(53, 3).unwrap().get_epoch_millisecond(), + scalar_ts_ltz_ms.get_epoch_millisecond() + ); + let read_ltz_us = r0.get_timestamp_ltz(54, 6).unwrap(); + assert_eq!( + read_ltz_us.get_epoch_millisecond(), + scalar_ts_ltz_us.get_epoch_millisecond() + ); + assert_eq!( + read_ltz_us.get_nano_of_millisecond(), + scalar_ts_ltz_us.get_nano_of_millisecond() + ); + let read_ltz_ns = r0.get_timestamp_ltz(55, 9).unwrap(); + assert_eq!( + read_ltz_ns.get_epoch_millisecond(), + scalar_ts_ltz_ns.get_epoch_millisecond() + ); + assert_eq!( + read_ltz_ns.get_nano_of_millisecond(), + scalar_ts_ltz_ns.get_nano_of_millisecond() + ); + + // === Scalars: bytes + fixed binary === + assert_eq!(r0.get_bytes(56).unwrap(), scalar_bytes_top.as_slice()); + assert_eq!(r0.get_binary(57, 4).unwrap(), scalar_binary_top.as_slice()); + + // === Scalars: negative-epoch timestamps (pre-1970) === + let read_neg_us = r0.get_timestamp_ntz(58, 6).unwrap(); + assert_eq!( + read_neg_us.get_millisecond(), + scalar_ts_us_neg.get_millisecond() + ); + assert_eq!( + read_neg_us.get_nano_of_millisecond(), + scalar_ts_us_neg.get_nano_of_millisecond() + ); + let read_neg_ns = r0.get_timestamp_ntz(59, 9).unwrap(); + assert_eq!( + read_neg_ns.get_millisecond(), + scalar_ts_ns_neg.get_millisecond() + ); + assert_eq!( + read_neg_ns.get_nano_of_millisecond(), + scalar_ts_ns_neg.get_nano_of_millisecond() + ); + let read_neg_ltz_us = r0.get_timestamp_ltz(60, 6).unwrap(); + assert_eq!( + read_neg_ltz_us.get_epoch_millisecond(), + scalar_ts_ltz_us_neg.get_epoch_millisecond() + ); + let read_neg_ltz_ns = r0.get_timestamp_ltz(61, 9).unwrap(); + assert_eq!( + read_neg_ltz_ns.get_epoch_millisecond(), + scalar_ts_ltz_ns_neg.get_epoch_millisecond() + ); + + // === Scalars: every column NULL on row 2 === + for i in plan.section_range("scalars") { + assert!( + r2.is_null_at(i).unwrap(), + "scalar column {i} should be null" + ); + } + + // === Append-side validation: malformed rows are rejected client-side === + // Field count mismatch — far fewer fields than the schema demands. + let mut undersized = GenericRow::new(2); + undersized.set_field(0, true); + let err = writer.append(&undersized).unwrap_err().to_string(); + assert!( + err.contains(&format!("Expected: {column_count}")) && err.contains("Actual: 2"), + "expected field-count error, got: {err}" + ); + + // Type mismatch — correct field count but every cell is Bool, which + // satisfies none of the column types except col_boolean. + let wrong_types = GenericRow::from_data( + (0..column_count) + .map(|_| Datum::Bool(true)) + .collect::>(), + ); + assert!( + writer.append(&wrong_types).is_err(), + "row with wrong types should be rejected, not panic" + ); + + admin.drop_table(&table_path, false).await.expect("drop"); } } diff --git a/crates/fluss/tests/integration/utils.rs b/crates/fluss/tests/integration/utils.rs index 81a7c0b1..58c77f03 100644 --- a/crates/fluss/tests/integration/utils.rs +++ b/crates/fluss/tests/integration/utils.rs @@ -17,7 +17,9 @@ */ use crate::integration::fluss_cluster::{FlussTestingCluster, FlussTestingClusterBuilder}; use fluss::client::FlussAdmin; -use fluss::metadata::{DataTypes, PartitionSpec, TableDescriptor, TablePath}; +use fluss::metadata::{ + DataField, DataType, DataTypes, PartitionSpec, RowType, Schema, TableDescriptor, TablePath, +}; use fluss::row::FlussArray; use fluss::row::binary_array::FlussArrayWriter; use std::collections::HashMap; @@ -176,3 +178,200 @@ pub async fn create_partitions( .expect("Failed to create partition"); } } + +pub fn dt_array_int() -> DataType { + DataTypes::array(DataTypes::int()) +} + +pub fn dt_map_string_int() -> DataType { + DataTypes::map(DataTypes::string(), DataTypes::int()) +} + +pub fn dt_row_seq_label() -> DataType { + DataTypes::row(vec![ + DataField::new("seq", DataTypes::int(), None), + DataField::new("label", DataTypes::string(), None), + ]) +} + +pub fn as_row_type(dt: &DataType) -> RowType { + match dt { + DataType::Row(rt) => rt.clone(), + other => panic!("expected DataType::Row, got {other:?}"), + } +} + +pub fn dt_row_deep() -> DataType { + let inner = DataTypes::row(vec![DataField::new("n", DataTypes::int(), None)]); + DataTypes::row(vec![DataField::new("inner", inner, None)]) +} + +pub fn dt_row_rich() -> DataType { + DataTypes::row(vec![ + DataField::new("f_bool", DataTypes::boolean(), None), + DataField::new("f_int", DataTypes::int(), None), + DataField::new("f_long", DataTypes::bigint(), None), + DataField::new("f_float", DataTypes::float(), None), + DataField::new("f_double", DataTypes::double(), None), + DataField::new("f_str", DataTypes::string(), None), + DataField::new("f_bytes", DataTypes::bytes(), None), + DataField::new("f_decimal", DataTypes::decimal(10, 2), None), + DataField::new("f_date", DataTypes::date(), None), + DataField::new("f_time", DataTypes::time_with_precision(3), None), + DataField::new("f_ts_ntz", DataTypes::timestamp_with_precision(6), None), + DataField::new("f_ts_ltz", DataTypes::timestamp_ltz_with_precision(6), None), + DataField::new("f_binary_fixed", DataTypes::binary(4), None), + DataField::new("f_array_int", DataTypes::array(DataTypes::int()), None), + ]) +} + +pub fn array_dt_basics_columns() -> Vec<(&'static str, DataType)> { + vec![ + ("arr_int", DataTypes::array(DataTypes::int())), + ("arr_string", DataTypes::array(DataTypes::string())), + ("arr_of_arr", DataTypes::array(dt_array_int())), + ("arr_of_row", DataTypes::array(dt_row_seq_label())), + ] +} + +pub fn row_dt_basics_columns() -> Vec<(&'static str, DataType)> { + vec![ + ("row_basic", dt_row_seq_label()), + ("row_deep", dt_row_deep()), + ("row_rich", dt_row_rich()), + ] +} + +pub fn map_dt_basics_columns() -> Vec<(&'static str, DataType)> { + vec![ + ("map_string_int", dt_map_string_int()), + ( + "map_of_row", + DataTypes::map(DataTypes::string(), dt_row_seq_label()), + ), + ( + "map_of_map", + DataTypes::map(DataTypes::string(), dt_map_string_int()), + ), + ( + "map_of_array", + DataTypes::map(DataTypes::string(), dt_array_int()), + ), + ("array_of_map", DataTypes::array(dt_map_string_int())), + ] +} + +pub fn scalar_dt_columns() -> Vec<(&'static str, DataType)> { + vec![ + ("col_tinyint", DataTypes::tinyint()), + ("col_smallint", DataTypes::smallint()), + ("col_bigint", DataTypes::bigint()), + ("col_float", DataTypes::float()), + ("col_double", DataTypes::double()), + ("col_boolean", DataTypes::boolean()), + ("col_char", DataTypes::char(10)), + ("col_string", DataTypes::string()), + ("col_decimal", DataTypes::decimal(10, 2)), + ("col_date", DataTypes::date()), + ("col_time_s", DataTypes::time_with_precision(0)), + ("col_time_ms", DataTypes::time_with_precision(3)), + ("col_time_us", DataTypes::time_with_precision(6)), + ("col_time_ns", DataTypes::time_with_precision(9)), + ("col_ts_s", DataTypes::timestamp_with_precision(0)), + ("col_ts_ms", DataTypes::timestamp_with_precision(3)), + ("col_ts_us", DataTypes::timestamp_with_precision(6)), + ("col_ts_ns", DataTypes::timestamp_with_precision(9)), + ("col_ts_ltz_s", DataTypes::timestamp_ltz_with_precision(0)), + ("col_ts_ltz_ms", DataTypes::timestamp_ltz_with_precision(3)), + ("col_ts_ltz_us", DataTypes::timestamp_ltz_with_precision(6)), + ("col_ts_ltz_ns", DataTypes::timestamp_ltz_with_precision(9)), + ("col_bytes_top", DataTypes::bytes()), + ("col_binary_top", DataTypes::binary(4)), + ("col_ts_us_neg", DataTypes::timestamp_with_precision(6)), + ("col_ts_ns_neg", DataTypes::timestamp_with_precision(9)), + ( + "col_ts_ltz_us_neg", + DataTypes::timestamp_ltz_with_precision(6), + ), + ( + "col_ts_ltz_ns_neg", + DataTypes::timestamp_ltz_with_precision(9), + ), + ] +} + +#[derive(Default)] +pub struct ColumnPlan { + cols: Vec<(&'static str, DataType)>, + index: HashMap<&'static str, usize>, + sections: Vec<(&'static str, usize)>, +} + +impl ColumnPlan { + pub fn new() -> Self { + Self::default() + } + + pub fn add(mut self, name: &'static str, dt: DataType) -> Self { + let prev = self.index.insert(name, self.cols.len()); + assert!(prev.is_none(), "duplicate column in plan: {name}"); + self.cols.push((name, dt)); + self + } + + pub fn extend>(mut self, it: I) -> Self { + for (n, dt) in it { + self = self.add(n, dt); + } + self + } + + /// Marks the next column added as the start of a named section. Each call + /// closes the previous section; the last section runs to the end of the plan. + pub fn start_section(mut self, name: &'static str) -> Self { + assert!( + !self.sections.iter().any(|(n, _)| *n == name), + "duplicate section: {name}" + ); + self.sections.push((name, self.cols.len())); + self + } + + pub fn build_schema(&self, pk: Option<&[&str]>) -> Schema { + let mut sb = Schema::builder(); + for (n, dt) in &self.cols { + sb = sb.column(*n, dt.clone()); + } + if let Some(keys) = pk { + sb = sb.primary_key(keys.iter().copied()); + } + sb.build().expect("schema build") + } + + pub fn idx(&self, name: &str) -> usize { + *self + .index + .get(name) + .unwrap_or_else(|| panic!("unknown column in plan: {name}")) + } + + pub fn len(&self) -> usize { + self.cols.len() + } + + /// Half-open range of the named section: `[its start, next section's start or plan end)`. + pub fn section_range(&self, name: &str) -> std::ops::Range { + let pos = self + .sections + .iter() + .position(|(n, _)| *n == name) + .unwrap_or_else(|| panic!("unknown section: {name}")); + let start = self.sections[pos].1; + let end = self + .sections + .get(pos + 1) + .map(|(_, s)| *s) + .unwrap_or(self.cols.len()); + start..end + } +}