diff --git a/Cargo.lock b/Cargo.lock index 20e02b3e5..e5d23c595 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5256,6 +5256,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", + "fastrand", "geo", "geo-traits", "geo-types", diff --git a/Cargo.toml b/Cargo.toml index 5a580b5ac..7fa350f98 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -87,6 +87,7 @@ datafusion-physical-expr = { version = "50.2.0" } datafusion-physical-plan = { version = "50.2.0" } dirs = "6.0.0" env_logger = "0.11" +fastrand = "2.0" futures = { version = "0.3" } object_store = { version = "0.12.0", default-features = false } float_next_after = "1" diff --git a/rust/sedona-testing/Cargo.toml b/rust/sedona-testing/Cargo.toml index f467aad73..fe5775019 100644 --- a/rust/sedona-testing/Cargo.toml +++ b/rust/sedona-testing/Cargo.toml @@ -43,6 +43,7 @@ criterion = { workspace = true, optional = true } datafusion-common = { workspace = true } datafusion-expr = { workspace = true } datafusion-physical-expr = { workspace = true } +fastrand = { workspace = true } geo-traits = { workspace = true, features = ["geo-types"] } geo-types = { workspace = true } parquet = { workspace = true, features = ["arrow", "snap", "zstd"] } diff --git a/rust/sedona-testing/src/rasters.rs b/rust/sedona-testing/src/rasters.rs index 826024f9d..69d83ca15 100644 --- a/rust/sedona-testing/src/rasters.rs +++ b/rust/sedona-testing/src/rasters.rs @@ -15,9 +15,11 @@ // specific language governing permissions and limitations // under the License. use arrow_array::StructArray; -use arrow_schema::ArrowError; +use datafusion_common::Result; +use fastrand::Rng; +use sedona_raster::array::RasterStructArray; use sedona_raster::builder::RasterBuilder; -use sedona_raster::traits::{BandMetadata, RasterMetadata}; +use sedona_raster::traits::{BandMetadata, RasterMetadata, RasterRef}; use sedona_schema::raster::{BandDataType, StorageType}; /// Generate a StructArray of rasters with sequentially increasing dimensions and pixel values @@ -25,7 +27,7 @@ use sedona_schema::raster::{BandDataType, StorageType}; pub fn generate_test_rasters( count: usize, null_raster_index: Option, -) -> Result { +) -> Result { let mut builder = RasterBuilder::new(count); for i in 0..count { // If a null raster index is specified and that matches the current index, @@ -65,7 +67,324 @@ pub fn generate_test_rasters( builder.finish_raster()?; } - builder.finish() + Ok(builder.finish()?) +} + +/// Generates a set of tiled rasters arranged in a grid +/// - Each raster tile has specified dimensions and random pixel values +/// - Each raster has 3 bands which can be interpreted as RGB values +/// and the result can be visualized as a mosaic of tiles. +/// - There are nodata values at the 4 corners of the overall mosaic. +pub fn generate_tiled_rasters( + tile_size: (usize, usize), + number_of_tiles: (usize, usize), + data_type: BandDataType, + seed: Option, +) -> Result { + let mut rng = match seed { + Some(s) => Rng::with_seed(s), + None => Rng::new(), + }; + let (tile_width, tile_height) = tile_size; + let (x_tiles, y_tiles) = number_of_tiles; + let mut raster_builder = RasterBuilder::new(x_tiles * y_tiles); + let band_count = 3; + + for tile_y in 0..y_tiles { + for tile_x in 0..x_tiles { + let origin_x = (tile_x * tile_width) as f64; + let origin_y = (tile_y * tile_height) as f64; + + let raster_metadata = RasterMetadata { + width: tile_width as u64, + height: tile_height as u64, + upperleft_x: origin_x, + upperleft_y: origin_y, + scale_x: 1.0, + scale_y: 1.0, + skew_x: 0.0, + skew_y: 0.0, + }; + + raster_builder.start_raster(&raster_metadata, None)?; + + for _ in 0..band_count { + // Set a nodata value appropriate for the data type + let nodata_value = get_nodata_value_for_type(&data_type); + + let band_metadata = BandMetadata { + nodata_value: nodata_value.clone(), + storage_type: StorageType::InDb, + datatype: data_type.clone(), + outdb_url: None, + outdb_band_id: None, + }; + + raster_builder.start_band(band_metadata)?; + + let pixel_count = tile_width * tile_height; + + // Determine which corner position (if any) should have nodata in this tile + let corner_position = + get_corner_position(tile_x, tile_y, x_tiles, y_tiles, tile_width, tile_height); + let band_data = generate_random_band_data( + pixel_count, + &data_type, + nodata_value.as_deref(), + corner_position, + &mut rng, + ); + + raster_builder.band_data_writer().append_value(&band_data); + raster_builder.finish_band()?; + } + + raster_builder.finish_raster()?; + } + } + + Ok(raster_builder.finish()?) +} + +/// Determine if this tile contains a corner of the overall grid and return its position +/// Returns Some(position) if this tile contains a corner, None otherwise +fn get_corner_position( + tile_x: usize, + tile_y: usize, + x_tiles: usize, + y_tiles: usize, + tile_width: usize, + tile_height: usize, +) -> Option { + // Top-left corner (tile 0,0, pixel 0) + if tile_x == 0 && tile_y == 0 { + return Some(0); + } + // Top-right corner (tile x_tiles-1, 0, pixel tile_width-1) + if tile_x == x_tiles - 1 && tile_y == 0 { + return Some(tile_width - 1); + } + // Bottom-left corner (tile 0, y_tiles-1, pixel (tile_height-1)*tile_width) + if tile_x == 0 && tile_y == y_tiles - 1 { + return Some((tile_height - 1) * tile_width); + } + // Bottom-right corner (tile x_tiles-1, y_tiles-1, pixel tile_height*tile_width-1) + if tile_x == x_tiles - 1 && tile_y == y_tiles - 1 { + return Some(tile_height * tile_width - 1); + } + None +} + +fn generate_random_band_data( + pixel_count: usize, + data_type: &BandDataType, + nodata_bytes: Option<&[u8]>, + corner_position: Option, + rng: &mut Rng, +) -> Vec { + match data_type { + BandDataType::UInt8 => { + let mut data: Vec = (0..pixel_count).map(|_| rng.u8(..)).collect(); + // Set corner pixel to nodata value if this tile contains a corner + if let (Some(nodata), Some(pos)) = (nodata_bytes, corner_position) { + if !nodata.is_empty() && pos < data.len() { + data[pos] = nodata[0]; + } + } + data + } + BandDataType::UInt16 => { + let mut data = Vec::with_capacity(pixel_count * 2); + for _ in 0..pixel_count { + data.extend_from_slice(&rng.u16(..).to_ne_bytes()); + } + // Set corner pixel to nodata value if this tile contains a corner + if let (Some(nodata), Some(pos)) = (nodata_bytes, corner_position) { + if nodata.len() >= 2 && pos * 2 + 2 <= data.len() { + data[pos * 2..(pos * 2) + 2].copy_from_slice(&nodata[0..2]); + } + } + data + } + BandDataType::Int16 => { + let mut data = Vec::with_capacity(pixel_count * 2); + for _ in 0..pixel_count { + data.extend_from_slice(&rng.i16(..).to_ne_bytes()); + } + // Set corner pixel to nodata value if this tile contains a corner + if let (Some(nodata), Some(pos)) = (nodata_bytes, corner_position) { + if nodata.len() >= 2 && pos * 2 + 2 <= data.len() { + data[pos * 2..(pos * 2) + 2].copy_from_slice(&nodata[0..2]); + } + } + data + } + BandDataType::UInt32 => { + let mut data = Vec::with_capacity(pixel_count * 4); + for _ in 0..pixel_count { + data.extend_from_slice(&rng.u32(..).to_ne_bytes()); + } + // Set corner pixel to nodata value if this tile contains a corner + if let (Some(nodata), Some(pos)) = (nodata_bytes, corner_position) { + if nodata.len() >= 4 && pos * 4 + 4 <= data.len() { + data[pos * 4..(pos * 4) + 4].copy_from_slice(&nodata[0..4]); + } + } + data + } + BandDataType::Int32 => { + let mut data = Vec::with_capacity(pixel_count * 4); + for _ in 0..pixel_count { + data.extend_from_slice(&rng.i32(..).to_ne_bytes()); + } + // Set corner pixel to nodata value if this tile contains a corner + if let (Some(nodata), Some(pos)) = (nodata_bytes, corner_position) { + if nodata.len() >= 4 && pos * 4 + 4 <= data.len() { + data[pos * 4..(pos * 4) + 4].copy_from_slice(&nodata[0..4]); + } + } + data + } + BandDataType::Float32 => { + let mut data = Vec::with_capacity(pixel_count * 4); + for _ in 0..pixel_count { + data.extend_from_slice(&rng.f32().to_ne_bytes()); + } + // Set corner pixel to nodata value if this tile contains a corner + if let (Some(nodata), Some(pos)) = (nodata_bytes, corner_position) { + if nodata.len() >= 4 && pos * 4 + 4 <= data.len() { + data[pos * 4..(pos * 4) + 4].copy_from_slice(&nodata[0..4]); + } + } + data + } + BandDataType::Float64 => { + let mut data = Vec::with_capacity(pixel_count * 8); + for _ in 0..pixel_count { + data.extend_from_slice(&rng.f64().to_ne_bytes()); + } + // Set corner pixel to nodata value if this tile contains a corner + if let (Some(nodata), Some(pos)) = (nodata_bytes, corner_position) { + if nodata.len() >= 8 && pos * 8 + 8 <= data.len() { + data[pos * 8..(pos * 8) + 8].copy_from_slice(&nodata[0..8]); + } + } + data + } + } +} + +fn get_nodata_value_for_type(data_type: &BandDataType) -> Option> { + match data_type { + BandDataType::UInt8 => Some(vec![255u8]), + BandDataType::UInt16 => Some(u16::MAX.to_ne_bytes().to_vec()), + BandDataType::Int16 => Some(i16::MIN.to_ne_bytes().to_vec()), + BandDataType::UInt32 => Some(u32::MAX.to_ne_bytes().to_vec()), + BandDataType::Int32 => Some(i32::MIN.to_ne_bytes().to_vec()), + BandDataType::Float32 => Some(f32::NAN.to_ne_bytes().to_vec()), + BandDataType::Float64 => Some(f64::NAN.to_ne_bytes().to_vec()), + } +} + +/// Compare two RasterStructArrays for equality +pub fn assert_raster_arrays_equal( + raster_array1: &RasterStructArray, + raster_array2: &RasterStructArray, +) { + assert_eq!( + raster_array1.len(), + raster_array2.len(), + "Raster array lengths do not match" + ); + + for i in 0..raster_array1.len() { + let raster1 = raster_array1.get(i).unwrap(); + let raster2 = raster_array2.get(i).unwrap(); + assert_raster_equal(&raster1, &raster2); + } +} + +/// Compare two rasters for equality +pub fn assert_raster_equal(raster1: &impl RasterRef, raster2: &impl RasterRef) { + // Compare metadata + let meta1 = raster1.metadata(); + let meta2 = raster2.metadata(); + assert_eq!(meta1.width(), meta2.width(), "Raster widths do not match"); + assert_eq!( + meta1.height(), + meta2.height(), + "Raster heights do not match" + ); + assert_eq!( + meta1.upper_left_x(), + meta2.upper_left_x(), + "Raster upper left x does not match" + ); + assert_eq!( + meta1.upper_left_y(), + meta2.upper_left_y(), + "Raster upper left y does not match" + ); + assert_eq!( + meta1.scale_x(), + meta2.scale_x(), + "Raster scale x does not match" + ); + assert_eq!( + meta1.scale_y(), + meta2.scale_y(), + "Raster scale y does not match" + ); + assert_eq!( + meta1.skew_x(), + meta2.skew_x(), + "Raster skew x does not match" + ); + assert_eq!( + meta1.skew_y(), + meta2.skew_y(), + "Raster skew y does not match" + ); + + // Compare bands + let bands1 = raster1.bands(); + let bands2 = raster2.bands(); + assert_eq!(bands1.len(), bands2.len(), "Number of bands do not match"); + + for band_index in 0..bands1.len() { + let band1 = bands1.band(band_index + 1).unwrap(); + let band2 = bands2.band(band_index + 1).unwrap(); + + let band_meta1 = band1.metadata(); + let band_meta2 = band2.metadata(); + assert_eq!( + band_meta1.data_type(), + band_meta2.data_type(), + "Band data types do not match" + ); + assert_eq!( + band_meta1.nodata_value(), + band_meta2.nodata_value(), + "Band nodata values do not match" + ); + assert_eq!( + band_meta1.storage_type(), + band_meta2.storage_type(), + "Band storage types do not match" + ); + assert_eq!( + band_meta1.outdb_url(), + band_meta2.outdb_url(), + "Band outdb URLs do not match" + ); + assert_eq!( + band_meta1.outdb_band_id(), + band_meta2.outdb_band_id(), + "Band outdb band IDs do not match" + ); + + assert_eq!(band1.data(), band2.data(), "Band data does not match"); + } } #[cfg(test)] @@ -115,4 +434,86 @@ mod tests { assert_eq!(actual_pixel_values, expected_pixel_values); } } + + #[test] + fn test_generate_tiled_rasters() { + let tile_size = (64, 64); + let number_of_tiles = (4, 4); + let data_type = BandDataType::UInt8; + let struct_array = + generate_tiled_rasters(tile_size, number_of_tiles, data_type, Some(43)).unwrap(); + let raster_array = RasterStructArray::new(&struct_array); + assert_eq!(raster_array.len(), 16); // 4x4 tiles + for i in 0..16 { + let raster = raster_array.get(i).unwrap(); + let metadata = raster.metadata(); + assert_eq!(metadata.width(), 64); + assert_eq!(metadata.height(), 64); + assert_eq!(metadata.upper_left_x(), ((i % 4) * 64) as f64); + assert_eq!(metadata.upper_left_y(), ((i / 4) * 64) as f64); + let bands = raster.bands(); + assert_eq!(bands.len(), 3); + for band_index in 0..3 { + let band = bands.band(band_index + 1).unwrap(); + let band_metadata = band.metadata(); + assert_eq!(band_metadata.data_type(), BandDataType::UInt8); + assert_eq!(band_metadata.storage_type(), StorageType::InDb); + let band_data = band.data(); + assert_eq!(band_data.len(), 64 * 64); // 4096 pixels + } + } + } + + #[test] + fn test_raster_arrays_equal() { + let raster_array1 = generate_test_rasters(3, None).unwrap(); + let raster_struct_array1 = RasterStructArray::new(&raster_array1); + // Test that identical arrays are equal + assert_raster_arrays_equal(&raster_struct_array1, &raster_struct_array1); + } + + #[test] + #[should_panic = "Raster array lengths do not match"] + fn test_raster_arrays_not_equal() { + let raster_array1 = generate_test_rasters(3, None).unwrap(); + let raster_struct_array1 = RasterStructArray::new(&raster_array1); + + // Test that arrays with different lengths are not equal + let raster_array2 = generate_test_rasters(4, None).unwrap(); + let raster_struct_array2 = RasterStructArray::new(&raster_array2); + assert_raster_arrays_equal(&raster_struct_array1, &raster_struct_array2); + } + + #[test] + fn test_raster_equal() { + let raster_array1 = + generate_tiled_rasters((256, 256), (1, 1), BandDataType::UInt8, Some(43)).unwrap(); + let raster1 = RasterStructArray::new(&raster_array1).get(0).unwrap(); + + // Assert that the rasters are equal to themselves + assert_raster_equal(&raster1, &raster1); + } + + #[test] + #[should_panic = "Band data does not match"] + fn test_raster_different_band_data() { + let raster_array1 = + generate_tiled_rasters((128, 128), (1, 1), BandDataType::UInt8, Some(43)).unwrap(); + let raster_array2 = + generate_tiled_rasters((128, 128), (1, 1), BandDataType::UInt8, Some(47)).unwrap(); + + let raster1 = RasterStructArray::new(&raster_array1).get(0).unwrap(); + let raster2 = RasterStructArray::new(&raster_array2).get(0).unwrap(); + assert_raster_equal(&raster1, &raster2); + } + + #[test] + #[should_panic = "Raster upper left x does not match"] + fn test_raster_different_metadata() { + let raster_array = + generate_tiled_rasters((128, 128), (2, 1), BandDataType::UInt8, Some(43)).unwrap(); + let raster1 = RasterStructArray::new(&raster_array).get(0).unwrap(); + let raster2 = RasterStructArray::new(&raster_array).get(1).unwrap(); + assert_raster_equal(&raster1, &raster2); + } }