Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: check geometry column data format
- Loading branch information
Showing
16 changed files
with
853 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
104 changes: 104 additions & 0 deletions
104
query_server/query/src/extension/physical/optimizer_rule/add_assert.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
use std::str::FromStr; | ||
use std::sync::Arc; | ||
|
||
use datafusion::common::tree_node::{Transformed, TreeNode}; | ||
use datafusion::common::Result as DFResult; | ||
use datafusion::config::ConfigOptions; | ||
use datafusion::error::DataFusionError; | ||
use datafusion::physical_optimizer::PhysicalOptimizerRule; | ||
use datafusion::physical_plan::ExecutionPlan; | ||
use models::gis::data_type::GeometryType; | ||
use models::schema::GIS_SUB_TYPE_META_KEY; | ||
use spi::QueryError; | ||
|
||
use crate::extension::physical::plan_node::assert::geom_write::AssertGeomType; | ||
use crate::extension::physical::plan_node::assert::AssertExec; | ||
use crate::extension::physical::plan_node::table_writer::TableWriterExec; | ||
use crate::extension::utils::downcast_execution_plan; | ||
|
||
/// Physical optimizer rule that places an assertion node beneath a `TableWriterExec`
/// when the writer's sink schema declares geometry columns.
///
/// The rule carries no state; all of its work happens in its `PhysicalOptimizerRule`
/// implementation.
#[non_exhaustive]
pub struct AddAssertExec {}

impl AddAssertExec {
    /// Creates the (stateless) rule.
    pub fn new() -> Self {
        AddAssertExec {}
    }
}

impl Default for AddAssertExec {
    /// Same as [`AddAssertExec::new`].
    fn default() -> Self {
        AddAssertExec::new()
    }
}
|
||
impl PhysicalOptimizerRule for AddAssertExec { | ||
fn optimize( | ||
&self, | ||
plan: Arc<dyn ExecutionPlan>, | ||
_config: &ConfigOptions, | ||
) -> DFResult<Arc<dyn ExecutionPlan>> { | ||
plan.transform_down(&|plan| { | ||
if let Some(exec) = downcast_execution_plan::<TableWriterExec>(plan.as_ref()) { | ||
if let Some(new_child) = add_table_write_asserter_if_necessary(exec)? { | ||
let new_plan = plan.with_new_children(vec![new_child])?; | ||
return Ok(Transformed::Yes(new_plan)); | ||
} | ||
} | ||
|
||
Ok(Transformed::No(plan)) | ||
}) | ||
} | ||
|
||
fn name(&self) -> &str { | ||
"add_assert_exec" | ||
} | ||
|
||
fn schema_check(&self) -> bool { | ||
true | ||
} | ||
} | ||
|
||
fn add_table_write_asserter_if_necessary( | ||
exec: &TableWriterExec, | ||
) -> DFResult<Option<Arc<dyn ExecutionPlan>>> { | ||
let schema = exec.sink_schema(); | ||
let child = exec.children()[0].clone(); | ||
|
||
let geoms_with_idx = schema | ||
.fields() | ||
.iter() | ||
.enumerate() | ||
.filter_map(|(idx, field)| { | ||
match field | ||
.metadata() | ||
.get(GIS_SUB_TYPE_META_KEY) | ||
.map(|e| GeometryType::from_str(e)) | ||
{ | ||
Some(Ok(sub_type)) => { | ||
// The target table for the write operation contains a column of type geometry | ||
Ok(Some((sub_type, idx))) | ||
} | ||
Some(Err(err)) => { | ||
// Contains a column of type geometry, but the type is not recognized | ||
Err(DataFusionError::External(Box::new( | ||
QueryError::InvalidGeometryType { reason: err }, | ||
))) | ||
} | ||
None => { | ||
// Not contain a column of type geometry | ||
Ok(None) | ||
} | ||
} | ||
.transpose() | ||
}) | ||
.collect::<DFResult<Vec<_>>>()?; | ||
|
||
if geoms_with_idx.is_empty() { | ||
return Ok(None); | ||
} | ||
|
||
let assert_expr = Arc::new(AssertGeomType::new(geoms_with_idx)); | ||
let new_child = Arc::new(AssertExec::new(assert_expr, child)); | ||
|
||
Ok(Some(new_child)) | ||
} |
1 change: 1 addition & 0 deletions
1
query_server/query/src/extension/physical/optimizer_rule/mod.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
//! physical plan optimizer rule | ||
pub mod add_assert; | ||
pub mod add_state_store; | ||
pub mod add_traced_proxy; |
92 changes: 92 additions & 0 deletions
92
query_server/query/src/extension/physical/plan_node/assert/geom_write.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
use std::fmt::{Debug, Display}; | ||
|
||
use datafusion::arrow::array::{downcast_array, Array, StringArray}; | ||
use datafusion::arrow::record_batch::RecordBatch; | ||
use datafusion::common::Result as DFResult; | ||
use datafusion::error::DataFusionError; | ||
use geo::Geometry; | ||
use geozero::wkt::WktStr; | ||
use geozero::ToGeo; | ||
use models::gis::data_type::GeometryType; | ||
use spi::QueryError; | ||
|
||
use super::AssertExpr; | ||
|
||
#[derive(Debug)] | ||
pub struct AssertGeomType { | ||
geom_with_idx: Vec<(GeometryType, usize)>, | ||
} | ||
|
||
impl AssertGeomType { | ||
pub fn new(geom_with_idx: Vec<(GeometryType, usize)>) -> Self { | ||
Self { geom_with_idx } | ||
} | ||
} | ||
|
||
impl AssertExpr for AssertGeomType { | ||
fn assert(&self, batch: &RecordBatch) -> DFResult<()> { | ||
for (sub_type, idx) in &self.geom_with_idx { | ||
let column = batch.column(*idx).as_ref(); | ||
check_wkt(sub_type, column)?; | ||
} | ||
|
||
Ok(()) | ||
} | ||
} | ||
|
||
impl Display for AssertGeomType { | ||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
let str = self | ||
.geom_with_idx | ||
.iter() | ||
.map(|(_, idx)| idx.to_string()) | ||
.collect::<Vec<_>>() | ||
.join(", "); | ||
|
||
write!(f, "AssertGeomType({})", str) | ||
} | ||
} | ||
|
||
macro_rules! define_check_wkt { | ||
($( | ||
$sub_type:ident $(= $string_keyword:expr)? | ||
),*) => { | ||
fn check_wkt(sub_type: &GeometryType, array: &dyn Array) -> DFResult<()> { | ||
let str_array = downcast_array::<StringArray>(array); | ||
|
||
match sub_type { | ||
$(GeometryType::$sub_type => { | ||
for ele in str_array.iter().flatten() { | ||
let geom = WktStr(ele).to_geo().map_err(|err| { | ||
DataFusionError::External(Box::new(QueryError::InvalidGeometryType { | ||
reason: format!("{}, expect {}, got {:?}", err, stringify!($sub_type), ele), | ||
})) | ||
})?; | ||
|
||
if let Geometry::$sub_type(_) = &geom { | ||
continue; | ||
} | ||
|
||
return Err(DataFusionError::External(Box::new( | ||
QueryError::InvalidGeometryType { | ||
reason: format!("expect {}, got {:?}", stringify!($sub_type), geom), | ||
}, | ||
))); | ||
} | ||
}),* | ||
} | ||
|
||
Ok(()) | ||
} | ||
}; | ||
} | ||
|
||
define_check_wkt!( | ||
Point, | ||
LineString, | ||
Polygon, | ||
MultiPoint, | ||
MultiLineString, | ||
MultiPolygon, | ||
GeometryCollection | ||
); |
Oops, something went wrong.