diff --git a/datafusion/core/src/logical_plan/builder.rs b/datafusion/core/src/logical_plan/builder.rs index 557472abb62c..d13d542ef833 100644 --- a/datafusion/core/src/logical_plan/builder.rs +++ b/datafusion/core/src/logical_plan/builder.rs @@ -51,7 +51,10 @@ use crate::logical_plan::{ CrossJoin, DFField, DFSchema, DFSchemaRef, Limit, Partitioning, Repartition, SubqueryType, Values, }; -use crate::sql::utils::{group_window_expr_by_sort_keys, resolve_exprs_to_aliases}; +use crate::sql::utils::{ + group_window_expr_by_sort_keys, realias_duplicate_expr_aliases, + resolve_exprs_to_aliases, +}; /// Default table name for unnamed table pub const UNNAMED_TABLE: &str = "?table?"; @@ -1222,6 +1225,13 @@ pub fn project_with_alias( .push(columnize_expr(normalize_col(e, &plan)?, input_schema)), } } + + // NOTE (cubesql): realias expressions that have the same name and qualifier + if alias.is_some() { + projected_expr = + realias_duplicate_expr_aliases(projected_expr, input_schema, alias.clone())?; + } + validate_unique_names("Projections", projected_expr.iter(), input_schema)?; let input_schema = DFSchema::new_with_metadata( exprlist_to_fields(&projected_expr, &plan)?, diff --git a/datafusion/core/src/sql/planner.rs b/datafusion/core/src/sql/planner.rs index ef4ecd1c6bb2..81a9e504919a 100644 --- a/datafusion/core/src/sql/planner.rs +++ b/datafusion/core/src/sql/planner.rs @@ -38,7 +38,9 @@ use crate::logical_plan::{ use crate::optimizer::utils::exprlist_to_columns; use crate::prelude::JoinType; use crate::scalar::ScalarValue; -use crate::sql::utils::{find_udtf_exprs, make_decimal_type, normalize_ident}; +use crate::sql::utils::{ + find_udtf_exprs, make_decimal_type, normalize_ident, realias_duplicate_expr_aliases, +}; use crate::{ error::{DataFusionError, Result}, physical_plan::aggregates, @@ -1038,6 +1040,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ); } + // NOTE (cubesql): realias expressions that have the same name and qualifier + let select_exprs = + realias_duplicate_expr_aliases(select_exprs, plan.schema(), None)?; + // having and group by clause may reference aliases defined in select projection let projected_plan = self.project(plan.clone(), select_exprs.clone())?; @@ -3191,21 +3197,35 @@ mod tests { #[test] fn select_repeated_column() { - let sql = "SELECT age, age FROM person"; - let err = logical_plan(sql).expect_err("query should have failed"); - assert_eq!( - r##"Plan("Projections require unique expression names but the expression \"#person.age\" at position 0 and \"#person.age\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##, - format!("{:?}", err) + // let sql = "SELECT age, age FROM person"; + // let err = logical_plan(sql).expect_err("query should have failed"); + // assert_eq!( + // r##"Plan("Projections require unique expression names but the expression \"#person.age\" at position 0 and \"#person.age\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##, + // format!("{:?}", err) + // ); + + // NOTE: this is supported with cubesql patches + quick_test( + "SELECT age, age FROM person", + "Projection: #person.age, #person.age AS age__1\ + \n TableScan: person projection=None", ); } #[test] fn select_wildcard_with_repeated_column() { - let sql = "SELECT *, age FROM person"; - let err = logical_plan(sql).expect_err("query should have failed"); - assert_eq!( - r##"Plan("Projections require unique expression names but the expression \"#person.age\" at position 3 and \"#person.age\" at position 8 have the same name. Consider aliasing (\"AS\") one of them.")"##, - format!("{:?}", err) + // let sql = "SELECT *, age FROM person"; + // let err = logical_plan(sql).expect_err("query should have failed"); + // assert_eq!( + // r##"Plan("Projections require unique expression names but the expression \"#person.age\" at position 3 and \"#person.age\" at position 8 have the same name. Consider aliasing (\"AS\") one of them.")"##, + // format!("{:?}", err) + // ); + + // NOTE: this is supported with cubesql patches + quick_test( + "SELECT *, age FROM person", + "Projection: #person.id, #person.first_name, #person.last_name, #person.age, #person.state, #person.salary, #person.birth_date, #person.😀, #person.age AS age__1\ + \n TableScan: person projection=None", ); } @@ -3779,11 +3799,19 @@ mod tests { #[test] fn select_simple_aggregate_repeated_aggregate() { - let sql = "SELECT MIN(age), MIN(age) FROM person"; - let err = logical_plan(sql).expect_err("query should have failed"); - assert_eq!( - r##"Plan("Projections require unique expression names but the expression \"MIN(#person.age)\" at position 0 and \"MIN(#person.age)\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##, - format!("{:?}", err) + // let sql = "SELECT MIN(age), MIN(age) FROM person"; + // let err = logical_plan(sql).expect_err("query should have failed"); + // assert_eq!( + // r##"Plan("Projections require unique expression names but the expression \"MIN(#person.age)\" at position 0 and \"MIN(#person.age)\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##, + // format!("{:?}", err) + // ); + + // NOTE: this is supported with cubesql patches + quick_test( + "SELECT MIN(age), MIN(age) FROM person", + "Projection: #MIN(person.age), #MIN(person.age) AS MIN(person.age)__1\ + \n Aggregate: groupBy=[[]], aggr=[[MIN(#person.age)]]\ + \n TableScan: person projection=None", ); } @@ -3809,11 +3837,19 @@ mod tests { #[test] fn select_simple_aggregate_repeated_aggregate_with_repeated_aliases() { - let sql = "SELECT MIN(age) AS a, MIN(age) AS a FROM person"; - let err = logical_plan(sql).expect_err("query should have failed"); - assert_eq!( - r##"Plan("Projections require unique expression names but the expression \"MIN(#person.age) AS a\" at position 0 and \"MIN(#person.age) AS a\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##, - format!("{:?}", err) + // let sql = "SELECT MIN(age) AS a, MIN(age) AS a FROM person"; + // let err = logical_plan(sql).expect_err("query should have failed"); + // assert_eq!( + // r##"Plan("Projections require unique expression names but the expression \"MIN(#person.age) AS a\" at position 0 and \"MIN(#person.age) AS a\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##, + // format!("{:?}", err) + // ); + + // NOTE: this is supported with cubesql patches + quick_test( + "SELECT MIN(age) AS a, MIN(age) AS a FROM person", + "Projection: #MIN(person.age) AS a, #MIN(person.age) AS a__1\ + \n Aggregate: groupBy=[[]], aggr=[[MIN(#person.age)]]\ + \n TableScan: person projection=None", ); } @@ -3839,11 +3875,19 @@ mod tests { #[test] fn select_simple_aggregate_with_groupby_with_aliases_repeated() { - let sql = "SELECT state AS a, MIN(age) AS a FROM person GROUP BY state"; - let err = logical_plan(sql).expect_err("query should have failed"); - assert_eq!( - r##"Plan("Projections require unique expression names but the expression \"#person.state AS a\" at position 0 and \"MIN(#person.age) AS a\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##, - format!("{:?}", err) + // let sql = "SELECT state AS a, MIN(age) AS a FROM person GROUP BY state"; + // let err = logical_plan(sql).expect_err("query should have failed"); + // assert_eq!( + // r##"Plan("Projections require unique expression names but the expression \"#person.state AS a\" at position 0 and \"MIN(#person.age) AS a\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##, + // format!("{:?}", err) + // ); + + // NOTE: this is supported with cubesql patches + quick_test( + "SELECT state AS a, MIN(age) AS a FROM person GROUP BY state", + "Projection: #person.state AS a, #MIN(person.age) AS a__1\ + \n Aggregate: groupBy=[[#person.state]], aggr=[[MIN(#person.age)]]\ + \n TableScan: person projection=None", ); } @@ -4008,12 +4052,20 @@ mod tests { #[test] fn select_simple_aggregate_with_groupby_aggregate_repeated() { - let sql = "SELECT state, MIN(age), MIN(age) FROM person GROUP BY state"; - let err = logical_plan(sql).expect_err("query should have failed"); - assert_eq!( - r##"Plan("Projections require unique expression names but the expression \"MIN(#person.age)\" at position 1 and \"MIN(#person.age)\" at position 2 have the same name. Consider aliasing (\"AS\") one of them.")"##, - format!("{:?}", err) - ); + // let sql = "SELECT state, MIN(age), MIN(age) FROM person GROUP BY state"; + // let err = logical_plan(sql).expect_err("query should have failed"); + // assert_eq!( + // r##"Plan("Projections require unique expression names but the expression \"MIN(#person.age)\" at position 1 and \"MIN(#person.age)\" at position 2 have the same name. Consider aliasing (\"AS\") one of them.")"##, + // format!("{:?}", err) + // ); + + // NOTE: this is supported with cubesql patches + quick_test( + "SELECT state, MIN(age), MIN(age) FROM person GROUP BY state", + "Projection: #person.state, #MIN(person.age), #MIN(person.age) AS MIN(person.age)__1\ + \n Aggregate: groupBy=[[#person.state]], aggr=[[MIN(#person.age)]]\ + \n TableScan: person projection=None", + ) } #[test] diff --git a/datafusion/core/src/sql/utils.rs b/datafusion/core/src/sql/utils.rs index 033b7ff8729c..49cb3a1ab8dd 100644 --- a/datafusion/core/src/sql/utils.rs +++ b/datafusion/core/src/sql/utils.rs @@ -21,15 +21,16 @@ use arrow::datatypes::{DataType, DECIMAL_DEFAULT_SCALE, DECIMAL_MAX_PRECISION}; use datafusion_common::DFSchema; use sqlparser::ast::Ident; -use crate::logical_plan::ExprVisitable; use crate::logical_plan::{Expr, Like, LogicalPlan}; +use crate::logical_plan::{ExprSchemable, ExprVisitable}; use crate::scalar::ScalarValue; use crate::{ error::{DataFusionError, Result}, logical_plan::{Column, ExpressionVisitor, Recursion}, }; use datafusion_expr::expr::GroupingSet; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; +use std::mem::replace; /// Collect all deeply nested `Expr::AggregateFunction` and /// `Expr::AggregateUDF`. They are returned in order of occurrence (depth @@ -781,6 +782,94 @@ pub(crate) fn normalize_ident(id: Ident) -> String { id.value } +/// Structure holding qualifier and name of an alias. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +struct QualifiedAlias { + qualifier: Option, + name: String, +} + +impl QualifiedAlias { + fn new(qualifier: Option, name: String) -> Self { + Self { qualifier, name } + } + + fn from_expr_schema_and_alias( + expr: &Expr, + schema: &DFSchema, + alias: Option, + ) -> Result { + let field = expr.to_field(schema)?; + let qualifier = alias.or_else(|| field.qualifier().cloned()); + let name = field.name().clone(); + Ok(Self::new(qualifier, name)) + } + + fn with_name(&self, name: &str) -> Self { + Self { + qualifier: self.qualifier.clone(), + name: name.to_string(), + } + } +} + +/// Realias duplicate expression aliases in the provided list of expressions. +pub(crate) fn realias_duplicate_expr_aliases( + mut exprs: Vec, + schema: &DFSchema, + alias: Option, +) -> Result> { + // Two-pass algorithm is used: first collect all the aliases and indices of repeated aliases, + // then realias the collected indices on the second pass. + // This is to avoid realiasing to a name that is valid but is used by another expression + // that was not originally processed. + let mut aliases = HashSet::new(); + let mut indices_to_realias = vec![]; + for (index, expr) in exprs.iter().enumerate() { + let qualified_alias = + QualifiedAlias::from_expr_schema_and_alias(expr, schema, alias.clone())?; + let is_duplicate = !aliases.insert(qualified_alias); + if is_duplicate { + indices_to_realias.push(index); + } + } + const MAX_SUFFIX_LIMIT: usize = 100; + 'outer: for index in indices_to_realias { + let qualified_alias = QualifiedAlias::from_expr_schema_and_alias( + &exprs[index], + schema, + alias.clone(), + )?; + for suffix in 1..=MAX_SUFFIX_LIMIT { + let new_name = format!("{}__{}", qualified_alias.name, suffix); + let new_qualified_alias = qualified_alias.with_name(&new_name); + let is_duplicate = !aliases.insert(new_qualified_alias); + if !is_duplicate { + set_expr_alias(&mut exprs[index], new_name); + continue 'outer; + } + } + return Err(DataFusionError::Internal(format!( + "Unable to realias duplicate expression alias: {:?}", + exprs[index] + ))); + } + Ok(exprs) +} + +/// Set an alias for an expression, replacing an existing alias or adding one if necessary. +fn set_expr_alias(expr: &mut Expr, alias: String) { + match expr { + Expr::Alias(_, name) => { + *name = alias; + } + _ => { + // Expr::Wildcard is simply a placeholder to please borrow checker + *expr = Expr::Alias(Box::new(replace(expr, Expr::Wildcard)), alias); + } + } +} + #[cfg(test)] mod tests { use super::*;