Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion datafusion/core/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,10 @@ use crate::logical_plan::{
CrossJoin, DFField, DFSchema, DFSchemaRef, Limit, Partitioning, Repartition,
SubqueryType, Values,
};
use crate::sql::utils::{group_window_expr_by_sort_keys, resolve_exprs_to_aliases};
use crate::sql::utils::{
group_window_expr_by_sort_keys, realias_duplicate_expr_aliases,
resolve_exprs_to_aliases,
};

/// Default table name for unnamed table
pub const UNNAMED_TABLE: &str = "?table?";
Expand Down Expand Up @@ -1222,6 +1225,13 @@ pub fn project_with_alias(
.push(columnize_expr(normalize_col(e, &plan)?, input_schema)),
}
}

// NOTE (cubesql): realias expressions that have the same name and qualifier
if alias.is_some() {
projected_expr =
realias_duplicate_expr_aliases(projected_expr, input_schema, alias.clone())?;
}

validate_unique_names("Projections", projected_expr.iter(), input_schema)?;
let input_schema = DFSchema::new_with_metadata(
exprlist_to_fields(&projected_expr, &plan)?,
Expand Down
116 changes: 84 additions & 32 deletions datafusion/core/src/sql/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ use crate::logical_plan::{
use crate::optimizer::utils::exprlist_to_columns;
use crate::prelude::JoinType;
use crate::scalar::ScalarValue;
use crate::sql::utils::{find_udtf_exprs, make_decimal_type, normalize_ident};
use crate::sql::utils::{
find_udtf_exprs, make_decimal_type, normalize_ident, realias_duplicate_expr_aliases,
};
use crate::{
error::{DataFusionError, Result},
physical_plan::aggregates,
Expand Down Expand Up @@ -1038,6 +1040,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
);
}

// NOTE (cubesql): realias expressions that have the same name and qualifier
let select_exprs =
realias_duplicate_expr_aliases(select_exprs, plan.schema(), None)?;

// having and group by clause may reference aliases defined in select projection
let projected_plan = self.project(plan.clone(), select_exprs.clone())?;

Expand Down Expand Up @@ -3191,21 +3197,35 @@ mod tests {

#[test]
fn select_repeated_column() {
let sql = "SELECT age, age FROM person";
let err = logical_plan(sql).expect_err("query should have failed");
assert_eq!(
r##"Plan("Projections require unique expression names but the expression \"#person.age\" at position 0 and \"#person.age\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##,
format!("{:?}", err)
// let sql = "SELECT age, age FROM person";
// let err = logical_plan(sql).expect_err("query should have failed");
// assert_eq!(
// r##"Plan("Projections require unique expression names but the expression \"#person.age\" at position 0 and \"#person.age\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##,
// format!("{:?}", err)
// );

// NOTE: this is supported with cubesql patches
quick_test(
"SELECT age, age FROM person",
"Projection: #person.age, #person.age AS age__1\
\n TableScan: person projection=None",
);
}

#[test]
fn select_wildcard_with_repeated_column() {
let sql = "SELECT *, age FROM person";
let err = logical_plan(sql).expect_err("query should have failed");
assert_eq!(
r##"Plan("Projections require unique expression names but the expression \"#person.age\" at position 3 and \"#person.age\" at position 8 have the same name. Consider aliasing (\"AS\") one of them.")"##,
format!("{:?}", err)
// let sql = "SELECT *, age FROM person";
// let err = logical_plan(sql).expect_err("query should have failed");
// assert_eq!(
// r##"Plan("Projections require unique expression names but the expression \"#person.age\" at position 3 and \"#person.age\" at position 8 have the same name. Consider aliasing (\"AS\") one of them.")"##,
// format!("{:?}", err)
// );

// NOTE: this is supported with cubesql patches
quick_test(
"SELECT *, age FROM person",
"Projection: #person.id, #person.first_name, #person.last_name, #person.age, #person.state, #person.salary, #person.birth_date, #person.😀, #person.age AS age__1\
\n TableScan: person projection=None",
);
}

Expand Down Expand Up @@ -3779,11 +3799,19 @@ mod tests {

#[test]
fn select_simple_aggregate_repeated_aggregate() {
let sql = "SELECT MIN(age), MIN(age) FROM person";
let err = logical_plan(sql).expect_err("query should have failed");
assert_eq!(
r##"Plan("Projections require unique expression names but the expression \"MIN(#person.age)\" at position 0 and \"MIN(#person.age)\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##,
format!("{:?}", err)
// let sql = "SELECT MIN(age), MIN(age) FROM person";
// let err = logical_plan(sql).expect_err("query should have failed");
// assert_eq!(
// r##"Plan("Projections require unique expression names but the expression \"MIN(#person.age)\" at position 0 and \"MIN(#person.age)\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##,
// format!("{:?}", err)
// );

// NOTE: this is supported with cubesql patches
quick_test(
"SELECT MIN(age), MIN(age) FROM person",
"Projection: #MIN(person.age), #MIN(person.age) AS MIN(person.age)__1\
\n Aggregate: groupBy=[[]], aggr=[[MIN(#person.age)]]\
\n TableScan: person projection=None",
);
}

Expand All @@ -3809,11 +3837,19 @@ mod tests {

#[test]
fn select_simple_aggregate_repeated_aggregate_with_repeated_aliases() {
let sql = "SELECT MIN(age) AS a, MIN(age) AS a FROM person";
let err = logical_plan(sql).expect_err("query should have failed");
assert_eq!(
r##"Plan("Projections require unique expression names but the expression \"MIN(#person.age) AS a\" at position 0 and \"MIN(#person.age) AS a\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##,
format!("{:?}", err)
// let sql = "SELECT MIN(age) AS a, MIN(age) AS a FROM person";
// let err = logical_plan(sql).expect_err("query should have failed");
// assert_eq!(
// r##"Plan("Projections require unique expression names but the expression \"MIN(#person.age) AS a\" at position 0 and \"MIN(#person.age) AS a\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##,
// format!("{:?}", err)
// );

// NOTE: this is supported with cubesql patches
quick_test(
"SELECT MIN(age) AS a, MIN(age) AS a FROM person",
"Projection: #MIN(person.age) AS a, #MIN(person.age) AS a__1\
\n Aggregate: groupBy=[[]], aggr=[[MIN(#person.age)]]\
\n TableScan: person projection=None",
);
}

Expand All @@ -3839,11 +3875,19 @@ mod tests {

#[test]
fn select_simple_aggregate_with_groupby_with_aliases_repeated() {
let sql = "SELECT state AS a, MIN(age) AS a FROM person GROUP BY state";
let err = logical_plan(sql).expect_err("query should have failed");
assert_eq!(
r##"Plan("Projections require unique expression names but the expression \"#person.state AS a\" at position 0 and \"MIN(#person.age) AS a\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##,
format!("{:?}", err)
// let sql = "SELECT state AS a, MIN(age) AS a FROM person GROUP BY state";
// let err = logical_plan(sql).expect_err("query should have failed");
// assert_eq!(
// r##"Plan("Projections require unique expression names but the expression \"#person.state AS a\" at position 0 and \"MIN(#person.age) AS a\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##,
// format!("{:?}", err)
// );

// NOTE: this is supported with cubesql patches
quick_test(
"SELECT state AS a, MIN(age) AS a FROM person GROUP BY state",
"Projection: #person.state AS a, #MIN(person.age) AS a__1\
\n Aggregate: groupBy=[[#person.state]], aggr=[[MIN(#person.age)]]\
\n TableScan: person projection=None",
);
}

Expand Down Expand Up @@ -4008,12 +4052,20 @@ mod tests {

#[test]
fn select_simple_aggregate_with_groupby_aggregate_repeated() {
let sql = "SELECT state, MIN(age), MIN(age) FROM person GROUP BY state";
let err = logical_plan(sql).expect_err("query should have failed");
assert_eq!(
r##"Plan("Projections require unique expression names but the expression \"MIN(#person.age)\" at position 1 and \"MIN(#person.age)\" at position 2 have the same name. Consider aliasing (\"AS\") one of them.")"##,
format!("{:?}", err)
);
// let sql = "SELECT state, MIN(age), MIN(age) FROM person GROUP BY state";
// let err = logical_plan(sql).expect_err("query should have failed");
// assert_eq!(
// r##"Plan("Projections require unique expression names but the expression \"MIN(#person.age)\" at position 1 and \"MIN(#person.age)\" at position 2 have the same name. Consider aliasing (\"AS\") one of them.")"##,
// format!("{:?}", err)
// );

// NOTE: this is supported with cubesql patches
quick_test(
"SELECT state, MIN(age), MIN(age) FROM person GROUP BY state",
"Projection: #person.state, #MIN(person.age), #MIN(person.age) AS MIN(person.age)__1\
\n Aggregate: groupBy=[[#person.state]], aggr=[[MIN(#person.age)]]\
\n TableScan: person projection=None",
)
}

#[test]
Expand Down
93 changes: 91 additions & 2 deletions datafusion/core/src/sql/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,16 @@ use arrow::datatypes::{DataType, DECIMAL_DEFAULT_SCALE, DECIMAL_MAX_PRECISION};
use datafusion_common::DFSchema;
use sqlparser::ast::Ident;

use crate::logical_plan::ExprVisitable;
use crate::logical_plan::{Expr, Like, LogicalPlan};
use crate::logical_plan::{ExprSchemable, ExprVisitable};
use crate::scalar::ScalarValue;
use crate::{
error::{DataFusionError, Result},
logical_plan::{Column, ExpressionVisitor, Recursion},
};
use datafusion_expr::expr::GroupingSet;
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::mem::replace;

/// Collect all deeply nested `Expr::AggregateFunction` and
/// `Expr::AggregateUDF`. They are returned in order of occurrence (depth
Expand Down Expand Up @@ -781,6 +782,94 @@ pub(crate) fn normalize_ident(id: Ident) -> String {
id.value
}

/// Structure holding qualifier and name of an alias.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct QualifiedAlias {
qualifier: Option<String>,
name: String,
}

impl QualifiedAlias {
fn new(qualifier: Option<String>, name: String) -> Self {
Self { qualifier, name }
}

fn from_expr_schema_and_alias(
expr: &Expr,
schema: &DFSchema,
alias: Option<String>,
) -> Result<Self> {
let field = expr.to_field(schema)?;
let qualifier = alias.or_else(|| field.qualifier().cloned());
let name = field.name().clone();
Ok(Self::new(qualifier, name))
}

fn with_name(&self, name: &str) -> Self {
Self {
qualifier: self.qualifier.clone(),
name: name.to_string(),
}
}
}

/// Realias duplicate expression aliases in the provided list of expressions.
pub(crate) fn realias_duplicate_expr_aliases(
mut exprs: Vec<Expr>,
schema: &DFSchema,
alias: Option<String>,
) -> Result<Vec<Expr>> {
// Two-pass algorithm is used: first collect all the aliases and indices of repeated aliases,
// then realias the collected indices on the second pass.
// This is to avoid realiasing to a name that is valid but is used by another expression
// that was not originally processed.
let mut aliases = HashSet::new();
let mut indices_to_realias = vec![];
for (index, expr) in exprs.iter().enumerate() {
let qualified_alias =
QualifiedAlias::from_expr_schema_and_alias(expr, schema, alias.clone())?;
let is_duplicate = !aliases.insert(qualified_alias);
if is_duplicate {
indices_to_realias.push(index);
}
}
const MAX_SUFFIX_LIMIT: usize = 100;
'outer: for index in indices_to_realias {
let qualified_alias = QualifiedAlias::from_expr_schema_and_alias(
&exprs[index],
schema,
alias.clone(),
)?;
for suffix in 1..=MAX_SUFFIX_LIMIT {
let new_name = format!("{}__{}", qualified_alias.name, suffix);
let new_qualified_alias = qualified_alias.with_name(&new_name);
let is_duplicate = !aliases.insert(new_qualified_alias);
if !is_duplicate {
set_expr_alias(&mut exprs[index], new_name);
continue 'outer;
}
}
return Err(DataFusionError::Internal(format!(
"Unable to realias duplicate expression alias: {:?}",
exprs[index]
)));
}
Ok(exprs)
}

/// Set an alias for an expression, replacing an existing alias or adding one if necessary.
fn set_expr_alias(expr: &mut Expr, alias: String) {
match expr {
Expr::Alias(_, name) => {
*name = alias;
}
_ => {
// Expr::Wildcard is simply a placeholder to please borrow checker
*expr = Expr::Alias(Box::new(replace(expr, Expr::Wildcard)), alias);
Comment on lines +867 to +868
Copy link

Copilot AI Aug 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment suggests this is a workaround for the borrow checker, but it's unclear why Expr::Wildcard was chosen as the placeholder. Consider explaining why this specific variant is appropriate or if there's a better alternative.

Suggested change
// Expr::Wildcard is simply a placeholder to please borrow checker
*expr = Expr::Alias(Box::new(replace(expr, Expr::Wildcard)), alias);
// Use std::mem::replace to move the original expression into the Alias variant.
// This avoids borrow checker issues and preserves the original expression semantics.
*expr = Expr::Alias(Box::new(replace(expr, Expr::Alias(Box::new(Expr::Literal(ScalarValue::Null)), alias.clone()))), alias);

Copilot uses AI. Check for mistakes.
}
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
Loading