Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 9 additions & 13 deletions datafusion/expr/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -593,15 +593,8 @@ impl LogicalPlanBuilder {

/// Apply a union, removing duplicate rows
pub fn union_distinct(self, plan: LogicalPlan) -> Result<Self> {
// unwrap top-level Distincts, to avoid duplication
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The key point of this PR is that reshuffling around of DISTINCT should be done as an optimization pass, rather than as part of the builder, if I understand it. This makes sense to me

let left_plan: LogicalPlan = match self.plan {
LogicalPlan::Distinct(Distinct { input }) => (*input).clone(),
_ => self.plan,
};
let right_plan: LogicalPlan = match plan {
LogicalPlan::Distinct(Distinct { input }) => (*input).clone(),
_ => plan,
};
let left_plan: LogicalPlan = self.plan;
let right_plan: LogicalPlan = plan;

Ok(Self::from(LogicalPlan::Distinct(Distinct {
input: Arc::new(union(left_plan, right_plan)?),
Expand Down Expand Up @@ -1629,13 +1622,16 @@ mod tests {
.union_distinct(plan.build()?)?
.build()?;

// output has only one union
let expected = "\
Distinct:\
\n Union\
\n TableScan: employee_csv projection=[state, salary]\
\n TableScan: employee_csv projection=[state, salary]\
\n TableScan: employee_csv projection=[state, salary]\
\n Distinct:\
\n Union\
\n Distinct:\
\n Union\
\n TableScan: employee_csv projection=[state, salary]\
\n TableScan: employee_csv projection=[state, salary]\
\n TableScan: employee_csv projection=[state, salary]\
\n TableScan: employee_csv projection=[state, salary]";

assert_eq!(expected, format!("{plan:?}"));
Expand Down