diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 634c8dcff138b..85776670e5c4e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -320,15 +320,12 @@ object ColumnPruning extends Rule[LogicalPlan] { output1.zip(output2).forall(pair => pair._1.semanticEquals(pair._2)) def apply(plan: LogicalPlan): LogicalPlan = plan transform { - // Prunes the unused columns from project list of Project/Aggregate/Window/Expand + // Prunes the unused columns from project list of Project/Aggregate/Expand case p @ Project(_, p2: Project) if (p2.outputSet -- p.references).nonEmpty => p.copy(child = p2.copy(projectList = p2.projectList.filter(p.references.contains))) case p @ Project(_, a: Aggregate) if (a.outputSet -- p.references).nonEmpty => p.copy( child = a.copy(aggregateExpressions = a.aggregateExpressions.filter(p.references.contains))) - case p @ Project(_, w: Window) if (w.windowOutputSet -- p.references).nonEmpty => - p.copy(child = w.copy( - windowExpressions = w.windowExpressions.filter(p.references.contains))) case a @ Project(_, e @ Expand(_, _, grandChild)) if (e.outputSet -- a.references).nonEmpty => val newOutput = e.output.filter(a.references.contains(_)) val newProjects = e.projections.map { proj => @@ -378,12 +375,17 @@ object ColumnPruning extends Rule[LogicalPlan] { p } - // Eliminate no-op Projects - case p @ Project(projectList, child) if sameOutput(child.output, p.output) => child + // Prune unnecessary window expressions + case p @ Project(_, w: Window) if (w.windowOutputSet -- p.references).nonEmpty => + p.copy(child = w.copy( + windowExpressions = w.windowExpressions.filter(p.references.contains))) // Eliminate no-op Window case w: Window if w.windowExpressions.isEmpty => w.child + // Eliminate no-op Projects + case p @ Project(projectList, child) if sameOutput(child.output, p.output) => child + // Can't prune the columns on LeafNode case p @ Project(_, l: LeafNode) => p