From a9eaa3ad99dc847625e49339cdffe46bac07b392 Mon Sep 17 00:00:00 2001
From: Josh Rosen
Date: Fri, 1 Apr 2016 16:10:29 -0700
Subject: [PATCH] Push casts beneath CaseWhen and If operations.

---
 .../sql/catalyst/optimizer/Optimizer.scala    | 19 +++++++
 .../optimizer/CastPushDownSuite.scala         | 52 +++++++++++++++++++
 2 files changed, 71 insertions(+)
 create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CastPushDownSuite.scala

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index a7a948ef1b97d..2cc5d6cc9ca1f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -89,6 +89,7 @@ abstract class Optimizer extends RuleExecutor[LogicalPlan] {
       PruneFilters,
       EliminateSorts,
       SimplifyCasts,
+      CastPushDown,
       SimplifyCaseConversionExpressions,
       EliminateSerialization) ::
     Batch("Decimal Optimizations", FixedPoint(100),
@@ -1172,6 +1173,24 @@ object SimplifyCasts extends Rule[LogicalPlan] {
   }
 }
 
+/**
+ * Rewrites a [[Cast]] that wraps a [[CaseWhen]] or an [[If]] so that the cast is applied to each
+ * branch value (and to the else / false value) instead of to the conditional as a whole.
+ */
+object CastPushDown extends Rule[LogicalPlan] {
+  def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions {
+    case Cast(CaseWhen(branches, elseValue), targetType) =>
+      val castedBranches = branches.map { case (predicate, result) =>
+        predicate -> Cast(result, targetType)
+      }
+      CaseWhen(castedBranches, elseValue.map(Cast(_, targetType)))
+    case Cast(If(predicate, whenTrue, whenFalse), targetType) =>
+      val castedTrue = Cast(whenTrue, targetType)
+      val castedFalse = Cast(whenFalse, targetType)
+      If(predicate, castedTrue, castedFalse)
+  }
+}
+
 /**
  * Removes nodes that are not necessary.
*/ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CastPushDownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CastPushDownSuite.scala new file mode 100644 index 0000000000000..126c1e6880c76 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CastPushDownSuite.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.sql.catalyst.optimizer
+
+import org.apache.spark.sql.catalyst.dsl.plans._
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral
+import org.apache.spark.sql.catalyst.plans.PlanTest
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.rules._
+import org.apache.spark.sql.types.BooleanType
+
+/** Tests that [[CastPushDown]] moves a [[Cast]] into the branches of [[CaseWhen]] and [[If]]. */
+class CastPushDownSuite extends PlanTest with PredicateHelper {
+  object Optimize extends RuleExecutor[LogicalPlan] {
+    val batches = Seq(Batch("CastPushDown", FixedPoint(1), CastPushDown))
+  }
+
+  /** Asserts that optimizing a projection of `e1` produces the plan of a projection of `e2`. */
+  protected def assertEquivalent(e1: Expression, e2: Expression): Unit = {
+    def project(e: Expression) = Project(Alias(e, "out")() :: Nil, OneRowRelation).analyze
+    val expected = project(e2)
+    comparePlans(Optimize.execute(project(e1)), expected)
+  }
+
+  test("push cast beneath case when") {
+    val uncast = CaseWhen(Seq(TrueLiteral -> Literal(1)), Literal(0))
+    val expected =
+      CaseWhen(Seq(TrueLiteral -> Cast(Literal(1), BooleanType)), Cast(Literal(0), BooleanType))
+    assertEquivalent(Cast(uncast, BooleanType), expected)
+  }
+
+  test("push cast beneath if") {
+    val expected = If(TrueLiteral, Cast(Literal(1), BooleanType), Cast(Literal(0), BooleanType))
+    assertEquivalent(Cast(If(TrueLiteral, Literal(1), Literal(0)), BooleanType), expected)
+  }
+}