From 2dfe50f607a6dced1554d07daa3a8d2e9664ffa9 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Mon, 24 Nov 2014 21:54:34 -0800 Subject: [PATCH 1/8] return null when divider is 0 of Double type --- .../org/apache/spark/sql/catalyst/expressions/Expression.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 39b120e8de485..763b467bd6d39 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -142,6 +142,8 @@ abstract class Expression extends TreeNode[Expression] { val evalE2 = e2.eval(i: Row) if (evalE2 == null) { null + } else if (evalE2 == 0) { + null } else { e1.dataType match { case ft: FractionalType => From cf28c5888f6a7445d02269f221e821d76add871c Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Tue, 25 Nov 2014 18:39:49 -0800 Subject: [PATCH 2/8] divide fix --- .../sql/catalyst/expressions/Expression.scala | 2 -- .../sql/catalyst/expressions/arithmetic.scala | 14 +++++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 763b467bd6d39..39b120e8de485 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -142,8 +142,6 @@ abstract class Expression extends TreeNode[Expression] { val evalE2 = e2.eval(i: Row) if (evalE2 == null) { null - } else if (evalE2 == 0) { - null } else { e1.dataType match { case ft: FractionalType => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index d17c9553ac24e..5a03029ca57a1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -106,12 +106,16 @@ case class Multiply(left: Expression, right: Expression) extends BinaryArithmeti case class Divide(left: Expression, right: Expression) extends BinaryArithmetic { def symbol = "/" - override def nullable = left.nullable || right.nullable || dataType.isInstanceOf[DecimalType] + override def nullable = true - override def eval(input: Row): Any = dataType match { - case _: FractionalType => f2(input, left, right, _.div(_, _)) - case _: IntegralType => i2(input, left , right, _.quot(_, _)) - } + override def eval(input: Row): Any = + if(right.eval(input) == 0) { + null + } else + dataType match { + case _: FractionalType => f2(input, left, right, _.div(_, _)) + case _: IntegralType => i2(input, left , right, _.quot(_, _)) + } } From 22ecd9a07180a8d81e34ed30cdd6f1601100e49a Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Tue, 25 Nov 2014 18:49:42 -0800 Subject: [PATCH 3/8] fix style --- .../apache/spark/sql/catalyst/expressions/arithmetic.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 5a03029ca57a1..6626dda3beea3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -111,10 +111,11 @@ case class Divide(left: Expression, right: Expression) extends BinaryArithmetic override def eval(input: Row): Any = if(right.eval(input) == 0) { null - } else + } else { dataType match { - case _: FractionalType => f2(input, left, right, _.div(_, _)) - case _: IntegralType => i2(input, left , right, _.quot(_, _)) + case _: FractionalType => f2(input, left, right, _.div(_, _)) + case _: IntegralType => i2(input, left , right, _.quot(_, _)) + } } } From cee92bd627557ce8cf1d4c53ced3a2406d748c47 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Thu, 27 Nov 2014 00:55:41 -0800 Subject: [PATCH 4/8] avoid evaluation 2 times --- .../sql/catalyst/expressions/Expression.scala | 43 ++++++++++++++++++- .../sql/catalyst/expressions/arithmetic.scala | 16 +++---- 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 39b120e8de485..c4e2e2e87d25a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -153,10 +153,29 @@ abstract class Expression extends TreeNode[Expression] { } } + /** + * Evaluation helper function for 1 Fractional children expression. + * if the expression result is null, the evaluation result should be null. + */ + @inline + protected final def f1(i: Row, e1: Expression, f: ((Fractional[Any], Any) => Any)): Any = { + val evalE1 = e1.eval(i: Row) + if(evalE1 == null) { + null + } else { + e1.dataType match { + case ft: FractionalType => + f.asInstanceOf[(Fractional[ft.JvmType], ft.JvmType) => ft.JvmType]( + ft.fractional, evalE1.asInstanceOf[ft.JvmType]) + case other => sys.error(s"Type $other does not support fractional operations") + } + } + } + /** * Evaluation helper function for 2 Integral children expressions. Those expressions are * supposed to be in the same data type, and also the return type. - * Either one of the expressions result is null, the evaluation result should be null. + * if the expression result is null, the evaluation result should be null. */ @inline protected final def i2( @@ -189,6 +208,28 @@ abstract class Expression extends TreeNode[Expression] { } } + /** + * Evaluation helper function for 1 Integral children expression. + * Either one of the expressions result is null, the evaluation result should be null. + */ + @inline + protected final def i1(i: Row, e1: Expression, f: ((Integral[Any], Any) => Any)): Any = { + val evalE1 = e1.eval(i) + if(evalE1 == null) { + null + } else { + e1.dataType match { + case i: IntegralType => + f.asInstanceOf[(Integral[i.JvmType], i.JvmType) => i.JvmType]( + i.integral, evalE1.asInstanceOf[i.JvmType]) + case i: FractionalType => + f.asInstanceOf[(Integral[i.JvmType], i.JvmType) => i.JvmType]( + i.asIntegral, evalE1.asInstanceOf[i.JvmType]) + case other => sys.error(s"Type $other does not support numeric operations") + } + } + } + /** * Evaluation helper function for 2 Comparable children expressions. Those expressions are * supposed to be in the same data type, and the return type should be Integer: diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 6626dda3beea3..d3432331759cf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -108,15 +108,15 @@ case class Divide(left: Expression, right: Expression) extends BinaryArithmetic override def nullable = true - override def eval(input: Row): Any = - if(right.eval(input) == 0) { - null - } else { - dataType match { - case _: FractionalType => f2(input, left, right, _.div(_, _)) - case _: IntegralType => i2(input, left , right, _.quot(_, _)) - } + override def eval(input: Row): Any = { + val evalE2 = right.eval(input) + dataType match { + case _ if evalE2 == null => null + case _ if evalE2 == 0 => null + case ft: FractionalType => f1(input, left, _.div(_, evalE2.asInstanceOf[ft.JvmType])) + case it: IntegralType => i1(input, left, _.quot(_, evalE2.asInstanceOf[it.JvmType])) } + } } From 6f5716ffabf8d13e6b6fc88563aa609e5cfff795 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Thu, 27 Nov 2014 02:33:31 -0800 Subject: [PATCH 5/8] fix comments --- .../apache/spark/sql/catalyst/expressions/Expression.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index c4e2e2e87d25a..bc45881e42748 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -175,7 +175,7 @@ abstract class Expression extends TreeNode[Expression] { /** * Evaluation helper function for 2 Integral children expressions. Those expressions are * supposed to be in the same data type, and also the return type. - * if the expression result is null, the evaluation result should be null. + * Either one of the expressions result is null, the evaluation result should be null. */ @inline protected final def i2( @@ -210,7 +210,7 @@ abstract class Expression extends TreeNode[Expression] { /** * Evaluation helper function for 1 Integral children expression. - * Either one of the expressions result is null, the evaluation result should be null. + * if the expression result is null, the evaluation result should be null. */ @inline protected final def i1(i: Row, e1: Expression, f: ((Integral[Any], Any) => Any)): Any = { From 36236a516eeb58b82da35207df138cf421bf67c5 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Mon, 1 Dec 2014 18:52:30 -0800 Subject: [PATCH 6/8] add test cases --- .../expressions/ExpressionEvaluationSuite.scala | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala index 3f5b9f698f827..25f56424888aa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala @@ -149,6 +149,21 @@ class ExpressionEvaluationSuite extends FunSuite { checkEvaluation(In(Literal(1), Seq(Literal(1), Literal(2))) && In(Literal(2), Seq(Literal(1), Literal(2))), true) } + test("Divide") { + checkEvaluation(Divide(Literal(2), Literal(1)), 2) + checkEvaluation(Divide(Literal(1.0), Literal(2.0)), 0.5) + checkEvaluation(Divide(Literal(1), Literal(2)), 0) + checkEvaluation(Divide(Literal(1), Literal(0)), null) + checkEvaluation(Divide(Literal(1.0), Literal(0.0)), null) + checkEvaluation(Divide(Literal(0.0), Literal(0.0)), null) + checkEvaluation(Divide(Literal(0), Literal(null, IntegerType)), null) + checkEvaluation(Divide(Literal(1), Literal(null, IntegerType)), null) + checkEvaluation(Divide(Literal(null, IntegerType), Literal(0)), null) + checkEvaluation(Divide(Literal(null, DoubleType), Literal(0.0)), null) + checkEvaluation(Divide(Literal(null, IntegerType), Literal(1)), null) + checkEvaluation(Divide(Literal(null, IntegerType), Literal(null, IntegerType)), null) + } + test("INSET") { val hS = HashSet[Any]() + 1 + 2 val nS = HashSet[Any]() + 1 + 2 + null From 85c28baa3cc4f4e453cd576abfe4dcb5850ca625 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Mon, 1 Dec 2014 21:32:37 -0800 Subject: [PATCH 7/8] temp --- .../sql/catalyst/expressions/codegen/CodeGenerator.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 67f8d411b6bb4..404fafaac07ec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -359,7 +359,9 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin case Add(e1, e2) => (e1, e2) evaluate { case (eval1, eval2) => q"$eval1 + $eval2" } case Subtract(e1, e2) => (e1, e2) evaluate { case (eval1, eval2) => q"$eval1 - $eval2" } case Multiply(e1, e2) => (e1, e2) evaluate { case (eval1, eval2) => q"$eval1 * $eval2" } - case Divide(e1, e2) => (e1, e2) evaluate { case (eval1, eval2) => q"$eval1 / $eval2" } + case Divide(e1, e2) => (e1, e2) evaluate { + case (eval1, eval2) => q"if ($eval2 == 0) null else $eval1 / $eval2" + } case IsNotNull(e) => val eval = expressionEvaluator(e) From 2e98677495c71151e76432aa146572780b9c29b1 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Mon, 1 Dec 2014 22:48:36 -0800 Subject: [PATCH 8/8] fix code gen for divide 0 --- .../expressions/codegen/CodeGenerator.scala | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 404fafaac07ec..ab71e15e1f573 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -359,9 +359,24 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin case Add(e1, e2) => (e1, e2) evaluate { case (eval1, eval2) => q"$eval1 + $eval2" } case Subtract(e1, e2) => (e1, e2) evaluate { case (eval1, eval2) => q"$eval1 - $eval2" } case Multiply(e1, e2) => (e1, e2) evaluate { case (eval1, eval2) => q"$eval1 * $eval2" } - case Divide(e1, e2) => (e1, e2) evaluate { - case (eval1, eval2) => q"if ($eval2 == 0) null else $eval1 / $eval2" - } + case Divide(e1, e2) => + val eval1 = expressionEvaluator(e1) + val eval2 = expressionEvaluator(e2) + + eval1.code ++ eval2.code ++ + q""" + var $nullTerm = false + var $primitiveTerm: ${termForType(e1.dataType)} = 0 + + if (${eval1.nullTerm} || ${eval2.nullTerm} ) { + $nullTerm = true + } else if (${eval2.primitiveTerm} == 0) + $nullTerm = true + else { + $nullTerm = false + $primitiveTerm = ${eval1.primitiveTerm} / ${eval2.primitiveTerm} + } + """.children case IsNotNull(e) => val eval = expressionEvaluator(e)