apache · viirya · Jun 9, 2015 · Jun 10, 2015 · Jun 10, 2015 · Jun 10, 2015
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
@@ -143,7 +143,8 @@ def _():
     'atan2': 'Returns the angle theta from the conversion of rectangular coordinates (x, y) to' +
              'polar coordinates (r, theta).',
     'hypot': 'Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.',
-    'pow': 'Returns the value of the first argument raised to the power of the second argument.'
+    'pow': 'Returns the value of the first argument raised to the power of the second argument.',
+    'log': 'Returns the first argument-based logarithm of the second argument',
 }
 
 _window_functions = {

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -107,7 +107,7 @@ object FunctionRegistry {
     expression[Expm1]("expm1"),
     expression[Floor]("floor"),
     expression[Hypot]("hypot"),
-    expression[Log]("log"),
+    expression[Logarithm]("log"),
     expression[Log10]("log10"),
     expression[Log1p]("log1p"),
     expression[Pi]("pi"),

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala
@@ -254,3 +254,54 @@ case class Pow(left: Expression, right: Expression)
       """
   }
 }
+
+/** One-argument form: delegates to the single-argument `Log` expression. */
+object Logarithm {
+  def apply(child: Expression): Expression = new Log(child)
+}
+
+/**
+ * Logarithm of `right` in base `left`, computed as `ln(right) / ln(left)`.
+ *
+ * Evaluates to null when `right` is null or the result is NaN; a null `left` (base) falls back
+ * to the natural logarithm of `right`.
+ */
+case class Logarithm(left: Expression, right: Expression)
+  extends BinaryMathExpression((c1, c2) => math.log(c2) / math.log(c1), "LOG") {
+  override def eval(input: Row): Any = {
+    val value = right.eval(input)
+    if (value == null) {
+      null
+    } else {
+      val base = left.eval(input)
+      val lnValue = math.log(value.asInstanceOf[Double])
+      // A null base selects the natural logarithm.
+      val result =
+        if (base == null) lnValue else lnValue / math.log(base.asInstanceOf[Double])
+      if (result.isNaN) null else result
+    }
+  }
+
+  override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
+    val eval1 = left.gen(ctx)
+    val eval2 = right.gen(ctx)
+    val resultCode =
+      s"java.lang.Math.log(${eval2.primitive}) / java.lang.Math.log(${eval1.primitive})"
+
+    // Mirrors eval: the base is only evaluated once the value is known to be non-null.
+    s"""
+      ${eval2.code}
+      boolean ${ev.isNull} = ${eval2.isNull};
+      ${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)};
+      if (!${ev.isNull}) {
+        ${eval1.code}
+        if (!${eval1.isNull}) {
+          ${ev.primitive} = ${resultCode};
+        } else {
+          ${ev.primitive} = java.lang.Math.log(${eval2.primitive});
+        }
+        ${ev.isNull} = Double.isNaN(${ev.primitive});
+      }
+    """
+  }
+}
diff --git a/...atalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala b/...atalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
@@ -204,4 +204,18 @@ class MathFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     testBinary(Atan2, math.atan2)
   }
 
+  test("binary log") {
+    val expected = (base: Double, value: Double) => math.log(value) / math.log(base)
+    val samples = (1 to 20).map(i => (i * 0.1, i * 0.2))
+
+    for ((x, y) <- samples) {
+      checkEvaluation(Logarithm(Literal(x), Literal(y)), expected(x, y), EmptyRow)
+      checkEvaluation(Logarithm(Literal(y), Literal(x)), expected(y, x), EmptyRow)
+    }
+    // A null base makes Logarithm evaluate like the natural log; a null value yields null.
+    checkEvaluation(Logarithm(Literal.create(null, DoubleType), Literal(1.0)),
+      math.log(1.0), create_row(null))
+    checkEvaluation(Logarithm(Literal(1.0), Literal.create(null, DoubleType)),
+      null, create_row(null))
+  }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1083,6 +1083,22 @@ object functions {
    */
   def log(columnName: String): Column = log(Column(columnName))
 
+  /**
+   * Returns the logarithm of column `a` in the given `base`.
+   *
+   * @group math_funcs
+   * @since 1.4.0
+   */
+  def log(base: Double, a: Column): Column = Logarithm(lit(base).expr, a.expr)
+
+  /**
+   * Returns the logarithm, in the given `base`, of the column named `a`.
+   *
+   * @group math_funcs
+   * @since 1.4.0
+   */
+  def log(base: Double, a: String): Column = log(base, Column(a))
+
   /**
    * Computes the logarithm of the given value in base 10.
    *

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -110,6 +110,19 @@ class DataFrameFunctionsSuite extends QueryTest {
       testData2.collect().toSeq.map(r => Row(~r.getInt(0))))
   }
 
+  test("log") {
+    val df = Seq[(Integer, Integer)]((123, null)).toDF("a", "b")
+    // Natural log of `a`, base-2 log of `a`, and the log of the null column `b`.
+    val expected = Row(math.log(123), math.log(123) / math.log(2), null)
+    checkAnswer(
+      df.select(
+        org.apache.spark.sql.functions.log("a"),
+        org.apache.spark.sql.functions.log(2.0, "a"),
+        org.apache.spark.sql.functions.log("b")),
+      expected)
+    checkAnswer(df.selectExpr("log(a)", "log(2.0, a)", "log(b)"), expected)
+  }
+
   test("length") {
     checkAnswer(
       nullStrings.select(strlen($"s"), strlen("s")),