From 39d6475d8cf357488fd1ec736b4d910f8237fc5b Mon Sep 17 00:00:00 2001 From: Cheng Hao Date: Wed, 16 Jul 2014 15:59:50 +0800 Subject: [PATCH 1/3] Add HiveDecimal & HiveVarchar support in unwrap data --- .../scala/org/apache/spark/sql/hive/hiveUdfs.scala | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala index 9b105308ab7cf..4c40ae1774939 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.hive import scala.collection.mutable.ArrayBuffer -import org.apache.hadoop.hive.common.`type`.HiveDecimal +import org.apache.hadoop.hive.common.`type`.{HiveDecimal, HiveVarchar} import org.apache.hadoop.hive.ql.exec.UDF import org.apache.hadoop.hive.ql.exec.{FunctionInfo, FunctionRegistry} import org.apache.hadoop.hive.ql.udf.{UDFType => HiveUDFType} @@ -280,6 +280,16 @@ private[hive] case class HiveGenericUdf(name: String, children: Seq[Expression]) private[hive] trait HiveInspectors { def unwrapData(data: Any, oi: ObjectInspector): Any = oi match { + case hvoi: HiveVarcharObjectInspector => if (data == null) { + null + } else { + hvoi.getPrimitiveJavaObject(data).getValue + } + case hdoi: HiveDecimalObjectInspector => if (data == null) { + null + } else { + BigDecimal(hdoi.getPrimitiveJavaObject(data).bigDecimalValue()) + } case pi: PrimitiveObjectInspector => pi.getPrimitiveJavaObject(data) case li: ListObjectInspector => Option(li.getList(data)) From afc39da00f53f15edb466768c24cdd73ec5bc119 Mon Sep 17 00:00:00 2001 From: Cheng Hao Date: Wed, 16 Jul 2014 16:21:25 +0800 Subject: [PATCH 2/3] Polish the code --- .../scala/org/apache/spark/sql/hive/hiveUdfs.scala | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala index 4c40ae1774939..3a1296d02117c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala @@ -280,16 +280,10 @@ private[hive] case class HiveGenericUdf(name: String, children: Seq[Expression]) private[hive] trait HiveInspectors { def unwrapData(data: Any, oi: ObjectInspector): Any = oi match { - case hvoi: HiveVarcharObjectInspector => if (data == null) { - null - } else { - hvoi.getPrimitiveJavaObject(data).getValue - } - case hdoi: HiveDecimalObjectInspector => if (data == null) { - null - } else { - BigDecimal(hdoi.getPrimitiveJavaObject(data).bigDecimalValue()) - } + case hvoi: HiveVarcharObjectInspector => + if (data == null) null else hvoi.getPrimitiveJavaObject(data).getValue + case hdoi: HiveDecimalObjectInspector => + if (data == null) null else BigDecimal(hdoi.getPrimitiveJavaObject(data).bigDecimalValue()) case pi: PrimitiveObjectInspector => pi.getPrimitiveJavaObject(data) case li: ListObjectInspector => Option(li.getList(data)) From 34cc21a2af2c9efa8b2d6bf12af59931f3ffaeef Mon Sep 17 00:00:00 2001 From: Cheng Hao Date: Thu, 17 Jul 2014 08:25:09 +0800 Subject: [PATCH 3/3] update the table scan accordingly since the unwrapData function changed --- .../spark/sql/hive/execution/HiveTableScan.scala | 12 +----------- .../scala/org/apache/spark/sql/hive/hiveUdfs.scala | 2 +- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala index ef8bae74530ec..e7016fa16eea9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala @@ -96,19 +96,9 @@ case class 
HiveTableScan( .getOrElse(sys.error(s"Can't find attribute $a")) val fieldObjectInspector = ref.getFieldObjectInspector - val unwrapHiveData = fieldObjectInspector match { - case _: HiveVarcharObjectInspector => - (value: Any) => value.asInstanceOf[HiveVarchar].getValue - case _: HiveDecimalObjectInspector => - (value: Any) => BigDecimal(value.asInstanceOf[HiveDecimal].bigDecimalValue()) - case _ => - identity[Any] _ - } - (row: Any, _: Array[String]) => { val data = objectInspector.getStructFieldData(row, ref) - val hiveData = unwrapData(data, fieldObjectInspector) - if (hiveData != null) unwrapHiveData(hiveData) else null + unwrapData(data, fieldObjectInspector) } } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala index 3a1296d02117c..fc33c5b460d70 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.hive import scala.collection.mutable.ArrayBuffer -import org.apache.hadoop.hive.common.`type`.{HiveDecimal, HiveVarchar} +import org.apache.hadoop.hive.common.`type`.HiveDecimal import org.apache.hadoop.hive.ql.exec.UDF import org.apache.hadoop.hive.ql.exec.{FunctionInfo, FunctionRegistry} import org.apache.hadoop.hive.ql.udf.{UDFType => HiveUDFType}