Skip to content

Commit

Permalink
Type coercion should allow every type to have a null value
Browse files Browse the repository at this point in the history
  • Loading branch information
adrian-wang committed Sep 3, 2014
1 parent e70aff6 commit c619f0a
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,8 @@ object HiveTypeCoercion {
// See https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types.
// The conversions for integral and floating-point types have a linear widening hierarchy:
val numericPrecedence =
Seq(NullType, ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType, DecimalType)
// Boolean is only wider than Void
val booleanPrecedence = Seq(NullType, BooleanType)
val allPromotions: Seq[Seq[DataType]] = numericPrecedence :: booleanPrecedence :: Nil
Seq(ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType, DecimalType)
val allPromotions: Seq[Seq[DataType]] = numericPrecedence :: Nil
}

/**
Expand All @@ -55,12 +53,17 @@ trait HiveTypeCoercion {

trait TypeWidening {
  /**
   * Finds the tightest common type of two data types.
   *
   * `NullType` places no constraint on the result: when one operand is `NullType` the
   * other operand's type is returned, and when both are `NullType` the result is
   * `NullType`. Two identical types yield that type. Two distinct non-null types are
   * resolved through `HiveTypeCoercion.allPromotions`.
   *
   * @param t1 the first type
   * @param t2 the second type
   * @return the common type, or `None` if the two distinct non-null types do not share a
   *         promotion sequence
   */
  def findTightestCommonType(t1: DataType, t2: DataType): Option[DataType] = {
    // Only the distinct non-null operands constrain the result.
    val valueTypes = Seq(t1, t2).filter(_ != NullType).distinct

    valueTypes match {
      case Seq() =>
        // Both operands are NullType.
        Some(NullType)
      case Seq(only) =>
        // Either both operands share this type, or the other operand is NullType.
        Some(only)
      case _ =>
        // Two distinct non-null types: look for a promotion sequence containing both and
        // take whichever of the two appears later in it (presumably the wider one, given
        // the sequences are described as a widening hierarchy).
        HiveTypeCoercion.allPromotions
          .find(p => p.contains(t1) && p.contains(t2))
          .flatMap(_.filter(t => t == t1 || t == t2).lastOption)
    }
  }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class HiveTypeCoercionSuite extends FunSuite {
val rules = new HiveTypeCoercion { }
import rules._

test("tightest common bound for numeric and boolean types") {
test("tightest common bound for types") {
def widenTest(t1: DataType, t2: DataType, tightestCommon: Option[DataType]) {
var found = WidenTypes.findTightestCommonType(t1, t2)
assert(found == tightestCommon,
Expand All @@ -37,6 +37,9 @@ class HiveTypeCoercionSuite extends FunSuite {
s"Expected $tightestCommon as tightest common type for $t2 and $t1, found $found")
}

// Null
widenTest(NullType, NullType, Some(NullType))

// Boolean
widenTest(NullType, BooleanType, Some(BooleanType))
widenTest(BooleanType, BooleanType, Some(BooleanType))
Expand All @@ -60,12 +63,28 @@ class HiveTypeCoercionSuite extends FunSuite {
widenTest(DoubleType, DoubleType, Some(DoubleType))

// Integral mixed with floating point.
widenTest(NullType, FloatType, Some(FloatType))
widenTest(NullType, DoubleType, Some(DoubleType))
widenTest(IntegerType, FloatType, Some(FloatType))
widenTest(IntegerType, DoubleType, Some(DoubleType))
widenTest(IntegerType, DoubleType, Some(DoubleType))
widenTest(LongType, FloatType, Some(FloatType))
widenTest(LongType, DoubleType, Some(DoubleType))

// StringType
widenTest(NullType, StringType, Some(StringType))
widenTest(StringType, StringType, Some(StringType))
widenTest(IntegerType, StringType, None)
widenTest(LongType, StringType, None)

// TimestampType
widenTest(NullType, TimestampType, Some(TimestampType))
widenTest(TimestampType, TimestampType, Some(TimestampType))
widenTest(IntegerType, TimestampType, None)
widenTest(StringType, TimestampType, None)

// ComplexType
widenTest(NullType, MapType(IntegerType, StringType, false), Some(MapType(IntegerType, StringType, false)))
widenTest(NullType, StructType(Seq()), Some(StructType(Seq())))
widenTest(StringType, MapType(IntegerType, StringType, true), None)
widenTest(ArrayType(IntegerType), StructType(Seq()), None)
}
}

0 comments on commit c619f0a

Please sign in to comment.