Skip to content

Commit

Permalink
DRILL-3263: Read Smallint and Tinyint columns in Hive tables as Integer.
Browse files Browse the repository at this point in the history
Smallint and Tinyint are not fully implemented; this will be addressed when DRILL-2470 is fixed. Until these types are ready for full use throughout Drill, we will read smallint and tinyint data as integers, since we have much more thorough support and testing for the integer type.

Disabled unit tests for Hive functions that take tinyint and smallint as input or produce them as output.
  • Loading branch information
jaltekruse committed Jun 18, 2015
1 parent fb25973 commit 437706f
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 47 deletions.
Expand Up @@ -71,6 +71,12 @@ public abstract class HiveFieldConverter {

private static Map<PrimitiveCategory, Class< ? extends HiveFieldConverter>> primMap = Maps.newHashMap();

// TODO (DRILL-2470)
// Byte and short (tinyint and smallint in SQL types) are currently read as integers
// as these smaller integer types are not fully supported in Drill today.
// Here the same Hive-side types are still registered, because we must read from the
// correctly typed converter on the Hive side; in the FieldConverter classes below for
// Byte and Short we then convert to integer when writing into Drill's vectors.
static {
primMap.put(PrimitiveCategory.BINARY, Binary.class);
primMap.put(PrimitiveCategory.BOOLEAN, Boolean.class);
Expand Down Expand Up @@ -143,14 +149,6 @@ public void setSafeValue(ObjectInspector oi, Object hiveFieldValue, ValueVector
}
}

public static class Byte extends HiveFieldConverter {
  // Copies one Hive TINYINT field value into the output vector at the given row index.
  // NOTE(review): assumes outputVV was created as a NullableTinyIntVector for this column.
  @Override
  public void setSafeValue(ObjectInspector oi, Object hiveFieldValue, ValueVector outputVV, int outputIndex) {
    final ByteObjectInspector byteInspector = (ByteObjectInspector) oi;
    final byte fieldValue = (byte) byteInspector.getPrimitiveJavaObject(hiveFieldValue);
    final NullableTinyIntVector targetVector = (NullableTinyIntVector) outputVV;
    targetVector.getMutator().setSafe(outputIndex, fieldValue);
  }
}

public static class Decimal9 extends HiveFieldConverter {
private final Decimal9Holder holder = new Decimal9Holder();

Expand Down Expand Up @@ -247,19 +245,30 @@ public void setSafeValue(ObjectInspector oi, Object hiveFieldValue, ValueVector
}
}

// NOTE(review): this span appears to be a diff rendering with removed and added lines
// interleaved — the "Long" declaration below is the pre-change line, while the "Short"
// class and the second "value" assignment are the post-change lines. As written, the
// two local declarations of "value" cannot coexist in one method body; confirm against
// the actual post-commit file before relying on this text.
public static class Long extends HiveFieldConverter {
// TODO (DRILL-2470)
// Byte and short (tinyint and smallint in SQL types) are currently read as integers
// as these smaller integer types are not fully supported in Drill today.
public static class Short extends HiveFieldConverter {
@Override
public void setSafeValue(ObjectInspector oi, Object hiveFieldValue, ValueVector outputVV, int outputIndex) {
// Pre-change lines (BIGINT path) interleaved from the removed side of the diff:
final long value = (long) ((LongObjectInspector)oi).getPrimitiveJavaObject(hiveFieldValue);
((NullableBigIntVector) outputVV).getMutator().setSafe(outputIndex, value);
// Post-change lines: a Hive SMALLINT is widened to int and written into an IntVector
// (smallint read as integer until DRILL-2470 lands).
final int value = (short) ((ShortObjectInspector)oi).getPrimitiveJavaObject(hiveFieldValue);
((NullableIntVector) outputVV).getMutator().setSafe(outputIndex, value);
}
}

// NOTE(review): diff interleaving again — the "Short" declaration below is the removed
// pre-change line; the "Byte" class and the int-widening assignment are the added
// post-change lines. The duplicate "value" locals cannot coexist in one method; confirm
// against the actual post-commit file.
public static class Short extends HiveFieldConverter {
public static class Byte extends HiveFieldConverter {
@Override
public void setSafeValue(ObjectInspector oi, Object hiveFieldValue, ValueVector outputVV, int outputIndex) {
// Pre-change lines (SMALLINT written into a SmallIntVector), from the removed side:
final short value = (short) ((ShortObjectInspector)oi).getPrimitiveJavaObject(hiveFieldValue);
((NullableSmallIntVector) outputVV).getMutator().setSafe(outputIndex, value);
// Post-change lines: a Hive TINYINT is widened to int and written into an IntVector
// (tinyint read as integer until DRILL-2470 lands).
final int value = (byte)((ByteObjectInspector)oi).getPrimitiveJavaObject(hiveFieldValue);
((NullableIntVector) outputVV).getMutator().setSafe(outputIndex, value);
}
}

public static class Long extends HiveFieldConverter {
  // Copies one Hive BIGINT field value into the output vector at the given row index.
  // NOTE(review): assumes outputVV was created as a NullableBigIntVector for this column.
  @Override
  public void setSafeValue(ObjectInspector oi, Object hiveFieldValue, ValueVector outputVV, int outputIndex) {
    final LongObjectInspector longInspector = (LongObjectInspector) oi;
    final long fieldValue = (long) longInspector.getPrimitiveJavaObject(hiveFieldValue);
    final NullableBigIntVector targetVector = (NullableBigIntVector) outputVV;
    targetVector.getMutator().setSafe(outputIndex, fieldValue);
  }
}

Expand Down
Expand Up @@ -359,8 +359,6 @@ private MinorType getMinorTypeFromHivePrimitiveTypeInfo(PrimitiveTypeInfo primit
return TypeProtos.MinorType.VARBINARY;
case BOOLEAN:
return MinorType.BIT;
case BYTE:
return MinorType.TINYINT;
case DECIMAL: {

if (context.getOptions().getOption(PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY).bool_val == false) {
Expand All @@ -375,12 +373,15 @@ private MinorType getMinorTypeFromHivePrimitiveTypeInfo(PrimitiveTypeInfo primit
return MinorType.FLOAT8;
case FLOAT:
return MinorType.FLOAT4;
// TODO (DRILL-2470)
// Byte and short (tinyint and smallint in SQL types) are currently read as integers
// as these smaller integer types are not fully supported in Drill today.
case SHORT:
case BYTE:
case INT:
return MinorType.INT;
case LONG:
return MinorType.BIGINT;
case SHORT:
return MinorType.SMALLINT;
case STRING:
case VARCHAR:
return MinorType.VARCHAR;
Expand Down Expand Up @@ -451,14 +452,6 @@ protected void populatePartitionVectors(int recordCount) {
}
break;
}
case BYTE: {
TinyIntVector v = (TinyIntVector) vector;
byte value = (byte) val;
for (int j = 0; j < recordCount; j++) {
v.getMutator().setSafe(j, value);
}
break;
}
case DOUBLE: {
Float8Vector v = (Float8Vector) vector;
double value = (double) val;
Expand All @@ -475,6 +468,8 @@ protected void populatePartitionVectors(int recordCount) {
}
break;
}
case BYTE:
case SHORT:
case INT: {
IntVector v = (IntVector) vector;
int value = (int) val;
Expand All @@ -491,14 +486,6 @@ protected void populatePartitionVectors(int recordCount) {
}
break;
}
case SHORT: {
SmallIntVector v = (SmallIntVector) vector;
short value = (short) val;
for (int j = 0; j < recordCount; j++) {
v.getMutator().setSafe(j, value);
}
break;
}
case VARCHAR:
case STRING: {
VarCharVector v = (VarCharVector) vector;
Expand Down Expand Up @@ -603,8 +590,6 @@ private Object convertPartitionType(TypeInfo typeInfo, String value) {
return value.getBytes();
case BOOLEAN:
return Boolean.parseBoolean(value);
case BYTE:
return Byte.parseByte(value);
case DECIMAL: {
DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
return HiveDecimalUtils.enforcePrecisionScale(HiveDecimal.create(value),
Expand All @@ -614,12 +599,12 @@ private Object convertPartitionType(TypeInfo typeInfo, String value) {
return Double.parseDouble(value);
case FLOAT:
return Float.parseFloat(value);
case BYTE:
case SHORT:
case INT:
return Integer.parseInt(value);
case LONG:
return Long.parseLong(value);
case SHORT:
return Short.parseShort(value);
case STRING:
case VARCHAR:
return value.getBytes();
Expand Down
Expand Up @@ -82,10 +82,8 @@ private RelDataType getRelDataTypeFromHivePrimitiveType(RelDataTypeFactory typeF
return typeFactory.createSqlType(SqlTypeName.BOOLEAN);

case BYTE:
return typeFactory.createSqlType(SqlTypeName.TINYINT);

case SHORT:
return typeFactory.createSqlType(SqlTypeName.SMALLINT);
return typeFactory.createSqlType(SqlTypeName.INTEGER);

case INT:
return typeFactory.createSqlType(SqlTypeName.INTEGER);
Expand Down
Expand Up @@ -151,13 +151,15 @@ public GenericUDFTestBOOLEAN() {
}
}

// TODO(DRILL-2470) - re-enable the test case for this function in TestSampleHiveUDFs
/**
 * Sample Hive UDF that echoes a BYTE (tinyint) argument, used to exercise Drill's
 * Hive-UDF integration for the BYTE primitive category. Registered under the SQL
 * function name {@code testHiveUDFBYTE}.
 */
@Description(name = "testHiveUDFBYTE", value = "_FUNC_(BYTE) - Tests byte data as input and output")
public static class GenericUDFTestBYTE extends GenericUDFTestBase {
public GenericUDFTestBYTE() {
// Base class wires up the function name and the primitive category it accepts/returns.
super("testHiveUDFBYTE", PrimitiveCategory.BYTE);
}
}

// TODO(DRILL-2470) - re-enable the test case for this function in TestSampleHiveUDFs
@Description(name = "testHiveUDFSHORT", value = "_FUNC_(SHORT) - Tests short data as input and output")
public static class GenericUDFTestSHORT extends GenericUDFTestBase {
public GenericUDFTestSHORT() {
Expand Down
Expand Up @@ -47,13 +47,15 @@ public void booleanInOut() throws Exception{
helper(query, expected);
}

// Disabled pending full tinyint support in Drill (DRILL-2470).
@Ignore("DRILL-2470")
@Test
public void byteInOut() throws Exception {
  final String sql = "SELECT testHiveUDFByte(tinyint_field) as col1 FROM hive.readtest";
  final String expectedOutput = "col1\n34\nnull\n";
  helper(sql, expectedOutput);
}

@Ignore("DRILL-2470")
@Test
public void shortInOut() throws Exception{
String query = "SELECT testHiveUDFShort(smallint_field) as col1 FROM hive.readtest";
Expand Down
Expand Up @@ -92,7 +92,7 @@ public void readAllSupportedHiveDataTypes() throws Exception {
.baselineValues(
"binaryfield",
false,
(byte) 34,
34,
new BigDecimal("66"),
new BigDecimal("2347.92"),
new BigDecimal("2758725827.99990"),
Expand All @@ -102,14 +102,14 @@ public void readAllSupportedHiveDataTypes() throws Exception {
4.67f,
123456,
234235L,
(short) 3455,
3455,
"stringfield",
"varcharfield",
new DateTime(Timestamp.valueOf("2013-07-05 17:01:00").getTime()),
new DateTime(Date.valueOf("2013-07-05").getTime()),
"binary",
true,
(byte) 64,
64,
new BigDecimal("37"),
new BigDecimal("36.90"),
new BigDecimal("3289379872.94565"),
Expand All @@ -119,7 +119,7 @@ public void readAllSupportedHiveDataTypes() throws Exception {
4.67f,
123456,
234235L,
(short) 3455,
3455,
"string",
"varchar",
new DateTime(Timestamp.valueOf("2013-07-05 17:01:00").getTime()),
Expand All @@ -128,7 +128,7 @@ public void readAllSupportedHiveDataTypes() throws Exception {
null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null,
"binary",
true,
(byte) 64,
64,
new BigDecimal("37"),
new BigDecimal("36.90"),
new BigDecimal("3289379872.94565"),
Expand All @@ -138,7 +138,7 @@ public void readAllSupportedHiveDataTypes() throws Exception {
4.67f,
123456,
234235L,
(short) 3455,
3455,
"string",
"varchar",
new DateTime(Timestamp.valueOf("2013-07-05 17:01:00").getTime()),
Expand Down

0 comments on commit 437706f

Please sign in to comment.