From a8b6e3361fb69abb70649d72bbc74708977a84bd Mon Sep 17 00:00:00 2001 From: Jongyoung Park Date: Wed, 7 Oct 2015 17:40:55 +0900 Subject: [PATCH 1/5] Fix getBytes to getTextBytes or use proper charset. --- .../src/main/java/org/apache/tajo/datum/DateDatum.java | 5 +++++ .../src/main/java/org/apache/tajo/datum/TimestampDatum.java | 5 +++++ .../org/apache/tajo/storage/parquet/TajoWriteSupport.java | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java b/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java index f69aa4445e..ac84e259dd 100644 --- a/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java +++ b/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java @@ -54,6 +54,11 @@ public TimeMeta asTimeMeta() { return tm; } + @Override + public byte[] asTextBytes() { + return asChars().getBytes(TextDatum.DEFAULT_CHARSET); + } + public int getCenturyOfEra() { return asTimeMeta().getCenturyOfEra(); } diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java b/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java index aaf7beb1a8..5b4c152a51 100644 --- a/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java +++ b/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java @@ -169,6 +169,11 @@ public int size() { return Bytes.toBytes(timestamp); } + @Override + public byte[] asTextBytes() { + return asChars().getBytes(TextDatum.DEFAULT_CHARSET); + } + @Override public Datum equalsTo(Datum datum) { if (datum.type() == TajoDataTypes.Type.TIME) { diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java index acb901580e..8d9fdfdb64 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java +++ 
b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java @@ -138,7 +138,7 @@ private void writeValue(Type fieldType, Column column, Tuple tuple, int index) { recordConsumer.addBinary(Binary.fromByteArray(tuple.getTextBytes(index))); break; case TEXT: - recordConsumer.addBinary(Binary.fromByteArray(tuple.getBytes(index))); + recordConsumer.addBinary(Binary.fromByteArray(tuple.getTextBytes(index))); break; case PROTOBUF: case BLOB: From 0ea19f306e2c42dfd7129f905fcb104a908ed5a6 Mon Sep 17 00:00:00 2001 From: Jongyoung Park Date: Thu, 8 Oct 2015 12:00:43 +0900 Subject: [PATCH 2/5] Added a test case --- .../storage/parquet/TajoWriteSupport.java | 4 +- .../org/apache/tajo/storage/TestStorages.java | 42 +++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java index 8d9fdfdb64..e5ad28c595 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java @@ -101,7 +101,7 @@ private void writeRecordFields(GroupType schema, Schema tajoSchema, Type fieldType = fields.get(index); if (!tuple.isBlankOrNull(tajoIndex)) { recordConsumer.startField(fieldType.getName(), index); - writeValue(fieldType, column, tuple, tajoIndex); + writeValue(column, tuple, tajoIndex); recordConsumer.endField(fieldType.getName(), index); } else if (fieldType.isRepetition(Type.Repetition.REQUIRED)) { throw new RuntimeException("Null-value for required field: " + @@ -111,7 +111,7 @@ private void writeRecordFields(GroupType schema, Schema tajoSchema, } } - private void writeValue(Type fieldType, Column column, Tuple tuple, int index) { + private void writeValue(Column column, Tuple tuple, int 
index) { switch (column.getDataType().getType()) { case BOOLEAN: recordConsumer.addBoolean(tuple.getBool(index)); diff --git a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java index 278de45a55..52813e9a05 100644 --- a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java +++ b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java @@ -1114,4 +1114,46 @@ public final void testInsertFixedCharTypeWithOverSize() throws Exception { assertTrue(ok); } + + @Test + public void testDateTextHandling() throws Exception { + if (!dataFormat.equalsIgnoreCase(BuiltinStorages.PARQUET)) { + return; + } + + Schema schema = new Schema(); + schema.addColumn("col1", Type.TEXT); + + KeyValueSet options = new KeyValueSet(); + TableMeta meta = CatalogUtil.newTableMeta(dataFormat, options); + + FileTablespace sm = TablespaceManager.getLocalFs(); + Path tablePath = new Path(testDir, "testTextHandling.data"); + + Appender appender = sm.getAppender(meta, schema, tablePath); + + appender.init(); + + VTuple tuple = new VTuple(1); + tuple.put(0, DatumFactory.createDate(1994,7,30)); + + appender.addTuple(tuple); + appender.flush(); + appender.close(); + + FileStatus status = fs.getFileStatus(tablePath); + FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen()); + Scanner scanner = sm.getScanner(meta, schema, fragment, null); + scanner.init(); + + Tuple retrieved; + while ((retrieved = scanner.next()) != null) { + assertEquals(tuple.get(0).asChars(), retrieved.asDatum(0).asChars()); + } + scanner.close(); + + if (internalType){ + OldStorageManager.clearCache(); + } + } } \ No newline at end of file From 02db378576be6431a2a9c898a82313018c0acb2e Mon Sep 17 00:00:00 2001 From: Jongyoung Park Date: Thu, 8 Oct 2015 13:55:57 +0900 Subject: [PATCH 3/5] Some store types to test are added --- 
.../src/test/java/org/apache/tajo/storage/TestStorages.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java index 52813e9a05..217598fe45 100644 --- a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java +++ b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java @@ -1117,7 +1117,9 @@ public final void testInsertFixedCharTypeWithOverSize() throws Exception { @Test public void testDateTextHandling() throws Exception { - if (!dataFormat.equalsIgnoreCase(BuiltinStorages.PARQUET)) { + if (dataFormat.equalsIgnoreCase(BuiltinStorages.AVRO) || + dataFormat.equalsIgnoreCase(BuiltinStorages.RAW) || + dataFormat.equalsIgnoreCase(BuiltinStorages.DRAW)) { return; } From b49b558f0248e20118a67ab6641eb7f1b3606da4 Mon Sep 17 00:00:00 2001 From: Jongyoung Park Date: Thu, 8 Oct 2015 15:09:57 +0900 Subject: [PATCH 4/5] Replace ProtobufDatumFactory instance with static createDatum call in binary SequenceFile test --- .../src/test/java/org/apache/tajo/storage/TestStorages.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java index 217598fe45..9984856e10 100644 --- a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java +++ b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java @@ -779,7 +779,6 @@ public void testSequenceFileBinarySerializeDeserialize() throws IOException { appender.init(); QueryId queryid = new QueryId("12345", 5); - ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName()); VTuple tuple = new VTuple(13); tuple.put(new Datum[] { @@ -795,7 +794,7 @@ public void 
testSequenceFileBinarySerializeDeserialize() throws IOException { DatumFactory.createBlob("hyunsik babo".getBytes()), DatumFactory.createInet4("192.168.0.1"), NullDatum.get(), - factory.createDatum(queryid.getProto()) + ProtobufDatumFactory.createDatum(queryid.getProto()) }); appender.addTuple(tuple); appender.flush(); From 7be937c5b2b627888b599565b1ba8fb309986844 Mon Sep 17 00:00:00 2001 From: Jongyoung Park Date: Thu, 8 Oct 2015 15:27:17 +0900 Subject: [PATCH 5/5] Replace ProtobufDatumFactory instance with static createDatum call in text SequenceFile test --- .../src/test/java/org/apache/tajo/storage/TestStorages.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java index 9984856e10..11729e37d4 100644 --- a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java +++ b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java @@ -706,7 +706,6 @@ public void testSequenceFileTextSerializeDeserialize() throws IOException { appender.init(); QueryId queryid = new QueryId("12345", 5); - ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName()); VTuple tuple = new VTuple(new Datum[] { DatumFactory.createBool(true), @@ -721,7 +720,7 @@ public void testSequenceFileTextSerializeDeserialize() throws IOException { DatumFactory.createBlob("hyunsik babo".getBytes()), DatumFactory.createInet4("192.168.0.1"), NullDatum.get(), - factory.createDatum(queryid.getProto()) + ProtobufDatumFactory.createDatum(queryid.getProto()) }); appender.addTuple(tuple); appender.flush();