From 7ef2784b3c3e2773cb625388000f80664ad0090e Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Sat, 27 Sep 2014 00:03:40 -0700 Subject: [PATCH 1/6] TAJO-1042: Implement block iteration interfaces for executors and scanners. --- .../tajo/engine/planner/physical/MemTableScanner.java | 7 +++++++ .../tajo/engine/planner/physical/PairWiseMerger.java | 7 +++++++ .../tajo/engine/planner/physical/PhysicalExec.java | 7 +++++++ .../org/apache/tajo/engine/utils/TupleCacheScanner.java | 7 +++++++ .../main/java/org/apache/tajo/storage/FileScanner.java | 7 +++++++ .../main/java/org/apache/tajo/storage/MergeScanner.java | 7 +++++++ .../src/main/java/org/apache/tajo/storage/Scanner.java | 9 +++++++++ .../java/org/apache/tajo/storage/v2/FileScannerV2.java | 7 +++++++ 8 files changed, 58 insertions(+) diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/MemTableScanner.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/MemTableScanner.java index 7bd6a703ea..a2dc87e83d 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/MemTableScanner.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/MemTableScanner.java @@ -23,9 +23,11 @@ import org.apache.tajo.catalog.Column; import org.apache.tajo.catalog.Schema; import org.apache.tajo.catalog.statistics.TableStats; +import org.apache.tajo.exception.UnimplementedException; import org.apache.tajo.storage.Scanner; import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.VTuple; +import org.apache.tajo.tuple.offheap.OffHeapRowBlock; import java.io.IOException; import java.util.Collection; @@ -73,6 +75,11 @@ public Tuple next() throws IOException { } } + @Override + public boolean nextFetch(OffHeapRowBlock rowBlock) { + throw new UnimplementedException("nextFetch(OffHeapRowBlock) is not implemented"); + } + @Override public void reset() throws IOException { init(); diff --git 
a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PairWiseMerger.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PairWiseMerger.java index 2ac8662d6c..f801b35fc2 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PairWiseMerger.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PairWiseMerger.java @@ -24,9 +24,11 @@ import org.apache.tajo.catalog.Column; import org.apache.tajo.catalog.Schema; import org.apache.tajo.catalog.statistics.TableStats; +import org.apache.tajo.exception.UnimplementedException; import org.apache.tajo.storage.Scanner; import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.VTuple; +import org.apache.tajo.tuple.offheap.OffHeapRowBlock; import java.io.IOException; import java.util.Comparator; @@ -158,6 +160,11 @@ public Tuple next() throws IOException { return outTuple; } + @Override + public boolean nextFetch(OffHeapRowBlock rowBlock) { + throw new UnimplementedException("nextFetch(OffHeapRowBlock) is not implemented"); + } + @Override public void reset() throws IOException { if (state == State.INITED) { diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalExec.java index 31cfc4d359..859c053ff9 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalExec.java @@ -25,7 +25,10 @@ import org.apache.tajo.catalog.SchemaObject; import org.apache.tajo.catalog.statistics.TableStats; import org.apache.tajo.engine.codegen.CompilationError; +import org.apache.tajo.exception.UnimplementedException; +import org.apache.tajo.exception.UnsupportedException; import org.apache.tajo.storage.Tuple; +import org.apache.tajo.tuple.offheap.OffHeapRowBlock; import org.apache.tajo.worker.TaskAttemptContext; import java.io.IOException; @@ 
-60,6 +63,10 @@ protected void compile() throws CompilationError { public abstract Tuple next() throws IOException; + public boolean nextFetch(OffHeapRowBlock rowBlock) { + throw new UnimplementedException("nextFetch(OffHeapRowBlock) is not implemented"); + } + public abstract void rescan() throws IOException; public abstract void close() throws IOException; diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/utils/TupleCacheScanner.java b/tajo-core/src/main/java/org/apache/tajo/engine/utils/TupleCacheScanner.java index 743d70c695..10b3a22575 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/utils/TupleCacheScanner.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/utils/TupleCacheScanner.java @@ -21,8 +21,10 @@ import org.apache.tajo.catalog.Column; import org.apache.tajo.catalog.Schema; import org.apache.tajo.catalog.statistics.TableStats; +import org.apache.tajo.exception.UnimplementedException; import org.apache.tajo.storage.Scanner; import org.apache.tajo.storage.Tuple; +import org.apache.tajo.tuple.offheap.OffHeapRowBlock; import java.io.IOException; import java.util.Iterator; @@ -62,6 +64,11 @@ public Tuple next() throws IOException { } } + @Override + public boolean nextFetch(OffHeapRowBlock rowBlock) { + throw new UnimplementedException("nextFetch(OffHeapRowBlock) is not implemented"); + } + @Override public void reset() throws IOException { init(); diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/FileScanner.java b/tajo-storage/src/main/java/org/apache/tajo/storage/FileScanner.java index f15c4c97a5..6aa59e6a45 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/FileScanner.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/FileScanner.java @@ -29,7 +29,9 @@ import org.apache.tajo.catalog.statistics.ColumnStats; import org.apache.tajo.catalog.statistics.TableStats; import org.apache.tajo.conf.TajoConf; +import org.apache.tajo.exception.UnimplementedException; import 
org.apache.tajo.storage.fragment.FileFragment; +import org.apache.tajo.tuple.offheap.OffHeapRowBlock; import java.io.IOException; @@ -80,6 +82,11 @@ public Schema getSchema() { return schema; } + @Override + public boolean nextFetch(OffHeapRowBlock rowBlock) { + throw new UnimplementedException("nextFetch(OffHeapRowBlock) is not implemented"); + } + @Override public void setTarget(Column[] targets) { if (inited) { diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/MergeScanner.java b/tajo-storage/src/main/java/org/apache/tajo/storage/MergeScanner.java index 8917f21ce3..890455ad90 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/MergeScanner.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/MergeScanner.java @@ -26,7 +26,9 @@ import org.apache.tajo.catalog.statistics.ColumnStats; import org.apache.tajo.catalog.statistics.TableStats; import org.apache.tajo.conf.TajoConf; +import org.apache.tajo.exception.UnimplementedException; import org.apache.tajo.storage.fragment.FileFragment; +import org.apache.tajo.tuple.offheap.OffHeapRowBlock; import java.io.IOException; import java.util.Iterator; @@ -113,6 +115,11 @@ public Tuple next() throws IOException { return tuple; } + @Override + public boolean nextFetch(OffHeapRowBlock rowBlock) { + throw new UnimplementedException("nextFetch(OffHeapRowBlock) is not implemented"); + } + @Override public void reset() throws IOException { this.iterator = fragments.iterator(); diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/Scanner.java b/tajo-storage/src/main/java/org/apache/tajo/storage/Scanner.java index 16c4faa4a5..f532e0e646 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/Scanner.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/Scanner.java @@ -21,6 +21,8 @@ import org.apache.tajo.catalog.Column; import org.apache.tajo.catalog.SchemaObject; import org.apache.tajo.catalog.statistics.TableStats; +import 
org.apache.tajo.exception.UnimplementedException; +import org.apache.tajo.tuple.offheap.OffHeapRowBlock; import java.io.Closeable; import java.io.IOException; @@ -41,6 +43,13 @@ public interface Scanner extends SchemaObject, Closeable { * @throws IOException if internal I/O error occurs during next method */ Tuple next() throws IOException; + + /** + * + * @param rowBlock + * @return + */ + boolean nextFetch(OffHeapRowBlock rowBlock); /** * Reset the cursor. After executed, the scanner diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/v2/FileScannerV2.java b/tajo-storage/src/main/java/org/apache/tajo/storage/v2/FileScannerV2.java index da7084c721..ebbe2f43e1 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/v2/FileScannerV2.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/v2/FileScannerV2.java @@ -28,9 +28,11 @@ import org.apache.tajo.catalog.TableMeta; import org.apache.tajo.catalog.statistics.ColumnStats; import org.apache.tajo.catalog.statistics.TableStats; +import org.apache.tajo.exception.UnimplementedException; import org.apache.tajo.storage.Scanner; import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.fragment.FileFragment; +import org.apache.tajo.tuple.offheap.OffHeapRowBlock; import java.io.IOException; import java.util.concurrent.atomic.AtomicBoolean; @@ -222,6 +224,11 @@ public Tuple next() throws IOException { return nextTuple(); } + @Override + public boolean nextFetch(OffHeapRowBlock rowBlock) { + throw new UnimplementedException("nextFetch(OffHeapRowBlock) is not implemented"); + } + @Override public float getProgress() { return progress; From a71a98147ced8021bc5a521666efdfeff4ae84c7 Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Sat, 27 Sep 2014 10:50:46 -0700 Subject: [PATCH 2/6] TAJO-1080: Cleanup BooleanDatum and Inet4Datum. 
--- .../java/org/apache/tajo/datum/BooleanDatum.java | 8 +++----- .../main/java/org/apache/tajo/datum/Inet4Datum.java | 7 ++----- .../main/java/org/apache/tajo/datum/Int2Datum.java | 13 ------------- .../main/java/org/apache/tajo/util/NetUtils.java | 9 +++++++++ .../apache/tajo/datum/TestArithmeticOperator.java | 10 +++++----- 5 files changed, 19 insertions(+), 28 deletions(-) diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/BooleanDatum.java b/tajo-common/src/main/java/org/apache/tajo/datum/BooleanDatum.java index 93933a85a9..a8eeca0c2f 100644 --- a/tajo-common/src/main/java/org/apache/tajo/datum/BooleanDatum.java +++ b/tajo-common/src/main/java/org/apache/tajo/datum/BooleanDatum.java @@ -167,12 +167,10 @@ public BooleanDatum equalsTo(Datum datum) { public int compareTo(Datum datum) { switch (datum.type()) { case BOOLEAN: - if (val && !datum.asBool()) { - return -1; - } else if (val && datum.asBool()) { - return 1; - } else { + if ((val ^ datum.asBool()) == false) { // if both are the same regardless of its value. return 0; + } else { + return val ? 
-1 : 1; } default: throw new InvalidOperationException(datum.type()); diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/Inet4Datum.java b/tajo-common/src/main/java/org/apache/tajo/datum/Inet4Datum.java index 1de81cd9f5..ed48a028e3 100644 --- a/tajo-common/src/main/java/org/apache/tajo/datum/Inet4Datum.java +++ b/tajo-common/src/main/java/org/apache/tajo/datum/Inet4Datum.java @@ -22,6 +22,7 @@ import com.google.gson.annotations.Expose; import org.apache.tajo.exception.InvalidOperationException; import org.apache.tajo.util.Bytes; +import org.apache.tajo.util.NetUtils; import static org.apache.tajo.common.TajoDataTypes.Type; @@ -36,11 +37,7 @@ public class Inet4Datum extends Datum { public Inet4Datum(String addr) { super(Type.INET4); - String [] elems = addr.split("\\."); - address = Integer.parseInt(elems[3]) & 0xFF - | ((Integer.parseInt(elems[2]) << 8) & 0xFF00) - | ((Integer.parseInt(elems[1]) << 16) & 0xFF0000) - | ((Integer.parseInt(elems[0]) << 24) & 0xFF000000); + address = NetUtils.convertIPStringToInt(addr); } public Inet4Datum(byte[] addr) { diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/Int2Datum.java b/tajo-common/src/main/java/org/apache/tajo/datum/Int2Datum.java index 2a6c691cbf..7bd24da791 100644 --- a/tajo-common/src/main/java/org/apache/tajo/datum/Int2Datum.java +++ b/tajo-common/src/main/java/org/apache/tajo/datum/Int2Datum.java @@ -195,7 +195,6 @@ public int compareTo(Datum datum) { public Datum plus(Datum datum) { switch (datum.type()) { case INT2: - return DatumFactory.createInt2((short) (val + datum.asInt2())); case INT4: return DatumFactory.createInt4(val + datum.asInt4()); case INT8: @@ -220,7 +219,6 @@ public Datum plus(Datum datum) { public Datum minus(Datum datum) { switch (datum.type()) { case INT2: - return DatumFactory.createInt2((short) (val - datum.asInt2())); case INT4: return DatumFactory.createInt4(val - datum.asInt4()); case INT8: @@ -245,7 +243,6 @@ public Datum minus(Datum datum) { public Datum 
multiply(Datum datum) { switch (datum.type()) { case INT2: - return DatumFactory.createInt4(val * datum.asInt2()); case INT4: return DatumFactory.createInt4(val * datum.asInt4()); case INT8: @@ -268,11 +265,6 @@ public Datum multiply(Datum datum) { public Datum divide(Datum datum) { switch (datum.type()) { case INT2: - short paramValueI2 = datum.asInt2(); - if (!validateDivideZero(paramValueI2)) { - return NullDatum.get(); - } - return DatumFactory.createInt2((short) (val / paramValueI2)); case INT4: int paramValueI4 = datum.asInt4(); if (!validateDivideZero(paramValueI4)) { @@ -308,11 +300,6 @@ public Datum divide(Datum datum) { public Datum modular(Datum datum) { switch (datum.type()) { case INT2: - short paramValueI2 = datum.asInt2(); - if (!validateDivideZero(paramValueI2)) { - return NullDatum.get(); - } - return DatumFactory.createInt2((short) (val % paramValueI2)); case INT4: int paramValueI4 = datum.asInt4(); if (!validateDivideZero(paramValueI4)) { diff --git a/tajo-common/src/main/java/org/apache/tajo/util/NetUtils.java b/tajo-common/src/main/java/org/apache/tajo/util/NetUtils.java index 829829f9cc..fc24a5beb0 100644 --- a/tajo-common/src/main/java/org/apache/tajo/util/NetUtils.java +++ b/tajo-common/src/main/java/org/apache/tajo/util/NetUtils.java @@ -101,4 +101,13 @@ public static String normalizeHost(String host) { } return host; } + + public static int convertIPStringToInt(String ipAddr) { + String [] elems = ipAddr.split("\\."); + int address = Integer.parseInt(elems[3]) & 0xFF + | ((Integer.parseInt(elems[2]) << 8) & 0xFF00) + | ((Integer.parseInt(elems[1]) << 16) & 0xFF0000) + | ((Integer.parseInt(elems[0]) << 24) & 0xFF000000); + return address; + } } \ No newline at end of file diff --git a/tajo-common/src/test/java/org/apache/tajo/datum/TestArithmeticOperator.java b/tajo-common/src/test/java/org/apache/tajo/datum/TestArithmeticOperator.java index 42623bd3d8..8915ea3dad 100644 --- 
a/tajo-common/src/test/java/org/apache/tajo/datum/TestArithmeticOperator.java +++ b/tajo-common/src/test/java/org/apache/tajo/datum/TestArithmeticOperator.java @@ -62,20 +62,20 @@ public void setUp() { @Test public void testInt2Datum() throws Exception { //plus - runAndAssert("plus", new Int2Datum((short)10), new Int2Datum((short)5), new Int2Datum((short)15)); + runAndAssert("plus", new Int2Datum((short)10), new Int2Datum((short)5), new Int4Datum((short)15)); runAndAssert("plus", new Int2Datum((short)10), new Int4Datum(5), new Int4Datum(15)); runAndAssert("plus", new Int2Datum((short)10), new Int8Datum(5), new Int8Datum(15)); runAndAssert("plus", new Int2Datum((short)10), new Float4Datum(5.0f), new Float4Datum(15.0f)); runAndAssert("plus", new Int2Datum((short)10), new Float8Datum(5.0), new Float8Datum(15.0)); //minus - runAndAssert("minus", new Int2Datum((short)10), new Int2Datum((short)5), new Int2Datum((short)5)); + runAndAssert("minus", new Int2Datum((short)10), new Int2Datum((short)5), new Int4Datum((short)5)); runAndAssert("minus", new Int2Datum((short)10), new Int4Datum(5), new Int4Datum(5)); runAndAssert("minus", new Int2Datum((short)10), new Int8Datum(5), new Int8Datum(5)); runAndAssert("minus", new Int2Datum((short)10), new Float4Datum(5.0f), new Float4Datum(5.0f)); runAndAssert("minus", new Int2Datum((short)10), new Float8Datum(5.0), new Float8Datum(5.0)); - runAndAssert("minus", new Int2Datum((short)5), new Int2Datum((short)10), new Int2Datum((short)-5)); + runAndAssert("minus", new Int2Datum((short)5), new Int2Datum((short)10), new Int4Datum((short)-5)); runAndAssert("minus", new Int2Datum((short)5), new Int4Datum(10), new Int4Datum(-5)); runAndAssert("minus", new Int2Datum((short)5), new Int8Datum(10), new Int8Datum(-5)); runAndAssert("minus", new Int2Datum((short)5), new Float4Datum(10.0f), new Float4Datum(-5.0f)); @@ -89,7 +89,7 @@ public void testInt2Datum() throws Exception { runAndAssert("multiply", new Int2Datum((short)10), new Float8Datum(5.0), 
new Float8Datum(50.0)); //divide - runAndAssert("divide", new Int2Datum((short)10), new Int2Datum((short)5), new Int2Datum((short)2)); + runAndAssert("divide", new Int2Datum((short)10), new Int2Datum((short)5), new Int4Datum((short)2)); runAndAssert("divide", new Int2Datum((short)10), new Int4Datum(5), new Int4Datum(2)); runAndAssert("divide", new Int2Datum((short)10), new Int8Datum(5), new Int8Datum(2)); runAndAssert("divide", new Int2Datum((short)10), new Float4Datum(5.0f), new Float4Datum(2.0f)); @@ -102,7 +102,7 @@ public void testInt2Datum() throws Exception { runAndAssert("divide", new Int2Datum((short)10), new Float8Datum(0.0), NullDatum.get()); //modular - runAndAssert("modular", new Int2Datum((short)10), new Int2Datum((short)3), new Int2Datum((short)1)); + runAndAssert("modular", new Int2Datum((short)10), new Int2Datum((short)3), new Int4Datum((short)1)); runAndAssert("modular", new Int2Datum((short)10), new Int4Datum(3), new Int4Datum(1)); runAndAssert("modular", new Int2Datum((short)10), new Int8Datum(3), new Int8Datum(1)); runAndAssert("modular", new Int2Datum((short)10), new Float4Datum(3.0f), new Float4Datum(1.0f)); From c128eca315fdb084815c4a9ac4fb9c00c87f5466 Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Sat, 27 Sep 2014 13:48:26 -0700 Subject: [PATCH 3/6] TAJO-1043: Implement nextFetch(RowBlock) of CSVScanner. 
(hyunsik) --- .../java/org/apache/tajo/storage/CSVFile.java | 51 ++ .../org/apache/tajo/storage/FileScanner.java | 2 +- .../java/org/apache/tajo/storage/Scanner.java | 2 +- .../storage/TextSerializerDeserializer.java | 94 +- .../tajo/tuple/offheap/UnSafeTuple.java | 4 + .../apache/tajo/storage/TestNextFetches.java | 854 ++++++++++++++++++ 6 files changed, 1004 insertions(+), 3 deletions(-) create mode 100644 tajo-storage/src/test/java/org/apache/tajo/storage/TestNextFetches.java diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/CSVFile.java b/tajo-storage/src/main/java/org/apache/tajo/storage/CSVFile.java index 211379467e..06ff081271 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/CSVFile.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/CSVFile.java @@ -35,11 +35,15 @@ import org.apache.tajo.conf.TajoConf; import org.apache.tajo.datum.Datum; import org.apache.tajo.datum.NullDatum; +import org.apache.tajo.exception.UnimplementedException; import org.apache.tajo.exception.UnsupportedException; import org.apache.tajo.storage.compress.CodecPool; import org.apache.tajo.storage.exception.AlreadyExistsStorageException; import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.rcfile.NonSyncByteArrayOutputStream; +import org.apache.tajo.tuple.offheap.OffHeapRowBlock; +import org.apache.tajo.tuple.offheap.OffHeapRowBlockWriter; +import org.apache.tajo.tuple.offheap.RowWriter; import org.apache.tajo.util.BytesUtils; import java.io.*; @@ -480,6 +484,53 @@ public Tuple next() throws IOException { } } + TextSerializerDeserializer deserializer = new TextSerializerDeserializer(); + + boolean hasNext() throws IOException { + if (currentIdx == validIdx) { + if (eof) { + return false; + } else { + page(); + + if(currentIdx == validIdx){ + return false; + } + } + } + + return true; + } + + @Override + public boolean nextFetch(OffHeapRowBlock rowBlock) throws IOException { + rowBlock.clear(); + OffHeapRowBlockWriter 
writer = (OffHeapRowBlockWriter) rowBlock.getWriter(); + + while(hasNext() && rowBlock.rows() < rowBlock.maxRowNum()) { + byte[][] cells = BytesUtils.splitPreserveAllTokens(buffer.getData(), startOffsets.get(currentIdx), + rowLengthList.get(currentIdx), delimiter, targetColumnIndexes); + currentIdx++; + + int fieldIdx = 0; + writer.startRow(); + for (; fieldIdx < cells.length && fieldIdx < schema.size(); fieldIdx++) { + if (cells[fieldIdx] == null) { + writer.skipField(); + } else { + deserializer.write(writer, schema.getColumn(fieldIdx), cells[fieldIdx], 0, cells[fieldIdx].length, nullChars); + + } + } + for (; fieldIdx < schema.size(); fieldIdx++) { + writer.skipField(); + } + writer.endRow(); + } + + return rowBlock.rows() > 0; + } + private boolean isCompress() { return codec != null; } diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/FileScanner.java b/tajo-storage/src/main/java/org/apache/tajo/storage/FileScanner.java index 6aa59e6a45..d4357e33e2 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/FileScanner.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/FileScanner.java @@ -83,7 +83,7 @@ public Schema getSchema() { } @Override - public boolean nextFetch(OffHeapRowBlock rowBlock) { + public boolean nextFetch(OffHeapRowBlock rowBlock) throws IOException { throw new UnimplementedException("nextFetch(OffHeapRowBlock) is not implemented"); } diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/Scanner.java b/tajo-storage/src/main/java/org/apache/tajo/storage/Scanner.java index f532e0e646..3478e23cdb 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/Scanner.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/Scanner.java @@ -49,7 +49,7 @@ public interface Scanner extends SchemaObject, Closeable { * @param rowBlock * @return */ - boolean nextFetch(OffHeapRowBlock rowBlock); + boolean nextFetch(OffHeapRowBlock rowBlock) throws IOException; /** * Reset the cursor. 
After executed, the scanner diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/TextSerializerDeserializer.java b/tajo-storage/src/main/java/org/apache/tajo/storage/TextSerializerDeserializer.java index b42c1b5142..6dfe6c1386 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/TextSerializerDeserializer.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/TextSerializerDeserializer.java @@ -25,8 +25,11 @@ import org.apache.tajo.conf.TajoConf; import org.apache.tajo.datum.*; import org.apache.tajo.datum.protobuf.ProtobufJsonFormat; +import org.apache.tajo.tuple.offheap.RowWriter; import org.apache.tajo.util.Bytes; +import org.apache.tajo.util.NetUtils; import org.apache.tajo.util.NumberUtil; +import org.apache.tajo.util.datetime.DateTimeUtil; import java.io.IOException; import java.io.OutputStream; @@ -35,7 +38,7 @@ public class TextSerializerDeserializer implements SerializerDeserializer { public static final byte[] trueBytes = "true".getBytes(); public static final byte[] falseBytes = "false".getBytes(); - private ProtobufJsonFormat protobufJsonFormat = ProtobufJsonFormat.getInstance(); + private static ProtobufJsonFormat protobufJsonFormat = ProtobufJsonFormat.getInstance(); @Override @@ -213,6 +216,95 @@ public Datum deserialize(Column col, byte[] bytes, int offset, int length, byte[ return datum; } + public static void write(RowWriter writer, Column col, byte [] bytes, int offset, int length, byte [] nullChar) throws IOException { + TajoDataTypes.Type type = col.getDataType().getType(); + boolean nullField; + if (type == TajoDataTypes.Type.TEXT || type == TajoDataTypes.Type.CHAR) { + nullField = isNullText(bytes, offset, length, nullChar); + } else { + nullField = isNull(bytes, offset, length, nullChar); + } + + if (nullField) { + writer.skipField(); + return; + } else { + switch (col.getDataType().getType()) { + case BOOLEAN: + writer.putBool(bytes[offset] == 't' || bytes[offset] == 'T'); + break; + + case CHAR: + case TEXT: + 
writer.putText(bytes); + break; + + case INT1: + case INT2: + writer.putInt2((short) NumberUtil.parseInt(bytes, offset, length)); + break; + + case INT4: + writer.putInt4(NumberUtil.parseInt(bytes, offset, length)); + break; + + case INT8: + writer.putInt8(Long.parseLong(new String(bytes, offset, length))); + break; + + case FLOAT4: + writer.putFloat4(Float.parseFloat(new String(bytes, offset, length))); + break; + + case FLOAT8: + writer.putFloat8(Double.parseDouble(new String(bytes, offset, length))); + break; + + case DATE: + writer.putDate(DateTimeUtil.toJulianDate(new String(bytes, offset, length))); + break; + + case TIME: + writer.putInt8(DateTimeUtil.toJulianTime(new String(bytes, offset, length))); + break; + + case TIMESTAMP: + writer.putInt8(DateTimeUtil.toJulianTimestamp(new String(bytes, offset, length))); + break; + + case INTERVAL: + writer.putInterval(DatumFactory.createInterval(new String(bytes, offset, length))); + break; + + case PROTOBUF: + ProtobufDatumFactory factory = ProtobufDatumFactory.get(col.getDataType()); + Message.Builder builder = factory.newBuilder(); + try { + byte[] protoBytes = new byte[length]; + System.arraycopy(bytes, offset, protoBytes, 0, length); + protobufJsonFormat.merge(protoBytes, builder); + writer.putProtoDatum(factory.createDatum(builder.build())); + } catch (IOException e) { + e.printStackTrace(); + throw new RuntimeException(e); + } + + break; + + case INET4: + writer.putInet4(NetUtils.convertIPStringToInt(new String(bytes, offset, length))); + break; + + case BLOB: + writer.putBlob(Base64.decodeBase64(bytes)); + break; + + default: + writer.skipField(); + } + } + } + private static boolean isNull(byte[] val, int offset, int length, byte[] nullBytes) { return length == 0 || ((length == nullBytes.length) && Bytes.equals(val, offset, length, nullBytes, 0, nullBytes.length)); diff --git a/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/UnSafeTuple.java 
b/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/UnSafeTuple.java index d8bafea539..6f4d385043 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/UnSafeTuple.java +++ b/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/UnSafeTuple.java @@ -145,6 +145,8 @@ public Datum get(int fieldId) { switch (types[fieldId].getType()) { case BOOLEAN: return DatumFactory.createBool(getBool(fieldId)); + case CHAR: + return DatumFactory.createChar(getBytes(fieldId)); case INT1: case INT2: return DatumFactory.createInt2(getInt2(fieldId)); @@ -158,6 +160,8 @@ public Datum get(int fieldId) { return DatumFactory.createFloat8(getFloat8(fieldId)); case TEXT: return DatumFactory.createText(getText(fieldId)); + case BLOB: + return DatumFactory.createBlob(getBytes(fieldId)); case TIMESTAMP: return DatumFactory.createTimestamp(getInt8(fieldId)); case DATE: diff --git a/tajo-storage/src/test/java/org/apache/tajo/storage/TestNextFetches.java b/tajo-storage/src/test/java/org/apache/tajo/storage/TestNextFetches.java new file mode 100644 index 0000000000..d1b3afd189 --- /dev/null +++ b/tajo-storage/src/test/java/org/apache/tajo/storage/TestNextFetches.java @@ -0,0 +1,854 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.storage; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Writable; +import org.apache.tajo.QueryId; +import org.apache.tajo.TajoIdProtos; +import org.apache.tajo.catalog.CatalogUtil; +import org.apache.tajo.catalog.Schema; +import org.apache.tajo.catalog.TableMeta; +import org.apache.tajo.catalog.proto.CatalogProtos.StoreType; +import org.apache.tajo.catalog.statistics.TableStats; +import org.apache.tajo.common.TajoDataTypes.Type; +import org.apache.tajo.conf.TajoConf; +import org.apache.tajo.datum.Datum; +import org.apache.tajo.datum.DatumFactory; +import org.apache.tajo.datum.NullDatum; +import org.apache.tajo.datum.ProtobufDatumFactory; +import org.apache.tajo.storage.fragment.FileFragment; +import org.apache.tajo.storage.rcfile.RCFile; +import org.apache.tajo.storage.sequencefile.SequenceFileScanner; +import org.apache.tajo.tuple.RowBlockReader; +import org.apache.tajo.tuple.offheap.OffHeapRowBlock; +import org.apache.tajo.tuple.offheap.UnSafeTuple; +import org.apache.tajo.tuple.offheap.ZeroCopyTuple; +import org.apache.tajo.unit.StorageUnit; +import org.apache.tajo.util.CommonTestingUtil; +import org.apache.tajo.util.FileUtil; +import org.apache.tajo.util.KeyValueSet; +import org.apache.tajo.util.UnsafeUtil; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import sun.misc.Unsafe; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +@RunWith(Parameterized.class) +public class TestNextFetches { + private TajoConf conf; + private static String TEST_PATH = 
"target/test-data/TestStorages"; + + private static String TEST_PROJECTION_AVRO_SCHEMA = + "{\n" + + " \"type\": \"record\",\n" + + " \"namespace\": \"org.apache.tajo\",\n" + + " \"name\": \"testProjection\",\n" + + " \"fields\": [\n" + + " { \"name\": \"id\", \"type\": \"int\" },\n" + + " { \"name\": \"age\", \"type\": \"long\" },\n" + + " { \"name\": \"score\", \"type\": \"float\" }\n" + + " ]\n" + + "}\n"; + + private static String TEST_NULL_HANDLING_TYPES_AVRO_SCHEMA = + "{\n" + + " \"type\": \"record\",\n" + + " \"namespace\": \"org.apache.tajo\",\n" + + " \"name\": \"testNullHandlingTypes\",\n" + + " \"fields\": [\n" + + " { \"name\": \"col1\", \"type\": [\"null\", \"boolean\"] },\n" + + " { \"name\": \"col2\", \"type\": [\"null\", \"int\"] },\n" + + " { \"name\": \"col3\", \"type\": [\"null\", \"string\"] },\n" + + " { \"name\": \"col4\", \"type\": [\"null\", \"int\"] },\n" + + " { \"name\": \"col5\", \"type\": [\"null\", \"int\"] },\n" + + " { \"name\": \"col6\", \"type\": [\"null\", \"long\"] },\n" + + " { \"name\": \"col7\", \"type\": [\"null\", \"float\"] },\n" + + " { \"name\": \"col8\", \"type\": [\"null\", \"double\"] },\n" + + " { \"name\": \"col9\", \"type\": [\"null\", \"string\"] },\n" + + " { \"name\": \"col10\", \"type\": [\"null\", \"bytes\"] },\n" + + " { \"name\": \"col11\", \"type\": [\"null\", \"bytes\"] },\n" + + " { \"name\": \"col12\", \"type\": \"null\" },\n" + + " { \"name\": \"col13\", \"type\": [\"null\", \"bytes\"] }\n" + + " ]\n" + + "}\n"; + + private StoreType storeType; + private boolean splitable; + private boolean statsable; + private Path testDir; + private FileSystem fs; + + public TestNextFetches(StoreType type, boolean splitable, boolean statsable) throws IOException { + this.storeType = type; + this.splitable = splitable; + this.statsable = statsable; + + conf = new TajoConf(); + + if (storeType == StoreType.RCFILE) { + conf.setInt(RCFile.RECORD_INTERVAL_CONF_STR, 100); + } + + testDir = 
CommonTestingUtil.getTestDir(TEST_PATH); + fs = testDir.getFileSystem(conf); + } + + @Parameterized.Parameters + public static Collection generateParameters() { + return Arrays.asList(new Object[][] { + {StoreType.CSV, true, true}, + // TODO - to be implemented +// {StoreType.RAW, false, false}, +// {StoreType.RCFILE, true, true}, +// {StoreType.PARQUET, false, false}, +// {StoreType.SEQUENCEFILE, true, true}, +// {StoreType.AVRO, false, false}, + }); + } + + @Test + public void testSplitable() throws IOException { + if (splitable) { + Schema schema = new Schema(); + schema.addColumn("id", Type.INT4); + schema.addColumn("age", Type.INT8); + + TableMeta meta = CatalogUtil.newTableMeta(storeType); + Path tablePath = new Path(testDir, "Splitable.data"); + Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath); + appender.enableStats(); + appender.init(); + int tupleNum = 10000; + VTuple vTuple; + + for (int i = 0; i < tupleNum; i++) { + vTuple = new VTuple(2); + vTuple.put(0, DatumFactory.createInt4(i + 1)); + vTuple.put(1, DatumFactory.createInt8(25l)); + appender.addTuple(vTuple); + } + appender.close(); + TableStats stat = appender.getStats(); + assertEquals(tupleNum, stat.getNumRows().longValue()); + + FileStatus status = fs.getFileStatus(tablePath); + long fileLen = status.getLen(); + long randomNum = (long) (Math.random() * fileLen) + 1; + + FileFragment[] tablets = new FileFragment[2]; + tablets[0] = new FileFragment("Splitable", tablePath, 0, randomNum); + tablets[1] = new FileFragment("Splitable", tablePath, randomNum, (fileLen - randomNum)); + + Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, tablets[0], schema); + assertTrue(scanner.isSplittable()); + scanner.init(); + int tupleCnt = 0; + + OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 64 * StorageUnit.KB); + rowBlock.setRows(1024); + + while (scanner.nextFetch(rowBlock)) { + tupleCnt += rowBlock.rows(); + } + 
scanner.close(); + + scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, tablets[1], schema); + assertTrue(scanner.isSplittable()); + scanner.init(); + while (scanner.nextFetch(rowBlock)) { + tupleCnt += rowBlock.rows(); + } + scanner.close(); + + assertEquals(tupleNum, tupleCnt); + + rowBlock.release(); + } + } + + @Test + public void testSplitableForRCFileBug() throws IOException { + if (storeType == StoreType.RCFILE) { + Schema schema = new Schema(); + schema.addColumn("id", Type.INT4); + schema.addColumn("age", Type.INT8); + + TableMeta meta = CatalogUtil.newTableMeta(storeType); + Path tablePath = new Path(testDir, "Splitable.data"); + Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath); + appender.enableStats(); + appender.init(); + int tupleNum = 10000; + VTuple vTuple; + + for (int i = 0; i < tupleNum; i++) { + vTuple = new VTuple(2); + vTuple.put(0, DatumFactory.createInt4(i + 1)); + vTuple.put(1, DatumFactory.createInt8(25l)); + appender.addTuple(vTuple); + } + appender.close(); + TableStats stat = appender.getStats(); + assertEquals(tupleNum, stat.getNumRows().longValue()); + + FileStatus status = fs.getFileStatus(tablePath); + long fileLen = status.getLen(); + long randomNum = 122; // header size + + FileFragment[] tablets = new FileFragment[2]; + tablets[0] = new FileFragment("Splitable", tablePath, 0, randomNum); + tablets[1] = new FileFragment("Splitable", tablePath, randomNum, (fileLen - randomNum)); + + Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, tablets[0], schema); + assertTrue(scanner.isSplittable()); + scanner.init(); + int tupleCnt = 0; + + OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 64 * StorageUnit.KB); + rowBlock.setRows(1024); + + while (scanner.nextFetch(rowBlock)) { + tupleCnt += rowBlock.rows(); + } + scanner.close(); + + scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, 
tablets[1], schema); + assertTrue(scanner.isSplittable()); + scanner.init(); + while (scanner.nextFetch(rowBlock)) { + tupleCnt += rowBlock.rows(); + } + scanner.close(); + + assertEquals(tupleNum, tupleCnt); + + rowBlock.release(); + } + } + + @Test + public void testProjection() throws IOException { + Schema schema = new Schema(); + schema.addColumn("id", Type.INT4); + schema.addColumn("age", Type.INT8); + schema.addColumn("score", Type.FLOAT4); + + TableMeta meta = CatalogUtil.newTableMeta(storeType); + meta.setOptions(StorageUtil.newPhysicalProperties(storeType)); + if (storeType == StoreType.AVRO) { + meta.putOption(StorageConstants.AVRO_SCHEMA_LITERAL, + TEST_PROJECTION_AVRO_SCHEMA); + } + + Path tablePath = new Path(testDir, "testProjection.data"); + Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath); + appender.init(); + int tupleNum = 10000; + VTuple vTuple; + + for (int i = 0; i < tupleNum; i++) { + vTuple = new VTuple(3); + vTuple.put(0, DatumFactory.createInt4(i + 1)); + vTuple.put(1, DatumFactory.createInt8(i + 2)); + vTuple.put(2, DatumFactory.createFloat4(i + 3)); + appender.addTuple(vTuple); + } + appender.close(); + + FileStatus status = fs.getFileStatus(tablePath); + FileFragment fragment = new FileFragment("testReadAndWrite", tablePath, 0, status.getLen()); + + Schema target = new Schema(); + target.addColumn("age", Type.INT8); + target.addColumn("score", Type.FLOAT4); + Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, fragment, target); + scanner.init(); + int tupleCnt = 0; + + OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 64 * StorageUnit.KB); + rowBlock.setRows(1024); + + ZeroCopyTuple tuple = new ZeroCopyTuple(); + while (scanner.nextFetch(rowBlock)) { + RowBlockReader reader = rowBlock.getReader(); + while (reader.next(tuple)) { + if (storeType == StoreType.RCFILE + || storeType == StoreType.TREVNI + || storeType == StoreType.CSV + || 
storeType == StoreType.PARQUET + || storeType == StoreType.SEQUENCEFILE + || storeType == StoreType.AVRO) { + assertTrue(tuple.isNull(0)); + } + assertTrue(tupleCnt + 2 == tuple.getInt8(1)); + assertTrue(tupleCnt + 3 == tuple.getFloat4(2)); + tupleCnt++; + } + } + scanner.close(); + + assertEquals(tupleNum, tupleCnt); + + rowBlock.release(); + } + + @Test + public void testVariousTypes() throws IOException { + Schema schema = new Schema(); + schema.addColumn("col1", Type.BOOLEAN); + schema.addColumn("col2", Type.CHAR, 7); + schema.addColumn("col3", Type.INT2); + schema.addColumn("col4", Type.INT4); + schema.addColumn("col5", Type.INT8); + schema.addColumn("col6", Type.FLOAT4); + schema.addColumn("col7", Type.FLOAT8); + schema.addColumn("col8", Type.TEXT); + schema.addColumn("col9", Type.BLOB); + schema.addColumn("col10", Type.INET4); + schema.addColumn("col11", Type.NULL_TYPE); + schema.addColumn("col12", CatalogUtil.newDataType(Type.PROTOBUF, TajoIdProtos.QueryIdProto.class.getName())); + + KeyValueSet options = new KeyValueSet(); + TableMeta meta = CatalogUtil.newTableMeta(storeType, options); + meta.setOptions(StorageUtil.newPhysicalProperties(storeType)); + if (storeType == StoreType.AVRO) { + String path = FileUtil.getResourcePath("testVariousTypes.avsc").toString(); + meta.putOption(StorageConstants.AVRO_SCHEMA_URL, path); + } + + Path tablePath = new Path(testDir, "testVariousTypes.data"); + Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath); + appender.init(); + + QueryId queryid = new QueryId("12345", 5); + ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName()); + + Tuple tuple = new VTuple(12); + tuple.put(new Datum[] { + DatumFactory.createBool(true), + DatumFactory.createChar("hyunsik"), + DatumFactory.createInt2((short) 17), + DatumFactory.createInt4(59), + DatumFactory.createInt8(23l), + DatumFactory.createFloat4(77.9f), + DatumFactory.createFloat8(271.9f), 
+ DatumFactory.createText("hyunsik"), + DatumFactory.createBlob("hyunsik".getBytes()), + DatumFactory.createInet4("192.168.0.1"), + NullDatum.get(), + factory.createDatum(queryid.getProto()) + }); + appender.addTuple(tuple); + appender.flush(); + appender.close(); + + FileStatus status = fs.getFileStatus(tablePath); + FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen()); + Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, fragment); + scanner.init(); + + OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 64 * StorageUnit.KB); + rowBlock.setRows(1024); + + ZeroCopyTuple zcTuple = new ZeroCopyTuple(); + while (scanner.nextFetch(rowBlock)) { + RowBlockReader reader = rowBlock.getReader(); + while (reader.next(zcTuple)) { + for (int i = 0; i < tuple.size(); i++) { + assertEquals(tuple.get(i), zcTuple.get(i)); + } + } + } + scanner.close(); + + rowBlock.release(); + } + + @Test + public void testNullHandlingTypes() throws IOException { + Schema schema = new Schema(); + schema.addColumn("col1", Type.BOOLEAN); + schema.addColumn("col2", Type.CHAR, 7); + schema.addColumn("col3", Type.INT2); + schema.addColumn("col4", Type.INT4); + schema.addColumn("col5", Type.INT8); + schema.addColumn("col6", Type.FLOAT4); + schema.addColumn("col7", Type.FLOAT8); + schema.addColumn("col8", Type.TEXT); + schema.addColumn("col9", Type.BLOB); + schema.addColumn("col10", Type.INET4); + schema.addColumn("col11", Type.NULL_TYPE); + schema.addColumn("col12", CatalogUtil.newDataType(Type.PROTOBUF, TajoIdProtos.QueryIdProto.class.getName())); + + KeyValueSet options = new KeyValueSet(); + TableMeta meta = CatalogUtil.newTableMeta(storeType, options); + meta.setOptions(StorageUtil.newPhysicalProperties(storeType)); + meta.putOption(StorageConstants.CSVFILE_NULL, "\\\\N"); + meta.putOption(StorageConstants.RCFILE_NULL, "\\\\N"); + meta.putOption(StorageConstants.RCFILE_SERDE, TextSerializerDeserializer.class.getName()); + 
meta.putOption(StorageConstants.SEQUENCEFILE_NULL, "\\"); + if (storeType == StoreType.AVRO) { + meta.putOption(StorageConstants.AVRO_SCHEMA_LITERAL, + TEST_NULL_HANDLING_TYPES_AVRO_SCHEMA); + } + + Path tablePath = new Path(testDir, "testVariousTypes.data"); + Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath); + appender.init(); + + QueryId queryid = new QueryId("12345", 5); + ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName()); + + Tuple seedTuple = new VTuple(12); + seedTuple.put(new Datum[]{ + DatumFactory.createBool(true), // 0 + DatumFactory.createChar("hyunsik"), // 1 + DatumFactory.createInt2((short) 17), // 2 + DatumFactory.createInt4(59), // 3 + DatumFactory.createInt8(23l), // 4 + DatumFactory.createFloat4(77.9f), // 5 + DatumFactory.createFloat8(271.9f), // 6 + DatumFactory.createText("hyunsik"), // 7 + DatumFactory.createBlob("hyunsik".getBytes()),// 8 + DatumFactory.createInet4("192.168.0.1"), // 9 + NullDatum.get(), // 10 + factory.createDatum(queryid.getProto()) // 11 + }); + + // Making tuples with different null column positions + Tuple tuple; + for (int i = 0; i < 12; i++) { + tuple = new VTuple(12); + for (int j = 0; j < 12; j++) { + if (i == j) { // i'th column will have NULL value + tuple.put(j, NullDatum.get()); + } else { + tuple.put(j, seedTuple.get(j)); + } + } + appender.addTuple(tuple); + } + appender.flush(); + appender.close(); + + FileStatus status = fs.getFileStatus(tablePath); + FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen()); + Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, fragment); + scanner.init(); + + OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 64 * StorageUnit.KB); + rowBlock.setRows(1024); + + ZeroCopyTuple retrieved = new ZeroCopyTuple(); + + int i = 0; + while (scanner.nextFetch(rowBlock)) { + RowBlockReader reader = rowBlock.getReader(); + 
+ while(reader.next(retrieved)) { + assertEquals(12, retrieved.size()); + for (int j = 0; j < 12; j++) { + if (i == j) { + assertEquals(NullDatum.get(), retrieved.get(j)); + } else { + assertEquals(seedTuple.get(j), retrieved.get(j)); + } + } + + i++; + } + } + scanner.close(); + + rowBlock.release(); + } + + @Test + public void testRCFileTextSerializeDeserialize() throws IOException { + if(storeType != StoreType.RCFILE) return; + + Schema schema = new Schema(); + schema.addColumn("col1", Type.BOOLEAN); + schema.addColumn("col2", Type.CHAR, 7); + schema.addColumn("col3", Type.INT2); + schema.addColumn("col4", Type.INT4); + schema.addColumn("col5", Type.INT8); + schema.addColumn("col6", Type.FLOAT4); + schema.addColumn("col7", Type.FLOAT8); + schema.addColumn("col8", Type.TEXT); + schema.addColumn("col9", Type.BLOB); + schema.addColumn("col10", Type.INET4); + schema.addColumn("col11", Type.NULL_TYPE); + schema.addColumn("col12", CatalogUtil.newDataType(Type.PROTOBUF, TajoIdProtos.QueryIdProto.class.getName())); + + KeyValueSet options = new KeyValueSet(); + TableMeta meta = CatalogUtil.newTableMeta(storeType, options); + meta.putOption(StorageConstants.CSVFILE_SERDE, TextSerializerDeserializer.class.getName()); + + Path tablePath = new Path(testDir, "testVariousTypes.data"); + Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath); + appender.enableStats(); + appender.init(); + + QueryId queryid = new QueryId("12345", 5); + ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName()); + + Tuple tuple = new VTuple(12); + tuple.put(new Datum[] { + DatumFactory.createBool(true), + DatumFactory.createChar("jinho"), + DatumFactory.createInt2((short) 17), + DatumFactory.createInt4(59), + DatumFactory.createInt8(23l), + DatumFactory.createFloat4(77.9f), + DatumFactory.createFloat8(271.9f), + DatumFactory.createText("jinho"), + DatumFactory.createBlob("hyunsik babo".getBytes()), + 
DatumFactory.createInet4("192.168.0.1"), + NullDatum.get(), + factory.createDatum(queryid.getProto()) + }); + appender.addTuple(tuple); + appender.flush(); + appender.close(); + + FileStatus status = fs.getFileStatus(tablePath); + assertEquals(appender.getStats().getNumBytes().longValue(), status.getLen()); + + FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen()); + Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, fragment); + scanner.init(); + + OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 64 * StorageUnit.KB); + rowBlock.setRows(1024); + + ZeroCopyTuple retrieved = new ZeroCopyTuple(); + while (scanner.nextFetch(rowBlock)) { + RowBlockReader reader = rowBlock.getReader(); + while (reader.next(retrieved)) { + for (int i = 0; i < tuple.size(); i++) { + assertEquals(tuple.get(i), retrieved.get(i)); + } + } + } + scanner.close(); + assertEquals(appender.getStats().getNumBytes().longValue(), scanner.getInputStats().getNumBytes().longValue()); + assertEquals(appender.getStats().getNumRows().longValue(), scanner.getInputStats().getNumRows().longValue()); + + rowBlock.release(); + } + + @Test + public void testRCFileBinarySerializeDeserialize() throws IOException { + if(storeType != StoreType.RCFILE) return; + + Schema schema = new Schema(); + schema.addColumn("col1", Type.BOOLEAN); + schema.addColumn("col2", Type.CHAR, 7); + schema.addColumn("col3", Type.INT2); + schema.addColumn("col4", Type.INT4); + schema.addColumn("col5", Type.INT8); + schema.addColumn("col6", Type.FLOAT4); + schema.addColumn("col7", Type.FLOAT8); + schema.addColumn("col8", Type.TEXT); + schema.addColumn("col9", Type.BLOB); + schema.addColumn("col10", Type.INET4); + schema.addColumn("col11", Type.NULL_TYPE); + schema.addColumn("col12", CatalogUtil.newDataType(Type.PROTOBUF, TajoIdProtos.QueryIdProto.class.getName())); + + KeyValueSet options = new KeyValueSet(); + TableMeta meta = CatalogUtil.newTableMeta(storeType, 
options); + meta.putOption(StorageConstants.RCFILE_SERDE, BinarySerializerDeserializer.class.getName()); + + Path tablePath = new Path(testDir, "testVariousTypes.data"); + Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath); + appender.enableStats(); + appender.init(); + + QueryId queryid = new QueryId("12345", 5); + ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName()); + + Tuple tuple = new VTuple(12); + tuple.put(new Datum[] { + DatumFactory.createBool(true), + DatumFactory.createBit((byte) 0x99), + DatumFactory.createChar("jinho"), + DatumFactory.createInt2((short) 17), + DatumFactory.createInt4(59), + DatumFactory.createInt8(23l), + DatumFactory.createFloat4(77.9f), + DatumFactory.createFloat8(271.9f), + DatumFactory.createText("jinho"), + DatumFactory.createBlob("hyunsik babo".getBytes()), + DatumFactory.createInet4("192.168.0.1"), + NullDatum.get(), + factory.createDatum(queryid.getProto()) + }); + appender.addTuple(tuple); + appender.flush(); + appender.close(); + + FileStatus status = fs.getFileStatus(tablePath); + assertEquals(appender.getStats().getNumBytes().longValue(), status.getLen()); + + FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen()); + Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, fragment); + scanner.init(); + + OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 64 * StorageUnit.KB); + rowBlock.setRows(1024); + + ZeroCopyTuple retrieved = new ZeroCopyTuple(); + while (scanner.nextFetch(rowBlock)) { + RowBlockReader reader = rowBlock.getReader(); + while (reader.next(retrieved)) { + for (int i = 0; i < tuple.size(); i++) { + assertEquals(tuple.get(i), retrieved.get(i)); + } + } + } + scanner.close(); + assertEquals(appender.getStats().getNumBytes().longValue(), scanner.getInputStats().getNumBytes().longValue()); + assertEquals(appender.getStats().getNumRows().longValue(), 
scanner.getInputStats().getNumRows().longValue()); + + rowBlock.release(); + } + + @Test + public void testSequenceFileTextSerializeDeserialize() throws IOException { + if(storeType != StoreType.SEQUENCEFILE) return; + + Schema schema = new Schema(); + schema.addColumn("col1", Type.BOOLEAN); + schema.addColumn("col2", Type.CHAR, 7); + schema.addColumn("col3", Type.INT2); + schema.addColumn("col4", Type.INT4); + schema.addColumn("col5", Type.INT8); + schema.addColumn("col6", Type.FLOAT4); + schema.addColumn("col7", Type.FLOAT8); + schema.addColumn("col8", Type.TEXT); + schema.addColumn("col9", Type.BLOB); + schema.addColumn("col10", Type.INET4); + schema.addColumn("col11", Type.NULL_TYPE); + schema.addColumn("col12", CatalogUtil.newDataType(Type.PROTOBUF, TajoIdProtos.QueryIdProto.class.getName())); + + KeyValueSet options = new KeyValueSet(); + TableMeta meta = CatalogUtil.newTableMeta(storeType, options); + meta.putOption(StorageConstants.SEQUENCEFILE_SERDE, TextSerializerDeserializer.class.getName()); + + Path tablePath = new Path(testDir, "testVariousTypes.data"); + Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath); + appender.enableStats(); + appender.init(); + + QueryId queryid = new QueryId("12345", 5); + ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName()); + + Tuple tuple = new VTuple(12); + tuple.put(new Datum[] { + DatumFactory.createBool(true), + DatumFactory.createChar("jinho"), + DatumFactory.createInt2((short) 17), + DatumFactory.createInt4(59), + DatumFactory.createInt8(23l), + DatumFactory.createFloat4(77.9f), + DatumFactory.createFloat8(271.9f), + DatumFactory.createText("jinho"), + DatumFactory.createBlob("hyunsik babo".getBytes()), + DatumFactory.createInet4("192.168.0.1"), + NullDatum.get(), + factory.createDatum(queryid.getProto()) + }); + appender.addTuple(tuple); + appender.flush(); + appender.close(); + + FileStatus status = 
fs.getFileStatus(tablePath); + assertEquals(appender.getStats().getNumBytes().longValue(), status.getLen()); + + FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen()); + Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, fragment); + scanner.init(); + + assertTrue(scanner instanceof SequenceFileScanner); + Writable key = ((SequenceFileScanner) scanner).getKey(); + assertEquals(key.getClass().getCanonicalName(), LongWritable.class.getCanonicalName()); + + OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 64 * StorageUnit.KB); + rowBlock.setRows(1024); + + ZeroCopyTuple retrieved = new ZeroCopyTuple(); + + while (scanner.nextFetch(rowBlock)) { + RowBlockReader reader = rowBlock.getReader(); + while (reader.next(retrieved)) { + for (int i = 0; i < tuple.size(); i++) { + assertEquals(tuple.get(i), retrieved.get(i)); + } + } + } + scanner.close(); + assertEquals(appender.getStats().getNumBytes().longValue(), scanner.getInputStats().getNumBytes().longValue()); + assertEquals(appender.getStats().getNumRows().longValue(), scanner.getInputStats().getNumRows().longValue()); + + rowBlock.release(); + } + + @Test + public void testSequenceFileBinarySerializeDeserialize() throws IOException { + if(storeType != StoreType.SEQUENCEFILE) return; + + Schema schema = new Schema(); + schema.addColumn("col1", Type.BOOLEAN); + schema.addColumn("col2", Type.BIT); + schema.addColumn("col3", Type.CHAR, 7); + schema.addColumn("col4", Type.INT2); + schema.addColumn("col5", Type.INT4); + schema.addColumn("col6", Type.INT8); + schema.addColumn("col7", Type.FLOAT4); + schema.addColumn("col8", Type.FLOAT8); + schema.addColumn("col9", Type.TEXT); + schema.addColumn("col10", Type.BLOB); + schema.addColumn("col11", Type.INET4); + schema.addColumn("col12", Type.NULL_TYPE); + schema.addColumn("col13", CatalogUtil.newDataType(Type.PROTOBUF, TajoIdProtos.QueryIdProto.class.getName())); + + KeyValueSet options = new KeyValueSet(); + 
TableMeta meta = CatalogUtil.newTableMeta(storeType, options); + meta.putOption(StorageConstants.SEQUENCEFILE_SERDE, BinarySerializerDeserializer.class.getName()); + + Path tablePath = new Path(testDir, "testVariousTypes.data"); + Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath); + appender.enableStats(); + appender.init(); + + QueryId queryid = new QueryId("12345", 5); + ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName()); + + Tuple tuple = new VTuple(13); + tuple.put(new Datum[] { + DatumFactory.createBool(true), + DatumFactory.createBit((byte) 0x99), + DatumFactory.createChar("jinho"), + DatumFactory.createInt2((short) 17), + DatumFactory.createInt4(59), + DatumFactory.createInt8(23l), + DatumFactory.createFloat4(77.9f), + DatumFactory.createFloat8(271.9f), + DatumFactory.createText("jinho"), + DatumFactory.createBlob("hyunsik babo".getBytes()), + DatumFactory.createInet4("192.168.0.1"), + NullDatum.get(), + factory.createDatum(queryid.getProto()) + }); + appender.addTuple(tuple); + appender.flush(); + appender.close(); + + FileStatus status = fs.getFileStatus(tablePath); + assertEquals(appender.getStats().getNumBytes().longValue(), status.getLen()); + + FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen()); + Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, fragment); + scanner.init(); + + assertTrue(scanner instanceof SequenceFileScanner); + Writable key = ((SequenceFileScanner) scanner).getKey(); + assertEquals(key.getClass().getCanonicalName(), BytesWritable.class.getCanonicalName()); + + OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 64 * StorageUnit.KB); + rowBlock.setRows(1024); + + ZeroCopyTuple retrieved = new ZeroCopyTuple(); + + while (scanner.nextFetch(rowBlock)) { + RowBlockReader reader = rowBlock.getReader(); + while (reader.next(retrieved)) { + for (int i = 0; i < 
tuple.size(); i++) { + assertEquals(tuple.get(i), retrieved.get(i)); + } + } + } + scanner.close(); + assertEquals(appender.getStats().getNumBytes().longValue(), scanner.getInputStats().getNumBytes().longValue()); + assertEquals(appender.getStats().getNumRows().longValue(), scanner.getInputStats().getNumRows().longValue()); + } + + @Test + public void testTime() throws IOException { + if (storeType == StoreType.CSV || storeType == StoreType.RAW) { + Schema schema = new Schema(); + schema.addColumn("col1", Type.DATE); + schema.addColumn("col2", Type.TIME); + schema.addColumn("col3", Type.TIMESTAMP); + + KeyValueSet options = new KeyValueSet(); + TableMeta meta = CatalogUtil.newTableMeta(storeType, options); + + Path tablePath = new Path(testDir, "testTime.data"); + Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath); + appender.init(); + + Tuple tuple = new VTuple(3); + tuple.put(new Datum[]{ + DatumFactory.createDate("1980-04-01"), + DatumFactory.createTime("12:34:56"), + DatumFactory.createTimestmpDatumWithUnixTime((int)(System.currentTimeMillis() / 1000)) + }); + appender.addTuple(tuple); + appender.flush(); + appender.close(); + + FileStatus status = fs.getFileStatus(tablePath); + FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen()); + Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, fragment); + scanner.init(); + + OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 64 * StorageUnit.KB); + rowBlock.setRows(1024); + + ZeroCopyTuple retrieved = new ZeroCopyTuple(); + + while (scanner.nextFetch(rowBlock)) { + RowBlockReader reader = rowBlock.getReader(); + while (reader.next(retrieved)) { + for (int i = 0; i < tuple.size(); i++) { + assertEquals(tuple.get(i), retrieved.get(i)); + } + } + } + scanner.close(); + + rowBlock.release(); + } + } + +} From 292f6743845139dfc4ae5d7d672386b25202beed Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Sun, 
28 Sep 2014 10:42:20 -0700 Subject: [PATCH 4/6] TAJO-1083: StoreTableExec should be block iterative. --- .../java/org/apache/tajo/SessionVars.java | 2 + .../java/org/apache/tajo/conf/TajoConf.java | 1 + .../org/apache/tajo/engine/eval/EvalNode.java | 7 + .../apache/tajo/engine/planner/Projector.java | 23 +- .../engine/planner/physical/PhysicalExec.java | 2 +- .../engine/planner/physical/SeqScanExec.java | 24 ++ .../planner/physical/StoreTableExec.java | 28 +++ .../tajo/engine/utils/TupleBuilderUtil.java | 102 +++++++++ .../org/apache/tajo/master/GlobalEngine.java | 20 ++ .../java/org/apache/tajo/worker/Task.java | 14 +- .../physical/block/TestBlockIteratorExec.java | 211 ++++++++++++++++++ .../TestTajoCli/testHelpSessionVars.result | 1 + .../tajo/tuple/offheap/OffHeapRowBlock.java | 6 +- .../tuple/offheap/OffHeapRowBlockWriter.java | 2 +- .../tuple/offheap/ResizableLimitSpec.java | 2 +- .../apache/tajo/storage/TestNextFetches.java | 23 +- 16 files changed, 449 insertions(+), 19 deletions(-) create mode 100644 tajo-core/src/main/java/org/apache/tajo/engine/utils/TupleBuilderUtil.java create mode 100644 tajo-core/src/test/java/org/apache/tajo/engine/planner/physical/block/TestBlockIteratorExec.java diff --git a/tajo-common/src/main/java/org/apache/tajo/SessionVars.java b/tajo-common/src/main/java/org/apache/tajo/SessionVars.java index cc875b2c1a..b63d4f4536 100644 --- a/tajo-common/src/main/java/org/apache/tajo/SessionVars.java +++ b/tajo-common/src/main/java/org/apache/tajo/SessionVars.java @@ -99,6 +99,8 @@ public enum SessionVars implements ConfigKey { "shuffle output size for partition table write (mb)", DEFAULT), // for physical Executors + EXEC_ENGINE(ConfVars.$EXECUTOR_ENGINE, + "executor engine types that queries will use. 
Types: volcano and block (default is volcano)", DEFAULT), EXTSORT_BUFFER_SIZE(ConfVars.$EXECUTOR_EXTERNAL_SORT_BUFFER_SIZE, "sort buffer size for external sort (mb)", DEFAULT), HASH_JOIN_SIZE_LIMIT(ConfVars.$EXECUTOR_HASH_JOIN_SIZE_THRESHOLD, "limited size for hash join (mb)", DEFAULT), INNER_HASH_JOIN_SIZE_LIMIT(ConfVars.$EXECUTOR_INNER_HASH_JOIN_SIZE_THRESHOLD, diff --git a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java index b5a9b506cb..b1229ebc93 100644 --- a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java +++ b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java @@ -317,6 +317,7 @@ public static enum ConfVars implements ConfigKey { $DIST_QUERY_TABLE_PARTITION_VOLUME("tajo.dist-query.table-partition.task-volume-mb", 256), // for physical Executors + $EXECUTOR_ENGINE("tajo.executor.engine", "volcano"), // volcano, and block $EXECUTOR_EXTERNAL_SORT_BUFFER_SIZE("tajo.executor.external-sort.buffer-mb", 200L), $EXECUTOR_HASH_JOIN_SIZE_THRESHOLD("tajo.executor.join.common.in-memory-hash-threshold-bytes", (long)256 * 1048576), diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/eval/EvalNode.java b/tajo-core/src/main/java/org/apache/tajo/engine/eval/EvalNode.java index 754f8885ce..b48700180f 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/eval/EvalNode.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/eval/EvalNode.java @@ -23,8 +23,10 @@ import org.apache.tajo.common.TajoDataTypes.DataType; import org.apache.tajo.datum.Datum; import org.apache.tajo.engine.json.CoreGsonHelper; +import org.apache.tajo.engine.utils.TupleBuilderUtil; import org.apache.tajo.json.GsonObject; import org.apache.tajo.storage.Tuple; +import org.apache.tajo.tuple.offheap.RowWriter; /** * An annotated expression which includes actual data domains. 
@@ -59,6 +61,11 @@ public String toJson() { public abstract T eval(Schema schema, Tuple tuple); + public void eval(Schema schema, Tuple tuple, RowWriter builder) { + Datum result = eval(schema, tuple); + TupleBuilderUtil.writeEvalResult(builder, result.type(), result); + } + @Deprecated public abstract void preOrder(EvalNodeVisitor visitor); diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/Projector.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/Projector.java index d8499d0696..0d8bd5fa3f 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/Projector.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/Projector.java @@ -21,7 +21,10 @@ import org.apache.tajo.SessionVars; import org.apache.tajo.catalog.Schema; import org.apache.tajo.engine.eval.EvalNode; +import org.apache.tajo.engine.utils.TupleBuilderUtil; import org.apache.tajo.storage.Tuple; +import org.apache.tajo.tuple.TupleBuilder; +import org.apache.tajo.tuple.offheap.RowWriter; import org.apache.tajo.worker.TaskAttemptContext; public class Projector { @@ -33,7 +36,14 @@ public class Projector { private final int targetNum; private final EvalNode[] evals; + private final boolean useJITInSession; + private final boolean useJITInOperator; + public Projector(TaskAttemptContext context, Schema inSchema, Schema outSchema, Target [] targets) { + this(context, inSchema, outSchema, targets, true); + } + + public Projector(TaskAttemptContext context, Schema inSchema, Schema outSchema, Target [] targets, boolean useJIT) { this.context = context; this.inSchema = inSchema; if (targets == null) { @@ -45,7 +55,10 @@ public Projector(TaskAttemptContext context, Schema inSchema, Schema outSchema, this.targetNum = this.targets.length; evals = new EvalNode[targetNum]; - if (context.getQueryContext().getBool(SessionVars.CODEGEN)) { + useJITInOperator = useJIT; + useJITInSession = context.getQueryContext().getBool(SessionVars.CODEGEN); + + if (useJITInOperator && 
useJITInSession) { EvalNode eval; for (int i = 0; i < targetNum; i++) { eval = this.targets[i].getEvalTree(); @@ -63,4 +76,12 @@ public void eval(Tuple in, Tuple out) { out.put(i, evals[i].eval(inSchema, in)); } } + + public void eval(Tuple in, RowWriter builder) { + if (useJITInOperator && useJITInSession) { + TupleBuilderUtil.evaluateNative(inSchema, in, builder, evals); + } else { + TupleBuilderUtil.evaluate(inSchema, in, builder, evals); + } + } } diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalExec.java index 859c053ff9..99cf61015c 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalExec.java @@ -63,7 +63,7 @@ protected void compile() throws CompilationError { public abstract Tuple next() throws IOException; - public boolean nextFetch(OffHeapRowBlock rowBlock) { + public boolean nextFetch(OffHeapRowBlock rowBlock) throws IOException { throw new UnimplementedException("nextFetch(OffHeapRowBlock) is not implemented"); } diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java index 122d4f3908..fd11c7b0bc 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java @@ -41,6 +41,11 @@ import org.apache.tajo.storage.*; import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.fragment.FragmentConvertor; +import org.apache.tajo.tuple.RowBlockReader; +import org.apache.tajo.tuple.TupleBuilder; +import org.apache.tajo.tuple.offheap.OffHeapRowBlock; +import org.apache.tajo.tuple.offheap.ZeroCopyTuple; +import org.apache.tajo.unit.StorageUnit; import 
org.apache.tajo.worker.TaskAttemptContext; import java.io.IOException; @@ -67,6 +72,8 @@ public class SeqScanExec extends PhysicalExec { private boolean cacheRead = false; + private OffHeapRowBlock inRowBlock; + public SeqScanExec(TaskAttemptContext context, AbstractStorageManager sm, ScanNode plan, CatalogProtos.FragmentProto [] fragments) throws IOException { super(context, plan.getInSchema(), plan.getOutSchema()); @@ -94,6 +101,8 @@ public SeqScanExec(TaskAttemptContext context, AbstractStorageManager sm, ScanNo && plan.getTableDesc().getPartitionMethod().getPartitionType() == CatalogProtos.PartitionType.COLUMN) { rewriteColumnPartitionedTableSchema(); } + + inRowBlock = new OffHeapRowBlock(inSchema, 64 * StorageUnit.KB); } /** @@ -289,6 +298,21 @@ public Tuple next() throws IOException { } } + public boolean nextFetch(OffHeapRowBlock rowBlock) throws IOException { + boolean noMoreTuple = scanner.nextFetch(inRowBlock); + if (!noMoreTuple) { + return false; + } + + ZeroCopyTuple zcTuple = new ZeroCopyTuple(); + RowBlockReader reader = inRowBlock.getReader(); + while (reader.next(zcTuple)) { + projector.eval(zcTuple, rowBlock.getWriter()); + } + + return true; + } + @Override public void rescan() throws IOException { scanner.reset(); diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/StoreTableExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/StoreTableExec.java index 3199b56c07..f88af150ac 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/StoreTableExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/StoreTableExec.java @@ -28,9 +28,13 @@ import org.apache.tajo.catalog.statistics.TableStats; import org.apache.tajo.engine.planner.logical.InsertNode; import org.apache.tajo.engine.planner.logical.PersistentStoreNode; +import org.apache.tajo.exception.UnimplementedException; import org.apache.tajo.storage.Appender; import 
org.apache.tajo.storage.StorageManagerFactory; import org.apache.tajo.storage.Tuple; +import org.apache.tajo.tuple.RowBlockReader; +import org.apache.tajo.tuple.offheap.OffHeapRowBlock; +import org.apache.tajo.tuple.offheap.ZeroCopyTuple; import org.apache.tajo.unit.StorageUnit; import org.apache.tajo.worker.TaskAttemptContext; @@ -121,6 +125,30 @@ public Tuple next() throws IOException { return null; } + ZeroCopyTuple zcTuple = new ZeroCopyTuple(); + RowBlockReader reader; + + public boolean nextFetch(OffHeapRowBlock rowBlock) throws IOException { + if (child.nextFetch(rowBlock)) { + reader = rowBlock.getReader(); + while (reader.next(zcTuple)) { + appender.addTuple(zcTuple);; + + if (maxPerFileSize > 0 && maxPerFileSize <= appender.getEstimatedOutputSize()) { + appender.close(); + + writtenFileNum++; + StatisticsUtil.aggregateTableStat(sumStats, appender.getStats()); + openNewFile(writtenFileNum); + } + } + + return true; + } else { + return false; + } + } + @Override public void rescan() throws IOException { // nothing to do diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/utils/TupleBuilderUtil.java b/tajo-core/src/main/java/org/apache/tajo/engine/utils/TupleBuilderUtil.java new file mode 100644 index 0000000000..dc0f0582af --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/utils/TupleBuilderUtil.java @@ -0,0 +1,102 @@ +/* + * Lisensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.utils; + +import org.apache.tajo.catalog.Schema; +import org.apache.tajo.common.TajoDataTypes; +import org.apache.tajo.datum.Datum; +import org.apache.tajo.engine.eval.EvalNode; +import org.apache.tajo.exception.UnsupportedException; +import org.apache.tajo.storage.Tuple; +import org.apache.tajo.tuple.TupleBuilder; +import org.apache.tajo.tuple.offheap.RowWriter; + +public class TupleBuilderUtil { + + public static void evaluate(Schema inSchema, Tuple input, RowWriter builder, EvalNode[] evals) { + builder.startRow(); + for (int i = 0; i < evals.length; i++) { + Datum result = evals[i].eval(inSchema, input); + writeEvalResult(builder, result.type(), result); + } + builder.endRow(); + } + + public static void evaluateNative(Schema inSchema, Tuple input, RowWriter builder, EvalNode[] evals) { + builder.startRow(); + for (int i = 0; i < evals.length; i++) { + evals[i].eval(inSchema, input, builder); + } + builder.endRow(); + } + + public static void writeEvalResult(RowWriter builder, TajoDataTypes.Type type, Datum datum) { + switch (type) { + case NULL_TYPE: + builder.skipField(); + break; + case BOOLEAN: + builder.putBool(datum.asBool()); + break; + case INT1: + case INT2: + builder.putInt2(datum.asInt2()); + break; + case INT4: + builder.putInt4(datum.asInt4()); + break; + case INT8: + builder.putInt8(datum.asInt8()); + break; + case FLOAT4: + builder.putFloat4(datum.asFloat4()); + break; + case FLOAT8: + builder.putFloat8(datum.asFloat8()); + break; + case TIMESTAMP: + builder.putTimestamp(datum.asInt8()); + 
break; + case TIME: + builder.putTime(datum.asInt8()); + break; + case DATE: + builder.putDate(datum.asInt4()); + break; + case INTERVAL: + builder.putInterval((org.apache.tajo.datum.IntervalDatum) datum); + break; + case CHAR: + case TEXT: + builder.putText(datum.asTextBytes()); + break; + case BLOB: + builder.putBlob(datum.asByteArray()); + break; + case INET4: + builder.putInet4(datum.asInt4()); + break; + case PROTOBUF: + builder.putProtoDatum((org.apache.tajo.datum.ProtobufDatum) datum); + break; + default: + throw new UnsupportedException("Unknown Type: " + type.name()); + } + } +} diff --git a/tajo-core/src/main/java/org/apache/tajo/master/GlobalEngine.java b/tajo-core/src/main/java/org/apache/tajo/master/GlobalEngine.java index 504a7929d7..23c494901b 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/GlobalEngine.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/GlobalEngine.java @@ -115,6 +115,26 @@ public void stop() { super.stop(); } + public SQLAnalyzer getSQLAnalyzer() { + return analyzer; + } + + public PreLogicalPlanVerifier getPreLogicalPlanVerifier() { + return preVerifier; + } + + public LogicalPlanner getLogicalPlanner() { + return planner; + } + + public LogicalOptimizer getLogicalOptimizer() { + return optimizer; + } + + public LogicalPlanVerifier getLogicalPlanVerifier() { + return annotatedPlanVerifier; + } + private QueryContext createQueryContext(Session session) { QueryContext newQueryContext = new QueryContext(context.getConf(), session); diff --git a/tajo-core/src/main/java/org/apache/tajo/worker/Task.java b/tajo-core/src/main/java/org/apache/tajo/worker/Task.java index 5127e90696..67210eaed3 100644 --- a/tajo-core/src/main/java/org/apache/tajo/worker/Task.java +++ b/tajo-core/src/main/java/org/apache/tajo/worker/Task.java @@ -31,6 +31,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.tajo.QueryUnitAttemptId; +import org.apache.tajo.SessionVars; import 
org.apache.tajo.TajoConstants; import org.apache.tajo.TajoProtos; import org.apache.tajo.TajoProtos.TaskAttemptState; @@ -57,6 +58,8 @@ import org.apache.tajo.storage.HashShuffleAppenderManager; import org.apache.tajo.storage.StorageUtil; import org.apache.tajo.storage.fragment.FileFragment; +import org.apache.tajo.tuple.offheap.OffHeapRowBlock; +import org.apache.tajo.unit.StorageUnit; import org.apache.tajo.util.NetUtils; import org.jboss.netty.channel.socket.ClientSocketChannelFactory; import org.jboss.netty.handler.codec.http.QueryStringDecoder; @@ -446,7 +449,16 @@ public void run() throws Exception { createPlan(context, plan); this.executor.init(); - while(!killed && !aborted && executor.next() != null) { + String engineType = context.getQueryContext().get(SessionVars.EXEC_ENGINE); + LOG.info(engineType.toUpperCase() + " Executor Engine is chosen."); + if (engineType.equalsIgnoreCase("volcano")) { + while (!killed && !aborted && executor.next() != null) { + } + } else if (engineType.equalsIgnoreCase("block")) { + OffHeapRowBlock rowBlock = new OffHeapRowBlock(executor.getSchema(), 64 * StorageUnit.KB); + while (!killed && !aborted && executor.nextFetch(rowBlock)) { + } + rowBlock.release(); } } catch (Throwable e) { error = e ; diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/planner/physical/block/TestBlockIteratorExec.java b/tajo-core/src/test/java/org/apache/tajo/engine/planner/physical/block/TestBlockIteratorExec.java new file mode 100644 index 0000000000..f6a1e0546a --- /dev/null +++ b/tajo-core/src/test/java/org/apache/tajo/engine/planner/physical/block/TestBlockIteratorExec.java @@ -0,0 +1,211 @@ +/* + * Lisensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.planner.physical.block; + +import com.google.common.collect.Lists; +import org.apache.hadoop.fs.Path; +import org.apache.tajo.*; +import org.apache.tajo.algebra.Expr; +import org.apache.tajo.catalog.*; +import org.apache.tajo.catalog.proto.CatalogProtos; +import org.apache.tajo.engine.parser.SQLAnalyzer; +import org.apache.tajo.engine.planner.*; +import org.apache.tajo.engine.planner.enforce.Enforcer; +import org.apache.tajo.engine.planner.logical.*; +import org.apache.tajo.engine.planner.physical.*; +import org.apache.tajo.engine.query.QueryContext; +import org.apache.tajo.master.GlobalEngine; +import org.apache.tajo.storage.*; +import org.apache.tajo.storage.fragment.FileFragment; +import org.apache.tajo.tuple.RowBlockReader; +import org.apache.tajo.tuple.offheap.OffHeapRowBlock; +import org.apache.tajo.tuple.offheap.ZeroCopyTuple; +import org.apache.tajo.unit.StorageUnit; +import org.apache.tajo.util.CommonTestingUtil; +import org.apache.tajo.worker.TaskAttemptContext; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.util.List; + +import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; + +public class TestBlockIteratorExec extends QueryTestCaseBase { + + private static SQLAnalyzer 
analyzer; + private static LogicalPlanner planner; + private static LogicalOptimizer optimizer; + private static PhysicalPlanner physicalPlanner; + private static AbstractStorageManager sm; + + @BeforeClass + public static void setUp() throws IOException { + GlobalEngine engine = testingCluster.getMaster().getContext().getGlobalEngine(); + analyzer = engine.getSQLAnalyzer(); + planner = engine.getLogicalPlanner(); + optimizer = engine.getLogicalOptimizer(); + + Path path = CommonTestingUtil.getTestDir("target/test-data/TestBlockExecutor"); + sm = StorageManagerFactory.getStorageManager(conf, path); + + physicalPlanner = new PhysicalPlannerImpl(conf, sm); + } + + private static int i = 0; + static Path outputPath; + + /** + * Build a physical execution plan, which is a tree consisting of a number of physical executors. + * + * @param sql a SQL statement + * @return Physical Execution Plan + * @throws PlanningException + * @throws IOException + */ + public static PhysicalExec buildPhysicalPlan(String sql) throws PlanningException, IOException { + Expr expr = analyzer.parse(sql); + + QueryContext context = LocalTajoTestingUtility.createDummyContext(conf); + LogicalPlan plan = planner.createPlan(context, expr); + optimizer.optimize(context, plan); + + LogicalNode [] founds = PlannerUtil.findAllNodes(plan.getRootBlock().getRoot(), NodeType.SCAN); + + List mergedFragments = Lists.newArrayList(); + + for (LogicalNode node : founds) { + ScanNode scan = (ScanNode) node; + TableDesc table = scan.getTableDesc(); + FileFragment[] frags = StorageManager.splitNG(conf, scan.getCanonicalName(), table.getMeta(), table.getPath(), + Integer.MAX_VALUE); + + for (FileFragment f : frags) { + mergedFragments.add(f); + } + } + + Path workDir = CommonTestingUtil.getTestDir("target/test-data/testdir_" + (i++)); + + TaskAttemptContext ctx = new TaskAttemptContext(new QueryContext(conf), + LocalTajoTestingUtility.newQueryUnitAttemptId(), mergedFragments.toArray(new 
FileFragment[mergedFragments.size()]), workDir); + + outputPath = new Path(workDir, "output"); + ctx.setOutputPath(outputPath); + ctx.setEnforcer(new Enforcer()); + + return physicalPlanner.createPlan(ctx, plan.getRootBlock().getRoot()); + } + + @Test + public void testSeqScan() throws IOException, PlanningException { + PhysicalExec exec = buildPhysicalPlan("select * from lineitem"); + + OffHeapRowBlock rowBlock = new OffHeapRowBlock(exec.getSchema(), 64 * StorageUnit.KB); + rowBlock.setMaxRow(1024); + + exec.init(); + + int countForTuple = 0; + int countForRowBlock = 0; + while(exec.nextFetch(rowBlock)) { + ZeroCopyTuple tuple = new ZeroCopyTuple(); + RowBlockReader reader = rowBlock.getReader(); + while (reader.next(tuple)) { + countForTuple++; + } + countForRowBlock += rowBlock.rows(); + } + exec.close(); + rowBlock.release(); + + assertEquals(5, countForTuple); + assertEquals(5, countForRowBlock); + } + + @Test + public void testScanWithProjector() throws IOException, PlanningException { + PhysicalExec exec = buildPhysicalPlan("select l_orderkey, l_partkey from lineitem"); + + OffHeapRowBlock rowBlock = new OffHeapRowBlock(exec.getSchema(), 64 * StorageUnit.KB); + rowBlock.setMaxRow(1024); + + exec.init(); + + int countForTuple = 0; + int countForRowBlock = 0; + while(exec.nextFetch(rowBlock)) { + ZeroCopyTuple tuple = new ZeroCopyTuple(); + RowBlockReader reader = rowBlock.getReader(); + while (reader.next(tuple)) { + countForTuple++; + } + countForRowBlock += rowBlock.rows(); + } + exec.close(); + rowBlock.release(); + + assertEquals(5, countForTuple); + assertEquals(5, countForRowBlock); + } + + @Test + public void testStoreTableExec() throws IOException, PlanningException { + PhysicalExec exec = buildPhysicalPlan("create table t1 using CSV as select * from lineitem"); + + + OffHeapRowBlock rowBlock = new OffHeapRowBlock(exec.getSchema(), 64 * StorageUnit.KB); + rowBlock.setMaxRow(1024); + + exec.init(); + + int countForTuple = 0; + int countForRowBlock = 0; 
+ while(exec.nextFetch(rowBlock)) { + ZeroCopyTuple tuple = new ZeroCopyTuple(); + RowBlockReader reader = rowBlock.getReader(); + while (reader.next(tuple)) { + countForTuple++; + } + countForRowBlock += rowBlock.rows(); + } + exec.close(); + + assertEquals(5, countForTuple); + assertEquals(5, countForRowBlock); + + TableMeta meta = CatalogUtil.newTableMeta(CatalogProtos.StoreType.CSV); + Scanner scanner = StorageManagerFactory.getStorageManager(conf).getFileScanner(meta, exec.getSchema(), outputPath); + scanner.init(); + + int readTupleCount = 0; + while (scanner.nextFetch(rowBlock)) { + readTupleCount += rowBlock.rows(); + } + scanner.close(); + + assertEquals(5, readTupleCount); + + rowBlock.release(); + } +} \ No newline at end of file diff --git a/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result b/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result index e6b12b1fcb..f6edb3d2de 100644 --- a/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result +++ b/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result @@ -25,6 +25,7 @@ Available Session Variables: \set JOIN_PER_SHUFFLE_SIZE [int value] - shuffle output size for join (mb) \set GROUPBY_PER_SHUFFLE_SIZE [int value] - shuffle output size for sort (mb) \set TABLE_PARTITION_PER_SHUFFLE_SIZE [int value] - shuffle output size for partition table write (mb) +\set EXEC_ENGINE [text value] - executor engine types that queries will use. 
Types: volcano and block (default is volcano) \set EXTSORT_BUFFER_SIZE [long value] - sort buffer size for external sort (mb) \set HASH_JOIN_SIZE_LIMIT [long value] - limited size for hash join (mb) \set INNER_HASH_JOIN_SIZE_LIMIT [long value] - limited size for hash inner join (mb) diff --git a/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/OffHeapRowBlock.java b/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/OffHeapRowBlock.java index 689efb7419..ea86a5a7b3 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/OffHeapRowBlock.java +++ b/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/OffHeapRowBlock.java @@ -127,7 +127,11 @@ public int rows() { return rowNum; } - public void setRows(int rowNum) { + public void setMaxRow(int rowNum) { + this.maxRowNum = rowNum; + } + + void setRow(int rowNum) { this.rowNum = rowNum; } diff --git a/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/OffHeapRowBlockWriter.java b/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/OffHeapRowBlockWriter.java index d177e0caff..ba59b1654d 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/OffHeapRowBlockWriter.java +++ b/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/OffHeapRowBlockWriter.java @@ -48,7 +48,7 @@ public void ensureSize(int size) { @Override public void endRow() { super.endRow(); - rowBlock.setRows(rowBlock.rows() + 1); + rowBlock.setRow(rowBlock.rows() + 1); } @Override diff --git a/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/ResizableLimitSpec.java b/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/ResizableLimitSpec.java index 14e67b25eb..8d782ebdfd 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/ResizableLimitSpec.java +++ b/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/ResizableLimitSpec.java @@ -105,7 +105,7 @@ public boolean canIncrease(long currentSize) { } public long remain(long currentSize) { - 
Preconditions.checkArgument(currentSize > 0, "Size must be greater than 0 bytes."); + Preconditions.checkArgument(currentSize > 0, "Size must be greater than 0 bytes. But, its size is " + currentSize); return limitBytes > Integer.MAX_VALUE ? Integer.MAX_VALUE - currentSize : limitBytes - currentSize; } diff --git a/tajo-storage/src/test/java/org/apache/tajo/storage/TestNextFetches.java b/tajo-storage/src/test/java/org/apache/tajo/storage/TestNextFetches.java index d1b3afd189..e81964ba8b 100644 --- a/tajo-storage/src/test/java/org/apache/tajo/storage/TestNextFetches.java +++ b/tajo-storage/src/test/java/org/apache/tajo/storage/TestNextFetches.java @@ -42,17 +42,14 @@ import org.apache.tajo.storage.sequencefile.SequenceFileScanner; import org.apache.tajo.tuple.RowBlockReader; import org.apache.tajo.tuple.offheap.OffHeapRowBlock; -import org.apache.tajo.tuple.offheap.UnSafeTuple; import org.apache.tajo.tuple.offheap.ZeroCopyTuple; import org.apache.tajo.unit.StorageUnit; import org.apache.tajo.util.CommonTestingUtil; import org.apache.tajo.util.FileUtil; import org.apache.tajo.util.KeyValueSet; -import org.apache.tajo.util.UnsafeUtil; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import sun.misc.Unsafe; import java.io.IOException; import java.util.Arrays; @@ -173,7 +170,7 @@ public void testSplitable() throws IOException { int tupleCnt = 0; OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 64 * StorageUnit.KB); - rowBlock.setRows(1024); + rowBlock.setMaxRow(1024); while (scanner.nextFetch(rowBlock)) { tupleCnt += rowBlock.rows(); @@ -233,7 +230,7 @@ public void testSplitableForRCFileBug() throws IOException { int tupleCnt = 0; OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 64 * StorageUnit.KB); - rowBlock.setRows(1024); + rowBlock.setMaxRow(1024); while (scanner.nextFetch(rowBlock)) { tupleCnt += rowBlock.rows(); @@ -294,7 +291,7 @@ public void testProjection() throws IOException { int tupleCnt = 0; 
OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 64 * StorageUnit.KB); - rowBlock.setRows(1024); + rowBlock.setMaxRow(1024); ZeroCopyTuple tuple = new ZeroCopyTuple(); while (scanner.nextFetch(rowBlock)) { @@ -376,7 +373,7 @@ public void testVariousTypes() throws IOException { scanner.init(); OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 64 * StorageUnit.KB); - rowBlock.setRows(1024); + rowBlock.setMaxRow(1024); ZeroCopyTuple zcTuple = new ZeroCopyTuple(); while (scanner.nextFetch(rowBlock)) { @@ -465,7 +462,7 @@ public void testNullHandlingTypes() throws IOException { scanner.init(); OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 64 * StorageUnit.KB); - rowBlock.setRows(1024); + rowBlock.setMaxRow(1024); ZeroCopyTuple retrieved = new ZeroCopyTuple(); @@ -548,7 +545,7 @@ public void testRCFileTextSerializeDeserialize() throws IOException { scanner.init(); OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 64 * StorageUnit.KB); - rowBlock.setRows(1024); + rowBlock.setMaxRow(1024); ZeroCopyTuple retrieved = new ZeroCopyTuple(); while (scanner.nextFetch(rowBlock)) { @@ -624,7 +621,7 @@ public void testRCFileBinarySerializeDeserialize() throws IOException { scanner.init(); OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 64 * StorageUnit.KB); - rowBlock.setRows(1024); + rowBlock.setMaxRow(1024); ZeroCopyTuple retrieved = new ZeroCopyTuple(); while (scanner.nextFetch(rowBlock)) { @@ -703,7 +700,7 @@ public void testSequenceFileTextSerializeDeserialize() throws IOException { assertEquals(key.getClass().getCanonicalName(), LongWritable.class.getCanonicalName()); OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 64 * StorageUnit.KB); - rowBlock.setRows(1024); + rowBlock.setMaxRow(1024); ZeroCopyTuple retrieved = new ZeroCopyTuple(); @@ -785,7 +782,7 @@ public void testSequenceFileBinarySerializeDeserialize() throws IOException { assertEquals(key.getClass().getCanonicalName(), BytesWritable.class.getCanonicalName()); OffHeapRowBlock 
rowBlock = new OffHeapRowBlock(schema, 64 * StorageUnit.KB); - rowBlock.setRows(1024); + rowBlock.setMaxRow(1024); ZeroCopyTuple retrieved = new ZeroCopyTuple(); @@ -833,7 +830,7 @@ public void testTime() throws IOException { scanner.init(); OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 64 * StorageUnit.KB); - rowBlock.setRows(1024); + rowBlock.setMaxRow(1024); ZeroCopyTuple retrieved = new ZeroCopyTuple(); From 0f17fef890ba6e804505766db2fc7b6a25ba8e85 Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Sun, 28 Sep 2014 17:26:33 -0700 Subject: [PATCH 5/6] TAJO-1084: Generated codes should access directly UnSafeTuple and RowWriter. --- .../java/org/apache/tajo/SessionVars.java | 2 +- .../org/apache/tajo/annotation/UsedByJIT.java | 36 ++ .../java/org/apache/tajo/conf/TajoConf.java | 4 +- .../tajo/engine/codegen/CaseWhenEmitter.java | 2 +- .../tajo/engine/codegen/CompilationError.java | 6 +- .../tajo/engine/codegen/EvalCodeEmitter.java | 2 +- .../engine/codegen/EvalCodeGenContext.java | 186 +---------- .../engine/codegen/EvalCodeGenerator.java | 71 ++-- .../tajo/engine/codegen/EvalNodeCompiler.java | 310 ++++++++++++++++++ .../engine/codegen/ExecutorPreCompiler.java | 46 +-- .../engine/codegen/TajoGeneratorAdapter.java | 171 +++++++++- .../engine/codegen/TupleComparerCompiler.java | 14 + .../apache/tajo/engine/codegen/Variables.java | 29 ++ ...sPreBuilder.java => VariablesBuilder.java} | 25 +- .../org/apache/tajo/engine/eval/EvalNode.java | 6 +- .../planner/physical/ExternalSortExec.java | 2 +- .../engine/planner/physical/SeqScanExec.java | 5 +- .../engine/planner/physical/SortExec.java | 9 +- .../planner/physical/StoreTableExec.java | 2 + .../worker/ExecutionBlockSharedResource.java | 29 +- .../java/org/apache/tajo/worker/Task.java | 5 +- .../codegen/TestTupleComparerCompiler.java | 83 +++-- .../apache/tajo/engine/eval/ExprTestBase.java | 7 +- .../physical/block/TestBlockIteratorExec.java | 1 + .../tajo/engine/query/TestSelectQuery.java | 2 +- 
.../TestTajoCli/testHelpSessionVars.result | 2 +- .../apache/tajo/tuple/BaseTupleBuilder.java | 8 +- .../apache/tajo/tuple/offheap/HeapTuple.java | 31 +- .../offheap/HeapTupleBytesComparator.java | 105 ++++++ .../tajo/tuple/offheap/OffHeapRowWriter.java | 62 +++- .../apache/tajo/tuple/offheap/RowWriter.java | 4 + .../tajo/tuple/offheap/UnSafeTuple.java | 2 +- 32 files changed, 949 insertions(+), 320 deletions(-) create mode 100644 tajo-common/src/main/java/org/apache/tajo/annotation/UsedByJIT.java create mode 100644 tajo-core/src/main/java/org/apache/tajo/engine/codegen/EvalNodeCompiler.java create mode 100644 tajo-core/src/main/java/org/apache/tajo/engine/codegen/Variables.java rename tajo-core/src/main/java/org/apache/tajo/engine/codegen/{VariablesPreBuilder.java => VariablesBuilder.java} (77%) create mode 100644 tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/HeapTupleBytesComparator.java diff --git a/tajo-common/src/main/java/org/apache/tajo/SessionVars.java b/tajo-common/src/main/java/org/apache/tajo/SessionVars.java index b63d4f4536..b34bad3f2c 100644 --- a/tajo-common/src/main/java/org/apache/tajo/SessionVars.java +++ b/tajo-common/src/main/java/org/apache/tajo/SessionVars.java @@ -99,7 +99,7 @@ public enum SessionVars implements ConfigKey { "shuffle output size for partition table write (mb)", DEFAULT), // for physical Executors - EXEC_ENGINE(ConfVars.$EXECUTOR_ENGINE, + EXECUTION_ENGINE(ConfVars.$EXECUTION_ENGINE, "executor engine types that queries will use. 
Types: volcano and block (default is volcano)", DEFAULT), EXTSORT_BUFFER_SIZE(ConfVars.$EXECUTOR_EXTERNAL_SORT_BUFFER_SIZE, "sort buffer size for external sort (mb)", DEFAULT), HASH_JOIN_SIZE_LIMIT(ConfVars.$EXECUTOR_HASH_JOIN_SIZE_THRESHOLD, "limited size for hash join (mb)", DEFAULT), diff --git a/tajo-common/src/main/java/org/apache/tajo/annotation/UsedByJIT.java b/tajo-common/src/main/java/org/apache/tajo/annotation/UsedByJIT.java new file mode 100644 index 0000000000..d9fc5eec34 --- /dev/null +++ b/tajo-common/src/main/java/org/apache/tajo/annotation/UsedByJIT.java @@ -0,0 +1,36 @@ +/* + * Lisensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.annotation; + + +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +import static java.lang.annotation.ElementType.*; +import static java.lang.annotation.ElementType.CONSTRUCTOR; +import static java.lang.annotation.ElementType.LOCAL_VARIABLE; + +/** + * It annotates that the method or fields are used by the codes by runtime generated code. 
+ */ +@Target({TYPE, FIELD, METHOD, CONSTRUCTOR, LOCAL_VARIABLE}) +@Retention(RetentionPolicy.SOURCE) +public @interface UsedByJIT { +} diff --git a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java index b1229ebc93..f821a3376a 100644 --- a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java +++ b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java @@ -316,8 +316,10 @@ public static enum ConfVars implements ConfigKey { $DIST_QUERY_GROUPBY_PARTITION_VOLUME("tajo.dist-query.groupby.partition-volume-mb", 256), $DIST_QUERY_TABLE_PARTITION_VOLUME("tajo.dist-query.table-partition.task-volume-mb", 256), + // execution engine + $EXECUTION_ENGINE("tajo.executor.engine", "volcano"), // volcano, and block + // for physical Executors - $EXECUTOR_ENGINE("tajo.executor.engine", "volcano"), // volcano, and block $EXECUTOR_EXTERNAL_SORT_BUFFER_SIZE("tajo.executor.external-sort.buffer-mb", 200L), $EXECUTOR_HASH_JOIN_SIZE_THRESHOLD("tajo.executor.join.common.in-memory-hash-threshold-bytes", (long)256 * 1048576), diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/CaseWhenEmitter.java b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/CaseWhenEmitter.java index 16bd39688c..d5b4ab83e3 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/CaseWhenEmitter.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/CaseWhenEmitter.java @@ -1,5 +1,5 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one + * Lisensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. 
The ASF licenses this file diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/CompilationError.java b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/CompilationError.java index 325ed2d389..cd7d5649a0 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/CompilationError.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/CompilationError.java @@ -27,10 +27,12 @@ public CompilationError(String message) { } public CompilationError(EvalNode evalNode, Throwable t, byte [] clazz) { - super("Compilation Error: " + evalNode.toString() + "\n\nBYTES CODE DUMP:\n" + CodeGenUtils.disassemble(clazz), t); + super(t.getMessage() + + "\nCompilation Error: " + evalNode.toString() + "\n\nBYTES CODE DUMP:\n" + CodeGenUtils.disassemble(clazz), t); } public CompilationError(BaseTupleComparator comp, Throwable t, byte [] clazz) { - super("Compilation Error: " + comp.toString() + "\n\nBYTES CODE DUMP:\n" + CodeGenUtils.disassemble(clazz), t); + super(t.getMessage() + + "\nCompilation Error: " + comp.toString() + "\n\nBYTES CODE DUMP:\n" + CodeGenUtils.disassemble(clazz), t); } } diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/EvalCodeEmitter.java b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/EvalCodeEmitter.java index d94c9071ba..9f280612fb 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/EvalCodeEmitter.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/EvalCodeEmitter.java @@ -1,4 +1,4 @@ -/* +/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/EvalCodeGenContext.java b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/EvalCodeGenContext.java index 1e51ba55fd..e9aca73de8 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/EvalCodeGenContext.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/EvalCodeGenContext.java @@ -18,206 +18,44 @@ package org.apache.tajo.engine.codegen; -import com.google.common.collect.Maps; import org.apache.tajo.catalog.Schema; -import org.apache.tajo.common.TajoDataTypes; -import org.apache.tajo.datum.Datum; -import org.apache.tajo.datum.IntervalDatum; -import org.apache.tajo.engine.eval.*; +import org.apache.tajo.engine.eval.EvalNode; import org.apache.tajo.org.objectweb.asm.ClassWriter; import org.apache.tajo.org.objectweb.asm.MethodVisitor; import org.apache.tajo.org.objectweb.asm.Opcodes; import org.apache.tajo.org.objectweb.asm.commons.GeneratorAdapter; -import org.apache.tajo.storage.Tuple; - -import java.util.Map; -import java.util.Stack; public class EvalCodeGenContext extends TajoGeneratorAdapter { final String owner; final Schema schema; final ClassWriter classWriter; final EvalNode evalNode; - final Map symbols; - int seqId = 0; + final Variables variables; - public EvalCodeGenContext(String className, Schema schema, ClassWriter classWriter, EvalNode evalNode) { + public EvalCodeGenContext(String className, Schema schema, ClassWriter classWriter, String methodName, + String methodDesc, EvalNode evalNode, Variables variables) { this.owner = className; this.classWriter = classWriter; this.schema = schema; this.evalNode = evalNode; - this.symbols = Maps.newHashMap(); - - emitClassDefinition(); - emitMemberFields(); - classWriter.visitEnd(); - emitConstructor(); + this.variables = variables; - String methodName = "eval"; - String methodDesc = TajoGeneratorAdapter.getMethodDescription(Datum.class, 
new Class[]{Schema.class, Tuple.class}); MethodVisitor evalMethod = classWriter.visitMethod(Opcodes.ACC_PUBLIC, methodName, methodDesc, null, null); evalMethod.visitCode(); this.methodvisitor = evalMethod; generatorAdapter = new GeneratorAdapter(this.methodvisitor, access, methodDesc, methodDesc); } - public void emitClassDefinition() { - classWriter.visit(Opcodes.V1_6, Opcodes.ACC_PUBLIC, this.owner, null, - TajoGeneratorAdapter.getInternalName(EvalNode.class), null); - } - - public void emitMemberFields() { - classWriter.visitField(Opcodes.ACC_PRIVATE, "schema", - "L" + TajoGeneratorAdapter.getInternalName(Schema.class) + ";", null, null); - - VariablesPreBuilder builder = new VariablesPreBuilder(); - builder.visit(this, evalNode, new Stack()); - } - - public static void emitCreateSchema(TajoGeneratorAdapter adapter, MethodVisitor mv, Schema schema) { - mv.visitLdcInsn(schema.toJson()); - adapter.invokeStatic(EvalCodeGenerator.class, "createSchema", Schema.class, new Class[] {String.class}); - } - - public static void emitCreateEval(TajoGeneratorAdapter adapter, MethodVisitor mv, EvalNode evalNode) { - mv.visitLdcInsn(evalNode.toJson()); - adapter.invokeStatic(EvalCodeGenerator.class, "createEval", EvalNode.class, new Class[] {String.class}); - } - - public static void emitConstEval(TajoGeneratorAdapter adapter, MethodVisitor mv, ConstEval evalNode) { - mv.visitLdcInsn(evalNode.toJson()); - adapter.invokeStatic(EvalCodeGenerator.class, "createConstEval", ConstEval.class, new Class[] {String.class}); - } - - public static void emitRowConstantEval(TajoGeneratorAdapter adapter, MethodVisitor mv, RowConstantEval evalNode) { - mv.visitLdcInsn(evalNode.toJson()); - adapter.invokeStatic(EvalCodeGenerator.class, "createRowConstantEval", RowConstantEval.class, - new Class[] {String.class}); - } - - public void emitConstructor() { - // constructor method - MethodVisitor initMethod = classWriter.visitMethod(Opcodes.ACC_PUBLIC, "", "()V", null, null); - 
initMethod.visitCode(); - initMethod.visitVarInsn(Opcodes.ALOAD, 0); - initMethod.visitMethodInsn(Opcodes.INVOKESPECIAL, TajoGeneratorAdapter.getInternalName(EvalNode.class), "", - "()V"); - - TajoGeneratorAdapter consAdapter = new TajoGeneratorAdapter(Opcodes.ACC_PUBLIC, initMethod, "", "()V"); - - // == this.schema = schema; - if (schema != null) { - consAdapter.aload(0); - emitCreateSchema(consAdapter, initMethod, schema); - initMethod.visitFieldInsn(Opcodes.PUTFIELD, this.owner, "schema", getDescription(Schema.class)); - } - - for (Map.Entry entry : symbols.entrySet()) { - if (entry.getKey().getType() == EvalType.CONST) { - ConstEval constEval = (ConstEval) entry.getKey(); - - if (constEval.getValueType().getType() == TajoDataTypes.Type.INTERVAL) { - IntervalDatum datum = (IntervalDatum) constEval.getValue(); - - final String internalName = TajoGeneratorAdapter.getInternalName(IntervalDatum.class); - - initMethod.visitTypeInsn(Opcodes.NEW, internalName); - consAdapter.dup(); - initMethod.visitLdcInsn(datum.getMonths()); - initMethod.visitLdcInsn(datum.getMilliSeconds()); - initMethod.visitMethodInsn(Opcodes.INVOKESPECIAL, internalName, "", "(IJ)V"); - int INTERVAL_DATUM = consAdapter.astore(); - - consAdapter.aload(0); - consAdapter.aload(INTERVAL_DATUM); - initMethod.visitFieldInsn(Opcodes.PUTFIELD, this.owner, entry.getValue(), - "L" + TajoGeneratorAdapter.getInternalName(IntervalDatum.class) + ";"); - } - - } else if (entry.getKey().getType() == EvalType.IN) { - InEval inEval = (InEval) entry.getKey(); - - final String internalName = getInternalName(InEval.class); - initMethod.visitTypeInsn(Opcodes.NEW, internalName); - consAdapter.dup(); - emitCreateEval(consAdapter, initMethod, inEval.getLeftExpr()); - emitRowConstantEval(consAdapter, initMethod, (RowConstantEval) inEval.getRightExpr()); - consAdapter.push(inEval.isNot()); - consAdapter.invokeSpecial(InEval.class, "", void.class, - new Class [] {EvalNode.class, RowConstantEval.class, boolean.class}); - int 
IN_PREDICATE_EVAL = consAdapter.astore(); - - consAdapter.aload(0); - consAdapter.aload(IN_PREDICATE_EVAL); - initMethod.visitFieldInsn(Opcodes.PUTFIELD, this.owner, entry.getValue(), getDescription(InEval.class)); - - } else if (EvalType.isStringPatternMatchOperator(entry.getKey().getType())) { - PatternMatchPredicateEval patternPredicate = (PatternMatchPredicateEval) entry.getKey(); - - Class clazz = EvalCodeGenerator.getStringPatternEvalClass(entry.getKey().getType()); - final String internalName = TajoGeneratorAdapter.getInternalName(clazz); - - initMethod.visitTypeInsn(Opcodes.NEW, internalName); - consAdapter.dup(); - consAdapter.push(patternPredicate.isNot()); - emitCreateEval(consAdapter, initMethod, patternPredicate.getLeftExpr()); - emitConstEval(consAdapter, initMethod, (ConstEval) patternPredicate.getRightExpr()); - consAdapter.push(patternPredicate.isCaseInsensitive()); - consAdapter.invokeSpecial(clazz, "", void.class, - new Class [] {boolean.class, EvalNode.class, ConstEval.class, boolean.class}); - - int PatternEval = consAdapter.astore(); - - consAdapter.aload(0); - consAdapter.aload(PatternEval); - initMethod.visitFieldInsn(Opcodes.PUTFIELD, this.owner, entry.getValue(), getDescription(clazz)); - - } else if (entry.getKey().getType() == EvalType.FUNCTION) { - GeneralFunctionEval function = (GeneralFunctionEval) entry.getKey(); - final String internalName = TajoGeneratorAdapter.getInternalName(function.getFuncDesc().getFuncClass()); - - // new and initialization of function - initMethod.visitTypeInsn(Opcodes.NEW, internalName); - consAdapter.dup(); - initMethod.visitMethodInsn(Opcodes.INVOKESPECIAL, internalName, "", "()V"); - int FUNCTION = consAdapter.astore(); - - // commParam - int paramNum = function.getArgs().length; - initMethod.visitLdcInsn(paramNum); - consAdapter.newArray(FunctionEval.ParamType.class); - final int PARAM_TYPE_ARRAY = consAdapter.astore(); - FunctionEval.ParamType[] paramTypes = 
EvalCodeGenerator.getParamTypes(function.getArgs()); - for (int paramIdx = 0; paramIdx < paramTypes.length; paramIdx++) { - consAdapter.aload(PARAM_TYPE_ARRAY); - consAdapter.methodvisitor.visitLdcInsn(paramIdx); - consAdapter.methodvisitor.visitFieldInsn(Opcodes.GETSTATIC, TajoGeneratorAdapter.getInternalName(FunctionEval.ParamType.class), - paramTypes[paramIdx].name(), TajoGeneratorAdapter.getDescription(FunctionEval.ParamType.class)); - consAdapter.methodvisitor.visitInsn(Opcodes.AASTORE); - } - - initMethod.visitVarInsn(Opcodes.ALOAD, FUNCTION); - consAdapter.aload(PARAM_TYPE_ARRAY); - consAdapter.invokeVirtual(function.getFuncDesc().getFuncClass(), "init", void.class, new Class[] {FunctionEval.ParamType[].class}); - - initMethod.visitVarInsn(Opcodes.ALOAD, 0); - initMethod.visitVarInsn(Opcodes.ALOAD, FUNCTION); - initMethod.visitFieldInsn(Opcodes.PUTFIELD, this.owner, entry.getValue(), - "L" + TajoGeneratorAdapter.getInternalName(function.getFuncDesc().getFuncClass()) + ";"); - - } - } - - initMethod.visitInsn(Opcodes.RETURN); - initMethod.visitMaxs(1, 1); - initMethod.visitEnd(); - } - - public void emitReturn() { + public void emitReturnAsDatum() { convertToDatum(evalNode.getValueType(), true); methodvisitor.visitInsn(Opcodes.ARETURN); methodvisitor.visitMaxs(0, 0); methodvisitor.visitEnd(); - classWriter.visitEnd(); + } + + public void emitReturnAsBool() { + returnAsBool(); + methodvisitor.visitMaxs(0, 0); + methodvisitor.visitEnd(); } } diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/EvalCodeGenerator.java b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/EvalCodeGenerator.java index 2948dec70e..b078ebfa30 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/EvalCodeGenerator.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/EvalCodeGenerator.java @@ -18,6 +18,7 @@ package org.apache.tajo.engine.codegen; +import org.apache.tajo.catalog.CatalogUtil; import org.apache.tajo.catalog.Column; import 
org.apache.tajo.catalog.FunctionDesc; import org.apache.tajo.catalog.Schema; @@ -26,21 +27,27 @@ import org.apache.tajo.datum.IntervalDatum; import org.apache.tajo.engine.eval.*; import org.apache.tajo.engine.json.CoreGsonHelper; -import org.apache.tajo.org.objectweb.asm.ClassWriter; import org.apache.tajo.org.objectweb.asm.Label; import org.apache.tajo.org.objectweb.asm.Opcodes; import org.apache.tajo.org.objectweb.asm.Type; import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.VTuple; +import org.apache.tajo.tuple.offheap.HeapTuple; +import org.apache.tajo.tuple.offheap.HeapTupleBytesComparator; -import java.lang.reflect.Constructor; import java.util.Stack; import static org.apache.tajo.common.TajoDataTypes.DataType; +import static org.apache.tajo.engine.codegen.TajoGeneratorAdapter.TUPLE; import static org.apache.tajo.engine.codegen.TajoGeneratorAdapter.getDescription; import static org.apache.tajo.engine.eval.FunctionEval.ParamType; public class EvalCodeGenerator extends SimpleEvalNodeVisitor { + public static final EvalCodeGenerator instance; + + static { + instance = new EvalCodeGenerator(); + } public static final byte UNKNOWN = 0; public static final byte TRUE = 1; @@ -66,35 +73,8 @@ public class EvalCodeGenerator extends SimpleEvalNodeVisitor new byte [] {UNKNOWN, TRUE, FALSE} // false }; - private final TajoClassLoader classLoader; - static int classSeq = 1; - - public EvalCodeGenerator(TajoClassLoader classLoader) { - this.classLoader = classLoader; - } - - public EvalNode compile(Schema schema, EvalNode expr) throws CompilationError { - - ClassWriter classWriter = new ClassWriter(ClassWriter.COMPUTE_MAXS); - - String className = EvalCodeGenerator.class.getPackage().getName() + ".CompiledEval" + classSeq++; - EvalCodeGenContext context = new EvalCodeGenContext(TajoGeneratorAdapter.getInternalName(className), - schema, classWriter, expr); - visit(context, expr, new Stack()); - context.emitReturn(); - - Class aClass = 
classLoader.defineClass(className, classWriter.toByteArray()); - - Constructor constructor; - EvalNode compiledEval; - - try { - constructor = aClass.getConstructor(); - compiledEval = (EvalNode) constructor.newInstance(); - } catch (Throwable t) { - throw new CompilationError(expr, t, classWriter.toByteArray()); - } - return compiledEval; + public static void visit(EvalCodeGenContext context, EvalNode eval) { + instance.visit(context, eval, new Stack()); } public EvalNode visitBinaryEval(EvalCodeGenContext context, Stack stack, BinaryEval binaryEval) { @@ -312,7 +292,7 @@ void emitLabel(EvalCodeGenContext context, Label label) { } public EvalNode visitCast(EvalCodeGenContext context, Stack stack, CastEval cast) { - DataType srcType = cast.getOperand().getValueType(); + DataType srcType = cast.getOperand().getValueType(); DataType targetType = cast.getValueType(); if (srcType.equals(targetType)) { @@ -355,23 +335,22 @@ public EvalNode visitField(EvalCodeGenContext context, Stack stack, Fi } context.methodvisitor.visitVarInsn(Opcodes.ALOAD, 2); - context.emitIsNullOfTuple(fieldIdx); - - context.push(true); + context.emitIsNullOfTuple(fieldIdx); // It will push 1 if null, and it will push 0 if not null. Label ifNull = new Label(); Label afterAll = new Label(); - context.methodvisitor.visitJumpInsn(Opcodes.IF_ICMPEQ, ifNull); + // IFNE means if the first item in stack is not 0. 
+ context.methodvisitor.visitJumpInsn(Opcodes.IFNE, ifNull); context.methodvisitor.visitVarInsn(Opcodes.ALOAD, 2); context.emitGetValueOfTuple(columnRef.getDataType(), fieldIdx); - context.pushNullFlag(true); // not null context.methodvisitor.visitJumpInsn(Opcodes.GOTO, afterAll); context.methodvisitor.visitLabel(ifNull); context.pushDummyValue(field.getValueType()); context.pushNullFlag(false); + context.methodvisitor.visitJumpInsn(Opcodes.GOTO, afterAll); context.methodvisitor.visitLabel(afterAll); } @@ -461,6 +440,12 @@ public EvalNode visitArithmeticEval(EvalCodeGenContext context, BinaryEval evalN return evalNode; } + public static boolean isTextField(EvalNode evalNode) { + TajoDataTypes.Type type = evalNode.getValueType().getType(); + boolean textType = (type == TajoDataTypes.Type.TEXT || type == TajoDataTypes.Type.CHAR); + return textType && evalNode.getType() == EvalType.FIELD; + } + public EvalNode visitComparisonEval(EvalCodeGenContext context, BinaryEval evalNode, Stack stack) throws CompilationError { @@ -622,7 +607,7 @@ public EvalNode visitConst(EvalCodeGenContext context, ConstEval constEval, Stac break; case INTERVAL: // load pre-stored variable. 
- emitGetField(context, context.owner, context.symbols.get(constEval), IntervalDatum.class); + emitGetField(context, context.owner, context.variables.symbols.get(constEval), IntervalDatum.class); break; default: throw new UnsupportedOperationException(constEval.getValueType().getType().name() + @@ -633,7 +618,7 @@ public EvalNode visitConst(EvalCodeGenContext context, ConstEval constEval, Stac return constEval; } - public static ParamType [] getParamTypes(EvalNode [] arguments) { + public static ParamType[] getParamTypes(EvalNode[] arguments) { ParamType[] paramTypes = new ParamType[arguments.length]; for (int i = 0; i < arguments.length; i++) { if (arguments[i].getType() == EvalType.CONST) { @@ -657,7 +642,7 @@ public EvalNode visitFuncCall(EvalCodeGenContext context, FunctionEval func, Sta final int DATUM_ARRAY = context.astore(); stack.push(func); - EvalNode [] params = func.getArgs(); + EvalNode[] params = func.getArgs(); for (int paramIdx = 0; paramIdx < func.getArgs().length; paramIdx++) { context.aload(DATUM_ARRAY); // array ref context.methodvisitor.visitLdcInsn(paramIdx); // array idx @@ -676,7 +661,7 @@ public EvalNode visitFuncCall(EvalCodeGenContext context, FunctionEval func, Sta FunctionDesc desc = func.getFuncDesc(); - String fieldName = context.symbols.get(func); + String fieldName = context.variables.symbols.get(func); String funcDescName = "L" + TajoGeneratorAdapter.getInternalName(desc.getFuncClass()) + ";"; context.aload(0); @@ -689,7 +674,7 @@ public EvalNode visitFuncCall(EvalCodeGenContext context, FunctionEval func, Sta } public EvalNode visitInPredicate(EvalCodeGenContext context, EvalNode patternEval, Stack stack) { - String fieldName = context.symbols.get(patternEval); + String fieldName = context.variables.symbols.get(patternEval); emitGetField(context, context.owner, fieldName, InEval.class); if (context.schema != null) { emitGetField(context, context.owner, "schema", Schema.class); @@ -705,7 +690,7 @@ public EvalNode 
visitInPredicate(EvalCodeGenContext context, EvalNode patternEva protected EvalNode visitStringPatternMatch(EvalCodeGenContext context, EvalNode patternEval, Stack stack) { Class clazz = getStringPatternEvalClass(patternEval.getType()); - String fieldName = context.symbols.get(patternEval); + String fieldName = context.variables.symbols.get(patternEval); emitGetField(context, context.owner, fieldName, clazz); if (context.schema != null) { emitGetField(context, context.owner, "schema", Schema.class); diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/EvalNodeCompiler.java b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/EvalNodeCompiler.java new file mode 100644 index 0000000000..ba21a8195b --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/EvalNodeCompiler.java @@ -0,0 +1,310 @@ +/* + * Lisensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tajo.engine.codegen; + +import org.apache.tajo.catalog.Column; +import org.apache.tajo.catalog.Schema; +import org.apache.tajo.common.TajoDataTypes; +import org.apache.tajo.datum.Datum; +import org.apache.tajo.datum.IntervalDatum; +import org.apache.tajo.engine.eval.*; +import org.apache.tajo.org.objectweb.asm.ClassWriter; +import org.apache.tajo.org.objectweb.asm.Label; +import org.apache.tajo.org.objectweb.asm.MethodVisitor; +import org.apache.tajo.org.objectweb.asm.Opcodes; +import org.apache.tajo.storage.Tuple; +import org.apache.tajo.tuple.offheap.OffHeapRowWriter; +import org.apache.tajo.tuple.offheap.RowWriter; +import org.apache.tajo.tuple.offheap.UnSafeTuple; + +import java.lang.reflect.Constructor; +import java.util.Map; +import java.util.Stack; + +import static org.apache.tajo.engine.codegen.TajoGeneratorAdapter.getDescription; + +public class EvalNodeCompiler { + private final TajoClassLoader classLoader; + static int classSeq = 1; + + public EvalNodeCompiler(TajoClassLoader classLoader) { + this.classLoader = classLoader; + } + + public EvalNode compile(Schema schema, EvalNode eval) throws CompilationError { + + ClassWriter classWriter = new ClassWriter(ClassWriter.COMPUTE_MAXS); + String className = EvalCodeGenerator.class.getPackage().getName() + ".CompiledEval" + classSeq++; + String owner = TajoGeneratorAdapter.getInternalName(className); + Variables variables = new Variables(); + emitMemberFields(variables, classWriter, eval); + emitClassDefinition(owner, classWriter); + emitConstructor(owner, classWriter, schema, variables); + + generateEvalFunc(owner, classWriter, schema, eval, variables); + generateEvalFuncNative(owner, classWriter, schema, eval, variables); + + if (eval.getValueType().getType() == TajoDataTypes.Type.BOOLEAN) { + generateIsMatchedFunc(owner, classWriter, schema, eval, variables); + } + + classWriter.visitEnd(); + + Class aClass = classLoader.defineClass(className, classWriter.toByteArray()); + + 
Constructor constructor; + EvalNode compiledEval; + + try { + constructor = aClass.getConstructor(); + compiledEval = (EvalNode) constructor.newInstance(); + } catch (Throwable t) { + throw new CompilationError(eval, t, classWriter.toByteArray()); + } + return compiledEval; + } + + public void emitClassDefinition(String className, ClassWriter classWriter) { + classWriter.visit(Opcodes.V1_5, Opcodes.ACC_PUBLIC, className, null, + TajoGeneratorAdapter.getInternalName(EvalNode.class), null); + } + + private void generateEvalFuncNative(String className, ClassWriter classWriter, Schema schema, EvalNode eval, + Variables vars) { + String evalDesc = TajoGeneratorAdapter.getMethodDescription( + void.class, + new Class[]{Schema.class, Tuple.class, RowWriter.class}); + + EvalCodeGenContext evalContext = new EvalCodeGenContext(className, + schema, classWriter, "eval", evalDesc, eval, vars); + + if (EvalCodeGenerator.isTextField(eval)) { + copyTextOrBytes(evalContext, (FieldEval) eval); + } else { + EvalCodeGenerator.visit(evalContext, eval); + evalContext.writeToTupleBuilder(eval.getValueType()); + } + + evalContext.methodvisitor.visitInsn(Opcodes.RETURN); + evalContext.methodvisitor.visitMaxs(0, 0); + evalContext.methodvisitor.visitEnd(); + } + + private void copyTextOrBytes(EvalCodeGenContext context, FieldEval field) { + Column columnRef = field.getColumnRef(); + int fieldIdx; + if (columnRef.hasQualifier()) { + fieldIdx = context.schema.getColumnId(columnRef.getQualifiedName()); + } else { + fieldIdx = context.schema.getColumnIdByName(columnRef.getSimpleName()); + } + + context.methodvisitor.visitVarInsn(Opcodes.ALOAD, 2); + context.emitIsNullOfTuple(fieldIdx); // It will push 1 if null, and it will push 0 if not null. + + Label ifNull = new Label(); + Label afterAll = new Label(); + // IFNE means if the first item in stack is not 0. 
+ context.methodvisitor.visitJumpInsn(Opcodes.IFNE, ifNull); + + context.aload(EvalCodeGenContext.BUILDER); + context.methodvisitor.visitTypeInsn(Opcodes.CHECKCAST, TajoGeneratorAdapter.getInternalName(OffHeapRowWriter.class)); + context.aload(EvalCodeGenContext.TUPLE); + context.methodvisitor.visitTypeInsn(Opcodes.CHECKCAST, TajoGeneratorAdapter.getInternalName(UnSafeTuple.class)); + context.push(fieldIdx); + context.invokeVirtual(OffHeapRowWriter.class, "copyTextFrom", void.class, new Class [] {UnSafeTuple.class, int.class}); + context.gotoLabel(afterAll); + + context.methodvisitor.visitLabel(ifNull); + context.aload(EvalCodeGenContext.BUILDER); // RowWriter + context.invokeInterface(RowWriter.class, "skipField", void.class, new Class[]{}); + + context.methodvisitor.visitLabel(afterAll); + } + + private void generateEvalFunc(String className, ClassWriter classWriter, Schema schema, EvalNode eval, + Variables vars) { + String evalDesc = TajoGeneratorAdapter.getMethodDescription( + Datum.class, + new Class[]{Schema.class, Tuple.class}); + + EvalCodeGenContext evalContext = new EvalCodeGenContext(className, + schema, classWriter, "eval", evalDesc, eval, vars); + EvalCodeGenerator.visit(evalContext, eval); + evalContext.emitReturnAsDatum(); + } + + private void generateIsMatchedFunc(String className, ClassWriter classWriter, Schema schema, EvalNode eval, Variables vars) { + String isMatchedDesc = TajoGeneratorAdapter.getMethodDescription( + boolean.class, + new Class[]{Schema.class, Tuple.class}); + + EvalCodeGenContext isMatchedContext = new EvalCodeGenContext(className, + schema, classWriter, "isMatched", isMatchedDesc, eval, vars); + EvalCodeGenerator.visit(isMatchedContext, eval); + isMatchedContext.emitReturnAsBool(); + } + + public void emitMemberFields(Variables variables, ClassWriter classWriter, EvalNode evalNode) { + classWriter.visitField(Opcodes.ACC_PRIVATE, "schema", + "L" + TajoGeneratorAdapter.getInternalName(Schema.class) + ";", null, null); + + 
VariablesBuilder builder = new VariablesBuilder(classWriter); + builder.visit(variables, evalNode, new Stack()); + } + + public static void emitCreateSchema(TajoGeneratorAdapter adapter, MethodVisitor mv, Schema schema) { + mv.visitLdcInsn(schema.toJson()); + adapter.invokeStatic(EvalCodeGenerator.class, "createSchema", Schema.class, new Class[] {String.class}); + } + + public static void emitCreateEval(TajoGeneratorAdapter adapter, MethodVisitor mv, EvalNode evalNode) { + mv.visitLdcInsn(evalNode.toJson()); + adapter.invokeStatic(EvalCodeGenerator.class, "createEval", EvalNode.class, new Class[] {String.class}); + } + + public static void emitConstEval(TajoGeneratorAdapter adapter, MethodVisitor mv, ConstEval evalNode) { + mv.visitLdcInsn(evalNode.toJson()); + adapter.invokeStatic(EvalCodeGenerator.class, "createConstEval", ConstEval.class, new Class[] {String.class}); + } + + public static void emitRowConstantEval(TajoGeneratorAdapter adapter, MethodVisitor mv, RowConstantEval evalNode) { + mv.visitLdcInsn(evalNode.toJson()); + adapter.invokeStatic(EvalCodeGenerator.class, "createRowConstantEval", RowConstantEval.class, + new Class[] {String.class}); + } + + public void emitConstructor(String className, ClassWriter classWriter, Schema schema, Variables variables) { + // constructor method + MethodVisitor initMethod = classWriter.visitMethod(Opcodes.ACC_PUBLIC, "", "()V", null, null); + initMethod.visitCode(); + initMethod.visitVarInsn(Opcodes.ALOAD, 0); + initMethod.visitMethodInsn(Opcodes.INVOKESPECIAL, TajoGeneratorAdapter.getInternalName(EvalNode.class), "", + "()V"); + + TajoGeneratorAdapter consAdapter = new TajoGeneratorAdapter(Opcodes.ACC_PUBLIC, initMethod, "", "()V"); + + // == this.schema = schema; + if (schema != null) { + consAdapter.aload(0); + emitCreateSchema(consAdapter, initMethod, schema); + initMethod.visitFieldInsn(Opcodes.PUTFIELD, className, "schema", getDescription(Schema.class)); + } + + for (Map.Entry entry : variables.symbols.entrySet()) 
{ + if (entry.getKey().getType() == EvalType.CONST) { + ConstEval constEval = (ConstEval) entry.getKey(); + + if (constEval.getValueType().getType() == TajoDataTypes.Type.INTERVAL) { + IntervalDatum datum = (IntervalDatum) constEval.getValue(); + + final String internalName = TajoGeneratorAdapter.getInternalName(IntervalDatum.class); + + initMethod.visitTypeInsn(Opcodes.NEW, internalName); + consAdapter.dup(); + initMethod.visitLdcInsn(datum.getMonths()); + initMethod.visitLdcInsn(datum.getMilliSeconds()); + initMethod.visitMethodInsn(Opcodes.INVOKESPECIAL, internalName, "", "(IJ)V"); + int INTERVAL_DATUM = consAdapter.astore(); + + consAdapter.aload(0); + consAdapter.aload(INTERVAL_DATUM); + initMethod.visitFieldInsn(Opcodes.PUTFIELD, className, entry.getValue(), + "L" + TajoGeneratorAdapter.getInternalName(IntervalDatum.class) + ";"); + } + + } else if (entry.getKey().getType() == EvalType.IN) { + InEval inEval = (InEval) entry.getKey(); + + final String internalName = TajoGeneratorAdapter.getInternalName(InEval.class); + initMethod.visitTypeInsn(Opcodes.NEW, internalName); + consAdapter.dup(); + emitCreateEval(consAdapter, initMethod, inEval.getLeftExpr()); + emitRowConstantEval(consAdapter, initMethod, (RowConstantEval) inEval.getRightExpr()); + consAdapter.push(inEval.isNot()); + consAdapter.invokeSpecial(InEval.class, "", void.class, + new Class [] {EvalNode.class, RowConstantEval.class, boolean.class}); + int IN_PREDICATE_EVAL = consAdapter.astore(); + + consAdapter.aload(0); + consAdapter.aload(IN_PREDICATE_EVAL); + initMethod.visitFieldInsn(Opcodes.PUTFIELD, className, entry.getValue(), getDescription(InEval.class)); + + } else if (EvalType.isStringPatternMatchOperator(entry.getKey().getType())) { + PatternMatchPredicateEval patternPredicate = (PatternMatchPredicateEval) entry.getKey(); + + Class clazz = EvalCodeGenerator.getStringPatternEvalClass(entry.getKey().getType()); + final String internalName = TajoGeneratorAdapter.getInternalName(clazz); + + 
initMethod.visitTypeInsn(Opcodes.NEW, internalName); + consAdapter.dup(); + consAdapter.push(patternPredicate.isNot()); + emitCreateEval(consAdapter, initMethod, patternPredicate.getLeftExpr()); + emitConstEval(consAdapter, initMethod, (ConstEval) patternPredicate.getRightExpr()); + consAdapter.push(patternPredicate.isCaseInsensitive()); + consAdapter.invokeSpecial(clazz, "", void.class, + new Class [] {boolean.class, EvalNode.class, ConstEval.class, boolean.class}); + + int PatternEval = consAdapter.astore(); + + consAdapter.aload(0); + consAdapter.aload(PatternEval); + initMethod.visitFieldInsn(Opcodes.PUTFIELD, className, entry.getValue(), getDescription(clazz)); + + } else if (entry.getKey().getType() == EvalType.FUNCTION) { + GeneralFunctionEval function = (GeneralFunctionEval) entry.getKey(); + final String internalName = TajoGeneratorAdapter.getInternalName(function.getFuncDesc().getFuncClass()); + + // new and initialization of function + initMethod.visitTypeInsn(Opcodes.NEW, internalName); + consAdapter.dup(); + initMethod.visitMethodInsn(Opcodes.INVOKESPECIAL, internalName, "", "()V"); + int FUNCTION = consAdapter.astore(); + + // commParam + int paramNum = function.getArgs().length; + initMethod.visitLdcInsn(paramNum); + consAdapter.newArray(FunctionEval.ParamType.class); + final int PARAM_TYPE_ARRAY = consAdapter.astore(); + FunctionEval.ParamType[] paramTypes = EvalCodeGenerator.getParamTypes(function.getArgs()); + for (int paramIdx = 0; paramIdx < paramTypes.length; paramIdx++) { + consAdapter.aload(PARAM_TYPE_ARRAY); + consAdapter.methodvisitor.visitLdcInsn(paramIdx); + consAdapter.methodvisitor.visitFieldInsn(Opcodes.GETSTATIC, TajoGeneratorAdapter.getInternalName(FunctionEval.ParamType.class), + paramTypes[paramIdx].name(), TajoGeneratorAdapter.getDescription(FunctionEval.ParamType.class)); + consAdapter.methodvisitor.visitInsn(Opcodes.AASTORE); + } + + initMethod.visitVarInsn(Opcodes.ALOAD, FUNCTION); + consAdapter.aload(PARAM_TYPE_ARRAY); + 
consAdapter.invokeVirtual(function.getFuncDesc().getFuncClass(), "init", void.class, new Class[] {FunctionEval.ParamType[].class}); + + initMethod.visitVarInsn(Opcodes.ALOAD, 0); + initMethod.visitVarInsn(Opcodes.ALOAD, FUNCTION); + initMethod.visitFieldInsn(Opcodes.PUTFIELD, className, entry.getValue(), + "L" + TajoGeneratorAdapter.getInternalName(function.getFuncDesc().getFuncClass()) + ";"); + + } + } + + initMethod.visitInsn(Opcodes.RETURN); + initMethod.visitMaxs(1, 1); + initMethod.visitEnd(); + } +} diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/ExecutorPreCompiler.java b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/ExecutorPreCompiler.java index 3de0bf12b6..815ac698bf 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/ExecutorPreCompiler.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/ExecutorPreCompiler.java @@ -1,4 +1,4 @@ -/* +/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -55,19 +55,21 @@ public static Map, EvalNode> compile(TajoClassLoader clas } public static class CompilationContext { - private final EvalCodeGenerator evalCompiler; + private final EvalNodeCompiler evalCompiler; private final TupleComparerCompiler comparerCompiler; private Map, EvalNode> compiledEvals; - private Map, TupleComparator> compiledComparators; + private Map, TupleComparator> unsafeComparators; + private Map, TupleComparator> comparators; public CompilationContext(TajoClassLoader classLoader) { - this.evalCompiler = new EvalCodeGenerator(classLoader); + this.evalCompiler = new EvalNodeCompiler(classLoader); this.comparerCompiler = new TupleComparerCompiler(classLoader); this.compiledEvals = Maps.newHashMap(); - this.compiledComparators = Maps.newHashMap(); + this.unsafeComparators = Maps.newHashMap(); + this.comparators = Maps.newHashMap(); } - public EvalCodeGenerator getEvalCompiler() { + public EvalNodeCompiler getEvalCompiler() { return evalCompiler; } @@ -79,8 +81,12 @@ public Map, EvalNode> getPrecompiedEvals() { return compiledEvals; } - public Map, TupleComparator> getPrecompiedComparators() { - return compiledComparators; + public Map, TupleComparator> getUnSafeComparators() { + return unsafeComparators; + } + + public Map, TupleComparator> getComparators() { + return comparators; } } @@ -102,22 +108,20 @@ private static void compileIfAbsent(CompilationContext context, Schema schema, E private static void compileIfAbsent(CompilationContext context, Schema schema, BaseTupleComparator comparator) { Pair key = new Pair(schema, comparator); - if (!context.compiledComparators.containsKey(key)) { - try { - TupleComparator compiled = context.comparerCompiler.compile(comparator, false); - context.compiledComparators.put(key, compiled); - } catch (Throwable t) { - // If any compilation error occurs, it works in a fallback mode. 
This mode just uses EvalNode objects - // instead of a compiled EvalNode. - context.compiledComparators.put(key, comparator); - LOG.warn(t); - } + if (!context.unsafeComparators.containsKey(key)) { + TupleComparator unsafeComparator = context.comparerCompiler.compile(comparator, true); + context.unsafeComparators.put(key, unsafeComparator); + } + + if (!context.comparators.containsKey(key)) { + TupleComparator compiledComparator = context.comparerCompiler.compile(comparator, false); + context.comparators.put(key, compiledComparator); } } private static void compileProjectableNode(CompilationContext context, Schema schema, Projectable node) { - Target [] targets; + Target[] targets; if (node.hasTargets()) { targets = node.getTargets(); } else { @@ -170,9 +174,7 @@ public LogicalNode visitHaving(CompilationContext context, LogicalPlan plan, Log public LogicalNode visitGroupBy(CompilationContext context, LogicalPlan plan, LogicalPlan.QueryBlock block, GroupbyNode node, Stack stack) throws PlanningException { super.visitGroupBy(context, plan, block, node, stack); - - compileProjectableNode(context, node.getInSchema(), node); - + // Groupby executors do not use Projector. 
return node; } diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/TajoGeneratorAdapter.java b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/TajoGeneratorAdapter.java index c1c4801e3f..a3b1028e2a 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/TajoGeneratorAdapter.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/TajoGeneratorAdapter.java @@ -27,15 +27,17 @@ import org.apache.tajo.engine.eval.EvalType; import org.apache.tajo.exception.InvalidCastException; import org.apache.tajo.exception.UnsupportedException; -import org.apache.tajo.storage.Tuple; -import org.apache.tajo.util.TUtil; -import org.apache.tajo.util.datetime.DateTimeUtil; import org.apache.tajo.org.objectweb.asm.Label; import org.apache.tajo.org.objectweb.asm.MethodVisitor; import org.apache.tajo.org.objectweb.asm.Opcodes; import org.apache.tajo.org.objectweb.asm.Type; import org.apache.tajo.org.objectweb.asm.commons.GeneratorAdapter; import org.apache.tajo.org.objectweb.asm.commons.TableSwitchGenerator; +import org.apache.tajo.storage.Tuple; +import org.apache.tajo.tuple.TupleBuilder; +import org.apache.tajo.tuple.offheap.RowWriter; +import org.apache.tajo.util.TUtil; +import org.apache.tajo.util.datetime.DateTimeUtil; import java.util.HashMap; import java.util.Map; @@ -185,9 +187,10 @@ public void emitIsNullOfTuple() { emitIsNullOfTuple(null); } - public void emitIsNullOfTuple(Integer fieldIndex) { + public void emitIsNullOfTuple(@Nullable Integer fieldIndex) { if (fieldIndex != null) { push(fieldIndex); + Preconditions.checkArgument(fieldIndex > -1, "Field index out Of range: " + fieldIndex); } invokeInterface(Tuple.class, "isNull", boolean.class, new Class[]{int.class}); @@ -200,11 +203,13 @@ public void emitIsNotNullOfTuple() { public void emitIsNotNullOfTuple(@Nullable Integer fieldIndex) { if (fieldIndex != null) { push(fieldIndex); + Preconditions.checkArgument(fieldIndex > -1, "Field index out Of range: " + fieldIndex); } 
invokeInterface(Tuple.class, "isNotNull", boolean.class, new Class [] {int.class}); } public void emitGetValueOfTuple(TajoDataTypes.DataType dataType, int fieldIndex) { + Preconditions.checkArgument(fieldIndex > -1, "Field index out Of range: " + fieldIndex); push(fieldIndex); TajoDataTypes.Type type = dataType.getType(); @@ -374,11 +379,11 @@ public void load(TajoDataTypes.DataType dataType, int idx) { switch (dataType.getType()) { case NULL_TYPE: case BOOLEAN: - case CHAR: case INT1: case INT2: case INT4: case INET4: + case DATE: methodvisitor.visitVarInsn(Opcodes.ILOAD, idx); break; case INT8: @@ -392,6 +397,7 @@ public void load(TajoDataTypes.DataType dataType, int idx) { case FLOAT8: methodvisitor.visitVarInsn(Opcodes.DLOAD, idx); break; + case CHAR: case TEXT: case INTERVAL: case PROTOBUF: @@ -723,6 +729,120 @@ public void convertToPrimitive(TajoDataTypes.DataType type) { methodvisitor.visitLabel(afterAll); } + public void writeToTupleBuilder(TajoDataTypes.DataType type) { + String method; + Class [] paramTypes; + switch (type.getType()) { + case NULL_TYPE: + pop(); // pop null flag + pop(type); // pop null datum + aload(BUILDER); + invokeInterface(TupleBuilder.class, "skipField", void.class, new Class [] {}); + return; + + case BOOLEAN: + method = "putBool"; + paramTypes = new Class[] {byte.class}; + break; + case INT1: + case INT2: + method = "putInt2"; + paramTypes = new Class[] {short.class}; + break; + case INT4: + method = "putInt4"; + paramTypes = new Class[] {int.class}; + break; + case INT8: + method = "putInt8"; + paramTypes = new Class[] {long.class}; + break; + case FLOAT4: + method = "putFloat4"; + paramTypes = new Class[] {float.class}; + break; + case FLOAT8: + method = "putFloat8"; + paramTypes = new Class[] {double.class}; + break; + case CHAR: + case TEXT: + method = "putText"; + paramTypes = new Class[] {String.class}; + break; + case TIMESTAMP: + method = "putTimestamp"; + paramTypes = new Class[] {long.class}; + break; + case DATE: + 
method = "putDate"; + paramTypes = new Class[] {int.class}; + break; + case TIME: + method = "putTime"; + paramTypes = new Class[] {long.class}; + break; + case INTERVAL: + method = "putInterval"; + paramTypes = new Class[] {IntervalDatum.class}; + break; + case INET4: + method = "putInet4"; + paramTypes = new Class[] {int.class}; + break; + case PROTOBUF: + method = "putProtoDatum"; + paramTypes = new Class[] {ProtobufDatum.class}; + break; + default: + throw new RuntimeException("Unsupported type: " + type.getType().name()); + } + + Label ifNull = new Label(); + Label afterAll = new Label(); + + emitNullityCheck(ifNull); + int value = store(type); + aload(BUILDER); + load(type, value); + if (type.getType() == PROTOBUF) { + methodvisitor.visitTypeInsn(Opcodes.CHECKCAST, TajoGeneratorAdapter.getInternalName(ProtobufDatum.class)); + } + invokeInterface(RowWriter.class, method, void.class, paramTypes); + methodvisitor.visitJumpInsn(Opcodes.GOTO, afterAll); + + methodvisitor.visitLabel(ifNull); + pop(type); + aload(BUILDER); // RowWriter + invokeInterface(RowWriter.class, "skipField", void.class, new Class[] {}); + + methodvisitor.visitLabel(afterAll); + } + + public void returnAsBool() { + Label ifNull = new Label(); + Label afterAll = new Label(); + + Label falseLabel = new Label(); + emitNullityCheck(ifNull); + push(1); + methodvisitor.visitJumpInsn(Opcodes.IF_ICMPNE, falseLabel); + push(1); + gotoLabel(afterAll); + + methodvisitor.visitLabel(falseLabel); + push(0); + gotoLabel(afterAll); + + methodvisitor.visitLabel(ifNull); + pop(); + push(0); + gotoLabel(afterAll); + + methodvisitor.visitLabel(afterAll); + methodvisitor.visitInsn(Opcodes.IRETURN); + } + public void convertToDatum(TajoDataTypes.DataType type, boolean castToDatum) { String convertMethod; Class returnType; @@ -906,8 +1026,10 @@ public void newArray(final Class clazz) { methodvisitor.visitIntInsn(Opcodes.NEWARRAY, typeCode); } - private int nextVarId = 3; - + public static final int SCHEMA = 1; + 
public static final int TUPLE = 2; + public static final int BUILDER = 3; + private int nextVarId = 4; private Map localVariablesMap = new HashMap(); public void astore(String name) { @@ -984,11 +1106,11 @@ public int store(TajoDataTypes.DataType type) { switch (type.getType()) { case NULL_TYPE: case BOOLEAN: - case CHAR: case INT1: case INT2: case INT4: case INET4: + case DATE: methodvisitor.visitVarInsn(Opcodes.ISTORE, varId); break; case TIME: @@ -1003,7 +1125,9 @@ public int store(TajoDataTypes.DataType type) { methodvisitor.visitVarInsn(Opcodes.DSTORE, varId); break; case INTERVAL: + case CHAR: case TEXT: + case PROTOBUF: methodvisitor.visitVarInsn(Opcodes.ASTORE, varId); break; default: @@ -1013,6 +1137,37 @@ public int store(TajoDataTypes.DataType type) { return varId; } + @SuppressWarnings("unused") + public void returnByType(TajoDataTypes.DataType dataType) { + switch (dataType.getType()) { + case BOOLEAN: + case INT1: + case INT2: + case INT4: + case INET4: + case DATE: + methodvisitor.visitInsn(Opcodes.IRETURN); + break; + case INT8: + case TIMESTAMP: + case TIME: + methodvisitor.visitInsn(Opcodes.LRETURN); + break; + case FLOAT4: + methodvisitor.visitInsn(Opcodes.FRETURN); + break; + case FLOAT8: + methodvisitor.visitInsn(Opcodes.DRETURN); + case INTERVAL: + methodvisitor.visitInsn(Opcodes.ARETURN); + break; + case TEXT: + methodvisitor.visitInsn(Opcodes.ARETURN); + default: + throw new UnsupportedException("Unknown Type: " + dataType.getType().name()); + } + } + public static interface SwitchCaseGenerator extends TableSwitchGenerator { int size(); int min(); diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/TupleComparerCompiler.java b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/TupleComparerCompiler.java index f7f113d50c..f08024060b 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/TupleComparerCompiler.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/TupleComparerCompiler.java @@ -30,6 
+30,8 @@ import org.apache.tajo.storage.BaseTupleComparator; import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.TupleComparator; +import org.apache.tajo.tuple.offheap.HeapTuple; +import org.apache.tajo.tuple.offheap.HeapTupleBytesComparator; import org.apache.tajo.tuple.offheap.UnSafeTuple; import org.apache.tajo.tuple.offheap.UnSafeTupleBytesComparator; import org.apache.tajo.util.UnsafeComparer; @@ -304,6 +306,18 @@ private void emitComparisonForText(TajoGeneratorAdapter adapter, BaseTupleCompar adapter.invokeStatic(UnSafeTupleBytesComparator.class, "compare", int.class, new Class[]{long.class, long.class}); } else { + /* It will be used for HeapTuple later. + emitGetParam(adapter, c, idx, LEFT_VALUE); + adapter.methodvisitor.visitTypeInsn(Opcodes.CHECKCAST, getInternalName(HeapTuple.class)); + adapter.push(c.getSortKeyIds()[idx]); + + emitGetParam(adapter, c, idx, RIGHT_VALUE); + adapter.methodvisitor.visitTypeInsn(Opcodes.CHECKCAST, getInternalName(HeapTuple.class)); + adapter.push(c.getSortKeyIds()[idx]); + + adapter.invokeStatic(HeapTupleBytesComparator.class, "compare", int.class, + new Class[]{HeapTuple.class, int.class, HeapTuple.class, int.class}); + */ emitGetParam(adapter, c, idx, LEFT_VALUE); adapter.push(c.getSortKeyIds()[idx]); adapter.invokeInterface(Tuple.class, "getBytes", byte [].class, new Class [] {int.class}); diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/Variables.java b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/Variables.java new file mode 100644 index 0000000000..159608c29b --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/Variables.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.codegen; + +import com.google.common.collect.Maps; +import org.apache.tajo.engine.eval.EvalNode; + +import java.util.Map; + +public class Variables { + int seqId = 0; + Map symbols = Maps.newHashMap(); +} diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/VariablesPreBuilder.java b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/VariablesBuilder.java similarity index 77% rename from tajo-core/src/main/java/org/apache/tajo/engine/codegen/VariablesPreBuilder.java rename to tajo-core/src/main/java/org/apache/tajo/engine/codegen/VariablesBuilder.java index 9f50bb5ed5..80ede8a20e 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/VariablesPreBuilder.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/VariablesBuilder.java @@ -1,4 +1,4 @@ -/* +/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -21,13 +21,20 @@ import org.apache.tajo.common.TajoDataTypes; import org.apache.tajo.datum.IntervalDatum; import org.apache.tajo.engine.eval.*; +import org.apache.tajo.org.objectweb.asm.ClassWriter; import org.apache.tajo.org.objectweb.asm.Opcodes; import java.util.Stack; -class VariablesPreBuilder extends SimpleEvalNodeVisitor { +class VariablesBuilder extends SimpleEvalNodeVisitor { - public EvalNode visitBinaryEval(EvalCodeGenContext context, Stack stack, BinaryEval binaryEval) { + private ClassWriter classWriter; + + public VariablesBuilder(ClassWriter classWriter) { + this.classWriter = classWriter; + } + + public EvalNode visitBinaryEval(Variables context, Stack stack, BinaryEval binaryEval) { super.visitBinaryEval(context, stack, binaryEval); if (EvalType.isStringPatternMatchOperator(binaryEval.getType())) { @@ -36,7 +43,7 @@ public EvalNode visitBinaryEval(EvalCodeGenContext context, Stack stac context.symbols.put(binaryEval, fieldName); Class clazz = EvalCodeGenerator.getStringPatternEvalClass(binaryEval.getType()); - context.classWriter.visitField(Opcodes.ACC_PRIVATE, fieldName, + classWriter.visitField(Opcodes.ACC_PRIVATE, fieldName, "L" + TajoGeneratorAdapter.getInternalName(clazz) + ";", null, null); } } else if (binaryEval.getType() == EvalType.IN) { @@ -44,7 +51,7 @@ public EvalNode visitBinaryEval(EvalCodeGenContext context, Stack stac String fieldName = binaryEval.getType().name() + "_" + context.seqId++; context.symbols.put(binaryEval, fieldName); - context.classWriter.visitField(Opcodes.ACC_PRIVATE, fieldName, + classWriter.visitField(Opcodes.ACC_PRIVATE, fieldName, "L" + TajoGeneratorAdapter.getInternalName(InEval.class) + ";", null, null); } } @@ -53,14 +60,14 @@ public EvalNode visitBinaryEval(EvalCodeGenContext context, Stack stac } @Override - public EvalNode visitConst(EvalCodeGenContext context, ConstEval constEval, Stack stack) { + public EvalNode 
visitConst(Variables context, ConstEval constEval, Stack stack) { if (constEval.getValueType().getType() == TajoDataTypes.Type.INTERVAL) { if (!context.symbols.containsKey(constEval)) { String fieldName = constEval.getValueType().getType().name() + "_" + context.seqId++; context.symbols.put(constEval, fieldName); - context.classWriter.visitField(Opcodes.ACC_PRIVATE, fieldName, + classWriter.visitField(Opcodes.ACC_PRIVATE, fieldName, "L" + TajoGeneratorAdapter.getInternalName(IntervalDatum.class) + ";", null, null); } } @@ -68,13 +75,13 @@ public EvalNode visitConst(EvalCodeGenContext context, ConstEval constEval, Stac } @Override - public EvalNode visitFuncCall(EvalCodeGenContext context, FunctionEval function, Stack stack) { + public EvalNode visitFuncCall(Variables context, FunctionEval function, Stack stack) { super.visitFuncCall(context, function, stack); if (!context.symbols.containsKey(function)) { String fieldName = function.getFuncDesc().getSignature() + "_" + context.seqId++; context.symbols.put(function, fieldName); - context.classWriter.visitField(Opcodes.ACC_PRIVATE, fieldName, + classWriter.visitField(Opcodes.ACC_PRIVATE, fieldName, "L" + TajoGeneratorAdapter.getInternalName(function.getFuncDesc().getFuncClass()) + ";", null, null); } diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/eval/EvalNode.java b/tajo-core/src/main/java/org/apache/tajo/engine/eval/EvalNode.java index b48700180f..b7387b8974 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/eval/EvalNode.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/eval/EvalNode.java @@ -19,9 +19,11 @@ package org.apache.tajo.engine.eval; import com.google.gson.annotations.Expose; +import org.apache.tajo.annotation.UsedByJIT; import org.apache.tajo.catalog.Schema; import org.apache.tajo.common.TajoDataTypes.DataType; import org.apache.tajo.datum.Datum; +import org.apache.tajo.engine.codegen.CompilationError; import org.apache.tajo.engine.json.CoreGsonHelper; import 
org.apache.tajo.engine.utils.TupleBuilderUtil; import org.apache.tajo.json.GsonObject; @@ -61,9 +63,9 @@ public String toJson() { public abstract T eval(Schema schema, Tuple tuple); + @UsedByJIT public void eval(Schema schema, Tuple tuple, RowWriter builder) { - Datum result = eval(schema, tuple); - TupleBuilderUtil.writeEvalResult(builder, result.type(), result); + throw new CompilationError("eval(Schema, Tuple, RowWriter) is not compiled"); } @Deprecated diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ExternalSortExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ExternalSortExec.java index 8d36e740c9..f02780291d 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ExternalSortExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ExternalSortExec.java @@ -230,7 +230,7 @@ private List sortAndStoreAllChunks() throws IOException { } if (tupleBlock.rows() >= 0) { // if there are at least one or more input tuples - sortedTuples = OffHeapRowBlockUtils.sort(tupleBlock, getComparator()); + sortedTuples = OffHeapRowBlockUtils.sort(tupleBlock, getUnsafeComparator()); } // get total loaded (or stored) bytes and total row numbers diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java index fd11c7b0bc..5ffe1222d4 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java @@ -103,6 +103,7 @@ public SeqScanExec(TaskAttemptContext context, AbstractStorageManager sm, ScanNo } inRowBlock = new OffHeapRowBlock(inSchema, 64 * StorageUnit.KB); + inRowBlock.setMaxRow(1024); } /** @@ -219,7 +220,7 @@ protected void compile() throws CompilationError { } private void initScanner(Schema projected) throws IOException { - this.projector = new 
Projector(context, inSchema, outSchema, plan.getTargets()); + this.projector = new Projector(context, inSchema, outSchema, plan.getTargets(), true); if (fragments != null) { if (fragments.length > 1) { this.scanner = new MergeScanner(context.getConf(), plan.getPhysicalSchema(), plan.getTableDesc().getMeta(), @@ -299,6 +300,8 @@ public Tuple next() throws IOException { } public boolean nextFetch(OffHeapRowBlock rowBlock) throws IOException { + rowBlock.clear(); + boolean noMoreTuple = scanner.nextFetch(inRowBlock); if (!noMoreTuple) { return false; diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortExec.java index e261e0ca3d..e0d3f0882a 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortExec.java @@ -29,6 +29,7 @@ import java.util.Comparator; public abstract class SortExec extends UnaryPhysicalExec { + private final Comparator unsafeComparator; private final Comparator comparator; private final SortSpec [] sortSpecs; @@ -39,8 +40,10 @@ public SortExec(TaskAttemptContext context, Schema inSchema, BaseTupleComparator comp = new BaseTupleComparator(inSchema, sortSpecs); if (context.getQueryContext().getBool(SessionVars.CODEGEN)) { - this.comparator = context.getSharedResource().getCompiledComparator(inSchema, comp); + this.unsafeComparator = context.getSharedResource().getUnSafeComparator(inSchema, comp); + this.comparator = context.getSharedResource().getComparator(inSchema, comp); } else { + this.unsafeComparator = comp; this.comparator = comp; } } @@ -53,6 +56,10 @@ public Comparator getComparator() { return comparator; } + public Comparator getUnsafeComparator() { + return unsafeComparator; + } + @Override abstract public Tuple next() throws IOException; } diff --git 
a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/StoreTableExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/StoreTableExec.java index f88af150ac..8fec9aa7f3 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/StoreTableExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/StoreTableExec.java @@ -129,6 +129,8 @@ public Tuple next() throws IOException { RowBlockReader reader; public boolean nextFetch(OffHeapRowBlock rowBlock) throws IOException { + rowBlock.clear(); + if (child.nextFetch(rowBlock)) { reader = rowBlock.getReader(); while (reader.next(zcTuple)) { diff --git a/tajo-core/src/main/java/org/apache/tajo/worker/ExecutionBlockSharedResource.java b/tajo-core/src/main/java/org/apache/tajo/worker/ExecutionBlockSharedResource.java index 4b027773af..7ffcc89b84 100644 --- a/tajo-core/src/main/java/org/apache/tajo/worker/ExecutionBlockSharedResource.java +++ b/tajo-core/src/main/java/org/apache/tajo/worker/ExecutionBlockSharedResource.java @@ -110,15 +110,38 @@ public EvalNode getCompiledEval(Schema schema, EvalNode eval) { } } + public TupleComparator compileUnSafeComparator(Schema schema, BaseTupleComparator comp) { + return compilationContext.getComparatorCompiler().compile(comp, true); + } + public TupleComparator compileComparator(Schema schema, BaseTupleComparator comp) { return compilationContext.getComparatorCompiler().compile(comp, false); } - public TupleComparator getCompiledComparator(Schema schema, BaseTupleComparator comp) { + public TupleComparator getUnSafeComparator(Schema schema, BaseTupleComparator comp) { + if (codeGenEnabled) { + Pair key = new Pair(schema, comp); + if (compilationContext.getUnSafeComparators().containsKey(key)) { + return compilationContext.getUnSafeComparators().get(key); + } else { + try { + LOG.warn(comp + " does not exist. 
Compiling it immediately"); + return compileUnSafeComparator(schema, comp); + } catch (Throwable t) { + LOG.warn(t); + return comp; + } + } + } else { + throw new IllegalStateException("CODEGEN is disabled"); + } + } + + public TupleComparator getComparator(Schema schema, BaseTupleComparator comp) { if (codeGenEnabled) { Pair key = new Pair(schema, comp); - if (compilationContext.getPrecompiedComparators().containsKey(key)) { - return compilationContext.getPrecompiedComparators().get(key); + if (compilationContext.getComparators().containsKey(key)) { + return compilationContext.getComparators().get(key); } else { try { LOG.warn(comp + " does not exist. Compiling it immediately"); diff --git a/tajo-core/src/main/java/org/apache/tajo/worker/Task.java b/tajo-core/src/main/java/org/apache/tajo/worker/Task.java index 67210eaed3..89c3305fc2 100644 --- a/tajo-core/src/main/java/org/apache/tajo/worker/Task.java +++ b/tajo-core/src/main/java/org/apache/tajo/worker/Task.java @@ -449,13 +449,14 @@ public void run() throws Exception { createPlan(context, plan); this.executor.init(); - String engineType = context.getQueryContext().get(SessionVars.EXEC_ENGINE); - LOG.info(engineType.toUpperCase() + " Executor Engine is chosen."); + String engineType = context.getQueryContext().get(SessionVars.EXECUTION_ENGINE); + LOG.info(engineType.toUpperCase() + " Execution Engine is chosen."); if (engineType.equalsIgnoreCase("volcano")) { while (!killed && !aborted && executor.next() != null) { } } else if (engineType.equalsIgnoreCase("block")) { OffHeapRowBlock rowBlock = new OffHeapRowBlock(executor.getSchema(), 64 * StorageUnit.KB); + rowBlock.setMaxRow(1024); while (!killed && !aborted && executor.nextFetch(rowBlock)) { } rowBlock.release(); diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/codegen/TestTupleComparerCompiler.java b/tajo-core/src/test/java/org/apache/tajo/engine/codegen/TestTupleComparerCompiler.java index 6d9b135e80..5b0940bc41 100644 --- 
a/tajo-core/src/test/java/org/apache/tajo/engine/codegen/TestTupleComparerCompiler.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/codegen/TestTupleComparerCompiler.java @@ -28,6 +28,7 @@ import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.TupleComparator; import org.apache.tajo.storage.VTuple; +import org.apache.tajo.tuple.RowBlockReader; import org.apache.tajo.tuple.offheap.*; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -265,21 +266,40 @@ public void testCompareFloat4Float8() throws Exception { @Test public void testCompareText() throws Exception { SortSpec [][] sortSpecs = createSortSpecs("col6"); - TupleComparator [] comps = createComparators(sortSpecs, false); + TupleComparator [] comps = createComparators(sortSpecs, true); - Tuple t1 = new VTuple(schema.size()); - t1.put(6, DatumFactory.createText("tajo")); + OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 1024); + RowWriter writer = rowBlock.getWriter(); - Tuple t2 = new VTuple(schema.size()); - t2.put(6, DatumFactory.createText("tajo")); + writer.startRow(); + writer.skipField(6); + writer.putText("tajo"); + writer.endRow(); - Tuple t3 = new VTuple(schema.size()); - t3.put(6, DatumFactory.createText("tazo")); + writer.startRow(); + writer.skipField(6); + writer.putText("tajo"); + writer.endRow(); - Tuple t4 = new VTuple(schema.size()); - t4.put(6, NullDatum.get()); + writer.startRow(); + writer.skipField(6); + writer.putText("tazo"); + writer.endRow(); - assertCompareAll(comps, sortSpecs, t1, t2, t3, t4, t4); + writer.startRow(); + writer.endRow(); + + RowBlockReader reader = rowBlock.getReader(); + + ZeroCopyTuple [] tuples = new ZeroCopyTuple[4]; + for (int i = 0; i < 4; i++) { + tuples[i] = new ZeroCopyTuple(); + assertTrue(reader.next(tuples[i])); + } + + assertCompareAll(comps, sortSpecs, tuples[0], tuples[1], tuples[2], tuples[3], tuples[3]); + + rowBlock.release(); } @Test @@ -288,25 +308,40 @@ public void testCompareTextWithNull() throws Exception 
{ new SortSpec(new Column("col5", FLOAT8)), new SortSpec(new Column("col6", TEXT))}; BaseTupleComparator compImpl = new BaseTupleComparator(schema, sortSpecs); - TupleComparator comp = compiler.compile(compImpl, false); + TupleComparator comp = compiler.compile(compImpl, true); - Tuple t1 = new VTuple(schema.size()); - t1.put(5, NullDatum.get()); - t1.put(6, DatumFactory.createText("ARGENTINA")); + OffHeapRowBlock rowBlock = new OffHeapRowBlock(schema, 1024); + RowWriter writer = rowBlock.getWriter(); - Tuple t2 = new VTuple(schema.size()); - t2.put(5, NullDatum.get()); - t2.put(6, DatumFactory.createText("ARGENTINA")); + writer.startRow(); + writer.skipField(6); + writer.putText("ARGENTINA"); + writer.endRow(); - Tuple t3 = new VTuple(schema.size()); - t3.put(5, NullDatum.get()); - t3.put(6, DatumFactory.createText("CANADA")); + writer.startRow(); + writer.skipField(6); + writer.putText("ARGENTINA"); + writer.endRow(); - Tuple t4 = new VTuple(schema.size()); - t4.put(5, NullDatum.get()); - t4.put(6, NullDatum.get()); + writer.startRow(); + writer.skipField(6); + writer.putText("CANADA"); + writer.endRow(); + + writer.startRow(); + writer.endRow(); - assertCompare(comp, sortSpecs, t1, t2, t3, t4, t4); + RowBlockReader reader = rowBlock.getReader(); + + ZeroCopyTuple [] tuples = new ZeroCopyTuple[4]; + for (int i = 0; i < 4; i++) { + tuples[i] = new ZeroCopyTuple(); + assertTrue(reader.next(tuples[i])); + } + + assertCompare(comp, sortSpecs, tuples[0], tuples[1], tuples[2], tuples[3], tuples[3]); + + rowBlock.release(); } private void fillTextColumnToRowBlock(OffHeapRowBlock rowBlock, String text) { diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/eval/ExprTestBase.java b/tajo-core/src/test/java/org/apache/tajo/engine/eval/ExprTestBase.java index 9964a380ea..92cd218626 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/eval/ExprTestBase.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/eval/ExprTestBase.java @@ -32,6 +32,7 @@ import 
org.apache.tajo.conf.TajoConf; import org.apache.tajo.datum.*; import org.apache.tajo.engine.codegen.EvalCodeGenerator; +import org.apache.tajo.engine.codegen.EvalNodeCompiler; import org.apache.tajo.engine.codegen.TajoClassLoader; import org.apache.tajo.engine.json.CoreGsonHelper; import org.apache.tajo.engine.parser.SQLAnalyzer; @@ -247,9 +248,9 @@ public void testEval(OverridableConf overideConf, Schema schema, String tableNam try { targets = getRawTargets(context, query, condition); - EvalCodeGenerator codegen = null; + EvalNodeCompiler compiler = null; if (context.getBool(SessionVars.CODEGEN)) { - codegen = new EvalCodeGenerator(classLoader); + compiler = new EvalNodeCompiler(classLoader); } Tuple outTuple = new VTuple(targets.length); @@ -257,7 +258,7 @@ public void testEval(OverridableConf overideConf, Schema schema, String tableNam EvalNode eval = targets[i].getEvalTree(); if (context.getBool(SessionVars.CODEGEN)) { - eval = codegen.compile(inputSchema, eval); + eval = compiler.compile(inputSchema, eval); } outTuple.put(i, eval.eval(inputSchema, vtuple)); diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/planner/physical/block/TestBlockIteratorExec.java b/tajo-core/src/test/java/org/apache/tajo/engine/planner/physical/block/TestBlockIteratorExec.java index f6a1e0546a..063286ae77 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/planner/physical/block/TestBlockIteratorExec.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/planner/physical/block/TestBlockIteratorExec.java @@ -113,6 +113,7 @@ public static PhysicalExec buildPhysicalPlan(String sql) throws PlanningExceptio outputPath = new Path(workDir, "output"); ctx.setOutputPath(outputPath); ctx.setEnforcer(new Enforcer()); + ctx.getQueryContext().setBool(SessionVars.CODEGEN, false); return physicalPlanner.createPlan(ctx, plan.getRootBlock().getRoot()); } diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestSelectQuery.java 
b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestSelectQuery.java index 41e2f3ea73..284e01463b 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestSelectQuery.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestSelectQuery.java @@ -50,7 +50,7 @@ public TestSelectQuery() { @Test public final void testNonQualifiedNames() throws Exception { - // select l_orderkey, l_partkey from lineitem; + // select l_orderkey, l_partkey, l_comment from lineitem; ResultSet res = executeQuery(); assertResultSet(res); cleanupQuery(res); diff --git a/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result b/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result index f6edb3d2de..84c215cadc 100644 --- a/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result +++ b/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result @@ -25,7 +25,7 @@ Available Session Variables: \set JOIN_PER_SHUFFLE_SIZE [int value] - shuffle output size for join (mb) \set GROUPBY_PER_SHUFFLE_SIZE [int value] - shuffle output size for sort (mb) \set TABLE_PARTITION_PER_SHUFFLE_SIZE [int value] - shuffle output size for partition table write (mb) -\set EXEC_ENGINE [text value] - executor engine types that queries will use. Types: volcano and block (default is volcano) +\set EXECUTION_ENGINE [text value] - executor engine types that queries will use. 
Types: volcano and block (default is volcano) \set EXTSORT_BUFFER_SIZE [long value] - sort buffer size for external sort (mb) \set HASH_JOIN_SIZE_LIMIT [long value] - limited size for hash join (mb) \set INNER_HASH_JOIN_SIZE_LIMIT [long value] - limited size for hash inner join (mb) diff --git a/tajo-storage/src/main/java/org/apache/tajo/tuple/BaseTupleBuilder.java b/tajo-storage/src/main/java/org/apache/tajo/tuple/BaseTupleBuilder.java index e00b97722e..e87c93c988 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/tuple/BaseTupleBuilder.java +++ b/tajo-storage/src/main/java/org/apache/tajo/tuple/BaseTupleBuilder.java @@ -103,8 +103,10 @@ public ZeroCopyTuple buildToZeroCopyTuple() { } public void release() { - UnsafeUtil.free(buffer); - buffer = null; - address = 0; + if (buffer != null) { + UnsafeUtil.free(buffer); + buffer = null; + address = 0; + } } } diff --git a/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/HeapTuple.java b/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/HeapTuple.java index e38555c914..f3fdbb992c 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/HeapTuple.java +++ b/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/HeapTuple.java @@ -1,4 +1,4 @@ -/*** +/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -33,21 +33,24 @@ import static org.apache.tajo.common.TajoDataTypes.DataType; +/** + * Immutable Tuple + */ public class HeapTuple implements Tuple { private static final Unsafe UNSAFE = UnsafeUtil.unsafe; private static final long BASE_OFFSET = UnsafeUtil.ARRAY_BYTE_BASE_OFFSET; - private final byte [] data; - private final DataType [] types; + final byte [] data; + private final DataType[] types; - public HeapTuple(final byte [] bytes, final DataType [] types) { + public HeapTuple(final byte [] bytes, final DataType[] types) { this.data = bytes; this.types = types; } @Override public int size() { - return data.length; + return types.length; } public ByteBuffer nioBuffer() { @@ -58,7 +61,7 @@ private int getFieldOffset(int fieldId) { return UNSAFE.getInt(data, BASE_OFFSET + SizeOf.SIZE_OF_INT + (fieldId * SizeOf.SIZE_OF_INT)); } - private int checkNullAndGetOffset(int fieldId) { + int checkNullAndGetOffset(int fieldId) { int offset = getFieldOffset(fieldId); if (offset == OffHeapRowBlock.NULL_FIELD_OFFSET) { throw new RuntimeException("Invalid Field Access: " + fieldId); @@ -88,22 +91,22 @@ public void clear() { @Override public void put(int fieldId, Datum value) { - throw new UnsupportedException("UnSafeTuple does not support put(int, Datum)."); + throw new UnsupportedException("HeapTuple does not support put(int, Datum)."); } @Override public void put(int fieldId, Datum[] values) { - throw new UnsupportedException("UnSafeTuple does not support put(int, Datum [])."); + throw new UnsupportedException("HeapTuple does not support put(int, Datum [])."); } @Override public void put(int fieldId, Tuple tuple) { - throw new UnsupportedException("UnSafeTuple does not support put(int, Tuple)."); + throw new UnsupportedException("HeapTuple does not support put(int, Tuple)."); } @Override public void put(Datum[] values) { - throw new UnsupportedException("UnSafeTuple does not support put(Datum 
[])."); + throw new UnsupportedException("HeapTuple does not support put(Datum [])."); } @Override @@ -115,13 +118,15 @@ public Datum get(int fieldId) { switch (types[fieldId].getType()) { case BOOLEAN: return DatumFactory.createBool(getBool(fieldId)); + case CHAR: + return DatumFactory.createChar(getBytes(fieldId)); case INT1: case INT2: return DatumFactory.createInt2(getInt2(fieldId)); case INT4: return DatumFactory.createInt4(getInt4(fieldId)); case INT8: - return DatumFactory.createInt8(getInt4(fieldId)); + return DatumFactory.createInt8(getInt8(fieldId)); case FLOAT4: return DatumFactory.createFloat4(getFloat4(fieldId)); case FLOAT8: @@ -138,6 +143,8 @@ public Datum get(int fieldId) { return getInterval(fieldId); case INET4: return DatumFactory.createInet4(getInt4(fieldId)); + case BLOB: + return DatumFactory.createBlob(getBytes(fieldId)); case PROTOBUF: return getProtobufDatum(fieldId); default: @@ -251,7 +258,7 @@ public Tuple clone() throws CloneNotSupportedException { @Override public Datum[] getValues() { - Datum [] datums = new Datum[size()]; + Datum[] datums = new Datum[size()]; for (int i = 0; i < size(); i++) { if (contains(i)) { datums[i] = get(i); diff --git a/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/HeapTupleBytesComparator.java b/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/HeapTupleBytesComparator.java new file mode 100644 index 0000000000..31840a71cb --- /dev/null +++ b/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/HeapTupleBytesComparator.java @@ -0,0 +1,105 @@ +/* + * Lisensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.tuple.offheap; + +import com.google.common.primitives.Longs; +import com.google.common.primitives.UnsignedLongs; +import org.apache.tajo.util.SizeOf; +import org.apache.tajo.util.UnsafeUtil; +import sun.misc.Unsafe; + +import java.nio.ByteOrder; + +/** + * It directly accesses UTF bytes in HeapTuple without any copy. It is used by compiled TupleComparator. + */ +public class HeapTupleBytesComparator { + private static final Unsafe UNSAFE = UnsafeUtil.unsafe; + + static final boolean littleEndian = + ByteOrder.nativeOrder().equals(ByteOrder.LITTLE_ENDIAN); + + public static int compare(HeapTuple t1, int fieldIdx1, HeapTuple t2, int fieldIdx2) { + long offset1 = t1.checkNullAndGetOffset(fieldIdx1); + long offset2 = t2.checkNullAndGetOffset(fieldIdx2); + + int lstrLen = UNSAFE.getInt(t1.data, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset1); + int rstrLen = UNSAFE.getInt(t2.data, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset2); + + offset1 += SizeOf.SIZE_OF_INT; + offset2 += SizeOf.SIZE_OF_INT; + + int minLength = Math.min(lstrLen, rstrLen); + int minWords = minLength / Longs.BYTES; + + /* + * Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a + * time is no slower than comparing 4 bytes at a time even on 32-bit. + * On the other hand, it is substantially faster on 64-bit. 
+ */ + for (int i = 0; i < minWords * Longs.BYTES; i += Longs.BYTES) { + long lw = UNSAFE.getLong(t1.data, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset1); + long rw = UNSAFE.getLong(t2.data, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset2); + long diff = lw ^ rw; + + if (diff != 0) { + if (!littleEndian) { + return UnsignedLongs.compare(lw, rw); + } + + // Use binary search + int n = 0; + int y; + int x = (int) diff; + if (x == 0) { + x = (int) (diff >>> 32); + n = 32; + } + + y = x << 16; + if (y == 0) { + n += 16; + } else { + x = y; + } + + y = x << 8; + if (y == 0) { + n += 8; + } + return (int) (((lw >>> n) & 0xFFL) - ((rw >>> n) & 0xFFL)); + } + + offset1 += SizeOf.SIZE_OF_LONG; + offset2 += SizeOf.SIZE_OF_LONG; + } + + // The epilogue to cover the last (minLength % 8) elements. + for (int i = minWords * Longs.BYTES; i < minLength; i++) { + int result = UNSAFE.getByte(t1.data, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset1) - + UNSAFE.getByte(t2.data, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset2); + offset1++; + offset2++; + if (result != 0) { + return result; + } + } + return lstrLen - rstrLen; + } +} diff --git a/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/OffHeapRowWriter.java b/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/OffHeapRowWriter.java index a1e5dbbfb2..22253cb22f 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/OffHeapRowWriter.java +++ b/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/OffHeapRowWriter.java @@ -18,6 +18,7 @@ package org.apache.tajo.tuple.offheap; +import org.apache.tajo.annotation.UsedByJIT; import org.apache.tajo.common.TajoDataTypes; import org.apache.tajo.datum.IntervalDatum; import org.apache.tajo.datum.ProtobufDatum; @@ -110,23 +111,49 @@ public void endRow() { forward(curOffset); } + public int currentField() { + return curFieldIdx; + } + public void skipField() { - fieldOffsets[curFieldIdx++] = OffHeapRowBlock.NULL_FIELD_OFFSET; + fieldOffsets[curFieldIdx] = OffHeapRowBlock.NULL_FIELD_OFFSET; + 
curFieldIdx++; + } + + public void skipField(int num) { + for (int i = curFieldIdx; i < num && i < dataTypes.length; i++) { + fieldOffsets[curFieldIdx] = OffHeapRowBlock.NULL_FIELD_OFFSET; + curFieldIdx++; + } } + @UsedByJIT private void forwardField() { - fieldOffsets[curFieldIdx++] = curOffset; + fieldOffsets[curFieldIdx] = curOffset; + curFieldIdx++; } + @UsedByJIT public void putBool(boolean val) { ensureSize(SizeOf.SIZE_OF_BOOL); forwardField(); - OffHeapMemory.UNSAFE.putByte(recordStartAddr() + curOffset, (byte) (val ? 0x01 : 0x00)); + OffHeapMemory.UNSAFE.putByte(recordStartAddr() + curOffset, (byte) (val ? 0x01 : 0x02)); curOffset += SizeOf.SIZE_OF_BOOL; } + @UsedByJIT + public void putBool(byte val) { + ensureSize(SizeOf.SIZE_OF_BOOL); + forwardField(); + + OffHeapMemory.UNSAFE.putByte(recordStartAddr() + curOffset, val); + + curOffset += SizeOf.SIZE_OF_BOOL; + } + + @UsedByJIT public void putInt2(short val) { ensureSize(SizeOf.SIZE_OF_SHORT); forwardField(); @@ -135,6 +162,7 @@ public void putInt2(short val) { curOffset += SizeOf.SIZE_OF_SHORT; } + @UsedByJIT public void putInt4(int val) { ensureSize(SizeOf.SIZE_OF_INT); forwardField(); @@ -143,6 +171,7 @@ public void putInt4(int val) { curOffset += SizeOf.SIZE_OF_INT; } + @UsedByJIT public void putInt8(long val) { ensureSize(SizeOf.SIZE_OF_LONG); forwardField(); @@ -151,6 +180,7 @@ public void putInt8(long val) { curOffset += SizeOf.SIZE_OF_LONG; } + @UsedByJIT public void putFloat4(float val) { ensureSize(SizeOf.SIZE_OF_FLOAT); forwardField(); @@ -159,6 +189,7 @@ public void putFloat4(float val) { curOffset += SizeOf.SIZE_OF_FLOAT; } + @UsedByJIT public void putFloat8(double val) { ensureSize(SizeOf.SIZE_OF_DOUBLE); forwardField(); @@ -167,11 +198,13 @@ public void putFloat8(double val) { curOffset += SizeOf.SIZE_OF_DOUBLE; } + @UsedByJIT public void putText(String val) { byte[] bytes = val.getBytes(TextDatum.DEFAULT_CHARSET); putText(bytes); } + @UsedByJIT public void putText(byte[] val) { int bytesLen 
= val.length; @@ -186,6 +219,23 @@ public void putText(byte[] val) { curOffset += bytesLen; } + @UsedByJIT + public void copyTextFrom(UnSafeTuple tuple, int fieldIdx) { + long address = tuple.getFieldAddr(fieldIdx); + int strLen = OffHeapMemory.UNSAFE.getInt(address); + address += SizeOf.SIZE_OF_INT; + + ensureSize(SizeOf.SIZE_OF_INT + strLen); + forwardField(); + + OffHeapMemory.UNSAFE.putInt(recordStartAddr() + curOffset, strLen); + curOffset += SizeOf.SIZE_OF_INT; + + OffHeapMemory.UNSAFE.copyMemory(null, address, null, recordStartAddr() + curOffset, strLen); + curOffset += strLen; + } + + @UsedByJIT public void putBlob(byte[] val) { int bytesLen = val.length; @@ -200,18 +250,22 @@ public void putBlob(byte[] val) { curOffset += bytesLen; } + @UsedByJIT public void putTimestamp(long val) { putInt8(val); } + @UsedByJIT public void putDate(int val) { putInt4(val); } + @UsedByJIT public void putTime(long val) { putInt8(val); } + @UsedByJIT public void putInterval(IntervalDatum val) { ensureSize(SizeOf.SIZE_OF_INT + SizeOf.SIZE_OF_LONG); forwardField(); @@ -223,10 +277,12 @@ public void putInterval(IntervalDatum val) { curOffset += SizeOf.SIZE_OF_INT + SizeOf.SIZE_OF_LONG; } + @UsedByJIT public void putInet4(int val) { putInt4(val); } + @UsedByJIT public void putProtoDatum(ProtobufDatum val) { putBlob(val.asByteArray()); } diff --git a/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/RowWriter.java b/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/RowWriter.java index 59f8d1bd8b..f6d5ba2fb1 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/RowWriter.java +++ b/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/RowWriter.java @@ -41,8 +41,12 @@ public interface RowWriter { public void skipField(); + public void skipField(int num); + public void putBool(boolean val); + public void putBool(byte val); + public void putInt2(short val); public void putInt4(int val); diff --git 
a/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/UnSafeTuple.java b/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/UnSafeTuple.java index 6f4d385043..91b8e85cb7 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/UnSafeTuple.java +++ b/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/UnSafeTuple.java @@ -90,7 +90,7 @@ private int getFieldOffset(int fieldId) { public long getFieldAddr(int fieldId) { int fieldOffset = getFieldOffset(fieldId); - if (fieldOffset == -1) { + if (fieldOffset == OffHeapRowBlock.NULL_FIELD_OFFSET) { throw new RuntimeException("Invalid Field Access: " + fieldId); } return bb.address() + relativePos + fieldOffset; From 068b4a190d2f4646b6616651fbf77cbf43a951b2 Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Tue, 30 Sep 2014 23:46:22 -0700 Subject: [PATCH 6/6] Removed direct use of UnSafe's offsets. --- .../tajo/tuple/offheap/HeapTupleBytesComparator.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/HeapTupleBytesComparator.java b/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/HeapTupleBytesComparator.java index 31840a71cb..5298286ada 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/HeapTupleBytesComparator.java +++ b/tajo-storage/src/main/java/org/apache/tajo/tuple/offheap/HeapTupleBytesComparator.java @@ -39,8 +39,8 @@ public static int compare(HeapTuple t1, int fieldIdx1, HeapTuple t2, int fieldId long offset1 = t1.checkNullAndGetOffset(fieldIdx1); long offset2 = t2.checkNullAndGetOffset(fieldIdx2); - int lstrLen = UNSAFE.getInt(t1.data, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset1); - int rstrLen = UNSAFE.getInt(t2.data, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset2); + int lstrLen = UNSAFE.getInt(t1.data, UnsafeUtil.ARRAY_BYTE_BASE_OFFSET + offset1); + int rstrLen = UNSAFE.getInt(t2.data, UnsafeUtil.ARRAY_BYTE_BASE_OFFSET + offset2); offset1 += SizeOf.SIZE_OF_INT; offset2 += 
SizeOf.SIZE_OF_INT; @@ -54,8 +54,8 @@ public static int compare(HeapTuple t1, int fieldIdx1, HeapTuple t2, int fieldId * On the other hand, it is substantially faster on 64-bit. */ for (int i = 0; i < minWords * Longs.BYTES; i += Longs.BYTES) { - long lw = UNSAFE.getLong(t1.data, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset1); - long rw = UNSAFE.getLong(t2.data, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset2); + long lw = UNSAFE.getLong(t1.data, UnsafeUtil.ARRAY_BYTE_BASE_OFFSET + offset1); + long rw = UNSAFE.getLong(t2.data, UnsafeUtil.ARRAY_BYTE_BASE_OFFSET + offset2); long diff = lw ^ rw; if (diff != 0) { @@ -92,8 +92,8 @@ public static int compare(HeapTuple t1, int fieldIdx1, HeapTuple t2, int fieldId // The epilogue to cover the last (minLength % 8) elements. for (int i = minWords * Longs.BYTES; i < minLength; i++) { - int result = UNSAFE.getByte(t1.data, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset1) - - UNSAFE.getByte(t2.data, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset2); + int result = UNSAFE.getByte(t1.data, UnsafeUtil.ARRAY_BYTE_BASE_OFFSET + offset1) - + UNSAFE.getByte(t2.data, UnsafeUtil.ARRAY_BYTE_BASE_OFFSET + offset2); offset1++; offset2++; if (result != 0) {