diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/QueryGenerator.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/QueryGenerator.java index 323e6e0178ce..e57f089496c1 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/QueryGenerator.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/QueryGenerator.java @@ -22,6 +22,7 @@ import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -37,6 +38,7 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericRecord; +import org.apache.commons.codec.binary.Hex; import org.apache.commons.lang.StringUtils; import org.apache.pinot.spi.utils.JsonUtils; @@ -133,7 +135,7 @@ public QueryGenerator(List avroFiles, String pinotTableName, String h2Tabl _multiValueColumnMaxNumElements.put(fieldName, 0); } else { _singleValueColumnNames.add(fieldName); - if (type != Schema.Type.STRING && type != Schema.Type.BOOLEAN) { + if (type != Schema.Type.STRING && type != Schema.Type.BOOLEAN && type != Schema.Type.BYTES) { _singleValueNumericalColumnNames.add(fieldName); } } @@ -182,7 +184,12 @@ public QueryGenerator(List avroFiles, String pinotTableName, String h2Tabl * @param avroValue Avro value. */ private static void storeAvroValueIntoValueSet(Set valueSet, Object avroValue) { - if (avroValue instanceof Number) { + if (avroValue instanceof ByteBuffer) { + // for raw bytes + String hexRaw = StringUtils.stripStart(Hex.encodeHexString(((ByteBuffer) avroValue).array()), "0"); + String hexAligned = (hexRaw.length() & 0x1) == 0 ? hexRaw : "0" + hexRaw; + valueSet.add("'" + hexAligned + "'"); + } else if (avroValue instanceof Number) { // For Number object, store raw value. 
valueSet.add(avroValue.toString()); } else { @@ -1012,8 +1019,8 @@ public QueryFragment generatePredicate(String columnName) { } } - String pql = String.format(" REGEXP_LIKE(%s, '%s')", columnName, pqlRegexBuilder.toString()); - String sql = String.format(" REGEXP_LIKE(%s, '%s', 'i')", columnName, sqlRegexBuilder.toString()); + String pql = String.format("REGEXP_LIKE(%s, '%s')", columnName, pqlRegexBuilder.toString()); + String sql = String.format("REGEXP_LIKE(%s, '%s', 'i')", columnName, sqlRegexBuilder.toString()); return new StringQueryFragment(pql, sql); } else { String equalsPredicate = String.format("%s = %s", columnName, value); diff --git a/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroSchemaUtil.java b/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroSchemaUtil.java index 9ff22eec36d5..d19aea733a5f 100644 --- a/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroSchemaUtil.java +++ b/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroSchemaUtil.java @@ -69,6 +69,9 @@ public static ObjectNode toAvroSchemaJsonObject(FieldSpec fieldSpec) { case STRING: jsonSchema.set("type", convertStringsToJsonArray("null", "string")); return jsonSchema; + case BYTES: + jsonSchema.set("type", convertStringsToJsonArray("null", "bytes")); + return jsonSchema; default: throw new UnsupportedOperationException(); } diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/GenerateDataCommand.java b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/GenerateDataCommand.java index fe3e182d47e8..598f4a86a87a 100644 --- a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/GenerateDataCommand.java +++ b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/GenerateDataCommand.java @@ -127,6 +127,7 @@ 
public boolean execute() List columns = new LinkedList<>(); final HashMap dataTypes = new HashMap<>(); final HashMap fieldTypes = new HashMap<>(); + final HashMap singleValueFlags = new HashMap<>(); final HashMap timeUnits = new HashMap<>(); final HashMap cardinality = new HashMap<>(); @@ -136,7 +137,7 @@ public boolean execute() buildCardinalityRangeMaps(_schemaAnnFile, cardinality, range, pattern); final DataGeneratorSpec spec = - buildDataGeneratorSpec(schema, columns, dataTypes, fieldTypes, timeUnits, cardinality, range, pattern); + buildDataGeneratorSpec(schema, columns, dataTypes, fieldTypes, singleValueFlags, timeUnits, cardinality, range, pattern); final DataGenerator gen = new DataGenerator(); gen.init(spec); @@ -175,14 +176,16 @@ private void buildCardinalityRangeMaps(String file, HashMap car } private DataGeneratorSpec buildDataGeneratorSpec(Schema schema, List columns, - HashMap dataTypes, HashMap fieldTypes, HashMap timeUnits, - HashMap cardinality, HashMap range, HashMap> pattern) { + HashMap dataTypes, HashMap fieldTypes, HashMap singleValueFlags, + HashMap timeUnits, HashMap cardinality, HashMap range, + HashMap> pattern) { for (final FieldSpec fs : schema.getAllFieldSpecs()) { String col = fs.getName(); columns.add(col); dataTypes.put(col, fs.getDataType()); fieldTypes.put(col, fs.getFieldType()); + singleValueFlags.put(col, fs.isSingleValueField()); switch (fs.getFieldType()) { case DIMENSION: @@ -215,8 +218,8 @@ private DataGeneratorSpec buildDataGeneratorSpec(Schema schema, List col } } - return new DataGeneratorSpec(columns, cardinality, range, pattern, dataTypes, fieldTypes, timeUnits, FileFormat.AVRO, - _outDir, _overwrite); + return new DataGeneratorSpec(columns, cardinality, range, pattern, dataTypes, fieldTypes, singleValueFlags, + timeUnits, FileFormat.AVRO, _outDir, _overwrite); } public static void main(String[] args) diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/data/generator/DataGenerator.java 
b/pinot-tools/src/main/java/org/apache/pinot/tools/data/generator/DataGenerator.java index 2cdc1cc1f143..428ea59808e0 100644 --- a/pinot-tools/src/main/java/org/apache/pinot/tools/data/generator/DataGenerator.java +++ b/pinot-tools/src/main/java/org/apache/pinot/tools/data/generator/DataGenerator.java @@ -156,7 +156,7 @@ private FieldSpec buildSpec(DataGeneratorSpec genSpec, String column) { spec.setName(column); spec.setDataType(dataType); - spec.setSingleValueField(true); + spec.setSingleValueField(genSpec.getSingleValueFlagsMap().getOrDefault(column, true)); return spec; } @@ -178,8 +178,8 @@ public static void main(String[] args) cardinality.put(col, 1000); } final DataGeneratorSpec spec = - new DataGeneratorSpec(Arrays.asList(columns), cardinality, range, template, dataTypes, fieldTypes, timeUnits, - FileFormat.AVRO, "/tmp/out", true); + new DataGeneratorSpec(Arrays.asList(columns), cardinality, range, template, dataTypes, fieldTypes, + new HashMap<>(), timeUnits, FileFormat.AVRO, "/tmp/out", true); final DataGenerator gen = new DataGenerator(); gen.init(spec); diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/data/generator/DataGeneratorSpec.java b/pinot-tools/src/main/java/org/apache/pinot/tools/data/generator/DataGeneratorSpec.java index c98cd4d0e8d5..005da055dfec 100644 --- a/pinot-tools/src/main/java/org/apache/pinot/tools/data/generator/DataGeneratorSpec.java +++ b/pinot-tools/src/main/java/org/apache/pinot/tools/data/generator/DataGeneratorSpec.java @@ -41,6 +41,7 @@ public class DataGeneratorSpec { private final Map dataTypesMap; private final Map fieldTypesMap; + private final Map singleValueFlagsMap; private final Map timeUnitMap; private final FileFormat outputFileFormat; @@ -49,13 +50,14 @@ public class DataGeneratorSpec { public DataGeneratorSpec() { this(new ArrayList(), new HashMap<>(), new HashMap<>(), new HashMap<>(), - new HashMap<>(), new HashMap<>(), new HashMap<>(), + new HashMap<>(), new HashMap<>(), new HashMap<>(), new 
HashMap<>(), FileFormat.AVRO, "/tmp/dataGen", true); } public DataGeneratorSpec(List columns, Map cardinalityMap, Map rangeMap, - Map> patternMap, Map dataTypesMap, Map fieldTypesMap, Map timeUnitMap, - FileFormat format, String outputDir, boolean override) { + Map> patternMap, Map dataTypesMap, Map fieldTypesMap, + Map singleValueFlagsMap, Map timeUnitMap, FileFormat format, String outputDir, + boolean override) { this.columns = columns; this.cardinalityMap = cardinalityMap; this.rangeMap = rangeMap; @@ -67,6 +69,7 @@ public DataGeneratorSpec(List columns, Map cardinalityM this.dataTypesMap = dataTypesMap; this.fieldTypesMap = fieldTypesMap; + this.singleValueFlagsMap = singleValueFlagsMap; this.timeUnitMap = timeUnitMap; } @@ -78,6 +81,10 @@ public Map getFieldTypesMap() { return fieldTypesMap; } + public Map getSingleValueFlagsMap() { + return singleValueFlagsMap; + } + public Map getTimeUnitMap() { return timeUnitMap; } diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/data/generator/GeneratorFactory.java b/pinot-tools/src/main/java/org/apache/pinot/tools/data/generator/GeneratorFactory.java index 215bf9fa5fdc..5430610ead0c 100644 --- a/pinot-tools/src/main/java/org/apache/pinot/tools/data/generator/GeneratorFactory.java +++ b/pinot-tools/src/main/java/org/apache/pinot/tools/data/generator/GeneratorFactory.java @@ -18,6 +18,7 @@ */ package org.apache.pinot.tools.data.generator; +import org.apache.commons.lang.RandomStringUtils; import org.apache.pinot.spi.data.FieldSpec.DataType; import java.util.Map; @@ -46,6 +47,8 @@ public static Generator getGeneratorFor(DataType dataType, int start, int end) { return new RangeFloatGenerator(start, end); case DOUBLE: return new RangeDoubleGenerator(start, end); + case BYTES: + return new RangeBytesGenerator(start, end); default: throw new RuntimeException(String.format("Invalid datatype '%s'", dataType)); } diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/data/generator/NumberGenerator.java 
b/pinot-tools/src/main/java/org/apache/pinot/tools/data/generator/NumberGenerator.java index 32d697fcb26d..9900cfd1c960 100644 --- a/pinot-tools/src/main/java/org/apache/pinot/tools/data/generator/NumberGenerator.java +++ b/pinot-tools/src/main/java/org/apache/pinot/tools/data/generator/NumberGenerator.java @@ -18,9 +18,12 @@ */ package org.apache.pinot.tools.data.generator; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; import java.util.Random; + +import com.google.common.primitives.Longs; import org.apache.pinot.spi.data.FieldSpec.DataType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -64,6 +67,8 @@ public void init() { intValues.add(new Integer(i)); } break; + case BYTES: + // use long case case LONG: longValues = new ArrayList(); final long longStart = rand.nextInt(cardinality); @@ -113,6 +118,8 @@ public Object next() { return floatValues.get(random.nextInt(cardinality)); case DOUBLE: return doubleValues.get(random.nextInt(cardinality)); + case BYTES: + return ByteBuffer.wrap(Longs.toByteArray(longValues.get(random.nextInt(cardinality)))); default: throw new RuntimeException("number generator can only accept a column of type number and this : " + columnType + " is not a supported number type"); diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/data/generator/RangeBytesGenerator.java b/pinot-tools/src/main/java/org/apache/pinot/tools/data/generator/RangeBytesGenerator.java new file mode 100644 index 000000000000..a29de09d0689 --- /dev/null +++ b/pinot-tools/src/main/java/org/apache/pinot/tools/data/generator/RangeBytesGenerator.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.tools.data.generator; + +import com.google.common.primitives.Ints; + +import java.nio.ByteBuffer; +import java.util.Random; + + +public class RangeBytesGenerator implements Generator { + private final int _start; + private final int _end; + private final int _delta; + + Random _randGen = new Random(System.currentTimeMillis()); + + public RangeBytesGenerator(int r1, int r2) { + _start = (r1 < r2) ? r1 : r2; + _end = (r1 > r2) ? 
r1 : r2; + + _delta = _end - _start; + } + + @Override + public void init() { + } + + @Override + public Object next() { + return ByteBuffer.wrap(Ints.toByteArray(_start + _randGen.nextInt(_delta))); + } +} diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/perf/PerfBenchmarkDriver.java b/pinot-tools/src/main/java/org/apache/pinot/tools/perf/PerfBenchmarkDriver.java index f56bc383ac65..1d07bb5951f4 100644 --- a/pinot-tools/src/main/java/org/apache/pinot/tools/perf/PerfBenchmarkDriver.java +++ b/pinot-tools/src/main/java/org/apache/pinot/tools/perf/PerfBenchmarkDriver.java @@ -104,7 +104,7 @@ public class PerfBenchmarkDriver { private PinotHelixResourceManager _helixResourceManager; public PerfBenchmarkDriver(PerfBenchmarkDriverConf conf) { - this(conf, "/tmp/", "HEAP", null, false); + this(conf, "/tmp/", "HEAP", null, conf.isVerbose()); } public PerfBenchmarkDriver(PerfBenchmarkDriverConf conf, String tempDir, String loadMode, String segmentFormatVersion, @@ -386,47 +386,68 @@ private void postQueries() public JsonNode postQuery(String query) throws Exception { - return postQuery(query, null); + return postQuery(_conf.getDialect(), query, null); } public JsonNode postQuery(String query, String optimizationFlags) + throws Exception { + return postQuery(_conf.getDialect(), query, optimizationFlags); + } + + public JsonNode postQuery(String dialect, String query, String optimizationFlags) throws Exception { ObjectNode requestJson = JsonUtils.newObjectNode(); - requestJson.put("pql", query); + requestJson.put(dialect, query); if (optimizationFlags != null && !optimizationFlags.isEmpty()) { requestJson.put("debugOptions", "optimizationFlags=" + optimizationFlags); } long start = System.currentTimeMillis(); - URLConnection conn = new URL(_brokerBaseApiUrl + "/query").openConnection(); + String queryUrl = _brokerBaseApiUrl + "/query"; + if (!"pql".equals(dialect)) { + queryUrl = _brokerBaseApiUrl + "/query/" + dialect; + } + + URLConnection conn = new 
URL(queryUrl).openConnection(); conn.setDoOutput(true); try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(conn.getOutputStream(), - StandardCharsets.UTF_8))) { + StandardCharsets.UTF_8))) { String requestString = requestJson.toString(); writer.write(requestString); writer.flush(); - StringBuilder stringBuilder = new StringBuilder(); - try (BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream(), - StandardCharsets.UTF_8))) { - String line; - while ((line = reader.readLine()) != null) { - stringBuilder.append(line); + try { + StringBuilder stringBuilder = new StringBuilder(); + try (BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream(), + StandardCharsets.UTF_8))) { + String line; + while ((line = reader.readLine()) != null) { + stringBuilder.append(line); + } } - } - long totalTime = System.currentTimeMillis() - start; - String responseString = stringBuilder.toString(); - ObjectNode responseJson = (ObjectNode) JsonUtils.stringToJsonNode(responseString); - responseJson.put("totalTime", totalTime); + long totalTime = System.currentTimeMillis() - start; + String responseString = stringBuilder.toString(); + ObjectNode responseJson = (ObjectNode) JsonUtils.stringToJsonNode(responseString); + responseJson.put("totalTime", totalTime); + + if (_verbose) { + if (!responseJson.has("exceptions") || responseJson.get("exceptions").size() <= 0) { + LOGGER.info("requestString: {}", requestString); + LOGGER.info("responseString: {}", responseString); + } else { + LOGGER.error("requestString: {}", requestString); + LOGGER.error("responseString: {}", responseString); + } + } - if (_verbose && (responseJson.get("numDocsScanned").asLong() > 0)) { - LOGGER.info("requestString: {}", requestString); - LOGGER.info("responseString: {}", responseString); + return responseJson; + } catch (Exception e) { + LOGGER.error("requestString: {}", requestString); + throw e; } - return responseJson; } } diff --git 
a/pinot-tools/src/main/java/org/apache/pinot/tools/perf/PerfBenchmarkDriverConf.java b/pinot-tools/src/main/java/org/apache/pinot/tools/perf/PerfBenchmarkDriverConf.java index c7436eb17537..532345d9a3bb 100644 --- a/pinot-tools/src/main/java/org/apache/pinot/tools/perf/PerfBenchmarkDriverConf.java +++ b/pinot-tools/src/main/java/org/apache/pinot/tools/perf/PerfBenchmarkDriverConf.java @@ -72,6 +72,10 @@ public class PerfBenchmarkDriverConf { String resultsOutputDirectory; + boolean verbose = false; + + String dialect = "pql"; + public String getClusterName() { return clusterName; } @@ -263,4 +267,20 @@ public String getSchemaFileNamePath() { public void setSchemaFileNamePath(String schemaFileNamePath) { this.schemaFileNamePath = schemaFileNamePath; } + + public boolean isVerbose() { + return verbose; + } + + public void setVerbose(boolean verbose) { + this.verbose = verbose; + } + + public String getDialect() { + return dialect; + } + + public void setDialect(String dialect) { + this.dialect = dialect; + } } diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/perf/QueryRunner.java b/pinot-tools/src/main/java/org/apache/pinot/tools/perf/QueryRunner.java index d9d6d6a8c43d..de1a65e321b6 100644 --- a/pinot-tools/src/main/java/org/apache/pinot/tools/perf/QueryRunner.java +++ b/pinot-tools/src/main/java/org/apache/pinot/tools/perf/QueryRunner.java @@ -79,6 +79,10 @@ public class QueryRunner extends AbstractBaseCommand implements Command { private int _queueDepth = 64; @Option(name = "-timeout", required = false, metaVar = "", usage = "Timeout in milliseconds for completing all queries (default: unlimited).") private long _timeout = 0; + @Option(name = "-verbose", required = false, usage = "Enable verbose query logging (default: false).") + private boolean _verbose = false; + @Option(name = "-dialect", required = false, metaVar = "", usage = "Query dialect to use (pql|sql).") + private String _dialect = "pql"; @Option(name = "-help", required = false, help = 
true, aliases = {"-h", "--h", "--help"}, usage = "Print this message.") private boolean _help; @@ -134,6 +138,11 @@ public boolean execute() printUsage(); return false; } + if (!Arrays.asList("pql", "sql").contains(_dialect)) { + LOGGER.error("Argument dialect must be one of 'pql' or 'sql'"); + printUsage(); + return false; + } LOGGER.info("Start query runner targeting broker: {}:{}", _brokerHost, _brokerPort); PerfBenchmarkDriverConf conf = new PerfBenchmarkDriverConf(); @@ -144,6 +153,8 @@ public boolean execute() conf.setStartController(false); conf.setStartBroker(false); conf.setStartServer(false); + conf.setVerbose(_verbose); + conf.setDialect(_dialect); Stream queries = makeQueries( IOUtils.readLines(new FileInputStream(_queryFile)),