From 278144efb8af7f4f793d3f15dc0b86b4a16c5823 Mon Sep 17 00:00:00 2001
From: jon-wei
Date: Fri, 12 Jan 2018 13:39:17 -0800
Subject: [PATCH 01/10] Add more indexing task status and error reporting

---
 .../data/input/impl/MapInputRowParser.java | 2 +-
 .../java/io/druid/indexer/IngestionState.java | 28 +
 .../io/druid/indexer/TaskMetricsGetter.java | 29 +
 .../io/druid/indexer/TaskMetricsUtils.java | 47 ++
 .../java/io/druid/indexer/TaskStatusPlus.java | 67 ++-
 .../java/io/druid/utils/CircularBuffer.java | 78 +++
 .../io/druid/indexer/TaskStatusPlusTest.java | 5 +-
 .../FilteredAggregatorBenchmark.java | 2 +-
 .../IncrementalIndexRowTypeBenchmark.java | 6 +-
 .../indexing/IndexIngestionBenchmark.java | 2 +-
 .../src/main/java/io/druid/indexer/Jobby.java | 15 +
 .../input/orc/OrcIndexGeneratorJobTest.java | 4 +-
 .../druid/indexing/kafka/KafkaIndexTask.java | 353 +++++++----
 .../indexing/kafka/KafkaTuningConfig.java | 63 +-
 .../kafka/supervisor/KafkaSupervisorSpec.java | 3 +
 .../KafkaSupervisorTuningConfig.java | 13 +-
 .../indexing/kafka/KafkaIndexTaskTest.java | 247 +++++++-
 .../indexing/kafka/KafkaTuningConfigTest.java | 3 +
 .../kafka/supervisor/KafkaSupervisorTest.java | 3 +
 .../indexer/DetermineHashedPartitionsJob.java | 47 +-
 .../druid/indexer/DeterminePartitionsJob.java | 48 +-
 .../HadoopDruidDetermineConfigurationJob.java | 25 +-
 .../indexer/HadoopDruidIndexerConfig.java | 16 +-
 .../druid/indexer/HadoopDruidIndexerJob.java | 26 +-
 .../indexer/HadoopDruidIndexerMapper.java | 76 ++-
 .../io/druid/indexer/HadoopTuningConfig.java | 42 +-
 .../io/druid/indexer/IndexGeneratorJob.java | 77 ++-
 .../java/io/druid/indexer/InputRowSerde.java | 62 +-
 .../main/java/io/druid/indexer/JobHelper.java | 39 +-
 .../src/main/java/io/druid/indexer/Utils.java | 26 +
 .../indexer/BatchDeltaIngestionTest.java | 4 +-
 .../DetermineHashedPartitionsJobTest.java | 2 +
 .../indexer/DeterminePartitionsJobTest.java | 2 +
 .../indexer/HadoopDruidIndexerConfigTest.java | 4 +
 .../indexer/HadoopDruidIndexerMapperTest.java | 65 ++
 .../druid/indexer/HadoopTuningConfigTest.java | 2 +
 .../indexer/IndexGeneratorCombinerTest.java | 8 +-
 .../druid/indexer/IndexGeneratorJobTest.java | 4 +-
 .../io/druid/indexer/InputRowSerdeTest.java | 42 +-
 .../java/io/druid/indexer/JobHelperTest.java | 2 +
 .../indexer/path/GranularityPathSpecTest.java | 2 +
 .../updater/HadoopConverterJobTest.java | 4 +-
 .../io/druid/indexing/common/TaskStatus.java | 59 +-
 .../common/index/YeOldePlumberSchool.java | 2 +-
 .../indexing/common/task/CompactionTask.java | 12 +-
 .../indexing/common/task/HadoopIndexTask.java | 553 ++++++++++++++++--
 .../indexing/common/task/HadoopTask.java | 31 +
 .../druid/indexing/common/task/IndexTask.java | 529 ++++++++++++++---
 .../overlord/ThreadPoolTaskRunner.java | 4 +-
 .../overlord/http/OverlordResource.java | 10 +-
 .../common/task/CompactionTaskTest.java | 29 +-
 .../indexing/common/task/IndexTaskTest.java | 448 +++++++++++++-
 .../indexing/common/task/TaskSerdeTest.java | 47 +-
 .../indexing/overlord/TaskLifecycleTest.java | 67 ++-
 .../util/common/parsers/ObjectFlatteners.java | 2 +-
 .../util/common/parsers/ParseException.java | 12 +
 .../segment/incremental/IncrementalIndex.java | 74 ++-
 .../IncrementalIndexAddResult.java | 52 ++
 .../incremental/OffheapIncrementalIndex.java | 5 +-
 .../incremental/OnheapIncrementalIndex.java | 28 +-
 .../incremental/IncrementalIndexTest.java | 34 +-
 .../OnheapIncrementalIndexBenchmark.java | 5 +-
 .../incremental/TimeAndDimsCompTest.java | 12 +-
 .../druid/segment/indexing/TuningConfig.java | 3 +
.../realtime/FireDepartmentMetrics.java | 24 + ...ireDepartmentMetricsTaskMetricsGetter.java | 81 +++ .../realtime/appenderator/Appenderator.java | 18 +- .../AppenderatorDriverAddResult.java | 28 +- .../appenderator/AppenderatorImpl.java | 7 +- .../appenderator/BaseAppenderatorDriver.java | 3 +- .../realtime/plumber/RealtimePlumber.java | 2 +- .../druid/segment/realtime/plumber/Sink.java | 5 +- .../segment/realtime/RealtimeManagerTest.java | 2 +- .../StreamAppenderatorDriverFailTest.java | 2 +- 74 files changed, 3302 insertions(+), 483 deletions(-) create mode 100644 api/src/main/java/io/druid/indexer/IngestionState.java create mode 100644 api/src/main/java/io/druid/indexer/TaskMetricsGetter.java create mode 100644 api/src/main/java/io/druid/indexer/TaskMetricsUtils.java create mode 100644 api/src/main/java/io/druid/utils/CircularBuffer.java create mode 100644 processing/src/main/java/io/druid/segment/incremental/IncrementalIndexAddResult.java create mode 100644 server/src/main/java/io/druid/segment/realtime/FireDepartmentMetricsTaskMetricsGetter.java diff --git a/api/src/main/java/io/druid/data/input/impl/MapInputRowParser.java b/api/src/main/java/io/druid/data/input/impl/MapInputRowParser.java index 3fa2305a7007..5c168441dac2 100644 --- a/api/src/main/java/io/druid/data/input/impl/MapInputRowParser.java +++ b/api/src/main/java/io/druid/data/input/impl/MapInputRowParser.java @@ -72,7 +72,7 @@ public List parseBatch(Map theMap) } } catch (Exception e) { - throw new ParseException(e, "Unparseable timestamp found!"); + throw new ParseException(e, "Unparseable timestamp found! Event: " + theMap); } return ImmutableList.of(new MapBasedInputRow(timestamp.getMillis(), dimensions, theMap)); diff --git a/api/src/main/java/io/druid/indexer/IngestionState.java b/api/src/main/java/io/druid/indexer/IngestionState.java new file mode 100644 index 000000000000..9a52e3ee1c87 --- /dev/null +++ b/api/src/main/java/io/druid/indexer/IngestionState.java @@ -0,0 +1,28 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.indexer; + +public enum IngestionState +{ + NOT_STARTED, + DETERMINE_PARTITIONS, + BUILD_SEGMENTS, + COMPLETED +} diff --git a/api/src/main/java/io/druid/indexer/TaskMetricsGetter.java b/api/src/main/java/io/druid/indexer/TaskMetricsGetter.java new file mode 100644 index 000000000000..bc5d1517ac25 --- /dev/null +++ b/api/src/main/java/io/druid/indexer/TaskMetricsGetter.java @@ -0,0 +1,29 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.indexer; + +import java.util.List; +import java.util.Map; + +public interface TaskMetricsGetter +{ + List getKeys(); + Map getMetrics(); +} diff --git a/api/src/main/java/io/druid/indexer/TaskMetricsUtils.java b/api/src/main/java/io/druid/indexer/TaskMetricsUtils.java new file mode 100644 index 000000000000..5845b245aa07 --- /dev/null +++ b/api/src/main/java/io/druid/indexer/TaskMetricsUtils.java @@ -0,0 +1,47 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.indexer; + +import com.google.common.collect.Maps; + +import java.util.Map; + +public class TaskMetricsUtils +{ + public static final String ROWS_PROCESSED = "rowsProcessed"; + public static final String ROWS_PROCESSED_WITH_ERRORS = "rowsProcessedWithErrors"; + public static final String ROWS_UNPARSEABLE = "rowsUnparseable"; + public static final String ROWS_THROWN_AWAY = "rowsThrownAway"; + + public static Map makeIngestionRowMetrics( + long rowsProcessed, + long rowsProcessedWithErrors, + long rowsUnparseable, + long rowsThrownAway + ) + { + Map metricsMap = Maps.newHashMap(); + metricsMap.put(ROWS_PROCESSED, rowsProcessed); + metricsMap.put(ROWS_PROCESSED_WITH_ERRORS, rowsProcessedWithErrors); + metricsMap.put(ROWS_UNPARSEABLE, rowsUnparseable); + metricsMap.put(ROWS_THROWN_AWAY, rowsThrownAway); + return metricsMap; + } +} diff --git a/api/src/main/java/io/druid/indexer/TaskStatusPlus.java b/api/src/main/java/io/druid/indexer/TaskStatusPlus.java index d8d93d6055d5..1e52a64b3525 100644 --- a/api/src/main/java/io/druid/indexer/TaskStatusPlus.java +++ b/api/src/main/java/io/druid/indexer/TaskStatusPlus.java @@ -25,6 +25,7 @@ import org.joda.time.DateTime; import javax.annotation.Nullable; +import java.util.Map; import java.util.Objects; public class TaskStatusPlus @@ -38,6 +39,15 @@ public class TaskStatusPlus private final TaskLocation location; private final String dataSource; + @Nullable + private final Map metrics; + + @Nullable + private final String errorMsg; + + @Nullable + private final Map context; + @JsonCreator public TaskStatusPlus( @JsonProperty("id") String id, @@ -47,7 +57,10 @@ public TaskStatusPlus( @JsonProperty("statusCode") @Nullable TaskState state, @JsonProperty("duration") @Nullable Long duration, @JsonProperty("location") TaskLocation location, - @JsonProperty("dataSource") String dataSource + @JsonProperty("dataSource") String dataSource, + @JsonProperty("metrics") Map metrics, + @JsonProperty("errorMsg") String errorMsg, + @JsonProperty("context") Map context ) { if (state != null && state.isComplete()) { @@ -61,6 +74,9 @@ public TaskStatusPlus( this.duration = duration; this.location = Preconditions.checkNotNull(location, "location"); this.dataSource = dataSource; + this.metrics = metrics; + this.errorMsg = errorMsg; + this.context = context; } @JsonProperty @@ -108,6 +124,27 @@ public TaskLocation getLocation() return location; } + @Nullable + @JsonProperty("metrics") + public Map getMetrics() + { + return metrics; + } + + @Nullable + @JsonProperty("errorMsg") + public String getErrorMsg() + { + return errorMsg; + } + + @Nullable + @JsonProperty("context") + public Map getContext() + { + return context; + } + @Override public boolean equals(Object o) { @@ -138,13 +175,37 @@ public boolean equals(Object o) if (!Objects.equals(duration, that.duration)) { return false; } - return location.equals(that.location); + + if (!Objects.equals(location, that.location)) { + return false; + } + + if (!Objects.equals(errorMsg, that.errorMsg)) { + return false; + } + + if (!Objects.equals(location, that.location)) { + return false; + } + + return Objects.equals(context, that.context); } @Override public int hashCode() { - return Objects.hash(id, type, createdTime, queueInsertionTime, state, duration, location); + return Objects.hash( + id, + type, + createdTime, + queueInsertionTime, + state, + duration, + location, + metrics, + errorMsg, + context + ); } @JsonProperty diff --git a/api/src/main/java/io/druid/utils/CircularBuffer.java 
b/api/src/main/java/io/druid/utils/CircularBuffer.java new file mode 100644 index 000000000000..3710499611c9 --- /dev/null +++ b/api/src/main/java/io/druid/utils/CircularBuffer.java @@ -0,0 +1,78 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.utils; + +import com.google.common.base.Preconditions; + +public class CircularBuffer +{ + public E[] getBuffer() + { + return buffer; + } + + private final E[] buffer; + + private int start = 0; + private int size = 0; + + public CircularBuffer(int capacity) + { + buffer = (E[]) new Object[capacity]; + } + + public void add(E item) + { + buffer[start++] = item; + + if (start >= buffer.length) { + start = 0; + } + + if (size < buffer.length) { + size++; + } + } + + public E getLatest(int index) + { + int bufferIndex = start - index - 1; + if (bufferIndex < 0) { + bufferIndex = buffer.length + bufferIndex; + } + return buffer[bufferIndex]; + } + + public E get(int index) + { + Preconditions.checkArgument(index >= 0 && index < size, "invalid index"); + + int bufferIndex = (start - size + index) % buffer.length; + if (bufferIndex < 0) { + bufferIndex += buffer.length; + } + return buffer[bufferIndex]; + } + + public int size() + { + return size; + } +} diff --git a/api/src/test/java/io/druid/indexer/TaskStatusPlusTest.java b/api/src/test/java/io/druid/indexer/TaskStatusPlusTest.java index 200a6b4c701b..587d3c8f2ef3 100644 --- a/api/src/test/java/io/druid/indexer/TaskStatusPlusTest.java +++ b/api/src/test/java/io/druid/indexer/TaskStatusPlusTest.java @@ -53,7 +53,10 @@ public void testSerde() throws IOException TaskState.RUNNING, 1000L, TaskLocation.create("testHost", 1010, -1), - "ds_test" + "ds_test", + null, + null, + null ); final String json = mapper.writeValueAsString(status); Assert.assertEquals(status, mapper.readValue(json, TaskStatusPlus.class)); diff --git a/benchmarks/src/main/java/io/druid/benchmark/FilteredAggregatorBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/FilteredAggregatorBenchmark.java index 378237a8578a..fee9133706e2 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/FilteredAggregatorBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/FilteredAggregatorBenchmark.java @@ -255,7 +255,7 @@ public void ingest(Blackhole blackhole) throws Exception { incIndexFilteredAgg = makeIncIndex(filteredMetrics); for (InputRow row : inputRows) { - int rv = incIndexFilteredAgg.add(row); + int rv = incIndexFilteredAgg.add(row).getRowCount(); blackhole.consume(rv); } } diff --git a/benchmarks/src/main/java/io/druid/benchmark/IncrementalIndexRowTypeBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/IncrementalIndexRowTypeBenchmark.java index cf00f91fa62a..a8d69a1c1a15 100644 --- 
a/benchmarks/src/main/java/io/druid/benchmark/IncrementalIndexRowTypeBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/IncrementalIndexRowTypeBenchmark.java @@ -161,7 +161,7 @@ public void normalLongs(Blackhole blackhole) throws Exception { for (int i = 0; i < maxRows; i++) { InputRow row = longRows.get(i); - int rv = incIndex.add(row); + int rv = incIndex.add(row).getRowCount(); blackhole.consume(rv); } } @@ -174,7 +174,7 @@ public void normalFloats(Blackhole blackhole) throws Exception { for (int i = 0; i < maxRows; i++) { InputRow row = floatRows.get(i); - int rv = incFloatIndex.add(row); + int rv = incFloatIndex.add(row).getRowCount(); blackhole.consume(rv); } } @@ -187,7 +187,7 @@ public void normalStrings(Blackhole blackhole) throws Exception { for (int i = 0; i < maxRows; i++) { InputRow row = stringRows.get(i); - int rv = incStrIndex.add(row); + int rv = incStrIndex.add(row).getRowCount(); blackhole.consume(rv); } } diff --git a/benchmarks/src/main/java/io/druid/benchmark/indexing/IndexIngestionBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/indexing/IndexIngestionBenchmark.java index 6ad20611586a..f1893f87c89f 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/indexing/IndexIngestionBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/indexing/IndexIngestionBenchmark.java @@ -120,7 +120,7 @@ public void addRows(Blackhole blackhole) throws Exception { for (int i = 0; i < rowsPerSegment; i++) { InputRow row = rows.get(i); - int rv = incIndex.add(row); + int rv = incIndex.add(row).getRowCount(); blackhole.consume(rv); } } diff --git a/common/src/main/java/io/druid/indexer/Jobby.java b/common/src/main/java/io/druid/indexer/Jobby.java index 4423cad03684..c0f2d68c7c31 100644 --- a/common/src/main/java/io/druid/indexer/Jobby.java +++ b/common/src/main/java/io/druid/indexer/Jobby.java @@ -19,9 +19,24 @@ package io.druid.indexer; +import javax.annotation.Nullable; +import java.util.Map; + /** */ public interface Jobby { boolean run(); + + @Nullable + default Map getStats() + { + throw new UnsupportedOperationException("This Jobby does not implement getJobStats()."); + } + + @Nullable + default String getErrorMessage() + { + throw new UnsupportedOperationException("This Jobby does not implement getErrorMessage()."); + } } diff --git a/extensions-contrib/orc-extensions/src/test/java/io/druid/data/input/orc/OrcIndexGeneratorJobTest.java b/extensions-contrib/orc-extensions/src/test/java/io/druid/data/input/orc/OrcIndexGeneratorJobTest.java index f073ce0b5691..bf5383b5fbf6 100644 --- a/extensions-contrib/orc-extensions/src/test/java/io/druid/data/input/orc/OrcIndexGeneratorJobTest.java +++ b/extensions-contrib/orc-extensions/src/test/java/io/druid/data/input/orc/OrcIndexGeneratorJobTest.java @@ -234,6 +234,8 @@ public void setUp() throws Exception null, false, false, + null, + null, null ) ) @@ -252,7 +254,7 @@ public void testIndexGeneratorJob() throws IOException private void verifyJob(IndexGeneratorJob job) throws IOException { - JobHelper.runJobs(ImmutableList.of(job), config); + Assert.assertTrue(JobHelper.runJobs(ImmutableList.of(job), config)); int segmentNum = 0; for (DateTime currTime = interval.getStart(); currTime.isBefore(interval.getEnd()); currTime = currTime.plusDays(1)) { diff --git a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java index 6ebf4a835001..390a965216c2 100644 --- 
a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java +++ b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java @@ -49,6 +49,8 @@ import io.druid.discovery.DiscoveryDruidNode; import io.druid.discovery.DruidNodeDiscoveryProvider; import io.druid.discovery.LookupNodeService; +import io.druid.indexer.IngestionState; +import io.druid.indexer.TaskMetricsUtils; import io.druid.indexing.appenderator.ActionBasedSegmentAllocator; import io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker; import io.druid.indexing.common.TaskStatus; @@ -58,6 +60,7 @@ import io.druid.indexing.common.actions.SegmentTransactionalInsertAction; import io.druid.indexing.common.actions.TaskActionClient; import io.druid.indexing.common.task.AbstractTask; +import io.druid.indexing.common.task.IndexTask; import io.druid.indexing.common.task.RealtimeIndexTask; import io.druid.indexing.common.task.TaskResource; import io.druid.indexing.common.task.Tasks; @@ -99,6 +102,7 @@ import io.druid.server.security.ResourceAction; import io.druid.server.security.ResourceType; import io.druid.timeline.DataSegment; +import io.druid.utils.CircularBuffer; import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.clients.consumer.ConsumerRecords; import org.apache.kafka.clients.consumer.KafkaConsumer; @@ -246,6 +250,8 @@ public enum Status private volatile CopyOnWriteArrayList sequences; private ListeningExecutorService publishExecService; private final boolean useLegacy; + private CircularBuffer savedParseExceptions; + private IngestionState ingestionState; @JsonCreator public KafkaIndexTask( @@ -276,6 +282,7 @@ public KafkaIndexTask( this.endOffsets.putAll(ioConfig.getEndPartitions().getPartitionOffsetMap()); this.topic = ioConfig.getStartPartitions().getTopic(); this.sequences = new CopyOnWriteArrayList<>(); + this.ingestionState = IngestionState.NOT_STARTED; if (context != null && context.get(KafkaSupervisor.IS_INCREMENTAL_HANDOFF_SUPPORTED) != null && ((boolean) context.get(KafkaSupervisor.IS_INCREMENTAL_HANDOFF_SUPPORTED))) { @@ -283,6 +290,9 @@ public KafkaIndexTask( } else { useLegacy = true; } + if (tuningConfig.getMaxSavedParseExceptions() > 0) { + savedParseExceptions = new CircularBuffer(tuningConfig.getMaxSavedParseExceptions()); + } resetNextCheckpointTime(); } @@ -414,11 +424,32 @@ private void createAndStartPublishExecutor() @Override public TaskStatus run(final TaskToolbox toolbox) throws Exception { - // for backwards compatibility, should be remove from versions greater than 0.12.x - if (useLegacy) { - return runLegacy(toolbox); + try { + // for backwards compatibility, should be remove from versions greater than 0.12.x + if (useLegacy) { + return runInternalLegacy(toolbox); + } else { + return runInternal(toolbox); + } } + catch (Exception e) { + log.error(e, "Encountered exception while running task."); + Map context = Maps.newHashMap(); + List savedParseExceptionMessages = IndexTask.getMessagesFromSavedParseExceptions(savedParseExceptions); + if (savedParseExceptionMessages != null) { + context.put("unparseableEvents", savedParseExceptionMessages); + } + return TaskStatus.failure( + getId(), + getTaskCompletionMetrics(), + e.getMessage(), + getTaskCompletionContext() + ); + } + } + private TaskStatus runInternal(final TaskToolbox toolbox) throws Exception + { log.info("Starting up!"); startTime = DateTimes.nowUtc(); @@ -595,6 +626,8 @@ public void run() Set assignment = 
assignPartitionsAndSeekToNext(consumer, topic); + ingestionState = IngestionState.BUILD_SEGMENTS; + // Main loop. // Could eventually support leader/follower mode (for keeping replicas more in sync) boolean stillReading = !assignment.isEmpty(); @@ -680,59 +713,67 @@ public void run() boolean isPersistRequired = false; for (InputRow row : rows) { - if (row != null && withinMinMaxRecordTime(row)) { - SequenceMetadata sequenceToUse = null; - for (SequenceMetadata sequence : sequences) { - if (sequence.canHandle(record)) { - sequenceToUse = sequence; + try { + if (row != null && withinMinMaxRecordTime(row)) { + SequenceMetadata sequenceToUse = null; + for (SequenceMetadata sequence : sequences) { + if (sequence.canHandle(record)) { + sequenceToUse = sequence; + } } - } - if (sequenceToUse == null) { - throw new ISE( - "WTH?! cannot find any valid sequence for record with partition [%d] and offset [%d]. Current sequences: %s", - record.partition(), - record.offset(), - sequences + if (sequenceToUse == null) { + throw new ISE( + "WTH?! cannot find any valid sequence for record with partition [%d] and offset [%d]. Current sequences: %s", + record.partition(), + record.offset(), + sequences + ); + } + + final AppenderatorDriverAddResult addResult = driver.add( + row, + sequenceToUse.getSequenceName(), + committerSupplier, + // skip segment lineage check as there will always be one segment + // for combination of sequence and segment granularity. + // It is necessary to skip it as the task puts messages polled from all the + // assigned Kafka partitions into a single Druid segment, thus ordering of + // messages among replica tasks across assigned partitions is not guaranteed + // which may cause replica tasks to ask for segments with different interval + // in different order which might cause SegmentAllocateAction to fail. + true, + // do not allow incremental persists to happen until all the rows from this batch + // of rows are indexed + false ); - } - final AppenderatorDriverAddResult addResult = driver.add( - row, - sequenceToUse.getSequenceName(), - committerSupplier, - // skip segment lineage check as there will always be one segment - // for combination of sequence and segment granularity. - // It is necessary to skip it as the task puts messages polled from all the - // assigned Kafka partitions into a single Druid segment, thus ordering of - // messages among replica tasks across assigned partitions is not guaranteed - // which may cause replica tasks to ask for segments with different interval - // in different order which might cause SegmentAllocateAction to fail. - true, - // do not allow incremental persists to happen until all the rows from this batch - // of rows are indexed - false - ); - - if (addResult.isOk()) { - // If the number of rows in the segment exceeds the threshold after adding a row, - // move the segment out from the active segments of BaseAppenderatorDriver to make a new segment. - if (addResult.getNumRowsInSegment() > tuningConfig.getMaxRowsPerSegment()) { - if (!sequenceToUse.isCheckpointed()) { - sequenceToCheckpoint = sequenceToUse; + if (addResult.isOk()) { + // If the number of rows in the segment exceeds the threshold after adding a row, + // move the segment out from the active segments of BaseAppenderatorDriver to make a new segment. 
+ if (addResult.getNumRowsInSegment() > tuningConfig.getMaxRowsPerSegment()) { + if (!sequenceToUse.isCheckpointed()) { + sequenceToCheckpoint = sequenceToUse; + } } + isPersistRequired |= addResult.isPersistRequired(); + } else { + // Failure to allocate segment puts determinism at risk, bail out to be safe. + // May want configurable behavior here at some point. + // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks. + throw new ISE("Could not allocate segment for row with timestamp[%s]", row.getTimestamp()); } - isPersistRequired |= addResult.isPersistRequired(); + + if (addResult.getParseException() != null) { + throw addResult.getParseException(); + } + fireDepartmentMetrics.incrementProcessed(); } else { - // Failure to allocate segment puts determinism at risk, bail out to be safe. - // May want configurable behavior here at some point. - // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks. - throw new ISE("Could not allocate segment for row with timestamp[%s]", row.getTimestamp()); + fireDepartmentMetrics.incrementThrownAway(); } - - fireDepartmentMetrics.incrementProcessed(); - } else { - fireDepartmentMetrics.incrementThrownAway(); + } + catch (ParseException e) { + handleParseException(e, record); } } if (isPersistRequired) { @@ -757,18 +798,7 @@ public void onFailure(Throwable t) } } catch (ParseException e) { - if (tuningConfig.isReportParseExceptions()) { - throw e; - } else { - log.debug( - e, - "Dropping unparseable row from partition[%d] offset[%,d].", - record.partition(), - record.offset() - ); - - fireDepartmentMetrics.incrementUnparseable(); - } + handleParseException(e, record); } nextOffsets.put(record.partition(), record.offset() + 1); @@ -806,6 +836,7 @@ public void onFailure(Throwable t) } } } + ingestionState = IngestionState.COMPLETED; } catch (Exception e) { log.error(e, "Encountered exception in run() before persisting."); @@ -904,10 +935,21 @@ public void onFailure(Throwable t) toolbox.getDataSegmentServerAnnouncer().unannounce(); } - return success(); + Map context = Maps.newHashMap(); + List savedParseExceptionMessages = IndexTask.getMessagesFromSavedParseExceptions(savedParseExceptions); + if (savedParseExceptionMessages != null) { + context.put("unparseableEvents", savedParseExceptionMessages); + } + + return TaskStatus.success( + getId(), + getTaskCompletionMetrics(), + null, + getTaskCompletionContext() + ); } - private TaskStatus runLegacy(final TaskToolbox toolbox) throws Exception + private TaskStatus runInternalLegacy(final TaskToolbox toolbox) throws Exception { log.info("Starting up!"); startTime = DateTimes.nowUtc(); @@ -949,6 +991,8 @@ private TaskStatus runLegacy(final TaskToolbox toolbox) throws Exception ) ); + ingestionState = IngestionState.BUILD_SEGMENTS; + try ( final Appenderator appenderator0 = newAppenderator(fireDepartmentMetrics, toolbox); final StreamAppenderatorDriver driver = newDriver(appenderator0, toolbox, fireDepartmentMetrics); @@ -1102,35 +1146,45 @@ public void run() final Map> segmentsToMoveOut = new HashMap<>(); for (InputRow row : rows) { - if (row != null && withinMinMaxRecordTime(row)) { - final String sequenceName = sequenceNames.get(record.partition()); - final AppenderatorDriverAddResult addResult = driver.add( - row, - sequenceName, - committerSupplier, - false, - false - ); - - if (addResult.isOk()) { - // If the number of rows in the segment exceeds the threshold after adding a row, - // move the segment out 
from the active segments of BaseAppenderatorDriver to make a new segment. - if (addResult.getNumRowsInSegment() > tuningConfig.getMaxRowsPerSegment()) { - segmentsToMoveOut.computeIfAbsent(sequenceName, k -> new HashSet<>()) - .add(addResult.getSegmentIdentifier()); + try { + if (row != null && withinMinMaxRecordTime(row)) { + final String sequenceName = sequenceNames.get(record.partition()); + final AppenderatorDriverAddResult addResult = driver.add( + row, + sequenceName, + committerSupplier, + false, + false + ); + + if (addResult.isOk()) { + // If the number of rows in the segment exceeds the threshold after adding a row, + // move the segment out from the active segments of BaseAppenderatorDriver to make a new segment. + if (addResult.getNumRowsInSegment() > tuningConfig.getMaxRowsPerSegment()) { + segmentsToMoveOut.computeIfAbsent(sequenceName, k -> new HashSet<>()) + .add(addResult.getSegmentIdentifier()); + } + isPersistRequired |= addResult.isPersistRequired(); + } else { + // Failure to allocate segment puts determinism at risk, bail out to be safe. + // May want configurable behavior here at some point. + // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks. + throw new ISE("Could not allocate segment for row with timestamp[%s]", row.getTimestamp()); } - isPersistRequired |= addResult.isPersistRequired(); + + if (addResult.getParseException() != null) { + throw addResult.getParseException(); + } + fireDepartmentMetrics.incrementProcessed(); } else { - // Failure to allocate segment puts determinism at risk, bail out to be safe. - // May want configurable behavior here at some point. - // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks. 
- throw new ISE("Could not allocate segment for row with timestamp[%s]", row.getTimestamp()); + fireDepartmentMetrics.incrementThrownAway(); } - fireDepartmentMetrics.incrementProcessed(); - } else { - fireDepartmentMetrics.incrementThrownAway(); + } + catch (ParseException e) { + handleParseException(e, record); } } + if (isPersistRequired) { driver.persist(committerSupplier.get()); } @@ -1140,18 +1194,7 @@ public void run() )); } catch (ParseException e) { - if (tuningConfig.isReportParseExceptions()) { - throw e; - } else { - log.debug( - e, - "Dropping unparseable row from partition[%d] offset[%,d].", - record.partition(), - record.offset() - ); - - fireDepartmentMetrics.incrementUnparseable(); - } + handleParseException(e, record); } nextOffsets.put(record.partition(), record.offset() + 1); @@ -1165,6 +1208,7 @@ public void run() } } } + ingestionState = IngestionState.COMPLETED; } catch (Exception e) { log.error(e, "Encountered exception in runLegacy() before persisting."); @@ -1272,7 +1316,72 @@ public String apply(DataSegment input) toolbox.getDataSegmentServerAnnouncer().unannounce(); } - return success(); + Map context = Maps.newHashMap(); + List savedParseExceptionMessages = IndexTask.getMessagesFromSavedParseExceptions(savedParseExceptions); + if (savedParseExceptionMessages != null) { + context.put("unparseableEvents", savedParseExceptionMessages); + } + + return TaskStatus.success( + getId(), + getTaskCompletionMetrics(), + null, + getTaskCompletionContext() + ); + } + + private void handleParseException(ParseException pe, ConsumerRecord record) + { + if (pe.isFromPartiallyValidRow()) { + fireDepartmentMetrics.incrementProcessedWithErrors(); + } else { + fireDepartmentMetrics.incrementUnparseable(); + } + + if (tuningConfig.isLogParseExceptions()) { + log.error( + pe, + "Encountered parse exception on row from partition[%d] offset[%d]", + record.partition(), + record.offset() + ); + } + + if (savedParseExceptions != null) { + savedParseExceptions.add(pe); + } + + if (fireDepartmentMetrics.unparseable() + fireDepartmentMetrics.processedWithErrors() + > tuningConfig.getMaxParseExceptions()) { + log.error("Max parse exceptions exceeded, terminating task..."); + throw new RuntimeException("Max parse exceptions exceeded, terminating task..."); + } + } + + + private Map getTaskCompletionContext() + { + Map context = Maps.newHashMap(); + List buildSegmentsParseExceptionMessages = IndexTask.getMessagesFromSavedParseExceptions(savedParseExceptions); + if (buildSegmentsParseExceptionMessages != null) { + Map unparseableEventsMap = Maps.newHashMap(); + unparseableEventsMap.put("buildSegments", buildSegmentsParseExceptionMessages); + context.put("unparseableEvents", unparseableEventsMap); + } + context.put("ingestionState", ingestionState); + return context; + } + + private Map getTaskCompletionMetrics() + { + Map metrics = Maps.newHashMap(); + if (fireDepartmentMetrics != null) { + metrics.put( + "buildSegments", + FireDepartmentMetrics.getRowMetricsFromFireDepartmentMetrics(fireDepartmentMetrics) + ); + } + return metrics; } private void checkAndMaybeThrowException() @@ -1491,6 +1600,45 @@ public Response setEndOffsetsHTTP( return setEndOffsets(offsets, resume, finish); } + @GET + @Path("/rowStats") + @Produces(MediaType.APPLICATION_JSON) + public Response getRowStats( + @Context final HttpServletRequest req + ) + { + authorizationCheck(req, Action.READ); + Map returnMap = Maps.newHashMap(); + Map totalsMap = Maps.newHashMap(); + + if (fireDepartmentMetrics != null) { + totalsMap.put( 
+ "buildSegments", + TaskMetricsUtils.makeIngestionRowMetrics( + fireDepartmentMetrics.processed(), + fireDepartmentMetrics.processedWithErrors(), + fireDepartmentMetrics.unparseable(), + fireDepartmentMetrics.thrownAway() + ) + ); + } + + returnMap.put("totals", totalsMap); + return Response.ok(returnMap).build(); + } + + @GET + @Path("/unparseableEvents") + @Produces(MediaType.APPLICATION_JSON) + public Response getUnparseableEvents( + @Context final HttpServletRequest req + ) + { + authorizationCheck(req, Action.READ); + List events = IndexTask.getMessagesFromSavedParseExceptions(savedParseExceptions); + return Response.ok(events).build(); + } + public Response setEndOffsets( Map offsets, final boolean resume, @@ -2037,12 +2185,7 @@ private boolean withinMinMaxRecordTime(final InputRow row) "Encountered row with timestamp that cannot be represented as a long: [%s]", row ); - log.debug(errorMsg); - if (tuningConfig.isReportParseExceptions()) { - throw new ParseException(errorMsg); - } else { - return false; - } + throw new ParseException(errorMsg); } if (log.isDebugEnabled()) { diff --git a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaTuningConfig.java b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaTuningConfig.java index 21ec1ed9f1ca..d1c53a5fda57 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaTuningConfig.java +++ b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaTuningConfig.java @@ -51,6 +51,10 @@ public class KafkaTuningConfig implements TuningConfig, AppenderatorConfig private final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory; private final Period intermediateHandoffPeriod; + private final boolean logParseExceptions; + private final int maxParseExceptions; + private final int maxSavedParseExceptions; + @JsonCreator public KafkaTuningConfig( @JsonProperty("maxRowsInMemory") @Nullable Integer maxRowsInMemory, @@ -61,11 +65,14 @@ public KafkaTuningConfig( @JsonProperty("indexSpec") @Nullable IndexSpec indexSpec, // This parameter is left for compatibility when reading existing configs, to be removed in Druid 0.12. @JsonProperty("buildV9Directly") @Nullable Boolean buildV9Directly, - @JsonProperty("reportParseExceptions") @Nullable Boolean reportParseExceptions, + @Deprecated @JsonProperty("reportParseExceptions") @Nullable Boolean reportParseExceptions, @JsonProperty("handoffConditionTimeout") @Nullable Long handoffConditionTimeout, @JsonProperty("resetOffsetAutomatically") @Nullable Boolean resetOffsetAutomatically, @JsonProperty("segmentWriteOutMediumFactory") @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, - @JsonProperty("intermediateHandoffPeriod") @Nullable Period intermediateHandoffPeriod + @JsonProperty("intermediateHandoffPeriod") @Nullable Period intermediateHandoffPeriod, + @JsonProperty("logParseExceptions") @Nullable Boolean logParseExceptions, + @JsonProperty("maxParseExceptions") @Nullable Integer maxParseExceptions, + @JsonProperty("maxSavedParseExceptions") @Nullable Integer maxSavedParseExceptions ) { // Cannot be a static because default basePersistDirectory is unique per-instance @@ -92,6 +99,17 @@ public KafkaTuningConfig( this.intermediateHandoffPeriod = intermediateHandoffPeriod == null ? 
new Period().withDays(Integer.MAX_VALUE) : intermediateHandoffPeriod; + + if (this.reportParseExceptions) { + this.maxParseExceptions = 0; + this.maxSavedParseExceptions = maxSavedParseExceptions == null ? 0 : Math.min(1, maxSavedParseExceptions); + } else { + this.maxParseExceptions = maxParseExceptions == null ? TuningConfig.DEFAULT_MAX_PARSE_EXCEPTIONS : maxParseExceptions; + this.maxSavedParseExceptions = maxSavedParseExceptions == null + ? TuningConfig.DEFAULT_MAX_SAVED_PARSE_EXCEPTIONS + : maxSavedParseExceptions; + } + this.logParseExceptions = logParseExceptions == null ? TuningConfig.DEFAULT_LOG_PARSE_EXCEPTIONS : logParseExceptions; } public static KafkaTuningConfig copyOf(KafkaTuningConfig config) @@ -108,7 +126,10 @@ public static KafkaTuningConfig copyOf(KafkaTuningConfig config) config.handoffConditionTimeout, config.resetOffsetAutomatically, config.segmentWriteOutMediumFactory, - config.intermediateHandoffPeriod + config.intermediateHandoffPeriod, + config.logParseExceptions, + config.maxParseExceptions, + config.maxSavedParseExceptions ); } @@ -197,6 +218,24 @@ public Period getIntermediateHandoffPeriod() return intermediateHandoffPeriod; } + @JsonProperty + public boolean isLogParseExceptions() + { + return logParseExceptions; + } + + @JsonProperty + public int getMaxParseExceptions() + { + return maxParseExceptions; + } + + @JsonProperty + public int getMaxSavedParseExceptions() + { + return maxSavedParseExceptions; + } + public KafkaTuningConfig withBasePersistDirectory(File dir) { return new KafkaTuningConfig( @@ -211,7 +250,10 @@ public KafkaTuningConfig withBasePersistDirectory(File dir) handoffConditionTimeout, resetOffsetAutomatically, segmentWriteOutMediumFactory, - intermediateHandoffPeriod + intermediateHandoffPeriod, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions ); } @@ -235,7 +277,10 @@ public boolean equals(Object o) Objects.equals(basePersistDirectory, that.basePersistDirectory) && Objects.equals(indexSpec, that.indexSpec) && Objects.equals(segmentWriteOutMediumFactory, that.segmentWriteOutMediumFactory) && - Objects.equals(intermediateHandoffPeriod, that.intermediateHandoffPeriod); + Objects.equals(intermediateHandoffPeriod, that.intermediateHandoffPeriod) && + Objects.equals(logParseExceptions, that.logParseExceptions) && + Objects.equals(maxParseExceptions, that.maxParseExceptions) && + Objects.equals(maxSavedParseExceptions, that.maxSavedParseExceptions); } @Override @@ -252,7 +297,10 @@ public int hashCode() handoffConditionTimeout, resetOffsetAutomatically, segmentWriteOutMediumFactory, - intermediateHandoffPeriod + intermediateHandoffPeriod, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions ); } @@ -271,6 +319,9 @@ public String toString() ", resetOffsetAutomatically=" + resetOffsetAutomatically + ", segmentWriteOutMediumFactory=" + segmentWriteOutMediumFactory + ", intermediateHandoffPeriod=" + intermediateHandoffPeriod + + ", logParseExceptions=" + logParseExceptions + + ", maxParseExceptions=" + maxParseExceptions + + ", maxSavedParseExceptions=" + maxSavedParseExceptions + '}'; } } diff --git a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorSpec.java b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorSpec.java index 1db4580c23f9..c796b3eb9b6c 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorSpec.java +++ 
b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorSpec.java @@ -90,6 +90,9 @@ public KafkaSupervisorSpec( null, null, null, + null, + null, + null, null ); this.ioConfig = Preconditions.checkNotNull(ioConfig, "ioConfig"); diff --git a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorTuningConfig.java b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorTuningConfig.java index c6e5a3fe8d65..4467a65d4e13 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorTuningConfig.java +++ b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorTuningConfig.java @@ -57,7 +57,10 @@ public KafkaSupervisorTuningConfig( @JsonProperty("httpTimeout") Period httpTimeout, @JsonProperty("shutdownTimeout") Period shutdownTimeout, @JsonProperty("offsetFetchPeriod") Period offsetFetchPeriod, - @JsonProperty("intermediateHandoffPeriod") Period intermediateHandoffPeriod + @JsonProperty("intermediateHandoffPeriod") Period intermediateHandoffPeriod, + @JsonProperty("logParseExceptions") @Nullable Boolean logParseExceptions, + @JsonProperty("maxParseExceptions") @Nullable Integer maxParseExceptions, + @JsonProperty("maxSavedParseExceptions") @Nullable Integer maxSavedParseExceptions ) { super( @@ -72,7 +75,10 @@ public KafkaSupervisorTuningConfig( handoffConditionTimeout, resetOffsetAutomatically, segmentWriteOutMediumFactory, - intermediateHandoffPeriod + intermediateHandoffPeriod, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions ); this.workerThreads = workerThreads; @@ -140,6 +146,9 @@ public String toString() ", shutdownTimeout=" + shutdownTimeout + ", offsetFetchPeriod=" + offsetFetchPeriod + ", intermediateHandoffPeriod=" + getIntermediateHandoffPeriod() + + ", logParseExceptions=" + isLogParseExceptions() + + ", maxParseExceptions=" + getMaxParseExceptions() + + ", maxSavedParseExceptions=" + getMaxSavedParseExceptions() + '}'; } diff --git a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java index dfc20525513b..3ebb3d02d6a1 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java @@ -38,6 +38,10 @@ import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; +import io.druid.data.input.impl.FloatDimensionSchema; +import io.druid.data.input.impl.LongDimensionSchema; +import io.druid.data.input.impl.StringDimensionSchema; +import io.druid.indexer.TaskMetricsUtils; import io.druid.java.util.emitter.EmittingLogger; import io.druid.java.util.emitter.core.NoopEmitter; import io.druid.java.util.emitter.service.ServiceEmitter; @@ -102,6 +106,7 @@ import io.druid.query.SegmentDescriptor; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.CountAggregatorFactory; +import io.druid.query.aggregation.DoubleSumAggregatorFactory; import io.druid.query.aggregation.LongSumAggregatorFactory; import io.druid.query.filter.SelectorDimFilter; import 
io.druid.query.timeseries.TimeseriesQuery; @@ -152,6 +157,7 @@ import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -183,6 +189,9 @@ public class KafkaIndexTaskTest private long handoffConditionTimeout = 0; private boolean reportParseExceptions = false; + private boolean logParseExceptions = true; + private Integer maxParseExceptions = null; + private Integer maxSavedParseExceptions = null; private boolean resetOffsetAutomatically = false; private boolean doHandoff = true; private Integer maxRowsPerSegment = null; @@ -218,7 +227,13 @@ public KafkaIndexTaskTest(boolean isIncrementalHandoffSupported) new JSONParseSpec( new TimestampSpec("timestamp", "iso", null), new DimensionsSpec( - DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim1t", "dim2")), + Arrays.asList( + new StringDimensionSchema("dim1"), + new StringDimensionSchema("dim1t"), + new StringDimensionSchema("dim2"), + new LongDimensionSchema("dimLong"), + new FloatDimensionSchema("dimFloat") + ), null, null ), @@ -229,7 +244,10 @@ public KafkaIndexTaskTest(boolean isIncrementalHandoffSupported) ), Map.class ), - new AggregatorFactory[]{new CountAggregatorFactory("rows")}, + new AggregatorFactory[]{ + new DoubleSumAggregatorFactory("met1sum", "met1"), + new CountAggregatorFactory("rows") + }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null), null, objectMapper @@ -238,17 +256,21 @@ public KafkaIndexTaskTest(boolean isIncrementalHandoffSupported) private static List> generateRecords(String topic) { return ImmutableList.of( - new ProducerRecord(topic, 0, null, JB("2008", "a", "y", 1.0f)), - new ProducerRecord(topic, 0, null, JB("2009", "b", "y", 1.0f)), - new ProducerRecord(topic, 0, null, JB("2010", "c", "y", 1.0f)), - new ProducerRecord(topic, 0, null, JB("2011", "d", "y", 1.0f)), - new ProducerRecord(topic, 0, null, JB("2011", "e", "y", 1.0f)), - new ProducerRecord(topic, 0, null, JB("246140482-04-24T15:36:27.903Z", "x", "z", 1.0f)), + new ProducerRecord(topic, 0, null, JB("2008", "a", "y", "10", "20.0", "1.0")), + new ProducerRecord(topic, 0, null, JB("2009", "b", "y", "10", "20.0", "1.0")), + new ProducerRecord(topic, 0, null, JB("2010", "c", "y", "10", "20.0", "1.0")), + new ProducerRecord(topic, 0, null, JB("2011", "d", "y", "10", "20.0", "1.0")), + new ProducerRecord(topic, 0, null, JB("2011", "e", "y", "10", "20.0", "1.0")), + new ProducerRecord(topic, 0, null, JB("246140482-04-24T15:36:27.903Z", "x", "z", "10", "20.0", "1.0")), new ProducerRecord(topic, 0, null, StringUtils.toUtf8("unparseable")), + new ProducerRecord(topic, 0, null, StringUtils.toUtf8("unparseable2")), new ProducerRecord(topic, 0, null, null), - new ProducerRecord(topic, 0, null, JB("2013", "f", "y", 1.0f)), - new ProducerRecord(topic, 1, null, JB("2012", "g", "y", 1.0f)), - new ProducerRecord(topic, 1, null, JB("2011", "h", "y", 1.0f)) + new ProducerRecord(topic, 0, null, JB("2013", "f", "y", "10", "20.0", "1.0")), + new ProducerRecord(topic, 0, null, JB("2049", "f", "y", "notanumber", "20.0", "1.0")), + new ProducerRecord(topic, 0, null, JB("2049", "f", "y", "10", "notanumber", "1.0")), + new ProducerRecord(topic, 0, null, JB("2049", "f", "y", "10", "20.0", "notanumber")), + new ProducerRecord(topic, 1, null, JB("2012", "g", "y", "10", "20.0", "1.0")), + new ProducerRecord(topic, 1, null, JB("2011", "h", "y", "10", "20.0", "1.0")) ); } @@ -297,6 +319,9 @@ public 
void setupTest() throws IOException { handoffConditionTimeout = 0; reportParseExceptions = false; + logParseExceptions = true; + maxParseExceptions = null; + maxSavedParseExceptions = null; doHandoff = true; topic = getTopicName(); records = generateRecords(topic); @@ -459,7 +484,7 @@ public void testIncrementalHandOff() throws Exception // of events fetched across two partitions from Kafka final KafkaPartitions checkpoint1 = new KafkaPartitions(topic, ImmutableMap.of(0, 5L, 1, 0L)); final KafkaPartitions checkpoint2 = new KafkaPartitions(topic, ImmutableMap.of(0, 4L, 1, 2L)); - final KafkaPartitions endPartitions = new KafkaPartitions(topic, ImmutableMap.of(0, 9L, 1, 2L)); + final KafkaPartitions endPartitions = new KafkaPartitions(topic, ImmutableMap.of(0, 10L, 1, 2L)); final KafkaIndexTask task = createTask( null, new KafkaIOConfig( @@ -496,8 +521,8 @@ public void testIncrementalHandOff() throws Exception // Check metrics Assert.assertEquals(8, task.getFireDepartmentMetrics().processed()); - Assert.assertEquals(1, task.getFireDepartmentMetrics().unparseable()); - Assert.assertEquals(2, task.getFireDepartmentMetrics().thrownAway()); + Assert.assertEquals(3, task.getFireDepartmentMetrics().unparseable()); + Assert.assertEquals(1, task.getFireDepartmentMetrics().thrownAway()); // Check published metadata SegmentDescriptor desc1 = SD(task, "2008/P1D", 0); @@ -509,7 +534,7 @@ public void testIncrementalHandOff() throws Exception SegmentDescriptor desc7 = SD(task, "2013/P1D", 0); Assert.assertEquals(ImmutableSet.of(desc1, desc2, desc3, desc4, desc5, desc6, desc7), publishedDescriptors()); Assert.assertEquals( - new KafkaDataSourceMetadata(new KafkaPartitions(topic, ImmutableMap.of(0, 9L, 1, 2L))), + new KafkaDataSourceMetadata(new KafkaPartitions(topic, ImmutableMap.of(0, 10L, 1, 2L))), metadataStorageCoordinator.getDataSourceMetadata(DATA_SCHEMA.getDataSource()) ); @@ -924,6 +949,10 @@ public void testReportParseExceptions() throws Exception { reportParseExceptions = true; + // these will be ignored because reportParseExceptions is true + maxParseExceptions = 1000; + maxSavedParseExceptions = 2; + // Insert data try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { for (ProducerRecord record : records) { @@ -953,12 +982,161 @@ public void testReportParseExceptions() throws Exception // Check metrics Assert.assertEquals(3, task.getFireDepartmentMetrics().processed()); - Assert.assertEquals(0, task.getFireDepartmentMetrics().unparseable()); + Assert.assertEquals(1, task.getFireDepartmentMetrics().unparseable()); + Assert.assertEquals(0, task.getFireDepartmentMetrics().thrownAway()); + + // Check published metadata + Assert.assertEquals(ImmutableSet.of(), publishedDescriptors()); + Assert.assertNull(metadataStorageCoordinator.getDataSourceMetadata(DATA_SCHEMA.getDataSource())); + } + + @Test(timeout = 60_000L) + public void testMultipleParseExceptionsSuccess() throws Exception + { + reportParseExceptions = false; + maxParseExceptions = 6; + maxSavedParseExceptions = 6; + + // Insert data + try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { + for (ProducerRecord record : records) { + kafkaProducer.send(record).get(); + } + } + + final KafkaIndexTask task = createTask( + null, + new KafkaIOConfig( + "sequence0", + new KafkaPartitions(topic, ImmutableMap.of(0, 2L)), + new KafkaPartitions(topic, ImmutableMap.of(0, 13L)), + kafkaServer.consumerProperties(), + true, + false, + null, + null, + false + ) + ); + + final ListenableFuture future = runTask(task); + + 
TaskStatus status = future.get(); + + // Wait for task to exit + Assert.assertEquals(TaskState.SUCCESS, status.getStatusCode()); + Assert.assertEquals(null, status.getErrorMsg()); + + // Check metrics + Assert.assertEquals(4, task.getFireDepartmentMetrics().processed()); + Assert.assertEquals(3, task.getFireDepartmentMetrics().processedWithErrors()); + Assert.assertEquals(3, task.getFireDepartmentMetrics().unparseable()); + Assert.assertEquals(1, task.getFireDepartmentMetrics().thrownAway()); + + // Check published metadata + SegmentDescriptor desc1 = SD(task, "2010/P1D", 0); + SegmentDescriptor desc2 = SD(task, "2011/P1D", 0); + SegmentDescriptor desc3 = SD(task, "2013/P1D", 0); + SegmentDescriptor desc4 = SD(task, "2049/P1D", 0); + Assert.assertEquals(ImmutableSet.of(desc1, desc2, desc3, desc4), publishedDescriptors()); + Assert.assertEquals( + new KafkaDataSourceMetadata(new KafkaPartitions(topic, ImmutableMap.of(0, 13L))), + metadataStorageCoordinator.getDataSourceMetadata(DATA_SCHEMA.getDataSource()) + ); + + Map expectedMetrics = ImmutableMap.of( + "buildSegments", + ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED, 4L, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 3L, + TaskMetricsUtils.ROWS_UNPARSEABLE, 3L, + TaskMetricsUtils.ROWS_THROWN_AWAY, 1L + ) + ); + Assert.assertEquals(expectedMetrics, status.getMetrics()); + + Map unparseableEvents = ImmutableMap.of( + "buildSegments", + Arrays.asList( + "Found unparseable columns in row: [MapBasedInputRow{timestamp=2049-01-01T00:00:00.000Z, event={timestamp=2049, dim1=f, dim2=y, dimLong=10, dimFloat=20.0, met1=notanumber}, dimensions=[dim1, dim1t, dim2, dimLong, dimFloat]}], exceptions: [Unable to parse value[notanumber] for field[met1],]", + "Found unparseable columns in row: [MapBasedInputRow{timestamp=2049-01-01T00:00:00.000Z, event={timestamp=2049, dim1=f, dim2=y, dimLong=10, dimFloat=notanumber, met1=1.0}, dimensions=[dim1, dim1t, dim2, dimLong, dimFloat]}], exceptions: [could not convert value [notanumber] to float,]", + "Found unparseable columns in row: [MapBasedInputRow{timestamp=2049-01-01T00:00:00.000Z, event={timestamp=2049, dim1=f, dim2=y, dimLong=notanumber, dimFloat=20.0, met1=1.0}, dimensions=[dim1, dim1t, dim2, dimLong, dimFloat]}], exceptions: [could not convert value [notanumber] to long,]", + "Unable to parse row [unparseable2]", + "Unable to parse row [unparseable]", + "Encountered row with timestamp that cannot be represented as a long: [MapBasedInputRow{timestamp=246140482-04-24T15:36:27.903Z, event={timestamp=246140482-04-24T15:36:27.903Z, dim1=x, dim2=z, dimLong=10, dimFloat=20.0, met1=1.0}, dimensions=[dim1, dim1t, dim2, dimLong, dimFloat]}]" + ) + ); + + Assert.assertEquals(unparseableEvents, status.getContext().get("unparseableEvents")); + } + + @Test(timeout = 60_000L) + public void testMultipleParseExceptionsFailure() throws Exception + { + reportParseExceptions = false; + maxParseExceptions = 2; + maxSavedParseExceptions = 2; + + // Insert data + try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { + for (ProducerRecord record : records) { + kafkaProducer.send(record).get(); + } + } + + final KafkaIndexTask task = createTask( + null, + new KafkaIOConfig( + "sequence0", + new KafkaPartitions(topic, ImmutableMap.of(0, 2L)), + new KafkaPartitions(topic, ImmutableMap.of(0, 10L)), + kafkaServer.consumerProperties(), + true, + false, + null, + null, + false + ) + ); + + final ListenableFuture future = runTask(task); + + TaskStatus status = future.get(); + + // Wait for task to exit + 
Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); + Assert.assertEquals("Max parse exceptions exceeded, terminating task...", status.getErrorMsg()); + + // Check metrics + Assert.assertEquals(3, task.getFireDepartmentMetrics().processed()); + Assert.assertEquals(0, task.getFireDepartmentMetrics().processedWithErrors()); + Assert.assertEquals(3, task.getFireDepartmentMetrics().unparseable()); Assert.assertEquals(0, task.getFireDepartmentMetrics().thrownAway()); // Check published metadata Assert.assertEquals(ImmutableSet.of(), publishedDescriptors()); Assert.assertNull(metadataStorageCoordinator.getDataSourceMetadata(DATA_SCHEMA.getDataSource())); + + Map expectedMetrics = ImmutableMap.of( + "buildSegments", + ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED, 3L, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0L, + TaskMetricsUtils.ROWS_UNPARSEABLE, 3L, + TaskMetricsUtils.ROWS_THROWN_AWAY, 0L + ) + ); + Assert.assertEquals(expectedMetrics, status.getMetrics()); + + Map unparseableEvents = ImmutableMap.of( + "buildSegments", + Arrays.asList( + "Unable to parse row [unparseable2]", + "Unable to parse row [unparseable]" + ) + ); + + Assert.assertEquals(unparseableEvents, status.getContext().get("unparseableEvents")); } @Test(timeout = 60_000L) @@ -1051,7 +1229,7 @@ public void testRunConflicting() throws Exception new KafkaIOConfig( "sequence1", new KafkaPartitions(topic, ImmutableMap.of(0, 3L)), - new KafkaPartitions(topic, ImmutableMap.of(0, 9L)), + new KafkaPartitions(topic, ImmutableMap.of(0, 10L)), kafkaServer.consumerProperties(), true, false, @@ -1081,8 +1259,8 @@ public void testRunConflicting() throws Exception Assert.assertEquals(0, task1.getFireDepartmentMetrics().unparseable()); Assert.assertEquals(0, task1.getFireDepartmentMetrics().thrownAway()); Assert.assertEquals(3, task2.getFireDepartmentMetrics().processed()); - Assert.assertEquals(1, task2.getFireDepartmentMetrics().unparseable()); - Assert.assertEquals(2, task2.getFireDepartmentMetrics().thrownAway()); + Assert.assertEquals(3, task2.getFireDepartmentMetrics().unparseable()); + Assert.assertEquals(1, task2.getFireDepartmentMetrics().thrownAway()); // Check published segments & metadata, should all be from the first task SegmentDescriptor desc1 = SD(task1, "2010/P1D", 0); @@ -1120,7 +1298,7 @@ public void testRunConflictingWithoutTransactions() throws Exception new KafkaIOConfig( "sequence1", new KafkaPartitions(topic, ImmutableMap.of(0, 3L)), - new KafkaPartitions(topic, ImmutableMap.of(0, 9L)), + new KafkaPartitions(topic, ImmutableMap.of(0, 10L)), kafkaServer.consumerProperties(), false, false, @@ -1156,8 +1334,8 @@ public void testRunConflictingWithoutTransactions() throws Exception Assert.assertEquals(0, task1.getFireDepartmentMetrics().unparseable()); Assert.assertEquals(0, task1.getFireDepartmentMetrics().thrownAway()); Assert.assertEquals(3, task2.getFireDepartmentMetrics().processed()); - Assert.assertEquals(1, task2.getFireDepartmentMetrics().unparseable()); - Assert.assertEquals(2, task2.getFireDepartmentMetrics().thrownAway()); + Assert.assertEquals(3, task2.getFireDepartmentMetrics().unparseable()); + Assert.assertEquals(1, task2.getFireDepartmentMetrics().thrownAway()); // Check published segments & metadata SegmentDescriptor desc3 = SD(task2, "2011/P1D", 1); @@ -1548,8 +1726,8 @@ public void testRunAndPauseAfterReadWithModifiedEndOffsets() throws Exception // Check metrics Assert.assertEquals(4, task.getFireDepartmentMetrics().processed()); - Assert.assertEquals(1, 
task.getFireDepartmentMetrics().unparseable()); - Assert.assertEquals(1, task.getFireDepartmentMetrics().thrownAway()); + Assert.assertEquals(2, task.getFireDepartmentMetrics().unparseable()); + Assert.assertEquals(0, task.getFireDepartmentMetrics().thrownAway()); // Check published metadata SegmentDescriptor desc1 = SD(task, "2009/P1D", 0); @@ -1788,7 +1966,10 @@ private KafkaIndexTask createTask( handoffConditionTimeout, resetOffsetAutomatically, null, - intermediateHandoffPeriod + intermediateHandoffPeriod, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions ); final Map context = isIncrementalHandoffSupported ? ImmutableMap.of(KafkaSupervisor.IS_INCREMENTAL_HANDOFF_SUPPORTED, true) @@ -1827,7 +2008,10 @@ private KafkaIndexTask createTask( handoffConditionTimeout, resetOffsetAutomatically, null, - null + null, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions ); if (isIncrementalHandoffSupported) { context.put(KafkaSupervisor.IS_INCREMENTAL_HANDOFF_SUPPORTED, true); @@ -2140,11 +2324,18 @@ public long countEvents(final Task task) throws Exception return results.isEmpty() ? 0 : results.get(0).getValue().getLongMetric("rows"); } - private static byte[] JB(String timestamp, String dim1, String dim2, double met1) + private static byte[] JB(String timestamp, String dim1, String dim2, String dimLong, String dimFloat, String met1) { try { return new ObjectMapper().writeValueAsBytes( - ImmutableMap.of("timestamp", timestamp, "dim1", dim1, "dim2", dim2, "met1", met1) + ImmutableMap.builder() + .put("timestamp", timestamp) + .put("dim1", dim1) + .put("dim2", dim2) + .put("dimLong", dimLong) + .put("dimFloat", dimFloat) + .put("met1", met1) + .build() ); } catch (Exception e) { diff --git a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaTuningConfigTest.java b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaTuningConfigTest.java index 5378e4fc2464..a2a67f2f01d2 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaTuningConfigTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaTuningConfigTest.java @@ -113,6 +113,9 @@ public void testCopyOf() throws Exception 5L, null, null, + null, + null, + null, null ); KafkaTuningConfig copy = KafkaTuningConfig.copyOf(original); diff --git a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorTest.java b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorTest.java index 6355d9d83280..725edcd5033d 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorTest.java @@ -201,6 +201,9 @@ public void setupTest() throws Exception TEST_HTTP_TIMEOUT, TEST_SHUTDOWN_TIMEOUT, null, + null, + null, + null, null ); diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/DetermineHashedPartitionsJob.java b/indexing-hadoop/src/main/java/io/druid/indexer/DetermineHashedPartitionsJob.java index 13efd97c1b2c..ec945f625e7d 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/DetermineHashedPartitionsJob.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/DetermineHashedPartitionsJob.java @@ -46,6 +46,7 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import 
org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Partitioner; import org.apache.hadoop.mapreduce.Reducer; @@ -55,6 +56,7 @@ import org.joda.time.DateTimeComparator; import org.joda.time.Interval; +import javax.annotation.Nullable; import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; @@ -69,6 +71,8 @@ public class DetermineHashedPartitionsJob implements Jobby { private static final Logger log = new Logger(DetermineHashedPartitionsJob.class); private final HadoopDruidIndexerConfig config; + private String failureCause; + private Job groupByJob; public DetermineHashedPartitionsJob( HadoopDruidIndexerConfig config @@ -86,7 +90,7 @@ public boolean run() * in the final segment. */ final long startTime = System.currentTimeMillis(); - final Job groupByJob = Job.getInstance( + groupByJob = Job.getInstance( new Configuration(), StringUtils.format("%s-determine_partitions_hashed-%s", config.getDataSource(), config.getIntervals()) ); @@ -121,6 +125,7 @@ public boolean run() if (!groupByJob.waitForCompletion(true)) { log.error("Job failed: %s", groupByJob.getJobID()); + failureCause = Utils.getFailureMessage(groupByJob, config.JSON_MAPPER); return false; } @@ -197,6 +202,7 @@ public boolean run() log.info("Path[%s] didn't exist!?", partitionInfoPath); } } + config.setShardSpecs(shardSpecs); log.info( "DetermineHashedPartitionsJob took %d millis", @@ -210,6 +216,42 @@ public boolean run() } } + @Override + public Map getStats() + { + if (groupByJob == null) { + return null; + } + + try { + Counters jobCounters = groupByJob.getCounters(); + + Map metrics = TaskMetricsUtils.makeIngestionRowMetrics( + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_COUNTER).getValue(), + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_WITH_ERRORS_COUNTER).getValue(), + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_UNPARSEABLE_COUNTER).getValue(), + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER).getValue() + ); + + return metrics; + } + catch (IllegalStateException ise) { + log.debug("Couldn't get counters due to job state"); + return null; + } + catch (Exception e) { + log.debug(e, "Encountered exception in getStats()."); + return null; + } + } + + @Nullable + @Override + public String getErrorMessage() + { + return failureCause; + } + public static class DetermineCardinalityMapper extends HadoopDruidIndexerMapper { private static HashFunction hashFunction = Hashing.murmur3_128(); @@ -269,9 +311,12 @@ protected void innerMap( } interval = maybeInterval.get(); } + hyperLogLogs .get(interval) .add(hashFunction.hashBytes(HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsBytes(groupKey)).asBytes()); + + context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_COUNTER).increment(1); } @Override diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/DeterminePartitionsJob.java b/indexing-hadoop/src/main/java/io/druid/indexer/DeterminePartitionsJob.java index a83bf4d80087..877d8601c877 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/DeterminePartitionsJob.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/DeterminePartitionsJob.java @@ -54,6 +54,7 @@ import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.InvalidJobConfException; +import 
org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.Mapper; @@ -70,6 +71,7 @@ import org.joda.time.Interval; import org.joda.time.chrono.ISOChronology; +import javax.annotation.Nullable; import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; @@ -100,6 +102,10 @@ public class DeterminePartitionsJob implements Jobby private final HadoopDruidIndexerConfig config; + private Job groupByJob; + + private String failureCause; + public DeterminePartitionsJob( HadoopDruidIndexerConfig config ) @@ -124,7 +130,7 @@ public boolean run() } if (!config.getPartitionsSpec().isAssumeGrouped()) { - final Job groupByJob = Job.getInstance( + groupByJob = Job.getInstance( new Configuration(), StringUtils.format("%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()) ); @@ -155,6 +161,7 @@ public boolean run() if (!groupByJob.waitForCompletion(true)) { log.error("Job failed: %s", groupByJob.getJobID()); + failureCause = Utils.getFailureMessage(groupByJob, config.JSON_MAPPER); return false; } } else { @@ -212,6 +219,7 @@ public boolean run() if (!dimSelectionJob.waitForCompletion(true)) { log.error("Job failed: %s", dimSelectionJob.getJobID().toString()); + failureCause = Utils.getFailureMessage(dimSelectionJob, config.JSON_MAPPER); return false; } @@ -255,6 +263,42 @@ public boolean run() } } + @Override + public Map getStats() + { + if (groupByJob == null) { + return null; + } + + try { + Counters jobCounters = groupByJob.getCounters(); + + Map metrics = TaskMetricsUtils.makeIngestionRowMetrics( + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_COUNTER).getValue(), + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_WITH_ERRORS_COUNTER).getValue(), + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_UNPARSEABLE_COUNTER).getValue(), + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER).getValue() + ); + + return metrics; + } + catch (IllegalStateException ise) { + log.debug("Couldn't get counters due to job state"); + return null; + } + catch (Exception e) { + log.debug(e, "Encountered exception in getStats()."); + return null; + } + } + + @Nullable + @Override + public String getErrorMessage() + { + return failureCause; + } + public static class DeterminePartitionsGroupByMapper extends HadoopDruidIndexerMapper { private Granularity rollupGranularity = null; @@ -282,6 +326,8 @@ protected void innerMap( new BytesWritable(HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsBytes(groupKey)), NullWritable.get() ); + + context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_COUNTER).increment(1); } } diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidDetermineConfigurationJob.java b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidDetermineConfigurationJob.java index 0229b073beeb..3adcf31c00e1 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidDetermineConfigurationJob.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidDetermineConfigurationJob.java @@ -38,6 +38,7 @@ public class HadoopDruidDetermineConfigurationJob implements Jobby { private static final Logger log = new Logger(HadoopDruidDetermineConfigurationJob.class); private final HadoopDruidIndexerConfig config; + private Jobby job; @Inject public HadoopDruidDetermineConfigurationJob( @@ 
-50,12 +51,11 @@ public HadoopDruidDetermineConfigurationJob( @Override public boolean run() { - List jobs = Lists.newArrayList(); - JobHelper.ensurePaths(config); if (config.isDeterminingPartitions()) { - jobs.add(config.getPartitionsSpec().getPartitionJob(config)); + job = config.getPartitionsSpec().getPartitionJob(config); + return JobHelper.runSingleJob(job, config); } else { int shardsPerInterval = config.getPartitionsSpec().getNumShards(); Map> shardSpecs = Maps.newTreeMap(); @@ -86,10 +86,27 @@ public boolean run() } } config.setShardSpecs(shardSpecs); + return true; } + } - return JobHelper.runJobs(jobs, config); + @Override + public Map getStats() + { + if (job == null) { + return null; + } + return job.getStats(); } + @Override + public String getErrorMessage() + { + if (job == null) { + return null; + } + + return job.getErrorMessage(); + } } diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerConfig.java b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerConfig.java index 15fb2fe32dd7..e63e29876ca9 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerConfig.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerConfig.java @@ -128,7 +128,11 @@ public void configure(Binder binder) public enum IndexJobCounters { - INVALID_ROW_COUNTER + INVALID_ROW_COUNTER, + ROWS_PROCESSED_COUNTER, + ROWS_PROCESSED_WITH_ERRORS_COUNTER, + ROWS_UNPARSEABLE_COUNTER, + ROWS_THROWN_AWAY_COUNTER } public static HadoopDruidIndexerConfig fromSpec(HadoopIngestionSpec spec) @@ -372,6 +376,16 @@ public int getShardSpecCount(Bucket bucket) return schema.getTuningConfig().getShardSpecs().get(bucket.time.getMillis()).size(); } + public boolean isLogParseExceptions() + { + return schema.getTuningConfig().isLogParseExceptions(); + } + + public int getMaxParseExceptions() + { + return schema.getTuningConfig().getMaxParseExceptions(); + } + /** * Job instance should have Configuration set (by calling {@link #addJobProperties(Job)} * or via injected system properties) before this method is called. 
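In the hunk above, HadoopDruidDetermineConfigurationJob no longer funnels its work through runJobs(); it keeps a reference to the single partition job it launches and forwards the new Jobby accessors getStats() and getErrorMessage() to it, returning null until that job exists. A small editor sketch of the delegation pattern, using a local stand-in interface rather than Druid's Jobby, is below.

import java.util.Map;

public class DelegatingJobbySketch
{
  // Local stand-in for Druid's Jobby with the two accessors this patch adds.
  interface ReportingJob
  {
    boolean run();

    Map<String, Object> getStats();

    String getErrorMessage();
  }

  static class ConfigurationJob implements ReportingJob
  {
    private final ReportingJob partitionJob; // stands in for getPartitionsSpec().getPartitionJob(config)
    private ReportingJob delegate;           // null until run() decides to launch it

    ConfigurationJob(ReportingJob partitionJob)
    {
      this.partitionJob = partitionJob;
    }

    @Override
    public boolean run()
    {
      delegate = partitionJob;
      return delegate.run();
    }

    // Both accessors mirror the null checks in the hunk above: no inner job, no report.
    @Override
    public Map<String, Object> getStats()
    {
      return delegate == null ? null : delegate.getStats();
    }

    @Override
    public String getErrorMessage()
    {
      return delegate == null ? null : delegate.getErrorMessage();
    }
  }
}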
The {@link PathSpec} may diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerJob.java b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerJob.java index d46b73cd4c7c..e4096122c025 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerJob.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerJob.java @@ -26,7 +26,9 @@ import io.druid.java.util.common.logger.Logger; import io.druid.timeline.DataSegment; +import javax.annotation.Nullable; import java.util.List; +import java.util.Map; /** */ @@ -92,8 +94,28 @@ public boolean run() ); - JobHelper.runJobs(jobs, config); - return true; + return JobHelper.runJobs(jobs, config); + } + + @Override + public Map getStats() + { + if (indexJob == null) { + return null; + } + + return indexJob.getStats(); + } + + @Nullable + @Override + public String getErrorMessage() + { + if (indexJob == null) { + return null; + } + + return indexJob.getErrorMessage(); } public List getPublishedSegments() diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerMapper.java b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerMapper.java index b5707c4fa598..f905a24c9c23 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerMapper.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerMapper.java @@ -24,12 +24,15 @@ import io.druid.data.input.impl.InputRowParser; import io.druid.data.input.impl.StringInputRowParser; import io.druid.java.util.common.DateTimes; +import io.druid.java.util.common.Intervals; import io.druid.java.util.common.RE; +import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.collect.Utils; import io.druid.java.util.common.logger.Logger; import io.druid.java.util.common.parsers.ParseException; import io.druid.segment.indexing.granularity.GranularitySpec; import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Counter; import org.apache.hadoop.mapreduce.Mapper; import java.io.IOException; @@ -63,37 +66,70 @@ public HadoopDruidIndexerConfig getConfig() protected void map(Object key, Object value, Context context) throws IOException, InterruptedException { try { - final List inputRows; - try { - inputRows = parseInputRow(value, parser); - } - catch (ParseException e) { - if (reportParseExceptions) { - throw e; - } - log.debug(e, "Ignoring invalid row [%s] due to parsing error", value); - context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER).increment(1); - return; // we're ignoring this invalid row - } + final List inputRows = parseInputRow(value, parser); for (InputRow inputRow : inputRows) { - if (inputRow == null) { - // Throw away null rows from the parser. - log.debug("Throwing away row [%s]", value); - continue; + try { + if (inputRow == null) { + // Throw away null rows from the parser. 
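The HadoopDruidIndexerMapper hunk that continues just below replaces the old all-or-nothing handling with counter-based accounting: rows that cannot be parsed at all and rows that parse with recoverable field errors increment separate counters, exceptions are optionally logged, and the task aborts once the combined total exceeds the configured maximum. A simplified editor sketch of that policy with plain long counters (all names illustrative):

public class ParseErrorPolicySketch
{
  private final long maxParseExceptions;
  private final boolean logParseExceptions;
  private long unparseable = 0;
  private long processedWithErrors = 0;

  public ParseErrorPolicySketch(long maxParseExceptions, boolean logParseExceptions)
  {
    this.maxParseExceptions = maxParseExceptions;
    this.logParseExceptions = logParseExceptions;
  }

  public void handleParseException(RuntimeException pe, boolean fromPartiallyValidRow)
  {
    // Partially valid rows (bad field values) are counted separately from rows
    // that could not be parsed at all.
    if (fromPartiallyValidRow) {
      processedWithErrors++;
    } else {
      unparseable++;
    }
    if (logParseExceptions) {
      System.err.println("Encountered parse exception: " + pe.getMessage());
    }
    // Once the combined error count passes the limit, fail fast.
    if (unparseable + processedWithErrors > maxParseExceptions) {
      throw new RuntimeException("Max parse exceptions exceeded, terminating task...", pe);
    }
  }
}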
+ log.debug("Throwing away row [%s]", value); + context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER).increment(1); + continue; + } + + if (!Intervals.ETERNITY.contains(inputRow.getTimestamp())) { + final String errorMsg = StringUtils.format( + "Encountered row with timestamp that cannot be represented as a long: [%s]", + inputRow + ); + throw new ParseException(errorMsg); + } + + if (!granularitySpec.bucketIntervals().isPresent() + || granularitySpec.bucketInterval(DateTimes.utc(inputRow.getTimestampFromEpoch())) + .isPresent()) { + innerMap(inputRow, context, reportParseExceptions); + } else { + context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER).increment(1); + } } - if (!granularitySpec.bucketIntervals().isPresent() - || granularitySpec.bucketInterval(DateTimes.utc(inputRow.getTimestampFromEpoch())) - .isPresent()) { - innerMap(inputRow, context, reportParseExceptions); + catch (ParseException pe) { + handleParseException(pe, context); } } } + catch (ParseException pe) { + handleParseException(pe, context); + } catch (RuntimeException e) { throw new RE(e, "Failure on row[%s]", value); } } + private void handleParseException(ParseException pe, Context context) + { + context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER).increment(1); + Counter unparseableCounter = context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_UNPARSEABLE_COUNTER); + Counter processedWithErrorsCounter = context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_WITH_ERRORS_COUNTER); + + if (pe.isFromPartiallyValidRow()) { + processedWithErrorsCounter.increment(1); + } else { + unparseableCounter.increment(1); + } + + if (config.isLogParseExceptions()) { + log.error(pe, "Encountered parse exception: "); + } + + long rowsUnparseable = unparseableCounter.getValue(); + long rowsProcessedWithError = processedWithErrorsCounter.getValue(); + if (rowsUnparseable + rowsProcessedWithError > config.getMaxParseExceptions()) { + log.error("Max parse exceptions exceeded, terminating task..."); + throw new RuntimeException("Max parse exceptions exceeded, terminating task...", pe); + } + } + private static List parseInputRow(Object value, InputRowParser parser) { if (parser instanceof StringInputRowParser && value instanceof Text) { diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopTuningConfig.java b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopTuningConfig.java index aeb72c033f87..a997e40d2994 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopTuningConfig.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopTuningConfig.java @@ -31,6 +31,7 @@ import io.druid.segment.IndexSpec; import io.druid.segment.indexing.TuningConfig; +import javax.annotation.Nullable; import java.util.List; import java.util.Map; @@ -67,6 +68,8 @@ public static HadoopTuningConfig makeDefaultTuningConfig() DEFAULT_NUM_BACKGROUND_PERSIST_THREADS, false, false, + null, + null, null ); } @@ -88,6 +91,8 @@ public static HadoopTuningConfig makeDefaultTuningConfig() private final boolean forceExtendableShardSpecs; private final boolean useExplicitVersion; private final List allowedHadoopPrefix; + private final boolean logParseExceptions; + private final int maxParseExceptions; @JsonCreator public HadoopTuningConfig( @@ -100,7 +105,7 @@ public HadoopTuningConfig( final @JsonProperty("leaveIntermediate") boolean leaveIntermediate, final @JsonProperty("cleanupOnFailure") Boolean 
cleanupOnFailure, final @JsonProperty("overwriteFiles") boolean overwriteFiles, - final @JsonProperty("ignoreInvalidRows") boolean ignoreInvalidRows, + final @Deprecated @JsonProperty("ignoreInvalidRows") boolean ignoreInvalidRows, final @JsonProperty("jobProperties") Map jobProperties, final @JsonProperty("combineText") boolean combineText, final @JsonProperty("useCombiner") Boolean useCombiner, @@ -111,7 +116,9 @@ public HadoopTuningConfig( final @JsonProperty("numBackgroundPersistThreads") Integer numBackgroundPersistThreads, final @JsonProperty("forceExtendableShardSpecs") boolean forceExtendableShardSpecs, final @JsonProperty("useExplicitVersion") boolean useExplicitVersion, - final @JsonProperty("allowedHadoopPrefix") List allowedHadoopPrefix + final @JsonProperty("allowedHadoopPrefix") List allowedHadoopPrefix, + final @JsonProperty("logParseExceptions") @Nullable Boolean logParseExceptions, + final @JsonProperty("maxParseExceptions") @Nullable Integer maxParseExceptions ) { this.workingPath = workingPath; @@ -138,6 +145,13 @@ public HadoopTuningConfig( Preconditions.checkArgument(this.numBackgroundPersistThreads >= 0, "Not support persistBackgroundCount < 0"); this.useExplicitVersion = useExplicitVersion; this.allowedHadoopPrefix = allowedHadoopPrefix == null ? ImmutableList.of() : allowedHadoopPrefix; + + if (!this.ignoreInvalidRows) { + this.maxParseExceptions = 0; + } else { + this.maxParseExceptions = maxParseExceptions == null ? TuningConfig.DEFAULT_MAX_PARSE_EXCEPTIONS : maxParseExceptions; + } + this.logParseExceptions = logParseExceptions == null ? TuningConfig.DEFAULT_LOG_PARSE_EXCEPTIONS : logParseExceptions; } @JsonProperty @@ -253,6 +267,18 @@ public List getUserAllowedHadoopPrefix() return allowedHadoopPrefix; } + @JsonProperty + public boolean isLogParseExceptions() + { + return logParseExceptions; + } + + @JsonProperty + public int getMaxParseExceptions() + { + return maxParseExceptions; + } + public HadoopTuningConfig withWorkingPath(String path) { return new HadoopTuningConfig( @@ -274,7 +300,9 @@ public HadoopTuningConfig withWorkingPath(String path) numBackgroundPersistThreads, forceExtendableShardSpecs, useExplicitVersion, - allowedHadoopPrefix + allowedHadoopPrefix, + logParseExceptions, + maxParseExceptions ); } @@ -299,7 +327,9 @@ public HadoopTuningConfig withVersion(String ver) numBackgroundPersistThreads, forceExtendableShardSpecs, useExplicitVersion, - allowedHadoopPrefix + allowedHadoopPrefix, + logParseExceptions, + maxParseExceptions ); } @@ -324,7 +354,9 @@ public HadoopTuningConfig withShardSpecs(Map> specs numBackgroundPersistThreads, forceExtendableShardSpecs, useExplicitVersion, - allowedHadoopPrefix + allowedHadoopPrefix, + logParseExceptions, + maxParseExceptions ); } } diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java b/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java index fc9af861875d..7ab621a7ec66 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java @@ -40,9 +40,11 @@ import io.druid.indexer.path.DatasourcePathSpec; import io.druid.java.util.common.IAE; import io.druid.java.util.common.ISE; +import io.druid.java.util.common.Pair; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.concurrent.Execs; import io.druid.java.util.common.logger.Logger; +import io.druid.java.util.common.parsers.ParseException; import io.druid.query.aggregation.AggregatorFactory; 
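The HadoopTuningConfig hunk above keeps the deprecated ignoreInvalidRows flag meaningful by folding it into the new maxParseExceptions setting: when invalid rows are not ignored, the limit is forced to zero so any parse error is fatal; otherwise an explicit limit wins, with a shared default as fallback. An editor sketch of that resolution rule follows; the default value used here is an assumption, not the actual TuningConfig constant.

public class ParseExceptionLimitSketch
{
  // Assumed stand-in for TuningConfig.DEFAULT_MAX_PARSE_EXCEPTIONS.
  private static final int ASSUMED_DEFAULT_MAX_PARSE_EXCEPTIONS = Integer.MAX_VALUE;

  public static int resolveMaxParseExceptions(boolean ignoreInvalidRows, Integer maxParseExceptions)
  {
    if (!ignoreInvalidRows) {
      // Legacy behaviour: not ignoring invalid rows means the first parse error is fatal.
      return 0;
    }
    return maxParseExceptions == null ? ASSUMED_DEFAULT_MAX_PARSE_EXCEPTIONS : maxParseExceptions;
  }

  public static void main(String[] args)
  {
    System.out.println(resolveMaxParseExceptions(false, 10));   // 0
    System.out.println(resolveMaxParseExceptions(true, 10));    // 10
    System.out.println(resolveMaxParseExceptions(true, null));  // the assumed default
  }
}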
import io.druid.segment.BaseProgressIndicator; import io.druid.segment.ProgressIndicator; @@ -137,6 +139,7 @@ public static List getPublishedSegments(HadoopDruidIndexerConfig co private final HadoopDruidIndexerConfig config; private IndexGeneratorStats jobStats; + private Job job; public IndexGeneratorJob( HadoopDruidIndexerConfig config @@ -160,7 +163,7 @@ public IndexGeneratorStats getJobStats() public boolean run() { try { - Job job = Job.getInstance( + job = Job.getInstance( new Configuration(), StringUtils.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals()) ); @@ -230,6 +233,45 @@ public boolean run() } } + @Override + public Map getStats() + { + if (job == null) { + return null; + } + + try { + Counters jobCounters = job.getCounters(); + + Map metrics = TaskMetricsUtils.makeIngestionRowMetrics( + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_COUNTER).getValue(), + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_WITH_ERRORS_COUNTER).getValue(), + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_UNPARSEABLE_COUNTER).getValue(), + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER).getValue() + ); + + return metrics; + } + catch (IllegalStateException ise) { + log.debug("Couldn't get counters due to job state"); + return null; + } + catch (Exception e) { + log.debug(e, "Encountered exception in getStats()."); + return null; + } + } + + @Override + public String getErrorMessage() + { + if (job == null) { + return null; + } + + return Utils.getFailureMessage(job, config.JSON_MAPPER); + } + private static IncrementalIndex makeIncrementalIndex( Bucket theBucket, AggregatorFactory[] aggs, @@ -321,10 +363,20 @@ protected void innerMap( // type SegmentInputRow serves as a marker that these InputRow instances have already been combined // and they contain the columns as they show up in the segment after ingestion, not what you would see in raw // data - byte[] serializedInputRow = inputRow instanceof SegmentInputRow ? - InputRowSerde.toBytes(typeHelperMap, inputRow, aggsForSerializingSegmentInputRow, reportParseExceptions) - : - InputRowSerde.toBytes(typeHelperMap, inputRow, aggregators, reportParseExceptions); + Pair> serializedRow = inputRow instanceof SegmentInputRow ? + InputRowSerde.toBytes( + typeHelperMap, + inputRow, + aggsForSerializingSegmentInputRow, + reportParseExceptions + ) + : + InputRowSerde.toBytes( + typeHelperMap, + inputRow, + aggregators, + reportParseExceptions + ); context.write( new SortableBytes( @@ -335,8 +387,15 @@ protected void innerMap( .put(hashedDimensions) .array() ).toBytesWritable(), - new BytesWritable(serializedInputRow) + new BytesWritable(serializedRow.lhs) ); + + ParseException pe = IncrementalIndex.getCombinedParseException(inputRow, serializedRow.rhs, null); + if (pe != null) { + throw pe; + } else { + context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_COUNTER).increment(1); + } } } @@ -412,11 +471,11 @@ private void flushIndexToContextAndClose(BytesWritable key, IncrementalIndex ind InputRow inputRow = getInputRowFromRow(row, dimensions); // reportParseExceptions is true as any unparseable data is already handled by the mapper. 
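getStats() in the IndexGeneratorJob hunk above (and its twins in the partition-determination jobs) reads the Hadoop job's Counters and packs the four row counters into a plain map via TaskMetricsUtils. The editor sketch below shows the same translation in isolation; it assumes the Hadoop MapReduce client library on the classpath, and the map keys are illustrative rather than the patch's actual TaskMetricsUtils constants.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;

public class JobStatsSketch
{
  enum RowCounters
  {
    ROWS_PROCESSED_COUNTER,
    ROWS_PROCESSED_WITH_ERRORS_COUNTER,
    ROWS_UNPARSEABLE_COUNTER,
    ROWS_THROWN_AWAY_COUNTER
  }

  public static Map<String, Object> getStats(Job job)
  {
    if (job == null) {
      return null;
    }
    try {
      Counters counters = job.getCounters();
      Map<String, Object> metrics = new HashMap<>();
      metrics.put("rowsProcessed", counters.findCounter(RowCounters.ROWS_PROCESSED_COUNTER).getValue());
      metrics.put("rowsProcessedWithErrors", counters.findCounter(RowCounters.ROWS_PROCESSED_WITH_ERRORS_COUNTER).getValue());
      metrics.put("rowsUnparseable", counters.findCounter(RowCounters.ROWS_UNPARSEABLE_COUNTER).getValue());
      metrics.put("rowsThrownAway", counters.findCounter(RowCounters.ROWS_THROWN_AWAY_COUNTER).getValue());
      return metrics;
    }
    catch (Exception e) {
      // Counters are unavailable before the job starts or after it is retired.
      return null;
    }
  }
}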
- byte[] serializedRow = InputRowSerde.toBytes(typeHelperMap, inputRow, combiningAggs, true); + Pair> serializedRow = InputRowSerde.toBytes(typeHelperMap, inputRow, combiningAggs, true); context.write( key, - new BytesWritable(serializedRow) + new BytesWritable(serializedRow.lhs) ); } index.close(); @@ -636,7 +695,7 @@ public void rejectedExecution(Runnable r, ThreadPoolExecutor executor) context.progress(); final InputRow inputRow = index.formatRow(InputRowSerde.fromBytes(typeHelperMap, bw.getBytes(), aggregators)); - int numRows = index.add(inputRow); + int numRows = index.add(inputRow).getRowCount(); ++lineCount; diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/InputRowSerde.java b/indexing-hadoop/src/main/java/io/druid/indexer/InputRowSerde.java index cd1dd531604a..a5d184f53d99 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/InputRowSerde.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/InputRowSerde.java @@ -32,6 +32,7 @@ import io.druid.data.input.impl.DimensionSchema; import io.druid.data.input.impl.DimensionsSpec; import io.druid.java.util.common.IAE; +import io.druid.java.util.common.Pair; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.logger.Logger; import io.druid.java.util.common.parsers.ParseException; @@ -45,8 +46,10 @@ import io.druid.segment.serde.ComplexMetrics; import org.apache.hadoop.io.WritableUtils; +import javax.annotation.Nullable; import java.io.DataInput; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -65,7 +68,8 @@ public interface IndexSerdeTypeHelper { ValueType getType(); - void serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions); + @Nullable + String serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions); T deserialize(ByteArrayDataInput in); } @@ -105,7 +109,7 @@ public ValueType getType() } @Override - public void serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions) + public String serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions) { List values = Rows.objectToStrings(value); try { @@ -114,6 +118,7 @@ public void serialize(ByteArrayDataOutput out, Object value, boolean reportParse catch (IOException ioe) { throw new RuntimeException(ioe); } + return null; } @Override @@ -137,15 +142,24 @@ public ValueType getType() } @Override - public void serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions) + public String serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions) { - Long ret = DimensionHandlerUtils.convertObjectToLong(value, reportParseExceptions); + String parseExceptionMessage = null; + Long ret = null; + try { + ret = DimensionHandlerUtils.convertObjectToLong(value, reportParseExceptions); + } + catch (ParseException pe) { + parseExceptionMessage = pe.getMessage(); + } + if (ret == null) { // remove null -> zero conversion when https://github.com/druid-io/druid/pull/5278 series of patches is merged // we'll also need to change the serialized encoding so that it can represent numeric nulls ret = DimensionHandlerUtils.ZERO_LONG; } out.writeLong(ret); + return parseExceptionMessage; } @Override @@ -164,15 +178,24 @@ public ValueType getType() } @Override - public void serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions) + public String serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions) { - Float ret = 
DimensionHandlerUtils.convertObjectToFloat(value, reportParseExceptions); + String parseExceptionMessage = null; + Float ret = null; + try { + ret = DimensionHandlerUtils.convertObjectToFloat(value, reportParseExceptions); + } + catch (ParseException pe) { + parseExceptionMessage = pe.getMessage(); + } + if (ret == null) { // remove null -> zero conversion when https://github.com/druid-io/druid/pull/5278 series of patches is merged // we'll also need to change the serialized encoding so that it can represent numeric nulls ret = DimensionHandlerUtils.ZERO_FLOAT; } out.writeFloat(ret); + return parseExceptionMessage; } @Override @@ -191,15 +214,24 @@ public ValueType getType() } @Override - public void serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions) + public String serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions) { - Double ret = DimensionHandlerUtils.convertObjectToDouble(value, reportParseExceptions); + String parseExceptionMessage = null; + Double ret = null; + try { + ret = DimensionHandlerUtils.convertObjectToDouble(value, reportParseExceptions); + } + catch (ParseException pe) { + parseExceptionMessage = pe.getMessage(); + } + if (ret == null) { // remove null -> zero conversion when https://github.com/druid-io/druid/pull/5278 series of patches is merged // we'll also need to change the serialized encoding so that it can represent numeric nulls ret = DimensionHandlerUtils.ZERO_DOUBLE; } out.writeDouble(ret); + return parseExceptionMessage; } @Override @@ -209,7 +241,7 @@ public Double deserialize(ByteArrayDataInput in) } } - public static final byte[] toBytes( + public static final Pair> toBytes( final Map typeHelperMap, final InputRow row, AggregatorFactory[] aggs, @@ -217,6 +249,7 @@ public static final byte[] toBytes( ) { try { + List parseExceptionMessages = new ArrayList<>(); ByteArrayDataOutput out = ByteStreams.newDataOutput(); //write timestamp @@ -233,7 +266,10 @@ public static final byte[] toBytes( typeHelper = STRING_HELPER; } writeString(dim, out); - typeHelper.serialize(out, row.getRaw(dim), reportParseExceptions); + String parseExceptionMessage = typeHelper.serialize(out, row.getRaw(dim), true); + if (parseExceptionMessage != null) { + parseExceptionMessages.add(parseExceptionMessage); + } } } @@ -264,10 +300,8 @@ public InputRow get() } catch (ParseException e) { // "aggregate" can throw ParseExceptions if a selector expects something but gets something else. 
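Instead of throwing on a bad numeric value, the serde helpers above now write a best-effort default and hand back the parse error message, and toBytes() collects those messages into a list that travels with the serialized row. The following editor sketch shows the "collect rather than throw" shape with a JDK-only serializer; it is an illustration, not Druid's InputRowSerde.

import java.util.ArrayList;
import java.util.List;

public class CollectingSerializerSketch
{
  // Returns null on success, or an error message on a recoverable parse failure.
  static String serializeLong(StringBuilder out, Object value)
  {
    try {
      out.append(Long.parseLong(String.valueOf(value)));
      return null;
    }
    catch (NumberFormatException e) {
      out.append(0L); // placeholder, mirroring the patch's null -> zero conversion
      return "could not convert value [" + value + "] to long";
    }
  }

  public static void main(String[] args)
  {
    StringBuilder out = new StringBuilder();
    List<String> parseExceptionMessages = new ArrayList<>();
    for (Object value : new Object[]{"10", "notanumber"}) {
      String err = serializeLong(out, value);
      if (err != null) {
        parseExceptionMessages.add(err);
      }
    }
    // The caller gets both the serialized payload and every problem encountered.
    System.out.println(out + " " + parseExceptionMessages);
  }
}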
- if (reportParseExceptions) { - throw new ParseException(e, "Encountered parse error for aggregator[%s]", k); - } log.debug(e, "Encountered parse error, skipping aggregator[%s].", k); + parseExceptionMessages.add(e.getMessage()); } String t = aggFactory.getTypeName(); @@ -287,7 +321,7 @@ public InputRow get() } } - return out.toByteArray(); + return Pair.of(out.toByteArray(), parseExceptionMessages); } catch (IOException ex) { throw new RuntimeException(ex); diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/JobHelper.java b/indexing-hadoop/src/main/java/io/druid/indexer/JobHelper.java index 16809561cbbc..f780de181bf7 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/JobHelper.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/JobHelper.java @@ -354,19 +354,40 @@ public static void ensurePaths(HadoopDruidIndexerConfig config) } } + public static boolean runSingleJob(Jobby job, HadoopDruidIndexerConfig config) + { + boolean succeeded = job.run(); + + if (!config.getSchema().getTuningConfig().isLeaveIntermediate()) { + if (succeeded || config.getSchema().getTuningConfig().isCleanupOnFailure()) { + Path workingPath = config.makeIntermediatePath(); + log.info("Deleting path[%s]", workingPath); + try { + Configuration conf = injectSystemProperties(new Configuration()); + config.addJobProperties(conf); + workingPath.getFileSystem(conf).delete(workingPath, true); + } + catch (IOException e) { + log.error(e, "Failed to cleanup path[%s]", workingPath); + } + } + } + + return succeeded; + } + public static boolean runJobs(List jobs, HadoopDruidIndexerConfig config) { - String failedMessage = null; + boolean succeeded = true; for (Jobby job : jobs) { - if (failedMessage == null) { - if (!job.run()) { - failedMessage = StringUtils.format("Job[%s] failed!", job.getClass()); - } + if (!job.run()) { + succeeded = false; + break; } } if (!config.getSchema().getTuningConfig().isLeaveIntermediate()) { - if (failedMessage == null || config.getSchema().getTuningConfig().isCleanupOnFailure()) { + if (succeeded || config.getSchema().getTuningConfig().isCleanupOnFailure()) { Path workingPath = config.makeIntermediatePath(); log.info("Deleting path[%s]", workingPath); try { @@ -380,11 +401,7 @@ public static boolean runJobs(List jobs, HadoopDruidIndexerConfig config) } } - if (failedMessage != null) { - throw new ISE(failedMessage); - } - - return true; + return succeeded; } public static DataSegment serializeOutIndex( diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/Utils.java b/indexing-hadoop/src/main/java/io/druid/indexer/Utils.java index 3f3523e74049..0729cf65ad72 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/Utils.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/Utils.java @@ -20,15 +20,19 @@ package io.druid.indexer; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Maps; import io.druid.jackson.DefaultObjectMapper; import io.druid.java.util.common.jackson.JacksonUtils; import io.druid.java.util.common.ISE; +import io.druid.java.util.common.logger.Logger; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.TaskCompletionEvent; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.ReflectionUtils; @@ -41,6 +45,7 @@ */ public 
class Utils { + private static final Logger log = new Logger(Utils.class); private static final ObjectMapper jsonMapper = new DefaultObjectMapper(); public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath, boolean deleteExisting) @@ -123,4 +128,25 @@ public static void storeStats( stats ); } + + public static String getFailureMessage(Job failedJob, ObjectMapper jsonMapper) + { + try { + Map taskDiagsMap = Maps.newHashMap(); + TaskCompletionEvent[] completionEvents = failedJob.getTaskCompletionEvents(0, 100); + for (TaskCompletionEvent tce : completionEvents) { + String[] taskDiags = failedJob.getTaskDiagnostics(tce.getTaskAttemptId()); + String combinedTaskDiags = ""; + for (String taskDiag : taskDiags) { + combinedTaskDiags += taskDiag; + } + taskDiagsMap.put(tce.getTaskAttemptId().toString(), combinedTaskDiags); + } + return jsonMapper.writeValueAsString(taskDiagsMap); + } + catch (IOException | InterruptedException ie) { + log.error("couldn't get failure cause for job."); + return null; + } + } } diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/BatchDeltaIngestionTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/BatchDeltaIngestionTest.java index 59d1f3103553..b7bb444c1cc7 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/BatchDeltaIngestionTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/BatchDeltaIngestionTest.java @@ -371,7 +371,7 @@ private void testIngestion( ) throws Exception { IndexGeneratorJob job = new IndexGeneratorJob(config); - JobHelper.runJobs(ImmutableList.of(job), config); + Assert.assertTrue(JobHelper.runJobs(ImmutableList.of(job), config)); File segmentFolder = new File( StringUtils.format( @@ -492,6 +492,8 @@ private HadoopDruidIndexerConfig makeHadoopDruidIndexerConfig( null, false, false, + null, + null, null ) ) diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/DetermineHashedPartitionsJobTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/DetermineHashedPartitionsJobTest.java index 21024e7fbf36..906dccd55343 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/DetermineHashedPartitionsJobTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/DetermineHashedPartitionsJobTest.java @@ -211,6 +211,8 @@ public DetermineHashedPartitionsJobTest( null, false, false, + null, + null, null ) ); diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/DeterminePartitionsJobTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/DeterminePartitionsJobTest.java index 0496cb3d7f5d..908425198783 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/DeterminePartitionsJobTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/DeterminePartitionsJobTest.java @@ -271,6 +271,8 @@ public DeterminePartitionsJobTest( null, false, false, + null, + null, null ) ) diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/HadoopDruidIndexerConfigTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/HadoopDruidIndexerConfigTest.java index 2b134c2aa364..2ed052eafec9 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/HadoopDruidIndexerConfigTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/HadoopDruidIndexerConfigTest.java @@ -96,6 +96,8 @@ public void testHashedBucketSelection() null, false, false, + null, + null, null ) ); @@ -170,6 +172,8 @@ public void testNoneShardSpecBucketSelection() null, false, false, + null, + null, null ) ); diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/HadoopDruidIndexerMapperTest.java 
b/indexing-hadoop/src/test/java/io/druid/indexer/HadoopDruidIndexerMapperTest.java index d7c7fdffd553..32cdf47730f6 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/HadoopDruidIndexerMapperTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/HadoopDruidIndexerMapperTest.java @@ -42,11 +42,14 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Counter; import org.apache.hadoop.mapreduce.Mapper; import org.easymock.EasyMock; import org.junit.Assert; import org.junit.Test; +import java.io.DataInput; +import java.io.DataOutput; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -145,6 +148,8 @@ public void testHadoopyStringParserWithTransformSpec() throws Exception ); final Mapper.Context mapContext = EasyMock.mock(Mapper.Context.class); EasyMock.expect(mapContext.getConfiguration()).andReturn(hadoopConfig).once(); + EasyMock.expect(mapContext.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER)) + .andReturn(getTestCounter()); EasyMock.replay(mapContext); mapper.setup(mapContext); final List> rows = ImmutableList.of( @@ -190,6 +195,66 @@ private static Map rowToMap(final InputRow row) return builder.build(); } + private static Counter getTestCounter() + { + return new Counter() + { + @Override + public void setDisplayName(String displayName) + { + + } + + @Override + public String getName() + { + return null; + } + + @Override + public String getDisplayName() + { + return null; + } + + @Override + public long getValue() + { + return 0; + } + + @Override + public void setValue(long value) + { + + } + + @Override + public void increment(long incr) + { + + } + + @Override + public Counter getUnderlyingCounter() + { + return null; + } + + @Override + public void write(DataOutput out) throws IOException + { + + } + + @Override + public void readFields(DataInput in) throws IOException + { + + } + }; + } + public static class MyMapper extends HadoopDruidIndexerMapper { private final List rows = new ArrayList<>(); diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/HadoopTuningConfigTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/HadoopTuningConfigTest.java index 753379ba709b..9e4a26a22b43 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/HadoopTuningConfigTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/HadoopTuningConfigTest.java @@ -58,6 +58,8 @@ public void testSerde() throws Exception null, true, true, + null, + null, null ); diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorCombinerTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorCombinerTest.java index 9eb75e27b779..39942bdc5b01 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorCombinerTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorCombinerTest.java @@ -175,8 +175,8 @@ public void testMultipleRowsMerged() throws Exception ) ); List rows = Lists.newArrayList( - new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row1, aggregators, true)), - new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row2, aggregators, true)) + new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row1, aggregators, true).lhs), + new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row2, aggregators, true).lhs) ); Reducer.Context context = EasyMock.createNiceMock(Reducer.Context.class); @@ -253,8 +253,8 @@ public void 
testMultipleRowsNotMerged() throws Exception Map typeHelperMap = InputRowSerde.getTypeHelperMap(dimensionsSpec); List rows = Lists.newArrayList( - new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row1, aggregators, true)), - new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row2, aggregators, true)) + new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row1, aggregators, true).lhs), + new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row2, aggregators, true).lhs) ); Reducer.Context context = EasyMock.createNiceMock(Reducer.Context.class); diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorJobTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorJobTest.java index 770d81092321..005ca237c361 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorJobTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorJobTest.java @@ -523,6 +523,8 @@ public void setUp() throws Exception null, forceExtendableShardSpecs, false, + null, + null, null ) ) @@ -580,7 +582,7 @@ public void testIndexGeneratorJob() throws IOException private void verifyJob(IndexGeneratorJob job) throws IOException { - JobHelper.runJobs(ImmutableList.of(job), config); + Assert.assertTrue(JobHelper.runJobs(ImmutableList.of(job), config)); int segmentNum = 0; for (DateTime currTime = interval.getStart(); currTime.isBefore(interval.getEnd()); currTime = currTime.plusDays(1)) { diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/InputRowSerdeTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/InputRowSerdeTest.java index 71609e42dd32..83a09a0307b8 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/InputRowSerdeTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/InputRowSerdeTest.java @@ -30,7 +30,7 @@ import io.druid.data.input.impl.StringDimensionSchema; import io.druid.hll.HyperLogLogCollector; import io.druid.jackson.AggregatorsModule; -import io.druid.java.util.common.parsers.ParseException; +import io.druid.java.util.common.Pair; import io.druid.query.aggregation.Aggregator; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.DoubleSumAggregator; @@ -124,7 +124,7 @@ public Aggregator factorize(ColumnSelectorFactory metricFactory) null ); - byte[] data = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, false); // Ignore Unparseable aggregator + byte[] data = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, false).lhs; // Ignore Unparseable aggregator InputRow out = InputRowSerde.fromBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), data, aggregatorFactories); Assert.assertEquals(timestamp, out.getTimestampFromEpoch()); @@ -173,14 +173,22 @@ public void testThrowParseExceptions() null ); - expectedException.expect(ParseException.class); - expectedException.expectMessage("Encountered parse error for aggregator[unparseable]"); - InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, true); + Pair> result = InputRowSerde.toBytes( + InputRowSerde.getTypeHelperMap(dimensionsSpec), + in, + aggregatorFactories, + true + ); + Assert.assertEquals( + Arrays.asList("Unable to parse value[m3v] for field[m3]"), + result.rhs + ); } @Test public void testDimensionParseExceptions() { + Pair> result; InputRow in = new MapBasedInputRow( timestamp, dims, @@ -190,8 +198,6 @@ public void testDimensionParseExceptions() new 
LongSumAggregatorFactory("m2out", "m2") }; - expectedException.expect(ParseException.class); - expectedException.expectMessage("could not convert value [d1v] to long"); DimensionsSpec dimensionsSpec = new DimensionsSpec( Arrays.asList( new LongDimensionSchema("d1") @@ -199,10 +205,12 @@ public void testDimensionParseExceptions() null, null ); - InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, true); + result = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, true); + Assert.assertEquals( + Arrays.asList("could not convert value [d1v] to long"), + result.rhs + ); - expectedException.expect(ParseException.class); - expectedException.expectMessage("could not convert value [d1v] to float"); dimensionsSpec = new DimensionsSpec( Arrays.asList( new FloatDimensionSchema("d1") @@ -210,10 +218,12 @@ public void testDimensionParseExceptions() null, null ); - InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, true); + result = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, true); + Assert.assertEquals( + Arrays.asList("could not convert value [d1v] to float"), + result.rhs + ); - expectedException.expect(ParseException.class); - expectedException.expectMessage("could not convert value [d1v] to double"); dimensionsSpec = new DimensionsSpec( Arrays.asList( new DoubleDimensionSchema("d1") @@ -221,6 +231,10 @@ public void testDimensionParseExceptions() null, null ); - InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, true); + result = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, true); + Assert.assertEquals( + Arrays.asList("could not convert value [d1v] to double"), + result.rhs + ); } } diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/JobHelperTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/JobHelperTest.java index f292cfb96081..a55f5d2b5b0b 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/JobHelperTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/JobHelperTest.java @@ -127,6 +127,8 @@ public void setup() throws Exception null, false, false, + null, + null, null ) ) diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/path/GranularityPathSpecTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/path/GranularityPathSpecTest.java index 3aec576f4f56..b4caeed21f43 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/path/GranularityPathSpecTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/path/GranularityPathSpecTest.java @@ -73,6 +73,8 @@ public class GranularityPathSpecTest null, false, false, + null, + null, null ); diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/updater/HadoopConverterJobTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/updater/HadoopConverterJobTest.java index 844df90b095d..1384d9dd89b4 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/updater/HadoopConverterJobTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/updater/HadoopConverterJobTest.java @@ -212,6 +212,8 @@ public InputStream openStream() throws IOException null, false, false, + null, + null, null ) ) @@ -251,7 +253,7 @@ public boolean run() new SQLMetadataStorageUpdaterJobHandler(connector) ) ); - JobHelper.runJobs(jobs, hadoopDruidIndexerConfig); + Assert.assertTrue(JobHelper.runJobs(jobs, hadoopDruidIndexerConfig)); } private List 
getDataSegments( diff --git a/indexing-service/src/main/java/io/druid/indexing/common/TaskStatus.java b/indexing-service/src/main/java/io/druid/indexing/common/TaskStatus.java index b0249440c7a1..50d8f2c277c6 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/TaskStatus.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/TaskStatus.java @@ -26,6 +26,8 @@ import com.google.common.base.Preconditions; import io.druid.indexer.TaskState; +import java.util.Map; + /** * Represents the status of a task from the perspective of the coordinator. The task may be ongoing * ({@link #isComplete()} false) or it may be complete ({@link #isComplete()} true). @@ -36,38 +38,57 @@ public class TaskStatus { public static TaskStatus running(String taskId) { - return new TaskStatus(taskId, TaskState.RUNNING, -1); + return new TaskStatus(taskId, TaskState.RUNNING, -1, null, null, null); } public static TaskStatus success(String taskId) { - return new TaskStatus(taskId, TaskState.SUCCESS, -1); + return new TaskStatus(taskId, TaskState.SUCCESS, -1, null, null, null); + } + + public static TaskStatus success(String taskId, Map metrics, String errorMsg, Map context) + { + return new TaskStatus(taskId, TaskState.SUCCESS, -1, metrics, errorMsg, context); } public static TaskStatus failure(String taskId) { - return new TaskStatus(taskId, TaskState.FAILED, -1); + return new TaskStatus(taskId, TaskState.FAILED, -1, null, null, null); + } + + public static TaskStatus failure(String taskId, Map metrics, String errorMsg, Map context) + { + return new TaskStatus(taskId, TaskState.FAILED, -1, metrics, errorMsg, context); } public static TaskStatus fromCode(String taskId, TaskState code) { - return new TaskStatus(taskId, code, -1); + return new TaskStatus(taskId, code, -1, null, null, null); } private final String id; private final TaskState status; private final long duration; + private final Map metrics; + private final String errorMsg; + private final Map context; @JsonCreator protected TaskStatus( @JsonProperty("id") String id, @JsonProperty("status") TaskState status, - @JsonProperty("duration") long duration + @JsonProperty("duration") long duration, + @JsonProperty("metrics") Map metrics, + @JsonProperty("errorMsg") String errorMsg, + @JsonProperty("context") Map context ) { this.id = id; this.status = status; this.duration = duration; + this.metrics = metrics; + this.errorMsg = errorMsg; + this.context = context; // Check class invariants. Preconditions.checkNotNull(id, "id"); @@ -92,6 +113,24 @@ public long getDuration() return duration; } + @JsonProperty("metrics") + public Map getMetrics() + { + return metrics; + } + + @JsonProperty("errorMsg") + public String getErrorMsg() + { + return errorMsg; + } + + @JsonProperty("context") + public Map getContext() + { + return context; + } + /** * Signals that a task is not yet complete, and is still runnable on a worker. Exactly one of isRunnable, * isSuccess, or isFailure will be true at any one time. 
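With metrics, errorMsg, and context now Jackson properties on TaskStatus, a completed task's status payload carries its row counts and saved parse errors alongside the usual id, status, and duration. The editor sketch below serializes a stand-in POJO to show roughly what that payload looks like; it assumes jackson-databind and Guava on the classpath, and the metric key names are illustrative.

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableMap;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

public class StatusSerdeSketch
{
  // A stand-in view object, not Druid's TaskStatus.
  public static class StatusView
  {
    @JsonProperty public String id = "index_kafka_example";
    @JsonProperty public String status = "FAILED";
    @JsonProperty public long duration = -1;
    @JsonProperty public Map<String, Map<String, Long>> metrics = ImmutableMap.<String, Map<String, Long>>of(
        "buildSegments",
        ImmutableMap.of("rowsProcessed", 3L, "rowsUnparseable", 3L)
    );
    @JsonProperty public String errorMsg = "Max parse exceptions exceeded, terminating task...";
    @JsonProperty public Map<String, List<String>> context = ImmutableMap.<String, List<String>>of(
        "unparseableEvents",
        Arrays.asList("Unable to parse row [unparseable]", "Unable to parse row [unparseable2]")
    );
  }

  public static void main(String[] args) throws Exception
  {
    System.out.println(new ObjectMapper().writeValueAsString(new StatusView()));
  }
}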
@@ -141,7 +180,7 @@ public boolean isFailure() public TaskStatus withDuration(long _duration) { - return new TaskStatus(id, status, _duration); + return new TaskStatus(id, status, _duration, metrics, errorMsg, context); } @Override @@ -156,7 +195,10 @@ public boolean equals(Object o) TaskStatus that = (TaskStatus) o; return duration == that.duration && java.util.Objects.equals(id, that.id) && - status == that.status; + status == that.status && + java.util.Objects.equals(metrics, that.metrics) && + java.util.Objects.equals(errorMsg, that.errorMsg) && + java.util.Objects.equals(context, that.context); } @Override @@ -172,6 +214,9 @@ public String toString() .add("id", id) .add("status", status) .add("duration", duration) + .add("metrics", metrics) + .add("errorMsg", errorMsg) + .add("context", context) .toString(); } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/index/YeOldePlumberSchool.java b/indexing-service/src/main/java/io/druid/indexing/common/index/YeOldePlumberSchool.java index 0bba29fbe881..5b25a3f04085 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/index/YeOldePlumberSchool.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/index/YeOldePlumberSchool.java @@ -130,7 +130,7 @@ public int add(InputRow row, Supplier committerSupplier) throws Index return -1; } - final int numRows = sink.add(row, false); + final int numRows = sink.add(row, false).getRowCount(); if (!sink.canAppendRow()) { persist(committerSupplier.get()); diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/CompactionTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/CompactionTask.java index b87eb9f09c58..c3c1eb848184 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/CompactionTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/CompactionTask.java @@ -65,6 +65,7 @@ import io.druid.segment.indexing.granularity.ArbitraryGranularitySpec; import io.druid.segment.indexing.granularity.GranularitySpec; import io.druid.segment.loading.SegmentLoadingException; +import io.druid.server.security.AuthorizerMapper; import io.druid.timeline.DataSegment; import io.druid.timeline.TimelineObjectHolder; import io.druid.timeline.VersionedIntervalTimeline; @@ -102,6 +103,9 @@ public class CompactionTask extends AbstractTask @JsonIgnore private IndexTask indexTaskSpec; + @JsonIgnore + private final AuthorizerMapper authorizerMapper; + @JsonCreator public CompactionTask( @JsonProperty("id") final String id, @@ -112,7 +116,8 @@ public CompactionTask( @Nullable @JsonProperty("dimensions") final DimensionsSpec dimensionsSpec, @Nullable @JsonProperty("tuningConfig") final IndexTuningConfig tuningConfig, @Nullable @JsonProperty("context") final Map context, - @JacksonInject ObjectMapper jsonMapper + @JacksonInject ObjectMapper jsonMapper, + @JacksonInject AuthorizerMapper authorizerMapper ) { super(getOrMakeId(id, TYPE, dataSource), null, taskResource, dataSource, context); @@ -125,6 +130,7 @@ public CompactionTask( this.tuningConfig = tuningConfig; this.jsonMapper = jsonMapper; this.segmentProvider = segments == null ? 
new SegmentProvider(dataSource, interval) : new SegmentProvider(segments); + this.authorizerMapper = authorizerMapper; } @JsonProperty @@ -195,7 +201,9 @@ public TaskStatus run(final TaskToolbox toolbox) throws Exception getTaskResource(), getDataSource(), ingestionSpec, - getContext() + getContext(), + authorizerMapper, + null ); } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java index 4386b5b35953..b4054c577bbf 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java @@ -23,18 +23,21 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Optional; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; +import com.google.common.collect.Maps; import io.druid.indexer.HadoopDruidDetermineConfigurationJob; import io.druid.indexer.HadoopDruidIndexerConfig; import io.druid.indexer.HadoopDruidIndexerJob; import io.druid.indexer.HadoopIngestionSpec; -import io.druid.indexer.Jobby; +import io.druid.indexer.IngestionState; import io.druid.indexer.MetadataStorageUpdaterJobHandler; +import io.druid.indexer.TaskMetricsGetter; +import io.druid.indexer.TaskMetricsUtils; import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskLockType; import io.druid.indexing.common.TaskStatus; @@ -47,14 +50,35 @@ import io.druid.java.util.common.JodaUtils; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.logger.Logger; +import io.druid.segment.realtime.firehose.ChatHandler; +import io.druid.segment.realtime.firehose.ChatHandlerProvider; +import io.druid.server.security.Access; +import io.druid.server.security.Action; +import io.druid.server.security.AuthorizationUtils; +import io.druid.server.security.AuthorizerMapper; +import io.druid.server.security.ForbiddenException; +import io.druid.server.security.Resource; +import io.druid.server.security.ResourceAction; +import io.druid.server.security.ResourceType; import io.druid.timeline.DataSegment; import org.joda.time.Interval; +import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; +import javax.ws.rs.QueryParam; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.SortedSet; -public class HadoopIndexTask extends HadoopTask +public class HadoopIndexTask extends HadoopTask implements ChatHandler { private static final Logger log = new Logger(HadoopIndexTask.class); @@ -72,6 +96,27 @@ private static String getTheDataSource(HadoopIngestionSpec spec) @JsonIgnore private final ObjectMapper jsonMapper; + @JsonIgnore + private final AuthorizerMapper authorizerMapper; + + @JsonIgnore + private final Optional chatHandlerProvider; + + @JsonIgnore + private InnerProcessingStatsGetter determinePartitionsStatsGetter; + + 
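The HadoopIndexTask changes here register the task as a chat handler so its stats getters can be queried while it runs, and runInternal() further below invokes the inner Hadoop processing through an isolated classloader by swapping the thread's context classloader for the duration of the reflective call. A compact editor sketch of that classloader discipline (the Runnable stands in for the reflective runTask invocation):

public class ClassLoaderSwapSketch
{
  public static void runWithLoader(ClassLoader loader, Runnable work)
  {
    final ClassLoader oldLoader = Thread.currentThread().getContextClassLoader();
    try {
      // Make the isolated Hadoop classloader current for the duration of the call.
      Thread.currentThread().setContextClassLoader(loader);
      work.run();
    }
    finally {
      // Always restore the previous loader, even if the work throws.
      Thread.currentThread().setContextClassLoader(oldLoader);
    }
  }
}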
@JsonIgnore + private InnerProcessingStatsGetter buildSegmentsStatsGetter; + + @JsonIgnore + private IngestionState ingestionState; + + @JsonIgnore + private HadoopDetermineConfigInnerProcessingStatus determineConfigStatus = null; + + @JsonIgnore + private HadoopIndexGeneratorInnerProcessingStatus buildSegmentsStatus = null; + /** * @param spec is used by the HadoopDruidIndexerJob to set up the appropriate parameters * for creating Druid index segments. It may be modified. @@ -90,7 +135,9 @@ public HadoopIndexTask( @JsonProperty("hadoopDependencyCoordinates") List hadoopDependencyCoordinates, @JsonProperty("classpathPrefix") String classpathPrefix, @JacksonInject ObjectMapper jsonMapper, - @JsonProperty("context") Map context + @JsonProperty("context") Map context, + @JacksonInject AuthorizerMapper authorizerMapper, + @JacksonInject ChatHandlerProvider chatHandlerProvider ) { super( @@ -101,8 +148,8 @@ public HadoopIndexTask( : hadoopDependencyCoordinates, context ); - - + this.authorizerMapper = authorizerMapper; + this.chatHandlerProvider = Optional.fromNullable(chatHandlerProvider); this.spec = spec; // Some HadoopIngestionSpec stuff doesn't make sense in the context of the indexing service @@ -118,6 +165,7 @@ public HadoopIndexTask( this.classpathPrefix = classpathPrefix; this.jsonMapper = Preconditions.checkNotNull(jsonMapper, "null ObjectMappper"); + this.ingestionState = IngestionState.NOT_STARTED; } @Override @@ -168,9 +216,41 @@ public String getClasspathPrefix() return classpathPrefix; } - @SuppressWarnings("unchecked") @Override public TaskStatus run(TaskToolbox toolbox) throws Exception + { + try { + if (chatHandlerProvider.isPresent()) { + log.info("Found chat handler of class[%s]", chatHandlerProvider.get().getClass().getName()); + chatHandlerProvider.get().register(getId(), this, false); + } else { + log.warn("No chat handler detected"); + } + + return runInternal(toolbox); + } + catch (Exception e) { + Throwable effectiveException; + if (e instanceof RuntimeException && e.getCause() instanceof InvocationTargetException) { + InvocationTargetException ite = (InvocationTargetException) e.getCause(); + effectiveException = ite.getCause(); + log.error(effectiveException, "Got invocation target exception in run(), cause: "); + } else { + effectiveException = e; + log.error(e, "Encountered exception in run():"); + } + + return TaskStatus.failure(getId(), null, effectiveException.getMessage(), null); + } + finally { + if (chatHandlerProvider.isPresent()) { + chatHandlerProvider.get().unregister(getId()); + } + } + } + + @SuppressWarnings("unchecked") + private TaskStatus runInternal(TaskToolbox toolbox) throws Exception { final ClassLoader loader = buildClassLoader(toolbox); boolean determineIntervals = !spec.getDataSchema().getGranularitySpec().bucketIntervals().isPresent(); @@ -181,8 +261,8 @@ public TaskStatus run(TaskToolbox toolbox) throws Exception new OverlordActionBasedUsedSegmentLister(toolbox) ); - final String config = invokeForeignLoader( - "io.druid.indexing.common.task.HadoopIndexTask$HadoopDetermineConfigInnerProcessing", + Object determinePartitionsInnerProcessingRunner = getForeignClassloaderObject( + "io.druid.indexing.common.task.HadoopIndexTask$HadoopDetermineConfigInnerProcessingRunner", new String[]{ toolbox.getObjectMapper().writeValueAsString(spec), toolbox.getConfig().getHadoopWorkingPath(), @@ -190,11 +270,49 @@ public TaskStatus run(TaskToolbox toolbox) throws Exception }, loader ); + determinePartitionsStatsGetter = new 
InnerProcessingStatsGetter(determinePartitionsInnerProcessingRunner); + + String[] input1 = new String[]{ + toolbox.getObjectMapper().writeValueAsString(spec), + toolbox.getConfig().getHadoopWorkingPath(), + toolbox.getSegmentPusher().getPathForHadoop() + }; + + HadoopIngestionSpec indexerSchema = null; + final ClassLoader oldLoader = Thread.currentThread().getContextClassLoader(); + Class aClazz = determinePartitionsInnerProcessingRunner.getClass(); + Method determinePartitionsInnerProcessingRunTask = aClazz.getMethod("runTask", input1.getClass()); + try { + Thread.currentThread().setContextClassLoader(loader); + + ingestionState = IngestionState.DETERMINE_PARTITIONS; + + final String determineConfigStatusString = (String) determinePartitionsInnerProcessingRunTask.invoke( + determinePartitionsInnerProcessingRunner, + new Object[]{input1} + ); - final HadoopIngestionSpec indexerSchema = toolbox - .getObjectMapper() - .readValue(config, HadoopIngestionSpec.class); + determineConfigStatus = toolbox + .getObjectMapper() + .readValue(determineConfigStatusString, HadoopDetermineConfigInnerProcessingStatus.class); + + indexerSchema = determineConfigStatus.getSchema(); + if (indexerSchema == null) { + return TaskStatus.failure( + getId(), + getTaskCompletionMetrics(), + determineConfigStatus.getErrorMsg(), + getTaskCompletionContext() + ); + } + } + catch (Exception e) { + throw new RuntimeException(e); + } + finally { + Thread.currentThread().setContextClassLoader(oldLoader); + } // We should have a lock from before we started running only if interval was specified String version; @@ -235,38 +353,249 @@ public TaskStatus run(TaskToolbox toolbox) throws Exception log.info("Setting version to: %s", version); - final String segments = invokeForeignLoader( - "io.druid.indexing.common.task.HadoopIndexTask$HadoopIndexGeneratorInnerProcessing", + Object innerProcessingRunner = getForeignClassloaderObject( + "io.druid.indexing.common.task.HadoopIndexTask$HadoopIndexGeneratorInnerProcessingRunner", new String[]{ toolbox.getObjectMapper().writeValueAsString(indexerSchema), version }, loader ); + buildSegmentsStatsGetter = new InnerProcessingStatsGetter(innerProcessingRunner); + + String[] input = new String[]{ + toolbox.getObjectMapper().writeValueAsString(indexerSchema), + version + }; - if (segments != null) { - List publishedSegments = toolbox.getObjectMapper().readValue( - segments, - new TypeReference>() - { - } + Class aClazz2 = innerProcessingRunner.getClass(); + Method innerProcessingRunTask = aClazz2.getMethod("runTask", input.getClass()); + + try { + Thread.currentThread().setContextClassLoader(loader); + + ingestionState = IngestionState.BUILD_SEGMENTS; + final String jobStatusString = (String) innerProcessingRunTask.invoke( + innerProcessingRunner, + new Object[]{input} ); - toolbox.publishSegments(publishedSegments); - return TaskStatus.success(getId()); - } else { - return TaskStatus.failure(getId()); + buildSegmentsStatus = toolbox.getObjectMapper().readValue( + jobStatusString, + HadoopIndexGeneratorInnerProcessingStatus.class + ); + + if (buildSegmentsStatus.getDataSegments() != null) { + ingestionState = IngestionState.COMPLETED; + toolbox.publishSegments(buildSegmentsStatus.getDataSegments()); + return TaskStatus.success(getId(), getTaskCompletionMetrics(), null, getTaskCompletionContext()); + } else { + return TaskStatus.failure( + getId(), + getTaskCompletionMetrics(), + buildSegmentsStatus.getErrorMsg(), + getTaskCompletionContext() + ); + } + } + catch (Exception e) { + throw new 
RuntimeException(e); + } + finally { + Thread.currentThread().setContextClassLoader(oldLoader); + } + } + + @GET + @Path("/rowStats") + @Produces(MediaType.APPLICATION_JSON) + public Response getRowStats( + @Context final HttpServletRequest req, + @QueryParam("windows") List windows + ) + { + authorizationCheck(req, Action.READ); + Map returnMap = Maps.newHashMap(); + Map totalsMap = Maps.newHashMap(); + + if (determinePartitionsStatsGetter != null) { + totalsMap.put("determinePartitions", determinePartitionsStatsGetter.getTotalMetrics()); + } + + if (buildSegmentsStatsGetter != null) { + totalsMap.put("buildSegments", buildSegmentsStatsGetter.getTotalMetrics()); + } + + returnMap.put("totals", totalsMap); + return Response.ok(returnMap).build(); + } + + private Map getTaskCompletionMetrics() + { + Map metrics = Maps.newHashMap(); + if (determineConfigStatus != null) { + metrics.put( + "determinePartitions", + determineConfigStatus.getMetrics() + ); + } + if (buildSegmentsStatus != null) { + metrics.put( + "buildSegments", + buildSegmentsStatus.getMetrics() + ); + } + return metrics; + } + + private Map getTaskCompletionContext() + { + Map context = Maps.newHashMap(); + context.put("ingestionState", ingestionState); + return context; + } + + /** + * Authorizes action to be performed on this task's datasource + * + * @return authorization result + */ + private Access authorizationCheck(final HttpServletRequest req, Action action) + { + ResourceAction resourceAction = new ResourceAction( + new Resource(spec.getDataSchema().getDataSource(), ResourceType.DATASOURCE), + action + ); + + Access access = AuthorizationUtils.authorizeResourceAction(req, resourceAction, authorizerMapper); + if (!access.isAllowed()) { + throw new ForbiddenException(access.toString()); + } + + return access; + } + + + public static class InnerProcessingStatsGetter implements TaskMetricsGetter + { + public static final List KEYS = Arrays.asList( + TaskMetricsUtils.ROWS_PROCESSED, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, + TaskMetricsUtils.ROWS_THROWN_AWAY, + TaskMetricsUtils.ROWS_UNPARSEABLE + ); + + public static final Map MISSING_SAMPLE_DEFAULT_VALUES = Maps.newHashMap(); + static { + MISSING_SAMPLE_DEFAULT_VALUES.put(TaskMetricsUtils.ROWS_PROCESSED, 0.0d); + MISSING_SAMPLE_DEFAULT_VALUES.put(TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0.0d); + MISSING_SAMPLE_DEFAULT_VALUES.put(TaskMetricsUtils.ROWS_THROWN_AWAY, 0.0d); + MISSING_SAMPLE_DEFAULT_VALUES.put(TaskMetricsUtils.ROWS_UNPARSEABLE, 0.0d); + } + + private final Method getStatsMethod; + private final Object innerProcessingRunner; + + private long processed = 0; + private long processedWithErrors = 0; + private long thrownAway = 0; + private long unparseable = 0; + + public InnerProcessingStatsGetter( + Object innerProcessingRunner + ) + { + try { + Class aClazz = innerProcessingRunner.getClass(); + this.getStatsMethod = aClazz.getMethod("getStats"); + this.innerProcessingRunner = innerProcessingRunner; + } + catch (NoSuchMethodException nsme) { + throw new RuntimeException(nsme); + } + } + + @Override + public List getKeys() + { + return KEYS; + } + + @Override + public Map getMetrics() + { + try { + Map statsMap = (Map) getStatsMethod.invoke(innerProcessingRunner); + if (statsMap == null) { + return MISSING_SAMPLE_DEFAULT_VALUES; + } + + long curProcessed = (Long) statsMap.get(TaskMetricsUtils.ROWS_PROCESSED); + long curProcessedWithErrors = (Long) statsMap.get(TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS); + long curThrownAway = (Long) 
statsMap.get(TaskMetricsUtils.ROWS_THROWN_AWAY); + long curUnparseable = (Long) statsMap.get(TaskMetricsUtils.ROWS_UNPARSEABLE); + + Long processedDiff = curProcessed - processed; + Long processedWithErrorsDiff = curProcessedWithErrors - processedWithErrors; + Long thrownAwayDiff = curThrownAway - thrownAway; + Long unparseableDiff = curUnparseable - unparseable; + + processed = curProcessed; + processedWithErrors = curProcessedWithErrors; + thrownAway = curThrownAway; + unparseable = curUnparseable; + + return ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED, processedDiff.doubleValue(), + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, processedWithErrorsDiff.doubleValue(), + TaskMetricsUtils.ROWS_THROWN_AWAY, thrownAwayDiff.doubleValue(), + TaskMetricsUtils.ROWS_UNPARSEABLE, unparseableDiff.doubleValue() + ); + } + catch (Exception e) { + log.error(e, "Got exception from getMetrics(): "); + return null; + } + } + + public Map getTotalMetrics() + { + try { + Map statsMap = (Map) getStatsMethod.invoke(innerProcessingRunner); + if (statsMap == null) { + return MISSING_SAMPLE_DEFAULT_VALUES; + } + long curProcessed = (Long) statsMap.get(TaskMetricsUtils.ROWS_PROCESSED); + long curProcessedWithErrors = (Long) statsMap.get(TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS); + long curThrownAway = (Long) statsMap.get(TaskMetricsUtils.ROWS_THROWN_AWAY); + long curUnparseable = (Long) statsMap.get(TaskMetricsUtils.ROWS_UNPARSEABLE); + + return ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED, (double) curProcessed, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, (double) curProcessedWithErrors, + TaskMetricsUtils.ROWS_THROWN_AWAY, (double) curThrownAway, + TaskMetricsUtils.ROWS_UNPARSEABLE, (double) curUnparseable + ); + } + catch (Exception e) { + log.error(e, "Got exception from getTotalMetrics(): "); + return null; + } } } + /** Called indirectly in {@link HadoopIndexTask#run(TaskToolbox)}. 
*/ @SuppressWarnings("unused") - public static class HadoopIndexGeneratorInnerProcessing + public static class HadoopDetermineConfigInnerProcessingRunner { - public static String runTask(String[] args) throws Exception + private HadoopDruidDetermineConfigurationJob job; + + public String runTask(String[] args) throws Exception { final String schema = args[0]; - String version = args[1]; + final String workingPath = args[1]; + final String segmentOutputPath = args[2]; final HadoopIngestionSpec theSchema = HadoopDruidIndexerConfig.JSON_MAPPER .readValue( @@ -275,38 +604,43 @@ public static String runTask(String[] args) throws Exception ); final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSpec( theSchema - .withTuningConfig(theSchema.getTuningConfig().withVersion(version)) + .withIOConfig(theSchema.getIOConfig().withSegmentOutputPath(segmentOutputPath)) + .withTuningConfig(theSchema.getTuningConfig().withWorkingPath(workingPath)) ); - // MetadataStorageUpdaterJobHandler is only needed when running standalone without indexing service - // In that case the whatever runs the Hadoop Index Task must ensure MetadataStorageUpdaterJobHandler - // can be injected based on the configuration given in config.getSchema().getIOConfig().getMetadataUpdateSpec() - final MetadataStorageUpdaterJobHandler maybeHandler; - if (config.isUpdaterJobSpecSet()) { - maybeHandler = injector.getInstance(MetadataStorageUpdaterJobHandler.class); + job = new HadoopDruidDetermineConfigurationJob(config); + + log.info("Starting a hadoop determine configuration job..."); + if (job.run()) { + return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString( + new HadoopDetermineConfigInnerProcessingStatus(config.getSchema(), job.getStats(), null) + ); } else { - maybeHandler = null; + return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString( + new HadoopDetermineConfigInnerProcessingStatus(null, job.getStats(), job.getErrorMessage()) + ); } - HadoopDruidIndexerJob job = new HadoopDruidIndexerJob(config, maybeHandler); + } - log.info("Starting a hadoop index generator job..."); - if (job.run()) { - return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(job.getPublishedSegments()); + public Map getStats() + { + if (job == null) { + return null; } - return null; + return job.getStats(); } } - /** Called indirectly in {@link HadoopIndexTask#run(TaskToolbox)}. 
*/ @SuppressWarnings("unused") - public static class HadoopDetermineConfigInnerProcessing + public static class HadoopIndexGeneratorInnerProcessingRunner { - public static String runTask(String[] args) throws Exception + private HadoopDruidIndexerJob job; + + public String runTask(String[] args) throws Exception { final String schema = args[0]; - final String workingPath = args[1]; - final String segmentOutputPath = args[2]; + String version = args[1]; final HadoopIngestionSpec theSchema = HadoopDruidIndexerConfig.JSON_MAPPER .readValue( @@ -315,18 +649,133 @@ public static String runTask(String[] args) throws Exception ); final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSpec( theSchema - .withIOConfig(theSchema.getIOConfig().withSegmentOutputPath(segmentOutputPath)) - .withTuningConfig(theSchema.getTuningConfig().withWorkingPath(workingPath)) + .withTuningConfig(theSchema.getTuningConfig().withVersion(version)) ); - Jobby job = new HadoopDruidDetermineConfigurationJob(config); + // MetadataStorageUpdaterJobHandler is only needed when running standalone without indexing service + // In that case, whatever runs the Hadoop Index Task must ensure MetadataStorageUpdaterJobHandler + // can be injected based on the configuration given in config.getSchema().getIOConfig().getMetadataUpdateSpec() + final MetadataStorageUpdaterJobHandler maybeHandler; + if (config.isUpdaterJobSpecSet()) { + maybeHandler = injector.getInstance(MetadataStorageUpdaterJobHandler.class); + } else { + maybeHandler = null; + } + job = new HadoopDruidIndexerJob(config, maybeHandler); - log.info("Starting a hadoop determine configuration job..."); - if (job.run()) { - return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(config.getSchema()); + log.info("Starting a hadoop index generator job..."); + try { + if (job.run()) { + return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString( + new HadoopIndexGeneratorInnerProcessingStatus( + job.getPublishedSegments(), + job.getStats(), + null + ) + ); + } else { + return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString( + new HadoopIndexGeneratorInnerProcessingStatus( + null, + job.getStats(), + job.getErrorMessage() + ) + ); + } } + catch (Exception e) { + log.error(e, "Encountered exception in HadoopIndexGeneratorInnerProcessingRunner."); + return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString( + new HadoopIndexGeneratorInnerProcessingStatus( + null, + job.getStats(), + e.getMessage() + ) + ); + } + } + + public Map getStats() + { + if (job == null) { + return null; + } + + return job.getStats(); + } + } + + public static class HadoopIndexGeneratorInnerProcessingStatus + { + private final List dataSegments; + private final Map metrics; + private final String errorMsg; + + @JsonCreator + public HadoopIndexGeneratorInnerProcessingStatus( + @JsonProperty("dataSegments") List dataSegments, + @JsonProperty("metrics") Map metrics, + @JsonProperty("errorMsg") String errorMsg + ) + { + this.dataSegments = dataSegments; + this.metrics = metrics; + this.errorMsg = errorMsg; + } + + @JsonProperty + public List getDataSegments() + { + return dataSegments; + } + + @JsonProperty + public Map getMetrics() + { + return metrics; + } + + @JsonProperty + public String getErrorMsg() + { + return errorMsg; + } + } - return null; + public static class HadoopDetermineConfigInnerProcessingStatus + { + private final HadoopIngestionSpec schema; + private final Map metrics; + private final String errorMsg; + + @JsonCreator + public
HadoopDetermineConfigInnerProcessingStatus( + @JsonProperty("schema") HadoopIngestionSpec schema, + @JsonProperty("metrics") Map metrics, + @JsonProperty("errorMsg") String errorMsg + ) + { + this.schema = schema; + this.metrics = metrics; + this.errorMsg = errorMsg; + } + + @JsonProperty + public HadoopIngestionSpec getSchema() + { + return schema; + } + + @JsonProperty + public Map getMetrics() + { + return metrics; + } + + @JsonProperty + public String getErrorMsg() + { + return errorMsg; } } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java index 94ac078c3ea8..4ff80f63c024 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java @@ -225,4 +225,35 @@ public static OutputType invokeForeignLoader( Thread.currentThread().setContextClassLoader(oldLoader); } } + + /** + * This method tries to isolate class loading during a Function call + * + * @param clazzName The Class which has a static method called `runTask` + * @param input The input for `runTask`, must have `input.getClass()` be the class of the input for runTask + * @param loader The loader to use as the context class loader during invocation + * @param The input type of the method. + * + * @return The result of the method invocation + */ + public static Object getForeignClassloaderObject( + final String clazzName, + final InputType input, + final ClassLoader loader + ) + { + log.debug("Launching [%s] on class loader [%s] with input class [%s]", clazzName, loader, input.getClass()); + final ClassLoader oldLoader = Thread.currentThread().getContextClassLoader(); + try { + Thread.currentThread().setContextClassLoader(loader); + final Class clazz = loader.loadClass(clazzName); + return clazz.newInstance(); + } + catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) { + throw Throwables.propagate(e); + } + finally { + Thread.currentThread().setContextClassLoader(oldLoader); + } + } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java index 7a4dff26c85b..3cc7b103e689 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java @@ -19,6 +19,7 @@ package io.druid.indexing.common.task; +import com.fasterxml.jackson.annotation.JacksonInject; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; @@ -31,6 +32,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; +import com.google.common.collect.Maps; import com.google.common.hash.HashFunction; import com.google.common.hash.Hashing; import com.google.common.util.concurrent.ListenableFuture; @@ -39,6 +41,8 @@ import io.druid.data.input.InputRow; import io.druid.data.input.Rows; import io.druid.hll.HyperLogLogCollector; +import io.druid.indexer.IngestionState; +import io.druid.indexer.TaskMetricsUtils; import io.druid.indexing.appenderator.ActionBasedSegmentAllocator; import io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker; import io.druid.indexing.common.TaskLock; @@ -48,6 +52,7 @@ import 
io.druid.indexing.common.actions.TaskActionClient; import io.druid.indexing.firehose.IngestSegmentFirehoseFactory; import io.druid.java.util.common.ISE; +import io.druid.java.util.common.Intervals; import io.druid.java.util.common.JodaUtils; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.granularity.Granularity; @@ -75,20 +80,40 @@ import io.druid.segment.realtime.appenderator.SegmentIdentifier; import io.druid.segment.realtime.appenderator.SegmentsAndMetadata; import io.druid.segment.realtime.appenderator.TransactionalSegmentPublisher; +import io.druid.segment.realtime.firehose.ChatHandler; +import io.druid.segment.realtime.firehose.ChatHandlerProvider; import io.druid.segment.writeout.SegmentWriteOutMediumFactory; +import io.druid.server.security.Access; +import io.druid.server.security.Action; +import io.druid.server.security.AuthorizationUtils; +import io.druid.server.security.AuthorizerMapper; +import io.druid.server.security.ForbiddenException; +import io.druid.server.security.Resource; +import io.druid.server.security.ResourceAction; +import io.druid.server.security.ResourceType; import io.druid.timeline.DataSegment; import io.druid.timeline.partition.HashBasedNumberedShardSpec; import io.druid.timeline.partition.NoneShardSpec; import io.druid.timeline.partition.NumberedShardSpec; import io.druid.timeline.partition.ShardSpec; +import io.druid.utils.CircularBuffer; import org.codehaus.plexus.util.FileUtils; import org.joda.time.DateTime; import org.joda.time.Interval; import org.joda.time.Period; import javax.annotation.Nullable; +import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; +import javax.ws.rs.QueryParam; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; import java.io.File; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -106,7 +131,7 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; -public class IndexTask extends AbstractTask +public class IndexTask extends AbstractTask implements ChatHandler { private static final Logger log = new Logger(IndexTask.class); private static final HashFunction hashFunction = Hashing.murmur3_128(); @@ -131,12 +156,35 @@ private static String makeGroupId(boolean isAppendToExisting, String dataSource) @JsonIgnore private final IndexIngestionSpec ingestionSchema; + @JsonIgnore + private IngestionState ingestionState; + + @JsonIgnore + private final AuthorizerMapper authorizerMapper; + + @JsonIgnore + private final Optional chatHandlerProvider; + + @JsonIgnore + private FireDepartmentMetrics buildSegmentsFireDepartmentMetrics; + + @JsonIgnore + private CircularBuffer buildSegmentsSavedParseExceptions; + + @JsonIgnore + private FireDepartmentMetrics determinePartitionsFireDepartmentMetrics; + + @JsonIgnore + private CircularBuffer determinePartitionsSavedParseExceptions; + @JsonCreator public IndexTask( @JsonProperty("id") final String id, @JsonProperty("resource") final TaskResource taskResource, @JsonProperty("spec") final IndexIngestionSpec ingestionSchema, - @JsonProperty("context") final Map context + @JsonProperty("context") final Map context, + @JacksonInject AuthorizerMapper authorizerMapper, + @JacksonInject ChatHandlerProvider chatHandlerProvider ) { this( @@ -145,7 +193,9 @@ public IndexTask( taskResource, ingestionSchema.dataSchema.getDataSource(), ingestionSchema, - 
context + context, + authorizerMapper, + chatHandlerProvider ); } @@ -155,7 +205,9 @@ public IndexTask( TaskResource resource, String dataSource, IndexIngestionSpec ingestionSchema, - Map context + Map context, + AuthorizerMapper authorizerMapper, + ChatHandlerProvider chatHandlerProvider ) { super( @@ -165,8 +217,19 @@ public IndexTask( dataSource, context ); - this.ingestionSchema = ingestionSchema; + this.authorizerMapper = authorizerMapper; + this.chatHandlerProvider = Optional.fromNullable(chatHandlerProvider); + if (ingestionSchema.getTuningConfig().getMaxSavedParseExceptions() > 0) { + determinePartitionsSavedParseExceptions = new CircularBuffer( + ingestionSchema.getTuningConfig().getMaxSavedParseExceptions() + ); + + buildSegmentsSavedParseExceptions = new CircularBuffer( + ingestionSchema.getTuningConfig().getMaxSavedParseExceptions() + ); + } + this.ingestionState = IngestionState.NOT_STARTED; } @Override @@ -209,6 +272,133 @@ static boolean isReady(TaskActionClient actionClient, SortedSet interv return true; } + @GET + @Path("/unparseableEvents") + @Produces(MediaType.APPLICATION_JSON) + public Response getUnparseableEvents( + @Context final HttpServletRequest req, + @QueryParam("full") String full + ) + { + authorizationCheck(req, Action.READ); + Map> events = Maps.newHashMap(); + + boolean needsDeterminePartitions = false; + boolean needsBuildSegments = false; + + if (full != null) { + needsDeterminePartitions = true; + needsBuildSegments = true; + } else { + switch (ingestionState) { + case DETERMINE_PARTITIONS: + needsDeterminePartitions = true; + break; + case BUILD_SEGMENTS: + case COMPLETED: + needsBuildSegments = true; + break; + default: + break; + } + } + + if (needsDeterminePartitions) { + events.put("determinePartitions", getMessagesFromSavedParseExceptions(determinePartitionsSavedParseExceptions)); + } + + if (needsBuildSegments) { + events.put("buildSegments", getMessagesFromSavedParseExceptions(buildSegmentsSavedParseExceptions)); + } + + return Response.ok(events).build(); + } + + @GET + @Path("/rowStats") + @Produces(MediaType.APPLICATION_JSON) + public Response getRowStats( + @Context final HttpServletRequest req, + @QueryParam("full") String full + ) + { + authorizationCheck(req, Action.READ); + Map returnMap = Maps.newHashMap(); + Map totalsMap = Maps.newHashMap(); + + boolean needsDeterminePartitions = false; + boolean needsBuildSegments = false; + + if (full != null) { + needsDeterminePartitions = true; + needsBuildSegments = true; + } else { + switch (ingestionState) { + case DETERMINE_PARTITIONS: + needsDeterminePartitions = true; + break; + case BUILD_SEGMENTS: + case COMPLETED: + needsBuildSegments = true; + break; + default: + break; + } + } + + if (needsDeterminePartitions) { + if (determinePartitionsFireDepartmentMetrics != null) { + totalsMap.put( + "determinePartitions", + TaskMetricsUtils.makeIngestionRowMetrics( + determinePartitionsFireDepartmentMetrics.processed(), + determinePartitionsFireDepartmentMetrics.processedWithErrors(), + determinePartitionsFireDepartmentMetrics.unparseable(), + determinePartitionsFireDepartmentMetrics.thrownAway() + ) + ); + } + } + + if (needsBuildSegments) { + if (buildSegmentsFireDepartmentMetrics != null) { + totalsMap.put( + "buildSegments", + TaskMetricsUtils.makeIngestionRowMetrics( + buildSegmentsFireDepartmentMetrics.processed(), + buildSegmentsFireDepartmentMetrics.processedWithErrors(), + buildSegmentsFireDepartmentMetrics.unparseable(), + buildSegmentsFireDepartmentMetrics.thrownAway() + ) + ); + } + } + 
+ returnMap.put("totals", totalsMap); + return Response.ok(returnMap).build(); + } + + /** + * Authorizes action to be performed on this task's datasource + * + * @return authorization result + */ + private Access authorizationCheck(final HttpServletRequest req, Action action) + { + ResourceAction resourceAction = new ResourceAction( + new Resource(ingestionSchema.getDataSchema().getDataSource(), ResourceType.DATASOURCE), + action + ); + + Access access = AuthorizationUtils.authorizeResourceAction(req, resourceAction, authorizerMapper); + if (!access.isAllowed()) { + throw new ForbiddenException(access.toString()); + } + + return access; + } + + @JsonProperty("spec") public IndexIngestionSpec getIngestionSchema() { @@ -218,54 +408,111 @@ public IndexIngestionSpec getIngestionSchema() @Override public TaskStatus run(final TaskToolbox toolbox) throws Exception { - final boolean determineIntervals = !ingestionSchema.getDataSchema() - .getGranularitySpec() - .bucketIntervals() - .isPresent(); - - final FirehoseFactory firehoseFactory = ingestionSchema.getIOConfig().getFirehoseFactory(); - - if (firehoseFactory instanceof IngestSegmentFirehoseFactory) { - // pass toolbox to Firehose - ((IngestSegmentFirehoseFactory) firehoseFactory).setTaskToolbox(toolbox); - } - - final File firehoseTempDir = toolbox.getFirehoseTemporaryDir(); - // Firehose temporary directory is automatically removed when this IndexTask completes. - FileUtils.forceMkdir(firehoseTempDir); - - final ShardSpecs shardSpecs = determineShardSpecs(toolbox, firehoseFactory, firehoseTempDir); - - final DataSchema dataSchema; - final Map versions; - if (determineIntervals) { - final SortedSet intervals = new TreeSet<>(Comparators.intervalsByStartThenEnd()); - intervals.addAll(shardSpecs.getIntervals()); - final Map locks = Tasks.tryAcquireExclusiveLocks(toolbox.getTaskActionClient(), intervals); - versions = locks.entrySet().stream() - .collect(Collectors.toMap(Entry::getKey, entry -> entry.getValue().getVersion())); - - dataSchema = ingestionSchema.getDataSchema().withGranularitySpec( - ingestionSchema.getDataSchema() - .getGranularitySpec() - .withIntervals( - JodaUtils.condenseIntervals( - shardSpecs.getIntervals() - ) - ) + try { + if (chatHandlerProvider.isPresent()) { + log.info("Found chat handler of class[%s]", chatHandlerProvider.get().getClass().getName()); + chatHandlerProvider.get().register(getId(), this, false); + } else { + log.warn("No chat handler detected"); + } + + final boolean determineIntervals = !ingestionSchema.getDataSchema() + .getGranularitySpec() + .bucketIntervals() + .isPresent(); + + final FirehoseFactory firehoseFactory = ingestionSchema.getIOConfig().getFirehoseFactory(); + + if (firehoseFactory instanceof IngestSegmentFirehoseFactory) { + // pass toolbox to Firehose + ((IngestSegmentFirehoseFactory) firehoseFactory).setTaskToolbox(toolbox); + } + + final File firehoseTempDir = toolbox.getFirehoseTemporaryDir(); + // Firehose temporary directory is automatically removed when this IndexTask completes. 
+ FileUtils.forceMkdir(firehoseTempDir); + + ingestionState = IngestionState.DETERMINE_PARTITIONS; + final ShardSpecs shardSpecs = determineShardSpecs(toolbox, firehoseFactory, firehoseTempDir); + final DataSchema dataSchema; + final Map versions; + if (determineIntervals) { + final SortedSet intervals = new TreeSet<>(Comparators.intervalsByStartThenEnd()); + intervals.addAll(shardSpecs.getIntervals()); + final Map locks = Tasks.tryAcquireExclusiveLocks( + toolbox.getTaskActionClient(), + intervals + ); + versions = locks.entrySet().stream() + .collect(Collectors.toMap(Entry::getKey, entry -> entry.getValue().getVersion())); + + dataSchema = ingestionSchema.getDataSchema().withGranularitySpec( + ingestionSchema.getDataSchema() + .getGranularitySpec() + .withIntervals( + JodaUtils.condenseIntervals( + shardSpecs.getIntervals() + ) + ) + ); + } else { + versions = getTaskLocks(toolbox.getTaskActionClient()) + .stream() + .collect(Collectors.toMap(TaskLock::getInterval, TaskLock::getVersion)); + dataSchema = ingestionSchema.getDataSchema(); + } + + ingestionState = IngestionState.BUILD_SEGMENTS; + return generateAndPublishSegments(toolbox, dataSchema, shardSpecs, versions, firehoseFactory, firehoseTempDir); + } + catch (Exception e) { + log.error(e, "Encountered exception in %s.", ingestionState); + return TaskStatus.failure( + getId(), + getTaskCompletionMetrics(), + e.getMessage(), + getTaskCompletionContext() ); - } else { - versions = getTaskLocks(toolbox.getTaskActionClient()) - .stream() - .collect(Collectors.toMap(TaskLock::getInterval, TaskLock::getVersion)); - dataSchema = ingestionSchema.getDataSchema(); } - if (generateAndPublishSegments(toolbox, dataSchema, shardSpecs, versions, firehoseFactory, firehoseTempDir)) { - return TaskStatus.success(getId()); - } else { - return TaskStatus.failure(getId()); + finally { + if (chatHandlerProvider.isPresent()) { + chatHandlerProvider.get().unregister(getId()); + } + } + } + + private Map getTaskCompletionContext() + { + Map context = Maps.newHashMap(); + List determinePartitionsParseExceptionMessages = getMessagesFromSavedParseExceptions(determinePartitionsSavedParseExceptions); + List buildSegmentsParseExceptionMessages = getMessagesFromSavedParseExceptions(buildSegmentsSavedParseExceptions); + if (determinePartitionsParseExceptionMessages != null || buildSegmentsParseExceptionMessages != null) { + Map unparseableEventsMap = Maps.newHashMap(); + unparseableEventsMap.put("determinePartitions", determinePartitionsParseExceptionMessages); + unparseableEventsMap.put("buildSegments", buildSegmentsParseExceptionMessages); + context.put("unparseableEvents", unparseableEventsMap); + } + context.put("ingestionState", ingestionState); + return context; + } + + private Map getTaskCompletionMetrics() + { + Map metrics = Maps.newHashMap(); + if (determinePartitionsFireDepartmentMetrics != null) { + metrics.put( + "determinePartitions", + FireDepartmentMetrics.getRowMetricsFromFireDepartmentMetrics(determinePartitionsFireDepartmentMetrics) + ); } + if (buildSegmentsFireDepartmentMetrics != null) { + metrics.put( + "buildSegments", + FireDepartmentMetrics.getRowMetricsFromFireDepartmentMetrics(buildSegmentsFireDepartmentMetrics) + ); + } + return metrics; } private static String findVersion(Map versions, Interval interval) @@ -385,7 +632,7 @@ private static ShardSpecs createShardSpecWithoutInputScan( return new ShardSpecs(shardSpecs); } - private static ShardSpecs createShardSpecsFromInput( + private ShardSpecs createShardSpecsFromInput( ObjectMapper 
jsonMapper, IndexIngestionSpec ingestionSchema, FirehoseFactory firehoseFactory, @@ -447,7 +694,7 @@ private static ShardSpecs createShardSpecsFromInput( return new ShardSpecs(intervalToShardSpecs); } - private static Map> collectIntervalsAndShardSpecs( + private Map> collectIntervalsAndShardSpecs( ObjectMapper jsonMapper, IndexIngestionSpec ingestionSchema, FirehoseFactory firehoseFactory, @@ -457,6 +704,8 @@ private static Map> collectIntervalsAnd boolean determineNumPartitions ) throws IOException { + determinePartitionsFireDepartmentMetrics = new FireDepartmentMetrics(); + final Map> hllCollectors = new TreeMap<>( Comparators.intervalsByStartThenEnd() ); @@ -467,12 +716,14 @@ private static Map> collectIntervalsAnd try ( final Firehose firehose = firehoseFactory.connect(ingestionSchema.getDataSchema().getParser(), firehoseTempDir) ) { + while (firehose.hasMore()) { try { final InputRow inputRow = firehose.nextRow(); // The null inputRow means the caller must skip this row. if (inputRow == null) { + determinePartitionsFireDepartmentMetrics.incrementThrownAway(); continue; } @@ -480,9 +731,17 @@ private static Map> collectIntervalsAnd if (determineIntervals) { interval = granularitySpec.getSegmentGranularity().bucket(inputRow.getTimestamp()); } else { + if (!Intervals.ETERNITY.contains(inputRow.getTimestamp())) { + final String errorMsg = StringUtils.format( + "Encountered row with timestamp that cannot be represented as a long: [%s]", + inputRow + ); + throw new ParseException(errorMsg); + } + final Optional optInterval = granularitySpec.bucketInterval(inputRow.getTimestamp()); if (!optInterval.isPresent()) { - thrownAway++; + determinePartitionsFireDepartmentMetrics.incrementThrownAway(); continue; } interval = optInterval.get(); @@ -506,12 +765,21 @@ private static Map> collectIntervalsAnd hllCollectors.put(interval, Optional.absent()); } } + determinePartitionsFireDepartmentMetrics.incrementProcessed(); } catch (ParseException e) { - if (ingestionSchema.getTuningConfig().isReportParseExceptions()) { - throw e; - } else { - unparseable++; + if (ingestionSchema.getTuningConfig().isLogParseExceptions()) { + log.error(e, "Encountered parse exception: "); + } + + if (determinePartitionsSavedParseExceptions != null) { + determinePartitionsSavedParseExceptions.add(e); + } + + determinePartitionsFireDepartmentMetrics.incrementUnparseable(); + if (determinePartitionsFireDepartmentMetrics.unparseable() > ingestionSchema.getTuningConfig() + .getMaxParseExceptions()) { + throw new RuntimeException("Max parse exceptions exceeded, terminating task..."); } } } @@ -559,7 +827,7 @@ private static BiFunction getShardSpecCreateFunctio * * @return true if generated segments are successfully published, otherwise false */ - private boolean generateAndPublishSegments( + private TaskStatus generateAndPublishSegments( final TaskToolbox toolbox, final DataSchema dataSchema, final ShardSpecs shardSpecs, @@ -572,7 +840,7 @@ private boolean generateAndPublishSegments( final FireDepartment fireDepartmentForMetrics = new FireDepartment( dataSchema, new RealtimeIOConfig(null, null, null), null ); - final FireDepartmentMetrics fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics(); + buildSegmentsFireDepartmentMetrics = fireDepartmentForMetrics.getMetrics(); if (toolbox.getMonitorScheduler() != null) { toolbox.getMonitorScheduler().addMonitor( @@ -650,7 +918,7 @@ dataSchema, new RealtimeIOConfig(null, null, null), null }; try ( - final Appenderator appenderator = newAppenderator(fireDepartmentMetrics, 
toolbox, dataSchema, tuningConfig); + final Appenderator appenderator = newAppenderator(buildSegmentsFireDepartmentMetrics, toolbox, dataSchema, tuningConfig); final BatchAppenderatorDriver driver = newDriver(appenderator, toolbox, segmentAllocator); final Firehose firehose = firehoseFactory.connect(dataSchema.getParser(), firehoseTempDir) ) { @@ -661,13 +929,21 @@ dataSchema, new RealtimeIOConfig(null, null, null), null final InputRow inputRow = firehose.nextRow(); if (inputRow == null) { - fireDepartmentMetrics.incrementThrownAway(); + buildSegmentsFireDepartmentMetrics.incrementThrownAway(); continue; } + if (!Intervals.ETERNITY.contains(inputRow.getTimestamp())) { + final String errorMsg = StringUtils.format( + "Encountered row with timestamp that cannot be represented as a long: [%s]", + inputRow + ); + throw new ParseException(errorMsg); + } + final Optional optInterval = granularitySpec.bucketInterval(inputRow.getTimestamp()); if (!optInterval.isPresent()) { - fireDepartmentMetrics.incrementThrownAway(); + buildSegmentsFireDepartmentMetrics.incrementThrownAway(); continue; } @@ -700,13 +976,32 @@ dataSchema, new RealtimeIOConfig(null, null, null), null throw new ISE("Failed to add a row with timestamp[%s]", inputRow.getTimestamp()); } - fireDepartmentMetrics.incrementProcessed(); + + if (addResult.getParseException() != null) { + throw addResult.getParseException(); + } else { + buildSegmentsFireDepartmentMetrics.incrementProcessed(); + } } catch (ParseException e) { - if (tuningConfig.isReportParseExceptions()) { - throw e; + if (e.isFromPartiallyValidRow()) { + buildSegmentsFireDepartmentMetrics.incrementProcessedWithErrors(); } else { - fireDepartmentMetrics.incrementUnparseable(); + buildSegmentsFireDepartmentMetrics.incrementUnparseable(); + } + + if (tuningConfig.isLogParseExceptions()) { + log.error(e, "Encountered parse exception:"); + } + + if (buildSegmentsSavedParseExceptions != null) { + buildSegmentsSavedParseExceptions.add(e); + } + + if (buildSegmentsFireDepartmentMetrics.unparseable() + + buildSegmentsFireDepartmentMetrics.processedWithErrors() > tuningConfig.getMaxParseExceptions()) { + log.error("Max parse exceptions exceeded, terminating task..."); + throw new RuntimeException("Max parse exceptions exceeded, terminating task...", e); } } } @@ -719,15 +1014,21 @@ dataSchema, new RealtimeIOConfig(null, null, null), null pushTimeout ); + ingestionState = IngestionState.COMPLETED; if (published == null) { log.error("Failed to publish segments, aborting!"); - return false; + return TaskStatus.failure( + getId(), + getTaskCompletionMetrics(), + "Failed to publish segments.", + getTaskCompletionContext() + ); } else { log.info( "Processed[%,d] events, unparseable[%,d], thrownAway[%,d].", - fireDepartmentMetrics.processed(), - fireDepartmentMetrics.unparseable(), - fireDepartmentMetrics.thrownAway() + buildSegmentsFireDepartmentMetrics.processed(), + buildSegmentsFireDepartmentMetrics.unparseable(), + buildSegmentsFireDepartmentMetrics.thrownAway() ); log.info( "Published segments[%s]", Joiner.on(", ").join( @@ -737,7 +1038,13 @@ dataSchema, new RealtimeIOConfig(null, null, null), null ) ) ); - return true; + + return TaskStatus.success( + getId(), + getTaskCompletionMetrics(), + null, + getTaskCompletionContext() + ); } } catch (TimeoutException | ExecutionException e) { @@ -745,6 +1052,21 @@ dataSchema, new RealtimeIOConfig(null, null, null), null } } + @Nullable + public static List getMessagesFromSavedParseExceptions(CircularBuffer savedParseExceptions) + { + if 
(savedParseExceptions == null) { + return null; + } + + List events = new ArrayList<>(); + for (int i = 0; i < savedParseExceptions.size(); i++) { + events.add(savedParseExceptions.getLatest(i).getMessage()); + } + + return events; + } + private static boolean exceedMaxRowsInSegment(int numRowsInSegment, IndexTuningConfig indexTuningConfig) { // maxRowsInSegment should be null if numShards is set in indexTuningConfig @@ -945,6 +1267,10 @@ public static class IndexTuningConfig implements TuningConfig, AppenderatorConfi private final boolean forceGuaranteedRollup; private final boolean reportParseExceptions; private final long pushTimeout; + private final boolean logParseExceptions; + private final int maxParseExceptions; + private final int maxSavedParseExceptions; + @Nullable private final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory; @@ -961,10 +1287,13 @@ public IndexTuningConfig( @JsonProperty("buildV9Directly") @Nullable Boolean buildV9Directly, @JsonProperty("forceExtendableShardSpecs") @Nullable Boolean forceExtendableShardSpecs, @JsonProperty("forceGuaranteedRollup") @Nullable Boolean forceGuaranteedRollup, - @JsonProperty("reportParseExceptions") @Nullable Boolean reportParseExceptions, + @Deprecated @JsonProperty("reportParseExceptions") @Nullable Boolean reportParseExceptions, @JsonProperty("publishTimeout") @Nullable Long publishTimeout, // deprecated @JsonProperty("pushTimeout") @Nullable Long pushTimeout, - @JsonProperty("segmentWriteOutMediumFactory") @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory + @JsonProperty("segmentWriteOutMediumFactory") @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, + @JsonProperty("logParseExceptions") @Nullable Boolean logParseExceptions, + @JsonProperty("maxParseExceptions") @Nullable Integer maxParseExceptions, + @JsonProperty("maxSavedParseExceptions") @Nullable Integer maxSavedParseExceptions ) { this( @@ -979,13 +1308,16 @@ public IndexTuningConfig( reportParseExceptions, pushTimeout != null ? pushTimeout : publishTimeout, null, - segmentWriteOutMediumFactory + segmentWriteOutMediumFactory, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions ); } private IndexTuningConfig() { - this(null, null, null, null, null, null, null, null, null, null, null, null); + this(null, null, null, null, null, null, null, null, null, null, null, null, null, null, null); } private IndexTuningConfig( @@ -1000,7 +1332,10 @@ private IndexTuningConfig( @Nullable Boolean reportParseExceptions, @Nullable Long pushTimeout, @Nullable File basePersistDirectory, - @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory + @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, + @Nullable Boolean logParseExceptions, + @Nullable Integer maxParseExceptions, + @Nullable Integer maxSavedParseExceptions ) { Preconditions.checkArgument( @@ -1030,6 +1365,17 @@ private IndexTuningConfig( ); this.segmentWriteOutMediumFactory = segmentWriteOutMediumFactory; + + if (this.reportParseExceptions) { + this.maxParseExceptions = 0; + this.maxSavedParseExceptions = maxSavedParseExceptions == null ? 0 : Math.min(1, maxSavedParseExceptions); + } else { + this.maxParseExceptions = maxParseExceptions == null ? TuningConfig.DEFAULT_MAX_PARSE_EXCEPTIONS : maxParseExceptions; + this.maxSavedParseExceptions = maxSavedParseExceptions == null + ? TuningConfig.DEFAULT_MAX_SAVED_PARSE_EXCEPTIONS + : maxSavedParseExceptions; + } + this.logParseExceptions = logParseExceptions == null ? 
TuningConfig.DEFAULT_LOG_PARSE_EXCEPTIONS : logParseExceptions; } private static Integer initializeTargetPartitionSize(Integer numShards, Integer targetPartitionSize) @@ -1066,7 +1412,10 @@ public IndexTuningConfig withBasePersistDirectory(File dir) reportParseExceptions, pushTimeout, dir, - segmentWriteOutMediumFactory + segmentWriteOutMediumFactory, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions ); } @@ -1150,6 +1499,24 @@ public long getPushTimeout() return pushTimeout; } + @JsonProperty + public boolean isLogParseExceptions() + { + return logParseExceptions; + } + + @JsonProperty + public int getMaxParseExceptions() + { + return maxParseExceptions; + } + + @JsonProperty + public int getMaxSavedParseExceptions() + { + return maxSavedParseExceptions; + } + @Override public Period getIntermediatePersistPeriod() { @@ -1185,7 +1552,10 @@ public boolean equals(Object o) Objects.equals(numShards, that.numShards) && Objects.equals(indexSpec, that.indexSpec) && Objects.equals(basePersistDirectory, that.basePersistDirectory) && - Objects.equals(segmentWriteOutMediumFactory, that.segmentWriteOutMediumFactory); + Objects.equals(segmentWriteOutMediumFactory, that.segmentWriteOutMediumFactory) && + Objects.equals(logParseExceptions, that.logParseExceptions) && + Objects.equals(maxParseExceptions, that.maxParseExceptions) && + Objects.equals(maxSavedParseExceptions, that.maxSavedParseExceptions); } @Override @@ -1203,7 +1573,10 @@ public int hashCode() forceGuaranteedRollup, reportParseExceptions, pushTimeout, - segmentWriteOutMediumFactory + segmentWriteOutMediumFactory, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions ); } } diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/ThreadPoolTaskRunner.java b/indexing-service/src/main/java/io/druid/indexing/overlord/ThreadPoolTaskRunner.java index 06e6342356be..9b118d271067 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/ThreadPoolTaskRunner.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/ThreadPoolTaskRunner.java @@ -465,11 +465,11 @@ public TaskStatus call() log.warn(e, "Interrupted while running task[%s]", task); } - status = TaskStatus.failure(task.getId()); + status = TaskStatus.failure(task.getId(), null, e.toString(), null); } catch (Exception e) { log.error(e, "Exception while running task[%s]", task); - status = TaskStatus.failure(task.getId()); + status = TaskStatus.failure(task.getId(), null, e.toString(), null); } catch (Throwable t) { log.error(t, "Uncaught Throwable while running task[%s]", task); diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java b/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java index 8be1dcdbe95e..654612d0b4c8 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java @@ -621,7 +621,10 @@ public Response getCompleteTasks( status.getStatusCode(), status.getDuration(), TaskLocation.unknown(), - pair.rhs); + pair.rhs, + status.getMetrics(), + status.getErrorMsg(), + status.getContext()); })); return Response.ok(completeTasks).build(); @@ -777,7 +780,10 @@ public TaskStatusPlus apply(TaskRunnerWorkItem workItem) null, null, workItem.getLocation(), - workItem.getDataSource() + workItem.getDataSource(), + null, + null, + null ); } } diff --git 
a/indexing-service/src/test/java/io/druid/indexing/common/task/CompactionTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/CompactionTaskTest.java index 849aff34f0eb..fd8245eae029 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/CompactionTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/CompactionTaskTest.java @@ -27,6 +27,8 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; +import com.google.inject.Binder; +import com.google.inject.Module; import io.druid.data.input.FirehoseFactory; import io.druid.data.input.impl.DimensionSchema; import io.druid.data.input.impl.DimensionsSpec; @@ -79,6 +81,8 @@ import io.druid.segment.loading.SegmentLoadingException; import io.druid.segment.transform.TransformingInputRowParser; import io.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; +import io.druid.server.security.AuthTestUtils; +import io.druid.server.security.AuthorizerMapper; import io.druid.timeline.DataSegment; import io.druid.timeline.partition.NumberedShardSpec; import org.hamcrest.CoreMatchers; @@ -200,7 +204,21 @@ private static ObjectMapper setupInjectablesInObjectMapper(ObjectMapper objectMa guiceIntrospector, objectMapper.getDeserializationConfig().getAnnotationIntrospector() ) ); - objectMapper.setInjectableValues(new GuiceInjectableValues(GuiceInjectors.makeStartupInjector())); + GuiceInjectableValues injectableValues = new GuiceInjectableValues( + GuiceInjectors.makeStartupInjectorWithModules( + ImmutableList.of( + new Module() + { + @Override + public void configure(Binder binder) + { + binder.bind(AuthorizerMapper.class).toInstance(AuthTestUtils.TEST_AUTHORIZER_MAPPER); + } + } + ) + ) + ); + objectMapper.setInjectableValues(injectableValues); objectMapper.registerModule( new SimpleModule().registerSubtypes(new NamedType(NumberedShardSpec.class, "NumberedShardSpec")) ); @@ -244,6 +262,9 @@ private static IndexTuningConfig createTuningConfig() false, null, 100L, + null, + null, + null, null ); } @@ -263,7 +284,8 @@ public void testSerdeWithInterval() throws IOException null, createTuningConfig(), ImmutableMap.of("testKey", "testContext"), - objectMapper + objectMapper, + AuthTestUtils.TEST_AUTHORIZER_MAPPER ); final byte[] bytes = objectMapper.writeValueAsBytes(task); final CompactionTask fromJson = objectMapper.readValue(bytes, CompactionTask.class); @@ -289,7 +311,8 @@ public void testSerdeWithSegments() throws IOException null, createTuningConfig(), ImmutableMap.of("testKey", "testContext"), - objectMapper + objectMapper, + AuthTestUtils.TEST_AUTHORIZER_MAPPER ); final byte[] bytes = objectMapper.writeValueAsBytes(task); final CompactionTask fromJson = objectMapper.readValue(bytes, CompactionTask.class); diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java index d0cf4e39d2cf..57dff3bba48b 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java @@ -21,17 +21,25 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.common.io.Files; import 
io.druid.data.input.impl.CSVParseSpec; import io.druid.data.input.impl.DimensionsSpec; +import io.druid.data.input.impl.FloatDimensionSchema; +import io.druid.data.input.impl.JSONParseSpec; +import io.druid.data.input.impl.LongDimensionSchema; import io.druid.data.input.impl.ParseSpec; import io.druid.data.input.impl.SpatialDimensionSchema; +import io.druid.data.input.impl.StringDimensionSchema; import io.druid.data.input.impl.StringInputRowParser; import io.druid.data.input.impl.TimestampSpec; +import io.druid.indexer.TaskMetricsUtils; +import io.druid.indexer.TaskState; import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskLockType; +import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; import io.druid.indexing.common.TestUtils; import io.druid.indexing.common.actions.LockAcquireAction; @@ -46,9 +54,9 @@ import io.druid.indexing.overlord.SegmentPublishResult; import io.druid.java.util.common.DateTimes; import io.druid.java.util.common.Intervals; +import io.druid.java.util.common.Pair; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.granularity.Granularities; -import io.druid.java.util.common.parsers.ParseException; import io.druid.math.expr.ExprMacroTable; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.LongSumAggregatorFactory; @@ -65,6 +73,7 @@ import io.druid.segment.realtime.firehose.LocalFirehoseFactory; import io.druid.segment.transform.ExpressionTransform; import io.druid.segment.transform.TransformSpec; +import io.druid.server.security.AuthTestUtils; import io.druid.timeline.DataSegment; import io.druid.timeline.partition.HashBasedNumberedShardSpec; import io.druid.timeline.partition.NoneShardSpec; @@ -82,6 +91,7 @@ import java.io.IOException; import java.net.URI; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; @@ -151,10 +161,12 @@ public void testDeterminePartitions() throws Exception createTuningConfig(2, null, false, true), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(2, segments.size()); @@ -194,12 +206,14 @@ public void testForceExtendableShardSpecs() throws Exception createTuningConfig(2, null, true, false), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); Assert.assertEquals(indexTask.getId(), indexTask.getGroupId()); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(2, segments.size()); @@ -243,12 +257,14 @@ public void testTransformSpec() throws Exception createTuningConfig(2, null, true, false), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); Assert.assertEquals(indexTask.getId(), indexTask.getGroupId()); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(1, segments.size()); @@ -284,10 +300,12 @@ public void testWithArbitraryGranularity() throws Exception createTuningConfig(10, null, false, true), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - List segments = runTask(indexTask); + List segments = runTask(indexTask).rhs; Assert.assertEquals(1, segments.size()); } @@ -318,10 +336,12 @@ public void testIntervalBucketing() throws Exception createTuningConfig(50, null, false, true), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, 
null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(1, segments.size()); } @@ -348,10 +368,12 @@ public void testNumShardsProvided() throws Exception createTuningConfig(null, 1, false, true), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(1, segments.size()); @@ -384,12 +406,14 @@ public void testAppendToExisting() throws Exception createTuningConfig(2, null, false, false), true ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); Assert.assertEquals("index_append_test", indexTask.getGroupId()); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(2, segmentAllocatePartitionCounter); Assert.assertEquals(2, segments.size()); @@ -431,10 +455,12 @@ public void testIntervalNotSpecified() throws Exception createTuningConfig(2, null, false, true), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(3, segments.size()); @@ -491,10 +517,12 @@ public void testCSVFileWithHeader() throws Exception createTuningConfig(2, null, false, true), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(1, segments.size()); @@ -540,10 +568,12 @@ public void testCSVFileWithHeaderColumnOverride() throws Exception createTuningConfig(2, null, false, true), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(1, segments.size()); @@ -584,10 +614,12 @@ public void testWithSmallMaxTotalRows() throws Exception createTuningConfig(2, 2, 2L, null, false, false, true), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(6, segments.size()); @@ -626,10 +658,12 @@ public void testPerfectRollup() throws Exception createTuningConfig(3, 2, 2L, null, false, true, true), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(3, segments.size()); @@ -667,10 +701,12 @@ public void testBestEffortRollup() throws Exception createTuningConfig(3, 2, 2L, null, false, false, true), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(5, segments.size()); @@ -742,10 +778,12 @@ public void testIgnoreParseException() throws Exception null, null, parseExceptionIgnoreSpec, + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(Arrays.asList("d"), segments.get(0).getDimensions()); Assert.assertEquals(Arrays.asList("val"), segments.get(0).getMetrics()); @@ -755,9 +793,6 @@ public void testIgnoreParseException() throws Exception @Test public void testReportParseException() throws Exception { - expectedException.expect(ParseException.class); - expectedException.expectMessage("Unparseable timestamp found!"); - final File tmpDir = 
temporaryFolder.newFolder(); final File tmpFile = File.createTempFile("druid", "index", tmpDir); @@ -795,12 +830,355 @@ public void testReportParseException() throws Exception null, null, parseExceptionIgnoreSpec, + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - runTask(indexTask); + TaskStatus status = runTask(indexTask).lhs; + Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); + Assert.assertEquals("Max parse exceptions exceeded, terminating task...", status.getErrorMsg()); + + Map expectedUnparseables = ImmutableMap.of( + "determinePartitions", + new ArrayList<>(), + "buildSegments", + Arrays.asList("Unparseable timestamp found! Event: {time=unparseable, d=a, val=1}") + ); + Assert.assertEquals(expectedUnparseables, status.getContext().get("unparseableEvents")); } + @Test + public void testMultipleParseExceptionsSuccess() throws Exception + { + final File tmpDir = temporaryFolder.newFolder(); + + final File tmpFile = File.createTempFile("druid", "index", tmpDir); + + try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) { + writer.write("{\"time\":\"unparseable\",\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}\n"); // unparseable time + writer.write("{\"time\":\"2014-01-01T00:00:10Z\",\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}\n"); // valid row + writer.write("{\"time\":\"2014-01-01T00:00:10Z\",\"dim\":\"b\",\"dimLong\":\"notnumber\",\"dimFloat\":3.0,\"val\":1}\n"); // row with invalid long dimension + writer.write("{\"time\":\"2014-01-01T00:00:10Z\",\"dim\":\"b\",\"dimLong\":2,\"dimFloat\":\"notnumber\",\"val\":1}\n"); // row with invalid float dimension + writer.write("{\"time\":\"2014-01-01T00:00:10Z\",\"dim\":\"b\",\"dimLong\":2,\"dimFloat\":4.0,\"val\":\"notnumber\"}\n"); // row with invalid metric + writer.write("{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}\n"); // invalid JSON + writer.write("{\"time\":\"3014-03-01T00:00:10Z\",\"dim\":\"outsideofinterval\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}\n"); // thrown away + writer.write("{\"time\":\"99999999999-01-01T00:00:10Z\",\"dim\":\"b\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}\n"); // unparseable time + writer.write("this is not JSON\n"); // invalid JSON + } + + final IndexTask.IndexTuningConfig tuningConfig = new IndexTask.IndexTuningConfig( + 2, + null, + null, + null, + null, + indexSpec, + null, + true, + false, + true, + false, + null, + null, + null, + true, + 7, + 7 + ); + + final IndexIngestionSpec parseExceptionIgnoreSpec = createIngestionSpec( + tmpDir, + new JSONParseSpec( + new TimestampSpec( + "time", + "auto", + null + ), + new DimensionsSpec( + Arrays.asList( + new StringDimensionSchema("dim"), + new LongDimensionSchema("dimLong"), + new FloatDimensionSchema("dimFloat") + ), + Lists.newArrayList(), + Lists.newArrayList() + ), + null, + null + ), + null, + tuningConfig, + false + ); + + IndexTask indexTask = new IndexTask( + null, + null, + parseExceptionIgnoreSpec, + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, + null + ); + + TaskStatus status = runTask(indexTask).lhs; + Assert.assertEquals(TaskState.SUCCESS, status.getStatusCode()); + Assert.assertEquals(null, status.getErrorMsg()); + + Map expectedMetrics = ImmutableMap.of( + "determinePartitions", + ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED, 4L, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0L, + TaskMetricsUtils.ROWS_UNPARSEABLE, 4L, + TaskMetricsUtils.ROWS_THROWN_AWAY, 1L + ), + "buildSegments", + ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED, 1L, + 
TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 3L, + TaskMetricsUtils.ROWS_UNPARSEABLE, 4L, + TaskMetricsUtils.ROWS_THROWN_AWAY, 1L + ) + ); + Assert.assertEquals(expectedMetrics, status.getMetrics()); + + Map expectedUnparseables = ImmutableMap.of( + "determinePartitions", + Arrays.asList( + "Unable to parse row [this is not JSON]", + "Unparseable timestamp found! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}", + "Unable to parse row [{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}]", + "Unparseable timestamp found! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}" + ), + "buildSegments", + Arrays.asList( + "Unable to parse row [this is not JSON]", + "Unparseable timestamp found! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}", + "Unable to parse row [{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}]", + "Found unparseable columns in row: [MapBasedInputRow{timestamp=2014-01-01T00:00:10.000Z, event={time=2014-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=4.0, val=notnumber}, dimensions=[dim, dimLong, dimFloat]}], exceptions: [Unable to parse value[notnumber] for field[val],]", + "Found unparseable columns in row: [MapBasedInputRow{timestamp=2014-01-01T00:00:10.000Z, event={time=2014-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=notnumber, val=1}, dimensions=[dim, dimLong, dimFloat]}], exceptions: [could not convert value [notnumber] to float,]", + "Found unparseable columns in row: [MapBasedInputRow{timestamp=2014-01-01T00:00:10.000Z, event={time=2014-01-01T00:00:10Z, dim=b, dimLong=notnumber, dimFloat=3.0, val=1}, dimensions=[dim, dimLong, dimFloat]}], exceptions: [could not convert value [notnumber] to long,]", + "Unparseable timestamp found! 
Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}" + ) + ); + + Assert.assertEquals(expectedUnparseables, status.getContext().get("unparseableEvents")); + } + + @Test + public void testMultipleParseExceptionsFailure() throws Exception + { + final File tmpDir = temporaryFolder.newFolder(); + + final File tmpFile = File.createTempFile("druid", "index", tmpDir); + + try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) { + writer.write("time,dim,dimLong,dimFloat,val\n"); + writer.write("unparseable,a,2,3.0,1\n"); // unparseable + writer.write("2014-01-01T00:00:10Z,a,2,3.0,1\n"); // valid row + writer.write("9.0,a,2,3.0,1\n"); // unparseable + writer.write("3014-03-01T00:00:10Z,outsideofinterval,2,3.0,1\n"); // thrown away + writer.write("99999999999-01-01T00:00:10Z,b,2,3.0,1\n"); // unparseable + } + + // Allow up to 3 parse exceptions, and save up to 2 parse exceptions + final IndexTask.IndexTuningConfig tuningConfig = new IndexTask.IndexTuningConfig( + 2, + null, + null, + null, + null, + indexSpec, + null, + true, + false, + false, + false, + null, + null, + null, + true, + 2, + 5 + ); + + final IndexIngestionSpec parseExceptionIgnoreSpec = createIngestionSpec( + tmpDir, + new CSVParseSpec( + new TimestampSpec( + "time", + "auto", + null + ), + new DimensionsSpec( + Arrays.asList( + new StringDimensionSchema("dim"), + new LongDimensionSchema("dimLong"), + new FloatDimensionSchema("dimFloat") + ), + Lists.newArrayList(), + Lists.newArrayList() + ), + null, + Arrays.asList("time", "dim", "dimLong", "dimFloat", "val"), + true, + 0 + ), + null, + tuningConfig, + false + ); + + IndexTask indexTask = new IndexTask( + null, + null, + parseExceptionIgnoreSpec, + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, + null + ); + + TaskStatus status = runTask(indexTask).lhs; + Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); + Assert.assertEquals( + "Max parse exceptions exceeded, terminating task...", + status.getErrorMsg() + ); + + Map expectedMetrics = ImmutableMap.of( + "buildSegments", + ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED, 1L, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0L, + TaskMetricsUtils.ROWS_UNPARSEABLE, 3L, + TaskMetricsUtils.ROWS_THROWN_AWAY, 2L + ) + ); + + Assert.assertEquals(expectedMetrics, status.getMetrics()); + + Map expectedUnparseables = ImmutableMap.of( + "determinePartitions", + new ArrayList<>(), + "buildSegments", + Arrays.asList( + "Unparseable timestamp found! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}", + "Unparseable timestamp found! Event: {time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1}", + "Unparseable timestamp found! 
Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}" + ) + ); + + Assert.assertEquals(expectedUnparseables, status.getContext().get("unparseableEvents")); + } + + @Test + public void testMultipleParseExceptionsFailureAtDeterminePartitions() throws Exception + { + final File tmpDir = temporaryFolder.newFolder(); + + final File tmpFile = File.createTempFile("druid", "index", tmpDir); + + try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) { + writer.write("time,dim,dimLong,dimFloat,val\n"); + writer.write("unparseable,a,2,3.0,1\n"); // unparseable + writer.write("2014-01-01T00:00:10Z,a,2,3.0,1\n"); // valid row + writer.write("9.0,a,2,3.0,1\n"); // unparseable + writer.write("3014-03-01T00:00:10Z,outsideofinterval,2,3.0,1\n"); // thrown away + writer.write("99999999999-01-01T00:00:10Z,b,2,3.0,1\n"); // unparseable + } + + // Allow up to 3 parse exceptions, and save up to 2 parse exceptions + final IndexTask.IndexTuningConfig tuningConfig = new IndexTask.IndexTuningConfig( + 2, + null, + null, + null, + null, + indexSpec, + null, + true, + false, + true, + false, + null, + null, + null, + true, + 2, + 5 + ); + + final IndexIngestionSpec parseExceptionIgnoreSpec = createIngestionSpec( + tmpDir, + new CSVParseSpec( + new TimestampSpec( + "time", + "auto", + null + ), + new DimensionsSpec( + Arrays.asList( + new StringDimensionSchema("dim"), + new LongDimensionSchema("dimLong"), + new FloatDimensionSchema("dimFloat") + ), + Lists.newArrayList(), + Lists.newArrayList() + ), + null, + Arrays.asList("time", "dim", "dimLong", "dimFloat", "val"), + true, + 0 + ), + null, + tuningConfig, + false + ); + + IndexTask indexTask = new IndexTask( + null, + null, + parseExceptionIgnoreSpec, + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, + null + ); + + TaskStatus status = runTask(indexTask).lhs; + Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); + Assert.assertEquals( + "Max parse exceptions exceeded, terminating task...", + status.getErrorMsg() + ); + + Map expectedMetrics = ImmutableMap.of( + "determinePartitions", + ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED, 1L, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0L, + TaskMetricsUtils.ROWS_UNPARSEABLE, 3L, + TaskMetricsUtils.ROWS_THROWN_AWAY, 2L + ) + ); + + Assert.assertEquals(expectedMetrics, status.getMetrics()); + + Map expectedUnparseables = ImmutableMap.of( + "determinePartitions", + Arrays.asList( + "Unparseable timestamp found! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}", + "Unparseable timestamp found! Event: {time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1}", + "Unparseable timestamp found! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}" + ), + "buildSegments", + new ArrayList<>() + ); + + Assert.assertEquals(expectedUnparseables, status.getContext().get("unparseableEvents")); + } + + @Test public void testCsvWithHeaderOfEmptyColumns() throws Exception { @@ -854,10 +1232,12 @@ public void testCsvWithHeaderOfEmptyColumns() throws Exception null, null, parseExceptionIgnoreSpec, + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; // the order of result segments can be changed because hash shardSpec is used. // the below loop is to make this test deterministic. 
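// Editorial aside (not part of the patch): the tests above no longer use expectedException
// to catch a thrown ParseException; they assert on the returned TaskStatus instead, and
// runTask(...) now returns both the status and the published segments. A minimal sketch of
// that pattern, assuming the same runTask(...) helper and the per-phase keys
// ("determinePartitions", "buildSegments") used in the tests above:
final Pair<TaskStatus, List<DataSegment>> result = runTask(indexTask);
final TaskStatus status = result.lhs;
Assert.assertEquals(TaskState.FAILED, status.getStatusCode());
// row-count metrics and saved parse-exception messages now travel on the status itself
final Map<?, ?> unparseableEvents = (Map<?, ?>) status.getContext().get("unparseableEvents");
Assert.assertTrue(unparseableEvents.containsKey("buildSegments"));
Assert.assertNotNull(status.getMetrics());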
Assert.assertEquals(2, segments.size()); @@ -885,9 +1265,6 @@ public void testCsvWithHeaderOfEmptyColumns() throws Exception @Test public void testCsvWithHeaderOfEmptyTimestamp() throws Exception { - expectedException.expect(ParseException.class); - expectedException.expectMessage("Unparseable timestamp found!"); - final File tmpDir = temporaryFolder.newFolder(); final File tmpFile = File.createTempFile("druid", "index", tmpDir); @@ -924,13 +1301,25 @@ public void testCsvWithHeaderOfEmptyTimestamp() throws Exception null, null, parseExceptionIgnoreSpec, + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - runTask(indexTask); + TaskStatus status = runTask(indexTask).lhs; + Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); + Assert.assertEquals("Max parse exceptions exceeded, terminating task...", status.getErrorMsg()); + + Map expectedUnparseables = ImmutableMap.of( + "determinePartitions", + new ArrayList<>(), + "buildSegments", + Arrays.asList("Unparseable timestamp found! Event: {column_1=2014-01-01T00:00:10Z, column_2=a, column_3=1}") + ); + Assert.assertEquals(expectedUnparseables, status.getContext().get("unparseableEvents")); } - private List runTask(IndexTask indexTask) throws Exception + private Pair> runTask(IndexTask indexTask) throws Exception { final List segments = Lists.newArrayList(); @@ -1047,11 +1436,11 @@ public Map makeLoadSpec(URI uri) ); indexTask.isReady(box.getTaskActionClient()); - indexTask.run(box); + TaskStatus status = indexTask.run(box); Collections.sort(segments); - return segments; + return Pair.of(status, segments); } private IndexTask.IndexIngestionSpec createIngestionSpec( @@ -1149,7 +1538,10 @@ private static IndexTuningConfig createTuningConfig( reportParseException, null, null, - null + null, + null, + null, + 1 ); } } diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/TaskSerdeTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/TaskSerdeTest.java index ff5a17d8ba44..589dcf3af913 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/TaskSerdeTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/TaskSerdeTest.java @@ -52,6 +52,7 @@ import io.druid.segment.realtime.plumber.Plumber; import io.druid.segment.realtime.plumber.PlumberSchool; import io.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; +import io.druid.server.security.AuthTestUtils; import io.druid.timeline.DataSegment; import io.druid.timeline.partition.NoneShardSpec; import org.hamcrest.CoreMatchers; @@ -190,8 +191,28 @@ public void testIndexTaskSerde() throws Exception jsonMapper ), new IndexIOConfig(new LocalFirehoseFactory(new File("lol"), "rofl", null), true), - new IndexTuningConfig(10000, 10, null, 9999, null, indexSpec, 3, true, true, false, null, null, null, null) + new IndexTuningConfig( + 10000, + 10, + null, + 9999, + null, + indexSpec, + 3, + true, + true, + false, + null, + null, + null, + null, + null, + null, + null + ) ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); @@ -253,8 +274,28 @@ public void testIndexTaskwithResourceSerde() throws Exception jsonMapper ), new IndexIOConfig(new LocalFirehoseFactory(new File("lol"), "rofl", null), true), - new IndexTuningConfig(10000, 10, null, null, null, indexSpec, 3, true, true, false, null, null, null, null) + new IndexTuningConfig( + 10000, + 10, + null, + null, + null, + indexSpec, + 3, + true, + true, + false, + null, + null, + null, + null, + null, + null, + null + ) ), + null, + 
AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); @@ -782,6 +823,8 @@ public void testHadoopIndexTaskSerde() throws Exception null, "blah", jsonMapper, + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); diff --git a/indexing-service/src/test/java/io/druid/indexing/overlord/TaskLifecycleTest.java b/indexing-service/src/test/java/io/druid/indexing/overlord/TaskLifecycleTest.java index 5f9ade8ddfe6..52c1ca39fcc7 100644 --- a/indexing-service/src/test/java/io/druid/indexing/overlord/TaskLifecycleTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/overlord/TaskLifecycleTest.java @@ -115,6 +115,7 @@ import io.druid.server.coordination.ServerType; import io.druid.server.initialization.ServerConfig; import io.druid.server.metrics.NoopServiceEmitter; +import io.druid.server.security.AuthTestUtils; import io.druid.timeline.DataSegment; import io.druid.timeline.partition.NoneShardSpec; import org.easymock.EasyMock; @@ -671,8 +672,28 @@ public void testIndexTask() throws Exception mapper ), new IndexIOConfig(new MockFirehoseFactory(false), false), - new IndexTuningConfig(10000, 10, null, null, null, indexSpec, 3, true, true, false, null, null, null, null) + new IndexTuningConfig( + 10000, + 10, + null, + null, + null, + indexSpec, + 3, + true, + true, + false, + null, + null, + null, + null, + null, + null, + null + ) ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); @@ -729,8 +750,28 @@ public void testIndexTaskFailure() throws Exception mapper ), new IndexIOConfig(new MockExceptionalFirehoseFactory(), false), - new IndexTuningConfig(10000, 10, null, null, null, indexSpec, 3, true, true, false, null, null, null, null) + new IndexTuningConfig( + 10000, + 10, + null, + null, + null, + indexSpec, + 3, + true, + true, + false, + null, + null, + null, + null, + null, + null, + null + ) ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); @@ -1094,8 +1135,28 @@ public void testResumeTasks() throws Exception mapper ), new IndexIOConfig(new MockFirehoseFactory(false), false), - new IndexTuningConfig(10000, 10, null, null, null, indexSpec, null, false, null, null, null, null, null, null) + new IndexTuningConfig( + 10000, + 10, + null, + null, + null, + indexSpec, + null, + false, + null, + null, + null, + null, + null, + null, + null, + null, + null + ) ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); diff --git a/java-util/src/main/java/io/druid/java/util/common/parsers/ObjectFlatteners.java b/java-util/src/main/java/io/druid/java/util/common/parsers/ObjectFlatteners.java index cae3eb686460..63c48987b9c9 100644 --- a/java-util/src/main/java/io/druid/java/util/common/parsers/ObjectFlatteners.java +++ b/java-util/src/main/java/io/druid/java/util/common/parsers/ObjectFlatteners.java @@ -190,7 +190,7 @@ public Object setValue(final Object value) }; } ) - .collect(Collectors.toSet()); + .collect(Collectors.toCollection(LinkedHashSet::new)); } }; } diff --git a/java-util/src/main/java/io/druid/java/util/common/parsers/ParseException.java b/java-util/src/main/java/io/druid/java/util/common/parsers/ParseException.java index ca1f5ff67cf2..4c8b797ee593 100644 --- a/java-util/src/main/java/io/druid/java/util/common/parsers/ParseException.java +++ b/java-util/src/main/java/io/druid/java/util/common/parsers/ParseException.java @@ -25,6 +25,8 @@ */ public class ParseException extends RuntimeException { + private boolean fromPartiallyValidRow = false; + public ParseException(String formatText, Object... 
arguments) { super(StringUtils.nonStrictFormat(formatText, arguments)); @@ -34,4 +36,14 @@ public ParseException(Throwable cause, String formatText, Object... arguments) { super(StringUtils.nonStrictFormat(formatText, arguments), cause); } + + public boolean isFromPartiallyValidRow() + { + return fromPartiallyValidRow; + } + + public void setFromPartiallyValidRow(boolean fromPartiallyValidRow) + { + this.fromPartiallyValidRow = fromPartiallyValidRow; + } } diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java index 332781b9beeb..bed3031e1f9e 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java @@ -42,8 +42,10 @@ import io.druid.java.util.common.DateTimes; import io.druid.java.util.common.IAE; import io.druid.java.util.common.ISE; +import io.druid.java.util.common.Pair; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.granularity.Granularity; +import io.druid.java.util.common.parsers.ParseException; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.PostAggregator; import io.druid.query.dimension.DimensionSpec; @@ -451,7 +453,7 @@ protected abstract AggregatorType[] initAggs( ); // Note: This method needs to be thread safe. - protected abstract Integer addToFacts( + protected abstract Pair> addToFacts( AggregatorFactory[] metrics, boolean deserializeComplexMetrics, boolean reportParseExceptions, @@ -514,31 +516,32 @@ public Map getColumnCapabilities() * * @return the number of rows in the data set after adding the InputRow */ - public int add(InputRow row) throws IndexSizeExceededException + public IncrementalIndexAddResult add(InputRow row) throws IndexSizeExceededException { return add(row, false); } - public int add(InputRow row, boolean skipMaxRowsInMemoryCheck) throws IndexSizeExceededException + public IncrementalIndexAddResult add(InputRow row, boolean skipMaxRowsInMemoryCheck) throws IndexSizeExceededException { - TimeAndDims key = toTimeAndDims(row); - final int rv = addToFacts( + Pair> timeAndDimsResult = toTimeAndDims(row); + final Pair> addToFactsResult = addToFacts( metrics, deserializeComplexMetrics, reportParseExceptions, row, numEntries, - key, + timeAndDimsResult.lhs, in, rowSupplier, skipMaxRowsInMemoryCheck ); updateMaxIngestedTime(row.getTimestamp()); - return rv; + ParseException parseException = getCombinedParseException(row, timeAndDimsResult.rhs, addToFactsResult.rhs); + return new IncrementalIndexAddResult(addToFactsResult.lhs, parseException); } @VisibleForTesting - TimeAndDims toTimeAndDims(InputRow row) + Pair> toTimeAndDims(InputRow row) { row = formatRow(row); if (row.getTimestampFromEpoch() < minTimestamp) { @@ -549,6 +552,7 @@ TimeAndDims toTimeAndDims(InputRow row) Object[] dims; List overflow = null; + List parseExceptionMessages = new ArrayList<>(); synchronized (dimensionDescs) { dims = new Object[dimensionDescs.size()]; for (String dimension : rowDimensions) { @@ -576,10 +580,16 @@ TimeAndDims toTimeAndDims(InputRow row) } DimensionHandler handler = desc.getHandler(); DimensionIndexer indexer = desc.getIndexer(); - Object dimsKey = indexer.processRowValsToUnsortedEncodedKeyComponent( - row.getRaw(dimension), - reportParseExceptions - ); + Object dimsKey = null; + try { + dimsKey = indexer.processRowValsToUnsortedEncodedKeyComponent( + row.getRaw(dimension), + 
true + ); + } + catch (ParseException pe) { + parseExceptionMessages.add(pe.getMessage()); + } // Set column capabilities as data is coming in if (!capabilities.hasMultipleValues() && dimsKey != null && handler.getLengthOfEncodedKeyComponent(dimsKey) > 1) { @@ -622,7 +632,45 @@ TimeAndDims toTimeAndDims(InputRow row) if (row.getTimestamp() != null) { truncated = gran.bucketStart(row.getTimestamp()).getMillis(); } - return new TimeAndDims(Math.max(truncated, minTimestamp), dims, dimensionDescsList); + + TimeAndDims timeAndDims = new TimeAndDims(Math.max(truncated, minTimestamp), dims, dimensionDescsList); + return Pair.of(timeAndDims, parseExceptionMessages); + } + + public static ParseException getCombinedParseException( + InputRow row, + List dimParseExceptionMessages, + List aggParseExceptionMessages + ) + { + int numAdded = 0; + StringBuilder stringBuilder = new StringBuilder(); + + if (dimParseExceptionMessages != null) { + for (String parseExceptionMessage : dimParseExceptionMessages) { + stringBuilder.append(parseExceptionMessage); + stringBuilder.append(","); + numAdded++; + } + } + if (aggParseExceptionMessages != null) { + for (String parseExceptionMessage : aggParseExceptionMessages) { + stringBuilder.append(parseExceptionMessage); + stringBuilder.append(","); + numAdded++; + } + } + + if (numAdded == 0) { + return null; + } + ParseException pe = new ParseException( + "Found unparseable columns in row: [%s], exceptions: [%s]", + row, + stringBuilder.toString() + ); + pe.setFromPartiallyValidRow(true); + return pe; } private synchronized void updateMaxIngestedTime(DateTime eventTime) diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexAddResult.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexAddResult.java new file mode 100644 index 000000000000..06c537a0aa36 --- /dev/null +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexAddResult.java @@ -0,0 +1,52 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.segment.incremental; + +import io.druid.java.util.common.parsers.ParseException; + +import javax.annotation.Nullable; + +public class IncrementalIndexAddResult +{ + private final int rowCount; + + @Nullable + private final ParseException parseException; + + public IncrementalIndexAddResult( + int rowCount, + @Nullable ParseException parseException + ) + { + this.rowCount = rowCount; + this.parseException = parseException; + } + + public int getRowCount() + { + return rowCount; + } + + @Nullable + public ParseException getParseException() + { + return parseException; + } +} diff --git a/processing/src/main/java/io/druid/segment/incremental/OffheapIncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/OffheapIncrementalIndex.java index d377634e0bc8..6d5458d39959 100644 --- a/processing/src/main/java/io/druid/segment/incremental/OffheapIncrementalIndex.java +++ b/processing/src/main/java/io/druid/segment/incremental/OffheapIncrementalIndex.java @@ -26,6 +26,7 @@ import io.druid.data.input.InputRow; import io.druid.java.util.common.IAE; import io.druid.java.util.common.ISE; +import io.druid.java.util.common.Pair; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.io.Closer; import io.druid.java.util.common.logger.Logger; @@ -138,7 +139,7 @@ protected BufferAggregator[] initAggs( } @Override - protected Integer addToFacts( + protected Pair> addToFacts( AggregatorFactory[] metrics, boolean deserializeComplexMetrics, boolean reportParseExceptions, @@ -238,7 +239,7 @@ protected Integer addToFacts( } } rowContainer.set(null); - return numEntries.get(); + return Pair.of(numEntries.get(), new ArrayList<>()); } @Override diff --git a/processing/src/main/java/io/druid/segment/incremental/OnheapIncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/OnheapIncrementalIndex.java index 1a377ea826be..3e42f28bf097 100644 --- a/processing/src/main/java/io/druid/segment/incremental/OnheapIncrementalIndex.java +++ b/processing/src/main/java/io/druid/segment/incremental/OnheapIncrementalIndex.java @@ -23,6 +23,7 @@ import com.google.common.base.Throwables; import com.google.common.collect.Maps; import io.druid.data.input.InputRow; +import io.druid.java.util.common.Pair; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.io.Closer; import io.druid.java.util.common.logger.Logger; @@ -37,7 +38,9 @@ import javax.annotation.Nullable; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; @@ -101,7 +104,7 @@ protected Aggregator[] initAggs( } @Override - protected Integer addToFacts( + protected Pair> addToFacts( AggregatorFactory[] metrics, boolean deserializeComplexMetrics, boolean reportParseExceptions, @@ -113,17 +116,18 @@ protected Integer addToFacts( boolean skipMaxRowsInMemoryCheck ) throws IndexSizeExceededException { + List parseExceptionMessages; final int priorIndex = facts.getPriorIndex(key); Aggregator[] aggs; if (TimeAndDims.EMPTY_ROW_INDEX != priorIndex) { aggs = concurrentGet(priorIndex); - doAggregate(metrics, aggs, rowContainer, row, reportParseExceptions); + parseExceptionMessages = doAggregate(metrics, aggs, rowContainer, row); } else { aggs = new Aggregator[metrics.length]; factorizeAggs(metrics, aggs, rowContainer, row); - doAggregate(metrics, aggs, rowContainer, row, reportParseExceptions); + 
parseExceptionMessages = doAggregate(metrics, aggs, rowContainer, row); final int rowIndex = indexIncrement.getAndIncrement(); concurrentSet(rowIndex, aggs); @@ -140,14 +144,14 @@ protected Integer addToFacts( } else { // We lost a race aggs = concurrentGet(prev); - doAggregate(metrics, aggs, rowContainer, row, reportParseExceptions); + parseExceptionMessages = doAggregate(metrics, aggs, rowContainer, row); // Free up the misfire concurrentRemove(rowIndex); // This is expected to occur ~80% of the time in the worst scenarios } } - return numEntries.get(); + return Pair.of(numEntries.get(), parseExceptionMessages); } @Override @@ -171,14 +175,14 @@ private void factorizeAggs( rowContainer.set(null); } - private void doAggregate( + private List doAggregate( AggregatorFactory[] metrics, Aggregator[] aggs, ThreadLocal rowContainer, - InputRow row, - boolean reportParseExceptions + InputRow row ) { + List parseExceptionMessages = new ArrayList<>(); rowContainer.set(row); for (int i = 0; i < aggs.length; i++) { @@ -189,16 +193,14 @@ private void doAggregate( } catch (ParseException e) { // "aggregate" can throw ParseExceptions if a selector expects something but gets something else. - if (reportParseExceptions) { - throw new ParseException(e, "Encountered parse error for aggregator[%s]", metrics[i].getName()); - } else { - log.debug(e, "Encountered parse error, skipping aggregator[%s].", metrics[i].getName()); - } + log.debug(e, "Encountered parse error, skipping aggregator[%s].", metrics[i].getName()); + parseExceptionMessages.add(e.getMessage()); } } } rowContainer.set(null); + return parseExceptionMessages; } private void closeAggregators() diff --git a/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexTest.java b/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexTest.java index ca6ffe9fb66f..c7ad76423100 100644 --- a/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexTest.java +++ b/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexTest.java @@ -216,11 +216,10 @@ public void testUnparseableNumerics() throws IndexSizeExceededException { IncrementalIndex index = closer.closeLater(indexCreator.createIndex()); - expectedException.expect(ParseException.class); - expectedException.expectMessage("could not convert value [asdj] to long"); - index.add( + IncrementalIndexAddResult result; + result = index.add( new MapBasedInputRow( - System.currentTimeMillis() - 1, + 0, Lists.newArrayList("string", "float", "long", "double"), ImmutableMap.of( "string", "A", @@ -230,12 +229,15 @@ public void testUnparseableNumerics() throws IndexSizeExceededException ) ) ); + Assert.assertEquals(ParseException.class, result.getParseException().getClass()); + Assert.assertEquals( + "Found unparseable columns in row: [MapBasedInputRow{timestamp=1970-01-01T00:00:00.000Z, event={string=A, float=19.0, long=asdj, double=21.0}, dimensions=[string, float, long, double]}], exceptions: [could not convert value [asdj] to long,]", + result.getParseException().getMessage() + ); - expectedException.expect(ParseException.class); - expectedException.expectMessage("could not convert value [aaa] to float"); - index.add( + result = index.add( new MapBasedInputRow( - System.currentTimeMillis() - 1, + 0, Lists.newArrayList("string", "float", "long", "double"), ImmutableMap.of( "string", "A", @@ -245,12 +247,15 @@ public void testUnparseableNumerics() throws IndexSizeExceededException ) ) ); + Assert.assertEquals(ParseException.class, 
result.getParseException().getClass()); + Assert.assertEquals( + "Found unparseable columns in row: [MapBasedInputRow{timestamp=1970-01-01T00:00:00.000Z, event={string=A, float=aaa, long=20, double=21.0}, dimensions=[string, float, long, double]}], exceptions: [could not convert value [aaa] to float,]", + result.getParseException().getMessage() + ); - expectedException.expect(ParseException.class); - expectedException.expectMessage("could not convert value [] to double"); - index.add( + result = index.add( new MapBasedInputRow( - System.currentTimeMillis() - 1, + 0, Lists.newArrayList("string", "float", "long", "double"), ImmutableMap.of( "string", "A", @@ -260,6 +265,11 @@ public void testUnparseableNumerics() throws IndexSizeExceededException ) ) ); + Assert.assertEquals(ParseException.class, result.getParseException().getClass()); + Assert.assertEquals( + "Found unparseable columns in row: [MapBasedInputRow{timestamp=1970-01-01T00:00:00.000Z, event={string=A, float=19.0, long=20, double=}, dimensions=[string, float, long, double]}], exceptions: [could not convert value [] to double,]", + result.getParseException().getMessage() + ); } @Test diff --git a/processing/src/test/java/io/druid/segment/incremental/OnheapIncrementalIndexBenchmark.java b/processing/src/test/java/io/druid/segment/incremental/OnheapIncrementalIndexBenchmark.java index c4f5fe3f1737..d797eefd96dc 100644 --- a/processing/src/test/java/io/druid/segment/incremental/OnheapIncrementalIndexBenchmark.java +++ b/processing/src/test/java/io/druid/segment/incremental/OnheapIncrementalIndexBenchmark.java @@ -34,6 +34,7 @@ import io.druid.data.input.InputRow; import io.druid.data.input.MapBasedInputRow; import io.druid.java.util.common.Intervals; +import io.druid.java.util.common.Pair; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.granularity.Granularities; import io.druid.java.util.common.granularity.Granularity; @@ -166,7 +167,7 @@ protected void concurrentSet(int offset, Aggregator[] value) } @Override - protected Integer addToFacts( + protected Pair> addToFacts( AggregatorFactory[] metrics, boolean deserializeComplexMetrics, boolean reportParseExceptions, @@ -236,7 +237,7 @@ protected Integer addToFacts( rowContainer.set(null); - return numEntries.get(); + return Pair.of(numEntries.get(), new ArrayList<>()); } @Override diff --git a/processing/src/test/java/io/druid/segment/incremental/TimeAndDimsCompTest.java b/processing/src/test/java/io/druid/segment/incremental/TimeAndDimsCompTest.java index 8677dc186281..c1325e94198e 100644 --- a/processing/src/test/java/io/druid/segment/incremental/TimeAndDimsCompTest.java +++ b/processing/src/test/java/io/druid/segment/incremental/TimeAndDimsCompTest.java @@ -45,13 +45,13 @@ public void testBasic() throws IndexSizeExceededException .buildOnheap(); long time = System.currentTimeMillis(); - TimeAndDims td1 = index.toTimeAndDims(toMapRow(time, "billy", "A", "joe", "B")); - TimeAndDims td2 = index.toTimeAndDims(toMapRow(time, "billy", "A", "joe", "A")); - TimeAndDims td3 = index.toTimeAndDims(toMapRow(time, "billy", "A")); + TimeAndDims td1 = (TimeAndDims) index.toTimeAndDims(toMapRow(time, "billy", "A", "joe", "B")).lhs; + TimeAndDims td2 = (TimeAndDims) index.toTimeAndDims(toMapRow(time, "billy", "A", "joe", "A")).lhs; + TimeAndDims td3 = (TimeAndDims) index.toTimeAndDims(toMapRow(time, "billy", "A")).lhs; - TimeAndDims td4 = index.toTimeAndDims(toMapRow(time + 1, "billy", "A", "joe", "B")); - TimeAndDims td5 = index.toTimeAndDims(toMapRow(time + 1, 
"billy", "A", "joe", Arrays.asList("A", "B"))); - TimeAndDims td6 = index.toTimeAndDims(toMapRow(time + 1)); + TimeAndDims td4 = (TimeAndDims) index.toTimeAndDims(toMapRow(time + 1, "billy", "A", "joe", "B")).lhs; + TimeAndDims td5 = (TimeAndDims) index.toTimeAndDims(toMapRow(time + 1, "billy", "A", "joe", Arrays.asList("A", "B"))).lhs; + TimeAndDims td6 = (TimeAndDims) index.toTimeAndDims(toMapRow(time + 1)).lhs; Comparator comparator = index.dimsComparator(); diff --git a/server/src/main/java/io/druid/segment/indexing/TuningConfig.java b/server/src/main/java/io/druid/segment/indexing/TuningConfig.java index 998f07857527..7fd246d25732 100644 --- a/server/src/main/java/io/druid/segment/indexing/TuningConfig.java +++ b/server/src/main/java/io/druid/segment/indexing/TuningConfig.java @@ -30,4 +30,7 @@ }) public interface TuningConfig { + boolean DEFAULT_LOG_PARSE_EXCEPTIONS = false; + int DEFAULT_MAX_PARSE_EXCEPTIONS = Integer.MAX_VALUE; + int DEFAULT_MAX_SAVED_PARSE_EXCEPTIONS = 0; } diff --git a/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetrics.java b/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetrics.java index d61de1aee04e..bd9a1bf48ab5 100644 --- a/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetrics.java +++ b/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetrics.java @@ -20,7 +20,9 @@ package io.druid.segment.realtime; import com.google.common.base.Preconditions; +import io.druid.indexer.TaskMetricsUtils; +import java.util.Map; import java.util.concurrent.atomic.AtomicLong; /** @@ -28,6 +30,7 @@ public class FireDepartmentMetrics { private final AtomicLong processedCount = new AtomicLong(0); + private final AtomicLong processedWithErrorsCount = new AtomicLong(0); private final AtomicLong thrownAwayCount = new AtomicLong(0); private final AtomicLong unparseableCount = new AtomicLong(0); private final AtomicLong rowOutputCount = new AtomicLong(0); @@ -49,6 +52,11 @@ public void incrementProcessed() processedCount.incrementAndGet(); } + public void incrementProcessedWithErrors() + { + processedWithErrorsCount.incrementAndGet(); + } + public void incrementThrownAway() { thrownAwayCount.incrementAndGet(); @@ -124,6 +132,11 @@ public long processed() return processedCount.get(); } + public long processedWithErrors() + { + return processedWithErrorsCount.get(); + } + public long thrownAway() { return thrownAwayCount.get(); @@ -203,6 +216,7 @@ public FireDepartmentMetrics snapshot() { final FireDepartmentMetrics retVal = new FireDepartmentMetrics(); retVal.processedCount.set(processedCount.get()); + retVal.processedWithErrorsCount.set(processedWithErrorsCount.get()); retVal.thrownAwayCount.set(thrownAwayCount.get()); retVal.unparseableCount.set(unparseableCount.get()); retVal.rowOutputCount.set(rowOutputCount.get()); @@ -231,6 +245,7 @@ public FireDepartmentMetrics merge(FireDepartmentMetrics other) Preconditions.checkNotNull(other, "Cannot merge a null FireDepartmentMetrics"); FireDepartmentMetrics otherSnapshot = other.snapshot(); processedCount.addAndGet(otherSnapshot.processed()); + processedWithErrorsCount.addAndGet(otherSnapshot.processedWithErrors()); thrownAwayCount.addAndGet(otherSnapshot.thrownAway()); rowOutputCount.addAndGet(otherSnapshot.rowOutput()); unparseableCount.addAndGet(otherSnapshot.unparseable()); @@ -249,4 +264,13 @@ public FireDepartmentMetrics merge(FireDepartmentMetrics other) return this; } + public static Map getRowMetricsFromFireDepartmentMetrics(FireDepartmentMetrics fdm) + { + return 
TaskMetricsUtils.makeIngestionRowMetrics( + fdm.processed(), + fdm.processedWithErrors(), + fdm.unparseable(), + fdm.thrownAway() + ); + } } diff --git a/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetricsTaskMetricsGetter.java b/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetricsTaskMetricsGetter.java new file mode 100644 index 000000000000..d5d0f924f047 --- /dev/null +++ b/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetricsTaskMetricsGetter.java @@ -0,0 +1,81 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.segment.realtime; + +import com.google.common.collect.ImmutableMap; +import io.druid.indexer.TaskMetricsGetter; +import io.druid.indexer.TaskMetricsUtils; +import io.druid.java.util.common.logger.Logger; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +public class FireDepartmentMetricsTaskMetricsGetter implements TaskMetricsGetter +{ + public static final List KEYS = Arrays.asList( + TaskMetricsUtils.ROWS_PROCESSED, + TaskMetricsUtils.ROWS_THROWN_AWAY, + TaskMetricsUtils.ROWS_UNPARSEABLE + ); + + private static final Logger log = new Logger(FireDepartmentMetricsTaskMetricsGetter.class); + + private final FireDepartmentMetrics fireDepartmentMetrics; + + private double processed = 0; + private double thrownAway = 0; + private double unparseable = 0; + + public FireDepartmentMetricsTaskMetricsGetter( + FireDepartmentMetrics fireDepartmentMetrics + ) + { + this.fireDepartmentMetrics = fireDepartmentMetrics; + } + + @Override + public List getKeys() + { + return KEYS; + } + + @Override + public Map getMetrics() + { + double curProcessed = fireDepartmentMetrics.processed(); + double curThrownAway = fireDepartmentMetrics.thrownAway(); + double curUnparseable = fireDepartmentMetrics.unparseable(); + + double processedDiff = curProcessed - processed; + double thrownAwayDiff = curThrownAway - thrownAway; + double unparseableDiff = curUnparseable - unparseable; + + processed = curProcessed; + thrownAway = curThrownAway; + unparseable = curUnparseable; + + return ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED, processedDiff, + TaskMetricsUtils.ROWS_THROWN_AWAY, thrownAwayDiff, + TaskMetricsUtils.ROWS_UNPARSEABLE, unparseableDiff + ); + } +} diff --git a/server/src/main/java/io/druid/segment/realtime/appenderator/Appenderator.java b/server/src/main/java/io/druid/segment/realtime/appenderator/Appenderator.java index dbd1ed831378..c8bba5cdeb84 100644 --- a/server/src/main/java/io/druid/segment/realtime/appenderator/Appenderator.java +++ b/server/src/main/java/io/druid/segment/realtime/appenderator/Appenderator.java @@ -23,6 +23,7 @@ import com.google.common.util.concurrent.ListenableFuture; import io.druid.data.input.Committer; 
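// Editorial aside (not part of the patch): the FireDepartmentMetricsTaskMetricsGetter defined
// just above reports deltas -- each getMetrics() call returns the rows processed / thrown away /
// unparseable since the previous call, because it remembers the last snapshot it handed out.
// Illustrative-only polling sketch; the poll sequence and variable names are assumptions, not
// code from this patch:
final TaskMetricsGetter getter = new FireDepartmentMetricsTaskMetricsGetter(fireDepartmentMetrics);
getter.getMetrics();                                               // first poll: totals accumulated so far
// ... more rows are ingested ...
final Object newlyUnparseable =
    getter.getMetrics().get(TaskMetricsUtils.ROWS_UNPARSEABLE);    // change since the first poll only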
import io.druid.data.input.InputRow; +import io.druid.java.util.common.parsers.ParseException; import io.druid.query.QuerySegmentWalker; import io.druid.segment.incremental.IndexSizeExceededException; @@ -228,11 +229,20 @@ class AppenderatorAddResult private final int numRowsInSegment; private final boolean isPersistRequired; - AppenderatorAddResult(SegmentIdentifier identifier, int numRowsInSegment, boolean isPersistRequired) + @Nullable + private final ParseException parseException; + + AppenderatorAddResult( + SegmentIdentifier identifier, + int numRowsInSegment, + boolean isPersistRequired, + @Nullable ParseException parseException + ) { this.segmentIdentifier = identifier; this.numRowsInSegment = numRowsInSegment; this.isPersistRequired = isPersistRequired; + this.parseException = parseException; } SegmentIdentifier getSegmentIdentifier() @@ -249,5 +259,11 @@ boolean isPersistRequired() { return isPersistRequired; } + + @Nullable + public ParseException getParseException() + { + return parseException; + } } } diff --git a/server/src/main/java/io/druid/segment/realtime/appenderator/AppenderatorDriverAddResult.java b/server/src/main/java/io/druid/segment/realtime/appenderator/AppenderatorDriverAddResult.java index a1ec20baa5b7..07a01ab4d021 100644 --- a/server/src/main/java/io/druid/segment/realtime/appenderator/AppenderatorDriverAddResult.java +++ b/server/src/main/java/io/druid/segment/realtime/appenderator/AppenderatorDriverAddResult.java @@ -19,6 +19,8 @@ package io.druid.segment.realtime.appenderator; +import io.druid.java.util.common.parsers.ParseException; + import javax.annotation.Nullable; /** @@ -33,32 +35,44 @@ public class AppenderatorDriverAddResult private final long totalNumRowsInAppenderator; private final boolean isPersistRequired; + @Nullable + private final ParseException parseException; + public static AppenderatorDriverAddResult ok( SegmentIdentifier segmentIdentifier, int numRowsInSegment, long totalNumRowsInAppenderator, - boolean isPersistRequired + boolean isPersistRequired, + @Nullable ParseException parseException ) { - return new AppenderatorDriverAddResult(segmentIdentifier, numRowsInSegment, totalNumRowsInAppenderator, isPersistRequired); + return new AppenderatorDriverAddResult( + segmentIdentifier, + numRowsInSegment, + totalNumRowsInAppenderator, + isPersistRequired, + parseException + ); } public static AppenderatorDriverAddResult fail() { - return new AppenderatorDriverAddResult(null, 0, 0, false); + return new AppenderatorDriverAddResult(null, 0, 0, false, null); } private AppenderatorDriverAddResult( @Nullable SegmentIdentifier segmentIdentifier, int numRowsInSegment, long totalNumRowsInAppenderator, - boolean isPersistRequired + boolean isPersistRequired, + @Nullable ParseException parseException ) { this.segmentIdentifier = segmentIdentifier; this.numRowsInSegment = numRowsInSegment; this.totalNumRowsInAppenderator = totalNumRowsInAppenderator; this.isPersistRequired = isPersistRequired; + this.parseException = parseException; } public boolean isOk() @@ -85,4 +99,10 @@ public boolean isPersistRequired() { return isPersistRequired; } + + @Nullable + public ParseException getParseException() + { + return parseException; + } } diff --git a/server/src/main/java/io/druid/segment/realtime/appenderator/AppenderatorImpl.java b/server/src/main/java/io/druid/segment/realtime/appenderator/AppenderatorImpl.java index be32f2960fff..f4932c6bdbce 100644 --- a/server/src/main/java/io/druid/segment/realtime/appenderator/AppenderatorImpl.java +++ 
b/server/src/main/java/io/druid/segment/realtime/appenderator/AppenderatorImpl.java @@ -63,6 +63,7 @@ import io.druid.segment.QueryableIndex; import io.druid.segment.QueryableIndexSegment; import io.druid.segment.Segment; +import io.druid.segment.incremental.IncrementalIndexAddResult; import io.druid.segment.incremental.IndexSizeExceededException; import io.druid.segment.indexing.DataSchema; import io.druid.segment.loading.DataSegmentPusher; @@ -218,9 +219,11 @@ public AppenderatorAddResult add( metrics.reportMessageMaxTimestamp(row.getTimestampFromEpoch()); final int sinkRowsInMemoryBeforeAdd = sink.getNumRowsInMemory(); final int sinkRowsInMemoryAfterAdd; + final IncrementalIndexAddResult addResult; try { - sinkRowsInMemoryAfterAdd = sink.add(row, !allowIncrementalPersists); + addResult = sink.add(row, !allowIncrementalPersists); + sinkRowsInMemoryAfterAdd = addResult.getRowCount(); } catch (IndexSizeExceededException e) { // Uh oh, we can't do anything about this! We can't persist (commit metadata would be out of sync) and we @@ -250,7 +253,7 @@ public AppenderatorAddResult add( } } - return new AppenderatorAddResult(identifier, sink.getNumRows(), isPersistRequired); + return new AppenderatorAddResult(identifier, sink.getNumRows(), isPersistRequired, addResult.getParseException()); } @Override diff --git a/server/src/main/java/io/druid/segment/realtime/appenderator/BaseAppenderatorDriver.java b/server/src/main/java/io/druid/segment/realtime/appenderator/BaseAppenderatorDriver.java index 24482d19c96c..963522117a6c 100644 --- a/server/src/main/java/io/druid/segment/realtime/appenderator/BaseAppenderatorDriver.java +++ b/server/src/main/java/io/druid/segment/realtime/appenderator/BaseAppenderatorDriver.java @@ -300,7 +300,8 @@ protected AppenderatorDriverAddResult append( identifier, result.getNumRowsInSegment(), appenderator.getTotalRowCount(), - result.isPersistRequired() + result.isPersistRequired(), + result.getParseException() ); } catch (SegmentNotWritableException e) { diff --git a/server/src/main/java/io/druid/segment/realtime/plumber/RealtimePlumber.java b/server/src/main/java/io/druid/segment/realtime/plumber/RealtimePlumber.java index 2f6a4a1dd995..98014d61b6fb 100644 --- a/server/src/main/java/io/druid/segment/realtime/plumber/RealtimePlumber.java +++ b/server/src/main/java/io/druid/segment/realtime/plumber/RealtimePlumber.java @@ -216,7 +216,7 @@ public int add(InputRow row, Supplier committerSupplier) throws Index return -1; } - final int numRows = sink.add(row, false); + final int numRows = sink.add(row, false).getRowCount(); if (!sink.canAppendRow() || System.currentTimeMillis() > nextFlush) { persist(committerSupplier.get()); diff --git a/server/src/main/java/io/druid/segment/realtime/plumber/Sink.java b/server/src/main/java/io/druid/segment/realtime/plumber/Sink.java index 634d8f514bd6..92790d2f20ae 100644 --- a/server/src/main/java/io/druid/segment/realtime/plumber/Sink.java +++ b/server/src/main/java/io/druid/segment/realtime/plumber/Sink.java @@ -34,6 +34,7 @@ import io.druid.segment.ReferenceCountingSegment; import io.druid.segment.column.ColumnCapabilitiesImpl; import io.druid.segment.incremental.IncrementalIndex; +import io.druid.segment.incremental.IncrementalIndexAddResult; import io.druid.segment.incremental.IncrementalIndexSchema; import io.druid.segment.incremental.IndexSizeExceededException; import io.druid.segment.indexing.DataSchema; @@ -53,7 +54,7 @@ public class Sink implements Iterable { - private static final int ADD_FAILED = -1; + private static final 
IncrementalIndexAddResult ADD_FAILED = new IncrementalIndexAddResult(-1, null); private final Object hydrantLock = new Object(); private final Interval interval; @@ -139,7 +140,7 @@ public FireHydrant getCurrHydrant() return currHydrant; } - public int add(InputRow row, boolean skipMaxRowsInMemoryCheck) throws IndexSizeExceededException + public IncrementalIndexAddResult add(InputRow row, boolean skipMaxRowsInMemoryCheck) throws IndexSizeExceededException { if (currHydrant == null) { throw new IAE("No currHydrant but given row[%s]", row); diff --git a/server/src/test/java/io/druid/segment/realtime/RealtimeManagerTest.java b/server/src/test/java/io/druid/segment/realtime/RealtimeManagerTest.java index 59f145555e85..2b9a147f55f3 100644 --- a/server/src/test/java/io/druid/segment/realtime/RealtimeManagerTest.java +++ b/server/src/test/java/io/druid/segment/realtime/RealtimeManagerTest.java @@ -1062,7 +1062,7 @@ public int add(InputRow row, Supplier committerSupplier) throws Index return -1; } - return sink.add(row, false); + return sink.add(row, false).getRowCount(); } public Sink getSink(long timestamp) diff --git a/server/src/test/java/io/druid/segment/realtime/appenderator/StreamAppenderatorDriverFailTest.java b/server/src/test/java/io/druid/segment/realtime/appenderator/StreamAppenderatorDriverFailTest.java index a249ccb7f088..a39b4cfc4b6f 100644 --- a/server/src/test/java/io/druid/segment/realtime/appenderator/StreamAppenderatorDriverFailTest.java +++ b/server/src/test/java/io/druid/segment/realtime/appenderator/StreamAppenderatorDriverFailTest.java @@ -310,7 +310,7 @@ public AppenderatorAddResult add( { rows.computeIfAbsent(identifier, k -> new ArrayList<>()).add(row); numRows++; - return new AppenderatorAddResult(identifier, numRows, false); + return new AppenderatorAddResult(identifier, numRows, false, null); } @Override From bbda06b47b00db74a22865c7c70a12b6991b145a Mon Sep 17 00:00:00 2001 From: jon-wei Date: Thu, 15 Mar 2018 17:07:26 -0700 Subject: [PATCH 02/10] PR comments, add support in AppenderatorDriverRealtimeIndexTask --- .../java/io/druid/indexer/TaskStatusPlus.java | 22 +- .../java/io/druid/utils/CircularBuffer.java | 1 + .../druid/indexing/kafka/KafkaIndexTask.java | 192 ++++++------ .../indexing/kafka/KafkaTuningConfig.java | 6 +- .../indexing/kafka/KafkaIndexTaskTest.java | 3 +- .../RealtimeAppenderatorTuningConfig.java | 43 ++- .../AppenderatorDriverRealtimeIndexTask.java | 174 ++++++++++- .../indexing/common/task/HadoopIndexTask.java | 37 +-- .../druid/indexing/common/task/IndexTask.java | 103 +++---- .../indexing/common/task/IndexTaskUtils.java | 78 +++++ ...penderatorDriverRealtimeIndexTaskTest.java | 280 +++++++++++++++--- .../indexing/common/task/IndexTaskTest.java | 21 +- .../common/task/RealtimeIndexTaskTest.java | 16 +- .../segment/incremental/IncrementalIndex.java | 70 ++++- .../incremental/OffheapIncrementalIndex.java | 5 +- .../incremental/OnheapIncrementalIndex.java | 5 +- .../OnheapIncrementalIndexBenchmark.java | 6 +- .../incremental/TimeAndDimsCompTest.java | 12 +- .../realtime/plumber/RealtimePlumber.java | 8 +- .../coordination/ChangeRequestHistory.java | 44 +-- .../ChangeRequestHistoryTest.java | 3 +- 21 files changed, 759 insertions(+), 370 deletions(-) create mode 100644 indexing-service/src/main/java/io/druid/indexing/common/task/IndexTaskUtils.java diff --git a/api/src/main/java/io/druid/indexer/TaskStatusPlus.java b/api/src/main/java/io/druid/indexer/TaskStatusPlus.java index 1e52a64b3525..f1272c984dcb 100644 --- 
a/api/src/main/java/io/druid/indexer/TaskStatusPlus.java +++ b/api/src/main/java/io/druid/indexer/TaskStatusPlus.java @@ -124,6 +124,12 @@ public TaskLocation getLocation() return location; } + @JsonProperty + public String getDataSource() + { + return dataSource; + } + @Nullable @JsonProperty("metrics") public Map getMetrics() @@ -180,11 +186,15 @@ public boolean equals(Object o) return false; } - if (!Objects.equals(errorMsg, that.errorMsg)) { + if (!Objects.equals(dataSource, that.dataSource)) { return false; } - if (!Objects.equals(location, that.location)) { + if (!Objects.equals(metrics, that.metrics)) { + return false; + } + + if (!Objects.equals(errorMsg, that.errorMsg)) { return false; } @@ -202,16 +212,10 @@ public int hashCode() state, duration, location, + dataSource, metrics, errorMsg, context ); } - - @JsonProperty - public String getDataSource() - { - return dataSource; - } - } diff --git a/api/src/main/java/io/druid/utils/CircularBuffer.java b/api/src/main/java/io/druid/utils/CircularBuffer.java index 3710499611c9..bac7277b367d 100644 --- a/api/src/main/java/io/druid/utils/CircularBuffer.java +++ b/api/src/main/java/io/druid/utils/CircularBuffer.java @@ -35,6 +35,7 @@ public E[] getBuffer() public CircularBuffer(int capacity) { + Preconditions.checkArgument(capacity > 0, "Capacity must be greater than 0."); buffer = (E[]) new Object[capacity]; } diff --git a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java index 390a965216c2..62c353b191ff 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java +++ b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java @@ -60,7 +60,7 @@ import io.druid.indexing.common.actions.SegmentTransactionalInsertAction; import io.druid.indexing.common.actions.TaskActionClient; import io.druid.indexing.common.task.AbstractTask; -import io.druid.indexing.common.task.IndexTask; +import io.druid.indexing.common.task.IndexTaskUtils; import io.druid.indexing.common.task.RealtimeIndexTask; import io.druid.indexing.common.task.TaskResource; import io.druid.indexing.common.task.Tasks; @@ -95,12 +95,7 @@ import io.druid.segment.realtime.firehose.ChatHandlerProvider; import io.druid.server.security.Access; import io.druid.server.security.Action; -import io.druid.server.security.AuthorizationUtils; import io.druid.server.security.AuthorizerMapper; -import io.druid.server.security.ForbiddenException; -import io.druid.server.security.Resource; -import io.druid.server.security.ResourceAction; -import io.druid.server.security.ResourceType; import io.druid.timeline.DataSegment; import io.druid.utils.CircularBuffer; import org.apache.kafka.clients.consumer.ConsumerRecord; @@ -435,14 +430,14 @@ public TaskStatus run(final TaskToolbox toolbox) throws Exception catch (Exception e) { log.error(e, "Encountered exception while running task."); Map context = Maps.newHashMap(); - List savedParseExceptionMessages = IndexTask.getMessagesFromSavedParseExceptions(savedParseExceptions); + List savedParseExceptionMessages = IndexTaskUtils.getMessagesFromSavedParseExceptions(savedParseExceptions); if (savedParseExceptionMessages != null) { context.put("unparseableEvents", savedParseExceptionMessages); } return TaskStatus.failure( getId(), getTaskCompletionMetrics(), - e.getMessage(), + Throwables.getStackTraceAsString(e), 
getTaskCompletionContext() ); } @@ -713,67 +708,63 @@ public void run() boolean isPersistRequired = false; for (InputRow row : rows) { - try { - if (row != null && withinMinMaxRecordTime(row)) { - SequenceMetadata sequenceToUse = null; - for (SequenceMetadata sequence : sequences) { - if (sequence.canHandle(record)) { - sequenceToUse = sequence; - } - } - - if (sequenceToUse == null) { - throw new ISE( - "WTH?! cannot find any valid sequence for record with partition [%d] and offset [%d]. Current sequences: %s", - record.partition(), - record.offset(), - sequences - ); + if (row != null && withinMinMaxRecordTime(row)) { + SequenceMetadata sequenceToUse = null; + for (SequenceMetadata sequence : sequences) { + if (sequence.canHandle(record)) { + sequenceToUse = sequence; } + } - final AppenderatorDriverAddResult addResult = driver.add( - row, - sequenceToUse.getSequenceName(), - committerSupplier, - // skip segment lineage check as there will always be one segment - // for combination of sequence and segment granularity. - // It is necessary to skip it as the task puts messages polled from all the - // assigned Kafka partitions into a single Druid segment, thus ordering of - // messages among replica tasks across assigned partitions is not guaranteed - // which may cause replica tasks to ask for segments with different interval - // in different order which might cause SegmentAllocateAction to fail. - true, - // do not allow incremental persists to happen until all the rows from this batch - // of rows are indexed - false + if (sequenceToUse == null) { + throw new ISE( + "WTH?! cannot find any valid sequence for record with partition [%d] and offset [%d]. Current sequences: %s", + record.partition(), + record.offset(), + sequences ); + } - if (addResult.isOk()) { - // If the number of rows in the segment exceeds the threshold after adding a row, - // move the segment out from the active segments of BaseAppenderatorDriver to make a new segment. - if (addResult.getNumRowsInSegment() > tuningConfig.getMaxRowsPerSegment()) { - if (!sequenceToUse.isCheckpointed()) { - sequenceToCheckpoint = sequenceToUse; - } + final AppenderatorDriverAddResult addResult = driver.add( + row, + sequenceToUse.getSequenceName(), + committerSupplier, + // skip segment lineage check as there will always be one segment + // for combination of sequence and segment granularity. + // It is necessary to skip it as the task puts messages polled from all the + // assigned Kafka partitions into a single Druid segment, thus ordering of + // messages among replica tasks across assigned partitions is not guaranteed + // which may cause replica tasks to ask for segments with different interval + // in different order which might cause SegmentAllocateAction to fail. + true, + // do not allow incremental persists to happen until all the rows from this batch + // of rows are indexed + false + ); + + if (addResult.isOk()) { + // If the number of rows in the segment exceeds the threshold after adding a row, + // move the segment out from the active segments of BaseAppenderatorDriver to make a new segment. + if (addResult.getNumRowsInSegment() > tuningConfig.getMaxRowsPerSegment()) { + if (!sequenceToUse.isCheckpointed()) { + sequenceToCheckpoint = sequenceToUse; } - isPersistRequired |= addResult.isPersistRequired(); - } else { - // Failure to allocate segment puts determinism at risk, bail out to be safe. - // May want configurable behavior here at some point. 
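// Editorial aside (not part of the patch): the hunk above changes the contract around
// driver.add(...). A parse problem no longer aborts the batch via a thrown ParseException;
// it comes back on the AppenderatorDriverAddResult and is routed to the task's own handler,
// while fully clean rows are the only ones counted as "processed". Minimal caller-side
// sketch of the new pattern (handleParseException is the helper this task defines later in
// the patch; only the call site is shown here):
final AppenderatorDriverAddResult addResult = driver.add(row, sequenceName, committerSupplier, true, false);
if (!addResult.isOk()) {
  // segment allocation failure still fails fast, as before
  throw new ISE("Could not allocate segment for row with timestamp[%s]", row.getTimestamp());
}
if (addResult.getParseException() != null) {
  handleParseException(addResult.getParseException(), record);  // count, save, maybe fail the task
} else {
  fireDepartmentMetrics.incrementProcessed();
}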
- // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks. - throw new ISE("Could not allocate segment for row with timestamp[%s]", row.getTimestamp()); } + isPersistRequired |= addResult.isPersistRequired(); + } else { + // Failure to allocate segment puts determinism at risk, bail out to be safe. + // May want configurable behavior here at some point. + // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks. + throw new ISE("Could not allocate segment for row with timestamp[%s]", row.getTimestamp()); + } - if (addResult.getParseException() != null) { - throw addResult.getParseException(); - } - fireDepartmentMetrics.incrementProcessed(); + if (addResult.getParseException() != null) { + handleParseException(addResult.getParseException(), record); } else { - fireDepartmentMetrics.incrementThrownAway(); + fireDepartmentMetrics.incrementProcessed(); } - } - catch (ParseException e) { - handleParseException(e, record); + } else { + fireDepartmentMetrics.incrementThrownAway(); } } if (isPersistRequired) { @@ -936,7 +927,7 @@ public void onFailure(Throwable t) } Map context = Maps.newHashMap(); - List savedParseExceptionMessages = IndexTask.getMessagesFromSavedParseExceptions(savedParseExceptions); + List savedParseExceptionMessages = IndexTaskUtils.getMessagesFromSavedParseExceptions(savedParseExceptions); if (savedParseExceptionMessages != null) { context.put("unparseableEvents", savedParseExceptionMessages); } @@ -1146,42 +1137,38 @@ public void run() final Map> segmentsToMoveOut = new HashMap<>(); for (InputRow row : rows) { - try { - if (row != null && withinMinMaxRecordTime(row)) { - final String sequenceName = sequenceNames.get(record.partition()); - final AppenderatorDriverAddResult addResult = driver.add( - row, - sequenceName, - committerSupplier, - false, - false - ); - - if (addResult.isOk()) { - // If the number of rows in the segment exceeds the threshold after adding a row, - // move the segment out from the active segments of BaseAppenderatorDriver to make a new segment. - if (addResult.getNumRowsInSegment() > tuningConfig.getMaxRowsPerSegment()) { - segmentsToMoveOut.computeIfAbsent(sequenceName, k -> new HashSet<>()) - .add(addResult.getSegmentIdentifier()); - } - isPersistRequired |= addResult.isPersistRequired(); - } else { - // Failure to allocate segment puts determinism at risk, bail out to be safe. - // May want configurable behavior here at some point. - // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks. - throw new ISE("Could not allocate segment for row with timestamp[%s]", row.getTimestamp()); + if (row != null && withinMinMaxRecordTime(row)) { + final String sequenceName = sequenceNames.get(record.partition()); + final AppenderatorDriverAddResult addResult = driver.add( + row, + sequenceName, + committerSupplier, + false, + false + ); + + if (addResult.isOk()) { + // If the number of rows in the segment exceeds the threshold after adding a row, + // move the segment out from the active segments of BaseAppenderatorDriver to make a new segment. + if (addResult.getNumRowsInSegment() > tuningConfig.getMaxRowsPerSegment()) { + segmentsToMoveOut.computeIfAbsent(sequenceName, k -> new HashSet<>()) + .add(addResult.getSegmentIdentifier()); } + isPersistRequired |= addResult.isPersistRequired(); + } else { + // Failure to allocate segment puts determinism at risk, bail out to be safe. 
+ // May want configurable behavior here at some point. + // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks. + throw new ISE("Could not allocate segment for row with timestamp[%s]", row.getTimestamp()); + } - if (addResult.getParseException() != null) { - throw addResult.getParseException(); - } - fireDepartmentMetrics.incrementProcessed(); + if (addResult.getParseException() != null) { + handleParseException(addResult.getParseException(), record); } else { - fireDepartmentMetrics.incrementThrownAway(); + fireDepartmentMetrics.incrementProcessed(); } - } - catch (ParseException e) { - handleParseException(e, record); + } else { + fireDepartmentMetrics.incrementThrownAway(); } } @@ -1317,7 +1304,7 @@ public String apply(DataSegment input) } Map context = Maps.newHashMap(); - List savedParseExceptionMessages = IndexTask.getMessagesFromSavedParseExceptions(savedParseExceptions); + List savedParseExceptionMessages = IndexTaskUtils.getMessagesFromSavedParseExceptions(savedParseExceptions); if (savedParseExceptionMessages != null) { context.put("unparseableEvents", savedParseExceptionMessages); } @@ -1358,11 +1345,10 @@ private void handleParseException(ParseException pe, ConsumerRecord getTaskCompletionContext() { Map context = Maps.newHashMap(); - List buildSegmentsParseExceptionMessages = IndexTask.getMessagesFromSavedParseExceptions(savedParseExceptions); + List buildSegmentsParseExceptionMessages = IndexTaskUtils.getMessagesFromSavedParseExceptions(savedParseExceptions); if (buildSegmentsParseExceptionMessages != null) { Map unparseableEventsMap = Maps.newHashMap(); unparseableEventsMap.put("buildSegments", buildSegmentsParseExceptionMessages); @@ -1449,17 +1435,7 @@ public boolean canRestore() */ private Access authorizationCheck(final HttpServletRequest req, Action action) { - ResourceAction resourceAction = new ResourceAction( - new Resource(dataSchema.getDataSource(), ResourceType.DATASOURCE), - action - ); - - Access access = AuthorizationUtils.authorizeResourceAction(req, resourceAction, authorizerMapper); - if (!access.isAllowed()) { - throw new ForbiddenException(access.toString()); - } - - return access; + return IndexTaskUtils.datasourceAuthorizationCheck(req, action, getDataSource(), authorizerMapper); } @VisibleForTesting @@ -1635,7 +1611,7 @@ public Response getUnparseableEvents( ) { authorizationCheck(req, Action.READ); - List events = IndexTask.getMessagesFromSavedParseExceptions(savedParseExceptions); + List events = IndexTaskUtils.getMessagesFromSavedParseExceptions(savedParseExceptions); return Response.ok(events).build(); } diff --git a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaTuningConfig.java b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaTuningConfig.java index d1c53a5fda57..4c0277646368 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaTuningConfig.java +++ b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaTuningConfig.java @@ -278,9 +278,9 @@ public boolean equals(Object o) Objects.equals(indexSpec, that.indexSpec) && Objects.equals(segmentWriteOutMediumFactory, that.segmentWriteOutMediumFactory) && Objects.equals(intermediateHandoffPeriod, that.intermediateHandoffPeriod) && - Objects.equals(logParseExceptions, that.logParseExceptions) && - Objects.equals(maxParseExceptions, that.maxParseExceptions) && - Objects.equals(maxSavedParseExceptions, 
that.maxSavedParseExceptions); + logParseExceptions == that.logParseExceptions && + maxParseExceptions == that.maxParseExceptions && + maxSavedParseExceptions == that.maxSavedParseExceptions; } @Override diff --git a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java index 3ebb3d02d6a1..d37fa27823a2 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java @@ -42,6 +42,7 @@ import io.druid.data.input.impl.LongDimensionSchema; import io.druid.data.input.impl.StringDimensionSchema; import io.druid.indexer.TaskMetricsUtils; +import io.druid.indexing.common.task.IndexTaskTest; import io.druid.java.util.emitter.EmittingLogger; import io.druid.java.util.emitter.core.NoopEmitter; import io.druid.java.util.emitter.service.ServiceEmitter; @@ -1105,7 +1106,7 @@ public void testMultipleParseExceptionsFailure() throws Exception // Wait for task to exit Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); - Assert.assertEquals("Max parse exceptions exceeded, terminating task...", status.getErrorMsg()); + IndexTaskTest.checkTaskStatusErrorMsgForParseExceptionsExceeded(status); // Check metrics Assert.assertEquals(3, task.getFireDepartmentMetrics().processed()); diff --git a/indexing-service/src/main/java/io/druid/indexing/common/index/RealtimeAppenderatorTuningConfig.java b/indexing-service/src/main/java/io/druid/indexing/common/index/RealtimeAppenderatorTuningConfig.java index a7084f69d436..06c6069ae009 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/index/RealtimeAppenderatorTuningConfig.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/index/RealtimeAppenderatorTuningConfig.java @@ -65,6 +65,10 @@ private static File createNewBasePersistDirectory() @Nullable private final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory; + private final boolean logParseExceptions; + private final int maxParseExceptions; + private final int maxSavedParseExceptions; + @JsonCreator public RealtimeAppenderatorTuningConfig( @JsonProperty("maxRowsInMemory") Integer maxRowsInMemory, @@ -77,7 +81,10 @@ public RealtimeAppenderatorTuningConfig( @JsonProperty("reportParseExceptions") Boolean reportParseExceptions, @JsonProperty("publishAndHandoffTimeout") Long publishAndHandoffTimeout, @JsonProperty("alertTimeout") Long alertTimeout, - @JsonProperty("segmentWriteOutMediumFactory") @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory + @JsonProperty("segmentWriteOutMediumFactory") @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, + @JsonProperty("logParseExceptions") @Nullable Boolean logParseExceptions, + @JsonProperty("maxParseExceptions") @Nullable Integer maxParseExceptions, + @JsonProperty("maxSavedParseExceptions") @Nullable Integer maxSavedParseExceptions ) { this.maxRowsInMemory = maxRowsInMemory == null ? defaultMaxRowsInMemory : maxRowsInMemory; @@ -100,6 +107,17 @@ public RealtimeAppenderatorTuningConfig( this.alertTimeout = alertTimeout == null ? 
defaultAlertTimeout : alertTimeout; Preconditions.checkArgument(this.alertTimeout >= 0, "alertTimeout must be >= 0"); this.segmentWriteOutMediumFactory = segmentWriteOutMediumFactory; + + if (this.reportParseExceptions) { + this.maxParseExceptions = 0; + this.maxSavedParseExceptions = maxSavedParseExceptions == null ? 0 : Math.min(1, maxSavedParseExceptions); + } else { + this.maxParseExceptions = maxParseExceptions == null ? TuningConfig.DEFAULT_MAX_PARSE_EXCEPTIONS : maxParseExceptions; + this.maxSavedParseExceptions = maxSavedParseExceptions == null + ? TuningConfig.DEFAULT_MAX_SAVED_PARSE_EXCEPTIONS + : maxSavedParseExceptions; + } + this.logParseExceptions = logParseExceptions == null ? TuningConfig.DEFAULT_LOG_PARSE_EXCEPTIONS : logParseExceptions; } @Override @@ -176,6 +194,24 @@ public SegmentWriteOutMediumFactory getSegmentWriteOutMediumFactory() return segmentWriteOutMediumFactory; } + @JsonProperty + public boolean isLogParseExceptions() + { + return logParseExceptions; + } + + @JsonProperty + public int getMaxParseExceptions() + { + return maxParseExceptions; + } + + @JsonProperty + public int getMaxSavedParseExceptions() + { + return maxSavedParseExceptions; + } + public RealtimeAppenderatorTuningConfig withBasePersistDirectory(File dir) { return new RealtimeAppenderatorTuningConfig( @@ -189,7 +225,10 @@ public RealtimeAppenderatorTuningConfig withBasePersistDirectory(File dir) reportParseExceptions, publishAndHandoffTimeout, alertTimeout, - segmentWriteOutMediumFactory + segmentWriteOutMediumFactory, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions ); } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java index 4af077fe501a..60f4b9e0f1b4 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java @@ -19,13 +19,16 @@ package io.druid.indexing.common.task; +import com.fasterxml.jackson.annotation.JacksonInject; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Optional; import com.google.common.base.Supplier; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import io.druid.data.input.Committer; @@ -35,6 +38,8 @@ import io.druid.discovery.DiscoveryDruidNode; import io.druid.discovery.DruidNodeDiscoveryProvider; import io.druid.discovery.LookupNodeService; +import io.druid.indexer.IngestionState; +import io.druid.indexer.TaskMetricsUtils; import io.druid.indexing.appenderator.ActionBasedSegmentAllocator; import io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker; import io.druid.indexing.common.TaskStatus; @@ -64,14 +69,27 @@ import io.druid.segment.realtime.appenderator.SegmentsAndMetadata; import io.druid.segment.realtime.appenderator.StreamAppenderatorDriver; import io.druid.segment.realtime.appenderator.TransactionalSegmentPublisher; +import io.druid.segment.realtime.firehose.ChatHandler; +import io.druid.segment.realtime.firehose.ChatHandlerProvider; 
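// ---- Editor's note (annotation, not part of the patch) ----------------------------------
// A minimal, self-contained sketch of how the parse-exception settings introduced above
// appear to be resolved: the legacy reportParseExceptions flag forces strict behavior
// (zero tolerated parse exceptions, at most one saved example), otherwise nullable inputs
// fall back to defaults. The default constants below are illustrative placeholders and are
// assumptions, not necessarily the values defined in io.druid.segment.indexing.TuningConfig.
class ParseExceptionSettingsSketch
{
  static final int ASSUMED_DEFAULT_MAX_PARSE_EXCEPTIONS = Integer.MAX_VALUE; // assumption
  static final int ASSUMED_DEFAULT_MAX_SAVED_PARSE_EXCEPTIONS = 0;           // assumption

  /** Returns {maxParseExceptions, maxSavedParseExceptions} after applying the override. */
  static int[] resolve(boolean reportParseExceptions, Integer maxParseExceptions, Integer maxSavedParseExceptions)
  {
    if (reportParseExceptions) {
      // Strict mode: fail on the first parse exception, keep at most one saved example.
      int saved = maxSavedParseExceptions == null ? 0 : Math.min(1, maxSavedParseExceptions);
      return new int[]{0, saved};
    }
    int maxExceptions = maxParseExceptions == null ? ASSUMED_DEFAULT_MAX_PARSE_EXCEPTIONS : maxParseExceptions;
    int maxSaved = maxSavedParseExceptions == null ? ASSUMED_DEFAULT_MAX_SAVED_PARSE_EXCEPTIONS : maxSavedParseExceptions;
    return new int[]{maxExceptions, maxSaved};
  }
}
// ---- End of editor's note ---------------------------------------------------------------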
import io.druid.segment.realtime.firehose.ClippedFirehoseFactory; import io.druid.segment.realtime.firehose.EventReceiverFirehoseFactory; import io.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory; import io.druid.segment.realtime.plumber.Committers; +import io.druid.server.security.Action; +import io.druid.server.security.AuthorizerMapper; +import io.druid.utils.CircularBuffer; import org.apache.commons.io.FileUtils; +import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; import java.io.File; import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.Queue; import java.util.Random; @@ -83,7 +101,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -public class AppenderatorDriverRealtimeIndexTask extends AbstractTask +public class AppenderatorDriverRealtimeIndexTask extends AbstractTask implements ChatHandler { private static final String CTX_KEY_LOOKUP_TIER = "lookupTier"; @@ -129,12 +147,26 @@ private static String makeTaskId(RealtimeAppenderatorIngestionSpec spec) @JsonIgnore private volatile Thread runThread = null; + @JsonIgnore + private CircularBuffer savedParseExceptions; + + @JsonIgnore + private final Optional chatHandlerProvider; + + @JsonIgnore + private final AuthorizerMapper authorizerMapper; + + @JsonIgnore + private IngestionState ingestionState; + @JsonCreator public AppenderatorDriverRealtimeIndexTask( @JsonProperty("id") String id, @JsonProperty("resource") TaskResource taskResource, @JsonProperty("spec") RealtimeAppenderatorIngestionSpec spec, - @JsonProperty("context") Map context + @JsonProperty("context") Map context, + @JacksonInject ChatHandlerProvider chatHandlerProvider, + @JacksonInject AuthorizerMapper authorizerMapper ) { super( @@ -146,6 +178,14 @@ public AppenderatorDriverRealtimeIndexTask( ); this.spec = spec; this.pendingHandoffs = new ConcurrentLinkedQueue<>(); + this.chatHandlerProvider = Optional.fromNullable(chatHandlerProvider); + this.authorizerMapper = authorizerMapper; + + if (spec.getTuningConfig().getMaxSavedParseExceptions() > 0) { + savedParseExceptions = new CircularBuffer<>(spec.getTuningConfig().getMaxSavedParseExceptions()); + } + + this.ingestionState = IngestionState.NOT_STARTED; } @Override @@ -216,6 +256,13 @@ dataSchema, new RealtimeIOConfig(null, null, null), null StreamAppenderatorDriver driver = newDriver(dataSchema, appenderator, toolbox, metrics); try { + if (chatHandlerProvider.isPresent()) { + log.info("Found chat handler of class[%s]", chatHandlerProvider.get().getClass().getName()); + chatHandlerProvider.get().register(getId(), this, false); + } else { + log.warn("No chat handler detected"); + } + toolbox.getDataSegmentServerAnnouncer().announce(); toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode); @@ -247,6 +294,8 @@ dataSchema, new RealtimeIOConfig(null, null, null), null } } + ingestionState = IngestionState.BUILD_SEGMENTS; + // Time to read data! 
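// ---- Editor's note (annotation, not part of the patch) ----------------------------------
// The task above only allocates the saved-exceptions buffer when maxSavedParseExceptions > 0,
// and the /unparseableEvents endpoint later drains it newest-first via getLatest(i). Below is
// a minimal, self-contained ring buffer mirroring those semantics; it is an illustration of
// the idea, not the actual io.druid.utils.CircularBuffer implementation.
class RecentParseErrorsSketch
{
  private final String[] messages;
  private int next = 0; // slot for the next write
  private int size = 0; // number of populated slots, capped at capacity

  RecentParseErrorsSketch(int capacity)
  {
    if (capacity <= 0) {
      throw new IllegalArgumentException("Capacity must be greater than 0.");
    }
    messages = new String[capacity];
  }

  void add(String message)
  {
    messages[next] = message;
    next = (next + 1) % messages.length;
    size = Math.min(size + 1, messages.length);
  }

  /** index 0 = most recently added, matching how getLatest(i) is used in this patch. */
  String getLatest(int index)
  {
    int pos = ((next - 1 - index) % messages.length + messages.length) % messages.length;
    return messages[pos];
  }

  int size()
  {
    return size;
  }
}
// ---- End of editor's note ---------------------------------------------------------------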
while (!gracefullyStopped && firehoseDrainableByClosing && firehose.hasMore()) { try { @@ -272,19 +321,20 @@ dataSchema, new RealtimeIOConfig(null, null, null), null throw new ISE("Could not allocate segment for row with timestamp[%s]", inputRow.getTimestamp()); } - metrics.incrementProcessed(); + if (addResult.getParseException() != null) { + handleParseException(addResult.getParseException()); + } else { + metrics.incrementProcessed(); + } } } catch (ParseException e) { - if (tuningConfig.isReportParseExceptions()) { - throw e; - } else { - log.debug(e, "Discarded row due to exception, considering unparseable."); - metrics.incrementUnparseable(); - } + handleParseException(e); } } + ingestionState = IngestionState.COMPLETED; + if (!gracefullyStopped) { synchronized (this) { if (gracefullyStopped) { @@ -311,9 +361,18 @@ dataSchema, new RealtimeIOConfig(null, null, null), null catch (Throwable e) { log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()) .emit(); - throw e; + return TaskStatus.failure( + getId(), + getTaskCompletionMetrics(), + Throwables.getStackTraceAsString(e), + getTaskCompletionContext() + ); } finally { + if (chatHandlerProvider.isPresent()) { + chatHandlerProvider.get().unregister(getId()); + } + CloseQuietly.close(firehose); CloseQuietly.close(appenderator); CloseQuietly.close(driver); @@ -325,7 +384,12 @@ dataSchema, new RealtimeIOConfig(null, null, null), null } log.info("Job done!"); - return TaskStatus.success(getId()); + return TaskStatus.success( + getId(), + getTaskCompletionMetrics(), + null, + getTaskCompletionContext() + ); } @Override @@ -385,6 +449,46 @@ public RealtimeAppenderatorIngestionSpec getSpec() return spec; } + + @GET + @Path("/rowStats") + @Produces(MediaType.APPLICATION_JSON) + public Response getRowStats( + @Context final HttpServletRequest req + ) + { + IndexTaskUtils.datasourceAuthorizationCheck(req, Action.READ, getDataSource(), authorizerMapper); + Map returnMap = Maps.newHashMap(); + Map totalsMap = Maps.newHashMap(); + + if (metrics != null) { + totalsMap.put( + "buildSegments", + TaskMetricsUtils.makeIngestionRowMetrics( + metrics.processed(), + metrics.processedWithErrors(), + metrics.unparseable(), + metrics.thrownAway() + ) + ); + } + + returnMap.put("totals", totalsMap); + return Response.ok(returnMap).build(); + } + + @GET + @Path("/unparseableEvents") + @Produces(MediaType.APPLICATION_JSON) + public Response getUnparseableEvents( + @Context final HttpServletRequest req + ) + { + IndexTaskUtils.datasourceAuthorizationCheck(req, Action.READ, getDataSource(), authorizerMapper); + List events = IndexTaskUtils.getMessagesFromSavedParseExceptions(savedParseExceptions); + return Response.ok(events).build(); + } + /** * Is a firehose from this factory drainable by closing it? If so, we should drain on stopGracefully rather than * abruptly stopping. @@ -402,6 +506,54 @@ && isFirehoseDrainableByClosing(((TimedShutoffFirehoseFactory) firehoseFactory). 
&& isFirehoseDrainableByClosing(((ClippedFirehoseFactory) firehoseFactory).getDelegate())); } + private Map getTaskCompletionContext() + { + Map context = Maps.newHashMap(); + List buildSegmentsParseExceptionMessages = IndexTaskUtils.getMessagesFromSavedParseExceptions(savedParseExceptions); + if (buildSegmentsParseExceptionMessages != null) { + Map unparseableEventsMap = Maps.newHashMap(); + unparseableEventsMap.put("buildSegments", buildSegmentsParseExceptionMessages); + context.put("unparseableEvents", unparseableEventsMap); + } + context.put("ingestionState", ingestionState); + return context; + } + + private Map getTaskCompletionMetrics() + { + Map metricsMap = Maps.newHashMap(); + if (metrics != null) { + metricsMap.put( + "buildSegments", + FireDepartmentMetrics.getRowMetricsFromFireDepartmentMetrics(metrics) + ); + } + return metricsMap; + } + + private void handleParseException(ParseException pe) + { + if (pe.isFromPartiallyValidRow()) { + metrics.incrementProcessedWithErrors(); + } else { + metrics.incrementUnparseable(); + } + + if (spec.getTuningConfig().isLogParseExceptions()) { + log.error(pe, "Encountered parse exception: "); + } + + if (savedParseExceptions != null) { + savedParseExceptions.add(pe); + } + + if (metrics.unparseable() + metrics.processedWithErrors() + > spec.getTuningConfig().getMaxParseExceptions()) { + log.error("Max parse exceptions exceeded, terminating task..."); + throw new RuntimeException("Max parse exceptions exceeded, terminating task..."); + } + } + private void setupTimeoutAlert() { if (spec.getTuningConfig().getAlertTimeout() > 0) { diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java index b4054c577bbf..279c940f79d2 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java @@ -26,6 +26,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Optional; import com.google.common.base.Preconditions; +import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; @@ -52,14 +53,8 @@ import io.druid.java.util.common.logger.Logger; import io.druid.segment.realtime.firehose.ChatHandler; import io.druid.segment.realtime.firehose.ChatHandlerProvider; -import io.druid.server.security.Access; import io.druid.server.security.Action; -import io.druid.server.security.AuthorizationUtils; import io.druid.server.security.AuthorizerMapper; -import io.druid.server.security.ForbiddenException; -import io.druid.server.security.Resource; -import io.druid.server.security.ResourceAction; -import io.druid.server.security.ResourceType; import io.druid.timeline.DataSegment; import org.joda.time.Interval; @@ -240,7 +235,12 @@ public TaskStatus run(TaskToolbox toolbox) throws Exception log.error(e, "Encountered exception in run():"); } - return TaskStatus.failure(getId(), null, effectiveException.getMessage(), null); + return TaskStatus.failure( + getId(), + getTaskCompletionMetrics(), + Throwables.getStackTraceAsString(effectiveException), + getTaskCompletionContext() + ); } finally { if (chatHandlerProvider.isPresent()) { @@ -414,7 +414,7 @@ public Response getRowStats( @QueryParam("windows") List windows ) { - authorizationCheck(req, Action.READ); + 
IndexTaskUtils.datasourceAuthorizationCheck(req, Action.READ, getDataSource(), authorizerMapper); Map returnMap = Maps.newHashMap(); Map totalsMap = Maps.newHashMap(); @@ -455,27 +455,6 @@ private Map getTaskCompletionContext() return context; } - /** - * Authorizes action to be performed on this task's datasource - * - * @return authorization result - */ - private Access authorizationCheck(final HttpServletRequest req, Action action) - { - ResourceAction resourceAction = new ResourceAction( - new Resource(spec.getDataSchema().getDataSource(), ResourceType.DATASOURCE), - action - ); - - Access access = AuthorizationUtils.authorizeResourceAction(req, resourceAction, authorizerMapper); - if (!access.isAllowed()) { - throw new ForbiddenException(access.toString()); - } - - return access; - } - - public static class InnerProcessingStatsGetter implements TaskMetricsGetter { public static final List KEYS = Arrays.asList( diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java index 3cc7b103e689..e0b1ceb23391 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java @@ -83,14 +83,8 @@ import io.druid.segment.realtime.firehose.ChatHandler; import io.druid.segment.realtime.firehose.ChatHandlerProvider; import io.druid.segment.writeout.SegmentWriteOutMediumFactory; -import io.druid.server.security.Access; import io.druid.server.security.Action; -import io.druid.server.security.AuthorizationUtils; import io.druid.server.security.AuthorizerMapper; -import io.druid.server.security.ForbiddenException; -import io.druid.server.security.Resource; -import io.druid.server.security.ResourceAction; -import io.druid.server.security.ResourceType; import io.druid.timeline.DataSegment; import io.druid.timeline.partition.HashBasedNumberedShardSpec; import io.druid.timeline.partition.NoneShardSpec; @@ -113,7 +107,6 @@ import javax.ws.rs.core.Response; import java.io.File; import java.io.IOException; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -280,7 +273,7 @@ public Response getUnparseableEvents( @QueryParam("full") String full ) { - authorizationCheck(req, Action.READ); + IndexTaskUtils.datasourceAuthorizationCheck(req, Action.READ, getDataSource(), authorizerMapper); Map> events = Maps.newHashMap(); boolean needsDeterminePartitions = false; @@ -304,11 +297,17 @@ public Response getUnparseableEvents( } if (needsDeterminePartitions) { - events.put("determinePartitions", getMessagesFromSavedParseExceptions(determinePartitionsSavedParseExceptions)); + events.put( + "determinePartitions", + IndexTaskUtils.getMessagesFromSavedParseExceptions(determinePartitionsSavedParseExceptions) + ); } if (needsBuildSegments) { - events.put("buildSegments", getMessagesFromSavedParseExceptions(buildSegmentsSavedParseExceptions)); + events.put( + "buildSegments", + IndexTaskUtils.getMessagesFromSavedParseExceptions(buildSegmentsSavedParseExceptions) + ); } return Response.ok(events).build(); @@ -322,7 +321,7 @@ public Response getRowStats( @QueryParam("full") String full ) { - authorizationCheck(req, Action.READ); + IndexTaskUtils.datasourceAuthorizationCheck(req, Action.READ, getDataSource(), authorizerMapper); Map returnMap = Maps.newHashMap(); Map totalsMap = Maps.newHashMap(); @@ -378,27 +377,6 @@ public Response getRowStats( return 
Response.ok(returnMap).build(); } - /** - * Authorizes action to be performed on this task's datasource - * - * @return authorization result - */ - private Access authorizationCheck(final HttpServletRequest req, Action action) - { - ResourceAction resourceAction = new ResourceAction( - new Resource(ingestionSchema.getDataSchema().getDataSource(), ResourceType.DATASOURCE), - action - ); - - Access access = AuthorizationUtils.authorizeResourceAction(req, resourceAction, authorizerMapper); - if (!access.isAllowed()) { - throw new ForbiddenException(access.toString()); - } - - return access; - } - - @JsonProperty("spec") public IndexIngestionSpec getIngestionSchema() { @@ -470,7 +448,7 @@ public TaskStatus run(final TaskToolbox toolbox) throws Exception return TaskStatus.failure( getId(), getTaskCompletionMetrics(), - e.getMessage(), + Throwables.getStackTraceAsString(e), getTaskCompletionContext() ); } @@ -485,8 +463,10 @@ public TaskStatus run(final TaskToolbox toolbox) throws Exception private Map getTaskCompletionContext() { Map context = Maps.newHashMap(); - List determinePartitionsParseExceptionMessages = getMessagesFromSavedParseExceptions(determinePartitionsSavedParseExceptions); - List buildSegmentsParseExceptionMessages = getMessagesFromSavedParseExceptions(buildSegmentsSavedParseExceptions); + List determinePartitionsParseExceptionMessages = IndexTaskUtils.getMessagesFromSavedParseExceptions( + determinePartitionsSavedParseExceptions); + List buildSegmentsParseExceptionMessages = IndexTaskUtils.getMessagesFromSavedParseExceptions( + buildSegmentsSavedParseExceptions); if (determinePartitionsParseExceptionMessages != null || buildSegmentsParseExceptionMessages != null) { Map unparseableEventsMap = Maps.newHashMap(); unparseableEventsMap.put("determinePartitions", determinePartitionsParseExceptionMessages); @@ -976,33 +956,14 @@ dataSchema, new RealtimeIOConfig(null, null, null), null throw new ISE("Failed to add a row with timestamp[%s]", inputRow.getTimestamp()); } - if (addResult.getParseException() != null) { - throw addResult.getParseException(); + handleParseException(addResult.getParseException()); } else { buildSegmentsFireDepartmentMetrics.incrementProcessed(); } } catch (ParseException e) { - if (e.isFromPartiallyValidRow()) { - buildSegmentsFireDepartmentMetrics.incrementProcessedWithErrors(); - } else { - buildSegmentsFireDepartmentMetrics.incrementUnparseable(); - } - - if (tuningConfig.isLogParseExceptions()) { - log.error(e, "Encountered parse exception:"); - } - - if (buildSegmentsSavedParseExceptions != null) { - buildSegmentsSavedParseExceptions.add(e); - } - - if (buildSegmentsFireDepartmentMetrics.unparseable() - + buildSegmentsFireDepartmentMetrics.processedWithErrors() > tuningConfig.getMaxParseExceptions()) { - log.error("Max parse exceptions exceeded, terminating task..."); - throw new RuntimeException("Max parse exceptions exceeded, terminating task...", e); - } + handleParseException(e); } } @@ -1052,19 +1013,27 @@ dataSchema, new RealtimeIOConfig(null, null, null), null } } - @Nullable - public static List getMessagesFromSavedParseExceptions(CircularBuffer savedParseExceptions) + private void handleParseException(ParseException e) { - if (savedParseExceptions == null) { - return null; + if (e.isFromPartiallyValidRow()) { + buildSegmentsFireDepartmentMetrics.incrementProcessedWithErrors(); + } else { + buildSegmentsFireDepartmentMetrics.incrementUnparseable(); } - List events = new ArrayList<>(); - for (int i = 0; i < savedParseExceptions.size(); i++) { - 
events.add(savedParseExceptions.getLatest(i).getMessage()); + if (ingestionSchema.tuningConfig.isLogParseExceptions()) { + log.error(e, "Encountered parse exception:"); } - return events; + if (buildSegmentsSavedParseExceptions != null) { + buildSegmentsSavedParseExceptions.add(e); + } + + if (buildSegmentsFireDepartmentMetrics.unparseable() + + buildSegmentsFireDepartmentMetrics.processedWithErrors() > ingestionSchema.tuningConfig.getMaxParseExceptions()) { + log.error("Max parse exceptions exceeded, terminating task..."); + throw new RuntimeException("Max parse exceptions exceeded, terminating task...", e); + } } private static boolean exceedMaxRowsInSegment(int numRowsInSegment, IndexTuningConfig indexTuningConfig) @@ -1553,9 +1522,9 @@ public boolean equals(Object o) Objects.equals(indexSpec, that.indexSpec) && Objects.equals(basePersistDirectory, that.basePersistDirectory) && Objects.equals(segmentWriteOutMediumFactory, that.segmentWriteOutMediumFactory) && - Objects.equals(logParseExceptions, that.logParseExceptions) && - Objects.equals(maxParseExceptions, that.maxParseExceptions) && - Objects.equals(maxSavedParseExceptions, that.maxSavedParseExceptions); + logParseExceptions == that.logParseExceptions && + maxParseExceptions == that.maxParseExceptions && + maxSavedParseExceptions == that.maxSavedParseExceptions; } @Override diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTaskUtils.java b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTaskUtils.java new file mode 100644 index 000000000000..aee89b58a5aa --- /dev/null +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTaskUtils.java @@ -0,0 +1,78 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.indexing.common.task; + +import io.druid.server.security.Access; +import io.druid.server.security.Action; +import io.druid.server.security.AuthorizationUtils; +import io.druid.server.security.AuthorizerMapper; +import io.druid.server.security.ForbiddenException; +import io.druid.server.security.Resource; +import io.druid.server.security.ResourceAction; +import io.druid.server.security.ResourceType; +import io.druid.utils.CircularBuffer; + +import javax.annotation.Nullable; +import javax.servlet.http.HttpServletRequest; +import java.util.ArrayList; +import java.util.List; + +public class IndexTaskUtils +{ + @Nullable + public static List getMessagesFromSavedParseExceptions(CircularBuffer savedParseExceptions) + { + if (savedParseExceptions == null) { + return null; + } + + List events = new ArrayList<>(); + for (int i = 0; i < savedParseExceptions.size(); i++) { + events.add(savedParseExceptions.getLatest(i).getMessage()); + } + + return events; + } + + /** + * Authorizes action to be performed on a task's datasource + * + * @return authorization result + */ + public static Access datasourceAuthorizationCheck( + final HttpServletRequest req, + Action action, + String datasource, + AuthorizerMapper authorizerMapper + ) + { + ResourceAction resourceAction = new ResourceAction( + new Resource(datasource, ResourceType.DATASOURCE), + action + ); + + Access access = AuthorizationUtils.authorizeResourceAction(req, resourceAction, authorizerMapper); + if (!access.isAllowed()) { + throw new ForbiddenException(access.toString()); + } + + return access; + } +} diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java index cfa91b1a2205..f0481a0d763b 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java @@ -34,13 +34,18 @@ import io.druid.data.input.FirehoseFactory; import io.druid.data.input.InputRow; import io.druid.data.input.impl.DimensionsSpec; +import io.druid.data.input.impl.FloatDimensionSchema; import io.druid.data.input.impl.InputRowParser; +import io.druid.data.input.impl.LongDimensionSchema; import io.druid.data.input.impl.MapInputRowParser; +import io.druid.data.input.impl.StringDimensionSchema; import io.druid.data.input.impl.TimeAndDimsParseSpec; import io.druid.data.input.impl.TimestampSpec; import io.druid.discovery.DataNodeService; import io.druid.discovery.DruidNodeAnnouncer; import io.druid.discovery.LookupNodeService; +import io.druid.indexer.IngestionState; +import io.druid.indexer.TaskMetricsUtils; import io.druid.indexer.TaskState; import io.druid.indexing.common.SegmentLoaderFactory; import io.druid.indexing.common.TaskStatus; @@ -117,12 +122,12 @@ import io.druid.server.DruidNode; import io.druid.server.coordination.DataSegmentServerAnnouncer; import io.druid.server.coordination.ServerType; +import io.druid.server.security.AuthTestUtils; import io.druid.timeline.DataSegment; import io.druid.timeline.partition.LinearShardSpec; import io.druid.timeline.partition.NumberedShardSpec; import org.apache.commons.io.FileUtils; import org.easymock.EasyMock; -import org.hamcrest.CoreMatchers; import org.joda.time.DateTime; import org.joda.time.Period; import org.junit.After; @@ -130,8 +135,6 @@ import org.junit.Before; 
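// ---- Editor's note (annotation, not part of the patch) ----------------------------------
// A hedged usage sketch of the IndexTaskUtils helper defined above: any task HTTP endpoint
// can delegate its datasource-level authorization to datasourceAuthorizationCheck(), which
// throws ForbiddenException when access is denied. The resource class and its path below
// are hypothetical; only the calling convention mirrors the endpoints in this patch.
import io.druid.indexing.common.task.IndexTaskUtils;
import io.druid.server.security.Action;
import io.druid.server.security.AuthorizerMapper;

import javax.servlet.http.HttpServletRequest;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import javax.ws.rs.core.Context;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;

@Path("/druid/example") // hypothetical path, for illustration only
class ExampleTaskResourceSketch
{
  private final String dataSource;                 // the task's datasource
  private final AuthorizerMapper authorizerMapper; // injected, as in the tasks above

  ExampleTaskResourceSketch(String dataSource, AuthorizerMapper authorizerMapper)
  {
    this.dataSource = dataSource;
    this.authorizerMapper = authorizerMapper;
  }

  @GET
  @Path("/status")
  @Produces(MediaType.APPLICATION_JSON)
  public Response getStatus(@Context final HttpServletRequest req)
  {
    // READ access to this task's datasource is required; a denied check never returns here.
    IndexTaskUtils.datasourceAuthorizationCheck(req, Action.READ, dataSource, authorizerMapper);
    return Response.ok("{\"state\":\"RUNNING\"}").build();
  }
}
// ---- End of editor's note ---------------------------------------------------------------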
import org.junit.Rule; import org.junit.Test; -import org.junit.internal.matchers.ThrowableCauseMatcher; -import org.junit.internal.matchers.ThrowableMessageMatcher; import org.junit.rules.ExpectedException; import org.junit.rules.TemporaryFolder; @@ -149,7 +152,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutionException; import java.util.concurrent.Executor; import java.util.concurrent.TimeUnit; @@ -294,11 +296,11 @@ public void testDefaultResource() throws Exception } - @Test(timeout = 60_000L, expected = ExecutionException.class) + @Test(timeout = 60_000L) public void testHandoffTimeout() throws Exception { expectPublishedSegments(1); - final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, TransformSpec.NONE, true, 100L); + final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, TransformSpec.NONE, true, 100L, true, 0, 1); final ListenableFuture statusFuture = runTask(task); // Wait for firehose to show up, it starts off null. @@ -318,7 +320,8 @@ public void testHandoffTimeout() throws Exception firehose.close(); // handoff would timeout, resulting in exception - statusFuture.get(); + TaskStatus status = statusFuture.get(); + Assert.assertTrue(status.getErrorMsg().contains("java.util.concurrent.TimeoutException: Timeout waiting for task.")); } @Test(timeout = 60_000L) @@ -520,7 +523,7 @@ public void testTransformSpec() throws Exception new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil()) ) ); - final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, transformSpec, true, 0); + final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, transformSpec, true, 0, true, 0, 1); final ListenableFuture statusFuture = runTask(task); // Wait for firehose to show up, it starts off null. @@ -595,10 +598,10 @@ public void testReportParseExceptionsOnBadMetric() throws Exception firehose.addRows( ImmutableList.of( - ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "1"), - ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "foo"), + ImmutableMap.of("t", 2000000L, "dim1", "foo", "met1", "1"), + ImmutableMap.of("t", 3000000L, "dim1", "foo", "met1", "foo"), ImmutableMap.of("t", now.minus(new Period("P1D")).getMillis(), "dim1", "foo", "met1", "foo"), - ImmutableMap.of("t", now.getMillis(), "dim2", "bar", "met1", 2.0) + ImmutableMap.of("t", 4000000L, "dim2", "bar", "met1", 2.0) ) ); @@ -606,26 +609,17 @@ public void testReportParseExceptionsOnBadMetric() throws Exception firehose.close(); // Wait for the task to finish. 
- expectedException.expect(ExecutionException.class); - expectedException.expectCause(CoreMatchers.instanceOf(ParseException.class)); - expectedException.expectCause( - ThrowableMessageMatcher.hasMessage( - CoreMatchers.containsString("Encountered parse error for aggregator[met1]") - ) - ); - expectedException.expect( - ThrowableCauseMatcher.hasCause( - ThrowableCauseMatcher.hasCause( - CoreMatchers.allOf( - CoreMatchers.instanceOf(ParseException.class), - ThrowableMessageMatcher.hasMessage( - CoreMatchers.containsString("Unable to parse value[foo] for field[met1]") - ) - ) - ) + TaskStatus status = statusFuture.get(); + Assert.assertTrue(status.getErrorMsg().contains("java.lang.RuntimeException: Max parse exceptions exceeded, terminating task...")); + + Map expectedUnparseables = ImmutableMap.of( + "buildSegments", + Arrays.asList( + "Found unparseable columns in row: [MapBasedInputRow{timestamp=1970-01-01T00:50:00.000Z, event={t=3000000, dim1=foo, met1=foo}, dimensions=[dim1, dim2, dim1t, dimLong, dimFloat]}], exceptions: [Unable to parse value[foo] for field[met1],]" ) ); - statusFuture.get(); + + Assert.assertEquals(expectedUnparseables, status.getContext().get("unparseableEvents")); } @Test(timeout = 60_000L) @@ -633,7 +627,7 @@ public void testNoReportParseExceptions() throws Exception { expectPublishedSegments(1); - final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, false); + final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, TransformSpec.NONE, false, 0, true, null, 1); final ListenableFuture statusFuture = runTask(task); // Wait for firehose to show up, it starts off null. @@ -671,7 +665,8 @@ public void testNoReportParseExceptions() throws Exception DataSegment publishedSegment = Iterables.getOnlyElement(publishedSegments); // Check metrics. - Assert.assertEquals(3, task.getMetrics().processed()); + Assert.assertEquals(2, task.getMetrics().processed()); + Assert.assertEquals(1, task.getMetrics().processedWithErrors()); Assert.assertEquals(0, task.getMetrics().thrownAway()); Assert.assertEquals(2, task.getMetrics().unparseable()); @@ -696,9 +691,186 @@ public void testNoReportParseExceptions() throws Exception } handOffCallbacks.clear(); + Map expectedMetrics = ImmutableMap.of( + "buildSegments", + ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED, 2L, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 1L, + TaskMetricsUtils.ROWS_UNPARSEABLE, 2L, + TaskMetricsUtils.ROWS_THROWN_AWAY, 0L + ) + ); + + // Wait for the task to finish. + final TaskStatus taskStatus = statusFuture.get(); + Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode()); + Assert.assertEquals(expectedMetrics, taskStatus.getMetrics()); + } + + @Test(timeout = 60_000L) + public void testMultipleParseExceptionsSuccess() throws Exception + { + expectPublishedSegments(1); + + final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, TransformSpec.NONE, false, 0, true, 10, 10); + final ListenableFuture statusFuture = runTask(task); + + // Wait for firehose to show up, it starts off null. + while (task.getFirehose() == null) { + Thread.sleep(50); + } + + final TestFirehose firehose = (TestFirehose) task.getFirehose(); + + firehose.addRows( + Arrays.asList( + // Good row- will be processed. + ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "met1", "1"), + + // Null row- will be thrown away. + null, + + // Bad metric- will count as processed, but that particular metric won't update. 
+ ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "met1", "foo"), + + // Bad long dim- will count as processed, but bad dims will get default values + ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "dimLong", "notnumber", "dimFloat", "notnumber", "met1", "foo"), + + // Bad row- will be unparseable. + ImmutableMap.of("dim1", "foo", "met1", 2.0, FAIL_DIM, "x"), + + // Good row- will be processed. + ImmutableMap.of("t", 1521251960729L, "dim2", "bar", "met1", 2.0) + ) + ); + + // Stop the firehose, this will drain out existing events. + firehose.close(); + + // Wait for publish. + Collection publishedSegments = awaitSegments(); + + DataSegment publishedSegment = Iterables.getOnlyElement(publishedSegments); + + // Check metrics. + Assert.assertEquals(2, task.getMetrics().processed()); + Assert.assertEquals(2, task.getMetrics().processedWithErrors()); + Assert.assertEquals(0, task.getMetrics().thrownAway()); + Assert.assertEquals(2, task.getMetrics().unparseable()); + + // Do some queries. + Assert.assertEquals(4, sumMetric(task, null, "rows")); + Assert.assertEquals(3, sumMetric(task, null, "met1")); + + awaitHandoffs(); + + // Simulate handoff. + for (Map.Entry> entry : handOffCallbacks.entrySet()) { + final Pair executorRunnablePair = entry.getValue(); + Assert.assertEquals( + new SegmentDescriptor( + publishedSegment.getInterval(), + publishedSegment.getVersion(), + publishedSegment.getShardSpec().getPartitionNum() + ), + entry.getKey() + ); + executorRunnablePair.lhs.execute(executorRunnablePair.rhs); + } + handOffCallbacks.clear(); + + Map expectedMetrics = ImmutableMap.of( + "buildSegments", + ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED, 2L, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 2L, + TaskMetricsUtils.ROWS_UNPARSEABLE, 2L, + TaskMetricsUtils.ROWS_THROWN_AWAY, 0L + ) + ); + // Wait for the task to finish. final TaskStatus taskStatus = statusFuture.get(); Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode()); + Assert.assertEquals(expectedMetrics, taskStatus.getMetrics()); + Map expectedUnparseables = ImmutableMap.of( + "buildSegments", + Arrays.asList( + "Unparseable timestamp found! Event: {dim1=foo, met1=2.0, __fail__=x}", + "Found unparseable columns in row: [MapBasedInputRow{timestamp=2018-03-17T01:59:20.729Z, event={t=1521251960729, dim1=foo, dimLong=notnumber, dimFloat=notnumber, met1=foo}, dimensions=[dim1, dim2, dim1t, dimLong, dimFloat]}], exceptions: [could not convert value [notnumber] to long,could not convert value [notnumber] to float,Unable to parse value[foo] for field[met1],]", + "Found unparseable columns in row: [MapBasedInputRow{timestamp=2018-03-17T01:59:20.729Z, event={t=1521251960729, dim1=foo, met1=foo}, dimensions=[dim1, dim2, dim1t, dimLong, dimFloat]}], exceptions: [Unable to parse value[foo] for field[met1],]", + "Unparseable timestamp found! Event: null" + ) + ); + Assert.assertEquals(expectedUnparseables, taskStatus.getContext().get("unparseableEvents")); + Assert.assertEquals(IngestionState.COMPLETED, taskStatus.getContext().get("ingestionState")); + } + + @Test(timeout = 60_000L) + public void testMultipleParseExceptionsFailure() throws Exception + { + expectPublishedSegments(1); + + final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, TransformSpec.NONE, false, 0, true, 3, 10); + final ListenableFuture statusFuture = runTask(task); + + // Wait for firehose to show up, it starts off null. 
+ while (task.getFirehose() == null) { + Thread.sleep(50); + } + + final TestFirehose firehose = (TestFirehose) task.getFirehose(); + + firehose.addRows( + Arrays.asList( + // Good row- will be processed. + ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "met1", "1"), + + // Null row- will be thrown away. + null, + + // Bad metric- will count as processed, but that particular metric won't update. + ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "met1", "foo"), + + // Bad long dim- will count as processed, but bad dims will get default values + ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "dimLong", "notnumber", "dimFloat", "notnumber", "met1", "foo"), + + // Bad row- will be unparseable. + ImmutableMap.of("dim1", "foo", "met1", 2.0, FAIL_DIM, "x"), + + // Good row- will be processed. + ImmutableMap.of("t", 1521251960729L, "dim2", "bar", "met1", 2.0) + ) + ); + + // Stop the firehose, this will drain out existing events. + firehose.close(); + + // Wait for the task to finish. + final TaskStatus taskStatus = statusFuture.get(); + Assert.assertEquals(TaskState.FAILED, taskStatus.getStatusCode()); + Assert.assertTrue(taskStatus.getErrorMsg().contains("Max parse exceptions exceeded, terminating task...")); + Map expectedMetrics = ImmutableMap.of( + "buildSegments", + ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED, 1L, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 2L, + TaskMetricsUtils.ROWS_UNPARSEABLE, 2L, + TaskMetricsUtils.ROWS_THROWN_AWAY, 0L + ) + ); + Assert.assertEquals(expectedMetrics, taskStatus.getMetrics()); + Map expectedUnparseables = ImmutableMap.of( + "buildSegments", + Arrays.asList( + "Unparseable timestamp found! Event: {dim1=foo, met1=2.0, __fail__=x}", + "Found unparseable columns in row: [MapBasedInputRow{timestamp=2018-03-17T01:59:20.729Z, event={t=1521251960729, dim1=foo, dimLong=notnumber, dimFloat=notnumber, met1=foo}, dimensions=[dim1, dim2, dim1t, dimLong, dimFloat]}], exceptions: [could not convert value [notnumber] to long,could not convert value [notnumber] to float,Unable to parse value[foo] for field[met1],]", + "Found unparseable columns in row: [MapBasedInputRow{timestamp=2018-03-17T01:59:20.729Z, event={t=1521251960729, dim1=foo, met1=foo}, dimensions=[dim1, dim2, dim1t, dimLong, dimFloat]}], exceptions: [Unable to parse value[foo] for field[met1],]", + "Unparseable timestamp found! Event: null" + ) + ); + Assert.assertEquals(expectedUnparseables, taskStatus.getContext().get("unparseableEvents")); + Assert.assertEquals(IngestionState.BUILD_SEGMENTS, taskStatus.getContext().get("ingestionState")); } @Test(timeout = 60_000L) @@ -929,14 +1101,18 @@ public void testRestoreCorruptData() throws Exception final ListenableFuture statusFuture = runTask(task2); // Wait for the task to finish. 
- boolean caught = false; - try { - statusFuture.get(); - } - catch (Exception expected) { - caught = true; - } - Assert.assertTrue("expected exception", caught); + TaskStatus status = statusFuture.get(); + Map expectedMetrics = ImmutableMap.of( + "buildSegments", + ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED, 0L, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0L, + TaskMetricsUtils.ROWS_UNPARSEABLE, 0L, + TaskMetricsUtils.ROWS_THROWN_AWAY, 0L + ) + ); + Assert.assertEquals(expectedMetrics, status.getMetrics()); + Assert.assertTrue(status.getErrorMsg().contains("java.lang.IllegalArgumentException\n\tat java.nio.Buffer.position")); } } @@ -989,19 +1165,22 @@ public TaskStatus call() throws Exception private AppenderatorDriverRealtimeIndexTask makeRealtimeTask(final String taskId) { - return makeRealtimeTask(taskId, TransformSpec.NONE, true, 0); + return makeRealtimeTask(taskId, TransformSpec.NONE, true, 0, true, 0, 1); } private AppenderatorDriverRealtimeIndexTask makeRealtimeTask(final String taskId, boolean reportParseExceptions) { - return makeRealtimeTask(taskId, TransformSpec.NONE, reportParseExceptions, 0); + return makeRealtimeTask(taskId, TransformSpec.NONE, reportParseExceptions, 0, true, null, 1); } private AppenderatorDriverRealtimeIndexTask makeRealtimeTask( final String taskId, final TransformSpec transformSpec, final boolean reportParseExceptions, - final long handoffTimeout + final long handoffTimeout, + final Boolean logParseExceptions, + final Integer maxParseExceptions, + final Integer maxSavedParseExceptions ) { ObjectMapper objectMapper = new DefaultObjectMapper(); @@ -1012,7 +1191,13 @@ private AppenderatorDriverRealtimeIndexTask makeRealtimeTask( new TimeAndDimsParseSpec( new TimestampSpec("t", "auto", null), new DimensionsSpec( - DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim1t")), + ImmutableList.of( + new StringDimensionSchema("dim1"), + new StringDimensionSchema("dim2"), + new StringDimensionSchema("dim1t"), + new LongDimensionSchema("dimLong"), + new FloatDimensionSchema("dimFloat") + ), null, null ) @@ -1041,13 +1226,18 @@ private AppenderatorDriverRealtimeIndexTask makeRealtimeTask( reportParseExceptions, handoffTimeout, null, - null + null, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions ); return new AppenderatorDriverRealtimeIndexTask( taskId, null, new RealtimeAppenderatorIngestionSpec(dataSchema, realtimeIOConfig, tuningConfig), - null + null, + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER ) { @Override diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java index 57dff3bba48b..7ce888ddacf8 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java @@ -837,7 +837,7 @@ public void testReportParseException() throws Exception TaskStatus status = runTask(indexTask).lhs; Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); - Assert.assertEquals("Max parse exceptions exceeded, terminating task...", status.getErrorMsg()); + checkTaskStatusErrorMsgForParseExceptionsExceeded(status); Map expectedUnparseables = ImmutableMap.of( "determinePartitions", @@ -1041,10 +1041,7 @@ public void testMultipleParseExceptionsFailure() throws Exception TaskStatus status = runTask(indexTask).lhs; Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); - Assert.assertEquals( - 
"Max parse exceptions exceeded, terminating task...", - status.getErrorMsg() - ); + checkTaskStatusErrorMsgForParseExceptionsExceeded(status); Map expectedMetrics = ImmutableMap.of( "buildSegments", @@ -1147,10 +1144,7 @@ public void testMultipleParseExceptionsFailureAtDeterminePartitions() throws Exc TaskStatus status = runTask(indexTask).lhs; Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); - Assert.assertEquals( - "Max parse exceptions exceeded, terminating task...", - status.getErrorMsg() - ); + checkTaskStatusErrorMsgForParseExceptionsExceeded(status); Map expectedMetrics = ImmutableMap.of( "determinePartitions", @@ -1308,7 +1302,8 @@ public void testCsvWithHeaderOfEmptyTimestamp() throws Exception TaskStatus status = runTask(indexTask).lhs; Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); - Assert.assertEquals("Max parse exceptions exceeded, terminating task...", status.getErrorMsg()); + + checkTaskStatusErrorMsgForParseExceptionsExceeded(status); Map expectedUnparseables = ImmutableMap.of( "determinePartitions", @@ -1319,6 +1314,12 @@ public void testCsvWithHeaderOfEmptyTimestamp() throws Exception Assert.assertEquals(expectedUnparseables, status.getContext().get("unparseableEvents")); } + public static void checkTaskStatusErrorMsgForParseExceptionsExceeded(TaskStatus status) + { + // full stacktrace will be too long and make tests brittle (e.g. if line # changes), just match the main message + Assert.assertTrue(status.getErrorMsg().contains("Max parse exceptions exceeded, terminating task...")); + } + private Pair> runTask(IndexTask indexTask) throws Exception { final List segments = Lists.newArrayList(); diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/RealtimeIndexTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/RealtimeIndexTaskTest.java index 9f050fdcab47..6c8a69df7231 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/RealtimeIndexTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/RealtimeIndexTaskTest.java @@ -128,7 +128,6 @@ import org.junit.Before; import org.junit.Rule; import org.junit.Test; -import org.junit.internal.matchers.ThrowableCauseMatcher; import org.junit.internal.matchers.ThrowableMessageMatcher; import org.junit.rules.ExpectedException; import org.junit.rules.TemporaryFolder; @@ -479,21 +478,10 @@ public void testReportParseExceptionsOnBadMetric() throws Exception expectedException.expectCause(CoreMatchers.instanceOf(ParseException.class)); expectedException.expectCause( ThrowableMessageMatcher.hasMessage( - CoreMatchers.containsString("Encountered parse error for aggregator[met1]") - ) - ); - expectedException.expect( - ThrowableCauseMatcher.hasCause( - ThrowableCauseMatcher.hasCause( - CoreMatchers.allOf( - CoreMatchers.instanceOf(ParseException.class), - ThrowableMessageMatcher.hasMessage( - CoreMatchers.containsString("Unable to parse value[foo] for field[met1]") - ) - ) - ) + CoreMatchers.containsString("[Unable to parse value[foo] for field[met1]") ) ); + statusFuture.get(); } diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java index bed3031e1f9e..ecfd6b95a7a9 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java @@ -42,7 +42,6 @@ import io.druid.java.util.common.DateTimes; import 
io.druid.java.util.common.IAE; import io.druid.java.util.common.ISE; -import io.druid.java.util.common.Pair; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.granularity.Granularity; import io.druid.java.util.common.parsers.ParseException; @@ -453,7 +452,7 @@ protected abstract AggregatorType[] initAggs( ); // Note: This method needs to be thread safe. - protected abstract Pair> addToFacts( + protected abstract AddToFactsResult addToFacts( AggregatorFactory[] metrics, boolean deserializeComplexMetrics, boolean reportParseExceptions, @@ -481,6 +480,55 @@ protected abstract Pair> addToFacts( protected abstract boolean isNull(int rowOffset, int aggOffset); + public static class TimeAndDimsResult + { + private TimeAndDims timeAndDims; + private List parseExceptionMessages; + + public TimeAndDimsResult( + TimeAndDims timeAndDims, + List parseExceptionMessages + ) + { + this.timeAndDims = timeAndDims; + this.parseExceptionMessages = parseExceptionMessages; + } + + public TimeAndDims getTimeAndDims() + { + return timeAndDims; + } + + public List getParseExceptionMessages() + { + return parseExceptionMessages; + } + } + + public static class AddToFactsResult + { + private int rowCount; + private List parseExceptionMessages; + + public AddToFactsResult( + int rowCount, + List parseExceptionMessages + ) + { + this.rowCount = rowCount; + this.parseExceptionMessages = parseExceptionMessages; + } + + public int getRowCount() + { + return rowCount; + } + + public List getParseExceptionMessages() + { + return parseExceptionMessages; + } + } @Override public void close() @@ -523,25 +571,29 @@ public IncrementalIndexAddResult add(InputRow row) throws IndexSizeExceededExcep public IncrementalIndexAddResult add(InputRow row, boolean skipMaxRowsInMemoryCheck) throws IndexSizeExceededException { - Pair> timeAndDimsResult = toTimeAndDims(row); - final Pair> addToFactsResult = addToFacts( + TimeAndDimsResult timeAndDimsResult = toTimeAndDims(row); + final AddToFactsResult addToFactsResult = addToFacts( metrics, deserializeComplexMetrics, reportParseExceptions, row, numEntries, - timeAndDimsResult.lhs, + timeAndDimsResult.getTimeAndDims(), in, rowSupplier, skipMaxRowsInMemoryCheck ); updateMaxIngestedTime(row.getTimestamp()); - ParseException parseException = getCombinedParseException(row, timeAndDimsResult.rhs, addToFactsResult.rhs); - return new IncrementalIndexAddResult(addToFactsResult.lhs, parseException); + ParseException parseException = getCombinedParseException( + row, + timeAndDimsResult.getParseExceptionMessages(), + addToFactsResult.getParseExceptionMessages() + ); + return new IncrementalIndexAddResult(addToFactsResult.getRowCount(), parseException); } @VisibleForTesting - Pair> toTimeAndDims(InputRow row) + TimeAndDimsResult toTimeAndDims(InputRow row) { row = formatRow(row); if (row.getTimestampFromEpoch() < minTimestamp) { @@ -634,7 +686,7 @@ Pair> toTimeAndDims(InputRow row) } TimeAndDims timeAndDims = new TimeAndDims(Math.max(truncated, minTimestamp), dims, dimensionDescsList); - return Pair.of(timeAndDims, parseExceptionMessages); + return new TimeAndDimsResult(timeAndDims, parseExceptionMessages); } public static ParseException getCombinedParseException( diff --git a/processing/src/main/java/io/druid/segment/incremental/OffheapIncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/OffheapIncrementalIndex.java index 6d5458d39959..23df282a786b 100644 --- a/processing/src/main/java/io/druid/segment/incremental/OffheapIncrementalIndex.java +++ 
b/processing/src/main/java/io/druid/segment/incremental/OffheapIncrementalIndex.java @@ -26,7 +26,6 @@ import io.druid.data.input.InputRow; import io.druid.java.util.common.IAE; import io.druid.java.util.common.ISE; -import io.druid.java.util.common.Pair; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.io.Closer; import io.druid.java.util.common.logger.Logger; @@ -139,7 +138,7 @@ protected BufferAggregator[] initAggs( } @Override - protected Pair> addToFacts( + protected AddToFactsResult addToFacts( AggregatorFactory[] metrics, boolean deserializeComplexMetrics, boolean reportParseExceptions, @@ -239,7 +238,7 @@ protected Pair> addToFacts( } } rowContainer.set(null); - return Pair.of(numEntries.get(), new ArrayList<>()); + return new AddToFactsResult(numEntries.get(), new ArrayList<>()); } @Override diff --git a/processing/src/main/java/io/druid/segment/incremental/OnheapIncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/OnheapIncrementalIndex.java index 3e42f28bf097..a961beb27817 100644 --- a/processing/src/main/java/io/druid/segment/incremental/OnheapIncrementalIndex.java +++ b/processing/src/main/java/io/druid/segment/incremental/OnheapIncrementalIndex.java @@ -23,7 +23,6 @@ import com.google.common.base.Throwables; import com.google.common.collect.Maps; import io.druid.data.input.InputRow; -import io.druid.java.util.common.Pair; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.io.Closer; import io.druid.java.util.common.logger.Logger; @@ -104,7 +103,7 @@ protected Aggregator[] initAggs( } @Override - protected Pair> addToFacts( + protected AddToFactsResult addToFacts( AggregatorFactory[] metrics, boolean deserializeComplexMetrics, boolean reportParseExceptions, @@ -151,7 +150,7 @@ protected Pair> addToFacts( } } - return Pair.of(numEntries.get(), parseExceptionMessages); + return new AddToFactsResult(numEntries.get(), parseExceptionMessages); } @Override diff --git a/processing/src/test/java/io/druid/segment/incremental/OnheapIncrementalIndexBenchmark.java b/processing/src/test/java/io/druid/segment/incremental/OnheapIncrementalIndexBenchmark.java index d797eefd96dc..53ee9ec92c63 100644 --- a/processing/src/test/java/io/druid/segment/incremental/OnheapIncrementalIndexBenchmark.java +++ b/processing/src/test/java/io/druid/segment/incremental/OnheapIncrementalIndexBenchmark.java @@ -34,7 +34,6 @@ import io.druid.data.input.InputRow; import io.druid.data.input.MapBasedInputRow; import io.druid.java.util.common.Intervals; -import io.druid.java.util.common.Pair; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.granularity.Granularities; import io.druid.java.util.common.granularity.Granularity; @@ -167,7 +166,7 @@ protected void concurrentSet(int offset, Aggregator[] value) } @Override - protected Pair> addToFacts( + protected AddToFactsResult addToFacts( AggregatorFactory[] metrics, boolean deserializeComplexMetrics, boolean reportParseExceptions, @@ -236,8 +235,7 @@ protected Pair> addToFacts( rowContainer.set(null); - - return Pair.of(numEntries.get(), new ArrayList<>()); + return new AddToFactsResult(numEntries.get(), new ArrayList<>()); } @Override diff --git a/processing/src/test/java/io/druid/segment/incremental/TimeAndDimsCompTest.java b/processing/src/test/java/io/druid/segment/incremental/TimeAndDimsCompTest.java index c1325e94198e..40a15fb1ea0e 100644 --- a/processing/src/test/java/io/druid/segment/incremental/TimeAndDimsCompTest.java +++ 
b/processing/src/test/java/io/druid/segment/incremental/TimeAndDimsCompTest.java @@ -45,13 +45,13 @@ public void testBasic() throws IndexSizeExceededException .buildOnheap(); long time = System.currentTimeMillis(); - TimeAndDims td1 = (TimeAndDims) index.toTimeAndDims(toMapRow(time, "billy", "A", "joe", "B")).lhs; - TimeAndDims td2 = (TimeAndDims) index.toTimeAndDims(toMapRow(time, "billy", "A", "joe", "A")).lhs; - TimeAndDims td3 = (TimeAndDims) index.toTimeAndDims(toMapRow(time, "billy", "A")).lhs; + TimeAndDims td1 = index.toTimeAndDims(toMapRow(time, "billy", "A", "joe", "B")).getTimeAndDims(); + TimeAndDims td2 = index.toTimeAndDims(toMapRow(time, "billy", "A", "joe", "A")).getTimeAndDims(); + TimeAndDims td3 = index.toTimeAndDims(toMapRow(time, "billy", "A")).getTimeAndDims(); - TimeAndDims td4 = (TimeAndDims) index.toTimeAndDims(toMapRow(time + 1, "billy", "A", "joe", "B")).lhs; - TimeAndDims td5 = (TimeAndDims) index.toTimeAndDims(toMapRow(time + 1, "billy", "A", "joe", Arrays.asList("A", "B"))).lhs; - TimeAndDims td6 = (TimeAndDims) index.toTimeAndDims(toMapRow(time + 1)).lhs; + TimeAndDims td4 = index.toTimeAndDims(toMapRow(time + 1, "billy", "A", "joe", "B")).getTimeAndDims(); + TimeAndDims td5 = index.toTimeAndDims(toMapRow(time + 1, "billy", "A", "joe", Arrays.asList("A", "B"))).getTimeAndDims(); + TimeAndDims td6 = index.toTimeAndDims(toMapRow(time + 1)).getTimeAndDims(); Comparator comparator = index.dimsComparator(); diff --git a/server/src/main/java/io/druid/segment/realtime/plumber/RealtimePlumber.java b/server/src/main/java/io/druid/segment/realtime/plumber/RealtimePlumber.java index 98014d61b6fb..ed4a2661d42b 100644 --- a/server/src/main/java/io/druid/segment/realtime/plumber/RealtimePlumber.java +++ b/server/src/main/java/io/druid/segment/realtime/plumber/RealtimePlumber.java @@ -61,6 +61,7 @@ import io.druid.segment.QueryableIndex; import io.druid.segment.QueryableIndexSegment; import io.druid.segment.Segment; +import io.druid.segment.incremental.IncrementalIndexAddResult; import io.druid.segment.incremental.IndexSizeExceededException; import io.druid.segment.indexing.DataSchema; import io.druid.segment.indexing.RealtimeTuningConfig; @@ -216,13 +217,16 @@ public int add(InputRow row, Supplier committerSupplier) throws Index return -1; } - final int numRows = sink.add(row, false).getRowCount(); + final IncrementalIndexAddResult addResult = sink.add(row, false); + if (config.isReportParseExceptions() && addResult.getParseException() != null) { + throw addResult.getParseException(); + } if (!sink.canAppendRow() || System.currentTimeMillis() > nextFlush) { persist(committerSupplier.get()); } - return numRows; + return addResult.getRowCount(); } private Sink getSink(long timestamp) diff --git a/server/src/main/java/io/druid/server/coordination/ChangeRequestHistory.java b/server/src/main/java/io/druid/server/coordination/ChangeRequestHistory.java index afb9857dd556..31bf1f1e2e10 100644 --- a/server/src/main/java/io/druid/server/coordination/ChangeRequestHistory.java +++ b/server/src/main/java/io/druid/server/coordination/ChangeRequestHistory.java @@ -22,13 +22,13 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.util.concurrent.AbstractFuture; import com.google.common.util.concurrent.ListenableFuture; import 
com.google.common.util.concurrent.ThreadFactoryBuilder; import io.druid.java.util.common.IAE; import io.druid.java.util.common.StringUtils; +import io.druid.utils.CircularBuffer; import java.util.ArrayList; import java.util.LinkedHashMap; @@ -327,46 +327,4 @@ public boolean cancel(boolean interruptIfRunning) return true; } } - - static class CircularBuffer - { - private final E[] buffer; - - private int start = 0; - private int size = 0; - - CircularBuffer(int capacity) - { - buffer = (E[]) new Object[capacity]; - } - - void add(E item) - { - buffer[start++] = item; - - if (start >= buffer.length) { - start = 0; - } - - if (size < buffer.length) { - size++; - } - } - - E get(int index) - { - Preconditions.checkArgument(index >= 0 && index < size, "invalid index"); - - int bufferIndex = (start - size + index) % buffer.length; - if (bufferIndex < 0) { - bufferIndex += buffer.length; - } - return buffer[bufferIndex]; - } - - int size() - { - return size; - } - } } diff --git a/server/src/test/java/io/druid/server/coordination/ChangeRequestHistoryTest.java b/server/src/test/java/io/druid/server/coordination/ChangeRequestHistoryTest.java index 9575c94d238c..da537e8f1b62 100644 --- a/server/src/test/java/io/druid/server/coordination/ChangeRequestHistoryTest.java +++ b/server/src/test/java/io/druid/server/coordination/ChangeRequestHistoryTest.java @@ -22,6 +22,7 @@ import com.google.common.util.concurrent.FutureCallback; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; +import io.druid.utils.CircularBuffer; import org.junit.Assert; import org.junit.Test; @@ -173,7 +174,7 @@ public void testNonImmediateFuture() throws Exception @Test public void testCircularBuffer() throws Exception { - ChangeRequestHistory.CircularBuffer circularBuffer = new ChangeRequestHistory.CircularBuffer<>( + CircularBuffer circularBuffer = new CircularBuffer<>( 3); circularBuffer.add(1); From 8867c2f84c56de3abb581458266a6128692ce332 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Tue, 20 Mar 2018 16:11:33 -0700 Subject: [PATCH 03/10] Use TaskReport instead of metrics/context --- .../java/io/druid/indexer/TaskReport.java | 47 +++++++++ .../java/io/druid/indexer/TaskStatusPlus.java | 95 ++++++------------ .../io/druid/indexer/TaskStatusPlusTest.java | 1 - .../druid/indexing/kafka/KafkaIndexTask.java | 37 ++++--- .../indexing/kafka/KafkaIndexTaskTest.java | 17 +++- .../IngestionStatsAndErrorsTaskReport.java | 61 ++++++++++++ ...IngestionStatsAndErrorsTaskReportData.java | 97 +++++++++++++++++++ .../io/druid/indexing/common/TaskStatus.java | 76 +++++++-------- .../AppenderatorDriverRealtimeIndexTask.java | 34 ++++--- .../indexing/common/task/HadoopIndexTask.java | 41 +++++--- .../druid/indexing/common/task/IndexTask.java | 39 +++++--- .../overlord/ThreadPoolTaskRunner.java | 4 +- .../overlord/http/OverlordResource.java | 4 +- ...penderatorDriverRealtimeIndexTaskTest.java | 43 ++++++-- .../indexing/common/task/IndexTaskTest.java | 36 +++++-- 15 files changed, 442 insertions(+), 190 deletions(-) create mode 100644 api/src/main/java/io/druid/indexer/TaskReport.java create mode 100644 indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReport.java create mode 100644 indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReportData.java diff --git a/api/src/main/java/io/druid/indexer/TaskReport.java b/api/src/main/java/io/druid/indexer/TaskReport.java new file mode 100644 index 000000000000..47f23f81da4f --- /dev/null 
+++ b/api/src/main/java/io/druid/indexer/TaskReport.java @@ -0,0 +1,47 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.indexer; + +import com.google.common.collect.Maps; + +import java.util.Map; + +/** + * TaskReport can be optionally included in io.druid.indexing.common.TaskStatus to report some ingestion results to + * Supervisors or supervisorTasks. See ParallelIndexSinglePhaseSupervisorTask and ParallelIndexSinglePhaseSubTask + * as an example. + */ +public interface TaskReport +{ + String getTaskId(); + + String getReportKey(); + + Object getPayload(); + + static Map buildTaskReports(TaskReport... taskReports) + { + Map taskReportMap = Maps.newHashMap(); + for (TaskReport taskReport : taskReports) { + taskReportMap.put(taskReport.getReportKey(), taskReport); + } + return taskReportMap; + } +} diff --git a/api/src/main/java/io/druid/indexer/TaskStatusPlus.java b/api/src/main/java/io/druid/indexer/TaskStatusPlus.java index f1272c984dcb..26ad0837d3f7 100644 --- a/api/src/main/java/io/druid/indexer/TaskStatusPlus.java +++ b/api/src/main/java/io/druid/indexer/TaskStatusPlus.java @@ -39,14 +39,11 @@ public class TaskStatusPlus private final TaskLocation location; private final String dataSource; - @Nullable - private final Map metrics; - @Nullable private final String errorMsg; @Nullable - private final Map context; + private final Map taskReports; @JsonCreator public TaskStatusPlus( @@ -58,9 +55,8 @@ public TaskStatusPlus( @JsonProperty("duration") @Nullable Long duration, @JsonProperty("location") TaskLocation location, @JsonProperty("dataSource") String dataSource, - @JsonProperty("metrics") Map metrics, @JsonProperty("errorMsg") String errorMsg, - @JsonProperty("context") Map context + @JsonProperty("taskReports") Map taskReports ) { if (state != null && state.isComplete()) { @@ -74,9 +70,8 @@ public TaskStatusPlus( this.duration = duration; this.location = Preconditions.checkNotNull(location, "location"); this.dataSource = dataSource; - this.metrics = metrics; this.errorMsg = errorMsg; - this.context = context; + this.taskReports = taskReports; } @JsonProperty @@ -130,13 +125,6 @@ public String getDataSource() return dataSource; } - @Nullable - @JsonProperty("metrics") - public Map getMetrics() - { - return metrics; - } - @Nullable @JsonProperty("errorMsg") public String getErrorMsg() @@ -145,10 +133,10 @@ public String getErrorMsg() } @Nullable - @JsonProperty("context") - public Map getContext() + @JsonProperty("taskReports") + public Map getTaskReports() { - return context; + return taskReports; } @Override @@ -157,65 +145,36 @@ public boolean equals(Object o) if (this == o) { return true; } - if (o == null || getClass() != o.getClass()) { return false; } - - final TaskStatusPlus that = 
(TaskStatusPlus) o; - if (!id.equals(that.id)) { - return false; - } - if (!type.equals(that.type)) { - return false; - } - if (!createdTime.equals(that.createdTime)) { - return false; - } - if (!queueInsertionTime.equals(that.queueInsertionTime)) { - return false; - } - if (!Objects.equals(state, that.state)) { - return false; - } - if (!Objects.equals(duration, that.duration)) { - return false; - } - - if (!Objects.equals(location, that.location)) { - return false; - } - - if (!Objects.equals(dataSource, that.dataSource)) { - return false; - } - - if (!Objects.equals(metrics, that.metrics)) { - return false; - } - - if (!Objects.equals(errorMsg, that.errorMsg)) { - return false; - } - - return Objects.equals(context, that.context); + TaskStatusPlus that = (TaskStatusPlus) o; + return Objects.equals(getId(), that.getId()) && + Objects.equals(getType(), that.getType()) && + Objects.equals(getCreatedTime(), that.getCreatedTime()) && + Objects.equals(getQueueInsertionTime(), that.getQueueInsertionTime()) && + getState() == that.getState() && + Objects.equals(getDuration(), that.getDuration()) && + Objects.equals(getLocation(), that.getLocation()) && + Objects.equals(getDataSource(), that.getDataSource()) && + Objects.equals(getErrorMsg(), that.getErrorMsg()) && + Objects.equals(getTaskReports(), that.getTaskReports()); } @Override public int hashCode() { return Objects.hash( - id, - type, - createdTime, - queueInsertionTime, - state, - duration, - location, - dataSource, - metrics, - errorMsg, - context + getId(), + getType(), + getCreatedTime(), + getQueueInsertionTime(), + getState(), + getDuration(), + getLocation(), + getDataSource(), + getErrorMsg(), + getTaskReports() ); } } diff --git a/api/src/test/java/io/druid/indexer/TaskStatusPlusTest.java b/api/src/test/java/io/druid/indexer/TaskStatusPlusTest.java index 587d3c8f2ef3..f3f0aaaa6829 100644 --- a/api/src/test/java/io/druid/indexer/TaskStatusPlusTest.java +++ b/api/src/test/java/io/druid/indexer/TaskStatusPlusTest.java @@ -55,7 +55,6 @@ public void testSerde() throws IOException TaskLocation.create("testHost", 1010, -1), "ds_test", null, - null, null ); final String json = mapper.writeValueAsString(status); diff --git a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java index f18c335046b4..51835439a8d7 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java +++ b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java @@ -51,8 +51,11 @@ import io.druid.discovery.LookupNodeService; import io.druid.indexer.IngestionState; import io.druid.indexer.TaskMetricsUtils; +import io.druid.indexer.TaskReport; import io.druid.indexing.appenderator.ActionBasedSegmentAllocator; import io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReport; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; import io.druid.indexing.common.actions.CheckPointDataSourceMetadataAction; @@ -436,9 +439,8 @@ public TaskStatus run(final TaskToolbox toolbox) throws Exception } return TaskStatus.failure( getId(), - getTaskCompletionMetrics(), Throwables.getStackTraceAsString(e), - getTaskCompletionContext() + getTaskCompletionReports() ); } } @@ -934,9 +936,8 
@@ public void onFailure(Throwable t) return TaskStatus.success( getId(), - getTaskCompletionMetrics(), null, - getTaskCompletionContext() + getTaskCompletionReports() ); } @@ -1311,9 +1312,8 @@ public String apply(DataSegment input) return TaskStatus.success( getId(), - getTaskCompletionMetrics(), null, - getTaskCompletionContext() + getTaskCompletionReports() ); } @@ -1345,20 +1345,31 @@ private void handleParseException(ParseException pe, ConsumerRecord getTaskCompletionContext() + private Map getTaskCompletionReports() { - Map context = Maps.newHashMap(); + return TaskReport.buildTaskReports( + new IngestionStatsAndErrorsTaskReport( + getId(), + new IngestionStatsAndErrorsTaskReportData( + ingestionState, + getTaskCompletionUnparseableEvents(), + getTaskCompletionRowStats() + ) + ) + ); + } + + private Map getTaskCompletionUnparseableEvents() + { + Map unparseableEventsMap = Maps.newHashMap(); List buildSegmentsParseExceptionMessages = IndexTaskUtils.getMessagesFromSavedParseExceptions(savedParseExceptions); if (buildSegmentsParseExceptionMessages != null) { - Map unparseableEventsMap = Maps.newHashMap(); unparseableEventsMap.put("buildSegments", buildSegmentsParseExceptionMessages); - context.put("unparseableEvents", unparseableEventsMap); } - context.put("ingestionState", ingestionState); - return context; + return unparseableEventsMap; } - private Map getTaskCompletionMetrics() + private Map getTaskCompletionRowStats() { Map metrics = Maps.newHashMap(); if (fireDepartmentMetrics != null) { diff --git a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java index a2673c606434..578d2e905dc7 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java @@ -41,6 +41,7 @@ import io.druid.data.input.impl.LongDimensionSchema; import io.druid.data.input.impl.StringDimensionSchema; import io.druid.indexer.TaskMetricsUtils; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; import io.druid.indexing.common.task.IndexTaskTest; import io.druid.client.cache.CacheConfig; import io.druid.client.cache.MapCache; @@ -1044,6 +1045,10 @@ public void testMultipleParseExceptionsSuccess() throws Exception metadataStorageCoordinator.getDataSourceMetadata(DATA_SCHEMA.getDataSource()) ); + IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( + status.getTaskReports() + ); + Map expectedMetrics = ImmutableMap.of( "buildSegments", ImmutableMap.of( @@ -1053,7 +1058,7 @@ public void testMultipleParseExceptionsSuccess() throws Exception TaskMetricsUtils.ROWS_THROWN_AWAY, 1L ) ); - Assert.assertEquals(expectedMetrics, status.getMetrics()); + Assert.assertEquals(expectedMetrics, reportData.getRowStats()); Map unparseableEvents = ImmutableMap.of( "buildSegments", @@ -1067,7 +1072,7 @@ public void testMultipleParseExceptionsSuccess() throws Exception ) ); - Assert.assertEquals(unparseableEvents, status.getContext().get("unparseableEvents")); + Assert.assertEquals(unparseableEvents, reportData.getUnparseableEvents()); } @Test(timeout = 60_000L) @@ -1117,6 +1122,10 @@ public void testMultipleParseExceptionsFailure() throws Exception Assert.assertEquals(ImmutableSet.of(), publishedDescriptors()); 
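// A minimal sketch of the assembly side of the new reporting API. The task id and the
// counts are hypothetical placeholders, the Map type parameters are assumed, and the usual
// Guava/JDK imports (ImmutableMap, Arrays) are taken as given. TaskReport.buildTaskReports()
// keys each report by its getReportKey(), and IngestionStatsAndErrorsTaskReport wraps the
// ingestion state plus the per-phase unparseable-event and row-stat maps that
// getTaskCompletionReports() assembles above.
Map<String, TaskReport> exampleReports = TaskReport.buildTaskReports(
    new IngestionStatsAndErrorsTaskReport(
        "index_kafka_example",  // hypothetical task id
        new IngestionStatsAndErrorsTaskReportData(
            IngestionState.COMPLETED,
            ImmutableMap.of("buildSegments", Arrays.asList("Unparseable timestamp found! Event: null")),
            ImmutableMap.of("buildSegments", ImmutableMap.of(TaskMetricsUtils.ROWS_THROWN_AWAY, 1L))
        )
    )
);
// exampleReports maps IngestionStatsAndErrorsTaskReport.REPORT_KEY ("ingestionStatsAndErrors")
// to the report, and is what TaskStatus.success/failure(taskId, errorMsg, taskReports) receives.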
Assert.assertNull(metadataStorageCoordinator.getDataSourceMetadata(DATA_SCHEMA.getDataSource())); + IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( + status.getTaskReports() + ); + Map expectedMetrics = ImmutableMap.of( "buildSegments", ImmutableMap.of( @@ -1126,7 +1135,7 @@ public void testMultipleParseExceptionsFailure() throws Exception TaskMetricsUtils.ROWS_THROWN_AWAY, 0L ) ); - Assert.assertEquals(expectedMetrics, status.getMetrics()); + Assert.assertEquals(expectedMetrics, reportData.getRowStats()); Map unparseableEvents = ImmutableMap.of( "buildSegments", @@ -1136,7 +1145,7 @@ public void testMultipleParseExceptionsFailure() throws Exception ) ); - Assert.assertEquals(unparseableEvents, status.getContext().get("unparseableEvents")); + Assert.assertEquals(unparseableEvents, reportData.getUnparseableEvents()); } @Test(timeout = 60_000L) diff --git a/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReport.java b/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReport.java new file mode 100644 index 000000000000..278d328fe315 --- /dev/null +++ b/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReport.java @@ -0,0 +1,61 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.indexing.common; + +import com.fasterxml.jackson.annotation.JsonProperty; +import io.druid.indexer.TaskReport; + +public class IngestionStatsAndErrorsTaskReport implements TaskReport +{ + public static final String REPORT_KEY = "ingestionStatsAndErrors"; + + @JsonProperty + private String taskId; + + @JsonProperty + private IngestionStatsAndErrorsTaskReportData payload; + + public IngestionStatsAndErrorsTaskReport( + @JsonProperty("taskId") String taskId, + @JsonProperty("payload") IngestionStatsAndErrorsTaskReportData payload + ) + { + this.taskId = taskId; + this.payload = payload; + } + + @Override + public String getTaskId() + { + return taskId; + } + + @Override + public String getReportKey() + { + return REPORT_KEY; + } + + @Override + public Object getPayload() + { + return payload; + } +} diff --git a/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReportData.java b/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReportData.java new file mode 100644 index 000000000000..e148fcdaa129 --- /dev/null +++ b/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReportData.java @@ -0,0 +1,97 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.indexing.common; + +import com.fasterxml.jackson.annotation.JsonProperty; +import io.druid.indexer.IngestionState; +import io.druid.indexer.TaskReport; + +import java.util.Map; +import java.util.Objects; + +public class IngestionStatsAndErrorsTaskReportData +{ + @JsonProperty + private IngestionState ingestionState; + + @JsonProperty + private Map unparseableEvents; + + @JsonProperty + private Map rowStats; + + public IngestionStatsAndErrorsTaskReportData( + @JsonProperty("ingestionState") IngestionState ingestionState, + @JsonProperty("unparseableEvents") Map unparseableEvents, + @JsonProperty("rowStats") Map rowStats + ) + { + this.ingestionState = ingestionState; + this.unparseableEvents = unparseableEvents; + this.rowStats = rowStats; + } + + @JsonProperty + public IngestionState getIngestionState() + { + return ingestionState; + } + + @JsonProperty + public Map getUnparseableEvents() + { + return unparseableEvents; + } + + @JsonProperty + public Map getRowStats() + { + return rowStats; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + IngestionStatsAndErrorsTaskReportData that = (IngestionStatsAndErrorsTaskReportData) o; + return getIngestionState() == that.getIngestionState() && + Objects.equals(getUnparseableEvents(), that.getUnparseableEvents()) && + Objects.equals(getRowStats(), that.getRowStats()); + } + + @Override + public int hashCode() + { + return Objects.hash(getIngestionState(), getUnparseableEvents(), getRowStats()); + } + + public static IngestionStatsAndErrorsTaskReportData getPayloadFromTaskReports( + Map taskReports + ) + { + return (IngestionStatsAndErrorsTaskReportData) taskReports.get(IngestionStatsAndErrorsTaskReport.REPORT_KEY) + .getPayload(); + } +} diff --git a/indexing-service/src/main/java/io/druid/indexing/common/TaskStatus.java b/indexing-service/src/main/java/io/druid/indexing/common/TaskStatus.java index 50d8f2c277c6..8bf7e3ab647d 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/TaskStatus.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/TaskStatus.java @@ -24,6 +24,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Objects; import com.google.common.base.Preconditions; +import io.druid.indexer.TaskReport; import io.druid.indexer.TaskState; import java.util.Map; @@ -38,57 +39,54 @@ public class TaskStatus { public static TaskStatus running(String taskId) { - return new TaskStatus(taskId, TaskState.RUNNING, -1, null, null, null); + return new TaskStatus(taskId, TaskState.RUNNING, -1, null, null); } public static TaskStatus success(String taskId) { - return new TaskStatus(taskId, TaskState.SUCCESS, -1, null, null, null); + return new 
TaskStatus(taskId, TaskState.SUCCESS, -1, null, null); } - public static TaskStatus success(String taskId, Map metrics, String errorMsg, Map context) + public static TaskStatus success(String taskId, String errorMsg, Map taskReports) { - return new TaskStatus(taskId, TaskState.SUCCESS, -1, metrics, errorMsg, context); + return new TaskStatus(taskId, TaskState.SUCCESS, -1, errorMsg, taskReports); } public static TaskStatus failure(String taskId) { - return new TaskStatus(taskId, TaskState.FAILED, -1, null, null, null); + return new TaskStatus(taskId, TaskState.FAILED, -1, null, null); } - public static TaskStatus failure(String taskId, Map metrics, String errorMsg, Map context) + public static TaskStatus failure(String taskId, String errorMsg, Map taskReports) { - return new TaskStatus(taskId, TaskState.FAILED, -1, metrics, errorMsg, context); + return new TaskStatus(taskId, TaskState.FAILED, -1, errorMsg, taskReports); } public static TaskStatus fromCode(String taskId, TaskState code) { - return new TaskStatus(taskId, code, -1, null, null, null); + return new TaskStatus(taskId, code, -1, null, null); } private final String id; private final TaskState status; private final long duration; - private final Map metrics; private final String errorMsg; - private final Map context; + private final Map taskReports; @JsonCreator protected TaskStatus( @JsonProperty("id") String id, @JsonProperty("status") TaskState status, @JsonProperty("duration") long duration, - @JsonProperty("metrics") Map metrics, @JsonProperty("errorMsg") String errorMsg, - @JsonProperty("context") Map context + @JsonProperty("taskReports") Map taskReports ) { this.id = id; this.status = status; this.duration = duration; - this.metrics = metrics; this.errorMsg = errorMsg; - this.context = context; + this.taskReports = taskReports; // Check class invariants. 
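// Sketch of how the reworked factory methods are meant to be called (the task id, message and
// report map are hypothetical; exampleReports would come from TaskReport.buildTaskReports as
// sketched earlier). Completion statuses now carry an optional error message plus the task
// report map instead of the old metrics/context maps, while the plain
// running/success/failure(taskId) variants are unchanged.
TaskStatus running = TaskStatus.running("index_example_2018");
TaskStatus ok = TaskStatus.success("index_example_2018", null, exampleReports);
TaskStatus failed = TaskStatus.failure(
    "index_example_2018",
    "Max parse exceptions exceeded, terminating task...",
    exampleReports
);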
Preconditions.checkNotNull(id, "id"); @@ -113,22 +111,16 @@ public long getDuration() return duration; } - @JsonProperty("metrics") - public Map getMetrics() - { - return metrics; - } - @JsonProperty("errorMsg") public String getErrorMsg() { return errorMsg; } - @JsonProperty("context") - public Map getContext() + @JsonProperty("taskReports") + public Map getTaskReports() { - return context; + return taskReports; } /** @@ -180,7 +172,19 @@ public boolean isFailure() public TaskStatus withDuration(long _duration) { - return new TaskStatus(id, status, _duration, metrics, errorMsg, context); + return new TaskStatus(id, status, _duration, errorMsg, taskReports); + } + + @Override + public String toString() + { + return Objects.toStringHelper(this) + .add("id", id) + .add("status", status) + .add("duration", duration) + .add("errorMsg", errorMsg) + .add("taskReports", taskReports) + .toString(); } @Override @@ -193,30 +197,16 @@ public boolean equals(Object o) return false; } TaskStatus that = (TaskStatus) o; - return duration == that.duration && - java.util.Objects.equals(id, that.id) && + return getDuration() == that.getDuration() && + java.util.Objects.equals(getId(), that.getId()) && status == that.status && - java.util.Objects.equals(metrics, that.metrics) && - java.util.Objects.equals(errorMsg, that.errorMsg) && - java.util.Objects.equals(context, that.context); + java.util.Objects.equals(getErrorMsg(), that.getErrorMsg()) && + java.util.Objects.equals(getTaskReports(), that.getTaskReports()); } @Override public int hashCode() { - return java.util.Objects.hash(id, status, duration); - } - - @Override - public String toString() - { - return Objects.toStringHelper(this) - .add("id", id) - .add("status", status) - .add("duration", duration) - .add("metrics", metrics) - .add("errorMsg", errorMsg) - .add("context", context) - .toString(); + return java.util.Objects.hash(getId(), status, getDuration(), getErrorMsg(), getTaskReports()); } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java index 60f4b9e0f1b4..e97725bdab02 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java @@ -40,8 +40,11 @@ import io.druid.discovery.LookupNodeService; import io.druid.indexer.IngestionState; import io.druid.indexer.TaskMetricsUtils; +import io.druid.indexer.TaskReport; import io.druid.indexing.appenderator.ActionBasedSegmentAllocator; import io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReport; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; import io.druid.indexing.common.actions.SegmentTransactionalInsertAction; @@ -363,9 +366,8 @@ dataSchema, new RealtimeIOConfig(null, null, null), null .emit(); return TaskStatus.failure( getId(), - getTaskCompletionMetrics(), Throwables.getStackTraceAsString(e), - getTaskCompletionContext() + getTaskCompletionReports() ); } finally { @@ -386,9 +388,8 @@ dataSchema, new RealtimeIOConfig(null, null, null), null log.info("Job done!"); return TaskStatus.success( getId(), - getTaskCompletionMetrics(), null, - getTaskCompletionContext() + 
getTaskCompletionReports() ); } @@ -506,20 +507,31 @@ && isFirehoseDrainableByClosing(((TimedShutoffFirehoseFactory) firehoseFactory). && isFirehoseDrainableByClosing(((ClippedFirehoseFactory) firehoseFactory).getDelegate())); } - private Map getTaskCompletionContext() + private Map getTaskCompletionReports() { - Map context = Maps.newHashMap(); + return TaskReport.buildTaskReports( + new IngestionStatsAndErrorsTaskReport( + getId(), + new IngestionStatsAndErrorsTaskReportData( + ingestionState, + getTaskCompletionUnparseableEvents(), + getTaskCompletionRowStats() + ) + ) + ); + } + + private Map getTaskCompletionUnparseableEvents() + { + Map unparseableEventsMap = Maps.newHashMap(); List buildSegmentsParseExceptionMessages = IndexTaskUtils.getMessagesFromSavedParseExceptions(savedParseExceptions); if (buildSegmentsParseExceptionMessages != null) { - Map unparseableEventsMap = Maps.newHashMap(); unparseableEventsMap.put("buildSegments", buildSegmentsParseExceptionMessages); - context.put("unparseableEvents", unparseableEventsMap); } - context.put("ingestionState", ingestionState); - return context; + return unparseableEventsMap; } - private Map getTaskCompletionMetrics() + private Map getTaskCompletionRowStats() { Map metricsMap = Maps.newHashMap(); if (metrics != null) { diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java index 279c940f79d2..9b9f07783b4c 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java @@ -39,6 +39,9 @@ import io.druid.indexer.MetadataStorageUpdaterJobHandler; import io.druid.indexer.TaskMetricsGetter; import io.druid.indexer.TaskMetricsUtils; +import io.druid.indexer.TaskReport; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReport; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskLockType; import io.druid.indexing.common.TaskStatus; @@ -237,9 +240,8 @@ public TaskStatus run(TaskToolbox toolbox) throws Exception return TaskStatus.failure( getId(), - getTaskCompletionMetrics(), Throwables.getStackTraceAsString(effectiveException), - getTaskCompletionContext() + getTaskCompletionReports() ); } finally { @@ -301,9 +303,8 @@ private TaskStatus runInternal(TaskToolbox toolbox) throws Exception if (indexerSchema == null) { return TaskStatus.failure( getId(), - getTaskCompletionMetrics(), determineConfigStatus.getErrorMsg(), - getTaskCompletionContext() + getTaskCompletionReports() ); } } @@ -388,13 +389,16 @@ private TaskStatus runInternal(TaskToolbox toolbox) throws Exception if (buildSegmentsStatus.getDataSegments() != null) { ingestionState = IngestionState.COMPLETED; toolbox.publishSegments(buildSegmentsStatus.getDataSegments()); - return TaskStatus.success(getId(), getTaskCompletionMetrics(), null, getTaskCompletionContext()); + return TaskStatus.success( + getId(), + null, + getTaskCompletionReports() + ); } else { return TaskStatus.failure( getId(), - getTaskCompletionMetrics(), buildSegmentsStatus.getErrorMsg(), - getTaskCompletionContext() + getTaskCompletionReports() ); } } @@ -430,7 +434,21 @@ public Response getRowStats( return Response.ok(returnMap).build(); } - private Map getTaskCompletionMetrics() + private Map getTaskCompletionReports() + { + return TaskReport.buildTaskReports( + new 
IngestionStatsAndErrorsTaskReport( + getId(), + new IngestionStatsAndErrorsTaskReportData( + ingestionState, + null, + getTaskCompletionRowStats() + ) + ) + ); + } + + private Map getTaskCompletionRowStats() { Map metrics = Maps.newHashMap(); if (determineConfigStatus != null) { @@ -448,13 +466,6 @@ private Map getTaskCompletionMetrics() return metrics; } - private Map getTaskCompletionContext() - { - Map context = Maps.newHashMap(); - context.put("ingestionState", ingestionState); - return context; - } - public static class InnerProcessingStatsGetter implements TaskMetricsGetter { public static final List KEYS = Arrays.asList( diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java index e0b1ceb23391..bd2ae570314a 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java @@ -43,8 +43,11 @@ import io.druid.hll.HyperLogLogCollector; import io.druid.indexer.IngestionState; import io.druid.indexer.TaskMetricsUtils; +import io.druid.indexer.TaskReport; import io.druid.indexing.appenderator.ActionBasedSegmentAllocator; import io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReport; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; @@ -447,9 +450,8 @@ public TaskStatus run(final TaskToolbox toolbox) throws Exception log.error(e, "Encountered exception in %s.", ingestionState); return TaskStatus.failure( getId(), - getTaskCompletionMetrics(), Throwables.getStackTraceAsString(e), - getTaskCompletionContext() + getTaskCompletionReports() ); } @@ -460,24 +462,37 @@ public TaskStatus run(final TaskToolbox toolbox) throws Exception } } - private Map getTaskCompletionContext() + private Map getTaskCompletionReports() { - Map context = Maps.newHashMap(); + return TaskReport.buildTaskReports( + new IngestionStatsAndErrorsTaskReport( + getId(), + new IngestionStatsAndErrorsTaskReportData( + ingestionState, + getTaskCompletionUnparseableEvents(), + getTaskCompletionRowStats() + ) + ) + ); + } + + private Map getTaskCompletionUnparseableEvents() + { + Map unparseableEventsMap = Maps.newHashMap(); List determinePartitionsParseExceptionMessages = IndexTaskUtils.getMessagesFromSavedParseExceptions( determinePartitionsSavedParseExceptions); List buildSegmentsParseExceptionMessages = IndexTaskUtils.getMessagesFromSavedParseExceptions( buildSegmentsSavedParseExceptions); + if (determinePartitionsParseExceptionMessages != null || buildSegmentsParseExceptionMessages != null) { - Map unparseableEventsMap = Maps.newHashMap(); unparseableEventsMap.put("determinePartitions", determinePartitionsParseExceptionMessages); unparseableEventsMap.put("buildSegments", buildSegmentsParseExceptionMessages); - context.put("unparseableEvents", unparseableEventsMap); } - context.put("ingestionState", ingestionState); - return context; + + return unparseableEventsMap; } - private Map getTaskCompletionMetrics() + private Map getTaskCompletionRowStats() { Map metrics = Maps.newHashMap(); if (determinePartitionsFireDepartmentMetrics != null) { @@ -980,9 +995,8 @@ dataSchema, new RealtimeIOConfig(null, null, null), null log.error("Failed to publish segments, aborting!"); return 
TaskStatus.failure( getId(), - getTaskCompletionMetrics(), "Failed to publish segments.", - getTaskCompletionContext() + getTaskCompletionReports() ); } else { log.info( @@ -1002,9 +1016,8 @@ dataSchema, new RealtimeIOConfig(null, null, null), null return TaskStatus.success( getId(), - getTaskCompletionMetrics(), null, - getTaskCompletionContext() + getTaskCompletionReports() ); } } diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/ThreadPoolTaskRunner.java b/indexing-service/src/main/java/io/druid/indexing/overlord/ThreadPoolTaskRunner.java index 9b118d271067..9d434c524e2e 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/ThreadPoolTaskRunner.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/ThreadPoolTaskRunner.java @@ -465,11 +465,11 @@ public TaskStatus call() log.warn(e, "Interrupted while running task[%s]", task); } - status = TaskStatus.failure(task.getId(), null, e.toString(), null); + status = TaskStatus.failure(task.getId(), e.toString(), null); } catch (Exception e) { log.error(e, "Exception while running task[%s]", task); - status = TaskStatus.failure(task.getId(), null, e.toString(), null); + status = TaskStatus.failure(task.getId(), e.toString(), null); } catch (Throwable t) { log.error(t, "Uncaught Throwable while running task[%s]", task); diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java b/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java index 654612d0b4c8..864553b9b67a 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java @@ -622,9 +622,8 @@ public Response getCompleteTasks( status.getDuration(), TaskLocation.unknown(), pair.rhs, - status.getMetrics(), status.getErrorMsg(), - status.getContext()); + status.getTaskReports()); })); return Response.ok(completeTasks).build(); @@ -782,7 +781,6 @@ public TaskStatusPlus apply(TaskRunnerWorkItem workItem) workItem.getLocation(), workItem.getDataSource(), null, - null, null ); } diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java index f3d5eee0af16..4009464e42b3 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java @@ -47,6 +47,7 @@ import io.druid.indexer.IngestionState; import io.druid.indexer.TaskMetricsUtils; import io.druid.indexer.TaskState; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; import io.druid.indexing.common.SegmentLoaderFactory; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; @@ -612,6 +613,10 @@ public void testReportParseExceptionsOnBadMetric() throws Exception TaskStatus status = statusFuture.get(); Assert.assertTrue(status.getErrorMsg().contains("java.lang.RuntimeException: Max parse exceptions exceeded, terminating task...")); + IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( + status.getTaskReports() + ); + Map expectedUnparseables = ImmutableMap.of( "buildSegments", Arrays.asList( @@ -619,7 +624,7 @@ public void testReportParseExceptionsOnBadMetric() 
throws Exception ) ); - Assert.assertEquals(expectedUnparseables, status.getContext().get("unparseableEvents")); + Assert.assertEquals(expectedUnparseables, reportData.getUnparseableEvents()); } @Test(timeout = 60_000L) @@ -704,7 +709,12 @@ public void testNoReportParseExceptions() throws Exception // Wait for the task to finish. final TaskStatus taskStatus = statusFuture.get(); Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode()); - Assert.assertEquals(expectedMetrics, taskStatus.getMetrics()); + + IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( + taskStatus.getTaskReports() + ); + + Assert.assertEquals(expectedMetrics, reportData.getRowStats()); } @Test(timeout = 60_000L) @@ -792,7 +802,12 @@ public void testMultipleParseExceptionsSuccess() throws Exception // Wait for the task to finish. final TaskStatus taskStatus = statusFuture.get(); Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode()); - Assert.assertEquals(expectedMetrics, taskStatus.getMetrics()); + + IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( + taskStatus.getTaskReports() + ); + + Assert.assertEquals(expectedMetrics, reportData.getRowStats()); Map expectedUnparseables = ImmutableMap.of( "buildSegments", Arrays.asList( @@ -802,8 +817,8 @@ public void testMultipleParseExceptionsSuccess() throws Exception "Unparseable timestamp found! Event: null" ) ); - Assert.assertEquals(expectedUnparseables, taskStatus.getContext().get("unparseableEvents")); - Assert.assertEquals(IngestionState.COMPLETED, taskStatus.getContext().get("ingestionState")); + Assert.assertEquals(expectedUnparseables, reportData.getUnparseableEvents()); + Assert.assertEquals(IngestionState.COMPLETED, reportData.getIngestionState()); } @Test(timeout = 60_000L) @@ -850,6 +865,11 @@ public void testMultipleParseExceptionsFailure() throws Exception final TaskStatus taskStatus = statusFuture.get(); Assert.assertEquals(TaskState.FAILED, taskStatus.getStatusCode()); Assert.assertTrue(taskStatus.getErrorMsg().contains("Max parse exceptions exceeded, terminating task...")); + + IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( + taskStatus.getTaskReports() + ); + Map expectedMetrics = ImmutableMap.of( "buildSegments", ImmutableMap.of( @@ -859,7 +879,7 @@ public void testMultipleParseExceptionsFailure() throws Exception TaskMetricsUtils.ROWS_THROWN_AWAY, 0L ) ); - Assert.assertEquals(expectedMetrics, taskStatus.getMetrics()); + Assert.assertEquals(expectedMetrics, reportData.getRowStats()); Map expectedUnparseables = ImmutableMap.of( "buildSegments", Arrays.asList( @@ -869,8 +889,8 @@ public void testMultipleParseExceptionsFailure() throws Exception "Unparseable timestamp found! Event: null" ) ); - Assert.assertEquals(expectedUnparseables, taskStatus.getContext().get("unparseableEvents")); - Assert.assertEquals(IngestionState.BUILD_SEGMENTS, taskStatus.getContext().get("ingestionState")); + Assert.assertEquals(expectedUnparseables, reportData.getUnparseableEvents()); + Assert.assertEquals(IngestionState.BUILD_SEGMENTS, reportData.getIngestionState()); } @Test(timeout = 60_000L) @@ -1102,6 +1122,11 @@ public void testRestoreCorruptData() throws Exception // Wait for the task to finish. 
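// Sketch of the consumption pattern the updated tests follow (completedStatus,
// expectedUnparseables and expectedRowStats are hypothetical stand-ins): the payload is
// pulled back out of the completed TaskStatus with getPayloadFromTaskReports and inspected
// through its getters, rather than through the removed status.getContext()/getMetrics() maps.
IngestionStatsAndErrorsTaskReportData payload =
    IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports(completedStatus.getTaskReports());
Assert.assertEquals(IngestionState.BUILD_SEGMENTS, payload.getIngestionState());
Assert.assertEquals(expectedUnparseables, payload.getUnparseableEvents()); // per-phase message lists
Assert.assertEquals(expectedRowStats, payload.getRowStats());              // per-phase row counts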
TaskStatus status = statusFuture.get(); + + IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( + status.getTaskReports() + ); + Map expectedMetrics = ImmutableMap.of( "buildSegments", ImmutableMap.of( @@ -1111,7 +1136,7 @@ public void testRestoreCorruptData() throws Exception TaskMetricsUtils.ROWS_THROWN_AWAY, 0L ) ); - Assert.assertEquals(expectedMetrics, status.getMetrics()); + Assert.assertEquals(expectedMetrics, reportData.getRowStats()); Assert.assertTrue(status.getErrorMsg().contains("java.lang.IllegalArgumentException\n\tat java.nio.Buffer.position")); } } diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java index 6f4b3ecc32fe..09baa2748f39 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java @@ -37,6 +37,7 @@ import io.druid.data.input.impl.TimestampSpec; import io.druid.indexer.TaskMetricsUtils; import io.druid.indexer.TaskState; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskLockType; import io.druid.indexing.common.TaskStatus; @@ -845,7 +846,10 @@ public void testReportParseException() throws Exception "buildSegments", Arrays.asList("Unparseable timestamp found! Event: {time=unparseable, d=a, val=1}") ); - Assert.assertEquals(expectedUnparseables, status.getContext().get("unparseableEvents")); + IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( + status.getTaskReports() + ); + Assert.assertEquals(expectedUnparseables, reportData.getUnparseableEvents()); } @Test @@ -925,6 +929,10 @@ public void testMultipleParseExceptionsSuccess() throws Exception Assert.assertEquals(TaskState.SUCCESS, status.getStatusCode()); Assert.assertEquals(null, status.getErrorMsg()); + IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( + status.getTaskReports() + ); + Map expectedMetrics = ImmutableMap.of( "determinePartitions", ImmutableMap.of( @@ -941,7 +949,7 @@ public void testMultipleParseExceptionsSuccess() throws Exception TaskMetricsUtils.ROWS_THROWN_AWAY, 1L ) ); - Assert.assertEquals(expectedMetrics, status.getMetrics()); + Assert.assertEquals(expectedMetrics, reportData.getRowStats()); Map expectedUnparseables = ImmutableMap.of( "determinePartitions", @@ -963,7 +971,7 @@ public void testMultipleParseExceptionsSuccess() throws Exception ) ); - Assert.assertEquals(expectedUnparseables, status.getContext().get("unparseableEvents")); + Assert.assertEquals(expectedUnparseables, reportData.getUnparseableEvents()); } @Test @@ -1043,6 +1051,10 @@ public void testMultipleParseExceptionsFailure() throws Exception Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); checkTaskStatusErrorMsgForParseExceptionsExceeded(status); + IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( + status.getTaskReports() + ); + Map expectedMetrics = ImmutableMap.of( "buildSegments", ImmutableMap.of( @@ -1053,7 +1065,7 @@ public void testMultipleParseExceptionsFailure() throws Exception ) ); - Assert.assertEquals(expectedMetrics, status.getMetrics()); + Assert.assertEquals(expectedMetrics, reportData.getRowStats()); 
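// Shape of the per-phase row stats asserted above (counts are hypothetical and only the
// ROWS_THROWN_AWAY key from this patch is shown; the Map type parameters are assumed).
// The native IndexTask reports both of its phases, whereas the Kafka and realtime tasks
// report only a "buildSegments" entry.
Map<String, Object> examplePhaseRowStats = ImmutableMap.of(
    "determinePartitions", ImmutableMap.of(TaskMetricsUtils.ROWS_THROWN_AWAY, 1L),
    "buildSegments", ImmutableMap.of(TaskMetricsUtils.ROWS_THROWN_AWAY, 1L)
);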
Map expectedUnparseables = ImmutableMap.of( "determinePartitions", @@ -1066,7 +1078,7 @@ public void testMultipleParseExceptionsFailure() throws Exception ) ); - Assert.assertEquals(expectedUnparseables, status.getContext().get("unparseableEvents")); + Assert.assertEquals(expectedUnparseables, reportData.getUnparseableEvents()); } @Test @@ -1146,6 +1158,10 @@ public void testMultipleParseExceptionsFailureAtDeterminePartitions() throws Exc Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); checkTaskStatusErrorMsgForParseExceptionsExceeded(status); + IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( + status.getTaskReports() + ); + Map expectedMetrics = ImmutableMap.of( "determinePartitions", ImmutableMap.of( @@ -1156,7 +1172,7 @@ public void testMultipleParseExceptionsFailureAtDeterminePartitions() throws Exc ) ); - Assert.assertEquals(expectedMetrics, status.getMetrics()); + Assert.assertEquals(expectedMetrics, reportData.getRowStats()); Map expectedUnparseables = ImmutableMap.of( "determinePartitions", @@ -1169,7 +1185,7 @@ public void testMultipleParseExceptionsFailureAtDeterminePartitions() throws Exc new ArrayList<>() ); - Assert.assertEquals(expectedUnparseables, status.getContext().get("unparseableEvents")); + Assert.assertEquals(expectedUnparseables, reportData.getUnparseableEvents()); } @@ -1305,13 +1321,17 @@ public void testCsvWithHeaderOfEmptyTimestamp() throws Exception checkTaskStatusErrorMsgForParseExceptionsExceeded(status); + IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( + status.getTaskReports() + ); + Map expectedUnparseables = ImmutableMap.of( "determinePartitions", new ArrayList<>(), "buildSegments", Arrays.asList("Unparseable timestamp found! 
Event: {column_1=2014-01-01T00:00:10Z, column_2=a, column_3=1}") ); - Assert.assertEquals(expectedUnparseables, status.getContext().get("unparseableEvents")); + Assert.assertEquals(expectedUnparseables, reportData.getUnparseableEvents()); } public static void checkTaskStatusErrorMsgForParseExceptionsExceeded(TaskStatus status) From 888fa4d755dd023921e1a3c891897dc579379cdb Mon Sep 17 00:00:00 2001 From: jon-wei Date: Wed, 21 Mar 2018 12:33:56 -0700 Subject: [PATCH 04/10] Fix tests --- .../IngestionStatsAndErrorsTaskReport.java | 34 +++++++++++++++++-- ...IngestionStatsAndErrorsTaskReportData.java | 14 ++++++-- .../java/io/druid/indexer/TaskReport.java | 8 +++++ .../druid/indexing/kafka/KafkaIndexTask.java | 4 +-- .../indexing/kafka/KafkaIndexTaskTest.java | 2 +- .../AppenderatorDriverRealtimeIndexTask.java | 4 +-- .../indexing/common/task/HadoopIndexTask.java | 4 +-- .../druid/indexing/common/task/IndexTask.java | 4 +-- ...penderatorDriverRealtimeIndexTaskTest.java | 2 +- .../indexing/common/task/IndexTaskTest.java | 2 +- 10 files changed, 62 insertions(+), 16 deletions(-) rename {indexing-service/src/main/java/io/druid/indexing/common => api/src/main/java/io/druid/indexer}/IngestionStatsAndErrorsTaskReport.java (68%) rename {indexing-service/src/main/java/io/druid/indexing/common => api/src/main/java/io/druid/indexer}/IngestionStatsAndErrorsTaskReportData.java (90%) diff --git a/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReport.java b/api/src/main/java/io/druid/indexer/IngestionStatsAndErrorsTaskReport.java similarity index 68% rename from indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReport.java rename to api/src/main/java/io/druid/indexer/IngestionStatsAndErrorsTaskReport.java index 278d328fe315..cde065a7bf97 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReport.java +++ b/api/src/main/java/io/druid/indexer/IngestionStatsAndErrorsTaskReport.java @@ -17,10 +17,11 @@ * under the License. 
*/ -package io.druid.indexing.common; +package io.druid.indexer; import com.fasterxml.jackson.annotation.JsonProperty; -import io.druid.indexer.TaskReport; + +import java.util.Objects; public class IngestionStatsAndErrorsTaskReport implements TaskReport { @@ -58,4 +59,33 @@ public Object getPayload() { return payload; } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + IngestionStatsAndErrorsTaskReport that = (IngestionStatsAndErrorsTaskReport) o; + return Objects.equals(getTaskId(), that.getTaskId()) && + Objects.equals(getPayload(), that.getPayload()); + } + + @Override + public int hashCode() + { + return Objects.hash(getTaskId(), getPayload()); + } + + @Override + public String toString() + { + return "IngestionStatsAndErrorsTaskReport{" + + "taskId='" + taskId + '\'' + + ", payload=" + payload + + '}'; + } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReportData.java b/api/src/main/java/io/druid/indexer/IngestionStatsAndErrorsTaskReportData.java similarity index 90% rename from indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReportData.java rename to api/src/main/java/io/druid/indexer/IngestionStatsAndErrorsTaskReportData.java index e148fcdaa129..c1d45199abfe 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReportData.java +++ b/api/src/main/java/io/druid/indexer/IngestionStatsAndErrorsTaskReportData.java @@ -17,11 +17,9 @@ * under the License. */ -package io.druid.indexing.common; +package io.druid.indexer; import com.fasterxml.jackson.annotation.JsonProperty; -import io.druid.indexer.IngestionState; -import io.druid.indexer.TaskReport; import java.util.Map; import java.util.Objects; @@ -94,4 +92,14 @@ public static IngestionStatsAndErrorsTaskReportData getPayloadFromTaskReports( return (IngestionStatsAndErrorsTaskReportData) taskReports.get(IngestionStatsAndErrorsTaskReport.REPORT_KEY) .getPayload(); } + + @Override + public String toString() + { + return "IngestionStatsAndErrorsTaskReportData{" + + "ingestionState=" + ingestionState + + ", unparseableEvents=" + unparseableEvents + + ", rowStats=" + rowStats + + '}'; + } } diff --git a/api/src/main/java/io/druid/indexer/TaskReport.java b/api/src/main/java/io/druid/indexer/TaskReport.java index 47f23f81da4f..355a051e88d9 100644 --- a/api/src/main/java/io/druid/indexer/TaskReport.java +++ b/api/src/main/java/io/druid/indexer/TaskReport.java @@ -19,6 +19,8 @@ package io.druid.indexer; +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; import com.google.common.collect.Maps; import java.util.Map; @@ -28,6 +30,12 @@ * Supervisors or supervisorTasks. See ParallelIndexSinglePhaseSupervisorTask and ParallelIndexSinglePhaseSubTask * as an example. 
*/ +/** + */ +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") +@JsonSubTypes(value = { + @JsonSubTypes.Type(name = "ingestionStatsAndErrors", value = IngestionStatsAndErrorsTaskReport.class) +}) public interface TaskReport { String getTaskId(); diff --git a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java index 51835439a8d7..089d6c02c01f 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java +++ b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java @@ -54,8 +54,8 @@ import io.druid.indexer.TaskReport; import io.druid.indexing.appenderator.ActionBasedSegmentAllocator; import io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker; -import io.druid.indexing.common.IngestionStatsAndErrorsTaskReport; -import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; +import io.druid.indexer.IngestionStatsAndErrorsTaskReport; +import io.druid.indexer.IngestionStatsAndErrorsTaskReportData; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; import io.druid.indexing.common.actions.CheckPointDataSourceMetadataAction; diff --git a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java index 578d2e905dc7..2d17cb26c786 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java @@ -41,7 +41,7 @@ import io.druid.data.input.impl.LongDimensionSchema; import io.druid.data.input.impl.StringDimensionSchema; import io.druid.indexer.TaskMetricsUtils; -import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; +import io.druid.indexer.IngestionStatsAndErrorsTaskReportData; import io.druid.indexing.common.task.IndexTaskTest; import io.druid.client.cache.CacheConfig; import io.druid.client.cache.MapCache; diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java index e97725bdab02..2bb7ed9780cb 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java @@ -39,12 +39,12 @@ import io.druid.discovery.DruidNodeDiscoveryProvider; import io.druid.discovery.LookupNodeService; import io.druid.indexer.IngestionState; +import io.druid.indexer.IngestionStatsAndErrorsTaskReport; import io.druid.indexer.TaskMetricsUtils; import io.druid.indexer.TaskReport; import io.druid.indexing.appenderator.ActionBasedSegmentAllocator; import io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker; -import io.druid.indexing.common.IngestionStatsAndErrorsTaskReport; -import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; +import io.druid.indexer.IngestionStatsAndErrorsTaskReportData; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; import io.druid.indexing.common.actions.SegmentTransactionalInsertAction; diff --git 
a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java index 9b9f07783b4c..864f0c8d385d 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java @@ -36,12 +36,12 @@ import io.druid.indexer.HadoopDruidIndexerJob; import io.druid.indexer.HadoopIngestionSpec; import io.druid.indexer.IngestionState; +import io.druid.indexer.IngestionStatsAndErrorsTaskReport; import io.druid.indexer.MetadataStorageUpdaterJobHandler; import io.druid.indexer.TaskMetricsGetter; import io.druid.indexer.TaskMetricsUtils; import io.druid.indexer.TaskReport; -import io.druid.indexing.common.IngestionStatsAndErrorsTaskReport; -import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; +import io.druid.indexer.IngestionStatsAndErrorsTaskReportData; import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskLockType; import io.druid.indexing.common.TaskStatus; diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java index bd2ae570314a..2a8fa8de5c80 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java @@ -42,12 +42,12 @@ import io.druid.data.input.Rows; import io.druid.hll.HyperLogLogCollector; import io.druid.indexer.IngestionState; +import io.druid.indexer.IngestionStatsAndErrorsTaskReport; import io.druid.indexer.TaskMetricsUtils; import io.druid.indexer.TaskReport; import io.druid.indexing.appenderator.ActionBasedSegmentAllocator; import io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker; -import io.druid.indexing.common.IngestionStatsAndErrorsTaskReport; -import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; +import io.druid.indexer.IngestionStatsAndErrorsTaskReportData; import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java index 4009464e42b3..8fbf8ffedd29 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java @@ -47,7 +47,7 @@ import io.druid.indexer.IngestionState; import io.druid.indexer.TaskMetricsUtils; import io.druid.indexer.TaskState; -import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; +import io.druid.indexer.IngestionStatsAndErrorsTaskReportData; import io.druid.indexing.common.SegmentLoaderFactory; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java index 09baa2748f39..1a5743c12b96 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java @@ -37,7 +37,7 @@ import 
io.druid.data.input.impl.TimestampSpec; import io.druid.indexer.TaskMetricsUtils; import io.druid.indexer.TaskState; -import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; +import io.druid.indexer.IngestionStatsAndErrorsTaskReportData; import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskLockType; import io.druid.indexing.common.TaskStatus; From d5f1e28828c24432aa4846d9df64cb67fcce1a6d Mon Sep 17 00:00:00 2001 From: jon-wei Date: Mon, 2 Apr 2018 15:56:36 -0700 Subject: [PATCH 05/10] Use TaskReport uploads --- .../java/io/druid/indexer/TaskStatusPlus.java | 21 +---- .../io/druid/indexer/TaskStatusPlusTest.java | 1 - .../indexing/kafka/KafkaIndexTaskTest.java | 46 ++++++---- .../IngestionStatsAndErrorsTaskReport.java | 10 ++- .../io/druid/indexing/common/TaskReport.java | 1 - .../overlord/http/OverlordResource.java | 5 +- ...penderatorDriverRealtimeIndexTaskTest.java | 76 +++++++++------- .../indexing/common/task/IndexTaskTest.java | 87 ++++++++++++------- .../common/task/NoopTestTaskFileWriter.java | 4 +- .../common/task/TaskReportSerdeTest.java | 79 +++++++++++++++++ 10 files changed, 222 insertions(+), 108 deletions(-) create mode 100644 indexing-service/src/test/java/io/druid/indexing/common/task/TaskReportSerdeTest.java diff --git a/api/src/main/java/io/druid/indexer/TaskStatusPlus.java b/api/src/main/java/io/druid/indexer/TaskStatusPlus.java index 26ad0837d3f7..898532637872 100644 --- a/api/src/main/java/io/druid/indexer/TaskStatusPlus.java +++ b/api/src/main/java/io/druid/indexer/TaskStatusPlus.java @@ -25,7 +25,6 @@ import org.joda.time.DateTime; import javax.annotation.Nullable; -import java.util.Map; import java.util.Objects; public class TaskStatusPlus @@ -42,9 +41,6 @@ public class TaskStatusPlus @Nullable private final String errorMsg; - @Nullable - private final Map taskReports; - @JsonCreator public TaskStatusPlus( @JsonProperty("id") String id, @@ -55,8 +51,7 @@ public TaskStatusPlus( @JsonProperty("duration") @Nullable Long duration, @JsonProperty("location") TaskLocation location, @JsonProperty("dataSource") String dataSource, - @JsonProperty("errorMsg") String errorMsg, - @JsonProperty("taskReports") Map taskReports + @JsonProperty("errorMsg") String errorMsg ) { if (state != null && state.isComplete()) { @@ -71,7 +66,6 @@ public TaskStatusPlus( this.location = Preconditions.checkNotNull(location, "location"); this.dataSource = dataSource; this.errorMsg = errorMsg; - this.taskReports = taskReports; } @JsonProperty @@ -132,13 +126,6 @@ public String getErrorMsg() return errorMsg; } - @Nullable - @JsonProperty("taskReports") - public Map getTaskReports() - { - return taskReports; - } - @Override public boolean equals(Object o) { @@ -157,8 +144,7 @@ public boolean equals(Object o) Objects.equals(getDuration(), that.getDuration()) && Objects.equals(getLocation(), that.getLocation()) && Objects.equals(getDataSource(), that.getDataSource()) && - Objects.equals(getErrorMsg(), that.getErrorMsg()) && - Objects.equals(getTaskReports(), that.getTaskReports()); + Objects.equals(getErrorMsg(), that.getErrorMsg()); } @Override @@ -173,8 +159,7 @@ public int hashCode() getDuration(), getLocation(), getDataSource(), - getErrorMsg(), - getTaskReports() + getErrorMsg() ); } } diff --git a/api/src/test/java/io/druid/indexer/TaskStatusPlusTest.java b/api/src/test/java/io/druid/indexer/TaskStatusPlusTest.java index f3f0aaaa6829..eeefe130e5be 100644 --- a/api/src/test/java/io/druid/indexer/TaskStatusPlusTest.java +++ 
b/api/src/test/java/io/druid/indexer/TaskStatusPlusTest.java @@ -54,7 +54,6 @@ public void testSerde() throws IOException 1000L, TaskLocation.create("testHost", 1010, -1), "ds_test", - null, null ); final String json = mapper.writeValueAsString(status); diff --git a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java index 00a041b444f9..e0dd1be77184 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java @@ -41,6 +41,8 @@ import io.druid.data.input.impl.StringDimensionSchema; import io.druid.indexer.TaskMetricsUtils; import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; +import io.druid.indexing.common.TaskReport; +import io.druid.indexing.common.TaskReportFileWriter; import io.druid.indexing.common.task.IndexTaskTest; import io.druid.client.cache.CacheConfig; import io.druid.client.cache.MapCache; @@ -63,7 +65,6 @@ import io.druid.indexing.common.actions.TaskActionToolbox; import io.druid.indexing.common.config.TaskConfig; import io.druid.indexing.common.config.TaskStorageConfig; -import io.druid.indexing.common.task.NoopTestTaskFileWriter; import io.druid.indexing.common.task.Task; import io.druid.indexing.kafka.supervisor.KafkaSupervisor; import io.druid.indexing.kafka.test.TestBroker; @@ -208,6 +209,7 @@ public class KafkaIndexTaskTest private List> records; private final boolean isIncrementalHandoffSupported; private final Set checkpointRequestsHash = Sets.newHashSet(); + private File reportsFile; // This should be removed in versions greater that 0.12.x // isIncrementalHandoffSupported should always be set to true in those later versions @@ -327,6 +329,7 @@ public void setupTest() throws IOException doHandoff = true; topic = getTopicName(); records = generateRecords(topic); + reportsFile = File.createTempFile("KafkaIndexTaskTestReports-" + System.currentTimeMillis(), "json"); makeToolboxFactory(); } @@ -340,7 +343,7 @@ public void tearDownTest() runningTasks.clear(); } - + reportsFile.delete(); destroyToolboxFactory(); } @@ -1046,17 +1049,15 @@ public void testMultipleParseExceptionsSuccess() throws Exception metadataStorageCoordinator.getDataSourceMetadata(DATA_SCHEMA.getDataSource()) ); - IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( - status.getTaskReports() - ); + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); Map expectedMetrics = ImmutableMap.of( "buildSegments", ImmutableMap.of( - TaskMetricsUtils.ROWS_PROCESSED, 4L, - TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 3L, - TaskMetricsUtils.ROWS_UNPARSEABLE, 3L, - TaskMetricsUtils.ROWS_THROWN_AWAY, 1L + TaskMetricsUtils.ROWS_PROCESSED, 4, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 3, + TaskMetricsUtils.ROWS_UNPARSEABLE, 3, + TaskMetricsUtils.ROWS_THROWN_AWAY, 1 ) ); Assert.assertEquals(expectedMetrics, reportData.getRowStats()); @@ -1123,17 +1124,15 @@ public void testMultipleParseExceptionsFailure() throws Exception Assert.assertEquals(ImmutableSet.of(), publishedDescriptors()); Assert.assertNull(metadataStorageCoordinator.getDataSourceMetadata(DATA_SCHEMA.getDataSource())); - IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( - 
status.getTaskReports() - ); + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); Map expectedMetrics = ImmutableMap.of( "buildSegments", ImmutableMap.of( - TaskMetricsUtils.ROWS_PROCESSED, 3L, - TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0L, - TaskMetricsUtils.ROWS_UNPARSEABLE, 3L, - TaskMetricsUtils.ROWS_THROWN_AWAY, 0L + TaskMetricsUtils.ROWS_PROCESSED, 3, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0, + TaskMetricsUtils.ROWS_UNPARSEABLE, 3, + TaskMetricsUtils.ROWS_THROWN_AWAY, 0 ) ); Assert.assertEquals(expectedMetrics, reportData.getRowStats()); @@ -2228,7 +2227,7 @@ public List getLocations() EasyMock.createNiceMock(DruidNode.class), new LookupNodeService("tier"), new DataNodeService("tier", 1, ServerType.INDEXER_EXECUTOR, 0), - new NoopTestTaskFileWriter() + new TaskReportFileWriter(reportsFile) ); } @@ -2350,4 +2349,17 @@ private SegmentDescriptor SD(final Task task, final String intervalString, final final Interval interval = Intervals.of(intervalString); return new SegmentDescriptor(interval, getLock(task, interval).getVersion(), partitionNum); } + + private IngestionStatsAndErrorsTaskReportData getTaskReportData() throws IOException + { + Map taskReports = objectMapper.readValue( + reportsFile, + new TypeReference>() + { + } + ); + return IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( + taskReports + ); + } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReport.java b/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReport.java index 46a1e0cf3d51..3ebf79da9123 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReport.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReport.java @@ -20,9 +20,11 @@ package io.druid.indexing.common; import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; import java.util.Objects; +@JsonTypeName("ingestionStatsAndErrors") public class IngestionStatsAndErrorsTaskReport implements TaskReport { public static final String REPORT_KEY = "ingestionStatsAndErrors"; @@ -88,4 +90,10 @@ public String toString() ", payload=" + payload + '}'; } -} \ No newline at end of file + + @JsonProperty("type") + private String getType() + { + return "ingestionStatsAndErrors"; + } +} diff --git a/indexing-service/src/main/java/io/druid/indexing/common/TaskReport.java b/indexing-service/src/main/java/io/druid/indexing/common/TaskReport.java index 1e1c4f73a618..335b75bc3c66 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/TaskReport.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/TaskReport.java @@ -22,7 +22,6 @@ import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; import com.google.common.collect.Maps; -import io.druid.indexer.IngestionStatsAndErrorsTaskReport; import java.util.Map; diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java b/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java index 199cf5f8e052..a0dc0fe22a19 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java @@ -622,8 +622,8 @@ public Response getCompleteTasks( status.getDuration(), TaskLocation.unknown(), pair.rhs, - status.getErrorMsg(), - 
status.getTaskReports()); + status.getErrorMsg() + ); })); return Response.ok(completeTasks).build(); @@ -807,7 +807,6 @@ public TaskStatusPlus apply(TaskRunnerWorkItem workItem) null, workItem.getLocation(), workItem.getDataSource(), - null, null ); } diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java index 3fcbc5a8444a..5e09e978fb32 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java @@ -19,6 +19,7 @@ package io.druid.indexing.common.task; +import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; @@ -47,8 +48,10 @@ import io.druid.indexer.IngestionState; import io.druid.indexer.TaskMetricsUtils; import io.druid.indexer.TaskState; -import io.druid.indexer.IngestionStatsAndErrorsTaskReportData; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; import io.druid.indexing.common.SegmentLoaderFactory; +import io.druid.indexing.common.TaskReport; +import io.druid.indexing.common.TaskReportFileWriter; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; import io.druid.indexing.common.TaskToolboxFactory; @@ -164,6 +167,7 @@ public class AppenderatorDriverRealtimeIndexTaskTest "host", new NoopEmitter() ); + private static final ObjectMapper objectMapper = TestHelper.makeJsonMapper(); private static final String FAIL_DIM = "__fail__"; @@ -264,6 +268,7 @@ public Firehose connect(InputRowParser parser, File temporaryDirectory) throws P private TaskLockbox taskLockbox; private TaskToolboxFactory taskToolboxFactory; private File baseDir; + private File reportsFile; @Before public void setUp() throws IOException @@ -280,6 +285,7 @@ public void setUp() throws IOException derbyConnector.createPendingSegmentsTable(); baseDir = tempFolder.newFolder(); + reportsFile = File.createTempFile("KafkaIndexTaskTestReports-" + System.currentTimeMillis(), "json"); makeToolboxFactory(baseDir); } @@ -287,6 +293,7 @@ public void setUp() throws IOException public void tearDown() { taskExec.shutdownNow(); + reportsFile.delete(); } @Test(timeout = 60_000L) @@ -613,9 +620,7 @@ public void testReportParseExceptionsOnBadMetric() throws Exception TaskStatus status = statusFuture.get(); Assert.assertTrue(status.getErrorMsg().contains("java.lang.RuntimeException: Max parse exceptions exceeded, terminating task...")); - IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( - status.getTaskReports() - ); + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); Map expectedUnparseables = ImmutableMap.of( "buildSegments", @@ -699,10 +704,10 @@ public void testNoReportParseExceptions() throws Exception Map expectedMetrics = ImmutableMap.of( "buildSegments", ImmutableMap.of( - TaskMetricsUtils.ROWS_PROCESSED, 2L, - TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 1L, - TaskMetricsUtils.ROWS_UNPARSEABLE, 2L, - TaskMetricsUtils.ROWS_THROWN_AWAY, 0L + TaskMetricsUtils.ROWS_PROCESSED, 2, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 1, + TaskMetricsUtils.ROWS_UNPARSEABLE, 2, + TaskMetricsUtils.ROWS_THROWN_AWAY, 0 ) ); @@ 
-710,9 +715,7 @@ public void testNoReportParseExceptions() throws Exception final TaskStatus taskStatus = statusFuture.get(); Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode()); - IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( - taskStatus.getTaskReports() - ); + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); Assert.assertEquals(expectedMetrics, reportData.getRowStats()); } @@ -792,10 +795,10 @@ public void testMultipleParseExceptionsSuccess() throws Exception Map expectedMetrics = ImmutableMap.of( "buildSegments", ImmutableMap.of( - TaskMetricsUtils.ROWS_PROCESSED, 2L, - TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 2L, - TaskMetricsUtils.ROWS_UNPARSEABLE, 2L, - TaskMetricsUtils.ROWS_THROWN_AWAY, 0L + TaskMetricsUtils.ROWS_PROCESSED, 2, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 2, + TaskMetricsUtils.ROWS_UNPARSEABLE, 2, + TaskMetricsUtils.ROWS_THROWN_AWAY, 0 ) ); @@ -803,9 +806,7 @@ public void testMultipleParseExceptionsSuccess() throws Exception final TaskStatus taskStatus = statusFuture.get(); Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode()); - IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( - taskStatus.getTaskReports() - ); + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); Assert.assertEquals(expectedMetrics, reportData.getRowStats()); Map expectedUnparseables = ImmutableMap.of( @@ -866,17 +867,15 @@ public void testMultipleParseExceptionsFailure() throws Exception Assert.assertEquals(TaskState.FAILED, taskStatus.getStatusCode()); Assert.assertTrue(taskStatus.getErrorMsg().contains("Max parse exceptions exceeded, terminating task...")); - IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( - taskStatus.getTaskReports() - ); + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); Map expectedMetrics = ImmutableMap.of( "buildSegments", ImmutableMap.of( - TaskMetricsUtils.ROWS_PROCESSED, 1L, - TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 2L, - TaskMetricsUtils.ROWS_UNPARSEABLE, 2L, - TaskMetricsUtils.ROWS_THROWN_AWAY, 0L + TaskMetricsUtils.ROWS_PROCESSED, 1, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 2, + TaskMetricsUtils.ROWS_UNPARSEABLE, 2, + TaskMetricsUtils.ROWS_THROWN_AWAY, 0 ) ); Assert.assertEquals(expectedMetrics, reportData.getRowStats()); @@ -1123,17 +1122,15 @@ public void testRestoreCorruptData() throws Exception // Wait for the task to finish. 
TaskStatus status = statusFuture.get(); - IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( - status.getTaskReports() - ); + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); Map expectedMetrics = ImmutableMap.of( "buildSegments", ImmutableMap.of( - TaskMetricsUtils.ROWS_PROCESSED, 0L, - TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0L, - TaskMetricsUtils.ROWS_UNPARSEABLE, 0L, - TaskMetricsUtils.ROWS_THROWN_AWAY, 0L + TaskMetricsUtils.ROWS_PROCESSED, 0, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0, + TaskMetricsUtils.ROWS_UNPARSEABLE, 0, + TaskMetricsUtils.ROWS_THROWN_AWAY, 0 ) ); Assert.assertEquals(expectedMetrics, reportData.getRowStats()); @@ -1459,7 +1456,7 @@ public List getLocations() EasyMock.createNiceMock(DruidNode.class), new LookupNodeService("tier"), new DataNodeService("tier", 1000, ServerType.INDEXER_EXECUTOR, 0), - new NoopTestTaskFileWriter() + new TaskReportFileWriter(reportsFile) ); } @@ -1481,4 +1478,17 @@ public long sumMetric(final Task task, final DimFilter filter, final String metr task.getQueryRunner(query).run(QueryPlus.wrap(query), ImmutableMap.of()).toList(); return results.isEmpty() ? 0 : results.get(0).getValue().getLongMetric(metric); } + + private IngestionStatsAndErrorsTaskReportData getTaskReportData() throws IOException + { + Map taskReports = objectMapper.readValue( + reportsFile, + new TypeReference>() + { + } + ); + return IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( + taskReports + ); + } } diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java index eacf19cb822e..906e2657ab67 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java @@ -19,6 +19,7 @@ package io.druid.indexing.common.task; +import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -37,9 +38,11 @@ import io.druid.data.input.impl.TimestampSpec; import io.druid.indexer.TaskMetricsUtils; import io.druid.indexer.TaskState; -import io.druid.indexer.IngestionStatsAndErrorsTaskReportData; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskLockType; +import io.druid.indexing.common.TaskReport; +import io.druid.indexing.common.TaskReportFileWriter; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; import io.druid.indexing.common.TestUtils; @@ -81,7 +84,9 @@ import io.druid.timeline.partition.NumberedShardSpec; import io.druid.timeline.partition.ShardSpec; import org.joda.time.Interval; +import org.junit.After; import org.junit.Assert; +import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; @@ -130,6 +135,7 @@ public class IndexTaskTest private IndexMergerV9 indexMergerV9; private IndexIO indexIO; private volatile int segmentAllocatePartitionCounter; + private File reportsFile; public IndexTaskTest() { @@ -139,6 +145,18 @@ public IndexTaskTest() indexIO = testUtils.getTestIndexIO(); } + @Before + public void setup() throws IOException + { + reportsFile = File.createTempFile("IndexTaskTestReports-" + 
System.currentTimeMillis(), "json"); + } + + @After + public void teardown() throws IOException + { + reportsFile.delete(); + } + @Test public void testDeterminePartitions() throws Exception { @@ -846,9 +864,7 @@ public void testReportParseException() throws Exception "buildSegments", Arrays.asList("Unparseable timestamp found! Event: {time=unparseable, d=a, val=1}") ); - IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( - status.getTaskReports() - ); + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); Assert.assertEquals(expectedUnparseables, reportData.getUnparseableEvents()); } @@ -929,24 +945,22 @@ public void testMultipleParseExceptionsSuccess() throws Exception Assert.assertEquals(TaskState.SUCCESS, status.getStatusCode()); Assert.assertEquals(null, status.getErrorMsg()); - IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( - status.getTaskReports() - ); + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); Map expectedMetrics = ImmutableMap.of( "determinePartitions", ImmutableMap.of( - TaskMetricsUtils.ROWS_PROCESSED, 4L, - TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0L, - TaskMetricsUtils.ROWS_UNPARSEABLE, 4L, - TaskMetricsUtils.ROWS_THROWN_AWAY, 1L + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0, + TaskMetricsUtils.ROWS_PROCESSED, 4, + TaskMetricsUtils.ROWS_UNPARSEABLE, 4, + TaskMetricsUtils.ROWS_THROWN_AWAY, 1 ), "buildSegments", ImmutableMap.of( - TaskMetricsUtils.ROWS_PROCESSED, 1L, - TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 3L, - TaskMetricsUtils.ROWS_UNPARSEABLE, 4L, - TaskMetricsUtils.ROWS_THROWN_AWAY, 1L + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 3, + TaskMetricsUtils.ROWS_PROCESSED, 1, + TaskMetricsUtils.ROWS_UNPARSEABLE, 4, + TaskMetricsUtils.ROWS_THROWN_AWAY, 1 ) ); Assert.assertEquals(expectedMetrics, reportData.getRowStats()); @@ -1051,17 +1065,15 @@ public void testMultipleParseExceptionsFailure() throws Exception Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); checkTaskStatusErrorMsgForParseExceptionsExceeded(status); - IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( - status.getTaskReports() - ); + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); Map expectedMetrics = ImmutableMap.of( "buildSegments", ImmutableMap.of( - TaskMetricsUtils.ROWS_PROCESSED, 1L, - TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0L, - TaskMetricsUtils.ROWS_UNPARSEABLE, 3L, - TaskMetricsUtils.ROWS_THROWN_AWAY, 2L + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0, + TaskMetricsUtils.ROWS_PROCESSED, 1, + TaskMetricsUtils.ROWS_UNPARSEABLE, 3, + TaskMetricsUtils.ROWS_THROWN_AWAY, 2 ) ); @@ -1158,17 +1170,15 @@ public void testMultipleParseExceptionsFailureAtDeterminePartitions() throws Exc Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); checkTaskStatusErrorMsgForParseExceptionsExceeded(status); - IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( - status.getTaskReports() - ); + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); Map expectedMetrics = ImmutableMap.of( "determinePartitions", ImmutableMap.of( - TaskMetricsUtils.ROWS_PROCESSED, 1L, - TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0L, - TaskMetricsUtils.ROWS_UNPARSEABLE, 3L, - TaskMetricsUtils.ROWS_THROWN_AWAY, 2L + 
TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0, + TaskMetricsUtils.ROWS_PROCESSED, 1, + TaskMetricsUtils.ROWS_UNPARSEABLE, 3, + TaskMetricsUtils.ROWS_THROWN_AWAY, 2 ) ); @@ -1321,9 +1331,7 @@ public void testCsvWithHeaderOfEmptyTimestamp() throws Exception checkTaskStatusErrorMsgForParseExceptionsExceeded(status); - IngestionStatsAndErrorsTaskReportData reportData = IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( - status.getTaskReports() - ); + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); Map expectedUnparseables = ImmutableMap.of( "determinePartitions", @@ -1454,7 +1462,7 @@ public Map makeLoadSpec(URI uri) null, null, null, - new NoopTestTaskFileWriter() + new TaskReportFileWriter(reportsFile) ); indexTask.isReady(box.getTaskActionClient()); @@ -1566,4 +1574,17 @@ private static IndexTuningConfig createTuningConfig( 1 ); } + + private IngestionStatsAndErrorsTaskReportData getTaskReportData() throws IOException + { + Map taskReports = jsonMapper.readValue( + reportsFile, + new TypeReference>() + { + } + ); + return IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( + taskReports + ); + } } diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/NoopTestTaskFileWriter.java b/indexing-service/src/test/java/io/druid/indexing/common/task/NoopTestTaskFileWriter.java index cebee6c624f6..f9bfe12d772c 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/NoopTestTaskFileWriter.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/NoopTestTaskFileWriter.java @@ -22,6 +22,8 @@ import io.druid.indexing.common.TaskReport; import io.druid.indexing.common.TaskReportFileWriter; +import java.util.Map; + public class NoopTestTaskFileWriter extends TaskReportFileWriter { public NoopTestTaskFileWriter() @@ -30,7 +32,7 @@ public NoopTestTaskFileWriter() } @Override - public void write(TaskReport report) + public void write(Map reports) { } } diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/TaskReportSerdeTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/TaskReportSerdeTest.java new file mode 100644 index 000000000000..ac479a8769b0 --- /dev/null +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/TaskReportSerdeTest.java @@ -0,0 +1,79 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.indexing.common.task; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableMap; +import io.druid.indexer.IngestionState; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReport; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; +import io.druid.indexing.common.TaskReport; +import io.druid.indexing.common.TestUtils; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.Map; + +public class TaskReportSerdeTest +{ + private final ObjectMapper jsonMapper; + + public TaskReportSerdeTest() + { + TestUtils testUtils = new TestUtils(); + jsonMapper = testUtils.getTestObjectMapper(); + } + + @Test + public void testSerde() throws Exception + { + IngestionStatsAndErrorsTaskReport report1 = new IngestionStatsAndErrorsTaskReport( + "testID", + new IngestionStatsAndErrorsTaskReportData( + IngestionState.BUILD_SEGMENTS, + ImmutableMap.of( + "hello", "world" + ), + ImmutableMap.of( + "number", 1234 + ) + ) + ); + String report1serialized = jsonMapper.writeValueAsString(report1); + IngestionStatsAndErrorsTaskReport report2 = jsonMapper.readValue( + report1serialized, + IngestionStatsAndErrorsTaskReport.class + ); + Assert.assertEquals(report1, report2); + + + Map reportMap1 = TaskReport.buildTaskReports(report1); + String reportMapSerialized = jsonMapper.writeValueAsString(reportMap1); + Map reportMap2 = jsonMapper.readValue( + reportMapSerialized, + new TypeReference>() + { + } + ); + Assert.assertEquals(reportMap1, reportMap2); + } +} From 9943b275ef8603709103cebbf64a0148c6aebe18 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Mon, 2 Apr 2018 17:18:55 -0700 Subject: [PATCH 06/10] Refactor fire department metrics retrieval --- .../io/druid/indexer/TaskMetricsGetter.java | 2 +- .../druid/indexing/kafka/KafkaIndexTask.java | 19 +++--- .../IngestionStatsAndErrorsTaskReport.java | 3 + .../AppenderatorDriverRealtimeIndexTask.java | 19 +++--- .../indexing/common/task/HadoopIndexTask.java | 61 ++----------------- .../druid/indexing/common/task/IndexTask.java | 39 ++++++------ .../realtime/FireDepartmentMetrics.java | 10 --- ...ireDepartmentMetricsTaskMetricsGetter.java | 29 ++------- 8 files changed, 56 insertions(+), 126 deletions(-) diff --git a/api/src/main/java/io/druid/indexer/TaskMetricsGetter.java b/api/src/main/java/io/druid/indexer/TaskMetricsGetter.java index bc5d1517ac25..c29e890010cb 100644 --- a/api/src/main/java/io/druid/indexer/TaskMetricsGetter.java +++ b/api/src/main/java/io/druid/indexer/TaskMetricsGetter.java @@ -25,5 +25,5 @@ public interface TaskMetricsGetter { List getKeys(); - Map getMetrics(); + Map getTotalMetrics(); } diff --git a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java index d9f6cef0d417..7009508b7080 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java +++ b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java @@ -50,6 +50,7 @@ import io.druid.discovery.DruidNodeDiscoveryProvider; import io.druid.discovery.LookupNodeService; import io.druid.indexer.IngestionState; +import io.druid.indexer.TaskMetricsGetter; import io.druid.indexer.TaskMetricsUtils; import io.druid.indexing.appenderator.ActionBasedSegmentAllocator; import 
io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker; @@ -86,6 +87,7 @@ import io.druid.segment.indexing.RealtimeIOConfig; import io.druid.segment.realtime.FireDepartment; import io.druid.segment.realtime.FireDepartmentMetrics; +import io.druid.segment.realtime.FireDepartmentMetricsTaskMetricsGetter; import io.druid.segment.realtime.RealtimeMetricsMonitor; import io.druid.segment.realtime.appenderator.Appenderator; import io.druid.segment.realtime.appenderator.AppenderatorDriverAddResult; @@ -251,6 +253,8 @@ public enum Status private CircularBuffer savedParseExceptions; private IngestionState ingestionState; + private TaskMetricsGetter metricsGetter; + @JsonCreator public KafkaIndexTask( @JsonProperty("id") String id, @@ -508,6 +512,7 @@ private TaskStatus runInternal(final TaskToolbox toolbox) throws Exception null ); fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics(); + metricsGetter = new FireDepartmentMetricsTaskMetricsGetter(fireDepartmentMetrics); toolbox.getMonitorScheduler().addMonitor( new RealtimeMetricsMonitor( ImmutableList.of(fireDepartmentForMetrics), @@ -951,6 +956,7 @@ private TaskStatus runInternalLegacy(final TaskToolbox toolbox) throws Exception null ); fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics(); + metricsGetter = new FireDepartmentMetricsTaskMetricsGetter(fireDepartmentMetrics); toolbox.getMonitorScheduler().addMonitor( new RealtimeMetricsMonitor( ImmutableList.of(fireDepartmentForMetrics), @@ -1353,10 +1359,10 @@ private Map getTaskCompletionUnparseableEvents() private Map getTaskCompletionRowStats() { Map metrics = Maps.newHashMap(); - if (fireDepartmentMetrics != null) { + if (metricsGetter != null) { metrics.put( "buildSegments", - FireDepartmentMetrics.getRowMetricsFromFireDepartmentMetrics(fireDepartmentMetrics) + metricsGetter.getTotalMetrics() ); } return metrics; @@ -1579,15 +1585,10 @@ public Response getRowStats( Map returnMap = Maps.newHashMap(); Map totalsMap = Maps.newHashMap(); - if (fireDepartmentMetrics != null) { + if (metricsGetter != null) { totalsMap.put( "buildSegments", - TaskMetricsUtils.makeIngestionRowMetrics( - fireDepartmentMetrics.processed(), - fireDepartmentMetrics.processedWithErrors(), - fireDepartmentMetrics.unparseable(), - fireDepartmentMetrics.thrownAway() - ) + metricsGetter.getTotalMetrics() ); } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReport.java b/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReport.java index 3ebf79da9123..3c636f2678f4 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReport.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReport.java @@ -91,6 +91,9 @@ public String toString() '}'; } + // TaskReports are put into a Map and serialized. + // Jackson doesn't normally serialize the TaskReports with a "type" field in that situation, + // so explictly serialize the "type" field (otherwise, deserialization fails). 
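For reference, the Jackson behavior this comment works around can be reproduced with a minimal sketch along the following lines (illustrative only; it assumes the classes as they stand at this point in the series under io.druid.indexing.common, a plain ObjectMapper, and a hypothetical class name):

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.indexing.common.IngestionStatsAndErrorsTaskReport;
import io.druid.indexing.common.TaskReport;

import java.util.HashMap;
import java.util.Map;

public class TaskReportTypeIdSketch
{
  public static void main(String[] args) throws Exception
  {
    // No Druid-specific modules are assumed here: the subtype wiring is annotation-based
    // on the TaskReport interface (@JsonTypeInfo / @JsonSubTypes).
    ObjectMapper mapper = new ObjectMapper();

    Map<String, TaskReport> reports = new HashMap<>();
    reports.put(
        IngestionStatsAndErrorsTaskReport.REPORT_KEY,
        new IngestionStatsAndErrorsTaskReport("test_task_id", null)
    );

    // Serialized as a plain Object, the map's generic value type is erased, so Jackson picks
    // each value's serializer from its runtime class and the @JsonTypeInfo id declared on the
    // TaskReport interface is not emitted on its own; the explicit getType() property is what
    // guarantees a "type" field in the output.
    String json = mapper.writeValueAsString(reports);

    // Reading back with a full generic type (as the test helpers do with the reports file)
    // resolves the concrete subtype from that "type" field; without it, this read fails.
    Map<String, TaskReport> roundTripped = mapper.readValue(
        json,
        new TypeReference<Map<String, TaskReport>>() {}
    );

    System.out.println(roundTripped.get(IngestionStatsAndErrorsTaskReport.REPORT_KEY));
  }
}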
@JsonProperty("type") private String getType() { diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java index f9203ccd321d..1ab34222a306 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java @@ -39,6 +39,7 @@ import io.druid.discovery.DruidNodeDiscoveryProvider; import io.druid.discovery.LookupNodeService; import io.druid.indexer.IngestionState; +import io.druid.indexer.TaskMetricsGetter; import io.druid.indexer.TaskMetricsUtils; import io.druid.indexing.appenderator.ActionBasedSegmentAllocator; import io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker; @@ -66,6 +67,7 @@ import io.druid.segment.indexing.RealtimeIOConfig; import io.druid.segment.realtime.FireDepartment; import io.druid.segment.realtime.FireDepartmentMetrics; +import io.druid.segment.realtime.FireDepartmentMetricsTaskMetricsGetter; import io.druid.segment.realtime.RealtimeMetricsMonitor; import io.druid.segment.realtime.appenderator.Appenderator; import io.druid.segment.realtime.appenderator.AppenderatorDriverAddResult; @@ -142,6 +144,9 @@ private static String makeTaskId(RealtimeAppenderatorIngestionSpec spec) @JsonIgnore private volatile FireDepartmentMetrics metrics = null; + @JsonIgnore + private TaskMetricsGetter metricsGetter; + @JsonIgnore private volatile boolean gracefullyStopped = false; @@ -250,6 +255,7 @@ dataSchema, new RealtimeIOConfig(null, null, null), null ); this.metrics = fireDepartmentForMetrics.getMetrics(); + metricsGetter = new FireDepartmentMetricsTaskMetricsGetter(metrics); Supplier committerSupplier = null; final File firehoseTempDir = toolbox.getFirehoseTemporaryDir(); @@ -460,15 +466,10 @@ public Response getRowStats( Map returnMap = Maps.newHashMap(); Map totalsMap = Maps.newHashMap(); - if (metrics != null) { + if (metricsGetter != null) { totalsMap.put( "buildSegments", - TaskMetricsUtils.makeIngestionRowMetrics( - metrics.processed(), - metrics.processedWithErrors(), - metrics.unparseable(), - metrics.thrownAway() - ) + metricsGetter.getTotalMetrics() ); } @@ -532,10 +533,10 @@ private Map getTaskCompletionUnparseableEvents() private Map getTaskCompletionRowStats() { Map metricsMap = Maps.newHashMap(); - if (metrics != null) { + if (metricsGetter != null) { metricsMap.put( "buildSegments", - FireDepartmentMetrics.getRowMetricsFromFireDepartmentMetrics(metrics) + metricsGetter.getTotalMetrics() ); } return metricsMap; diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java index 97f8c83c2198..3d66a78f19ab 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java @@ -476,22 +476,9 @@ public static class InnerProcessingStatsGetter implements TaskMetricsGetter TaskMetricsUtils.ROWS_UNPARSEABLE ); - public static final Map MISSING_SAMPLE_DEFAULT_VALUES = Maps.newHashMap(); - static { - MISSING_SAMPLE_DEFAULT_VALUES.put(TaskMetricsUtils.ROWS_PROCESSED, 0.0d); - MISSING_SAMPLE_DEFAULT_VALUES.put(TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0.0d); - MISSING_SAMPLE_DEFAULT_VALUES.put(TaskMetricsUtils.ROWS_THROWN_AWAY, 
0.0d); - MISSING_SAMPLE_DEFAULT_VALUES.put(TaskMetricsUtils.ROWS_UNPARSEABLE, 0.0d); - } - private final Method getStatsMethod; private final Object innerProcessingRunner; - private long processed = 0; - private long processedWithErrors = 0; - private long thrownAway = 0; - private long unparseable = 0; - public InnerProcessingStatsGetter( Object innerProcessingRunner ) @@ -513,48 +500,12 @@ public List getKeys() } @Override - public Map getMetrics() - { - try { - Map statsMap = (Map) getStatsMethod.invoke(innerProcessingRunner); - if (statsMap == null) { - return MISSING_SAMPLE_DEFAULT_VALUES; - } - - long curProcessed = (Long) statsMap.get(TaskMetricsUtils.ROWS_PROCESSED); - long curProcessedWithErrors = (Long) statsMap.get(TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS); - long curThrownAway = (Long) statsMap.get(TaskMetricsUtils.ROWS_THROWN_AWAY); - long curUnparseable = (Long) statsMap.get(TaskMetricsUtils.ROWS_UNPARSEABLE); - - Long processedDiff = curProcessed - processed; - Long procssedWithErrorsDiff = curProcessedWithErrors - processedWithErrors; - Long thrownAwayDiff = curThrownAway - thrownAway; - Long unparseableDiff = curUnparseable - unparseable; - - processed = curProcessed; - processedWithErrors = curProcessedWithErrors; - thrownAway = curThrownAway; - unparseable = curUnparseable; - - return ImmutableMap.of( - TaskMetricsUtils.ROWS_PROCESSED, processedDiff.doubleValue(), - TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, procssedWithErrorsDiff.doubleValue(), - TaskMetricsUtils.ROWS_THROWN_AWAY, thrownAwayDiff.doubleValue(), - TaskMetricsUtils.ROWS_UNPARSEABLE, unparseableDiff.doubleValue() - ); - } - catch (Exception e) { - log.error(e, "Got exception from getMetrics(): "); - return null; - } - } - - public Map getTotalMetrics() + public Map getTotalMetrics() { try { Map statsMap = (Map) getStatsMethod.invoke(innerProcessingRunner); if (statsMap == null) { - return MISSING_SAMPLE_DEFAULT_VALUES; + return null; } long curProcessed = (Long) statsMap.get(TaskMetricsUtils.ROWS_PROCESSED); long curProcessedWithErrors = (Long) statsMap.get(TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS); @@ -562,10 +513,10 @@ public Map getTotalMetrics() long curUnparseable = (Long) statsMap.get(TaskMetricsUtils.ROWS_UNPARSEABLE); return ImmutableMap.of( - TaskMetricsUtils.ROWS_PROCESSED, (double) curProcessed, - TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, (double) curProcessedWithErrors, - TaskMetricsUtils.ROWS_THROWN_AWAY, (double) curThrownAway, - TaskMetricsUtils.ROWS_UNPARSEABLE, (double) curUnparseable + TaskMetricsUtils.ROWS_PROCESSED, curProcessed, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, curProcessedWithErrors, + TaskMetricsUtils.ROWS_THROWN_AWAY, curThrownAway, + TaskMetricsUtils.ROWS_UNPARSEABLE, curUnparseable ); } catch (Exception e) { diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java index 1324fb420c71..80480a549d20 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java @@ -42,7 +42,7 @@ import io.druid.data.input.Rows; import io.druid.hll.HyperLogLogCollector; import io.druid.indexer.IngestionState; -import io.druid.indexer.TaskMetricsUtils; +import io.druid.indexer.TaskMetricsGetter; import io.druid.indexing.appenderator.ActionBasedSegmentAllocator; import io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker; import 
io.druid.indexing.common.IngestionStatsAndErrorsTaskReport; @@ -72,6 +72,7 @@ import io.druid.segment.indexing.granularity.GranularitySpec; import io.druid.segment.realtime.FireDepartment; import io.druid.segment.realtime.FireDepartmentMetrics; +import io.druid.segment.realtime.FireDepartmentMetricsTaskMetricsGetter; import io.druid.segment.realtime.RealtimeMetricsMonitor; import io.druid.segment.realtime.appenderator.Appenderator; import io.druid.segment.realtime.appenderator.AppenderatorConfig; @@ -164,12 +165,18 @@ private static String makeGroupId(boolean isAppendToExisting, String dataSource) @JsonIgnore private FireDepartmentMetrics buildSegmentsFireDepartmentMetrics; + @JsonIgnore + private TaskMetricsGetter buildSegmentsMetricsGetter; + @JsonIgnore private CircularBuffer buildSegmentsSavedParseExceptions; @JsonIgnore private FireDepartmentMetrics determinePartitionsFireDepartmentMetrics; + @JsonIgnore + private TaskMetricsGetter determinePartitionsMetricsGetter; + @JsonIgnore private CircularBuffer determinePartitionsSavedParseExceptions; @@ -349,29 +356,19 @@ public Response getRowStats( } if (needsDeterminePartitions) { - if (determinePartitionsFireDepartmentMetrics != null) { + if (determinePartitionsMetricsGetter != null) { totalsMap.put( "determinePartitions", - TaskMetricsUtils.makeIngestionRowMetrics( - determinePartitionsFireDepartmentMetrics.processed(), - determinePartitionsFireDepartmentMetrics.processedWithErrors(), - determinePartitionsFireDepartmentMetrics.unparseable(), - determinePartitionsFireDepartmentMetrics.thrownAway() - ) + determinePartitionsMetricsGetter.getTotalMetrics() ); } } if (needsBuildSegments) { - if (buildSegmentsFireDepartmentMetrics != null) { + if (buildSegmentsMetricsGetter != null) { totalsMap.put( "buildSegments", - TaskMetricsUtils.makeIngestionRowMetrics( - buildSegmentsFireDepartmentMetrics.processed(), - buildSegmentsFireDepartmentMetrics.processedWithErrors(), - buildSegmentsFireDepartmentMetrics.unparseable(), - buildSegmentsFireDepartmentMetrics.thrownAway() - ) + buildSegmentsMetricsGetter.getTotalMetrics() ); } } @@ -495,16 +492,16 @@ private Map getTaskCompletionUnparseableEvents() private Map getTaskCompletionRowStats() { Map metrics = Maps.newHashMap(); - if (determinePartitionsFireDepartmentMetrics != null) { + if (determinePartitionsMetricsGetter != null) { metrics.put( "determinePartitions", - FireDepartmentMetrics.getRowMetricsFromFireDepartmentMetrics(determinePartitionsFireDepartmentMetrics) + determinePartitionsMetricsGetter.getTotalMetrics() ); } - if (buildSegmentsFireDepartmentMetrics != null) { + if (buildSegmentsMetricsGetter != null) { metrics.put( "buildSegments", - FireDepartmentMetrics.getRowMetricsFromFireDepartmentMetrics(buildSegmentsFireDepartmentMetrics) + buildSegmentsMetricsGetter.getTotalMetrics() ); } return metrics; @@ -700,6 +697,9 @@ private Map> collectIntervalsAndShardSp ) throws IOException { determinePartitionsFireDepartmentMetrics = new FireDepartmentMetrics(); + determinePartitionsMetricsGetter = new FireDepartmentMetricsTaskMetricsGetter( + determinePartitionsFireDepartmentMetrics + ); final Map> hllCollectors = new TreeMap<>( Comparators.intervalsByStartThenEnd() @@ -836,6 +836,7 @@ private TaskStatus generateAndPublishSegments( dataSchema, new RealtimeIOConfig(null, null, null), null ); buildSegmentsFireDepartmentMetrics = fireDepartmentForMetrics.getMetrics(); + buildSegmentsMetricsGetter = new FireDepartmentMetricsTaskMetricsGetter(buildSegmentsFireDepartmentMetrics); if 
(toolbox.getMonitorScheduler() != null) { toolbox.getMonitorScheduler().addMonitor( diff --git a/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetrics.java b/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetrics.java index bd9a1bf48ab5..ed4ad0c32236 100644 --- a/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetrics.java +++ b/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetrics.java @@ -263,14 +263,4 @@ public FireDepartmentMetrics merge(FireDepartmentMetrics other) messageGap.set(Math.max(messageGap(), otherSnapshot.messageGap())); return this; } - - public static Map getRowMetricsFromFireDepartmentMetrics(FireDepartmentMetrics fdm) - { - return TaskMetricsUtils.makeIngestionRowMetrics( - fdm.processed(), - fdm.processedWithErrors(), - fdm.unparseable(), - fdm.thrownAway() - ); - } } diff --git a/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetricsTaskMetricsGetter.java b/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetricsTaskMetricsGetter.java index d5d0f924f047..9c7ee60fde18 100644 --- a/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetricsTaskMetricsGetter.java +++ b/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetricsTaskMetricsGetter.java @@ -22,7 +22,6 @@ import com.google.common.collect.ImmutableMap; import io.druid.indexer.TaskMetricsGetter; import io.druid.indexer.TaskMetricsUtils; -import io.druid.java.util.common.logger.Logger; import java.util.Arrays; import java.util.List; @@ -32,18 +31,13 @@ public class FireDepartmentMetricsTaskMetricsGetter implements TaskMetricsGetter { public static final List KEYS = Arrays.asList( TaskMetricsUtils.ROWS_PROCESSED, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, TaskMetricsUtils.ROWS_THROWN_AWAY, TaskMetricsUtils.ROWS_UNPARSEABLE ); - private static final Logger log = new Logger(FireDepartmentMetricsTaskMetricsGetter.class); - private final FireDepartmentMetrics fireDepartmentMetrics; - private double processed = 0; - private double thrownAway = 0; - private double unparseable = 0; - public FireDepartmentMetricsTaskMetricsGetter( FireDepartmentMetrics fireDepartmentMetrics ) @@ -58,24 +52,13 @@ public List getKeys() } @Override - public Map getMetrics() + public Map getTotalMetrics() { - double curProcessed = fireDepartmentMetrics.processed(); - double curThrownAway = fireDepartmentMetrics.thrownAway(); - double curUnparseable = fireDepartmentMetrics.unparseable(); - - double processedDiff = curProcessed - processed; - double thrownAwayDiff = curThrownAway - thrownAway; - double unparseableDiff = curUnparseable - unparseable; - - processed = curProcessed; - thrownAway = curThrownAway; - unparseable = curUnparseable; - return ImmutableMap.of( - TaskMetricsUtils.ROWS_PROCESSED, processedDiff, - TaskMetricsUtils.ROWS_THROWN_AWAY, thrownAwayDiff, - TaskMetricsUtils.ROWS_UNPARSEABLE, unparseableDiff + TaskMetricsUtils.ROWS_PROCESSED, fireDepartmentMetrics.processed(), + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, fireDepartmentMetrics.processedWithErrors(), + TaskMetricsUtils.ROWS_THROWN_AWAY, fireDepartmentMetrics.thrownAway(), + TaskMetricsUtils.ROWS_UNPARSEABLE, fireDepartmentMetrics.unparseable() ); } } From 60beb1c824cc612aebe456574d40d99104060503 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Mon, 2 Apr 2018 17:45:57 -0700 Subject: [PATCH 07/10] Refactor input row serde in hadoop task --- .../druid/indexing/kafka/KafkaIndexTask.java | 1 - .../io/druid/indexer/IndexGeneratorJob.java | 15 ++++++---- 
.../java/io/druid/indexer/InputRowSerde.java | 30 +++++++++++++++++-- .../indexer/IndexGeneratorCombinerTest.java | 8 ++--- .../io/druid/indexer/InputRowSerdeTest.java | 16 +++++----- .../AppenderatorDriverRealtimeIndexTask.java | 1 - .../realtime/FireDepartmentMetrics.java | 2 -- 7 files changed, 48 insertions(+), 25 deletions(-) diff --git a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java index 7009508b7080..1e05efce7902 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java +++ b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java @@ -51,7 +51,6 @@ import io.druid.discovery.LookupNodeService; import io.druid.indexer.IngestionState; import io.druid.indexer.TaskMetricsGetter; -import io.druid.indexer.TaskMetricsUtils; import io.druid.indexing.appenderator.ActionBasedSegmentAllocator; import io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker; import io.druid.indexing.common.IngestionStatsAndErrorsTaskReport; diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java b/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java index feff79d6f522..ada3c6e704da 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java @@ -40,7 +40,6 @@ import io.druid.indexer.path.DatasourcePathSpec; import io.druid.java.util.common.IAE; import io.druid.java.util.common.ISE; -import io.druid.java.util.common.Pair; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.concurrent.Execs; import io.druid.java.util.common.logger.Logger; @@ -358,7 +357,7 @@ protected void innerMap( // type SegmentInputRow serves as a marker that these InputRow instances have already been combined // and they contain the columns as they show up in the segment after ingestion, not what you would see in raw // data - Pair> serializedRow = inputRow instanceof SegmentInputRow ? + InputRowSerde.SerializeResult serializeResult = inputRow instanceof SegmentInputRow ? InputRowSerde.toBytes( typeHelperMap, inputRow, @@ -382,10 +381,14 @@ protected void innerMap( .put(hashedDimensions) .array() ).toBytesWritable(), - new BytesWritable(serializedRow.lhs) + new BytesWritable(serializeResult.getSerializedRow()) ); - ParseException pe = IncrementalIndex.getCombinedParseException(inputRow, serializedRow.rhs, null); + ParseException pe = IncrementalIndex.getCombinedParseException( + inputRow, + serializeResult.getParseExceptionMessages(), + null + ); if (pe != null) { throw pe; } else { @@ -465,11 +468,11 @@ private void flushIndexToContextAndClose(BytesWritable key, IncrementalIndex ind InputRow inputRow = getInputRowFromRow(row, dimensions); // reportParseExceptions is true as any unparseable data is already handled by the mapper. 
- Pair<byte[], List<String>> serializedRow = InputRowSerde.toBytes(typeHelperMap, inputRow, combiningAggs, true); + InputRowSerde.SerializeResult serializeResult = InputRowSerde.toBytes(typeHelperMap, inputRow, combiningAggs, true); context.write( key, - new BytesWritable(serializedRow.lhs) + new BytesWritable(serializeResult.getSerializedRow()) ); } index.close(); diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/InputRowSerde.java b/indexing-hadoop/src/main/java/io/druid/indexer/InputRowSerde.java index a5d184f53d99..3a44762599cc 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/InputRowSerde.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/InputRowSerde.java @@ -32,7 +32,6 @@ import io.druid.data.input.impl.DimensionSchema; import io.druid.data.input.impl.DimensionsSpec; import io.druid.java.util.common.IAE; -import io.druid.java.util.common.Pair; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.logger.Logger; import io.druid.java.util.common.parsers.ParseException; @@ -100,6 +99,31 @@ public static Map<String, IndexSerdeTypeHelper> getTypeHelperMap(DimensionsSpec return typeHelperMap; } + public static class SerializeResult + { + private final byte[] serializedRow; + private final List<String> parseExceptionMessages; + + public SerializeResult( + final byte[] serializedRow, + final List<String> parseExceptionMessages + ) + { + this.serializedRow = serializedRow; + this.parseExceptionMessages = parseExceptionMessages; + } + + public byte[] getSerializedRow() + { + return serializedRow; + } + + public List<String> getParseExceptionMessages() + { + return parseExceptionMessages; + } + } + public static class StringIndexSerdeTypeHelper implements IndexSerdeTypeHelper<List<String>> { @Override @@ -241,7 +265,7 @@ public Double deserialize(ByteArrayDataInput in) } } - public static final Pair<byte[], List<String>> toBytes( + public static final SerializeResult toBytes( final Map<String, IndexSerdeTypeHelper> typeHelperMap, final InputRow row, AggregatorFactory[] aggs, @@ -321,7 +345,7 @@ public InputRow get() } } - return Pair.of(out.toByteArray(), parseExceptionMessages); + return new SerializeResult(out.toByteArray(), parseExceptionMessages); } catch (IOException ex) { throw new RuntimeException(ex); diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorCombinerTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorCombinerTest.java index 39942bdc5b01..4f0a22dd45f0 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorCombinerTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorCombinerTest.java @@ -175,8 +175,8 @@ public void testMultipleRowsMerged() throws Exception ) ); List<BytesWritable> rows = Lists.newArrayList( - new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row1, aggregators, true).lhs), - new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row2, aggregators, true).lhs) + new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row1, aggregators, true).getSerializedRow()), + new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row2, aggregators, true).getSerializedRow()) ); Reducer.Context context = EasyMock.createNiceMock(Reducer.Context.class); @@ -253,8 +253,8 @@ public void testMultipleRowsNotMerged() throws Exception Map<String, IndexSerdeTypeHelper> typeHelperMap = InputRowSerde.getTypeHelperMap(dimensionsSpec); List<BytesWritable> rows = Lists.newArrayList( - new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row1, aggregators, true).lhs), - new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row2, aggregators, true).lhs) + new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row1, aggregators, 
true).getSerializedRow()), + new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row2, aggregators, true).getSerializedRow()) ); Reducer.Context context = EasyMock.createNiceMock(Reducer.Context.class); diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/InputRowSerdeTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/InputRowSerdeTest.java index 83a09a0307b8..387d5b53512e 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/InputRowSerdeTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/InputRowSerdeTest.java @@ -30,7 +30,6 @@ import io.druid.data.input.impl.StringDimensionSchema; import io.druid.hll.HyperLogLogCollector; import io.druid.jackson.AggregatorsModule; -import io.druid.java.util.common.Pair; import io.druid.query.aggregation.Aggregator; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.DoubleSumAggregator; @@ -124,7 +123,8 @@ public Aggregator factorize(ColumnSelectorFactory metricFactory) null ); - byte[] data = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, false).lhs; // Ignore Unparseable aggregator + byte[] data = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, false) + .getSerializedRow(); // Ignore Unparseable aggregator InputRow out = InputRowSerde.fromBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), data, aggregatorFactories); Assert.assertEquals(timestamp, out.getTimestampFromEpoch()); @@ -173,7 +173,7 @@ public void testThrowParseExceptions() null ); - Pair> result = InputRowSerde.toBytes( + InputRowSerde.SerializeResult result = InputRowSerde.toBytes( InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, @@ -181,14 +181,14 @@ public void testThrowParseExceptions() ); Assert.assertEquals( Arrays.asList("Unable to parse value[m3v] for field[m3]"), - result.rhs + result.getParseExceptionMessages() ); } @Test public void testDimensionParseExceptions() { - Pair> result; + InputRowSerde.SerializeResult result; InputRow in = new MapBasedInputRow( timestamp, dims, @@ -208,7 +208,7 @@ public void testDimensionParseExceptions() result = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, true); Assert.assertEquals( Arrays.asList("could not convert value [d1v] to long"), - result.rhs + result.getParseExceptionMessages() ); dimensionsSpec = new DimensionsSpec( @@ -221,7 +221,7 @@ public void testDimensionParseExceptions() result = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, true); Assert.assertEquals( Arrays.asList("could not convert value [d1v] to float"), - result.rhs + result.getParseExceptionMessages() ); dimensionsSpec = new DimensionsSpec( @@ -234,7 +234,7 @@ public void testDimensionParseExceptions() result = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, true); Assert.assertEquals( Arrays.asList("could not convert value [d1v] to double"), - result.rhs + result.getParseExceptionMessages() ); } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java index 1ab34222a306..bb26870dfd3a 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java +++ 
b/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java @@ -40,7 +40,6 @@ import io.druid.discovery.LookupNodeService; import io.druid.indexer.IngestionState; import io.druid.indexer.TaskMetricsGetter; -import io.druid.indexer.TaskMetricsUtils; import io.druid.indexing.appenderator.ActionBasedSegmentAllocator; import io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker; import io.druid.indexing.common.IngestionStatsAndErrorsTaskReport; diff --git a/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetrics.java b/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetrics.java index ed4ad0c32236..302b58663ced 100644 --- a/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetrics.java +++ b/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetrics.java @@ -20,9 +20,7 @@ package io.druid.segment.realtime; import com.google.common.base.Preconditions; -import io.druid.indexer.TaskMetricsUtils; -import java.util.Map; import java.util.concurrent.atomic.AtomicLong; /** From 0e0e8551b0645c038e776f8795d4a8dc2e2e4e74 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Mon, 2 Apr 2018 17:52:41 -0700 Subject: [PATCH 08/10] Refactor hadoop task loader names --- .../indexing/common/task/HadoopIndexTask.java | 28 ++++++++----------- .../indexing/common/task/HadoopTask.java | 9 ++---- 2 files changed, 14 insertions(+), 23 deletions(-) diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java index 3d66a78f19ab..d46a57598050 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java @@ -265,16 +265,11 @@ private TaskStatus runInternal(TaskToolbox toolbox) throws Exception Object determinePartitionsInnerProcessingRunner = getForeignClassloaderObject( "io.druid.indexing.common.task.HadoopIndexTask$HadoopDetermineConfigInnerProcessingRunner", - new String[]{ - toolbox.getObjectMapper().writeValueAsString(spec), - toolbox.getConfig().getHadoopWorkingPath(), - toolbox.getSegmentPusher().getPathForHadoop() - }, loader ); determinePartitionsStatsGetter = new InnerProcessingStatsGetter(determinePartitionsInnerProcessingRunner); - String[] input1 = new String[]{ + String[] determinePartitionsInput = new String[]{ toolbox.getObjectMapper().writeValueAsString(spec), toolbox.getConfig().getHadoopWorkingPath(), toolbox.getSegmentPusher().getPathForHadoop() @@ -282,8 +277,11 @@ private TaskStatus runInternal(TaskToolbox toolbox) throws Exception HadoopIngestionSpec indexerSchema = null; final ClassLoader oldLoader = Thread.currentThread().getContextClassLoader(); - Class aClazz = determinePartitionsInnerProcessingRunner.getClass(); - Method determinePartitionsInnerProcessingRunTask = aClazz.getMethod("runTask", input1.getClass()); + Class determinePartitionsRunnerClass = determinePartitionsInnerProcessingRunner.getClass(); + Method determinePartitionsInnerProcessingRunTask = determinePartitionsRunnerClass.getMethod( + "runTask", + determinePartitionsInput.getClass() + ); try { Thread.currentThread().setContextClassLoader(loader); @@ -291,7 +289,7 @@ private TaskStatus runInternal(TaskToolbox toolbox) throws Exception final String determineConfigStatusString = (String) determinePartitionsInnerProcessingRunTask.invoke( determinePartitionsInnerProcessingRunner, - new Object[]{input1} + new 
Object[]{determinePartitionsInput} ); @@ -357,21 +355,17 @@ private TaskStatus runInternal(TaskToolbox toolbox) throws Exception Object innerProcessingRunner = getForeignClassloaderObject( "io.druid.indexing.common.task.HadoopIndexTask$HadoopIndexGeneratorInnerProcessingRunner", - new String[]{ - toolbox.getObjectMapper().writeValueAsString(indexerSchema), - version - }, loader ); buildSegmentsStatsGetter = new InnerProcessingStatsGetter(innerProcessingRunner); - String[] input = new String[]{ + String[] buildSegmentsInput = new String[]{ toolbox.getObjectMapper().writeValueAsString(indexerSchema), version }; - Class aClazz2 = innerProcessingRunner.getClass(); - Method innerProcessingRunTask = aClazz2.getMethod("runTask", input.getClass()); + Class buildSegmentsRunnerClass = innerProcessingRunner.getClass(); + Method innerProcessingRunTask = buildSegmentsRunnerClass.getMethod("runTask", buildSegmentsInput.getClass()); try { Thread.currentThread().setContextClassLoader(loader); @@ -379,7 +373,7 @@ private TaskStatus runInternal(TaskToolbox toolbox) throws Exception ingestionState = IngestionState.BUILD_SEGMENTS; final String jobStatusString = (String) innerProcessingRunTask.invoke( innerProcessingRunner, - new Object[]{input} + new Object[]{buildSegmentsInput} ); buildSegmentsStatus = toolbox.getObjectMapper().readValue( diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java index 18d95ea81a93..60be2b8639a1 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java @@ -227,20 +227,17 @@ public static OutputType invokeForeignLoader( /** * This method tries to isolate class loading during a Function call * - * @param clazzName The Class which has a static method called `runTask` - * @param input The input for `runTask`, must have `input.getClass()` be the class of the input for runTask + * @param clazzName The Class which has an instance method called `runTask` * @param loader The loader to use as the context class loader during invocation - * @param The input type of the method. 
* * @return The result of the method invocation */ - public static Object getForeignClassloaderObject( + public static Object getForeignClassloaderObject( final String clazzName, - final InputType input, final ClassLoader loader ) { - log.debug("Launching [%s] on class loader [%s] with input class [%s]", clazzName, loader, input.getClass()); + log.debug("Launching [%s] on class loader [%s]", clazzName, loader); final ClassLoader oldLoader = Thread.currentThread().getContextClassLoader(); try { Thread.currentThread().setContextClassLoader(loader); From c2c132affa086ecc178eb08b4a671739e40c08f7 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Tue, 3 Apr 2018 12:40:51 -0700 Subject: [PATCH 09/10] Truncate error message in TaskStatus, add errorMsg to task report --- .../druid/indexing/kafka/KafkaIndexTask.java | 8 +++-- ...IngestionStatsAndErrorsTaskReportData.java | 35 +++++++++++++------ .../io/druid/indexing/common/TaskStatus.java | 15 +++++++- .../AppenderatorDriverRealtimeIndexTask.java | 9 +++-- .../indexing/common/task/HadoopIndexTask.java | 15 +++++--- .../druid/indexing/common/task/IndexTask.java | 12 +++++-- .../common/task/TaskReportSerdeTest.java | 3 +- 7 files changed, 72 insertions(+), 25 deletions(-) diff --git a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java index 1e05efce7902..ab69c43b212d 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java +++ b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java @@ -253,6 +253,7 @@ public enum Status private IngestionState ingestionState; private TaskMetricsGetter metricsGetter; + private String errorMsg; @JsonCreator public KafkaIndexTask( @@ -435,11 +436,11 @@ public TaskStatus run(final TaskToolbox toolbox) throws Exception } catch (Exception e) { log.error(e, "Encountered exception while running task."); - + errorMsg = Throwables.getStackTraceAsString(e); toolbox.getTaskReportFileWriter().write(getTaskCompletionReports()); return TaskStatus.failure( getId(), - Throwables.getStackTraceAsString(e) + errorMsg ); } } @@ -1339,7 +1340,8 @@ private Map getTaskCompletionReports() new IngestionStatsAndErrorsTaskReportData( ingestionState, getTaskCompletionUnparseableEvents(), - getTaskCompletionRowStats() + getTaskCompletionRowStats(), + errorMsg ) ) ); diff --git a/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReportData.java b/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReportData.java index 01b4c2d4513f..24114e5f11eb 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReportData.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReportData.java @@ -36,15 +36,20 @@ public class IngestionStatsAndErrorsTaskReportData @JsonProperty private Map rowStats; + @JsonProperty + private String errorMsg; + public IngestionStatsAndErrorsTaskReportData( @JsonProperty("ingestionState") IngestionState ingestionState, @JsonProperty("unparseableEvents") Map unparseableEvents, - @JsonProperty("rowStats") Map rowStats + @JsonProperty("rowStats") Map rowStats, + @JsonProperty("errorMsg") String errorMsg ) { this.ingestionState = ingestionState; this.unparseableEvents = unparseableEvents; this.rowStats = rowStats; + this.errorMsg = errorMsg; } @JsonProperty @@ 
-65,6 +70,20 @@ public Map getRowStats() return rowStats; } + @JsonProperty + public String getErrorMsg() + { + return errorMsg; + } + + public static IngestionStatsAndErrorsTaskReportData getPayloadFromTaskReports( + Map taskReports + ) + { + return (IngestionStatsAndErrorsTaskReportData) taskReports.get(IngestionStatsAndErrorsTaskReport.REPORT_KEY) + .getPayload(); + } + @Override public boolean equals(Object o) { @@ -77,21 +96,14 @@ public boolean equals(Object o) IngestionStatsAndErrorsTaskReportData that = (IngestionStatsAndErrorsTaskReportData) o; return getIngestionState() == that.getIngestionState() && Objects.equals(getUnparseableEvents(), that.getUnparseableEvents()) && - Objects.equals(getRowStats(), that.getRowStats()); + Objects.equals(getRowStats(), that.getRowStats()) && + Objects.equals(getErrorMsg(), that.getErrorMsg()); } @Override public int hashCode() { - return Objects.hash(getIngestionState(), getUnparseableEvents(), getRowStats()); - } - - public static IngestionStatsAndErrorsTaskReportData getPayloadFromTaskReports( - Map taskReports - ) - { - return (IngestionStatsAndErrorsTaskReportData) taskReports.get(IngestionStatsAndErrorsTaskReport.REPORT_KEY) - .getPayload(); + return Objects.hash(getIngestionState(), getUnparseableEvents(), getRowStats(), getErrorMsg()); } @Override @@ -101,6 +113,7 @@ public String toString() "ingestionState=" + ingestionState + ", unparseableEvents=" + unparseableEvents + ", rowStats=" + rowStats + + ", errorMsg='" + errorMsg + '\'' + '}'; } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/TaskStatus.java b/indexing-service/src/main/java/io/druid/indexing/common/TaskStatus.java index d83e9179db68..61e64917ca82 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/TaskStatus.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/TaskStatus.java @@ -34,6 +34,8 @@ */ public class TaskStatus { + public static final int MAX_ERROR_MSG_LENGTH = 100; + public static TaskStatus running(String taskId) { return new TaskStatus(taskId, TaskState.RUNNING, -1, null); @@ -64,6 +66,17 @@ public static TaskStatus fromCode(String taskId, TaskState code) return new TaskStatus(taskId, code, -1, null); } + // The error message can be large, so truncate it to avoid storing large objects in zookeeper/metadata storage. + // The full error message will be available via a TaskReport. + private static String truncateErrorMsg(String errorMsg) + { + if (errorMsg != null && errorMsg.length() > MAX_ERROR_MSG_LENGTH) { + return errorMsg.substring(0, MAX_ERROR_MSG_LENGTH) + "..."; + } else { + return errorMsg; + } + } + private final String id; private final TaskState status; private final long duration; @@ -80,7 +93,7 @@ protected TaskStatus( this.id = id; this.status = status; this.duration = duration; - this.errorMsg = errorMsg; + this.errorMsg = truncateErrorMsg(errorMsg); // Check class invariants. 
Preconditions.checkNotNull(id, "id"); diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java index bb26870dfd3a..9e2751ffe7ff 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java @@ -167,6 +167,9 @@ private static String makeTaskId(RealtimeAppenderatorIngestionSpec spec) @JsonIgnore private IngestionState ingestionState; + @JsonIgnore + private String errorMsg; + @JsonCreator public AppenderatorDriverRealtimeIndexTask( @JsonProperty("id") String id, @@ -370,10 +373,11 @@ dataSchema, new RealtimeIOConfig(null, null, null), null catch (Throwable e) { log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()) .emit(); + errorMsg = Throwables.getStackTraceAsString(e); toolbox.getTaskReportFileWriter().write(getTaskCompletionReports()); return TaskStatus.failure( getId(), - Throwables.getStackTraceAsString(e) + errorMsg ); } finally { @@ -513,7 +517,8 @@ private Map getTaskCompletionReports() new IngestionStatsAndErrorsTaskReportData( ingestionState, getTaskCompletionUnparseableEvents(), - getTaskCompletionRowStats() + getTaskCompletionRowStats(), + errorMsg ) ) ); diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java index d46a57598050..984a9fd6a523 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java @@ -115,6 +115,9 @@ private static String getTheDataSource(HadoopIngestionSpec spec) @JsonIgnore private HadoopIndexGeneratorInnerProcessingStatus buildSegmentsStatus = null; + @JsonIgnore + private String errorMsg; + /** * @param spec is used by the HadoopDruidIndexerJob to set up the appropriate parameters * for creating Druid index segments. It may be modified. 
@@ -238,10 +241,11 @@ public TaskStatus run(TaskToolbox toolbox) throws Exception log.error(e, "Encountered exception in run():"); } + errorMsg = Throwables.getStackTraceAsString(effectiveException); toolbox.getTaskReportFileWriter().write(getTaskCompletionReports()); return TaskStatus.failure( getId(), - Throwables.getStackTraceAsString(effectiveException) + errorMsg ); } finally { @@ -299,10 +303,11 @@ private TaskStatus runInternal(TaskToolbox toolbox) throws Exception indexerSchema = determineConfigStatus.getSchema(); if (indexerSchema == null) { + errorMsg = determineConfigStatus.getErrorMsg(); toolbox.getTaskReportFileWriter().write(getTaskCompletionReports()); return TaskStatus.failure( getId(), - determineConfigStatus.getErrorMsg() + errorMsg ); } } @@ -390,10 +395,11 @@ private TaskStatus runInternal(TaskToolbox toolbox) throws Exception null ); } else { + errorMsg = buildSegmentsStatus.getErrorMsg(); toolbox.getTaskReportFileWriter().write(getTaskCompletionReports()); return TaskStatus.failure( getId(), - buildSegmentsStatus.getErrorMsg() + errorMsg ); } } @@ -437,7 +443,8 @@ private Map getTaskCompletionReports() new IngestionStatsAndErrorsTaskReportData( ingestionState, null, - getTaskCompletionRowStats() + getTaskCompletionRowStats(), + errorMsg ) ) ); diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java index 80480a549d20..9a4daa084a2e 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java @@ -180,6 +180,9 @@ private static String makeGroupId(boolean isAppendToExisting, String dataSource) @JsonIgnore private CircularBuffer determinePartitionsSavedParseExceptions; + @JsonIgnore + private String errorMsg; + @JsonCreator public IndexTask( @JsonProperty("id") final String id, @@ -445,10 +448,11 @@ public TaskStatus run(final TaskToolbox toolbox) throws Exception } catch (Exception e) { log.error(e, "Encountered exception in %s.", ingestionState); + errorMsg = Throwables.getStackTraceAsString(e); toolbox.getTaskReportFileWriter().write(getTaskCompletionReports()); return TaskStatus.failure( getId(), - Throwables.getStackTraceAsString(e) + errorMsg ); } @@ -467,7 +471,8 @@ private Map getTaskCompletionReports() new IngestionStatsAndErrorsTaskReportData( ingestionState, getTaskCompletionUnparseableEvents(), - getTaskCompletionRowStats() + getTaskCompletionRowStats(), + errorMsg ) ) ); @@ -994,10 +999,11 @@ dataSchema, new RealtimeIOConfig(null, null, null), null ingestionState = IngestionState.COMPLETED; if (published == null) { log.error("Failed to publish segments, aborting!"); + errorMsg = "Failed to publish segments."; toolbox.getTaskReportFileWriter().write(getTaskCompletionReports()); return TaskStatus.failure( getId(), - "Failed to publish segments." 
+ errorMsg ); } else { log.info( diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/TaskReportSerdeTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/TaskReportSerdeTest.java index ac479a8769b0..62edbbbdc0c0 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/TaskReportSerdeTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/TaskReportSerdeTest.java @@ -55,7 +55,8 @@ public void testSerde() throws Exception ), ImmutableMap.of( "number", 1234 - ) + ), + "an error message" ) ); String report1serialized = jsonMapper.writeValueAsString(report1); From 8e36c225a6ae9a7b5e6b134c91c8194dde912325 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Wed, 4 Apr 2018 14:46:25 -0700 Subject: [PATCH 10/10] PR comments --- .../data/input/impl/MapInputRowParser.java | 2 +- .../java/io/druid/utils/CircularBuffer.java | 13 +++++ .../src/main/java/io/druid/indexer/Jobby.java | 17 +++++- .../io/druid/indexer/IndexGeneratorJob.java | 8 +-- .../java/io/druid/indexer/InputRowSerde.java | 58 +++++++++++-------- .../src/main/java/io/druid/indexer/Utils.java | 2 +- .../indexer/IndexGeneratorCombinerTest.java | 8 +-- .../io/druid/indexer/InputRowSerdeTest.java | 11 ++-- 8 files changed, 75 insertions(+), 44 deletions(-) diff --git a/api/src/main/java/io/druid/data/input/impl/MapInputRowParser.java b/api/src/main/java/io/druid/data/input/impl/MapInputRowParser.java index 5c168441dac2..49d40fd88c47 100644 --- a/api/src/main/java/io/druid/data/input/impl/MapInputRowParser.java +++ b/api/src/main/java/io/druid/data/input/impl/MapInputRowParser.java @@ -72,7 +72,7 @@ public List parseBatch(Map theMap) } } catch (Exception e) { - throw new ParseException(e, "Unparseable timestamp found! Event: " + theMap); + throw new ParseException(e, "Unparseable timestamp found! Event: %s", theMap); } return ImmutableList.of(new MapBasedInputRow(timestamp.getMillis(), dimensions, theMap)); diff --git a/api/src/main/java/io/druid/utils/CircularBuffer.java b/api/src/main/java/io/druid/utils/CircularBuffer.java index bac7277b367d..e5f8158e0efe 100644 --- a/api/src/main/java/io/druid/utils/CircularBuffer.java +++ b/api/src/main/java/io/druid/utils/CircularBuffer.java @@ -21,6 +21,11 @@ import com.google.common.base.Preconditions; +/** + * A circular buffer that supports random bidirectional access. + * + * @param Type of object to be stored in the buffer + */ public class CircularBuffer { public E[] getBuffer() @@ -52,8 +57,13 @@ public void add(E item) } } + /** + * Access object at a given index, starting from the latest entry added and moving backwards. + */ public E getLatest(int index) { + Preconditions.checkArgument(index >= 0 && index < size, "invalid index"); + int bufferIndex = start - index - 1; if (bufferIndex < 0) { bufferIndex = buffer.length + bufferIndex; @@ -61,6 +71,9 @@ public E getLatest(int index) return buffer[bufferIndex]; } + /** + * Access object at a given index, starting from the earliest entry added and moving forward. 
+ */ public E get(int index) { Preconditions.checkArgument(index >= 0 && index < size, "invalid index"); diff --git a/common/src/main/java/io/druid/indexer/Jobby.java b/common/src/main/java/io/druid/indexer/Jobby.java index c0f2d68c7c31..b0d26affdf40 100644 --- a/common/src/main/java/io/druid/indexer/Jobby.java +++ b/common/src/main/java/io/druid/indexer/Jobby.java @@ -19,6 +19,8 @@ package io.druid.indexer; +import io.druid.java.util.common.StringUtils; + import javax.annotation.Nullable; import java.util.Map; @@ -28,15 +30,26 @@ public interface Jobby { boolean run(); + /** + * @return A map containing statistics for a Jobby, optionally null if the Jobby is unable to provide stats. + */ @Nullable default Map getStats() { - throw new UnsupportedOperationException("This Jobby does not implement getJobStats()."); + throw new UnsupportedOperationException( + StringUtils.format("This Jobby does not implement getJobStats(), Jobby class: [%s]", getClass()) + ); } + /** + * @return A string representing the error that caused a Jobby to fail. Can be null if the Jobby did not fail, + * or is unable to provide an error message. + */ @Nullable default String getErrorMessage() { - throw new UnsupportedOperationException("This Jobby does not implement getErrorMessage()."); + throw new UnsupportedOperationException( + StringUtils.format("This Jobby does not implement getErrorMessage(), Jobby class: [%s]", getClass()) + ); } } diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java b/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java index ada3c6e704da..b5708b94354c 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java @@ -361,15 +361,13 @@ protected void innerMap( InputRowSerde.toBytes( typeHelperMap, inputRow, - aggsForSerializingSegmentInputRow, - reportParseExceptions + aggsForSerializingSegmentInputRow ) : InputRowSerde.toBytes( typeHelperMap, inputRow, - aggregators, - reportParseExceptions + aggregators ); context.write( @@ -468,7 +466,7 @@ private void flushIndexToContextAndClose(BytesWritable key, IncrementalIndex ind InputRow inputRow = getInputRowFromRow(row, dimensions); // reportParseExceptions is true as any unparseable data is already handled by the mapper. 
- InputRowSerde.SerializeResult serializeResult = InputRowSerde.toBytes(typeHelperMap, inputRow, combiningAggs, true); + InputRowSerde.SerializeResult serializeResult = InputRowSerde.toBytes(typeHelperMap, inputRow, combiningAggs); context.write( key, diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/InputRowSerde.java b/indexing-hadoop/src/main/java/io/druid/indexer/InputRowSerde.java index 3a44762599cc..4f0d9d4c81a3 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/InputRowSerde.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/InputRowSerde.java @@ -45,7 +45,6 @@ import io.druid.segment.serde.ComplexMetrics; import org.apache.hadoop.io.WritableUtils; -import javax.annotation.Nullable; import java.io.DataInput; import java.io.IOException; import java.util.ArrayList; @@ -67,8 +66,7 @@ public interface IndexSerdeTypeHelper { ValueType getType(); - @Nullable - String serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions); + void serialize(ByteArrayDataOutput out, Object value); T deserialize(ByteArrayDataInput in); } @@ -133,7 +131,7 @@ public ValueType getType() } @Override - public String serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions) + public void serialize(ByteArrayDataOutput out, Object value) { List values = Rows.objectToStrings(value); try { @@ -142,7 +140,6 @@ public String serialize(ByteArrayDataOutput out, Object value, boolean reportPar catch (IOException ioe) { throw new RuntimeException(ioe); } - return null; } @Override @@ -166,15 +163,15 @@ public ValueType getType() } @Override - public String serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions) + public void serialize(ByteArrayDataOutput out, Object value) { - String parseExceptionMessage = null; + ParseException exceptionToThrow = null; Long ret = null; try { - ret = DimensionHandlerUtils.convertObjectToLong(value, reportParseExceptions); + ret = DimensionHandlerUtils.convertObjectToLong(value, true); } catch (ParseException pe) { - parseExceptionMessage = pe.getMessage(); + exceptionToThrow = pe; } if (ret == null) { @@ -183,7 +180,10 @@ public String serialize(ByteArrayDataOutput out, Object value, boolean reportPar ret = DimensionHandlerUtils.ZERO_LONG; } out.writeLong(ret); - return parseExceptionMessage; + + if (exceptionToThrow != null) { + throw exceptionToThrow; + } } @Override @@ -202,15 +202,15 @@ public ValueType getType() } @Override - public String serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions) + public void serialize(ByteArrayDataOutput out, Object value) { - String parseExceptionMessage = null; + ParseException exceptionToThrow = null; Float ret = null; try { - ret = DimensionHandlerUtils.convertObjectToFloat(value, reportParseExceptions); + ret = DimensionHandlerUtils.convertObjectToFloat(value, true); } catch (ParseException pe) { - parseExceptionMessage = pe.getMessage(); + exceptionToThrow = pe; } if (ret == null) { @@ -219,7 +219,10 @@ public String serialize(ByteArrayDataOutput out, Object value, boolean reportPar ret = DimensionHandlerUtils.ZERO_FLOAT; } out.writeFloat(ret); - return parseExceptionMessage; + + if (exceptionToThrow != null) { + throw exceptionToThrow; + } } @Override @@ -238,15 +241,15 @@ public ValueType getType() } @Override - public String serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions) + public void serialize(ByteArrayDataOutput out, Object value) { - String parseExceptionMessage = null; + ParseException 
exceptionToThrow = null; Double ret = null; try { - ret = DimensionHandlerUtils.convertObjectToDouble(value, reportParseExceptions); + ret = DimensionHandlerUtils.convertObjectToDouble(value, true); } catch (ParseException pe) { - parseExceptionMessage = pe.getMessage(); + exceptionToThrow = pe; } if (ret == null) { @@ -255,7 +258,10 @@ public String serialize(ByteArrayDataOutput out, Object value, boolean reportPar ret = DimensionHandlerUtils.ZERO_DOUBLE; } out.writeDouble(ret); - return parseExceptionMessage; + + if (exceptionToThrow != null) { + throw exceptionToThrow; + } } @Override @@ -268,8 +274,7 @@ public Double deserialize(ByteArrayDataInput in) public static final SerializeResult toBytes( final Map typeHelperMap, final InputRow row, - AggregatorFactory[] aggs, - boolean reportParseExceptions + AggregatorFactory[] aggs ) { try { @@ -290,9 +295,12 @@ public static final SerializeResult toBytes( typeHelper = STRING_HELPER; } writeString(dim, out); - String parseExceptionMessage = typeHelper.serialize(out, row.getRaw(dim), true); - if (parseExceptionMessage != null) { - parseExceptionMessages.add(parseExceptionMessage); + + try { + typeHelper.serialize(out, row.getRaw(dim)); + } + catch (ParseException pe) { + parseExceptionMessages.add(pe.getMessage()); } } } diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/Utils.java b/indexing-hadoop/src/main/java/io/druid/indexer/Utils.java index 0729cf65ad72..1a899df18ee3 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/Utils.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/Utils.java @@ -145,7 +145,7 @@ public static String getFailureMessage(Job failedJob, ObjectMapper jsonMapper) return jsonMapper.writeValueAsString(taskDiagsMap); } catch (IOException | InterruptedException ie) { - log.error("couldn't get failure cause for job."); + log.error(ie, "couldn't get failure cause for job [%s]", failedJob.getJobName()); return null; } } diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorCombinerTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorCombinerTest.java index 4f0a22dd45f0..3bfb1fb39832 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorCombinerTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorCombinerTest.java @@ -175,8 +175,8 @@ public void testMultipleRowsMerged() throws Exception ) ); List rows = Lists.newArrayList( - new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row1, aggregators, true).getSerializedRow()), - new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row2, aggregators, true).getSerializedRow()) + new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row1, aggregators).getSerializedRow()), + new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row2, aggregators).getSerializedRow()) ); Reducer.Context context = EasyMock.createNiceMock(Reducer.Context.class); @@ -253,8 +253,8 @@ public void testMultipleRowsNotMerged() throws Exception Map typeHelperMap = InputRowSerde.getTypeHelperMap(dimensionsSpec); List rows = Lists.newArrayList( - new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row1, aggregators, true).getSerializedRow()), - new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row2, aggregators, true).getSerializedRow()) + new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row1, aggregators).getSerializedRow()), + new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row2, aggregators).getSerializedRow()) ); Reducer.Context context = 
EasyMock.createNiceMock(Reducer.Context.class); diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/InputRowSerdeTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/InputRowSerdeTest.java index 387d5b53512e..0b72d31a71aa 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/InputRowSerdeTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/InputRowSerdeTest.java @@ -123,7 +123,7 @@ public Aggregator factorize(ColumnSelectorFactory metricFactory) null ); - byte[] data = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, false) + byte[] data = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories) .getSerializedRow(); // Ignore Unparseable aggregator InputRow out = InputRowSerde.fromBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), data, aggregatorFactories); @@ -176,8 +176,7 @@ public void testThrowParseExceptions() InputRowSerde.SerializeResult result = InputRowSerde.toBytes( InputRowSerde.getTypeHelperMap(dimensionsSpec), in, - aggregatorFactories, - true + aggregatorFactories ); Assert.assertEquals( Arrays.asList("Unable to parse value[m3v] for field[m3]"), @@ -205,7 +204,7 @@ public void testDimensionParseExceptions() null, null ); - result = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, true); + result = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories); Assert.assertEquals( Arrays.asList("could not convert value [d1v] to long"), result.getParseExceptionMessages() @@ -218,7 +217,7 @@ public void testDimensionParseExceptions() null, null ); - result = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, true); + result = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories); Assert.assertEquals( Arrays.asList("could not convert value [d1v] to float"), result.getParseExceptionMessages() @@ -231,7 +230,7 @@ public void testDimensionParseExceptions() null, null ); - result = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, true); + result = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories); Assert.assertEquals( Arrays.asList("could not convert value [d1v] to double"), result.getParseExceptionMessages()