From c556409411f8d5b41bad7988bd42ee7bc5305ccc Mon Sep 17 00:00:00 2001 From: Damian Guy Date: Wed, 9 Aug 2017 20:02:17 +0100 Subject: [PATCH 1/8] restore state on the poll loop --- .../processor/internals/AbstractTask.java | 55 +- .../processor/internals/AssignedTasks.java | 319 ++++++++ .../processor/internals/ChangelogReader.java | 6 +- .../internals/ProcessorStateManager.java | 66 +- .../processor/internals/StandbyTask.java | 17 +- .../processor/internals/StateRestorer.java | 9 +- .../internals/StoreChangelogReader.java | 152 ++-- .../processor/internals/StreamTask.java | 16 +- .../processor/internals/StreamThread.java | 703 +++++++----------- .../kafka/streams/KafkaStreamsTest.java | 2 +- .../integration/ResetIntegrationTest.java | 8 +- .../processor/internals/AbstractTaskTest.java | 68 +- .../internals/AssignedTasksTest.java | 315 ++++++++ .../internals/ProcessorStateManagerTest.java | 32 +- .../processor/internals/StandbyTaskTest.java | 7 +- .../internals/StateRestorerTest.java | 4 +- .../internals/StoreChangelogReaderTest.java | 24 +- .../processor/internals/StreamTaskTest.java | 4 +- .../processor/internals/StreamThreadTest.java | 357 ++++----- .../StreamThreadStateStoreProviderTest.java | 2 + .../kafka/test/MockChangelogReader.java | 10 +- .../test/ProcessorTopologyTestDriver.java | 1 + 22 files changed, 1376 insertions(+), 801 deletions(-) create mode 100644 streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java create mode 100644 streams/src/test/java/org/apache/kafka/streams/processor/internals/AssignedTasksTest.java diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java index 8427e11b5188a..92485b77ad1f2 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java @@ -23,7 +23,9 @@ import org.apache.kafka.common.errors.AuthorizationException; import org.apache.kafka.common.errors.WakeupException; import org.apache.kafka.streams.StreamsConfig; +import org.apache.kafka.streams.errors.LockException; import org.apache.kafka.streams.errors.ProcessorStateException; +import org.apache.kafka.streams.errors.StreamsException; import org.apache.kafka.streams.processor.ProcessorContext; import org.apache.kafka.streams.processor.StateStore; import org.apache.kafka.streams.processor.TaskId; @@ -39,6 +41,7 @@ public abstract class AbstractTask { private static final Logger log = LoggerFactory.getLogger(AbstractTask.class); + private static final int STATE_DIR_LOCK_RETRIES = 5; final TaskId id; final String applicationId; @@ -48,6 +51,7 @@ public abstract class AbstractTask { final Consumer consumer; final String logPrefix; final boolean eosEnabled; + private final StateDirectory stateDirectory; InternalProcessorContext processorContext; @@ -69,6 +73,7 @@ public abstract class AbstractTask { this.topology = topology; this.consumer = consumer; this.eosEnabled = StreamsConfig.EXACTLY_ONCE.equals(config.getString(StreamsConfig.PROCESSING_GUARANTEE_CONFIG)); + this.stateDirectory = stateDirectory; logPrefix = String.format("%s [%s]", isStandby ? "standby-task" : "task", id()); @@ -93,7 +98,7 @@ public abstract class AbstractTask { public abstract void suspend(); public abstract void close(final boolean clean); - public final TaskId id() { + public TaskId id() { return id; } @@ -101,7 +106,7 @@ public final String applicationId() { return applicationId; } - public final Set partitions() { + public Set partitions() { return partitions; } @@ -188,6 +193,20 @@ void flushState() { } void initializeStateStores() { + if (topology.stateStores().isEmpty()) { + return; + } + + try { + if (!stateDirectory.lock(id, STATE_DIR_LOCK_RETRIES)) { + throw new LockException(String.format("%s Failed to lock the state directory for task %s", + logPrefix, id)); + } + } catch (IOException e) { + throw new StreamsException(String.format("%s fatal error while trying to lock the state directory for task %s", + logPrefix, + id)); + } log.trace("{} Initializing state stores", logPrefix); // set initial offset limits @@ -204,8 +223,38 @@ void initializeStateStores() { * @param writeCheckpoint boolean indicating if a checkpoint file should be written */ void closeStateManager(final boolean writeCheckpoint) throws ProcessorStateException { + ProcessorStateException exception = null; log.trace("{} Closing state manager", logPrefix); - stateMgr.close(writeCheckpoint ? recordCollectorOffsets() : null); + try { + stateMgr.close(writeCheckpoint ? recordCollectorOffsets() : null); + } catch (final ProcessorStateException e) { + exception = e; + } finally { + try { + stateDirectory.unlock(id); + } catch (IOException e) { + if (exception == null) { + exception = new ProcessorStateException(String.format("%s Failed to release state dir lock", logPrefix), e); + } + log.error("{} Failed to release state dir lock: ", logPrefix, e); + } + } + if (exception != null) { + throw exception; + } + } + + /** + * initialize the topology/state stores + * @return true if the topology is ready to run, i.e, all stores have been restored. + */ + public abstract boolean initialize(); + + boolean hasStateStores() { + return !topology.stateStores().isEmpty(); } + Collection changelogPartitions() { + return stateMgr.changelogPartitions(); + } } diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java new file mode 100644 index 0000000000000..bf387e2c78323 --- /dev/null +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java @@ -0,0 +1,319 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.processor.internals; + +import org.apache.kafka.clients.consumer.CommitFailedException; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.errors.ProducerFencedException; +import org.apache.kafka.streams.errors.LockException; +import org.apache.kafka.streams.processor.TaskId; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicReference; + +class AssignedTasks { + private static final Logger log = LoggerFactory.getLogger(AssignedTasks.class); + private final String logPrefix; + private final String taskTypeName; + private Map created = new HashMap<>(); + private Map suspended = new HashMap<>(); + private Map restoring = new HashMap<>(); + private Set restoredPartitions = new HashSet<>(); + // IQ may access this map. + private Map running = new ConcurrentHashMap<>(); + private Map runningByPartition = new HashMap<>(); + + AssignedTasks(final String logPrefix, final String taskTypeName) { + this.logPrefix = logPrefix; + this.taskTypeName = taskTypeName; + } + + void addNewTask(final T task) { + created.put(task.id(), task); + } + + Set uninitializedPartitions() { + if (created.isEmpty()) { + return Collections.emptySet(); + } + final Set partitions = new HashSet<>(); + for (final Map.Entry entry : created.entrySet()) { + if (entry.getValue().hasStateStores()) { + partitions.addAll(entry.getValue().partitions()); + } + } + return partitions; + } + + void initializeNewTasks() { + if (!created.isEmpty()) { + log.trace("{} initializing {}s {}", logPrefix, taskTypeName, created.keySet()); + } + for (final Iterator> it = created.entrySet().iterator(); it.hasNext(); ) { + final Map.Entry entry = it.next(); + try { + if (!entry.getValue().initialize()) { + restoring.put(entry.getKey(), entry.getValue()); + } else { + transitionToRunning(entry.getValue()); + } + it.remove(); + } catch (final LockException e) { + log.warn("{} Could not create {} {} due to {}; will retry", logPrefix, taskTypeName, entry.getKey(), e); + } + } + } + + Set updateRestored(final Collection restored) { + if (restored.isEmpty()) { + return Collections.emptySet(); + } + final Set resume = new HashSet<>(); + restoredPartitions.addAll(restored); + for (final Iterator> it = restoring.entrySet().iterator(); it.hasNext(); ) { + final Map.Entry entry = it.next(); + T task = entry.getValue(); + if (restoredPartitions.containsAll(task.changelogPartitions())) { + transitionToRunning(task); + resume.addAll(task.partitions()); + it.remove(); + } + } + if (allTasksRunning()) { + restoredPartitions.clear(); + } + return resume; + } + + boolean allTasksRunning() { + return created.isEmpty() + && suspended.isEmpty() + && restoring.isEmpty(); + } + + Collection running() { + return running.values(); + } + + RuntimeException suspend() { + final AtomicReference firstException = new AtomicReference<>(null); + log.trace("{} suspending running {} {}", logPrefix, taskTypeName, runningTaskIds()); + firstException.compareAndSet(null, suspendTasks(running.values(), suspended)); + log.trace("{} close restoring {} {}", logPrefix, taskTypeName, restoring.keySet()); + firstException.compareAndSet(null, closeRestoringTasks()); + running.clear(); + restoring.clear(); + created.clear(); + runningByPartition.clear(); + return firstException.get(); + } + + private RuntimeException closeRestoringTasks() { + RuntimeException exception = null; + for (final T task : restoring.values()) { + try { + task.close(false); + } catch (final RuntimeException e) { + log.error("{} failed to close restoring {}, {}", logPrefix, taskTypeName, task.id, e); + if (exception == null) { + exception = e; + } + } + } + return exception; + } + + private RuntimeException suspendTasks(final Collection tasks, final Map suspendedTasks) { + RuntimeException exception = null; + for (Iterator it = tasks.iterator(); it.hasNext(); ) { + final T task = it.next(); + try { + task.suspend(); + suspendedTasks.put(task.id(), task); + } catch (final CommitFailedException e) { + // commit failed during suspension. Just log it. + log.warn("{} Failed to commit {} {} state when suspending due to CommitFailedException", logPrefix, taskTypeName, task.id); + } catch (final ProducerFencedException e) { + closeZombieTask(task); + it.remove(); + } catch (final RuntimeException e) { + log.error("{} Suspending {} {} failed due to the following error:", logPrefix, taskTypeName, task.id, e); + try { + task.close(false); + } catch (final Exception f) { + log.error("{} After suspending failed, closing the same {} {} failed again due to the following error:", logPrefix, taskTypeName, task.id, f); + } + if (exception == null) { + exception = e; + } + } + } + return exception; + } + + private void closeZombieTask(final T task) { + log.warn("{} Producer of {} {} fenced; closing zombie task", logPrefix, taskTypeName, task.id); + try { + task.close(false); + } catch (final Exception e) { + log.warn("{} Failed to close zombie {} due to {}, ignore and proceed", taskTypeName, logPrefix, e); + } + } + + boolean hasRunningTasks() { + return !running.isEmpty(); + } + + boolean maybeResumeSuspendedTask(final TaskId taskId, final Set partitions) { + if (suspended.containsKey(taskId)) { + final T task = suspended.get(taskId); + if (task.partitions().equals(partitions)) { + suspended.remove(taskId); + log.trace("{} resuming suspended {} {}", logPrefix, taskTypeName, taskId); + task.resume(); + transitionToRunning(task); + return true; + } + } + return false; + } + + private void transitionToRunning(final T task) { + running.put(task.id(), task); + for (TopicPartition topicPartition : task.partitions()) { + runningByPartition.put(topicPartition, task); + } + for (TopicPartition topicPartition : task.changelogPartitions()) { + runningByPartition.put(topicPartition, task); + } + } + + T runningTaskFor(final TopicPartition partition) { + return runningByPartition.get(partition); + } + + Set runningTaskIds() { + return running.keySet(); + } + + Map runningTaskMap() { + return Collections.unmodifiableMap(running); + } + + public String toString(final String indent) { + final StringBuilder builder = new StringBuilder(); + describe(builder, running.values(), indent, "Running:"); + describe(builder, suspended.values(), indent, "Suspended:"); + describe(builder, restoring.values(), indent, "Restoring:"); + describe(builder, created.values(), indent, "New:"); + return builder.toString(); + } + + private void describe(final StringBuilder builder, + final Collection tasks, + final String indent, + final String name) { + builder.append(indent).append(name); + for (final T t : tasks) { + builder.append(indent).append(t.toString(indent + "\t\t")); + } + builder.append("\n"); + } + + List allInitializedTasks() { + final List tasks = new ArrayList<>(); + tasks.addAll(running.values()); + tasks.addAll(suspended.values()); + tasks.addAll(restoring.values()); + return tasks; + } + + Collection suspended() { + return suspended.values(); + } + + Collection restoring() { + return Collections.unmodifiableCollection(restoring.values()); + } + + Collection allAssignedTaskIds() { + final List taskIds = new ArrayList<>(); + taskIds.addAll(running.keySet()); + taskIds.addAll(suspended.keySet()); + taskIds.addAll(restoring.keySet()); + taskIds.addAll(created.keySet()); + return taskIds; + } + + void clear() { + runningByPartition.clear(); + running.clear(); + created.clear(); + suspended.clear(); + restoredPartitions.clear(); + } + + Set previousTasks() { + final Set previous = new HashSet<>(); + previous.addAll(suspended.keySet()); + return previous; + } + + RuntimeException applyToRunningTasks(final StreamThread.TaskAction action, final boolean throwException) { + RuntimeException firstException = null; + + for (Iterator it = running().iterator(); it.hasNext(); ) { + final T task = it.next(); + try { + action.apply(task); + } catch (final ProducerFencedException e) { + closeZombieTask(task); + it.remove(); + } catch (final RuntimeException t) { + log.error("{} Failed to {} {} {} due to the following error:", + logPrefix, + action.name(), + taskTypeName, + task.id(), + t); + if (throwException) { + throw t; + } + if (firstException == null) { + firstException = t; + } + } + } + + return firstException; + } + + public Collection restoringTaskIds() { + return restoring.keySet(); + } +} diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ChangelogReader.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ChangelogReader.java index 2e006a0c74498..d8ed35ee7644f 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ChangelogReader.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ChangelogReader.java @@ -18,6 +18,7 @@ import org.apache.kafka.common.TopicPartition; +import java.util.Collection; import java.util.Map; /** @@ -42,11 +43,14 @@ public interface ChangelogReader { /** * Restore all registered state stores by reading from their changelogs. + * @return all topic partitions that have been restored */ - void restore(); + Collection restore(); /** * @return the restored offsets for all persistent stores. */ Map restoredOffsets(); + + void reset(); } diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorStateManager.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorStateManager.java index 226d7eb82d133..c429feffe8b3a 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorStateManager.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorStateManager.java @@ -49,7 +49,6 @@ public class ProcessorStateManager implements StateManager { private final TaskId taskId; private final String logPrefix; private final boolean isStandby; - private final StateDirectory stateDirectory; private final ChangelogReader changelogReader; private final Map stores; private final Map globalStores; @@ -58,7 +57,7 @@ public class ProcessorStateManager implements StateManager { private final Map checkpointedOffsets; private final Map restoreCallbacks; // used for standby tasks, keyed by state topic name private final Map storeToChangelogTopic; - private final boolean eosEnabled; + private final List changelogPartitions = new ArrayList<>(); // TODO: this map does not work with customized grouper where multiple partitions // of the same topic can be assigned to the same topic. @@ -76,9 +75,8 @@ public ProcessorStateManager(final TaskId taskId, final StateDirectory stateDirectory, final Map storeToChangelogTopic, final ChangelogReader changelogReader, - final boolean eosEnabled) throws LockException, IOException { + final boolean eosEnabled) throws IOException { this.taskId = taskId; - this.stateDirectory = stateDirectory; this.changelogReader = changelogReader; logPrefix = String.format("task [%s]", taskId); @@ -93,12 +91,7 @@ public ProcessorStateManager(final TaskId taskId, this.isStandby = isStandby; restoreCallbacks = isStandby ? new HashMap() : null; this.storeToChangelogTopic = storeToChangelogTopic; - this.eosEnabled = eosEnabled; - if (!stateDirectory.lock(taskId, 5)) { - throw new LockException(String.format("%s Failed to lock the state directory for task %s", - logPrefix, taskId)); - } // get a handle on the parent/base directory of the task directory // note that the parent directory could have been accidentally deleted here, // so catch that exception if that is the case @@ -170,16 +163,19 @@ public void register(final StateStore store, restoreCallbacks.put(topic, stateRestoreCallback); } + } else { log.trace("{} Restoring state store {} from changelog topic {}", logPrefix, store.name(), topic); final StateRestorer restorer = new StateRestorer(storePartition, stateRestoreCallback, checkpointedOffsets.get(storePartition), offsetLimit(storePartition), - store.persistent()); + store.persistent(), + store.name()); + changelogReader.register(restorer); } - + changelogPartitions.add(storePartition); stores.put(store.name(), store); } @@ -273,38 +269,26 @@ public void flush() { @Override public void close(final Map ackedOffsets) throws ProcessorStateException { RuntimeException firstException = null; - try { - // attempting to close the stores, just in case they - // are not closed by a ProcessorNode yet - if (!stores.isEmpty()) { - log.debug("{} Closing its state manager and all the registered state stores", logPrefix); - for (final Map.Entry entry : stores.entrySet()) { - log.debug("{} Closing storage engine {}", logPrefix, entry.getKey()); - try { - entry.getValue().close(); - } catch (final Exception e) { - if (firstException == null) { - firstException = new ProcessorStateException(String.format("%s Failed to close state store %s", logPrefix, entry.getKey()), e); - } - log.error("{} Failed to close state store {}: ", logPrefix, entry.getKey(), e); + // attempting to close the stores, just in case they + // are not closed by a ProcessorNode yet + if (!stores.isEmpty()) { + log.debug("{} Closing its state manager and all the registered state stores", logPrefix); + for (final Map.Entry entry : stores.entrySet()) { + log.debug("{} Closing storage engine {}", logPrefix, entry.getKey()); + try { + entry.getValue().close(); + } catch (final Exception e) { + if (firstException == null) { + firstException = new ProcessorStateException(String.format("%s Failed to close state store %s", logPrefix, entry.getKey()), e); } + log.error("{} Failed to close state store {}: ", logPrefix, entry.getKey(), e); } - - if (ackedOffsets != null) { - checkpoint(ackedOffsets); - } - } - } finally { - // release the state directory directoryLock - try { - stateDirectory.unlock(taskId); - } catch (final IOException e) { - if (firstException == null) { - firstException = new ProcessorStateException(String.format("%s Failed to release state dir lock", logPrefix), e); - } - log.error("{} Failed to release state dir lock: ", logPrefix, e); + + if (ackedOffsets != null) { + checkpoint(ackedOffsets); } + } if (firstException != null) { @@ -359,4 +343,8 @@ void registerGlobalStateStores(final List stateStores) { public StateStore getGlobalStore(final String name) { return globalStores.get(name); } + + Collection changelogPartitions() { + return changelogPartitions; + } } diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java index df3bea4042566..53d93316a9b98 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java @@ -27,6 +27,7 @@ import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -36,7 +37,7 @@ public class StandbyTask extends AbstractTask { private static final Logger log = LoggerFactory.getLogger(StandbyTask.class); - private final Map checkpointedOffsets; + private Map checkpointedOffsets = new HashMap<>(); /** * Create {@link StandbyTask} with its assigned partitions @@ -63,11 +64,6 @@ public class StandbyTask extends AbstractTask { // initialize the topology with its own context processorContext = new StandbyContextImpl(id, applicationId, config, stateMgr, metrics); - - log.debug("{} Initializing", logPrefix); - initializeStateStores(); - processorContext.initialized(); - checkpointedOffsets = Collections.unmodifiableMap(stateMgr.checkpointed()); } /** @@ -77,7 +73,7 @@ public class StandbyTask extends AbstractTask { */ @Override public void resume() { - log.debug("{} " + "Resuming", logPrefix); + log.debug("{} Resuming", logPrefix); updateOffsetLimits(); } @@ -148,4 +144,11 @@ Map checkpointedOffsets() { return checkpointedOffsets; } + public boolean initialize() { + initializeStateStores(); + checkpointedOffsets = Collections.unmodifiableMap(stateMgr.checkpointed()); + processorContext.initialized(); + return true; + } + } \ No newline at end of file diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StateRestorer.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StateRestorer.java index 79bfd1d0f49d7..bfd266b40c717 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StateRestorer.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StateRestorer.java @@ -27,6 +27,7 @@ public class StateRestorer { private final boolean persistent; private final TopicPartition partition; private final StateRestoreCallback stateRestoreCallback; + private final String storeName; private long restoredOffset; private long startingOffset; @@ -35,12 +36,14 @@ public class StateRestorer { final StateRestoreCallback stateRestoreCallback, final Long checkpoint, final long offsetLimit, - final boolean persistent) { + final boolean persistent, + final String storeName) { this.partition = partition; this.stateRestoreCallback = stateRestoreCallback; this.checkpoint = checkpoint; this.offsetLimit = offsetLimit; this.persistent = persistent; + this.storeName = storeName; } public TopicPartition partition() { @@ -90,4 +93,8 @@ long offsetLimit() { private Long readTo(final long endOffset) { return endOffset < offsetLimit ? endOffset : offsetLimit; } + + String storeName() { + return storeName; + } } diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StoreChangelogReader.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StoreChangelogReader.java index 3d5a793b0a240..bfe37ab7d364f 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StoreChangelogReader.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StoreChangelogReader.java @@ -28,10 +28,10 @@ import org.slf4j.LoggerFactory; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -45,6 +45,9 @@ public class StoreChangelogReader implements ChangelogReader { private final long partitionValidationTimeoutMs; private final Map> partitionInfo = new HashMap<>(); private final Map stateRestorers = new HashMap<>(); + private final Map needsRestoring = new HashMap<>(); + private final Map endOffsets = new HashMap<>(); + private boolean initialized = false; public StoreChangelogReader(final String threadId, final Consumer consumer, final Time time, final long partitionValidationTimeoutMs) { this.time = time; @@ -92,69 +95,87 @@ public void validatePartitionExists(final TopicPartition topicPartition, final S @Override public void register(final StateRestorer restorer) { - if (restorer.offsetLimit() > 0) { - stateRestorers.put(restorer.partition(), restorer); - } + stateRestorers.put(restorer.partition(), restorer); } - public void restore() { - final long start = time.milliseconds(); - final Map needsRestoring = new HashMap<>(); - try { - if (!consumer.subscription().isEmpty()) { - throw new IllegalStateException(String.format("Restore consumer should have not subscribed to any partitions (%s) beforehand", consumer.subscription())); - } - final Map endOffsets = consumer.endOffsets(stateRestorers.keySet()); - - // remove any partitions where we already have all of the data - for (final Map.Entry entry : endOffsets.entrySet()) { - TopicPartition topicPartition = entry.getKey(); - Long offset = entry.getValue(); - final StateRestorer restorer = stateRestorers.get(topicPartition); - if (restorer.checkpoint() >= offset) { - restorer.setRestoredOffset(restorer.checkpoint()); - } else { - needsRestoring.put(topicPartition, restorer); - } + + public Collection restore() { + final List completed = new ArrayList<>(); + if (!initialized) { + completed.addAll(initialize()); + } + + if (needsRestoring.isEmpty()) { + consumer.assign(Collections.emptyList()); + return completed; + } + + final Set partitions = new HashSet<>(needsRestoring.keySet()); + final ConsumerRecords allRecords = consumer.poll(10); + for (final TopicPartition partition : partitions) { + if (restorePartition(allRecords, partition)) { + completed.add(partition); } + } - log.debug("{} Starting restoring state stores from changelog topics {}", logPrefix, needsRestoring.keySet()); - - consumer.assign(needsRestoring.keySet()); - final List needsPositionUpdate = new ArrayList<>(); - for (final StateRestorer restorer : needsRestoring.values()) { - if (restorer.checkpoint() != StateRestorer.NO_CHECKPOINT) { - consumer.seek(restorer.partition(), restorer.checkpoint()); - logRestoreOffsets(restorer.partition(), - restorer.checkpoint(), - endOffsets.get(restorer.partition())); - restorer.setStartingOffset(consumer.position(restorer.partition())); - } else { - consumer.seekToBeginning(Collections.singletonList(restorer.partition())); - needsPositionUpdate.add(restorer); - } + if (needsRestoring.isEmpty()) { + consumer.assign(Collections.emptyList()); + } + return completed; + } + + private Collection initialize() { + needsRestoring.clear(); + if (!consumer.subscription().isEmpty()) { + throw new IllegalStateException(String.format("Restore consumer should have not subscribed to any partitions (%s) beforehand", consumer.subscription())); + } + endOffsets.putAll(consumer.endOffsets(stateRestorers.keySet())); + + // remove any partitions where we already have all of the data + for (final Map.Entry entry : endOffsets.entrySet()) { + TopicPartition topicPartition = entry.getKey(); + Long offset = entry.getValue(); + final StateRestorer restorer = stateRestorers.get(topicPartition); + if (restorer.checkpoint() >= offset) { + log.debug("{} not restoring partition {} as checkpoint is >= offset", topicPartition); + restorer.setRestoredOffset(restorer.checkpoint()); + } else if (restorer.offsetLimit() == 0) { + log.debug("{} not restoring partition {} as offset limit is 0", topicPartition); + restorer.setRestoredOffset(0); + } else { + needsRestoring.put(topicPartition, restorer); } + } - for (final StateRestorer restorer : needsPositionUpdate) { - final long position = consumer.position(restorer.partition()); - restorer.setStartingOffset(position); + log.debug("{} Starting restoring state stores from changelog topics {}", logPrefix, needsRestoring.keySet()); + + consumer.assign(needsRestoring.keySet()); + final List needsPositionUpdate = new ArrayList<>(); + for (final StateRestorer restorer : needsRestoring.values()) { + if (restorer.checkpoint() != StateRestorer.NO_CHECKPOINT) { + consumer.seek(restorer.partition(), restorer.checkpoint()); logRestoreOffsets(restorer.partition(), - position, + restorer.checkpoint(), endOffsets.get(restorer.partition())); + restorer.setStartingOffset(consumer.position(restorer.partition())); + } else { + consumer.seekToBeginning(Collections.singletonList(restorer.partition())); + needsPositionUpdate.add(restorer); } + } - final Set partitions = new HashSet<>(needsRestoring.keySet()); - while (!partitions.isEmpty()) { - final ConsumerRecords allRecords = consumer.poll(10); - final Iterator partitionIterator = partitions.iterator(); - while (partitionIterator.hasNext()) { - restorePartition(endOffsets, allRecords, partitionIterator); - } - } - } finally { - consumer.assign(Collections.emptyList()); - log.info("{} Completed restore all active states from changelog topics {} in {}ms ", logPrefix, needsRestoring.keySet(), time.milliseconds() - start); + for (final StateRestorer restorer : needsPositionUpdate) { + final long position = consumer.position(restorer.partition()); + restorer.setStartingOffset(position); + logRestoreOffsets(restorer.partition(), + position, + endOffsets.get(restorer.partition())); } + + final Set completed = new HashSet<>(stateRestorers.keySet()); + completed.removeAll(needsRestoring.keySet()); + initialized = true; + return completed; } private void logRestoreOffsets(final TopicPartition partition, final long startingOffset, final Long endOffset) { @@ -177,13 +198,21 @@ public Map restoredOffsets() { return restoredOffsets; } - private void restorePartition(final Map endOffsets, - final ConsumerRecords allRecords, - final Iterator partitionIterator) { - final TopicPartition topicPartition = partitionIterator.next(); + @Override + public void reset() { + partitionInfo.clear(); + stateRestorers.clear(); + needsRestoring.clear(); + endOffsets.clear(); + initialized = false; + } + + private boolean restorePartition(final ConsumerRecords allRecords, + final TopicPartition topicPartition) { final StateRestorer restorer = stateRestorers.get(topicPartition); final Long endOffset = endOffsets.get(topicPartition); final long pos = processNext(allRecords.records(topicPartition), restorer, endOffset); + restorer.setRestoredOffset(pos); if (restorer.hasCompleted(pos, endOffset)) { if (pos > endOffset + 1) { throw new IllegalStateException( @@ -193,8 +222,6 @@ private void restorePartition(final Map endOffsets, pos)); } - restorer.setRestoredOffset(pos); - log.debug("{} Completed restoring state from changelog {} with {} records ranging from offset {} to {}", logPrefix, topicPartition, @@ -202,11 +229,14 @@ private void restorePartition(final Map endOffsets, restorer.startingOffset(), restorer.restoredOffset()); - partitionIterator.remove(); + needsRestoring.remove(topicPartition); + return true; } + return false; } - private long processNext(final List> records, final StateRestorer restorer, final Long endOffset) { + private long processNext(final List> records, + final StateRestorer restorer, final Long endOffset) { for (final ConsumerRecord record : records) { final long offset = record.offset(); if (restorer.hasCompleted(offset, endOffset)) { diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java index 2d7b494573d5a..cc6353781a9c7 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java @@ -137,16 +137,13 @@ public StreamTask(final TaskId id, // initialize the topology with its own context processorContext = new ProcessorContextImpl(id, this, config, recordCollector, stateMgr, metrics, cache); this.time = time; - log.debug("{} Initializing", logPrefix); - initializeStateStores(); + stateMgr.registerGlobalStateStores(topology.globalStateStores()); if (eosEnabled) { this.producer.initTransactions(); this.producer.beginTransaction(); transactionInFlight = true; } - initTopology(); - processorContext.initialized(); } /** @@ -157,7 +154,7 @@ public StreamTask(final TaskId id, */ @Override public void resume() { - log.debug("{} Resuming", logPrefix); + log.debug("{} Resuming {}", logPrefix, id); if (eosEnabled) { producer.beginTransaction(); transactionInFlight = true; @@ -323,7 +320,7 @@ private void commitOffsets(final boolean startNewTransaction) { } } - private void initTopology() { + void initTopology() { // initialize the task by initializing all its processor nodes in the topology log.trace("{} Initializing processor nodes of the topology", logPrefix); for (final ProcessorNode node : topology.processors()) { @@ -552,4 +549,11 @@ RecordCollector createRecordCollector() { return new RecordCollectorImpl(producer, id.toString()); } + public boolean initialize() { + initializeStateStores(); + initTopology(); + processorContext.initialized(); + return topology.stateStores().isEmpty(); + } + } diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java index c2da0cb6bd51a..43fc06708a52c 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java @@ -27,8 +27,6 @@ import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.common.KafkaException; import org.apache.kafka.common.TopicPartition; -import org.apache.kafka.common.config.ConfigDef; -import org.apache.kafka.common.config.ConfigDef.Type; import org.apache.kafka.common.errors.ProducerFencedException; import org.apache.kafka.common.metrics.Metrics; import org.apache.kafka.common.metrics.Sensor; @@ -40,7 +38,6 @@ import org.apache.kafka.common.utils.Time; import org.apache.kafka.streams.KafkaClientSupplier; import org.apache.kafka.streams.StreamsConfig; -import org.apache.kafka.streams.errors.LockException; import org.apache.kafka.streams.errors.StreamsException; import org.apache.kafka.streams.errors.TaskIdFormatException; import org.apache.kafka.streams.processor.PartitionGrouper; @@ -63,8 +60,8 @@ import java.util.Map; import java.util.Set; import java.util.UUID; -import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.regex.Pattern; @@ -79,43 +76,56 @@ public class StreamThread extends Thread { * Stream thread states are the possible states that a stream thread can be in. * A thread must only be in one state at a time * The expected state transitions with the following defined states is: - * + *

*

      *                +-------------+
-     *                | Created     |
-     *                +-----+-------+
-     *                      |
-     *                      v
-     *                +-----+-------+
+     *          +<--- | Created     |
+     *          |     +-----+-------+
+     *          |           |
+     *          |           v
+     *          |     +-----+-------+
      *          +<--- | Running     | <----+
      *          |     +-----+-------+      |
      *          |           |              |
      *          |           v              |
      *          |     +-----+-------+      |
-     *          +<--- | Partitions  |      |
-     *          |     | Revoked     |      |
+     *          +<--- | Partitions  | <-+  |
+     *          |     | Revoked     | --+  |
      *          |     +-----+-------+      |
      *          |           |              |
      *          |           v              |
      *          |     +-----+-------+      |
-     *          |     | Assigning   |      |
-     *          |     | Partitions  | ---->+
+     *          +<--- | Assigning   |      |
+     *          |     | Partitions  |      |
+     *          |     +-----+-------+      |
+     *          |           |              |
+     *          |           v              |
+     *          |     +-----+-------+      |
+     *          |     | Partitions  |      |
+     *          +<--- | Assigned    | +--> +
      *          |     +-----+-------+
      *          |           |
      *          |           v
      *          |     +-----+-------+
      *          +---> | Pending     |
-     *                | Shutdown    |
-     *                +-----+-------+
-     *                      |
-     *                      v
-     *                +-----+-------+
-     *                | Dead        |
+     *          |     | Shutdown    |
+     *          |     +-----+-------+
+     *          |           |
+     *          |           v
+     *          |     +-----+-------+
+     *          +---> | Dead        |
      *                +-------------+
      * 
+ *

+ * Note the following: + * - Any state can go to PENDING_SHUTDOWN. That is because streams can be closed at any time. + * - Any state can go to DEAD. That is because exceptions can happen at any other state, + * leading to the stream thread terminating. + * - A streams thread can stay in PARTITIONS_REVOKED indefinitely, in the corner case when + * the coordinator repeatedly fails in-between revoking partitions and assigning new partitions. */ public enum State { - CREATED(1), RUNNING(1, 2, 4), PARTITIONS_REVOKED(3, 4), ASSIGNING_PARTITIONS(1, 4), PENDING_SHUTDOWN(5), DEAD; + CREATED(1, 5, 6), RUNNING(2, 5, 6), PARTITIONS_REVOKED(2, 3, 5, 6), ASSIGNING_PARTITIONS(4, 5, 6), PARTITIONS_ASSIGNED(1, 2, 5, 6), PENDING_SHUTDOWN(6), DEAD; private final Set validTransitions = new HashSet<>(); @@ -148,74 +158,73 @@ public interface StateListener { private class RebalanceListener implements ConsumerRebalanceListener { private final Time time; - private final int requestTimeOut; - RebalanceListener(final Time time, final int requestTimeOut) { + RebalanceListener(final Time time) { this.time = time; - this.requestTimeOut = requestTimeOut; } @Override public void onPartitionsAssigned(final Collection assignment) { log.debug("{} at state {}: new partitions {} assigned at the end of consumer rebalance.\n" + - "\tassigned active tasks: {}\n" + - "\tassigned standby tasks: {}\n" + - "\tcurrent suspended active tasks: {}\n" + - "\tcurrent suspended standby tasks: {}\n" + - "\tprevious active tasks: {}", - logPrefix, - state, - assignment, - partitionAssignor.activeTasks().keySet(), - partitionAssignor.standbyTasks().keySet(), - suspendedTasks.keySet(), - suspendedStandbyTasks.keySet(), - prevActiveTasks); + "\tassigned active tasks: {}\n" + + "\tassigned standby tasks: {}\n" + + "\tcurrent suspended active tasks: {}\n" + + "\tcurrent suspended standby tasks: {}\n", + logPrefix, + state, + assignment, + partitionAssignor.activeTasks().keySet(), + partitionAssignor.standbyTasks().keySet(), + active.previousTasks(), + standby.previousTasks()); final long start = time.milliseconds(); try { - storeChangelogReader = new StoreChangelogReader(getName(), restoreConsumer, time, requestTimeOut); - setStateWhenNotInPendingShutdown(State.ASSIGNING_PARTITIONS); + + if (!setStateWhenNotInPendingShutdown(State.ASSIGNING_PARTITIONS)) { + return; + } // do this first as we may have suspended standby tasks that // will become active or vice versa closeNonAssignedSuspendedStandbyTasks(); closeNonAssignedSuspendedTasks(); - addStreamTasks(assignment, start); - storeChangelogReader.restore(); - addStandbyTasks(start); + addStreamTasks(assignment); + addStandbyTasks(); streamsMetadataState.onChange(partitionAssignor.getPartitionsByHostState(), partitionAssignor.clusterMetadata()); - lastCleanMs = time.milliseconds(); // start the cleaning cycle - setStateWhenNotInPendingShutdown(State.RUNNING); + storeChangelogReader.reset(); + Set partitions = active.uninitializedPartitions(); + log.trace("{} pausing partitions: {}", logPrefix, partitions); + consumer.pause(partitions); + setStateWhenNotInPendingShutdown(State.PARTITIONS_ASSIGNED); } catch (final Throwable t) { rebalanceException = t; throw t; } finally { log.info("{} partition assignment took {} ms.\n" + - "\tcurrent active tasks: {}\n" + - "\tcurrent standby tasks: {}\n" + - "\tprevious active tasks: {}\n", - logPrefix, - time.milliseconds() - start, - activeTasks.keySet(), - standbyTasks.keySet(), - prevActiveTasks); + "\tcurrent active tasks: {}\n" + + "\tcurrent standby tasks: {}\n" + + "\tprevious active tasks: {}\n", + logPrefix, + time.milliseconds() - start, + active.allAssignedTaskIds(), + standby.allAssignedTaskIds(), + active.previousTasks()); } } @Override public void onPartitionsRevoked(final Collection assignment) { log.debug("{} at state {}: partitions {} revoked at the beginning of consumer rebalance.\n" + - "\tcurrent assigned active tasks: {}\n" + - "\tcurrent assigned standby tasks: {}\n", - logPrefix, - state, - assignment, - activeTasks.keySet(), standbyTasks.keySet()); + "\tcurrent assigned active tasks: {}\n" + + "\tcurrent assigned standby tasks: {}\n", + logPrefix, + state, + assignment, + active.runningTaskIds(), standby.runningTaskIds()); final long start = time.milliseconds(); try { setStateWhenNotInPendingShutdown(State.PARTITIONS_REVOKED); - lastCleanMs = Long.MAX_VALUE; // stop the cleaning cycle until partitions are assigned // suspend active tasks suspendTasksAndState(); } catch (final Throwable t) { @@ -223,57 +232,25 @@ public void onPartitionsRevoked(final Collection assignment) { throw t; } finally { streamsMetadataState.onChange(Collections.>emptyMap(), partitionAssignor.clusterMetadata()); - removeStreamTasks(); - removeStandbyTasks(); + standbyRecords.clear(); log.info("{} partition revocation took {} ms.\n" + - "\tsuspended active tasks: {}\n" + - "\tsuspended standby tasks: {}", - logPrefix, - time.milliseconds() - start, - suspendedTasks.keySet(), - suspendedStandbyTasks.keySet()); + "\tsuspended active tasks: {}\n" + + "\tsuspended standby tasks: {}", + logPrefix, + time.milliseconds() - start, + active.previousTasks(), + standby.previousTasks()); } } } abstract class AbstractTaskCreator { - final static long MAX_BACKOFF_TIME_MS = 1000L; - void retryWithBackoff(final Map> tasksToBeCreated, final long start) { - long backoffTimeMs = 50L; - final Set retryingTasks = new HashSet<>(); - while (true) { - final Iterator>> it = tasksToBeCreated.entrySet().iterator(); - while (it.hasNext()) { - final Map.Entry> newTaskAndPartitions = it.next(); - final TaskId taskId = newTaskAndPartitions.getKey(); - final Set partitions = newTaskAndPartitions.getValue(); - - try { - createTask(taskId, partitions); - it.remove(); - backoffTimeMs = 50L; - retryingTasks.remove(taskId); - } catch (final LockException e) { - // ignore and retry - if (!retryingTasks.contains(taskId)) { - log.warn("{} Could not create task {} due to {}; will retry", logPrefix, taskId, e); - retryingTasks.add(taskId); - } - } - } - - if (tasksToBeCreated.isEmpty() || time.milliseconds() - start > rebalanceTimeoutMs) { - break; - } - - try { - Thread.sleep(backoffTimeMs); - backoffTimeMs <<= 1; - backoffTimeMs = Math.min(backoffTimeMs, MAX_BACKOFF_TIME_MS); - } catch (final InterruptedException e) { - // ignore - } + void createTasks(final Map> tasksToBeCreated) { + for (final Map.Entry> newTaskAndPartitions : tasksToBeCreated.entrySet()) { + final TaskId taskId = newTaskAndPartitions.getKey(); + final Set partitions = newTaskAndPartitions.getValue(); + createTask(taskId, partitions); } } @@ -283,33 +260,25 @@ void retryWithBackoff(final Map> tasksToBeCreated, f class TaskCreator extends AbstractTaskCreator { @Override void createTask(final TaskId taskId, final Set partitions) { - final StreamTask task = createStreamTask(taskId, partitions); - - activeTasks.put(taskId, task); - - for (final TopicPartition partition : partitions) { - activeTasksByPartition.put(partition, task); - } + active.addNewTask(createStreamTask(taskId, partitions)); } } class StandbyTaskCreator extends AbstractTaskCreator { - private final Map checkpointedOffsets; - - StandbyTaskCreator(final Map checkpointedOffsets) { - this.checkpointedOffsets = checkpointedOffsets; - } @Override void createTask(final TaskId taskId, final Set partitions) { final StandbyTask task = createStandbyTask(taskId, partitions); - updateStandByTaskMaps(checkpointedOffsets, taskId, partitions, task); + if (task != null) { + standby.addNewTask(task); + } } } - interface StreamTaskAction { + interface TaskAction { String name(); - void apply(final StreamTask task); + + void apply(final T task); } /** @@ -395,17 +364,11 @@ void removeAllSensors() { private final String logPrefix; private final String threadClientId; private final Pattern sourceTopicPattern; - private final Map activeTasks; - private final Map standbyTasks; - private final Map activeTasksByPartition; - private final Map standbyTasksByPartition; - private final Set prevActiveTasks; - private final Map suspendedTasks; - private final Map suspendedStandbyTasks; + private final AssignedTasks active; + private final AssignedTasks standby; + private final Time time; - private final int rebalanceTimeoutMs; private final long pollTimeMs; - private final long cleanTimeMs; private final long commitTimeMs; private final StreamsMetricsThreadImpl streamsMetrics; // TODO: this is not private only for tests, should be better refactored @@ -413,7 +376,6 @@ void removeAllSensors() { private String originalReset; private StreamPartitionAssignor partitionAssignor; private long timerStartedMs; - private long lastCleanMs; private long lastCommitMs; private Throwable rebalanceException = null; private final boolean eosEnabled; @@ -422,7 +384,7 @@ void removeAllSensors() { private boolean processStandbyRecords = false; private final ThreadCache cache; - private StoreChangelogReader storeChangelogReader; + final StoreChangelogReader storeChangelogReader; private final TaskCreator taskCreator = new TaskCreator(); @@ -475,32 +437,23 @@ public StreamThread(final TopologyBuilder builder, consumer = clientSupplier.getConsumer(consumerConfigs); log.info("{} Creating restore consumer client", logPrefix); restoreConsumer = clientSupplier.getRestoreConsumer(config.getRestoreConsumerConfigs(threadClientId)); - // initialize the task list - // activeTasks needs to be concurrent as it can be accessed - // by QueryableState - activeTasks = new ConcurrentHashMap<>(); - standbyTasks = new HashMap<>(); - activeTasksByPartition = new HashMap<>(); - standbyTasksByPartition = new HashMap<>(); - prevActiveTasks = new HashSet<>(); - suspendedTasks = new HashMap<>(); - suspendedStandbyTasks = new HashMap<>(); - // standby KTables standbyRecords = new HashMap<>(); + this.stateDirectory = stateDirectory; - final Object maxPollInterval = consumerConfigs.get(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG); - rebalanceTimeoutMs = (Integer) ConfigDef.parseType(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, maxPollInterval, Type.INT); pollTimeMs = config.getLong(StreamsConfig.POLL_MS_CONFIG); commitTimeMs = config.getLong(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG); - cleanTimeMs = config.getLong(StreamsConfig.STATE_CLEANUP_DELAY_MS_CONFIG); this.time = time; timerStartedMs = time.milliseconds(); - lastCleanMs = Long.MAX_VALUE; // the cleaning cycle won't start until partition assignment lastCommitMs = timerStartedMs; - rebalanceListener = new RebalanceListener(time, config.getInt(ConsumerConfig.REQUEST_TIMEOUT_MS_CONFIG)); + final Integer requestTimeOut = config.getInt(ConsumerConfig.REQUEST_TIMEOUT_MS_CONFIG); + rebalanceListener = new RebalanceListener(time); + active = new AssignedTasks<>(logPrefix, "stream task"); + standby = new AssignedTasks<>(logPrefix, "standby task"); + storeChangelogReader = + new StoreChangelogReader(getName(), restoreConsumer, time, requestTimeOut); setState(State.RUNNING); } @@ -539,27 +492,53 @@ private void runLoop() { consumer.subscribe(sourceTopicPattern, rebalanceListener); while (stillRunning()) { - timerStartedMs = time.milliseconds(); - - // try to fetch some records if necessary - final ConsumerRecords records = pollRequests(); - if (records != null && !records.isEmpty() && !activeTasks.isEmpty()) { - streamsMetrics.pollTimeSensor.record(computeLatency(), timerStartedMs); - addRecordsToTasks(records); - final long totalProcessed = processAndPunctuate(activeTasks, recordsProcessedBeforeCommit); - if (totalProcessed > 0) { - final long processLatency = computeLatency(); - streamsMetrics.processTimeSensor.record(processLatency / (double) totalProcessed, - timerStartedMs); - recordsProcessedBeforeCommit = adjustRecordsProcessedBeforeCommit(recordsProcessedBeforeCommit, totalProcessed, - processLatency, commitTimeMs); - } + recordsProcessedBeforeCommit = runOnce(recordsProcessedBeforeCommit); + } + log.info("{} Shutting down at user request", logPrefix); + } + + // Visible for testing + long runOnce(final long recordsProcessedBeforeCommit) { + long processedBeforeCommit = recordsProcessedBeforeCommit; + timerStartedMs = time.milliseconds(); + + // try to fetch some records if necessary + final ConsumerRecords records = pollRequests(); + + if (state == State.PARTITIONS_ASSIGNED) { + active.initializeNewTasks(); + standby.initializeNewTasks(); + + final Collection restored = storeChangelogReader.restore(); + final Set resumed = active.updateRestored(restored); + + if (!resumed.isEmpty()) { + log.trace("{} resuming partitions {}", logPrefix, resumed); + consumer.resume(resumed); } - maybeCommit(timerStartedMs); - maybeUpdateStandbyTasks(timerStartedMs); + if (active.allTasksRunning()) { + assignStandbyPartitions(); + setState(State.RUNNING); + } } - log.info("{} Shutting down at user request", logPrefix); + + if (records != null && !records.isEmpty() && active.hasRunningTasks()) { + streamsMetrics.pollTimeSensor.record(computeLatency(), timerStartedMs); + addRecordsToTasks(records); + final long totalProcessed = processAndPunctuate(recordsProcessedBeforeCommit); + if (totalProcessed > 0) { + final long processLatency = computeLatency(); + streamsMetrics.processTimeSensor.record(processLatency / (double) totalProcessed, + timerStartedMs); + processedBeforeCommit = adjustRecordsProcessedBeforeCommit(recordsProcessedBeforeCommit, totalProcessed, + processLatency, commitTimeMs); + } + } + + maybeCommit(timerStartedMs); + maybeUpdateStandbyTasks(timerStartedMs); + return processedBeforeCommit; } /** @@ -637,7 +616,7 @@ private void addRecordsToTasks(final ConsumerRecords records) { int numAddedRecords = 0; for (final TopicPartition partition : records.partitions()) { - final StreamTask task = activeTasksByPartition.get(partition); + final StreamTask task = active.runningTaskFor(partition); numAddedRecords += task.addRecords(partition, records.records(partition)); } streamsMetrics.skippedRecordsSensor.record(records.count() - numAddedRecords, timerStartedMs); @@ -647,52 +626,47 @@ private void addRecordsToTasks(final ConsumerRecords records) { /** * Schedule the records processing by selecting which record is processed next. Commits may * happen as records are processed. - * @param tasks The tasks that have records. * @param recordsProcessedBeforeCommit number of records to be processed before commit is called. * if UNLIMITED_RECORDS, then commit is never called * @return Number of records processed since last commit. */ - private long processAndPunctuate(final Map tasks, - final long recordsProcessedBeforeCommit) { + private long processAndPunctuate(final long recordsProcessedBeforeCommit) { - long totalProcessedEachRound; - long totalProcessedSinceLastMaybeCommit = 0; + final AtomicLong totalProcessedEachRound = new AtomicLong(0); + final AtomicLong totalProcessedSinceLastMaybeCommit = new AtomicLong(0); // Round-robin scheduling by taking one record from each task repeatedly // until no task has any records left do { - totalProcessedEachRound = 0; - final Iterator> it = tasks.entrySet().iterator(); - while (it.hasNext()) { - final StreamTask task = it.next().getValue(); - try { - // we processed one record, - // if more are buffered waiting for the next round + totalProcessedEachRound.set(0); + active.applyToRunningTasks(new TaskAction() { + @Override + public String name() { + return "process"; + } - // TODO: We should check for stream time punctuation right after each process call - // of the task instead of only calling it after all records being processed + @Override + public void apply(final StreamTask task) { if (task.process()) { - totalProcessedEachRound++; - totalProcessedSinceLastMaybeCommit++; + totalProcessedEachRound.incrementAndGet(); + totalProcessedSinceLastMaybeCommit.incrementAndGet(); } - } catch (final ProducerFencedException e) { - closeZombieTask(task); - it.remove(); } - } + }, true); if (recordsProcessedBeforeCommit != UNLIMITED_RECORDS && - totalProcessedSinceLastMaybeCommit >= recordsProcessedBeforeCommit) { - totalProcessedSinceLastMaybeCommit = 0; + totalProcessedSinceLastMaybeCommit.get() >= recordsProcessedBeforeCommit) { + totalProcessedSinceLastMaybeCommit.set(0); final long processLatency = computeLatency(); - streamsMetrics.processTimeSensor.record(processLatency / (double) totalProcessedSinceLastMaybeCommit, + streamsMetrics.processTimeSensor.record(processLatency / (double) totalProcessedSinceLastMaybeCommit.get(), timerStartedMs); maybeCommit(timerStartedMs); } - } while (totalProcessedEachRound != 0); + } while (totalProcessedEachRound.get() != 0); // go over the tasks again to punctuate or commit - final RuntimeException e = performOnStreamTasks(new StreamTaskAction() { - private String name; + + final RuntimeException e = active.applyToRunningTasks(new TaskAction() { + String name; @Override public String name() { return name; @@ -711,17 +685,17 @@ public void apply(final StreamTask task) { if (log.isDebugEnabled()) { log.debug("{} Committed active task {} per user request in {}ms", - logPrefix, task.id(), timerStartedMs - beforeCommitMs); + logPrefix, task.id(), timerStartedMs - beforeCommitMs); } } } - }); + }, false); if (e != null) { throw e; } - return totalProcessedSinceLastMaybeCommit; + return totalProcessedSinceLastMaybeCommit.get(); } private void maybePunctuate(final StreamTask task) { @@ -775,14 +749,14 @@ protected void maybeCommit(final long now) { if (commitTimeMs >= 0 && lastCommitMs + commitTimeMs < now) { if (log.isTraceEnabled()) { log.trace("{} Committing all active tasks {} and standby tasks {} since {}ms has elapsed (commit interval is {}ms)", - logPrefix, activeTasks.keySet(), standbyTasks.keySet(), now - lastCommitMs, commitTimeMs); + logPrefix, active.runningTaskIds(), standby.runningTaskIds(), now - lastCommitMs, commitTimeMs); } commitAll(); if (log.isDebugEnabled()) { log.info("{} Committed all active tasks {} and standby tasks {} in {}ms", - logPrefix, activeTasks.keySet(), standbyTasks.keySet(), timerStartedMs - now); + logPrefix, active.runningTaskIds(), standby.runningTaskIds(), timerStartedMs - now); } lastCommitMs = now; @@ -795,22 +769,23 @@ protected void maybeCommit(final long now) { * Commit the states of all its tasks */ private void commitAll() { - final RuntimeException e = performOnStreamTasks(new StreamTaskAction() { + + final RuntimeException e = active.applyToRunningTasks(new TaskAction() { @Override public String name() { return "commit"; } @Override - public void apply(final StreamTask task) { - commitOne(task); + public void apply(final StreamTask task1) { + commitOne(task1); } - }); + }, false); if (e != null) { throw e; } - for (final StandbyTask task : standbyTasks.values()) { + for (final StandbyTask task : standby.running()) { commitOne(task); } } @@ -833,8 +808,29 @@ private void commitOne(final AbstractTask task) { streamsMetrics.commitTimeSensor.record(computeLatency(), timerStartedMs); } + private void assignStandbyPartitions() { + final Collection running = standby.running(); + final Map checkpointedOffsets = new HashMap<>(); + for (StandbyTask standbyTask : running) { + checkpointedOffsets.putAll(standbyTask.checkpointedOffsets()); + } + + final List assignment = new ArrayList<>(checkpointedOffsets.keySet()); + restoreConsumer.assign(assignment); + for (final Map.Entry entry : checkpointedOffsets.entrySet()) { + final TopicPartition partition = entry.getKey(); + final long offset = entry.getValue(); + if (offset >= 0) { + restoreConsumer.seek(partition, offset); + } else { + restoreConsumer.seekToBeginning(singleton(partition)); + } + } + log.trace("{} assigned {} partitions to restore consumer for standby tasks {}", logPrefix, assignment, standby.runningTaskIds()); + } + private void maybeUpdateStandbyTasks(final long now) { - if (!standbyTasks.isEmpty()) { + if (state == State.RUNNING && standby.hasRunningTasks()) { if (processStandbyRecords) { if (!standbyRecords.isEmpty()) { final Map>> remainingStandbyRecords = new HashMap<>(); @@ -843,7 +839,7 @@ private void maybeUpdateStandbyTasks(final long now) { final TopicPartition partition = entry.getKey(); List> remaining = entry.getValue(); if (remaining != null) { - final StandbyTask task = standbyTasksByPartition.get(partition); + final StandbyTask task = standby.runningTaskFor(partition); remaining = task.update(partition, remaining); if (remaining != null) { remainingStandbyRecords.put(partition, remaining); @@ -855,7 +851,7 @@ private void maybeUpdateStandbyTasks(final long now) { standbyRecords = remainingStandbyRecords; - log.debug("{} Updated standby tasks {} in {}ms", logPrefix, standbyTasks.keySet(), time.milliseconds() - now); + log.debug("{} Updated standby tasks {} in {}ms", logPrefix, standby.runningTaskIds(), time.milliseconds() - now); } processStandbyRecords = false; } @@ -864,7 +860,7 @@ private void maybeUpdateStandbyTasks(final long now) { if (!records.isEmpty()) { for (final TopicPartition partition : records.partitions()) { - final StandbyTask task = standbyTasksByPartition.get(partition); + final StandbyTask task = standby.runningTaskFor(partition); if (task == null) { throw new StreamsException(logPrefix + " Missing standby task for partition " + partition); @@ -910,14 +906,14 @@ public synchronized boolean stillRunning() { } public Map tasks() { - return Collections.unmodifiableMap(activeTasks); + return active.runningTaskMap(); } /** * Returns ids of tasks that were being executed before the rebalance. */ public Set prevActiveTasks() { - return Collections.unmodifiableSet(prevActiveTasks); + return Collections.unmodifiableSet(active.previousTasks()); } /** @@ -965,11 +961,12 @@ public synchronized State state() { return state; } - private synchronized void setStateWhenNotInPendingShutdown(final State newState) { + private synchronized boolean setStateWhenNotInPendingShutdown(final State newState) { if (state == State.PENDING_SHUTDOWN) { - return; + return false; } setState(newState); + return true; } private synchronized void setState(final State newState) { @@ -1007,23 +1004,11 @@ public String toString(final String indent) { .append(indent).append("\tStreamsThread clientId: ").append(clientId).append("\n") .append(indent).append("\tStreamsThread threadId: ").append(getName()).append("\n"); - // iterate and print active tasks - if (activeTasks != null) { - sb.append(indent).append("\tActive tasks:\n"); - for (final Map.Entry entry : activeTasks.entrySet()) { - final StreamTask task = entry.getValue(); - sb.append(indent).append(task.toString(indent + "\t\t")); - } - } - - // iterate and print standby tasks - if (standbyTasks != null) { - sb.append(indent).append("\tStandby tasks:\n"); - for (final StandbyTask task : standbyTasks.values()) { - sb.append(indent).append(task.toString(indent + "\t\t")); - } - sb.append("\n"); - } + sb.append(indent).append("\tActive tasks:\n"); + sb.append(active.toString(indent + "\t\t")); + sb.append(indent).append("\tStandby tasks:\n"); + sb.append(standby.toString(indent + "\t\t")); + sb.append("\n"); return sb.toString(); } @@ -1036,7 +1021,8 @@ void setPartitionAssignor(final StreamPartitionAssignor partitionAssignor) { this.partitionAssignor = partitionAssignor; } - private void shutdown(final boolean cleanRun) { + // Visible for testing + void shutdown(final boolean cleanRun) { log.info("{} Shutting down", logPrefix); shutdownTasksAndState(cleanRun); @@ -1064,8 +1050,8 @@ private void shutdown(final boolean cleanRun) { log.error("{} Failed to close KafkaStreamClient due to the following error:", logPrefix, e); } - removeStreamTasks(); - removeStandbyTasks(); + active.clear(); + standby.clear(); // clean up global tasks @@ -1077,18 +1063,18 @@ private void shutdown(final boolean cleanRun) { @SuppressWarnings("ThrowableNotThrown") private void shutdownTasksAndState(final boolean cleanRun) { log.debug("{} Shutting down all active tasks {}, standby tasks {}, suspended tasks {}, and suspended standby tasks {}", - logPrefix, activeTasks.keySet(), standbyTasks.keySet(), - suspendedTasks.keySet(), suspendedStandbyTasks.keySet()); + logPrefix, active.runningTaskIds(), standby.runningTaskIds(), + active.previousTasks(), standby.previousTasks()); for (final AbstractTask task : allTasks()) { try { task.close(cleanRun); } catch (final RuntimeException e) { log.error("{} Failed while closing {} {} due to the following error:", - logPrefix, - task.getClass().getSimpleName(), - task.id(), - e); + logPrefix, + task.getClass().getSimpleName(), + task.id(), + e); } } @@ -1102,58 +1088,15 @@ private void shutdownTasksAndState(final boolean cleanRun) { */ private void suspendTasksAndState() { log.debug("{} Suspending all active tasks {} and standby tasks {}", - logPrefix, activeTasks.keySet(), standbyTasks.keySet()); + logPrefix, active.runningTaskIds(), standby.runningTaskIds()); final AtomicReference firstException = new AtomicReference<>(null); - firstException.compareAndSet(null, performOnStreamTasks(new StreamTaskAction() { - @Override - public String name() { - return "suspend"; - } - - @Override - public void apply(final StreamTask task) { - try { - task.suspend(); - } catch (final CommitFailedException e) { - // commit failed during suspension. Just log it. - log.warn("{} Failed to commit task {} state when suspending due to CommitFailedException", logPrefix, task.id); - } catch (final Exception e) { - log.error("{} Suspending task {} failed due to the following error:", logPrefix, task.id, e); - try { - task.close(false); - } catch (final Exception f) { - log.error("{} After suspending failed, closing the same task {} failed again due to the following error:", logPrefix, task.id, f); - } - throw e; - } - } - })); - - for (final StandbyTask task : standbyTasks.values()) { - try { - try { - task.suspend(); - } catch (final Exception e) { - log.error("{} Suspending standby task {} failed due to the following error:", logPrefix, task.id, e); - try { - task.close(false); - } catch (final Exception f) { - log.error("{} After suspending failed, closing the same standby task {} failed again due to the following error:", logPrefix, task.id, f); - } - throw e; - } - } catch (final RuntimeException e) { - firstException.compareAndSet(null, e); - } - } - + firstException.compareAndSet(null, active.suspend()); + firstException.compareAndSet(null, standby.suspend()); // remove the changelog partitions from restore consumer firstException.compareAndSet(null, unAssignChangeLogPartitions()); - updateSuspendedTasks(); - if (firstException.get() != null) { throw new StreamsException(logPrefix + " failed to suspend stream tasks", firstException.get()); } @@ -1171,56 +1114,23 @@ private RuntimeException unAssignChangeLogPartitions() { } private List allTasks() { - final List tasks = activeAndStandbytasks(); - tasks.addAll(suspendedAndSuspendedStandbytasks()); - return tasks; - } - - private List activeAndStandbytasks() { - final List tasks = new ArrayList(activeTasks.values()); - tasks.addAll(standbyTasks.values()); + final List tasks = active.allInitializedTasks(); + tasks.addAll(standby.allInitializedTasks()); return tasks; } - private List suspendedAndSuspendedStandbytasks() { - final List tasks = new ArrayList(suspendedTasks.values()); - tasks.addAll(suspendedStandbyTasks.values()); - return tasks; - } - - private StreamTask findMatchingSuspendedTask(final TaskId taskId, final Set partitions) { - if (suspendedTasks.containsKey(taskId)) { - final StreamTask task = suspendedTasks.get(taskId); - if (task.partitions().equals(partitions)) { - return task; - } - } - return null; - } - - private StandbyTask findMatchingSuspendedStandbyTask(final TaskId taskId, final Set partitions) { - if (suspendedStandbyTasks.containsKey(taskId)) { - final StandbyTask task = suspendedStandbyTasks.get(taskId); - if (task.partitions().equals(partitions)) { - return task; - } - } - return null; - } - private void closeNonAssignedSuspendedTasks() { final Map> newTaskAssignment = partitionAssignor.activeTasks(); - final Iterator> suspendedTaskIterator = suspendedTasks.entrySet().iterator(); + final Iterator suspendedTaskIterator = active.suspended().iterator(); while (suspendedTaskIterator.hasNext()) { - final Map.Entry next = suspendedTaskIterator.next(); - final StreamTask task = next.getValue(); - final Set assignedPartitionsForTask = newTaskAssignment.get(next.getKey()); + final StreamTask task = suspendedTaskIterator.next(); + final Set assignedPartitionsForTask = newTaskAssignment.get(task.id); if (!task.partitions().equals(assignedPartitionsForTask)) { log.debug("{} Closing suspended and not re-assigned task {}", logPrefix, task.id()); try { task.closeSuspended(true, null); } catch (final Exception e) { - log.error("{} Failed to close suspended task {} due to the following error:", logPrefix, next.getKey(), e); + log.error("{} Failed to close suspended task {} due to the following error:", logPrefix, task.id, e); } finally { suspendedTaskIterator.remove(); } @@ -1229,12 +1139,11 @@ private void closeNonAssignedSuspendedTasks() { } private void closeNonAssignedSuspendedStandbyTasks() { - final Set currentSuspendedTaskIds = partitionAssignor.standbyTasks().keySet(); - final Iterator> standByTaskIterator = suspendedStandbyTasks.entrySet().iterator(); + final Set newStandbyTaskIds = partitionAssignor.standbyTasks().keySet(); + final Iterator standByTaskIterator = standby.suspended().iterator(); while (standByTaskIterator.hasNext()) { - final Map.Entry suspendedTask = standByTaskIterator.next(); - if (!currentSuspendedTaskIds.contains(suspendedTask.getKey())) { - final StandbyTask task = suspendedTask.getValue(); + final StandbyTask task = standByTaskIterator.next(); + if (!newStandbyTaskIds.contains(task.id)) { log.debug("{} Closing suspended and not re-assigned standby task {}", logPrefix, task.id()); try { task.close(true); @@ -1253,18 +1162,18 @@ protected StreamTask createStreamTask(final TaskId id, final Collection createProducer(final TaskId id) { return producer; } - private void addStreamTasks(final Collection assignment, final long start) { + private void addStreamTasks(final Collection assignment) { if (partitionAssignor == null) { throw new IllegalStateException(logPrefix + " Partition assignor has not been initialized while adding stream tasks: this should not happen."); } @@ -1305,17 +1214,7 @@ private void addStreamTasks(final Collection assignment, final l if (assignment.containsAll(partitions)) { try { - final StreamTask task = findMatchingSuspendedTask(taskId, partitions); - if (task != null) { - suspendedTasks.remove(taskId); - task.resume(); - - activeTasks.put(taskId, task); - - for (final TopicPartition partition : partitions) { - activeTasksByPartition.put(partition, task); - } - } else { + if (!active.maybeResumeSuspendedTask(taskId, partitions)) { newTasks.put(taskId, partitions); } } catch (final StreamsException e) { @@ -1331,7 +1230,7 @@ private void addStreamTasks(final Collection assignment, final l // -> other thread will call removeSuspendedTasks(); eventually log.trace("{} New active tasks to be created: {}", logPrefix, newTasks); - taskCreator.retryWithBackoff(newTasks, start); + taskCreator.createTasks(newTasks); } // visible for testing @@ -1353,13 +1252,11 @@ protected StandbyTask createStandbyTask(final TaskId id, final Collection checkpointedOffsets = new HashMap<>(); - final Map> newStandbyTasks = new HashMap<>(); log.debug("{} Adding assigned standby tasks {}", logPrefix, partitionAssignor.standbyTasks()); @@ -1367,116 +1264,18 @@ private void addStandbyTasks(final long start) { for (final Map.Entry> entry : partitionAssignor.standbyTasks().entrySet()) { final TaskId taskId = entry.getKey(); final Set partitions = entry.getValue(); - final StandbyTask task = findMatchingSuspendedStandbyTask(taskId, partitions); - - if (task != null) { - suspendedStandbyTasks.remove(taskId); - task.resume(); - } else { + if (!standby.maybeResumeSuspendedTask(taskId, partitions)) { newStandbyTasks.put(taskId, partitions); } - updateStandByTaskMaps(checkpointedOffsets, taskId, partitions, task); } // create all newly assigned standby tasks (guard against race condition with other thread via backoff and retry) // -> other thread will call removeSuspendedStandbyTasks(); eventually log.trace("{} New standby tasks to be created: {}", logPrefix, newStandbyTasks); - new StandbyTaskCreator(checkpointedOffsets).retryWithBackoff(newStandbyTasks, start); - - restoreConsumer.assign(new ArrayList<>(checkpointedOffsets.keySet())); - - for (final Map.Entry entry : checkpointedOffsets.entrySet()) { - final TopicPartition partition = entry.getKey(); - final long offset = entry.getValue(); - if (offset >= 0) { - restoreConsumer.seek(partition, offset); - } else { - restoreConsumer.seekToBeginning(singleton(partition)); - } - } - } - - private void updateStandByTaskMaps(final Map checkpointedOffsets, - final TaskId taskId, - final Set partitions, - final StandbyTask task) { - if (task != null) { - standbyTasks.put(taskId, task); - for (final TopicPartition partition : partitions) { - standbyTasksByPartition.put(partition, task); - } - // collect checked pointed offsets to position the restore consumer - // this include all partitions from which we restore states - for (final TopicPartition partition : task.checkpointedOffsets().keySet()) { - standbyTasksByPartition.put(partition, task); - } - checkpointedOffsets.putAll(task.checkpointedOffsets()); - } - } - - private void updateSuspendedTasks() { - suspendedTasks.clear(); - suspendedTasks.putAll(activeTasks); - suspendedStandbyTasks.putAll(standbyTasks); - } - - private void removeStreamTasks() { - log.debug("{} Removing all active tasks {}", logPrefix, activeTasks.keySet()); - - try { - prevActiveTasks.clear(); - prevActiveTasks.addAll(activeTasks.keySet()); - - activeTasks.clear(); - activeTasksByPartition.clear(); - } catch (final Exception e) { - log.error("{} Failed to remove stream tasks due to the following error:", logPrefix, e); - } - } - - private void removeStandbyTasks() { - log.debug("{} Removing all standby tasks {}", logPrefix, standbyTasks.keySet()); - - standbyTasks.clear(); - standbyTasksByPartition.clear(); - standbyRecords.clear(); - } - - private void closeZombieTask(final StreamTask task) { - log.warn("{} Producer of task {} fenced; closing zombie task", logPrefix, task.id); - try { - task.close(false); - } catch (final Exception e) { - log.warn("{} Failed to close zombie task due to {}, ignore and proceed", logPrefix, e); - } - activeTasks.remove(task.id); + new StandbyTaskCreator().createTasks(newStandbyTasks); } - private RuntimeException performOnStreamTasks(final StreamTaskAction action) { - RuntimeException firstException = null; - final Iterator> it = activeTasks.entrySet().iterator(); - while (it.hasNext()) { - final StreamTask task = it.next().getValue(); - try { - action.apply(task); - } catch (final ProducerFencedException e) { - closeZombieTask(task); - it.remove(); - } catch (final RuntimeException t) { - log.error("{} Failed to {} stream task {} due to the following error:", - logPrefix, - action.name(), - task.id(), - t); - if (firstException == null) { - firstException = t; - } - } - } - - return firstException; - } } diff --git a/streams/src/test/java/org/apache/kafka/streams/KafkaStreamsTest.java b/streams/src/test/java/org/apache/kafka/streams/KafkaStreamsTest.java index a03b7cc380a03..2483a1b999d5f 100644 --- a/streams/src/test/java/org/apache/kafka/streams/KafkaStreamsTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/KafkaStreamsTest.java @@ -319,7 +319,7 @@ public void shouldCleanupOldStateDirs() throws InterruptedException { CLUSTER.createTopic(topic); final KStreamBuilder builder = new KStreamBuilder(); - builder.stream(Serdes.String(), Serdes.String(), topic); + builder.table(Serdes.String(), Serdes.String(), topic, topic); final KafkaStreams streams = new KafkaStreams(builder, props); final CountDownLatch latch = new CountDownLatch(1); diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java index 3cff7f7809649..7868981cc0c3d 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java @@ -132,7 +132,7 @@ public void cleanup() throws Exception { public void testReprocessingFromScratchAfterResetWithIntermediateUserTopic() throws Exception { CLUSTER.createTopic(INTERMEDIATE_USER_TOPIC); - final Properties streamsConfiguration = prepareTest(); + final Properties streamsConfiguration = prepareTest(4); final Properties resultTopicConsumerConfig = TestUtils.consumerConfig( CLUSTER.bootstrapServers(), APP_ID + "-standard-consumer-" + OUTPUT_TOPIC, @@ -198,7 +198,7 @@ public void testReprocessingFromScratchAfterResetWithIntermediateUserTopic() thr @Test public void testReprocessingFromScratchAfterResetWithoutIntermediateUserTopic() throws Exception { - final Properties streamsConfiguration = prepareTest(); + final Properties streamsConfiguration = prepareTest(1); final Properties resultTopicConsumerConfig = TestUtils.consumerConfig( CLUSTER.bootstrapServers(), APP_ID + "-standard-consumer-" + OUTPUT_TOPIC, @@ -241,14 +241,14 @@ public void testReprocessingFromScratchAfterResetWithoutIntermediateUserTopic() cleanGlobal(null); } - private Properties prepareTest() throws Exception { + private Properties prepareTest(final int threads) throws Exception { final Properties streamsConfiguration = new Properties(); streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, APP_ID + testNo); streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath()); streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.Long().getClass()); streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass()); - streamsConfiguration.put(StreamsConfig.NUM_STREAM_THREADS_CONFIG, 4); + streamsConfiguration.put(StreamsConfig.NUM_STREAM_THREADS_CONFIG, threads); streamsConfiguration.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0); streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 100); streamsConfiguration.put(ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG, 100); diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/AbstractTaskTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/AbstractTaskTest.java index 123cbf07ad1ab..d5675e7250568 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/AbstractTaskTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/AbstractTaskTest.java @@ -24,60 +24,104 @@ import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.errors.AuthorizationException; import org.apache.kafka.common.errors.WakeupException; -import org.apache.kafka.common.utils.MockTime; import org.apache.kafka.common.utils.Time; import org.apache.kafka.streams.StreamsConfig; +import org.apache.kafka.streams.errors.LockException; import org.apache.kafka.streams.errors.ProcessorStateException; import org.apache.kafka.streams.processor.StateStore; import org.apache.kafka.streams.processor.TaskId; import org.apache.kafka.test.TestUtils; +import org.easymock.EasyMock; +import org.junit.Before; import org.junit.Test; +import java.io.IOException; import java.util.Collections; +import java.util.List; import java.util.Properties; +import static org.junit.Assert.fail; + public class AbstractTaskTest { + private final TaskId id = new TaskId(0, 0); + private StateDirectory stateDirectory = EasyMock.createMock(StateDirectory.class); + + @Before + public void before() { + EasyMock.expect(stateDirectory.directoryForTask(id)).andReturn(TestUtils.tempDirectory()); + } + @Test(expected = ProcessorStateException.class) public void shouldThrowProcessorStateExceptionOnInitializeOffsetsWhenAuthorizationException() throws Exception { final Consumer consumer = mockConsumer(new AuthorizationException("blah")); - final AbstractTask task = createTask(consumer); + final AbstractTask task = createTask(consumer, Collections.emptyList()); task.updateOffsetLimits(); } @Test(expected = ProcessorStateException.class) public void shouldThrowProcessorStateExceptionOnInitializeOffsetsWhenKafkaException() throws Exception { final Consumer consumer = mockConsumer(new KafkaException("blah")); - final AbstractTask task = createTask(consumer); + final AbstractTask task = createTask(consumer, Collections.emptyList()); task.updateOffsetLimits(); } @Test(expected = WakeupException.class) public void shouldThrowWakeupExceptionOnInitializeOffsetsWhenWakeupException() throws Exception { final Consumer consumer = mockConsumer(new WakeupException()); - final AbstractTask task = createTask(consumer); + final AbstractTask task = createTask(consumer, Collections.emptyList()); task.updateOffsetLimits(); } - private AbstractTask createTask(final Consumer consumer) { - final MockTime time = new MockTime(); + @Test + public void shouldThrowLockExceptionIfFailedToLockStateDirectoryWhenTopologyHasStores() throws IOException { + final Consumer consumer = EasyMock.createNiceMock(Consumer.class); + final StateStore store = EasyMock.createNiceMock(StateStore.class); + EasyMock.expect(stateDirectory.lock(id, 5)).andReturn(false); + EasyMock.replay(stateDirectory); + + final AbstractTask task = createTask(consumer, Collections.singletonList(store)); + + try { + task.initializeStateStores(); + fail("Should have thrown LockException"); + } catch (final LockException e) { + // ok + } + + } + + @Test + public void shouldNotAttemptToLockIfNoStores() throws IOException { + final Consumer consumer = EasyMock.createNiceMock(Consumer.class); + EasyMock.replay(stateDirectory); + + final AbstractTask task = createTask(consumer, Collections.emptyList()); + + task.initializeStateStores(); + + // should fail if lock is called + EasyMock.verify(stateDirectory); + } + + private AbstractTask createTask(final Consumer consumer, final List stateStores) { final Properties properties = new Properties(); properties.put(StreamsConfig.APPLICATION_ID_CONFIG, "app-id"); properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummyhost:9092"); final StreamsConfig config = new StreamsConfig(properties); - return new AbstractTask(new TaskId(0, 0), + return new AbstractTask(id, "app", Collections.singletonList(new TopicPartition("t", 0)), new ProcessorTopology(Collections.emptyList(), Collections.emptyMap(), Collections.emptyMap(), - Collections.emptyList(), + stateStores, Collections.emptyMap(), Collections.emptyList()), consumer, new StoreChangelogReader(consumer, Time.SYSTEM, 5000), false, - new StateDirectory("app", TestUtils.tempDirectory().getPath(), time), + stateDirectory, config) { @Override public void resume() {} @@ -90,6 +134,12 @@ public void suspend() {} @Override public void close(final boolean clean) {} + + @Override + public boolean initialize() { + return false; + } + }; } diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/AssignedTasksTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/AssignedTasksTest.java new file mode 100644 index 0000000000000..f9fde1a0a69b6 --- /dev/null +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/AssignedTasksTest.java @@ -0,0 +1,315 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kafka.streams.processor.internals; + +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.errors.ProducerFencedException; +import org.apache.kafka.common.utils.Utils; +import org.apache.kafka.streams.processor.TaskId; +import org.easymock.EasyMock; +import org.junit.Before; +import org.junit.Test; + +import java.util.Collection; +import java.util.Collections; +import java.util.Set; + +import static org.hamcrest.CoreMatchers.not; +import static org.hamcrest.CoreMatchers.nullValue; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; + +public class AssignedTasksTest { + + private final AssignedTasks assignedTasks = new AssignedTasks<>("log", "task"); + private final AbstractTask t1 = EasyMock.createMock(AbstractTask.class); + private final AbstractTask t2 = EasyMock.createMock(AbstractTask.class); + private final TopicPartition tp1 = new TopicPartition("t1", 0); + private final TopicPartition tp2 = new TopicPartition("t2", 0); + private final TopicPartition changeLog1 = new TopicPartition("cl1", 0); + private final TopicPartition changeLog2 = new TopicPartition("cl2", 0); + private final TaskId taskId1 = new TaskId(0, 0); + private final TaskId taskId2 = new TaskId(1, 0); + + @Before + public void before() { + EasyMock.expect(t1.id()).andReturn(taskId1).anyTimes(); + EasyMock.expect(t2.id()).andReturn(taskId2).anyTimes(); + } + + @Test + public void shouldGetPartitionsFromNewTasksThatHaveStateStores() { + EasyMock.expect(t1.hasStateStores()).andReturn(true); + EasyMock.expect(t2.hasStateStores()).andReturn(true); + EasyMock.expect(t1.partitions()).andReturn(Collections.singleton(tp1)); + EasyMock.expect(t2.partitions()).andReturn(Collections.singleton(tp2)); + EasyMock.replay(t1, t2); + + assignedTasks.addNewTask(t1); + assignedTasks.addNewTask(t2); + + final Set partitions = assignedTasks.uninitializedPartitions(); + assertThat(partitions, equalTo(Utils.mkSet(tp1, tp2))); + EasyMock.verify(t1, t2); + } + + @Test + public void shouldNotGetPartitionsFromNewTasksWithoutStateStores() { + EasyMock.expect(t1.hasStateStores()).andReturn(false); + EasyMock.expect(t2.hasStateStores()).andReturn(false); + EasyMock.replay(t1, t2); + + assignedTasks.addNewTask(t1); + assignedTasks.addNewTask(t2); + + final Set partitions = assignedTasks.uninitializedPartitions(); + assertTrue(partitions.isEmpty()); + EasyMock.verify(t1, t2); + } + + @Test + public void shouldInitializeNewTasks() { + EasyMock.expect(t1.initialize()).andReturn(false); + EasyMock.replay(t1); + + assignedTasks.addNewTask(t1); + assignedTasks.initializeNewTasks(); + + EasyMock.verify(t1); + } + + @Test + public void shouldMoveInitializedTasksNeedingRestoreToRestoring() { + EasyMock.expect(t1.initialize()).andReturn(false); + EasyMock.expect(t2.initialize()).andReturn(true); + EasyMock.expect(t2.partitions()).andReturn(Collections.singleton(tp2)); + EasyMock.expect(t2.changelogPartitions()).andReturn(Collections.emptyList()); + + EasyMock.replay(t1, t2); + + assignedTasks.addNewTask(t1); + assignedTasks.addNewTask(t2); + + assignedTasks.initializeNewTasks(); + + Collection restoring = assignedTasks.restoring(); + assertThat(restoring.size(), equalTo(1)); + assertSame(restoring.iterator().next(), t1); + } + + @Test + public void shouldMoveInitializedTasksThatDontNeedRestoringToRunning() { + EasyMock.expect(t2.initialize()).andReturn(true); + EasyMock.expect(t2.partitions()).andReturn(Collections.singleton(tp2)); + EasyMock.expect(t2.changelogPartitions()).andReturn(Collections.emptyList()); + + EasyMock.replay(t2); + + assignedTasks.addNewTask(t2); + assignedTasks.initializeNewTasks(); + + assertThat(assignedTasks.runningTaskIds(), equalTo(Collections.singleton(taskId2))); + } + + @Test + public void shouldTransitionFullyRestoredTasksToRunning() { + final Set task1Partitions = Utils.mkSet(tp1); + EasyMock.expect(t1.initialize()).andReturn(false); + EasyMock.expect(t1.partitions()).andReturn(task1Partitions).anyTimes(); + EasyMock.expect(t1.changelogPartitions()).andReturn(Utils.mkSet(changeLog1, changeLog2)).anyTimes(); + EasyMock.replay(t1); + + assignedTasks.addNewTask(t1); + + assignedTasks.initializeNewTasks(); + + assertTrue(assignedTasks.updateRestored(Utils.mkSet(changeLog1)).isEmpty()); + Set partitions = assignedTasks.updateRestored(Utils.mkSet(changeLog2)); + assertThat(partitions, equalTo(task1Partitions)); + assertThat(assignedTasks.runningTaskIds(), equalTo(Collections.singleton(taskId1))); + } + + @Test + public void shouldSuspendRunningTasks() { + mockRunningTaskSuspension(); + EasyMock.replay(t1); + + suspendTask(); + + assertThat(assignedTasks.previousTasks(), equalTo(Collections.singleton(taskId1))); + EasyMock.verify(t1); + } + + @Test + public void shouldNotSuspendUnInitializedTasks() { + EasyMock.replay(t1); + + assignedTasks.addNewTask(t1); + assignedTasks.suspend(); + + EasyMock.verify(t1); + } + + @Test + public void shouldNotSuspendSuspendedTasks() { + mockRunningTaskSuspension(); + EasyMock.replay(t1); + + suspendTask(); + assignedTasks.suspend(); + EasyMock.verify(t1); + } + + @Test + public void shouldCloseTaskOnSuspendWhenRuntimeException() { + mockInitializedTask(); + t1.suspend(); + EasyMock.expectLastCall().andThrow(new RuntimeException("KABOOM!")); + t1.close(false); + EasyMock.expectLastCall(); + EasyMock.replay(t1); + + assertThat(suspendTask(), not(nullValue())); + assertTrue(assignedTasks.previousTasks().isEmpty()); + EasyMock.verify(t1); + } + + @Test + public void shouldCloseTaskOnSuspendWhenProducerFencedException() { + mockInitializedTask(); + t1.suspend(); + EasyMock.expectLastCall().andThrow(new ProducerFencedException("KABOOM!")); + t1.close(false); + EasyMock.expectLastCall(); + EasyMock.replay(t1); + + assertThat(suspendTask(), nullValue()); + assertTrue(assignedTasks.previousTasks().isEmpty()); + EasyMock.verify(t1); + } + + private void mockInitializedTask() { + EasyMock.expect(t1.initialize()).andReturn(true); + EasyMock.expect(t1.partitions()).andReturn(Collections.singleton(tp1)); + EasyMock.expect(t1.changelogPartitions()).andReturn(Collections.emptyList()); + } + + @Test + public void shouldResumeMatchingSuspendedTasks() { + mockRunningTaskSuspension(); + t1.resume(); + EasyMock.expectLastCall(); + EasyMock.replay(t1); + + suspendTask(); + + assertTrue(assignedTasks.maybeResumeSuspendedTask(taskId1, Collections.singleton(tp1))); + assertThat(assignedTasks.runningTaskIds(), equalTo(Collections.singleton(taskId1))); + assertTrue(assignedTasks.previousTasks().isEmpty()); + EasyMock.verify(t1); + } + + + @SuppressWarnings("unchecked") + @Test + public void shouldApplyActionToRunningTasks() { + StreamThread.TaskAction taskAction = EasyMock.createMock(StreamThread.TaskAction.class); + taskAction.apply(EasyMock.anyObject(AbstractTask.class)); + EasyMock.expectLastCall(); + mockInitializedTask(); + + EasyMock.replay(t1, taskAction); + + assignedTasks.addNewTask(t1); + assignedTasks.initializeNewTasks(); + + assignedTasks.applyToRunningTasks(taskAction, false); + + EasyMock.verify(taskAction); + } + + @SuppressWarnings("unchecked") + @Test + public void shouldNotApplyActionToNotRunningTasks() { + final StreamThread.TaskAction taskAction = EasyMock.createMock(StreamThread.TaskAction.class); + EasyMock.expect(t1.initialize()).andReturn(false); + + EasyMock.replay(t1, taskAction); + + assignedTasks.addNewTask(t1); + assignedTasks.initializeNewTasks(); + + assignedTasks.applyToRunningTasks(taskAction, false); + + EasyMock.verify(taskAction); + } + + @SuppressWarnings({"unchecked", "ThrowableNotThrown"}) + @Test + public void shouldCloseTaskOnApplyToRunningIfProducerFencedException() { + final StreamThread.TaskAction taskAction = EasyMock.createMock(StreamThread.TaskAction.class); + taskAction.apply(EasyMock.anyObject(AbstractTask.class)); + EasyMock.expectLastCall().andThrow(new ProducerFencedException("BOOM!")); + mockInitializedTask(); + t1.close(false); + EasyMock.expectLastCall(); + EasyMock.replay(t1, taskAction); + + assignedTasks.addNewTask(t1); + assignedTasks.initializeNewTasks(); + + assignedTasks.applyToRunningTasks(taskAction, false); + assertTrue(assignedTasks.running().isEmpty()); + EasyMock.verify(t1, taskAction); + } + + @SuppressWarnings("unchecked") + @Test + public void shouldReturnExceptionAndNotCloseTaskOnApplyToRunningWhenRuntimeException() { + final StreamThread.TaskAction taskAction = EasyMock.createMock(StreamThread.TaskAction.class); + taskAction.apply(EasyMock.anyObject(AbstractTask.class)); + EasyMock.expectLastCall().andThrow(new RuntimeException("KABOOM!")); + EasyMock.expect(taskAction.name()).andReturn("name"); + mockInitializedTask(); + EasyMock.replay(t1, taskAction); + + assignedTasks.addNewTask(t1); + assignedTasks.initializeNewTasks(); + + assertThat(assignedTasks.applyToRunningTasks(taskAction, false), not(nullValue())); + assertThat(assignedTasks.runningTaskIds(), equalTo(Collections.singleton(taskId1))); + EasyMock.verify(t1, taskAction); + } + + private RuntimeException suspendTask() { + assignedTasks.addNewTask(t1); + assignedTasks.initializeNewTasks(); + return assignedTasks.suspend(); + } + + private void mockRunningTaskSuspension() { + EasyMock.expect(t1.initialize()).andReturn(true); + EasyMock.expect(t1.partitions()).andReturn(Collections.singleton(tp1)).anyTimes(); + EasyMock.expect(t1.changelogPartitions()).andReturn(Collections.emptyList()).anyTimes(); + t1.suspend(); + EasyMock.expectLastCall(); + } + +} \ No newline at end of file diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorStateManagerTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorStateManagerTest.java index f454216e177ec..1e4e0eec17a86 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorStateManagerTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorStateManagerTest.java @@ -21,7 +21,6 @@ import org.apache.kafka.common.serialization.Serdes; import org.apache.kafka.common.utils.MockTime; import org.apache.kafka.common.utils.Utils; -import org.apache.kafka.streams.errors.LockException; import org.apache.kafka.streams.errors.ProcessorStateException; import org.apache.kafka.streams.processor.TaskId; import org.apache.kafka.streams.state.internals.OffsetCheckpoint; @@ -34,9 +33,7 @@ import java.io.File; import java.io.IOException; -import java.nio.channels.FileChannel; -import java.nio.channels.FileLock; -import java.nio.file.StandardOpenOption; + import java.util.Collections; import java.util.HashMap; import java.util.Map; @@ -389,33 +386,6 @@ public void shouldNotWriteCheckpointForStoresWithoutChangelogTopic() throws Exce assertThat(read, equalTo(Collections.emptyMap())); } - @Test - public void shouldThrowLockExceptionIfFailedToLockStateDirectory() throws Exception { - final File taskDirectory = stateDirectory.directoryForTask(taskId); - final FileChannel channel = FileChannel.open(new File(taskDirectory, - StateDirectory.LOCK_FILE_NAME).toPath(), - StandardOpenOption.CREATE, - StandardOpenOption.WRITE); - // lock the task directory - final FileLock lock = channel.lock(); - - try { - new ProcessorStateManager( - taskId, - noPartitions, - false, - stateDirectory, - Collections.emptyMap(), - changelogReader, - false); - fail("Should have thrown LockException"); - } catch (final LockException e) { - // pass - } finally { - lock.release(); - channel.close(); - } - } @Test public void shouldThrowIllegalArgumentExceptionIfStoreNameIsSameAsCheckpointFileName() throws Exception { diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StandbyTaskTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StandbyTaskTest.java index a358be5430e15..8621470844c1c 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StandbyTaskTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StandbyTaskTest.java @@ -158,7 +158,7 @@ public void cleanup() throws IOException { public void testStorePartitions() throws Exception { StreamsConfig config = createConfig(baseDir); StandbyTask task = new StandbyTask(taskId, applicationId, topicPartitions, topology, consumer, changelogReader, config, null, stateDirectory); - + task.initialize(); assertEquals(Utils.mkSet(partition2), new HashSet<>(task.checkpointedOffsets().keySet())); } @@ -182,7 +182,7 @@ public void testUpdateNonPersistentStore() throws Exception { public void testUpdate() throws Exception { StreamsConfig config = createConfig(baseDir); StandbyTask task = new StandbyTask(taskId, applicationId, topicPartitions, topology, consumer, changelogReader, config, null, stateDirectory); - + task.initialize(); restoreStateConsumer.assign(new ArrayList<>(task.checkpointedOffsets().keySet())); for (ConsumerRecord record : Arrays.asList( @@ -240,7 +240,7 @@ public void testUpdateKTable() throws Exception { StreamsConfig config = createConfig(baseDir); StandbyTask task = new StandbyTask(taskId, applicationId, ktablePartitions, ktableTopology, consumer, changelogReader, config, null, stateDirectory); - + task.initialize(); restoreStateConsumer.assign(new ArrayList<>(task.checkpointedOffsets().keySet())); for (ConsumerRecord record : Arrays.asList( @@ -360,6 +360,7 @@ public void shouldCheckpointStoreOffsetsOnCommit() throws Exception { null, stateDirectory ); + task.initialize(); restoreStateConsumer.assign(new ArrayList<>(task.checkpointedOffsets().keySet())); diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StateRestorerTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StateRestorerTest.java index 6968f3319ec33..3a8ebdaf22fc6 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StateRestorerTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StateRestorerTest.java @@ -28,7 +28,7 @@ public class StateRestorerTest { private static final long OFFSET_LIMIT = 50; private final MockRestoreCallback callback = new MockRestoreCallback(); - private final StateRestorer restorer = new StateRestorer(new TopicPartition("topic", 1), callback, null, OFFSET_LIMIT, true); + private final StateRestorer restorer = new StateRestorer(new TopicPartition("topic", 1), callback, null, OFFSET_LIMIT, true, "store"); @Test public void shouldCallRestoreOnRestoreCallback() throws Exception { @@ -53,7 +53,7 @@ public void shouldBeCompletedIfEndOffsetAndRecordOffsetAreZero() throws Exceptio @Test public void shouldBeCompletedIfOffsetAndOffsetLimitAreZero() throws Exception { - final StateRestorer restorer = new StateRestorer(new TopicPartition("topic", 1), callback, null, 0, true); + final StateRestorer restorer = new StateRestorer(new TopicPartition("topic", 1), callback, null, 0, true, "store"); assertTrue(restorer.hasCompleted(0, 10)); } diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StoreChangelogReaderTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StoreChangelogReaderTest.java index 2ff6d33c47a99..66bdab6fd5664 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StoreChangelogReaderTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StoreChangelogReaderTest.java @@ -139,7 +139,7 @@ public void shouldThrowExceptionIfConsumerHasCurrentSubscription() throws Except public void shouldRestoreAllMessagesFromBeginningWhenCheckpointNull() throws Exception { final int messages = 10; setupConsumer(messages, topicPartition); - changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, true)); + changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, true, "store")); changelogReader.restore(); assertThat(callback.restored.size(), equalTo(messages)); @@ -149,7 +149,7 @@ public void shouldRestoreAllMessagesFromBeginningWhenCheckpointNull() throws Exc public void shouldRestoreMessagesFromCheckpoint() throws Exception { final int messages = 10; setupConsumer(messages, topicPartition); - changelogReader.register(new StateRestorer(topicPartition, callback, 5L, Long.MAX_VALUE, true)); + changelogReader.register(new StateRestorer(topicPartition, callback, 5L, Long.MAX_VALUE, true, "store")); changelogReader.restore(); assertThat(callback.restored.size(), equalTo(5)); @@ -159,7 +159,7 @@ public void shouldRestoreMessagesFromCheckpoint() throws Exception { public void shouldClearAssignmentAtEndOfRestore() throws Exception { final int messages = 1; setupConsumer(messages, topicPartition); - changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, true)); + changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, true, "store")); changelogReader.restore(); assertThat(consumer.assignment(), equalTo(Collections.emptySet())); @@ -168,7 +168,7 @@ public void shouldClearAssignmentAtEndOfRestore() throws Exception { @Test public void shouldRestoreToLimitWhenSupplied() throws Exception { setupConsumer(10, topicPartition); - final StateRestorer restorer = new StateRestorer(topicPartition, callback, null, 3, true); + final StateRestorer restorer = new StateRestorer(topicPartition, callback, null, 3, true, "store"); changelogReader.register(restorer); changelogReader.restore(); @@ -186,9 +186,9 @@ public void shouldRestoreMultipleStores() throws Exception { setupConsumer(5, one); setupConsumer(3, two); - changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, true)); - changelogReader.register(new StateRestorer(one, callbackOne, null, Long.MAX_VALUE, true)); - changelogReader.register(new StateRestorer(two, callbackTwo, null, Long.MAX_VALUE, true)); + changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, true, "store")); + changelogReader.register(new StateRestorer(one, callbackOne, null, Long.MAX_VALUE, true, "store_1")); + changelogReader.register(new StateRestorer(two, callbackTwo, null, Long.MAX_VALUE, true, "store_2")); changelogReader.restore(); @@ -199,7 +199,7 @@ public void shouldRestoreMultipleStores() throws Exception { @Test public void shouldNotRestoreAnythingWhenPartitionIsEmpty() throws Exception { - final StateRestorer restorer = new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, true); + final StateRestorer restorer = new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, true, "store"); setupConsumer(0, topicPartition); changelogReader.register(restorer); @@ -212,7 +212,7 @@ public void shouldNotRestoreAnythingWhenPartitionIsEmpty() throws Exception { public void shouldNotRestoreAnythingWhenCheckpointAtEndOffset() throws Exception { final Long endOffset = 10L; setupConsumer(endOffset, topicPartition); - final StateRestorer restorer = new StateRestorer(topicPartition, callback, endOffset, Long.MAX_VALUE, true); + final StateRestorer restorer = new StateRestorer(topicPartition, callback, endOffset, Long.MAX_VALUE, true, "store"); changelogReader.register(restorer); @@ -224,7 +224,7 @@ public void shouldNotRestoreAnythingWhenCheckpointAtEndOffset() throws Exception @Test public void shouldReturnRestoredOffsetsForPersistentStores() throws Exception { setupConsumer(10, topicPartition); - changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, true)); + changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, true, "store")); changelogReader.restore(); final Map restoredOffsets = changelogReader.restoredOffsets(); assertThat(restoredOffsets, equalTo(Collections.singletonMap(topicPartition, 10L))); @@ -233,7 +233,7 @@ public void shouldReturnRestoredOffsetsForPersistentStores() throws Exception { @Test public void shouldNotReturnRestoredOffsetsForNonPersistentStore() throws Exception { setupConsumer(10, topicPartition); - changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, false)); + changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, false, "store")); changelogReader.restore(); final Map restoredOffsets = changelogReader.restoredOffsets(); assertThat(restoredOffsets, equalTo(Collections.emptyMap())); @@ -247,7 +247,7 @@ public void shouldIgnoreNullKeysWhenRestoring() throws Exception { consumer.addRecord(new ConsumerRecord<>(topicPartition.topic(), topicPartition.partition(), 1, (byte[]) null, bytes)); consumer.addRecord(new ConsumerRecord<>(topicPartition.topic(), topicPartition.partition(), 2, bytes, bytes)); consumer.assign(Collections.singletonList(topicPartition)); - changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, false)); + changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, false, "store")); changelogReader.restore(); assertThat(callback.restored, CoreMatchers.equalTo(Utils.mkList(KeyValue.pair(bytes, bytes), KeyValue.pair(bytes, bytes)))); diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamTaskTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamTaskTest.java index 28ead02bdbf30..c6c977b30e47d 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamTaskTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamTaskTest.java @@ -114,7 +114,6 @@ public class StreamTaskTest { private final MockTime time = new MockTime(); private File baseDir = TestUtils.tempDirectory(); private StateDirectory stateDirectory; - private final RecordCollectorImpl recordCollector = new RecordCollectorImpl(producer, "taskId"); private StreamsConfig config; private StreamsConfig eosConfig; private StreamTask task; @@ -147,6 +146,7 @@ public void setup() throws Exception { stateDirectory = new StateDirectory("applicationId", baseDir.getPath(), new MockTime()); task = new StreamTask(taskId00, applicationId, partitions, topology, consumer, changelogReader, config, streamsMetrics, stateDirectory, null, time, producer); + task.initialize(); } @After @@ -377,6 +377,7 @@ public void process(final Object key, final Object value) { task = new StreamTask(taskId00, applicationId, partitions, topology, consumer, changelogReader, config, streamsMetrics, stateDirectory, null, time, producer); + task.initialize(); final int offset = 20; task.addRecords(partition1, Collections.singletonList( new ConsumerRecord<>(partition1.topic(), partition1.partition(), offset, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, recordKey, recordValue))); @@ -494,6 +495,7 @@ public Map offsets() { }; } }; + streamTask.initialize(); time.sleep(config.getLong(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG)); diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamThreadTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamThreadTest.java index 3b280f1ffe467..e8518d9fcef54 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamThreadTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamThreadTest.java @@ -41,6 +41,7 @@ import org.apache.kafka.streams.processor.internals.assignment.AssignmentInfo; import org.apache.kafka.streams.state.HostInfo; import org.apache.kafka.streams.state.Stores; +import org.apache.kafka.streams.state.internals.ThreadCache; import org.apache.kafka.test.MockClientSupplier; import org.apache.kafka.test.MockInternalTopicManager; import org.apache.kafka.test.MockProcessorSupplier; @@ -96,6 +97,7 @@ public class StreamThreadTest { private final StreamsConfig config = new StreamsConfig(configProps(false)); private final String stateDir = TestUtils.tempDirectory().getPath(); private final StateDirectory stateDirectory = new StateDirectory("applicationId", stateDir, mockTime); + private final ChangelogReader changelogReader = new StoreChangelogReader(clientSupplier.restoreConsumer, Time.SYSTEM, 5000); @Before public void setUp() throws Exception { @@ -169,7 +171,7 @@ private Properties configProps(final boolean enableEos) { }; } - private static class TestStreamTask extends StreamTask { + private class TestStreamTask extends StreamTask { boolean committed = false; private boolean suspended; private boolean closed; @@ -181,20 +183,20 @@ private static class TestStreamTask extends StreamTask { final ProcessorTopology topology, final Consumer consumer, final Producer producer, - final Consumer restoreConsumer, final StreamsConfig config, final StreamsMetrics metrics, - final StateDirectory stateDirectory) { + final StateDirectory stateDirectory, + final ChangelogReader storeChangelogReader) { super(id, applicationId, partitions, topology, consumer, - new StoreChangelogReader(restoreConsumer, Time.SYSTEM, 5000), + storeChangelogReader, config, metrics, stateDirectory, - null, + new ThreadCache("", 0L, metrics), new MockTime(), producer); } @@ -293,11 +295,10 @@ Map> activeTasks() { expectedGroup1 = new HashSet<>(Collections.singleton(t1p1)); activeTasks.put(new TaskId(0, 1), expectedGroup1); rebalanceListener.onPartitionsAssigned(assignedPartitions); - - assertEquals(thread.state(), StreamThread.State.RUNNING); Assert.assertEquals(stateListener.numChanges, 3); Assert.assertEquals(stateListener.oldState, StreamThread.State.ASSIGNING_PARTITIONS); - Assert.assertEquals(stateListener.newState, StreamThread.State.RUNNING); + thread.runOnce(-1); + assertEquals(thread.state(), StreamThread.State.RUNNING); assertTrue(thread.tasks().containsKey(task1)); assertEquals(expectedGroup1, thread.tasks().get(task1).partitions()); assertEquals(1, thread.tasks().size()); @@ -315,7 +316,7 @@ Map> activeTasks() { expectedGroup2 = new HashSet<>(Collections.singleton(t1p2)); activeTasks.put(new TaskId(0, 2), expectedGroup2); rebalanceListener.onPartitionsAssigned(assignedPartitions); - + thread.runOnce(-1); assertTrue(thread.tasks().containsKey(task2)); assertEquals(expectedGroup2, thread.tasks().get(task2).partitions()); assertEquals(1, thread.tasks().size()); @@ -330,7 +331,7 @@ Map> activeTasks() { activeTasks.put(new TaskId(0, 1), expectedGroup1); activeTasks.put(new TaskId(0, 2), expectedGroup2); rebalanceListener.onPartitionsAssigned(assignedPartitions); - + thread.runOnce(-1); assertTrue(thread.tasks().containsKey(task1)); assertTrue(thread.tasks().containsKey(task2)); assertEquals(expectedGroup1, thread.tasks().get(task1).partitions()); @@ -342,7 +343,7 @@ Map> activeTasks() { rebalanceListener.onPartitionsRevoked(revokedPartitions); assignedPartitions = Collections.emptyList(); rebalanceListener.onPartitionsAssigned(assignedPartitions); - + thread.runOnce(-1); assertTrue(thread.tasks().isEmpty()); thread.close(); @@ -397,6 +398,7 @@ Map> activeTasks() { activeTasks.put(new TaskId(1, 1), expectedGroup1); activeTasks.put(new TaskId(1, 2), expectedGroup2); rebalanceListener.onPartitionsAssigned(assignedPartitions); + thread.runOnce(-1); assertTrue(thread.tasks().containsKey(task4)); assertTrue(thread.tasks().containsKey(task5)); @@ -414,6 +416,7 @@ Map> activeTasks() { activeTasks.put(new TaskId(0, 1), expectedGroup1); activeTasks.put(new TaskId(1, 1), expectedGroup2); rebalanceListener.onPartitionsAssigned(assignedPartitions); + thread.runOnce(-1); assertTrue(thread.tasks().containsKey(task1)); assertTrue(thread.tasks().containsKey(task4)); @@ -428,6 +431,7 @@ Map> activeTasks() { expectedGroup1 = new HashSet<>(Collections.singleton(t1p1)); expectedGroup2 = new HashSet<>(Arrays.asList(t2p1, t3p1)); rebalanceListener.onPartitionsAssigned(assignedPartitions); + thread.runOnce(-1); assertTrue(thread.tasks().containsKey(task1)); assertTrue(thread.tasks().containsKey(task4)); @@ -440,6 +444,7 @@ Map> activeTasks() { rebalanceListener.onPartitionsRevoked(revokedPartitions); assignedPartitions = Collections.emptyList(); rebalanceListener.onPartitionsAssigned(assignedPartitions); + thread.runOnce(-1); assertTrue(thread.tasks().isEmpty()); @@ -504,7 +509,13 @@ public void testHandingOverTaskFromOneToAnotherThread() throws Exception { ); builder.addSource("source", TOPIC); - clientSupplier.consumer.assign(Arrays.asList(new TopicPartition(TOPIC, 0), new TopicPartition(TOPIC, 1))); + TopicPartition tp0 = new TopicPartition(TOPIC, 0); + TopicPartition tp1 = new TopicPartition(TOPIC, 1); + clientSupplier.consumer.assign(Arrays.asList(tp0, tp1)); + final Map offsets = new HashMap<>(); + offsets.put(tp0, 0L); + offsets.put(tp1, 0L); + clientSupplier.consumer.updateBeginningOffsets(offsets); final StreamThread thread1 = new StreamThread( builder, @@ -546,7 +557,9 @@ public void testHandingOverTaskFromOneToAnotherThread() throws Exception { // assign thread1.rebalanceListener.onPartitionsAssigned(task0Assignment); + thread1.runOnce(-1); thread2.rebalanceListener.onPartitionsAssigned(task1Assignment); + thread2.runOnce(-1); final Set originalTaskAssignmentThread1 = new HashSet<>(); originalTaskAssignmentThread1.addAll(thread1.tasks().keySet()); @@ -557,6 +570,8 @@ public void testHandingOverTaskFromOneToAnotherThread() throws Exception { thread1.rebalanceListener.onPartitionsRevoked(task0Assignment); thread2.rebalanceListener.onPartitionsRevoked(task1Assignment); + assertThat(thread1.prevActiveTasks(), equalTo(originalTaskAssignmentThread1)); + assertThat(thread2.prevActiveTasks(), equalTo(originalTaskAssignmentThread2)); // assign reverted thread1Assignment.clear(); @@ -569,18 +584,18 @@ public void testHandingOverTaskFromOneToAnotherThread() throws Exception { @Override public void run() { thread1.rebalanceListener.onPartitionsAssigned(task1Assignment); + thread1.runOnce(-1); } }); runIt.start(); thread2.rebalanceListener.onPartitionsAssigned(task0Assignment); + thread2.runOnce(-1); runIt.join(); assertThat(thread1.tasks().keySet(), equalTo(originalTaskAssignmentThread2)); assertThat(thread2.tasks().keySet(), equalTo(originalTaskAssignmentThread1)); - assertThat(thread1.prevActiveTasks(), equalTo(originalTaskAssignmentThread1)); - assertThat(thread2.prevActiveTasks(), equalTo(originalTaskAssignmentThread2)); } private class MockStreamsPartitionAssignor extends StreamPartitionAssignor { @@ -697,10 +712,10 @@ protected StreamTask createStreamTask(final TaskId id, final Collection()), - restoreConsumer, config, new MockStreamsMetrics(new Metrics()), - stateDirectory); + stateDirectory, + storeChangelogReader); } }; @@ -720,6 +735,8 @@ protected StreamTask createStreamTask(final TaskId id, final Collection()), - clientSupplier.restoreConsumer, - config, - new MockStreamsMetrics(new Metrics()), - new StateDirectory(applicationId, config.getString(StreamsConfig.STATE_DIR_CONFIG), mockTime)); + new TaskId(0, 0), + applicationId, + Utils.mkSet(new TopicPartition("topic", 0)), + builder.build(0), + clientSupplier.consumer, + clientSupplier.getProducer(new HashMap()), + config, + new MockStreamsMetrics(new Metrics()), + new StateDirectory(applicationId, config.getString(StreamsConfig.STATE_DIR_CONFIG), mockTime), + changelogReader); final StreamThread thread = new StreamThread( builder, @@ -969,6 +988,7 @@ Map> activeTasks() { }); thread.rebalanceListener.onPartitionsAssigned(activeTasks); + thread.runOnce(-1); thread.rebalanceListener.onPartitionsRevoked(activeTasks); assertTrue(testStreamTask.suspended); @@ -1003,17 +1023,23 @@ public void shouldInitializeRestoreConsumerWithOffsetsFromStandbyTasks() throws final MockConsumer restoreConsumer = clientSupplier.restoreConsumer; restoreConsumer.updatePartitions("stream-thread-test-count-one-changelog", - Collections.singletonList(new PartitionInfo("stream-thread-test-count-one-changelog", - 0, - null, - new Node[0], - new Node[0]))); + Collections.singletonList(new PartitionInfo("stream-thread-test-count-one-changelog", + 0, + null, + new Node[0], + new Node[0]))); restoreConsumer.updatePartitions("stream-thread-test-count-two-changelog", - Collections.singletonList(new PartitionInfo("stream-thread-test-count-two-changelog", - 0, - null, - new Node[0], - new Node[0]))); + Collections.singletonList(new PartitionInfo("stream-thread-test-count-two-changelog", + 0, + null, + new Node[0], + new Node[0]))); + final TopicPartition tp1 = new TopicPartition("stream-thread-test-count-one-changelog", 0); + final TopicPartition tp2 = new TopicPartition("stream-thread-test-count-two-changelog", 0); + final Map beginningOffsets = new HashMap<>(); + beginningOffsets.put(tp1, 0L); + beginningOffsets.put(tp2, 0L); + restoreConsumer.updateBeginningOffsets(beginningOffsets); final Map> standbyTasks = new HashMap<>(); final TopicPartition t1 = new TopicPartition("t1", 0); @@ -1028,16 +1054,16 @@ Map> standbyTasks() { thread.rebalanceListener.onPartitionsRevoked(Collections.emptyList()); thread.rebalanceListener.onPartitionsAssigned(Collections.emptyList()); - - assertThat(restoreConsumer.assignment(), equalTo(Utils.mkSet(new TopicPartition("stream-thread-test-count-one-changelog", 0)))); + thread.runOnce(-1); + assertThat(restoreConsumer.assignment(), equalTo(Utils.mkSet(tp1))); // assign an existing standby plus a new one standbyTasks.put(new TaskId(1, 0), Utils.mkSet(new TopicPartition("t2", 0))); thread.rebalanceListener.onPartitionsRevoked(Collections.emptyList()); thread.rebalanceListener.onPartitionsAssigned(Collections.emptyList()); - - assertThat(restoreConsumer.assignment(), equalTo(Utils.mkSet(new TopicPartition("stream-thread-test-count-one-changelog", 0), - new TopicPartition("stream-thread-test-count-two-changelog", 0)))); + thread.runOnce(-1); + assertThat(restoreConsumer.assignment(), equalTo(Utils.mkSet(tp1, + tp2))); } @Test @@ -1082,11 +1108,15 @@ public void shouldCloseSuspendedTasksThatAreNoLongerAssignedToThisStreamThreadBe final Map> standbyTasks = new HashMap<>(); final TopicPartition t1 = new TopicPartition("t1", 0); - standbyTasks.put(new TaskId(0, 0), Utils.mkSet(t1)); + Set partitionsT1 = Utils.mkSet(t1); + standbyTasks.put(new TaskId(0, 0), partitionsT1); final Map> activeTasks = new HashMap<>(); final TopicPartition t2 = new TopicPartition("t2", 0); - activeTasks.put(new TaskId(1, 0), Utils.mkSet(t2)); + Set partitionsT2 = Utils.mkSet(t2); + activeTasks.put(new TaskId(1, 0), partitionsT2); + + clientSupplier.consumer.updateBeginningOffsets(Collections.singletonMap(t2, 0L)); thread.setPartitionAssignor(new StreamPartitionAssignor() { @Override @@ -1101,8 +1131,9 @@ Map> activeTasks() { }); thread.rebalanceListener.onPartitionsRevoked(Collections.emptyList()); + clientSupplier.consumer.assign(partitionsT2); thread.rebalanceListener.onPartitionsAssigned(Utils.mkSet(t2)); - + thread.runOnce(-1); // swap the assignment around and make sure we don't get any exceptions standbyTasks.clear(); activeTasks.clear(); @@ -1110,6 +1141,7 @@ Map> activeTasks() { activeTasks.put(new TaskId(0, 0), Utils.mkSet(t1)); thread.rebalanceListener.onPartitionsRevoked(Collections.emptyList()); + clientSupplier.consumer.assign(partitionsT1); thread.rebalanceListener.onPartitionsAssigned(Utils.mkSet(t1)); } @@ -1138,16 +1170,16 @@ public void shouldCloseActiveTasksThatAreAssignedToThisStreamThreadButAssignment protected StreamTask createStreamTask(final TaskId id, final Collection partitions) { final ProcessorTopology topology = builder.build(id.topicGroupId); final TestStreamTask task = new TestStreamTask( - id, - applicationId, - partitions, - topology, - consumer, - clientSupplier.getProducer(new HashMap()), - restoreConsumer, - config, - new MockStreamsMetrics(new Metrics()), - stateDirectory); + id, + applicationId, + partitions, + topology, + consumer, + clientSupplier.getProducer(new HashMap()), + config, + new MockStreamsMetrics(new Metrics()), + stateDirectory, + storeChangelogReader); createdTasks.put(partitions, task); return task; } @@ -1177,6 +1209,7 @@ Map> activeTasks() { // should create task for id 0_0 with a single partition thread.rebalanceListener.onPartitionsRevoked(Collections.emptyList()); thread.rebalanceListener.onPartitionsAssigned(task00Partitions); + thread.runOnce(-1); final TestStreamTask firstTask = createdTasks.get(task00Partitions); assertThat(firstTask.id(), is(taskId)); @@ -1223,31 +1256,21 @@ public void shouldCloseTaskAsZombieAndRemoveFromActiveTasksIfProducerWasFencedWh thread.rebalanceListener.onPartitionsRevoked(null); thread.rebalanceListener.onPartitionsAssigned(task0Assignment); + thread.runOnce(-1); + assertThat(thread.tasks().size(), equalTo(1)); final MockProducer producer = clientSupplier.producers.get(0); - thread.start(); - TestUtils.waitForCondition( - new TestCondition() { - @Override - public boolean conditionMet() { - return !consumer.subscription().isEmpty(); - } - }, - "StreamsThread's internal consumer did not subscribe to input topic."); // change consumer subscription from "pattern" to "manual" to be able to call .addRecords() - consumer.updateBeginningOffsets(new HashMap() { - { - put(task0Assignment.iterator().next(), 0L); - } - }); + consumer.updateBeginningOffsets(Collections.singletonMap(task0Assignment.iterator().next(), 0L)); consumer.unsubscribe(); consumer.assign(task0Assignment); consumer.addRecord(new ConsumerRecord<>(TOPIC, 0, 0, new byte[0], new byte[0])); mockTime.sleep(config.getLong(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG) + 1); + thread.runOnce(-1); TestUtils.waitForCondition( new TestCondition() { @Override @@ -1270,8 +1293,8 @@ public boolean conditionMet() { producer.fenceProducer(); mockTime.sleep(config.getLong(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG) + 1L); - consumer.addRecord(new ConsumerRecord<>(TOPIC, 0, 0, new byte[0], new byte[0])); + thread.runOnce(-1); TestUtils.waitForCondition( new TestCondition() { @Override @@ -1281,9 +1304,6 @@ public boolean conditionMet() { }, "StreamsThread did not remove fenced zombie task."); - thread.close(); - thread.join(); - assertThat(producer.commitCount(), equalTo(1L)); } @@ -1312,6 +1332,7 @@ public void shouldCloseTaskAsZombieAndRemoveFromActiveTasksIfProducerGotFencedAt thread.rebalanceListener.onPartitionsRevoked(null); thread.rebalanceListener.onPartitionsAssigned(task0Assignment); + thread.runOnce(-1); assertThat(thread.tasks().size(), equalTo(1)); thread.rebalanceListener.onPartitionsRevoked(null); @@ -1331,16 +1352,16 @@ public void shouldNotViolateAtLeastOnceWhenAnExceptionOccursOnTaskCloseDuringShu builder.stream("t1").groupByKey(); final TestStreamTask testStreamTask = new TestStreamTask( - new TaskId(0, 0), - applicationId, - Utils.mkSet(new TopicPartition("t1", 0)), - builder.build(0), - clientSupplier.consumer, - clientSupplier.getProducer(new HashMap()), - clientSupplier.restoreConsumer, - config, - new MockStreamsMetrics(new Metrics()), - new StateDirectory(applicationId, config.getString(StreamsConfig.STATE_DIR_CONFIG), mockTime)) { + new TaskId(0, 0), + applicationId, + Utils.mkSet(new TopicPartition("t1", 0)), + builder.build(0), + clientSupplier.consumer, + clientSupplier.getProducer(new HashMap()), + config, + new MockStreamsMetrics(new Metrics()), + new StateDirectory(applicationId, config.getString(StreamsConfig.STATE_DIR_CONFIG), mockTime), + changelogReader) { @Override public void close(final boolean clean) { @@ -1385,17 +1406,18 @@ protected StreamTask createStreamTask(final TaskId id, final Collection()), - clientSupplier.restoreConsumer, - config, - new MockStreamsMetrics(metrics), - new StateDirectory(applicationId, config.getString(StreamsConfig.STATE_DIR_CONFIG), mockTime)) { + new TaskId(0, 0), + applicationId, + Utils.mkSet(t1), + builder.build(0), + clientSupplier.consumer, + clientSupplier.getProducer(new HashMap()), + config, + new MockStreamsMetrics(metrics), + new StateDirectory(applicationId, config.getString(StreamsConfig.STATE_DIR_CONFIG), mockTime), + changelogReader) { @Override public void flushState() { @@ -1430,13 +1452,14 @@ protected StreamTask createStreamTask(final TaskId id, final CollectionemptyList()); + clientSupplier.consumer.assign(testStreamTask.partitions); + clientSupplier.consumer.updateBeginningOffsets(Collections.singletonMap(t1, 0L)); thread.rebalanceListener.onPartitionsAssigned(testStreamTask.partitions); + thread.runOnce(-1); // store should have been opened assertTrue(stateStore.isOpen()); - thread.start(); - thread.close(); - thread.join(); + thread.shutdown(true); assertFalse("task shouldn't have been committed as there was an exception during shutdown", testStreamTask.committed); // store should be closed even if we had an exception assertFalse(stateStore.isOpen()); @@ -1453,10 +1476,10 @@ public void shouldCaptureCommitFailedExceptionOnTaskSuspension() throws Exceptio builder.build(0), clientSupplier.consumer, clientSupplier.getProducer(new HashMap()), - clientSupplier.restoreConsumer, config, new MockStreamsMetrics(new Metrics()), - new StateDirectory(applicationId, config.getString(StreamsConfig.STATE_DIR_CONFIG), mockTime)) { + new StateDirectory(applicationId, config.getString(StreamsConfig.STATE_DIR_CONFIG), mockTime), + changelogReader) { @Override public void suspend() { @@ -1504,16 +1527,16 @@ public void shouldNotViolateAtLeastOnceWhenExceptionOccursDuringTaskSuspension() builder.stream("t1").groupByKey(); final TestStreamTask testStreamTask = new TestStreamTask( - new TaskId(0, 0), - applicationId, - Utils.mkSet(new TopicPartition("t1", 0)), - builder.build(0), - clientSupplier.consumer, - clientSupplier.getProducer(new HashMap()), - clientSupplier.restoreConsumer, - config, - new MockStreamsMetrics(new Metrics()), - new StateDirectory(applicationId, config.getString(StreamsConfig.STATE_DIR_CONFIG), mockTime)) { + new TaskId(0, 0), + applicationId, + Utils.mkSet(new TopicPartition("t1", 0)), + builder.build(0), + clientSupplier.consumer, + clientSupplier.getProducer(new HashMap()), + config, + new MockStreamsMetrics(new Metrics()), + new StateDirectory(applicationId, config.getString(StreamsConfig.STATE_DIR_CONFIG), mockTime), + changelogReader) { @Override public void suspend() { @@ -1549,6 +1572,7 @@ protected StreamTask createStreamTask(final TaskId id, final CollectionemptyList()); thread.rebalanceListener.onPartitionsAssigned(testStreamTask.partitions); + thread.runOnce(-1); try { thread.rebalanceListener.onPartitionsRevoked(Collections.emptyList()); fail("should have thrown exception"); @@ -1565,16 +1589,16 @@ public void shouldNotViolateAtLeastOnceWhenExceptionOccursDuringFlushStateWhileS builder.stream("t1").groupByKey(); final TestStreamTask testStreamTask = new TestStreamTask( - new TaskId(0, 0), - applicationId, - Utils.mkSet(new TopicPartition("t1", 0)), - builder.build(0), - clientSupplier.consumer, - clientSupplier.getProducer(new HashMap()), - clientSupplier.restoreConsumer, - config, - new MockStreamsMetrics(new Metrics()), - new StateDirectory(applicationId, config.getString(StreamsConfig.STATE_DIR_CONFIG), mockTime)) { + new TaskId(0, 0), + applicationId, + Utils.mkSet(new TopicPartition("t1", 0)), + builder.build(0), + clientSupplier.consumer, + clientSupplier.getProducer(new HashMap()), + config, + new MockStreamsMetrics(new Metrics()), + new StateDirectory(applicationId, config.getString(StreamsConfig.STATE_DIR_CONFIG), mockTime), + changelogReader) { @Override protected void flushState() { @@ -1610,6 +1634,7 @@ protected StreamTask createStreamTask(final TaskId id, final CollectionemptyList()); thread.rebalanceListener.onPartitionsAssigned(testStreamTask.partitions); + thread.runOnce(-1); try { thread.rebalanceListener.onPartitionsRevoked(Collections.emptyList()); fail("should have thrown exception"); @@ -1734,34 +1759,30 @@ public void shouldReleaseStateDirLockIfFailureOnTaskCloseForSuspendedTask() thro final StateDirectory stateDirMock = mockStateDirInteractions(taskId); final StreamThread thread = setupTest(taskId, stateDirMock); + thread.start(); thread.close(); thread.join(); EasyMock.verify(stateDirMock); } - private StreamThread setupTest(final TaskId taskId, final StateDirectory stateDirectory) throws InterruptedException { - final TopologyBuilder builder = new TopologyBuilder(); + + private StreamThread setupTest(final TaskId taskId, final StateDirectory stateDirectory) { + final KStreamBuilder builder = new KStreamBuilder(); builder.setApplicationId(applicationId); - builder.addSource("source", "topic"); + builder.table("topic", "topic"); final MockClientSupplier clientSupplier = new MockClientSupplier(); - final TestStreamTask testStreamTask = new TestStreamTask(taskId, - applicationId, - Utils.mkSet(new TopicPartition("topic", 0)), - builder.build(0), - clientSupplier.consumer, - clientSupplier.getProducer(new HashMap()), - clientSupplier.restoreConsumer, - config, - new MockStreamsMetrics(new Metrics()), - stateDirectory) { - - @Override - public void suspend() { - throw new RuntimeException("KABOOM!!!"); - } - }; + final TopicPartition topic = new TopicPartition("topic", 0); + final Set partitions = Utils.mkSet(topic); + final Map offsets = Collections.singletonMap(topic, 0L); + clientSupplier.restoreConsumer.updatePartitions("topic", + Collections.singletonList( + new PartitionInfo("topic", 0, null, null, null))); + clientSupplier.restoreConsumer.updateBeginningOffsets(offsets); + clientSupplier.restoreConsumer.updateEndOffsets(offsets); + clientSupplier.consumer.assign(partitions); + clientSupplier.consumer.updateBeginningOffsets(offsets); final StreamThread thread = new StreamThread( @@ -1778,17 +1799,32 @@ public void suspend() { @Override protected StreamTask createStreamTask(final TaskId id, final Collection partitions) { - return testStreamTask; + return new TestStreamTask(taskId, + applicationId, + partitions, + builder.build(0), + clientSupplier.consumer, + clientSupplier.getProducer(new HashMap()), + config, + new MockStreamsMetrics(new Metrics()), + stateDirectory, + storeChangelogReader) { + + @Override + public void suspend() { + throw new RuntimeException("KABOOM!!!"); + } + }; + } }; final Map> activeTasks = new HashMap<>(); - activeTasks.put(testStreamTask.id, testStreamTask.partitions); + activeTasks.put(taskId, partitions); thread.setPartitionAssignor(new MockStreamsPartitionAssignor(activeTasks)); - thread.start(); thread.rebalanceListener.onPartitionsRevoked(Collections.emptySet()); - thread.rebalanceListener.onPartitionsAssigned(testStreamTask.partitions); - + thread.rebalanceListener.onPartitionsAssigned(partitions); + thread.runOnce(-1); return thread; } @@ -1799,8 +1835,8 @@ public void shouldReleaseStateDirLockIfFailureOnStandbyTaskSuspend() throws Exce final StateDirectory stateDirMock = mockStateDirInteractions(taskId); final StreamThread thread = setupStandbyTest(taskId, stateDirMock); - startThreadAndRebalance(thread); - + thread.rebalanceListener.onPartitionsAssigned(Collections.emptySet()); + thread.runOnce(-1); try { thread.rebalanceListener.onPartitionsRevoked(Collections.emptyList()); fail("Should have thrown exception"); @@ -1813,25 +1849,13 @@ public void shouldReleaseStateDirLockIfFailureOnStandbyTaskSuspend() throws Exce EasyMock.verify(stateDirMock); } - private void startThreadAndRebalance(final StreamThread thread) throws InterruptedException { - thread.start(); - TestUtils.waitForCondition(new TestCondition() { - @Override - public boolean conditionMet() { - return thread.state() == StreamThread.State.RUNNING; - } - }, "thread didn't transition to running"); - thread.rebalanceListener.onPartitionsRevoked(Collections.emptySet()); - thread.rebalanceListener.onPartitionsAssigned(Collections.emptySet()); - } - @Test public void shouldReleaseStateDirLockIfFailureOnStandbyTaskCloseForUnassignedSuspendedStandbyTask() throws Exception { final TaskId taskId = new TaskId(0, 0); final StateDirectory stateDirMock = mockStateDirInteractions(taskId); final StreamThread thread = setupStandbyTest(taskId, stateDirMock); - startThreadAndRebalance(thread); + thread.start(); try { thread.close(); thread.join(); @@ -1915,6 +1939,8 @@ public void commit() { final Map> standbyTasks = new HashMap<>(); standbyTasks.put(taskId, Collections.singleton(new TopicPartition("topic", 0))); thread.setPartitionAssignor(new MockStreamsPartitionAssignor(Collections.>emptyMap(), standbyTasks)); + thread.rebalanceListener.onPartitionsAssigned(Collections.emptySet()); + thread.runOnce(-1); return thread; } @@ -1970,16 +1996,15 @@ private StreamThread getStreamThread() { protected StreamTask createStreamTask(final TaskId id, final Collection partitionsForTask) { final ProcessorTopology topology = builder.build(id.topicGroupId); return new TestStreamTask( - id, - applicationId, - partitionsForTask, - topology, - consumer, - clientSupplier.getProducer(new HashMap()), - restoreConsumer, - config, - new MockStreamsMetrics(new Metrics()), - stateDirectory); + id, + applicationId, + partitionsForTask, + topology, + consumer, + clientSupplier.getProducer(new HashMap()), + config, + new MockStreamsMetrics(new Metrics()), + stateDirectory, storeChangelogReader); } }; } diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/StreamThreadStateStoreProviderTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/StreamThreadStateStoreProviderTest.java index f04f80a195bcd..549c67c036ffb 100644 --- a/streams/src/test/java/org/apache/kafka/streams/state/internals/StreamThreadStateStoreProviderTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/StreamThreadStateStoreProviderTest.java @@ -103,10 +103,12 @@ public void before() throws IOException { stateDirectory = new StateDirectory(applicationId, stateConfigDir, new MockTime()); taskOne = createStreamsTask(applicationId, streamsConfig, clientSupplier, topology, new TaskId(0, 0)); + taskOne.initialize(); tasks.put(new TaskId(0, 0), taskOne); taskTwo = createStreamsTask(applicationId, streamsConfig, clientSupplier, topology, new TaskId(0, 1)); + taskTwo.initialize(); tasks.put(new TaskId(0, 1), taskTwo); diff --git a/streams/src/test/java/org/apache/kafka/test/MockChangelogReader.java b/streams/src/test/java/org/apache/kafka/test/MockChangelogReader.java index 4db10e7c39f59..86c0eb54fcf66 100644 --- a/streams/src/test/java/org/apache/kafka/test/MockChangelogReader.java +++ b/streams/src/test/java/org/apache/kafka/test/MockChangelogReader.java @@ -20,6 +20,7 @@ import org.apache.kafka.streams.processor.internals.ChangelogReader; import org.apache.kafka.streams.processor.internals.StateRestorer; +import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.Map; @@ -39,8 +40,8 @@ public void register(final StateRestorer restorationInfo) { } @Override - public void restore() { - + public Collection restore() { + return registered; } @Override @@ -48,6 +49,11 @@ public Map restoredOffsets() { return Collections.emptyMap(); } + @Override + public void reset() { + registered.clear(); + } + public boolean wasRegistered(final TopicPartition partition) { return registered.contains(partition); } diff --git a/streams/src/test/java/org/apache/kafka/test/ProcessorTopologyTestDriver.java b/streams/src/test/java/org/apache/kafka/test/ProcessorTopologyTestDriver.java index a9f020b208c55..20ab3f1a2dff9 100644 --- a/streams/src/test/java/org/apache/kafka/test/ProcessorTopologyTestDriver.java +++ b/streams/src/test/java/org/apache/kafka/test/ProcessorTopologyTestDriver.java @@ -225,6 +225,7 @@ public List partitionsFor(final String topic) { cache, new MockTime(), producer); + task.initialize(); } } From ad948a77ee0737cebabe8d6e6f03d4c144538bda Mon Sep 17 00:00:00 2001 From: Damian Guy Date: Mon, 14 Aug 2017 17:45:17 +0100 Subject: [PATCH 2/8] test for store change log partitions --- .../internals/StoreChangelogReader.java | 23 +++++++++---------- .../processor/internals/StreamThread.java | 2 +- .../internals/StoreChangelogReaderTest.java | 21 +++++++++++++++++ 3 files changed, 33 insertions(+), 13 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StoreChangelogReader.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StoreChangelogReader.java index bfe37ab7d364f..ac2b3d024c577 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StoreChangelogReader.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StoreChangelogReader.java @@ -100,31 +100,28 @@ public void register(final StateRestorer restorer) { public Collection restore() { - final List completed = new ArrayList<>(); if (!initialized) { - completed.addAll(initialize()); + initialize(); } if (needsRestoring.isEmpty()) { consumer.assign(Collections.emptyList()); - return completed; + return completed(); } final Set partitions = new HashSet<>(needsRestoring.keySet()); final ConsumerRecords allRecords = consumer.poll(10); for (final TopicPartition partition : partitions) { - if (restorePartition(allRecords, partition)) { - completed.add(partition); - } + restorePartition(allRecords, partition); } if (needsRestoring.isEmpty()) { consumer.assign(Collections.emptyList()); } - return completed; + return completed(); } - private Collection initialize() { + private void initialize() { needsRestoring.clear(); if (!consumer.subscription().isEmpty()) { throw new IllegalStateException(String.format("Restore consumer should have not subscribed to any partitions (%s) beforehand", consumer.subscription())); @@ -171,11 +168,7 @@ private Collection initialize() { position, endOffsets.get(restorer.partition())); } - - final Set completed = new HashSet<>(stateRestorers.keySet()); - completed.removeAll(needsRestoring.keySet()); initialized = true; - return completed; } private void logRestoreOffsets(final TopicPartition partition, final long startingOffset, final Long endOffset) { @@ -186,6 +179,12 @@ private void logRestoreOffsets(final TopicPartition partition, final long starti endOffset); } + private Collection completed() { + final Set completed = new HashSet<>(stateRestorers.keySet()); + completed.removeAll(needsRestoring.keySet()); + return completed; + } + @Override public Map restoredOffsets() { final Map restoredOffsets = new HashMap<>(); diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java index 43fc06708a52c..0f5cc778c87a2 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java @@ -125,7 +125,7 @@ public class StreamThread extends Thread { * the coordinator repeatedly fails in-between revoking partitions and assigning new partitions. */ public enum State { - CREATED(1, 5, 6), RUNNING(2, 5, 6), PARTITIONS_REVOKED(2, 3, 5, 6), ASSIGNING_PARTITIONS(4, 5, 6), PARTITIONS_ASSIGNED(1, 2, 5, 6), PENDING_SHUTDOWN(6), DEAD; + CREATED(1, 5), RUNNING(2, 5), PARTITIONS_REVOKED(2, 3, 5), ASSIGNING_PARTITIONS(4, 5), PARTITIONS_ASSIGNED(1, 2, 5), PENDING_SHUTDOWN(6), DEAD; private final Set validTransitions = new HashSet<>(); diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StoreChangelogReaderTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StoreChangelogReaderTest.java index 66bdab6fd5664..4638cd0481701 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StoreChangelogReaderTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StoreChangelogReaderTest.java @@ -31,12 +31,14 @@ import org.hamcrest.CoreMatchers; import org.junit.Test; +import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; public class StoreChangelogReaderTest { @@ -253,6 +255,25 @@ public void shouldIgnoreNullKeysWhenRestoring() throws Exception { assertThat(callback.restored, CoreMatchers.equalTo(Utils.mkList(KeyValue.pair(bytes, bytes), KeyValue.pair(bytes, bytes)))); } + @Test + public void shouldReturnCompletedPartitionsOnEachRestoreCall() { + assignPartition(10, topicPartition); + final byte[] bytes = new byte[0]; + for (int i = 0; i < 5; i++) { + consumer.addRecord(new ConsumerRecord<>(topicPartition.topic(), topicPartition.partition(), i, bytes, bytes)); + } + consumer.assign(Collections.singletonList(topicPartition)); + changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, false, "store")); + + final Collection completedFirstTime = changelogReader.restore(); + assertTrue(completedFirstTime.isEmpty()); + for (int i = 5; i < 10; i++) { + consumer.addRecord(new ConsumerRecord<>(topicPartition.topic(), topicPartition.partition(), i, bytes, bytes)); + } + final Collection expected = Collections.singleton(topicPartition); + assertThat(changelogReader.restore(), equalTo(expected)); + } + private void setupConsumer(final long messages, final TopicPartition topicPartition) { assignPartition(messages, topicPartition); From 430df40f28e409ee95338808e03b7332f1e6f19f Mon Sep 17 00:00:00 2001 From: Damian Guy Date: Tue, 15 Aug 2017 10:05:58 +0100 Subject: [PATCH 3/8] address comments --- .../apache/kafka/streams/KafkaStreams.java | 2 +- .../processor/internals/AbstractTask.java | 5 ++-- .../processor/internals/AssignedTasks.java | 14 ++++------ .../internals/ProcessorStateManager.java | 4 +-- .../processor/internals/StandbyTask.java | 1 + .../processor/internals/StateRestorer.java | 9 +------ .../internals/StoreChangelogReader.java | 7 ++--- .../processor/internals/StreamTask.java | 5 ++-- .../processor/internals/StreamThread.java | 25 +++++++----------- .../internals/StateRestorerTest.java | 4 +-- .../internals/StoreChangelogReaderTest.java | 26 +++++++++---------- .../processor/internals/StreamThreadTest.java | 2 +- 12 files changed, 45 insertions(+), 59 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java b/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java index 0c7c5987736ca..697d90c4cc735 100644 --- a/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java +++ b/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java @@ -275,7 +275,7 @@ public synchronized void onChange(final StreamThread thread, threadState.remove(thread.getId()); } if (newState == StreamThread.State.PARTITIONS_REVOKED || - newState == StreamThread.State.ASSIGNING_PARTITIONS) { + newState == StreamThread.State.PARTITIONS_ASSIGNED) { setState(State.REBALANCING); } else if (newState == StreamThread.State.RUNNING) { for (final StreamThread.State state : threadState.values()) { diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java index 92485b77ad1f2..b965a813eac5c 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java @@ -203,9 +203,10 @@ void initializeStateStores() { logPrefix, id)); } } catch (IOException e) { - throw new StreamsException(String.format("%s fatal error while trying to lock the state directory for task %s", + throw new StreamsException(String.format("%s Fatal error while trying to lock the state directory for task %s %s", logPrefix, - id)); + id, + e.getMessage())); } log.trace("{} Initializing state stores", logPrefix); diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java index bf387e2c78323..8ee94530c08fe 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java @@ -72,7 +72,7 @@ Set uninitializedPartitions() { void initializeNewTasks() { if (!created.isEmpty()) { - log.trace("{} initializing {}s {}", logPrefix, taskTypeName, created.keySet()); + log.trace("{} Initializing {}s {}", logPrefix, taskTypeName, created.keySet()); } for (final Iterator> it = created.entrySet().iterator(); it.hasNext(); ) { final Map.Entry entry = it.next(); @@ -122,9 +122,9 @@ Collection running() { RuntimeException suspend() { final AtomicReference firstException = new AtomicReference<>(null); - log.trace("{} suspending running {} {}", logPrefix, taskTypeName, runningTaskIds()); + log.trace("{} Suspending running {} {}", logPrefix, taskTypeName, runningTaskIds()); firstException.compareAndSet(null, suspendTasks(running.values(), suspended)); - log.trace("{} close restoring {} {}", logPrefix, taskTypeName, restoring.keySet()); + log.trace("{} Close restoring {} {}", logPrefix, taskTypeName, restoring.keySet()); firstException.compareAndSet(null, closeRestoringTasks()); running.clear(); restoring.clear(); @@ -139,7 +139,7 @@ private RuntimeException closeRestoringTasks() { try { task.close(false); } catch (final RuntimeException e) { - log.error("{} failed to close restoring {}, {}", logPrefix, taskTypeName, task.id, e); + log.error("{} Failed to close restoring {}, {}", logPrefix, taskTypeName, task.id, e); if (exception == null) { exception = e; } @@ -194,7 +194,7 @@ boolean maybeResumeSuspendedTask(final TaskId taskId, final Set final T task = suspended.get(taskId); if (task.partitions().equals(partitions)) { suspended.remove(taskId); - log.trace("{} resuming suspended {} {}", logPrefix, taskTypeName, taskId); + log.trace("{} Resuming suspended {} {}", logPrefix, taskTypeName, taskId); task.resume(); transitionToRunning(task); return true; @@ -312,8 +312,4 @@ RuntimeException applyToRunningTasks(final StreamThread.TaskAction action, fi return firstException; } - - public Collection restoringTaskIds() { - return restoring.keySet(); - } } diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorStateManager.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorStateManager.java index c429feffe8b3a..110b03da5f827 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorStateManager.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorStateManager.java @@ -170,8 +170,8 @@ public void register(final StateStore store, stateRestoreCallback, checkpointedOffsets.get(storePartition), offsetLimit(storePartition), - store.persistent(), - store.name()); + store.persistent() + ); changelogReader.register(restorer); } diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java index 53d93316a9b98..d8311101fec9f 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java @@ -145,6 +145,7 @@ Map checkpointedOffsets() { } public boolean initialize() { + log.debug("{} Initializing", logPrefix); initializeStateStores(); checkpointedOffsets = Collections.unmodifiableMap(stateMgr.checkpointed()); processorContext.initialized(); diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StateRestorer.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StateRestorer.java index bfd266b40c717..79bfd1d0f49d7 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StateRestorer.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StateRestorer.java @@ -27,7 +27,6 @@ public class StateRestorer { private final boolean persistent; private final TopicPartition partition; private final StateRestoreCallback stateRestoreCallback; - private final String storeName; private long restoredOffset; private long startingOffset; @@ -36,14 +35,12 @@ public class StateRestorer { final StateRestoreCallback stateRestoreCallback, final Long checkpoint, final long offsetLimit, - final boolean persistent, - final String storeName) { + final boolean persistent) { this.partition = partition; this.stateRestoreCallback = stateRestoreCallback; this.checkpoint = checkpoint; this.offsetLimit = offsetLimit; this.persistent = persistent; - this.storeName = storeName; } public TopicPartition partition() { @@ -93,8 +90,4 @@ long offsetLimit() { private Long readTo(final long endOffset) { return endOffset < offsetLimit ? endOffset : offsetLimit; } - - String storeName() { - return storeName; - } } diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StoreChangelogReader.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StoreChangelogReader.java index ac2b3d024c577..ea64619f0ac06 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StoreChangelogReader.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StoreChangelogReader.java @@ -134,10 +134,10 @@ private void initialize() { Long offset = entry.getValue(); final StateRestorer restorer = stateRestorers.get(topicPartition); if (restorer.checkpoint() >= offset) { - log.debug("{} not restoring partition {} as checkpoint is >= offset", topicPartition); + log.debug("{} not restoring partition {} as checkpoint {} is >= offset {}", logPrefix, topicPartition, restorer.checkpoint(), offset); restorer.setRestoredOffset(restorer.checkpoint()); } else if (restorer.offsetLimit() == 0) { - log.debug("{} not restoring partition {} as offset limit is 0", topicPartition); + log.debug("{} not restoring partition {} as offset limit is 0", logPrefix, topicPartition); restorer.setRestoredOffset(0); } else { needsRestoring.put(topicPartition, restorer); @@ -235,7 +235,8 @@ private boolean restorePartition(final ConsumerRecords allRecord } private long processNext(final List> records, - final StateRestorer restorer, final Long endOffset) { + final StateRestorer restorer, + final Long endOffset) { for (final ConsumerRecord record : records) { final long offset = record.offset(); if (restorer.hasCompleted(offset, endOffset)) { diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java index cc6353781a9c7..7eb5bdeaec741 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java @@ -154,7 +154,7 @@ public StreamTask(final TaskId id, */ @Override public void resume() { - log.debug("{} Resuming {}", logPrefix, id); + log.debug("{} Resuming", logPrefix); if (eosEnabled) { producer.beginTransaction(); transactionInFlight = true; @@ -320,7 +320,7 @@ private void commitOffsets(final boolean startNewTransaction) { } } - void initTopology() { + private void initTopology() { // initialize the task by initializing all its processor nodes in the topology log.trace("{} Initializing processor nodes of the topology", logPrefix); for (final ProcessorNode node : topology.processors()) { @@ -550,6 +550,7 @@ RecordCollector createRecordCollector() { } public boolean initialize() { + log.debug("{} Initializing", logPrefix); initializeStateStores(); initTopology(); processorContext.initialized(); diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java index 0f5cc778c87a2..a180c48383724 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java @@ -95,14 +95,8 @@ public class StreamThread extends Thread { * | | | * | v | * | +-----+-------+ | - * +<--- | Assigning | | - * | | Partitions | | - * | +-----+-------+ | - * | | | - * | v | - * | +-----+-------+ | - * | | Partitions | | - * +<--- | Assigned | +--> + + * +<--- | Partitions |+---> | + * | | Assigned | * | +-----+-------+ * | | * | v @@ -125,7 +119,7 @@ public class StreamThread extends Thread { * the coordinator repeatedly fails in-between revoking partitions and assigning new partitions. */ public enum State { - CREATED(1, 5), RUNNING(2, 5), PARTITIONS_REVOKED(2, 3, 5), ASSIGNING_PARTITIONS(4, 5), PARTITIONS_ASSIGNED(1, 2, 5), PENDING_SHUTDOWN(6), DEAD; + CREATED(1, 4), RUNNING(2, 4), PARTITIONS_REVOKED(2, 3, 4), PARTITIONS_ASSIGNED(1, 4), PENDING_SHUTDOWN(5), DEAD; private final Set validTransitions = new HashSet<>(); @@ -181,7 +175,7 @@ public void onPartitionsAssigned(final Collection assignment) { final long start = time.milliseconds(); try { - if (!setStateWhenNotInPendingShutdown(State.ASSIGNING_PARTITIONS)) { + if (!setStateWhenNotInPendingShutdown(State.PARTITIONS_ASSIGNED)) { return; } // do this first as we may have suspended standby tasks that @@ -452,8 +446,7 @@ public StreamThread(final TopologyBuilder builder, rebalanceListener = new RebalanceListener(time); active = new AssignedTasks<>(logPrefix, "stream task"); standby = new AssignedTasks<>(logPrefix, "standby task"); - storeChangelogReader = - new StoreChangelogReader(getName(), restoreConsumer, time, requestTimeOut); + storeChangelogReader = new StoreChangelogReader(getName(), restoreConsumer, time, requestTimeOut); setState(State.RUNNING); } @@ -498,8 +491,7 @@ private void runLoop() { } // Visible for testing - long runOnce(final long recordsProcessedBeforeCommit) { - long processedBeforeCommit = recordsProcessedBeforeCommit; + long runOnce(long recordsProcessedBeforeCommit) { timerStartedMs = time.milliseconds(); // try to fetch some records if necessary @@ -531,14 +523,14 @@ long runOnce(final long recordsProcessedBeforeCommit) { final long processLatency = computeLatency(); streamsMetrics.processTimeSensor.record(processLatency / (double) totalProcessed, timerStartedMs); - processedBeforeCommit = adjustRecordsProcessedBeforeCommit(recordsProcessedBeforeCommit, totalProcessed, + recordsProcessedBeforeCommit = adjustRecordsProcessedBeforeCommit(recordsProcessedBeforeCommit, totalProcessed, processLatency, commitTimeMs); } } maybeCommit(timerStartedMs); maybeUpdateStandbyTasks(timerStartedMs); - return processedBeforeCommit; + return recordsProcessedBeforeCommit; } /** @@ -1024,6 +1016,7 @@ void setPartitionAssignor(final StreamPartitionAssignor partitionAssignor) { // Visible for testing void shutdown(final boolean cleanRun) { log.info("{} Shutting down", logPrefix); + setState(State.PENDING_SHUTDOWN); shutdownTasksAndState(cleanRun); // close all embedded clients diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StateRestorerTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StateRestorerTest.java index 3a8ebdaf22fc6..6968f3319ec33 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StateRestorerTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StateRestorerTest.java @@ -28,7 +28,7 @@ public class StateRestorerTest { private static final long OFFSET_LIMIT = 50; private final MockRestoreCallback callback = new MockRestoreCallback(); - private final StateRestorer restorer = new StateRestorer(new TopicPartition("topic", 1), callback, null, OFFSET_LIMIT, true, "store"); + private final StateRestorer restorer = new StateRestorer(new TopicPartition("topic", 1), callback, null, OFFSET_LIMIT, true); @Test public void shouldCallRestoreOnRestoreCallback() throws Exception { @@ -53,7 +53,7 @@ public void shouldBeCompletedIfEndOffsetAndRecordOffsetAreZero() throws Exceptio @Test public void shouldBeCompletedIfOffsetAndOffsetLimitAreZero() throws Exception { - final StateRestorer restorer = new StateRestorer(new TopicPartition("topic", 1), callback, null, 0, true, "store"); + final StateRestorer restorer = new StateRestorer(new TopicPartition("topic", 1), callback, null, 0, true); assertTrue(restorer.hasCompleted(0, 10)); } diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StoreChangelogReaderTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StoreChangelogReaderTest.java index 4638cd0481701..a2254b690f2bc 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StoreChangelogReaderTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StoreChangelogReaderTest.java @@ -141,7 +141,7 @@ public void shouldThrowExceptionIfConsumerHasCurrentSubscription() throws Except public void shouldRestoreAllMessagesFromBeginningWhenCheckpointNull() throws Exception { final int messages = 10; setupConsumer(messages, topicPartition); - changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, true, "store")); + changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, true)); changelogReader.restore(); assertThat(callback.restored.size(), equalTo(messages)); @@ -151,7 +151,7 @@ public void shouldRestoreAllMessagesFromBeginningWhenCheckpointNull() throws Exc public void shouldRestoreMessagesFromCheckpoint() throws Exception { final int messages = 10; setupConsumer(messages, topicPartition); - changelogReader.register(new StateRestorer(topicPartition, callback, 5L, Long.MAX_VALUE, true, "store")); + changelogReader.register(new StateRestorer(topicPartition, callback, 5L, Long.MAX_VALUE, true)); changelogReader.restore(); assertThat(callback.restored.size(), equalTo(5)); @@ -161,7 +161,7 @@ public void shouldRestoreMessagesFromCheckpoint() throws Exception { public void shouldClearAssignmentAtEndOfRestore() throws Exception { final int messages = 1; setupConsumer(messages, topicPartition); - changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, true, "store")); + changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, true)); changelogReader.restore(); assertThat(consumer.assignment(), equalTo(Collections.emptySet())); @@ -170,7 +170,7 @@ public void shouldClearAssignmentAtEndOfRestore() throws Exception { @Test public void shouldRestoreToLimitWhenSupplied() throws Exception { setupConsumer(10, topicPartition); - final StateRestorer restorer = new StateRestorer(topicPartition, callback, null, 3, true, "store"); + final StateRestorer restorer = new StateRestorer(topicPartition, callback, null, 3, true); changelogReader.register(restorer); changelogReader.restore(); @@ -188,9 +188,9 @@ public void shouldRestoreMultipleStores() throws Exception { setupConsumer(5, one); setupConsumer(3, two); - changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, true, "store")); - changelogReader.register(new StateRestorer(one, callbackOne, null, Long.MAX_VALUE, true, "store_1")); - changelogReader.register(new StateRestorer(two, callbackTwo, null, Long.MAX_VALUE, true, "store_2")); + changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, true)); + changelogReader.register(new StateRestorer(one, callbackOne, null, Long.MAX_VALUE, true)); + changelogReader.register(new StateRestorer(two, callbackTwo, null, Long.MAX_VALUE, true)); changelogReader.restore(); @@ -201,7 +201,7 @@ public void shouldRestoreMultipleStores() throws Exception { @Test public void shouldNotRestoreAnythingWhenPartitionIsEmpty() throws Exception { - final StateRestorer restorer = new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, true, "store"); + final StateRestorer restorer = new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, true); setupConsumer(0, topicPartition); changelogReader.register(restorer); @@ -214,7 +214,7 @@ public void shouldNotRestoreAnythingWhenPartitionIsEmpty() throws Exception { public void shouldNotRestoreAnythingWhenCheckpointAtEndOffset() throws Exception { final Long endOffset = 10L; setupConsumer(endOffset, topicPartition); - final StateRestorer restorer = new StateRestorer(topicPartition, callback, endOffset, Long.MAX_VALUE, true, "store"); + final StateRestorer restorer = new StateRestorer(topicPartition, callback, endOffset, Long.MAX_VALUE, true); changelogReader.register(restorer); @@ -226,7 +226,7 @@ public void shouldNotRestoreAnythingWhenCheckpointAtEndOffset() throws Exception @Test public void shouldReturnRestoredOffsetsForPersistentStores() throws Exception { setupConsumer(10, topicPartition); - changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, true, "store")); + changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, true)); changelogReader.restore(); final Map restoredOffsets = changelogReader.restoredOffsets(); assertThat(restoredOffsets, equalTo(Collections.singletonMap(topicPartition, 10L))); @@ -235,7 +235,7 @@ public void shouldReturnRestoredOffsetsForPersistentStores() throws Exception { @Test public void shouldNotReturnRestoredOffsetsForNonPersistentStore() throws Exception { setupConsumer(10, topicPartition); - changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, false, "store")); + changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, false)); changelogReader.restore(); final Map restoredOffsets = changelogReader.restoredOffsets(); assertThat(restoredOffsets, equalTo(Collections.emptyMap())); @@ -249,7 +249,7 @@ public void shouldIgnoreNullKeysWhenRestoring() throws Exception { consumer.addRecord(new ConsumerRecord<>(topicPartition.topic(), topicPartition.partition(), 1, (byte[]) null, bytes)); consumer.addRecord(new ConsumerRecord<>(topicPartition.topic(), topicPartition.partition(), 2, bytes, bytes)); consumer.assign(Collections.singletonList(topicPartition)); - changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, false, "store")); + changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, false)); changelogReader.restore(); assertThat(callback.restored, CoreMatchers.equalTo(Utils.mkList(KeyValue.pair(bytes, bytes), KeyValue.pair(bytes, bytes)))); @@ -263,7 +263,7 @@ public void shouldReturnCompletedPartitionsOnEachRestoreCall() { consumer.addRecord(new ConsumerRecord<>(topicPartition.topic(), topicPartition.partition(), i, bytes, bytes)); } consumer.assign(Collections.singletonList(topicPartition)); - changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, false, "store")); + changelogReader.register(new StateRestorer(topicPartition, callback, null, Long.MAX_VALUE, false)); final Collection completedFirstTime = changelogReader.restore(); assertTrue(completedFirstTime.isEmpty()); diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamThreadTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamThreadTest.java index e8518d9fcef54..83cf1e6de0a24 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamThreadTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamThreadTest.java @@ -296,7 +296,7 @@ Map> activeTasks() { activeTasks.put(new TaskId(0, 1), expectedGroup1); rebalanceListener.onPartitionsAssigned(assignedPartitions); Assert.assertEquals(stateListener.numChanges, 3); - Assert.assertEquals(stateListener.oldState, StreamThread.State.ASSIGNING_PARTITIONS); + Assert.assertEquals(stateListener.oldState, StreamThread.State.PARTITIONS_ASSIGNED); thread.runOnce(-1); assertEquals(thread.state(), StreamThread.State.RUNNING); assertTrue(thread.tasks().containsKey(task1)); From 36d4ef0a05c64cfc1e9eba98606cf3ebfa62b740 Mon Sep 17 00:00:00 2001 From: Damian Guy Date: Tue, 15 Aug 2017 12:07:28 +0100 Subject: [PATCH 4/8] move commit into AssignedTasks --- .../processor/internals/AssignedTasks.java | 24 ++++++++++++++++++- .../processor/internals/StreamThread.java | 21 ++-------------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java index 8ee94530c08fe..639b827d799d3 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java @@ -284,13 +284,16 @@ Set previousTasks() { return previous; } - RuntimeException applyToRunningTasks(final StreamThread.TaskAction action, final boolean throwException) { + public RuntimeException applyToRunningTasks(final StreamThread.TaskAction action, final boolean throwException) { RuntimeException firstException = null; for (Iterator it = running().iterator(); it.hasNext(); ) { final T task = it.next(); try { action.apply(task); + } catch (final CommitFailedException e) { + // commit failed. This is already logged inside the task as WARN and we can just log it again here. + log.warn("{} Failed to commit {} {} state due to CommitFailedException; this task may be no longer owned by the thread", logPrefix, taskTypeName, task.id()); } catch (final ProducerFencedException e) { closeZombieTask(task); it.remove(); @@ -312,4 +315,23 @@ RuntimeException applyToRunningTasks(final StreamThread.TaskAction action, fi return firstException; } + + void commit() { + final RuntimeException exception = applyToRunningTasks(new StreamThread.TaskAction() { + @Override + public String name() { + return "commit"; + } + + @Override + public void apply(final T task) { + task.commit(); + } + }, false); + + if (exception != null) { + throw exception; + } + + } } diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java index a180c48383724..47b9867953a3b 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java @@ -761,25 +761,8 @@ protected void maybeCommit(final long now) { * Commit the states of all its tasks */ private void commitAll() { - - final RuntimeException e = active.applyToRunningTasks(new TaskAction() { - @Override - public String name() { - return "commit"; - } - - @Override - public void apply(final StreamTask task1) { - commitOne(task1); - } - }, false); - if (e != null) { - throw e; - } - - for (final StandbyTask task : standby.running()) { - commitOne(task); - } + active.commit(); + standby.commit(); } /** From a9c1cbd82e37e50f9abf92401d6e1dafcc8e52fa Mon Sep 17 00:00:00 2001 From: Damian Guy Date: Tue, 15 Aug 2017 14:23:52 +0100 Subject: [PATCH 5/8] address comments --- .../processor/internals/AbstractTask.java | 6 + .../processor/internals/AssignedTasks.java | 130 ++++++++++--- .../processor/internals/StandbyTask.java | 15 ++ .../processor/internals/StreamThread.java | 87 ++------- .../processor/internals/AbstractTaskTest.java | 14 ++ .../internals/AssignedTasksTest.java | 181 ++++++++++++++---- 6 files changed, 298 insertions(+), 135 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java index b965a813eac5c..0bcdbaccb2598 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java @@ -251,6 +251,8 @@ void closeStateManager(final boolean writeCheckpoint) throws ProcessorStateExcep */ public abstract boolean initialize(); + abstract boolean process(); + boolean hasStateStores() { return !topology.stateStores().isEmpty(); } @@ -258,4 +260,8 @@ boolean hasStateStores() { Collection changelogPartitions() { return stateMgr.changelogPartitions(); } + + abstract boolean maybePunctuate(); + + abstract boolean commitNeeded(); } diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java index 639b827d799d3..6a36e9f2b2d34 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java @@ -19,6 +19,8 @@ import org.apache.kafka.clients.consumer.CommitFailedException; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.errors.ProducerFencedException; +import org.apache.kafka.common.metrics.Sensor; +import org.apache.kafka.common.utils.Time; import org.apache.kafka.streams.errors.LockException; import org.apache.kafka.streams.processor.TaskId; import org.slf4j.Logger; @@ -34,12 +36,14 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; class AssignedTasks { private static final Logger log = LoggerFactory.getLogger(AssignedTasks.class); private final String logPrefix; private final String taskTypeName; + private final Time time; private Map created = new HashMap<>(); private Map suspended = new HashMap<>(); private Map restoring = new HashMap<>(); @@ -48,9 +52,12 @@ class AssignedTasks { private Map running = new ConcurrentHashMap<>(); private Map runningByPartition = new HashMap<>(); - AssignedTasks(final String logPrefix, final String taskTypeName) { + AssignedTasks(final String logPrefix, + final String taskTypeName, + final Time time) { this.logPrefix = logPrefix; this.taskTypeName = taskTypeName; + this.time = time; } void addNewTask(final T task) { @@ -123,7 +130,7 @@ Collection running() { RuntimeException suspend() { final AtomicReference firstException = new AtomicReference<>(null); log.trace("{} Suspending running {} {}", logPrefix, taskTypeName, runningTaskIds()); - firstException.compareAndSet(null, suspendTasks(running.values(), suspended)); + firstException.compareAndSet(null, suspendTasks(running.values())); log.trace("{} Close restoring {} {}", logPrefix, taskTypeName, restoring.keySet()); firstException.compareAndSet(null, closeRestoringTasks()); running.clear(); @@ -148,13 +155,13 @@ private RuntimeException closeRestoringTasks() { return exception; } - private RuntimeException suspendTasks(final Collection tasks, final Map suspendedTasks) { + private RuntimeException suspendTasks(final Collection tasks) { RuntimeException exception = null; for (Iterator it = tasks.iterator(); it.hasNext(); ) { final T task = it.next(); try { task.suspend(); - suspendedTasks.put(task.id(), task); + suspended.put(task.id(), task); } catch (final CommitFailedException e) { // commit failed during suspension. Just log it. log.warn("{} Failed to commit {} {} state when suspending due to CommitFailedException", logPrefix, taskTypeName, task.id); @@ -284,7 +291,99 @@ Set previousTasks() { return previous; } - public RuntimeException applyToRunningTasks(final StreamThread.TaskAction action, final boolean throwException) { + void commit() { + final RuntimeException exception = applyToRunningTasks(new TaskAction() { + @Override + public String name() { + return "commit"; + } + + @Override + public void apply(final T task) { + task.commit(); + } + }, false); + + if (exception != null) { + throw exception; + } + + } + + int process() { + final AtomicInteger processed = new AtomicInteger(0); + applyToRunningTasks(new TaskAction() { + @Override + public String name() { + return "process"; + } + + @Override + public void apply(final T task) { + if (task.process()) { + processed.incrementAndGet(); + } + } + }, true); + return processed.get(); + } + + void punctuateAndCommit(final Sensor commitTimeSensor, final Sensor punctuateTimeSensor) { + final Latency latency = new Latency(time.milliseconds()); + final RuntimeException exception = applyToRunningTasks(new TaskAction() { + String name; + + @Override + public String name() { + return name; + } + + @Override + public void apply(final T task) { + name = "punctuate"; + if (task.maybePunctuate()) { + punctuateTimeSensor.record(latency.compute(), latency.startTime); + } + if (task.commitNeeded()) { + name = "commit"; + long beforeCommitMs = time.milliseconds(); + task.commit(); + commitTimeSensor.record(latency.compute(), latency.startTime); + if (log.isDebugEnabled()) { + log.debug("{} Committed active task {} per user request in {} ms", + logPrefix, task.id(), latency.startTime - beforeCommitMs); + } + } + } + }, false); + + if (exception != null) { + throw exception; + } + } + + interface TaskAction { + String name(); + + void apply(final T task); + } + + class Latency { + private long startTime; + + Latency(final long startTime) { + this.startTime = startTime; + } + + private long compute() { + final long previousTimeMs = startTime; + startTime = time.milliseconds(); + return Math.max(startTime - previousTimeMs, 0); + } + } + + + private RuntimeException applyToRunningTasks(final TaskAction action, final boolean throwException) { RuntimeException firstException = null; for (Iterator it = running().iterator(); it.hasNext(); ) { @@ -293,7 +392,7 @@ public RuntimeException applyToRunningTasks(final StreamThread.TaskAction act action.apply(task); } catch (final CommitFailedException e) { // commit failed. This is already logged inside the task as WARN and we can just log it again here. - log.warn("{} Failed to commit {} {} state due to CommitFailedException; this task may be no longer owned by the thread", logPrefix, taskTypeName, task.id()); + log.warn("{} Failed to commit {} {} during {} state due to CommitFailedException; this task may be no longer owned by the thread", logPrefix, taskTypeName, task.id(), action.name()); } catch (final ProducerFencedException e) { closeZombieTask(task); it.remove(); @@ -315,23 +414,4 @@ public RuntimeException applyToRunningTasks(final StreamThread.TaskAction act return firstException; } - - void commit() { - final RuntimeException exception = applyToRunningTasks(new StreamThread.TaskAction() { - @Override - public String name() { - return "commit"; - } - - @Override - public void apply(final T task) { - task.commit(); - } - }, false); - - if (exception != null) { - throw exception; - } - - } } diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java index d8311101fec9f..bc29834a3098f 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java @@ -152,4 +152,19 @@ public boolean initialize() { return true; } + @Override + boolean process() { + throw new UnsupportedOperationException("process not supported by standby task"); + } + + @Override + boolean maybePunctuate() { + throw new UnsupportedOperationException("maybePunctuate not supported by standby task"); + } + + @Override + boolean commitNeeded() { + throw new UnsupportedOperationException("commitNeeded not supported by standby task"); + } + } \ No newline at end of file diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java index 47b9867953a3b..4585b7fa0e0fb 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java @@ -61,7 +61,6 @@ import java.util.Set; import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.regex.Pattern; @@ -269,12 +268,6 @@ void createTask(final TaskId taskId, final Set partitions) { } } - interface TaskAction { - String name(); - - void apply(final T task); - } - /** * This class extends {@link StreamsMetricsImpl(Metrics, String, String, Map)} and * overrides one of its functions for efficiency @@ -444,8 +437,8 @@ public StreamThread(final TopologyBuilder builder, lastCommitMs = timerStartedMs; final Integer requestTimeOut = config.getInt(ConsumerConfig.REQUEST_TIMEOUT_MS_CONFIG); rebalanceListener = new RebalanceListener(time); - active = new AssignedTasks<>(logPrefix, "stream task"); - standby = new AssignedTasks<>(logPrefix, "standby task"); + active = new AssignedTasks<>(logPrefix, "stream task", Time.SYSTEM); + standby = new AssignedTasks<>(logPrefix, "standby task", Time.SYSTEM); storeChangelogReader = new StoreChangelogReader(getName(), restoreConsumer, time, requestTimeOut); setState(State.RUNNING); } @@ -624,83 +617,27 @@ private void addRecordsToTasks(final ConsumerRecords records) { */ private long processAndPunctuate(final long recordsProcessedBeforeCommit) { - final AtomicLong totalProcessedEachRound = new AtomicLong(0); - final AtomicLong totalProcessedSinceLastMaybeCommit = new AtomicLong(0); + int processed; + long totalProcessedSinceLastMaybeCommit = 0; // Round-robin scheduling by taking one record from each task repeatedly // until no task has any records left do { - totalProcessedEachRound.set(0); - active.applyToRunningTasks(new TaskAction() { - @Override - public String name() { - return "process"; - } - - @Override - public void apply(final StreamTask task) { - if (task.process()) { - totalProcessedEachRound.incrementAndGet(); - totalProcessedSinceLastMaybeCommit.incrementAndGet(); - } - } - }, true); + processed = active.process(); + totalProcessedSinceLastMaybeCommit += processed; if (recordsProcessedBeforeCommit != UNLIMITED_RECORDS && - totalProcessedSinceLastMaybeCommit.get() >= recordsProcessedBeforeCommit) { - totalProcessedSinceLastMaybeCommit.set(0); + totalProcessedSinceLastMaybeCommit >= recordsProcessedBeforeCommit) { + totalProcessedSinceLastMaybeCommit = 0; final long processLatency = computeLatency(); - streamsMetrics.processTimeSensor.record(processLatency / (double) totalProcessedSinceLastMaybeCommit.get(), + streamsMetrics.processTimeSensor.record(processLatency / (double) totalProcessedSinceLastMaybeCommit, timerStartedMs); maybeCommit(timerStartedMs); } - } while (totalProcessedEachRound.get() != 0); + } while (processed != 0); // go over the tasks again to punctuate or commit - - final RuntimeException e = active.applyToRunningTasks(new TaskAction() { - String name; - @Override - public String name() { - return name; - } - - @Override - public void apply(final StreamTask task) { - name = "punctuate"; - maybePunctuate(task); - if (task.commitNeeded()) { - name = "commit"; - - long beforeCommitMs = time.milliseconds(); - - commitOne(task); - - if (log.isDebugEnabled()) { - log.debug("{} Committed active task {} per user request in {}ms", - logPrefix, task.id(), timerStartedMs - beforeCommitMs); - } - } - } - }, false); - - if (e != null) { - throw e; - } - - return totalProcessedSinceLastMaybeCommit.get(); - } - - private void maybePunctuate(final StreamTask task) { - try { - // check whether we should punctuate based on the task's partition group timestamp; - // which are essentially based on record timestamp. - if (task.maybePunctuate()) { - streamsMetrics.punctuateTimeSensor.record(computeLatency(), timerStartedMs); - } - } catch (final KafkaException e) { - log.error("{} Failed to punctuate active task {} due to the following error:", logPrefix, task.id(), e); - throw e; - } + active.punctuateAndCommit(streamsMetrics.commitTimeSensor, streamsMetrics.punctuateTimeSensor); + return totalProcessedSinceLastMaybeCommit; } /** diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/AbstractTaskTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/AbstractTaskTest.java index d5675e7250568..5e71f318f3369 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/AbstractTaskTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/AbstractTaskTest.java @@ -140,6 +140,20 @@ public boolean initialize() { return false; } + @Override + boolean process() { + return false; + } + + @Override + boolean maybePunctuate() { + return false; + } + + @Override + boolean commitNeeded() { + return false; + } }; } diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/AssignedTasksTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/AssignedTasksTest.java index f9fde1a0a69b6..68a7619a86e97 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/AssignedTasksTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/AssignedTasksTest.java @@ -17,8 +17,12 @@ package org.apache.kafka.streams.processor.internals; +import org.apache.kafka.clients.consumer.CommitFailedException; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.errors.ProducerFencedException; +import org.apache.kafka.common.metrics.Metrics; +import org.apache.kafka.common.metrics.Sensor; +import org.apache.kafka.common.utils.Time; import org.apache.kafka.common.utils.Utils; import org.apache.kafka.streams.processor.TaskId; import org.easymock.EasyMock; @@ -35,10 +39,11 @@ import static org.hamcrest.core.IsEqual.equalTo; import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; public class AssignedTasksTest { - private final AssignedTasks assignedTasks = new AssignedTasks<>("log", "task"); + private final AssignedTasks assignedTasks = new AssignedTasks<>("log", "task", Time.SYSTEM); private final AbstractTask t1 = EasyMock.createMock(AbstractTask.class); private final AbstractTask t2 = EasyMock.createMock(AbstractTask.class); private final TopicPartition tp1 = new TopicPartition("t1", 0); @@ -47,6 +52,9 @@ public class AssignedTasksTest { private final TopicPartition changeLog2 = new TopicPartition("cl2", 0); private final TaskId taskId1 = new TaskId(0, 0); private final TaskId taskId2 = new TaskId(1, 0); + private final Metrics metrics = new Metrics(); + private final Sensor punctuateSensor = metrics.sensor("punctuate"); + private final Sensor commitSensor = metrics.sensor("commit"); @Before public void before() { @@ -226,76 +234,179 @@ public void shouldResumeMatchingSuspendedTasks() { EasyMock.verify(t1); } - - @SuppressWarnings("unchecked") @Test - public void shouldApplyActionToRunningTasks() { - StreamThread.TaskAction taskAction = EasyMock.createMock(StreamThread.TaskAction.class); - taskAction.apply(EasyMock.anyObject(AbstractTask.class)); - EasyMock.expectLastCall(); + public void shouldCommitRunningTasks() { mockInitializedTask(); - - EasyMock.replay(t1, taskAction); + t1.commit(); + EasyMock.expectLastCall(); + EasyMock.replay(t1); assignedTasks.addNewTask(t1); assignedTasks.initializeNewTasks(); - assignedTasks.applyToRunningTasks(taskAction, false); + assignedTasks.commit(); + EasyMock.verify(t1); + } + + @Test + public void shouldCloseTaskOnCommitIfProduceFencedException() { + mockInitializedTask(); + t1.commit(); + EasyMock.expectLastCall().andThrow(new ProducerFencedException("")); + t1.close(false); + EasyMock.expectLastCall(); + EasyMock.replay(t1); + assignedTasks.addNewTask(t1); + assignedTasks.initializeNewTasks(); - EasyMock.verify(taskAction); + assignedTasks.commit(); + assertTrue(assignedTasks.running().isEmpty()); + EasyMock.verify(t1); } - @SuppressWarnings("unchecked") @Test - public void shouldNotApplyActionToNotRunningTasks() { - final StreamThread.TaskAction taskAction = EasyMock.createMock(StreamThread.TaskAction.class); - EasyMock.expect(t1.initialize()).andReturn(false); + public void shouldNotThrowCommitFailedExceptionOnCommit() { + mockInitializedTask(); + t1.commit(); + EasyMock.expectLastCall().andThrow(new CommitFailedException()); + EasyMock.replay(t1); + assignedTasks.addNewTask(t1); + assignedTasks.initializeNewTasks(); - EasyMock.replay(t1, taskAction); + assignedTasks.commit(); + assertThat(assignedTasks.runningTaskIds(), equalTo(Collections.singleton(taskId1))); + EasyMock.verify(t1); + } + @Test + public void shouldThrowExceptionOnCommitWhenNotCommitFailedOrProducerFenced() { + mockInitializedTask(); + t1.commit(); + EasyMock.expectLastCall().andThrow(new RuntimeException("")); + EasyMock.replay(t1); assignedTasks.addNewTask(t1); assignedTasks.initializeNewTasks(); - assignedTasks.applyToRunningTasks(taskAction, false); + try { + assignedTasks.commit(); + fail("Should have thrown exception"); + } catch (Exception e) { + // ok + } + assertThat(assignedTasks.runningTaskIds(), equalTo(Collections.singleton(taskId1))); + EasyMock.verify(t1); + } + + @Test + public void shouldProcessRunningTasks() { + mockInitializedTask(); + EasyMock.expect(t1.process()).andReturn(true); + EasyMock.replay(t1); + + assignedTasks.addNewTask(t1); + assignedTasks.initializeNewTasks(); - EasyMock.verify(taskAction); + assertThat(assignedTasks.process(), equalTo(1)); + EasyMock.verify(t1); } - @SuppressWarnings({"unchecked", "ThrowableNotThrown"}) @Test - public void shouldCloseTaskOnApplyToRunningIfProducerFencedException() { - final StreamThread.TaskAction taskAction = EasyMock.createMock(StreamThread.TaskAction.class); - taskAction.apply(EasyMock.anyObject(AbstractTask.class)); - EasyMock.expectLastCall().andThrow(new ProducerFencedException("BOOM!")); + public void shouldCloseTaskOnProcessIfProducerFencedException() { mockInitializedTask(); + EasyMock.expect(t1.process()).andThrow(new ProducerFencedException("")); t1.close(false); EasyMock.expectLastCall(); - EasyMock.replay(t1, taskAction); + EasyMock.replay(t1); assignedTasks.addNewTask(t1); assignedTasks.initializeNewTasks(); - assignedTasks.applyToRunningTasks(taskAction, false); + assignedTasks.process(); assertTrue(assignedTasks.running().isEmpty()); - EasyMock.verify(t1, taskAction); + EasyMock.verify(t1); } - @SuppressWarnings("unchecked") @Test - public void shouldReturnExceptionAndNotCloseTaskOnApplyToRunningWhenRuntimeException() { - final StreamThread.TaskAction taskAction = EasyMock.createMock(StreamThread.TaskAction.class); - taskAction.apply(EasyMock.anyObject(AbstractTask.class)); - EasyMock.expectLastCall().andThrow(new RuntimeException("KABOOM!")); - EasyMock.expect(taskAction.name()).andReturn("name"); + public void shouldThrowExceptionOnProcessWhenNotCommitFailedOrProducerFencedException() { + mockInitializedTask(); + EasyMock.expect(t1.process()).andThrow(new RuntimeException("")); + EasyMock.replay(t1); + + assignedTasks.addNewTask(t1); + assignedTasks.initializeNewTasks(); + + try { + assignedTasks.process(); + fail("should have thrown exception"); + } catch (Exception e) { + // okd + } + assertThat(assignedTasks.runningTaskIds(), equalTo(Collections.singleton(taskId1))); + EasyMock.verify(t1); + } + + @Test + public void shouldPunctuateRunningTasks() { + mockInitializedTask(); + EasyMock.expect(t1.maybePunctuate()).andReturn(true); + EasyMock.expect(t1.commitNeeded()).andReturn(false); + EasyMock.replay(t1); + + assignedTasks.addNewTask(t1); + assignedTasks.initializeNewTasks(); + + assignedTasks.punctuateAndCommit(commitSensor, punctuateSensor); + EasyMock.verify(t1); + } + + @Test + public void shouldCommitRunningTasksIfNeeded() { mockInitializedTask(); - EasyMock.replay(t1, taskAction); + EasyMock.expect(t1.maybePunctuate()).andReturn(true); + EasyMock.expect(t1.commitNeeded()).andReturn(true); + t1.commit(); + EasyMock.expectLastCall(); + EasyMock.replay(t1); + + assignedTasks.addNewTask(t1); + assignedTasks.initializeNewTasks(); + + assignedTasks.punctuateAndCommit(commitSensor, punctuateSensor); + EasyMock.verify(t1); + } + @Test + public void shouldThrowExceptionOnPunctuateAndCommitWhenNotCommitFailedOrProducerFencedException() { + mockInitializedTask(); + EasyMock.expect(t1.maybePunctuate()).andThrow(new RuntimeException("")); + EasyMock.replay(t1); assignedTasks.addNewTask(t1); assignedTasks.initializeNewTasks(); - assertThat(assignedTasks.applyToRunningTasks(taskAction, false), not(nullValue())); + try { + assignedTasks.punctuateAndCommit(commitSensor, punctuateSensor); + fail("should have thrown exception"); + } catch (Exception e) { + // ok + } assertThat(assignedTasks.runningTaskIds(), equalTo(Collections.singleton(taskId1))); - EasyMock.verify(t1, taskAction); + EasyMock.verify(t1); + } + + @Test + public void shouldCloseTaskOnPunctuateAndCommitIfProducerFencedException() { + mockInitializedTask(); + EasyMock.expect(t1.maybePunctuate()).andThrow(new ProducerFencedException("")); + t1.close(false); + EasyMock.expectLastCall(); + EasyMock.replay(t1); + assignedTasks.addNewTask(t1); + assignedTasks.initializeNewTasks(); + + assignedTasks.punctuateAndCommit(commitSensor, punctuateSensor); + assertTrue(assignedTasks.running().isEmpty()); + EasyMock.verify(t1); + } private RuntimeException suspendTask() { From 0ef6b87219e2c1389f4e876a03d475f1e32e2f98 Mon Sep 17 00:00:00 2001 From: Damian Guy Date: Tue, 15 Aug 2017 20:08:17 +0100 Subject: [PATCH 6/8] add task to suspended task list if commit failed on suspension --- .../kafka/streams/processor/internals/AssignedTasks.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java index 6a36e9f2b2d34..137a7cd89f8df 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java @@ -61,6 +61,7 @@ class AssignedTasks { } void addNewTask(final T task) { + log.trace("{} add new {} {}", logPrefix, taskTypeName, task.id()); created.put(task.id(), task); } @@ -91,7 +92,8 @@ void initializeNewTasks() { } it.remove(); } catch (final LockException e) { - log.warn("{} Could not create {} {} due to {}; will retry", logPrefix, taskTypeName, entry.getKey(), e); + // made this trace as it will spam the logs in the poll loop. + log.trace("{} Could not create {} {} due to {}; will retry", logPrefix, taskTypeName, entry.getKey(), e.getMessage()); } } } @@ -163,6 +165,7 @@ private RuntimeException suspendTasks(final Collection tasks) { task.suspend(); suspended.put(task.id(), task); } catch (final CommitFailedException e) { + suspended.put(task.id(), task); // commit failed during suspension. Just log it. log.warn("{} Failed to commit {} {} state when suspending due to CommitFailedException", logPrefix, taskTypeName, task.id); } catch (final ProducerFencedException e) { @@ -205,6 +208,8 @@ boolean maybeResumeSuspendedTask(final TaskId taskId, final Set task.resume(); transitionToRunning(task); return true; + } else { + log.trace("{} couldn't resume task {} assigned partitions {}, task partitions", logPrefix, taskId, partitions, task.partitions); } } return false; From ccca8a7e09fb1c2f3efc7f3ba1192577bb86181a Mon Sep 17 00:00:00 2001 From: Damian Guy Date: Tue, 15 Aug 2017 20:47:52 +0100 Subject: [PATCH 7/8] put back previous tasks --- .../processor/internals/AssignedTasks.java | 21 ++++++---- .../processor/internals/StreamThread.java | 39 +++++-------------- .../internals/AssignedTasksTest.java | 14 +++---- 3 files changed, 29 insertions(+), 45 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java index 137a7cd89f8df..9c82fba9817a7 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java @@ -48,6 +48,7 @@ class AssignedTasks { private Map suspended = new HashMap<>(); private Map restoring = new HashMap<>(); private Set restoredPartitions = new HashSet<>(); + private Set previousActiveTasks = new HashSet<>(); // IQ may access this map. private Map running = new ConcurrentHashMap<>(); private Map runningByPartition = new HashMap<>(); @@ -125,7 +126,7 @@ boolean allTasksRunning() { && restoring.isEmpty(); } - Collection running() { + Collection runningTasks() { return running.values(); } @@ -135,6 +136,8 @@ RuntimeException suspend() { firstException.compareAndSet(null, suspendTasks(running.values())); log.trace("{} Close restoring {} {}", logPrefix, taskTypeName, restoring.keySet()); firstException.compareAndSet(null, closeRestoringTasks()); + previousActiveTasks.clear(); + previousActiveTasks.addAll(running.keySet()); running.clear(); restoring.clear(); created.clear(); @@ -265,11 +268,11 @@ List allInitializedTasks() { return tasks; } - Collection suspended() { + Collection suspendedTasks() { return suspended.values(); } - Collection restoring() { + Collection restoringTasks() { return Collections.unmodifiableCollection(restoring.values()); } @@ -290,10 +293,8 @@ void clear() { restoredPartitions.clear(); } - Set previousTasks() { - final Set previous = new HashSet<>(); - previous.addAll(suspended.keySet()); - return previous; + Set previousTaskIds() { + return previousActiveTasks; } void commit() { @@ -367,6 +368,10 @@ public void apply(final T task) { } } + Collection suspendedTaskIds() { + return suspended.keySet(); + } + interface TaskAction { String name(); @@ -391,7 +396,7 @@ private long compute() { private RuntimeException applyToRunningTasks(final TaskAction action, final boolean throwException) { RuntimeException firstException = null; - for (Iterator it = running().iterator(); it.hasNext(); ) { + for (Iterator it = runningTasks().iterator(); it.hasNext(); ) { final T task = it.next(); try { action.apply(task); diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java index 39530550cd76d..2d91e1b7b85e3 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java @@ -16,7 +16,6 @@ */ package org.apache.kafka.streams.processor.internals; -import org.apache.kafka.clients.consumer.CommitFailedException; import org.apache.kafka.clients.consumer.Consumer; import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.clients.consumer.ConsumerRebalanceListener; @@ -170,8 +169,8 @@ public void onPartitionsAssigned(final Collection assignment) { assignment, partitionAssignor.activeTasks().keySet(), partitionAssignor.standbyTasks().keySet(), - active.previousTasks(), - standby.previousTasks()); + active.previousTaskIds(), + standby.previousTaskIds()); final long start = time.milliseconds(); try { @@ -201,7 +200,7 @@ public void onPartitionsAssigned(final Collection assignment) { time.milliseconds() - start, active.allAssignedTaskIds(), standby.allAssignedTaskIds(), - active.previousTasks()); + active.previousTaskIds()); } } @@ -232,8 +231,8 @@ public void onPartitionsRevoked(final Collection assignment) { "\tsuspended standby tasks: {}", logPrefix, time.milliseconds() - start, - active.previousTasks(), - standby.previousTasks()); + active.suspendedTaskIds(), + standby.suspendedTaskIds()); } } } @@ -702,26 +701,8 @@ private void commitAll() { standby.commit(); } - /** - * Commit the state of a task - */ - private void commitOne(final AbstractTask task) { - try { - task.commit(); - } catch (final CommitFailedException e) { - // commit failed. This is already logged inside the task as WARN and we can just log it again here. - log.warn("{} Failed to commit {} {} state due to CommitFailedException; this task may be no longer owned by the thread", logPrefix, task.getClass().getSimpleName(), task.id()); - } catch (final KafkaException e) { - // commit failed due to an unexpected exception. Log it and rethrow the exception. - log.error("{} Failed to commit {} {} state due to the following error:", logPrefix, task.getClass().getSimpleName(), task.id(), e); - throw e; - } - - streamsMetrics.commitTimeSensor.record(computeLatency(), timerStartedMs); - } - private void assignStandbyPartitions() { - final Collection running = standby.running(); + final Collection running = standby.runningTasks(); final Map checkpointedOffsets = new HashMap<>(); for (StandbyTask standbyTask : running) { checkpointedOffsets.putAll(standbyTask.checkpointedOffsets()); @@ -827,7 +808,7 @@ public Map tasks() { * Returns ids of tasks that were being executed before the rebalance. */ public Set prevActiveTasks() { - return Collections.unmodifiableSet(active.previousTasks()); + return Collections.unmodifiableSet(active.previousTaskIds()); } /** @@ -997,7 +978,7 @@ void shutdown(final boolean cleanRun) { private void shutdownTasksAndState(final boolean cleanRun) { log.debug("{} Shutting down all active tasks {}, standby tasks {}, suspended tasks {}, and suspended standby tasks {}", logPrefix, active.runningTaskIds(), standby.runningTaskIds(), - active.previousTasks(), standby.previousTasks()); + active.previousTaskIds(), standby.previousTaskIds()); for (final AbstractTask task : allTasks()) { try { @@ -1054,7 +1035,7 @@ private List allTasks() { private void closeNonAssignedSuspendedTasks() { final Map> newTaskAssignment = partitionAssignor.activeTasks(); - final Iterator suspendedTaskIterator = active.suspended().iterator(); + final Iterator suspendedTaskIterator = active.suspendedTasks().iterator(); while (suspendedTaskIterator.hasNext()) { final StreamTask task = suspendedTaskIterator.next(); final Set assignedPartitionsForTask = newTaskAssignment.get(task.id); @@ -1073,7 +1054,7 @@ private void closeNonAssignedSuspendedTasks() { private void closeNonAssignedSuspendedStandbyTasks() { final Set newStandbyTaskIds = partitionAssignor.standbyTasks().keySet(); - final Iterator standByTaskIterator = standby.suspended().iterator(); + final Iterator standByTaskIterator = standby.suspendedTasks().iterator(); while (standByTaskIterator.hasNext()) { final StandbyTask task = standByTaskIterator.next(); if (!newStandbyTaskIds.contains(task.id)) { diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/AssignedTasksTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/AssignedTasksTest.java index 68a7619a86e97..8a619eab0fed3 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/AssignedTasksTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/AssignedTasksTest.java @@ -117,7 +117,7 @@ public void shouldMoveInitializedTasksNeedingRestoreToRestoring() { assignedTasks.initializeNewTasks(); - Collection restoring = assignedTasks.restoring(); + Collection restoring = assignedTasks.restoringTasks(); assertThat(restoring.size(), equalTo(1)); assertSame(restoring.iterator().next(), t1); } @@ -161,7 +161,7 @@ public void shouldSuspendRunningTasks() { suspendTask(); - assertThat(assignedTasks.previousTasks(), equalTo(Collections.singleton(taskId1))); + assertThat(assignedTasks.previousTaskIds(), equalTo(Collections.singleton(taskId1))); EasyMock.verify(t1); } @@ -195,7 +195,6 @@ public void shouldCloseTaskOnSuspendWhenRuntimeException() { EasyMock.replay(t1); assertThat(suspendTask(), not(nullValue())); - assertTrue(assignedTasks.previousTasks().isEmpty()); EasyMock.verify(t1); } @@ -209,7 +208,7 @@ public void shouldCloseTaskOnSuspendWhenProducerFencedException() { EasyMock.replay(t1); assertThat(suspendTask(), nullValue()); - assertTrue(assignedTasks.previousTasks().isEmpty()); + assertTrue(assignedTasks.previousTaskIds().isEmpty()); EasyMock.verify(t1); } @@ -230,7 +229,6 @@ public void shouldResumeMatchingSuspendedTasks() { assertTrue(assignedTasks.maybeResumeSuspendedTask(taskId1, Collections.singleton(tp1))); assertThat(assignedTasks.runningTaskIds(), equalTo(Collections.singleton(taskId1))); - assertTrue(assignedTasks.previousTasks().isEmpty()); EasyMock.verify(t1); } @@ -260,7 +258,7 @@ public void shouldCloseTaskOnCommitIfProduceFencedException() { assignedTasks.initializeNewTasks(); assignedTasks.commit(); - assertTrue(assignedTasks.running().isEmpty()); + assertTrue(assignedTasks.runningTasks().isEmpty()); EasyMock.verify(t1); } @@ -322,7 +320,7 @@ public void shouldCloseTaskOnProcessIfProducerFencedException() { assignedTasks.initializeNewTasks(); assignedTasks.process(); - assertTrue(assignedTasks.running().isEmpty()); + assertTrue(assignedTasks.runningTasks().isEmpty()); EasyMock.verify(t1); } @@ -404,7 +402,7 @@ public void shouldCloseTaskOnPunctuateAndCommitIfProducerFencedException() { assignedTasks.initializeNewTasks(); assignedTasks.punctuateAndCommit(commitSensor, punctuateSensor); - assertTrue(assignedTasks.running().isEmpty()); + assertTrue(assignedTasks.runningTasks().isEmpty()); EasyMock.verify(t1); } From f9c3b662cd90c83d32bbdb4972bd33a133cecd9f Mon Sep 17 00:00:00 2001 From: Damian Guy Date: Wed, 16 Aug 2017 09:00:15 +0100 Subject: [PATCH 8/8] close uninitialized tasks on suspend --- .../processor/internals/AbstractTask.java | 1 + .../processor/internals/AssignedTasks.java | 9 +++++---- .../processor/internals/StandbyTask.java | 4 ++++ .../processor/internals/StreamTask.java | 19 +++++++++++-------- .../internals/AssignedTasksTest.java | 4 +++- .../processor/internals/StreamTaskTest.java | 12 ++++++++++++ 6 files changed, 36 insertions(+), 13 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java index 0bcdbaccb2598..b8a585a5ca5b8 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java @@ -52,6 +52,7 @@ public abstract class AbstractTask { final String logPrefix; final boolean eosEnabled; private final StateDirectory stateDirectory; + boolean taskInitialized; InternalProcessorContext processorContext; diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java index 9c82fba9817a7..6db212f835bb9 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedTasks.java @@ -135,7 +135,8 @@ RuntimeException suspend() { log.trace("{} Suspending running {} {}", logPrefix, taskTypeName, runningTaskIds()); firstException.compareAndSet(null, suspendTasks(running.values())); log.trace("{} Close restoring {} {}", logPrefix, taskTypeName, restoring.keySet()); - firstException.compareAndSet(null, closeRestoringTasks()); + firstException.compareAndSet(null, closeTasksUnclean(restoring.values())); + firstException.compareAndSet(null, closeTasksUnclean(created.values())); previousActiveTasks.clear(); previousActiveTasks.addAll(running.keySet()); running.clear(); @@ -145,13 +146,13 @@ RuntimeException suspend() { return firstException.get(); } - private RuntimeException closeRestoringTasks() { + private RuntimeException closeTasksUnclean(final Collection tasks) { RuntimeException exception = null; - for (final T task : restoring.values()) { + for (final T task : tasks) { try { task.close(false); } catch (final RuntimeException e) { - log.error("{} Failed to close restoring {}, {}", logPrefix, taskTypeName, task.id, e); + log.error("{} Failed to close {}, {}", logPrefix, taskTypeName, task.id, e); if (exception == null) { exception = e; } diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java index bc29834a3098f..bc36351e60fb6 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java @@ -119,6 +119,9 @@ private void flushAndCheckpointState() { */ @Override public void close(final boolean clean) { + if (!taskInitialized) { + return; + } log.debug("{} Closing", logPrefix); boolean committedSuccessfully = false; try { @@ -149,6 +152,7 @@ public boolean initialize() { initializeStateStores(); checkpointedOffsets = Collections.unmodifiableMap(stateMgr.checkpointed()); processorContext.initialized(); + taskInitialized = true; return true; } diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java index 7eb5bdeaec741..c5c67bb1541b1 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java @@ -331,6 +331,7 @@ private void initTopology() { processorContext.setCurrentNode(null); } } + taskInitialized = true; } /** @@ -373,14 +374,16 @@ private void closeTopology() { // close the processors // make sure close() is called for each node even when there is a RuntimeException RuntimeException exception = null; - for (final ProcessorNode node : topology.processors()) { - processorContext.setCurrentNode(node); - try { - node.close(); - } catch (final RuntimeException e) { - exception = e; - } finally { - processorContext.setCurrentNode(null); + if (taskInitialized) { + for (final ProcessorNode node : topology.processors()) { + processorContext.setCurrentNode(node); + try { + node.close(); + } catch (final RuntimeException e) { + exception = e; + } finally { + processorContext.setCurrentNode(null); + } } } diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/AssignedTasksTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/AssignedTasksTest.java index 8a619eab0fed3..52d0ea8d87922 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/AssignedTasksTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/AssignedTasksTest.java @@ -166,7 +166,9 @@ public void shouldSuspendRunningTasks() { } @Test - public void shouldNotSuspendUnInitializedTasks() { + public void shouldCloseUnInitializedTasksOnSuspend() { + t1.close(false); + EasyMock.expectLastCall(); EasyMock.replay(t1); assignedTasks.addNewTask(t1); diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamTaskTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamTaskTest.java index c6c977b30e47d..f4a04002eba54 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamTaskTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamTaskTest.java @@ -613,6 +613,7 @@ public void shouldNotThrowIExceptionOnScheduleIfCurrentNodeIsNotNull() throws Ex public void shouldThrowExceptionIfAnyExceptionsRaisedDuringCloseButStillCloseAllProcessorNodesTopology() throws Exception { task.close(true); task = createTaskThatThrowsExceptionOnClose(); + task.initialize(); try { task.close(true); fail("should have thrown runtime exception"); @@ -779,6 +780,17 @@ public void shouldCloseProducerOnCloseWhenEosEnabled() throws Exception { assertTrue(producer.closed()); } + @Test + public void shouldNotCloseTopologyProcessorNodesIfNotInitialized() { + final StreamTask task = createTaskThatThrowsExceptionOnClose(); + try { + task.close(true); + } catch (Exception e) { + fail("should have not closed unitialized topology"); + } + } + + @SuppressWarnings("unchecked") private StreamTask createTaskThatThrowsExceptionOnClose() { final MockSourceNode processorNode = new MockSourceNode(topic1, intDeserializer, intDeserializer) {