apache · guozhangwang · Oct 7, 2019 · Aug 8, 2019 · Oct 2, 2019 · Oct 2, 2019
diff --git a/checkstyle/suppressions.xml b/checkstyle/suppressions.xml
@@ -197,6 +197,9 @@
     <suppress checks="MethodLength"
               files="RocksDBWindowStoreTest.java"/>
 
+    <suppress checks="MemberName"
+              files="StreamsPartitionAssignorTest.java"/>
+
     <suppress checks="ClassDataAbstractionCoupling"
               files=".*[/\\]streams[/\\].*test[/\\].*.java"/>
 

diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java
@@ -355,26 +355,29 @@ protected void onJoinComplete(int generation,
         Set<TopicPartition> addedPartitions = new HashSet<>(assignedPartitions);
         addedPartitions.removeAll(ownedPartitions);
 
-        // Invoke user's revocation callback before changing assignment or updating state
         if (protocol == RebalanceProtocol.COOPERATIVE) {
             Set<TopicPartition> revokedPartitions = new HashSet<>(ownedPartitions);
             revokedPartitions.removeAll(assignedPartitions);
 
-            log.info("Updating with newly assigned partitions: {}, compare with already owned partitions: {}, " +
-                    "newly added partitions: {}, revoking partitions: {}",
+            log.info("Updating assignment with\n" +
+                    "now assigned partitions: {}\n" +
+                    "compare with previously owned partitions: {}\n" +
+                    "newly added partitions: {}\n" +
+                    "revoked partitions: {}\n",
                 Utils.join(assignedPartitions, ", "),
                 Utils.join(ownedPartitions, ", "),
                 Utils.join(addedPartitions, ", "),
-                Utils.join(revokedPartitions, ", "));
-
+                Utils.join(revokedPartitions, ", ")
+            );
 
             if (!revokedPartitions.isEmpty()) {
-                // revoke partitions that was previously owned but no longer assigned;
-                // note that we should only change the assignment AFTER we've triggered
-                // the revoke callback
+                // revoke partitions that were previously owned but no longer assigned;
+                // note that we should only change the assignment (or update the assignor's state)
+                // AFTER we've triggered the revoke callback
                 firstException.compareAndSet(null, invokePartitionsRevoked(revokedPartitions));
 
                 // if revoked any partitions, need to re-join the group afterwards
+                log.debug("Need to revoke partitions {} and re-join the group", revokedPartitions);
                 requestRejoin();
             }
         }
@@ -679,7 +682,6 @@ protected void onJoinPrepare(int generation, String memberId) {
             }
         }
 
-
         isLeader = false;
         subscriptions.resetGroupSubscription();
 

diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java
@@ -270,8 +270,14 @@ public synchronized void assignFromSubscribed(Collection<TopicPartition> assignm
         if (!this.partitionsAutoAssigned())
             throw new IllegalArgumentException("Attempt to dynamically assign partitions while manual assignment in use");
 
+        Map<TopicPartition, TopicPartitionState> assignedPartitionStates = new HashMap<>(assignments.size());
+        for (TopicPartition tp : assignments) {
+            TopicPartitionState state = this.assignment.stateValue(tp);
+            if (state == null)
+                state = new TopicPartitionState();
+            assignedPartitionStates.put(tp, state);
+        }
 
-        Map<TopicPartition, TopicPartitionState> assignedPartitionStates = partitionToStateMap(assignments);
         assignmentId++;
         this.assignment.set(assignedPartitionStates);
     }
@@ -669,13 +675,6 @@ public synchronized ConsumerRebalanceListener rebalanceListener() {
         return rebalanceListener;
     }
 
-    private static Map<TopicPartition, TopicPartitionState> partitionToStateMap(Collection<TopicPartition> assignments) {
-        Map<TopicPartition, TopicPartitionState> map = new HashMap<>(assignments.size());
-        for (TopicPartition tp : assignments)
-            map.put(tp, new TopicPartitionState());
-        return map;
-    }
-
     private static class TopicPartitionState {
 
         private FetchState fetchState;

diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/FetcherTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/FetcherTest.java
@@ -848,7 +848,7 @@ public void testUnauthorizedTopic() {
     }
 
     @Test
-    public void testFetchDuringRebalance() {
+    public void testFetchDuringEagerRebalance() {
         buildFetcher();
 
         subscriptions.subscribe(singleton(topicName), listener);
@@ -859,7 +859,9 @@ public void testFetchDuringRebalance() {
 
         assertEquals(1, fetcher.sendFetches());
 
-        // Now the rebalance happens and fetch positions are cleared
+        // Now the eager rebalance happens and fetch positions are cleared
+        subscriptions.assignFromSubscribed(Collections.emptyList());
+
         subscriptions.assignFromSubscribed(singleton(tp0));
         client.prepareResponse(fullFetchResponse(tp0, this.records, Errors.NONE, 100L, 0));
         consumerClient.poll(time.timer(0));
@@ -868,6 +870,31 @@ public void testFetchDuringRebalance() {
         assertTrue(fetcher.fetchedRecords().isEmpty());
     }
 
+    @Test
+    public void testFetchDuringCooperativeRebalance() {
+        buildFetcher();
+
+        subscriptions.subscribe(singleton(topicName), listener);
+        subscriptions.assignFromSubscribed(singleton(tp0));
+        subscriptions.seek(tp0, 0);
+
+        client.updateMetadata(initialUpdateResponse);
+
+        assertEquals(1, fetcher.sendFetches());
+
+        // Now the cooperative rebalance happens and fetch positions are NOT cleared for unrevoked partitions
+        subscriptions.assignFromSubscribed(singleton(tp0));
+
+        client.prepareResponse(fullFetchResponse(tp0, this.records, Errors.NONE, 100L, 0));
+        consumerClient.poll(time.timer(0));
+
+        Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> fetchedRecords = fetchedRecords();
+
+        // The active fetch should NOT be ignored since the position for tp0 is still valid
+        assertEquals(1, fetchedRecords.size());
+        assertEquals(3, fetchedRecords.get(tp0).size());
+    }
+
     @Test
     public void testInFlightFetchOnPausedPartition() {
         buildFetcher();

diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedStreamsTasks.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AssignedStreamsTasks.java
@@ -74,11 +74,15 @@ boolean allTasksRunning() {
     @Override
     void closeTask(final StreamTask task, final boolean clean) {
         if (suspended.containsKey(task.id())) {
-            task.closeSuspended(clean, false, null);
+            task.closeSuspended(clean, null);
         } else {
             task.close(clean, false);
         }
     }
+
+    boolean hasRestoringTasks() {
+        return !restoring.isEmpty();
+    }
 
     Set<TaskId> suspendedTaskIds() {
         return suspended.keySet();
@@ -107,7 +111,7 @@ RuntimeException suspendOrCloseTasks(final Set<TaskId> revokedTasks,
             } else if (restoring.containsKey(task)) {
                 revokedRestoringTasks.add(task);
             } else if (!suspended.containsKey(task)) {
-                log.warn("Task {} was revoked but cannot be found in the assignment", task);
+                log.warn("Task {} was revoked but cannot be found in the assignment, may have been closed due to error", task);
             }
         }
 
@@ -131,7 +135,7 @@ private RuntimeException suspendRunningTasks(final Set<TaskId> runningTasksToSus
                 task.suspend();
                 suspended.put(id, task);
             } catch (final TaskMigratedException closeAsZombieAndSwallow) {
-                // as we suspend a task, we are either shutting down or rebalancing, thus, we swallow and move on
+                // swallow and move on since we are rebalancing
                 log.info("Failed to suspend {} {} since it got migrated to another thread already. " +
                     "Closing it as zombie and move on.", taskTypeName, id);
                 firstException.compareAndSet(null, closeZombieTask(task));
@@ -248,7 +252,7 @@ private RuntimeException closeSuspended(final boolean isZombie,
 
         try {
             final boolean clean = !isZombie;
-            task.closeSuspended(clean, isZombie, null);
+            task.closeSuspended(clean, null);
         } catch (final RuntimeException e) {
             log.error("Failed to close suspended {} {} due to the following error:", taskTypeName, task.id(), e);
             return e;
@@ -264,7 +268,6 @@ RuntimeException closeNotAssignedSuspendedTasks(final Set<TaskId> revokedTasks)
         for (final TaskId revokedTask : revokedTasks) {
             final StreamTask suspendedTask = suspended.get(revokedTask);
 
-            // task may not be in the suspended tasks if it was closed due to some error
             if (suspendedTask != null) {
                 firstException.compareAndSet(null, closeSuspended(false, suspendedTask));
             } else {
@@ -335,7 +338,7 @@ boolean maybeResumeSuspendedTask(final TaskId taskId,
                 return true;
             } else {
                 log.warn("Couldn't resume task {} assigned partitions {}, task partitions {}", taskId, partitions, task.partitions());
-                task.closeSuspended(true, false, null);
+                task.closeSuspended(true, null);
             }
         }
         return false;

diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java
@@ -119,18 +119,6 @@ public void commit() {
         commitNeeded = false;
     }
 
-    /**
-     * <pre>
-     * - flush store
-     * - checkpoint store
-     * </pre>
-     */
-    @Override
-    public void suspend() {
-        log.debug("Suspending");
-        flushAndCheckpointState();
-    }
-
     private void flushAndCheckpointState() {
         stateMgr.flush();
         stateMgr.checkpoint(Collections.emptyMap());
@@ -162,13 +150,6 @@ public void close(final boolean clean,
         taskClosed = true;
     }
 
-    @Override
-    public void closeSuspended(final boolean clean,
-                               final boolean isZombie,
-                               final RuntimeException e) {
-        close(clean, isZombie);
-    }
-
     /**
      * Updates a state store using records from one change log partition
      *

diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StoreChangelogReader.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StoreChangelogReader.java
@@ -79,8 +79,7 @@ public Collection<TopicPartition> restore(final RestoringTasks active) {
             initialize(active);
         }
 
-        if (needsRestoring.isEmpty() || restoreConsumer.assignment().isEmpty()) {
-            restoreConsumer.unsubscribe();
+        if (checkForCompletedRestoration()) {
             return completedRestorers;
         }
 
@@ -116,9 +115,7 @@ public Collection<TopicPartition> restore(final RestoringTasks active) {
 
         needsRestoring.removeAll(completedRestorers);
 
-        if (needsRestoring.isEmpty()) {
-            restoreConsumer.unsubscribe();
-        }
+        checkForCompletedRestoration();
 
         return completedRestorers;
     }
@@ -337,7 +334,14 @@ private long processNext(final List<ConsumerRecord<byte[], byte[]>> records,
         return nextPosition;
     }
 
-
+    private boolean checkForCompletedRestoration() {
+        if (needsRestoring.isEmpty()) {
+            log.info("Finished restoring all active tasks");
+            restoreConsumer.unsubscribe();
+            return true;
+        }
+        return false;
+    }
 
     private boolean hasPartition(final TopicPartition topicPartition) {
         final List<PartitionInfo> partitions = partitionInfo.get(topicPartition.topic());

diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java
@@ -560,7 +560,6 @@ private void initTopology() {
      * @throws TaskMigratedException if committing offsets failed (non-EOS)
      *                               or if the task producer got fenced (EOS)
      */
-    @Override
     public void suspend() {
         log.debug("Suspending");
         suspend(true, false);
@@ -674,10 +673,7 @@ private void closeTopology() {
     }
 
     // helper to avoid calling suspend() twice if a suspended task is not reassigned and closed
-    @Override
-    public void closeSuspended(final boolean clean,
-                               final boolean isZombie,
-                               RuntimeException firstException) {
+    void closeSuspended(final boolean clean, RuntimeException firstException) {
         try {
             closeStateManager(clean);
         } catch (final RuntimeException e) {
@@ -729,7 +725,7 @@ public void close(boolean clean,
             log.error("Could not close task due to the following error:", e);
         }
 
-        closeSuspended(clean, isZombie, firstException);
+        closeSuspended(clean, firstException);
 
         taskClosed = true;
     }

diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java
@@ -134,13 +134,13 @@ public class StreamThread extends Thread {
      */
     public enum State implements ThreadStateTransitionValidator {
 
-        CREATED(1, 5),                   // 0
-        STARTING(2, 3, 5),               // 1
-        PARTITIONS_REVOKED(3, 5),        // 2
-        PARTITIONS_ASSIGNED(2, 3, 4, 5), // 3
-        RUNNING(2, 3, 5),                // 4
-        PENDING_SHUTDOWN(6),             // 5
-        DEAD;                            // 6
+        CREATED(1, 5),                    // 0
+        STARTING(2, 3, 5),                // 1
+        PARTITIONS_REVOKED(2, 3, 5),      // 2
+        PARTITIONS_ASSIGNED(2, 3, 4, 5),  // 3
+        RUNNING(2, 3, 5),                 // 4
+        PENDING_SHUTDOWN(6),              // 5
+        DEAD;                             // 6
 
         private final Set<Integer> validTransitions = new HashSet<>();
 
@@ -744,9 +744,9 @@ void runOnce() {
             // to unblock the restoration as soon as possible
             records = pollRequests(Duration.ZERO);
         } else if (state == State.PARTITIONS_REVOKED) {
-            // try to fetch some records with normal poll time
-            // in order to wait long enough to get the join response
-            records = pollRequests(pollTime);
+            // try to fetch some records with zero poll millis to unblock
+            // other useful work while waiting for the join response
+            records = pollRequests(Duration.ZERO);
         } else if (state == State.RUNNING || state == State.STARTING) {
             // try to fetch some records with normal poll time
             // in order to get long polling
@@ -980,7 +980,12 @@ boolean maybeCommit() {
                 }
             }
 
-            lastCommitMs = now;
+            if (committed == -1) {
+                log.trace("Unable to commit as we are in the middle of a rebalance, will try again when it completes.");
+            } else {
+                lastCommitMs = now;
+            }
+
             processStandbyRecords = true;
         } else {
             committed = taskManager.maybeCommitActiveTasksPerUserRequested();