-
Notifications
You must be signed in to change notification settings - Fork 2.5k
feat: Add support for exclusive rollbacks with multi writer #18448
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -1245,25 +1245,57 @@ public boolean rollback(final String commitInstantTime, Option<HoodiePendingRoll | |||||||||||
| final Timer.Context timerContext = this.metrics.getRollbackCtx(); | ||||||||||||
| try { | ||||||||||||
| HoodieTable table = createTable(config, storageConf, skipVersionCheck); | ||||||||||||
|
|
||||||||||||
| Option<HoodieInstant> commitInstantOpt = Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstantsAsStream() | ||||||||||||
| .filter(instant -> EQUALS.test(instant.requestedTime(), commitInstantTime)) | ||||||||||||
| .findFirst()); | ||||||||||||
| Option<HoodieRollbackPlan> rollbackPlanOption; | ||||||||||||
| String rollbackInstantTime; | ||||||||||||
| if (pendingRollbackInfo.isPresent()) { | ||||||||||||
| Option<HoodieRollbackPlan> rollbackPlanOption = Option.empty(); | ||||||||||||
| Option<String> rollbackInstantTimeOpt; | ||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Refresh The exclusive branch rechecks commit presence on the reloaded timeline, but Line 1205 still uses the pre-lock Proposed fix Option<HoodieInstant> commitInstantOpt = Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstantsAsStream()
.filter(instant -> EQUALS.test(instant.requestedTime(), commitInstantTime))
.findFirst());
@@
if (config.isExclusiveRollbackEnabled()) {
// Reload meta client within the lock so that the timeline is latest while executing pending rollback
table.getMetaClient().reloadActiveTimeline();
+ commitInstantOpt = Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstantsAsStream()
+ .filter(instant -> EQUALS.test(instant.requestedTime(), commitInstantTime))
+ .findFirst());
Option<HoodiePendingRollbackInfo> pendingRollbackOpt = getPendingRollbackInfo(table.getMetaClient(), commitInstantTime);
rollbackInstantTimeOpt = pendingRollbackOpt.map(info -> info.getRollbackInstant().requestedTime());
@@
- } else if (Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstantsAsStream()
- .filter(instant -> EQUALS.test(instant.requestedTime(), commitInstantTime))
- .findFirst()).isEmpty()) {
+ } else if (commitInstantOpt.isEmpty()) {
// Assume rollback is already executed since the commit is no longer present in the timeline
return false;
} else {Also applies to: 1179-1205 🤖 Prompt for AI Agents— CodeRabbit (original) (source:comment#3067036668) |
||||||||||||
| if (!config.isExclusiveRollbackEnabled() && pendingRollbackInfo.isPresent()) { | ||||||||||||
| // Only case when lock can be skipped is if exclusive rollback is disabled and | ||||||||||||
| // there is a pending rollback info available | ||||||||||||
| rollbackPlanOption = Option.of(pendingRollbackInfo.get().getRollbackPlan()); | ||||||||||||
| rollbackInstantTime = pendingRollbackInfo.get().getRollbackInstant().requestedTime(); | ||||||||||||
| rollbackInstantTimeOpt = Option.of(pendingRollbackInfo.get().getRollbackInstant().requestedTime()); | ||||||||||||
| } else { | ||||||||||||
| if (commitInstantOpt.isEmpty()) { | ||||||||||||
| log.error("Cannot find instant {} in the timeline of table {} for rollback", commitInstantTime, config.getBasePath()); | ||||||||||||
| return false; | ||||||||||||
| } | ||||||||||||
| if (!skipLocking) { | ||||||||||||
| txnManager.beginStateChange(Option.empty(), Option.empty()); | ||||||||||||
| } | ||||||||||||
| try { | ||||||||||||
| rollbackInstantTime = suppliedRollbackInstantTime.orElseGet(() -> createNewInstantTime(false)); | ||||||||||||
| rollbackPlanOption = table.scheduleRollback(context, rollbackInstantTime, commitInstantOpt.get(), false, config.shouldRollbackUsingMarkers(), false); | ||||||||||||
| if (config.isExclusiveRollbackEnabled()) { | ||||||||||||
| // Reload meta client within the lock so that the timeline is latest while executing pending rollback | ||||||||||||
| table.getMetaClient().reloadActiveTimeline(); | ||||||||||||
| Option<HoodiePendingRollbackInfo> pendingRollbackOpt = getPendingRollbackInfo(table.getMetaClient(), commitInstantTime); | ||||||||||||
| rollbackInstantTimeOpt = pendingRollbackOpt.map(info -> info.getRollbackInstant().requestedTime()); | ||||||||||||
| if (pendingRollbackOpt.isPresent()) { | ||||||||||||
| // If pending rollback and heartbeat is expired, writer should start heartbeat and execute rollback | ||||||||||||
| if (heartbeatClient.isHeartbeatExpired(rollbackInstantTimeOpt.get())) { | ||||||||||||
| LOG.info("Heartbeat expired for rollback instant {}, executing rollback now", rollbackInstantTimeOpt); | ||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The class is annotated with
Suggested change
— Greptile (original) (source:comment#3067036686) |
||||||||||||
| HeartbeatUtils.deleteHeartbeatFile(storage, basePath, rollbackInstantTimeOpt.get(), config); | ||||||||||||
| heartbeatClient.start(rollbackInstantTimeOpt.get()); | ||||||||||||
| rollbackPlanOption = pendingRollbackOpt.map(HoodiePendingRollbackInfo::getRollbackPlan); | ||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Always stop the rollback heartbeat when this writer started it. If Proposed fix Option<HoodieRollbackPlan> rollbackPlanOption = Option.empty();
Option<String> rollbackInstantTimeOpt;
+ boolean startedRollbackHeartbeat = false;
@@
if (heartbeatClient.isHeartbeatExpired(rollbackInstantTimeOpt.get())) {
LOG.info("Heartbeat expired for rollback instant {}, executing rollback now", rollbackInstantTimeOpt);
HeartbeatUtils.deleteHeartbeatFile(storage, basePath, rollbackInstantTimeOpt.get(), config);
heartbeatClient.start(rollbackInstantTimeOpt.get());
+ startedRollbackHeartbeat = true;
rollbackPlanOption = pendingRollbackOpt.map(HoodiePendingRollbackInfo::getRollbackPlan);
@@
rollbackInstantTimeOpt = suppliedRollbackInstantTime.or(() -> Option.of(createNewInstantTime(false)));
heartbeatClient.start(rollbackInstantTimeOpt.get());
+ startedRollbackHeartbeat = true;
rollbackPlanOption = table.scheduleRollback(context, rollbackInstantTimeOpt.get(), commitInstantOpt.get(), false, config.shouldRollbackUsingMarkers(), false);
@@
- HoodieRollbackMetadata rollbackMetadata = commitInstantOpt.isPresent()
- ? table.rollback(context, rollbackInstantTimeOpt.get(), commitInstantOpt.get(), true, skipLocking)
- : table.rollback(context, rollbackInstantTimeOpt.get(), table.getMetaClient().createNewInstant(
- HoodieInstant.State.INFLIGHT, rollbackPlanOption.get().getInstantToRollback().getAction(), commitInstantTime),
- false, skipLocking);
- if (timerContext != null) {
- long durationInMs = metrics.getDurationInMs(timerContext.stop());
- metrics.updateRollbackMetrics(durationInMs, rollbackMetadata.getTotalFilesDeleted());
- }
- if (config.isExclusiveRollbackEnabled()) {
- heartbeatClient.stop(rollbackInstantTimeOpt.get());
- }
- return true;
+ try {
+ HoodieRollbackMetadata rollbackMetadata = commitInstantOpt.isPresent()
+ ? table.rollback(context, rollbackInstantTimeOpt.get(), commitInstantOpt.get(), true, skipLocking)
+ : table.rollback(context, rollbackInstantTimeOpt.get(), table.getMetaClient().createNewInstant(
+ HoodieInstant.State.INFLIGHT, rollbackPlanOption.get().getInstantToRollback().getAction(), commitInstantTime),
+ false, skipLocking);
+ if (timerContext != null) {
+ long durationInMs = metrics.getDurationInMs(timerContext.stop());
+ metrics.updateRollbackMetrics(durationInMs, rollbackMetadata.getTotalFilesDeleted());
+ }
+ return true;
+ } finally {
+ if (config.isExclusiveRollbackEnabled() && startedRollbackHeartbeat && rollbackInstantTimeOpt.isPresent()) {
+ heartbeatClient.stop(rollbackInstantTimeOpt.get());
+ }
+ }Also applies to: 1203-1205, 1230-1241 🤖 Prompt for AI Agents— CodeRabbit (original) (source:comment#3067036671) |
||||||||||||
| } else { | ||||||||||||
| // Heartbeat is still active for another writer, ignore rollback for now | ||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: |
||||||||||||
| // TODO: ABCDEFGHI revisit return value | ||||||||||||
| return false; | ||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The comment
Suggested change
— Greptile (original) (source:comment#3067036706) |
||||||||||||
| } | ||||||||||||
| } else if (Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstantsAsStream() | ||||||||||||
| .filter(instant -> EQUALS.test(instant.requestedTime(), commitInstantTime)) | ||||||||||||
| .findFirst()).isEmpty()) { | ||||||||||||
| // Assume rollback is already executed since the commit is no longer present in the timeline | ||||||||||||
| return false; | ||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When exclusive rollback is enabled and no pending rollback exists yet (first writer to schedule a rollback for a failed commit), this path falls through without setting |
||||||||||||
| } | ||||||||||||
| } else { | ||||||||||||
| // Case where no pending rollback is present, | ||||||||||||
| if (commitInstantOpt.isEmpty()) { | ||||||||||||
| log.error("Cannot find instant {} in the timeline of table {} for rollback", commitInstantTime, config.getBasePath()); | ||||||||||||
| return false; | ||||||||||||
| } | ||||||||||||
| rollbackInstantTimeOpt = suppliedRollbackInstantTime.or(() -> Option.of(createNewInstantTime(false))); | ||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This |
||||||||||||
| if (config.isExclusiveRollbackEnabled()) { | ||||||||||||
| heartbeatClient.start(rollbackInstantTimeOpt.get()); | ||||||||||||
| } | ||||||||||||
| rollbackPlanOption = table.scheduleRollback(context, rollbackInstantTimeOpt.get(), commitInstantOpt.get(), false, config.shouldRollbackUsingMarkers(), false); | ||||||||||||
| } | ||||||||||||
| } finally { | ||||||||||||
| if (!skipLocking) { | ||||||||||||
| txnManager.endStateChange(Option.empty()); | ||||||||||||
|
|
@@ -1279,14 +1311,17 @@ public boolean rollback(final String commitInstantTime, Option<HoodiePendingRoll | |||||||||||
| // is set to false since they are already deleted. | ||||||||||||
| // Execute rollback | ||||||||||||
| HoodieRollbackMetadata rollbackMetadata = commitInstantOpt.isPresent() | ||||||||||||
| ? table.rollback(context, rollbackInstantTime, commitInstantOpt.get(), true, skipLocking) | ||||||||||||
| : table.rollback(context, rollbackInstantTime, table.getMetaClient().createNewInstant( | ||||||||||||
| ? table.rollback(context, rollbackInstantTimeOpt.get(), commitInstantOpt.get(), true, skipLocking) | ||||||||||||
| : table.rollback(context, rollbackInstantTimeOpt.get(), table.getMetaClient().createNewInstant( | ||||||||||||
| HoodieInstant.State.INFLIGHT, rollbackPlanOption.get().getInstantToRollback().getAction(), commitInstantTime), | ||||||||||||
| false, skipLocking); | ||||||||||||
| if (timerContext != null) { | ||||||||||||
| long durationInMs = metrics.getDurationInMs(timerContext.stop()); | ||||||||||||
| metrics.updateRollbackMetrics(durationInMs, rollbackMetadata.getTotalFilesDeleted()); | ||||||||||||
| } | ||||||||||||
| if (config.isExclusiveRollbackEnabled()) { | ||||||||||||
| heartbeatClient.stop(rollbackInstantTimeOpt.get()); | ||||||||||||
| } | ||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If |
||||||||||||
| return true; | ||||||||||||
| } else { | ||||||||||||
| throw new HoodieRollbackException("Failed to rollback " + config.getBasePath() + " commits " + commitInstantTime); | ||||||||||||
|
|
||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🤖 Line 1239: The
heartbeatClient.stop()call is still only on the success path (afterreturn true). Iftable.rollback()throws, the heartbeat is never stopped, which blocks other writers from attempting the rollback until it naturally expires. This was flagged in the previous review — could you wrap the rollback execution + stop in a try/finally? Something like:- Generated by an AI agent and may contain mistakes. Please verify any suggestions before applying.