From a886d809b6ec48e9bc5779b4856669210d8b7200 Mon Sep 17 00:00:00 2001 From: Deepthi Devaki Akkoorath Date: Tue, 2 May 2023 14:31:30 +0200 Subject: [PATCH] fix(backup): do not take backup when one already exists Previously, we were only checking for backups taken by this node. However, there could be backups taken by another node. For example, after restore the checkpoint record is processed again, but it must not re-take the backup. To prevent that, we should check for all backups with the given backup id. --- .../BackupAlreadyExistsException.java | 4 +-- .../backup/management/BackupServiceImpl.java | 27 ++++++++++++++----- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/backup/src/main/java/io/camunda/zeebe/backup/management/BackupAlreadyExistsException.java b/backup/src/main/java/io/camunda/zeebe/backup/management/BackupAlreadyExistsException.java index 330965e9608f..09f929f9fa1f 100644 --- a/backup/src/main/java/io/camunda/zeebe/backup/management/BackupAlreadyExistsException.java +++ b/backup/src/main/java/io/camunda/zeebe/backup/management/BackupAlreadyExistsException.java @@ -8,11 +8,11 @@ package io.camunda.zeebe.backup.management; import io.camunda.zeebe.backup.api.BackupIdentifier; -import io.camunda.zeebe.backup.api.BackupStatus; +import io.camunda.zeebe.backup.api.BackupStatusCode; class BackupAlreadyExistsException extends RuntimeException { - BackupAlreadyExistsException(final BackupIdentifier id, final BackupStatus status) { + BackupAlreadyExistsException(final BackupIdentifier id, final BackupStatusCode status) { super("Backup with id %s already exists, status of the backup is %s".formatted(id, status)); } } diff --git a/backup/src/main/java/io/camunda/zeebe/backup/management/BackupServiceImpl.java b/backup/src/main/java/io/camunda/zeebe/backup/management/BackupServiceImpl.java index f18a16a38dd6..5ed078e45358 100644 --- a/backup/src/main/java/io/camunda/zeebe/backup/management/BackupServiceImpl.java +++ b/backup/src/main/java/io/camunda/zeebe/backup/management/BackupServiceImpl.java @@ -15,6 +15,8 @@ import io.camunda.zeebe.scheduler.ConcurrencyControl; import io.camunda.zeebe.scheduler.future.ActorFuture; import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; import java.util.HashSet; import java.util.Optional; import java.util.Set; @@ -44,16 +46,22 @@ ActorFuture takeBackup( backupsInProgress.add(inProgressBackup); - final var checkCurrentBackup = backupStore.getStatus(inProgressBackup.id()); + final var checkCurrentBackup = + backupStore.list( + new BackupIdentifierWildcardImpl( + Optional.empty(), + Optional.of(inProgressBackup.id().partitionId()), + Optional.of(inProgressBackup.checkpointId()))); final ActorFuture backupSaved = concurrencyControl.createFuture(); checkCurrentBackup.whenCompleteAsync( - (status, error) -> { + (availableBackups, error) -> { if (error != null) { backupSaved.completeExceptionally(error); } else { - takeBackupIfDoesNotExist(status, inProgressBackup, concurrencyControl, backupSaved); + takeBackupIfDoesNotExist( + availableBackups, inProgressBackup, concurrencyControl, backupSaved); } }, concurrencyControl::run); @@ -63,12 +71,17 @@ ActorFuture takeBackup( } private void takeBackupIfDoesNotExist( - final BackupStatus status, + final Collection availableBackups, final InProgressBackup inProgressBackup, final ConcurrencyControl concurrencyControl, final ActorFuture backupSaved) { - switch (status.statusCode()) { + final BackupStatusCode existingBackupStatus = + availableBackups.isEmpty() + ? BackupStatusCode.DOES_NOT_EXIST + : Collections.max(availableBackups, Comparator.comparing(BackupStatus::statusCode)) + .statusCode(); + switch (existingBackupStatus) { case COMPLETED -> { LOG.debug("Backup {} is already completed, will not take a new one", inProgressBackup.id()); backupSaved.complete(null); @@ -77,9 +90,9 @@ private void takeBackupIfDoesNotExist( LOG.error( "Backup {} already exists with status {}, will not take a new one", inProgressBackup.id(), - status); + existingBackupStatus); backupSaved.completeExceptionally( - new BackupAlreadyExistsException(inProgressBackup.id(), status)); + new BackupAlreadyExistsException(inProgressBackup.id(), existingBackupStatus)); } default -> { final ActorFuture snapshotFound = concurrencyControl.createFuture();