Skip to content

Commit

Permalink
Generalise BackupFileSystem::GetLock and reuse it
Browse files Browse the repository at this point in the history
This reduces code duplication (multiple implementations of locking retries) and
also enables centralised logging of locking attempts, which can be more easily
fine-tuned.
  • Loading branch information
qris committed Nov 19, 2017
1 parent b595126 commit 83fc654
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 99 deletions.
49 changes: 37 additions & 12 deletions lib/backupstore/BackupFileSystem.cpp
Expand Up @@ -56,30 +56,55 @@

#include "MemLeakFindOn.h"

void BackupFileSystem::GetLock()
void BackupFileSystem::GetLock(int max_tries)
{
for(int triesLeft = 8; triesLeft >= 0; triesLeft--)
if(HaveLock())
{
// Account already locked, nothing to do
return;
}

int i = 0;
for(; i < max_tries || max_tries == KEEP_TRYING_FOREVER; i++)
{
try
{
TryGetLock();
}
catch(BackupStoreException &e)
{
if(EXCEPTION_IS_TYPE(e, BackupStoreException,
CouldNotLockStoreAccount) && triesLeft)
if(EXCEPTION_IS_TYPE(e, BackupStoreException, CouldNotLockStoreAccount) &&
((i < max_tries - 1) || max_tries == KEEP_TRYING_FOREVER))
{
// Sleep a bit, and try again, as long as we have retries left.
ShortSleep(MilliSecondsToBoxTime(1000), true);
// Will try again
}
else
{
throw;
}
}
}

// If that didn't throw an exception, then we should have successfully got a lock!
ASSERT(HaveLock());
BOX_LOG_CATEGORY(Log::TRACE, LOCKING, "Successfully locked account " <<
GetAccountIdentifier() << " after " << (i + 1) << " attempts");
}

void BackupFileSystem::ReleaseLock()
{
mapBackupStoreInfo.reset();

if(mapPotentialRefCountDatabase.get())
{
mapPotentialRefCountDatabase->Discard();
mapPotentialRefCountDatabase.reset();
}

mapPermanentRefCountDatabase.reset();
}

// Refresh forces the any current BackupStoreInfo to be discarded and reloaded from the
// store. This would be dangerous if anyone was holding onto a reference to it!
Expand Down Expand Up @@ -1942,6 +1967,14 @@ void S3BackupFileSystem::ReleaseLock()
{
BackupFileSystem::ReleaseLock();

// It's possible that neither the temporary nor the permanent refcount DB was
// requested while we had the write lock, so the cache lock may not have been
// acquired.
if(mCacheLock.GotLock())
{
mCacheLock.ReleaseLock();
}

// Releasing is so much easier!
if(!mHaveLock)
{
Expand Down Expand Up @@ -1980,14 +2013,6 @@ void S3BackupFileSystem::ReleaseLock()
ReportLockMismatches(mismatches);
ASSERT(mismatches.empty());

// It's possible that neither the temporary nor the permanent refcount DB was
// requested while we had the write lock, so the cache lock may not have been
// acquired.
if(mCacheLock.GotLock())
{
mCacheLock.ReleaseLock();
}

// Now we no longer have the lock!
mHaveLock = false;
}
Expand Down
24 changes: 9 additions & 15 deletions lib/backupstore/BackupFileSystem.h
Expand Up @@ -50,15 +50,9 @@ class BackupFileSystem

BackupFileSystem() { }
virtual ~BackupFileSystem() { }
virtual void TryGetLock() = 0;
virtual void GetLock();
virtual void ReleaseLock()
{
mapBackupStoreInfo.reset();
mapPotentialRefCountDatabase.reset();
mapPermanentRefCountDatabase.reset();
}

static const int KEEP_TRYING_FOREVER = -1;
virtual void GetLock(int max_tries = 8);
virtual void ReleaseLock();
virtual bool HaveLock() = 0;
virtual int GetBlockSize() = 0;
virtual BackupStoreInfo& GetBackupStoreInfo(bool ReadOnly, bool Refresh = false);
Expand Down Expand Up @@ -158,6 +152,7 @@ class BackupFileSystem
}

protected:
virtual void TryGetLock() = 0;
virtual std::auto_ptr<BackupStoreInfo> GetBackupStoreInfoInternal(bool ReadOnly) = 0;
std::auto_ptr<BackupStoreInfo> mapBackupStoreInfo;
// You can have one temporary and one permanent refcound DB open at any time,
Expand All @@ -166,7 +161,6 @@ class BackupFileSystem
std::auto_ptr<BackupStoreRefCountDatabase> mapPotentialRefCountDatabase;
std::auto_ptr<BackupStoreRefCountDatabase> mapPermanentRefCountDatabase;

protected:
friend class BackupStoreRefCountDatabaseWrapper;
// These should only be called by BackupStoreRefCountDatabaseWrapper::Commit():
virtual void RefCountDatabaseBeforeCommit(BackupStoreRefCountDatabase& refcount_db);
Expand Down Expand Up @@ -221,7 +215,6 @@ class RaidBackupFileSystem : public BackupFileSystem
ReleaseLock();
}
}
virtual void TryGetLock();
virtual void ReleaseLock()
{
BackupFileSystem::ReleaseLock();
Expand Down Expand Up @@ -280,6 +273,7 @@ class RaidBackupFileSystem : public BackupFileSystem
virtual void EnsureObjectIsPermanent(int64_t ObjectID, bool fix_errors);

protected:
virtual void TryGetLock();
virtual std::auto_ptr<BackupStoreInfo> GetBackupStoreInfoInternal(bool ReadOnly);
std::auto_ptr<BackupFileSystem::Transaction>
CombineFileOrDiff(int64_t OlderPatchID, int64_t NewerObjectID, bool NewerIsPatch);
Expand Down Expand Up @@ -332,7 +326,6 @@ class S3BackupFileSystem : public BackupFileSystem
S3BackupFileSystem(const Configuration& config, const std::string& BasePath,
const std::string& CacheDirectory, S3Client& rClient);
~S3BackupFileSystem();
virtual void TryGetLock();
virtual void ReleaseLock();
virtual bool HaveLock()
{
Expand Down Expand Up @@ -434,6 +427,10 @@ class S3BackupFileSystem : public BackupFileSystem
// Filesystem is not transactional, so nothing to do here.
}

protected:
virtual void TryGetLock();
virtual std::auto_ptr<BackupStoreInfo> GetBackupStoreInfoInternal(bool ReadOnly);

private:
std::string GetObjectURI(int64_t ObjectID, int Type) const;
typedef std::map<int64_t, std::vector<std::string> > start_id_to_files_t;
Expand All @@ -443,9 +440,6 @@ class S3BackupFileSystem : public BackupFileSystem
void CheckObjectsDir(int64_t start_id,
BackupFileSystem::CheckObjectsResult& result, bool fix_errors,
const start_id_to_files_t& start_id_to_files);

protected:
virtual std::auto_ptr<BackupStoreInfo> GetBackupStoreInfoInternal(bool ReadOnly);
virtual void SaveRefCountDatabase(BackupStoreRefCountDatabase& refcount_db);
};

Expand Down
5 changes: 3 additions & 2 deletions lib/backupstore/BackupStoreCheck.cpp
Expand Up @@ -119,8 +119,9 @@ void BackupStoreCheck::Check()
{
if(mFixErrors)
{
// Will throw an exception if it doesn't manage to get a lock:
mrFileSystem.TryGetLock();
// Will throw an exception if it doesn't manage to get a lock
// within about 8 seconds:
mrFileSystem.GetLock();
}

if(!mQuiet && mFixErrors)
Expand Down
67 changes: 25 additions & 42 deletions lib/backupstore/BackupStoreContext.cpp
Expand Up @@ -225,54 +225,37 @@ bool BackupStoreContext::AttemptToGetWriteLock()
// Request the lock
bool gotLock = false;

try
{
mpFileSystem->TryGetLock();
// If we got to here, then it worked!
gotLock = true;
}
catch(BackupStoreException &e)
for(int i = 0; i < (mpHousekeeping ? 2 : 1); i++)
{
if(!EXCEPTION_IS_TYPE(e, BackupStoreException, CouldNotLockStoreAccount))
try
{
// We don't know what this error is.
throw;
// On the first pass, only try once. If it fails, and we have a
// housekeeping process, then we'll notify it and try again,
// with more retries.
mpFileSystem->GetLock((i == 0) ? 1 :
MAX_WAIT_FOR_HOUSEKEEPING_TO_RELEASE_ACCOUNT);

// If we got to here, then it worked!
gotLock = true;
}

if(mpHousekeeping)
catch(BackupStoreException &e)
{
// The housekeeping process might have the thing open -- ask it to stop
char msg[256];
int msgLen = snprintf(msg, sizeof(msg), "r%x\n", mClientID);

// Send message
mpHousekeeping->SendMessageToHousekeepingProcess(msg, msgLen);
if(!EXCEPTION_IS_TYPE(e, BackupStoreException, CouldNotLockStoreAccount))
{
// We don't know what this error is.
throw;
}

// Then try again a few times
for(int tries = MAX_WAIT_FOR_HOUSEKEEPING_TO_RELEASE_ACCOUNT;
tries >= 0; tries--)
if(mpHousekeeping)
{
try
{
::sleep(1 /* second */);
mpFileSystem->TryGetLock();
// If we got to here, then it worked!
gotLock = true;
break;
}
catch(BackupStoreException &e)
{
if(EXCEPTION_IS_TYPE(e, BackupStoreException,
CouldNotLockStoreAccount))
{
// keep trying
}
else
{
// We don't know what this error is.
throw;
}
}
// The housekeeping process might have the account locked. Ask it to stop.
char msg[256];
int msgLen = snprintf(msg, sizeof(msg), "r%x\n", mClientID);

// Send message
mpHousekeeping->SendMessageToHousekeepingProcess(msg, msgLen);

// Then loop again, with more retries this time
}
}
}
Expand Down
40 changes: 13 additions & 27 deletions lib/backupstore/HousekeepStoreAccount.cpp
Expand Up @@ -110,39 +110,25 @@ bool HousekeepStoreAccount::DoHousekeeping(bool KeepTryingForever)
BOX_TRACE("Starting housekeeping on account " <<
mrFileSystem.GetAccountIdentifier());

// Attempt to lock the account
bool first_pass = true;
do
// Attempt to lock the account. If KeepTryingForever is false, then only
// try once, and return false if that fails.
try
{
try
mrFileSystem.GetLock(KeepTryingForever ? BackupFileSystem::KEEP_TRYING_FOREVER : 1);
}
catch(BackupStoreException &e)
{
if(EXCEPTION_IS_TYPE(e, BackupStoreException, CouldNotLockStoreAccount))
{
mrFileSystem.TryGetLock();
break;
// Couldn't lock the account -- just stop now
return false;
}
catch(BackupStoreException &e)
else
{
if(!EXCEPTION_IS_TYPE(e, BackupStoreException, CouldNotLockStoreAccount))
{
// something unexpected went wrong
throw;
}
else if(!KeepTryingForever)
{
// Couldn't lock the account -- just stop now
return false;
}
else if(first_pass)
{
BOX_INFO("Failed to lock account for housekeeping, "
"still trying...");
first_pass = false;
}

// Sleep a bit and try again
sleep(1);
// something unexpected went wrong
throw;
}
}
while(true);

// Load the store info to find necessary info for the housekeeping
BackupStoreInfo* pInfo = &(mrFileSystem.GetBackupStoreInfo(false)); // !ReadOnly
Expand Down
1 change: 0 additions & 1 deletion lib/backupstore/StoreTestUtils.cpp
Expand Up @@ -209,7 +209,6 @@ int check_account_for_errors(BackupFileSystem& filesystem, Log::Level log_level)
filesystem.CloseRefCountDatabase(filesystem.GetCurrentRefCountDatabase());
}

filesystem.TryGetLock();
BackupStoreCheck check(filesystem,
true, // FixErrors
false); // Quiet
Expand Down

0 comments on commit 83fc654

Please sign in to comment.