Skip to content

Commit

Permalink
Refactor HousekeepStoreAccount to use BackupFileSystem
Browse files Browse the repository at this point in the history
This involved moving big chunks of functionality into RaidBackupFileSystem, for
example CombineFile, CombineDiffs and RaidPutFileCompleteTransaction. Not much
is implemented for S3BackupFileSystem yet.
  • Loading branch information
qris committed Oct 7, 2017
1 parent b40dc41 commit c5466cc
Show file tree
Hide file tree
Showing 8 changed files with 442 additions and 352 deletions.
2 changes: 1 addition & 1 deletion lib/backupstore/BackupAccountControl.cpp
Expand Up @@ -520,7 +520,7 @@ int BackupStoreAccountControl::HousekeepAccountNow()
// Housekeeping locks the account itself, so we can't.
OPEN_ACCOUNT(false); // readWrite

HousekeepStoreAccount housekeeping(mAccountID, mRootDir, mDiscSetNum, NULL);
HousekeepStoreAccount housekeeping(*mapFileSystem, NULL);
bool success = housekeeping.DoHousekeeping();

if(!success)
Expand Down
158 changes: 130 additions & 28 deletions lib/backupstore/BackupFileSystem.cpp
Expand Up @@ -229,6 +229,24 @@ void RaidBackupFileSystem::TryGetLock()
}


std::string RaidBackupFileSystem::GetAccountIdentifier()
{
std::string account_name;
try
{
account_name = GetBackupStoreInfo(true).GetAccountName();
}
catch(RaidFileException &e)
{
account_name = "unknown";
}

std::ostringstream oss;
oss << BOX_FORMAT_ACCOUNT(mAccountID) << " (" << account_name << ")";
return oss.str();
}


std::string RaidBackupFileSystem::GetObjectFileName(int64_t ObjectID,
bool EnsureDirectoryExists)
{
Expand Down Expand Up @@ -414,16 +432,26 @@ class RaidPutFileCompleteTransaction : public BackupFileSystem::Transaction
{
private:
RaidFileWrite mStoreFile;
std::string mFileName;
int mDiscSet;
bool mCommitted;

public:
RaidPutFileCompleteTransaction(int StoreDiscSet, const std::string& filename)
: mStoreFile(StoreDiscSet, filename),
mCommitted(false)
RaidPutFileCompleteTransaction(int StoreDiscSet, const std::string& filename,
BackupStoreRefCountDatabase::refcount_t refcount)
: mStoreFile(StoreDiscSet, filename, refcount),
mFileName(filename),
mDiscSet(StoreDiscSet),
mCommitted(false),
mNumBlocks(-1)
{ }
~RaidPutFileCompleteTransaction();
virtual void Commit();
virtual int64_t GetNumBlocks() { return mNumBlocks; }
virtual int64_t GetNumBlocks()
{
ASSERT(mNumBlocks != -1);
return mNumBlocks;
}
RaidFileWrite& GetRaidFile() { return mStoreFile; }

// It doesn't matter what we return here, because this should never be called
Expand All @@ -437,31 +465,50 @@ class RaidPutFileCompleteTransaction : public BackupFileSystem::Transaction

// Commits the underlying RaidFileWrite and then marks this transaction as
// committed. Calling it on an already committed transaction trips the ASSERT.
//
// When NDEBUG is not defined, the committed file is reopened and checked with
// BackupStoreFile::VerifyEncodedFileFormat(); if the check fails, the file is
// deleted and BackupStoreException (AddedFileDoesNotVerify) is thrown.
void RaidPutFileCompleteTransaction::Commit()
{
	ASSERT(!mCommitted);
	mStoreFile.Commit(BACKUP_STORE_CONVERT_TO_RAID_IMMEDIATELY);

#ifndef NDEBUG
	// Debug-only double check, relevant only to complete (non-diffed) files.
	// VerifyEncodedFileFormat() cannot be used before the commit above, and the
	// data was already verified while it was being saved, so this should never
	// fail.
	//
	// NOTE(review): on failure the exception leaves mCommitted false, so the
	// destructor's not-committed cleanup also runs on the file that was just
	// committed and deleted here -- confirm that this is safe.
	std::auto_ptr<RaidFileRead> apCommittedFile(
		RaidFileRead::Open(mDiscSet, mFileName));
	const bool formatIsValid =
		BackupStoreFile::VerifyEncodedFileFormat(*apCommittedFile);
	if(!formatIsValid)
	{
		mStoreFile.Delete();
		THROW_EXCEPTION_MESSAGE(BackupStoreException, AddedFileDoesNotVerify,
			"Newly saved file does not verify after write: this should not "
			"happen: " << mFileName);
	}
#endif // !NDEBUG

	mCommitted = true;
}


// Destructor: cleans up the underlying RaidFileWrite according to whether
// Commit() has set mCommitted.
//
// NOTE(review): this text comes from a rendered diff without +/- markers, and
// the two branches below look like removed and added lines shown together (the
// else-branch re-tests !mCommitted and calls both Delete() and Discard()).
// Confirm against the real file which of these statements are current.
RaidPutFileCompleteTransaction::~RaidPutFileCompleteTransaction()
{
if(mCommitted)
{
GetRaidFile().TransformToRaidStorage();
}
else
if(!mCommitted)
{
GetRaidFile().Delete();
mStoreFile.Discard();
}
}


std::auto_ptr<BackupFileSystem::Transaction>
RaidBackupFileSystem::PutFileComplete(int64_t ObjectID, IOStream& rFileData)
RaidBackupFileSystem::PutFileComplete(int64_t ObjectID, IOStream& rFileData,
BackupStoreRefCountDatabase::refcount_t refcount)
{
// Create the containing directory if it doesn't exist.
std::string filename = GetObjectFileName(ObjectID, true);

// We can only do this when the file (ObjectID) doesn't already exist.
ASSERT(refcount == 0);

RaidPutFileCompleteTransaction* pTrans = new RaidPutFileCompleteTransaction(
mStoreDiscSet, filename);
mStoreDiscSet, filename, refcount);
std::auto_ptr<BackupFileSystem::Transaction> apTrans(pTrans);

RaidFileWrite& rStoreFile(pTrans->GetRaidFile());
Expand All @@ -481,22 +528,6 @@ RaidBackupFileSystem::PutFileComplete(int64_t ObjectID, IOStream& rFileData)
// Need to do this before committing the RaidFile, can't do it after.
pTrans->mNumBlocks = rStoreFile.GetDiscUsageInBlocks();

// Verify the file -- only necessary for non-diffed versions.
//
// Checking the file requires that we commit the RaidFile first, which
// is unfortunate because we're not quite ready to, and because we thus
// treat full and differential uploads differently. But it's not a huge
// issue because we can always delete the RaidFile without any harm done
// in the full upload case.
rStoreFile.Commit(false); // Don't ConvertToRaidNow

std::auto_ptr<RaidFileRead> checkFile(RaidFileRead::Open(mStoreDiscSet,
filename));
if(!BackupStoreFile::VerifyEncodedFileFormat(*checkFile))
{
THROW_EXCEPTION(BackupStoreException, AddedFileDoesNotVerify)
}

return apTrans;
}

Expand Down Expand Up @@ -725,11 +756,82 @@ void RaidBackupFileSystem::DeleteFile(int64_t ObjectID)
deleteFile.Delete();
}


std::auto_ptr<BackupFileSystem::Transaction>
RaidBackupFileSystem::CombineFileOrDiff(int64_t OlderPatchID, int64_t NewerObjectID, bool NewerIsPatch)
{
// This normally only happens during housekeeping, which is using a temporary
// refcount database, so insist on that for now.
BackupStoreRefCountDatabase* pRefCount = mapPotentialRefCountDatabase.get();
ASSERT(pRefCount != NULL);
ASSERT(mapPermanentRefCountDatabase.get() == NULL ||
mapPermanentRefCountDatabase->IsReadOnly());

// Open the older object twice (the patch)
std::auto_ptr<IOStream> pdiff = GetFile(OlderPatchID);
std::auto_ptr<IOStream> pdiff2 = GetFile(OlderPatchID);

// Open the newer object (the file to be deleted)
std::auto_ptr<IOStream> pobjectBeingDeleted = GetFile(NewerObjectID);

// And open a write file to overwrite the older object (the patch)
std::string older_filename = GetObjectFileName(OlderPatchID,
false); // no need to make sure the directory it's in exists.

std::auto_ptr<RaidPutFileCompleteTransaction>
ap_overwrite_older(new RaidPutFileCompleteTransaction(
mStoreDiscSet, older_filename,
pRefCount->GetRefCount(OlderPatchID)));
RaidFileWrite& overwrite_older(ap_overwrite_older->GetRaidFile());
overwrite_older.Open(true /* allow overwriting */);

if(NewerIsPatch)
{
// Combine two adjacent patches (reverse diffs) into a single one object.
BackupStoreFile::CombineDiffs(*pobjectBeingDeleted, *pdiff, *pdiff2, overwrite_older);
}
else
{
// Combine an older patch (reverse diff) with the subsequent complete file.
BackupStoreFile::CombineFile(*pdiff, *pdiff2, *pobjectBeingDeleted, overwrite_older);
}

// Need to do this before committing the RaidFile, can't do it after.
ap_overwrite_older->mNumBlocks = overwrite_older.GetDiscUsageInBlocks();

// The file will be committed later when the directory is safely commited.
return static_cast<std::auto_ptr<BackupFileSystem::Transaction> >(ap_overwrite_older);
}


// Combines the older patch OlderPatchID with the newer object NewerFileID,
// treating the newer object as a complete file (not a patch). Delegates to
// CombineFileOrDiff() and returns its transaction.
std::auto_ptr<BackupFileSystem::Transaction>
RaidBackupFileSystem::CombineFile(int64_t OlderPatchID, int64_t NewerFileID)
{
	const bool newerIsPatch = false;
	return CombineFileOrDiff(OlderPatchID, NewerFileID, newerIsPatch);
}


// Combines the older patch OlderPatchID with the newer object NewerPatchID,
// treating the newer object as a patch as well. Delegates to
// CombineFileOrDiff() and returns its transaction.
std::auto_ptr<BackupFileSystem::Transaction>
RaidBackupFileSystem::CombineDiffs(int64_t OlderPatchID, int64_t NewerPatchID)
{
	const bool newerIsPatch = true;
	return CombineFileOrDiff(OlderPatchID, NewerPatchID, newerIsPatch);
}


// Returns the block size reported for the S3 backend, which is the constant
// S3_NOTIONAL_BLOCK_SIZE.
int S3BackupFileSystem::GetBlockSize()
{
	const int blockSize = S3_NOTIONAL_BLOCK_SIZE;
	return blockSize;
}


// Returns the account name, read from the read-only BackupStoreInfo, wrapped in
// single quotes. Unlike the RAID variant, exceptions raised while fetching the
// BackupStoreInfo are not caught here.
std::string S3BackupFileSystem::GetAccountIdentifier()
{
	std::ostringstream quoted;
	quoted << "'";
	quoted << GetBackupStoreInfo(true).GetAccountName();
	quoted << "'";
	return quoted.str();
}


std::string S3BackupFileSystem::GetObjectURL(const std::string& ObjectPath) const
{
const Configuration s3config = mrConfig.GetSubConfiguration("S3Store");
Expand Down
47 changes: 44 additions & 3 deletions lib/backupstore/BackupFileSystem.h
Expand Up @@ -63,6 +63,15 @@ class BackupFileSystem
virtual BackupStoreInfo& GetBackupStoreInfo(bool ReadOnly, bool Refresh = false);
virtual void PutBackupStoreInfo(BackupStoreInfo& rInfo) = 0;

// DiscardBackupStoreInfo() drops the cached BackupStoreInfo held in
// mapBackupStoreInfo, which invalidates any references to it! rInfo must be
// that cached object (this is asserted).
//
// It is needed so that a BackupStoreContext can be Finished, the
// BackupStoreInfo then changed by BackupStoreCheck or HousekeepStoreAccount,
// and the Context finally reopened.
virtual void DiscardBackupStoreInfo(BackupStoreInfo& rInfo)
{
// The caller must pass the very object we have cached, not a copy of it.
ASSERT(mapBackupStoreInfo.get() == &rInfo);
mapBackupStoreInfo.reset();
}
virtual std::auto_ptr<BackupStoreInfo> GetBackupStoreInfoUncached()
{
// Return a BackupStoreInfo freshly retrieved from storage, read-only to
Expand Down Expand Up @@ -91,14 +100,23 @@ class BackupFileSystem
virtual void GetDirectory(int64_t ObjectID, BackupStoreDirectory& rDirOut) = 0;
virtual void PutDirectory(BackupStoreDirectory& rDir) = 0;
virtual std::auto_ptr<Transaction> PutFileComplete(int64_t ObjectID,
IOStream& rFileData) = 0;
IOStream& rFileData, BackupStoreRefCountDatabase::refcount_t refcount) = 0;
virtual std::auto_ptr<Transaction> PutFilePatch(int64_t ObjectID,
int64_t DiffFromFileID, IOStream& rPatchData) = 0;
virtual std::auto_ptr<IOStream> GetFile(int64_t ObjectID) = 0;
virtual std::auto_ptr<IOStream> GetFilePatch(int64_t ObjectID,
std::vector<int64_t>& rPatchChain) = 0;
virtual void DeleteFile(int64_t ObjectID) = 0;
virtual void DeleteDirectory(int64_t ObjectID) = 0;
virtual bool CanMergePatches() = 0;
virtual std::auto_ptr<BackupFileSystem::Transaction>
CombineFile(int64_t OlderPatchID, int64_t NewerFileID) = 0;
virtual std::auto_ptr<BackupFileSystem::Transaction>
CombineDiffs(int64_t OlderPatchID, int64_t NewerPatchID) = 0;
virtual std::string GetAccountIdentifier() = 0;
// Use of GetAccountID() is not recommended. It returns S3_FAKE_ACCOUNT_ID on
// S3BackupFileSystem.
virtual int GetAccountID() = 0;

protected:
virtual std::auto_ptr<BackupStoreInfo> GetBackupStoreInfoInternal(bool ReadOnly) = 0;
Expand Down Expand Up @@ -188,7 +206,7 @@ class RaidBackupFileSystem : public BackupFileSystem
virtual void GetDirectory(int64_t ObjectID, BackupStoreDirectory& rDirOut);
virtual void PutDirectory(BackupStoreDirectory& rDir);
virtual std::auto_ptr<Transaction> PutFileComplete(int64_t ObjectID,
IOStream& rFileData);
IOStream& rFileData, BackupStoreRefCountDatabase::refcount_t refcount);
virtual std::auto_ptr<Transaction> PutFilePatch(int64_t ObjectID,
int64_t DiffFromFileID, IOStream& rPatchData);
virtual std::auto_ptr<IOStream> GetFile(int64_t ObjectID);
Expand All @@ -199,9 +217,18 @@ class RaidBackupFileSystem : public BackupFileSystem
{
DeleteFile(ObjectID);
}
// The RAID backend reports that it can merge patches: it implements
// CombineFile() and CombineDiffs().
virtual bool CanMergePatches()
{
	return true;
}
std::auto_ptr<BackupFileSystem::Transaction>
CombineFile(int64_t OlderPatchID, int64_t NewerFileID);
std::auto_ptr<BackupFileSystem::Transaction>
CombineDiffs(int64_t OlderPatchID, int64_t NewerPatchID);
virtual std::string GetAccountIdentifier();
// Returns the account ID stored in mAccountID.
virtual int GetAccountID()
{
	return mAccountID;
}

protected:
virtual std::auto_ptr<BackupStoreInfo> GetBackupStoreInfoInternal(bool ReadOnly);
std::auto_ptr<BackupFileSystem::Transaction>
CombineFileOrDiff(int64_t OlderPatchID, int64_t NewerObjectID, bool NewerIsPatch);
};

#define S3_INFO_FILE_NAME "boxbackup.info"
Expand Down Expand Up @@ -247,7 +274,7 @@ class S3BackupFileSystem : public BackupFileSystem
virtual void GetDirectory(int64_t ObjectID, BackupStoreDirectory& rDirOut);
virtual void PutDirectory(BackupStoreDirectory& rDir);
virtual std::auto_ptr<Transaction> PutFileComplete(int64_t ObjectID,
IOStream& rFileData)
IOStream& rFileData, BackupStoreRefCountDatabase::refcount_t refcount)
{
return std::auto_ptr<Transaction>();
}
Expand Down Expand Up @@ -291,6 +318,20 @@ class S3BackupFileSystem : public BackupFileSystem
return GetObjectURI(ObjectID, ObjectExists_File);
}

// The S3 backend reports that it cannot merge patches; its CombineFile() and
// CombineDiffs() throw CommonException (NotSupported).
virtual bool CanMergePatches()
{
	return false;
}
std::auto_ptr<BackupFileSystem::Transaction>
CombineFile(int64_t OlderPatchID, int64_t NewerFileID)
{
THROW_EXCEPTION(CommonException, NotSupported);
}
std::auto_ptr<BackupFileSystem::Transaction>
CombineDiffs(int64_t OlderPatchID, int64_t NewerPatchID)
{
THROW_EXCEPTION(CommonException, NotSupported);
}
virtual std::string GetAccountIdentifier();
// The S3 backend always returns the placeholder S3_FAKE_ACCOUNT_ID here.
virtual int GetAccountID()
{
	return S3_FAKE_ACCOUNT_ID;
}

private:
// S3BackupAccountControl wants to call some of these private APIs, but nobody else should:
friend class S3BackupAccountControl;
Expand Down

0 comments on commit c5466cc

Please sign in to comment.