From 98b7796ce9bb83f0fcdf01612fd1c65532459801 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 2 Jun 2017 21:24:46 +0100 Subject: [PATCH] Fix confusion about URIs and paths in S3 filesystem * Make it clear (with parameter names and comments) which functions take paths (relative to the configured base path), which functions take URIs (complete paths) and which functions take URLs (hostname, port and URI). * Put all objects (files and directories) in the same "directory" tree on S3, so that we can list them all together and easily identify the type of each object (from its extension). Previously they were separated into "directories" (delimited prefixes) called "files" and "dirs" (unlike BackupStore filesystems). * Reinstate hierarchical "directories" on S3, similar to BackupStore filesystems, since the reasons for doing this in RaidFile may well apply to S3 and other compatible stores too. However the objects are still named with their complete object ID (unlike RaidFile) and a .file or .dir extension. (cherry picked from commit 3dd21a3028cacea2ecf6ee3de2f32db3c5138589) --- lib/backupstore/BackupAccountControl.h | 2 - lib/backupstore/BackupFileSystem.cpp | 73 +++++++++++++++++++----- lib/backupstore/BackupFileSystem.h | 77 +++++++++++++++++++++----- test/s3store/tests3store.cpp | 2 +- 4 files changed, 124 insertions(+), 30 deletions(-) diff --git a/lib/backupstore/BackupAccountControl.h b/lib/backupstore/BackupAccountControl.h index 5b5d89108..0f2ba5dbd 100644 --- a/lib/backupstore/BackupAccountControl.h +++ b/lib/backupstore/BackupAccountControl.h @@ -70,8 +70,6 @@ class S3BackupAccountControl : public BackupAccountControl // max size of soft limit as percent of hard limit #define MAX_SOFT_LIMIT_SIZE 97 -#define S3_INFO_FILE_NAME "boxbackup.info" -#define S3_NOTIONAL_BLOCK_SIZE 1048576 #endif // BACKUPACCOUNTCONTROL__H diff --git a/lib/backupstore/BackupFileSystem.cpp b/lib/backupstore/BackupFileSystem.cpp index 1894c702c..fbe8358fa 100644 --- a/lib/backupstore/BackupFileSystem.cpp +++ b/lib/backupstore/BackupFileSystem.cpp @@ -12,6 +12,8 @@ #include +#include + #include "autogen_BackupStoreException.h" #include "BackupConstants.h" #include "BackupStoreDirectory.h" @@ -29,6 +31,7 @@ #include "IOStream.h" #include "InvisibleTempFileStream.h" #include "S3Client.h" +#include "StoreStructure.h" #include "MemLeakFindOn.h" @@ -41,7 +44,7 @@ std::string S3BackupFileSystem::GetObjectURL(const std::string& ObjectPath) cons { const Configuration s3config = mrConfig.GetSubConfiguration("S3Store"); return std::string("http://") + s3config.GetKeyValue("HostName") + ":" + - s3config.GetKeyValue("Port") + GetObjectURI(ObjectPath); + s3config.GetKeyValue("Port") + ObjectPath; } int64_t S3BackupFileSystem::GetRevisionID(const std::string& uri, @@ -146,21 +149,65 @@ bool S3BackupFileSystem::ObjectExists(int64_t ObjectID, int64_t *pRevisionID) return true; } -// This URI should NOT include the BasePath, because the S3Client will add that -// automatically when we make a request for this URI. -std::string S3BackupFileSystem::GetDirectoryURI(int64_t ObjectID) +// -------------------------------------------------------------------------- +// +// Function +// Name: S3BackupFileSystem::GetObjectURI(int64_t ObjectID, +// int Type) +// Purpose: Builds the object filename for a given object, +// including mBasePath. Very similar to +// StoreStructure::MakeObjectFilename(), but files and +// directories have different extensions, and the +// filename is the full object ID, not just the lower +// STORE_ID_SEGMENT_LENGTH bits. +// Created: 2016/03/21 +// +// -------------------------------------------------------------------------- +std::string S3BackupFileSystem::GetObjectURI(int64_t ObjectID, int Type) const { + const static char *hex = "0123456789abcdef"; + ASSERT(mBasePath.size() > 0 && mBasePath[0] == '/' && + mBasePath[mBasePath.size() - 1] == '/'); std::ostringstream out; - out << "dirs/" << BOX_FORMAT_OBJECTID(ObjectID) << ".dir"; - return out.str(); -} + out << mBasePath; + + // Get the id value from the stored object ID into an unsigned int64_t, so that + // we can do bitwise operations on it safely. + uint64_t id = (uint64_t)ObjectID; + + // Shift off the bits which make up the leafname + id >>= STORE_ID_SEGMENT_LENGTH; + + // build pathname + while(id != 0) + { + // assumes that the segments are no bigger than 8 bits + int v = id & STORE_ID_SEGMENT_MASK; + out << hex[(v & 0xf0) >> 4]; + out << hex[v & 0xf]; + out << "/"; + + // shift the bits we used off the pathname + id >>= STORE_ID_SEGMENT_LENGTH; + } + + // append the filename + out << BOX_FORMAT_OBJECTID(ObjectID); + if(Type == ObjectExists_File) + { + out << ".file"; + } + else if(Type == ObjectExists_Dir) + { + out << ".dir"; + } + else + { + THROW_EXCEPTION_MESSAGE(CommonException, Internal, + "Unknown file type for object " << BOX_FORMAT_OBJECTID(ObjectID) << + ": " << Type); + } -// This URI should NOT include the BasePath, because the S3Client will add that -// automatically when we make a request for this URI. -std::string S3BackupFileSystem::GetFileURI(int64_t ObjectID) -{ - std::ostringstream out; - out << "files/" << BOX_FORMAT_OBJECTID(ObjectID) << ".dir"; return out.str(); } diff --git a/lib/backupstore/BackupFileSystem.h b/lib/backupstore/BackupFileSystem.h index df3984858..5909502df 100644 --- a/lib/backupstore/BackupFileSystem.h +++ b/lib/backupstore/BackupFileSystem.h @@ -16,6 +16,7 @@ #include "HTTPResponse.h" #include "NamedLock.h" #include "S3Client.h" +#include "Utils.h" class BackupStoreDirectory; class BackupStoreInfo; @@ -23,8 +24,6 @@ class BackupStoreRefCountDatabase; class Configuration; class IOStream; -#define S3_NOTIONAL_BLOCK_SIZE 1048576 - // -------------------------------------------------------------------------- // // Class @@ -113,14 +112,25 @@ class RaidBackupFileSystem : public BackupFileSystem } }; +#define S3_INFO_FILE_NAME "boxbackup.info" +#define S3_REFCOUNT_FILE_NAME "boxbackup.refcount.db" +#define S3_FAKE_ACCOUNT_ID 0x53336964 // 'S3id' +#define S3_CACHE_LOCK_NAME "boxbackup.cache.lock" + +#ifdef NDEBUG // release + // Use a larger block size for efficiency + #define S3_NOTIONAL_BLOCK_SIZE 1048576 +#else + // Use a smaller block size to make tests run faster + #define S3_NOTIONAL_BLOCK_SIZE 16384 +#endif + class S3BackupFileSystem : public BackupFileSystem { private: const Configuration& mrConfig; std::string mBasePath; S3Client& mrClient; - std::string GetDirectoryURI(int64_t ObjectID); - std::string GetFileURI(int64_t ObjectID); int64_t GetRevisionID(const std::string& uri, HTTPResponse& response) const; int GetSizeInBlocks(int64_t bytes) { @@ -175,24 +185,63 @@ class S3BackupFileSystem : public BackupFileSystem { THROW_EXCEPTION(CommonException, NotSupported); } - std::string GetObjectURI(const std::string& ObjectPath) const + + // And these are public to help with writing tests ONLY: + // The returned URI should start with mBasePath. + std::string GetMetadataURI(const std::string& MetadataFilename) const + { + return mBasePath + MetadataFilename; + } + // The returned URI should start with mBasePath. + std::string GetDirectoryURI(int64_t ObjectID) + { + return GetObjectURI(ObjectID, ObjectExists_Dir); + } + // The returned URI should start with mBasePath. + std::string GetFileURI(int64_t ObjectID) { - return mBasePath + ObjectPath; + return GetObjectURI(ObjectID, ObjectExists_File); } - std::string GetObjectURL(const std::string& ObjectPath) const; - HTTPResponse GetObject(const std::string& ObjectPath) + +private: + // S3BackupAccountControl wants to call some of these private APIs, but nobody else should: + friend class S3BackupAccountControl; + + // GetObjectURL() returns the complete URL for an object at the given + // path, by adding the hostname, port and the object's URI (which can + // be retrieved from GetMetadataURI or GetObjectURI). + std::string GetObjectURL(const std::string& ObjectURI) const; + + // GetObjectURI() is a private interface which converts an object ID + // and type into a URI, which starts with mBasePath: + std::string GetObjectURI(int64_t ObjectID, int Type) const; + + // GetObject() retrieves the object with the specified URI from the + // configured S3 server. S3Client has no idea about path prefixes + // (mBasePath), so it must already be added: the supplied ObjectURI + // must start with it. + HTTPResponse GetObject(const std::string& ObjectURI) { - return mrClient.GetObject(GetObjectURI(ObjectPath)); + return mrClient.GetObject(ObjectURI); } - HTTPResponse HeadObject(const std::string& ObjectPath) + + // HeadObject() retrieves the headers (metadata) for the object with + // the specified URI from the configured S3 server. S3Client has no + // idea about path prefixes (mBasePath), so it must already be added: + // the supplied ObjectURI must start with it. + HTTPResponse HeadObject(const std::string& ObjectURI) { - return mrClient.HeadObject(GetObjectURI(ObjectPath)); + return mrClient.HeadObject(ObjectURI); } - HTTPResponse PutObject(const std::string& ObjectPath, + + // PutObject() uploads the supplied stream to the configured S3 server, + // saving it with the supplied URI. S3Client has no idea about path + // prefixes (mBasePath), so it must already be added: the supplied + // ObjectURI must start with it. + HTTPResponse PutObject(const std::string& ObjectURI, IOStream& rStreamToSend, const char* pContentType = NULL) { - return mrClient.PutObject(GetObjectURI(ObjectPath), rStreamToSend, - pContentType); + return mrClient.PutObject(ObjectURI, rStreamToSend, pContentType); } }; diff --git a/test/s3store/tests3store.cpp b/test/s3store/tests3store.cpp index de15e6295..a3b9ba335 100644 --- a/test/s3store/tests3store.cpp +++ b/test/s3store/tests3store.cpp @@ -113,7 +113,7 @@ bool check_new_account_info() TEST_EQUAL(0, info->GetClientStoreMarker()); TEST_EQUAL("test", info->GetAccountName()); - FileStream root_stream("testfiles/store/subdir/dirs/0x1.dir"); + FileStream root_stream("testfiles/store/subdir/0x1.dir"); BackupStoreDirectory root_dir(root_stream); TEST_EQUAL(0, root_dir.GetNumberOfEntries());