Skip to content

Commit

Permalink
Fix confusion about URIs and paths in S3 filesystem
Browse files Browse the repository at this point in the history
* Make it clear (with parameter names and comments) which functions take paths
  (relative to the configured base path), which functions take URIs (complete
  paths) and which functions take URLs (hostname, port and URI).

* Put all objects (files and directories) in the same "directory" tree on S3,
  so that we can list them all together and easily identify the type of each
  object (from its extension). Previously they were separated into "directories"
  (delimited prefixes) called "files" and "dirs" (unlike BackupStore
  filesystems).

* Reinstate hierarchical "directories" on S3, similar to BackupStore
  filesystems, since the reasons for doing this in RaidFile may well apply to S3
  and other compatible stores too. However the objects are still named with
  their complete object ID (unlike RaidFile) and a .file or .dir extension.

(cherry picked from commit 3dd21a3)
  • Loading branch information
qris committed Jun 2, 2017
1 parent 0bcc722 commit 98b7796
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 30 deletions.
2 changes: 0 additions & 2 deletions lib/backupstore/BackupAccountControl.h
Expand Up @@ -70,8 +70,6 @@ class S3BackupAccountControl : public BackupAccountControl

// max size of soft limit as percent of hard limit
#define MAX_SOFT_LIMIT_SIZE 97
#define S3_INFO_FILE_NAME "boxbackup.info"
#define S3_NOTIONAL_BLOCK_SIZE 1048576

#endif // BACKUPACCOUNTCONTROL__H

73 changes: 60 additions & 13 deletions lib/backupstore/BackupFileSystem.cpp
Expand Up @@ -12,6 +12,8 @@

#include <sys/types.h>

#include <sstream>

#include "autogen_BackupStoreException.h"
#include "BackupConstants.h"
#include "BackupStoreDirectory.h"
Expand All @@ -29,6 +31,7 @@
#include "IOStream.h"
#include "InvisibleTempFileStream.h"
#include "S3Client.h"
#include "StoreStructure.h"

#include "MemLeakFindOn.h"

Expand All @@ -41,7 +44,7 @@ std::string S3BackupFileSystem::GetObjectURL(const std::string& ObjectPath) cons
{
const Configuration s3config = mrConfig.GetSubConfiguration("S3Store");
return std::string("http://") + s3config.GetKeyValue("HostName") + ":" +
s3config.GetKeyValue("Port") + GetObjectURI(ObjectPath);
s3config.GetKeyValue("Port") + ObjectPath;
}

int64_t S3BackupFileSystem::GetRevisionID(const std::string& uri,
Expand Down Expand Up @@ -146,21 +149,65 @@ bool S3BackupFileSystem::ObjectExists(int64_t ObjectID, int64_t *pRevisionID)
return true;
}

// This URI should NOT include the BasePath, because the S3Client will add that
// automatically when we make a request for this URI.
std::string S3BackupFileSystem::GetDirectoryURI(int64_t ObjectID)
// --------------------------------------------------------------------------
//
// Function
// Name: S3BackupFileSystem::GetObjectURI(int64_t ObjectID,
// int Type)
// Purpose: Builds the object filename for a given object,
// including mBasePath. Very similar to
// StoreStructure::MakeObjectFilename(), but files and
// directories have different extensions, and the
// filename is the full object ID, not just the lower
// STORE_ID_SEGMENT_LENGTH bits.
// Created: 2016/03/21
//
// --------------------------------------------------------------------------
std::string S3BackupFileSystem::GetObjectURI(int64_t ObjectID, int Type) const
{
const static char *hex = "0123456789abcdef";
ASSERT(mBasePath.size() > 0 && mBasePath[0] == '/' &&
mBasePath[mBasePath.size() - 1] == '/');
std::ostringstream out;
out << "dirs/" << BOX_FORMAT_OBJECTID(ObjectID) << ".dir";
return out.str();
}
out << mBasePath;

// Get the id value from the stored object ID into an unsigned int64_t, so that
// we can do bitwise operations on it safely.
uint64_t id = (uint64_t)ObjectID;

// Shift off the bits which make up the leafname
id >>= STORE_ID_SEGMENT_LENGTH;

// build pathname
while(id != 0)
{
// assumes that the segments are no bigger than 8 bits
int v = id & STORE_ID_SEGMENT_MASK;
out << hex[(v & 0xf0) >> 4];
out << hex[v & 0xf];
out << "/";

// shift the bits we used off the pathname
id >>= STORE_ID_SEGMENT_LENGTH;
}

// append the filename
out << BOX_FORMAT_OBJECTID(ObjectID);
if(Type == ObjectExists_File)
{
out << ".file";
}
else if(Type == ObjectExists_Dir)
{
out << ".dir";
}
else
{
THROW_EXCEPTION_MESSAGE(CommonException, Internal,
"Unknown file type for object " << BOX_FORMAT_OBJECTID(ObjectID) <<
": " << Type);
}

// This URI should NOT include the BasePath, because the S3Client will add that
// automatically when we make a request for this URI.
std::string S3BackupFileSystem::GetFileURI(int64_t ObjectID)
{
std::ostringstream out;
out << "files/" << BOX_FORMAT_OBJECTID(ObjectID) << ".dir";
return out.str();
}

Expand Down
77 changes: 63 additions & 14 deletions lib/backupstore/BackupFileSystem.h
Expand Up @@ -16,15 +16,14 @@
#include "HTTPResponse.h"
#include "NamedLock.h"
#include "S3Client.h"
#include "Utils.h"

class BackupStoreDirectory;
class BackupStoreInfo;
class BackupStoreRefCountDatabase;
class Configuration;
class IOStream;

#define S3_NOTIONAL_BLOCK_SIZE 1048576

// --------------------------------------------------------------------------
//
// Class
Expand Down Expand Up @@ -113,14 +112,25 @@ class RaidBackupFileSystem : public BackupFileSystem
}
};

#define S3_INFO_FILE_NAME "boxbackup.info"
#define S3_REFCOUNT_FILE_NAME "boxbackup.refcount.db"
#define S3_FAKE_ACCOUNT_ID 0x53336964 // 'S3id'
#define S3_CACHE_LOCK_NAME "boxbackup.cache.lock"

#ifdef NDEBUG // release
// Use a larger block size for efficiency
#define S3_NOTIONAL_BLOCK_SIZE 1048576
#else
// Use a smaller block size to make tests run faster
#define S3_NOTIONAL_BLOCK_SIZE 16384
#endif

class S3BackupFileSystem : public BackupFileSystem
{
private:
const Configuration& mrConfig;
std::string mBasePath;
S3Client& mrClient;
std::string GetDirectoryURI(int64_t ObjectID);
std::string GetFileURI(int64_t ObjectID);
int64_t GetRevisionID(const std::string& uri, HTTPResponse& response) const;
int GetSizeInBlocks(int64_t bytes)
{
Expand Down Expand Up @@ -175,24 +185,63 @@ class S3BackupFileSystem : public BackupFileSystem
{
THROW_EXCEPTION(CommonException, NotSupported);
}
std::string GetObjectURI(const std::string& ObjectPath) const

// And these are public to help with writing tests ONLY:
// The returned URI should start with mBasePath.
std::string GetMetadataURI(const std::string& MetadataFilename) const
{
return mBasePath + MetadataFilename;
}
// The returned URI should start with mBasePath.
std::string GetDirectoryURI(int64_t ObjectID)
{
return GetObjectURI(ObjectID, ObjectExists_Dir);
}
// The returned URI should start with mBasePath.
std::string GetFileURI(int64_t ObjectID)
{
return mBasePath + ObjectPath;
return GetObjectURI(ObjectID, ObjectExists_File);
}
std::string GetObjectURL(const std::string& ObjectPath) const;
HTTPResponse GetObject(const std::string& ObjectPath)

private:
// S3BackupAccountControl wants to call some of these private APIs, but nobody else should:
friend class S3BackupAccountControl;

// GetObjectURL() returns the complete URL for an object at the given
// path, by adding the hostname, port and the object's URI (which can
// be retrieved from GetMetadataURI or GetObjectURI).
std::string GetObjectURL(const std::string& ObjectURI) const;

// GetObjectURI() is a private interface which converts an object ID
// and type into a URI, which starts with mBasePath:
std::string GetObjectURI(int64_t ObjectID, int Type) const;

// GetObject() retrieves the object with the specified URI from the
// configured S3 server. S3Client has no idea about path prefixes
// (mBasePath), so it must already be added: the supplied ObjectURI
// must start with it.
HTTPResponse GetObject(const std::string& ObjectURI)
{
return mrClient.GetObject(GetObjectURI(ObjectPath));
return mrClient.GetObject(ObjectURI);
}
HTTPResponse HeadObject(const std::string& ObjectPath)

// HeadObject() retrieves the headers (metadata) for the object with
// the specified URI from the configured S3 server. S3Client has no
// idea about path prefixes (mBasePath), so it must already be added:
// the supplied ObjectURI must start with it.
HTTPResponse HeadObject(const std::string& ObjectURI)
{
return mrClient.HeadObject(GetObjectURI(ObjectPath));
return mrClient.HeadObject(ObjectURI);
}
HTTPResponse PutObject(const std::string& ObjectPath,

// PutObject() uploads the supplied stream to the configured S3 server,
// saving it with the supplied URI. S3Client has no idea about path
// prefixes (mBasePath), so it must already be added: the supplied
// ObjectURI must start with it.
HTTPResponse PutObject(const std::string& ObjectURI,
IOStream& rStreamToSend, const char* pContentType = NULL)
{
return mrClient.PutObject(GetObjectURI(ObjectPath), rStreamToSend,
pContentType);
return mrClient.PutObject(ObjectURI, rStreamToSend, pContentType);
}
};

Expand Down
2 changes: 1 addition & 1 deletion test/s3store/tests3store.cpp
Expand Up @@ -113,7 +113,7 @@ bool check_new_account_info()
TEST_EQUAL(0, info->GetClientStoreMarker());
TEST_EQUAL("test", info->GetAccountName());

FileStream root_stream("testfiles/store/subdir/dirs/0x1.dir");
FileStream root_stream("testfiles/store/subdir/0x1.dir");
BackupStoreDirectory root_dir(root_stream);
TEST_EQUAL(0, root_dir.GetNumberOfEntries());

Expand Down

0 comments on commit 98b7796

Please sign in to comment.