From ebb178b182836d616a34b2ac32afef2ae14b0e09 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 7 Aug 2017 21:36:34 +0100 Subject: [PATCH] Add ListBucket command support to S3Simulator and S3Client --- lib/httpserver/S3Client.cpp | 125 +++++++++++++++- lib/httpserver/S3Client.h | 21 +++ lib/httpserver/S3Simulator.cpp | 222 +++++++++++++++++++++++++++++ lib/httpserver/S3Simulator.h | 2 + test/httpserver/testhttpserver.cpp | 74 +++++++++- 5 files changed, 442 insertions(+), 2 deletions(-) diff --git a/lib/httpserver/S3Client.cpp b/lib/httpserver/S3Client.cpp index b03aa1476..6b978b9f3 100644 --- a/lib/httpserver/S3Client.cpp +++ b/lib/httpserver/S3Client.cpp @@ -14,6 +14,9 @@ // #include // #include +#include +#include +#include #include #include "HTTPRequest.h" @@ -28,6 +31,123 @@ #include "MemLeakFindOn.h" +using boost::property_tree::ptree; + +// -------------------------------------------------------------------------- +// +// Function +// Name: S3Client::ListBucket(std::vector<S3Client::BucketEntry>* p_contents_out, +// std::vector<std::string>* p_common_prefixes_out, +// const std::string& prefix, +// const std::string& delimiter, +// bool* p_truncated_out, int max_keys, +// const std::string& marker) +// Purpose: Retrieve a list of objects in a bucket, with a +// common prefix, optionally starting from a specified +// marker, up to some limit. The entries, and common +// prefixes of entries containing the specified +// delimiter, will be appended to p_contents_out and +// p_common_prefixes_out. Returns the number of items +// appended (p_contents_out + p_common_prefixes_out), +// which may be 0 if there is nothing left to iterate +// over, or no matching files in the bucket. 
+// Created: 18/03/2016 +// +// -------------------------------------------------------------------------- + +int S3Client::ListBucket(std::vector* p_contents_out, + std::vector* p_common_prefixes_out, + const std::string& prefix, const std::string& delimiter, + bool* p_truncated_out, int max_keys, const std::string& marker) +{ + HTTPRequest request(HTTPRequest::Method_GET, "/"); + request.SetParameter("delimiter", delimiter); + request.SetParameter("prefix", prefix); + request.SetParameter("marker", marker); + if(max_keys != -1) + { + std::ostringstream max_keys_stream; + max_keys_stream << max_keys; + request.SetParameter("max-keys", max_keys_stream.str()); + } + + HTTPResponse response = FinishAndSendRequest(request); + CheckResponse(response, "Failed to list files in bucket"); + ASSERT(response.GetResponseCode() == HTTPResponse::Code_OK); + + std::string response_data((const char *)response.GetBuffer(), + response.GetSize()); + std::auto_ptr ap_response_stream( + new std::istringstream(response_data)); + + ptree response_tree; + read_xml(*ap_response_stream, response_tree, + boost::property_tree::xml_parser::trim_whitespace); + + if(response_tree.begin()->first != "ListBucketResult") + { + THROW_EXCEPTION_MESSAGE(HTTPException, BadResponse, + "Failed to list files in bucket: unexpected root element in " + "response: " << response_tree.begin()->first); + } + + if(++(response_tree.begin()) != response_tree.end()) + { + THROW_EXCEPTION_MESSAGE(HTTPException, BadResponse, + "Failed to list files in bucket: multiple root elements in " + "response: " << (++(response_tree.begin()))->first); + } + + ptree result = response_tree.get_child("ListBucketResult"); + ASSERT(result.get("Delimiter") == delimiter); + ASSERT(result.get("Prefix") == prefix); + ASSERT(result.get("Marker") == marker); + + std::string truncated = result.get("IsTruncated"); + ASSERT(truncated == "true" || truncated == "false"); + if(p_truncated_out) + { + *p_truncated_out = (truncated == "true"); + } 
+ + int num_results = 0; + + // Iterate over all the children of the ListBucketResult, looking for + // nodes called "Contents", and examine them. + BOOST_FOREACH(ptree::value_type &v, result) + { + if(v.first == "Contents") + { + std::string name = v.second.get("Key"); + std::string etag = v.second.get("ETag"); + std::string size = v.second.get("Size"); + const char * size_end_ptr; + int64_t size_int = box_strtoui64(size.c_str(), &size_end_ptr, 10); + if(*size_end_ptr != 0) + { + THROW_EXCEPTION_MESSAGE(HTTPException, BadResponse, + "Failed to list files in bucket: bad size in " + "contents: " << size); + } + + p_contents_out->push_back(BucketEntry(name, etag, size_int)); + num_results++; + } + } + + ptree common_prefixes = result.get_child("CommonPrefixes"); + BOOST_FOREACH(ptree::value_type &v, common_prefixes) + { + if(v.first == "Prefix") + { + p_common_prefixes_out->push_back(v.second.data()); + num_results++; + } + } + + return num_results; +} + // -------------------------------------------------------------------------- // // Function @@ -132,8 +252,11 @@ HTTPResponse S3Client::FinishAndSendRequest(HTTPRequest request, IOStream* pStre virtual_host_name = mHostName; } + bool with_parameters_for_get_request = ( + request.GetMethod() == HTTPRequest::Method_GET || + request.GetMethod() == HTTPRequest::Method_HEAD); BOX_TRACE("S3Client: " << mHostName << " > " << request.GetMethodName() << - " " << request.GetRequestURI()); + " " << request.GetRequestURI(with_parameters_for_get_request)); std::ostringstream date; time_t tt = time(NULL); diff --git a/lib/httpserver/S3Client.h b/lib/httpserver/S3Client.h index 9df5a0653..eef30fd4e 100644 --- a/lib/httpserver/S3Client.h +++ b/lib/httpserver/S3Client.h @@ -53,6 +53,27 @@ class S3Client mNetworkTimeout(30000) { } + class BucketEntry { + public: + BucketEntry(const std::string& name, const std::string& etag, + int64_t size) + : mName(name), + mEtag(etag), + mSize(size) + { } + const std::string& name() const { return 
mName; } + const std::string& etag() const { return mEtag; } + const int64_t size() const { return mSize; } + private: + std::string mName, mEtag; + int64_t mSize; + }; + + int ListBucket(std::vector* p_contents_out, + std::vector* p_common_prefixes_out, + const std::string& prefix = "", const std::string& delimiter = "/", + bool* p_truncated_out = NULL, int max_keys = -1, + const std::string& marker = ""); HTTPResponse GetObject(const std::string& rObjectURI, const std::string& MD5Checksum = ""); HTTPResponse HeadObject(const std::string& rObjectURI); diff --git a/lib/httpserver/S3Simulator.cpp b/lib/httpserver/S3Simulator.cpp index a856c2ba6..f97321bfa 100644 --- a/lib/httpserver/S3Simulator.cpp +++ b/lib/httpserver/S3Simulator.cpp @@ -9,6 +9,12 @@ #include "Box.h" +#ifdef HAVE_DIRENT_H +# include +#endif + +#include + #include #include #include @@ -365,6 +371,12 @@ void S3Simulator::Handle(HTTPRequest &rRequest, HTTPResponse &rResponse) THROW_EXCEPTION_MESSAGE(HTTPException, BadRequest, "Unsupported Amazon SimpleDB Method"); } + else if(rRequest.GetMethod() == HTTPRequest::Method_GET && + (rRequest.GetRequestURI() == "" || + rRequest.GetRequestURI() == "/")) + { + HandleListObjects(bucket_name, rRequest, rResponse); + } else if(rRequest.GetMethod() == HTTPRequest::Method_GET) { HandleGet(rRequest, rResponse); @@ -442,6 +454,216 @@ void S3Simulator::Handle(HTTPRequest &rRequest, HTTPResponse &rResponse) } +// -------------------------------------------------------------------------- +// +// Function +// Name: S3Simulator::HandleListObjects( +// const std::string& bucket_name, +// HTTPRequest &rRequest, +// HTTPResponse &rResponse) +// Purpose: Handles an S3 list objects request. 
+// Created: 15/03/2016 +// +// -------------------------------------------------------------------------- + +void S3Simulator::HandleListObjects(const std::string& bucket_name, + HTTPRequest &request, HTTPResponse &response) +{ + if(bucket_name.empty()) + { + THROW_EXCEPTION_MESSAGE(HTTPException, BadRequest, + "A bucket name is required"); + } + + std::string delimiter = request.GetParameterString("delimiter", "/"); + if(delimiter != "/") + { + THROW_EXCEPTION_MESSAGE(HTTPException, BadRequest, "Delimiter must be /"); + } + + std::string prefix = request.GetParameterString("prefix", ""); + if(prefix != "" && !EndsWith("/", prefix)) + { + THROW_EXCEPTION_MESSAGE(HTTPException, BadRequest, + "Prefix must be empty, or end with /, but was: '" << prefix << "'"); + } + + std::string marker = request.GetParameterString("marker", ""); + std::string max_keys_str = request.GetParameterString("max-keys", "1000"); + int max_keys; + { + char* p_end; + max_keys = strtol(max_keys_str.c_str(), &p_end, 10); + if(*p_end != 0) + { + THROW_EXCEPTION_MESSAGE(HTTPException, BadRequest, + "max-keys parameter must be an integer: '" << + max_keys_str << "'"); + } + } + + std::string base_path = GetConfiguration().GetKeyValue("StoreDirectory"); + std::string prefixed_path = base_path + "/" + prefix; + if(!EndsWith("/", prefixed_path)) + { + THROW_EXCEPTION_MESSAGE(HTTPException, Internal, + "Directory name must end with '/': " << prefixed_path); + } + RemoveSuffix("/", prefixed_path); + + DIR *p_dir = opendir(prefixed_path.c_str()); + if(p_dir == NULL) + { + THROW_EXCEPTION_MESSAGE(HTTPException, FileNotFound, + "Directory not found: " << prefixed_path); + } + + typedef std::map object_name_to_type_t; + object_name_to_type_t object_name_to_type; + + try + { + for(struct dirent* p_dirent = readdir(p_dir); p_dirent != NULL; + p_dirent = readdir(p_dir)) + { + std::string entry_name(p_dirent->d_name); + if(entry_name == "." 
|| entry_name == "..") + { + continue; + } + + std::string entry_path = prefixed_path + DIRECTORY_SEPARATOR + + entry_name; + + // Prefix must be empty, or end with / + ASSERT(prefix == "" || EndsWith("/", prefix)); + std::string object_name = prefix + entry_name; + +#ifdef HAVE_VALID_DIRENT_D_TYPE + if(p_dirent->d_type == DT_UNKNOWN) +#else + // Always use this branch if we don't have struct dirent.d_type: + if(true) +#endif + { + int entry_type = ObjectExists(entry_path); + if(entry_type == ObjectExists_File) + { + object_name_to_type[object_name] = + ObjectExists_File; + } + else if(entry_type == ObjectExists_Dir) + { + object_name_to_type[object_name] = + ObjectExists_Dir; + } + else + { + continue; + } + } +#ifdef HAVE_VALID_DIRENT_D_TYPE + else if(p_dirent->d_type == DT_REG) + { + object_name_to_type[object_name] = ObjectExists_File; + } + else if(p_dirent->d_type == DT_DIR) + { + object_name_to_type[object_name] = ObjectExists_Dir; + } +#endif // HAVE_VALID_DIRENT_D_TYPE + else + { + continue; + } + } + } + catch(BoxException &e) + { + closedir(p_dir); + throw; + } + + ptree result; + result.add("Name", bucket_name); + result.add("Prefix", prefix); + result.add("Marker", marker); + result.add("Delimiter", delimiter); + + ptree common_prefixes; + + bool truncated = false; + int result_count = 0; + for(object_name_to_type_t::iterator i = object_name_to_type.lower_bound(marker); + i != object_name_to_type.end(); i++) + { + if(result_count >= max_keys) + { + truncated = true; + break; + } + + // Both Contents and CommonPrefixes count towards number of + // elements returned. Each CommonPrefix counts as a single return, + // regardless of the number of files it contains/abbreviates. 
+ result_count++; + + if(i->second == ObjectExists_Dir) + { + common_prefixes.add("Prefix", i->first + delimiter); + continue; + } + + std::string entry_path = base_path + DIRECTORY_SEPARATOR + i->first; + int64_t size; + if(!FileExists(entry_path, &size, true)) // TreatLinksAsNotExisting + { + continue; + } + + ptree contents; + contents.add("Key", i->first); + + std::string digest; + { + std::auto_ptr ap_file; + ap_file.reset(new FileStream(entry_path)); + + MD5DigestStream digester; + ap_file->CopyStreamTo(digester); + ap_file->Seek(0, IOStream::SeekType_Absolute); + digester.Close(); + digest = """ + digester.DigestAsString() + """; + } + contents.add("ETag", digest); + + std::ostringstream size_stream; + size_stream << size; + contents.add("Size", size_stream.str()); + + result.add_child("Contents", contents); + } + + closedir(p_dir); + + result.add("IsTruncated", truncated ? "true" : "false"); + result.add_child("CommonPrefixes", common_prefixes); + + ptree response_tree; + response_tree.add_child("ListBucketResult", result); + + // http://docs.amazonwebservices.com/AmazonS3/2006-03-01/UsingRESTOperations.html + response.AddHeader("x-amz-id-2", "qBmKRcEWBBhH6XAqsKU/eg24V3jf/kWKN9dJip1L/FpbYr9FDy7wWFurfdQOEMcY"); + response.AddHeader("x-amz-request-id", "F2A8CCCA26B4B26D"); + response.AddHeader("Date", "Wed, 01 Mar 2006 12:00:00 GMT"); + response.AddHeader("Last-Modified", "Sun, 1 Jan 2006 12:00:00 GMT"); + response.AddHeader("Server", "AmazonS3"); + + response.SetResponseCode(HTTPResponse::Code_OK); + response.Write(PtreeToXmlString(response_tree)); +} + + // -------------------------------------------------------------------------- // // Function diff --git a/lib/httpserver/S3Simulator.h b/lib/httpserver/S3Simulator.h index 5150379eb..3f92d9ba0 100644 --- a/lib/httpserver/S3Simulator.h +++ b/lib/httpserver/S3Simulator.h @@ -82,6 +82,8 @@ class S3Simulator : public HTTPServer const ConfigurationVerify* GetConfigVerify() const; virtual void Handle(HTTPRequest 
&rRequest, HTTPResponse &rResponse); + virtual void HandleListObjects(const std::string& bucket_name, + HTTPRequest &request, HTTPResponse &response); virtual void HandleGet(HTTPRequest &rRequest, HTTPResponse &rResponse, bool IncludeContent = true); virtual void HandlePut(HTTPRequest &rRequest, HTTPResponse &rResponse); diff --git a/test/httpserver/testhttpserver.cpp b/test/httpserver/testhttpserver.cpp index 467e3791b..a124af7bb 100644 --- a/test/httpserver/testhttpserver.cpp +++ b/test/httpserver/testhttpserver.cpp @@ -138,6 +138,18 @@ void TestWebServer::Handle(HTTPRequest &rRequest, HTTPResponse &rResponse) rResponse.Write(DEFAULT_RESPONSE_2, sizeof(DEFAULT_RESPONSE_2) - 1); } +std::vector get_entry_names(const std::vector entries) +{ + std::vector entry_names; + for(std::vector::const_iterator i = entries.begin(); + i != entries.end(); i++) + { + entry_names.push_back(i->name()); + + } + return entry_names; +} + bool exercise_s3client(S3Client& client) { int num_failures_initial = num_failures; @@ -210,7 +222,67 @@ bool exercise_s3client(S3Client& client) TEST_EQUAL("", response.GetHeaders().GetHeaderValue("etag", false)); // !required // This will fail if the file was created in the wrong place: - TEST_EQUAL(0, ::unlink("testfiles/store/newfile")); + TEST_EQUAL(0, EMU_UNLINK("testfiles/store/newfile")); + + // Test the ListBucket command. + std::vector actual_contents; + std::vector actual_common_prefixes; + TEST_EQUAL(3, client.ListBucket(&actual_contents, &actual_common_prefixes)); + std::vector actual_entry_names = + get_entry_names(actual_contents); + + std::vector expected_contents; + expected_contents.push_back("dsfdsfs98.fd"); + TEST_THAT(test_equal_lists(expected_contents, actual_entry_names)); + + std::vector expected_common_prefixes; + expected_common_prefixes.push_back("photos/"); + expected_common_prefixes.push_back("subdir/"); + TEST_THAT(test_equal_lists(expected_common_prefixes, actual_common_prefixes)); + + // Test that max_keys works. 
+ actual_contents.clear(); + actual_common_prefixes.clear(); + + bool is_truncated; + TEST_EQUAL(2, + client.ListBucket( + &actual_contents, &actual_common_prefixes, + "", // prefix + "/", // delimiter + &is_truncated, + 2)); // max_keys + + TEST_THAT(is_truncated); + expected_contents.clear(); + expected_contents.push_back("dsfdsfs98.fd"); + actual_entry_names = get_entry_names(actual_contents); + TEST_THAT(test_equal_lists(expected_contents, actual_entry_names)); + + expected_common_prefixes.clear(); + expected_common_prefixes.push_back("photos/"); + TEST_THAT(test_equal_lists(expected_common_prefixes, actual_common_prefixes)); + + // Test that marker works. + actual_contents.clear(); + actual_common_prefixes.clear(); + + TEST_EQUAL(2, + client.ListBucket( + &actual_contents, &actual_common_prefixes, + "", // prefix + "/", // delimiter + &is_truncated, + 2, // max_keys + "photos")); // marker + + TEST_THAT(!is_truncated); + expected_contents.clear(); + actual_entry_names = get_entry_names(actual_contents); + TEST_THAT(test_equal_lists(expected_contents, actual_entry_names)); + + expected_common_prefixes.push_back("subdir/"); + TEST_THAT(test_equal_lists(expected_common_prefixes, actual_common_prefixes)); // Test is successful if the number of failures has not increased. return (num_failures == num_failures_initial);