Skip to content

Commit

Permalink
Cannot use libcurl for URL-parsing, bummer
Browse files Browse the repository at this point in the history
  • Loading branch information
coryan committed Sep 18, 2020
1 parent c5ce7b9 commit 99d0e71
Show file tree
Hide file tree
Showing 6 changed files with 113 additions and 16 deletions.
1 change: 1 addition & 0 deletions google/cloud/storage/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,7 @@ if (BUILD_TESTING)
internal/curl_wrappers_locking_already_present_test.cc
internal/curl_wrappers_locking_disabled_test.cc
internal/curl_wrappers_locking_enabled_test.cc
internal/curl_wrappers_test.cc
internal/default_object_acl_requests_test.cc
internal/generate_message_boundary_test.cc
internal/generic_request_test.cc
Expand Down
16 changes: 3 additions & 13 deletions google/cloud/storage/internal/curl_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,6 @@ std::string UrlEscapeString(std::string const& value) {
return std::string(handle.MakeEscapedString(value).get());
}

std::string UrlHost(std::string url) {
CurlUrlPtr handle(curl_url(), &curl_url_cleanup);
auto* data = &url[0];
auto const code = curl_url_get(handle.get(), CURLUPART_HOST, &data, 0);
if (code != CURLUE_OK) return "storage.googleapis.com"; // a (good) guess
std::string result = data;
curl_free(data);
return result;
}

template <typename ReturnType>
StatusOr<ReturnType> ParseFromString(StatusOr<HttpResponse> response) {
if (!response.ok()) {
Expand Down Expand Up @@ -259,9 +249,9 @@ CurlClient::CurlClient(ClientOptions options)
xml_upload_endpoint_ = "https://storage-upload.googleapis.com";
xml_download_endpoint_ = "https://storage-download.googleapis.com";
}
storage_host_ = UrlHost(storage_endpoint_);
xml_upload_host_ = UrlHost(xml_upload_endpoint_);
xml_download_host_ = UrlHost(xml_download_endpoint_);
storage_host_ = ExtractUrlHostpart(storage_endpoint_);
xml_upload_host_ = ExtractUrlHostpart(xml_upload_endpoint_);
xml_download_host_ = ExtractUrlHostpart(xml_download_endpoint_);

CurlInitializeOnce(options);
}
Expand Down
35 changes: 35 additions & 0 deletions google/cloud/storage/internal/curl_wrappers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,41 @@ void CurlInitializeOnce(ClientOptions const& options) {
options.enable_sigpipe_handler());
}

std::string ExtractUrlHostpart(std::string url) {
auto constexpr kDefaultHost = "storage.googleapis.com";
// Yikes, we need to manually extract the "host part", this is notoriously
// hard to do correctly.
//
// We are not going to try to fully validate URLs, for example, we will ignore
// `userinfo` because those do not appear in any production or test workload
// for GCS.
//
// If we get an invalid-looking URL we simply return an empty string, this is
// only used in the CurlClient to generate the `Host: ` header. For invalid
// URLs the CurlClient won't work in any case.
//
// Likewise, this implementation is not super efficient. It does not need to
// be as it is used a handful of times when the CurlClient is created.
for (std::string scheme_prefix : {"https://", "http://"}) {
auto p = url.rfind(scheme_prefix, 0);
if (p == 0) url.erase(0, scheme_prefix.size());
}
if (url.empty()) return url;
if (url[0] == '[') {
auto p = url.find(']');
if (p == std::string::npos) return {};
return url.substr(1, p - 1);
}
auto p = url.find('/');
if (p != std::string::npos) {
url = url.substr(0, p);
}
p = url.rfind(':');
if (p == std::string::npos) return url;
url = url.substr(0, p);
return !url.empty() ? url : kDefaultHost;
}

} // namespace internal
} // namespace STORAGE_CLIENT_NS
} // namespace storage
Expand Down
6 changes: 3 additions & 3 deletions google/cloud/storage/internal/curl_wrappers.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,6 @@ using CurlPtr = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
/// Hold a CURLM* handle and automatically clean it up.
using CurlMulti = std::unique_ptr<CURLM, decltype(&curl_multi_cleanup)>;

/// Hold a CURU* handle and automatically clean it up.
using CurlUrlPtr = std::unique_ptr<CURLU, decltype(&curl_url_cleanup)>;

/// Hold a character string created by CURL use correct deleter.
using CurlString = std::unique_ptr<char, decltype(&curl_free)>;

Expand All @@ -67,6 +64,9 @@ std::string CurlSslLibraryId();
/// Determines if the SSL library requires locking.
bool SslLibraryNeedsLocking(std::string const& curl_ssl_id);

/// Return the Host portion of a URL.
std::string ExtractUrlHostpart(std::string url);

} // namespace internal
} // namespace STORAGE_CLIENT_NS
} // namespace storage
Expand Down
70 changes: 70 additions & 0 deletions google/cloud/storage/internal/curl_wrappers_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "google/cloud/storage/internal/curl_wrappers.h"
#include <gmock/gmock.h>

namespace google {
namespace cloud {
namespace storage {
inline namespace STORAGE_CLIENT_NS {
namespace internal {
namespace {

TEST(CurlWrappersTest, ExtractUrlHostpart) {
struct Test {
std::string expected;
std::string input;
} cases[] = {
{"storage.googleapis.com", "https://storage.googleapis.com"},
{"storage.googleapis.com", "https://storage.googleapis.com"},
{"storage.googleapis.com", "https://storage.googleapis.com:443/"},
{"localhost", "http://localhost/"},
{"localhost", "http://localhost/"},
{"localhost", "http://localhost:8080/"},
{"localhost", "http://localhost/foo/bar"},
{"localhost", "http://localhost:8080/foo/bar"},
{"localhost", "http://localhost:8080/foo/bar"},
{"::1", "http://[::1]"},
{"::1", "http://[::1]/"},
{"::1", "http://[::1]:8080/"},
{"::1", "http://[::1]/foo/bar"},
{"::1", "http://[::1]:8080/foo/bar"},
{"127.0.0.1", "http://127.0.0.1"},
{"127.0.0.1", "http://127.0.0.1/"},
{"127.0.0.1", "http://127.0.0.1:8080"},
{"127.0.0.1", "http://127.0.0.1:8080/"},
{"127.0.0.1", "http://127.0.0.1/foo/bar"},
{"127.0.0.1", "http://127.0.0.1:8080/foo/bar"},
{"storage-download.127.0.0.1.nip.io",
"https://storage-download.127.0.0.1.nip.io/xmlapi/"},
{"gcs.127.0.0.1.nip.io",
"https://gcs.127.0.0.1.nip.io/storage/v1/"},
{"gcs.127.0.0.1.nip.io",
"https://gcs.127.0.0.1.nip.io:4443/upload/storage/v1/"},
{"gcs.127.0.0.1.nip.io",
"https://gcs.127.0.0.1.nip.io:4443/upload/storage/v1/"},
};

for (auto const& t : cases) {
EXPECT_EQ(t.expected, ExtractUrlHostpart(t.input));
}
}

} // namespace
} // namespace internal
} // namespace STORAGE_CLIENT_NS
} // namespace storage
} // namespace cloud
} // namespace google
1 change: 1 addition & 0 deletions google/cloud/storage/storage_client_unit_tests.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ storage_client_unit_tests = [
"internal/curl_wrappers_locking_already_present_test.cc",
"internal/curl_wrappers_locking_disabled_test.cc",
"internal/curl_wrappers_locking_enabled_test.cc",
"internal/curl_wrappers_test.cc",
"internal/default_object_acl_requests_test.cc",
"internal/generate_message_boundary_test.cc",
"internal/generic_request_test.cc",
Expand Down

0 comments on commit 99d0e71

Please sign in to comment.