Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-39292 [C++][FS]: Remove the AzureBackend enum and add more flexible connection options #39293

Merged
merged 1 commit into from
Dec 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 40 additions & 21 deletions cpp/src/arrow/filesystem/azurefs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,12 @@ AzureOptions::~AzureOptions() = default;

bool AzureOptions::Equals(const AzureOptions& other) const {
// TODO(GH-38598): update here when more auth methods are added.
const bool equals = backend == other.backend &&
const bool equals = blob_storage_authority == other.blob_storage_authority &&
dfs_storage_authority == other.dfs_storage_authority &&
blob_storage_scheme == other.blob_storage_scheme &&
dfs_storage_scheme == other.dfs_storage_scheme &&
default_metadata == other.default_metadata &&
account_blob_url_ == other.account_blob_url_ &&
account_dfs_url_ == other.account_dfs_url_ &&
account_name_ == other.account_name_ &&
credential_kind_ == other.credential_kind_;
if (!equals) {
return false;
Expand All @@ -72,42 +74,59 @@ bool AzureOptions::Equals(const AzureOptions& other) const {
return false;
}

void AzureOptions::SetUrlsForAccountName(const std::string& account_name) {
if (this->backend == AzureBackend::kAzurite) {
account_blob_url_ = "http://127.0.0.1:10000/" + account_name + "/";
account_dfs_url_ = "http://127.0.0.1:10000/" + account_name + "/";
} else {
account_dfs_url_ = "https://" + account_name + ".dfs.core.windows.net/";
account_blob_url_ = "https://" + account_name + ".blob.core.windows.net/";
namespace {
std::string BuildBaseUrl(const std::string& scheme, const std::string& authority,
const std::string& account_name) {
std::string url;
url += scheme + "://";
if (!authority.empty()) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that we should reject empty authority because it builds no host part URL.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My reasoning here is that there are many validations I could apply, but the most complete validation will come from the error messages triggered by using the filesystem with a bogus URL with 3 slashes after https:.

if (authority[0] == '.') {
url += account_name;
url += authority;
} else {
url += authority;
url += "/";
url += account_name;
}
}
url += "/";
return url;
}
} // namespace

Status AzureOptions::ConfigureDefaultCredential(const std::string& account_name) {
AzureOptions::SetUrlsForAccountName(account_name);
credential_kind_ = CredentialKind::kTokenCredential;
token_credential_ = std::make_shared<Azure::Identity::DefaultAzureCredential>();
return Status::OK();
std::string AzureOptions::AccountBlobUrl(const std::string& account_name) const {
return BuildBaseUrl(blob_storage_scheme, blob_storage_authority, account_name);
}

std::string AzureOptions::AccountDfsUrl(const std::string& account_name) const {
return BuildBaseUrl(dfs_storage_scheme, dfs_storage_authority, account_name);
}

Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_name,
const std::string& account_key) {
AzureOptions::SetUrlsForAccountName(account_name);
credential_kind_ = CredentialKind::kStorageSharedKeyCredential;
account_name_ = account_name;
storage_shared_key_credential_ =
std::make_shared<Storage::StorageSharedKeyCredential>(account_name, account_key);
return Status::OK();
}

Status AzureOptions::ConfigureDefaultCredential(const std::string& account_name) {
credential_kind_ = CredentialKind::kTokenCredential;
token_credential_ = std::make_shared<Azure::Identity::DefaultAzureCredential>();
return Status::OK();
}

Result<std::unique_ptr<Blobs::BlobServiceClient>> AzureOptions::MakeBlobServiceClient()
const {
switch (credential_kind_) {
case CredentialKind::kAnonymous:
break;
case CredentialKind::kTokenCredential:
return std::make_unique<Blobs::BlobServiceClient>(account_blob_url_,
return std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name_),
token_credential_);
case CredentialKind::kStorageSharedKeyCredential:
return std::make_unique<Blobs::BlobServiceClient>(account_blob_url_,
return std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name_),
storage_shared_key_credential_);
}
return Status::Invalid("AzureOptions doesn't contain a valid auth configuration");
Expand All @@ -119,11 +138,11 @@ AzureOptions::MakeDataLakeServiceClient() const {
case CredentialKind::kAnonymous:
break;
case CredentialKind::kTokenCredential:
return std::make_unique<DataLake::DataLakeServiceClient>(account_dfs_url_,
token_credential_);
return std::make_unique<DataLake::DataLakeServiceClient>(
AccountDfsUrl(account_name_), token_credential_);
case CredentialKind::kStorageSharedKeyCredential:
return std::make_unique<DataLake::DataLakeServiceClient>(
account_dfs_url_, storage_shared_key_credential_);
AccountDfsUrl(account_name_), storage_shared_key_credential_);
}
return Status::Invalid("AzureOptions doesn't contain a valid auth configuration");
}
Expand Down
51 changes: 33 additions & 18 deletions cpp/src/arrow/filesystem/azurefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,37 @@ class DataLakeServiceClient;

namespace arrow::fs {

enum class AzureBackend {
/// \brief Official Azure Remote Backend
kAzure,
/// \brief Local Simulated Storage
kAzurite
};

/// Options for the AzureFileSystem implementation.
struct ARROW_EXPORT AzureOptions {
/// \brief The backend to connect to: Azure or Azurite (for testing).
AzureBackend backend = AzureBackend::kAzure;
/// \brief hostname[:port] of the Azure Blob Storage Service.
///
/// If the hostname is a relative domain name (one that starts with a '.'), then storage
/// account URLs will be constructed by prepending the account name to the hostname.
/// If the hostname is a fully qualified domain name, then the hostname will be used
/// as-is and the account name will follow the hostname in the URL path.
///
/// Default: ".blob.core.windows.net"
std::string blob_storage_authority = ".blob.core.windows.net";

/// \brief hostname[:port] of the Azure Data Lake Storage Gen 2 Service.
///
/// If the hostname is a relative domain name (one that starts with a '.'), then storage
/// account URLs will be constructed by prepending the account name to the hostname.
/// If the hostname is a fully qualified domain name, then the hostname will be used
/// as-is and the account name will follow the hostname in the URL path.
///
/// Default: ".dfs.core.windows.net"
std::string dfs_storage_authority = ".dfs.core.windows.net";

/// \brief Azure Blob Storage connection transport.
///
/// Default: "https"
std::string blob_storage_scheme = "https";

/// \brief Azure Data Lake Storage Gen 2 connection transport.
///
/// Default: "https"
std::string dfs_storage_scheme = "https";

// TODO(GH-38598): Add support for more auth methods.
// std::string connection_string;
Expand All @@ -65,22 +85,17 @@ struct ARROW_EXPORT AzureOptions {
std::shared_ptr<const KeyValueMetadata> default_metadata;

private:
std::string account_blob_url_;
std::string account_dfs_url_;

enum class CredentialKind {
kAnonymous,
kTokenCredential,
kStorageSharedKeyCredential,
} credential_kind_ = CredentialKind::kAnonymous;

std::string account_name_;
std::shared_ptr<Azure::Core::Credentials::TokenCredential> token_credential_;
std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
storage_shared_key_credential_;

std::shared_ptr<Azure::Core::Credentials::TokenCredential> token_credential_;

void SetUrlsForAccountName(const std::string& account_name);

public:
AzureOptions();
~AzureOptions();
Expand All @@ -92,8 +107,8 @@ struct ARROW_EXPORT AzureOptions {

bool Equals(const AzureOptions& other) const;

const std::string& AccountBlobUrl() const { return account_blob_url_; }
const std::string& AccountDfsUrl() const { return account_dfs_url_; }
std::string AccountBlobUrl(const std::string& account_name) const;
std::string AccountDfsUrl(const std::string& account_name) const;

Result<std::unique_ptr<Azure::Storage::Blobs::BlobServiceClient>>
MakeBlobServiceClient() const;
Expand Down
21 changes: 18 additions & 3 deletions cpp/src/arrow/filesystem/azurefs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,13 @@ namespace Blobs = Azure::Storage::Blobs;
namespace Core = Azure::Core;
namespace DataLake = Azure::Storage::Files::DataLake;

enum class AzureBackend {
/// \brief Official Azure Remote Backend
kAzure,
/// \brief Local Simulated Storage
kAzurite
};

class BaseAzureEnv : public ::testing::Environment {
protected:
std::string account_name_;
Expand Down Expand Up @@ -265,8 +272,6 @@ class AzureHierarchicalNSEnv : public AzureEnvImpl<AzureHierarchicalNSEnv> {

TEST(AzureFileSystem, InitializeFilesystemWithDefaultCredential) {
AzureOptions options;
options.backend = AzureBackend::kAzurite; // Irrelevant for this test because it
// doesn't connect to the server.
ARROW_EXPECT_OK(options.ConfigureDefaultCredential("dummy-account-name"));
EXPECT_OK_AND_ASSIGN(auto default_credential_fs, AzureFileSystem::Make(options));
}
Expand Down Expand Up @@ -352,7 +357,17 @@ class TestAzureFileSystem : public ::testing::Test {

static Result<AzureOptions> MakeOptions(BaseAzureEnv* env) {
AzureOptions options;
options.backend = env->backend();
switch (env->backend()) {
case AzureBackend::kAzurite:
options.blob_storage_authority = "127.0.0.1:10000";
options.dfs_storage_authority = "127.0.0.1:10000";
options.blob_storage_scheme = "http";
options.dfs_storage_scheme = "http";
break;
case AzureBackend::kAzure:
// Use the default values
break;
}
ARROW_EXPECT_OK(
options.ConfigureAccountKeyCredential(env->account_name(), env->account_key()));
return options;
Expand Down
Loading