@@ -18,6 +18,7 @@
package org.apache.doris.common.util;

import org.apache.doris.common.UserException;
import org.apache.doris.datasource.property.storage.AzurePropertyUtils;
import org.apache.doris.datasource.property.storage.StorageProperties;
import org.apache.doris.datasource.property.storage.exception.StoragePropertiesException;
import org.apache.doris.fs.FileSystemType;
@@ -307,6 +308,10 @@ public static String getTempWritePath(String loc, String prefix) {
}

public TFileType getTFileTypeForBE() {
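// OneLake (Microsoft Fabric) locations are currently read through the Hadoop ABFS connector
// with OAuth2 (see the temporary-check note in AzureProperties), so they are reported to the BE
// as HDFS; e.g. abfss://<workspace-id>@onelake.dfs.fabric.microsoft.com/... maps to FILE_HDFS.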
if ((SchemaTypeMapper.ABFS.getSchema().equals(schema) || SchemaTypeMapper.ABFSS.getSchema()
.equals(schema)) && AzurePropertyUtils.isOneLakeLocation(normalizedLocation)) {
return TFileType.FILE_HDFS;
}
if (StringUtils.isNotBlank(normalizedLocation) && isHdfsOnOssEndpoint(normalizedLocation)) {
return TFileType.FILE_HDFS;
}
@@ -324,6 +329,10 @@ public Path toStorageLocation() {


public FileSystemType getFileSystemType() {
if ((SchemaTypeMapper.ABFS.getSchema().equals(schema) || SchemaTypeMapper.ABFSS.getSchema()
.equals(schema)) && AzurePropertyUtils.isOneLakeLocation(normalizedLocation)) {
return FileSystemType.HDFS;
}
return SchemaTypeMapper.fromSchemaToFileSystemType(schema);
}

@@ -21,7 +21,6 @@
import org.apache.doris.datasource.property.ConnectorProperty;
import org.apache.doris.datasource.property.ParamRules;
import org.apache.doris.datasource.property.storage.AbstractS3CompatibleProperties;
import org.apache.doris.datasource.property.storage.HdfsCompatibleProperties;
import org.apache.doris.datasource.property.storage.StorageProperties;

import com.google.common.collect.Maps;
@@ -86,6 +85,7 @@ public class IcebergRestProperties extends AbstractIcebergProperties {

@ConnectorProperty(names = {"iceberg.rest.oauth2.credential"},
required = false,
sensitive = true,
description = "The oauth2 credential for the iceberg rest catalog service.")
private String icebergRestOauth2Credential;

@@ -150,19 +150,10 @@ public class IcebergRestProperties extends AbstractIcebergProperties {

@ConnectorProperty(names = {"iceberg.rest.secret-access-key"},
required = false,
sensitive = true,
description = "The secret access key for the iceberg rest catalog service.")
private String icebergRestSecretAccessKey = "";

@ConnectorProperty(names = {"iceberg.rest.connection-timeout-ms"},
required = false,
description = "Connection timeout in milliseconds for the REST catalog HTTP client. Default: 10000 (10s).")
private String icebergRestConnectionTimeoutMs = "10000";

@ConnectorProperty(names = {"iceberg.rest.socket-timeout-ms"},
required = false,
description = "Socket timeout in milliseconds for the REST catalog HTTP client. Default: 60000 (60s).")
private String icebergRestSocketTimeoutMs = "60000";

protected IcebergRestProperties(Map<String, String> props) {
super(props);
}
@@ -269,13 +260,6 @@ private void addOptionalProperties() {
if (isIcebergRestVendedCredentialsEnabled()) {
icebergRestCatalogProperties.put(VENDED_CREDENTIALS_HEADER, VENDED_CREDENTIALS_VALUE);
}

if (Strings.isNotBlank(icebergRestConnectionTimeoutMs)) {
icebergRestCatalogProperties.put("rest.client.connection-timeout-ms", icebergRestConnectionTimeoutMs);
}
if (Strings.isNotBlank(icebergRestSocketTimeoutMs)) {
icebergRestCatalogProperties.put("rest.client.socket-timeout-ms", icebergRestSocketTimeoutMs);
}
}

private void addAuthenticationProperties() {
@@ -339,14 +323,12 @@ public void toFileIOProperties(List<StorageProperties> storagePropertiesList,
Map<String, String> fileIOProperties, Configuration conf) {

for (StorageProperties storageProperties : storagePropertiesList) {
if (storageProperties instanceof HdfsCompatibleProperties) {
storageProperties.getBackendConfigProperties().forEach(conf::set);
} else if (storageProperties instanceof AbstractS3CompatibleProperties) {
if (storageProperties instanceof AbstractS3CompatibleProperties) {
// For all S3-compatible storage types, put properties in fileIOProperties map
toS3FileIOProperties((AbstractS3CompatibleProperties) storageProperties, fileIOProperties);
} else {
// For other storage types, populate fileIOProperties and also register the Hadoop storage config
fileIOProperties.putAll(storageProperties.getBackendConfigProperties());
conf.addResource(storageProperties.getHadoopStorageConfig());
}
}

@@ -20,15 +20,19 @@
import org.apache.doris.common.Config;
import org.apache.doris.common.UserException;
import org.apache.doris.datasource.property.ConnectorProperty;
import org.apache.doris.datasource.property.ParamRules;
import org.apache.doris.datasource.property.storage.exception.AzureAuthType;

import com.google.common.base.Strings;
import com.google.common.collect.ImmutableSet;
import lombok.Getter;
import lombok.Setter;
import org.apache.hadoop.conf.Configuration;

import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Stream;

/**
@@ -67,6 +71,7 @@ public class AzureProperties extends StorageProperties {
@Getter
@ConnectorProperty(names = {"azure.account_name", "azure.access_key", "s3.access_key",
"AWS_ACCESS_KEY", "ACCESS_KEY", "access_key"},
required = false,
sensitive = true,
description = "The access key of S3.")
protected String accountName = "";
@@ -75,9 +80,37 @@
@ConnectorProperty(names = {"azure.account_key", "azure.secret_key", "s3.secret_key",
"AWS_SECRET_KEY", "secret_key"},
sensitive = true,
required = false,
description = "The secret key of S3.")
protected String accountKey = "";

@ConnectorProperty(names = {"azure.oauth2_client_id"},
required = false,
description = "The client id of Azure AD application.")
private String clientId;

@ConnectorProperty(names = {"azure.oauth2_client_secret"},
required = false,
sensitive = true,
description = "The client secret of Azure AD application.")
private String clientSecret;


@ConnectorProperty(names = {"azure.oauth2_server_uri"},
required = false,
description = "The account host of Azure blob.")
private String oauthServerUri;

@ConnectorProperty(names = {"azure.oauth2_account_host"},
required = false,
description = "The account host of Azure blob.")
private String accountHost;

@ConnectorProperty(names = {"azure.auth_type"},
required = false,
description = "The auth type of Azure blob.")
private String azureAuthType = AzureAuthType.SharedKey.name();

@Getter
@ConnectorProperty(names = {"container", "azure.bucket", "s3.bucket"},
required = false,
@@ -108,11 +141,11 @@ public AzureProperties(Map<String, String> origProps) {
public void initNormalizeAndCheckProps() {
super.initNormalizeAndCheckProps();
//check endpoint
if (!endpoint.endsWith(AZURE_ENDPOINT_SUFFIX)) {
throw new IllegalArgumentException(String.format("Endpoint '%s' is not valid. It should end with '%s'.",
endpoint, AZURE_ENDPOINT_SUFFIX));
}
this.endpoint = formatAzureEndpoint(endpoint, accountName);
buildRules().validate();
if (AzureAuthType.OAuth2.name().equals(azureAuthType) && (!isIcebergRestCatalog())) {
throw new UnsupportedOperationException("OAuth2 auth type is only supported for iceberg rest catalog");
}
}

public static boolean guessIsMe(Map<String, String> origProps) {
@@ -134,14 +167,25 @@ public static boolean guessIsMe(Map<String, String> origProps) {

@Override
public Map<String, String> getBackendConfigProperties() {
if (!azureAuthType.equalsIgnoreCase("OAuth2")) {
Map<String, String> s3Props = new HashMap<>();
s3Props.put("AWS_ENDPOINT", endpoint);
s3Props.put("AWS_REGION", "dummy_region");
s3Props.put("AWS_ACCESS_KEY", accountName);
s3Props.put("AWS_SECRET_KEY", accountKey);
s3Props.put("AWS_NEED_OVERRIDE_ENDPOINT", "true");
s3Props.put("provider", "azure");
s3Props.put("use_path_style", usePathStyle);
return s3Props;
}
// For OAuth2, also pass the Hadoop ABFS configuration through as backend properties
Map<String, String> s3Props = new HashMap<>();
s3Props.put("AWS_ENDPOINT", endpoint);
s3Props.put("AWS_REGION", "dummy_region");
s3Props.put("AWS_ACCESS_KEY", accountName);
s3Props.put("AWS_SECRET_KEY", accountKey);
s3Props.put("AWS_NEED_OVERRIDE_ENDPOINT", "true");
s3Props.put("provider", "azure");
s3Props.put("use_path_style", usePathStyle);
hadoopStorageConfig.forEach(entry -> s3Props.put(entry.getKey(), entry.getValue()));
return s3Props;
}

@@ -186,10 +230,19 @@ public void initializeHadoopStorageConfig() {
hadoopStorageConfig.set(k, v);
}
});
setAzureAccountKeys(hadoopStorageConfig, accountName, accountKey);
if (azureAuthType != null && azureAuthType.equalsIgnoreCase("OAuth2")) {
setHDFSAzureOauth2Config(hadoopStorageConfig);
} else {
setHDFSAzureAccountKeys(hadoopStorageConfig, accountName, accountKey);
}
}

@Override
protected Set<String> schemas() {
return ImmutableSet.of("wasb", "wasbs", "abfs", "abfss");
}

private static void setAzureAccountKeys(Configuration conf, String accountName, String accountKey) {
private static void setHDFSAzureAccountKeys(Configuration conf, String accountName, String accountKey) {
String[] endpoints = {
"dfs.core.windows.net",
"blob.core.windows.net"
@@ -201,4 +254,48 @@ private static void setAzureAccountKeys(Configuration conf, String accountName,
conf.set("fs.azure.account.key", accountKey);
}

private void setHDFSAzureOauth2Config(Configuration conf) {
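// Sets the per-account ABFS OAuth (client-credentials) settings on the Hadoop configuration.
// Illustrative example, assuming accountHost = "onelake.dfs.fabric.microsoft.com":
//   fs.azure.account.auth.type.onelake.dfs.fabric.microsoft.com = OAuth
//   fs.azure.account.oauth.provider.type.onelake.dfs.fabric.microsoft.com = ClientCredsTokenProvider
//   fs.azure.account.oauth2.client.endpoint.onelake.dfs.fabric.microsoft.com = <azure.oauth2_server_uri>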
conf.set(String.format("fs.azure.account.auth.type.%s", accountHost), "OAuth");
conf.set(String.format("fs.azure.account.oauth.provider.type.%s", accountHost),
"org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider");
conf.set(String.format("fs.azure.account.oauth2.client.id.%s", accountHost), clientId);
conf.set(String.format("fs.azure.account.oauth2.client.secret.%s", accountHost), clientSecret);
conf.set(String.format("fs.azure.account.oauth2.client.endpoint.%s", accountHost), oauthServerUri);
}

private ParamRules buildRules() {
return new ParamRules()
// OAuth2 requires account host, client id, client secret, and server URI
.requireIf(azureAuthType, AzureAuthType.OAuth2.name(), new String[]{accountHost,
clientId,
clientSecret,
oauthServerUri}, "When auth_type is OAuth2, oauth2_account_host, oauth2_client_id"
+ ", oauth2_client_secret, and oauth2_server_uri are required.")
.requireIf(azureAuthType, AzureAuthType.SharedKey.name(), new String[]{accountName, accountKey},
"When auth_type is SharedKey, account_name and account_key are required.");
}
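
// Illustrative property sets (placeholder values, not from this change) that satisfy these rules:
//   OAuth2:    "azure.auth_type" = "OAuth2", "azure.oauth2_account_host" = "onelake.dfs.fabric.microsoft.com",
//              "azure.oauth2_client_id" = "<app-id>", "azure.oauth2_client_secret" = "<secret>",
//              "azure.oauth2_server_uri" = "https://login.microsoftonline.com/<tenant-id>/oauth2/token"
//   SharedKey: "azure.account_name" = "<account>", "azure.account_key" = "<key>" (auth_type defaults to SharedKey)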

// NB: Temporary check. OAuth2 is currently used to access OneLake storage via HDFS.
// In the future, OAuth2 will be supported via the native SDK to reduce maintenance cost.
// For now, OAuth2 authentication is only allowed for Iceberg REST catalogs.
// TODO: Remove this temporary check later.
private static final String ICEBERG_CATALOG_TYPE_KEY = "iceberg.catalog.type";
private static final String ICEBERG_CATALOG_TYPE_REST = "rest";
private static final String TYPE_KEY = "type";
private static final String ICEBERG_VALUE = "iceberg";

private boolean isIcebergRestCatalog() {
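// For example, origProps containing {"type" = "iceberg", "iceberg.catalog.type" = "rest"}
// is recognized as an Iceberg REST catalog; any other explicit "type" value is not.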
// check iceberg type
boolean hasIcebergType = origProps.entrySet().stream()
.anyMatch(entry -> TYPE_KEY.equalsIgnoreCase(entry.getKey())
&& ICEBERG_VALUE.equalsIgnoreCase(entry.getValue()));
if (!hasIcebergType && origProps.keySet().stream().anyMatch(TYPE_KEY::equalsIgnoreCase)) {
return false;
}
return origProps.entrySet().stream()
.anyMatch(entry -> ICEBERG_CATALOG_TYPE_KEY.equalsIgnoreCase(entry.getKey())
&& ICEBERG_CATALOG_TYPE_REST.equalsIgnoreCase(entry.getValue()));
}

}
@@ -25,6 +25,7 @@
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Map;
import java.util.regex.Pattern;

public class AzurePropertyUtils {

@@ -69,10 +70,16 @@ public static String validateAndNormalizeUri(String path) throws UserException {
|| lower.startsWith("s3://"))) {
throw new StoragePropertiesException("Unsupported Azure URI scheme: " + path);
}

if (isOneLakeLocation(path)) {
return path;
}
return convertToS3Style(path);
}

private static final Pattern ONELAKE_PATTERN = Pattern.compile(
"abfs[s]?://([^@]+)@([^/]+)\\.dfs\\.fabric\\.microsoft\\.com(/.*)?", Pattern.CASE_INSENSITIVE);


/**
* Converts an Azure Blob Storage URI into a unified {@code s3://<container>/<path>} format.
* <p>
@@ -186,4 +193,8 @@ public static String validateAndGetUri(Map<String, String> props) {
.findFirst()
.orElseThrow(() -> new StoragePropertiesException("Properties must contain 'uri' key"));
}

public static boolean isOneLakeLocation(String location) {
return location != null && ONELAKE_PATTERN.matcher(location).matches();
}
}
@@ -0,0 +1,23 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.datasource.property.storage.exception;

public enum AzureAuthType {
OAuth2,
SharedKey;
}
@@ -283,4 +283,21 @@ public void testHdfsStorageLocationConvert() {
Assertions.assertEquals(location, locationPath.getNormalizedLocation());
}

@Test
public void testOnelakeStorageLocationConvert() {
String location = "abfss://1a2b3c4d-1234-5678-abcd-9876543210ef@onelake.dfs.fabric.microsoft.com/myworkspace/lakehouse/default/Files/data/test.parquet";
LocationPath locationPath = LocationPath.of(location, STORAGE_PROPERTIES_MAP);
Assertions.assertEquals(TFileType.FILE_HDFS, locationPath.getTFileTypeForBE());
Assertions.assertEquals(FileSystemType.HDFS, locationPath.getFileSystemType());
location = "abfs://1a2b3c4d-1234-5678-abcd-9876543210ef@onelake.dfs.fabric.microsoft.com/myworkspace/lakehouse/default/Files/data/test.parquet";
locationPath = LocationPath.of(location, STORAGE_PROPERTIES_MAP);
Assertions.assertEquals(TFileType.FILE_HDFS, locationPath.getTFileTypeForBE());
Assertions.assertEquals(FileSystemType.HDFS, locationPath.getFileSystemType());
location = "abfss://mycontainer@mystorageaccount.dfs.core.windows.net/data/2025/11/11/";
locationPath = LocationPath.of(location, STORAGE_PROPERTIES_MAP);
Assertions.assertEquals(TFileType.FILE_S3, locationPath.getTFileTypeForBE());
Assertions.assertEquals(FileSystemType.S3, locationPath.getFileSystemType());

}

}