Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HADOOP-16005: Add XAttr support to WASB and ABFS #452

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.security.InvalidKeyException;
import java.util.Calendar;
import java.util.Date;
Expand Down Expand Up @@ -247,6 +249,8 @@ public class AzureNativeFileSystemStore implements NativeFileSystemStore {

private static final int DEFAULT_CONCURRENT_WRITES = 8;

private static final Charset METADATA_ENCODING = StandardCharsets.UTF_8;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason for using a different encoding when compared to the AzureBlobFileSystemStore?

Copy link
Member Author

@c-w c-w Nov 30, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The documentation for Azure Data Lake Storage Gen2 (backing AzureBlobFileSystemStore) states that x-ms-properties must be encoded in ISO-8859-1 (see path/update#request-headers). The documentation for Azure Blob Storage (backing AzureNativeFileSystemStore) however only states that x-ms-meta must follow the conventions for C# identifiers (see set-blob-metadata#request-headers) which may contain Unicode letters and characters (see identifier-names). As such I believe that the two classes should use different encodings. What am I missing?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was mostly curious about this one. I assume @DadanielZ can confirm this one?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change is mostly looking good. If @DadanielZ can confirm the encoding issue, i can happy to commit this one.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for being away so long, yes, this is fine


// Concurrent reads reads of data written out of band are disable by default.
//
private static final boolean DEFAULT_READ_TOLERATE_CONCURRENT_APPEND = false;
Expand Down Expand Up @@ -1662,17 +1666,30 @@ private static void storeFolderAttribute(CloudBlobWrapper blob) {
removeMetadataAttribute(blob, OLD_IS_FOLDER_METADATA_KEY);
}

private static void storeLinkAttribute(CloudBlobWrapper blob,
String linkTarget) throws UnsupportedEncodingException {
// We have to URL encode the link attribute as the link URI could
private static String encodeMetadataAttribute(String value) throws UnsupportedEncodingException {
// We have to URL encode the attribute as it could
// have URI special characters which unless encoded will result
// in 403 errors from the server. This is due to metadata properties
// being sent in the HTTP header of the request which is in turn used
// on the server side to authorize the request.
String encodedLinkTarget = null;
if (linkTarget != null) {
encodedLinkTarget = URLEncoder.encode(linkTarget, "UTF-8");
}
return value == null ? null : URLEncoder.encode(value, METADATA_ENCODING.name());
}

private static String decodeMetadataAttribute(String encoded) throws UnsupportedEncodingException {
return encoded == null ? null : URLDecoder.decode(encoded, METADATA_ENCODING.name());
}

private static String ensureValidAttributeName(String attribute) {
// Attribute names must be valid C# identifiers so we have to
// convert the namespace dots (e.g. "user.something") in the
// attribute names. Using underscores here to be consistent with
// the constant metadata keys defined earlier in the file
return attribute.replace('.', '_');
}

private static void storeLinkAttribute(CloudBlobWrapper blob,
String linkTarget) throws UnsupportedEncodingException {
String encodedLinkTarget = encodeMetadataAttribute(linkTarget);
storeMetadataAttribute(blob,
LINK_BACK_TO_UPLOAD_IN_PROGRESS_METADATA_KEY,
encodedLinkTarget);
Expand All @@ -1686,11 +1703,7 @@ private static String getLinkAttributeValue(CloudBlobWrapper blob)
String encodedLinkTarget = getMetadataAttribute(blob,
LINK_BACK_TO_UPLOAD_IN_PROGRESS_METADATA_KEY,
OLD_LINK_BACK_TO_UPLOAD_IN_PROGRESS_METADATA_KEY);
String linkTarget = null;
if (encodedLinkTarget != null) {
linkTarget = URLDecoder.decode(encodedLinkTarget, "UTF-8");
}
return linkTarget;
return decodeMetadataAttribute(encodedLinkTarget);
}

private static boolean retrieveFolderAttribute(CloudBlobWrapper blob) {
Expand Down Expand Up @@ -2211,6 +2224,36 @@ public FileMetadata retrieveMetadata(String key) throws IOException {
}
}

@Override
public byte[] retrieveAttribute(String key, String attribute) throws IOException {
try {
checkContainer(ContainerAccessType.PureRead);
CloudBlobWrapper blob = getBlobReference(key);
blob.downloadAttributes(getInstrumentedContext());

String value = getMetadataAttribute(blob, ensureValidAttributeName(attribute));
value = decodeMetadataAttribute(value);
return value == null ? null : value.getBytes(METADATA_ENCODING);
} catch (Exception e) {
throw new AzureException(e);
}
}

@Override
public void storeAttribute(String key, String attribute, byte[] value) throws IOException {
try {
checkContainer(ContainerAccessType.ReadThenWrite);
CloudBlobWrapper blob = getBlobReference(key);
blob.downloadAttributes(getInstrumentedContext());

String encodedValue = encodeMetadataAttribute(new String(value, METADATA_ENCODING));
storeMetadataAttribute(blob, ensureValidAttributeName(attribute), encodedValue);
blob.uploadMetadata(getInstrumentedContext());
} catch (Exception e) {
throw new AzureException(e);
}
}

@Override
public InputStream retrieve(String key) throws AzureException, IOException {
return retrieve(key, 0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
import org.apache.hadoop.fs.Seekable;
import org.apache.hadoop.fs.StreamCapabilities;
import org.apache.hadoop.fs.Syncable;
import org.apache.hadoop.fs.XAttrSetFlag;
import org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation;
import org.apache.hadoop.fs.azure.metrics.AzureFileSystemMetricsSystem;
import org.apache.hadoop.fs.azure.security.Constants;
Expand Down Expand Up @@ -3563,6 +3564,76 @@ public void setOwner(Path p, String username, String groupname)
}
}

/**
* Set the value of an attribute for a path.
*
* @param path The path on which to set the attribute
* @param xAttrName The attribute to set
* @param value The byte value of the attribute to set (encoded in utf-8)
* @param flag The mode in which to set the attribute
* @throws IOException If there was an issue setting the attribute on Azure
c-w marked this conversation as resolved.
Show resolved Hide resolved
*/
@Override
public void setXAttr(Path path, String xAttrName, byte[] value, EnumSet<XAttrSetFlag> flag) throws IOException {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

too many line breaks in this method. Please remove the extra ones.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in 54dcb1d.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Re-activating this comment as i don't see this fixed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed some more newlines in 6ecbdc0. I'm finding it a bit difficult to discern what style to follow since the file seems to include multiple styles for newlines (e.g. my original implementation was close to setOwner in indentation style). If you have any additional feedback on-top of what is implemented now, I'd appreciate more concrete pointers on style/formatting. Thanks in advance!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry about nit-picking but it would be good to just follow the formatting in other methods in this file. For example, you don't need new lines in the catch block and also remove 3588.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the pointers. I was previously following the style of setOwner. I removed the newlines you mentioned in f7c0d26.

Path absolutePath = makeAbsolute(path);
performAuthCheck(absolutePath, WasbAuthorizationOperations.WRITE, "setXAttr", absolutePath);

String key = pathToKey(absolutePath);
FileMetadata metadata;
try {
metadata = store.retrieveMetadata(key);
} catch (IOException ex) {
Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex);
if (innerException instanceof StorageException
&& NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) {
throw new FileNotFoundException("File " + path + " doesn't exists.");
}
throw ex;
}

if (metadata == null) {
throw new FileNotFoundException("File doesn't exist: " + path);
}

boolean xAttrExists = store.retrieveAttribute(key, xAttrName) != null;
XAttrSetFlag.validate(xAttrName, xAttrExists, flag);
store.storeAttribute(key, xAttrName, value);
}

/**
* Get the value of an attribute for a path.
*
* @param path The path on which to get the attribute
* @param xAttrName The attribute to get
* @return The bytes of the attribute's value (encoded in utf-8)
* or null if the attribute does not exist
* @throws IOException If there was an issue getting the attribute from Azure
*/
@Override
public byte[] getXAttr(Path path, String xAttrName) throws IOException {
Path absolutePath = makeAbsolute(path);
performAuthCheck(absolutePath, WasbAuthorizationOperations.READ, "getXAttr", absolutePath);

String key = pathToKey(absolutePath);
FileMetadata metadata;
try {
metadata = store.retrieveMetadata(key);
} catch (IOException ex) {
Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex);
if (innerException instanceof StorageException
&& NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) {
throw new FileNotFoundException("File " + path + " doesn't exists.");
}
throw ex;
}

if (metadata == null) {
throw new FileNotFoundException("File doesn't exist: " + path);
}

return store.retrieveAttribute(key, xAttrName);
}

/**
* Is the user allowed?
* <ol>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ FileMetadata[] list(String prefix, final int maxListingCount,
void changePermissionStatus(String key, PermissionStatus newPermission)
throws AzureException;

byte[] retrieveAttribute(String key, String attribute) throws IOException;

void storeAttribute(String key, String attribute, byte[] value) throws IOException;

/**
* API to delete a blob in the back end azure storage.
* @param key - key to the blob being deleted.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Hashtable;
import java.util.List;
import java.util.ArrayList;
import java.util.Arrays;
Expand Down Expand Up @@ -56,6 +57,7 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathIOException;
import org.apache.hadoop.fs.XAttrSetFlag;
import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants;
import org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations;
import org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes;
Expand Down Expand Up @@ -636,6 +638,83 @@ public void setOwner(final Path path, final String owner, final String group)
}
}

/**
* Set the value of an attribute for a path.
*
* @param path The path on which to set the attribute
* @param name The attribute to set
* @param value The byte value of the attribute to set (encoded in latin-1)
* @param flag The mode in which to set the attribute
* @throws IOException If there was an issue setting the attribute on Azure
c-w marked this conversation as resolved.
Show resolved Hide resolved
* @throws IllegalArgumentException If name is null or empty or if value is null
*/
@Override
public void setXAttr(final Path path, final String name, final byte[] value, final EnumSet<XAttrSetFlag> flag)
throws IOException {
LOG.debug("AzureBlobFileSystem.setXAttr path: {}", path);

if (name == null || name.isEmpty() || value == null) {
throw new IllegalArgumentException("A valid name and value must be specified.");
}

Path qualifiedPath = makeQualified(path);
performAbfsAuthCheck(FsAction.READ_WRITE, qualifiedPath);

try {
Hashtable<String, String> properties = abfsStore.getPathStatus(path);
String xAttrName = ensureValidAttributeName(name);
boolean xAttrExists = properties.containsKey(xAttrName);
XAttrSetFlag.validate(name, xAttrExists, flag);

String xAttrValue = abfsStore.decodeAttribute(value);
properties.put(xAttrName, xAttrValue);
abfsStore.setPathProperties(path, properties);
} catch (AzureBlobFileSystemException ex) {
checkException(path, ex);
}
}

/**
* Get the value of an attribute for a path.
*
* @param path The path on which to get the attribute
* @param name The attribute to get
* @return The bytes of the attribute's value (encoded in latin-1)
* or null if the attribute does not exist
* @throws IOException If there was an issue getting the attribute from Azure
* @throws IllegalArgumentException If name is null or empty
*/
@Override
public byte[] getXAttr(final Path path, final String name)
throws IOException {
LOG.debug("AzureBlobFileSystem.getXAttr path: {}", path);

if (name == null || name.isEmpty()) {
throw new IllegalArgumentException("A valid name must be specified.");
}

Path qualifiedPath = makeQualified(path);
performAbfsAuthCheck(FsAction.READ, qualifiedPath);

byte[] value = null;
try {
Hashtable<String, String> properties = abfsStore.getPathStatus(path);
String xAttrName = ensureValidAttributeName(name);
if (properties.containsKey(xAttrName)) {
String xAttrValue = properties.get(xAttrName);
value = abfsStore.encodeAttribute(xAttrValue);
}
} catch (AzureBlobFileSystemException ex) {
checkException(path, ex);
}
return value;
}

private static String ensureValidAttributeName(String attribute) {
// to avoid HTTP 400 Bad Request, InvalidPropertyName
return attribute.replace('.', '_');
}

/**
* Set permission of a path.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URI;
Expand Down Expand Up @@ -197,6 +198,14 @@ public void close() throws IOException {
IOUtils.cleanupWithLogger(LOG, client);
}

byte[] encodeAttribute(String value) throws UnsupportedEncodingException {
return value.getBytes(XMS_PROPERTIES_ENCODING);
}

String decodeAttribute(byte[] value) throws UnsupportedEncodingException {
return new String(value, XMS_PROPERTIES_ENCODING);
}

private String[] authorityParts(URI uri) throws InvalidUriAuthorityException, InvalidUriException {
final String authority = uri.getRawAuthority();
if (null == authority) {
Expand Down
Loading