Skip to content

Commit

Permalink
HIVE-27833: Hive Acid Replication Support for Dell Powerscale
Browse files Browse the repository at this point in the history
Details:
* Isilon supports xattrs only on the /.reserved/raw path. Because of that, checkFileSystemXAttrSupport throws an AccessControlException.
* In the case of an HDFS encryption zone, the virtual path /.reserved/raw gives superusers direct access to the underlying block data in the filesystem.
This allows superusers to DistCp data without requiring access to encryption keys, and avoids the overhead of decrypting and re-encrypting data.
The Isilon system supports getXAttrs only on /.reserved/raw paths.

Code Change:
* Added a new property, dfs.xattr.supported.only.on.reserved.namespace (default false), which should be set to true for Isilon
* Modified the logic around checkFileSystemXAttrSupport so that, for Isilon, xattr preservation is reported as unsupported (false) if the path doesn't start with /.reserved/raw
Testing:
* Tested on a cluster
  • Loading branch information
harshal-16 committed Nov 12, 2023
1 parent d2d4b86 commit a6daa91
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 12 deletions.
20 changes: 15 additions & 5 deletions common/src/java/org/apache/hadoop/hive/common/FileUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@

package org.apache.hadoop.hive.common;

import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.REPL_ADD_RAW_RESERVED_NAMESPACE;
import static org.apache.hadoop.hive.shims.Utils.RAW_RESERVED_VIRTUAL_PATH;

import java.io.EOFException;
import java.io.File;
import java.io.FileNotFoundException;
Expand Down Expand Up @@ -61,6 +64,7 @@
import org.apache.hadoop.fs.PathExistsException;
import org.apache.hadoop.fs.PathIsDirectoryException;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.hive.shims.Utils;
Expand Down Expand Up @@ -767,7 +771,7 @@ static boolean copy(FileSystem srcFS, Path src,
// is tried and it fails. We depend upon that behaviour in cases like replication,
// wherein if distcp fails, there is good reason to not plod along with a trivial
// implementation, and fail instead.
copied = doIOUtilsCopyBytes(srcFS, srcFS.getFileStatus(src), dstFS, dst, deleteSource, overwrite, shouldPreserveXAttrs(conf, srcFS, dstFS), conf, copyStatistics);
copied = doIOUtilsCopyBytes(srcFS, srcFS.getFileStatus(src), dstFS, dst, deleteSource, overwrite, shouldPreserveXAttrs(conf, srcFS, dstFS, src), conf, copyStatistics);
}
return copied;
}
Expand Down Expand Up @@ -895,11 +899,17 @@ private static void checkDependencies(FileSystem srcFS, Path src, FileSystem dst
}
}

public static boolean shouldPreserveXAttrs(HiveConf conf, FileSystem srcFS, FileSystem dstFS) throws IOException {
if (!Utils.checkFileSystemXAttrSupport(srcFS) || !Utils.checkFileSystemXAttrSupport(dstFS)){
return false;
public static boolean shouldPreserveXAttrs(HiveConf conf, FileSystem srcFS, FileSystem dstFS, Path path) throws IOException {
if (conf.getBoolVar(ConfVars.DFS_XATTR_ONLY_SUPPORTED_ON_RESERVED_NAMESPACE)) {
if (!(path.getName().startsWith(RAW_RESERVED_VIRTUAL_PATH))) {
return false;
}
} else {
if (!Utils.checkFileSystemXAttrSupport(srcFS) || !Utils.checkFileSystemXAttrSupport(dstFS)) {
return false;
}
}
for (Map.Entry<String,String> entry : conf.getPropsWithPrefix(Utils.DISTCP_OPTIONS_PREFIX).entrySet()) {
for (Map.Entry<String, String> entry : conf.getPropsWithPrefix(Utils.DISTCP_OPTIONS_PREFIX).entrySet()) {
String distCpOption = entry.getKey();
if (distCpOption.startsWith("p")) {
return distCpOption.contains("x");
Expand Down
2 changes: 2 additions & 0 deletions common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,8 @@ public static enum ConfVars {
MSC_CACHE_RECORD_STATS("hive.metastore.client.cache.v2.recordStats", false,
"This property enables recording metastore client cache stats in DEBUG logs"),
// QL execution stuff
DFS_XATTR_ONLY_SUPPORTED_ON_RESERVED_NAMESPACE("dfs.xattr.supported.only.on.reserved.namespace", false,
"DFS supports xattr only on Reserved Name space (/.reserved/raw)"),
SCRIPTWRAPPER("hive.exec.script.wrapper", null, ""),
PLAN("hive.exec.plan", "", ""),
STAGINGDIR("hive.exec.stagingdir", ".hive-staging",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ void copyFilesBetweenFS(FileSystem srcFS, Path[] paths, FileSystem dstFS,
Path dst, boolean deleteSource, boolean overwrite,
DataCopyStatistics copyStatistics) throws IOException {
retryableFxn(() -> {
boolean preserveXAttrs = FileUtils.shouldPreserveXAttrs(hiveConf, srcFS, dstFS);
boolean preserveXAttrs = FileUtils.shouldPreserveXAttrs(hiveConf, srcFS, dstFS, paths[0]);
FileUtils.copy(srcFS, paths, dstFS, dst, deleteSource, overwrite, preserveXAttrs, hiveConf,
copyStatistics);
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@
import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
import org.apache.tez.test.MiniTezCluster;

import static org.apache.hadoop.hive.shims.Utils.RAW_RESERVED_VIRTUAL_PATH;
import static org.apache.hadoop.tools.DistCpConstants.CONF_LABEL_DISTCP_JOB_ID;

/**
Expand Down Expand Up @@ -1047,7 +1048,7 @@ public HadoopShims.StoragePolicyShim getStoragePolicyShim(FileSystem fs) {
List<String> constructDistCpParams(List<Path> srcPaths, Path dst, Configuration conf) throws IOException {
// -update and -delete are mandatory options for directory copy to work.
List<String> params = constructDistCpDefaultParams(conf, dst.getFileSystem(conf),
srcPaths.get(0).getFileSystem(conf));
srcPaths.get(0).getFileSystem(conf), srcPaths);
if (!params.contains("-delete")) {
params.add("-delete");
}
Expand All @@ -1059,7 +1060,7 @@ List<String> constructDistCpParams(List<Path> srcPaths, Path dst, Configuration
}

private List<String> constructDistCpDefaultParams(Configuration conf, FileSystem dstFs,
FileSystem sourceFs) throws IOException {
FileSystem sourceFs, List<Path> srcPaths) throws IOException {
List<String> params = new ArrayList<String>();
boolean needToAddPreserveOption = true;
for (Map.Entry<String,String> entry : conf.getPropsWithPrefix(Utils.DISTCP_OPTIONS_PREFIX).entrySet()){
Expand All @@ -1074,8 +1075,12 @@ private List<String> constructDistCpDefaultParams(Configuration conf, FileSystem
}
}
if (needToAddPreserveOption) {
params.add((Utils.checkFileSystemXAttrSupport(dstFs)
&& Utils.checkFileSystemXAttrSupport(sourceFs)) ? "-pbx" : "-pb");
if (conf.getBoolean("dfs.xattr.supported.only.on.reserved.namespace", false)) {
params.add(srcPaths.get(0).getName().startsWith(RAW_RESERVED_VIRTUAL_PATH) ? "-pbx" : "-pb");
} else {
params.add((Utils.checkFileSystemXAttrSupport(dstFs)
&& Utils.checkFileSystemXAttrSupport(sourceFs)) ? "-pbx" : "-pb");
}
}
if (!params.contains("-update")) {
params.add("-update");
Expand All @@ -1097,7 +1102,7 @@ List<String> constructDistCpWithSnapshotParams(List<Path> srcPaths, Path dst, St
Configuration conf, String diff) throws IOException {
// Get the default distcp params
List<String> params = constructDistCpDefaultParams(conf, dst.getFileSystem(conf),
srcPaths.get(0).getFileSystem(conf));
srcPaths.get(0).getFileSystem(conf), srcPaths);
if (params.contains("-delete")) {
params.remove("-delete");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
public class Utils {

private static final Logger LOG = LoggerFactory.getLogger(Utils.class);

public static final String RAW_RESERVED_VIRTUAL_PATH = "/.reserved/raw/";
private static final boolean IBM_JAVA = System.getProperty("java.vendor")
.contains("IBM");

Expand Down

0 comments on commit a6daa91

Please sign in to comment.