Skip to content

Commit

Permalink
HIVE-23539 : Optimize data copy during repl load operation for HDFS based staging location
Browse files Browse the repository at this point in the history
  • Loading branch information
Pravin Sinha committed Jun 15, 2020
1 parent 625d471 commit 5c2fb99
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ private Task<?> tasksForAddPartition(Table table, AlterTableAddPartitionDesc add
}

Path loadTmpDir = replicaWarehousePartitionLocation;
boolean performOnlyMove = Utils.onSameHDFSFileSystem(event.dataPath(), replicaWarehousePartitionLocation);

// if move optimization is enabled, copy the files directly to the target path. No need to create the staging dir.
LoadFileType loadFileType;
if (event.replicationSpec().isInReplicationScope() &&
Expand All @@ -293,6 +293,8 @@ private Task<?> tasksForAddPartition(Table table, AlterTableAddPartitionDesc add
* If the Repl staging directory ('hive.repl.rootdir') is on the target cluster itself and the FS scheme is hdfs,
* data is moved directly from Repl staging data dir of partition to the partition's location on target warehouse.
*/
boolean performOnlyMove = event.replicationSpec().isInReplicationScope()
&& Utils.onSameHDFSFileSystem(event.dataPath(), replicaWarehousePartitionLocation);
Path moveSource = performOnlyMove ? partDataSrc : loadTmpDir;
Task<?> movePartitionTask = null;
if (loadFileType != LoadFileType.IGNORE) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,6 @@ private Task<?> loadTableTask(Table table, ReplicationSpec replicationSpec, Path
Path fromURI) {
Path dataPath = fromURI;
Path loadTmpDir = tgtPath;
boolean performOnlyMove = Utils.onSameHDFSFileSystem(dataPath, tgtPath);

// if move optimization is enabled, copy the files directly to the target path. No need to create the staging dir.
LoadFileType loadFileType;
Expand All @@ -303,6 +302,7 @@ private Task<?> loadTableTask(Table table, ReplicationSpec replicationSpec, Path
* data is moved directly from Repl staging data dir of the partition to the partition's location on target
* warehouse.
*/
boolean performOnlyMove = replicationSpec.isInReplicationScope() && Utils.onSameHDFSFileSystem(dataPath, tgtPath);
Path moveSrcPath = performOnlyMove ? dataPath : loadTmpDir;

MoveWork moveWork = new MoveWork(new HashSet<>(), new HashSet<>(), null, null, false);
Expand Down

0 comments on commit 5c2fb99

Please sign in to comment.