Skip to content

Commit

Permalink
HIVE-12522: Wrong FS error during Tez merge files when warehouse and …
Browse files Browse the repository at this point in the history
…scratchdir are on different FS (Jason Dere, reviewed by Prasanth Jayachandran)
  • Loading branch information
Jason Dere committed Dec 1, 2015
1 parent be410d2 commit 8555d2a
Show file tree
Hide file tree
Showing 6 changed files with 1,544 additions and 2 deletions.
3 changes: 3 additions & 0 deletions itests/src/test/resources/testconfiguration.properties
Expand Up @@ -34,6 +34,7 @@ minimr.query.files=auto_sortmerge_join_16.q,\
load_fs2.q,\
load_hdfs_file_with_space_in_the_name.q,\
non_native_window_udf.q, \
orc_merge_diff_fs.q,\
optrstat_groupby.q,\
parallel_orderby.q,\
quotedid_smb.q,\
Expand Down Expand Up @@ -382,6 +383,7 @@ minitez.query.files=bucket_map_join_tez1.q,\
llapdecider.q,\
mrr.q,\
orc_ppd_basic.q,\
orc_merge_diff_fs.q,\
tez_bmj_schema_evolution.q,\
tez_dml.q,\
tez_fsstat.q,\
Expand Down Expand Up @@ -1270,6 +1272,7 @@ miniSparkOnYarn.query.files=auto_sortmerge_join_16.q,\
orc_merge7.q,\
orc_merge8.q,\
orc_merge9.q,\
orc_merge_diff_fs.q,\
orc_merge_incompat1.q,\
orc_merge_incompat2.q,\
parallel_orderby.q,\
Expand Down
5 changes: 3 additions & 2 deletions ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
Expand Up @@ -576,8 +576,9 @@ private Vertex createVertex(JobConf conf, MapWork mapWork,
// exist before jobClose (before renaming after job completion)
Path tempOutPath = Utilities.toTempPath(outputPath);
try {
if (!fs.exists(tempOutPath)) {
fs.mkdirs(tempOutPath);
FileSystem tmpOutFS = tempOutPath.getFileSystem(conf);
if (!tmpOutFS.exists(tempOutPath)) {
tmpOutFS.mkdirs(tempOutPath);
}
} catch (IOException e) {
throw new RuntimeException(
Expand Down
94 changes: 94 additions & 0 deletions ql/src/test/queries/clientpositive/orc_merge_diff_fs.q
@@ -0,0 +1,94 @@
set hive.explain.user=false;
set hive.merge.orcfile.stripe.level=false;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.optimize.sort.dynamic.partition=false;
set mapred.min.split.size=1000;
set mapred.max.split.size=2000;
set tez.grouping.min-size=1000;
set tez.grouping.max-size=2000;
set hive.merge.tezfiles=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set hive.merge.sparkfiles=false;

set hive.metastore.warehouse.dir=pfile://${system:test.tmp.dir}/orc_merge_diff_fs;

-- SORT_QUERY_RESULTS

-- Overview: loads the same rows from src into three identically defined ORC
-- tables. The first load runs with every file-merge flag off, the second with
-- the merge flags on and hive.merge.orcfile.stripe.level=false, the third with
-- hive.merge.orcfile.stripe.level=true. The script then compares a checksum
-- and a row count for each of the three tables.
-- The warehouse dir is set above to a pfile:// location under the test tmp
-- dir, presumably so that it lives on a different filesystem than the scratch
-- dir (the HIVE-12522 scenario) - TODO confirm against the test cluster setup.
-- The small split and grouping sizes set above presumably force several
-- output files per partition so that there is something to merge - TODO confirm.
-- NOTE(review): existing comments in this file appear only directly in front
-- of SQL statements, never in front of set or dfs commands. Keep any new
-- comments in the same position.
DROP TABLE orcfile_merge1;
DROP TABLE orcfile_merge1b;
DROP TABLE orcfile_merge1c;

-- Three tables with the same columns, partitioning and storage format, one
-- per merge configuration exercised below.
CREATE TABLE orcfile_merge1 (key INT, value STRING)
PARTITIONED BY (ds STRING, part STRING) STORED AS ORC;
CREATE TABLE orcfile_merge1b (key INT, value STRING)
PARTITIONED BY (ds STRING, part STRING) STORED AS ORC;
CREATE TABLE orcfile_merge1c (key INT, value STRING)
PARTITIONED BY (ds STRING, part STRING) STORED AS ORC;

-- merge disabled
-- All four hive.merge.*files flags are false at this point, so this insert is
-- the no-merge baseline. ds is a static partition value and part is a dynamic
-- partition column computed as PMOD(HASH(key), 2).
EXPLAIN
INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
SELECT key, value, PMOD(HASH(key), 2) as part
FROM src;

-- Execute the statement explained above. The dfs -ls command after it lists
-- the files written under ds=1/part=0 of this table.
INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
SELECT key, value, PMOD(HASH(key), 2) as part
FROM src;

dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1/ds=1/part=0/;

set hive.merge.tezfiles=true;
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.merge.sparkfiles=true;
-- auto-merge slow way
-- The four merge flags were just switched to true while
-- hive.merge.orcfile.stripe.level is still false from the top of the script.
EXPLAIN
INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
SELECT key, value, PMOD(HASH(key), 2) as part
FROM src;

-- Execute the statement explained above. The dfs -ls command after it lists
-- the files left under ds=1/part=0 of this table.
INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
SELECT key, value, PMOD(HASH(key), 2) as part
FROM src;

dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1b/ds=1/part=0/;

set hive.merge.orcfile.stripe.level=true;
-- auto-merge fast way
-- Same insert once more, now with hive.merge.orcfile.stripe.level=true and
-- the merge flags still on.
EXPLAIN
INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
SELECT key, value, PMOD(HASH(key), 2) as part
FROM src;

-- Execute the statement explained above. The dfs -ls command after it lists
-- the files left under ds=1/part=0 of this table.
INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
SELECT key, value, PMOD(HASH(key), 2) as part
FROM src;

dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1c/ds=1/part=0/;

set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
-- Verify
-- Each of the next three queries streams every row of one table (ds=1) through
-- tr, which turns tabs into underscores so the row arrives as a single column
-- c, and then sums HASH(c) as a content checksum.
-- NOTE(review): the three sums are presumably expected to be equal, since all
-- three tables were loaded from the same query - confirm against the golden output.
SELECT SUM(HASH(c)) FROM (
SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
FROM orcfile_merge1 WHERE ds='1'
) t;

SELECT SUM(HASH(c)) FROM (
SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
FROM orcfile_merge1b WHERE ds='1'
) t;

SELECT SUM(HASH(c)) FROM (
SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
FROM orcfile_merge1c WHERE ds='1'
) t;

-- Row count of each table with no partition filter.
select count(*) from orcfile_merge1;
select count(*) from orcfile_merge1b;
select count(*) from orcfile_merge1c;

-- Clean up the three test tables.
DROP TABLE orcfile_merge1;
DROP TABLE orcfile_merge1b;
DROP TABLE orcfile_merge1c;

0 comments on commit 8555d2a

Please sign in to comment.