Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
HIVE-12522: Wrong FS error during Tez merge files when warehouse and …
…scratchdir are on different FS (Jason Dere, reviewed by Prasanth Jayachandran)
- Loading branch information
Jason Dere
committed
Dec 1, 2015
1 parent
be410d2
commit 8555d2a
Showing
6 changed files
with
1,544 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
set hive.explain.user=false; | ||
set hive.merge.orcfile.stripe.level=false; | ||
set hive.exec.dynamic.partition=true; | ||
set hive.exec.dynamic.partition.mode=nonstrict; | ||
set hive.optimize.sort.dynamic.partition=false; | ||
set mapred.min.split.size=1000; | ||
set mapred.max.split.size=2000; | ||
set tez.grouping.min-size=1000; | ||
set tez.grouping.max-size=2000; | ||
set hive.merge.tezfiles=false; | ||
set hive.merge.mapfiles=false; | ||
set hive.merge.mapredfiles=false; | ||
set hive.merge.sparkfiles=false; | ||
|
||
set hive.metastore.warehouse.dir=pfile://${system:test.tmp.dir}/orc_merge_diff_fs; | ||
|
||
-- SORT_QUERY_RESULTS | ||
|
||
DROP TABLE orcfile_merge1; | ||
DROP TABLE orcfile_merge1b; | ||
DROP TABLE orcfile_merge1c; | ||
|
||
CREATE TABLE orcfile_merge1 (key INT, value STRING) | ||
PARTITIONED BY (ds STRING, part STRING) STORED AS ORC; | ||
CREATE TABLE orcfile_merge1b (key INT, value STRING) | ||
PARTITIONED BY (ds STRING, part STRING) STORED AS ORC; | ||
CREATE TABLE orcfile_merge1c (key INT, value STRING) | ||
PARTITIONED BY (ds STRING, part STRING) STORED AS ORC; | ||
|
||
-- merge disabled | ||
EXPLAIN | ||
INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) | ||
SELECT key, value, PMOD(HASH(key), 2) as part | ||
FROM src; | ||
|
||
INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) | ||
SELECT key, value, PMOD(HASH(key), 2) as part | ||
FROM src; | ||
|
||
dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1/ds=1/part=0/; | ||
|
||
set hive.merge.tezfiles=true; | ||
set hive.merge.mapfiles=true; | ||
set hive.merge.mapredfiles=true; | ||
set hive.merge.sparkfiles=true; | ||
-- auto-merge slow way | ||
EXPLAIN | ||
INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part) | ||
SELECT key, value, PMOD(HASH(key), 2) as part | ||
FROM src; | ||
|
||
INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part) | ||
SELECT key, value, PMOD(HASH(key), 2) as part | ||
FROM src; | ||
|
||
dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1b/ds=1/part=0/; | ||
|
||
set hive.merge.orcfile.stripe.level=true; | ||
-- auto-merge fast way | ||
EXPLAIN | ||
INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part) | ||
SELECT key, value, PMOD(HASH(key), 2) as part | ||
FROM src; | ||
|
||
INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part) | ||
SELECT key, value, PMOD(HASH(key), 2) as part | ||
FROM src; | ||
|
||
dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1c/ds=1/part=0/; | ||
|
||
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; | ||
-- Verify | ||
SELECT SUM(HASH(c)) FROM ( | ||
SELECT TRANSFORM(*) USING 'tr \t _' AS (c) | ||
FROM orcfile_merge1 WHERE ds='1' | ||
) t; | ||
|
||
SELECT SUM(HASH(c)) FROM ( | ||
SELECT TRANSFORM(*) USING 'tr \t _' AS (c) | ||
FROM orcfile_merge1b WHERE ds='1' | ||
) t; | ||
|
||
SELECT SUM(HASH(c)) FROM ( | ||
SELECT TRANSFORM(*) USING 'tr \t _' AS (c) | ||
FROM orcfile_merge1c WHERE ds='1' | ||
) t; | ||
|
||
select count(*) from orcfile_merge1; | ||
select count(*) from orcfile_merge1b; | ||
select count(*) from orcfile_merge1c; | ||
|
||
DROP TABLE orcfile_merge1; | ||
DROP TABLE orcfile_merge1b; | ||
DROP TABLE orcfile_merge1c; |
Oops, something went wrong.