Permalink
Show file tree
Hide file tree
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
DRILL-6744: Added tests to verify that filter pushdown works with var…
… (#539) * DRILL-6744: Added tests to verify that filter pushdown works with varchar and decimal data types. Covered: - Varchar and decimals with all physical types (int32, int64, fixed_len_byte_array, binary). - Parquet files version 1.8.1 and 1.10.0. - Queries with and without metadata files. - Metadata generated by Drill 1.14.0 and 1.16.0-SNAPSHOT. - Queries on Hive tables. Dataset: - Tables were generated from the TPCH data with scale 0.01 with decimal columns and small row group size. - External Hive tables. * DRILL-6744: Addressed the review comments
- Loading branch information
Showing
305 changed files
with
15,018 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#!/usr/bin/env bash
# Copies Drill parquet metadata files from the local test-data directory into DFS.
# The metadata files are transferred with "touchz + appendToFile" rather than
# "hadoop fs -put" because a plain put makes Drill treat the tables as modified
# and auto-refresh (regenerate) the metadata, defeating the test setup.
source conf/drillTestConfig.properties

set -x

test_dir="${DRILL_TEST_DATA_DIR}/Datasources/parquet_storage/filter/pushdown/varchar_decimal"
dfs_location="${DRILL_TESTDATA}/filter/pushdown/varchar_decimal"

# Copy one metadata file into DFS.
# $1 - path relative to test_dir / dfs_location (3 directories + file name).
copy_to_dfs() {
    local metadata_file="$test_dir/$1"
    local destination="$dfs_location/$1"

    # -f: do not emit an error when the destination does not exist yet
    # (e.g. on the first run against a clean DFS).
    hadoop fs -rm -f "$destination"
    # We need to copy the metadata files separately because otherwise Drill considers the tables as modified and auto refreshes metadata.
    # Using "hadoop fs -put" doesn't help for some reason.
    hadoop fs -touchz "$destination"
    hadoop fs -appendToFile "$metadata_file" "$destination"
}

# Find all metadata files, keep the last 3 directories plus the file name,
# then copy each to DFS. A while-read pipeline (instead of "for f in $(...)")
# is safe for paths containing whitespace.
find "${test_dir}" -name .drill.parquet_metadata |
    grep -oE "[^/]+/[^/]+/[^/]+/[^/]+$" |
    while IFS= read -r file
    do
        copy_to_dfs "$file"
    done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
-- Hive DDL for the DRILL-6744 filter-pushdown test tables.
-- NOTE: the literal "dfs_location" in each LOCATION clause is a placeholder;
-- the setup script substitutes the real DFS path (via sed) before executing
-- this file, so it must be kept exactly as written.
CREATE DATABASE IF NOT EXISTS filter_pushdown;

DROP TABLE IF EXISTS filter_pushdown.customer;
-- Customer table over Parquet data stored under the 1.16.0 path;
-- c_acctbal exercises decimal filter pushdown.
CREATE EXTERNAL TABLE filter_pushdown.customer (
    c_custkey    INT,
    c_name       STRING,
    c_address    STRING,
    c_nationkey  INT,
    c_phone      STRING,
    c_acctbal    DECIMAL(6,2),
    c_mktsegment STRING,
    c_comment    STRING
)
STORED AS PARQUET
LOCATION 'dfs_location/1.16.0/customer';

DROP TABLE IF EXISTS filter_pushdown.part;
-- Part table over Parquet data stored under the 1.14.0 path;
-- p_retailprice is declared BIGINT here.
CREATE EXTERNAL TABLE filter_pushdown.part (
    p_partkey     INT,
    p_name        STRING,
    p_mfgr        STRING,
    p_brand       STRING,
    p_type        STRING,
    p_size        INT,
    p_container   STRING,
    p_retailprice BIGINT,
    p_comment     STRING
)
STORED AS PARQUET
LOCATION 'dfs_location/1.14.0/part';

DROP TABLE IF EXISTS filter_pushdown.partsupp;
-- Partsupp table over Parquet data stored under the 1.14.0 path;
-- ps_supplycost exercises decimal filter pushdown.
CREATE EXTERNAL TABLE filter_pushdown.partsupp (
    ps_partkey    INT,
    ps_suppkey    INT,
    ps_availqty   INT,
    ps_supplycost DECIMAL(6,2),
    ps_comment    STRING
)
STORED AS PARQUET
LOCATION 'dfs_location/1.14.0/partsupp';
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#!/usr/bin/env bash
# Creates the external Hive tables for the filter-pushdown tests.
# Copies the DDL template, substitutes the concrete DFS location for the
# "dfs_location" placeholder, executes it, then removes the generated copy.
source conf/drillTestConfig.properties

set -x

test_dir="${DRILL_TEST_DATA_DIR}/Datasources/parquet_storage/filter/pushdown/varchar_decimal"
dfs_location="${DRILL_TESTDATA}/filter/pushdown/varchar_decimal/no_metadata_file"

hive_ddl="$test_dir/create_hive_tables.ddl"
# Same file name with a "_param" suffix inserted before the extension.
hive_ddl_parametrized="${hive_ddl%.*}_param.ddl"
cp "$hive_ddl" "$hive_ddl_parametrized"

# Replacing parameters with values.
# "|" is used as the sed delimiter because the substituted path contains "/".
sed -i "s|dfs_location|$dfs_location|g" "$hive_ddl_parametrized"

# Executing the ddl:
"${DRILL_TEST_DATA_DIR}/Datasources/hive/execHive.sh" "$hive_ddl_parametrized"

# Clean up the generated copy (-f: ignore if already gone):
rm -f "$hive_ddl_parametrized"
Oops, something went wrong.