DRILL-6744: Added tests to verify that filter pushdown works with varchar and decimal data types (#539)

* DRILL-6744: Added tests to verify that filter pushdown works with varchar and decimal data types

Covered:
- Varchar and decimals with all physical types (int32, int64, fixed_len_byte_array, binary).
- Parquet files of versions 1.8.1 and 1.10.0.
- Queries with and without metadata files.
- Metadata generated by Drill 1.14.0 and 1.16.0-SNAPSHOT.
- Queries on Hive tables.
Dataset:
- Tables were generated from TPC-H data at scale factor 0.01, with decimal columns and a small row-group size.
- External Hive tables.

* DRILL-6744: Addressed the review comments
agozhiy authored and Agirish committed Feb 9, 2019
1 parent 075bfeb commit 99077665e8438b18c41cfe9a96e8215917ecedb2
Showing 305 changed files with 15,018 additions and 0 deletions.
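
For context, these tests assert that predicates on varchar and decimal columns are pushed down into the Parquet scan so that non-matching row groups are pruned. Below is a minimal sketch of that kind of check, run by hand against one of the new tables (the table path and connection string are hypothetical; the suite's own runner and expected plans differ):

#!/usr/bin/env bash
# Hypothetical manual check: EXPLAIN the filtered query and inspect the plan.
# When pushdown applies, the ParquetGroupScan in the plan should report a
# reduced numRowGroups for a selective varchar/decimal predicate.
${DRILL_HOME}/bin/sqlline -u "jdbc:drill:zk=local" <<'EOF'
explain plan for
select c_custkey, c_acctbal
from dfs.`/drill/testdata/filter/pushdown/varchar_decimal/1.14.0/customer`
where c_mktsegment = 'BUILDING' and c_acctbal < 100.00;
EOF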
@@ -0,0 +1,24 @@
#!/usr/bin/env bash
source conf/drillTestConfig.properties

set -x

test_dir=${DRILL_TEST_DATA_DIR}/Datasources/parquet_storage/filter/pushdown/varchar_decimal
dfs_location=${DRILL_TESTDATA}/filter/pushdown/varchar_decimal

copy_to_dfs() {
  local metadata_file=$test_dir/$1
  local destination=$dfs_location/$1

  # Remove any previous copy (the rm fails harmlessly on the first run).
  hadoop fs -rm "$destination"
  # Copy the metadata files separately (create an empty file, then append to it);
  # otherwise Drill considers the tables modified and auto-refreshes the metadata.
  # Using "hadoop fs -put" does not avoid this, for reasons that are unclear.
  hadoop fs -touchz "$destination"
  hadoop fs -appendToFile "$metadata_file" "$destination"
}

# Find all metadata files, keep the last three directory levels plus the file
# name of each path, and copy them to DFS.
for file in $(find ${test_dir} -name .drill.parquet_metadata | grep -oE "[^/]+/[^/]+/[^/]+/[^/]+$")
do
  copy_to_dfs "$file"
done
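
For illustration, the path extraction step in isolation (the directory names here are hypothetical; the real layout comes from the dataset):

echo "/drill/test-data/varchar_decimal/hive/1.14.0/customer/.drill.parquet_metadata" \
  | grep -oE "[^/]+/[^/]+/[^/]+/[^/]+$"
# -> hive/1.14.0/customer/.drill.parquet_metadata
# copy_to_dfs then recreates that relative path under $dfs_location.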
@@ -0,0 +1,41 @@
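-- Note: "dfs_location" in the location clauses below is a placeholder; the
-- runner script substitutes the actual DFS path (via sed) before executing this DDL.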
create database if not exists filter_pushdown;

drop table if exists filter_pushdown.customer;
create external table filter_pushdown.customer(
  c_custkey int,
  c_name string,
  c_address string,
  c_nationkey int,
  c_phone string,
  c_acctbal decimal(6,2),
  c_mktsegment string,
  c_comment string
)
stored as parquet
location 'dfs_location/1.16.0/customer';

drop table if exists filter_pushdown.part;
create external table filter_pushdown.part(
  p_partkey int,
  p_name string,
  p_mfgr string,
  p_brand string,
  p_type string,
  p_size int,
  p_container string,
  p_retailprice bigint,
  p_comment string
)
stored as parquet
location 'dfs_location/1.14.0/part';

drop table if exists filter_pushdown.partsupp;
create external table filter_pushdown.partsupp(
  ps_partkey int,
  ps_suppkey int,
  ps_availqty int,
  ps_supplycost decimal(6,2),
  ps_comment string
)
stored as parquet
location 'dfs_location/1.14.0/partsupp';
@@ -0,0 +1,20 @@
#!/usr/bin/env bash
source conf/drillTestConfig.properties

set -x

test_dir=${DRILL_TEST_DATA_DIR}/Datasources/parquet_storage/filter/pushdown/varchar_decimal
dfs_location=${DRILL_TESTDATA}/filter/pushdown/varchar_decimal/no_metadata_file

hive_ddl=$test_dir/create_hive_tables.ddl
hive_ddl_parametrized=${hive_ddl%.*}_param.ddl
cp "$hive_ddl" "$hive_ddl_parametrized"

# Replace the dfs_location placeholder with the actual DFS path:
sed -i "s|dfs_location|$dfs_location|g" "$hive_ddl_parametrized"
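# e.g. location 'dfs_location/1.14.0/part' becomes
#      location '${DRILL_TESTDATA}/filter/pushdown/varchar_decimal/no_metadata_file/1.14.0/part'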

# Execute the DDL:
${DRILL_TEST_DATA_DIR}/Datasources/hive/execHive.sh "$hive_ddl_parametrized"

# Clean up:
rm "$hive_ddl_parametrized"
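
A quick sanity check after the DDL has run (a sketch; assumes the hive CLI is on the PATH of the node, whereas the suite itself goes through execHive.sh):

hive -e "show tables in filter_pushdown;"
# Expected output: customer, part, partsupp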
