Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HUDI-5111] Improve integration test coverage #7092

Merged
merged 4 commits into from Nov 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -45,9 +45,21 @@ dag_content:
delete_input_data: false
type: ValidateDatasetNode
deps: first_insert
first_presto_query:
config:
execute_itr_count: 5
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 30000
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 5
delete_input_data: true
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: deltastreamer-long-running-multi-partitions.yaml
dag_rounds: 50
dag_rounds: 20
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
Expand Down Expand Up @@ -71,15 +71,15 @@ dag_content:
deps: first_delete
second_validate:
config:
validate_once_every_itr: 5
execute_itr_count: 20
validate_hive: true
delete_input_data: true
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: second_hive_sync
last_validate:
config:
execute_itr_count: 50
execute_itr_count: 20
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: deltastreamer-long-running-multi-partitions.yaml
dag_rounds: 30
dag_rounds: 20
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
Expand Down Expand Up @@ -65,9 +65,21 @@ dag_content:
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 20
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 7600
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 30
execute_itr_count: 20
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: deltastreamer-long-running-multi-partitions.yaml
dag_rounds: 50
dag_rounds: 20
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
Expand Down Expand Up @@ -65,9 +65,21 @@ dag_content:
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 20
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 7600
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 50
execute_itr_count: 20
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
19 changes: 16 additions & 3 deletions docker/demo/config/test-suite/deltastreamer-medium-clustering.yaml
Expand Up @@ -17,7 +17,7 @@
# to be used with test-aggressive-clean-archival.properties

dag_name: deltastreamer-medium-clustering.yaml
dag_rounds: 20
dag_rounds: 15
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
Expand Down Expand Up @@ -62,14 +62,27 @@ dag_content:
deps: first_upsert
second_validate:
config:
validate_once_every_itr: 3
validate_hive: false
delete_input_data: true
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 15
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 3600
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 20
execute_itr_count: 15
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
Expand Up @@ -17,7 +17,7 @@
# to be used with test-aggressive-clean-archival.properties

dag_name: deltastreamer-long-running-multi-partitions.yaml
dag_rounds: 20
dag_rounds: 15
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
Expand Down Expand Up @@ -68,9 +68,21 @@ dag_content:
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 15
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 3600
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 20
execute_itr_count: 15
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
14 changes: 13 additions & 1 deletion docker/demo/config/test-suite/deltastreamer-non-partitioned.yaml
Expand Up @@ -56,8 +56,20 @@ dag_content:
delete_input_data: true
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 6
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 11000
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 6
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: detlastreamer-long-running-example.yaml
dag_rounds: 50
dag_rounds: 20
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
Expand Down Expand Up @@ -65,9 +65,21 @@ dag_content:
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 20
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 3600
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 50
execute_itr_count: 20
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
16 changes: 14 additions & 2 deletions docker/demo/config/test-suite/simple-clustering.yaml
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: simple-clustering.yaml
dag_rounds: 30
dag_rounds: 15
dag_intermittent_delay_mins: 0
dag_content:
first_insert:
Expand Down Expand Up @@ -54,11 +54,23 @@ dag_content:
deps: first_delete
first_cluster:
config:
execute_itr_count: 25
execute_itr_count: 10
type: ClusteringNode
deps: first_validate
second_validate:
config:
validate_hive: false
type: ValidateDatasetNode
deps: first_cluster
first_presto_query:
config:
validate_once_every_itr: 5
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 8300
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
12 changes: 12 additions & 0 deletions docker/demo/config/test-suite/simple-deltastreamer.yaml
Expand Up @@ -68,3 +68,15 @@ dag_content:
delete_input_data: true
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
validate_once_every_itr: 3
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 9600
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
14 changes: 13 additions & 1 deletion docker/demo/config/test-suite/spark-immutable-dataset.yaml
Expand Up @@ -45,9 +45,21 @@ dag_content:
delete_input_data: false
type: ValidateDatasetNode
deps: first_insert
first_presto_query:
config:
execute_itr_count: 5
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 48000
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 5
delete_input_data: true
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
Expand Up @@ -48,6 +48,18 @@ dag_content:
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 6
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 6000
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 6
Expand Down
16 changes: 14 additions & 2 deletions docker/demo/config/test-suite/spark-long-running.yaml
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: cow-spark-deltastreamer-long-running-multi-partitions.yaml
dag_rounds: 30
dag_rounds: 20
dag_intermittent_delay_mins: 0
dag_content:
first_insert:
Expand Down Expand Up @@ -49,9 +49,21 @@ dag_content:
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 20
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 189000
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 20
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query