diff --git a/gcp_variant_transforms/testing/integration/run_tests.py b/gcp_variant_transforms/testing/integration/run_tests.py index f8b7f5404..7b32cdc08 100644 --- a/gcp_variant_transforms/testing/integration/run_tests.py +++ b/gcp_variant_transforms/testing/integration/run_tests.py @@ -73,8 +73,7 @@ def __init__(self, test_name, table_name, input_pattern, - validation_query, - expected_query_result, + assertion_configs, **kwargs): self._name = test_name @@ -82,8 +81,7 @@ def __init__(self, self._project = context.project self._table_name = '{}.{}'.format(dataset_id, table_name) output_table = '{}:{}'.format(context.project, self._table_name) - self._validation_query = (" ").join(validation_query) - self._expected_query_result = expected_query_result + self._assertion_configs = assertion_configs args = ['--input_pattern {}'.format(input_pattern), '--output_table {}'.format(output_table), '--project {}'.format(context.project), @@ -148,12 +146,27 @@ def _handle_failure(self, response): 'No traceback. See logs for more information on error.') def validate_table(self): - """Runs a simple query against the output table and verifies aggregates.""" + """Runs queries against the output table and verifies results.""" client = bigquery.Client(project=self._project) - # TODO(bashir2): Create macros for common queries and add the option for - # having a list of queries instead of just one. 
- query = self._validation_query.format(TABLE_NAME=self._table_name) - query_job = client.query(query) + # TODO(yifangchen): Create macros for common queries + query_formatter = QueryFormatter(self._table_name) + for assertion_config in self._assertion_configs: + query = query_formatter.format_query(assertion_config['query']) + assertion = QueryAssertion(client, query, assertion_config[ + 'expected_result']) + assertion.run_assertion() + + +class QueryAssertion(object): + """Runs a query and verifies that the output matches the expected result.""" + + def __init__(self, client, query, expected_result): + self._client = client + self._query = query + self._expected_result = expected_result + + def run_assertion(self): + query_job = self._client.query(self._query) assert query_job.state == 'RUNNING' iterator = query_job.result(timeout=60) rows = list(iterator) @@ -161,15 +174,36 @@ def validate_table(self): raise TestCaseFailure('Expected one row in query result, got {}'.format( len(rows))) row = rows[0] - if len(self._expected_query_result) != len(row): + if len(self._expected_result) != len(row): raise TestCaseFailure( 'Expected {} columns in the query result, got {}'.format( - len(self._expected_query_result), len(row))) - for key in self._expected_query_result.keys(): - if self._expected_query_result[key] != row.get(key): + len(self._expected_result), len(row))) + for key in self._expected_result.keys(): + if self._expected_result[key] != row.get(key): raise TestCaseFailure( 'Column {} mismatch: expected {}, got {}'.format( - key, self._expected_query_result[key], row.get(key))) + key, self._expected_result[key], row.get(key))) + + +class QueryFormatter(object): + """Formats a query. + + Replaces the TABLE_NAME keyword (and, eventually, macros) in the query. + """ + + def __init__(self, table_name): + # type: (str) -> None + self._table_name = table_name + + def format_query(self, query): + # type: (List[str]) -> str + """Formats the given ``query``. 
+ + Formatting logic is as follows: + - Concatenates ``query`` parts into one string. + - Replaces TABLE_NAME with the table associated with the query. + """ + return (' ').join(query).format(TABLE_NAME=self._table_name) class TestContextManager(object): @@ -272,11 +306,22 @@ def _load_test_config(filename): def _validate_test(test, filename): required_keys = ['test_name', 'table_name', 'input_pattern', - 'validation_query', 'expected_query_result'] + 'assertion_configs'] for key in required_keys: if key not in test: raise ValueError('Test case in {} is missing required key: {}'.format( filename, key)) + assertion_configs = test['assertion_configs'] + for assertion_config in assertion_configs: + _validate_assertion_config(assertion_config) + + +def _validate_assertion_config(assertion_config): + required_keys = ['query', 'expected_result'] + for key in required_keys: + if key not in assertion_config: + raise ValueError('Test case in {} is missing required key: {}'.format( + assertion_config, key)) def _run_test(test, context): diff --git a/gcp_variant_transforms/testing/integration/small_tests/valid_4_0.json b/gcp_variant_transforms/testing/integration/small_tests/valid_4_0.json index ce3f94c58..30a78e1d2 100644 --- a/gcp_variant_transforms/testing/integration/small_tests/valid_4_0.json +++ b/gcp_variant_transforms/testing/integration/small_tests/valid_4_0.json @@ -3,15 +3,19 @@ "table_name": "valid_4_0", "input_pattern": "gs://gcp-variant-transforms-testfiles/small_tests/valid-4.0.vcf", "runner": "DataflowRunner", - "validation_query": [ - "SELECT COUNT(0) AS num_rows, ", - " SUM(start_position) AS sum_start, ", - " SUM(end_position) AS sum_end ", - "FROM {TABLE_NAME}" - ], - "expected_query_result": { - "num_rows": 5, - "sum_start": 3607195, - "sum_end": 3607203 - } + "assertion_configs": [ + { + "query": [ + "SELECT COUNT(0) AS num_rows, ", + " SUM(start_position) AS sum_start, ", + " SUM(end_position) AS sum_end ", + "FROM {TABLE_NAME}" + ], + "expected_result": 
{ + "num_rows": 5, + "sum_start": 3607195, + "sum_end": 3607203 + } + } + ] } diff --git a/gcp_variant_transforms/testing/integration/small_tests/valid_4_0_bz2.json b/gcp_variant_transforms/testing/integration/small_tests/valid_4_0_bz2.json index eaae90194..257b7b05d 100644 --- a/gcp_variant_transforms/testing/integration/small_tests/valid_4_0_bz2.json +++ b/gcp_variant_transforms/testing/integration/small_tests/valid_4_0_bz2.json @@ -3,15 +3,19 @@ "table_name": "valid_4_0_bz2", "input_pattern": "gs://gcp-variant-transforms-testfiles/small_tests/valid-4.0.vcf.bz2", "runner": "DataflowRunner", - "validation_query": [ - "SELECT COUNT(0) AS num_rows, ", - " SUM(start_position) AS sum_start, ", - " SUM(end_position) AS sum_end ", - "FROM {TABLE_NAME}" - ], - "expected_query_result": { - "num_rows": 5, - "sum_start": 3607195, - "sum_end": 3607203 - } + "assertion_configs": [ + { + "query": [ + "SELECT COUNT(0) AS num_rows, ", + " SUM(start_position) AS sum_start, ", + " SUM(end_position) AS sum_end ", + "FROM {TABLE_NAME}" + ], + "expected_result": { + "num_rows": 5, + "sum_start": 3607195, + "sum_end": 3607203 + } + } + ] } diff --git a/gcp_variant_transforms/testing/integration/small_tests/valid_4_0_gz.json b/gcp_variant_transforms/testing/integration/small_tests/valid_4_0_gz.json index 3d09dccfd..c0530b997 100644 --- a/gcp_variant_transforms/testing/integration/small_tests/valid_4_0_gz.json +++ b/gcp_variant_transforms/testing/integration/small_tests/valid_4_0_gz.json @@ -3,15 +3,19 @@ "table_name": "valid_4_0_gz", "input_pattern": "gs://gcp-variant-transforms-testfiles/small_tests/valid-4.0.vcf.gz", "runner": "DataflowRunner", - "validation_query": [ - "SELECT COUNT(0) AS num_rows, ", - " SUM(start_position) AS sum_start, ", - " SUM(end_position) AS sum_end ", - "FROM {TABLE_NAME}" - ], - "expected_query_result": { - "num_rows": 5, - "sum_start": 3607195, - "sum_end": 3607203 - } + "assertion_configs": [ + { + "query": [ + "SELECT COUNT(0) AS num_rows, ", + " 
SUM(start_position) AS sum_start, ", + " SUM(end_position) AS sum_end ", + "FROM {TABLE_NAME}" + ], + "expected_result": { + "num_rows": 5, + "sum_start": 3607195, + "sum_end": 3607203 + } + } + ] } diff --git a/gcp_variant_transforms/testing/integration/small_tests/valid_4_1.json b/gcp_variant_transforms/testing/integration/small_tests/valid_4_1.json index d93d28eb4..08e0a56d8 100644 --- a/gcp_variant_transforms/testing/integration/small_tests/valid_4_1.json +++ b/gcp_variant_transforms/testing/integration/small_tests/valid_4_1.json @@ -3,15 +3,19 @@ "table_name": "valid_4_1", "input_pattern": "gs://gcp-variant-transforms-testfiles/small_tests/valid-4.1-large.vcf", "runner": "DataflowRunner", - "validation_query": [ - "SELECT COUNT(0) AS num_rows, ", - " SUM(start_position) AS sum_start, ", - " SUM(end_position) AS sum_end ", - "FROM {TABLE_NAME}" - ], - "expected_query_result": { - "num_rows": 9882, - "sum_start": 5434957328, - "sum_end": 5435327553 - } + "assertion_configs": [ + { + "query": [ + "SELECT COUNT(0) AS num_rows, ", + " SUM(start_position) AS sum_start, ", + " SUM(end_position) AS sum_end ", + "FROM {TABLE_NAME}" + ], + "expected_result": { + "num_rows": 9882, + "sum_start": 5434957328, + "sum_end": 5435327553 + } + } + ] } diff --git a/gcp_variant_transforms/testing/integration/small_tests/valid_4_1_gz.json b/gcp_variant_transforms/testing/integration/small_tests/valid_4_1_gz.json index 9b6991a2d..87855e64d 100644 --- a/gcp_variant_transforms/testing/integration/small_tests/valid_4_1_gz.json +++ b/gcp_variant_transforms/testing/integration/small_tests/valid_4_1_gz.json @@ -3,15 +3,19 @@ "table_name": "valid_4_1_gz", "input_pattern": "gs://gcp-variant-transforms-testfiles/small_tests/valid-4.1-large.vcf.gz", "runner": "DataflowRunner", - "validation_query": [ - "SELECT COUNT(0) AS num_rows, ", - " SUM(start_position) AS sum_start, ", - " SUM(end_position) AS sum_end ", - "FROM {TABLE_NAME}" - ], - "expected_query_result": { - "num_rows": 9882, - 
"sum_start": 5434957328, - "sum_end": 5435327553 - } + "assertion_configs": [ + { + "query": [ + "SELECT COUNT(0) AS num_rows, ", + " SUM(start_position) AS sum_start, ", + " SUM(end_position) AS sum_end ", + "FROM {TABLE_NAME}" + ], + "expected_result": { + "num_rows": 9882, + "sum_start": 5434957328, + "sum_end": 5435327553 + } + } + ] } diff --git a/gcp_variant_transforms/testing/integration/small_tests/valid_4_2.json b/gcp_variant_transforms/testing/integration/small_tests/valid_4_2.json index 3557026da..ea5085e2f 100644 --- a/gcp_variant_transforms/testing/integration/small_tests/valid_4_2.json +++ b/gcp_variant_transforms/testing/integration/small_tests/valid_4_2.json @@ -3,15 +3,19 @@ "table_name": "valid_4_2", "input_pattern": "gs://gcp-variant-transforms-testfiles/small_tests/valid-4.2.vcf", "runner": "DataflowRunner", - "validation_query": [ - "SELECT COUNT(0) AS num_rows, ", - " SUM(start_position) AS sum_start, ", - " SUM(end_position) AS sum_end ", - "FROM {TABLE_NAME}" - ], - "expected_query_result": { - "num_rows": 13, - "sum_start": 23031929, - "sum_end": 23033052 - } + "assertion_configs": [ + { + "query": [ + "SELECT COUNT(0) AS num_rows, ", + " SUM(start_position) AS sum_start, ", + " SUM(end_position) AS sum_end ", + "FROM {TABLE_NAME}" + ], + "expected_result": { + "num_rows": 13, + "sum_start": 23031929, + "sum_end": 23033052 + } + } + ] } diff --git a/gcp_variant_transforms/testing/integration/small_tests/valid_4_2_VEP.json b/gcp_variant_transforms/testing/integration/small_tests/valid_4_2_VEP.json index a8fd827eb..10f5fba47 100644 --- a/gcp_variant_transforms/testing/integration/small_tests/valid_4_2_VEP.json +++ b/gcp_variant_transforms/testing/integration/small_tests/valid_4_2_VEP.json @@ -4,12 +4,29 @@ "input_pattern": "gs://gcp-variant-transforms-testfiles/small_tests/valid-4.2_VEP.vcf", "annotation_field": "CSQ", "runner": "DataflowRunner", - "validation_query": [ - "SELECT COUNT(DISTINCT CSQ.Feature) AS num_features ", - "FROM 
{TABLE_NAME} AS t, t.alternate_bases as alts, alts.CSQ as CSQ ", - "WHERE start_position = 1110695 AND alts.alt = 'G'" - ], - "expected_query_result": { - "num_features": 3 - } + "assertion_configs": [ + { + "query": [ + "SELECT COUNT(0) AS num_rows, ", + " SUM(start_position) AS sum_start, ", + " SUM(end_position) AS sum_end ", + "FROM {TABLE_NAME}" + ], + "expected_result": { + "num_rows": 11, + "sum_start": 21801693, + "sum_end": 21802814 + } + }, + { + "query": [ + "SELECT COUNT(DISTINCT CSQ.Feature) AS num_features ", + "FROM {TABLE_NAME} AS t, t.alternate_bases as alts, alts.CSQ as CSQ ", + "WHERE start_position = 1110695 AND alts.alt = 'G'" + ], + "expected_result": { + "num_features": 3 + } + } + ] } diff --git a/gcp_variant_transforms/testing/integration/small_tests/valid_4_2_gz.json b/gcp_variant_transforms/testing/integration/small_tests/valid_4_2_gz.json index dba30fcdb..9b6f21065 100644 --- a/gcp_variant_transforms/testing/integration/small_tests/valid_4_2_gz.json +++ b/gcp_variant_transforms/testing/integration/small_tests/valid_4_2_gz.json @@ -3,15 +3,19 @@ "table_name": "valid_4_2_gz", "input_pattern": "gs://gcp-variant-transforms-testfiles/small_tests/valid-4.2.vcf.gz", "runner": "DataflowRunner", - "validation_query": [ - "SELECT COUNT(0) AS num_rows, ", - " SUM(start_position) AS sum_start, ", - " SUM(end_position) AS sum_end ", - "FROM {TABLE_NAME}" - ], - "expected_query_result": { - "num_rows": 13, - "sum_start": 23031929, - "sum_end": 23033052 - } + "assertion_configs": [ + { + "query": [ + "SELECT COUNT(0) AS num_rows, ", + " SUM(start_position) AS sum_start, ", + " SUM(end_position) AS sum_end ", + "FROM {TABLE_NAME}" + ], + "expected_result": { + "num_rows": 13, + "sum_start": 23031929, + "sum_end": 23033052 + } + } + ] }