From 2ad5122ef4d23bd29138473f70735b35308203c9 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 9 Jan 2020 11:07:42 -0700 Subject: [PATCH 1/3] Add tags for sources Allow column-level tags, which are inherited by their tests --- core/dbt/contracts/graph/parsed.py | 6 +- core/dbt/contracts/graph/unparsed.py | 9 +- core/dbt/graph/selector.py | 5 +- core/dbt/parser/schema_test_builders.py | 18 ++-- core/dbt/parser/schemas.py | 45 +++++--- .../models-v2/models/schema.yml | 16 +++ .../test_schema_v2_tests.py | 64 ++++++++--- .../test_docs_generate.py | 100 +++++++++++++++--- .../035_docs_blocks/test_docs_blocks.py | 11 +- .../042_sources_test/models/schema.yml | 14 +++ .../042_sources_test/test_sources.py | 28 ++++- test/integration/047_dbt_ls_test/test_ls.py | 1 + test/unit/test_contracts_graph_parsed.py | 48 +++++++-- test/unit/test_contracts_graph_unparsed.py | 14 ++- 14 files changed, 309 insertions(+), 70 deletions(-) diff --git a/core/dbt/contracts/graph/parsed.py b/core/dbt/contracts/graph/parsed.py index fc32e9d0552..a92db585108 100644 --- a/core/dbt/contracts/graph/parsed.py +++ b/core/dbt/contracts/graph/parsed.py @@ -121,6 +121,7 @@ class ColumnInfo(JsonSchemaMixin, Replaceable): description: str = '' meta: Dict[str, Any] = field(default_factory=dict) data_type: Optional[str] = None + tags: List[str] = field(default_factory=list) # Docrefs are not quite like regular references, as they indicate what they @@ -513,6 +514,7 @@ class ParsedSourceDefinition( columns: Dict[str, ColumnInfo] = field(default_factory=dict) meta: Dict[str, Any] = field(default_factory=dict) source_meta: Dict[str, Any] = field(default_factory=dict) + tags: List[str] = field(default_factory=list) @property def is_ephemeral_model(self): @@ -530,10 +532,6 @@ def refs(self): def sources(self): return [] - @property - def tags(self): - return [] - @property def has_freshness(self): return bool(self.freshness) and self.loaded_at_field is not None diff --git a/core/dbt/contracts/graph/unparsed.py b/core/dbt/contracts/graph/unparsed.py index 73ded4dfe16..e58a1d66189 100644 --- a/core/dbt/contracts/graph/unparsed.py +++ b/core/dbt/contracts/graph/unparsed.py @@ -67,9 +67,14 @@ def __post_init__(self): self.tests = [] +@dataclass +class UnparsedColumn(NamedTested): + tags: List[str] = field(default_factory=list) + + @dataclass class ColumnDescription(JsonSchemaMixin, Replaceable): - columns: List[NamedTested] = field(default_factory=list) + columns: List[UnparsedColumn] = field(default_factory=list) @dataclass @@ -206,6 +211,7 @@ class UnparsedSourceTableDefinition(ColumnDescription, NodeDescription): external: Optional[ExternalTable] = field( default_factory=ExternalTable ) + tags: List[str] = field(default_factory=list) def __post_init__(self): NodeDescription.__post_init__(self) @@ -225,6 +231,7 @@ class UnparsedSourceDefinition(JsonSchemaMixin, Replaceable): ) loaded_at_field: Optional[str] = None tables: List[UnparsedSourceTableDefinition] = field(default_factory=list) + tags: List[str] = field(default_factory=list) @property def yaml_key(self) -> 'str': diff --git a/core/dbt/graph/selector.py b/core/dbt/graph/selector.py index cfe2d931177..595b0d122bf 100644 --- a/core/dbt/graph/selector.py +++ b/core/dbt/graph/selector.py @@ -1,4 +1,5 @@ from enum import Enum +from itertools import chain import networkx as nx # type: ignore @@ -180,7 +181,9 @@ class TagSelector(ManifestSelector): def search(self, included_nodes, selector): """ yields nodes from graph that have the specified tag """ - for node, real_node in self.parsed_nodes(included_nodes): + search = chain(self.parsed_nodes(included_nodes), + self.source_nodes(included_nodes)) + for node, real_node in search: if selector in real_node.tags: yield node diff --git a/core/dbt/parser/schema_test_builders.py b/core/dbt/parser/schema_test_builders.py index c2d7744d054..7dcc4ede7a2 100644 --- a/core/dbt/parser/schema_test_builders.py +++ b/core/dbt/parser/schema_test_builders.py @@ -6,7 +6,7 @@ from dbt.clients.jinja import get_rendered from dbt.contracts.graph.unparsed import ( UnparsedNodeUpdate, UnparsedSourceDefinition, - UnparsedSourceTableDefinition, NamedTested + UnparsedSourceTableDefinition, UnparsedColumn ) from dbt.exceptions import raise_compiler_error from dbt.parser.search import FileBlock @@ -79,7 +79,7 @@ def name(self) -> str: return '{0.name}_{1.name}'.format(self.source, self.table) @property - def columns(self) -> List[NamedTested]: + def columns(self) -> List[UnparsedColumn]: if self.table.columns is None: return [] else: @@ -136,17 +136,23 @@ def from_yaml_block( class SchemaTestBlock(TargetBlock): test: Dict[str, Any] column_name: Optional[str] + tags: List[str] @classmethod def from_target_block( - cls, src: TargetBlock, test: Dict[str, Any], column_name: Optional[str] + cls, + src: TargetBlock, + test: Dict[str, Any], + column_name: Optional[str], + tags: List[str], ) -> 'SchemaTestBlock': return cls( file=src.file, data=src.data, target=src.target, test=test, - column_name=column_name + column_name=column_name, + tags=tags, ) @@ -156,8 +162,6 @@ class TestBuilder(Generic[Target]): Test names have the following pattern: - the test name itself may be namespaced (package.test) - or it may not be namespaced (test) - - the test may have arguments embedded in the name (, severity=WARN) - - or it may not have arguments. """ TEST_NAME_PATTERN = re.compile( @@ -286,7 +290,7 @@ def build_raw_sql(self) -> str: model=self.build_model_str(), macro=self.macro_name(), kwargs=self.test_kwargs_str(), - severity=self.severity() + severity=self.severity(), ) def build_model_str(self): diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index 86094721a50..93ff16e899f 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -1,3 +1,4 @@ +import itertools import os from abc import abstractmethod @@ -19,7 +20,7 @@ ParsedTestNode, ) from dbt.contracts.graph.unparsed import ( - UnparsedSourceDefinition, UnparsedNodeUpdate, NamedTested, + UnparsedSourceDefinition, UnparsedNodeUpdate, UnparsedColumn, UnparsedSourceTableDefinition, FreshnessThreshold ) from dbt.context.parser import docs @@ -68,11 +69,14 @@ def __init__(self): self.column_info: Dict[str, ColumnInfo] = {} self.docrefs: List[Docref] = [] - def add(self, column_name, description, data_type, meta): - self.column_info[column_name] = ColumnInfo(name=column_name, - description=description, - data_type=data_type, - meta=meta) + def add(self, column: UnparsedColumn, description, data_type, meta): + self.column_info[column.name] = ColumnInfo( + name=column.name, + description=description, + data_type=data_type, + meta=meta, + tags=column.tags, + ) def collect_docrefs( @@ -160,7 +164,7 @@ def _yaml_from_file( return None def parse_column( - self, block: TargetBlock, column: NamedTested, refs: ParserRef + self, block: TargetBlock, column: UnparsedColumn, refs: ParserRef ) -> None: column_name = column.name description = column.description @@ -168,13 +172,13 @@ def parse_column( meta = column.meta collect_docrefs(block.target, refs, column_name, description) - refs.add(column_name, description, data_type, meta) + refs.add(column, description, data_type, meta) if not column.tests: return for test in column.tests: - self.parse_test(block, test, column_name) + self.parse_test(block, test, column) def parse_node(self, block: SchemaTestBlock) -> ParsedTestNode: """In schema parsing, we rewrite most of the part of parse_node that @@ -209,11 +213,16 @@ def parse_node(self, block: SchemaTestBlock) -> ParsedTestNode: 'kwargs': builder.args, } + # copy - we don't want to mutate the tags! + tags = block.tags[:] + if 'schema' not in tags: + tags.append('schema') + node = self._create_parsetime_node( block=block, path=compiled_path, config=config, - tags=['schema'], + tags=tags, name=builder.fqn_name, raw_sql=builder.build_raw_sql(), column_name=block.column_name, @@ -227,16 +236,24 @@ def parse_test( self, target_block: TargetBlock, test: TestDef, - column_name: Optional[str] + column: Optional[UnparsedColumn], ) -> None: if isinstance(test, str): test = {test: {}} + if column is None: + column_name: Optional[str] = None + column_tags: List[str] = [] + else: + column_name = column.name + column_tags = column.tags + block = SchemaTestBlock.from_target_block( src=target_block, test=test, - column_name=column_name + column_name=column_name, + tags=column_tags, ) try: self.parse_node(block) @@ -395,6 +412,9 @@ def parse_with_refs( path = block.path.original_file_path source_meta = source.meta or {} + # make sure we don't do duplicate tags from source + table + tags = sorted(set(itertools.chain(source.tags, table.tags))) + result = ParsedSourceDefinition( package_name=self.project.project_name, database=(source.database or self.default_database), @@ -419,6 +439,7 @@ def parse_with_refs( quoting=quoting, resource_type=NodeType.Source, fqn=[self.project.project_name, source.name, table.name], + tags=tags, ) self.results.add_source(self.yaml.file, result) diff --git a/test/integration/008_schema_tests_test/models-v2/models/schema.yml b/test/integration/008_schema_tests_test/models-v2/models/schema.yml index af1af19e0eb..8721fe965c8 100644 --- a/test/integration/008_schema_tests_test/models-v2/models/schema.yml +++ b/test/integration/008_schema_tests_test/models-v2/models/schema.yml @@ -9,10 +9,14 @@ models: tests: - not_null - unique + tags: + - table_id - name: first_name description: "The user's first name" tests: - not_null + tags: + - table_first_name - name: ip_address description: "The user's IP address" tests: @@ -29,6 +33,8 @@ models: description: "The user's favorite color" tests: - accepted_values: { values: ['blue', 'green'], quote: true } + tags: + - table_favorite_color - name: fav_number description: "The user's favorite number" tests: @@ -47,6 +53,8 @@ models: - unique - accepted_values: { values: ['blue', 'green'] } - relationships: { field: favorite_color, to: ref('table_copy') } + tags: + - table_favorite_color - name: count description: "The number of responses for this favorite color" tests: @@ -61,10 +69,14 @@ models: tests: - not_null - unique + tags: + - xfail - name: favorite_color description: "The user's favorite color" tests: - accepted_values: { values: ['blue', 'green'] } + tags: + - xfail # all of these constraints will fail - name: table_failure_summary @@ -75,6 +87,8 @@ models: tests: - accepted_values: { values: ['red'] } - relationships: { field: favorite_color, to: ref('table_copy') } + tags: + - xfail # this table is disabled so these tests should be ignored - name: table_disabled @@ -94,3 +108,5 @@ models: description: "The user ID" tests: - relationships: { field: id, to: ref('table_failure_copy') } + tags: + - xfail diff --git a/test/integration/008_schema_tests_test/test_schema_v2_tests.py b/test/integration/008_schema_tests_test/test_schema_v2_tests.py index d228afe8b36..c7a5add39a5 100644 --- a/test/integration/008_schema_tests_test/test_schema_v2_tests.py +++ b/test/integration/008_schema_tests_test/test_schema_v2_tests.py @@ -26,6 +26,23 @@ def run_schema_validations(self): test_task = TestTask(args, self.config) return test_task.run() + def assertTestFailed(self, result): + self.assertIsNone(result.error) + self.assertFalse(result.skipped) + self.assertTrue( + result.status > 0, + 'test {} did not fail'.format(result.node.name) + ) + + def assertTestPassed(self, result): + self.assertIsNone(result.error) + self.assertFalse(result.skipped) + # status = # of failing rows + self.assertEqual( + result.status, 0, + 'test {} failed'.format(result.node.name) + ) + @use_profile('postgres') def test_postgres_schema_tests(self): results = self.run_dbt() @@ -37,25 +54,37 @@ def test_postgres_schema_tests(self): for result in test_results: # assert that all deliberately failing tests actually fail if 'failure' in result.node.name: - self.assertIsNone(result.error) - self.assertFalse(result.skipped) - self.assertTrue( - result.status > 0, - 'test {} did not fail'.format(result.node.name) - ) - + self.assertTestFailed(result) # assert that actual tests pass else: - self.assertIsNone(result.error) - self.assertFalse(result.skipped) - # status = # of failing rows - self.assertEqual( - result.status, 0, - 'test {} failed'.format(result.node.name) - ) + self.assertTestPassed(result) self.assertEqual(sum(x.status for x in test_results), 6) + @use_profile('postgres') + def test_postgres_schema_test_selection(self): + results = self.run_dbt() + self.assertEqual(len(results), 5) + test_results = self.run_dbt(['test', '--models', 'tag:table_favorite_color']) + self.assertEqual(len(test_results), 5) # 1 in table_copy, 4 in table_summary + for result in test_results: + self.assertTestPassed(result) + + @use_profile('postgres') + def test_postgres_schema_test_exclude_failures(self): + results = self.run_dbt() + self.assertEqual(len(results), 5) + test_results = self.run_dbt(['test', '--exclude', 'tag:xfail']) + # If the failed + disabled model's tests ran, there would be 20 of these. + self.assertEqual(len(test_results), 13) + for result in test_results: + self.assertTestPassed(result) + test_results = self.run_dbt(['test', '--models', 'tag:xfail'], expect_pass=False) + self.assertEqual(len(test_results), 6) + for result in test_results: + self.assertTestFailed(result) + + class TestMalformedSchemaTests(DBTIntegrationTest): def setUp(self): @@ -94,17 +123,17 @@ def setUp(self): @property def schema(self): return "schema_tests_008" - + @property def models(self): return "models-v2/custom-bad-test-macro" - + @property def project_config(self): return { "macro-paths": ["macros-v2/malformed"], } - + def run_schema_validations(self): args = FakeArgs() test_task = TestTask(args, self.config) @@ -162,6 +191,7 @@ def test_postgres_hooks_dont_run_for_tests(self): 'test {} failed'.format(result.node.name) ) + class TestCustomSchemaTests(DBTIntegrationTest): def setUp(self): diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index 07770908068..bf553fb20be 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -900,30 +900,35 @@ def expected_seeded_manifest(self, model_database=None): 'description': 'The user ID number', 'data_type': None, 'meta': {}, + 'tags': [], }, 'first_name': { 'name': 'first_name', 'description': "The user's first name", 'data_type': None, 'meta': {}, + 'tags': [], }, 'email': { 'name': 'email', 'description': "The user's email", 'data_type': None, 'meta': {}, + 'tags': [], }, 'ip_address': { 'name': 'ip_address', 'description': "The user's IP address", 'data_type': None, 'meta': {}, + 'tags': [], }, 'updated_at': { 'name': 'updated_at', 'description': "The last time this user's email was updated", 'data_type': None, 'meta': {}, + 'tags': [], }, }, 'patch_path': model_schema_yml_path, @@ -978,30 +983,35 @@ def expected_seeded_manifest(self, model_database=None): 'description': 'The user ID number', 'data_type': None, 'meta': {}, + 'tags': [], }, 'first_name': { 'name': 'first_name', 'description': "The user's first name", 'data_type': None, 'meta': {}, + 'tags': [], }, 'email': { 'name': 'email', 'description': "The user's email", 'data_type': None, 'meta': {}, + 'tags': [], }, 'ip_address': { 'name': 'ip_address', 'description': "The user's IP address", 'data_type': None, 'meta': {}, + 'tags': [], }, 'updated_at': { 'name': 'updated_at', 'description': "The last time this user's email was updated", 'data_type': None, 'meta': {}, + 'tags': [], }, }, 'docrefs': [], @@ -1327,13 +1337,15 @@ def expected_postgres_references_manifest(self, model_database=None): 'description': 'The first name being summarized', 'name': 'first_name', 'data_type': None, - 'meta': {} + 'meta': {}, + 'tags': [], }, 'ct': { 'description': 'The number of instances of the first name', 'name': 'ct', 'data_type': None, - 'meta': {} + 'meta': {}, + 'tags': [], }, }, 'config': { @@ -1405,13 +1417,15 @@ def expected_postgres_references_manifest(self, model_database=None): 'description': 'The first name being summarized', 'name': 'first_name', 'data_type': None, - 'meta': {} + 'meta': {}, + 'tags': [], }, 'ct': { 'description': 'The number of instances of the first name', 'name': 'ct', 'data_type': None, - 'meta': {} + 'meta': {}, + 'tags': [], }, }, 'config': { @@ -1483,30 +1497,35 @@ def expected_postgres_references_manifest(self, model_database=None): 'description': 'The user ID number', 'data_type': None, 'meta': {}, + 'tags': [], }, 'first_name': { 'name': 'first_name', 'description': "The user's first name", 'data_type': None, 'meta': {}, + 'tags': [], }, 'email': { 'name': 'email', 'description': "The user's email", 'data_type': None, 'meta': {}, + 'tags': [], }, 'ip_address': { 'name': 'ip_address', 'description': "The user's IP address", 'data_type': None, 'meta': {}, + 'tags': [], }, 'updated_at': { 'name': 'updated_at', 'description': "The last time this user's email was updated", 'data_type': None, 'meta': {}, + 'tags': [], }, }, 'config': { @@ -1554,7 +1573,8 @@ def expected_postgres_references_manifest(self, model_database=None): 'description': 'An ID field', 'name': 'id', 'data_type': None, - 'meta': {} + 'meta': {}, + 'tags': [], } }, 'quoting': { @@ -1600,6 +1620,7 @@ def expected_postgres_references_manifest(self, model_database=None): 'source_description': 'My source', 'source_name': 'my_source', 'source_meta': {}, + 'tags': [], 'unique_id': 'source.test.my_source.my_table', 'fqn': ['test', 'my_source', 'my_table'], } @@ -1836,30 +1857,35 @@ def expected_bigquery_complex_manifest(self): 'name': 'email', 'data_type': None, 'meta': {}, + 'tags': [], }, 'first_name': { 'description': "The user's name", 'name': 'first_name', 'data_type': None, 'meta': {}, + 'tags': [], }, 'id': { 'description': 'The user id', 'name': 'id', 'data_type': None, 'meta': {}, + 'tags': [], }, 'ip_address': { 'description': "The user's IP address", 'name': 'ip_address', 'data_type': None, 'meta': {}, + 'tags': [], }, 'updated_at': { 'description': 'When the user was updated', 'name': 'updated_at', 'data_type': None, 'meta': {}, + 'tags': [], }, }, 'description': 'A clustered and partitioned copy of the test model', @@ -1910,30 +1936,35 @@ def expected_bigquery_complex_manifest(self): 'name': 'email', 'data_type': None, 'meta': {}, + 'tags': [], }, 'first_name': { 'description': "The user's name", 'name': 'first_name', 'data_type': None, 'meta': {}, + 'tags': [], }, 'id': { 'description': 'The user id', 'name': 'id', 'data_type': None, 'meta': {}, + 'tags': [], }, 'ip_address': { 'description': "The user's IP address", 'name': 'ip_address', 'data_type': None, 'meta': {}, + 'tags': [], }, 'updated_at': { 'description': 'When the user was updated', 'name': 'updated_at', 'data_type': None, 'meta': {}, + 'tags': [], }, }, 'description': 'A clustered and partitioned copy of the test model, clustered on multiple columns', @@ -1985,30 +2016,35 @@ def expected_bigquery_complex_manifest(self): 'description': 'The first field', 'data_type': None, 'meta': {}, + 'tags': [], }, 'field_2': { 'name': 'field_2', 'description': 'The second field', 'data_type': None, 'meta': {}, + 'tags': [], }, 'field_3': { 'name': 'field_3', 'description': 'The third field', 'data_type': None, 'meta': {}, + 'tags': [], }, 'nested_field.field_4': { 'name': 'nested_field.field_4', 'description': 'The first nested field', 'data_type': None, 'meta': {}, + 'tags': [], }, 'nested_field.field_5': { 'name': 'nested_field.field_5', 'description': 'The second nested field', 'data_type': None, 'meta': {}, + 'tags': [], }, }, 'description': 'The test model', @@ -2107,30 +2143,35 @@ def expected_bigquery_complex_manifest(self): 'description': 'The user ID number', 'data_type': None, 'meta': {}, + 'tags': [], }, 'first_name': { 'name': 'first_name', 'description': "The user's first name", 'data_type': None, 'meta': {}, + 'tags': [], }, 'email': { 'name': 'email', 'description': "The user's email", 'data_type': None, 'meta': {}, + 'tags': [], }, 'ip_address': { 'name': 'ip_address', 'description': "The user's IP address", 'data_type': None, 'meta': {}, + 'tags': [], }, 'updated_at': { 'name': 'updated_at', 'description': "The last time this user's email was updated", 'data_type': None, 'meta': {}, + 'tags': [], }, }, 'description': 'The test seed', @@ -2318,30 +2359,35 @@ def expected_redshift_incremental_view_manifest(self): 'description': 'The user ID number', 'data_type': None, 'meta': {}, + 'tags': [], }, 'first_name': { 'name': 'first_name', 'description': "The user's first name", 'data_type': None, 'meta': {}, + 'tags': [], }, 'email': { 'name': 'email', 'description': "The user's email", 'data_type': None, 'meta': {}, + 'tags': [], }, 'ip_address': { 'name': 'ip_address', 'description': "The user's IP address", 'data_type': None, 'meta': {}, + 'tags': [], }, 'updated_at': { 'name': 'updated_at', 'description': "The last time this user's email was updated", 'data_type': None, 'meta': {}, + 'tags': [], }, }, 'patch_path': self.dir('rs_models/schema.yml'), @@ -2395,30 +2441,35 @@ def expected_redshift_incremental_view_manifest(self): 'description': 'The user ID number', 'data_type': None, 'meta': {}, + 'tags': [], }, 'first_name': { 'name': 'first_name', 'description': "The user's first name", 'data_type': None, 'meta': {}, + 'tags': [], }, 'email': { 'name': 'email', 'description': "The user's email", 'data_type': None, 'meta': {}, + 'tags': [], }, 'ip_address': { 'name': 'ip_address', 'description': "The user's IP address", 'data_type': None, 'meta': {}, + 'tags': [], }, 'updated_at': { 'name': 'updated_at', 'description': "The last time this user's email was updated", 'data_type': None, 'meta': {}, + 'tags': [], }, }, 'description': 'The test seed', @@ -2602,31 +2653,36 @@ def expected_run_results(self, quote_schema=True, quote_model=False, 'description': 'The user ID number', 'name': 'id', 'data_type': None, - 'meta': {} + 'meta': {}, + 'tags': [], }, 'first_name': { 'description': "The user's first name", 'name': 'first_name', 'data_type': None, - 'meta': {} + 'meta': {}, + 'tags': [], }, 'email': { 'description': "The user's email", 'name': 'email', 'data_type': None, - 'meta': {} + 'meta': {}, + 'tags': [], }, 'ip_address': { 'description': "The user's IP address", 'name': 'ip_address', 'data_type': None, - 'meta': {} + 'meta': {}, + 'tags': [], }, 'updated_at': { 'description': "The last time this user's email was updated", 'name': 'updated_at', 'data_type': None, - 'meta': {} + 'meta': {}, + 'tags': [], } }, 'compiled': True, @@ -2677,31 +2733,36 @@ def expected_run_results(self, quote_schema=True, quote_model=False, 'description': 'The user ID number', 'name': 'id', 'data_type': None, - 'meta': {} + 'meta': {}, + 'tags': [], }, 'first_name': { 'description': "The user's first name", 'name': 'first_name', 'data_type': None, - 'meta': {} + 'meta': {}, + 'tags': [], }, 'email': { 'description': "The user's email", 'name': 'email', 'data_type': None, - 'meta': {} + 'meta': {}, + 'tags': [], }, 'ip_address': { 'description': "The user's IP address", 'name': 'ip_address', 'data_type': None, - 'meta': {} + 'meta': {}, + 'tags': [], }, 'updated_at': { 'description': "The last time this user's email was updated", 'name': 'updated_at', 'data_type': None, - 'meta': {} + 'meta': {}, + 'tags': [], } }, 'compiled': True, @@ -2965,12 +3026,14 @@ def expected_postgres_references_run_results(self): 'name': 'first_name', 'data_type': None, 'meta': {}, + 'tags': [], }, 'ct': { 'description': 'The number of instances of the first name', 'name': 'ct', 'data_type': None, 'meta': {}, + 'tags': [], }, }, 'compiled': True, @@ -3060,12 +3123,14 @@ def expected_postgres_references_run_results(self): 'name': 'first_name', 'data_type': None, 'meta': {}, + 'tags': [], }, 'ct': { 'description': 'The number of instances of the first name', 'name': 'ct', 'data_type': None, 'meta': {}, + 'tags': [], }, }, 'compiled': True, @@ -3150,30 +3215,35 @@ def expected_postgres_references_run_results(self): 'description': 'The user ID number', 'data_type': None, 'meta': {}, + 'tags': [], }, 'first_name': { 'name': 'first_name', 'description': "The user's first name", 'data_type': None, 'meta': {}, + 'tags': [], }, 'email': { 'name': 'email', 'description': "The user's email", 'data_type': None, 'meta': {}, + 'tags': [], }, 'ip_address': { 'name': 'ip_address', 'description': "The user's IP address", 'data_type': None, 'meta': {}, + 'tags': [], }, 'updated_at': { 'name': 'updated_at', 'description': "The last time this user's email was updated", 'data_type': None, 'meta': {}, + 'tags': [], }, }, 'compiled': True, diff --git a/test/integration/035_docs_blocks/test_docs_blocks.py b/test/integration/035_docs_blocks/test_docs_blocks.py index 12df291dae3..29b8eb4341e 100644 --- a/test/integration/035_docs_blocks/test_docs_blocks.py +++ b/test/integration/035_docs_blocks/test_docs_blocks.py @@ -18,7 +18,6 @@ def dir(path): def models(self): return self.dir("models") - @use_profile('postgres') def test_postgres_valid_doc_ref(self): self.assertEqual(len(self.run_dbt()), 1) @@ -39,6 +38,7 @@ def test_postgres_valid_doc_ref(self): 'description': 'The user ID number', 'data_type': None, 'meta': {}, + 'tags': [], }, model_data['columns']['id'] ) @@ -48,6 +48,7 @@ def test_postgres_valid_doc_ref(self): 'description': "The user's first name", 'data_type': None, 'meta': {}, + 'tags': [], }, model_data['columns']['first_name'] ) @@ -58,6 +59,7 @@ def test_postgres_valid_doc_ref(self): 'description': "The user's last name", 'data_type': None, 'meta': {}, + 'tags': [], }, model_data['columns']['last_name'] ) @@ -84,6 +86,7 @@ def test_postgres_alternative_docs_path(self): 'description': 'The user ID number with alternative text', 'data_type': None, 'meta': {}, + 'tags': [], }, model_data['columns']['id'] ) @@ -93,6 +96,7 @@ def test_postgres_alternative_docs_path(self): 'description': "The user's first name", 'data_type': None, 'meta': {}, + 'tags': [], }, model_data['columns']['first_name'] ) @@ -103,6 +107,7 @@ def test_postgres_alternative_docs_path(self): 'description': "The user's last name in this other file", 'data_type': None, 'meta': {}, + 'tags': [], }, model_data['columns']['last_name'] ) @@ -114,6 +119,7 @@ def test_postgres_alternative_docs_path_missing(self): with self.assertRaises(dbt.exceptions.CompilationException): self.run_dbt() + class TestMissingDocsBlocks(DBTIntegrationTest): @property def schema(self): @@ -133,6 +139,7 @@ def test_postgres_missing_doc_ref(self): with self.assertRaises(dbt.exceptions.CompilationException): self.run_dbt() + class TestBadDocsBlocks(DBTIntegrationTest): @property def schema(self): @@ -151,5 +158,3 @@ def test_postgres_invalid_doc_ref(self): # The run should fail since we could not find the docs reference. with self.assertRaises(dbt.exceptions.CompilationException): self.run_dbt(expect_pass=False) - - diff --git a/test/integration/042_sources_test/models/schema.yml b/test/integration/042_sources_test/models/schema.yml index 413807d4f6e..ecf400d224f 100644 --- a/test/integration/042_sources_test/models/schema.yml +++ b/test/integration/042_sources_test/models/schema.yml @@ -7,6 +7,7 @@ models: - relationships: to: source('test_source', 'test_table') field: favorite_color + sources: - name: test_source loader: custom @@ -16,12 +17,16 @@ sources: schema: "{{ var(env_var('DBT_TEST_SCHEMA_NAME_VARIABLE')) }}" quoting: identifier: True + tags: + - my_test_source_tag tables: - name: test_table identifier: source loaded_at_field: "{{ var('test_loaded_at') }}" freshness: error_after: {count: 18, period: hour} + tags: + - my_test_source_table_tag columns: - name: favorite_color description: The favorite color @@ -30,6 +35,8 @@ sources: tests: - unique - not_null + tags: + - id_column - name: first_name description: The first name of the user tests: [] @@ -47,6 +54,13 @@ sources: field: favorite_color - name: other_test_table identifier: other_table + columns: + - name: id + tests: + - not_null + - unique + tags: + - id_column - name: disabled_test_table freshness: null loaded_at_field: "{{ var('test_loaded_at') }}" diff --git a/test/integration/042_sources_test/test_sources.py b/test/integration/042_sources_test/test_sources.py index 694e187030b..e8613026e97 100644 --- a/test/integration/042_sources_test/test_sources.py +++ b/test/integration/042_sources_test/test_sources.py @@ -127,7 +127,7 @@ def test_postgres_basic_source_def(self): ['source', 'descendant_model', 'nonsource_descendant'], ['expected_multi_source', 'multi_source_model']) results = self.run_dbt_with_vars(['test']) - self.assertEqual(len(results), 4) + self.assertEqual(len(results), 6) @use_profile('postgres') def test_postgres_source_selector(self): @@ -141,6 +141,15 @@ def test_postgres_source_selector(self): self.assertTablesEqual('source', 'descendant_model') self.assertTableDoesNotExist('nonsource_descendant') self.assertTableDoesNotExist('multi_source_model') + + # do the same thing, but with tags + results = self.run_dbt_with_vars([ + 'run', + '--models', + 'tag:my_test_source_table_tag+' + ]) + self.assertEqual(len(results), 1) + results = self.run_dbt_with_vars([ 'test', '--models', @@ -148,6 +157,23 @@ def test_postgres_source_selector(self): ]) self.assertEqual(len(results), 4) + results = self.run_dbt_with_vars([ + 'test', '--models', 'tag:my_test_source_table_tag+' + ]) + self.assertEqual(len(results), 4) + + results = self.run_dbt_with_vars([ + 'test', '--models', 'tag:my_test_source_tag+' + ]) + # test_table + other_test_table + self.assertEqual(len(results), 6) + + results = self.run_dbt_with_vars([ + 'test', '--models', 'tag:id_column' + ]) + # all 4 id column tests + self.assertEqual(len(results), 4) + @use_profile('postgres') def test_postgres_empty_source_def(self): # sources themselves can never be selected, so nothing should be run diff --git a/test/integration/047_dbt_ls_test/test_ls.py b/test/integration/047_dbt_ls_test/test_ls.py index d7a30777f22..cece73b1f3c 100644 --- a/test/integration/047_dbt_ls_test/test_ls.py +++ b/test/integration/047_dbt_ls_test/test_ls.py @@ -224,6 +224,7 @@ def expect_source_output(self): 'name': 'my_table', 'source_name': 'my_source', 'resource_type': 'source', + 'tags': [], }, 'path': self.dir('models/schema.yml'), } diff --git a/test/unit/test_contracts_graph_parsed.py b/test/unit/test_contracts_graph_parsed.py index 1652cba44a7..e060116e114 100644 --- a/test/unit/test_contracts_graph_parsed.py +++ b/test/unit/test_contracts_graph_parsed.py @@ -169,7 +169,14 @@ def test_complex(self): 'vars': {'foo': 100}, }, 'docrefs': [], - 'columns': {'a': {'name': 'a', 'description': 'a text field', 'meta': {}}}, + 'columns': { + 'a': { + 'name': 'a', + 'description': 'a text field', + 'meta': {}, + 'tags': [], + }, + }, } node = self.ContractType( @@ -347,7 +354,14 @@ def test_patch_ok(self): 'vars': {}, }, 'patch_path': '/path/to/schema.yml', - 'columns': {'a': {'name': 'a', 'description': 'a text field', 'meta':{}}}, + 'columns': { + 'a': { + 'name': 'a', + 'description': 'a text field', + 'meta': {}, + 'tags': [], + }, + }, 'docrefs': [ { 'documentation_name': 'foo', @@ -532,7 +546,14 @@ def test_complex(self): 'vars': {}, }, 'docrefs': [], - 'columns': {'a': {'name': 'a', 'description': 'a text field', 'meta':{}}}, + 'columns': { + 'a': { + 'name': 'a', + 'description': 'a text field', + 'meta': {}, + 'tags': [], + }, + }, 'index': 13, } @@ -644,7 +665,6 @@ def test_ok(self): }, 'docrefs': [], 'columns': {}, - 'meta': {}, } node = self.ContractType( package_name='test', @@ -725,7 +745,14 @@ def test_complex(self): 'extra_key': 'extra value' }, 'docrefs': [], - 'columns': {'a': {'name': 'a', 'description': 'a text field', 'meta': {}}}, + 'columns': { + 'a': { + 'name': 'a', + 'description': 'a text field', + 'meta': {}, + 'tags': [], + }, + }, 'column_name': 'id', } @@ -1384,7 +1411,14 @@ def test_populated(self): 'name': 'foo', 'description': 'The foo model', 'original_file_path': '/path/to/schema.yml', - 'columns': {'a': {'name': 'a', 'description': 'a text field', 'meta':{}}}, + 'columns': { + 'a': { + 'name': 'a', + 'description': 'a text field', + 'meta': {}, + 'tags': [], + }, + }, 'docrefs': [ { 'documentation_name': 'foo', @@ -1555,6 +1589,7 @@ def test_basic(self): 'unique_id': 'test.source.my_source.my_source_table', 'meta': {}, 'source_meta': {}, + 'tags': [], } source_def = self.ContractType( columns={}, @@ -1575,6 +1610,7 @@ def test_basic(self): source_description='my source description', source_name='my_source', unique_id='test.source.my_source.my_source_table', + tags=[], ) self.assert_symmetric(source_def, source_def_dict) minimum = { diff --git a/test/unit/test_contracts_graph_unparsed.py b/test/unit/test_contracts_graph_unparsed.py index 4f1ebd790d1..d037a69e7d1 100644 --- a/test/unit/test_contracts_graph_unparsed.py +++ b/test/unit/test_contracts_graph_unparsed.py @@ -4,7 +4,7 @@ from dbt.contracts.graph.unparsed import ( UnparsedNode, UnparsedRunHook, UnparsedMacro, Time, TimePeriod, FreshnessStatus, FreshnessThreshold, Quoting, UnparsedSourceDefinition, - UnparsedSourceTableDefinition, UnparsedDocumentationFile, NamedTested, + UnparsedSourceTableDefinition, UnparsedDocumentationFile, UnparsedColumn, UnparsedNodeUpdate ) from dbt.node_types import NodeType @@ -252,6 +252,7 @@ def test_defaults(self): 'tables': [], 'loader': '', 'meta': {}, + 'tags': [], } self.assert_from_dict(minimum, from_dict) self.assert_to_dict(minimum, to_dict) @@ -274,6 +275,7 @@ def test_contents(self): 'freshness': {}, 'tables': [], 'meta': {}, + 'tags': [], } self.assert_symmetric(empty, dct) @@ -316,6 +318,7 @@ def test_table_defaults(self): 'external': {}, 'freshness': {}, 'meta': {}, + 'tags': [], }, { 'name': 'table2', @@ -326,8 +329,10 @@ def test_table_defaults(self): 'external': {}, 'freshness': {}, 'meta': {}, + 'tags': [], }, ], + 'tags': [], } self.assert_from_dict(source, from_dict) self.assert_symmetric(source, to_dict) @@ -409,12 +414,12 @@ def test_contents(self): tests=['table_test'], meta={'key': ['value1', 'value2']}, columns=[ - NamedTested( + UnparsedColumn( name='x', description='x description', meta={'key2': 'value3'}, ), - NamedTested( + UnparsedColumn( name='y', description='y description', tests=[ @@ -422,6 +427,7 @@ def test_contents(self): {'accepted_values': {'values': ['blue', 'green']}} ], meta={}, + tags=['a', 'b'], ), ], ) @@ -439,6 +445,7 @@ def test_contents(self): 'description': 'x description', 'tests': [], 'meta': {'key2': 'value3'}, + 'tags': [], }, { 'name': 'y', @@ -448,6 +455,7 @@ def test_contents(self): {'accepted_values': {'values': ['blue', 'green']}} ], 'meta': {}, + 'tags': ['a', 'b'], }, ], } From 5c80d6ab4e83f6046d41dc9d249f12e18ebb29b2 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 27 Jan 2020 08:35:46 -0700 Subject: [PATCH 2/3] Add a tags parameter to tests, like severity - edited a test to exercise it --- core/dbt/parser/schema_test_builders.py | 16 +++++++++++++++- core/dbt/parser/schemas.py | 1 + .../models-v2/models/schema.yml | 2 ++ .../test_schema_v2_tests.py | 4 ++++ 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/core/dbt/parser/schema_test_builders.py b/core/dbt/parser/schema_test_builders.py index 7dcc4ede7a2..b07c808645b 100644 --- a/core/dbt/parser/schema_test_builders.py +++ b/core/dbt/parser/schema_test_builders.py @@ -169,7 +169,7 @@ class TestBuilder(Generic[Target]): r'(?P([a-zA-Z_][0-9a-zA-Z_]*))' ) # map magic keys to default values - MODIFIER_ARGS = {'severity': 'ERROR'} + MODIFIER_ARGS = {'severity': 'ERROR', 'tags': []} def __init__( self, @@ -247,6 +247,20 @@ def extract_test_args(test, name=None) -> Tuple[str, Dict[str, Any]]: def severity(self) -> str: return self.modifiers.get('severity', 'ERROR').upper() + def tags(self) -> List[str]: + tags = self.modifiers.get('tags', []) + if not isinstance(tags, list): + raise_compiler_error( + f'got {tags} ({type(tags)}) for tags, expected a list of ' + f'strings' + ) + for tag in tags: + if not isinstance(tag, str): + raise_compiler_error( + f'got {tag} ({type(tag)}) for tag, expected a str' + ) + return tags[:] + def test_kwargs_str(self) -> str: # sort the dict so the keys are rendered deterministically (for tests) return ', '.join(( diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index 93ff16e899f..7732777f4df 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -215,6 +215,7 @@ def parse_node(self, block: SchemaTestBlock) -> ParsedTestNode: # copy - we don't want to mutate the tags! tags = block.tags[:] + tags.extend(builder.tags()) if 'schema' not in tags: tags.append('schema') diff --git a/test/integration/008_schema_tests_test/models-v2/models/schema.yml b/test/integration/008_schema_tests_test/models-v2/models/schema.yml index 8721fe965c8..1ce3dae344e 100644 --- a/test/integration/008_schema_tests_test/models-v2/models/schema.yml +++ b/test/integration/008_schema_tests_test/models-v2/models/schema.yml @@ -41,6 +41,8 @@ models: - accepted_values: values: [3.14159265] quote: false + tags: + - favorite_number_is_pi - name: table_summary diff --git a/test/integration/008_schema_tests_test/test_schema_v2_tests.py b/test/integration/008_schema_tests_test/test_schema_v2_tests.py index c7a5add39a5..aa665e39692 100644 --- a/test/integration/008_schema_tests_test/test_schema_v2_tests.py +++ b/test/integration/008_schema_tests_test/test_schema_v2_tests.py @@ -70,6 +70,10 @@ def test_postgres_schema_test_selection(self): for result in test_results: self.assertTestPassed(result) + test_results = self.run_dbt(['test', '--models', 'tag:favorite_number_is_pi']) + self.assertEqual(len(test_results), 1) + self.assertTestPassed(test_results[0]) + @use_profile('postgres') def test_postgres_schema_test_exclude_failures(self): results = self.run_dbt() From 8b722c79511f52bac5d659b6f9131fe67a2a5188 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 27 Jan 2020 10:36:45 -0700 Subject: [PATCH 3/3] allow single name tags in test configs --- core/dbt/parser/schema_test_builders.py | 2 ++ .../008_schema_tests_test/models-v2/models/schema.yml | 8 ++++++-- .../008_schema_tests_test/test_schema_v2_tests.py | 5 +++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/core/dbt/parser/schema_test_builders.py b/core/dbt/parser/schema_test_builders.py index b07c808645b..75d38fe09c7 100644 --- a/core/dbt/parser/schema_test_builders.py +++ b/core/dbt/parser/schema_test_builders.py @@ -249,6 +249,8 @@ def severity(self) -> str: def tags(self) -> List[str]: tags = self.modifiers.get('tags', []) + if isinstance(tags, str): + tags = [tags] if not isinstance(tags, list): raise_compiler_error( f'got {tags} ({type(tags)}) for tags, expected a list of ' diff --git a/test/integration/008_schema_tests_test/models-v2/models/schema.yml b/test/integration/008_schema_tests_test/models-v2/models/schema.yml index 1ce3dae344e..f87a970898d 100644 --- a/test/integration/008_schema_tests_test/models-v2/models/schema.yml +++ b/test/integration/008_schema_tests_test/models-v2/models/schema.yml @@ -32,7 +32,11 @@ models: - name: favorite_color description: "The user's favorite color" tests: - - accepted_values: { values: ['blue', 'green'], quote: true } + - accepted_values: { + values: ['blue', 'green'], + quote: true, + tags: table_copy_favorite_color # tags can be a single string + } tags: - table_favorite_color - name: fav_number @@ -41,7 +45,7 @@ models: - accepted_values: values: [3.14159265] quote: false - tags: + tags: # tags can be a list of strings - favorite_number_is_pi diff --git a/test/integration/008_schema_tests_test/test_schema_v2_tests.py b/test/integration/008_schema_tests_test/test_schema_v2_tests.py index aa665e39692..0e6a38ebeb6 100644 --- a/test/integration/008_schema_tests_test/test_schema_v2_tests.py +++ b/test/integration/008_schema_tests_test/test_schema_v2_tests.py @@ -74,6 +74,11 @@ def test_postgres_schema_test_selection(self): self.assertEqual(len(test_results), 1) self.assertTestPassed(test_results[0]) + test_results = self.run_dbt(['test', '--models', 'tag:table_copy_favorite_color']) + self.assertEqual(len(test_results), 1) + self.assertTestPassed(test_results[0]) + + @use_profile('postgres') def test_postgres_schema_test_exclude_failures(self): results = self.run_dbt()