add generic tests to test-paths (#4052)

* removed overlooked breakpoint * first pass * save progress - singualr tests broken * fixed to work with both generic and singular tests * fixed formatting * added a comment * change to use /generic subfolder * fix formatting issues * fixed bug on code consolidation * fixed typo * added test for generic tests * added changelog entry * added logic to treat generic tests like macro tests * add generic test to macro_edges * fixed generic tests to match unique_ids * fixed test
dbt-labs · Oct 21, 2021 · f79a968 · f79a968
1 parent 34c23fe
commit f79a968
Show file tree

Hide file tree

Showing 16 changed files with 304 additions and 25 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -36,6 +36,7 @@ Contributors:
 - Turns on the static parser by default and adds the flag `--no-static-parser` to disable it. ([#3377](https://github.com/dbt-labs/dbt-core/issues/3377), [#3939](https://github.com/dbt-labs/dbt-core/pull/3939))
 - Generic test FQNs have changed to include the relative path, resource, and column (if applicable) where they are defined. This makes it easier to configure them from the `tests` block in `dbt_project.yml` ([#3259](https://github.com/dbt-labs/dbt-core/pull/3259), [#3880](https://github.com/dbt-labs/dbt-core/pull/3880)
 - Turn on partial parsing by default ([#3867](https://github.com/dbt-labs/dbt-core/issues/3867), [#3989](https://github.com/dbt-labs/dbt-core/issues/3989))
+- Generic test can now be added under a `generic` subfolder in the `test-paths` directory. ([#4052](https://github.com/dbt-labs/dbt-core/pull/4052))
 
 ### Fixes
 - Add generic tests defined on sources to the manifest once, not twice ([#3347](https://github.com/dbt-labs/dbt/issues/3347), [#3880](https://github.com/dbt-labs/dbt/pull/3880))

diff --git a/core/dbt/contracts/files.py b/core/dbt/contracts/files.py
@@ -18,7 +18,8 @@ class ParseFileType(StrEnum):
     Model = 'model'
     Snapshot = 'snapshot'
     Analysis = 'analysis'
-    Test = 'test'
+    SingularTest = 'singular_test'
+    GenericTest = 'generic_test'
     Seed = 'seed'
     Documentation = 'docs'
     Schema = 'schema'
@@ -30,7 +31,8 @@ class ParseFileType(StrEnum):
     ParseFileType.Model: 'ModelParser',
     ParseFileType.Snapshot: 'SnapshotParser',
     ParseFileType.Analysis: 'AnalysisParser',
-    ParseFileType.Test: 'SingularTestParser',
+    ParseFileType.SingularTest: 'SingularTestParser',
+    ParseFileType.GenericTest: 'GenericTestParser',
     ParseFileType.Seed: 'SeedParser',
     ParseFileType.Documentation: 'DocumentationParser',
     ParseFileType.Schema: 'SchemaParser',

diff --git a/core/dbt/contracts/graph/manifest.py b/core/dbt/contracts/graph/manifest.py
@@ -291,7 +291,7 @@ def build_node_edges(nodes: List[ManifestNode]):
     return _sort_values(forward_edges), _sort_values(backward_edges)
 
 
-# Build a map of children of macros
+# Build a map of children of macros and generic tests
 def build_macro_edges(nodes: List[Any]):
     forward_edges: Dict[str, List[str]] = {
         n.unique_id: [] for n in nodes if n.unique_id.startswith('macro') or n.depends_on.macros

diff --git a/core/dbt/contracts/graph/unparsed.py b/core/dbt/contracts/graph/unparsed.py
@@ -44,6 +44,11 @@ class UnparsedMacro(UnparsedBaseNode, HasSQL):
     resource_type: NodeType = field(metadata={'restrict': [NodeType.Macro]})
 
 
+@dataclass
+class UnparsedGenericTest(UnparsedBaseNode, HasSQL):
+    resource_type: NodeType = field(metadata={'restrict': [NodeType.Macro]})
+
+
 @dataclass
 class UnparsedNode(UnparsedBaseNode, HasSQL):
     name: str

diff --git a/core/dbt/parser/__init__.py b/core/dbt/parser/__init__.py
@@ -1,6 +1,7 @@
 from .analysis import AnalysisParser  # noqa
 from .base import Parser, ConfiguredParser  # noqa
 from .singular_test import SingularTestParser  # noqa
+from .generic_test import GenericTestParser  # noqa
 from .docs import DocumentationParser  # noqa
 from .hooks import HookParser  # noqa
 from .macros import MacroParser  # noqa
@@ -10,6 +11,6 @@
 from .snapshots import SnapshotParser  # noqa
 
 from . import (  # noqa
-    analysis, base, singular_test, docs, hooks, macros, models, schemas,
+    analysis, base, generic_test, singular_test, docs, hooks, macros, models, schemas,
     snapshots
 )
diff --git a/core/dbt/parser/generic_test.py b/core/dbt/parser/generic_test.py
@@ -0,0 +1,106 @@
+from typing import Iterable, List
+
+import jinja2
+
+from dbt.exceptions import CompilationException
+from dbt.clients import jinja
+from dbt.contracts.graph.parsed import ParsedGenericTestNode
+from dbt.contracts.graph.unparsed import UnparsedMacro
+from dbt.contracts.graph.parsed import ParsedMacro
+from dbt.contracts.files import SourceFile
+from dbt.logger import GLOBAL_LOGGER as logger
+from dbt.node_types import NodeType
+from dbt.parser.base import BaseParser
+from dbt.parser.search import FileBlock
+from dbt.utils import MACRO_PREFIX
+
+
+class GenericTestParser(BaseParser[ParsedGenericTestNode]):
+
+    @property
+    def resource_type(self) -> NodeType:
+        return NodeType.Macro
+
+    @classmethod
+    def get_compiled_path(cls, block: FileBlock):
+        return block.path.relative_path
+
+    def parse_generic_test(
+        self, block: jinja.BlockTag, base_node: UnparsedMacro, name: str
+    ) -> ParsedMacro:
+        unique_id = self.generate_unique_id(name)
+
+        return ParsedMacro(
+            path=base_node.path,
+            macro_sql=block.full_block,
+            original_file_path=base_node.original_file_path,
+            package_name=base_node.package_name,
+            root_path=base_node.root_path,
+            resource_type=base_node.resource_type,
+            name=name,
+            unique_id=unique_id,
+        )
+
+    def parse_unparsed_generic_test(
+        self, base_node: UnparsedMacro
+    ) -> Iterable[ParsedMacro]:
+        try:
+            blocks: List[jinja.BlockTag] = [
+                t for t in
+                jinja.extract_toplevel_blocks(
+                    base_node.raw_sql,
+                    allowed_blocks={'test'},
+                    collect_raw_data=False,
+                )
+                if isinstance(t, jinja.BlockTag)
+            ]
+        except CompilationException as exc:
+            exc.add_node(base_node)
+            raise
+
+        for block in blocks:
+            try:
+                ast = jinja.parse(block.full_block)
+            except CompilationException as e:
+                e.add_node(base_node)
+                raise
+
+            # generic tests are structured as macros so we want to count the number of macro blocks
+            generic_test_nodes = list(ast.find_all(jinja2.nodes.Macro))
+
+            if len(generic_test_nodes) != 1:
+                # things have gone disastrously wrong, we thought we only
+                # parsed one block!
+                raise CompilationException(
+                    f'Found multiple generic tests in {block.full_block}, expected 1',
+                    node=base_node
+                )
+
+            generic_test_name = generic_test_nodes[0].name
+
+            if not generic_test_name.startswith(MACRO_PREFIX):
+                continue
+
+            name: str = generic_test_name.replace(MACRO_PREFIX, '')
+            node = self.parse_generic_test(block, base_node, name)
+            yield node
+
+    def parse_file(self, block: FileBlock):
+        assert isinstance(block.file, SourceFile)
+        source_file = block.file
+        assert isinstance(source_file.contents, str)
+        original_file_path = source_file.path.original_file_path
+        logger.debug("Parsing {}".format(original_file_path))
+
+        # this is really only used for error messages
+        base_node = UnparsedMacro(
+            path=original_file_path,
+            original_file_path=original_file_path,
+            package_name=self.project.project_name,
+            raw_sql=source_file.contents,
+            root_path=self.project.project_root,
+            resource_type=NodeType.Macro,
+        )
+
+        for node in self.parse_unparsed_generic_test(base_node):
+            self.manifest.add_macro(block.file, node)
diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py
@@ -49,6 +49,7 @@
 )
 from dbt.parser.base import Parser
 from dbt.parser.analysis import AnalysisParser
+from dbt.parser.generic_test import GenericTestParser
 from dbt.parser.singular_test import SingularTestParser
 from dbt.parser.docs import DocumentationParser
 from dbt.parser.hooks import HookParser
@@ -277,9 +278,10 @@ def load(self):
         if skip_parsing:
             logger.debug("Partial parsing enabled, no changes found, skipping parsing")
         else:
-            # Load Macros
+            # Load Macros and tests
             # We need to parse the macros first, so they're resolvable when
-            # the other files are loaded
+            # the other files are loaded.  Also need to parse tests, specifically
+            # generic tests
             start_load_macros = time.perf_counter()
             self.load_and_parse_macros(project_parser_files)
 
@@ -379,14 +381,22 @@ def load_and_parse_macros(self, project_parser_files):
             if project.project_name not in project_parser_files:
                 continue
             parser_files = project_parser_files[project.project_name]
-            if 'MacroParser' not in parser_files:
-                continue
-            parser = MacroParser(project, self.manifest)
-            for file_id in parser_files['MacroParser']:
-                block = FileBlock(self.manifest.files[file_id])
-                parser.parse_file(block)
-                # increment parsed path count for performance tracking
-                self._perf_info.parsed_path_count = self._perf_info.parsed_path_count + 1
+            if 'MacroParser' in parser_files:
+                parser = MacroParser(project, self.manifest)
+                for file_id in parser_files['MacroParser']:
+                    block = FileBlock(self.manifest.files[file_id])
+                    parser.parse_file(block)
+                    # increment parsed path count for performance tracking
+                    self._perf_info.parsed_path_count = self._perf_info.parsed_path_count + 1
+            # generic tests hisotrically lived in the macros directoy but can now be nested
+            # in a /generic directory under /tests so we want to process them here as well
+            if 'GenericTestParser' in parser_files:
+                parser = GenericTestParser(project, self.manifest)
+                for file_id in parser_files['GenericTestParser']:
+                    block = FileBlock(self.manifest.files[file_id])
+                    parser.parse_file(block)
+                    # increment parsed path count for performance tracking
+                    self._perf_info.parsed_path_count = self._perf_info.parsed_path_count + 1
 
         self.build_macro_resolver()
         # Look at changed macros and update the macro.depends_on.macros

diff --git a/core/dbt/parser/partial.py b/core/dbt/parser/partial.py
@@ -13,7 +13,12 @@
     ParseFileType.Seed,
     ParseFileType.Snapshot,
     ParseFileType.Analysis,
-    ParseFileType.Test,
+    ParseFileType.SingularTest,
+)
+
+mg_files = (
+    ParseFileType.Macro,
+    ParseFileType.GenericTest,
 )
 
 
@@ -88,7 +93,7 @@ def build_file_diff(self):
             if self.saved_files[file_id].parse_file_type == ParseFileType.Schema:
                 deleted_schema_files.append(file_id)
             else:
-                if self.saved_files[file_id].parse_file_type == ParseFileType.Macro:
+                if self.saved_files[file_id].parse_file_type in mg_files:
                     changed_or_deleted_macro_file = True
                 deleted.append(file_id)
 
@@ -106,7 +111,7 @@ def build_file_diff(self):
                         raise Exception(f"Serialization failure for {file_id}")
                     changed_schema_files.append(file_id)
                 else:
-                    if self.saved_files[file_id].parse_file_type == ParseFileType.Macro:
+                    if self.saved_files[file_id].parse_file_type in mg_files:
                         changed_or_deleted_macro_file = True
                     changed.append(file_id)
         file_diff = {
@@ -213,7 +218,7 @@ def delete_from_saved(self, file_id):
             self.deleted_manifest.files[file_id] = self.saved_manifest.files.pop(file_id)
 
         # macros
-        if saved_source_file.parse_file_type == ParseFileType.Macro:
+        if saved_source_file.parse_file_type in mg_files:
             self.delete_macro_file(saved_source_file, follow_references=True)
 
         # docs
@@ -229,7 +234,7 @@ def update_in_saved(self, file_id):
 
         if new_source_file.parse_file_type in mssat_files:
             self.update_mssat_in_saved(new_source_file, old_source_file)
-        elif new_source_file.parse_file_type == ParseFileType.Macro:
+        elif new_source_file.parse_file_type in mg_files:
             self.update_macro_in_saved(new_source_file, old_source_file)
         elif new_source_file.parse_file_type == ParseFileType.Documentation:
             self.update_doc_in_saved(new_source_file, old_source_file)

diff --git a/core/dbt/parser/read_files.py b/core/dbt/parser/read_files.py
@@ -93,6 +93,11 @@ def get_source_files(project, paths, extension, parse_file_type, saved_files):
     for fp in fp_list:
         if parse_file_type == ParseFileType.Seed:
             fb_list.append(load_seed_source_file(fp, project.project_name))
+        # singular tests live in /tests but only generic tests live
+        # in /tests/generic so we want to skip those
+        elif (parse_file_type == ParseFileType.SingularTest and
+              'generic/' in fp.relative_path):
+            continue
         else:
             file = load_source_file(fp, parse_file_type, project.project_name, saved_files)
             # only append the list if it has contents. added to fix #3568
@@ -137,7 +142,13 @@ def read_files(project, files, parser_files, saved_files):
     )
 
     project_files['SingularTestParser'] = read_files_for_parser(
-        project, files, project.test_paths, '.sql', ParseFileType.Test, saved_files
+        project, files, project.test_paths, '.sql', ParseFileType.SingularTest, saved_files
+    )
+
+    # all generic tests within /tests must be nested under a /generic subfolder
+    project_files['GenericTestParser'] = read_files_for_parser(
+        project, files, ["{}{}".format(test_path, '/generic') for test_path in project.test_paths],
+        '.sql', ParseFileType.GenericTest, saved_files
     )
 
     project_files['SeedParser'] = read_files_for_parser(

diff --git a/test/integration/008_schema_tests_test/test_schema_v2_tests.py b/test/integration/008_schema_tests_test/test_schema_v2_tests.py
@@ -544,7 +544,6 @@ def test_postgres_test_context_with_macro_namespace(self):
         run_result = self.run_dbt(['test'], expect_pass=False)
         results = run_result.results
         results = sorted(results, key=lambda r: r.node.name)
-        # breakpoint()
         self.assertEqual(len(results), 4)
         # call_pkg_macro_model_c_
         self.assertEqual(results[0].status, TestStatus.Fail)

diff --git a/test/integration/068_partial_parsing_tests/test-files/generic_schema.yml b/test/integration/068_partial_parsing_tests/test-files/generic_schema.yml
@@ -0,0 +1,9 @@
+version: 2
+
+models:
+  - name: orders
+    description: "Some order data"
+    columns:
+      - name: id
+        tests:
+          - unique
diff --git a/test/integration/068_partial_parsing_tests/test-files/generic_test.sql b/test/integration/068_partial_parsing_tests/test-files/generic_test.sql
@@ -0,0 +1,26 @@
+{% test is_odd(model, column_name) %}
+
+with validation as (
+
+    select
+        {{ column_name }} as odd_field
+
+    from {{ model }}
+
+),
+
+validation_errors as (
+
+    select
+        odd_field
+
+    from validation
+    -- if this is true, then odd_field is actually even!
+    where (odd_field % 2) = 0
+
+)
+
+select *
+from validation_errors
+
+{% endtest %}
diff --git a/test/integration/068_partial_parsing_tests/test-files/generic_test_edited.sql b/test/integration/068_partial_parsing_tests/test-files/generic_test_edited.sql
@@ -0,0 +1,26 @@
+{% test is_odd(model, column_name) %}
+
+with validation as (
+
+    select
+        {{ column_name }} as odd_field2
+
+    from {{ model }}
+
+),
+
+validation_errors as (
+
+    select
+        odd_field2
+
+    from validation
+    -- if this is true, then odd_field is actually even!
+    where (odd_field2 % 2) = 0
+
+)
+
+select *
+from validation_errors
+
+{% endtest %}
diff --git a/test/integration/068_partial_parsing_tests/test-files/generic_test_schema.yml b/test/integration/068_partial_parsing_tests/test-files/generic_test_schema.yml
@@ -0,0 +1,10 @@
+version: 2
+
+models:
+  - name: orders
+    description: "Some order data"
+    columns:
+      - name: id
+        tests:
+          - unique
+          - is_odd