Disabled Models in schema files #5868

Merged 22 commits on Sep 29, 2022
Changes from 20 commits
7 changes: 7 additions & 0 deletions .changes/unreleased/Fixes-20220916-104854.yaml
@@ -0,0 +1,7 @@
kind: Fixes
body: Account for disabled flags on models in schema files more completely
time: 2022-09-16T10:48:54.162273-05:00
custom:
Author: emmyoop
Issue: "3992"
PR: "5868"
9 changes: 7 additions & 2 deletions core/dbt/contracts/graph/manifest.py
@@ -1088,8 +1088,13 @@ def add_node_nofile(self, node: ManifestNodes):
def add_node(self, source_file: AnySourceFile, node: ManifestNodes, test_from=None):
self.add_node_nofile(node)
if isinstance(source_file, SchemaSourceFile):
assert test_from
source_file.add_test(node.unique_id, test_from)
if isinstance(node, ParsedGenericTestNode):
assert test_from
source_file.add_test(node.unique_id, test_from)
if isinstance(node, ParsedMetric):
source_file.metrics.append(node.unique_id)
if isinstance(node, ParsedExposure):
source_file.exposures.append(node.unique_id)
else:
source_file.nodes.append(node.unique_id)

27 changes: 27 additions & 0 deletions core/dbt/parser/manifest.py
@@ -1,3 +1,4 @@
from copy import deepcopy
from dataclasses import dataclass
from dataclasses import field
from datetime import datetime
@@ -368,6 +369,8 @@ def load(self):
project, project_parser_files[project.project_name], parser_types
)

self.process_nodes()

self._perf_info.parse_project_elapsed = time.perf_counter() - start_parse_projects

# patch_sources converts the UnparsedSourceDefinitions in the
@@ -468,6 +471,7 @@ def parse_project(
dct = block.file.pp_dict
else:
dct = block.file.dict_from_yaml
# this is where the schema file gets parsed
parser.parse_file(block, dct=dct)
# Came out of here with UnpatchedSourceDefinition containing configs at the source level
# and not configs at the table level (as expected)
@@ -926,6 +930,29 @@ def process_sources(self, current_project: str):
continue
_process_sources_for_exposure(self.manifest, current_project, exposure)

def process_nodes(self):
# now that the schema files have also been parsed, make sure each node
# ends up in manifest.nodes or in the disabled dict, whichever is correct
disable_node_copy = deepcopy(self.manifest.nodes)
for node in disable_node_copy.values():
Contributor:
I'm a little concerned by the overhead of deepcopying the entire nodes dictionary. Maybe we could have a list of disabled unique_ids, leave the 'add_disabled_nofile' in place, and just remove the disabled nodes afterward? I'm not so concerned by the disabled dictionary because it will be much smaller.

Member Author:
Oh, that's cleaner than creating a list of all nodes. 👍

if not node.config.enabled:
self.manifest.nodes.pop(node.unique_id)
self.manifest.add_disabled_nofile(node)

disabled_copy = deepcopy(self.manifest.disabled)
for disabled in disabled_copy.values():
for node in disabled:
if node.config.enabled:
for dis_index, dis_node in enumerate(disabled):
# Remove node from disabled and unique_id from disabled dict if necessary
del self.manifest.disabled[node.unique_id][dis_index]
if not self.manifest.disabled[node.unique_id]:
self.manifest.disabled.pop(node.unique_id)

self.manifest.add_node_nofile(node)

self.manifest.rebuild_ref_lookup()


def invalid_ref_fail_unless_test(node, target_model_name, target_model_package, disabled):

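To make the review thread above concrete, here is a minimal, self-contained sketch of the suggested alternative, using toy classes rather than dbt's real Manifest API (ToyNode, ToyManifest, and move_disabled_nodes are invented for illustration): collect the unique_ids of the disabled nodes first, then pop them out of the nodes dict, so the whole dictionary never needs to be deep-copied.

from dataclasses import dataclass, field
from typing import Dict, List


@dataclass
class ToyNode:
    unique_id: str
    enabled: bool


@dataclass
class ToyManifest:
    nodes: Dict[str, ToyNode] = field(default_factory=dict)
    disabled: Dict[str, List[ToyNode]] = field(default_factory=dict)

    def move_disabled_nodes(self) -> None:
        # Collect the ids first so we never mutate self.nodes while iterating
        # and never need a deepcopy of the whole dict.
        disabled_ids = [uid for uid, node in self.nodes.items() if not node.enabled]
        for uid in disabled_ids:
            node = self.nodes.pop(uid)
            # disabled maps a unique_id to a list, since several disabled
            # definitions can share the same name and package.
            self.disabled.setdefault(uid, []).append(node)


manifest = ToyManifest(
    nodes={
        "model.proj.model_one": ToyNode("model.proj.model_one", enabled=True),
        "model.proj.model_three": ToyNode("model.proj.model_three", enabled=False),
    }
)
manifest.move_disabled_nodes()
print(list(manifest.nodes))     # ['model.proj.model_one']
print(list(manifest.disabled))  # ['model.proj.model_three']

Compared with the deepcopy in the diff above, this version only copies the ids of the disabled nodes, which is what the reviewer is getting at.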
27 changes: 17 additions & 10 deletions core/dbt/parser/partial.py
@@ -834,17 +834,24 @@ def delete_schema_mssa_links(self, schema_file, dict_key, elem):
# remove elem node and remove unique_id from node_patches
if elem_unique_id:
# might have been already removed
if elem_unique_id in self.saved_manifest.nodes:
node = self.saved_manifest.nodes.pop(elem_unique_id)
self.deleted_manifest.nodes[elem_unique_id] = node
if (
elem_unique_id in self.saved_manifest.nodes
or elem_unique_id in self.saved_manifest.disabled
):
if elem_unique_id in self.saved_manifest.nodes:
nodes = [self.saved_manifest.nodes.pop(elem_unique_id)]
else:
# The value of disabled items is a list of nodes
nodes = self.saved_manifest.disabled.pop(elem_unique_id)
# need to add the node source_file to pp_files
file_id = node.file_id
# need to copy new file to saved files in order to get content
if file_id in self.new_files:
self.saved_files[file_id] = deepcopy(self.new_files[file_id])
if self.saved_files[file_id]:
source_file = self.saved_files[file_id]
self.add_to_pp_files(source_file)
for node in nodes:
file_id = node.file_id
# need to copy new file to saved files in order to get content
if file_id in self.new_files:
self.saved_files[file_id] = deepcopy(self.new_files[file_id])
if self.saved_files[file_id]:
source_file = self.saved_files[file_id]
self.add_to_pp_files(source_file)
# remove from patches
schema_file.node_patches.remove(elem_unique_id)

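As a side note on the shape being handled above, here is a short self-contained sketch (toy dicts and invented unique_ids and file ids, not dbt's real classes): an id can live either in nodes, holding a single node, or in disabled, holding a list of candidate nodes, so normalizing both cases into a list lets the re-scheduling step run once per candidate.

# Toy structures; unique_ids and file ids are invented for this sketch.
nodes = {"model.proj.model_one": {"file_id": "proj://models/model_one.sql"}}
disabled = {
    "model.proj.model_three": [
        {"file_id": "proj://models/a/model_three.sql"},
        {"file_id": "proj://models/b/model_three.sql"},
    ],
}


def pop_candidates(unique_id):
    # Enabled case: exactly one node. Disabled case: a list of candidates.
    if unique_id in nodes:
        return [nodes.pop(unique_id)]
    if unique_id in disabled:
        return disabled.pop(unique_id)
    return []


for node in pop_candidates("model.proj.model_three"):
    # In partial parsing, each candidate's source file would be scheduled
    # for re-parsing here.
    print(node["file_id"])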
38 changes: 30 additions & 8 deletions core/dbt/parser/schemas.py
@@ -508,6 +508,8 @@ def parse_file(self, block: FileBlock, dct: Dict = None) -> None:
# NonSourceParser.parse(), TestablePatchParser is a variety of
# NodePatchParser
if "models" in dct:
# the models are already in the manifest as nodes when we reach this code,
# even if they are disabled in the schema file
parser = TestablePatchParser(self, yaml_block, "models")
for test_block in parser.parse():
self.parse_tests(test_block)
@@ -775,9 +777,10 @@ def parse(self) -> List[TestBlock]:
refs: ParserRef = ParserRef.from_target(node)
else:
refs = ParserRef()
# This adds the node_block to self.manifest
# as a ParsedNodePatch or ParsedMacroPatch

# There's no unique_id on the node yet so cannot add to disabled dict
self.parse_patch(node_block, refs)

# This will always be empty if the node is a macro or analysis
return test_blocks

@@ -881,15 +884,32 @@ def parse_patch(self, block: TargetBlock[NodeTarget], refs: ParserRef) -> None:
f"Unexpected yaml_key {patch.yaml_key} for patch in "
f"file {source_file.path.original_file_path}"
)
# handle disabled nodes
if unique_id is None:
# Node might be disabled. Following call returns list of matching disabled nodes
found_nodes = self.manifest.disabled_lookup.find(patch.name, patch.package_name)
if found_nodes:
# There might be multiple disabled nodes for this model
if len(found_nodes) > 1 and patch.config.get("enabled"):
# There are multiple disabled nodes for this model and the schema file wants to enable one.
# We have no way to know which one to enable.
msg = (
f"Found {len(found_nodes)} matching disabled nodes for '{patch.name}'. "
"Multiple nodes for the same unique id cannot be enabled in the schema "
"file. They must be enabled in `dbt_project.yml` or in the sql files."
)
raise ParsingException(msg)

# all nodes in the disabled dict have the same unique_id so just grab the first one
# to append with the unique_id
source_file.append_patch(patch.yaml_key, found_nodes[0].unique_id)
for node in found_nodes:
# We're saving the patch_path because we need to schedule
# re-application of the patch in partial parsing.
node.patch_path = source_file.file_id
# re-calculate the node config with the patch config. Always do this
# for the case when no config is set to ensure the default of true gets captured
if patch.config:
self.patch_node_config(node, patch)

node.patch(patch)
else:
msg = (
f"Did not find matching node for patch with name '{patch.name}' "
@@ -905,11 +925,13 @@ def parse_patch(self, block: TargetBlock[NodeTarget], refs: ParserRef) -> None:
if node.patch_path:
package_name, existing_file_path = node.patch_path.split("://")
raise_duplicate_patch_name(patch, existing_file_path)
source_file.append_patch(patch.yaml_key, unique_id)
# If this patch has config changes, re-calculate the node config
# with the patch config

source_file.append_patch(patch.yaml_key, node.unique_id)
# re-calculate the node config with the patch config. Always do this
# for the case when no config is set to ensure the default of true gets captured
if patch.config:
self.patch_node_config(node, patch)

node.patch(patch)


13 changes: 13 additions & 0 deletions test-files/models-schema4.yml
@@ -0,0 +1,13 @@
version: 2

models:
- name: model_one
description: "The first model"
- name: model_three
description: "The third model"
config:
enabled: false
columns:
- name: id
tests:
- unique
13 changes: 13 additions & 0 deletions test-files/models-schema4b.yml
@@ -0,0 +1,13 @@
version: 2

models:
- name: model_one
description: "The first model"
- name: model_three
description: "The third model"
config:
enabled: true
columns:
- name: id
tests:
- unique
@@ -172,6 +172,26 @@ def test_postgres_pp_models(self):
results = self.run_dbt(["--partial-parse", "run"])
self.assertEqual(len(results), 3)

# disable model three in the schema file
self.copy_file('test-files/models-schema4.yml', 'models/schema.yml')
results = self.run_dbt(["--partial-parse", "run"])
self.assertEqual(len(results), 2)

# update enabled config to be true for model three in the schema file
self.copy_file('test-files/models-schema4b.yml', 'models/schema.yml')
results = self.run_dbt(["--partial-parse", "run"])
self.assertEqual(len(results), 3)

# disable model three in the schema file again
self.copy_file('test-files/models-schema4.yml', 'models/schema.yml')
results = self.run_dbt(["--partial-parse", "run"])
self.assertEqual(len(results), 2)

# remove disabled config for model three in the schema file to check it gets enabled
self.copy_file('test-files/models-schema3.yml', 'models/schema.yml')
results = self.run_dbt(["--partial-parse", "run"])
self.assertEqual(len(results), 3)

# Add a macro
self.copy_file('test-files/my_macro.sql', 'macros/my_macro.sql')
results = self.run_dbt(["--partial-parse", "run"])
76 changes: 76 additions & 0 deletions tests/functional/configs/fixtures.py
@@ -78,6 +78,82 @@

"""

my_model = """
select 1 as user
"""

my_model_2 = """
select * from {{ ref('my_model') }}
"""

my_model_3 = """
select * from {{ ref('my_model_2') }}
"""

my_model_2_disabled = """
{{ config(enabled=false) }}
select * from {{ ref('my_model') }}
"""

my_model_3_disabled = """
{{ config(enabled=false) }}
select * from {{ ref('my_model_2') }}
"""

my_model_2_enabled = """
{{ config(enabled=true) }}
select * from {{ ref('my_model') }}
"""

my_model_3_enabled = """
{{ config(enabled=true) }}
select * from {{ ref('my_model') }}
"""

schema_all_disabled_yml = """
version: 2
models:
- name: my_model
- name: my_model_2
config:
enabled: false
- name: my_model_3
config:
enabled: false
"""

schema_explicit_enabled_yml = """
version: 2
models:
- name: my_model
- name: my_model_2
config:
enabled: true
- name: my_model_3
config:
enabled: true
"""

schema_partial_disabled_yml = """
version: 2
models:
- name: my_model
- name: my_model_2
config:
enabled: false
- name: my_model_3
"""

schema_partial_enabled_yml = """
version: 2
models:
- name: my_model
- name: my_model_2
config:
enabled: True
- name: my_model_3
"""


class BaseConfigProject:
@pytest.fixture(scope="class")