Skip to content

Commit

Permalink
Add primary_key to manifest (#10096)
Browse files Browse the repository at this point in the history
  • Loading branch information
dave-connors-3 committed May 10, 2024
1 parent ecf9436 commit 8fe7d65
Show file tree
Hide file tree
Showing 12 changed files with 5,641 additions and 6,399 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240506-175642.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: serialize inferred primary key
time: 2024-05-06T17:56:42.757673-05:00
custom:
Author: dave-connors-3
Issue: "9824"
9 changes: 9 additions & 0 deletions core/dbt/artifacts/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,15 @@ Freely make incremental, non-breaking changes in-place to the latest major versi

These types of minor, non-breaking changes are tested by [tests/unit/artifacts/test_base_resource.py::TestMinorSchemaChange](https://github.com/dbt-labs/dbt-core/blob/main/tests/unit/artifacts/test_base_resource.py).


#### Updating [schemas.getdbt.com](https://schemas.getdbt.com)
Non-breaking changes to artifact schemas require an update to the corresponding jsonschemas published to [schemas.getdbt.com](https://schemas.getdbt.com), which are defined in https://github.com/dbt-labs/schemas.getdbt.com. To do so:
1. Create a PR in https://github.com/dbt-labs/schemas.getdbt.com which reflects the schema changes to the artifact. The schema can be updated in-place for non-breaking changes. Example PR: https://github.com/dbt-labs/schemas.getdbt.com/pull/39
2. Merge the https://github.com/dbt-labs/schemas.getdbt.com PR
3. Observe the `Artifact Schema Check` CI check pass on the `dbt-core` PR that updates the artifact schemas, and merge the `dbt-core` PR!

Note: Although `jsonschema` validation using the schemas in [schemas.getdbt.com](https://schemas.getdbt.com) is not encouraged or formally supported, `jsonschema` validation should continue to work once the schemas are updated, because the updated schemas are forward-compatible and can therefore be used to validate previous minor versions of the schema.

### Breaking changes
A breaking change is anything that:
* Deletes a required field
Expand Down
1 change: 1 addition & 0 deletions core/dbt/artifacts/resources/v1/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class Model(CompiledResource):
latest_version: Optional[NodeVersion] = None
deprecation_date: Optional[datetime] = None
defer_relation: Optional[DeferRelation] = None
primary_key: List[str] = field(default_factory=list)

def __post_serialize__(self, dct: Dict, context: Optional[Dict] = None):
dct = super().__post_serialize__(dct, context)
Expand Down
29 changes: 29 additions & 0 deletions core/dbt/parser/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
)
from dbt.contracts.graph.nodes import (
Exposure,
GenericTestNode,
Macro,
ManifestNode,
Metric,
Expand Down Expand Up @@ -466,6 +467,7 @@ def load(self) -> Manifest:
self.process_docs(self.root_project)
self.process_metrics(self.root_project)
self.process_saved_queries(self.root_project)
self.process_model_inferred_primary_keys()
self.check_valid_group_config()
self.check_valid_access_property()

Expand Down Expand Up @@ -1149,6 +1151,15 @@ def process_saved_queries(self, config: RuntimeConfig):
# 2. process `group_by` of SavedQuery for `depends_on``
_process_metrics_for_node(self.manifest, current_project, saved_query)

def process_model_inferred_primary_keys(self):
    """Populate `primary_key` on every ModelNode in the manifest.

    Generic tests (enabled and disabled) are grouped by the node they are
    attached to in a single pass over the manifest, so the tests for each
    model are found with a dict lookup instead of rescanning every node
    once per model.
    """
    # Map `attached_node` -> generic tests attached to it. Enabled tests are
    # collected first and disabled tests after, so each model receives its
    # tests in the same order as a per-model scan of nodes then disabled.
    tests_by_attached_node = {}
    for node in self.manifest.nodes.values():
        if isinstance(node, GenericTestNode):
            tests_by_attached_node.setdefault(node.attached_node, []).append(node)
    for disabled_nodes in self.manifest.disabled.values():
        for disabled_node in disabled_nodes:
            if isinstance(disabled_node, GenericTestNode):
                tests_by_attached_node.setdefault(disabled_node.attached_node, []).append(
                    disabled_node
                )

    for node in self.manifest.nodes.values():
        if not isinstance(node, ModelNode):
            continue
        # Models without any attached generic test get an empty list.
        generic_tests = tests_by_attached_node.get(node.unique_id, [])
        primary_key = node.infer_primary_key(generic_tests)
        # Stored sorted so the serialized value is order-stable.
        node.primary_key = sorted(primary_key)

def update_semantic_model(self, semantic_model) -> None:
# This has to be done at the end of parsing because the referenced model
# might have alias/schema/database fields that are updated by yaml config.
Expand Down Expand Up @@ -1344,6 +1355,24 @@ def write_perf_info(self, target_path: str):
write_file(path, json.dumps(self._perf_info, cls=dbt.utils.JSONEncoder, indent=4))
fire_event(ParsePerfInfoPath(path=path))

def _get_generic_tests_for_model(
    self,
    model: ModelNode,
) -> List[GenericTestNode]:
    """Collect every generic test attached to `model`.

    Enabled tests (from `self.manifest.nodes`) come first, followed by
    disabled tests (from `self.manifest.disabled`).
    """

    def _is_attached_generic_test(candidate) -> bool:
        # A match is a GenericTestNode whose `attached_node` is this model's id.
        return (
            isinstance(candidate, GenericTestNode)
            and candidate.attached_node == model.unique_id
        )

    enabled_matches = [
        candidate
        for candidate in self.manifest.nodes.values()
        if _is_attached_generic_test(candidate)
    ]
    disabled_matches = [
        candidate
        for disabled_group in self.manifest.disabled.values()
        for candidate in disabled_group
        if _is_attached_generic_test(candidate)
    ]
    return enabled_matches + disabled_matches


def invalid_target_fail_unless_test(
node,
Expand Down

0 comments on commit 8fe7d65

Please sign in to comment.