Skip to content

Commit

Permalink
dbt-materialize: more gracefully handle contracts on unknown types
Browse files Browse the repository at this point in the history
Backport dbt-labs/dbt-core#8887 to make data contracts work correctly
with custom PostgreSQL types that are unknown to dbt/psycopg2. The error
messages are bad when contract validation on such types fails, but the
contracts fundamentally work, which is a big improvement.

See comments within the patch for details.
  • Loading branch information
benesch committed Dec 17, 2023
1 parent f2a47ad commit e837a48
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 4 deletions.
5 changes: 5 additions & 0 deletions misc/dbt-materialize/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# dbt-materialize Changelog

## Unreleased

* Backport [dbt-core #8887](https://github.com/dbt-labs/dbt-core/pull/8887) to
unblock users using any custom type with data contracts.

## 1.7.1 - 2023-12-14

* Remove the dependency of data contracts pre-flight checks on the existence of
Expand Down
45 changes: 41 additions & 4 deletions misc/dbt-materialize/dbt/adapters/materialize/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from typing import Optional

import psycopg2
from psycopg2.extensions import string_types
from psycopg2.extras import register_uuid

import dbt.adapters.postgres.connections
Expand All @@ -31,6 +32,12 @@

logger = AdapterLogger("Materialize")

# NOTE(morsapaes): registering the UUID type produces nicer error messages when
# data contracts fail on a UUID type. See comment in the
# `data_type_code_to_name` method for details. We may be able to remove this
# when dbt-core#8900 lands.
#
# This is a module-level call, so it runs once when the module is imported.
# NOTE(review): presumably this makes UUID a type known to psycopg2, so that
# `data_type_code_to_name` can report a real type name for it instead of the
# "custom type unknown to dbt" placeholder -- confirm against psycopg2.
register_uuid()

# Override the psycopg2 connect function in order to inject Materialize-specific
# session parameter defaults.
#
Expand All @@ -50,10 +57,6 @@ def connect(**kwargs):
]
kwargs["options"] = " ".join(options)

# NOTE(morsapaes): work around dbt-core #8353 while #8900 doesn't land to
# unblock users using UUID types.
register_uuid()

return _connect(**kwargs)


Expand Down Expand Up @@ -135,6 +138,40 @@ def cancel(self, connection):
# probably bad, re-raise it
raise

# NOTE(benesch): this is a backport, with modifications, of dbt-core#8887.
# TODO(benesch): consider removing this when v1.8 ships with this code.
@classmethod
def data_type_code_to_name(cls, type_code: int) -> str:
    """Translate a type code (a type OID) into a type name for data contracts.

    Type codes present in psycopg2's `string_types` registry map to the
    registered type's `name`. Any other code is unknown to psycopg2, so a
    unique placeholder name is synthesized from the OID instead.

    Consequences for data contracts that reference unknown types:

    * Valid contracts work flawlessly. `mz_timestamp`, a special
      Materialize type with OID 16552, is unknown to psycopg2. When both
      the contract and the model's column use `mz_timestamp`, validation
      succeeds, and the user never learns that dbt compared these two
      strings under the hood:

          expected: `custom type unknown to dbt (OID 16552)`
          actual:   `custom type unknown to dbt (OID 16552)`

    * Invalid contracts produce an ugly error message. When the contract
      says `timestamp` but the column is really `mz_timestamp`, dbt
      complains with something like "expected type DATETIME, got custom
      type unknown to dbt (OID 16552)".

    This is still much better than the built-in behavior of dbt 1.7, which
    raises "Unhandled error while executing: 16552". See dbt-core#8353 for
    details.
    """
    try:
        recognized = string_types[type_code]
    except KeyError:
        # Not registered with psycopg2: fall back to the OID-based
        # placeholder described in the docstring.
        return f"custom type unknown to dbt (OID {type_code})"
    return recognized.name

# Disable transactions. Materialize transactions do not support arbitrary
# queries in transactions and therefore many of dbt's internal macros
# produce invalid transactions.
Expand Down
81 changes: 81 additions & 0 deletions misc/dbt-materialize/tests/adapter/test_contracts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import pytest
from dbt.tests.util import run_dbt, run_dbt_and_capture

# NOTE(benesch): these tests are backported, with modifications, from
# dbt-core#8887.

# TODO(benesch): consider removing these tests when v1.8 ships with these tests
# as part of core.

# Model whose single column, `ts`, is cast to the standard `timestamp` type.
my_timestamp_model_sql = """
select
'2023-01-01T00:00:00'::timestamp as ts
"""

# Model whose single column, `ts`, is cast to the `mz_timestamp` type.
my_mz_timestamp_model_sql = """
select
'1672531200000'::mz_timestamp as ts
"""

# Schema files for `my_model`. Each one enforces a contract on the single
# column `ts`; they differ only in the declared `data_type`.
#
# The nesting is significant: `config` and `columns` must be keys of the
# `my_model` list entry, and `data_type` a key of the `ts` column entry.
# Written flat, they would parse as unrelated top-level keys and no contract
# would be attached to the model.

# Contract declaring `ts` as the standard `timestamp` type.
model_schema_timestamp_yml = """
models:
  - name: my_model
    config:
      contract:
        enforced: true
    columns:
      - name: ts
        data_type: timestamp
"""

# Contract declaring `ts` as the `mz_timestamp` type.
model_schema_mz_timestamp_yml = """
models:
  - name: my_model
    config:
      contract:
        enforced: true
    columns:
      - name: ts
        data_type: mz_timestamp
"""


class TestModelContractUnrecognizedTypeCode1:
    """Contract and model both use `mz_timestamp`; the run must succeed."""

    @pytest.fixture(scope="class")
    def models(self):
        """Return the project files: the model SQL and its schema file."""
        project_files = {}
        project_files["my_model.sql"] = my_mz_timestamp_model_sql
        project_files["schema.yml"] = model_schema_mz_timestamp_yml
        return project_files

    def test_nonstandard_data_type(self, project):
        """Run the project and expect the contract to validate."""
        command = ["run"]
        run_dbt(command, expect_pass=True)


class TestModelContractUnrecognizedTypeCodeActualMismatch:
    """Contract declares `timestamp`, but the model yields `mz_timestamp`."""

    @pytest.fixture(scope="class")
    def models(self):
        """Return the project files: the model SQL and its schema file."""
        project_files = {}
        project_files["my_model.sql"] = my_mz_timestamp_model_sql
        project_files["schema.yml"] = model_schema_timestamp_yml
        return project_files

    def test_nonstandard_data_type(self, project):
        """Expect the run to fail and the logs to report the type mismatch."""
        _, captured = run_dbt_and_capture(["run"], expect_pass=False)
        # The placeholder name for the unknown type comes first in this row,
        # followed by DATETIME.
        mismatch_row = "custom type unknown to dbt (OID 16552) | DATETIME | data type mismatch"
        assert mismatch_row in captured


class TestModelContractUnrecognizedTypeCodeExpectedMismatch:
    """Contract declares `mz_timestamp`, but the model yields `timestamp`."""

    @pytest.fixture(scope="class")
    def models(self):
        """Return the project files: the model SQL and its schema file."""
        return {
            "my_model.sql": my_timestamp_model_sql,
            "schema.yml": model_schema_mz_timestamp_yml,
        }

    def test_nonstandard_data_type(self, project):
        """Expect the run to fail and the logs to report the type mismatch."""
        # DATETIME comes first in this row, followed by the placeholder name
        # for the unknown type.
        expected_msg = "DATETIME | custom type unknown to dbt (OID 16552) | data type mismatch"
        _, logs = run_dbt_and_capture(["run"], expect_pass=False)
        # No debug print of `logs` here: a failing `in` assertion already
        # shows both operands in pytest's report.
        assert expected_msg in logs

0 comments on commit e837a48

Please sign in to comment.