From 8762cd4376ab304c673a4571755b4af21ba44f72 Mon Sep 17 00:00:00 2001 From: lookslikeitsnot Date: Sat, 25 Mar 2023 11:26:56 +0100 Subject: [PATCH 1/5] Add regex error tests --- .../models/schema_tests/schema.yml | 119 ++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/integration_tests/models/schema_tests/schema.yml b/integration_tests/models/schema_tests/schema.yml index 998ece2..6aedbe3 100644 --- a/integration_tests/models/schema_tests/schema.yml +++ b/integration_tests/models/schema_tests/schema.yml @@ -6,53 +6,172 @@ models: tests: - dbt_expectations.expect_column_values_to_match_regex: regex: "@[^.]*" + - dbt_expectations.expect_column_values_to_match_regex: + regex: "&[^.]*" + config: + error_if: "=0" + warn_if: "<4" - dbt_expectations.expect_column_values_to_match_regex: regex: "[A-Z]" flags: i config: enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}" + - dbt_expectations.expect_column_values_to_match_regex: + regex: "&[^.]*" + flags: i + config: + enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}" + error_if: "=0" + warn_if: "<4" - dbt_expectations.expect_column_values_to_not_match_regex: regex: "&[^.]*" + - dbt_expectations.expect_column_values_to_not_match_regex: + regex: "@[^.]*" + config: + error_if: "=0" + warn_if: "<4" + - dbt_expectations.expect_column_values_to_not_match_regex: + regex: "[A-Z]" - dbt_expectations.expect_column_values_to_not_match_regex: regex: "[A-Z]" + flags: i + config: + error_if: "=0" + warn_if: "<4" - dbt_expectations.expect_column_values_to_match_regex_list: regex_list: ["@[^.]*", "&[^.]*"] + - dbt_expectations.expect_column_values_to_match_regex_list: + regex_list: ["#[^.]*", "&[^.]*"] + config: + error_if: "=0" + warn_if: "<4" - dbt_expectations.expect_column_values_to_match_regex_list: regex_list: ["[A-G]", "[H-Z]"] flags: i config: enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}" + - dbt_expectations.expect_column_values_to_match_regex_list: + 
regex_list: ["![A-G]", "![H-Z]"] + flags: i + config: + enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}" + error_if: "=0" + warn_if: "<4" + - dbt_expectations.expect_column_values_to_not_match_regex_list: + regex_list: ["@[^.]*", "&[^.]*"] + - dbt_expectations.expect_column_values_to_not_match_regex_list: + regex_list: ["@[^.]*", "@[^.]*"] + config: + error_if: "=0" + warn_if: "<4" + - dbt_expectations.expect_column_values_to_not_match_regex_list: + regex_list: ["&[^.]*", "&[^.]*"] + match_on: all - dbt_expectations.expect_column_values_to_not_match_regex_list: regex_list: ["@[^.]*", "&[^.]*"] + match_on: all + config: + error_if: "=0" + warn_if: "<4" - dbt_expectations.expect_column_values_to_not_match_regex_list: regex_list: ["[A-G]", "[H-Z]"] + - dbt_expectations.expect_column_values_to_not_match_regex_list: + regex_list: ["[N-Q]", "[R-Z]"] + flags: i + - dbt_expectations.expect_column_values_to_not_match_regex_list: + regex_list: ["[A-G]", "[N-Q]", "[R-Z]"] + flags: i + match_on: all + config: + error_if: "=0" + warn_if: "<4" - dbt_expectations.expect_column_values_to_match_like_pattern: like_pattern: "%@%" + - dbt_expectations.expect_column_values_to_match_like_pattern: + like_pattern: "%&%" + config: + error_if: "=0" + warn_if: "<4" - dbt_expectations.expect_column_values_to_not_match_like_pattern: like_pattern: "%&%" + - dbt_expectations.expect_column_values_to_not_match_like_pattern: + like_pattern: "%@%" + config: + error_if: "=0" + warn_if: "<4" + - dbt_expectations.expect_column_values_to_match_like_pattern_list: + like_pattern_list: ["%@%", "%&%"] - dbt_expectations.expect_column_values_to_match_like_pattern_list: like_pattern_list: ["%@%", "%&%"] + match_on: all + config: + error_if: "=0" + warn_if: "<4" + - dbt_expectations.expect_column_values_to_not_match_like_pattern_list: + like_pattern_list: ["%@%", "%&%"] - dbt_expectations.expect_column_values_to_not_match_like_pattern_list: like_pattern_list: ["%@%", "%&%"] + match_on: 
all + config: + error_if: "=0" + warn_if: "<4" - name: postal_code_5 tests: - dbt_expectations.expect_column_values_to_match_regex: regex: "^\\d{5}" is_raw: True + - dbt_expectations.expect_column_values_to_match_regex: + regex: "^\\d{55}" + is_raw: True + config: + error_if: "=0" + warn_if: "<4" - dbt_expectations.expect_column_values_to_not_match_regex: regex: "@[^.]*" is_raw: True + - dbt_expectations.expect_column_values_to_not_match_regex: + regex: "^\\d{5}" + is_raw: True + config: + error_if: "=0" + warn_if: "<4" - dbt_expectations.expect_column_values_to_match_regex_list: regex_list: ["^\\d{5}"] is_raw: True + - dbt_expectations.expect_column_values_to_match_regex_list: + regex_list: ["^\\d{5}", "@[^.]*"] + is_raw: True + - dbt_expectations.expect_column_values_to_match_regex_list: + regex_list: ["^\\d{5}", "@[^.]*"] + is_raw: True + match_on: all + config: + error_if: "=0" + warn_if: "<4" - dbt_expectations.expect_column_values_to_not_match_regex_list: regex_list: ["@[^.]*"] is_raw: True + - dbt_expectations.expect_column_values_to_not_match_regex_list: + regex_list: ["^\\d{5}", "@[^.]*"] + is_raw: True + - dbt_expectations.expect_column_values_to_not_match_regex_list: + regex_list: ["^\\d{5}", "@[^.]*"] + is_raw: True + match_on: all + config: + error_if: "=0" + warn_if: "<4" - name: postal_code_5_3 tests: - dbt_expectations.expect_column_values_to_match_regex: regex: "^\\d{5}-\\d{3}" is_raw: True + - dbt_expectations.expect_column_values_to_match_regex: + regex: "^\\d{5}-\\d{9}" + is_raw: True + config: + error_if: "=0" + warn_if: "<4" - name: timeseries_data From 48450303d4b8b809fbd2da032a24d2c38b729202 Mon Sep 17 00:00:00 2001 From: lookslikeitsnot Date: Sat, 25 Mar 2023 11:28:38 +0100 Subject: [PATCH 2/5] Fix postgres__regexp_instr --- macros/regex/regexp_instr.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/regex/regexp_instr.sql b/macros/regex/regexp_instr.sql index bbc21cb..df55070 100644 --- 
a/macros/regex/regexp_instr.sql +++ b/macros/regex/regexp_instr.sql @@ -38,7 +38,7 @@ regexp_instr({{ source_value }}, {{ regexp }}, {{ position }}, {{ occurrence }}) {# Postgres does not need to escape raw strings #} {% macro postgres__regexp_instr(source_value, regexp, position, occurrence, is_raw, flags) %} {% if flags %}{{ dbt_expectations._validate_flags(flags, 'bcegimnpqstwx') }}{% endif %} -array_length((select regexp_matches({{ source_value }}, '{{ regexp }}', '{{ flags }}')), 1) +coalesce(array_length((select regexp_matches({{ source_value }}, '{{ regexp }}', '{{ flags }}')), 1), 0) {% endmacro %} {# Unclear what Redshift does to escape raw strings #} From 87bd8d3814900d0f5ee02db51aeb624d9fc74d9c Mon Sep 17 00:00:00 2001 From: lookslikeitsnot Date: Sat, 25 Mar 2023 16:19:06 +0100 Subject: [PATCH 3/5] cleaned up test regex --- integration_tests/models/schema_tests/schema.yml | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/integration_tests/models/schema_tests/schema.yml b/integration_tests/models/schema_tests/schema.yml index 6aedbe3..94360cd 100644 --- a/integration_tests/models/schema_tests/schema.yml +++ b/integration_tests/models/schema_tests/schema.yml @@ -17,8 +17,8 @@ models: config: enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}" - dbt_expectations.expect_column_values_to_match_regex: - regex: "&[^.]*" - flags: i + regex: "[A-Z]" + flags: c config: enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}" error_if: "=0" @@ -33,8 +33,7 @@ models: - dbt_expectations.expect_column_values_to_not_match_regex: regex: "[A-Z]" - dbt_expectations.expect_column_values_to_not_match_regex: - regex: "[A-Z]" - flags: i + regex: "[a-z]" config: error_if: "=0" warn_if: "<4" @@ -51,8 +50,8 @@ models: config: enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}" - dbt_expectations.expect_column_values_to_match_regex_list: - regex_list: ["![A-G]", "![H-Z]"] - flags: i + regex_list: 
["[A-G]", "[H-Z]"] + flags: c config: enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}" error_if: "=0" @@ -76,10 +75,10 @@ models: - dbt_expectations.expect_column_values_to_not_match_regex_list: regex_list: ["[A-G]", "[H-Z]"] - dbt_expectations.expect_column_values_to_not_match_regex_list: - regex_list: ["[N-Q]", "[R-Z]"] + regex_list: ["[A-Z]", "[0-9]"] flags: i - dbt_expectations.expect_column_values_to_not_match_regex_list: - regex_list: ["[A-G]", "[N-Q]", "[R-Z]"] + regex_list: ["[A-Z]", "[0-9]"] flags: i match_on: all config: From 58a5a8e0b892e699945617c84e34373f5e5ff7de Mon Sep 17 00:00:00 2001 From: lookslikeitsnot Date: Mon, 27 Mar 2023 09:49:00 +0200 Subject: [PATCH 4/5] specified target adapters for regex flagged tests --- integration_tests/models/schema_tests/schema.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/integration_tests/models/schema_tests/schema.yml b/integration_tests/models/schema_tests/schema.yml index 94360cd..db7b30d 100644 --- a/integration_tests/models/schema_tests/schema.yml +++ b/integration_tests/models/schema_tests/schema.yml @@ -77,11 +77,14 @@ models: - dbt_expectations.expect_column_values_to_not_match_regex_list: regex_list: ["[A-Z]", "[0-9]"] flags: i + config: + enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}" - dbt_expectations.expect_column_values_to_not_match_regex_list: regex_list: ["[A-Z]", "[0-9]"] flags: i match_on: all config: + enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}" error_if: "=0" warn_if: "<4" - dbt_expectations.expect_column_values_to_match_like_pattern: From 65ab547f8b8deedf3380f6093cd74ffb96abb73a Mon Sep 17 00:00:00 2001 From: Claus Herther Date: Mon, 27 Mar 2023 15:14:06 -0700 Subject: [PATCH 5/5] Add regex config comments --- .../models/schema_tests/schema.yml | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/integration_tests/models/schema_tests/schema.yml 
b/integration_tests/models/schema_tests/schema.yml index 94360cd..8f2534a 100644 --- a/integration_tests/models/schema_tests/schema.yml +++ b/integration_tests/models/schema_tests/schema.yml @@ -4,18 +4,22 @@ models: columns: - name: email_address tests: + # match email address - dbt_expectations.expect_column_values_to_match_regex: regex: "@[^.]*" + # does not match email address, should fail - dbt_expectations.expect_column_values_to_match_regex: regex: "&[^.]*" config: error_if: "=0" warn_if: "<4" + # match all uppercase, but match case-insensitive (where implemented) - dbt_expectations.expect_column_values_to_match_regex: regex: "[A-Z]" flags: i config: enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}" + # match all uppercase, case-sensitive (where implemented), should fail - dbt_expectations.expect_column_values_to_match_regex: regex: "[A-Z]" flags: c @@ -23,32 +27,40 @@ models: enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}" error_if: "=0" warn_if: "<4" + # do not match other non-email string, should pass - dbt_expectations.expect_column_values_to_not_match_regex: regex: "&[^.]*" + # match email address, should fail - dbt_expectations.expect_column_values_to_not_match_regex: regex: "@[^.]*" config: error_if: "=0" warn_if: "<4" + # match all uppercase, case-sensitive (default), should pass - dbt_expectations.expect_column_values_to_not_match_regex: regex: "[A-Z]" + # match all lowercase, case-sensitive (default), should fail - dbt_expectations.expect_column_values_to_not_match_regex: regex: "[a-z]" config: error_if: "=0" warn_if: "<4" + # do match one of email address or other non-email string - dbt_expectations.expect_column_values_to_match_regex_list: regex_list: ["@[^.]*", "&[^.]*"] + # do not match other non-email strings, should fail - dbt_expectations.expect_column_values_to_match_regex_list: regex_list: ["#[^.]*", "&[^.]*"] config: error_if: "=0" warn_if: "<4" + # match all uppercase, but match 
case-insensitive (where implemented) - dbt_expectations.expect_column_values_to_match_regex_list: regex_list: ["[A-G]", "[H-Z]"] flags: i config: enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}" + # match all uppercase, but match case-sensitive (where implemented), should fail - dbt_expectations.expect_column_values_to_match_regex_list: regex_list: ["[A-G]", "[H-Z]"] flags: c @@ -56,58 +68,76 @@ models: enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}" error_if: "=0" warn_if: "<4" + # match email address or other string - dbt_expectations.expect_column_values_to_not_match_regex_list: regex_list: ["@[^.]*", "&[^.]*"] + # match email address, should fail - dbt_expectations.expect_column_values_to_not_match_regex_list: regex_list: ["@[^.]*", "@[^.]*"] config: error_if: "=0" warn_if: "<4" + # do not match any of other non-email string - dbt_expectations.expect_column_values_to_not_match_regex_list: regex_list: ["&[^.]*", "&[^.]*"] match_on: all + # do not match any of email or other non-email string, should fail - dbt_expectations.expect_column_values_to_not_match_regex_list: regex_list: ["@[^.]*", "&[^.]*"] match_on: all config: error_if: "=0" warn_if: "<4" + # do not match all uppercase - dbt_expectations.expect_column_values_to_not_match_regex_list: regex_list: ["[A-G]", "[H-Z]"] + # do not match all uppercase or numbers, case-insensitive (where implemented) - dbt_expectations.expect_column_values_to_not_match_regex_list: regex_list: ["[A-Z]", "[0-9]"] flags: i + config: + enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}" + # do not match all uppercase and numbers, case-insensitive (where implemented) - dbt_expectations.expect_column_values_to_not_match_regex_list: regex_list: ["[A-Z]", "[0-9]"] flags: i match_on: all config: + enabled: "{{ target.type in ['postgres', 'snowflake', 'redshift' ] }}" error_if: "=0" warn_if: "<4" + # match '@' anywhere in string - 
dbt_expectations.expect_column_values_to_match_like_pattern: like_pattern: "%@%" + # match '&' anywhere in string, should fail - dbt_expectations.expect_column_values_to_match_like_pattern: like_pattern: "%&%" config: error_if: "=0" warn_if: "<4" + # do not match '&' anywhere in string - dbt_expectations.expect_column_values_to_not_match_like_pattern: like_pattern: "%&%" + # do not match '@' anywhere in string, should fail - dbt_expectations.expect_column_values_to_not_match_like_pattern: like_pattern: "%@%" config: error_if: "=0" warn_if: "<4" + # match at least one of '@' or '&' anywhere in string - dbt_expectations.expect_column_values_to_match_like_pattern_list: like_pattern_list: ["%@%", "%&%"] + # match both '@' and '&' anywhere in string, should fail - dbt_expectations.expect_column_values_to_match_like_pattern_list: like_pattern_list: ["%@%", "%&%"] match_on: all config: error_if: "=0" warn_if: "<4" + # do not match at least one of '@' or '&' anywhere in string - dbt_expectations.expect_column_values_to_not_match_like_pattern_list: like_pattern_list: ["%@%", "%&%"] + # do not match either of '@' or '&' anywhere in string, should fail - dbt_expectations.expect_column_values_to_not_match_like_pattern_list: like_pattern_list: ["%@%", "%&%"] match_on: all