forked from elementary-data/dbt-data-reliability
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Sync fork with original repository (#15)
* support count_true and count_false for boolean columns in BigQuery * support count_true and count_false for boolean columns in BigQuery * fix: change generate profile args macro name for Athena * Add unique_combination_of_columns to common_tests_configs_mapping * missing comma * fix drop_failure_percent_threshold failing a non anomalous test * Support all anomaly vars on all configuration levels * ELE-2470 temp tables are not being deleted * Add empty line at the end of a filre * fix typo * Removed default detection/training_period * Make sure we delete temp tables last * removed unused import * Not collecting metrics by default. * release 0.14.0 * fix bug when no temp tables xist * Update macros/edr/tests/test_utils/clean_elementary_test_tables.sql Co-authored-by: IDoneShaveIt <48473443+IDoneShaveIt@users.noreply.github.com> * 1. add tags to all elementary monitors 2. only run tests if elementary is enabled * rename tag * Create clean_dbt_columns_temp_tables macro * Add empty line at the end of a file * clean logs * add arg chunk_size for all insert_rows() (elementary-data#669) * release 0.14.1 * override primary_test_model_id (elementary-data#671) * added ignore_small_changes to freshness and event_freshness * improvement: bigquery specific for query_table_metrics (elementary-data#674) * improvement: bigquery specific for query_table_metrics Using information schema to get row count is much more performant than doing a full table scan * use TABLE_STORAGE and add database & schema * add empty case * add set * Add index on created_at test_result_rows and remove backfill post hook * Ele 2606 package version with caching and extra logs (elementary-data#673) * artifacts: use cache also for model post-hook * add performance logs to artifacts logic * duration monitoring - bugfix - handle the case the duration stack is not initialized * Change the aggregate of failed_row_count_calc to count(*) * Readme updates (elementary-data#684) * changes to readme * changes to 
readme * changes * changes * image url * image url * changes * formating * formating * changes * link * pre commit * improve flattening performance for dbt_columns (elementary-data#681) * improve flattening performance for dbt_columns * removed unused const * black * Add get_requires_permissions and validate_required_permissions macros * Improved messages * Fixed default__get_required_permissions + add target.database to get_relevant_databases --------- Co-authored-by: suelai <suela.isaj@gmail.com> Co-authored-by: Roman Korsun <romakorsun2000@gmail.com> Co-authored-by: Yasuhisa Yoshida <syou6162@gmail.com> Co-authored-by: Ofek Weiss <55920061+ofek1weiss@users.noreply.github.com> Co-authored-by: Ofek Weiss <ofek1weiss@gmail.com> Co-authored-by: IDoneShaveIt <idanshavit31@gmail.com> Co-authored-by: IDoneShaveIt <48473443+IDoneShaveIt@users.noreply.github.com> Co-authored-by: Elon Gliksberg <elongliks@gmail.com> Co-authored-by: GitHub Actions <noreply@github.com> Co-authored-by: Ella Katz <ella@elementary-data.com> Co-authored-by: J.C <zhang@sansan.com> Co-authored-by: Noy Arie <noyarie1992@gmail.com> Co-authored-by: Chris Dong <86695140+dongchris@users.noreply.github.com> Co-authored-by: noakurman <kurman.noa@gmail.com> Co-authored-by: Itamar Hartstein <haritamar@gmail.com> Co-authored-by: Maayan Salom <maayansalom@gmail.com>
- Loading branch information
1 parent
fad76af
commit fb111d4
Showing
57 changed files
with
763 additions
and
139 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
name: "elementary" | ||
version: "0.13.2" | ||
version: "0.14.1" | ||
|
||
require-dbt-version: [">=1.0.0", "<2.0.0"] | ||
|
||
|
22 changes: 22 additions & 0 deletions
22
integration_tests/dbt_project/macros/get_anomaly_config.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
{% macro get_anomaly_config(model_config, config) %}
  {# Resolve the final anomaly-test configuration elementary would compute for a
     model whose `elementary` config is `model_config` and a test invoked with
     the arguments in `config`. Used by integration tests to assert precedence
     between vars-, model- and test-level settings. #}
  {% set mock_model = {
    "alias": "mock_model",
    "config": {
      "elementary": model_config
    }
  } %}
  {# trick elementary into thinking this is the running model #}
  {# The current test "depends on" node "id", and the graph maps "id" to the
     mock model above, so elementary resolves model-level config from it. #}
  {% do context.update({
    "model": {
      "depends_on": {
        "nodes": ["id"]
      }
    },
    "graph": {
      "nodes": {
        "id": mock_model
      }
    }
  }) %}
  {# NOTE(review): assumes get_anomalies_test_configuration returns a tuple/list
     whose first element is the resolved config — confirm against elementary. #}
  {% do return(elementary.get_anomalies_test_configuration(api.Relation.create("db", "schema", "mock_model"), **config)[0]) %}
{% endmacro %}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
126 changes: 126 additions & 0 deletions
126
integration_tests/tests/test_anomaly_test_configuration.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
import json | ||
from dataclasses import dataclass | ||
from typing import Generic, Literal, TypeVar | ||
|
||
from dbt_project import DbtProject | ||
from parametrization import Parametrization | ||
|
||
T = TypeVar("T")


@dataclass
class ParamValues(Generic[T]):
    """A single anomaly-test parameter's value at each of the three
    configuration levels elementary supports.

    The test cases below layer these levels so that the most specific
    one present (test over model over vars) should appear in the
    resolved configuration.
    """

    vars: T   # value set via dbt vars (least specific level)
    model: T  # value set in the model's `elementary` config
    test: T   # value passed directly as a test argument (most specific)
|
||
|
||
# One entry per anomaly-test parameter, giving a distinct value at each
# configuration level so the precedence winner is detectable in the
# resolved config produced by elementary.
PARAM_VALUES = {
    "timestamp_column": ParamValues(
        "vars.updated_at", "model.updated_at", "test.updated_at"
    ),
    "where_expression": ParamValues(
        "where = 'var'", "where = 'model'", "where = 'test'"
    ),
    "anomaly_sensitivity": ParamValues(1, 2, 3),
    "anomaly_direction": ParamValues("spike", "drop", "both"),
    "min_training_set_size": ParamValues(10, 20, 30),
    "time_bucket": ParamValues(
        {"count": 1, "period": "day"},
        {"count": 1, "period": "hour"},
        {"count": 1, "period": "day"},
    ),
    "backfill_days": ParamValues(30, 60, 90),
    "seasonality": ParamValues("day_of_week", "hour_of_day", "day_of_week"),
    "event_timestamp_column": ParamValues(
        "vars.updated_at", "model.updated_at", "test.updated_at"
    ),
    "ignore_small_changes": ParamValues(
        {"spike_failure_percent_threshold": 10, "drop_failure_percent_threshold": 10},
        {"spike_failure_percent_threshold": 20, "drop_failure_percent_threshold": 20},
        {"spike_failure_percent_threshold": 30, "drop_failure_percent_threshold": 30},
    ),
    "fail_on_zero": ParamValues(True, False, True),
    "detection_delay": ParamValues(
        {"count": 1, "period": "day"},
        {"count": 2, "period": "day"},
        {"count": 3, "period": "day"},
    ),
    "anomaly_exclude_metrics": ParamValues(
        "where = 'var'", "where = 'model'", "where = 'test'"
    ),
    # detection_period / training_period are the newer names that map onto
    # the legacy backfill_days / days_back fields in the adapted config
    # (see _get_expected_adapted_config).
    "detection_period": ParamValues(
        {"count": 1, "period": "day"},
        {"count": 2, "period": "day"},
        {"count": 3, "period": "day"},
    ),
    "training_period": ParamValues(
        {"count": 30, "period": "day"},
        {"count": 60, "period": "day"},
        {"count": 90, "period": "day"},
    ),
}
|
||
|
||
def _get_expected_adapted_config(values_type: Literal["vars", "model", "test"]):
    """Build the adapted config elementary is expected to produce when every
    parameter is taken from the given configuration level."""

    def value_at_level(key: str):
        # Plain attribute access on the dataclass instance.
        return getattr(PARAM_VALUES[key], values_type)

    seasonality = value_at_level("seasonality")
    # Weekly seasonalities multiply the lookback by 7.
    # NOTE(review): the tested seasonality values are day_of_week/hour_of_day;
    # "hour_of_week" here never matches those — confirm it is intentional.
    week_multiplier = 7 if seasonality in ("day_of_week", "hour_of_week") else 1

    return {
        "timestamp_column": value_at_level("timestamp_column"),
        "where_expression": value_at_level("where_expression"),
        "anomaly_sensitivity": value_at_level("anomaly_sensitivity"),
        "anomaly_direction": value_at_level("anomaly_direction"),
        "time_bucket": value_at_level("time_bucket"),
        # training_period/detection_period resolve into the legacy
        # days_back/backfill_days fields of the adapted config.
        "days_back": value_at_level("training_period")["count"] * week_multiplier,
        "backfill_days": value_at_level("detection_period")["count"],
        "seasonality": seasonality,
        "event_timestamp_column": value_at_level("event_timestamp_column"),
        "ignore_small_changes": value_at_level("ignore_small_changes"),
        "fail_on_zero": value_at_level("fail_on_zero"),
        "detection_delay": value_at_level("detection_delay"),
        "anomaly_exclude_metrics": value_at_level("anomaly_exclude_metrics"),
        "freshness_column": None,  # Deprecated
        "dimensions": None,  # should only be set at the test level
    }
|
||
|
||
@Parametrization.autodetect_parameters()
@Parametrization.case(
    name="vars",
    vars_config={key: value.vars for key, value in PARAM_VALUES.items()},
    model_config={},
    test_config={},
    expected_config=_get_expected_adapted_config("vars"),
)
@Parametrization.case(
    name="model",
    vars_config={key: value.vars for key, value in PARAM_VALUES.items()},
    model_config={key: value.model for key, value in PARAM_VALUES.items()},
    test_config={},
    expected_config=_get_expected_adapted_config("model"),
)
@Parametrization.case(
    name="test",
    vars_config={key: value.vars for key, value in PARAM_VALUES.items()},
    model_config={key: value.model for key, value in PARAM_VALUES.items()},
    test_config={key: value.test for key, value in PARAM_VALUES.items()},
    expected_config=_get_expected_adapted_config("test"),
)
def test_anomaly_test_configuration(
    dbt_project: DbtProject,
    vars_config: dict,
    model_config: dict,
    test_config: dict,
    expected_config: dict,
) -> None:
    """Each case layers one more configuration level on top of the previous
    one and expects the most specific level present to win."""
    dbt_project.dbt_runner.vars.update(vars_config)
    # The macro returns the resolved config as a JSON string in the first
    # element of the run_operation result.
    raw_output = dbt_project.dbt_runner.run_operation(
        "elementary_tests.get_anomaly_config",
        macro_args={"model_config": model_config, "config": test_config},
    )[0]
    assert json.loads(raw_output) == expected_config
File renamed without changes.
64 changes: 64 additions & 0 deletions
64
integration_tests/tests/test_dbt_artifacts/test_columns.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import json | ||
from typing import List, Optional | ||
|
||
from dbt_project import DbtProject | ||
from parametrization import Parametrization | ||
|
||
# A minimal dbt table node containing only the `columns` mapping, covering
# the four description states flatten_table_columns must distinguish:
# present, absent, empty string, and explicit null.
TABLE_NODE = {
    "columns": {
        "with_description": {
            "name": "with_description",
            "description": "This column has a description",
        },
        "without_description": {
            "name": "without_description",
        },
        "with_empty_description": {
            "name": "with_empty_description",
            "description": "",
        },
        "with_null_description": {
            "name": "with_null_description",
            "description": None,
        },
    }
}
|
||
|
||
@Parametrization.autodetect_parameters()
@Parametrization.case(
    name="default",
    only_with_description=None,
    expected_columns=["with_description"],
)
@Parametrization.case(
    name="only_with_description",
    only_with_description=True,
    expected_columns=["with_description"],
)
@Parametrization.case(
    name="all",
    only_with_description=False,
    expected_columns=[
        "with_description",
        "without_description",
        "with_empty_description",
        "with_null_description",
    ],
)
def test_flatten_table_columns(
    dbt_project: DbtProject,
    only_with_description: Optional[bool],
    expected_columns: List[str],
) -> None:
    """Columns lacking a non-empty description are dropped by default and
    kept only when `upload_only_columns_with_descriptions` is disabled."""
    # None means "leave the var unset" so the default behavior is exercised.
    if only_with_description is not None:
        dbt_project.dbt_runner.vars[
            "upload_only_columns_with_descriptions"
        ] = only_with_description
    raw_output = dbt_project.dbt_runner.run_operation(
        "elementary.flatten_table_columns", macro_args={"table_node": TABLE_NODE}
    )[0]
    flattened_names = [column["name"] for column in json.loads(raw_output)]
    assert flattened_names == expected_columns
Oops, something went wrong.