From d3e6f256e6c35d03166bb9bcac62f9492593109a Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 12 Jul 2023 18:34:55 +0300 Subject: [PATCH 01/21] mypy: fixed all remaining issues --- dev-requirements.txt | 15 ++++ elementary/cli/upgrade.py | 4 ++ elementary/clients/dbt/base_dbt_runner.py | 2 +- elementary/clients/dbt/slim_dbt_runner.py | 49 ++++++++++--- elementary/clients/slack/client.py | 22 ++++-- .../clients/slack/slack_message_builder.py | 2 +- elementary/monitor/alerts/alert.py | 25 ++++--- elementary/monitor/alerts/alerts.py | 4 +- elementary/monitor/alerts/group_of_alerts.py | 30 ++++---- elementary/monitor/alerts/model.py | 11 ++- elementary/monitor/alerts/source_freshness.py | 11 ++- elementary/monitor/alerts/test.py | 15 ++-- elementary/monitor/api/filters/filters.py | 2 +- elementary/monitor/api/models/models.py | 8 +-- elementary/monitor/api/models/schema.py | 35 +--------- elementary/monitor/api/report/report.py | 14 ++-- elementary/monitor/api/selector/schema.py | 4 +- .../api/test_management/test_management.py | 4 +- elementary/monitor/api/tests/schema.py | 41 ++--------- elementary/monitor/api/tests/tests.py | 68 ++++++++++++++----- elementary/monitor/api/totals_schema.py | 31 +++++++++ .../data_monitoring/data_monitoring.py | 3 +- .../report/data_monitoring_report.py | 8 +-- .../slack_report_summary_message_builder.py | 12 ++-- elementary/monitor/data_monitoring/schema.py | 2 +- .../fetchers/alerts/normalized_alert.py | 6 +- elementary/py.typed | 0 elementary/tracking/tracking_interface.py | 39 ++++++----- mypy.ini | 1 + scripts/dbt_log_to_elementary_alerts.py | 5 +- 30 files changed, 275 insertions(+), 198 deletions(-) create mode 100644 elementary/monitor/api/totals_schema.py create mode 100644 elementary/py.typed diff --git a/dev-requirements.txt b/dev-requirements.txt index c63175ded..0dbad9a70 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -11,3 +11,18 @@ boto3-stubs google-auth-stubs ratelimit-stubs types-google-cloud-ndb +types-PyYAML +types-Pygments +types-cffi +types-colorama +types-decorator +types-entrypoints +types-jsonschema +types-paramiko +types-protobuf +types-psutil +types-pytz +types-pywin32 +types-six +types-typed-ast +types-setuptools \ No newline at end of file diff --git a/elementary/cli/upgrade.py b/elementary/cli/upgrade.py index a1180db9d..2998369c8 100644 --- a/elementary/cli/upgrade.py +++ b/elementary/cli/upgrade.py @@ -10,6 +10,10 @@ def recommend_version_upgrade(): try: latest_version = package.get_latest_package_version() current_version = package.get_package_version() + if not latest_version or not current_version: + # Failed to parse versions, so skip the check + return + if version.parse(current_version) < version.parse(latest_version): click.secho( f"You are using Elementary {current_version}, however version {latest_version} is available.\n" diff --git a/elementary/clients/dbt/base_dbt_runner.py b/elementary/clients/dbt/base_dbt_runner.py index 181aae60e..49557c3d7 100644 --- a/elementary/clients/dbt/base_dbt_runner.py +++ b/elementary/clients/dbt/base_dbt_runner.py @@ -6,7 +6,7 @@ class BaseDbtRunner(ABC): def __init__( self, project_dir: str, - profiles_dir: Optional[str] = None, + profiles_dir: Optional[str], target: Optional[str] = None, vars: Optional[dict] = None, secret_vars: Optional[dict] = None, diff --git a/elementary/clients/dbt/slim_dbt_runner.py b/elementary/clients/dbt/slim_dbt_runner.py index 74ca97740..5d4fca4ca 100644 --- a/elementary/clients/dbt/slim_dbt_runner.py +++ b/elementary/clients/dbt/slim_dbt_runner.py @@ -2,9 +2,10 @@ import json import os from pathlib import Path -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional, Union, cast import dbt.adapters.factory +from dbt.adapters.base import BaseAdapter, BaseConnectionManager from packaging import version # IMPORTANT: This must be kept before the rest of the dbt imports @@ -82,10 +83,12 @@ def __init__( **kwargs, ): super().__init__(project_dir, profiles_dir, target, vars, secret_vars) - self.config = None - self.adapter = None - self.adapter_name = None - self.project_parser = None + + self.config: Optional[RuntimeConfig] = None + self.adapter: Optional[BaseAdapter] = None + self.adapter_name: Optional[str] = None + self.connections_manager: Optional[BaseConnectionManager] = None + self.project_parser: Optional[ManifestLoader] = None self.manifest = None def _load_runner( @@ -126,23 +129,41 @@ def _load_config(self): self.config = RuntimeConfig.from_args(self.args) def _load_adapter(self): + if not self.config: + raise Exception("Config not loaded") + register_adapter(self.config) self.adapter_name = self.config.credentials.type - self.adapter = get_adapter_class_by_name(self.adapter_name)(self.config) - self.adapter.connections.set_connection_name() - self.config.adapter = self.adapter + self.adapter = cast( + BaseAdapter, get_adapter_class_by_name(self.adapter_name)(self.config) + ) + + self.connections_manager = cast(BaseConnectionManager, self.adapter.connections) + self.connections_manager.set_connection_name() + + self.config.adapter = self.adapter # type: ignore[attr-defined] def _load_manifest(self): + if not self.config: + raise Exception("Config not loaded") + if not self.adapter or not self.connections_manager: + raise Exception("Adapter not loaded") + if not self.manifest: + raise Exception("Manifest not loaded") + self.project_parser = ManifestLoader( self.config, self.config.load_dependencies(), - self.adapter.connections.set_query_header, + self.connections_manager.set_query_header, ) self.manifest = self.project_parser.load() self.manifest.build_flat_graph() self.project_parser.save_macros_to_adapter(self.adapter) def _execute_macro(self, macro_name, **kwargs): + if not self.adapter: + raise Exception("Adapter not loaded") + if "." in macro_name: package_name, actual_macro_name = macro_name.split(".", 1) else: @@ -157,18 +178,24 @@ def _execute_macro(self, macro_name, **kwargs): ) def close_connection(self): - self.adapter.connections.cleanup_all() + if self.connections_manager: + self.connections_manager.cleanup_all() def run_operation( self, macro_name: str, capture_output: bool = True, - macro_args: dict = dict(), + macro_args: Optional[dict] = None, log_errors: bool = True, vars: Optional[dict] = None, quiet: bool = False, **kwargs, ) -> list: + if self.profiles_dir is None: + raise Exception("profiles_dir must be passed to SlimDbtRunner") + + macro_args = macro_args or {} + all_vars = self._get_all_vars(vars) self._load_runner( project_dir=self.project_dir, diff --git a/elementary/clients/slack/client.py b/elementary/clients/slack/client.py index e0764fa22..acaacdbe2 100644 --- a/elementary/clients/slack/client.py +++ b/elementary/clients/slack/client.py @@ -22,12 +22,8 @@ class SlackClient(ABC): def __init__( self, - token: Optional[str] = None, - webhook: Optional[str] = None, tracking: Optional[Tracking] = None, - ) -> None: - self.token = token - self.webhook = webhook + ): self.client = self._initial_client() self.tracking = tracking self._initial_retry_handlers() @@ -73,6 +69,14 @@ def get_user_id_from_email(self, email: str) -> Optional[str]: class SlackWebClient(SlackClient): + def __init__( + self, + token: str, + tracking: Optional[Tracking] = None, + ): + super().__init__(tracking) + self.token = token + def _initial_client(self): return WebClient(token=self.token) @@ -203,6 +207,14 @@ def _handle_send_err(self, err: SlackApiError, channel_name: str) -> bool: class SlackWebhookClient(SlackClient): + def __init__( + self, + webhook: str, + tracking: Optional[Tracking] = None, + ): + super().__init__(tracking) + self.webhook = webhook + def _initial_client(self): return WebhookClient( url=self.webhook, default_headers={"Content-type": "application/json"} diff --git a/elementary/clients/slack/slack_message_builder.py b/elementary/clients/slack/slack_message_builder.py index 26d3ee185..19808dc2a 100644 --- a/elementary/clients/slack/slack_message_builder.py +++ b/elementary/clients/slack/slack_message_builder.py @@ -151,7 +151,7 @@ def get_slack_status_icon(status: Optional[str]) -> str: icon = ":x:" return icon - def get_slack_message(self) -> SlackMessageSchema: + def get_slack_message(self, *args, **kwargs) -> SlackMessageSchema: return SlackMessageSchema(**self.slack_message) @staticmethod diff --git a/elementary/monitor/alerts/alert.py b/elementary/monitor/alerts/alert.py index c7cfe3ef8..bf1e0361e 100644 --- a/elementary/monitor/alerts/alert.py +++ b/elementary/monitor/alerts/alert.py @@ -7,7 +7,7 @@ from elementary.monitor.alerts.schema.alert import AlertSuppressionSchema from elementary.utils.json_utils import try_load_json from elementary.utils.log import get_logger -from elementary.utils.time import convert_utc_iso_format_to_datetime +from elementary.utils.time import DATETIME_FORMAT, convert_utc_iso_format_to_datetime logger = get_logger(__name__) @@ -57,14 +57,17 @@ def __init__( ) except Exception: logger.error('Failed to parse "detected_at" field.') + self.detected_at_str = ( + self.detected_at.strftime(DATETIME_FORMAT) if self.detected_at else "N/A" + ) self.database_name = database_name self.schema_name = schema_name - self.owners = owners - self.tags = tags + self.owners: List[str] = owners or [] + self.tags: List[str] = tags or [] self.meta = test_meta self.model_meta = try_load_json(model_meta) or {} self.status = status - self.subscribers = subscribers + self.subscribers: List[str] = subscribers or [] self.slack_channel = slack_channel self.alert_suppression_interval = alert_suppression_interval self.alert_fields = alert_fields @@ -81,7 +84,7 @@ def to_slack(self, is_slack_workflow: bool = False) -> SlackMessageSchema: raise NotImplementedError @property - def consice_name(self): + def concise_name(self): return "Alert" @@ -124,24 +127,24 @@ def _create_slack_alert( result: Optional[SlackBlocksType] = None, configuration: Optional[SlackBlocksType] = None, ) -> SlackMessageSchema: - self._add_title_to_slack_alert(title) - self._add_preview_to_slack_alert(preview) - self._add_details_to_slack_alert(result, configuration) + self.add_title_to_slack_alert(title) + self.add_preview_to_slack_alert(preview) + self.add_details_to_slack_alert(result, configuration) return super().get_slack_message() - def _add_title_to_slack_alert(self, title_blocks: Optional[SlackBlocksType] = None): + def add_title_to_slack_alert(self, title_blocks: Optional[SlackBlocksType] = None): if title_blocks: title = [*title_blocks, self.create_divider_block()] self._add_always_displayed_blocks(title) - def _add_preview_to_slack_alert( + def add_preview_to_slack_alert( self, preview_blocks: Optional[SlackBlocksType] = None ): if preview_blocks: validated_preview_blocks = self._validate_preview_blocks(preview_blocks) self._add_blocks_as_attachments(validated_preview_blocks) - def _add_details_to_slack_alert( + def add_details_to_slack_alert( self, result: Optional[SlackBlocksType] = None, configuration: Optional[SlackBlocksType] = None, diff --git a/elementary/monitor/alerts/alerts.py b/elementary/monitor/alerts/alerts.py index 55d596667..7261ae259 100644 --- a/elementary/monitor/alerts/alerts.py +++ b/elementary/monitor/alerts/alerts.py @@ -1,6 +1,6 @@ from collections import defaultdict from dataclasses import dataclass -from typing import Generic, List, Optional, Union +from typing import DefaultDict, Generic, List, Optional, Union from elementary.monitor.alerts.alert import Alert, AlertType from elementary.monitor.alerts.malformed import MalformedAlert @@ -55,7 +55,7 @@ def get_all(self) -> List[Alert]: ) def get_elementary_test_count(self): - elementary_test_count = defaultdict(int) + elementary_test_count: DefaultDict[str, int] = defaultdict(int) for test_result in self.tests.alerts: if isinstance(test_result, ElementaryTestAlert): elementary_test_count[test_result.test_name] += 1 diff --git a/elementary/monitor/alerts/group_of_alerts.py b/elementary/monitor/alerts/group_of_alerts.py index 603141aa6..245fecdae 100644 --- a/elementary/monitor/alerts/group_of_alerts.py +++ b/elementary/monitor/alerts/group_of_alerts.py @@ -12,7 +12,6 @@ from elementary.monitor.fetchers.alerts.normalized_alert import CHANNEL_KEY from elementary.utils.json_utils import ( list_of_lists_of_strings_to_comma_delimited_unique_strings, - try_load_json, ) from elementary.utils.models import get_shortened_model_name @@ -109,7 +108,7 @@ def _fill_components_to_alerts(self): test_errors = [] test_warnings = [] test_failures = [] - model_errors = [] + model_errors: List[Alert] = [] for alert in self.alerts: if isinstance(alert, ModelAlert): model_errors.append(alert) @@ -164,14 +163,14 @@ def to_slack(self) -> SlackMessageSchema: f":{TestErrorComponent.emoji_in_summary}: {TestErrorComponent.name_in_summary}: {len(alert_list)}" ) title_blocks.append(self._message_builder.create_context_block(fields_summary)) - self._message_builder._add_title_to_slack_alert(title_blocks=title_blocks) + self._message_builder.add_title_to_slack_alert(title_blocks=title_blocks) # attention required : tags, owners, subscribers preview_blocks = [ self._message_builder.create_text_section_block(block) for block in self._attention_required_blocks() ] + [self._message_builder.create_empty_section_block()] - self._message_builder._add_preview_to_slack_alert(preview_blocks=preview_blocks) + self._message_builder.add_preview_to_slack_alert(preview_blocks=preview_blocks) details_blocks = [] for component, alerts_list in self._components_to_alerts.items(): @@ -237,7 +236,7 @@ def _get_model_error_block_body(self) -> str: return "" def _attention_required_blocks(self): - preview_blocks = [f"*{self._db}.{self._schema}.{self._model}*"] + preview_blocks = [] for component, val in sorted( self._components_to_attention_required.items(), key=lambda x: x[0].order @@ -299,9 +298,9 @@ def _sort_channel_destination(self, default_channel): if alert.slack_channel: model_specific_channel_config = alert.slack_channel break - model_meta_data = try_load_json(alert.model_meta) - if model_meta_data and isinstance(model_meta_data, dict): - model_specific_channel_config = model_meta_data.get(CHANNEL_KEY) + + model_specific_channel_config = alert.model_meta.get(CHANNEL_KEY) + if model_specific_channel_config: break if model_specific_channel_config: @@ -310,7 +309,14 @@ def _sort_channel_destination(self, default_channel): self.channel_destination = default_channel def _get_tabulated_row_from_alert(self, alert: Alert) -> str: - return alert.consice_name + return alert.concise_name + + def _attention_required_blocks(self): + preview_blocks = [f"*{self._db}.{self._schema}.{self._model}*"] + preview_blocks.extend( + super(GroupOfAlertsByTable, self)._attention_required_blocks() + ) + return preview_blocks class GroupOfAlertsBySingleAlert(GroupOfAlerts): @@ -333,10 +339,10 @@ def to_slack(self): return self.alerts[0].to_slack() def set_owners(self, owners: List[str]): - self.alerts[0].owners = ", ".join(owners) + self.alerts[0].owners = owners def set_subscribers(self, subscribers: List[str]): - self.alerts[0].subscribers = ", ".join(subscribers) + self.alerts[0].subscribers = subscribers def set_tags(self, tags: List[str]): - self.alerts[0].tags = ", ".join(tags) + self.alerts[0].tags = tags diff --git a/elementary/monitor/alerts/model.py b/elementary/monitor/alerts/model.py index 5ca9ba355..d508812a7 100644 --- a/elementary/monitor/alerts/model.py +++ b/elementary/monitor/alerts/model.py @@ -3,7 +3,6 @@ from elementary.clients.slack.schema import SlackMessageSchema from elementary.monitor.alerts.alert import Alert from elementary.utils.log import get_logger -from elementary.utils.time import DATETIME_FORMAT logger = get_logger(__name__) @@ -61,7 +60,7 @@ def _model_to_slack(self): ), self.slack_message_builder.create_context_block( [ - f"*Time:* {self.detected_at.strftime(DATETIME_FORMAT)} |", + f"*Time:* {self.detected_at_str} |", f"*Suppression interval:* {self.alert_suppression_interval} hours", ], ), @@ -73,7 +72,7 @@ def _model_to_slack(self): [ f"*Model:* {self.alias} |", f"*Status:* {self.status} |", - f"*{self.detected_at.strftime(DATETIME_FORMAT)}*", + f"*{self.detected_at_str}*", ], ), ) @@ -152,7 +151,7 @@ def _snapshot_to_slack(self): ), self.slack_message_builder.create_context_block( [ - f"*Time:* {self.detected_at.strftime(DATETIME_FORMAT)} |", + f"*Time:* {self.detected_at_str} |", f"*Suppression interval:* {self.alert_suppression_interval} hours", ], ), @@ -164,7 +163,7 @@ def _snapshot_to_slack(self): [ f"*Snapshot:* {self.alias} |", f"*Status:* {self.status} |", - f"*{self.detected_at.strftime(DATETIME_FORMAT)}*", + f"*{self.detected_at_str}*", ], ), ) @@ -206,7 +205,7 @@ def _snapshot_to_slack(self): ) @property - def consice_name(self): + def concise_name(self): if self.materialization == "snapshot": text = "snapshot" else: diff --git a/elementary/monitor/alerts/source_freshness.py b/elementary/monitor/alerts/source_freshness.py index 286e8e104..085b17592 100644 --- a/elementary/monitor/alerts/source_freshness.py +++ b/elementary/monitor/alerts/source_freshness.py @@ -4,10 +4,7 @@ from elementary.clients.slack.schema import SlackMessageSchema from elementary.monitor.alerts.alert import Alert from elementary.utils.log import get_logger -from elementary.utils.time import ( - DATETIME_FORMAT, - convert_datetime_utc_str_to_timezone_str, -) +from elementary.utils.time import convert_datetime_utc_str_to_timezone_str logger = get_logger(__name__) @@ -76,7 +73,7 @@ def to_slack(self, is_slack_workflow: bool = False) -> SlackMessageSchema: ), self.slack_message_builder.create_context_block( [ - f"*Time:* {self.detected_at.strftime(DATETIME_FORMAT) if self.detected_at else 'N/A'} |", + f"*Time:* {self.detected_at_str} |", f"*Suppression interval:* {self.alert_suppression_interval} hours", ], ), @@ -88,7 +85,7 @@ def to_slack(self, is_slack_workflow: bool = False) -> SlackMessageSchema: [ f"*Source:* {self.source_name}.{self.identifier} |", f"*Status:* {self.status} |", - f"*{self.detected_at.strftime(DATETIME_FORMAT) if self.detected_at else 'N/A'}*", + f"*{self.detected_at_str}*", ], ), ) @@ -166,5 +163,5 @@ def to_slack(self, is_slack_workflow: bool = False) -> SlackMessageSchema: ) @property - def consice_name(self): + def concise_name(self): return f"source freshness alert - {self.source_name}.{self.identifier}" diff --git a/elementary/monitor/alerts/test.py b/elementary/monitor/alerts/test.py index 692d8f3fb..4e67c8358 100644 --- a/elementary/monitor/alerts/test.py +++ b/elementary/monitor/alerts/test.py @@ -20,7 +20,6 @@ ) from elementary.utils.json_utils import try_load_json from elementary.utils.log import get_logger -from elementary.utils.time import DATETIME_FORMAT logger = get_logger(__name__) @@ -33,7 +32,7 @@ def __init__( self, model_unique_id: str, test_unique_id: str, - test_name: Optional[str] = None, + test_name: str, test_created_at: Optional[str] = None, test_meta: Optional[str] = None, **kwargs, @@ -140,7 +139,7 @@ def to_slack(self, is_slack_workflow: bool = False) -> SlackMessageSchema: ), self.slack_message_builder.create_context_block( [ - f"*Time*: {self.detected_at.strftime(DATETIME_FORMAT) if self.detected_at else 'N/A'} |", + f"*Time*: {self.detected_at_str} |", f"*Suppression interval:* {self.alert_suppression_interval} hours", ], ), @@ -152,7 +151,7 @@ def to_slack(self, is_slack_workflow: bool = False) -> SlackMessageSchema: [ f"*Test:* {self.test_short_name or self.test_name} - {self.test_sub_type_display_name} |", f"*Status:* {self.status} |", - f"*{self.detected_at.strftime(DATETIME_FORMAT) if self.detected_at else 'N/A'}*", + f"*{self.detected_at_str}*", ], ), ) @@ -268,7 +267,7 @@ def to_slack(self, is_slack_workflow: bool = False) -> SlackMessageSchema: ) @property - def consice_name(self): + def concise_name(self): return f"{self.test_short_name or self.test_name}" @@ -302,7 +301,7 @@ def to_slack(self, is_slack_workflow: bool = False) -> SlackMessageSchema: ), self.slack_message_builder.create_context_block( [ - f"*Time*: {self.detected_at.strftime(DATETIME_FORMAT) if self.detected_at else 'N/A'} |", + f"*Time*: {self.detected_at_str} |", f"*Suppression interval:* {self.alert_suppression_interval} hours", ], ), @@ -314,7 +313,7 @@ def to_slack(self, is_slack_workflow: bool = False) -> SlackMessageSchema: [ f"*Test:* {self.test_short_name or self.test_name} - {self.test_sub_type_display_name} |", f"*Status:* {self.status} |", - f"*{self.detected_at.strftime(DATETIME_FORMAT) if self.detected_at else 'N/A'}*", + f"*{self.detected_at_str}*", ], ), ) @@ -417,5 +416,5 @@ def to_slack(self, is_slack_workflow: bool = False) -> SlackMessageSchema: ) @property - def consice_name(self): + def concise_name(self): return f"{self.test_short_name or self.test_name} - {self.test_sub_type_display_name}" diff --git a/elementary/monitor/api/filters/filters.py b/elementary/monitor/api/filters/filters.py index ba919b871..1f73b5d24 100644 --- a/elementary/monitor/api/filters/filters.py +++ b/elementary/monitor/api/filters/filters.py @@ -7,7 +7,7 @@ NormalizedModelSchema, NormalizedSourceSchema, ) -from elementary.monitor.api.tests.schema import TotalsSchema +from elementary.monitor.api.totals_schema import TotalsSchema from elementary.monitor.fetchers.models.schema import ArtifactSchema from elementary.utils.log import get_logger diff --git a/elementary/monitor/api/models/models.py b/elementary/monitor/api/models/models.py index 29dbb3cea..a0fe35f27 100644 --- a/elementary/monitor/api/models/models.py +++ b/elementary/monitor/api/models/models.py @@ -15,8 +15,8 @@ NormalizedModelSchema, NormalizedSourceSchema, TotalsModelRunsSchema, - TotalsSchema, ) +from elementary.monitor.api.totals_schema import TotalsSchema from elementary.monitor.fetchers.models.models import ModelsFetcher from elementary.monitor.fetchers.models.schema import ArtifactSchemaType, ExposureSchema from elementary.monitor.fetchers.models.schema import ( @@ -215,7 +215,7 @@ def _normalize_dbt_artifact_dict( @classmethod def _normalize_artifact_path( cls, - artifact: Union[ArtifactSchemaType], + artifact: ArtifactSchemaType, ) -> str: if artifact.full_path is None: raise Exception("Artifact full path can't be null") @@ -240,11 +240,11 @@ def _normalize_artifact_path( @classmethod def _fqn( cls, - artifact: Union[ArtifactSchemaType], + artifact: Union[ModelSchema, ExposureSchema, SourceSchema], ) -> str: if isinstance(artifact, ExposureSchema): path = (artifact.meta or {}).get("path") - name = artifact.label or artifact.name + name = artifact.label or artifact.name or "N/A" fqn = f"{path}/{name}" if path else name return fqn diff --git a/elementary/monitor/api/models/schema.py b/elementary/monitor/api/models/schema.py index dc1b81b6e..ab72e7ee5 100644 --- a/elementary/monitor/api/models/schema.py +++ b/elementary/monitor/api/models/schema.py @@ -4,6 +4,7 @@ from pydantic import BaseModel, Field, validator +from elementary.monitor.api.totals_schema import TotalsSchema from elementary.monitor.fetchers.models.schema import ( ExposureSchema, ModelSchema, @@ -69,8 +70,8 @@ def format_time_utc(cls, time_utc): class TotalsModelRunsSchema(BaseModel): - errors: Optional[int] = 0 - success: Optional[int] = 0 + errors: int = 0 + success: int = 0 class ModelRunsSchema(BaseModel): @@ -86,36 +87,6 @@ class ModelRunsSchema(BaseModel): runs: List[ModelRunSchema] -class TotalsSchema(BaseModel): - errors: Optional[int] = 0 - warnings: Optional[int] = 0 - passed: Optional[int] = 0 - failures: Optional[int] = 0 - - def add_total(self, status): - total_adders = { - "error": self._add_error, - "warn": self._add_warning, - "fail": self._add_failure, - "pass": self._add_passed, - } - adder = total_adders.get(status) - if adder: - adder() - - def _add_error(self): - self.errors += 1 - - def _add_warning(self): - self.warnings += 1 - - def _add_passed(self): - self.passed += 1 - - def _add_failure(self): - self.failures += 1 - - class ModelRunsWithTotalsSchema(BaseModel): runs: List[ModelRunsSchema] = list() totals: Dict[str, TotalsSchema] = dict() diff --git a/elementary/monitor/api/report/report.py b/elementary/monitor/api/report/report.py index 932ca6e43..9c7b92acb 100644 --- a/elementary/monitor/api/report/report.py +++ b/elementary/monitor/api/report/report.py @@ -13,11 +13,11 @@ NormalizedExposureSchema, NormalizedModelSchema, NormalizedSourceSchema, - TotalsSchema, ) from elementary.monitor.api.report.schema import ReportDataEnvSchema, ReportDataSchema from elementary.monitor.api.tests.schema import TestResultSchema, TestRunSchema from elementary.monitor.api.tests.tests import TestsAPI +from elementary.monitor.api.totals_schema import TotalsSchema from elementary.monitor.data_monitoring.schema import SelectorFilterSchema from elementary.utils.time import get_now_utc_iso_format @@ -25,8 +25,8 @@ class ReportAPI(APIClient): def get_report_data( self, - days_back: Optional[int] = None, - test_runs_amount: Optional[int] = None, + days_back: int = 7, + test_runs_amount: int = 720, disable_passed_test_metrics: bool = False, exclude_elementary_models: bool = False, project_name: Optional[str] = None, @@ -81,7 +81,7 @@ def get_report_data( serializable_test_results = self._serilize_test_results( test_results.results ) - serializable_test_restuls_totals = self._serialize_totals( + serializable_test_results_totals = self._serialize_totals( test_results.totals ) serializable_test_runs = self._serilize_test_runs(test_runs.runs) @@ -110,7 +110,7 @@ def get_report_data( groups=serializable_groups, invocation=serializable_invocation, test_results=serializable_test_results, - test_results_totals=serializable_test_restuls_totals, + test_results_totals=serializable_test_results_totals, test_runs=serializable_test_runs, test_runs_totals=serializable_test_runs_totals, coverages=serializable_models_coverages, @@ -150,7 +150,7 @@ def _serilize_models_runs(self, models_runs: List[ModelRunsSchema]) -> List[dict return [model_runs.dict(by_alias=True) for model_runs in models_runs] def _serilize_test_results( - self, test_results: Dict[Optional[str], List[TestResultSchema]] + self, test_results: Dict[str, List[TestResultSchema]] ) -> Dict[str, List[dict]]: serializable_test_results = defaultdict(list) for model_unique_id, test_result in test_results.items(): @@ -160,7 +160,7 @@ def _serilize_test_results( return serializable_test_results def _serilize_test_runs( - self, test_runs: Dict[Optional[str], List[TestRunSchema]] + self, test_runs: Dict[str, List[TestRunSchema]] ) -> Dict[str, List[dict]]: serializable_test_runs = defaultdict(list) for model_unique_id, test_run in test_runs.items(): diff --git a/elementary/monitor/api/selector/schema.py b/elementary/monitor/api/selector/schema.py index fa443fbec..cab1ce6e6 100644 --- a/elementary/monitor/api/selector/schema.py +++ b/elementary/monitor/api/selector/schema.py @@ -1,8 +1,8 @@ -from typing import List, Optional +from typing import List from pydantic import BaseModel class SelectorSchema(BaseModel): selector: str - results: List[Optional[str]] + results: List[str] diff --git a/elementary/monitor/api/test_management/test_management.py b/elementary/monitor/api/test_management/test_management.py index 54d5d3b8c..b8b24be30 100644 --- a/elementary/monitor/api/test_management/test_management.py +++ b/elementary/monitor/api/test_management/test_management.py @@ -36,9 +36,9 @@ def get_tags(self) -> TagsModel: return self.test_management_fetcher.get_tags() def get_project_users(self) -> UsersModel: - project_users = self.test_management_fetcher.get_all_project_users() + project_user_names = self.test_management_fetcher.get_all_project_users() project_users = [ UserModel(name=project_user, origin="project") - for project_user in project_users + for project_user in project_user_names ] return UsersModel(users=project_users) diff --git a/elementary/monitor/api/tests/schema.py b/elementary/monitor/api/tests/schema.py index 2bf91628d..ba98126f0 100644 --- a/elementary/monitor/api/tests/schema.py +++ b/elementary/monitor/api/tests/schema.py @@ -2,13 +2,14 @@ from pydantic import BaseModel, Field, validator +from elementary.monitor.api.totals_schema import TotalsSchema from elementary.monitor.fetchers.invocations.schema import DbtInvocationSchema from elementary.utils.time import convert_partial_iso_format_to_full_iso_format class ElementaryTestResultSchema(BaseModel): display_name: Optional[str] = None - metrics: Optional[Union[list, dict]] + metrics: Optional[Union[list, dict]] = None result_description: Optional[str] = None class Config: @@ -22,36 +23,6 @@ class DbtTestResultSchema(BaseModel): failed_rows_count: Optional[int] = None -class TotalsSchema(BaseModel): - errors: int = 0 - warnings: int = 0 - passed: int = 0 - failures: int = 0 - - def add_total(self, status): - total_adders = { - "error": self._add_error, - "warn": self._add_warning, - "fail": self._add_failure, - "pass": self._add_passed, - } - adder = total_adders.get(status) - if adder: - adder() - - def _add_error(self): - self.errors += 1 - - def _add_warning(self): - self.warnings += 1 - - def _add_passed(self): - self.passed += 1 - - def _add_failure(self): - self.failures += 1 - - class InvocationSchema(BaseModel): affected_rows: Optional[int] time_utc: str @@ -103,8 +74,8 @@ class Config: class TestResultsWithTotalsSchema(BaseModel): - results: Dict[Optional[str], List[TestResultSchema]] = dict() - totals: Dict[Optional[str], TotalsSchema] = dict() + results: Dict[str, List[TestResultSchema]] = dict() + totals: Dict[str, TotalsSchema] = dict() invocation: DbtInvocationSchema = Field(default_factory=DbtInvocationSchema) @@ -114,8 +85,8 @@ class TestRunSchema(BaseModel): class TestRunsWithTotalsSchema(BaseModel): - runs: Dict[Optional[str], List[TestRunSchema]] = dict() - totals: Dict[Optional[str], TotalsSchema] = dict() + runs: Dict[str, List[TestRunSchema]] = dict() + totals: Dict[str, TotalsSchema] = dict() class TestResultSummarySchema(BaseModel): diff --git a/elementary/monitor/api/tests/tests.py b/elementary/monitor/api/tests/tests.py index bf72f591d..b7225392a 100644 --- a/elementary/monitor/api/tests/tests.py +++ b/elementary/monitor/api/tests/tests.py @@ -18,8 +18,8 @@ TestResultsWithTotalsSchema, TestRunSchema, TestRunsWithTotalsSchema, - TotalsSchema, ) +from elementary.monitor.api.totals_schema import TotalsSchema from elementary.monitor.data_monitoring.schema import SelectorFilterSchema from elementary.monitor.fetchers.invocations.schema import DbtInvocationSchema from elementary.monitor.fetchers.tests.schema import TestResultDBRowSchema @@ -34,7 +34,7 @@ class TestsAPI(APIClient): def __init__( self, dbt_runner: BaseDbtRunner, - days_back: Optional[int] = 7, + days_back: int = 7, invocations_per_test: int = 720, disable_passed_test_metrics: bool = False, ): @@ -148,17 +148,27 @@ def get_test_results( if test_result.invocations_rank_index == 1 ] - tests_results = defaultdict(list) + tests_results: DefaultDict[str, List[TestResultSchema]] = defaultdict(list) for test_result_db_row in filtered_test_results_db_rows: - test_result = TestResultSchema( - metadata=self._get_test_metadata_from_test_result_db_row( - test_result_db_row - ), - test_results=self._get_test_result_from_test_result_db_row( - test_result_db_row, disable_samples=disable_samples - ), + if not test_result_db_row.model_unique_id: + continue + + metadata = self._get_test_metadata_from_test_result_db_row( + test_result_db_row + ) + inner_test_results = self._get_test_result_from_test_result_db_row( + test_result_db_row, disable_samples=disable_samples + ) + + if inner_test_results is None: + continue + + tests_results[test_result_db_row.model_unique_id].append( + TestResultSchema( + metadata=metadata, + test_results=inner_test_results, + ) ) - tests_results[test_result_db_row.model_unique_id].append(test_result) test_metadatas = [] for test_results in tests_results.values(): @@ -180,6 +190,9 @@ def get_test_runs(self) -> TestRunsWithTotalsSchema: test_runs = defaultdict(list) for test_result_db_row in latest_test_results: + if not test_result_db_row.model_unique_id: + continue + test_invocations = tests_invocations.get( test_result_db_row.elementary_unique_id ) @@ -380,10 +393,15 @@ def _get_test_metadata_from_test_result_db_row( def _get_test_result_from_test_result_db_row( test_result_db_row: TestResultDBRowSchema, disable_samples: bool = False, - ) -> Union[DbtTestResultSchema, ElementaryTestResultSchema]: - test_results = None + ) -> Optional[Union[DbtTestResultSchema, ElementaryTestResultSchema]]: + test_results: Optional[Union[DbtTestResultSchema, ElementaryTestResultSchema]] + sample_data = test_result_db_row.sample_data if not disable_samples else None if test_result_db_row.test_type == "dbt_test": + if sample_data is not None and not isinstance(sample_data, list): + # Sanity check, shouldn't happen + raise Exception("Invalid sample data for dbt test") + test_results = DbtTestResultSchema( display_name=test_result_db_row.test_name, results_sample=sample_data, @@ -410,6 +428,14 @@ def _get_test_result_from_test_result_db_row( display_name=test_sub_type_display_name.lower(), result_description=test_result_db_row.test_results_description, ) + else: + # Unexpected test type - might have been introduced in a new package version. + # So we have no choice but to log a warning and ignore it. + logger.warning( + f"Unexpected elementary test type: {test_result_db_row.test_type}" + ) + test_results = None + return test_results @staticmethod @@ -433,19 +459,25 @@ def _get_total_tests_results( ) -> Dict[str, TotalsSchema]: totals: Dict[str, TotalsSchema] = dict() for test in test_metadatas: + if not test.model_unique_id: + continue + self._update_test_results_totals( totals_dict=totals, model_unique_id=test.model_unique_id, - status=test.latest_run_status, + status=test.latest_run_status or "unknown", ) return totals def _get_total_tests_runs( - self, tests_runs: Dict[Optional[str], List[TestRunSchema]] - ) -> Dict[Optional[str], TotalsSchema]: - totals = dict() + self, tests_runs: Dict[str, List[TestRunSchema]] + ) -> Dict[str, TotalsSchema]: + totals: Dict[str, TotalsSchema] = dict() for test_runs in tests_runs.values(): for test_run in test_runs: + if not test_run.test_runs: + continue + test_invocations = test_run.test_runs.invocations self._update_test_runs_totals( totals_dict=totals, @@ -461,6 +493,8 @@ def _update_test_runs_totals( test_invocations: List[InvocationSchema], ): model_unique_id = test.model_unique_id + if model_unique_id is None: + return if model_unique_id not in totals_dict: totals_dict[model_unique_id] = TotalsSchema() diff --git a/elementary/monitor/api/totals_schema.py b/elementary/monitor/api/totals_schema.py new file mode 100644 index 000000000..1cc94bcdc --- /dev/null +++ b/elementary/monitor/api/totals_schema.py @@ -0,0 +1,31 @@ +from pydantic import BaseModel + + +class TotalsSchema(BaseModel): + errors: int = 0 + warnings: int = 0 + passed: int = 0 + failures: int = 0 + + def add_total(self, status): + total_adders = { + "error": self._add_error, + "warn": self._add_warning, + "fail": self._add_failure, + "pass": self._add_passed, + } + adder = total_adders.get(status) + if adder: + adder() + + def _add_error(self): + self.errors += 1 + + def _add_warning(self): + self.warnings += 1 + + def _add_passed(self): + self.passed += 1 + + def _add_failure(self): + self.failures += 1 diff --git a/elementary/monitor/data_monitoring/data_monitoring.py b/elementary/monitor/data_monitoring/data_monitoring.py index 734516efd..5755e0b43 100644 --- a/elementary/monitor/data_monitoring/data_monitoring.py +++ b/elementary/monitor/data_monitoring/data_monitoring.py @@ -123,7 +123,8 @@ def get_elementary_database_and_schema(self): return relation except Exception as ex: logger.error("Failed to parse Elementary's database and schema.") - self.tracking.record_internal_exception(ex) + if self.tracking: + self.tracking.record_internal_exception(ex) return "." def get_latest_invocation(self) -> Dict[str, Any]: diff --git a/elementary/monitor/data_monitoring/report/data_monitoring_report.py b/elementary/monitor/data_monitoring/report/data_monitoring_report.py index 75948e413..f14cc6735 100644 --- a/elementary/monitor/data_monitoring/report/data_monitoring_report.py +++ b/elementary/monitor/data_monitoring/report/data_monitoring_report.py @@ -45,8 +45,8 @@ def __init__( def generate_report( self, - days_back: Optional[int] = None, - test_runs_amount: Optional[int] = None, + days_back: int = 7, + test_runs_amount: int = 720, file_path: Optional[str] = None, disable_passed_test_metrics: bool = False, should_open_browser: bool = True, @@ -92,8 +92,8 @@ def generate_report( def get_report_data( self, - days_back: Optional[int] = None, - test_runs_amount: Optional[int] = None, + days_back: int = 7, + test_runs_amount: int = 720, disable_passed_test_metrics: bool = False, exclude_elementary_models: bool = False, project_name: Optional[str] = None, diff --git a/elementary/monitor/data_monitoring/report/slack_report_summary_message_builder.py b/elementary/monitor/data_monitoring/report/slack_report_summary_message_builder.py index 5f4a02383..b5f2a910f 100644 --- a/elementary/monitor/data_monitoring/report/slack_report_summary_message_builder.py +++ b/elementary/monitor/data_monitoring/report/slack_report_summary_message_builder.py @@ -20,21 +20,21 @@ def get_slack_message( filter: SelectorFilterSchema = SelectorFilterSchema(), include_description: bool = False, ) -> SlackMessageSchema: - self._add_title_to_slack_alert() - self._add_preview_to_slack_alert( + self.add_title_to_slack_alert() + self.add_preview_to_slack_alert( test_results, days_back=days_back, bucket_website_url=bucket_website_url, filter=filter, ) - self._add_details_to_slack_alert( + self.add_details_to_slack_alert( test_results=test_results, include_description=include_description, bucket_website_url=bucket_website_url, ) return super().get_slack_message() - def _add_title_to_slack_alert(self): + def add_title_to_slack_alert(self): title_blocks = [ self.create_header_block(":mag: Monitoring summary"), self.create_divider_block(), @@ -59,7 +59,7 @@ def _get_summary_filter_text( return f"_This summary was generated with the following filters - {days_back_text}{f', {selector_text}' if selector_text else ''}_" - def _add_preview_to_slack_alert( + def add_preview_to_slack_alert( self, test_results: List[TestResultSummarySchema], days_back: int, @@ -96,7 +96,7 @@ def _add_preview_to_slack_alert( preview_blocks.extend(preview_blocks_filler) self._add_blocks_as_attachments(preview_blocks) - def _add_details_to_slack_alert( + def add_details_to_slack_alert( self, test_results: List[TestResultSummarySchema], include_description: bool = False, diff --git a/elementary/monitor/data_monitoring/schema.py b/elementary/monitor/data_monitoring/schema.py index 2e43aa9b3..120eece4e 100644 --- a/elementary/monitor/data_monitoring/schema.py +++ b/elementary/monitor/data_monitoring/schema.py @@ -7,8 +7,8 @@ InvocationSchema, TestResultSchema, TestRunSchema, - TotalsSchema, ) +from elementary.monitor.api.totals_schema import TotalsSchema from elementary.utils.log import get_logger from elementary.utils.time import DATETIME_FORMAT, convert_local_time_to_timezone diff --git a/elementary/monitor/fetchers/alerts/normalized_alert.py b/elementary/monitor/fetchers/alerts/normalized_alert.py index 921a355f4..af203d496 100644 --- a/elementary/monitor/fetchers/alerts/normalized_alert.py +++ b/elementary/monitor/fetchers/alerts/normalized_alert.py @@ -110,18 +110,18 @@ def _normalize_alert(self): ) return self.alert - def _get_alert_meta_attrs(self, meta_key: str) -> List[Optional[str]]: + def _get_alert_meta_attrs(self, meta_key: str) -> List[str]: attrs = [] test_attrs = self.test_meta.get(meta_key, []) model_attrs = self.model_meta.get(meta_key, []) if isinstance(test_attrs, list): attrs.extend(test_attrs) - else: + elif isinstance(test_attrs, str): attrs.append(test_attrs) if isinstance(model_attrs, list): attrs.extend(model_attrs) - else: + elif isinstance(model_attrs, str): attrs.append(model_attrs) return attrs diff --git a/elementary/py.typed b/elementary/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/elementary/tracking/tracking_interface.py b/elementary/tracking/tracking_interface.py index d3b2a2472..f43b15a5e 100644 --- a/elementary/tracking/tracking_interface.py +++ b/elementary/tracking/tracking_interface.py @@ -6,6 +6,9 @@ from elementary.config.config import Config from elementary.utils.hash import hash +from elementary.utils.log import get_logger + +logger = get_logger(__name__) class BaseTracking(ABC): @@ -15,7 +18,7 @@ class BaseTracking(ABC): def __init__(self, config: Config): self._config = config self._props: Dict[str, Any] = {} - self.groups = {} + self.groups: Dict[str, str] = {} @staticmethod def _hash(content: str): @@ -30,7 +33,7 @@ def set_env(self, key: str, value): @abstractmethod def register_group( self, group_type: str, group_identifier: str, group_props: Optional[dict] = None - ) -> None: + ): raise NotImplementedError @abstractmethod @@ -39,7 +42,7 @@ def _send_event( distinct_id: str, event_name: str, properties: Optional[dict] = None, - ) -> None: + ): raise NotImplementedError @@ -53,7 +56,7 @@ def _send_event( distinct_id: str, event_name: str, properties: Optional[dict] = None, - ) -> None: + ): posthog.capture( distinct_id=distinct_id, event=event_name, @@ -63,7 +66,7 @@ def _send_event( def register_group( self, group_type: str, group_identifier: str, group_props: Optional[dict] = None - ) -> None: + ): posthog.group_identify(group_type, group_identifier, group_props) self.groups[group_type] = group_identifier @@ -73,13 +76,12 @@ def __init__(self, config: Config): super().__init__(config) self.client = self._init_client() - def _init_client(self) -> requests.Session: + @staticmethod + def _init_client() -> requests.Session: session = requests.Session() return session - def _group_identify( - self, group_type, group_identifier, group_props - ) -> requests.Response: + def _group_identify(self, group_type, group_identifier, group_props): response = self.client.post( f"{self.POSTHOG_API_HOST}/capture/", json=dict( @@ -93,7 +95,10 @@ def _group_identify( }, ), ) - return response + if response.status_code != requests.codes.ok: + logger.warning( + f"Failed to register group in Posthog - {group_type} {group_identifier}" + ) def _capture( self, @@ -101,7 +106,7 @@ def _capture( event_name: str, properties: Optional[dict] = None, groups: Optional[dict] = None, - ) -> requests.Response: + ): response = self.client.post( f"{self.POSTHOG_API_HOST}/capture/", json=dict( @@ -111,22 +116,22 @@ def _capture( properties={**(properties or {}), "$groups": groups}, ), ) - return response + if response.status_code != requests.codes.ok: + logger.debug(f"Failed to capture event - {event_name} {distinct_id}") def register_group( self, group_type: str, group_identifier: str, group_props: Optional[dict] = None - ) -> requests.Response: - resp = self._group_identify(group_type, group_identifier, group_props) + ): + self._group_identify(group_type, group_identifier, group_props) self.groups[group_type] = group_identifier - return resp def _send_event( self, distinct_id: str, event_name: str, properties: Optional[dict] = None, - ) -> requests.Response: - return self._capture( + ): + self._capture( distinct_id=distinct_id, event_name=event_name, properties=properties, diff --git a/mypy.ini b/mypy.ini index 5108c7a49..201d197aa 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,6 +1,7 @@ [mypy] packages = elementary exclude = ^elementary/monitor/dbt_project/ +check_untyped_defs = True [mypy-alive_progress] ignore_missing_imports = True diff --git a/scripts/dbt_log_to_elementary_alerts.py b/scripts/dbt_log_to_elementary_alerts.py index 43e215ee7..de38b6b70 100644 --- a/scripts/dbt_log_to_elementary_alerts.py +++ b/scripts/dbt_log_to_elementary_alerts.py @@ -1,5 +1,6 @@ import json import sys +from typing import List ALERT_PREFIX = "Elementary: " @@ -8,7 +9,7 @@ def get_elementary_log_lines(dbt_log: str): return [line for line in dbt_log.splitlines() if ALERT_PREFIX in line] -def get_json_logs(log_lines: [str]): +def get_json_logs(log_lines: List[str]): json_logs = [] for log in log_lines: try: @@ -18,7 +19,7 @@ def get_json_logs(log_lines: [str]): return json_logs -def get_elementary_alerts(elementary_json_logs: [dict]): +def get_elementary_alerts(elementary_json_logs: List[dict]): elementary_alerts = [] for json_log in elementary_json_logs: elementary_alerts.extend( From 8beca3ded7f2247fad3e77d491c8d2a62887ac98 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 12 Jul 2023 18:43:35 +0300 Subject: [PATCH 02/21] bugfix --- elementary/clients/dbt/base_dbt_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elementary/clients/dbt/base_dbt_runner.py b/elementary/clients/dbt/base_dbt_runner.py index 49557c3d7..181aae60e 100644 --- a/elementary/clients/dbt/base_dbt_runner.py +++ b/elementary/clients/dbt/base_dbt_runner.py @@ -6,7 +6,7 @@ class BaseDbtRunner(ABC): def __init__( self, project_dir: str, - profiles_dir: Optional[str], + profiles_dir: Optional[str] = None, target: Optional[str] = None, vars: Optional[dict] = None, secret_vars: Optional[dict] = None, From 6a8a103f9334253037186b1e747d033937f25be5 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 12 Jul 2023 19:32:16 +0300 Subject: [PATCH 03/21] fixed tests --- elementary/monitor/alerts/malformed.py | 19 +++++++++++++++---- .../group_alerts_by_table/mock_classes.py | 4 +--- .../alerts/group_alerts_by_table/mock_data.py | 10 ++++------ .../test_slack_alert_message_builder.py | 12 ++++++------ ...st_slack_report_summary_message_builder.py | 8 ++++---- 5 files changed, 30 insertions(+), 23 deletions(-) diff --git a/elementary/monitor/alerts/malformed.py b/elementary/monitor/alerts/malformed.py index e85c9416f..bf86aff53 100644 --- a/elementary/monitor/alerts/malformed.py +++ b/elementary/monitor/alerts/malformed.py @@ -19,10 +19,21 @@ def to_slack(self, is_slack_workflow: bool = False) -> SlackMessageSchema: ) ) - # We use getattribute and not getattr because "Alert" set some attributes to None if not given which skip the self.data.get(_name) - # For example - tags is set to None, so getattr for tags will return None although data contains it. + # We use getattribute and not getattr because "Alert" set some attributes to None if not given which skip the + # self.data.get(_name) For example - tags is set to None, so getattr for tags will return None although data + # contains it. def __getattribute__(self, __name: str) -> Any: try: - return super().__getattribute__(__name) or self.data.get(__name) + res = super().__getattribute__(__name) except AttributeError: - return self.data.get(__name) + res = None + + if not res: + # Try to get from self.data, but also safely handle the case it doesn't + # exist yet without infinite recursion. + try: + data = super().__getattribute__("data") + res = data.get(__name) + except AttributeError: + pass + return res diff --git a/tests/unit/monitor/alerts/group_alerts_by_table/mock_classes.py b/tests/unit/monitor/alerts/group_alerts_by_table/mock_classes.py index 49954cdfa..50a9ae830 100644 --- a/tests/unit/monitor/alerts/group_alerts_by_table/mock_classes.py +++ b/tests/unit/monitor/alerts/group_alerts_by_table/mock_classes.py @@ -10,9 +10,7 @@ class MockAlert: model_unique_id: Optional[str] slack_channel: Optional[str] detected_at: Optional[Union[str, datetime]] - model_meta: Optional[ - str - ] # this string should be a json of a dict that has or has not the key "channel" + model_meta: Optional[dict] owners: Optional[List[str]] subscribers: Optional[List[str]] tags: Optional[List[str]] diff --git a/tests/unit/monitor/alerts/group_alerts_by_table/mock_data.py b/tests/unit/monitor/alerts/group_alerts_by_table/mock_data.py index f96689801..710545a3a 100644 --- a/tests/unit/monitor/alerts/group_alerts_by_table/mock_data.py +++ b/tests/unit/monitor/alerts/group_alerts_by_table/mock_data.py @@ -1,5 +1,3 @@ -import json - from elementary.monitor.alerts.group_of_alerts import GroupingType from elementary.monitor.fetchers.alerts.normalized_alert import CHANNEL_KEY from tests.unit.monitor.alerts.group_alerts_by_table.mock_classes import MockAlert @@ -18,9 +16,9 @@ DETECTED_AT_1 = "1992-11-11 20:00:03+0200" DETECTED_AT_2 = "1992-11-11 20:01:03+0200" DETECTED_AT_3 = "1992-11-11 20:02:03+0200" -EMPTY_DICT = json.dumps(dict()) -GROUP_BY_ALERT_IN_DICT = json.dumps({"group_alerts_by": GroupingType.BY_ALERT.value}) -OTHER_CHANNEL_IN_DICT = json.dumps({CHANNEL_KEY: OTHER_CHANNEL}) +EMPTY_DICT = dict() +GROUP_BY_ALERT_IN_DICT = {"group_alerts_by": GroupingType.BY_ALERT.value} +OTHER_CHANNEL_IN_DICT = {CHANNEL_KEY: OTHER_CHANNEL} AL_WARN_MODEL1_NO_CHANNEL_NO_GROUPING_TS3 = MockAlert( status="warn", @@ -39,7 +37,7 @@ model_unique_id=MODEL_1, slack_channel=OTHER_CHANNEL, detected_at=DETECTED_AT_3, - model_meta="{}", + model_meta=EMPTY_DICT, owners=[OWNER_1, OWNER_3], subscribers=[], tags=TAGS_2, diff --git a/tests/unit/monitor/alerts/test_slack_alert_message_builder.py b/tests/unit/monitor/alerts/test_slack_alert_message_builder.py index 731d07518..63db01a86 100644 --- a/tests/unit/monitor/alerts/test_slack_alert_message_builder.py +++ b/tests/unit/monitor/alerts/test_slack_alert_message_builder.py @@ -12,7 +12,7 @@ def test_add_title_to_slack_alert(): message_builder = SlackAlertMessageBuilder() title = message_builder.create_header_block("This is an header!") sub_title = message_builder.create_context_block(["I am only a sub title :("]) - message_builder._add_title_to_slack_alert([title, sub_title]) + message_builder.add_title_to_slack_alert([title, sub_title]) assert json.dumps(message_builder.slack_message, sort_keys=True) == json.dumps( { "blocks": [ @@ -93,7 +93,7 @@ def test_add_preview_to_slack_alert(): message_builder = SlackAlertMessageBuilder() title = message_builder.create_header_block("This is an header!") sub_title = message_builder.create_context_block(["I am only a sub title :("]) - message_builder._add_preview_to_slack_alert([title, sub_title]) + message_builder.add_preview_to_slack_alert([title, sub_title]) assert json.dumps(message_builder.slack_message, sort_keys=True) == json.dumps( { "blocks": [], @@ -136,7 +136,7 @@ def test_add_details_to_slack_alert(): # No result and configuration blocks message_builder = SlackAlertMessageBuilder() - message_builder._add_details_to_slack_alert() + message_builder.add_details_to_slack_alert() assert json.dumps(message_builder.slack_message, sort_keys=True) == json.dumps( { "blocks": [], @@ -147,7 +147,7 @@ def test_add_details_to_slack_alert(): # Only result blocks message_builder = SlackAlertMessageBuilder() - message_builder._add_details_to_slack_alert(result=[block, block]) + message_builder.add_details_to_slack_alert(result=[block, block]) assert json.dumps(message_builder.slack_message, sort_keys=True) == json.dumps( { "blocks": [], @@ -173,7 +173,7 @@ def test_add_details_to_slack_alert(): # Only configuration blocks message_builder = SlackAlertMessageBuilder() - message_builder._add_details_to_slack_alert(configuration=[block, block]) + message_builder.add_details_to_slack_alert(configuration=[block, block]) assert json.dumps(message_builder.slack_message, sort_keys=True) == json.dumps( { "blocks": [], @@ -199,7 +199,7 @@ def test_add_details_to_slack_alert(): # All details message_builder = SlackAlertMessageBuilder() - message_builder._add_details_to_slack_alert( + message_builder.add_details_to_slack_alert( configuration=[block, block], result=[block, block] ) assert json.dumps(message_builder.slack_message, sort_keys=True) == json.dumps( diff --git a/tests/unit/monitor/data_monitoring/report/test_slack_report_summary_message_builder.py b/tests/unit/monitor/data_monitoring/report/test_slack_report_summary_message_builder.py index 82f4fa198..f1d81aec7 100644 --- a/tests/unit/monitor/data_monitoring/report/test_slack_report_summary_message_builder.py +++ b/tests/unit/monitor/data_monitoring/report/test_slack_report_summary_message_builder.py @@ -21,7 +21,7 @@ def test_get_test_results_totals(test_results_summary): def test_add_details_to_slack_alert_attachments_limit(test_results_summary): # Within attachments limitation message_builder = SlackReportSummaryMessageBuilder() - message_builder._add_details_to_slack_alert(test_results_summary) + message_builder.add_details_to_slack_alert(test_results_summary) attachments_as_string = json.dumps( message_builder.slack_message.get("attachments")[0].get("blocks") ) @@ -30,7 +30,7 @@ def test_add_details_to_slack_alert_attachments_limit(test_results_summary): assert ":exclamation: *Error*" in attachments_as_string message_builder = SlackReportSummaryMessageBuilder() - message_builder._add_details_to_slack_alert((test_results_summary * 5)[0:39]) + message_builder.add_details_to_slack_alert((test_results_summary * 5)[0:39]) attachments_as_string = json.dumps( message_builder.slack_message.get("attachments")[0].get("blocks") ) @@ -43,7 +43,7 @@ def test_add_details_to_slack_alert_attachments_limit(test_results_summary): nonsuccessful_parts_of_fixture = [ x for x in test_results_summary if x.status != "pass" ] - message_builder._add_details_to_slack_alert( + message_builder.add_details_to_slack_alert( (nonsuccessful_parts_of_fixture * 50)[0:40] ) attachments_as_string = json.dumps( @@ -58,7 +58,7 @@ def test_passed_tests_filtered_out_of_details_view( # Within attachments limitation message_builder = SlackReportSummaryMessageBuilder() passed_tests_from_fixture = [x for x in test_results_summary if x.status == "pass"] - message_builder._add_details_to_slack_alert(passed_tests_from_fixture) + message_builder.add_details_to_slack_alert(passed_tests_from_fixture) attachments_as_string = json.dumps( message_builder.slack_message.get("attachments")[0].get("blocks") ) From 2f02c49971c99a32cf79875b61261f78dd697697 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 12 Jul 2023 19:56:32 +0300 Subject: [PATCH 04/21] add mypy to pre-commit! --- .pre-commit-config.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 723569489..bfb64f232 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,3 +21,12 @@ repos: name: Check for NO_COMMIT marker entry: bash -c "git diff --cached -U0 | (! grep NO_COMMIT)" language: system + require_serial: true + + - repo: local + hooks: + - id: mypy + name: mypy + entry: bash -c "mypy" + language: system + require_serial: true From 33f73562ec26435e35c2f77039c5b21fea8674ec Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 12 Jul 2023 20:04:30 +0300 Subject: [PATCH 05/21] run-precommit: run the workflow if the precommit config itself changed --- .github/workflows/run-precommit.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/run-precommit.yml b/.github/workflows/run-precommit.yml index 301a70a80..ffe7c9bff 100644 --- a/.github/workflows/run-precommit.yml +++ b/.github/workflows/run-precommit.yml @@ -3,6 +3,7 @@ on: push: paths: - '**.py' + - '.pre-commit-config.yaml' workflow_dispatch: jobs: From 4b07653c12d064342406b7132e97d7fd06aaade6 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 12 Jul 2023 20:10:55 +0300 Subject: [PATCH 06/21] run-precommit: install elementary --- .github/workflows/run-precommit.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/run-precommit.yml b/.github/workflows/run-precommit.yml index ffe7c9bff..c8ea12ca8 100644 --- a/.github/workflows/run-precommit.yml +++ b/.github/workflows/run-precommit.yml @@ -4,6 +4,7 @@ on: paths: - '**.py' - '.pre-commit-config.yaml' + - '.github/workflows/run-precommit.yml' workflow_dispatch: jobs: @@ -18,6 +19,9 @@ jobs: with: python-version: '3.8' + - name: Install elementary (mainly needed so mypy will have the dependencies it needs) + run: pip install -e . + - name: Install dev requirements run: pip install -r dev-requirements.txt From dc47a7a2be48e1b160dc45117bcef50f4c52187a Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Thu, 13 Jul 2023 00:40:47 +0300 Subject: [PATCH 07/21] run-precommit: change to python 3.9 --- .github/workflows/run-precommit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-precommit.yml b/.github/workflows/run-precommit.yml index c8ea12ca8..423c0305f 100644 --- a/.github/workflows/run-precommit.yml +++ b/.github/workflows/run-precommit.yml @@ -17,7 +17,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4.3.0 with: - python-version: '3.8' + python-version: '3.9' - name: Install elementary (mainly needed so mypy will have the dependencies it needs) run: pip install -e . From 7aab97b8861b26abb990dcd4ac74bd928e2436be Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Thu, 13 Jul 2023 00:50:52 +0300 Subject: [PATCH 08/21] temporarily restrict to click<8.1.4 due to typing issues there + return precommit workflow to py 3.8 --- .github/workflows/run-precommit.yml | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-precommit.yml b/.github/workflows/run-precommit.yml index 423c0305f..c8ea12ca8 100644 --- a/.github/workflows/run-precommit.yml +++ b/.github/workflows/run-precommit.yml @@ -17,7 +17,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4.3.0 with: - python-version: '3.9' + python-version: '3.8' - name: Install elementary (mainly needed so mypy will have the dependencies it needs) run: pip install -e . diff --git a/setup.py b/setup.py index 70b589c7b..9daf9abfc 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ "Snowflake, BigQuery, Redshift, data reliability, analytics engineering", long_description=README, install_requires=[ - "click>=7.0,<9", + "click>=7.0,<8.1.4", "pyfiglet", "dbt-core>=0.20,<2.0.0", "requests>=2.28.1,<3.0.0", From 433d8b534fdb1283a1bea5f9db22a95b34a363c4 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Thu, 13 Jul 2023 00:53:42 +0300 Subject: [PATCH 09/21] slack: bugfix --- elementary/clients/slack/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/elementary/clients/slack/client.py b/elementary/clients/slack/client.py index acaacdbe2..fb410ff62 100644 --- a/elementary/clients/slack/client.py +++ b/elementary/clients/slack/client.py @@ -74,8 +74,8 @@ def __init__( token: str, tracking: Optional[Tracking] = None, ): - super().__init__(tracking) self.token = token + super().__init__(tracking) def _initial_client(self): return WebClient(token=self.token) @@ -212,8 +212,8 @@ def __init__( webhook: str, tracking: Optional[Tracking] = None, ): - super().__init__(tracking) self.webhook = webhook + super().__init__(tracking) def _initial_client(self): return WebhookClient( From bf8d9f1b74ea368b13bc3a28db7e10a0af54b7d0 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Thu, 13 Jul 2023 01:15:49 +0300 Subject: [PATCH 10/21] remove excess dev requirements, let's see what's really needed --- dev-requirements.txt | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 0dbad9a70..c63175ded 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -11,18 +11,3 @@ boto3-stubs google-auth-stubs ratelimit-stubs types-google-cloud-ndb -types-PyYAML -types-Pygments -types-cffi -types-colorama -types-decorator -types-entrypoints -types-jsonschema -types-paramiko -types-protobuf -types-psutil -types-pytz -types-pywin32 -types-six -types-typed-ast -types-setuptools \ No newline at end of file From 0aa2516cf83fc7abd293505ef7d2ae37e0ac3d4a Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Thu, 13 Jul 2023 01:27:29 +0300 Subject: [PATCH 11/21] readd needed stubs --- dev-requirements.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dev-requirements.txt b/dev-requirements.txt index c63175ded..faa4db352 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -11,3 +11,8 @@ boto3-stubs google-auth-stubs ratelimit-stubs types-google-cloud-ndb +types-protobuf +types-pytz +types-jsonschema +types-PyYAML +types-setuptools From 7e292636bc7401923459b03283b84c2682a8bdae Mon Sep 17 00:00:00 2001 From: Elon Gliksberg Date: Thu, 13 Jul 2023 10:42:34 +0300 Subject: [PATCH 12/21] Added 'py.typed' to 'MANIFEST.in'. --- MANIFEST.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index 6b7b9a271..dd1598856 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1 @@ -recursive-include elementary *.py *.sql *.yml *.html *.md .gitkeep .gitignore +recursive-include elementary *.py *.sql *.yml *.html *.md .gitkeep .gitignore py.typed From dad9a74614925ae6c8c5de3b8cdc4c14b6eaf301 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Thu, 13 Jul 2023 12:05:37 +0300 Subject: [PATCH 13/21] pre-commit: run mypy only on changed files --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bfb64f232..3c1863f89 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,6 +27,6 @@ repos: hooks: - id: mypy name: mypy - entry: bash -c "mypy" + entry: bash -c "mypy --no-error-summary $@" mypy language: system - require_serial: true + files: ^elementary/.*\.py$ From bf26b2b3f408e3c5fdb8624912e4303205ccf28c Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Thu, 13 Jul 2023 13:03:00 +0300 Subject: [PATCH 14/21] simplify pre-commit --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3c1863f89..42f6a789d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,6 +27,6 @@ repos: hooks: - id: mypy name: mypy - entry: bash -c "mypy --no-error-summary $@" mypy + entry: mypy language: system files: ^elementary/.*\.py$ From 7c2b5887ec79bf77acf932125964a82ad52d29f5 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Thu, 13 Jul 2023 13:03:47 +0300 Subject: [PATCH 15/21] simplify pre-commit --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 42f6a789d..72f5ac147 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,6 +27,6 @@ repos: hooks: - id: mypy name: mypy - entry: mypy + entry: mypy --no-error-summary language: system files: ^elementary/.*\.py$ From 47835e8ea18930b94db8ea12399b6c75db466c05 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Thu, 13 Jul 2023 13:41:00 +0300 Subject: [PATCH 16/21] data_monitoring_alerts: fix mypy issue --- elementary/monitor/data_monitoring/data_monitoring_alerts.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/elementary/monitor/data_monitoring/data_monitoring_alerts.py b/elementary/monitor/data_monitoring/data_monitoring_alerts.py index 30f26b30e..fecdb3869 100644 --- a/elementary/monitor/data_monitoring/data_monitoring_alerts.py +++ b/elementary/monitor/data_monitoring/data_monitoring_alerts.py @@ -155,7 +155,9 @@ def _group_alerts_per_config(self, alerts: List[Alert]) -> List[GroupOfAlerts]: grouped_alerts = by_table_group + by_alert_group return sorted( grouped_alerts, - key=lambda group: min(alert.detected_at for alert in group.alerts), + key=lambda group: min( + alert.detected_at or datetime.max for alert in group.alerts + ), ) def _send_test_message(self): From 481d55172949c0597027d822642f9513e64b8580 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Thu, 13 Jul 2023 15:37:03 +0300 Subject: [PATCH 17/21] CR fixes --- .github/workflows/run-precommit.yml | 3 ++- elementary/cli/upgrade.py | 5 +++-- elementary/clients/dbt/slim_dbt_runner.py | 4 ++-- elementary/tracking/tracking_interface.py | 2 +- elementary/utils/package.py | 7 ++----- 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/.github/workflows/run-precommit.yml b/.github/workflows/run-precommit.yml index c8ea12ca8..df26d6e2e 100644 --- a/.github/workflows/run-precommit.yml +++ b/.github/workflows/run-precommit.yml @@ -19,7 +19,8 @@ jobs: with: python-version: '3.8' - - name: Install elementary (mainly needed so mypy will have the dependencies it needs) + # mainly needed so mypy will have the dependencies it needs + - name: Install elementary run: pip install -e . - name: Install dev requirements diff --git a/elementary/cli/upgrade.py b/elementary/cli/upgrade.py index 2998369c8..de23391b3 100644 --- a/elementary/cli/upgrade.py +++ b/elementary/cli/upgrade.py @@ -10,8 +10,9 @@ def recommend_version_upgrade(): try: latest_version = package.get_latest_package_version() current_version = package.get_package_version() - if not latest_version or not current_version: - # Failed to parse versions, so skip the check + + if not latest_version: + # Failed to obtain the latest version, so skip the check return if version.parse(current_version) < version.parse(latest_version): diff --git a/elementary/clients/dbt/slim_dbt_runner.py b/elementary/clients/dbt/slim_dbt_runner.py index 5d4fca4ca..5462576e7 100644 --- a/elementary/clients/dbt/slim_dbt_runner.py +++ b/elementary/clients/dbt/slim_dbt_runner.py @@ -148,8 +148,6 @@ def _load_manifest(self): raise Exception("Config not loaded") if not self.adapter or not self.connections_manager: raise Exception("Adapter not loaded") - if not self.manifest: - raise Exception("Manifest not loaded") self.project_parser = ManifestLoader( self.config, @@ -157,6 +155,8 @@ def _load_manifest(self): self.connections_manager.set_query_header, ) self.manifest = self.project_parser.load() + if self.manifest is None: + raise Exception("Failed to load manifest!") self.manifest.build_flat_graph() self.project_parser.save_macros_to_adapter(self.adapter) diff --git a/elementary/tracking/tracking_interface.py b/elementary/tracking/tracking_interface.py index f43b15a5e..7e383a4d5 100644 --- a/elementary/tracking/tracking_interface.py +++ b/elementary/tracking/tracking_interface.py @@ -95,7 +95,7 @@ def _group_identify(self, group_type, group_identifier, group_props): }, ), ) - if response.status_code != requests.codes.ok: + if not response.ok: logger.warning( f"Failed to register group in Posthog - {group_type} {group_identifier}" ) diff --git a/elementary/utils/package.py b/elementary/utils/package.py index 171e46bdd..36ca85eec 100644 --- a/elementary/utils/package.py +++ b/elementary/utils/package.py @@ -6,11 +6,8 @@ _PYPI_URL = "https://pypi.org/pypi/elementary-data/json" -def get_package_version() -> Optional[str]: - try: - return pkg_resources.get_distribution("elementary-data").version - except Exception: - return None +def get_package_version() -> str: + return pkg_resources.get_distribution("elementary-data").version def get_latest_package_version() -> Optional[str]: From a0785ecb29ee5064044991eb329a4035359b6a7f Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Thu, 13 Jul 2023 16:08:42 +0300 Subject: [PATCH 18/21] more CR fixes --- elementary/monitor/api/filters/filters.py | 8 +++--- elementary/monitor/api/filters/schema.py | 2 +- elementary/monitor/api/report/report.py | 8 +++--- elementary/monitor/api/tests/schema.py | 32 +++++++++++------------ elementary/monitor/api/tests/tests.py | 28 +++++++++----------- 5 files changed, 38 insertions(+), 40 deletions(-) diff --git a/elementary/monitor/api/filters/filters.py b/elementary/monitor/api/filters/filters.py index 1f73b5d24..3c94b9b2d 100644 --- a/elementary/monitor/api/filters/filters.py +++ b/elementary/monitor/api/filters/filters.py @@ -1,4 +1,4 @@ -from typing import Dict, List +from typing import Dict, List, Optional from elementary.clients.api.api_client import APIClient from elementary.monitor.api.filters.schema import FilterSchema, FiltersSchema @@ -20,8 +20,8 @@ class FiltersAPI(APIClient): def get_filters( self, - test_results_totals: Dict[str, TotalsSchema], - test_runs_totals: Dict[str, TotalsSchema], + test_results_totals: Dict[Optional[str], TotalsSchema], + test_runs_totals: Dict[Optional[str], TotalsSchema], models: Dict[str, NormalizedModelSchema], sources: Dict[str, NormalizedSourceSchema], models_runs: List[ModelRunsSchema], @@ -39,7 +39,7 @@ def get_filters( @staticmethod def _get_test_filters( - totals: Dict[str, TotalsSchema], + totals: Dict[Optional[str], TotalsSchema], models: Dict[str, NormalizedModelSchema], sources: Dict[str, NormalizedSourceSchema], ) -> List[FilterSchema]: diff --git a/elementary/monitor/api/filters/schema.py b/elementary/monitor/api/filters/schema.py index 2dae4f6a4..a24d2eddb 100644 --- a/elementary/monitor/api/filters/schema.py +++ b/elementary/monitor/api/filters/schema.py @@ -8,7 +8,7 @@ class FilterSchema(BaseModel): display_name: str model_unique_ids: List[Optional[str]] = [] - def add_model_unique_id(self, model_unique_id: str): + def add_model_unique_id(self, model_unique_id: Optional[str]): new_model_unique_ids = list({*self.model_unique_ids, model_unique_id}) self.model_unique_ids = new_model_unique_ids diff --git a/elementary/monitor/api/report/report.py b/elementary/monitor/api/report/report.py index 9c7b92acb..eface57b7 100644 --- a/elementary/monitor/api/report/report.py +++ b/elementary/monitor/api/report/report.py @@ -160,8 +160,8 @@ def _serilize_test_results( return serializable_test_results def _serilize_test_runs( - self, test_runs: Dict[str, List[TestRunSchema]] - ) -> Dict[str, List[dict]]: + self, test_runs: Dict[Optional[str], List[TestRunSchema]] + ) -> Dict[Optional[str], List[dict]]: serializable_test_runs = defaultdict(list) for model_unique_id, test_run in test_runs.items(): serializable_test_runs[model_unique_id].extend( @@ -169,7 +169,9 @@ def _serilize_test_runs( ) return serializable_test_runs - def _serialize_totals(self, totals: Dict[str, TotalsSchema]) -> Dict[str, dict]: + def _serialize_totals( + self, totals: Dict[Optional[str], TotalsSchema] + ) -> Dict[Optional[str], dict]: serialized_totals = dict() for model_unique_id, total in totals.items(): serialized_totals[model_unique_id] = total.dict() diff --git a/elementary/monitor/api/tests/schema.py b/elementary/monitor/api/tests/schema.py index ba98126f0..cb57cffc4 100644 --- a/elementary/monitor/api/tests/schema.py +++ b/elementary/monitor/api/tests/schema.py @@ -42,27 +42,27 @@ class InvocationsSchema(BaseModel): class TestMetadataSchema(BaseModel): - test_unique_id: Optional[str] = None - elementary_unique_id: Optional[str] = None + test_unique_id: str + elementary_unique_id: str database_name: Optional[str] = None - schema_name: Optional[str] = None + schema_name: str table_name: Optional[str] = None column_name: Optional[str] = None - test_name: Optional[str] = None - test_display_name: Optional[str] = None - latest_run_time: Optional[str] = None - latest_run_time_utc: Optional[str] = None - latest_run_status: Optional[str] = None + test_name: str + test_display_name: str + latest_run_time: str + latest_run_time_utc: str + latest_run_status: str model_unique_id: Optional[str] = None table_unique_id: Optional[str] = None - test_type: Optional[str] = None - test_sub_type: Optional[str] = None + test_type: str + test_sub_type: str test_query: Optional[str] = None - test_params: Optional[dict] = None + test_params: dict test_created_at: Optional[str] = None description: Optional[str] = None - result: Optional[dict] = None - configuration: Optional[dict] = None + result: dict + configuration: dict class TestResultSchema(BaseModel): @@ -75,7 +75,7 @@ class Config: class TestResultsWithTotalsSchema(BaseModel): results: Dict[str, List[TestResultSchema]] = dict() - totals: Dict[str, TotalsSchema] = dict() + totals: Dict[Optional[str], TotalsSchema] = dict() invocation: DbtInvocationSchema = Field(default_factory=DbtInvocationSchema) @@ -85,8 +85,8 @@ class TestRunSchema(BaseModel): class TestRunsWithTotalsSchema(BaseModel): - runs: Dict[str, List[TestRunSchema]] = dict() - totals: Dict[str, TotalsSchema] = dict() + runs: Dict[Optional[str], List[TestRunSchema]] = dict() + totals: Dict[Optional[str], TotalsSchema] = dict() class TestResultSummarySchema(BaseModel): diff --git a/elementary/monitor/api/tests/tests.py b/elementary/monitor/api/tests/tests.py index b7225392a..b0554b1f4 100644 --- a/elementary/monitor/api/tests/tests.py +++ b/elementary/monitor/api/tests/tests.py @@ -190,9 +190,6 @@ def get_test_runs(self) -> TestRunsWithTotalsSchema: test_runs = defaultdict(list) for test_result_db_row in latest_test_results: - if not test_result_db_row.model_unique_id: - continue - test_invocations = tests_invocations.get( test_result_db_row.elementary_unique_id ) @@ -456,25 +453,24 @@ def _get_failed_rows_count(test_result_db_row: TestResultDBRowSchema) -> int: def _get_total_tests_results( self, test_metadatas: List[TestMetadataSchema], - ) -> Dict[str, TotalsSchema]: - totals: Dict[str, TotalsSchema] = dict() + ) -> Dict[Optional[str], TotalsSchema]: + totals: Dict[Optional[str], TotalsSchema] = dict() for test in test_metadatas: - if not test.model_unique_id: - continue - self._update_test_results_totals( totals_dict=totals, model_unique_id=test.model_unique_id, - status=test.latest_run_status or "unknown", + status=test.latest_run_status, ) return totals def _get_total_tests_runs( - self, tests_runs: Dict[str, List[TestRunSchema]] - ) -> Dict[str, TotalsSchema]: - totals: Dict[str, TotalsSchema] = dict() + self, tests_runs: Dict[Optional[str], List[TestRunSchema]] + ) -> Dict[Optional[str], TotalsSchema]: + totals: Dict[Optional[str], TotalsSchema] = dict() for test_runs in tests_runs.values(): for test_run in test_runs: + # It's possible test_runs will be None if we didn't find any invocations associated + # with this test, in that case it also makes sense to skip it. if not test_run.test_runs: continue @@ -488,13 +484,11 @@ def _get_total_tests_runs( @staticmethod def _update_test_runs_totals( - totals_dict: Dict[str, TotalsSchema], + totals_dict: Dict[Optional[str], TotalsSchema], test: TestMetadataSchema, test_invocations: List[InvocationSchema], ): model_unique_id = test.model_unique_id - if model_unique_id is None: - return if model_unique_id not in totals_dict: totals_dict[model_unique_id] = TotalsSchema() @@ -504,7 +498,9 @@ def _update_test_runs_totals( @staticmethod def _update_test_results_totals( - totals_dict: Dict[str, TotalsSchema], model_unique_id: str, status: str + totals_dict: Dict[Optional[str], TotalsSchema], + model_unique_id: Optional[str], + status: str, ): if model_unique_id not in totals_dict: totals_dict[model_unique_id] = TotalsSchema() From fb2dbbbbd1f3a0468f171f700b136af29f25579d Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Thu, 13 Jul 2023 16:15:36 +0300 Subject: [PATCH 19/21] more CR fixes #2 --- elementary/monitor/api/report/report.py | 4 ++-- elementary/monitor/api/tests/schema.py | 2 +- elementary/monitor/api/tests/tests.py | 14 ++++++-------- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/elementary/monitor/api/report/report.py b/elementary/monitor/api/report/report.py index eface57b7..24026a839 100644 --- a/elementary/monitor/api/report/report.py +++ b/elementary/monitor/api/report/report.py @@ -150,8 +150,8 @@ def _serilize_models_runs(self, models_runs: List[ModelRunsSchema]) -> List[dict return [model_runs.dict(by_alias=True) for model_runs in models_runs] def _serilize_test_results( - self, test_results: Dict[str, List[TestResultSchema]] - ) -> Dict[str, List[dict]]: + self, test_results: Dict[Optional[str], List[TestResultSchema]] + ) -> Dict[Optional[str], List[dict]]: serializable_test_results = defaultdict(list) for model_unique_id, test_result in test_results.items(): serializable_test_results[model_unique_id].extend( diff --git a/elementary/monitor/api/tests/schema.py b/elementary/monitor/api/tests/schema.py index cb57cffc4..ca60615ae 100644 --- a/elementary/monitor/api/tests/schema.py +++ b/elementary/monitor/api/tests/schema.py @@ -74,7 +74,7 @@ class Config: class TestResultsWithTotalsSchema(BaseModel): - results: Dict[str, List[TestResultSchema]] = dict() + results: Dict[Optional[str], List[TestResultSchema]] = dict() totals: Dict[Optional[str], TotalsSchema] = dict() invocation: DbtInvocationSchema = Field(default_factory=DbtInvocationSchema) diff --git a/elementary/monitor/api/tests/tests.py b/elementary/monitor/api/tests/tests.py index b0554b1f4..32a7ef252 100644 --- a/elementary/monitor/api/tests/tests.py +++ b/elementary/monitor/api/tests/tests.py @@ -1,6 +1,6 @@ import re from collections import defaultdict -from typing import Any, DefaultDict, Dict, List, Optional, Union +from typing import Any, DefaultDict, Dict, List, Optional, Union, cast from dateutil import tz @@ -148,11 +148,10 @@ def get_test_results( if test_result.invocations_rank_index == 1 ] - tests_results: DefaultDict[str, List[TestResultSchema]] = defaultdict(list) + tests_results: DefaultDict[Optional[str], List[TestResultSchema]] = defaultdict( + list + ) for test_result_db_row in filtered_test_results_db_rows: - if not test_result_db_row.model_unique_id: - continue - metadata = self._get_test_metadata_from_test_result_db_row( test_result_db_row ) @@ -395,9 +394,8 @@ def _get_test_result_from_test_result_db_row( sample_data = test_result_db_row.sample_data if not disable_samples else None if test_result_db_row.test_type == "dbt_test": - if sample_data is not None and not isinstance(sample_data, list): - # Sanity check, shouldn't happen - raise Exception("Invalid sample data for dbt test") + # Sample data is always a list for non-elementary tests + sample_data = cast(Optional[list], sample_data) test_results = DbtTestResultSchema( display_name=test_result_db_row.test_name, From 89a62545222c5c2aa3bd9c17506ea6c0bc2f1b45 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Thu, 13 Jul 2023 16:49:21 +0300 Subject: [PATCH 20/21] pre-commit-config: also no need for pass_filenames --- .pre-commit-config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 72f5ac147..77f9c8c90 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,6 +22,7 @@ repos: entry: bash -c "git diff --cached -U0 | (! grep NO_COMMIT)" language: system require_serial: true + pass_filenames: false - repo: local hooks: From 462a7d2cd982bd5583b714e7dc7c3ef00a9c6b0e Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Thu, 13 Jul 2023 17:12:00 +0300 Subject: [PATCH 21/21] minor fix --- mypy.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypy.ini b/mypy.ini index 201d197aa..30f0bf27b 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,7 +1,7 @@ [mypy] packages = elementary exclude = ^elementary/monitor/dbt_project/ -check_untyped_defs = True +check_untyped_defs = true [mypy-alive_progress] ignore_missing_imports = True