From feb3ded86ab03b620725e13c6276800967a4d9b9 Mon Sep 17 00:00:00 2001 From: QX Teo <37101453+qx-teo@users.noreply.github.com> Date: Tue, 19 Oct 2021 17:17:24 -0400 Subject: [PATCH 1/4] Change outlier checks as warnings Also create summary report if too many warnings are created. --- _delphi_utils_python/delphi_utils/validator/dynamic.py | 4 ++-- _delphi_utils_python/delphi_utils/validator/report.py | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/validator/dynamic.py b/_delphi_utils_python/delphi_utils/validator/dynamic.py index 439fc738e..b81a445d1 100644 --- a/_delphi_utils_python/delphi_utils/validator/dynamic.py +++ b/_delphi_utils_python/delphi_utils/validator/dynamic.py @@ -525,7 +525,7 @@ def outlier_nearby(frame): if source_outliers.shape[0] > 0: for time_val in source_outliers["time_value"].unique(): - report.add_raised_error( + report.add_raised_warning( ValidationFailure( "check_positive_negative_spikes", time_val, @@ -637,7 +637,7 @@ def check_avg_val_vs_reference(self, df_to_test, df_to_reference, checking_date, thres["mean_abs_z"])).any() if mean_z_high or mean_abs_z_high: - report.add_raised_error( + report.add_raised_warning( ValidationFailure( "check_test_vs_reference_avg_changed", checking_date, diff --git a/_delphi_utils_python/delphi_utils/validator/report.py b/_delphi_utils_python/delphi_utils/validator/report.py index aff9196e5..f948dfb61 100644 --- a/_delphi_utils_python/delphi_utils/validator/report.py +++ b/_delphi_utils_python/delphi_utils/validator/report.py @@ -103,6 +103,15 @@ def log(self, logger=None): checks_suppressed = self.num_suppressed, warnings = len(self.raised_warnings), phase="validation") + excessive_warnings = len(self.raised_warnings) > 200 or len(self.raised_warnings) / self.total_checks > 0.015 + if excessive_warnings: + logger.info("Excessive number of warnings", + data_source = self.data_source, + checks_run = self.total_checks, + checks_failed = len(self.unsuppressed_errors), + checks_suppressed = self.num_suppressed, + warnings = len(self.raised_warnings), + phase = "validation") for error in self.unsuppressed_errors: logger.critical(str(error), phase="validation") for warning in self.raised_warnings: From 7cce2a28fda590477661777d75512d8b82a9dee4 Mon Sep 17 00:00:00 2001 From: QX Teo <37101453+qx-teo@users.noreply.github.com> Date: Wed, 20 Oct 2021 01:05:08 -0400 Subject: [PATCH 2/4] Fix lint --- _delphi_utils_python/delphi_utils/validator/report.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/_delphi_utils_python/delphi_utils/validator/report.py b/_delphi_utils_python/delphi_utils/validator/report.py index f948dfb61..9f2f01425 100644 --- a/_delphi_utils_python/delphi_utils/validator/report.py +++ b/_delphi_utils_python/delphi_utils/validator/report.py @@ -103,7 +103,8 @@ def log(self, logger=None): checks_suppressed = self.num_suppressed, warnings = len(self.raised_warnings), phase="validation") - excessive_warnings = len(self.raised_warnings) > 200 or len(self.raised_warnings) / self.total_checks > 0.015 + excessive_warnings = len(self.raised_warnings) > 200 or \ + len(self.raised_warnings) / self.total_checks > 0.015 if excessive_warnings: logger.info("Excessive number of warnings", data_source = self.data_source, From 22856b872cb2960868ac32957838a8761f294730 Mon Sep 17 00:00:00 2001 From: QX Teo <37101453+qx-teo@users.noreply.github.com> Date: Wed, 20 Oct 2021 01:37:30 -0400 Subject: [PATCH 3/4] Fix Tests Converted outlier tests to check for raised_warnings rather than raised_errors Fixed a division by 0 possibility by first checking the denominator --- .../delphi_utils/validator/report.py | 5 ++-- .../tests/validator/test_dynamic.py | 30 +++++++++---------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/validator/report.py b/_delphi_utils_python/delphi_utils/validator/report.py index 9f2f01425..31f4b4927 100644 --- a/_delphi_utils_python/delphi_utils/validator/report.py +++ b/_delphi_utils_python/delphi_utils/validator/report.py @@ -103,8 +103,9 @@ def log(self, logger=None): checks_suppressed = self.num_suppressed, warnings = len(self.raised_warnings), phase="validation") - excessive_warnings = len(self.raised_warnings) > 200 or \ - len(self.raised_warnings) / self.total_checks > 0.015 + excessive_warnings = self.total_checks > 0 and \ + (len(self.raised_warnings) > 200 or \ + len(self.raised_warnings) / self.total_checks > 0.015) if excessive_warnings: logger.info("Excessive number of warnings", data_source = self.data_source, diff --git a/_delphi_utils_python/tests/validator/test_dynamic.py b/_delphi_utils_python/tests/validator/test_dynamic.py index 5c92d7e99..321ce63fb 100644 --- a/_delphi_utils_python/tests/validator/test_dynamic.py +++ b/_delphi_utils_python/tests/validator/test_dynamic.py @@ -202,8 +202,8 @@ def test_10x_val(self): test_df, ref_df, datetime.combine(date.today(), datetime.min.time()), "geo", "signal", report) - assert len(report.raised_errors) == 1 - assert report.raised_errors[0].check_name == "check_test_vs_reference_avg_changed" + assert len(report.raised_warnings) == 1 + assert report.raised_warnings[0].check_name == "check_test_vs_reference_avg_changed" def test_100x_val(self): validator = DynamicValidator(self.params) @@ -223,8 +223,8 @@ def test_100x_val(self): test_df, ref_df, datetime.combine(date.today(), datetime.min.time()), "geo", "signal", report) - assert len(report.raised_errors) == 1 - assert report.raised_errors[0].check_name == "check_test_vs_reference_avg_changed" + assert len(report.raised_warnings) == 1 + assert report.raised_warnings[0].check_name == "check_test_vs_reference_avg_changed" def test_1000x_val(self): validator = DynamicValidator(self.params) @@ -244,8 +244,8 @@ def test_1000x_val(self): test_df, ref_df, datetime.combine(date.today(), datetime.min.time()), "geo", "signal", report) - assert len(report.raised_errors) == 1 - assert report.raised_errors[0].check_name == "check_test_vs_reference_avg_changed" + assert len(report.raised_warnings) == 1 + assert report.raised_warnings[0].check_name == "check_test_vs_reference_avg_changed" class TestDataOutlier: @@ -292,8 +292,8 @@ def test_pos_outlier(self): validator.check_positive_negative_spikes( test_df, ref_df, "state", "signal", report) - assert len(report.raised_errors) == 2 - assert report.raised_errors[0].check_name == "check_positive_negative_spikes" + assert len(report.raised_warnings) == 2 + assert report.raised_warnings[0].check_name == "check_positive_negative_spikes" def test_neg_outlier(self): validator = DynamicValidator(self.params) @@ -329,8 +329,8 @@ def test_neg_outlier(self): validator.check_positive_negative_spikes( test_df, ref_df, "state", "signal", report) - assert len(report.raised_errors) == 2 - assert report.raised_errors[0].check_name == "check_positive_negative_spikes" + assert len(report.raised_warnings) == 2 + assert report.raised_warnings[0].check_name == "check_positive_negative_spikes" def test_zero_outlier(self): validator = DynamicValidator(self.params) @@ -365,8 +365,8 @@ def test_zero_outlier(self): validator.check_positive_negative_spikes( test_df, ref_df, "state", "signal", report) - assert len(report.raised_errors) == 1 - assert report.raised_errors[0].check_name == "check_positive_negative_spikes" + assert len(report.raised_warnings) == 1 + assert report.raised_warnings[0].check_name == "check_positive_negative_spikes" def test_no_outlier(self): validator = DynamicValidator(self.params) @@ -402,7 +402,7 @@ def test_no_outlier(self): validator.check_positive_negative_spikes( test_df, ref_df, "state", "signal", report) - assert len(report.raised_errors) == 0 + assert len(report.raised_warnings) == 0 def test_source_api_overlap(self): validator = DynamicValidator(self.params) @@ -438,5 +438,5 @@ def test_source_api_overlap(self): validator.check_positive_negative_spikes( test_df, ref_df, "state", "signal", report) - assert len(report.raised_errors) == 2 - assert report.raised_errors[0].check_name == "check_positive_negative_spikes" + assert len(report.raised_warnings) == 2 + assert report.raised_warnings[0].check_name == "check_positive_negative_spikes" From 95fdfb021ecea7a19dddf71043b4a554b451d43d Mon Sep 17 00:00:00 2001 From: QX Teo <37101453+qx-teo@users.noreply.github.com> Date: Wed, 20 Oct 2021 11:40:58 -0400 Subject: [PATCH 4/4] Adding excessive_warnings explanation --- _delphi_utils_python/delphi_utils/validator/report.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/_delphi_utils_python/delphi_utils/validator/report.py b/_delphi_utils_python/delphi_utils/validator/report.py index 31f4b4927..6b12146e1 100644 --- a/_delphi_utils_python/delphi_utils/validator/report.py +++ b/_delphi_utils_python/delphi_utils/validator/report.py @@ -103,6 +103,8 @@ def log(self, logger=None): checks_suppressed = self.num_suppressed, warnings = len(self.raised_warnings), phase="validation") + # Threshold for slack alerts if warnings are excessive, + # Currently extremely strict, set by observation of 1 month's logs excessive_warnings = self.total_checks > 0 and \ (len(self.raised_warnings) > 200 or \ len(self.raised_warnings) / self.total_checks > 0.015)