diff --git a/popmon/utils.py b/popmon/utils.py new file mode 100644 index 00000000..35bbbeab --- /dev/null +++ b/popmon/utils.py @@ -0,0 +1,39 @@ +# Copyright (c) 2021 ING Wholesale Banking Advanced Analytics +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
import fnmatch
from textwrap import shorten  # noqa: F401 -- no longer used below; kept so the module namespace is unchanged
from typing import Iterable, Optional


def short_date(date: str, width: int = 22) -> str:
    """Truncate a date label to at most ``width`` characters.

    This is a plain prefix cut, matching the per-module helpers this function
    replaces (``date if len(date) <= 22 else date[:22]``).

    ``textwrap.shorten`` is deliberately not used: it collapses whitespace and
    truncates at word boundaries, so a timestamp such as
    ``"2020-01-01 00:00:00.000000"`` would be reduced to ``"2020-01-01"`` and
    distinct timestamps on the same day would get identical labels.

    :param date: string representation of the date / time-bin label.
    :param width: maximum number of characters to keep (default 22).
    :return: ``date`` unchanged when it already fits, otherwise its first
        ``width`` characters.
    """
    return date[:width]


def filter_metrics(metrics, ignore_stat_endswith, show_stats: Optional[Iterable]):
    """Select the metric names to show.

    A metric is kept when it does not end with any of the suffixes in
    ``ignore_stat_endswith`` and, if ``show_stats`` is given, it matches at
    least one of those ``fnmatch``-style patterns.

    :param metrics: iterable of metric names (strings), e.g. ``df.columns``.
    :param ignore_stat_endswith: iterable of suffixes; metrics ending with any
        of them are dropped. ``None`` is treated as "no suffixes".
    :param show_stats: iterable of ``fnmatch`` patterns, or ``None`` to skip
        pattern filtering. An empty iterable selects nothing.
    :return: list of the retained metric names, in their original order.
    """
    # Materialise both filters once: each is re-scanned for every metric, so a
    # one-shot iterator (e.g. a generator) would otherwise be exhausted after
    # the first metric and silently stop filtering.
    suffixes = () if ignore_stat_endswith is None else tuple(ignore_stat_endswith)
    patterns = None if show_stats is None else list(show_stats)

    # str.endswith accepts a tuple of suffixes; an empty tuple never matches.
    kept = [m for m in metrics if not m.endswith(suffixes)]

    if patterns is not None:
        kept = [
            m for m in kept if any(fnmatch.fnmatch(m, pattern) for pattern in patterns)
        ]
    return kept
- metrics = [ - m - for m in df.columns - if not any([m.endswith(s) for s in self.ignore_stat_endswith]) - ] - if self.show_stats is not None: - metrics = [ - m - for m in metrics - if any(fnmatch.fnmatch(m, pattern) for pattern in self.show_stats) - ] + metrics = filter_metrics( + df.columns, self.ignore_stat_endswith, self.show_stats + ) plots = [] if self.plot_overview: diff --git a/popmon/visualization/histogram_section.py b/popmon/visualization/histogram_section.py index d5d7b7aa..2ec5d348 100644 --- a/popmon/visualization/histogram_section.py +++ b/popmon/visualization/histogram_section.py @@ -32,6 +32,7 @@ ) from ..base import Module from ..config import get_stat_description +from ..utils import short_date from ..visualization.utils import plot_overlay_1d_histogram_b64 @@ -83,9 +84,6 @@ def transform(self, datastore): self.logger.info(f'Generating section "{self.section_name}".') - def short_date(date): - return date if len(date) <= 22 else date[:22] - for feature in tqdm(features, ncols=100): df = data_obj.get(feature, pd.DataFrame()) diff --git a/popmon/visualization/section_generator.py b/popmon/visualization/section_generator.py index 55bf2603..eae583cb 100644 --- a/popmon/visualization/section_generator.py +++ b/popmon/visualization/section_generator.py @@ -28,6 +28,7 @@ from ..base import Module from ..config import get_stat_description +from ..utils import filter_metrics, short_date from ..visualization.utils import _prune, plot_bars_b64 @@ -112,9 +113,6 @@ def transform(self, datastore): f'Generating section "{self.section_name}". 
skip empty plots: {self.skip_empty_plots}' ) - def short_date(date): - return date if len(date) <= 22 else date[:22] - for feature in tqdm(features, ncols=100): df = data_obj.get(feature, pd.DataFrame()) fdbounds = dynamic_bounds.get(feature, pd.DataFrame(index=df.index)) @@ -129,18 +127,10 @@ def short_date(date): ) dates = [short_date(str(date)) for date in df.index.tolist()] - # get base64 encoded plot for each metric; do parallel processing to speed up. - metrics = [ - m - for m in df.columns - if not any([m.endswith(s) for s in self.ignore_stat_endswith]) - ] - if self.show_stats is not None: - metrics = [ - m - for m in metrics - if any(fnmatch.fnmatch(m, pattern) for pattern in self.show_stats) - ] + metrics = filter_metrics( + df.columns, self.ignore_stat_endswith, self.show_stats + ) + plots = Parallel(n_jobs=num_cores)( delayed(_plot_metric)( feature, diff --git a/popmon/visualization/traffic_light_section_generator.py b/popmon/visualization/traffic_light_section_generator.py index 184b4161..ab11e584 100644 --- a/popmon/visualization/traffic_light_section_generator.py +++ b/popmon/visualization/traffic_light_section_generator.py @@ -18,7 +18,6 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -import fnmatch import multiprocessing import numpy as np @@ -28,6 +27,7 @@ from ..base import Module from ..config import get_stat_description +from ..utils import filter_metrics, short_date from ..visualization.utils import ( _prune, plot_traffic_lights_alerts_b64, @@ -120,9 +120,6 @@ def transform(self, datastore): f'Generating section "{self.section_name}". 
skip empty plots: {self.skip_empty_plots}' ) - def short_date(date): - return date if len(date) <= 22 else date[:22] - for feature in tqdm(features, ncols=100): df = data_obj.get(feature, pd.DataFrame()) fdbounds = dynamic_bounds.get(feature, pd.DataFrame(index=df.index)) @@ -137,18 +134,9 @@ def short_date(date): ) dates = [short_date(str(date)) for date in df.index.tolist()] - # get base64 encoded plot for each metric; do parallel processing to speed up. - metrics = [ - m - for m in df.columns - if not any([m.endswith(s) for s in self.ignore_stat_endswith]) - ] - if self.show_stats is not None: - metrics = [ - m - for m in metrics - if any(fnmatch.fnmatch(m, pattern) for pattern in self.show_stats) - ] + metrics = filter_metrics( + df.columns, self.ignore_stat_endswith, self.show_stats + ) plots = [] if self.plot_overview: