From c9d861ff9e633b598f9daa399d356f4cb3005540 Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Thu, 14 Oct 2021 12:04:50 +0200 Subject: [PATCH] style: lint --- popmon/alerting/alerts_summary.py | 5 +- popmon/alerting/compute_tl_bounds.py | 9 ++- popmon/analysis/apply_func.py | 12 +++- popmon/analysis/comparison/hist_comparer.py | 11 +--- popmon/analysis/merge_statistics.py | 5 +- popmon/analysis/profiling/hist_profiler.py | 5 +- popmon/analysis/profiling/pull_calculator.py | 11 +--- popmon/base/module.py | 21 ++++--- popmon/base/pipeline.py | 22 +++---- popmon/hist/hist_splitter.py | 7 +-- popmon/io/file_reader.py | 4 +- popmon/io/file_writer.py | 5 +- popmon/pipeline/metrics_pipelines.py | 8 +-- popmon/pipeline/report.py | 5 +- popmon/stitching/hist_stitcher.py | 2 +- .../visualization/alert_section_generator.py | 5 +- popmon/visualization/histogram_section.py | 3 +- popmon/visualization/report_generator.py | 5 +- popmon/visualization/section_generator.py | 11 +++- .../traffic_light_section_generator.py | 10 +++- tests/popmon/base/test_pipeline.py | 4 +- tests/popmon/io/test_file_writer.py | 6 +- tools/pipeline_viz.py | 58 ++++++++++--------- 23 files changed, 118 insertions(+), 116 deletions(-) diff --git a/popmon/alerting/alerts_summary.py b/popmon/alerting/alerts_summary.py index 870abcce..ff21e568 100644 --- a/popmon/alerting/alerts_summary.py +++ b/popmon/alerting/alerts_summary.py @@ -32,8 +32,9 @@ class AlertsSummary(Module): It combines the alerts-summaries of all individual features into an artificial feature "_AGGREGATE_". """ - _input_keys = ("read_key", ) - _output_keys = ("store_key", ) + + _input_keys = ("read_key",) + _output_keys = ("store_key",) def __init__( self, diff --git a/popmon/alerting/compute_tl_bounds.py b/popmon/alerting/compute_tl_bounds.py index e8c77480..1eb1f3db 100644 --- a/popmon/alerting/compute_tl_bounds.py +++ b/popmon/alerting/compute_tl_bounds.py @@ -21,7 +21,7 @@ import copy import fnmatch from collections import defaultdict -from typing import Tuple, Any +from typing import Any, Tuple import numpy as np import pandas as pd @@ -116,7 +116,8 @@ class ComputeTLBounds(Module): meant to be generic. Then bounds can be stored as either raw values or as directly calculated values on the statistics of the data. """ - _input_keys = ("read_key", ) + + _input_keys = ("read_key",) _output_keys = ("store_key", "apply_funcs_key") def __init__( @@ -213,9 +214,7 @@ def transform(self, test_data: dict) -> Tuple[Any, Any]: # --- 1. tl bounds explicitly defined for a particular feature if feature in pkeys: explicit_cols = [ - pcol - for pcol in pkeys[feature] - if pcol in test_df.columns + pcol for pcol in pkeys[feature] if pcol in test_df.columns ] implicit_cols = set(pkeys[feature]) - set(explicit_cols) diff --git a/popmon/analysis/apply_func.py b/popmon/analysis/apply_func.py index 617fc669..37a4296c 100644 --- a/popmon/analysis/apply_func.py +++ b/popmon/analysis/apply_func.py @@ -33,8 +33,9 @@ class ApplyFunc(Module): Extra parameters (kwargs) can be passed to the apply function. """ + _input_keys = ("apply_to_key", "assign_to_key", "apply_funcs_key") - _output_keys = ("store_key", ) + _output_keys = ("store_key",) def __init__( self, @@ -85,7 +86,7 @@ def __init__( def get_description(self): if len(self.apply_funcs) > 0: - return " and ".join([x['func'].__name__ for x in self.apply_funcs]) + return " and ".join([x["func"].__name__ for x in self.apply_funcs]) elif self.apply_funcs_key: return f"functions from arg '{self.apply_funcs_key}'" else: @@ -139,7 +140,12 @@ def add_apply_func( } ) - def transform(self, apply_to_data: dict, assign_to_data: Optional[dict] = None, apply_funcs: Optional[list] = None): + def transform( + self, + apply_to_data: dict, + assign_to_data: Optional[dict] = None, + apply_funcs: Optional[list] = None, + ): """ Apply functions to specified feature and metrics diff --git a/popmon/analysis/comparison/hist_comparer.py b/popmon/analysis/comparison/hist_comparer.py index 9f51fb36..e542c6a5 100644 --- a/popmon/analysis/comparison/hist_comparer.py +++ b/popmon/analysis/comparison/hist_comparer.py @@ -380,16 +380,9 @@ def __init__( assign_to_key = read_key # make reference histogram(s) - hist_collector = ApplyFunc( - apply_to_key=read_key, - assign_to_key=assign_to_key - ) + hist_collector = ApplyFunc(apply_to_key=read_key, assign_to_key=assign_to_key) hist_collector.add_apply_func( - func=func_hist_collector, - hist_name=hist_col, - suffix="", - *args, - **kwargs + func=func_hist_collector, hist_name=hist_col, suffix="", *args, **kwargs ) # do histogram comparison diff --git a/popmon/analysis/merge_statistics.py b/popmon/analysis/merge_statistics.py index 3d6eb3be..232f8b98 100644 --- a/popmon/analysis/merge_statistics.py +++ b/popmon/analysis/merge_statistics.py @@ -27,8 +27,9 @@ class MergeStatistics(Module): """Merging dictionaries of features containing dataframes with statistics as its values.""" - _input_keys = ("read_keys", ) - _output_keys = ("store_key", ) + + _input_keys = ("read_keys",) + _output_keys = ("store_key",) def __init__(self, read_keys: List[str], store_key: str): """Initialize an instance of MergeStatistics. diff --git a/popmon/analysis/profiling/hist_profiler.py b/popmon/analysis/profiling/hist_profiler.py index 45571ac8..4e4a7022 100644 --- a/popmon/analysis/profiling/hist_profiler.py +++ b/popmon/analysis/profiling/hist_profiler.py @@ -57,8 +57,9 @@ class HistProfiler(Module): :param str index_col: key for index in split dictionary :param dict stats_functions: function_name, function(bin_labels, bin_counts) dictionary """ - _input_keys = ("read_key", ) - _output_keys = ("store_key", ) + + _input_keys = ("read_key",) + _output_keys = ("store_key",) def __init__( self, diff --git a/popmon/analysis/profiling/pull_calculator.py b/popmon/analysis/profiling/pull_calculator.py index 3e266545..63290d8b 100644 --- a/popmon/analysis/profiling/pull_calculator.py +++ b/popmon/analysis/profiling/pull_calculator.py @@ -131,11 +131,7 @@ def __init__( :param args: (tuple, optional): residual args passed on to mean and std functions :param kwargs: (dict, optional): residual kwargs passed on to mean and std functions """ - kws = { - "window": window, - "shift": shift, - **kwargs - } + kws = {"window": window, "shift": shift, **kwargs} super().__init__( rolling_mean, rolling_std, @@ -186,10 +182,7 @@ def __init__( :param args: (tuple, optional): residual args passed on to mean and std functions :param kwargs: (dict, optional): residual kwargs passed on to mean and std functions """ - kws = { - "shift": shift, - **kwargs - } + kws = {"shift": shift, **kwargs} super().__init__( expanding_mean, expanding_std, diff --git a/popmon/base/module.py b/popmon/base/module.py index 56b5f33c..13208c8d 100644 --- a/popmon/base/module.py +++ b/popmon/base/module.py @@ -24,6 +24,7 @@ class Module(ABC): """Base class used for modules in a pipeline.""" + _input_keys = None _output_keys = None @@ -101,18 +102,12 @@ def get_features(self, all_features: list) -> list: features = [k for k in features if k not in self.ignore_features] features_not_in_input = [ - feature - for feature in features - if feature not in all_features + feature for feature in features if feature not in all_features ] for feature in features_not_in_input: self.logger.warning(f'Feature "{feature}" not in input data; skipping.') - features = [ - feature - for feature in features - if feature in all_features - ] + features = [feature for feature in features if feature in all_features] return features def _transform(self, datastore): @@ -130,7 +125,9 @@ def _transform(self, datastore): else: inputs[key] = None - self.logger.debug(f"load(key={key}, key_value={key_value}, value={str(inputs[key]):.100s})") + self.logger.debug( + f"load(key={key}, key_value={key_value}, value={str(inputs[key]):.100s})" + ) # cache datastore self._datastore = datastore @@ -145,8 +142,10 @@ def _transform(self, datastore): for k, v in zip(self._output_keys, outputs): key_value = self.__dict__[k] - self.logger.debug(f"store(key={k}, key_value={key_value}, value={str(v):.100s})") - if key_value and len(key_value) > 0: # and v is not None: + self.logger.debug( + f"store(key={k}, key_value={key_value}, value={str(v):.100s})" + ) + if key_value and len(key_value) > 0: # and v is not None: datastore[key_value] = v return datastore diff --git a/popmon/base/pipeline.py b/popmon/base/pipeline.py index 3995235a..18b02146 100644 --- a/popmon/base/pipeline.py +++ b/popmon/base/pipeline.py @@ -85,9 +85,7 @@ def visualize(self, versioned=True, funcs=None, dsets=None): for module in self.modules: name = module.__class__.__name__ if isinstance(module, Pipeline): - modules.append( - module.visualize(versioned, funcs, dsets) - ) + modules.append(module.visualize(versioned, funcs, dsets)) else: in_keys = module.get_inputs() @@ -122,19 +120,15 @@ def visualize(self, versioned=True, funcs=None, dsets=None): modules.append( { - 'type': 'module', - 'name': f'{name}', - 'i': f'{funcs[name][id(module)]}', - 'desc': module.get_description(), - 'in': in_keys, - 'out': out_keys + "type": "module", + "name": f"{name}", + "i": f"{funcs[name][id(module)]}", + "desc": module.get_description(), + "in": in_keys, + "out": out_keys, } ) - data = { - 'type': 'subgraph', - 'name': self.__class__.__name__, - 'modules': modules - } + data = {"type": "subgraph", "name": self.__class__.__name__, "modules": modules} return data def to_json(self, file_name, versioned=True): diff --git a/popmon/hist/hist_splitter.py b/popmon/hist/hist_splitter.py index 43163414..27a1a883 100644 --- a/popmon/hist/hist_splitter.py +++ b/popmon/hist/hist_splitter.py @@ -37,8 +37,8 @@ class HistSplitter(Module): where time is the index and each row is a x:y histogram. """ - _input_keys = ("read_key", ) - _output_keys = ("store_key", ) + _input_keys = ("read_key",) + _output_keys = ("store_key",) def __init__( self, @@ -153,7 +153,6 @@ def transform(self, data: dict) -> dict: # turn divided dicts into dataframes with index divided = { - k: pd.DataFrame(v).set_index(self.index_col) - for k, v in divided.items() + k: pd.DataFrame(v).set_index(self.index_col) for k, v in divided.items() } return divided diff --git a/popmon/io/file_reader.py b/popmon/io/file_reader.py index 929bec0b..19353cb3 100644 --- a/popmon/io/file_reader.py +++ b/popmon/io/file_reader.py @@ -28,8 +28,8 @@ class FileReader(Module): """Module to read contents from a file, transform the contents with a function and write them to the datastore.""" - _input_keys = tuple() - _output_keys = ("store_key", ) + _input_keys = () + _output_keys = ("store_key",) def __init__( self, diff --git a/popmon/io/file_writer.py b/popmon/io/file_writer.py index 800729c2..6342291f 100644 --- a/popmon/io/file_writer.py +++ b/popmon/io/file_writer.py @@ -28,8 +28,9 @@ class FileWriter(Module): """Module transforms specific datastore content and writes it to a file.""" - _input_keys = ("read_key", ) - _output_keys = ("store_key", ) + + _input_keys = ("read_key",) + _output_keys = ("store_key",) def __init__( self, diff --git a/popmon/pipeline/metrics_pipelines.py b/popmon/pipeline/metrics_pipelines.py index ba0bff9d..3de19b23 100644 --- a/popmon/pipeline/metrics_pipelines.py +++ b/popmon/pipeline/metrics_pipelines.py @@ -382,13 +382,7 @@ def metrics_rolling_reference( ), ApplyFunc( apply_to_key="traffic_lights", - apply_funcs=[ - { - "func": traffic_light_summary, - "axis": 1, - "suffix": "" - } - ], + apply_funcs=[{"func": traffic_light_summary, "axis": 1, "suffix": ""}], assign_to_key="alerts", msg="Generating traffic light alerts summary.", ), diff --git a/popmon/pipeline/report.py b/popmon/pipeline/report.py index 7b31a9bf..a25789e1 100644 --- a/popmon/pipeline/report.py +++ b/popmon/pipeline/report.py @@ -425,8 +425,9 @@ class StabilityReport(Module): after running the pipeline and generating the report. Report can be represented as a HTML string, HTML file or Jupyter notebook's cell output. """ - _input_keys = ("read_key", ) - _output_keys = tuple() + + _input_keys = ("read_key",) + _output_keys = () def __init__(self, read_key="html_report"): """Initialize an instance of StabilityReport. diff --git a/popmon/stitching/hist_stitcher.py b/popmon/stitching/hist_stitcher.py index 8b482682..2b79e91f 100644 --- a/popmon/stitching/hist_stitcher.py +++ b/popmon/stitching/hist_stitcher.py @@ -29,7 +29,7 @@ class HistStitcher(Module): """Module stitches histograms by date""" _input_keys = ("read_key", "delta_key") - _output_keys = ("store_key", ) + _output_keys = ("store_key",) def __init__( self, diff --git a/popmon/visualization/alert_section_generator.py b/popmon/visualization/alert_section_generator.py index ca5712da..89ec31e4 100644 --- a/popmon/visualization/alert_section_generator.py +++ b/popmon/visualization/alert_section_generator.py @@ -36,8 +36,9 @@ class AlertSectionGenerator(Module): combines all the plots into a list which is stored together with the section name in a dictionary which later will be used for the report generation. """ + _input_keys = ("read_key", "static_bounds", "dynamic_bounds", "store_key") - _output_keys = ("store_key", ) + _output_keys = ("store_key",) def __init__( self, @@ -106,7 +107,7 @@ def transform( data_obj: dict, static_bounds: Optional[dict] = None, dynamic_bounds: Optional[dict] = None, - sections: Optional[list] = None + sections: Optional[list] = None, ): assert isinstance(data_obj, dict) if static_bounds is None: diff --git a/popmon/visualization/histogram_section.py b/popmon/visualization/histogram_section.py index e5cb75ee..2f685ac2 100644 --- a/popmon/visualization/histogram_section.py +++ b/popmon/visualization/histogram_section.py @@ -37,8 +37,9 @@ class HistogramSection(Module): """This module plots histograms of all selected features for the last 'n' periods.""" + _input_keys = ("read_key", "store_key") - _output_keys = ("store_key", ) + _output_keys = ("store_key",) def __init__( self, diff --git a/popmon/visualization/report_generator.py b/popmon/visualization/report_generator.py index b95ac0b3..d5d806fb 100644 --- a/popmon/visualization/report_generator.py +++ b/popmon/visualization/report_generator.py @@ -29,8 +29,9 @@ class ReportGenerator(Module): """This module takes already prepared section data, renders HTML section template with the data and glues sections together into one compressed report which is created based on the provided template. """ - _input_keys = ("read_key", ) - _output_keys = ("store_key", ) + + _input_keys = ("read_key",) + _output_keys = ("store_key",) def __init__(self, read_key, store_key): """Initialize an instance of ReportGenerator. diff --git a/popmon/visualization/section_generator.py b/popmon/visualization/section_generator.py index f1049858..342892ff 100644 --- a/popmon/visualization/section_generator.py +++ b/popmon/visualization/section_generator.py @@ -35,8 +35,9 @@ class SectionGenerator(Module): combines all the plots into a list which is stored together with the section name in a dictionary which later will be used for the report generation. """ + _input_keys = ("read_key", "static_bounds", "dynamic_bounds", "store_key") - _output_keys = ("store_key", ) + _output_keys = ("store_key",) def __init__( self, @@ -98,7 +99,13 @@ def __init__( def get_description(self): return self.section_name - def transform(self, data_obj: dict, static_bounds: Optional[dict] = None, dynamic_bounds: Optional[dict] = None, sections: Optional[list] = None): + def transform( + self, + data_obj: dict, + static_bounds: Optional[dict] = None, + dynamic_bounds: Optional[dict] = None, + sections: Optional[list] = None, + ): if static_bounds is None: static_bounds = {} if dynamic_bounds is None: diff --git a/popmon/visualization/traffic_light_section_generator.py b/popmon/visualization/traffic_light_section_generator.py index 56d19d26..662b9a22 100644 --- a/popmon/visualization/traffic_light_section_generator.py +++ b/popmon/visualization/traffic_light_section_generator.py @@ -40,8 +40,9 @@ class TrafficLightSectionGenerator(Module): combines all the plots into a list which is stored together with the section name in a dictionary which later will be used for the report generation. """ + _input_keys = ("read_key", "dynamic_bounds", "store_key") - _output_keys = ("store_key", ) + _output_keys = ("store_key",) def __init__( self, @@ -109,7 +110,12 @@ def __init__( def get_description(self): return self.section_name - def transform(self, data_obj: dict, dynamic_bounds: Optional[dict] = None, sections: Optional[list] = None): + def transform( + self, + data_obj: dict, + dynamic_bounds: Optional[dict] = None, + sections: Optional[list] = None, + ): assert isinstance(data_obj, dict) if dynamic_bounds is None: dynamic_bounds = {} diff --git a/tests/popmon/base/test_pipeline.py b/tests/popmon/base/test_pipeline.py index 650a1c71..79c22908 100644 --- a/tests/popmon/base/test_pipeline.py +++ b/tests/popmon/base/test_pipeline.py @@ -6,8 +6,8 @@ class LogTransformer(Module): - _input_keys = ("input_key", ) - _output_keys = ("output_key", ) + _input_keys = ("input_key",) + _output_keys = ("output_key",) def __init__(self, input_key, output_key): super().__init__() diff --git a/tests/popmon/io/test_file_writer.py b/tests/popmon/io/test_file_writer.py index b505b4d0..7471a067 100644 --- a/tests/popmon/io/test_file_writer.py +++ b/tests/popmon/io/test_file_writer.py @@ -41,8 +41,8 @@ def test_file_writer_not_a_func(): def test_file_writer_df(): datastore = get_ready_ds() - FileWriter("my_data", store_key="transformed_data", apply_func=to_pandas)._transform( - datastore - ) + FileWriter( + "my_data", store_key="transformed_data", apply_func=to_pandas + )._transform(datastore) assert datastore["my_data"] == DATA assert datastore["transformed_data"].to_dict() == to_pandas(DATA).to_dict() diff --git a/tools/pipeline_viz.py b/tools/pipeline_viz.py index 69f2f117..8e50ad43 100644 --- a/tools/pipeline_viz.py +++ b/tools/pipeline_viz.py @@ -6,7 +6,12 @@ from networkx.drawing.nx_agraph import to_agraph -def generate_pipeline_vizualisation(input_file, output_file, include_subgraphs: bool = False, include_labels: bool = False): +def generate_pipeline_vizualisation( + input_file, + output_file, + include_subgraphs: bool = False, + include_labels: bool = False, +): data = Path(input_file).read_text() data = json.loads(data) @@ -14,22 +19,17 @@ def generate_pipeline_vizualisation(input_file, output_file, include_subgraphs: modules = [] def populate(item): - if item['type'] == 'subgraph': + if item["type"] == "subgraph": mods = [] - for m in item['modules']: + for m in item["modules"]: mods += populate(m) - subgraphs.append( - { - 'modules': mods, - 'name': item['name'] - } - ) + subgraphs.append({"modules": mods, "name": item["name"]}) return mods - elif item['type'] == 'module': + elif item["type"] == "module": modules.append(item) name = f"{item['name']}_{item['i']}" - return [name]+list(item["out"].values()) + return [name] + list(item["out"].values()) else: raise ValueError() @@ -38,7 +38,7 @@ def populate(item): G = nx.DiGraph() for module in modules: label = f"<{module['name']}" - d = module.get('desc', '') + d = module.get("desc", "") if len(d) > 0: label += f"
{d}" label += ">" @@ -46,34 +46,38 @@ def populate(item): # unique name name = f"{module['name']}_{module['i']}" - G.add_node(name, shape='rectangle', fillcolor='chartreuse', style='filled', label=label) - + G.add_node( + name, shape="rectangle", fillcolor="chartreuse", style="filled", label=label + ) - for k, v in module['in'].items(): + for k, v in module["in"].items(): kwargs = {} if include_labels: - kwargs['headlabel'] = k + kwargs["headlabel"] = k G.add_edge(v, name, **kwargs) - for k, v in module['out'].items(): + for k, v in module["out"].items(): kwargs = {} if include_labels: - kwargs['taillabel'] = k + kwargs["taillabel"] = k G.add_edge(name, v, **kwargs) # set defaults - G.graph['graph'] = {'rankdir':'TD'} - G.graph['node'] = {'shape':'oval', 'fillcolor': 'orange', 'style': 'filled'} - G.graph['edge'] = {'fontcolor':"gray50"} + G.graph["graph"] = {"rankdir": "TD"} + G.graph["node"] = {"shape": "oval", "fillcolor": "orange", "style": "filled"} + G.graph["edge"] = {"fontcolor": "gray50"} A = to_agraph(G) if include_subgraphs: for idx, subgraph in enumerate(subgraphs): - H = A.subgraph(subgraph["modules"], name=f'cluster_{idx}_{subgraph["name"].lower().replace(" ", "_")}') + H = A.subgraph( + subgraph["modules"], + name=f'cluster_{idx}_{subgraph["name"].lower().replace(" ", "_")}', + ) H.graph_attr["color"] = "blue" H.graph_attr["label"] = subgraph["name"] H.graph_attr["style"] = "dotted" - A.layout('dot') + A.layout("dot") A.draw(output_file) @@ -81,17 +85,17 @@ def populate(item): data_path = Path("<...>") input_file = data_path / "pipeline_self_reference_unversioned.json" - output_file = 'popmon-report-pipeline-subgraphs-unversioned.pdf' + output_file = "popmon-report-pipeline-subgraphs-unversioned.pdf" generate_pipeline_vizualisation(input_file, output_file, include_subgraphs=True) input_file = data_path / "pipeline_self_reference_unversioned.json" - output_file = 'popmon-report-pipeline-unversioned.pdf' + output_file = "popmon-report-pipeline-unversioned.pdf" generate_pipeline_vizualisation(input_file, output_file, include_subgraphs=False) input_file = data_path / "pipeline_self_reference_versioned.json" - output_file = 'popmon-report-pipeline-subgraphs-versioned.pdf' + output_file = "popmon-report-pipeline-subgraphs-versioned.pdf" generate_pipeline_vizualisation(input_file, output_file, include_subgraphs=True) input_file = data_path / "pipeline_self_reference_versioned.json" - output_file = 'popmon-report-pipeline-versioned.pdf' + output_file = "popmon-report-pipeline-versioned.pdf" generate_pipeline_vizualisation(input_file, output_file, include_subgraphs=False)