Skip to content

Commit

Permalink
refactor: module transform rather than _transform
Browse files Browse the repository at this point in the history
Refactor using metaclass
  • Loading branch information
sbrugman committed Nov 24, 2021
1 parent 871d2c6 commit c3bffb0
Show file tree
Hide file tree
Showing 9 changed files with 91 additions and 73 deletions.
133 changes: 77 additions & 56 deletions popmon/base/module.py
Expand Up @@ -19,10 +19,73 @@


import logging
from abc import ABC
from abc import ABCMeta
from functools import wraps


class Module(ABC):
def datastore_helper(func):
    """Decorator for passing and storing only the relevant keys in the datastore to
    the transform() method.

    The wrapped ``transform`` is called with the resolved input values (looked up
    in the datastore via the module's ``_input_keys`` attributes) as positional
    arguments. Its return value(s) are written back into the datastore under the
    keys named by the module's ``_output_keys`` attributes. The (possibly updated)
    datastore is returned.

    :param func: the undecorated ``transform`` method of a Module subclass
    :return: wrapper with signature ``(self, datastore) -> datastore``
    """

    @wraps(func)
    def _transform(self, datastore):
        """Transformation helper function"""
        inputs = {}
        self.logger.debug(f"load from: {type(self)}")
        for key in self._input_keys:
            # The module attribute holds the datastore key (or list of keys) to read.
            key_value = self.__dict__[key]
            # Truthiness covers None, "" and [] — no separate length check needed.
            if key_value:
                if isinstance(key_value, list):
                    inputs[key] = [datastore.get(k) for k in key_value]
                else:
                    inputs[key] = datastore.get(key_value)
            else:
                inputs[key] = None

            self.logger.debug(
                f"load(key={key}, key_value={key_value}, value={str(inputs[key]):.100s})"
            )

        # transformation
        outputs = func(self, *inputs.values())

        # transform returns None if no update needs to be made
        if outputs is not None:
            # Normalize a single return value to a 1-tuple so zip() below works.
            if len(self._output_keys) == 1:
                outputs = (outputs,)

            for k, v in zip(self._output_keys, outputs):
                key_value = self.__dict__[k]
                self.logger.debug(
                    f"store(key={k}, key_value={key_value}, value={str(v):.100s})"
                )
                # Skip outputs whose datastore key is not configured (empty/None).
                if key_value:
                    datastore[key_value] = v

        return datastore

    return _transform


class ModuleMetaClass(type):
    """Metaclass that wraps all transform() methods using the datastore_helper
    This obviates the need to decorate all methods in subclasses"""

    def __new__(cls, name, bases, local):
        # Wrap a class-local transform() (if any) so it receives resolved
        # datastore values instead of the raw datastore dict.
        if "transform" in local:
            value = local["transform"]
            if callable(value):
                local["transform"] = datastore_helper(value)
        # Use super() rather than type.__new__ so this metaclass cooperates
        # correctly when combined with other metaclasses (e.g. ABCMeta via
        # combine_classes), keeping the full metaclass MRO in the chain.
        return super().__new__(cls, name, bases, local)


def combine_classes(*args):
    """Build a new class that combines multiple metaclasses.

    The resulting class inherits from all given classes, and its name is the
    concatenation of their names.

    :param args: the (meta)classes to combine, in MRO order
    :return: the newly created combined class
    """
    name_parts = []
    for klass in args:
        name_parts.append(klass.__name__)
    combined_name = "".join(name_parts)
    return type(combined_name, args, {})


class Module(metaclass=combine_classes(ABCMeta, ModuleMetaClass)):
"""Abstract base class used for modules in a pipeline."""

_input_keys = None
Expand All @@ -35,23 +98,21 @@ def __init__(self):
self.feature_begins_with = []
self.ignore_features = []

    def _get_values(self, keys):
        """Get the class attribute values for certain keys.

        :param keys: attribute names to look up on this module instance
        :return: dict mapping attribute name -> its value, skipping empty/None values
        """
        values = {}
        for x in keys:
            value = self.__dict__[x]
            # NOTE(review): `value not in values` tests membership against the
            # dict *keys* (attribute names), not against previously collected
            # values — confirm this de-duplication is intended. Also, a
            # list-valued attribute is unhashable and would raise TypeError on
            # this membership test — verify against callers.
            if value != "" and value is not None and value not in values:
                values[x] = value
        return values

def get_inputs(self):
in_keys = {}
for x in self._input_keys:
in_key = self.__dict__[x]
if in_key != "" and in_key is not None and in_key not in in_keys:
in_keys[x] = in_key
return in_keys
return self._get_values(self._input_keys)

def get_outputs(self):
out_keys = {}
for x in self._output_keys:
out_key = self.__dict__[x]
if out_key != "" and out_key is not None and out_key not in out_keys:
out_keys[x] = out_key
return out_keys

# @abstractmethod
return self._get_values(self._output_keys)

    def get_description(self):
        """Return a human-readable description of this module (empty by default;
        subclasses may override)."""
        return ""

Expand Down Expand Up @@ -110,46 +171,6 @@ def get_features(self, all_features: list) -> list:
features = [feature for feature in features if feature in all_features]
return features

def _transform(self, datastore):
"""Transformation helper function"""

inputs = {}
self.logger.debug(f"load from: {type(self)}")
for key in self._input_keys:
key_value = self.__dict__[key]
if key_value and len(key_value) > 0:
if isinstance(key_value, list):
inputs[key] = [datastore.get(k) for k in key_value]
else:
inputs[key] = datastore.get(key_value)
else:
inputs[key] = None

self.logger.debug(
f"load(key={key}, key_value={key_value}, value={str(inputs[key]):.100s})"
)

# cache datastore
self._datastore = datastore

# transformation
outputs = self.transform(*list(inputs.values()))

# transform returns None if no update needs to be made
if outputs is not None:
if len(self._output_keys) == 1:
outputs = (outputs,)

for k, v in zip(self._output_keys, outputs):
key_value = self.__dict__[k]
self.logger.debug(
f"store(key={k}, key_value={key_value}, value={str(v):.100s})"
)
if key_value and len(key_value) > 0: # and v is not None:
datastore[key_value] = v

return datastore

def transform(self, *args):
"""Central function of the module.
Expand Down
5 changes: 1 addition & 4 deletions popmon/base/pipeline.py
Expand Up @@ -66,8 +66,5 @@ def transform(self, datastore):

for module in self.modules:
self.logger.debug(f"transform {module.__class__.__name__}")
if isinstance(module, Pipeline):
datastore = module.transform(datastore)
else:
datastore = module._transform(datastore)
datastore = module.transform(datastore)
return datastore
2 changes: 1 addition & 1 deletion popmon/io/json_reader.py
Expand Up @@ -37,4 +37,4 @@ def __init__(self, file_path: Union[str, Path], store_key: str):
super().__init__(store_key, file_path, apply_func=json.loads)

def transform(self, *args):
return super().transform(*args)
return super().transform.__wrapped__(self, *args)
4 changes: 2 additions & 2 deletions tests/popmon/alerting/test_compute_tl_bounds.py
Expand Up @@ -35,7 +35,7 @@ def test_compute_traffic_light_bounds():
monitoring_rules=conf["monitoring_rules"],
)

output = module._transform(datastore)["output_data"]
output = module.transform(datastore)["output_data"]
assert "dummy_feature:mae" not in output.keys()
assert output["the_feature:mae"] == [8, 4, 2, 2]
assert output["the_feature:mse"] == [0.2, 0.11, 0.09, 0]
Expand All @@ -60,7 +60,7 @@ def test_compute_traffic_light_funcs():
monitoring_rules=conf["monitoring_rules"],
)

output = module._transform(datastore)["output_data"]
output = module.transform(datastore)["output_data"]
assert len(output) == 3

assert output[0]["features"] == ["dummy_feature"]
Expand Down
2 changes: 1 addition & 1 deletion tests/popmon/analysis/profiling/test_apply_func.py
Expand Up @@ -60,7 +60,7 @@ def func(x):
module.add_apply_func(np.mean, entire=True)
module.add_apply_func(func)

datastore = module._transform(datastore)
datastore = module.transform(datastore)

p = datastore["profiled"]["asc_numbers"]

Expand Down
2 changes: 1 addition & 1 deletion tests/popmon/analysis/test_merge_statistics.py
Expand Up @@ -40,7 +40,7 @@ def test_merge_statistics():
}
datastore = MergeStatistics(
read_keys=["first_df", "second_df"], store_key="output_df"
)._transform(datastore)
).transform(datastore)

pd.testing.assert_frame_equal(df1.combine_first(df2), out)
pd.testing.assert_frame_equal(datastore["output_df"]["feature_1"], out)
Expand Down
2 changes: 1 addition & 1 deletion tests/popmon/io/test_file_reader.py
Expand Up @@ -10,7 +10,7 @@ def test_file_reader_json():
store_key="example",
apply_func=json.loads,
)
datastore = fr._transform(datastore={})
datastore = fr.transform(datastore={})

assert datastore["example"]["boolean"]
assert len(datastore["example"]["array"]) == 3
Expand Down
12 changes: 6 additions & 6 deletions tests/popmon/io/test_file_writer.py
Expand Up @@ -23,26 +23,26 @@ def to_pandas(data):

def test_file_writer_json():
datastore = get_ready_ds()
FileWriter("my_data", apply_func=to_json)._transform(datastore)
FileWriter("my_data", apply_func=to_json).transform(datastore)
assert datastore["my_data"] == to_json(DATA)


def test_file_writer_json_with_kwargument():
datastore = get_ready_ds()
FileWriter("my_data", apply_func=to_json, indent=4)._transform(datastore)
FileWriter("my_data", apply_func=to_json, indent=4).transform(datastore)
assert datastore["my_data"] == to_json(DATA, indent=4)


def test_file_writer_not_a_func():
datastore = get_ready_ds()
with pytest.raises(TypeError):
FileWriter("my_data", apply_func={})._transform(datastore)
FileWriter("my_data", apply_func={}).transform(datastore)


def test_file_writer_df():
datastore = get_ready_ds()
FileWriter(
"my_data", store_key="transformed_data", apply_func=to_pandas
)._transform(datastore)
FileWriter("my_data", store_key="transformed_data", apply_func=to_pandas).transform(
datastore
)
assert datastore["my_data"] == DATA
assert datastore["transformed_data"].to_dict() == to_pandas(DATA).to_dict()
2 changes: 1 addition & 1 deletion tests/popmon/io/test_json_reader.py
Expand Up @@ -4,7 +4,7 @@

def test_json_reader():
jr = JsonReader(file_path=resources.data("example.json"), store_key="example")
datastore = jr._transform(datastore={})
datastore = jr.transform(datastore={})

assert datastore["example"]["boolean"]
assert len(datastore["example"]["array"]) == 3
Expand Down

0 comments on commit c3bffb0

Please sign in to comment.