Update dependencies (#1306)
* Update requirements.in

* Fix some unittests

* Fix tests/gordo/server unit-tests

* Update black

* Fix issue with packaging dependency

* Fix tests/gordo/serializer unit-tests

* black reformatting

* Fix docstring unit-tests

* Remove debug lines from unit-test

* verbose=0 for predict

* Fix mypy complaints

* black reformatting

* create_instance function

* Docstring Examples

* Using create_instance in from_definition module

* Black reformatting

* Fix mypy issue

* Fix test_client.py unit-tests

* Update full_requirements.txt

* tensorflow>=2.11,<2.13

* Remove one debug assert message
koropets committed Apr 3, 2023
1 parent 4b54d1f commit cdc3779
Showing 16 changed files with 314 additions and 137 deletions.
2 changes: 1 addition & 1 deletion gordo/cli/custom_types.py
@@ -71,7 +71,7 @@ def convert(
             ipaddress.ip_address(value)
             return value
         except ValueError as e:
-            self.fail(e)
+            self.fail(str(e))


 def key_value_par(val) -> Tuple[str, str]:
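The one-line fix reflects that click's ParamType.fail is annotated to take a str message, so passing the ValueError object itself is what mypy flagged. A minimal sketch of the corrected pattern (the class name here is hypothetical, not gordo's actual type):

    import ipaddress

    import click

    class HostIP(click.ParamType):  # hypothetical name, for illustration only
        name = "host_ip"

        def convert(self, value, param, ctx):
            try:
                ipaddress.ip_address(value)
                return value
            except ValueError as e:
                # fail() expects a str message; passing the exception
                # object directly is what mypy rejected.
                self.fail(str(e), param, ctx)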
16 changes: 13 additions & 3 deletions gordo/machine/model/models.py
@@ -302,6 +302,7 @@ def predict(self, X: np.ndarray, **kwargs) -> np.ndarray:
         results:
             np.ndarray
         """
+        kwargs.setdefault("verbose", 0)
         return self.model.predict(X, **kwargs)

     def get_params(self, **params):
@@ -371,6 +372,7 @@ def score(
         X: Union[np.ndarray, pd.DataFrame],
         y: Union[np.ndarray, pd.DataFrame],
         sample_weight: Optional[np.ndarray] = None,
+        **kwargs,
     ) -> float:
         """
         Returns the explained variance score between auto encoder's input vs output
@@ -383,6 +385,8 @@
             Target
         sample_weight: Optional[np.ndarray]
             sample weights
+        kwargs
+            Additional kwargs for model.predict()

         Returns
         -------
@@ -394,7 +398,8 @@
                 f"This {self.__class__.__name__} has not been fitted yet."
             )

-        out = self.model.predict(X)
+        kwargs.setdefault("verbose", 0)
+        out = self.model.predict(X, **kwargs)

         return explained_variance_score(y, out)

@@ -664,13 +669,15 @@ def predict(self, X: np.ndarray, **kwargs) -> np.ndarray:
             lookback_window=self.lookback_window,
             lookahead=self.lookahead,
         )
-        return self.model.predict(tsg)
+        kwargs.setdefault("verbose", 0)
+        return self.model.predict(tsg, **kwargs)

     def score(
         self,
         X: Union[np.ndarray, pd.DataFrame],
         y: Union[np.ndarray, pd.DataFrame],
         sample_weight: Optional[np.ndarray] = None,
+        **kwargs,
     ) -> float:
         """
         Returns the explained variance score between 1 step forecasted input and true
@@ -684,6 +691,8 @@
             Target
         sample_weight: Optional[np.ndarray]
             Sample weights
+        kwargs
+            Additional kwargs for predict

         Returns
         -------
@@ -695,7 +704,8 @@
                 f"This {self.__class__.__name__} has not been fitted yet."
             )

-        out = self.predict(X)
+        kwargs.setdefault("verbose", 0)
+        out = self.predict(X, **kwargs)

         # Limit X samples to match the offset caused by the LSTM lookback window
         # ie, if lookback window is 5, 'out' will be 5 rows less than X by now
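The repeated kwargs.setdefault("verbose", 0) lines all serve one purpose: Keras Model.predict prints a per-batch progress bar by default, which floods logs when predict/score run inside a server or test loop, while setdefault still lets a caller opt back in. A standalone sketch of the pattern (the toy model is illustrative, not gordo's):

    import numpy as np
    from tensorflow import keras

    model = keras.Sequential([keras.layers.Dense(1, input_shape=(4,))])
    model.compile(optimizer="adam", loss="mse")

    def predict(X: np.ndarray, **kwargs) -> np.ndarray:
        # Silence Keras' progress bar unless the caller asks for it.
        kwargs.setdefault("verbose", 0)
        return model.predict(X, **kwargs)

    predict(np.zeros((8, 4)))             # quiet
    predict(np.zeros((8, 4)), verbose=1)  # progress bar explicitly restored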
53 changes: 50 additions & 3 deletions gordo/serializer/from_definition.py
@@ -7,10 +7,13 @@
 from typing import Union, Dict, Any, Iterable
 from sklearn.pipeline import Pipeline, FeatureUnion
 from sklearn.base import BaseEstimator
-from tensorflow.keras.models import Sequential
+from tensorflow.keras import Sequential
+from inspect import signature, Parameter

 from gordo_core.import_utils import import_location

+from .utils import is_tuple_type
+

 logger = logging.getLogger(__name__)
@@ -62,6 +65,50 @@ def from_definition(
     return _build_step(definition)


+def _is_tuple_param(param: Parameter) -> bool:
+    if param.default is not param.empty:
+        if isinstance(param.default, tuple):
+            return True
+    if param.annotation and is_tuple_type(param.annotation):
+        return True
+    return False
+
+
+def create_instance(fn, **kwargs):
+    """
+    Create a class instance.
+
+    Examples
+    --------
+    >>> from sklearn.preprocessing import MinMaxScaler
+    >>> create_instance(MinMaxScaler, feature_range=[-1, 1])
+    MinMaxScaler(feature_range=(-1, 1))
+
+    Parameters
+    ----------
+    fn
+        Class factory function.
+    kwargs
+        fn parameters.
+
+    Returns
+    -------
+    """
+    s = signature(fn)
+    kwargs = copy.copy(kwargs)
+    for param in s.parameters.values():
+        if param.name not in kwargs:
+            continue
+        if (
+            param.kind == Parameter.KEYWORD_ONLY
+            or param.kind == Parameter.POSITIONAL_OR_KEYWORD
+        ):
+            if _is_tuple_param(param):
+                v = kwargs[param.name]
+                kwargs[param.name] = tuple(v)
+    return fn(**kwargs)
+
+
 def _build_branch(
     definition: Iterable[Union[str, Dict[Any, Any]]],
     constructor_class=Union[Pipeline, None],
@@ -181,7 +228,7 @@ def _build_step(
             f"Got {StepClass} but the supplied parameters"
             f"seem invalid: {params}"
         )
-        return StepClass(**params)
+        return create_instance(StepClass, **params)

     # If step is just a string, can initialize it without any params
     # ie. "sklearn.preprocessing.PCA"
@@ -303,7 +350,7 @@ def _load_param_classes(params: dict):
         else:
             # Call this func again, in case there are nested occurrences of this problem in these kwargs
             kwargs = _load_param_classes(sub_params)
-            params[key] = Model(**kwargs)  # type: ignore
+            params[key] = create_instance(Model, **kwargs)  # type: ignore
     elif key == "callbacks" and isinstance(value, list):
         params[key] = _build_callbacks(value)
     return params
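The point of routing construction through create_instance is YAML: YAML has no tuple literal, so a config value like feature_range: [-1, 1] always deserializes as a list, while many estimators expect a tuple. Inspecting the constructor's signature lets the deserializer coerce transparently. A small sketch using the doctest's own MinMaxScaler example:

    import yaml
    from sklearn.preprocessing import MinMaxScaler

    from gordo.serializer.from_definition import create_instance

    # YAML cannot express tuples, so the value arrives as a list.
    params = yaml.safe_load("feature_range: [-1, 1]")

    # create_instance sees MinMaxScaler's tuple-typed default for
    # feature_range and converts the list before calling the constructor.
    scaler = create_instance(MinMaxScaler, **params)
    print(scaler)  # MinMaxScaler(feature_range=(-1, 1))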
35 changes: 29 additions & 6 deletions gordo/serializer/into_definition.py
@@ -9,7 +9,9 @@
 logger = logging.getLogger(__name__)


-def into_definition(pipeline: Pipeline, prune_default_params: bool = False) -> dict:
+def into_definition(
+    pipeline: Pipeline, prune_default_params: bool = False, tuples_to_list: bool = True
+) -> dict:
     """
     Convert an instance of ``sklearn.pipeline.Pipeline`` into a dict definition
     capable of being reconstructed with
@@ -22,6 +24,8 @@
     prune_default_params: bool
         Whether to prune the default parameters found in current instance of the transformers
         vs what their default params are.
+    tuples_to_list: bool
+        Convert all tuples in output to lists

     Returns
     -------
@@ -46,6 +50,8 @@
             copy: true
             iterated_power: auto
             n_components: 4
+            n_oversamples: 10
+            power_iteration_normalizer: auto
             random_state: null
             svd_solver: auto
             tol: 0.0
@@ -55,11 +61,13 @@
         verbose: false
     <BLANKLINE>
     """
-    steps = _decompose_node(pipeline, prune_default_params)
+    steps = _decompose_node(pipeline, prune_default_params, tuples_to_list)
     return steps


-def _decompose_node(step: object, prune_default_params: bool = False):
+def _decompose_node(
+    step: object, prune_default_params: bool = False, tuples_to_list: bool = True
+):
     """
     Decompose a specific instance of a scikit-learn transformer,
     including Pipelines or FeatureUnions
@@ -71,6 +79,8 @@
     prune_default_params
         Whether to output the default parameter values into the definition. If True,
         only those parameters differing from the default params will be output.
+    tuples_to_list
+        Convert all tuples in output to lists

     Returns
     -------
@@ -93,6 +103,14 @@
         if prune_default_params
         else definition
     )
+
+    if tuples_to_list:
+        new_definition = {}
+        for k, v in definition.items():
+            if isinstance(v, tuple):
+                v = list(v)
+            new_definition[k] = v
+        definition = new_definition
     return {import_str: definition}


@@ -126,13 +144,14 @@ def _prune_default_parameters(obj: object, current_params) -> dict:
     }


-def load_definition_from_params(params: dict) -> dict:
+def load_definition_from_params(params: dict, tuples_to_list: bool = True) -> dict:
     """
     Recursively decompose each of the values from params into the definition

     Parameters
     ----------
     params: dict
+    tuples_to_list: bool

     Returns
     -------
@@ -143,7 +162,9 @@
     for param, param_val in params.items():

         if hasattr(param_val, "get_params") or hasattr(param_val, "into_definition"):
-            definition[param] = _decompose_node(param_val)
+            definition[param] = _decompose_node(
+                param_val, tuples_to_list=tuples_to_list
+            )

         # Handle parameter value that is a list
         elif isinstance(param_val, list):
@@ -153,7 +174,9 @@
             # TODO: Make this more robust, probably via another function to parse the iterable recursively
             # TODO: b/c it _could_, in theory, be a dict of {str: BaseEstimator} or similar.
             definition[param] = [
-                _decompose_node(leaf[1]) if isinstance(leaf, tuple) else leaf
+                _decompose_node(leaf[1], tuples_to_list=tuples_to_list)
+                if isinstance(leaf, tuple)
+                else leaf
                 for leaf in param_val
             ]
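The new tuples_to_list flag is the mirror image of the coercion above: when serializing a fitted pipeline back into a definition, tuple-valued parameters are emitted as lists so the result dumps cleanly to YAML and survives the round trip. A hedged sketch (assuming both helpers are re-exported from gordo.serializer, as imports elsewhere in the repo suggest):

    import yaml
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import MinMaxScaler

    from gordo.serializer import from_definition, into_definition

    pipe = Pipeline([("scale", MinMaxScaler(feature_range=(-1, 1)))])

    # tuples_to_list=True (the default) turns (-1, 1) into [-1, 1] ...
    definition = into_definition(pipe)
    text = yaml.safe_dump(definition)

    # ... and from_definition/create_instance turn it back into a tuple.
    rebuilt = from_definition(yaml.safe_load(text))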
49 changes: 49 additions & 0 deletions gordo/serializer/utils.py
@@ -0,0 +1,49 @@
+from typing import get_origin, get_args, Union
+
+try:
+    from types import UnionType  # type: ignore
+except ImportError:
+    UnionType = None
+
+
+def _is_exact_tuple_type(tp) -> bool:
+    if tp is tuple:
+        return True
+    origin = get_origin(tp)
+    return origin is tuple
+
+
+def is_tuple_type(tp) -> bool:
+    """
+    Check if this type is a tuple.
+
+    Examples
+    --------
+    >>> from typing import Optional, Tuple
+    >>> is_tuple_type(tuple)
+    True
+    >>> is_tuple_type(Optional[tuple[int, int]])
+    True
+    >>> is_tuple_type(Tuple[str, str])
+    True
+    >>> is_tuple_type(list[str])
+    False
+
+    Parameters
+    ----------
+    tp
+        Type to check.
+
+    Returns
+    -------
+    """
+    if _is_exact_tuple_type(tp):
+        return True
+    origin = get_origin(tp)
+    if origin is Union or (UnionType is not None and origin is UnionType):
+        args = get_args(tp)
+        for arg in args:
+            if not _is_exact_tuple_type(arg) and not (arg is type(None)):
+                return False
+        return True
+    return False
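The guarded types.UnionType import exists because PEP 604 unions written as X | Y (Python 3.10+) have origin types.UnionType rather than typing.Union, and UnionType itself does not exist on 3.8/3.9. A quick illustration, assuming Python 3.10 or newer:

    from typing import Optional

    from gordo.serializer.utils import is_tuple_type

    print(is_tuple_type(tuple[int, ...] | None))  # True: PEP 604 optional tuple
    print(is_tuple_type(Optional[tuple]))         # True: typing.Union spelling
    print(is_tuple_type(int | str))               # False: no tuple member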
2 changes: 1 addition & 1 deletion gordo/server/blueprints/base.py
@@ -178,7 +178,7 @@ def get_download_model():
     """
     serialized_model = serializer.dumps(g.model)
     buff = io.BytesIO(serialized_model)
-    return send_file(buff, attachment_filename="model.tar.gz")
+    return send_file(buff, download_name="model.pickle")


 @base_blueprint.route("/gordo/v0/<gordo_project>/models", methods=["GET"])
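This change tracks Flask 2.0, which renamed send_file's attachment_filename parameter to download_name; the old keyword now fails outright. A minimal sketch of the new call (route and payload are illustrative):

    import io

    from flask import Flask, send_file

    app = Flask(__name__)

    @app.route("/download")
    def download():
        buff = io.BytesIO(b"serialized model bytes")  # placeholder payload
        # Flask >= 2.0: download_name replaces attachment_filename.
        # An explicit mimetype avoids guessing from the unusual extension.
        return send_file(
            buff, download_name="model.pickle", mimetype="application/octet-stream"
        )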
8 changes: 6 additions & 2 deletions gordo/server/server.py
@@ -175,8 +175,12 @@ def _set_revision_and_collection_dir():
     g.current_revision = os.path.basename(g.collection_dir)

     # If a specific revision was requested, update collection_dir
-    g.revision = request.args.get("revision") or request.headers.get("revision")
-    if g.revision:
+    has_revision = "revision" in request.args or "revision" in request.headers
+    if has_revision:
+        if "revision" in request.args:
+            g.revision = request.args["revision"]
+        else:
+            g.revision = request.headers["revision"]
         if not validate_revision(g.revision):
             return make_response(
                 jsonify({"error": "Revision should only contains numbers."}), 410
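Behaviorally, checking membership with "in" instead of truthiness means an explicitly supplied but empty revision (for example ?revision=) now reaches validate_revision and is rejected with 410, rather than silently falling back to the latest revision. A hedged client-side sketch (host, port, project name, and revision value are placeholders):

    import requests

    base = "http://localhost:5555/gordo/v0/my-project"  # placeholder host/project

    # Revision can come from the query string or a header.
    requests.get(f"{base}/models", params={"revision": "1586864006000"})
    requests.get(f"{base}/models", headers={"revision": "1586864006000"})

    # An empty-but-present revision is now validated instead of ignored.
    r = requests.get(f"{base}/models", params={"revision": ""})
    print(r.status_code)  # 410: "Revision should only contains numbers."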
13 changes: 9 additions & 4 deletions gordo/server/utils.py
@@ -285,11 +285,16 @@ def wrapper_method(*args, **kwargs):
         if request.method == "POST":

             # Always require an X, be it in JSON or file/parquet format.
-            if ("X" not in (request.json or {})) and ("X" not in request.files):
-                message = dict(message='Cannot predict without "X"')
-                return make_response((jsonify(message), 400))
+            if request.is_json:
+                if "X" not in (request.json or {}):
+                    message = dict(message='Cannot predict without "X"')
+                    return make_response((jsonify(message), 400))
+            else:
+                if "X" not in request.files:
+                    message = dict(message='Cannot predict without "X"')
+                    return make_response((jsonify(message), 400))

-            if request.json is not None:
+            if request.is_json:
                 X = dataframe_from_dict(request.json["X"])
                 y = request.json.get("y")
                 if y is not None:
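The split on request.is_json is defensive: on newer Flask/Werkzeug, touching request.json when the body is not JSON raises instead of returning None, so the multipart (parquet) path must never read it. A hedged sketch of the two request shapes the guard distinguishes (URL, tag names, and file are placeholders):

    import requests

    url = "http://localhost:5555/gordo/v0/my-project/my-machine/prediction"  # placeholder

    # JSON branch: request.is_json is true, "X" must be in the JSON body.
    requests.post(url, json={"X": {"tag-1": [0.1, 0.2], "tag-2": [0.3, 0.4]}})

    # File branch: request.is_json is false, "X" must be an uploaded parquet file.
    with open("X.parquet", "rb") as f:  # placeholder file
        requests.post(url, files={"X": f})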
