Update dependencies #1306

Merged: 21 commits, Apr 3, 2023
2 changes: 1 addition & 1 deletion gordo/cli/custom_types.py
@@ -71,7 +71,7 @@ def convert(
             ipaddress.ip_address(value)
             return value
         except ValueError as e:
-            self.fail(e)
+            self.fail(str(e))


 def key_value_par(val) -> Tuple[str, str]:
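For context on the one-line fix above: click's `ParamType.fail()` wants a message string, so passing the exception object relied on implicit coercion; `str(e)` makes it explicit. A minimal sketch of the same pattern (the `IPAddress` class name here is illustrative, not necessarily gordo's):

```python
import ipaddress

import click


class IPAddress(click.ParamType):
    """Validate that a CLI argument is a well-formed IP address."""

    name = "ip_address"

    def convert(self, value, param, ctx):
        try:
            ipaddress.ip_address(value)
            return value
        except ValueError as e:
            # Pass the message, not the exception object
            self.fail(str(e))
```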
16 changes: 13 additions & 3 deletions gordo/machine/model/models.py
@@ -302,6 +302,7 @@ def predict(self, X: np.ndarray, **kwargs) -> np.ndarray:
         results:
             np.ndarray
         """
+        kwargs.setdefault("verbose", 0)
         return self.model.predict(X, **kwargs)

     def get_params(self, **params):
@@ -371,6 +372,7 @@ def score(
         X: Union[np.ndarray, pd.DataFrame],
         y: Union[np.ndarray, pd.DataFrame],
         sample_weight: Optional[np.ndarray] = None,
+        **kwargs,
     ) -> float:
         """
         Returns the explained variance score between auto encoder's input vs output
@@ -383,6 +385,8 @@
             Target
         sample_weight: Optional[np.ndarray]
             sample weights
+        kwargs
+            Additional kwargs for model.predict()

         Returns
         -------
@@ -394,7 +398,8 @@
                 f"This {self.__class__.__name__} has not been fitted yet."
             )

-        out = self.model.predict(X)
+        kwargs.setdefault("verbose", 0)
+        out = self.model.predict(X, **kwargs)

         return explained_variance_score(y, out)

@@ -664,13 +669,15 @@ def predict(self, X: np.ndarray, **kwargs) -> np.ndarray:
             lookback_window=self.lookback_window,
             lookahead=self.lookahead,
         )
-        return self.model.predict(tsg)
+        kwargs.setdefault("verbose", 0)
+        return self.model.predict(tsg, **kwargs)

     def score(
         self,
         X: Union[np.ndarray, pd.DataFrame],
         y: Union[np.ndarray, pd.DataFrame],
         sample_weight: Optional[np.ndarray] = None,
+        **kwargs,
     ) -> float:
         """
         Returns the explained variance score between 1 step forecasted input and true
@@ -684,6 +691,8 @@
             Target
         sample_weight: Optional[np.ndarray]
             Sample weights
+        kwargs
+            Additional kwargs for predict

         Returns
         -------
@@ -695,7 +704,8 @@
                 f"This {self.__class__.__name__} has not been fitted yet."
             )

-        out = self.predict(X)
+        kwargs.setdefault("verbose", 0)
+        out = self.predict(X, **kwargs)

         # Limit X samples to match the offset caused by the LSTM lookback window
         # i.e., if the lookback window is 5, 'out' will be 5 rows less than X by now
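Taken together, these changes silence Keras per-call progress bars by default while still letting callers opt back in, since `score()` now forwards extra kwargs to `predict()`. A rough sketch of the resulting behaviour, assuming a fitted `KerasAutoEncoder` (the `kind` value names one of gordo's builtin model builders):

```python
import numpy as np

from gordo.machine.model.models import KerasAutoEncoder

X = np.random.random((100, 4))
model = KerasAutoEncoder(kind="feedforward_hourglass")
model.fit(X, X)

model.predict(X)              # Quiet by default: verbose falls back to 0
model.predict(X, verbose=1)   # Progress bars on request
model.score(X, X, verbose=1)  # score() forwards kwargs to predict()
```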
53 changes: 50 additions & 3 deletions gordo/serializer/from_definition.py
@@ -7,10 +7,13 @@
 from typing import Union, Dict, Any, Iterable
 from sklearn.pipeline import Pipeline, FeatureUnion
 from sklearn.base import BaseEstimator
-from tensorflow.keras.models import Sequential
+from tensorflow.keras import Sequential
+from inspect import signature, Parameter

 from gordo_core.import_utils import import_location

+from .utils import is_tuple_type


 logger = logging.getLogger(__name__)

@@ -62,6 +65,50 @@ def from_definition(
     return _build_step(definition)


+def _is_tuple_param(param: Parameter) -> bool:
+    if param.default is not param.empty:
+        if isinstance(param.default, tuple):
+            return True
+    if param.annotation and is_tuple_type(param.annotation):
+        return True
+    return False
+
+
+def create_instance(fn, **kwargs):
+    """
+    Create a class instance.
+
+    Examples
+    --------
+    >>> from sklearn.preprocessing import MinMaxScaler
+    >>> create_instance(MinMaxScaler, feature_range=[-1, 1])
+    MinMaxScaler(feature_range=(-1, 1))
+
+    Parameters
+    ----------
+    fn
+        Class factory function.
+    kwargs
+        fn parameters.
+
+    Returns
+    -------
+    """
+    s = signature(fn)
+    kwargs = copy.copy(kwargs)
+    for param in s.parameters.values():
+        if param.name not in kwargs:
+            continue
+        if (
+            param.kind == Parameter.KEYWORD_ONLY
+            or param.kind == Parameter.POSITIONAL_OR_KEYWORD
+        ):
+            if _is_tuple_param(param):
+                v = kwargs[param.name]
+                kwargs[param.name] = tuple(v)
+    return fn(**kwargs)
+
+
 def _build_branch(
     definition: Iterable[Union[str, Dict[Any, Any]]],
     constructor_class=Union[Pipeline, None],
@@ -181,7 +228,7 @@ def _build_step(
                 f"Got {StepClass} but the supplied parameters"
                 f"seem invalid: {params}"
             )
-        return StepClass(**params)
+        return create_instance(StepClass, **params)

     # If step is just a string, can initialize it without any params
     # i.e. "sklearn.preprocessing.PCA"
@@ -303,7 +350,7 @@ def _load_param_classes(params: dict):
             else:
                 # Call this func again, in case there are nested occurrences of this problem in these kwargs
                 kwargs = _load_param_classes(sub_params)
-                params[key] = Model(**kwargs)  # type: ignore
+                params[key] = create_instance(Model, **kwargs)  # type: ignore
         elif key == "callbacks" and isinstance(value, list):
             params[key] = _build_callbacks(value)
     return params
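The new `create_instance()` exists because YAML has no tuple literal: a tuple-valued parameter round-trips through a config file as a list, and some constructors expect a genuine tuple. Inspecting the constructor signature lets the deserializer coerce such lists back. A small sketch of the effect, mirroring the doctest above:

```python
from sklearn.preprocessing import MinMaxScaler

from gordo.serializer.from_definition import create_instance

# feature_range defaults to the tuple (0, 1), so the list coming out of a
# YAML definition is coerced back into a tuple before instantiation.
scaler = create_instance(MinMaxScaler, feature_range=[-1, 1])
assert scaler.feature_range == (-1, 1)
```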
35 changes: 29 additions & 6 deletions gordo/serializer/into_definition.py
@@ -9,7 +9,9 @@
 logger = logging.getLogger(__name__)


-def into_definition(pipeline: Pipeline, prune_default_params: bool = False) -> dict:
+def into_definition(
+    pipeline: Pipeline, prune_default_params: bool = False, tuples_to_list: bool = True
+) -> dict:
     """
     Convert an instance of ``sklearn.pipeline.Pipeline`` into a dict definition
     capable of being reconstructed with
@@ -22,6 +24,8 @@
     prune_default_params: bool
         Whether to prune the default parameters found in current instance of the transformers
         vs what their default params are.
+    tuples_to_list: bool
+        Convert all tuples in output to lists

     Returns
     -------
@@ -46,6 +50,8 @@
             copy: true
             iterated_power: auto
             n_components: 4
+            n_oversamples: 10
+            power_iteration_normalizer: auto
             random_state: null
             svd_solver: auto
             tol: 0.0
@@ -55,11 +61,13 @@
         verbose: false
     <BLANKLINE>
     """
-    steps = _decompose_node(pipeline, prune_default_params)
+    steps = _decompose_node(pipeline, prune_default_params, tuples_to_list)
     return steps


-def _decompose_node(step: object, prune_default_params: bool = False):
+def _decompose_node(
+    step: object, prune_default_params: bool = False, tuples_to_list: bool = True
+):
     """
     Decompose a specific instance of a scikit-learn transformer,
     including Pipelines or FeatureUnions
@@ -71,6 +79,8 @@
     prune_default_params
         Whether to output the default parameter values into the definition. If True,
         only those parameters differing from the default params will be output.
+    tuples_to_list
+        Convert all tuples in output to lists

     Returns
     -------
@@ -93,6 +103,14 @@
         if prune_default_params
         else definition
     )
+
+    if tuples_to_list:
+        new_definition = {}
+        for k, v in definition.items():
+            if isinstance(v, tuple):
+                v = list(v)
+            new_definition[k] = v
+        definition = new_definition
     return {import_str: definition}


@@ -126,13 +144,14 @@ def _prune_default_parameters(obj: object, current_params) -> dict:
     }


-def load_definition_from_params(params: dict) -> dict:
+def load_definition_from_params(params: dict, tuples_to_list: bool = True) -> dict:
     """
     Recursively decompose each of the values from params into the definition

     Parameters
     ----------
     params: dict
+    tuples_to_list: bool

     Returns
     -------
@@ -143,7 +162,9 @@
     for param, param_val in params.items():

         if hasattr(param_val, "get_params") or hasattr(param_val, "into_definition"):
-            definition[param] = _decompose_node(param_val)
+            definition[param] = _decompose_node(
+                param_val, tuples_to_list=tuples_to_list
+            )

         # Handle parameter value that is a list
         elif isinstance(param_val, list):
@@ -153,7 +174,9 @@
             # TODO: Make this more robust, probably via another function to parse the iterable recursively
             # TODO: b/c it _could_, in theory, be a dict of {str: BaseEstimator} or similar.
             definition[param] = [
                 _decompose_node(leaf[1], tuples_to_list=tuples_to_list)
                 if isinstance(leaf, tuple)
                 else leaf
                 for leaf in param_val
             ]
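With `tuples_to_list=True` the serializer emits plain lists, so the YAML stays free of `!!python/tuple` tags, and `create_instance()` on the deserializing side restores the tuples. A hedged round-trip sketch:

```python
import yaml
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

from gordo.serializer import from_definition, into_definition

pipe = Pipeline([("scale", MinMaxScaler(feature_range=(-1, 1)))])
definition = into_definition(pipe)      # tuples flattened to lists
print(yaml.safe_dump(definition))       # dumps cleanly, no python-specific tags
rebuilt = from_definition(definition)   # lists coerced back where tuples belong
```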
49 changes: 49 additions & 0 deletions gordo/serializer/utils.py
@@ -0,0 +1,49 @@
+from typing import get_origin, get_args, Union
+
+try:
+    from types import UnionType  # type: ignore
+except ImportError:
+    UnionType = None
+
+
+def _is_exact_tuple_type(tp) -> bool:
+    if tp is tuple:
+        return True
+    origin = get_origin(tp)
+    return origin is tuple
+
+
+def is_tuple_type(tp) -> bool:
+    """
+    Check if this type is a tuple.
+
+    Examples
+    --------
+    >>> from typing import Optional, Tuple
+    >>> is_tuple_type(tuple)
+    True
+    >>> is_tuple_type(Optional[tuple[int, int]])
+    True
+    >>> is_tuple_type(Tuple[str, str])
+    True
+    >>> is_tuple_type(list[str])
+    False
+
+    Parameters
+    ----------
+    tp
+        Type to check.
+
+    Returns
+    -------
+    """
+    if _is_exact_tuple_type(tp):
+        return True
+    origin = get_origin(tp)
+    if origin is Union or (UnionType is not None and origin is UnionType):
+        args = get_args(tp)
+        for arg in args:
+            if not _is_exact_tuple_type(arg) and not (arg is type(None)):
+                return False
+        return True
+    return False
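One detail worth noting: the `UnionType` fallback makes `is_tuple_type` work with PEP 604 unions (`X | Y`), which only exist on Python 3.10+. A quick sketch, assuming a 3.10+ interpreter:

```python
from gordo.serializer.utils import is_tuple_type

print(is_tuple_type(tuple[int, ...] | None))  # True  - optional tuple
print(is_tuple_type(int | str))               # False - no tuple member
print(is_tuple_type(tuple | list))            # False - mixed union
```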
2 changes: 1 addition & 1 deletion gordo/server/blueprints/base.py
@@ -178,7 +178,7 @@ def get_download_model():
     """
     serialized_model = serializer.dumps(g.model)
     buff = io.BytesIO(serialized_model)
-    return send_file(buff, attachment_filename="model.tar.gz")
+    return send_file(buff, download_name="model.pickle")


 @base_blueprint.route("/gordo/v0/<gordo_project>/models", methods=["GET"])
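For reviewers wondering about this rename: Flask 2.x renamed `send_file`'s `attachment_filename` keyword to `download_name`, and newer releases drop the old keyword entirely, so the previous call fails on the updated dependencies. A minimal sketch of the new API (route and payload are placeholders):

```python
import io

from flask import Flask, send_file

app = Flask(__name__)


@app.route("/download")
def download():
    buff = io.BytesIO(b"serialized model bytes")
    # mimetype passed explicitly since ".pickle" is not a registered type
    return send_file(buff, download_name="model.pickle",
                     mimetype="application/octet-stream")
```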
8 changes: 6 additions & 2 deletions gordo/server/server.py
@@ -175,8 +175,12 @@ def _set_revision_and_collection_dir():
     g.current_revision = os.path.basename(g.collection_dir)

     # If a specific revision was requested, update collection_dir
-    g.revision = request.args.get("revision") or request.headers.get("revision")
-    if g.revision:
+    has_revision = "revision" in request.args or "revision" in request.headers
+    if has_revision:
+        if "revision" in request.args:
+            g.revision = request.args["revision"]
+        else:
+            g.revision = request.headers["revision"]
         if not validate_revision(g.revision):
             return make_response(
                 jsonify({"error": "Revision should only contains numbers."}), 410
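The rewritten lookup behaves the same for well-formed requests but no longer conflates a missing revision with an empty one. A client can pin a revision either way; a hedged sketch with placeholder host and project names:

```python
import requests

base = "http://localhost:5555/gordo/v0/my-project"

# Revision via query parameter...
requests.get(f"{base}/models", params={"revision": "1586864402"})

# ...or via header; invalid values now get a 410 with a clear error
requests.get(f"{base}/models", headers={"revision": "not-a-number"})
```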
13 changes: 9 additions & 4 deletions gordo/server/utils.py
@@ -285,11 +285,16 @@ def wrapper_method(*args, **kwargs):
         if request.method == "POST":

             # Always require an X, be it in JSON or file/parquet format.
-            if ("X" not in (request.json or {})) and ("X" not in request.files):
-                message = dict(message='Cannot predict without "X"')
-                return make_response((jsonify(message), 400))
+            if request.is_json:
+                if "X" not in (request.json or {}):
+                    message = dict(message='Cannot predict without "X"')
+                    return make_response((jsonify(message), 400))
+            else:
+                if "X" not in request.files:
+                    message = dict(message='Cannot predict without "X"')
+                    return make_response((jsonify(message), 400))

-            if request.json is not None:
+            if request.is_json:
                 X = dataframe_from_dict(request.json["X"])
                 y = request.json.get("y")
                 if y is not None:
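Context for the split check: on Flask 2.1+, touching `request.json` on a request whose Content-Type is not JSON raises instead of returning `None`, so the old combined condition could blow up on parquet uploads. Branching on `request.is_json` first keeps both input paths working. A sketch of the two client paths (the URL is a placeholder):

```python
import io

import pandas as pd
import requests

url = "http://localhost:5555/gordo/v0/my-project/my-machine/prediction"

# JSON path: exercised by the request.is_json branch
requests.post(url, json={"X": [[1.0, 2.0, 3.0]]})

# Parquet path: exercised by the request.files branch
buff = io.BytesIO()
pd.DataFrame([[1.0, 2.0, 3.0]]).to_parquet(buff)
requests.post(url, files={"X": buff.getvalue()})
```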