Update dependencies (#1306)
* Update requirements.in

* Fix some unittests

* Fix tests/gordo/server unit-tests

* Update black

* Fix issue with packaging dependency

* Fix tests/gordo/serializer unit-tests

* black reformatting

* Fix docstring unit-tests

* Remove debug lines from unit-test

* verbose=0 for predict

* Fix mypy complaints

* black reformatting

* create_instance function

* Docstring Examples

* Using create_instance in from_definition module

* Black reformatting

* Fix mypy issue

* Fix test_client.py unit-tests

* Update full_requirements.txt

* tensorflow>=2.11,<2.13

* Remove one debug assert message
koropets committed Apr 3, 2023
1 parent 4b54d1f commit cdc3779
Showing 16 changed files with 314 additions and 137 deletions.
2 changes: 1 addition & 1 deletion gordo/cli/custom_types.py
@@ -71,7 +71,7 @@ def convert(
             ipaddress.ip_address(value)
             return value
         except ValueError as e:
-            self.fail(e)
+            self.fail(str(e))


 def key_value_par(val) -> Tuple[str, str]:
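The one-line fix reflects that click's ParamType.fail is annotated to take a str message, so passing the ValueError object itself is what mypy flagged. A minimal sketch of the corrected pattern (the class name here is hypothetical, not gordo's actual type):

    import ipaddress

    import click

    class HostIP(click.ParamType):  # hypothetical name, for illustration only
        name = "host_ip"

        def convert(self, value, param, ctx):
            try:
                ipaddress.ip_address(value)
                return value
            except ValueError as e:
                # fail() expects a str message; passing the exception
                # object directly is what mypy rejected.
                self.fail(str(e), param, ctx)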
16 changes: 13 additions & 3 deletions gordo/machine/model/models.py
@@ -302,6 +302,7 @@ def predict(self, X: np.ndarray, **kwargs) -> np.ndarray:
         results:
             np.ndarray
         """
+        kwargs.setdefault("verbose", 0)
         return self.model.predict(X, **kwargs)

     def get_params(self, **params):
@@ -371,6 +372,7 @@ def score(
         X: Union[np.ndarray, pd.DataFrame],
         y: Union[np.ndarray, pd.DataFrame],
         sample_weight: Optional[np.ndarray] = None,
+        **kwargs,
     ) -> float:
         """
         Returns the explained variance score between auto encoder's input vs output
@@ -383,6 +385,8 @@
             Target
         sample_weight: Optional[np.ndarray]
             sample weights
+        kwargs
+            Additional kwargs for model.predict()

         Returns
         -------
@@ -394,7 +398,8 @@
                 f"This {self.__class__.__name__} has not been fitted yet."
             )

-        out = self.model.predict(X)
+        kwargs.setdefault("verbose", 0)
+        out = self.model.predict(X, **kwargs)

         return explained_variance_score(y, out)

@@ -664,13 +669,15 @@ def predict(self, X: np.ndarray, **kwargs) -> np.ndarray:
             lookback_window=self.lookback_window,
             lookahead=self.lookahead,
         )
-        return self.model.predict(tsg)
+        kwargs.setdefault("verbose", 0)
+        return self.model.predict(tsg, **kwargs)

     def score(
         self,
         X: Union[np.ndarray, pd.DataFrame],
         y: Union[np.ndarray, pd.DataFrame],
         sample_weight: Optional[np.ndarray] = None,
+        **kwargs,
     ) -> float:
         """
         Returns the explained variance score between 1 step forecasted input and true
@@ -684,6 +691,8 @@
             Target
         sample_weight: Optional[np.ndarray]
             Sample weights
+        kwargs
+            Additional kwargs for predict

         Returns
         -------
@@ -695,7 +704,8 @@
                 f"This {self.__class__.__name__} has not been fitted yet."
             )

-        out = self.predict(X)
+        kwargs.setdefault("verbose", 0)
+        out = self.predict(X, **kwargs)

         # Limit X samples to match the offset caused by the LSTM lookback window
         # ie, if lookback window is 5, 'out' will be 5 rows less than X by now
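The repeated kwargs.setdefault("verbose", 0) lines all serve one purpose: Keras Model.predict prints a per-batch progress bar by default, which floods logs when predict/score run inside a server or test loop, while setdefault still lets a caller opt back in. A standalone sketch of the pattern (the toy model is illustrative, not gordo's):

    import numpy as np
    from tensorflow import keras

    model = keras.Sequential([keras.layers.Dense(1, input_shape=(4,))])
    model.compile(optimizer="adam", loss="mse")

    def predict(X: np.ndarray, **kwargs) -> np.ndarray:
        # Silence Keras' progress bar unless the caller asks for it.
        kwargs.setdefault("verbose", 0)
        return model.predict(X, **kwargs)

    predict(np.zeros((8, 4)))             # quiet
    predict(np.zeros((8, 4)), verbose=1)  # progress bar explicitly restored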
53 changes: 50 additions & 3 deletions gordo/serializer/from_definition.py
@@ -7,10 +7,13 @@
 from typing import Union, Dict, Any, Iterable
 from sklearn.pipeline import Pipeline, FeatureUnion
 from sklearn.base import BaseEstimator
-from tensorflow.keras.models import Sequential
+from tensorflow.keras import Sequential
+from inspect import signature, Parameter

 from gordo_core.import_utils import import_location

+from .utils import is_tuple_type
+

 logger = logging.getLogger(__name__)
@@ -62,6 +65,50 @@ def from_definition(
     return _build_step(definition)


+def _is_tuple_param(param: Parameter) -> bool:
+    if param.default is not param.empty:
+        if isinstance(param.default, tuple):
+            return True
+    if param.annotation and is_tuple_type(param.annotation):
+        return True
+    return False
+
+
+def create_instance(fn, **kwargs):
+    """
+    Create a class instance.
+
+    Examples
+    --------
+    >>> from sklearn.preprocessing import MinMaxScaler
+    >>> create_instance(MinMaxScaler, feature_range=[-1, 1])
+    MinMaxScaler(feature_range=(-1, 1))
+
+    Parameters
+    ----------
+    fn
+        Class factory function.
+    kwargs
+        fn parameters.
+
+    Returns
+    -------
+    """
+    s = signature(fn)
+    kwargs = copy.copy(kwargs)
+    for param in s.parameters.values():
+        if param.name not in kwargs:
+            continue
+        if (
+            param.kind == Parameter.KEYWORD_ONLY
+            or param.kind == Parameter.POSITIONAL_OR_KEYWORD
+        ):
+            if _is_tuple_param(param):
+                v = kwargs[param.name]
+                kwargs[param.name] = tuple(v)
+    return fn(**kwargs)
+
+
 def _build_branch(
     definition: Iterable[Union[str, Dict[Any, Any]]],
     constructor_class=Union[Pipeline, None],
@@ -181,7 +228,7 @@ def _build_step(
             f"Got {StepClass} but the supplied parameters"
             f"seem invalid: {params}"
         )
-        return StepClass(**params)
+        return create_instance(StepClass, **params)

     # If step is just a string, can initialize it without any params
     # ie. "sklearn.preprocessing.PCA"
@@ -303,7 +350,7 @@ def _load_param_classes(params: dict):
         else:
             # Call this func again, in case there are nested occurrences of this problem in these kwargs
             kwargs = _load_param_classes(sub_params)
-            params[key] = Model(**kwargs)  # type: ignore
+            params[key] = create_instance(Model, **kwargs)  # type: ignore
     elif key == "callbacks" and isinstance(value, list):
         params[key] = _build_callbacks(value)
     return params
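The point of routing construction through create_instance is YAML: YAML has no tuple literal, so a config value like feature_range: [-1, 1] always deserializes as a list, while many estimators expect a tuple. Inspecting the constructor's signature lets the deserializer coerce transparently. A small sketch using the doctest's own MinMaxScaler example:

    import yaml
    from sklearn.preprocessing import MinMaxScaler

    from gordo.serializer.from_definition import create_instance

    # YAML cannot express tuples, so the value arrives as a list.
    params = yaml.safe_load("feature_range: [-1, 1]")

    # create_instance sees MinMaxScaler's tuple-typed default for
    # feature_range and converts the list before calling the constructor.
    scaler = create_instance(MinMaxScaler, **params)
    print(scaler)  # MinMaxScaler(feature_range=(-1, 1))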
35 changes: 29 additions & 6 deletions gordo/serializer/into_definition.py
@@ -9,7 +9,9 @@
 logger = logging.getLogger(__name__)


-def into_definition(pipeline: Pipeline, prune_default_params: bool = False) -> dict:
+def into_definition(
+    pipeline: Pipeline, prune_default_params: bool = False, tuples_to_list: bool = True
+) -> dict:
     """
     Convert an instance of ``sklearn.pipeline.Pipeline`` into a dict definition
     capable of being reconstructed with
@@ -22,6 +24,8 @@
     prune_default_params: bool
         Whether to prune the default parameters found in current instance of the transformers
         vs what their default params are.
+    tuples_to_list: bool
+        Convert all tuples in output to lists

     Returns
     -------
@@ -46,6 +50,8 @@
             copy: true
             iterated_power: auto
             n_components: 4
+            n_oversamples: 10
+            power_iteration_normalizer: auto
             random_state: null
             svd_solver: auto
             tol: 0.0
@@ -55,11 +61,13 @@
         verbose: false
     <BLANKLINE>
     """
-    steps = _decompose_node(pipeline, prune_default_params)
+    steps = _decompose_node(pipeline, prune_default_params, tuples_to_list)
     return steps


-def _decompose_node(step: object, prune_default_params: bool = False):
+def _decompose_node(
+    step: object, prune_default_params: bool = False, tuples_to_list: bool = True
+):
     """
     Decompose a specific instance of a scikit-learn transformer,
     including Pipelines or FeatureUnions
@@ -71,6 +79,8 @@
     prune_default_params
         Whether to output the default parameter values into the definition. If True,
         only those parameters differing from the default params will be output.
+    tuples_to_list
+        Convert all tuples in output to lists

     Returns
     -------
@@ -93,6 +103,14 @@
         if prune_default_params
         else definition
     )
+
+    if tuples_to_list:
+        new_definition = {}
+        for k, v in definition.items():
+            if isinstance(v, tuple):
+                v = list(v)
+            new_definition[k] = v
+        definition = new_definition
     return {import_str: definition}


@@ -126,13 +144,14 @@ def _prune_default_parameters(obj: object, current_params) -> dict:
     }


-def load_definition_from_params(params: dict) -> dict:
+def load_definition_from_params(params: dict, tuples_to_list: bool = True) -> dict:
     """
     Recursively decompose each of the values from params into the definition

     Parameters
     ----------
     params: dict
+    tuples_to_list: bool

     Returns
     -------
@@ -143,7 +162,9 @@
     for param, param_val in params.items():

         if hasattr(param_val, "get_params") or hasattr(param_val, "into_definition"):
-            definition[param] = _decompose_node(param_val)
+            definition[param] = _decompose_node(
+                param_val, tuples_to_list=tuples_to_list
+            )

         # Handle parameter value that is a list
         elif isinstance(param_val, list):
@@ -153,7 +174,9 @@
             # TODO: Make this more robust, probably via another function to parse the iterable recursively
             # TODO: b/c it _could_, in theory, be a dict of {str: BaseEstimator} or similar.
             definition[param] = [
-                _decompose_node(leaf[1]) if isinstance(leaf, tuple) else leaf
+                _decompose_node(leaf[1], tuples_to_list=tuples_to_list)
+                if isinstance(leaf, tuple)
+                else leaf
                 for leaf in param_val
             ]
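The new tuples_to_list flag is the mirror image of the coercion above: when serializing a fitted pipeline back into a definition, tuple-valued parameters are emitted as lists so the result dumps cleanly to YAML and survives the round trip. A hedged sketch (assuming both helpers are re-exported from gordo.serializer, as imports elsewhere in the repo suggest):

    import yaml
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import MinMaxScaler

    from gordo.serializer import from_definition, into_definition

    pipe = Pipeline([("scale", MinMaxScaler(feature_range=(-1, 1)))])

    # tuples_to_list=True (the default) turns (-1, 1) into [-1, 1] ...
    definition = into_definition(pipe)
    text = yaml.safe_dump(definition)

    # ... and from_definition/create_instance turn it back into a tuple.
    rebuilt = from_definition(yaml.safe_load(text))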
49 changes: 49 additions & 0 deletions gordo/serializer/utils.py
@@ -0,0 +1,49 @@
+from typing import get_origin, get_args, Union
+
+try:
+    from types import UnionType  # type: ignore
+except ImportError:
+    UnionType = None
+
+
+def _is_exact_tuple_type(tp) -> bool:
+    if tp is tuple:
+        return True
+    origin = get_origin(tp)
+    return origin is tuple
+
+
+def is_tuple_type(tp) -> bool:
+    """
+    Check if this type is a tuple.
+
+    Examples
+    --------
+    >>> from typing import Optional, Tuple
+    >>> is_tuple_type(tuple)
+    True
+    >>> is_tuple_type(Optional[tuple[int, int]])
+    True
+    >>> is_tuple_type(Tuple[str, str])
+    True
+    >>> is_tuple_type(list[str])
+    False
+
+    Parameters
+    ----------
+    tp
+        Type to check.
+
+    Returns
+    -------
+    """
+    if _is_exact_tuple_type(tp):
+        return True
+    origin = get_origin(tp)
+    if origin is Union or (UnionType is not None and origin is UnionType):
+        args = get_args(tp)
+        for arg in args:
+            if not _is_exact_tuple_type(arg) and not (arg is type(None)):
+                return False
+        return True
+    return False
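The guarded types.UnionType import exists because PEP 604 unions written as X | Y (Python 3.10+) have origin types.UnionType rather than typing.Union, and UnionType itself does not exist on 3.8/3.9. A quick illustration, assuming Python 3.10 or newer:

    from typing import Optional

    from gordo.serializer.utils import is_tuple_type

    print(is_tuple_type(tuple[int, ...] | None))  # True: PEP 604 optional tuple
    print(is_tuple_type(Optional[tuple]))         # True: typing.Union spelling
    print(is_tuple_type(int | str))               # False: no tuple member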
2 changes: 1 addition & 1 deletion gordo/server/blueprints/base.py
@@ -178,7 +178,7 @@ def get_download_model():
     """
     serialized_model = serializer.dumps(g.model)
     buff = io.BytesIO(serialized_model)
-    return send_file(buff, attachment_filename="model.tar.gz")
+    return send_file(buff, download_name="model.pickle")


 @base_blueprint.route("/gordo/v0/<gordo_project>/models", methods=["GET"])
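This change tracks Flask 2.0, which renamed send_file's attachment_filename parameter to download_name; the old keyword now fails outright. A minimal sketch of the new call (route and payload are illustrative):

    import io

    from flask import Flask, send_file

    app = Flask(__name__)

    @app.route("/download")
    def download():
        buff = io.BytesIO(b"serialized model bytes")  # placeholder payload
        # Flask >= 2.0: download_name replaces attachment_filename.
        # An explicit mimetype avoids guessing from the unusual extension.
        return send_file(
            buff, download_name="model.pickle", mimetype="application/octet-stream"
        )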
8 changes: 6 additions & 2 deletions gordo/server/server.py
@@ -175,8 +175,12 @@ def _set_revision_and_collection_dir():
     g.current_revision = os.path.basename(g.collection_dir)

     # If a specific revision was requested, update collection_dir
-    g.revision = request.args.get("revision") or request.headers.get("revision")
-    if g.revision:
+    has_revision = "revision" in request.args or "revision" in request.headers
+    if has_revision:
+        if "revision" in request.args:
+            g.revision = request.args["revision"]
+        else:
+            g.revision = request.headers["revision"]
         if not validate_revision(g.revision):
             return make_response(
                 jsonify({"error": "Revision should only contains numbers."}), 410
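Behaviorally, checking membership with "in" instead of truthiness means an explicitly supplied but empty revision (for example ?revision=) now reaches validate_revision and is rejected with 410, rather than silently falling back to the latest revision. A hedged client-side sketch (host, port, project name, and revision value are placeholders):

    import requests

    base = "http://localhost:5555/gordo/v0/my-project"  # placeholder host/project

    # Revision can come from the query string or a header.
    requests.get(f"{base}/models", params={"revision": "1586864006000"})
    requests.get(f"{base}/models", headers={"revision": "1586864006000"})

    # An empty-but-present revision is now validated instead of ignored.
    r = requests.get(f"{base}/models", params={"revision": ""})
    print(r.status_code)  # 410: "Revision should only contains numbers."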
13 changes: 9 additions & 4 deletions gordo/server/utils.py
@@ -285,11 +285,16 @@ def wrapper_method(*args, **kwargs):
         if request.method == "POST":

             # Always require an X, be it in JSON or file/parquet format.
-            if ("X" not in (request.json or {})) and ("X" not in request.files):
-                message = dict(message='Cannot predict without "X"')
-                return make_response((jsonify(message), 400))
+            if request.is_json:
+                if "X" not in (request.json or {}):
+                    message = dict(message='Cannot predict without "X"')
+                    return make_response((jsonify(message), 400))
+            else:
+                if "X" not in request.files:
+                    message = dict(message='Cannot predict without "X"')
+                    return make_response((jsonify(message), 400))

-            if request.json is not None:
+            if request.is_json:
                 X = dataframe_from_dict(request.json["X"])
                 y = request.json.get("y")
                 if y is not None:
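The split on request.is_json is defensive: on newer Flask/Werkzeug, touching request.json when the body is not JSON raises instead of returning None, so the multipart (parquet) path must never read it. A hedged sketch of the two request shapes the guard distinguishes (URL, tag names, and file are placeholders):

    import requests

    url = "http://localhost:5555/gordo/v0/my-project/my-machine/prediction"  # placeholder

    # JSON branch: request.is_json is true, "X" must be in the JSON body.
    requests.post(url, json={"X": {"tag-1": [0.1, 0.2], "tag-2": [0.3, 0.4]}})

    # File branch: request.is_json is false, "X" must be an uploaded parquet file.
    with open("X.parquet", "rb") as f:  # placeholder file
        requests.post(url, files={"X": f})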
