From e36f32fd3ec3d5a663fcbd8f5043425ea9d6deaf Mon Sep 17 00:00:00 2001
From: Francisco <francisco.de-la-pena@univ-lille1.fr>
Date: Fri, 20 Oct 2017 11:07:20 +0200
Subject: [PATCH 01/12] Enable lazy reshape as it is now supported by dask

---
 hyperspy/_signals/lazy.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/hyperspy/_signals/lazy.py b/hyperspy/_signals/lazy.py
index b7e32aaf91..2f39a26c4a 100644
--- a/hyperspy/_signals/lazy.py
+++ b/hyperspy/_signals/lazy.py
@@ -255,9 +255,6 @@ def rebin(self, new_shape=None, scale=None, crop=False, out=None):
     def __array__(self, dtype=None):
         return self.data.__array__(dtype=dtype)
 
-    def _unfold(self, *args):
-        raise lazyerror
-
     def _make_sure_data_is_contiguous(self, log=None):
         self._make_lazy(rechunk=True)
 

From 45fd025a0e3ab637ac070b9a0e692a6462d49b43 Mon Sep 17 00:00:00 2001
From: Francisco <francisco.de-la-pena@univ-lille1.fr>
Date: Fri, 20 Oct 2017 11:07:20 +0200
Subject: [PATCH 02/12] Automatic style corrections courtesy of autopep8

---
 hyperspy/_signals/lazy.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/hyperspy/_signals/lazy.py b/hyperspy/_signals/lazy.py
index 2f39a26c4a..1ca3e09735 100644
--- a/hyperspy/_signals/lazy.py
+++ b/hyperspy/_signals/lazy.py
@@ -771,14 +771,17 @@ def decomposition(self,
             if reproject:
                 if algorithm == 'PCA':
                     method = obj.transform
-                    post = lambda a: np.concatenate(a, axis=0)
+
+                    def post(a): return np.concatenate(a, axis=0)
                 elif algorithm == 'ORPCA':
                     method = obj.project
                     obj.R = []
-                    post = lambda a: obj.finish()[4]
+
+                    def post(a): return obj.finish()[4]
                 elif algorithm == 'ONMF':
                     method = obj.project
-                    post = lambda a: np.concatenate(a, axis=1).T
+
+                    def post(a): return np.concatenate(a, axis=1).T
 
                 _map = map(lambda thing: method(thing),
                            self._block_iterator(

From 89a191392611fdff9a9f45751dba2a0d75239324 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francisco=20de=20la=20Pe=C3=B1a?= <frandelapena@gmail.com>
Date: Wed, 17 Jan 2018 16:16:54 +0100
Subject: [PATCH 03/12] Enable lazy unfolding and get_decomposition_model

---
 hyperspy/_signals/lazy.py | 12 ++++++------
 hyperspy/learn/mva.py     | 30 ++++++++++++++----------------
 2 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/hyperspy/_signals/lazy.py b/hyperspy/_signals/lazy.py
index 1ca3e09735..e59705d3cf 100644
--- a/hyperspy/_signals/lazy.py
+++ b/hyperspy/_signals/lazy.py
@@ -255,6 +255,9 @@ def rebin(self, new_shape=None, scale=None, crop=False, out=None):
     def __array__(self, dtype=None):
         return self.data.__array__(dtype=dtype)
 
+    # def _unfold(self, *args):
+    #     raise lazyerror
+
     def _make_sure_data_is_contiguous(self, log=None):
         self._make_lazy(rechunk=True)
 
@@ -771,17 +774,14 @@ def decomposition(self,
             if reproject:
                 if algorithm == 'PCA':
                     method = obj.transform
-
-                    def post(a): return np.concatenate(a, axis=0)
+                    post = lambda a: np.concatenate(a, axis=0)
                 elif algorithm == 'ORPCA':
                     method = obj.project
                     obj.R = []
-
-                    def post(a): return obj.finish()[4]
+                    post = lambda a: obj.finish()[4]
                 elif algorithm == 'ONMF':
                     method = obj.project
-
-                    def post(a): return np.concatenate(a, axis=1).T
+                    post = lambda a: np.concatenate(a, axis=1).T
 
                 _map = map(lambda thing: method(thing),
                            self._block_iterator(
diff --git a/hyperspy/learn/mva.py b/hyperspy/learn/mva.py
index 8e5849754b..4bb020b3f2 100644
--- a/hyperspy/learn/mva.py
+++ b/hyperspy/learn/mva.py
@@ -51,7 +51,7 @@ def centering_and_whitening(X):
     del _
     K = (u / (d / np.sqrt(X.shape[1]))).T
     del u, d
-    X1 = np.dot(K, X)
+    X1 = K @ X
     return X1.T, K
 
 
@@ -414,15 +414,14 @@ def decomposition(self,
             if reproject in ('navigation', 'both'):
                 if algorithm not in ('nmf', 'sparse_pca',
                                      'mini_batch_sparse_pca'):
-                    loadings_ = np.dot(dc[:, signal_mask] - mean, factors)
+                    loadings_ = (dc[:, signal_mask] - mean) @ factors
                 else:
                     loadings_ = sk.transform(dc[:, signal_mask])
                 target.loadings = loadings_
             if reproject in ('signal', 'both'):
                 if algorithm not in ('nmf', 'sparse_pca',
                                      'mini_batch_sparse_pca'):
-                    factors = np.dot(np.linalg.pinv(loadings),
-                                     dc[navigation_mask, :] - mean).T
+                    factors = (np.linalg.pinv(loadings) @ (dc[navigation_mask, :] - mean)).T
                     target.factors = factors
                 else:
                     _logger.info("Reprojecting the signal is not yet "
@@ -660,15 +659,15 @@ def blind_source_separation(self,
             lr.bss_node = temp_function(**kwargs)
             lr.bss_node.train(factors)
             unmixing_matrix = lr.bss_node.get_recmatrix()
-        w = np.dot(unmixing_matrix, invsqcovmat)
+        w = unmixing_matrix @ invsqcovmat
         if lr.explained_variance is not None:
             # The output of ICA is not sorted in any way what makes it
             # difficult to compare results from different unmixings. The
             # following code is an experimental attempt to sort them in a
             # more predictable way
-            sorting_indices = np.argsort(np.dot(
-                lr.explained_variance[:number_of_components],
-                np.abs(w.T)))[::-1]
+            sorting_indices = np.argsort(
+                lr.explained_variance[:number_of_components] @ np.abs(w.T)
+                )[::-1]
             w[:] = w[sorting_indices, :]
         lr.unmixing_matrix = w
         lr.on_loadings = on_loadings
@@ -779,12 +778,12 @@ def _unmix_components(self):
         w = lr.unmixing_matrix
         n = len(w)
         if lr.on_loadings:
-            lr.bss_loadings = np.dot(lr.loadings[:, :n], w.T)
-            lr.bss_factors = np.dot(lr.factors[:, :n], np.linalg.inv(w))
+            lr.bss_loadings = lr.loadings[:, :n] @  w.T
+            lr.bss_factors = lr.factors[:, :n] @ np.linalg.inv(w)
         else:
 
-            lr.bss_factors = np.dot(lr.factors[:, :n], w.T)
-            lr.bss_loadings = np.dot(lr.loadings[:, :n], np.linalg.inv(w))
+            lr.bss_factors = lr.factors[:, :n] @ w.T
+            lr.bss_loadings = lr.loadings[:, :n] @ np.linalg.inv(w)
 
     def _auto_reverse_bss_component(self, target):
         n_components = target.bss_factors.shape[1]
@@ -822,7 +821,7 @@ def _calculate_recmatrix(self, components=None, mva_type=None,):
             factors = target.bss_factors
             loadings = target.bss_loadings.T
         if components is None:
-            a = np.dot(factors, loadings)
+            a = factors @ loadings
             signal_name = 'model from %s with %i components' % (
                 mva_type, factors.shape[1])
         elif hasattr(components, '__iter__'):
@@ -831,12 +830,11 @@ def _calculate_recmatrix(self, components=None, mva_type=None,):
             for i in range(len(components)):
                 tfactors[:, i] = factors[:, components[i]]
                 tloadings[i, :] = loadings[components[i], :]
-            a = np.dot(tfactors, tloadings)
+            a = tfactors @ tloadings
             signal_name = 'model from %s with components %s' % (
                 mva_type, components)
         else:
-            a = np.dot(factors[:, :components],
-                       loadings[:components, :])
+            a = factors[:, :components] @ loadings[:components, :]
             signal_name = 'model from %s with %i components' % (
                 mva_type, components)
 

From 53cd48c2e78e8b3740390a76ea85f50da3c5141a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francisco=20de=20la=20Pe=C3=B1a?= <frandelapena@gmail.com>
Date: Wed, 17 Jan 2018 16:16:54 +0100
Subject: [PATCH 04/12] Automatic style corrections courtesy of autopep8

---
 hyperspy/_signals/lazy.py | 9 ++++++---
 hyperspy/learn/mva.py     | 9 +++++----
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/hyperspy/_signals/lazy.py b/hyperspy/_signals/lazy.py
index e59705d3cf..1f48cd5c5a 100644
--- a/hyperspy/_signals/lazy.py
+++ b/hyperspy/_signals/lazy.py
@@ -774,14 +774,17 @@ def decomposition(self,
             if reproject:
                 if algorithm == 'PCA':
                     method = obj.transform
-                    post = lambda a: np.concatenate(a, axis=0)
+
+                    def post(a): return np.concatenate(a, axis=0)
                 elif algorithm == 'ORPCA':
                     method = obj.project
                     obj.R = []
-                    post = lambda a: obj.finish()[4]
+
+                    def post(a): return obj.finish()[4]
                 elif algorithm == 'ONMF':
                     method = obj.project
-                    post = lambda a: np.concatenate(a, axis=1).T
+
+                    def post(a): return np.concatenate(a, axis=1).T
 
                 _map = map(lambda thing: method(thing),
                            self._block_iterator(
diff --git a/hyperspy/learn/mva.py b/hyperspy/learn/mva.py
index 4bb020b3f2..43556c8c81 100644
--- a/hyperspy/learn/mva.py
+++ b/hyperspy/learn/mva.py
@@ -26,7 +26,7 @@
 try:
     import mdp
     mdp_installed = True
-except:
+except BaseException:
     mdp_installed = False
 
 
@@ -327,7 +327,7 @@ def decomposition(self,
                             var_array = np.polyval(
                                 polyfit, dc[
                                     signal_mask, navigation_mask])
-                        except:
+                        except BaseException:
                             raise ValueError(
                                 'var_func must be either a function or an '
                                 'array defining the coefficients of a polynom')
@@ -421,7 +421,8 @@ def decomposition(self,
             if reproject in ('signal', 'both'):
                 if algorithm not in ('nmf', 'sparse_pca',
                                      'mini_batch_sparse_pca'):
-                    factors = (np.linalg.pinv(loadings) @ (dc[navigation_mask, :] - mean)).T
+                    factors = (np.linalg.pinv(loadings) @ (
+                        dc[navigation_mask, :] - mean)).T
                     target.factors = factors
                 else:
                     _logger.info("Reprojecting the signal is not yet "
@@ -667,7 +668,7 @@ def blind_source_separation(self,
             # more predictable way
             sorting_indices = np.argsort(
                 lr.explained_variance[:number_of_components] @ np.abs(w.T)
-                )[::-1]
+            )[::-1]
             w[:] = w[sorting_indices, :]
         lr.unmixing_matrix = w
         lr.on_loadings = on_loadings

From d70539dd67a75e088cd24d8243fc7e15fc055d95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francisco=20de=20la=20Pe=C3=B1a?= <frandelapena@gmail.com>
Date: Thu, 18 Jan 2018 10:57:52 +0100
Subject: [PATCH 05/12] Remove commented out lines

---
 hyperspy/_signals/lazy.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/hyperspy/_signals/lazy.py b/hyperspy/_signals/lazy.py
index 1f48cd5c5a..1ca3e09735 100644
--- a/hyperspy/_signals/lazy.py
+++ b/hyperspy/_signals/lazy.py
@@ -255,9 +255,6 @@ def rebin(self, new_shape=None, scale=None, crop=False, out=None):
     def __array__(self, dtype=None):
         return self.data.__array__(dtype=dtype)
 
-    # def _unfold(self, *args):
-    #     raise lazyerror
-
     def _make_sure_data_is_contiguous(self, log=None):
         self._make_lazy(rechunk=True)
 

From 625f262ec349f6004c2b080490e7d7557c0ac08a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francisco=20de=20la=20Pe=C3=B1a?= <frandelapena@gmail.com>
Date: Wed, 21 Mar 2018 16:55:53 +0100
Subject: [PATCH 06/12] Deprecate lazy decomposition bound kword

---
 hyperspy/_signals/lazy.py | 28 +++++++++-------------------
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/hyperspy/_signals/lazy.py b/hyperspy/_signals/lazy.py
index 7b67a848b1..12f46b4862 100644
--- a/hyperspy/_signals/lazy.py
+++ b/hyperspy/_signals/lazy.py
@@ -18,9 +18,9 @@
 
 import logging
 from functools import partial
+import warnings
 
 import numpy as np
-import math as math
 import dask.array as da
 import dask.delayed as dd
 from dask import threaded
@@ -28,12 +28,11 @@
 from itertools import product
 
 from ..signal import BaseSignal
-from ..misc.utils import underline, multiply, dummy_context_manager
+from ..misc.utils import multiply, dummy_context_manager
 from ..external.progressbar import progressbar
 from ..external.astroML.histtools import dasky_histogram
-from ..defaults_parser import preferences
-from ..docstrings.signal import (ONE_AXIS_PARAMETER, OUT_ARG)
 from hyperspy.misc.array_tools import _requires_linear_rebin
+from hyperspy.exceptions import VisibleDeprecationWarning
 
 _logger = logging.getLogger(__name__)
 
@@ -611,7 +610,7 @@ def decomposition(self,
                       get=threaded.get,
                       num_chunks=None,
                       reproject=True,
-                      bounds=True,
+                      bounds=False,
                       **kwargs):
         """Perform Incremental (Batch) decomposition on the data, keeping n
         significant components.
@@ -640,12 +639,6 @@ def decomposition(self,
             decomposition.
         reproject : bool
             Reproject data on the learnt components (factors) after learning.
-        bounds : {tuple, bool}
-            The (min, max) values of the data to normalize before learning.
-            If tuple (min, max), those values will be used for normalization.
-            If True, extremes will be looked up (expensive), default.
-            If False, no normalization is done (learning may be very slow).
-            If normalize_poissonian_noise is True, this cannot be True.
         **kwargs
             passed to the partial_fit/fit functions.
 
@@ -674,6 +667,11 @@ def decomposition(self,
 
 
         """
+        if bounds:
+            msg = (
+                "The `bounds` keyword is deprecated and will be removed "
+                "in v2.0. Since version > 1.3 this has no effect.")
+            warnings.warn(msg, VisibleDeprecationWarning)
         explained_variance = None
         explained_variance_ratio = None
         _al_data = self._data_aligned_with_axes
@@ -749,14 +747,6 @@ def decomposition(self,
                 data = data / coeff
                 self.data = data
 
-            # normalize the data for learning algs:
-            if bounds:
-                if bounds is True:
-                    _min, _max = da.compute(self.data.min(), self.data.max())
-                else:
-                    _min, _max = bounds
-                self.data = (self.data - _min) / (_max - _min)
-
             # LEARN
             this_data = []
             try:

From 58e03e5df2e93ea2aa07ed1539db7431bb9e92dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francisco=20de=20la=20Pe=C3=B1a?= <frandelapena@gmail.com>
Date: Wed, 21 Mar 2018 17:01:19 +0100
Subject: [PATCH 07/12] Disable RPCA scaling

---
 hyperspy/learn/rpca.py | 48 +++---------------------------------------
 1 file changed, 3 insertions(+), 45 deletions(-)

diff --git a/hyperspy/learn/rpca.py b/hyperspy/learn/rpca.py
index 6c1dd24573..b926f3cbe2 100644
--- a/hyperspy/learn/rpca.py
+++ b/hyperspy/learn/rpca.py
@@ -34,22 +34,6 @@ def _thresh(X, lambda1):
     return np.sign(X) * ((res > 0) * res)
 
 
-def _normalize(arr, ar_min=None, ar_max=None, undo=False):
-    if not undo:
-        if ar_min is None:
-            ar_min = arr.min()
-        if ar_max is None:
-            ar_max = arr.max()
-    else:
-        if ar_min is None or ar_max is None:
-            raise ValueError("min / max values have to be passed when undoing "
-                             "the normalization")
-    if undo:
-        return (arr * (ar_max - ar_min)) + ar_min
-    else:
-        return (arr - ar_min) / (ar_max - ar_min)
-
-
 def rpca_godec(X, rank, fast=False, lambda1=None,
                power=None, tol=None, maxiter=None):
     """
@@ -132,11 +116,6 @@ def svd(X):
                         "Defaulting to 1e3")
         maxiter = 1e3
 
-    # Get min & max of data matrix for scaling
-    X_max = np.max(X)
-    X_min = np.min(X)
-    X = (X - X_min) / X_max
-
     # Initialize L and E
     L = X
     E = np.zeros(L.shape)
@@ -178,9 +157,9 @@ def svd(X):
         G = G.T
 
     # Rescale
-    Xhat = (L * X_max) + X_min
-    Ehat = (E * X_max) + X_min
-    Ghat = (G * X_max) + X_min
+    Xhat = L
+    Ehat = E
+    Ghat = G
 
     # Do final SVD
     U, S, Vh = svd(Xhat)
@@ -305,18 +284,7 @@ def _setup(self, X, normalize=False):
         if isinstance(X, np.ndarray):
             n, m = X.shape
             iterating = False
-            if normalize:
-                self.X_min = X.min()
-                self.X_max = X.max()
-                self.normalize = normalize
-                # actually scale the data to be between 0 and 1, not just close
-                # to it..
-                X = _normalize(X, ar_min=self.X_min, ar_max=self.X_max)
-                # X = (X - self.X_min) / (self.X_max - self.X_min)
         else:
-            if normalize:
-                _logger.warning("Normalization with an iterator is not"
-                                " possible, option ignored.")
             x = next(X)
             m = len(x)
             X = chain([x], X)
@@ -360,9 +328,6 @@ def _initialize(self, X):
                     X = chain(iter(Y2.T.copy()), X)
                 else:
                     Y2 = X[:self.training_samples, :].T
-                # normalize the init data here..
-                # Y2 = (Y2 - Y2.min()) / (Y2.max() - Y2.min())
-                Y2 = _normalize(Y2)
             elif self.init == 'rand':
                 Y2 = np.random.randn(m, self.rank)
             L, _ = scipy.linalg.qr(Y2, mode='economic')
@@ -452,13 +417,6 @@ def finish(self):
             # both keep an indicator that we had something and remove the
             # duplicate data
             self.E = [1]
-            if self.normalize:
-                Ehat = _normalize(Ehat, ar_min=self.X_min, ar_max=self.X_max,
-                                  undo=True)
-
-        if self.normalize:
-            Xhat = _normalize(Xhat, ar_min=self.X_min, ar_max=self.X_max,
-                              undo=True)
 
         # Do final SVD
         U, S, Vh = self.svd(Xhat)

From f2af5effa79b98bbe0156cece2dea100212b618a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francisco=20de=20la=20Pe=C3=B1a?= <frandelapena@gmail.com>
Date: Wed, 21 Mar 2018 18:32:37 +0100
Subject: [PATCH 08/12] Add lazy svd and make it default

---
 hyperspy/_signals/lazy.py | 74 ++++++++++++++++++++++++---------------
 1 file changed, 45 insertions(+), 29 deletions(-)

diff --git a/hyperspy/_signals/lazy.py b/hyperspy/_signals/lazy.py
index 12f46b4862..89fbf65000 100644
--- a/hyperspy/_signals/lazy.py
+++ b/hyperspy/_signals/lazy.py
@@ -602,7 +602,7 @@ def _block_iterator(self,
                                     self.axes_manager.signal_shape[::-1])
 
     def decomposition(self,
-                      output_dimension,
+                      output_dimension=None,
                       normalize_poissonian_noise=False,
                       algorithm='PCA',
                       signal_mask=None,
@@ -618,12 +618,13 @@ def decomposition(self,
         Parameters
         ----------
         output_dimension : int
-            the number of significant components to keep
+            the number of significant components to keep. If None, keep all
+            (only valid for SVD)
         normalize_poissonian_noise : bool
             If True, scale the SI to normalize Poissonian noise
         algorithm : str
-            One of ('PCA', 'ORPCA', 'ONMF'). By default ('PCA') IncrementalPCA
-            from scikit-learn is run.
+            One of ('svd', 'PCA', 'ORPCA', 'ONMF'). By default 'svd',
+            lazy SVD decomposition from dask.
         get : dask scheduler
             the dask scheduler to use for computations;
             default `dask.threaded.get`
@@ -684,6 +685,9 @@ def decomposition(self,
         if blocksize / output_dimension < num_chunks:
             num_chunks = np.ceil(blocksize / output_dimension)
         blocksize *= num_chunks
+        if algorithm != "svd" and output_dimension is None:
+            raise ValueError("With the %s the output_dimension "
+                             "must be specified" % algorithm)
 
         # LEARN
         if algorithm == 'PCA':
@@ -704,16 +708,12 @@ def decomposition(self,
             batch_size = kwargs.pop('batch_size', None)
             obj = ONMF(output_dimension, **kwargs)
             method = partial(obj.fit, batch_size=batch_size)
-
-        else:
+        elif algorithm != "svd":
             raise ValueError('algorithm not known')
 
         original_data = self.data
         try:
             if normalize_poissonian_noise:
-                if bounds is True:
-                    bounds = False
-                    # warnings.warn?
                 data = self._data_aligned_with_axes
                 ndim = self.axes_manager.navigation_dimension
                 sdim = self.axes_manager.signal_dimension
@@ -748,27 +748,42 @@ def decomposition(self,
                 self.data = data
 
             # LEARN
-            this_data = []
-            try:
-                for chunk in progressbar(
-                        self._block_iterator(
-                            flat_signal=True,
-                            get=get,
-                            signal_mask=signal_mask,
-                            navigation_mask=navigation_mask),
-                        total=nblocks,
-                        leave=True,
-                        desc='Learn'):
-                    this_data.append(chunk)
-                    if len(this_data) == num_chunks:
+            if algorithm == "svd":
+                reproject = False
+                from dask.array.linalg import svd
+                try:
+                    self._unfolded4decomposition = self.unfold()
+                    # TODO: implement masking
+                    U, S, V = svd(self.data)
+                    factors = V.T
+                    explained_variance = S ** 2 / self.data.shape[0]
+                    loadings = U * S
+                finally:
+                    if self._unfolded4decomposition is True:
+                        self.fold()
+                        self._unfolded4decomposition is False
+            else:
+                this_data = []
+                try:
+                    for chunk in progressbar(
+                            self._block_iterator(
+                                flat_signal=True,
+                                get=get,
+                                signal_mask=signal_mask,
+                                navigation_mask=navigation_mask),
+                            total=nblocks,
+                            leave=True,
+                            desc='Learn'):
+                        this_data.append(chunk)
+                        if len(this_data) == num_chunks:
+                            thedata = np.concatenate(this_data, axis=0)
+                            method(thedata)
+                            this_data = []
+                    if len(this_data):
                         thedata = np.concatenate(this_data, axis=0)
                         method(thedata)
-                        this_data = []
-                if len(this_data):
-                    thedata = np.concatenate(this_data, axis=0)
-                    method(thedata)
-            except KeyboardInterrupt:
-                pass
+                except KeyboardInterrupt:
+                    pass
 
             # GET ALREADY CALCULATED RESULTS
             if algorithm == 'PCA':
@@ -840,7 +855,8 @@ def post(a): return np.concatenate(a, axis=1).T
         target = self.learning_results
         target.decomposition_algorithm = algorithm
         target.output_dimension = output_dimension
-        target._object = obj
+        if algorithm != "svd":
+            target._object = obj
         target.factors = factors
         target.loadings = loadings
         target.explained_variance = explained_variance

From da124b68e6faf22811a00f6629b6afb987765d01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francisco=20de=20la=20Pe=C3=B1a?= <frandelapena@gmail.com>
Date: Wed, 21 Mar 2018 18:53:48 +0100
Subject: [PATCH 09/12] Fix lazy norm poiss noise

---
 hyperspy/_signals/lazy.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/hyperspy/_signals/lazy.py b/hyperspy/_signals/lazy.py
index 89fbf65000..42b6f14545 100644
--- a/hyperspy/_signals/lazy.py
+++ b/hyperspy/_signals/lazy.py
@@ -862,6 +862,11 @@ def post(a): return np.concatenate(a, axis=1).T
         target.explained_variance = explained_variance
         target.explained_variance_ratio = explained_variance_ratio
 
+        # Rescale the results if the noise was normalized
+        if normalize_poissonian_noise is True:
+            target.factors = target.factors * rbH.ravel()[:, np.newaxis]
+            target.loadings = target.loadings * raG.ravel()[:, np.newaxis]
+
     def transpose(self, *args, **kwargs):
         res = super().transpose(*args, **kwargs)
         res._make_lazy(rechunk=True)

From 9a878f7d062b8db355a929d99241b10474c2c9a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francisco=20de=20la=20Pe=C3=B1a?= <frandelapena@gmail.com>
Date: Wed, 21 Mar 2018 18:55:15 +0100
Subject: [PATCH 10/12] Make kwarg order consistent with nonlazy

---
 hyperspy/_signals/lazy.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hyperspy/_signals/lazy.py b/hyperspy/_signals/lazy.py
index 42b6f14545..08cf62b91d 100644
--- a/hyperspy/_signals/lazy.py
+++ b/hyperspy/_signals/lazy.py
@@ -602,9 +602,9 @@ def _block_iterator(self,
                                     self.axes_manager.signal_shape[::-1])
 
     def decomposition(self,
-                      output_dimension=None,
                       normalize_poissonian_noise=False,
                       algorithm='PCA',
+                      output_dimension=None,
                       signal_mask=None,
                       navigation_mask=None,
                       get=threaded.get,
@@ -617,14 +617,14 @@ def decomposition(self,
 
         Parameters
         ----------
-        output_dimension : int
-            the number of significant components to keep. If None, keep all
-            (only valid for SVD)
         normalize_poissonian_noise : bool
             If True, scale the SI to normalize Poissonian noise
         algorithm : str
             One of ('svd', 'PCA', 'ORPCA', 'ONMF'). By default 'svd',
             lazy SVD decomposition from dask.
+        output_dimension : int
+            the number of significant components to keep. If None, keep all
+            (only valid for SVD)
         get : dask scheduler
             the dask scheduler to use for computations;
             default `dask.threaded.get`

From d59de7e102a3350860bba8c71034a5b7e2f30d2b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francisco=20de=20la=20Pe=C3=B1a?= <frandelapena@gmail.com>
Date: Thu, 22 Mar 2018 17:52:17 +0100
Subject: [PATCH 11/12] Disable reverse component for lazy

---
 hyperspy/learn/mva.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/hyperspy/learn/mva.py b/hyperspy/learn/mva.py
index 25449af72d..0215f9a809 100644
--- a/hyperspy/learn/mva.py
+++ b/hyperspy/learn/mva.py
@@ -766,10 +766,15 @@ def reverse_bss_component(self, component_number):
         >>> s.reverse_bss_component(1) # reverse IC 1
         >>> s.reverse_bss_component((0, 2)) # reverse ICs 0 and 2
         """
-
+        if self._lazy:
+            _logger.warning(
+                "Component(s) %s not reversed, featured not implemented "
+                "for lazy signals" % component_number)
+            return
         target = self.learning_results
 
         for i in [component_number, ]:
+            _logger.info("Component %i reversed" % i)
             target.bss_factors[:, i] *= -1
             target.bss_loadings[:, i] *= -1
             target.unmixing_matrix[i, :] *= -1
@@ -793,7 +798,6 @@ def _auto_reverse_bss_component(self, target):
             maximum = np.nanmax(target.bss_loadings[:, i])
             if minimum < 0 and -minimum > maximum:
                 self.reverse_bss_component(i)
-                _logger.info("IC %i reversed" % i)
 
     def _calculate_recmatrix(self, components=None, mva_type=None,):
         """

From cba3f206a574d1c8d386647ef8b0bd0cb024ede9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francisco=20de=20la=20Pe=C3=B1a?= <frandelapena@gmail.com>
Date: Thu, 22 Mar 2018 18:14:29 +0100
Subject: [PATCH 12/12] Add compute to lr.crop and bss

---
 hyperspy/learn/mva.py | 39 +++++++++++++++++++++++++++++++++------
 1 file changed, 33 insertions(+), 6 deletions(-)

diff --git a/hyperspy/learn/mva.py b/hyperspy/learn/mva.py
index 0215f9a809..42dc2b1143 100644
--- a/hyperspy/learn/mva.py
+++ b/hyperspy/learn/mva.py
@@ -477,6 +477,7 @@ def blind_source_separation(self,
                                 mask=None,
                                 on_loadings=False,
                                 pretreatment=None,
+                                compute=False,
                                 **kwargs):
         """Blind source separation (BSS) on the result on the
         decomposition.
@@ -512,6 +513,10 @@ def blind_source_separation(self,
             If True, perform the BSS on the loadings of a previous
             decomposition. If False, performs it on the factors.
         pretreatment: dict
+        compute: bool
+           If the decomposition results are lazy, compute the BSS components
+           so that they are not lazy.
+           Default is False.
 
         **kwargs : extra key word arguments
             Any keyword arguments are passed to the BSS algorithm.
@@ -672,7 +677,7 @@ def blind_source_separation(self,
             w[:] = w[sorting_indices, :]
         lr.unmixing_matrix = w
         lr.on_loadings = on_loadings
-        self._unmix_components()
+        self._unmix_components(compute=compute)
         self._auto_reverse_bss_component(lr)
         lr.bss_algorithm = algorithm
         lr.bss_node = str(lr.bss_node)
@@ -744,6 +749,12 @@ def reverse_decomposition_component(self, component_number):
         >>> s.reverse_decomposition_component((0, 2)) # reverse ICs 0 and 2
         """
 
+        if hasattr(self.learning_results.factors, "compute"):
+            # They are lazy
+            _logger.warning(
+                "Component(s) %s not reversed, featured not implemented "
+                "for lazy computations" % component_number)
+            return
         target = self.learning_results
 
         for i in [component_number, ]:
@@ -766,10 +777,11 @@ def reverse_bss_component(self, component_number):
         >>> s.reverse_bss_component(1) # reverse IC 1
         >>> s.reverse_bss_component((0, 2)) # reverse ICs 0 and 2
         """
-        if self._lazy:
+        if hasattr(self.learning_results.bss_factors, "compute"):
+            # They are lazy
             _logger.warning(
                 "Component(s) %s not reversed, featured not implemented "
-                "for lazy signals" % component_number)
+                "for lazy computations" % component_number)
             return
         target = self.learning_results
 
@@ -779,7 +791,7 @@ def reverse_bss_component(self, component_number):
             target.bss_loadings[:, i] *= -1
             target.unmixing_matrix[i, :] *= -1
 
-    def _unmix_components(self):
+    def _unmix_components(self, compute=False):
         lr = self.learning_results
         w = lr.unmixing_matrix
         n = len(w)
@@ -787,9 +799,11 @@ def _unmix_components(self):
             lr.bss_loadings = lr.loadings[:, :n] @  w.T
             lr.bss_factors = lr.factors[:, :n] @ np.linalg.inv(w)
         else:
-
             lr.bss_factors = lr.factors[:, :n] @ w.T
             lr.bss_loadings = lr.loadings[:, :n] @ np.linalg.inv(w)
+        if compute:
+            lr.bss_factors = lr.bss_factors.compute()
+            lr.bss_loadings = lr.bss_loadings.compute()
 
     def _auto_reverse_bss_component(self, target):
         n_components = target.bss_factors.shape[1]
@@ -1333,16 +1347,29 @@ def _summary(self):
                 ("Number of components : %i" % len(self.unmixing_matrix)))
         return summary_str
 
-    def crop_decomposition_dimension(self, n):
+    def crop_decomposition_dimension(self, n, compute=False):
         """
         Crop the score matrix up to the given number.
         It is mainly useful to save memory and reduce the storage size
+
+        Parameters
+        ----------
+        n : int
+            Number of components to keep.
+        compute: bool
+           If the decomposition results are lazy, also compute the results.
+           Default is False.
         """
         _logger.info("trimming to %i dimensions" % n)
         self.loadings = self.loadings[:, :n]
         if self.explained_variance is not None:
             self.explained_variance = self.explained_variance[:n]
         self.factors = self.factors[:, :n]
+        if compute:
+            self.loadings = self.loadings.compute()
+            self.factors = self.factors.compute()
+            if self.explained_variance is not None:
+                self.explained_variance = self.explained_variance.compute()
 
     def _transpose_results(self):
         (self.factors, self.loadings, self.bss_factors,