From e36f32fd3ec3d5a663fcbd8f5043425ea9d6deaf Mon Sep 17 00:00:00 2001 From: Francisco Date: Fri, 20 Oct 2017 11:07:20 +0200 Subject: [PATCH 01/12] Enable lazy reshape as it is now supported by dask --- hyperspy/_signals/lazy.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/hyperspy/_signals/lazy.py b/hyperspy/_signals/lazy.py index b7e32aaf91..2f39a26c4a 100644 --- a/hyperspy/_signals/lazy.py +++ b/hyperspy/_signals/lazy.py @@ -255,9 +255,6 @@ def rebin(self, new_shape=None, scale=None, crop=False, out=None): def __array__(self, dtype=None): return self.data.__array__(dtype=dtype) - def _unfold(self, *args): - raise lazyerror - def _make_sure_data_is_contiguous(self, log=None): self._make_lazy(rechunk=True) From 45fd025a0e3ab637ac070b9a0e692a6462d49b43 Mon Sep 17 00:00:00 2001 From: Francisco Date: Fri, 20 Oct 2017 11:07:20 +0200 Subject: [PATCH 02/12] Automatic style corrections courtesy of autopep8 --- hyperspy/_signals/lazy.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/hyperspy/_signals/lazy.py b/hyperspy/_signals/lazy.py index 2f39a26c4a..1ca3e09735 100644 --- a/hyperspy/_signals/lazy.py +++ b/hyperspy/_signals/lazy.py @@ -771,14 +771,17 @@ def decomposition(self, if reproject: if algorithm == 'PCA': method = obj.transform - post = lambda a: np.concatenate(a, axis=0) + + def post(a): return np.concatenate(a, axis=0) elif algorithm == 'ORPCA': method = obj.project obj.R = [] - post = lambda a: obj.finish()[4] + + def post(a): return obj.finish()[4] elif algorithm == 'ONMF': method = obj.project - post = lambda a: np.concatenate(a, axis=1).T + + def post(a): return np.concatenate(a, axis=1).T _map = map(lambda thing: method(thing), self._block_iterator( From 89a191392611fdff9a9f45751dba2a0d75239324 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20de=20la=20Pe=C3=B1a?= Date: Wed, 17 Jan 2018 16:16:54 +0100 Subject: [PATCH 03/12] Enable lazy unfolding and get_decomposition_model --- hyperspy/_signals/lazy.py | 12 ++++++------ hyperspy/learn/mva.py | 30 ++++++++++++++---------------- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/hyperspy/_signals/lazy.py b/hyperspy/_signals/lazy.py index 1ca3e09735..e59705d3cf 100644 --- a/hyperspy/_signals/lazy.py +++ b/hyperspy/_signals/lazy.py @@ -255,6 +255,9 @@ def rebin(self, new_shape=None, scale=None, crop=False, out=None): def __array__(self, dtype=None): return self.data.__array__(dtype=dtype) + # def _unfold(self, *args): + # raise lazyerror + def _make_sure_data_is_contiguous(self, log=None): self._make_lazy(rechunk=True) @@ -771,17 +774,14 @@ def decomposition(self, if reproject: if algorithm == 'PCA': method = obj.transform - - def post(a): return np.concatenate(a, axis=0) + post = lambda a: np.concatenate(a, axis=0) elif algorithm == 'ORPCA': method = obj.project obj.R = [] - - def post(a): return obj.finish()[4] + post = lambda a: obj.finish()[4] elif algorithm == 'ONMF': method = obj.project - - def post(a): return np.concatenate(a, axis=1).T + post = lambda a: np.concatenate(a, axis=1).T _map = map(lambda thing: method(thing), self._block_iterator( diff --git a/hyperspy/learn/mva.py b/hyperspy/learn/mva.py index 8e5849754b..4bb020b3f2 100644 --- a/hyperspy/learn/mva.py +++ b/hyperspy/learn/mva.py @@ -51,7 +51,7 @@ def centering_and_whitening(X): del _ K = (u / (d / np.sqrt(X.shape[1]))).T del u, d - X1 = np.dot(K, X) + X1 = K @ X return X1.T, K @@ -414,15 +414,14 @@ def decomposition(self, if reproject in ('navigation', 'both'): if algorithm not in ('nmf', 'sparse_pca', 'mini_batch_sparse_pca'): - loadings_ = np.dot(dc[:, signal_mask] - mean, factors) + loadings_ = (dc[:, signal_mask] - mean) @ factors else: loadings_ = sk.transform(dc[:, signal_mask]) target.loadings = loadings_ if reproject in ('signal', 'both'): if algorithm not in ('nmf', 'sparse_pca', 'mini_batch_sparse_pca'): - factors = np.dot(np.linalg.pinv(loadings), - dc[navigation_mask, :] - mean).T + factors = (np.linalg.pinv(loadings) @ (dc[navigation_mask, :] - mean)).T target.factors = factors else: _logger.info("Reprojecting the signal is not yet " @@ -660,15 +659,15 @@ def blind_source_separation(self, lr.bss_node = temp_function(**kwargs) lr.bss_node.train(factors) unmixing_matrix = lr.bss_node.get_recmatrix() - w = np.dot(unmixing_matrix, invsqcovmat) + w = unmixing_matrix @ invsqcovmat if lr.explained_variance is not None: # The output of ICA is not sorted in any way what makes it # difficult to compare results from different unmixings. The # following code is an experimental attempt to sort them in a # more predictable way - sorting_indices = np.argsort(np.dot( - lr.explained_variance[:number_of_components], - np.abs(w.T)))[::-1] + sorting_indices = np.argsort( + lr.explained_variance[:number_of_components] @ np.abs(w.T) + )[::-1] w[:] = w[sorting_indices, :] lr.unmixing_matrix = w lr.on_loadings = on_loadings @@ -779,12 +778,12 @@ def _unmix_components(self): w = lr.unmixing_matrix n = len(w) if lr.on_loadings: - lr.bss_loadings = np.dot(lr.loadings[:, :n], w.T) - lr.bss_factors = np.dot(lr.factors[:, :n], np.linalg.inv(w)) + lr.bss_loadings = lr.loadings[:, :n] @ w.T + lr.bss_factors = lr.factors[:, :n] @ np.linalg.inv(w) else: - lr.bss_factors = np.dot(lr.factors[:, :n], w.T) - lr.bss_loadings = np.dot(lr.loadings[:, :n], np.linalg.inv(w)) + lr.bss_factors = lr.factors[:, :n] @ w.T + lr.bss_loadings = lr.loadings[:, :n] @ np.linalg.inv(w) def _auto_reverse_bss_component(self, target): n_components = target.bss_factors.shape[1] @@ -822,7 +821,7 @@ def _calculate_recmatrix(self, components=None, mva_type=None,): factors = target.bss_factors loadings = target.bss_loadings.T if components is None: - a = np.dot(factors, loadings) + a = factors @ loadings signal_name = 'model from %s with %i components' % ( mva_type, factors.shape[1]) elif hasattr(components, '__iter__'): @@ -831,12 +830,11 @@ def _calculate_recmatrix(self, components=None, mva_type=None,): for i in range(len(components)): tfactors[:, i] = factors[:, components[i]] tloadings[i, :] = loadings[components[i], :] - a = np.dot(tfactors, tloadings) + a = tfactors @ tloadings signal_name = 'model from %s with components %s' % ( mva_type, components) else: - a = np.dot(factors[:, :components], - loadings[:components, :]) + a = factors[:, :components] @ loadings[:components, :] signal_name = 'model from %s with %i components' % ( mva_type, components) From 53cd48c2e78e8b3740390a76ea85f50da3c5141a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20de=20la=20Pe=C3=B1a?= Date: Wed, 17 Jan 2018 16:16:54 +0100 Subject: [PATCH 04/12] Automatic style corrections courtesy of autopep8 --- hyperspy/_signals/lazy.py | 9 ++++++--- hyperspy/learn/mva.py | 9 +++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/hyperspy/_signals/lazy.py b/hyperspy/_signals/lazy.py index e59705d3cf..1f48cd5c5a 100644 --- a/hyperspy/_signals/lazy.py +++ b/hyperspy/_signals/lazy.py @@ -774,14 +774,17 @@ def decomposition(self, if reproject: if algorithm == 'PCA': method = obj.transform - post = lambda a: np.concatenate(a, axis=0) + + def post(a): return np.concatenate(a, axis=0) elif algorithm == 'ORPCA': method = obj.project obj.R = [] - post = lambda a: obj.finish()[4] + + def post(a): return obj.finish()[4] elif algorithm == 'ONMF': method = obj.project - post = lambda a: np.concatenate(a, axis=1).T + + def post(a): return np.concatenate(a, axis=1).T _map = map(lambda thing: method(thing), self._block_iterator( diff --git a/hyperspy/learn/mva.py b/hyperspy/learn/mva.py index 4bb020b3f2..43556c8c81 100644 --- a/hyperspy/learn/mva.py +++ b/hyperspy/learn/mva.py @@ -26,7 +26,7 @@ try: import mdp mdp_installed = True -except: +except BaseException: mdp_installed = False @@ -327,7 +327,7 @@ def decomposition(self, var_array = np.polyval( polyfit, dc[ signal_mask, navigation_mask]) - except: + except BaseException: raise ValueError( 'var_func must be either a function or an ' 'array defining the coefficients of a polynom') @@ -421,7 +421,8 @@ def decomposition(self, if reproject in ('signal', 'both'): if algorithm not in ('nmf', 'sparse_pca', 'mini_batch_sparse_pca'): - factors = (np.linalg.pinv(loadings) @ (dc[navigation_mask, :] - mean)).T + factors = (np.linalg.pinv(loadings) @ ( + dc[navigation_mask, :] - mean)).T target.factors = factors else: _logger.info("Reprojecting the signal is not yet " @@ -667,7 +668,7 @@ def blind_source_separation(self, # more predictable way sorting_indices = np.argsort( lr.explained_variance[:number_of_components] @ np.abs(w.T) - )[::-1] + )[::-1] w[:] = w[sorting_indices, :] lr.unmixing_matrix = w lr.on_loadings = on_loadings From d70539dd67a75e088cd24d8243fc7e15fc055d95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20de=20la=20Pe=C3=B1a?= Date: Thu, 18 Jan 2018 10:57:52 +0100 Subject: [PATCH 05/12] Remove commented out lines --- hyperspy/_signals/lazy.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/hyperspy/_signals/lazy.py b/hyperspy/_signals/lazy.py index 1f48cd5c5a..1ca3e09735 100644 --- a/hyperspy/_signals/lazy.py +++ b/hyperspy/_signals/lazy.py @@ -255,9 +255,6 @@ def rebin(self, new_shape=None, scale=None, crop=False, out=None): def __array__(self, dtype=None): return self.data.__array__(dtype=dtype) - # def _unfold(self, *args): - # raise lazyerror - def _make_sure_data_is_contiguous(self, log=None): self._make_lazy(rechunk=True) From 625f262ec349f6004c2b080490e7d7557c0ac08a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20de=20la=20Pe=C3=B1a?= Date: Wed, 21 Mar 2018 16:55:53 +0100 Subject: [PATCH 06/12] Deprecate lazy decomposition bound kword --- hyperspy/_signals/lazy.py | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/hyperspy/_signals/lazy.py b/hyperspy/_signals/lazy.py index 7b67a848b1..12f46b4862 100644 --- a/hyperspy/_signals/lazy.py +++ b/hyperspy/_signals/lazy.py @@ -18,9 +18,9 @@ import logging from functools import partial +import warnings import numpy as np -import math as math import dask.array as da import dask.delayed as dd from dask import threaded @@ -28,12 +28,11 @@ from itertools import product from ..signal import BaseSignal -from ..misc.utils import underline, multiply, dummy_context_manager +from ..misc.utils import multiply, dummy_context_manager from ..external.progressbar import progressbar from ..external.astroML.histtools import dasky_histogram -from ..defaults_parser import preferences -from ..docstrings.signal import (ONE_AXIS_PARAMETER, OUT_ARG) from hyperspy.misc.array_tools import _requires_linear_rebin +from hyperspy.exceptions import VisibleDeprecationWarning _logger = logging.getLogger(__name__) @@ -611,7 +610,7 @@ def decomposition(self, get=threaded.get, num_chunks=None, reproject=True, - bounds=True, + bounds=False, **kwargs): """Perform Incremental (Batch) decomposition on the data, keeping n significant components. @@ -640,12 +639,6 @@ def decomposition(self, decomposition. reproject : bool Reproject data on the learnt components (factors) after learning. - bounds : {tuple, bool} - The (min, max) values of the data to normalize before learning. - If tuple (min, max), those values will be used for normalization. - If True, extremes will be looked up (expensive), default. - If False, no normalization is done (learning may be very slow). - If normalize_poissonian_noise is True, this cannot be True. **kwargs passed to the partial_fit/fit functions. @@ -674,6 +667,11 @@ def decomposition(self, """ + if bounds: + msg = ( + "The `bounds` keyword is deprecated and will be removed " + "in v2.0. Since version > 1.3 this has no effect.") + warnings.warn(msg, VisibleDeprecationWarning) explained_variance = None explained_variance_ratio = None _al_data = self._data_aligned_with_axes @@ -749,14 +747,6 @@ def decomposition(self, data = data / coeff self.data = data - # normalize the data for learning algs: - if bounds: - if bounds is True: - _min, _max = da.compute(self.data.min(), self.data.max()) - else: - _min, _max = bounds - self.data = (self.data - _min) / (_max - _min) - # LEARN this_data = [] try: From 58e03e5df2e93ea2aa07ed1539db7431bb9e92dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20de=20la=20Pe=C3=B1a?= Date: Wed, 21 Mar 2018 17:01:19 +0100 Subject: [PATCH 07/12] Disable RPCA scaling --- hyperspy/learn/rpca.py | 48 +++--------------------------------------- 1 file changed, 3 insertions(+), 45 deletions(-) diff --git a/hyperspy/learn/rpca.py b/hyperspy/learn/rpca.py index 6c1dd24573..b926f3cbe2 100644 --- a/hyperspy/learn/rpca.py +++ b/hyperspy/learn/rpca.py @@ -34,22 +34,6 @@ def _thresh(X, lambda1): return np.sign(X) * ((res > 0) * res) -def _normalize(arr, ar_min=None, ar_max=None, undo=False): - if not undo: - if ar_min is None: - ar_min = arr.min() - if ar_max is None: - ar_max = arr.max() - else: - if ar_min is None or ar_max is None: - raise ValueError("min / max values have to be passed when undoing " - "the normalization") - if undo: - return (arr * (ar_max - ar_min)) + ar_min - else: - return (arr - ar_min) / (ar_max - ar_min) - - def rpca_godec(X, rank, fast=False, lambda1=None, power=None, tol=None, maxiter=None): """ @@ -132,11 +116,6 @@ def svd(X): "Defaulting to 1e3") maxiter = 1e3 - # Get min & max of data matrix for scaling - X_max = np.max(X) - X_min = np.min(X) - X = (X - X_min) / X_max - # Initialize L and E L = X E = np.zeros(L.shape) @@ -178,9 +157,9 @@ def svd(X): G = G.T # Rescale - Xhat = (L * X_max) + X_min - Ehat = (E * X_max) + X_min - Ghat = (G * X_max) + X_min + Xhat = L + Ehat = E + Ghat = G # Do final SVD U, S, Vh = svd(Xhat) @@ -305,18 +284,7 @@ def _setup(self, X, normalize=False): if isinstance(X, np.ndarray): n, m = X.shape iterating = False - if normalize: - self.X_min = X.min() - self.X_max = X.max() - self.normalize = normalize - # actually scale the data to be between 0 and 1, not just close - # to it.. - X = _normalize(X, ar_min=self.X_min, ar_max=self.X_max) - # X = (X - self.X_min) / (self.X_max - self.X_min) else: - if normalize: - _logger.warning("Normalization with an iterator is not" - " possible, option ignored.") x = next(X) m = len(x) X = chain([x], X) @@ -360,9 +328,6 @@ def _initialize(self, X): X = chain(iter(Y2.T.copy()), X) else: Y2 = X[:self.training_samples, :].T - # normalize the init data here.. - # Y2 = (Y2 - Y2.min()) / (Y2.max() - Y2.min()) - Y2 = _normalize(Y2) elif self.init == 'rand': Y2 = np.random.randn(m, self.rank) L, _ = scipy.linalg.qr(Y2, mode='economic') @@ -452,13 +417,6 @@ def finish(self): # both keep an indicator that we had something and remove the # duplicate data self.E = [1] - if self.normalize: - Ehat = _normalize(Ehat, ar_min=self.X_min, ar_max=self.X_max, - undo=True) - - if self.normalize: - Xhat = _normalize(Xhat, ar_min=self.X_min, ar_max=self.X_max, - undo=True) # Do final SVD U, S, Vh = self.svd(Xhat) From f2af5effa79b98bbe0156cece2dea100212b618a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20de=20la=20Pe=C3=B1a?= Date: Wed, 21 Mar 2018 18:32:37 +0100 Subject: [PATCH 08/12] Add lazy svd and make it default --- hyperspy/_signals/lazy.py | 74 ++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 29 deletions(-) diff --git a/hyperspy/_signals/lazy.py b/hyperspy/_signals/lazy.py index 12f46b4862..89fbf65000 100644 --- a/hyperspy/_signals/lazy.py +++ b/hyperspy/_signals/lazy.py @@ -602,7 +602,7 @@ def _block_iterator(self, self.axes_manager.signal_shape[::-1]) def decomposition(self, - output_dimension, + output_dimension=None, normalize_poissonian_noise=False, algorithm='PCA', signal_mask=None, @@ -618,12 +618,13 @@ def decomposition(self, Parameters ---------- output_dimension : int - the number of significant components to keep + the number of significant components to keep. If None, keep all + (only valid for SVD) normalize_poissonian_noise : bool If True, scale the SI to normalize Poissonian noise algorithm : str - One of ('PCA', 'ORPCA', 'ONMF'). By default ('PCA') IncrementalPCA - from scikit-learn is run. + One of ('svd', 'PCA', 'ORPCA', 'ONMF'). By default 'svd', + lazy SVD decomposition from dask. get : dask scheduler the dask scheduler to use for computations; default `dask.threaded.get` @@ -684,6 +685,9 @@ def decomposition(self, if blocksize / output_dimension < num_chunks: num_chunks = np.ceil(blocksize / output_dimension) blocksize *= num_chunks + if algorithm != "svd" and output_dimension is None: + raise ValueError("With the %s the output_dimension " + "must be specified" % algorithm) # LEARN if algorithm == 'PCA': @@ -704,16 +708,12 @@ def decomposition(self, batch_size = kwargs.pop('batch_size', None) obj = ONMF(output_dimension, **kwargs) method = partial(obj.fit, batch_size=batch_size) - - else: + elif algorithm != "svd": raise ValueError('algorithm not known') original_data = self.data try: if normalize_poissonian_noise: - if bounds is True: - bounds = False - # warnings.warn? data = self._data_aligned_with_axes ndim = self.axes_manager.navigation_dimension sdim = self.axes_manager.signal_dimension @@ -748,27 +748,42 @@ def decomposition(self, self.data = data # LEARN - this_data = [] - try: - for chunk in progressbar( - self._block_iterator( - flat_signal=True, - get=get, - signal_mask=signal_mask, - navigation_mask=navigation_mask), - total=nblocks, - leave=True, - desc='Learn'): - this_data.append(chunk) - if len(this_data) == num_chunks: + if algorithm == "svd": + reproject = False + from dask.array.linalg import svd + try: + self._unfolded4decomposition = self.unfold() + # TODO: implement masking + U, S, V = svd(self.data) + factors = V.T + explained_variance = S ** 2 / self.data.shape[0] + loadings = U * S + finally: + if self._unfolded4decomposition is True: + self.fold() + self._unfolded4decomposition is False + else: + this_data = [] + try: + for chunk in progressbar( + self._block_iterator( + flat_signal=True, + get=get, + signal_mask=signal_mask, + navigation_mask=navigation_mask), + total=nblocks, + leave=True, + desc='Learn'): + this_data.append(chunk) + if len(this_data) == num_chunks: + thedata = np.concatenate(this_data, axis=0) + method(thedata) + this_data = [] + if len(this_data): thedata = np.concatenate(this_data, axis=0) method(thedata) - this_data = [] - if len(this_data): - thedata = np.concatenate(this_data, axis=0) - method(thedata) - except KeyboardInterrupt: - pass + except KeyboardInterrupt: + pass # GET ALREADY CALCULATED RESULTS if algorithm == 'PCA': @@ -840,7 +855,8 @@ def post(a): return np.concatenate(a, axis=1).T target = self.learning_results target.decomposition_algorithm = algorithm target.output_dimension = output_dimension - target._object = obj + if algorithm != "svd": + target._object = obj target.factors = factors target.loadings = loadings target.explained_variance = explained_variance From da124b68e6faf22811a00f6629b6afb987765d01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20de=20la=20Pe=C3=B1a?= Date: Wed, 21 Mar 2018 18:53:48 +0100 Subject: [PATCH 09/12] Fix lazy norm poiss noise --- hyperspy/_signals/lazy.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hyperspy/_signals/lazy.py b/hyperspy/_signals/lazy.py index 89fbf65000..42b6f14545 100644 --- a/hyperspy/_signals/lazy.py +++ b/hyperspy/_signals/lazy.py @@ -862,6 +862,11 @@ def post(a): return np.concatenate(a, axis=1).T target.explained_variance = explained_variance target.explained_variance_ratio = explained_variance_ratio + # Rescale the results if the noise was normalized + if normalize_poissonian_noise is True: + target.factors = target.factors * rbH.ravel()[:, np.newaxis] + target.loadings = target.loadings * raG.ravel()[:, np.newaxis] + def transpose(self, *args, **kwargs): res = super().transpose(*args, **kwargs) res._make_lazy(rechunk=True) From 9a878f7d062b8db355a929d99241b10474c2c9a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20de=20la=20Pe=C3=B1a?= Date: Wed, 21 Mar 2018 18:55:15 +0100 Subject: [PATCH 10/12] Make kwarg order consistent with nonlazy --- hyperspy/_signals/lazy.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hyperspy/_signals/lazy.py b/hyperspy/_signals/lazy.py index 42b6f14545..08cf62b91d 100644 --- a/hyperspy/_signals/lazy.py +++ b/hyperspy/_signals/lazy.py @@ -602,9 +602,9 @@ def _block_iterator(self, self.axes_manager.signal_shape[::-1]) def decomposition(self, - output_dimension=None, normalize_poissonian_noise=False, algorithm='PCA', + output_dimension=None, signal_mask=None, navigation_mask=None, get=threaded.get, @@ -617,14 +617,14 @@ def decomposition(self, Parameters ---------- - output_dimension : int - the number of significant components to keep. If None, keep all - (only valid for SVD) normalize_poissonian_noise : bool If True, scale the SI to normalize Poissonian noise algorithm : str One of ('svd', 'PCA', 'ORPCA', 'ONMF'). By default 'svd', lazy SVD decomposition from dask. + output_dimension : int + the number of significant components to keep. If None, keep all + (only valid for SVD) get : dask scheduler the dask scheduler to use for computations; default `dask.threaded.get` From d59de7e102a3350860bba8c71034a5b7e2f30d2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20de=20la=20Pe=C3=B1a?= Date: Thu, 22 Mar 2018 17:52:17 +0100 Subject: [PATCH 11/12] Disable reverse component for lazy --- hyperspy/learn/mva.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/hyperspy/learn/mva.py b/hyperspy/learn/mva.py index 25449af72d..0215f9a809 100644 --- a/hyperspy/learn/mva.py +++ b/hyperspy/learn/mva.py @@ -766,10 +766,15 @@ def reverse_bss_component(self, component_number): >>> s.reverse_bss_component(1) # reverse IC 1 >>> s.reverse_bss_component((0, 2)) # reverse ICs 0 and 2 """ - + if self._lazy: + _logger.warning( + "Component(s) %s not reversed, featured not implemented " + "for lazy signals" % component_number) + return target = self.learning_results for i in [component_number, ]: + _logger.info("Component %i reversed" % i) target.bss_factors[:, i] *= -1 target.bss_loadings[:, i] *= -1 target.unmixing_matrix[i, :] *= -1 @@ -793,7 +798,6 @@ def _auto_reverse_bss_component(self, target): maximum = np.nanmax(target.bss_loadings[:, i]) if minimum < 0 and -minimum > maximum: self.reverse_bss_component(i) - _logger.info("IC %i reversed" % i) def _calculate_recmatrix(self, components=None, mva_type=None,): """ From cba3f206a574d1c8d386647ef8b0bd0cb024ede9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20de=20la=20Pe=C3=B1a?= Date: Thu, 22 Mar 2018 18:14:29 +0100 Subject: [PATCH 12/12] Add compute to lr.crop and bss --- hyperspy/learn/mva.py | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/hyperspy/learn/mva.py b/hyperspy/learn/mva.py index 0215f9a809..42dc2b1143 100644 --- a/hyperspy/learn/mva.py +++ b/hyperspy/learn/mva.py @@ -477,6 +477,7 @@ def blind_source_separation(self, mask=None, on_loadings=False, pretreatment=None, + compute=False, **kwargs): """Blind source separation (BSS) on the result on the decomposition. @@ -512,6 +513,10 @@ def blind_source_separation(self, If True, perform the BSS on the loadings of a previous decomposition. If False, performs it on the factors. pretreatment: dict + compute: bool + If the decomposition results are lazy, compute the BSS components + so that they are not lazy. + Default is False. **kwargs : extra key word arguments Any keyword arguments are passed to the BSS algorithm. @@ -672,7 +677,7 @@ def blind_source_separation(self, w[:] = w[sorting_indices, :] lr.unmixing_matrix = w lr.on_loadings = on_loadings - self._unmix_components() + self._unmix_components(compute=compute) self._auto_reverse_bss_component(lr) lr.bss_algorithm = algorithm lr.bss_node = str(lr.bss_node) @@ -744,6 +749,12 @@ def reverse_decomposition_component(self, component_number): >>> s.reverse_decomposition_component((0, 2)) # reverse ICs 0 and 2 """ + if hasattr(self.learning_results.factors, "compute"): + # They are lazy + _logger.warning( + "Component(s) %s not reversed, featured not implemented " + "for lazy computations" % component_number) + return target = self.learning_results for i in [component_number, ]: @@ -766,10 +777,11 @@ def reverse_bss_component(self, component_number): >>> s.reverse_bss_component(1) # reverse IC 1 >>> s.reverse_bss_component((0, 2)) # reverse ICs 0 and 2 """ - if self._lazy: + if hasattr(self.learning_results.bss_factors, "compute"): + # They are lazy _logger.warning( "Component(s) %s not reversed, featured not implemented " - "for lazy signals" % component_number) + "for lazy computations" % component_number) return target = self.learning_results @@ -779,7 +791,7 @@ def reverse_bss_component(self, component_number): target.bss_loadings[:, i] *= -1 target.unmixing_matrix[i, :] *= -1 - def _unmix_components(self): + def _unmix_components(self, compute=False): lr = self.learning_results w = lr.unmixing_matrix n = len(w) @@ -787,9 +799,11 @@ def _unmix_components(self): lr.bss_loadings = lr.loadings[:, :n] @ w.T lr.bss_factors = lr.factors[:, :n] @ np.linalg.inv(w) else: - lr.bss_factors = lr.factors[:, :n] @ w.T lr.bss_loadings = lr.loadings[:, :n] @ np.linalg.inv(w) + if compute: + lr.bss_factors = lr.bss_factors.compute() + lr.bss_loadings = lr.bss_loadings.compute() def _auto_reverse_bss_component(self, target): n_components = target.bss_factors.shape[1] @@ -1333,16 +1347,29 @@ def _summary(self): ("Number of components : %i" % len(self.unmixing_matrix))) return summary_str - def crop_decomposition_dimension(self, n): + def crop_decomposition_dimension(self, n, compute=False): """ Crop the score matrix up to the given number. It is mainly useful to save memory and reduce the storage size + + Parameters + ---------- + n : int + Number of components to keep. + compute: bool + If the decomposition results are lazy, also compute the results. + Default is False. """ _logger.info("trimming to %i dimensions" % n) self.loadings = self.loadings[:, :n] if self.explained_variance is not None: self.explained_variance = self.explained_variance[:n] self.factors = self.factors[:, :n] + if compute: + self.loadings = self.loadings.compute() + self.factors = self.factors.compute() + if self.explained_variance is not None: + self.explained_variance = self.explained_variance.compute() def _transpose_results(self): (self.factors, self.loadings, self.bss_factors,