diff --git a/dipy/denoise/localpca.py b/dipy/denoise/localpca.py
index cab5936882..99363d52ca 100644
--- a/dipy/denoise/localpca.py
+++ b/dipy/denoise/localpca.py
@@ -13,6 +13,7 @@ from scipy.linalg import svd
     svd_args = [False]
 from scipy.linalg import eigh
 
+from dipy.denoise.pca_noise_estimate import pca_noise_estimate
 
 
 def _pca_classifier(L, nvoxels):
@@ -44,6 +45,15 @@ def _pca_classifier(L, nvoxels):
            theory. Neuroimage 142:394-406.
            doi: 10.1016/j.neuroimage.2016.08.016
     """
+
+    # if num_samples - 1 (to correct for mean subtraction) is less than number
+    # of features, discard the zero eigenvalues
+    if L.size > nvoxels - 1:
+        L = L[-(nvoxels - 1):]
+
+    # Note that the condition expressed in the while-loop is expressed in terms
+    # of the variance of equation (12), not equation (11) as in [1]_. Also,
+    # this code implements ascending eigenvalues, unlike [1]_.
     var = np.mean(L)
     c = L.size - 1
     r = L[c] - L[0] - 4 * np.sqrt((c + 1.0) / nvoxels) * var
@@ -52,6 +62,7 @@ def _pca_classifier(L, nvoxels):
         c = c - 1
         r = L[c] - L[0] - 4 * np.sqrt((c + 1.0) / nvoxels) * var
     ncomps = c + 1
+
     return var, ncomps
 
 
@@ -76,7 +87,7 @@ def genpca(arr, sigma=None, mask=None, patch_radius=2, pca_method='eig',
         outside of those voxels.
     patch_radius : int or 1D array (optional)
         The radius of the local patch to be taken around each voxel (in
-        voxels). Default: 2 (denoise in blocks of 5x5x5 voxels).
+        voxels). E.g. patch_radius=2 gives 5x5x5 patches.
     pca_method : 'eig' or 'svd' (optional)
         Use either eigenvalue decomposition (eig) or singular value
         decomposition (svd) for principal component analysis. The default
@@ -93,16 +104,13 @@ def genpca(arr, sigma=None, mask=None, patch_radius=2, pca_method='eig',
         \tau_{factor} can be set to a predefined values (e.g. \tau_{factor} =
         2.3 [3]_), or automatically calculated using random matrix theory
         (in case that \tau_{factor} is set to None).
-        Default: None.
     return_sigma : bool (optional)
         If true, the Standard deviation of the noise will be returned.
-        Default: False.
     out_dtype : str or dtype (optional)
         The dtype for the output array. Default: output has the same dtype as
         the input.
     suppress_warning : bool (optional)
         If true, suppress warning caused by patch_size < arr.shape[-1].
-        Default: False.
 
     Returns
     -------
@@ -166,7 +174,8 @@ def genpca(arr, sigma=None, mask=None, patch_radius=2, pca_method='eig',
     if num_samples == 1:
         raise ValueError("Cannot have only 1 sample,\
                          please increase patch_radius.")
-    if num_samples < arr.shape[-1] and not suppress_warning:
+    # account for mean subtraction by testing #samples - 1
+    if (num_samples - 1) < arr.shape[-1] and not suppress_warning:
         tmp = np.sum(patch_size == 1)  # count spatial dimensions with size 1
         if tmp == 0:
             root = np.ceil(arr.shape[-1] ** (1./3))  # 3D
@@ -176,7 +185,7 @@ def genpca(arr, sigma=None, mask=None, patch_radius=2, pca_method='eig',
             root = arr.shape[-1]  # 1D
         root = root + 1 if (root % 2) == 0 else root  # make odd
         spr = int((root - 1) / 2)  # suggested patch_radius
-        e_s = "Number of samples {1} < Dimensionality {0}. "\
+        e_s = "Number of samples {1} - 1 < Dimensionality {0}. "\
             .format(arr.shape[-1], num_samples)
         e_s += "This might have a performance impact. "
" e_s += "Increase patch_radius to {0} to avoid this warning, "\ @@ -224,7 +233,7 @@ def genpca(arr, sigma=None, mask=None, patch_radius=2, pca_method='eig', X = arr[ix1:ix2, jx1:jx2, kx1:kx2].reshape( num_samples, dim) - # compute the mean and normalize + # compute the mean M = np.mean(X, axis=0) # Upcast the dtype for precision in the SVD X = X - M @@ -248,7 +257,7 @@ def genpca(arr, sigma=None, mask=None, patch_radius=2, pca_method='eig', if sigma is None: # Random matrix theory - this_var, ncomps = _pca_classifier(d, num_samples) + this_var, _ = _pca_classifier(d, num_samples) else: # Predefined variance this_var = var[i, j, k] @@ -287,8 +296,10 @@ def genpca(arr, sigma=None, mask=None, patch_radius=2, pca_method='eig', return denoised_arr.astype(out_dtype) -def localpca(arr, sigma, mask=None, patch_radius=2, pca_method='eig', - tau_factor=2.3, out_dtype=None, suppress_warning=False): +def localpca(arr, sigma=None, mask=None, patch_radius=2, gtab=None, + patch_radius_sigma=1, pca_method='eig', tau_factor=2.3, + return_sigma=False, correct_bias=True, out_dtype=None, + suppress_warning=False): r""" Performs local PCA denoising according to Manjon et al. [1]_. Parameters @@ -296,15 +307,24 @@ def localpca(arr, sigma, mask=None, patch_radius=2, pca_method='eig', arr : 4D array Array of data to be denoised. The dimensions are (X, Y, Z, N), where N are the diffusion gradient directions. - sigma : float or 3D array - Standard deviation of the noise estimated from the data. + sigma : float or 3D array (optional) + Standard deviation of the noise estimated from the data. If not given, + calculate using method in [1]_. mask : 3D boolean array (optional) A mask with voxels that are true inside the brain and false outside of it. The function denoises within the true part and returns zeros outside of those voxels. patch_radius : int or 1D array (optional) The radius of the local patch to be taken around each voxel (in - voxels). Default: 2 (denoise in blocks of 5x5x5 voxels). + voxels). E.g. patch_radius=2 gives 5x5x5 patches. + gtab: gradient table object (optional if sigma is provided) + gradient information for the data gives us the bvals and bvecs of + diffusion data, which is needed to calculate noise level if sigma is + not provided. + patch_radius_sigma : int (optional) + The radius of the local patch to be taken around each voxel (in + voxels) for estimating sigma. E.g. patch_radius_sigma=2 gives + 5x5x5 patches. pca_method : 'eig' or 'svd' (optional) Use either eigenvalue decomposition (eig) or singular value decomposition (svd) for principal component analysis. The default @@ -321,14 +341,18 @@ def localpca(arr, sigma, mask=None, patch_radius=2, pca_method='eig', \tau_{factor} can be change to adjust the relationship between the noise standard deviation and the threshold \tau. If \tau_{factor} is set to None, it will be automatically calculated using the - Marcenko-Pastur distribution [2]_. - Default: 2.3 (according to [1]_) + Marcenko-Pastur distribution [2]_. Default: 2.3 according to [1]_. + return_sigma : bool (optional) + If true, a noise standard deviation estimate based on the + Marcenko-Pastur distribution is returned [2]_. + correct_bias : bool (optional) + Whether to correct for bias due to Rician noise. This is an + implementation of equation 8 in [1]_. out_dtype : str or dtype (optional) The dtype for the output array. Default: output has the same dtype as the input. suppress_warning : bool (optional) If true, suppress warning caused by patch_size < arr.shape[-1]. 
-        Default: False.
 
     Returns
     -------
@@ -347,9 +371,20 @@ def localpca(arr, sigma, mask=None, patch_radius=2, pca_method='eig',
            theory. Neuroimage 142:394-406.
            doi: 10.1016/j.neuroimage.2016.08.016
     """
+    # check that gtab is given if sigma is not given
+    if sigma is None and gtab is None:
+        raise ValueError("gtab must be provided if sigma is not given")
+
+    # calculate sigma
+    if sigma is None:
+        sigma = pca_noise_estimate(arr, gtab,
+                                   correct_bias=correct_bias,
+                                   patch_radius=patch_radius_sigma,
+                                   images_as_samples=True)
+
     return genpca(arr, sigma=sigma, mask=mask, patch_radius=patch_radius,
                   pca_method=pca_method, tau_factor=tau_factor,
-                  return_sigma=False, out_dtype=out_dtype,
+                  return_sigma=return_sigma, out_dtype=out_dtype,
                   suppress_warning=suppress_warning)
@@ -369,7 +404,7 @@ def mppca(arr, mask=None, patch_radius=2, pca_method='eig',
         outside of those voxels.
     patch_radius : int or 1D array (optional)
         The radius of the local patch to be taken around each voxel (in
-        voxels). Default: 2 (denoise in blocks of 5x5x5 voxels).
+        voxels). E.g. patch_radius=2 gives 5x5x5 patches.
     pca_method : 'eig' or 'svd' (optional)
         Use either eigenvalue decomposition (eig) or singular value
         decomposition (svd) for principal component analysis. The default
@@ -378,13 +413,11 @@ def mppca(arr, mask=None, patch_radius=2, pca_method='eig',
     return_sigma : bool (optional)
         If true, a noise standard deviation estimate based on the
         Marcenko-Pastur distribution is returned [2]_.
-        Default: False.
     out_dtype : str or dtype (optional)
         The dtype for the output array. Default: output has the same dtype as
         the input.
     suppress_warning : bool (optional)
         If true, suppress warning caused by patch_size < arr.shape[-1].
-        Default: False.
 
     Returns
     -------
diff --git a/dipy/denoise/pca_noise_estimate.pyx b/dipy/denoise/pca_noise_estimate.pyx
index 3c03e657b2..8b2701f024 100644
--- a/dipy/denoise/pca_noise_estimate.pyx
+++ b/dipy/denoise/pca_noise_estimate.pyx
@@ -10,6 +10,7 @@ import scipy.special as sps
 from scipy import ndimage
 cimport cython
 cimport numpy as cnp
+from warnings import warn
 
 # Try to get the SVD through direct API to lapack:
 try:
@@ -26,7 +27,7 @@ except ImportError:
 @cython.wraparound(False)
 @cython.cdivision(True)
 def pca_noise_estimate(data, gtab, patch_radius=1, correct_bias=True,
-                       smooth=2):
+                       smooth=2, images_as_samples=False):
     """ PCA based local noise estimation.
 
     Parameters
     ----------
@@ -47,12 +48,25 @@ def pca_noise_estimate(data, gtab, patch_radius=1, correct_bias=True,
     smooth : int
         Radius of a Gaussian smoothing filter to apply to the noise estimate
         before returning. Default: 2.
+    images_as_samples : bool, optional
+        Whether to use images as rows (samples) for PCA (algorithm in [1]_) or
+        to use images as columns (features).
 
     Returns
     -------
     sigma_corr: 3D array
         The local noise standard deviation estimate.
 
+    Notes
+    -----
+    In [1]_, images are used as samples, so voxels are features, therefore
+    eigenvectors are image-shaped. However, [1]_ is not clear on how to use
+    these eigenvectors to determine the noise level, so here eigenvalues
+    (variance over samples explained by eigenvectors) are used to scale
+    the eigenvectors. Use images_as_samples=True to use this algorithm.
+    Alternatively, voxels can be used as samples using
+    images_as_samples=False. This is not the canonical algorithm of [1]_.
+
     References
     ----------
     .. [1] Manjon JV, Coupe P, Concha L, Buades A, Collins DL "Diffusion
@@ -72,6 +86,10 @@ def pca_noise_estimate(data, gtab, patch_radius=1, correct_bias=True,
         data0 = data[..., ~gtab.b0s_mask]
         sibe = True
 
+    if patch_radius < 1:
+        warn("Minimum patch radius must be 1, setting to 1", UserWarning)
+        patch_radius = 1
+
     data0 = data0.astype(np.float64)
     cdef:
         cnp.npy_intp dsm = np.min(data0.shape[0:-1])
@@ -92,25 +110,42 @@ def pca_noise_estimate(data, gtab, patch_radius=1, correct_bias=True,
     if (dsm != 1) and (dsm < 2 * patch_radius + 1):
         raise ValueError("Array 'data' is incorrect shape")
 
-    X = data0.reshape(nsamples, n3)
+    if images_as_samples:
+        X = data0.reshape(nsamples, n3).T
+    else:
+        X = data0.reshape(nsamples, n3)
+
     # Demean:
     M = np.mean(X, axis=0)
     X = X - M
     U, S, Vt = svd(X, *svd_args)[:3]
     # Rows of Vt are the eigenvectors, in ascending eigenvalue order:
     W = Vt.T
-    # Project into the data space
-    V = X.dot(W)
-    # Grab the column corresponding to the smallest eigen-vector/-value:
-    I = V[:, -1].reshape(n0, n1, n2)
+
+    if images_as_samples:
+        W = W.astype('double')
+        # #vox (features) >> #img (samples), last eigval zero (X is centered)
+        idx = n3 - 2  # use second-to-last eigvec
+        V = W[:, idx].reshape(n0, n1, n2)
+
+        # the method of [1]_ is ambiguous on how to use an image-shaped eigvec;
+        # since the eigvec is normalized, use the eigval (variance) for scale
+        I = V * S[idx]
+    else:
+        # Project into the data space
+        V = X.dot(W)
+
+        # Grab the column corresponding to the smallest eigen-vector/-value:
+        # #vox (samples) >> #img (features), last eigenvector is meaningful
+        I = V[:, -1].reshape(n0, n1, n2)
+
     del V, W, X, U, S, Vt
 
     cdef:
-      double[:, :, :] count = np.zeros((n0, n1, n2))
-      double[:, :, :] mean = np.zeros((n0, n1, n2))
-      double[:, :, :] sigma_sq = np.zeros((n0, n1, n2))
-      double[:, :, :, :] data0temp = data0
+        double[:, :, :] count = np.zeros((n0, n1, n2))
+        double[:, :, :] mean = np.zeros((n0, n1, n2))
+        double[:, :, :] sigma_sq = np.zeros((n0, n1, n2))
+        double[:, :, :, :] data0temp = data0
 
     with nogil:
         for i in range(prx, n0 - prx):
@@ -123,37 +158,38 @@ def pca_noise_estimate(data, gtab, patch_radius=1, correct_bias=True,
                             for k0 in range(-prz, prz + 1):
                                 sum_reg += I[i + i0, j + j0, k + k0] / norm
                                 for l0 in range(n3):
-                                    temp1 += (data0temp[i + i0, j+ j0, k + k0, l0]) / (norm * n3)
+                                    temp1 += (data0temp[i + i0, j + j0, k + k0, l0])\
+                                        / (norm * n3)
                     for i0 in range(-prx, prx + 1):
                         for j0 in range(-pry, pry + 1):
                             for k0 in range(-prz, prz + 1):
-                                sigma_sq[i + i0, j +j0, k + k0] += (
+                                sigma_sq[i + i0, j + j0, k + k0] += (
                                     I[i + i0, j + j0, k + k0] - sum_reg) ** 2
                                 mean[i + i0, j + j0, k + k0] += temp1
-                                count[i + i0, j +j0, k + k0] += 1
+                                count[i + i0, j + j0, k + k0] += 1
 
     sigma_sq = np.divide(sigma_sq, count)
 
     # find the SNR and make the correction for bias due to Rician noise:
     if correct_bias:
-      mean = np.divide(mean, count)
-      snr = np.divide(mean, np.sqrt(sigma_sq))
-      snr_sq = (snr ** 2)
-      # xi is practically equal to 1 above 37.4, and we overflow, raising
-      # warnings and creating ot-a-numbers.
-      # Instead, we will replace these values with 1 below
-      with np.errstate(over='ignore', invalid='ignore'):
-          xi = (2 + snr_sq - (np.pi / 8) * np.exp(-snr_sq / 2) *
+        mean = np.divide(mean, count)
+        snr = np.divide(mean, np.sqrt(sigma_sq))
+        snr_sq = (snr ** 2)
+        # xi is practically equal to 1 above 37.4, and we overflow, raising
+        # warnings and creating not-a-numbers.
+        # Instead, we will replace these values with 1 below
+        with np.errstate(over='ignore', invalid='ignore'):
+            xi = (2 + snr_sq - (np.pi / 8) * np.exp(-snr_sq / 2) *
                   ((2 + snr_sq) * sps.iv(0, snr_sq / 4) +
                    snr_sq * sps.iv(1, snr_sq / 4)) ** 2).astype(float)
-      xi[snr > 37.4] = 1
-      sigma_corr = sigma_sq / xi
-      sigma_corr[np.isnan(sigma_corr)] = 0
+        xi[snr > 37.4] = 1
+        sigma_corr = sigma_sq / xi
+        sigma_corr[np.isnan(sigma_corr)] = 0
     else:
-      sigma_corr = sigma_sq
+        sigma_corr = sigma_sq
 
     if smooth is not None:
-      sigma_corr = ndimage.gaussian_filter(sigma_corr, smooth)
+        sigma_corr = ndimage.gaussian_filter(sigma_corr, smooth)
 
     return np.sqrt(sigma_corr)
diff --git a/dipy/denoise/tests/test_lpca.py b/dipy/denoise/tests/test_lpca.py
index 4ceb27fa0f..71a4ea1f8f 100644
--- a/dipy/denoise/tests/test_lpca.py
+++ b/dipy/denoise/tests/test_lpca.py
@@ -260,6 +260,11 @@ def test_lpca_sigma_wrong_shape():
     assert_raises(ValueError, localpca, DWI, sigma)
 
 
+def test_lpca_no_gtab_no_sigma():
+    DWI, sigma = rfiw_phantom(gtab, snr=30)
+    assert_raises(ValueError, localpca, DWI, None, None)
+
+
 def test_pca_classifier():
     # Produce small phantom with well aligned single voxels and ground truth
     # snr = 50, i.e signal std = 0.02 (Gaussian noise)
@@ -302,12 +307,16 @@ def test_mppca_in_phantom():
     std_gt = 0.02
     noise = std_gt*np.random.standard_normal(DWIgt.shape)
     DWInoise = DWIgt + noise
-    DWIden = mppca(DWInoise, patch_radius=2)
-    # Test if denoised data is closer to ground truth than noisy data
-    rmse_den = np.sum(np.abs(DWIgt - DWIden)) / np.sum(np.abs(DWIgt))
-    rmse_noisy = np.sum(np.abs(DWIgt - DWInoise)) / np.sum(np.abs(DWIgt))
-    assert_(rmse_den < rmse_noisy)
+    # patch radius (2: #samples > #features, 1: #samples < #features)
+    for PR in [2, 1]:
+
+        DWIden = mppca(DWInoise, patch_radius=PR)
+
+        # Test if denoised data is closer to ground truth than noisy data
+        rmse_den = np.sum(np.abs(DWIgt - DWIden)) / np.sum(np.abs(DWIgt))
+        rmse_noisy = np.sum(np.abs(DWIgt - DWInoise)) / np.sum(np.abs(DWIgt))
+        assert_(rmse_den < rmse_noisy)
 
 
 def test_mppca_returned_sigma():
@@ -316,19 +325,28 @@ def test_mppca_returned_sigma():
     DWIgt = rfiw_phantom(gtab, snr=None)
     std_gt = 0.02
     noise = std_gt*np.random.standard_normal(DWIgt.shape)
     DWInoise = DWIgt + noise
-    # Case that sigma is estimated using mpPCA
-    DWIden0, sigma = mppca(DWInoise, patch_radius=2, return_sigma=True)
-    msigma = np.mean(sigma)
-    std_error = abs(msigma - std_gt)/std_gt * 100
-    assert_(std_error < 5)
-
-    # Case that sigma is inputted (sigma outputted should be the same as the
-    # one inputted)
-    DWIden1, rsigma = genpca(DWInoise, sigma=sigma, tau_factor=None,
-                             patch_radius=2, return_sigma=True)
-    assert_array_almost_equal(rsigma, sigma)
-
-    # DWIden1 should be very similar to DWIden0
-    rmse_den = np.sum(np.abs(DWIden1 - DWIden0)) / np.sum(np.abs(DWIden0))
-    rmse_ref = np.sum(np.abs(DWIden1 - DWIgt)) / np.sum(np.abs(DWIgt))
-    assert_(rmse_den < rmse_ref)
+    # patch radius (2: #samples > #features, 1: #samples < #features)
+    for PR in [2, 1]:
+
+        # Case that sigma is estimated using mpPCA
+        DWIden0, sigma = mppca(DWInoise, patch_radius=PR, return_sigma=True)
+        msigma = np.mean(sigma)
+        std_error = abs(msigma - std_gt)/std_gt * 100
+
+        # if #noise_eigenvals >> #signal_eigenvals, the variance should be
+        # well estimated; this is more likely achieved if #samples > #features
+        if PR > 1:
+            assert_(std_error < 5)
+        else:  # otherwise, the variance estimate may be wrong
+            pass
+
+        # Case that sigma is inputted (sigma outputted should be the same as
+        # the one inputted)
+        DWIden1, rsigma = genpca(DWInoise, sigma=sigma, tau_factor=None,
+                                 patch_radius=PR, return_sigma=True)
+        assert_array_almost_equal(rsigma, sigma)
+
+        # DWIden1 should be very similar to DWIden0
+        rmse_den = np.sum(np.abs(DWIden1 - DWIden0)) / np.sum(np.abs(DWIden0))
+        rmse_ref = np.sum(np.abs(DWIden1 - DWIgt)) / np.sum(np.abs(DWIgt))
+        assert_(rmse_den < rmse_ref)
diff --git a/dipy/denoise/tests/test_noise_estimate.py b/dipy/denoise/tests/test_noise_estimate.py
index 1d5f439b7c..f0851f59c4 100644
--- a/dipy/denoise/tests/test_noise_estimate.py
+++ b/dipy/denoise/tests/test_noise_estimate.py
@@ -1,7 +1,7 @@
 import numpy as np
 from numpy.testing import (assert_almost_equal, assert_equal, assert_,
-                           assert_array_almost_equal)
+                           assert_array_almost_equal, assert_warns)
 from dipy.denoise.noise_estimate import _inv_nchi_cdf, piesno, estimate_sigma
 from dipy.denoise.noise_estimate import _piesno_3D
 from dipy.denoise.pca_noise_estimate import pca_noise_estimate
@@ -152,27 +152,36 @@ def test_pca_noise_estimate():
     bvecs2 = np.concatenate([np.zeros((1, 3)), np.eye(3)])
     gtab2 = dpg.gradient_table(bvals2, bvecs2)
 
-    for patch_radius in [1, 2]:
-        for gtab in [gtab1, gtab2]:
-            for dtype in [np.int16, np.float64]:
-                signal = np.ones((20, 20, 20, gtab.bvals.shape[0]))
-                for correct_bias in [True, False]:
-                    if not correct_bias:
-                        # High signal for no bias correction
-                        signal = signal * 100
-
-                    sigma = 1
-                    noise1 = np.random.normal(0, sigma, size=signal.shape)
-                    noise2 = np.random.normal(0, sigma, size=signal.shape)
-
-                    # Rician noise:
-                    data = np.sqrt((signal + noise1) ** 2 + noise2 ** 2)
-
-                    sigma_est = pca_noise_estimate(data.astype(dtype), gtab,
-                                                   correct_bias=correct_bias,
-                                                   patch_radius=patch_radius)
-                    assert_array_almost_equal(np.mean(sigma_est), sigma,
-                                              decimal=1)
-
-    assert_(np.mean(pca_noise_estimate(data, gtab, correct_bias=True)) >
-            np.mean(pca_noise_estimate(data, gtab, correct_bias=False)))
+    for images_as_samples in [True, False]:
+
+        for patch_radius in [1, 2]:
+            for gtab in [gtab1, gtab2]:
+                for dtype in [np.int16, np.float64]:
+                    signal = np.ones((20, 20, 20, gtab.bvals.shape[0]))
+                    for correct_bias in [True, False]:
+                        if not correct_bias:
+                            # High signal for no bias correction
+                            signal = signal * 100
+
+                        sigma = 1
+                        noise1 = np.random.normal(0, sigma, size=signal.shape)
+                        noise2 = np.random.normal(0, sigma, size=signal.shape)
+
+                        # Rician noise:
+                        data = np.sqrt((signal + noise1) ** 2 + noise2 ** 2)
+
+                        sigma_est = pca_noise_estimate(data.astype(dtype), gtab,
+                                                       correct_bias=correct_bias,
+                                                       patch_radius=patch_radius,
+                                                       images_as_samples=images_as_samples)
+                        assert_array_almost_equal(np.mean(sigma_est), sigma,
+                                                  decimal=1)
+
+        # check that Rician correction produces a larger noise estimate
+        assert_(np.mean(pca_noise_estimate(data, gtab, correct_bias=True,
+                                           images_as_samples=images_as_samples)) >
+                np.mean(pca_noise_estimate(data, gtab, correct_bias=False,
+                                           images_as_samples=images_as_samples)))
+
+    assert_warns(UserWarning, pca_noise_estimate, data, gtab, patch_radius=0)
diff --git a/doc/api_changes.rst b/doc/api_changes.rst
index 8a5208f6e4..7a108f4a8e 100644
--- a/doc/api_changes.rst
+++ b/doc/api_changes.rst
@@ -5,6 +5,13 @@ API changes
 Here we provide information about functions or classes that have been removed,
 renamed or are deprecated (not recommended) during different release circles.
 
+DIPY 1.7.0 changes
+------------------
+
+**Denoising**
+
+- Change in ``dipy.denoise.localpca``: the function ``genpca`` can now use fewer images than patch voxels.
+- Change in ``dipy.denoise.pca_noise_estimate``: the function ``pca_noise_estimate`` has a new argument, ``images_as_samples``.
 
 DIPY 1.6.0 changes
 ------------------
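
Not part of the patch itself: the sketch below shows how the API touched by this diff might be exercised once the patch is applied. The array contents, b-values, and b-vectors are made-up placeholders; only `localpca`, `mppca`, `pca_noise_estimate`, and the arguments introduced above (`gtab`, `patch_radius_sigma`, `images_as_samples`, `return_sigma`) come from the changes in this diff.

```python
import numpy as np

from dipy.core.gradients import gradient_table
from dipy.denoise.localpca import localpca, mppca
from dipy.denoise.pca_noise_estimate import pca_noise_estimate

# Made-up 4D dataset (X, Y, Z, N) and a matching gradient table with one b0.
rng = np.random.default_rng(0)
data = 100 + rng.standard_normal((20, 20, 20, 7))
bvals = np.concatenate([[0], np.full(6, 1000)])
bvecs = np.concatenate([np.zeros((1, 3)), np.eye(3), -np.eye(3)])
gtab = gradient_table(bvals, bvecs=bvecs)

# 1) sigma omitted: localpca now estimates it internally from gtab,
#    using patch_radius_sigma for the noise-estimation patches.
denoised = localpca(data, gtab=gtab, patch_radius=2, patch_radius_sigma=1)

# 2) sigma estimated explicitly, with images used as PCA samples (new flag).
sigma = pca_noise_estimate(data, gtab, patch_radius=1, correct_bias=True,
                           images_as_samples=True)
denoised = localpca(data, sigma=sigma, patch_radius=2)

# 3) Marcenko-Pastur PCA, also returning the noise standard deviation map.
denoised_mp, sigma_mp = mppca(data, patch_radius=2, return_sigma=True)
```

Calling `localpca` with neither `sigma` nor `gtab` raises a `ValueError`, which is what the new `test_lpca_no_gtab_no_sigma` test covers.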