Skip to content

Commit

Permalink
fixed pca for features > samples, and fixed pca_noise_estimate
Browse files Browse the repository at this point in the history
  • Loading branch information
samcoveney committed Mar 10, 2023
1 parent 13af40f commit e928a82
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 92 deletions.
73 changes: 53 additions & 20 deletions dipy/denoise/localpca.py
Expand Up @@ -13,6 +13,7 @@
from scipy.linalg import svd
svd_args = [False]
from scipy.linalg import eigh
from dipy.denoise.pca_noise_estimate import pca_noise_estimate


def _pca_classifier(L, nvoxels):
Expand Down Expand Up @@ -44,6 +45,15 @@ def _pca_classifier(L, nvoxels):
theory. Neuroimage 142:394-406.
doi: 10.1016/j.neuroimage.2016.08.016
"""

# if num_samples - 1 (to correct for mean subtraction) is less than number
# of features, discard the zero eigenvalues
if L.size > nvoxels - 1:
L = L[-(nvoxels - 1):]

# Note that the condition expressed in the while-loop is expressed in terms
# of the variance of equation (12), not equation (11) as in [1]_. Also,
# this code implements ascending eigenvalues, unlike [1]_.
var = np.mean(L)
c = L.size - 1
r = L[c] - L[0] - 4 * np.sqrt((c + 1.0) / nvoxels) * var
Expand All @@ -52,6 +62,7 @@ def _pca_classifier(L, nvoxels):
c = c - 1
r = L[c] - L[0] - 4 * np.sqrt((c + 1.0) / nvoxels) * var
ncomps = c + 1

return var, ncomps


Expand All @@ -76,7 +87,7 @@ def genpca(arr, sigma=None, mask=None, patch_radius=2, pca_method='eig',
outside of those voxels.
patch_radius : int or 1D array (optional)
The radius of the local patch to be taken around each voxel (in
voxels). Default: 2 (denoise in blocks of 5x5x5 voxels).
voxels). E.g. patch_radius=2 gives 5x5x5 patches.
pca_method : 'eig' or 'svd' (optional)
Use either eigenvalue decomposition (eig) or singular value
decomposition (svd) for principal component analysis. The default
Expand All @@ -93,16 +104,13 @@ def genpca(arr, sigma=None, mask=None, patch_radius=2, pca_method='eig',
\tau_{factor} can be set to a predefined values (e.g. \tau_{factor} =
2.3 [3]_), or automatically calculated using random matrix theory
(in case that \tau_{factor} is set to None).
Default: None.
return_sigma : bool (optional)
If true, the Standard deviation of the noise will be returned.
Default: False.
out_dtype : str or dtype (optional)
The dtype for the output array. Default: output has the same dtype as
the input.
suppress_warning : bool (optional)
If true, suppress warning caused by patch_size < arr.shape[-1].
Default: False.
Returns
-------
Expand Down Expand Up @@ -166,7 +174,8 @@ def genpca(arr, sigma=None, mask=None, patch_radius=2, pca_method='eig',
if num_samples == 1:
raise ValueError("Cannot have only 1 sample,\
please increase patch_radius.")
if num_samples < arr.shape[-1] and not suppress_warning:
# account for mean subtraction by testing #samples - 1
if (num_samples - 1) < arr.shape[-1] and not suppress_warning:
tmp = np.sum(patch_size == 1) # count spatial dimensions with size 1
if tmp == 0:
root = np.ceil(arr.shape[-1] ** (1./3)) # 3D
Expand All @@ -176,7 +185,7 @@ def genpca(arr, sigma=None, mask=None, patch_radius=2, pca_method='eig',
root = arr.shape[-1] # 1D
root = root + 1 if (root % 2) == 0 else root # make odd
spr = int((root - 1) / 2) # suggested patch_radius
e_s = "Number of samples {1} < Dimensionality {0}. "\
e_s = "Number of samples {1} - 1 < Dimensionality {0}. "\
.format(arr.shape[-1], num_samples)
e_s += "This might have a performance impact. "
e_s += "Increase patch_radius to {0} to avoid this warning, "\
Expand Down Expand Up @@ -224,7 +233,7 @@ def genpca(arr, sigma=None, mask=None, patch_radius=2, pca_method='eig',

X = arr[ix1:ix2, jx1:jx2, kx1:kx2].reshape(
num_samples, dim)
# compute the mean and normalize
# compute the mean
M = np.mean(X, axis=0)
# Upcast the dtype for precision in the SVD
X = X - M
Expand All @@ -248,7 +257,7 @@ def genpca(arr, sigma=None, mask=None, patch_radius=2, pca_method='eig',

if sigma is None:
# Random matrix theory
this_var, ncomps = _pca_classifier(d, num_samples)
this_var, _ = _pca_classifier(d, num_samples)
else:
# Predefined variance
this_var = var[i, j, k]
Expand Down Expand Up @@ -287,24 +296,35 @@ def genpca(arr, sigma=None, mask=None, patch_radius=2, pca_method='eig',
return denoised_arr.astype(out_dtype)


def localpca(arr, sigma, mask=None, patch_radius=2, pca_method='eig',
tau_factor=2.3, out_dtype=None, suppress_warning=False):
def localpca(arr, sigma=None, mask=None, patch_radius=2, gtab=None,
patch_radius_sigma=1, pca_method='eig', tau_factor=2.3,
return_sigma=False, correct_bias=True, out_dtype=None,
suppress_warning=False):
r""" Performs local PCA denoising according to Manjon et al. [1]_.
Parameters
----------
arr : 4D array
Array of data to be denoised. The dimensions are (X, Y, Z, N), where N
are the diffusion gradient directions.
sigma : float or 3D array
Standard deviation of the noise estimated from the data.
sigma : float or 3D array (optional)
Standard deviation of the noise estimated from the data. If not given,
calculate using method in [1]_.
mask : 3D boolean array (optional)
A mask with voxels that are true inside the brain and false outside of
it. The function denoises within the true part and returns zeros
outside of those voxels.
patch_radius : int or 1D array (optional)
The radius of the local patch to be taken around each voxel (in
voxels). Default: 2 (denoise in blocks of 5x5x5 voxels).
voxels). E.g. patch_radius=2 gives 5x5x5 patches.
gtab: gradient table object (optional if sigma is provided)
gradient information for the data gives us the bvals and bvecs of
diffusion data, which is needed to calculate noise level if sigma is
not provided.
patch_radius_sigma : int (optional)
The radius of the local patch to be taken around each voxel (in
voxels) for estimating sigma. E.g. patch_radius_sigma=2 gives
5x5x5 patches.
pca_method : 'eig' or 'svd' (optional)
Use either eigenvalue decomposition (eig) or singular value
decomposition (svd) for principal component analysis. The default
Expand All @@ -321,14 +341,18 @@ def localpca(arr, sigma, mask=None, patch_radius=2, pca_method='eig',
\tau_{factor} can be change to adjust the relationship between the
noise standard deviation and the threshold \tau. If \tau_{factor} is
set to None, it will be automatically calculated using the
Marcenko-Pastur distribution [2]_.
Default: 2.3 (according to [1]_)
Marcenko-Pastur distribution [2]_. Default: 2.3 according to [1]_.
return_sigma : bool (optional)
If true, a noise standard deviation estimate based on the
Marcenko-Pastur distribution is returned [2]_.
correct_bias : bool (optional)
Whether to correct for bias due to Rician noise. This is an
implementation of equation 8 in [1]_.
out_dtype : str or dtype (optional)
The dtype for the output array. Default: output has the same dtype as
the input.
suppress_warning : bool (optional)
If true, suppress warning caused by patch_size < arr.shape[-1].
Default: False.
Returns
-------
Expand All @@ -347,9 +371,20 @@ def localpca(arr, sigma, mask=None, patch_radius=2, pca_method='eig',
theory. Neuroimage 142:394-406.
doi: 10.1016/j.neuroimage.2016.08.016
"""
# check gtab is given, if sigma is not given
if sigma is None and gtab is None:
raise ValueError("gtab must be provided if sigma is not given")

# calculate sigma
if sigma is None:
sigma = pca_noise_estimate(arr, gtab,
correct_bias=correct_bias,
patch_radius=patch_radius_sigma,
images_as_samples=True)

return genpca(arr, sigma=sigma, mask=mask, patch_radius=patch_radius,
pca_method=pca_method, tau_factor=tau_factor,
return_sigma=False, out_dtype=out_dtype,
return_sigma=return_sigma, out_dtype=out_dtype,
suppress_warning=suppress_warning)


Expand All @@ -369,7 +404,7 @@ def mppca(arr, mask=None, patch_radius=2, pca_method='eig',
outside of those voxels.
patch_radius : int or 1D array (optional)
The radius of the local patch to be taken around each voxel (in
voxels). Default: 2 (denoise in blocks of 5x5x5 voxels).
voxels). E.g. patch_radius=2 gives 5x5x5 patches.
pca_method : 'eig' or 'svd' (optional)
Use either eigenvalue decomposition (eig) or singular value
decomposition (svd) for principal component analysis. The default
Expand All @@ -378,13 +413,11 @@ def mppca(arr, mask=None, patch_radius=2, pca_method='eig',
return_sigma : bool (optional)
If true, a noise standard deviation estimate based on the
Marcenko-Pastur distribution is returned [2]_.
Default: False.
out_dtype : str or dtype (optional)
The dtype for the output array. Default: output has the same dtype as
the input.
suppress_warning : bool (optional)
If true, suppress warning caused by patch_size < arr.shape[-1].
Default: False.
Returns
-------
Expand Down
88 changes: 62 additions & 26 deletions dipy/denoise/pca_noise_estimate.pyx
Expand Up @@ -10,6 +10,7 @@ import scipy.special as sps
from scipy import ndimage
cimport cython
cimport numpy as cnp
from warnings import warn

# Try to get the SVD through direct API to lapack:
try:
Expand All @@ -26,7 +27,7 @@ except ImportError:
@cython.wraparound(False)
@cython.cdivision(True)
def pca_noise_estimate(data, gtab, patch_radius=1, correct_bias=True,
smooth=2):
smooth=2, images_as_samples=False):
""" PCA based local noise estimation.
Parameters
Expand All @@ -47,12 +48,25 @@ def pca_noise_estimate(data, gtab, patch_radius=1, correct_bias=True,
smooth : int
Radius of a Gaussian smoothing filter to apply to the noise estimate
before returning. Default: 2.
image_as_samples : bool, optional
Whether to use images as rows (samples) for PCA (algorithm in [1]_) or
to use images as columns (features).
Returns
-------
sigma_corr: 3D array
The local noise standard deviation estimate.
Notes
-----
In [1]_, images are used as samples, so voxels are features, therefore
eigenvectors are image-shaped. However, [1]_ is not clear on how to use
these eigenvectors to determine the noise level, so here eigenvalues
(variance over samples explained by eigenvectors) are used to scale
the eigenvectors. Use images_as_samples=True to use this algorithm.
Alternatively, voxels can be used as samples using
images_as_samples=False. This is not the canonical algorithm of [1]_.
References
----------
.. [1] Manjon JV, Coupe P, Concha L, Buades A, Collins DL "Diffusion
Expand All @@ -72,6 +86,10 @@ def pca_noise_estimate(data, gtab, patch_radius=1, correct_bias=True,
data0 = data[..., ~gtab.b0s_mask]
sibe = True

if patch_radius < 1:
warn("Minimum patch radius must be 1, setting to 1", UserWarning)
patch_radius = 1

data0 = data0.astype(np.float64)
cdef:
cnp.npy_intp dsm = np.min(data0.shape[0:-1])
Expand All @@ -92,25 +110,42 @@ def pca_noise_estimate(data, gtab, patch_radius=1, correct_bias=True,
if (dsm != 1) and (dsm < 2 * patch_radius + 1):
raise ValueError("Array 'data' is incorrect shape")

X = data0.reshape(nsamples, n3)
if images_as_samples:
X = data0.reshape(nsamples, n3).T
else:
X = data0.reshape(nsamples, n3)

# Demean:
M = np.mean(X, axis=0)
X = X - M
U, S, Vt = svd(X, *svd_args)[:3]
# Rows of Vt are the eigenvectors, in ascending eigenvalue order:
W = Vt.T
# Project into the data space
V = X.dot(W)

# Grab the column corresponding to the smallest eigen-vector/-value:
I = V[:, -1].reshape(n0, n1, n2)
if images_as_samples:
W = W.astype('double')
# #vox(features) >> # img(samples), last eigval zero (X is centered)
idx = n3 - 2 # use second-to-last eigvec
V = W[:, idx].reshape(n0, n1, n2)

# ref [1]_ method is ambiguous on how to use image-shaped eigvec
# since eigvec is normalized, used eigval=variance for scale
I = V * S[idx]
else:
# Project into the data space
V = X.dot(W)

# Grab the column corresponding to the smallest eigen-vector/-value:
# #vox(samples) >> #img(features), last eigenvector is meaningful
I = V[:, -1].reshape(n0, n1, n2)

del V, W, X, U, S, Vt

cdef:
double[:, :, :] count = np.zeros((n0, n1, n2))
double[:, :, :] mean = np.zeros((n0, n1, n2))
double[:, :, :] sigma_sq = np.zeros((n0, n1, n2))
double[:, :, :, :] data0temp = data0
double[:, :, :] count = np.zeros((n0, n1, n2))
double[:, :, :] mean = np.zeros((n0, n1, n2))
double[:, :, :] sigma_sq = np.zeros((n0, n1, n2))
double[:, :, :, :] data0temp = data0

with nogil:
for i in range(prx, n0 - prx):
Expand All @@ -123,37 +158,38 @@ def pca_noise_estimate(data, gtab, patch_radius=1, correct_bias=True,
for k0 in range(-prz, prz + 1):
sum_reg += I[i + i0, j + j0, k + k0] / norm
for l0 in range(n3):
temp1 += (data0temp[i + i0, j+ j0, k + k0, l0]) / (norm * n3)
temp1 += (data0temp[i + i0, j + j0, k + k0, l0])\
/ (norm * n3)

for i0 in range(-prx, prx + 1):
for j0 in range(-pry, pry + 1):
for k0 in range(-prz, prz + 1):
sigma_sq[i + i0, j +j0, k + k0] += (
sigma_sq[i + i0, j + j0, k + k0] += (
I[i + i0, j + j0, k + k0] - sum_reg) ** 2
mean[i + i0, j + j0, k + k0] += temp1
count[i + i0, j +j0, k + k0] += 1
count[i + i0, j + j0, k + k0] += 1

sigma_sq = np.divide(sigma_sq, count)

# find the SNR and make the correction for bias due to Rician noise:
if correct_bias:
mean = np.divide(mean, count)
snr = np.divide(mean, np.sqrt(sigma_sq))
snr_sq = (snr ** 2)
# xi is practically equal to 1 above 37.4, and we overflow, raising
# warnings and creating ot-a-numbers.
# Instead, we will replace these values with 1 below
with np.errstate(over='ignore', invalid='ignore'):
xi = (2 + snr_sq - (np.pi / 8) * np.exp(-snr_sq / 2) *
mean = np.divide(mean, count)
snr = np.divide(mean, np.sqrt(sigma_sq))
snr_sq = (snr ** 2)
# xi is practically equal to 1 above 37.4, and we overflow, raising
# warnings and creating ot-a-numbers.
# Instead, we will replace these values with 1 below
with np.errstate(over='ignore', invalid='ignore'):
xi = (2 + snr_sq - (np.pi / 8) * np.exp(-snr_sq / 2) *
((2 + snr_sq) * sps.iv(0, snr_sq / 4) +
snr_sq * sps.iv(1, snr_sq / 4)) ** 2).astype(float)
xi[snr > 37.4] = 1
sigma_corr = sigma_sq / xi
sigma_corr[np.isnan(sigma_corr)] = 0
xi[snr > 37.4] = 1
sigma_corr = sigma_sq / xi
sigma_corr[np.isnan(sigma_corr)] = 0
else:
sigma_corr = sigma_sq
sigma_corr = sigma_sq

if smooth is not None:
sigma_corr = ndimage.gaussian_filter(sigma_corr, smooth)
sigma_corr = ndimage.gaussian_filter(sigma_corr, smooth)

return np.sqrt(sigma_corr)

0 comments on commit e928a82

Please sign in to comment.