Skip to content

Commit

Permalink
Merge 5a3d061 into eb392a4
Browse files Browse the repository at this point in the history
  • Loading branch information
cameronmartino committed Apr 22, 2019
2 parents eb392a4 + 5a3d061 commit d5f1ac2
Show file tree
Hide file tree
Showing 8 changed files with 4,536 additions and 4,430 deletions.
25 changes: 25 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,30 @@
# DEICODE changelog

## Version 0.2.2 (2019-4-22)

### Features

* Ensure that the eigenvalues are sorted
from largest to smallest. This is done for
the singular value matrix, given as the variable
s. This involves sorting s by its diagonal
while also reordering the off-diagonal elements. Additionally,
a new function was added to ensure that the U and V
loading values in the SVD are ordered to match
the sorted eigenvalues. This function also
ensures that after sorting, the values of the SVD
are deterministic. This is all implemented in the
function svd_sort, located in _optspace.py.
This methodology is also used in the code
for PCA in scikit-learn. To do this I pulled
from that code and noted this in the code
comments with direct line links to scikit-learn.

* Tests for the function svd_sort described above
were added. The test is located in
tests/test_optspace.py, in the function
test_optspace_ordering.

## Version 0.2.1 (2019-4-15)

Implemented in [PR#33](https://github.com/biocore/DEICODE/pull/33).
Expand Down
2 changes: 1 addition & 1 deletion deicode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@
#
# ----------------------------------------------------------------------------

__version__ = "0.2.1"
__version__ = "0.2.2"
43 changes: 42 additions & 1 deletion deicode/_optspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@


def optspace(M_E, r, niter, tol):

"""
Parameters
----------
Expand Down Expand Up @@ -69,9 +68,51 @@ def _optspace(M_E, E, r, niter, tol, sign=1):
if(dist[i + 1] < tol):
break
S = S / rescal_param
X, S, Y = svd_sort(X, S, Y)
return X, S, Y, dist


def svd_sort(X, S, Y):
    """
    Sort an SVD-like factorization by the diagonal of S (largest
    first) and apply a sign correction based on the U matrix so
    that the output is deterministic.

    Parameters
    ----------
    X: array-like
        U matrix from SVD (one column per component).
    S: array-like
        S matrix from SVD (square, one row/column per component).
    Y: array-like
        V matrix from SVD (one column per component).

    Returns
    -------
    X: array-like
        U matrix with columns reordered and sign-corrected.
    S: array-like
        S matrix with rows and columns reordered so that its
        diagonal is in descending order.
    Y: array-like
        V matrix with columns reordered and sign-corrected
        with the same signs as X.

    Notes
    -----
    S may contain non-zero off-diagonal elements; they are
    permuted together with the diagonal.

    NOTE(review): the sign correction is applied to X and Y only.
    When S has non-zero off-diagonal entries, X.dot(S).dot(Y.T)
    is therefore not guaranteed to be unchanged by this function;
    confirm that callers do not rely on that.
    """
    # See https://github.com/scikit-learn/scikit-learn/
    # blob/7b136e92acf49d46251479b75c88cba632de1937/sklearn/
    # decomposition/pca.py#L510-#L518 for context.
    # argsort is ascending, so the order is reversed to get
    # the diagonal of S from largest to smallest.
    idx = np.argsort(np.diag(S))[::-1]
    # Permute rows and columns of S together, following
    # https://stackoverflow.com/questions/36381356/
    # sort-matrix-based-on-its-diagonal-entries
    S = S[idx, :][:, idx]
    # Fancy indexing returns copies, so the in-place sign
    # correction below does not modify the caller's arrays.
    X, Y = X[:, idx], Y[:, idx]
    # Make the solution deterministic: flip each component so
    # that the largest-magnitude entry of its X column is positive.
    max_abs_cols = np.argmax(np.abs(X), axis=0)
    signs = np.sign(X[max_abs_cols, range(X.shape[1])])
    # np.sign gives 0 for an all-zero column of X; multiplying by
    # that would erase the matching column of Y, so leave such
    # components unflipped instead.
    signs[signs == 0] = 1
    X *= signs
    Y *= signs[:, np.newaxis].T
    return X, S, Y


def F_t(X, Y, S, M_E, E, m0, rho):
"""
Parameters
Expand Down
22 changes: 11 additions & 11 deletions deicode/optspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,17 +176,17 @@ def _fit(self):
'Insufficient samples, must have rank*10 samples in the table')

# return solved matrix
U, s_, V, _ = optspace(X_sparse, r=self.rank,
niter=self.iteration, tol=self.tol)
solution = U.dot(s_).dot(V.T)
explained_variance_ratio_ = np.diag(s_) / np.diag(s_).sum()
self.eigenvalues = np.diag(s_)[::-1]
self.explained_variance_ratio = list(explained_variance_ratio_)[::-1]
self.distance = distance.cdist(U, U)
self.solution = solution
self.feature_weights = V
self.sample_weights = U
self.s = s_
self.U, self.s, self.V, _ = optspace(X_sparse, r=self.rank,
niter=self.iteration,
tol=self.tol)
# save the solution (of the imputation)
self.solution = self.U.dot(self.s).dot(self.V.T)
self.eigenvalues = np.diag(self.s)
self.explained_variance_ratio = list(
self.eigenvalues / self.eigenvalues.sum())
self.distance = distance.cdist(self.U, self.U)
self.feature_weights = self.V
self.sample_weights = self.U

def fit_transform(self, X):
"""
Expand Down
14 changes: 8 additions & 6 deletions deicode/rpca.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@


def rpca(table: biom.Table,
rank: int=DEFAULT_RANK,
min_sample_count: int=DEFAULT_MSC,
min_feature_count: int=DEFAULT_MFC,
iterations: int=DEFAULT_ITERATIONS) -> (
skbio.OrdinationResults,
skbio.DistanceMatrix):
rank: int = DEFAULT_RANK,
min_sample_count: int = DEFAULT_MSC,
min_feature_count: int = DEFAULT_MFC,
iterations: int = DEFAULT_ITERATIONS) -> (
skbio.OrdinationResults,
skbio.DistanceMatrix):
"""Runs RPCA with an rclr preprocessing step.
This code will be run by both the standalone and QIIME 2 versions of
Expand All @@ -22,7 +22,9 @@ def rpca(table: biom.Table,

# filter sample to min depth
def sample_filter(val, id_, md): return sum(val) > min_sample_count

def observation_filter(val, id_, md): return sum(val) > min_feature_count

table = table.filter(observation_filter, axis='observation')
table = table.filter(sample_filter, axis='sample')
table = table.to_dataframe().T
Expand Down
400 changes: 200 additions & 200 deletions deicode/scripts/tests/data/expected-distance-matrix.tsv

Large diffs are not rendered by default.

0 comments on commit d5f1ac2

Please sign in to comment.