PCoA updates (#46)
* Remove normalization of eigenvectors

* Fix proportions explained to use eigenvalues computed via matrix trace

* Update miniconda, python, skbio to support new fsvd

* Fix flake8 warnings

* Update environment

* Update environment again

* Fix Travis (#48)
HannesHolste authored and antgonza committed May 13, 2019
1 parent 5911f9c commit 6f52697
Showing 6 changed files with 28 additions and 109 deletions.
12 changes: 6 additions & 6 deletions .travis.yml
@@ -3,23 +3,23 @@
 sudo: false
 language: python
 env:
-- PYTHON_VERSION=3
+- PYTHON_VERSION=3.6
 before_install:
-- wget http://repo.continuum.io/miniconda/Miniconda3-3.7.3-Linux-x86_64.sh -O miniconda.sh
+- wget http://repo.continuum.io/miniconda/Miniconda3-4.6.14-Linux-x86_64.sh -O miniconda.sh
 - chmod +x miniconda.sh
 - ./miniconda.sh -b
 - export PATH=/home/travis/miniconda3/bin:$PATH
 # Update conda itself
 - conda update --yes conda
 install:
-- conda env create -n mds-approximations -f environment.yml
-- conda install --yes cython nose pep8 flake8 pip
-#- if [ ${USE_CYTHON} ]; then conda install --yes -n mds-approximations cython; fi
+- conda env create -n mds-approximations -f environment.yml python=3.6
+- source activate mds-approximations
+- conda install --yes cython nose pep8 flake8 pip
+# - if [ ${USE_CYTHON} ]; then conda install --yes cython; fi
 - pip install .
 - pip install coveralls
 script:
 - nosetests --with-coverage --cover-package=mdsa
 - flake8 setup.py mdsa
 after_success:
-- coveralls
\ No newline at end of file
+- coveralls
14 changes: 3 additions & 11 deletions environment.yml
@@ -5,21 +5,13 @@ channels:
 - defaults
 - bioconda
 - biocore
-- anaconda
 dependencies:
 - scikit-bio
-- click=6.7 # install from anaconda channel
-- python=3.5
+- click
 - numpy
-- scikit-learn=0.19.1
+- scikit-learn
 - scipy
 - pandas
 - jupyter
 - matplotlib
-- gneiss=0.4.2
-- pytz
-- tensorflow
-- pip
-# also need to install bayesian-regression:
-- pip:
-  - bayesian_regression-0.1-py3-none-any.whl
+- python=3.7
86 changes: 6 additions & 80 deletions mdsa/algorithms/fsvd.py
@@ -1,6 +1,5 @@
-from numpy import dot, hstack
-from numpy.linalg import qr, svd
-from numpy.random import standard_normal
+from skbio import DistanceMatrix
+from skbio.stats.ordination import pcoa as skbio_pcoa
 
 from mdsa.algorithm import Algorithm
 
@@ -47,80 +46,7 @@ def run(self, distance_matrix, num_dimensions_out=10,
         """
         super(FSVD, self).run(distance_matrix, num_dimensions_out)
 
-        m, n = distance_matrix.shape
-
-        # Note: this transpose is removed for performance, since we
-        # only expect square matrices.
-        # Take (conjugate) transpose if necessary, because it makes H smaller,
-        # leading to faster computations
-        # if m < n:
-        #     distance_matrix = distance_matrix.transpose()
-        #     m, n = distance_matrix.shape
-        if m != n:
-            raise ValueError('FSVD.run(...) expects square distance matrix')
-
-        k = num_dimensions_out + 2
-
-        # Form a real n x l matrix G whose entries are independent,
-        # identically distributed Gaussian random variables of
-        # zero mean and unit variance
-        G = standard_normal(size=(n, k))
-
-        if use_power_method:
-            # use only the given exponent
-            H = dot(distance_matrix, G)
-
-            for x in range(2, num_levels + 2):
-                # enhance decay of singular values
-                # note: distance_matrix is no longer transposed, saves work
-                # since we're expecting symmetric, square matrices anyway
-                # (Daniel McDonald's changes)
-                H = dot(distance_matrix, dot(distance_matrix, H))
-
-        else:
-            # compute the m x l matrices H^{(0)}, ..., H^{(i)}
-            # Note that this is done implicitly in each iteration below.
-            H = dot(distance_matrix, G)
-            # Again, removed transpose: dot(distance_matrix.transpose(), H)
-            # to enhance performance
-            H = hstack(
-                (H, dot(distance_matrix, dot(distance_matrix, H))))
-            for x in range(3, num_levels + 2):
-                # Removed this transpose: dot(distance_matrix.transpose(), H)
-                tmp = dot(distance_matrix, dot(distance_matrix, H))
-
-                # Removed this transpose: dot(distance_matrix.transpose(), tmp)
-                H = hstack(
-                    (H, dot(distance_matrix, dot(distance_matrix, tmp))))
-
-        # Using the pivoted QR-decomposition, form a real m * ((i+1)l) matrix Q
-        # whose columns are orthonormal, s.t. there exists a real
-        # ((i+1)l) * ((i+1)l) matrix R for which H = QR
-        Q, R = qr(H)
-
-        # Compute the n * ((i+1)l) product matrix T = A^T Q
-        # Removed transpose of distance_matrix for performance
-        T = dot(distance_matrix, Q)  # step 3
-
-        # Form an SVD of T
-        Vt, St, W = svd(T, full_matrices=False)
-        W = W.transpose()
-
-        # Compute the m * ((i+1)l) product matrix
-        Ut = dot(Q, W)
-
-        if m < n:
-            # V_fsvd = Ut[:, :num_dimensions_out]  # unused
-            U_fsvd = Vt[:, :num_dimensions_out]
-        else:
-            # V_fsvd = Vt[:, :num_dimensions_out]  # unused
-            U_fsvd = Ut[:, :num_dimensions_out]
-
-        S = St[:num_dimensions_out] ** 2
-
-        # drop imaginary component, if we got one
-        eigenvalues = S.real
-        eigenvectors = U_fsvd.real
-
-        return eigenvectors, eigenvalues
+        results = skbio_pcoa(DistanceMatrix(distance_matrix),
+                             method='fsvd',
+                             number_of_dimensions=num_dimensions_out)
+        return results.samples.values, results.eigvals
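
For context, this is roughly how the new code path is exercised. A minimal usage sketch, assuming scikit-bio 0.5.3 or later (where pcoa() accepts method='fsvd'); the toy matrix and variable names are illustrative, not from this repository:

import numpy as np
from skbio import DistanceMatrix
from skbio.stats.ordination import pcoa

# A small symmetric, hollow distance matrix, as FSVD.run() expects.
dists = np.array([[0.0, 1.0, 2.0],
                  [1.0, 0.0, 1.5],
                  [2.0, 1.5, 0.0]])

results = pcoa(DistanceMatrix(dists), method='fsvd', number_of_dimensions=2)
coordinates = results.samples.values  # one row of coordinates per sample
eigenvalues = results.eigvals         # eigenvalues approximated by fsvd

Delegating to scikit-bio replaces the hand-rolled randomized SVD removed above, so the approximation logic is maintained upstream rather than in mdsa.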
4 changes: 2 additions & 2 deletions mdsa/algorithms/nystrom.py
@@ -40,15 +40,15 @@
     / E | F \
 D = |-----|----|
-    \ F.t | G /
+    \\ F.t | G /
 
 The corresponding association matrix or centered inner-product
 matrix K is:
 
     / A | B \
 K = |-----|----|
-    \ B.t | C /
+    \\ B.t | C /
 
 where A and B are computed as follows
4 changes: 2 additions & 2 deletions mdsa/algorithms/scmds.py
@@ -220,8 +220,8 @@ def affine_mapping(self, matrix_x, matrix_y):
 Affine mapping function:
     Y = UX + kron(b,ones(1,n)), UU' = I
-    X = [x_1, x_2, ... , x_n]; x_j \in R^m
-    Y = [y_1, y_2, ... , y_n]; y_j \in R^m
+    X = [x_1, x_2, ... , x_n]; x_j \\in R^m
+    Y = [y_1, y_2, ... , y_n]; y_j \\in R^m
 
 From Tzeng 2008:
 `The projection of xi,2 from q2 dimension to q1 dimension
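
These docstring edits (here and in nystrom.py above) double the backslashes because flake8 reports W605 (invalid escape sequence) when a plain string contains backslash sequences Python does not recognize, such as \S, \in, or a backslash before a space. A short illustration of the behavior, not code from this repository:

# '\S' is not a recognized escape: flake8 flags it as W605 and newer
# Pythons emit a DeprecationWarning. Doubling the backslash (or using
# a raw string) stores a literal backslash instead.
plain = "see Legendre & Legendre 1998, \\S 9.2.3"  # stored text: ...\S 9.2.3
raw = r"see Legendre & Legendre 1998, \S 9.2.3"    # same stored text
assert plain == raw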
17 changes: 9 additions & 8 deletions mdsa/pcoa.py
@@ -64,7 +64,7 @@ def pcoa(distance_matrix, algorithm, num_dimensions_out=10):
     If the distance is not euclidean (for example if it is a
     semimetric and the triangle inequality doesn't hold),
     negative eigenvalues can appear. There are different ways
-    to deal with that problem (see Legendre & Legendre 1998, \S
+    to deal with that problem (see Legendre & Legendre 1998, \\S
     9.2.3), but none are currently implemented here.
     However, a warning is raised whenever negative eigenvalues
     appear, allowing the user to decide if they can be safely
@@ -90,10 +90,6 @@ def pcoa(distance_matrix, algorithm, num_dimensions_out=10):
     eigenvectors = np.array(eigenvectors)
     eigenvalues = np.array(eigenvalues)
 
-    # Ensure eigenvectors are normalized
-    eigenvectors = np.apply_along_axis(lambda vec: vec / np.linalg.norm(vec),
-                                       axis=1, arr=eigenvectors)
-
     # Generate axis labels for output
     axis_labels = ['PC%d' % i for i in range(1, len(eigenvectors) + 1)]
     axis_labels = axis_labels[:num_dimensions_out]
@@ -170,6 +166,13 @@ def pcoa(distance_matrix, algorithm, num_dimensions_out=10):
     # operation.
     eigenvectors = eigenvectors * np.sqrt(eigenvalues)
 
+    # Sum of all eigenvalues, computed via the trace of the centered
+    # distance matrix rather than by summing only the computed ones:
+    # proportion_explained = eigenvalues / eigenvalues.sum()
+    sum_eigenvalues = np.trace(centered_dm)
+
+    # Calculate the array of proportion of variance explained
+    proportion_explained = eigenvalues / sum_eigenvalues
+
     # Now remove the dimensions with the least information
     # Only select k (num_dimensions_out) first eigenvectors
     # and their corresponding eigenvalues from the sorted array
@@ -178,9 +181,7 @@ def pcoa(distance_matrix, algorithm, num_dimensions_out=10):
     if len(eigenvalues) > num_dimensions_out:
         eigenvectors = eigenvectors[:, :num_dimensions_out]
         eigenvalues = eigenvalues[:num_dimensions_out]
-
-    # Calculate the array of proportion of variance explained
-    proportion_explained = eigenvalues / eigenvalues.sum()
+        proportion_explained = proportion_explained[:num_dimensions_out]
 
     return OrdinationResults(
         short_method_name='PCoA',
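
The proportion-explained fix matters because eigenvalues.sum() only sums the eigenvalues an algorithm actually returned; approximate methods such as FSVD compute just the top k, which inflated every proportion. The trace of the double-centered matrix equals the sum of all its eigenvalues, so it is the correct denominator no matter how many were computed. (The dropped re-normalization above was presumably removed for a related reason: it conflicted with the sqrt(eigenvalue) scaling applied to the coordinates.) A quick check of the trace identity, as an illustrative sketch assuming the usual Gower centering; dists and the helper names are not from this repository:

import numpy as np

dists = np.array([[0.0, 1.0, 2.0],
                  [1.0, 0.0, 1.5],
                  [2.0, 1.5, 0.0]])

# Gower double-centering: -0.5 * J @ D**2 @ J, with J = I - (1/n) * ones
n = dists.shape[0]
J = np.eye(n) - np.ones((n, n)) / n
centered_dm = -0.5 * J @ (dists ** 2) @ J

eigenvalues = np.linalg.eigvalsh(centered_dm)
# The trace equals the sum of all eigenvalues (up to floating-point error),
# so proportions computed against it stay valid after truncating to k axes.
assert np.isclose(np.trace(centered_dm), eigenvalues.sum())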
