Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added pca weight init for sofm. added sofm example for weight init
method comparison
- Loading branch information
Yurii Shevchuk
committed
May 21, 2017
1 parent
bb26ac3
commit 81a4c74
Showing
9 changed files
with
342 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
from itertools import product | ||
|
||
import matplotlib.pyplot as plt | ||
from neupy import algorithms, environment, init | ||
|
||
from utils import plot_2d_grid, make_circle, make_elipse, make_square | ||
|
||
|
||
plt.style.use('ggplot') | ||
environment.reproducible() | ||
|
||
|
||
if __name__ == '__main__': | ||
GRID_WIDTH = 4 | ||
GRID_HEIGHT = 4 | ||
|
||
datasets = [ | ||
make_square(), | ||
make_circle(), | ||
make_elipse(corr=0.7), | ||
] | ||
configurations = [{ | ||
'weight_init': init.Uniform(0, 1), | ||
'title': 'Random uniform initialization', | ||
}, { | ||
'weight_init': 'sample_from_data', | ||
'title': 'Sampled from the data', | ||
}, { | ||
'weight_init': 'init_pca', | ||
'title': 'Initialize with PCA', | ||
}] | ||
|
||
plt.figure(figsize=(15, 15)) | ||
plt.title("Compare weight initialization methods for SOFM") | ||
|
||
red, blue = ('#E24A33', '#348ABD') | ||
n_columns = len(configurations) | ||
n_rows = len(datasets) | ||
index = 1 | ||
|
||
for data, conf in product(datasets, configurations): | ||
sofm = algorithms.SOFM( | ||
n_inputs=2, | ||
features_grid=(GRID_HEIGHT, GRID_WIDTH), | ||
|
||
verbose=True, | ||
shuffle_data=True, | ||
weight=conf['weight_init'], | ||
|
||
learning_radius=8, | ||
reduce_radius_after=5, | ||
|
||
std=2, | ||
reduce_std_after=5, | ||
|
||
step=0.3, | ||
reduce_step_after=5, | ||
) | ||
sofm.train(data, epochs=0) | ||
|
||
plt.subplot(n_rows, n_columns, index) | ||
|
||
plt.title(conf['title']) | ||
plt.scatter(*data.T, color=blue, alpha=0.05) | ||
plt.scatter(*sofm.weight, color=red) | ||
|
||
weights = sofm.weight.reshape((2, GRID_HEIGHT, GRID_WIDTH)) | ||
plot_2d_grid(weights, color=red) | ||
|
||
index += 1 | ||
|
||
plt.show() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
import numpy as np | ||
from scipy import linalg | ||
|
||
|
||
def svd_flip(u, v): | ||
""" | ||
Sign correction to ensure deterministic output from SVD. | ||
Adjusts the columns of u and the rows of v such that the | ||
loadings in the columns in u that are largest in absolute | ||
value are always positive. | ||
Parameters | ||
---------- | ||
u, v : ndarray | ||
u and v are the output of `linalg.svd` or | ||
`sklearn.utils.extmath.randomized_svd`, with matching inner | ||
dimensions so one can compute `np.dot(u * s, v)`. | ||
Returns | ||
------- | ||
u_adjusted, v_adjusted : arrays with the same dimensions | ||
as the input. | ||
""" | ||
max_abs_rows = np.argmax(np.abs(v), axis=1) | ||
signs = np.sign(v[range(v.shape[0]), max_abs_rows]) | ||
|
||
u *= signs | ||
v *= signs[:, np.newaxis] | ||
|
||
return u, v | ||
|
||
|
||
def randomized_range_finder(A, size, n_iter): | ||
"""Computes an orthonormal matrix whose range | ||
approximates the range of A. | ||
Parameters | ||
---------- | ||
A: 2D array | ||
The input data matrix | ||
size: integer | ||
Size of the return array | ||
n_iter: integer | ||
Number of power iterations used to stabilize the result | ||
Returns | ||
------- | ||
Q: 2D array | ||
A (size x size) projection matrix, the range of which | ||
approximates well the range of the input matrix A. | ||
Notes | ||
----- | ||
scikit-learn implementation | ||
""" | ||
# Generating normal random vectors with shape: (A.shape[1], size) | ||
Q = np.random.normal(size=(A.shape[1], size)) | ||
|
||
# Perform power iterations with Q to further 'imprint' the top | ||
# singular vectors of A in Q | ||
for i in range(n_iter): | ||
Q, _ = linalg.lu(np.dot(A, Q), permute_l=True) | ||
Q, _ = linalg.lu(np.dot(A.T, Q), permute_l=True) | ||
|
||
# Sample the range of A using by linear projection of Q | ||
# Extract an orthonormal basis | ||
Q, _ = linalg.qr(np.dot(A, Q), mode='economic') | ||
return Q | ||
|
||
|
||
def randomized_svd(M, n_components, n_oversamples=10): | ||
""" | ||
Computes a truncated randomized SVD | ||
Parameters | ||
---------- | ||
M: ndarray or sparse matrix | ||
Matrix to decompose | ||
n_components: int | ||
Number of singular values and vectors to extract. | ||
n_oversamples: int (default is 10) | ||
Additional number of random vectors to sample the range of M so as | ||
to ensure proper conditioning. The total number of random vectors | ||
used to find the range of M is n_components + n_oversamples. Smaller | ||
number can improve speed but can negatively impact the quality of | ||
approximation of singular vectors and singular values. | ||
Notes | ||
----- | ||
scikit-learn implementation | ||
""" | ||
n_random = n_components + n_oversamples | ||
n_samples, n_features = M.shape | ||
n_iter = 7 if n_components < .1 * min(M.shape) else 4 | ||
|
||
Q = randomized_range_finder(M, n_random, n_iter) | ||
# project M to the (k + p) dimensional space | ||
# using the basis vectors | ||
B = np.dot(Q.T, M) | ||
|
||
# compute the SVD on the thin matrix: (k + p) wide | ||
Uhat, s, V = linalg.svd(B, full_matrices=False) | ||
del B | ||
|
||
U = np.dot(Q, Uhat) | ||
U, V = svd_flip(U, V) | ||
|
||
return U[:, :n_components], s[:n_components], V[:n_components, :] | ||
|
||
|
||
def randomized_pca(data, n_componets): | ||
""" | ||
Randomized PCA based on the scikit-learn implementation. | ||
Parameters | ||
---------- | ||
data : 2d array-like | ||
n_components : int | ||
Number of PCA components | ||
Returns | ||
------- | ||
(eigenvectors, eigenvalues) | ||
""" | ||
n_samples, n_features = data.shape | ||
|
||
U, S, V = randomized_svd(data, n_componets) | ||
eigenvalues = (S ** 2) / n_samples | ||
eigenvectors = V / S[:, np.newaxis] * np.sqrt(n_samples) | ||
|
||
return eigenvectors, eigenvalues |
Oops, something went wrong.