Skip to content

Commit

Permalink
added pca weight init for sofm. added sofm example for weight init
Browse files Browse the repository at this point in the history
method comparison
  • Loading branch information
Yurii Shevchuk committed May 21, 2017
1 parent bb26ac3 commit 81a4c74
Show file tree
Hide file tree
Showing 9 changed files with 342 additions and 27 deletions.
4 changes: 4 additions & 0 deletions examples/competitive/README.md
Expand Up @@ -15,3 +15,7 @@
## Output from sofm_compare_grid_types.py

![](images/sofm-hexagon-vs-rectangular-grid.png)

## Output from sofm_compare_weight_init.py

![](images/compare-weight-init-methods.png)
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
18 changes: 5 additions & 13 deletions examples/competitive/sofm_compare_grid_types.py
@@ -1,22 +1,13 @@
import numpy as np
import matplotlib.pyplot as plt
from neupy import algorithms, environment

from utils import plot_2d_grid
from utils import plot_2d_grid, make_circle


plt.style.use('ggplot')
environment.reproducible()


def make_circle():
data = np.random.random((10000, 2))
x, y = data[:, 0], data[:, 1]

distance_from_center = ((x - 0.5) ** 2 + (y - 0.5) ** 2)
return data[distance_from_center <= 0.5 ** 2]


if __name__ == '__main__':
GRID_WIDTH = 10
GRID_HEIGHT = 10
Expand All @@ -32,6 +23,10 @@ def make_circle():
}]

data = make_circle()

red, blue = ('#E24A33', '#348ABD')
n_columns = len(configurations)

plt.figure(figsize=(12, 5))

for index, conf in enumerate(configurations, start=1):
Expand All @@ -54,9 +49,6 @@ def make_circle():
)
sofm.train(data, epochs=40)

red, blue = ('#E24A33', '#348ABD')
n_columns = len(configurations)

plt.subplot(1, n_columns, index)

plt.title(conf['title'])
Expand Down
72 changes: 72 additions & 0 deletions examples/competitive/sofm_compare_weight_init.py
@@ -0,0 +1,72 @@
from itertools import product

import matplotlib.pyplot as plt
from neupy import algorithms, environment, init

from utils import plot_2d_grid, make_circle, make_elipse, make_square


plt.style.use('ggplot')
environment.reproducible()


if __name__ == '__main__':
GRID_WIDTH = 4
GRID_HEIGHT = 4

datasets = [
make_square(),
make_circle(),
make_elipse(corr=0.7),
]
configurations = [{
'weight_init': init.Uniform(0, 1),
'title': 'Random uniform initialization',
}, {
'weight_init': 'sample_from_data',
'title': 'Sampled from the data',
}, {
'weight_init': 'init_pca',
'title': 'Initialize with PCA',
}]

plt.figure(figsize=(15, 15))
plt.title("Compare weight initialization methods for SOFM")

red, blue = ('#E24A33', '#348ABD')
n_columns = len(configurations)
n_rows = len(datasets)
index = 1

for data, conf in product(datasets, configurations):
sofm = algorithms.SOFM(
n_inputs=2,
features_grid=(GRID_HEIGHT, GRID_WIDTH),

verbose=True,
shuffle_data=True,
weight=conf['weight_init'],

learning_radius=8,
reduce_radius_after=5,

std=2,
reduce_std_after=5,

step=0.3,
reduce_step_after=5,
)
sofm.train(data, epochs=0)

plt.subplot(n_rows, n_columns, index)

plt.title(conf['title'])
plt.scatter(*data.T, color=blue, alpha=0.05)
plt.scatter(*sofm.weight, color=red)

weights = sofm.weight.reshape((2, GRID_HEIGHT, GRID_WIDTH))
plot_2d_grid(weights, color=red)

index += 1

plt.show()
22 changes: 22 additions & 0 deletions examples/competitive/utils.py
@@ -1,5 +1,6 @@
from itertools import product

import numpy as np
import matplotlib.pyplot as plt


Expand Down Expand Up @@ -51,3 +52,24 @@ def plot_2d_grid(weights, ax=None, color='green', hexagon=False):

neurons = weights[:, neurons_x_coords, neurons_y_coords]
ax.plot(*neurons, color=color)


def make_square():
return np.random.random((10000, 2))


def make_circle():
data = make_square()
x, y = data[:, 0], data[:, 1]

distance_from_center = ((x - 0.5) ** 2 + (y - 0.5) ** 2)
return data[distance_from_center <= 0.5 ** 2]


def make_elipse(corr=0.8):
projection = np.array([
[corr, 1 - corr],
[1 - corr, corr]])

data = make_circle()
return data.dot(projection)
136 changes: 136 additions & 0 deletions neupy/algorithms/competitive/randomized_pca.py
@@ -0,0 +1,136 @@
import numpy as np
from scipy import linalg


def svd_flip(u, v):
"""
Sign correction to ensure deterministic output from SVD.
Adjusts the columns of u and the rows of v such that the
loadings in the columns in u that are largest in absolute
value are always positive.
Parameters
----------
u, v : ndarray
u and v are the output of `linalg.svd` or
`sklearn.utils.extmath.randomized_svd`, with matching inner
dimensions so one can compute `np.dot(u * s, v)`.
Returns
-------
u_adjusted, v_adjusted : arrays with the same dimensions
as the input.
"""
max_abs_rows = np.argmax(np.abs(v), axis=1)
signs = np.sign(v[range(v.shape[0]), max_abs_rows])

u *= signs
v *= signs[:, np.newaxis]

return u, v


def randomized_range_finder(A, size, n_iter):
"""Computes an orthonormal matrix whose range
approximates the range of A.
Parameters
----------
A: 2D array
The input data matrix
size: integer
Size of the return array
n_iter: integer
Number of power iterations used to stabilize the result
Returns
-------
Q: 2D array
A (size x size) projection matrix, the range of which
approximates well the range of the input matrix A.
Notes
-----
scikit-learn implementation
"""
# Generating normal random vectors with shape: (A.shape[1], size)
Q = np.random.normal(size=(A.shape[1], size))

# Perform power iterations with Q to further 'imprint' the top
# singular vectors of A in Q
for i in range(n_iter):
Q, _ = linalg.lu(np.dot(A, Q), permute_l=True)
Q, _ = linalg.lu(np.dot(A.T, Q), permute_l=True)

# Sample the range of A using by linear projection of Q
# Extract an orthonormal basis
Q, _ = linalg.qr(np.dot(A, Q), mode='economic')
return Q


def randomized_svd(M, n_components, n_oversamples=10):
"""
Computes a truncated randomized SVD
Parameters
----------
M: ndarray or sparse matrix
Matrix to decompose
n_components: int
Number of singular values and vectors to extract.
n_oversamples: int (default is 10)
Additional number of random vectors to sample the range of M so as
to ensure proper conditioning. The total number of random vectors
used to find the range of M is n_components + n_oversamples. Smaller
number can improve speed but can negatively impact the quality of
approximation of singular vectors and singular values.
Notes
-----
scikit-learn implementation
"""
n_random = n_components + n_oversamples
n_samples, n_features = M.shape
n_iter = 7 if n_components < .1 * min(M.shape) else 4

Q = randomized_range_finder(M, n_random, n_iter)
# project M to the (k + p) dimensional space
# using the basis vectors
B = np.dot(Q.T, M)

# compute the SVD on the thin matrix: (k + p) wide
Uhat, s, V = linalg.svd(B, full_matrices=False)
del B

U = np.dot(Q, Uhat)
U, V = svd_flip(U, V)

return U[:, :n_components], s[:n_components], V[:n_components, :]


def randomized_pca(data, n_componets):
"""
Randomized PCA based on the scikit-learn implementation.
Parameters
----------
data : 2d array-like
n_components : int
Number of PCA components
Returns
-------
(eigenvectors, eigenvalues)
"""
n_samples, n_features = data.shape

U, S, V = randomized_svd(data, n_componets)
eigenvalues = (S ** 2) / n_samples
eigenvectors = V / S[:, np.newaxis] * np.sqrt(n_samples)

return eigenvectors, eigenvalues

0 comments on commit 81a4c74

Please sign in to comment.