added pca weight init for sofm. added sofm example for weight init

method comparison
itdxer · May 21, 2017 · 81a4c74 · 81a4c74
1 parent bb26ac3
commit 81a4c74
Show file tree

Hide file tree

Showing 9 changed files with 342 additions and 27 deletions.
diff --git a/examples/competitive/README.md b/examples/competitive/README.md
@@ -15,3 +15,7 @@
 ## Output from sofm_compare_grid_types.py
 
 ![](images/sofm-hexagon-vs-rectangular-grid.png)
+
+## Output from sofm_compare_weight_init.py
+
+![](images/compare-weight-init-methods.png)
diff --git a/examples/competitive/images/compare-weight-init-methods.png b/examples/competitive/images/compare-weight-init-methods.png
diff --git a/examples/competitive/sofm_compare_grid_types.py b/examples/competitive/sofm_compare_grid_types.py
@@ -1,22 +1,13 @@
-import numpy as np
 import matplotlib.pyplot as plt
 from neupy import algorithms, environment
 
-from utils import plot_2d_grid
+from utils import plot_2d_grid, make_circle
 
 
 plt.style.use('ggplot')
 environment.reproducible()
 
 
-def make_circle():
-    data = np.random.random((10000, 2))
-    x, y = data[:, 0], data[:, 1]
-
-    distance_from_center = ((x - 0.5) ** 2 + (y - 0.5) ** 2)
-    return data[distance_from_center <= 0.5 ** 2]
-
-
 if __name__ == '__main__':
     GRID_WIDTH = 10
     GRID_HEIGHT = 10
@@ -32,6 +23,10 @@ def make_circle():
     }]
 
     data = make_circle()
+
+    red, blue = ('#E24A33', '#348ABD')
+    n_columns = len(configurations)
+
     plt.figure(figsize=(12, 5))
 
     for index, conf in enumerate(configurations, start=1):
@@ -54,9 +49,6 @@ def make_circle():
         )
         sofm.train(data, epochs=40)
 
-        red, blue = ('#E24A33', '#348ABD')
-        n_columns = len(configurations)
-
         plt.subplot(1, n_columns, index)
 
         plt.title(conf['title'])

diff --git a/examples/competitive/sofm_compare_weight_init.py b/examples/competitive/sofm_compare_weight_init.py
@@ -0,0 +1,72 @@
+from itertools import product
+
+import matplotlib.pyplot as plt
+from neupy import algorithms, environment, init
+
+from utils import plot_2d_grid, make_circle, make_elipse, make_square
+
+
+plt.style.use('ggplot')
+environment.reproducible()
+
+
+if __name__ == '__main__':
+    GRID_WIDTH = 4
+    GRID_HEIGHT = 4
+
+    datasets = [
+        make_square(),
+        make_circle(),
+        make_elipse(corr=0.7),
+    ]
+    configurations = [{
+        'weight_init': init.Uniform(0, 1),
+        'title': 'Random uniform initialization',
+    }, {
+        'weight_init': 'sample_from_data',
+        'title': 'Sampled from the data',
+    }, {
+        'weight_init': 'init_pca',
+        'title': 'Initialize with PCA',
+    }]
+
+    plt.figure(figsize=(15, 15))
+    plt.title("Compare weight initialization methods for SOFM")
+
+    red, blue = ('#E24A33', '#348ABD')
+    n_columns = len(configurations)
+    n_rows = len(datasets)
+    index = 1
+
+    for data, conf in product(datasets, configurations):
+        sofm = algorithms.SOFM(
+            n_inputs=2,
+            features_grid=(GRID_HEIGHT, GRID_WIDTH),
+
+            verbose=True,
+            shuffle_data=True,
+            weight=conf['weight_init'],
+
+            learning_radius=8,
+            reduce_radius_after=5,
+
+            std=2,
+            reduce_std_after=5,
+
+            step=0.3,
+            reduce_step_after=5,
+        )
+        sofm.train(data, epochs=0)
+
+        plt.subplot(n_rows, n_columns, index)
+
+        plt.title(conf['title'])
+        plt.scatter(*data.T, color=blue, alpha=0.05)
+        plt.scatter(*sofm.weight, color=red)
+
+        weights = sofm.weight.reshape((2, GRID_HEIGHT, GRID_WIDTH))
+        plot_2d_grid(weights, color=red)
+
+        index += 1
+
+    plt.show()
diff --git a/examples/competitive/utils.py b/examples/competitive/utils.py
@@ -1,5 +1,6 @@
 from itertools import product
 
+import numpy as np
 import matplotlib.pyplot as plt
 
 
@@ -51,3 +52,24 @@ def plot_2d_grid(weights, ax=None, color='green', hexagon=False):
 
             neurons = weights[:, neurons_x_coords, neurons_y_coords]
             ax.plot(*neurons, color=color)
+
+
+def make_square():
+    return np.random.random((10000, 2))
+
+
+def make_circle():
+    data = make_square()
+    x, y = data[:, 0], data[:, 1]
+
+    distance_from_center = ((x - 0.5) ** 2 + (y - 0.5) ** 2)
+    return data[distance_from_center <= 0.5 ** 2]
+
+
+def make_elipse(corr=0.8):
+    projection = np.array([
+        [corr, 1 - corr],
+        [1 - corr, corr]])
+
+    data = make_circle()
+    return data.dot(projection)
diff --git a/neupy/algorithms/competitive/randomized_pca.py b/neupy/algorithms/competitive/randomized_pca.py
@@ -0,0 +1,136 @@
+import numpy as np
+from scipy import linalg
+
+
+def svd_flip(u, v):
+    """
+    Sign correction to ensure deterministic output from SVD.
+    Adjusts the columns of u and the rows of v such that the
+    loadings in the columns in u that are largest in absolute
+    value are always positive.
+
+    Parameters
+    ----------
+    u, v : ndarray
+        u and v are the output of `linalg.svd` or
+        `sklearn.utils.extmath.randomized_svd`, with matching inner
+        dimensions so one can compute `np.dot(u * s, v)`.
+
+    Returns
+    -------
+    u_adjusted, v_adjusted : arrays with the same dimensions
+    as the input.
+    """
+    max_abs_rows = np.argmax(np.abs(v), axis=1)
+    signs = np.sign(v[range(v.shape[0]), max_abs_rows])
+
+    u *= signs
+    v *= signs[:, np.newaxis]
+
+    return u, v
+
+
+def randomized_range_finder(A, size, n_iter):
+    """Computes an orthonormal matrix whose range
+    approximates the range of A.
+
+    Parameters
+    ----------
+    A: 2D array
+        The input data matrix
+
+    size: integer
+        Size of the return array
+
+    n_iter: integer
+        Number of power iterations used to stabilize the result
+
+    Returns
+    -------
+    Q: 2D array
+        A (size x size) projection matrix, the range of which
+        approximates well the range of the input matrix A.
+
+    Notes
+    -----
+    scikit-learn implementation
+    """
+    # Generating normal random vectors with shape: (A.shape[1], size)
+    Q = np.random.normal(size=(A.shape[1], size))
+
+    # Perform power iterations with Q to further 'imprint' the top
+    # singular vectors of A in Q
+    for i in range(n_iter):
+        Q, _ = linalg.lu(np.dot(A, Q), permute_l=True)
+        Q, _ = linalg.lu(np.dot(A.T, Q), permute_l=True)
+
+    # Sample the range of A using by linear projection of Q
+    # Extract an orthonormal basis
+    Q, _ = linalg.qr(np.dot(A, Q), mode='economic')
+    return Q
+
+
+def randomized_svd(M, n_components, n_oversamples=10):
+    """
+    Computes a truncated randomized SVD
+
+    Parameters
+    ----------
+    M: ndarray or sparse matrix
+        Matrix to decompose
+
+    n_components: int
+        Number of singular values and vectors to extract.
+
+    n_oversamples: int (default is 10)
+        Additional number of random vectors to sample the range of M so as
+        to ensure proper conditioning. The total number of random vectors
+        used to find the range of M is n_components + n_oversamples. Smaller
+        number can improve speed but can negatively impact the quality of
+        approximation of singular vectors and singular values.
+
+    Notes
+    -----
+    scikit-learn implementation
+    """
+    n_random = n_components + n_oversamples
+    n_samples, n_features = M.shape
+    n_iter = 7 if n_components < .1 * min(M.shape) else 4
+
+    Q = randomized_range_finder(M, n_random, n_iter)
+    # project M to the (k + p) dimensional space
+    # using the basis vectors
+    B = np.dot(Q.T, M)
+
+    # compute the SVD on the thin matrix: (k + p) wide
+    Uhat, s, V = linalg.svd(B, full_matrices=False)
+    del B
+
+    U = np.dot(Q, Uhat)
+    U, V = svd_flip(U, V)
+
+    return U[:, :n_components], s[:n_components], V[:n_components, :]
+
+
+def randomized_pca(data, n_componets):
+    """
+    Randomized PCA based on the scikit-learn implementation.
+
+    Parameters
+    ----------
+    data : 2d array-like
+
+    n_components : int
+        Number of PCA components
+
+    Returns
+    -------
+    (eigenvectors, eigenvalues)
+    """
+    n_samples, n_features = data.shape
+
+    U, S, V = randomized_svd(data, n_componets)
+    eigenvalues = (S ** 2) / n_samples
+    eigenvectors = V / S[:, np.newaxis] * np.sqrt(n_samples)
+
+    return eigenvectors, eigenvalues