save

bdpedigo · Dec 5, 2020 · d6043af · d6043af
1 parent 7b07884
commit d6043af
Show file tree

Hide file tree

Showing 4 changed files with 172 additions and 20 deletions.
diff --git a/docs/conf.py b/docs/conf.py
@@ -13,6 +13,8 @@
 
 import sys, os
 
+import sphinx_rtd_theme
+
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
@@ -31,7 +33,7 @@
 
 # Add any Sphinx extension module names here, as strings. They can be extensions
 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode"]
+extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode", "sphinx_rtd_theme"]
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ["_templates"]
@@ -100,7 +102,7 @@
 
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
-html_theme = "default"
+html_theme = "sphinx_rtd_theme"
 
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the

diff --git a/docs/reference/decomposition.rst b/docs/reference/decomposition.rst
@@ -3,6 +3,6 @@ Decomposition
 
 .. currentmodule:: sparse_decomposition.decomposition
 
-..autoclass:: SparseComponentAnalysis
+.. autoclass:: SparseComponentAnalysis
 
-..autoclass:: SparseMatrixApproximation
+.. autoclass:: SparseMatrixApproximation
diff --git a/sparse_decomposition/decomposition/decomposition.py b/sparse_decomposition/decomposition/decomposition.py
@@ -14,6 +14,8 @@
 
 from ..utils import calculate_explained_variance_ratio, soft_threshold
 
+from scipy.linalg import orthogonal_procrustes
+
 
 def _varimax(X):
     return Rotator(normalize=False).fit_transform(X)
@@ -28,6 +30,9 @@ def _polar(X):
 def _polar_rotate_shrink(X, gamma=0.1):
     # Algorithm 1 from the paper
     U, _, _ = selectSVD(X, n_components=X.shape[1], algorithm="full")
+    # U = _polar(X)
+    # R, _ = orthogonal_procrustes(U_old, U)
+    # print(np.linalg.norm(U_old @ R - U))
     U_rot = _varimax(U)
     U_thresh = soft_threshold(U_rot, gamma)
     return U_thresh
@@ -39,6 +44,18 @@ def _reorder_components(X, Z_hat, Y_hat):
     return Z_hat[:, sort_inds], Y_hat[:, sort_inds]
 
 
+# import abc
+
+
+# class SuperclassMeta(type):
+#     def __new__(mcls, classname, bases, cls_dict):
+#         cls = super().__new__(mcls, classname, bases, cls_dict)
+#         for name, member in cls_dict.items():
+#             if not getattr(member, "__doc__"):
+#                 member.__doc__ = getattr(bases[-1], name).__doc__
+#         return cls
+
+
 class BaseSparseDecomposition(BaseEstimator):
     def __init__(
         self,
@@ -47,27 +64,31 @@ def __init__(
         max_iter=10,
         scale=False,
         center=False,
-        tol=1e-5,
+        tol=1e-4,
         verbose=0,
     ):
-        """[summary]
+        """Sparse matrix decomposition model.
 
         Parameters
         ----------
-        n_components : int, optional
-            [description], by default 2
-        gamma : [type], optional
-            [description], by default None
-        max_iter : int, optional
-            [description], by default 10
+        n_components : int, optional (default=2)
+            Number of components or embedding dimensions.
+        gamma : float, int or None, optional (default=None)
+            Sparsity parameter, must be nonnegative. Lower values lead to more sparsity
+            in the estimated components. If ``None`` (or 0), it is set to
+            ``sqrt(n_components * X.shape[1])``, where ``X`` is the matrix passed to
+            ``fit``. If ``np.inf``, the iterative updates are skipped entirely.
+        max_iter : int, optional (default=10)
+            Maximum number of iterations allowed, must be nonnegative.
         scale : bool, optional
             [description], by default False
         center : bool, optional
             [description], by default False
-        tol : [type], optional
-            [description], by default 1e-5
-        verbose : int, optional
-            [description], by default 0
+        tol : float or int, optional (default=1e-4)
+            Tolerance for stopping iterative optimization. If the relative difference in
+            score is less than this amount the algorithm will terminate.
+        verbose : int, optional (default=0)
+            Verbosity level. Values > 0 print per-iteration progress; > 1 adds timing.
         """
         self.n_components = n_components
         self.gamma = gamma
@@ -79,18 +100,49 @@ def __init__(
         # TODO add random state
 
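     def _initialize(self, X):
+        """Compute starting estimates from ``selectSVD`` applied to ``X``.
+
+        Parameters
+        ----------
+        X : array-like
+            Data matrix, decomposed with ``n_components=self.n_components``.
+
+        Returns
+        -------
+        tuple
+            ``(U, Vt.T, score)``, where ``score`` is ``np.linalg.norm(D)``.
+        """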
         U, D, Vt = selectSVD(X, n_components=self.n_components)
         score = np.linalg.norm(D)
         return U, Vt.T, score
 
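     def _validate_parameters(self, X):
+        """Resolve ``gamma`` and store the result as ``self.gamma_``.
+
+        Parameters
+        ----------
+        X : array-like
+            Data matrix; ``X.shape[1]`` sets the default when ``gamma`` is falsy.
+        """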
         if not self.gamma:
             gamma = np.sqrt(self.n_components * X.shape[1])
         else:
             gamma = self.gamma
         self.gamma_ = gamma
 
     def _preprocess_data(self, X):
+        """[summary]
+
+        Parameters
+        ----------
+        X : [type]
+            [description]
+
+        Returns
+        -------
+        [type]
+            [description]
+        """
         if self.scale or self.center:
             X = StandardScaler(
                 with_mean=self.center, with_std=self.scale
@@ -121,6 +173,11 @@ def fit_transform(self, X, y=None):
 
         Z_hat, Y_hat, score = self._initialize(X)
 
+        if self.gamma == np.inf:
+            max_iter = 0
+        else:
+            max_iter = self.max_iter
+
         # for keeping track of progress over iteration
         Z_diff = np.inf
         Y_diff = np.inf
@@ -129,15 +186,15 @@ def fit_transform(self, X, y=None):
 
         # main loop
         i = 0
-        while (i < self.max_iter) and (norm_score_diff > self.tol):
+        while (i < max_iter) and (norm_score_diff > self.tol):
             if self.verbose > 0:
                 print(f"Iteration: {i}")
 
-            currtime = time.time()
+            iter_time = time.time()
 
             Z_hat_new, Y_hat_new = self._update_estimates(X, Z_hat, Y_hat)
 
-            Z_hat_new, Y_hat_new = _reorder_components(X, Z_hat_new, Y_hat_new)
+            # Z_hat_new, Y_hat_new = _reorder_components(X, Z_hat_new, Y_hat_new)
             Z_diff = np.linalg.norm(Z_hat_new - Z_hat)
             Y_diff = np.linalg.norm(Y_hat_new - Y_hat)
             norm_Z_diff = Z_diff / np.linalg.norm(Z_hat_new)
@@ -152,7 +209,7 @@ def fit_transform(self, X, y=None):
             last_score = score
 
             if self.verbose > 1:
-                print(f"{time.time() - currtime:.3f} seconds elapsed for iteration.")
+                print(f"{time.time() - iter_time:.3f} seconds elapsed for iteration.")
 
             if self.verbose > 0:
                 print(f"Difference in Z_hat: {Z_diff}")
@@ -177,32 +234,124 @@ def fit_transform(self, X, y=None):
         return Z_hat
 
     def fit(self, X):
+        """Fit the model to ``X`` by calling ``fit_transform``.
+
+        Parameters
+        ----------
+        X : array-like
+            Data matrix to decompose.
+
+        Returns
+        -------
+        self
+            The fitted estimator.
+        """
         self.fit_transform(X)
         return self
 
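     def transform(self, X):
+        """Project ``X`` onto the fitted components.
+
+        Parameters
+        ----------
+        X : array-like
+            Data matrix; multiplied on the right by ``self.components_.T``.
+
+        Returns
+        -------
+        array-like
+            The product ``X @ self.components_.T``.
+        """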
         # TODO input checking
         return X @ self.components_.T
 
     @abstractmethod
     def _update_estimates(self, X, Z_hat, Y_hat):
+        """Perform one update of the estimates; implemented by subclasses.
+
+        Parameters
+        ----------
+        X : array-like
+            Data matrix.
+        Z_hat : array-like
+            Current left estimate.
+        Y_hat : array-like
+            Current right estimate.
+        """
         pass
 
 
 class SparseComponentAnalysis(BaseSparseDecomposition):
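     def _update_estimates(self, X, Z_hat, Y_hat):
+        """Refresh ``Y_hat`` (rotate-shrink step), then ``Z_hat`` (polar step).
+
+        Parameters
+        ----------
+        X : array-like
+            Data matrix.
+        Z_hat : array-like
+            Current left estimate, used to form ``X.T @ Z_hat``.
+        Y_hat : array-like
+            Current right estimate; overwritten before it is read.
+
+        Returns
+        -------
+        tuple
+            The updated ``(Z_hat, Y_hat)``.
+        """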
         Y_hat = _polar_rotate_shrink(X.T @ Z_hat, gamma=self.gamma)
         Z_hat = _polar(X @ Y_hat)
         return Z_hat, Y_hat
 
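+    def _save_attributes(self, X, Z_hat, Y_hat):
+        """Placeholder for storing fitted attributes; does nothing in this class.
+
+        Parameters
+        ----------
+        X : array-like
+            Data matrix (unused).
+        Z_hat : array-like
+            Final left estimate (unused).
+        Y_hat : array-like
+            Final right estimate (unused).
+        """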
+        pass
+
 
 class SparseMatrixApproximation(BaseSparseDecomposition):
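     def _update_estimates(self, X, Z_hat, Y_hat):
+        """Refresh ``Z_hat`` then ``Y_hat``, each via ``_polar_rotate_shrink``.
+
+        Parameters
+        ----------
+        X : array-like
+            Data matrix.
+        Z_hat : array-like
+            Current left estimate; overwritten before it is read.
+        Y_hat : array-like
+            Current right estimate, used to form ``X @ Y_hat``.
+
+        Returns
+        -------
+        tuple
+            The updated ``(Z_hat, Y_hat)``.
+        """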
         Z_hat = _polar_rotate_shrink(X @ Y_hat)
         Y_hat = _polar_rotate_shrink(X.T @ Z_hat)
         return Z_hat, Y_hat
 
     def _save_attributes(self, X, Z_hat, Y_hat):
+        """[summary]
+
+        Parameters
+        ----------
+        X : [type]
+            [description]
+        Z_hat : [type]
+            [description]
+        Y_hat : [type]
+            [description]
+        """
         B = Z_hat.T @ X @ Y_hat
         self.score_ = B
         self.right_latent_ = Y_hat

diff --git a/sparse_decomposition/utils/__init__.py b/sparse_decomposition/utils/__init__.py
@@ -3,4 +3,5 @@
     soft_threshold,
     l1_norm,
     calculate_explained_variance_ratio,
+    proportion_variance_explained,
 )