Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option for more than one kmeans init to autogmm #662

Merged
merged 29 commits into from
Aug 24, 2021
Merged
Changes from 12 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
92caafc
Merge gclust to autogmm
PerifanosPrometheus Feb 9, 2021
5221c0d
Fixes import error of check_scalar
PerifanosPrometheus Feb 9, 2021
ab7e582
Fixes formatting error
PerifanosPrometheus Feb 9, 2021
d2d6361
Merge branch 'dev' into merge_gclust_autogmm
bdpedigo Feb 9, 2021
60f957e
Fixed edit of process_param (forgot to add a change in gm_params)
PerifanosPrometheus Feb 16, 2021
2d16045
Merge branch 'merge_gclust_autogmm' of github.com:PerifanosPrometheus…
PerifanosPrometheus Feb 16, 2021
774d14d
Perform and append multiple k-means initialization runs rather than
PerifanosPrometheus Feb 23, 2021
7206c19
Black formatting
PerifanosPrometheus Feb 23, 2021
db77c16
Fix docstring
PerifanosPrometheus Mar 9, 2021
f854528
Remove redundant boolean condition
PerifanosPrometheus Mar 23, 2021
76cbf0c
Returns n_init rows instead of just one row
PerifanosPrometheus Apr 8, 2021
498b2ed
Generates different seeds for every run
PerifanosPrometheus Apr 13, 2021
767913f
Simplify code & modify docs
PerifanosPrometheus Apr 20, 2021
8f0b78e
Black formatting of code
PerifanosPrometheus Apr 20, 2021
f461b13
Merge branch 'dev' into merge_gclust_autogmm
bdpedigo Apr 20, 2021
a827508
Merge branch 'dev' into merge_gclust_autogmm
bdpedigo Apr 21, 2021
42bbad4
Edited docs and moved back appending in process_paramgrid
PerifanosPrometheus Apr 29, 2021
5ba0c39
Merge branch 'merge_gclust_autogmm' of github.com:PerifanosPrometheus…
PerifanosPrometheus Apr 29, 2021
0c52594
Merge branch 'dev' into merge_gclust_autogmm
bdpedigo Apr 29, 2021
9c72792
Update code to sklearn PR
PerifanosPrometheus Apr 29, 2021
b5fd398
Update code to sklearn PR
PerifanosPrometheus Apr 29, 2021
94ea4bf
Merge branch 'merge_gclust_autogmm' of github.com:PerifanosPrometheus…
PerifanosPrometheus Apr 29, 2021
708fc14
change n_init to kmeans_n_init
tliu68 May 2, 2021
f452ef5
fix format
tliu68 May 2, 2021
258d611
Fixes run error related to switch from n_init to kmeans_n_init
PerifanosPrometheus May 4, 2021
facd1a5
Add test multiple_kmeans_inits
PerifanosPrometheus May 4, 2021
0fa852e
fix formatting
tliu68 May 5, 2021
8110caf
Merge branch 'dev' into merge_gclust_autogmm
tliu68 Aug 24, 2021
0c998d5
fix failing tests
tliu68 Aug 24, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
61 changes: 52 additions & 9 deletions graspologic/cluster/autogmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
from joblib import Parallel, delayed
import warnings

from sklearn.utils import check_scalar

from .base import BaseCluster


Expand Down Expand Up @@ -117,6 +119,11 @@ class AutoGMMCluster(BaseCluster):
If provided, min_components and ``max_components`` must match the number of
unique labels given here.

n_init : int, optional (default = None)
tliu68 marked this conversation as resolved.
Show resolved Hide resolved
If `n_init` is not `None` and `label_init` is `None` then additional
k-means runs will be performed with `n_init` initializations for all covariance
tliu68 marked this conversation as resolved.
Show resolved Hide resolved
parameters in `covariance_type`.

max_iter : int, optional (default = 100).
The maximum number of EM iterations to perform.

Expand Down Expand Up @@ -221,6 +228,7 @@ def __init__(
selection_criteria="bic",
max_agglom_size=2000,
n_jobs=None,
n_init=None,
):
if isinstance(min_components, int):
if min_components <= 0:
Expand Down Expand Up @@ -363,6 +371,13 @@ def __init__(
if max_agglom_size is not None and max_agglom_size < 2:
raise ValueError("Must use at least 2 points for `max_agglom_size`")

if n_init is not None:
check_scalar(x=n_init, name="n_init", target_type=int, min_val=1)

run_multiple_init = False
if n_init is not None and label_init is None:
run_multiple_init = True
tliu68 marked this conversation as resolved.
Show resolved Hide resolved

self.min_components = min_components
self.max_components = max_components
self.affinity = affinity
Expand All @@ -375,8 +390,10 @@ def __init__(
self.selection_criteria = selection_criteria
self.max_agglom_size = max_agglom_size
self.n_jobs = n_jobs
self.n_init = n_init
self.run_multiple_init = run_multiple_init

def _fit_cluster(self, X, X_subset, y, params, agg_clustering):
def _fit_cluster(self, X, X_subset, y, params, agg_clustering, seed):
label_init = self.label_init
if label_init is not None:
onehot = _labels_to_onehot(label_init)
Expand All @@ -401,6 +418,7 @@ def _fit_cluster(self, X, X_subset, y, params, agg_clustering):
gm_params["init_params"] = "kmeans"
gm_params["reg_covar"] = 0
gm_params["max_iter"] = self.max_iter
gm_params["random_state"] = seed

criter = np.inf # if none of the iterations converge, bic/aic is set to inf
# below is the regularization scheme
Expand Down Expand Up @@ -515,10 +533,17 @@ def fit(self, X, y=None):
linkage=self.linkage,
covariance_type=self.covariance_type,
n_components=range(lower_ncomponents, upper_ncomponents + 1),
random_state=[self.random_state],
)
param_grid = list(ParameterGrid(param_grid))
param_grid_ag, param_grid = _process_paramgrid(param_grid)
param_grid_ag, param_grid = _process_paramgrid(
param_grid, self.run_multiple_init, self.n_init
)

if isinstance(self.random_state, int):
np.random.seed(self.random_state)
seeds = np.random.randint(1e8, size=len(param_grid))
else:
seeds = [self.random_state]*len(param_grid)

n = X.shape[0]
if self.max_agglom_size is None or n <= self.max_agglom_size:
Expand All @@ -540,17 +565,17 @@ def fit(self, X, y=None):
)
ag_labels.append(hierarchical_labels)

def _fit_for_data(p):
def _fit_for_data(p, seed):
n_clusters = p[1]["n_components"]
if (p[0]["affinity"] != "none") and (self.label_init is None):
index = param_grid_ag.index(p[0])
agg_clustering = ag_labels[index][:, n_clusters - self.min_components]
else:
agg_clustering = []
return self._fit_cluster(X, X_subset, y, p, agg_clustering)
return self._fit_cluster(X, X_subset, y, p, agg_clustering, seed)

results = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
delayed(_fit_for_data)(p) for p in param_grid
delayed(_fit_for_data)(p, seed) for p, seed in zip(param_grid, seeds)
)
results = pd.DataFrame(results)

Expand Down Expand Up @@ -647,7 +672,7 @@ def _labels_to_onehot(labels):
return onehot


def _process_paramgrid(paramgrid):
def _process_paramgrid(paramgrid, run_multiple_init, n_init):
"""
Removes combinations of affinity and linkage that are not possible.

Expand All @@ -657,6 +682,13 @@ def _process_paramgrid(paramgrid):
Each dict has the keys 'affinity', 'covariance_type', 'linkage',
'n_components', and 'random_state'

run_multiple_init : bool
If True, run additional n_init k_means initializations.
Else, no multiple k-means initialization is performed.

n_init : int, defaults to None.
The number of k-means initializations to perform if run_multiple_init is True

Returns
-------
paramgrid_processed : list pairs of dicts
Expand All @@ -665,9 +697,10 @@ def _process_paramgrid(paramgrid):
ag_paramgrid_processed : list of dicts
options for AgglomerativeClustering
"""
gm_keys = ["covariance_type", "n_components", "random_state"]
gm_keys = ["covariance_type", "n_components"]
ag_keys = ["affinity", "linkage"]
ag_params_processed = []
gm_params_processed = []
paramgrid_processed = []

for params in paramgrid:
Expand All @@ -687,8 +720,18 @@ def _process_paramgrid(paramgrid):
ag_params = {key: params[key] for key in ag_keys}
if ag_params not in ag_params_processed:
ag_params_processed.append(ag_params)
if (
gm_params not in gm_params_processed
and ag_params["affinity"] == "none"
and run_multiple_init
):
for _ in range(n_init):
gm_params_processed.append(gm_params.copy())
gm_params_processed[-1].update({"n_init": 1})

paramgrid_processed.append([ag_params, gm_params])
for pa in gm_params_processed:
tliu68 marked this conversation as resolved.
Show resolved Hide resolved
paramgrid_processed.append([{"affinity": "none", "linkage": "none"}, pa])
return ag_params_processed, paramgrid_processed


Expand All @@ -712,4 +755,4 @@ def _hierarchical_labels(children, min_components, max_components):
hierarchical_labels[:, i], return_inverse=True
)

return hierarchical_labels[:, ::-1]
return hierarchical_labels[:, ::-1]