ENH renamed 'k' to n_clusters in SpectralClustering

amueller · May 15, 2012 · ee50f48 · ee50f48
1 parent eb186ce
commit ee50f48
Show file tree

Hide file tree

Showing 2 changed files with 24 additions and 17 deletions.
diff --git a/sklearn/cluster/spectral.py b/sklearn/cluster/spectral.py
@@ -139,8 +139,8 @@ def spectral_embedding(adjacency, n_components=8, mode=None,
     return embedding
 
 
-def spectral_clustering(affinity, k=8, n_components=None, mode=None,
-                        random_state=None, n_init=10):
+def spectral_clustering(affinity, n_clusters=8, n_components=None, mode=None,
+                        random_state=None, n_init=10, k=None):
     """Apply k-means to a projection to the normalized laplacian
 
     In practice Spectral Clustering is very useful when the structure of
@@ -163,7 +163,7 @@ def spectral_clustering(affinity, k=8, n_components=None, mode=None,
           - heat kernel of the pairwise distance matrix of the samples,
           - symmetric k-nearest neighbours connectivity matrix of the samples.
 
-    k: integer, optional
+    n_clusters: integer, optional
         Number of clusters to extract.
 
     n_components: integer, optional, default is n_clusters
@@ -211,12 +211,15 @@ def spectral_clustering(affinity, k=8, n_components=None, mode=None,
     This algorithm solves the normalized cut for k=2: it is a
     normalized spectral clustering.
     """
+    if not k is None:
+        warnings.warn("'k' was renamed to n_clusters", DeprecationWarning)
+        n_clusters = k
     random_state = check_random_state(random_state)
-    n_components = k if n_components is None else n_components
+    n_components = n_clusters if n_components is None else n_components
     maps = spectral_embedding(affinity, n_components=n_components,
                               mode=mode, random_state=random_state)
     maps = maps[1:]
-    _, labels, _ = k_means(maps.T, k, random_state=random_state,
+    _, labels, _ = k_means(maps.T, n_clusters, random_state=random_state,
                     n_init=n_init)
     return labels
 
@@ -235,20 +238,20 @@ class SpectralClustering(BaseEstimator):
 
     Parameters
     -----------
-    k: integer, optional
+    n_clusters : integer, optional
         The dimension of the projection subspace.
 
-    mode: {None, 'arpack' or 'amg'}
+    mode : {None, 'arpack' or 'amg'}
         The eigenvalue decomposition strategy to use. AMG requires pyamg
         to be installed. It can be faster on very large, sparse problems,
         but may also lead to instabilities
 
-    random_state: int seed, RandomState instance, or None (default)
+    random_state : int seed, RandomState instance, or None (default)
         A pseudo random number generator used for the initialization
         of the lobpcg eigen vectors decomposition when mode == 'amg'
         and by the K-Means initialization.
 
-    n_init: int, optional, default: 10
+    n_init : int, optional, default: 10
         Number of times the k-means algorithm will be run with different
         centroid seeds. The final results will be the best output of
         n_init consecutive runs in terms of inertia.
@@ -271,8 +274,12 @@ class SpectralClustering(BaseEstimator):
       http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323
     """
 
-    def __init__(self, k=8, mode=None, random_state=None, n_init=10):
-        self.k = k
+    def __init__(self, n_clusters=8, mode=None, random_state=None, n_init=10,
+            k=None):
+        if not k is None:
+            warnings.warn("'k' was renamed to n_clusters", DeprecationWarning)
+            n_clusters = k
+        self.n_clusters = n_clusters
         self.mode = mode
         self.random_state = random_state
         self.n_init = n_init
@@ -306,7 +313,7 @@ def fit(self, X):
         speeds up computation.
         """
         self.random_state = check_random_state(self.random_state)
-        self.labels_ = spectral_clustering(X, k=self.k, mode=self.mode,
-                                           random_state=self.random_state,
-                                           n_init=self.n_init)
+        self.labels_ = spectral_clustering(X, n_clusters=self.n_clusters,
+                mode=self.mode, random_state=self.random_state,
+                n_init=self.n_init)
         return self
diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
@@ -22,15 +22,15 @@ def test_spectral_clustering():
                  ])
 
     for mat in (S, sparse.csr_matrix(S)):
-        model = SpectralClustering(random_state=0, k=2).fit(mat)
+        model = SpectralClustering(random_state=0, n_clusters=2).fit(mat)
         labels = model.labels_
         if labels[0] == 0:
             labels = 1 - labels
 
         assert_equal(labels, [1, 1, 1, 0, 0, 0, 0])
 
         model_copy = loads(dumps(model))
-        assert_equal(model_copy.k, model.k)
+        assert_equal(model_copy.n_clusters, model.n_clusters)
         assert_equal(model_copy.mode, model.mode)
         assert_equal(model_copy.random_state.get_state(),
                      model.random_state.get_state())
@@ -55,7 +55,7 @@ def test_spectral_clustering_sparse():
 
     S = sparse.coo_matrix(S)
 
-    labels = SpectralClustering(random_state=0, k=2).fit(S).labels_
+    labels = SpectralClustering(random_state=0, n_clusters=2).fit(S).labels_
     if labels[0] == 0:
         labels = 1 - labels