From 0619127ac1a11f94dd5d0400b8768200318e7441 Mon Sep 17 00:00:00 2001
From: Zheng RuiFeng <ruifengz@foxmail.com>
Date: Fri, 4 Mar 2016 11:51:50 +0800
Subject: [PATCH 01/12] Add mllib bisecting k-means Python example and data

---
 data/mllib/bisecting_kmeans_data.txt          |  6 +++
 .../python/mllib/bisecting_k_means_example.py | 50 +++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 data/mllib/bisecting_kmeans_data.txt
 create mode 100644 examples/src/main/python/mllib/bisecting_k_means_example.py

diff --git a/data/mllib/bisecting_kmeans_data.txt b/data/mllib/bisecting_kmeans_data.txt
new file mode 100644
index 0000000000000..be500232c0882
--- /dev/null
+++ b/data/mllib/bisecting_kmeans_data.txt
@@ -0,0 +1,6 @@
+0.1 0.1 0.1
+0.3 0.3 0.25
+0.1 0.1 -0.1
+20.3 20.1 19.9
+20.2 20.1 19.7
+18.9 20.0 19.7
diff --git a/examples/src/main/python/mllib/bisecting_k_means_example.py b/examples/src/main/python/mllib/bisecting_k_means_example.py
new file mode 100644
index 0000000000000..ccd7fb4bce362
--- /dev/null
+++ b/examples/src/main/python/mllib/bisecting_k_means_example.py
@@ -0,0 +1,50 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from numpy import array
+from math import sqrt
+# $example off$
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.clustering import BisectingKMeans, BisectingKMeansModel
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="BisectingKMeansExample")  # SparkContext
+
+    # $example on$
+    # Load and parse the data
+    data = sc.textFile("data/mllib/bisecting_kmeans_data.txt")
+    parsedData = data.map(lambda line: array([float(x) for x in line.split(' ')]))
+
+    # Build the model (cluster the data)
+    clusters = BisectingKMeans.train(parsedData, 2, maxIterations=5)
+
+    # Evaluate clustering
+    cost = clusters.computeCost(parsedData)
+    print("Bisecting K-means cost = " + str(cost))
+
+    # Save and load model
+    clusters.save(sc, "target/org/apache/spark/PythonKMeansExample/BisectingKMeansModel")
+    sameModel = BisectingKMeansModel.load(sc, "target/org/apache/spark/PythonKMeansExample/BisectingKMeansModel")
+    # $example off$
+
+    sc.stop()

From 871c5c03d49a187bda5df5c7ec83f9161ddfa624 Mon Sep 17 00:00:00 2001
From: Zheng RuiFeng <ruifengz@foxmail.com>
Date: Fri, 4 Mar 2016 15:47:46 +0800
Subject: [PATCH 02/12] Add more points to bisecting k-means sample data

---
 data/mllib/bisecting_kmeans_data.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/data/mllib/bisecting_kmeans_data.txt b/data/mllib/bisecting_kmeans_data.txt
index be500232c0882..ff83945804ed3 100644
--- a/data/mllib/bisecting_kmeans_data.txt
+++ b/data/mllib/bisecting_kmeans_data.txt
@@ -1,6 +1,10 @@
 0.1 0.1 0.1
 0.3 0.3 0.25
 0.1 0.1 -0.1
+0.0 0.1 0.2
+-0.2 0.0 0.1
 20.3 20.1 19.9
 20.2 20.1 19.7
 18.9 20.0 19.7
+21.0 21.2 19.9
+20.0 19.1 20.0

From 948f50c6bead6286b38a686b57131efbe2f98d1d Mon Sep 17 00:00:00 2001
From: Zheng RuiFeng <ruifengz@foxmail.com>
Date: Fri, 4 Mar 2016 15:50:33 +0800
Subject: [PATCH 03/12] update path

---
 examples/src/main/python/mllib/bisecting_k_means_example.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/src/main/python/mllib/bisecting_k_means_example.py b/examples/src/main/python/mllib/bisecting_k_means_example.py
index ccd7fb4bce362..f2d13375aad7f 100644
--- a/examples/src/main/python/mllib/bisecting_k_means_example.py
+++ b/examples/src/main/python/mllib/bisecting_k_means_example.py
@@ -43,8 +43,8 @@
     print("Bisecting K-means cost = " + str(cost))
 
     # Save and load model
-    clusters.save(sc, "target/org/apache/spark/PythonKMeansExample/BisectingKMeansModel")
-    sameModel = BisectingKMeansModel.load(sc, "target/org/apache/spark/PythonKMeansExample/BisectingKMeansModel")
+    clusters.save(sc, "target/org/apache/spark/PythonBisectingKMeansExample/BisectingKMeansModel")
+    sameModel = BisectingKMeansModel.load(sc, "target/org/apache/spark/PythonBisectingKMeansExample/BisectingKMeansModel")
     # $example off$
 
     sc.stop()

From 31fead0419f7ed2f950efa319220791517bb34e1 Mon Sep 17 00:00:00 2001
From: Zheng RuiFeng <ruifengz@foxmail.com>
Date: Fri, 4 Mar 2016 16:16:18 +0800
Subject: [PATCH 04/12] Add ml bisecting k-means Python example

---
 .../python/ml/bisecting_k_means_example.py    | 62 +++++++++++++++++++
 .../python/mllib/bisecting_k_means_example.py |  2 +-
 2 files changed, 63 insertions(+), 1 deletion(-)
 create mode 100644 examples/src/main/python/ml/bisecting_k_means_example.py

diff --git a/examples/src/main/python/ml/bisecting_k_means_example.py b/examples/src/main/python/ml/bisecting_k_means_example.py
new file mode 100644
index 0000000000000..58a18b59bd1c9
--- /dev/null
+++ b/examples/src/main/python/ml/bisecting_k_means_example.py
@@ -0,0 +1,62 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+import sys
+import re
+
+import numpy as np
+from pyspark import SparkContext
+from pyspark.ml.clustering import BisectingKMeans, BisectingKMeansModel
+from pyspark.mllib.linalg import VectorUDT, _convert_to_vector, Vectors
+from pyspark.sql import SQLContext
+from pyspark.sql.types import Row, StructField, StructType
+
+"""
+A simple example demonstrating a bisecting k-means clustering.
+"""
+
+if __name__ == "__main__":
+
+    sc = SparkContext(appName="PythonBisectingKMeansExample")
+    sqlContext = SQLContext(sc)
+
+    # $example on$
+    training = sqlContext.createDataFrame([
+        (0, Vectors.dense(0.1, 0.1, 0.1)),
+        (1, Vectors.dense(0.3, 0.3, 0.25)),
+        (2, Vectors.dense(0.1, 0.1, -0.1)),
+        (3, Vectors.dense(20.3, 20.1, 19.9)),
+        (4, Vectors.dense(20.2, 20.1, 19.7)),
+        (5, Vectors.dense(18.9, 20.0, 19.7))], ["id", "features"])
+
+    k = 2
+    kmeans = BisectingKMeans().setK(k).setSeed(1).setFeaturesCol("features")
+
+    model = kmeans.fit(training)
+
+    # Evaluate clustering
+    cost = model.computeCost(training)
+    print("Bisecting K-means Cost = " + str(cost))
+
+    centers = model.clusterCenters()
+    print("Cluster Centers: ")
+    for center in centers:
+        print(center)
+
+    sc.stop()
\ No newline at end of file
diff --git a/examples/src/main/python/mllib/bisecting_k_means_example.py b/examples/src/main/python/mllib/bisecting_k_means_example.py
index f2d13375aad7f..cdad449a51955 100644
--- a/examples/src/main/python/mllib/bisecting_k_means_example.py
+++ b/examples/src/main/python/mllib/bisecting_k_means_example.py
@@ -40,7 +40,7 @@
 
     # Evaluate clustering
     cost = clusters.computeCost(parsedData)
-    print("Bisecting K-means cost = " + str(cost))
+    print("Bisecting K-means Cost = " + str(cost))
 
     # Save and load model
     clusters.save(sc, "target/org/apache/spark/PythonBisectingKMeansExample/BisectingKMeansModel")

From b05680f35061ef3fd0aad99d3da121417b4f3cad Mon Sep 17 00:00:00 2001
From: Zheng RuiFeng <ruifengz@foxmail.com>
Date: Fri, 4 Mar 2016 16:17:39 +0800
Subject: [PATCH 05/12] format

---
 examples/src/main/python/ml/bisecting_k_means_example.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/src/main/python/ml/bisecting_k_means_example.py b/examples/src/main/python/ml/bisecting_k_means_example.py
index 58a18b59bd1c9..d5bbe277a13ab 100644
--- a/examples/src/main/python/ml/bisecting_k_means_example.py
+++ b/examples/src/main/python/ml/bisecting_k_means_example.py
@@ -59,4 +59,4 @@
     for center in centers:
         print(center)
 
-    sc.stop()
\ No newline at end of file
+    sc.stop()

From 6bce85f15dc03385dcbc1b32fa8250006fe7dedb Mon Sep 17 00:00:00 2001
From: Zheng RuiFeng <ruifengz@foxmail.com>
Date: Fri, 4 Mar 2016 16:36:52 +0800
Subject: [PATCH 06/12] add example off

---
 examples/src/main/python/ml/bisecting_k_means_example.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/src/main/python/ml/bisecting_k_means_example.py b/examples/src/main/python/ml/bisecting_k_means_example.py
index d5bbe277a13ab..d73b68e0ac039 100644
--- a/examples/src/main/python/ml/bisecting_k_means_example.py
+++ b/examples/src/main/python/ml/bisecting_k_means_example.py
@@ -58,5 +58,6 @@
     print("Cluster Centers: ")
     for center in centers:
         print(center)
+    # $example off$
 
     sc.stop()

From be718beec427bc567cdbd419963ecfb525c96a98 Mon Sep 17 00:00:00 2001
From: Zheng RuiFeng <ruifengz@foxmail.com>
Date: Fri, 4 Mar 2016 16:41:57 +0800
Subject: [PATCH 07/12] format

---
 examples/src/main/python/mllib/bisecting_k_means_example.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/examples/src/main/python/mllib/bisecting_k_means_example.py b/examples/src/main/python/mllib/bisecting_k_means_example.py
index cdad449a51955..337f478424422 100644
--- a/examples/src/main/python/mllib/bisecting_k_means_example.py
+++ b/examples/src/main/python/mllib/bisecting_k_means_example.py
@@ -43,8 +43,9 @@
     print("Bisecting K-means Cost = " + str(cost))
 
     # Save and load model
-    clusters.save(sc, "target/org/apache/spark/PythonBisectingKMeansExample/BisectingKMeansModel")
-    sameModel = BisectingKMeansModel.load(sc, "target/org/apache/spark/PythonBisectingKMeansExample/BisectingKMeansModel")
+    path = "target/org/apache/spark/PythonBisectingKMeansExample/BisectingKMeansModel"
+    clusters.save(sc, path)
+    sameModel = BisectingKMeansModel.load(sc, path)
     # $example off$
 
     sc.stop()

From cea8ddfe291b79f19393144145e48286a1e39e8c Mon Sep 17 00:00:00 2001
From: Zheng RuiFeng <ruifengz@foxmail.com>
Date: Wed, 9 Mar 2016 21:01:45 +0800
Subject: [PATCH 08/12] Remove unnecessary dataset and imports; add missing
 @classmethod decorator

---
 data/mllib/bisecting_kmeans_data.txt                   | 10 ----------
 .../src/main/python/ml/bisecting_k_means_example.py    |  7 ++-----
 .../src/main/python/mllib/bisecting_k_means_example.py |  4 ++--
 python/pyspark/mllib/clustering.py                     |  1 +
 4 files changed, 5 insertions(+), 17 deletions(-)
 delete mode 100644 data/mllib/bisecting_kmeans_data.txt

diff --git a/data/mllib/bisecting_kmeans_data.txt b/data/mllib/bisecting_kmeans_data.txt
deleted file mode 100644
index ff83945804ed3..0000000000000
--- a/data/mllib/bisecting_kmeans_data.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-0.1 0.1 0.1
-0.3 0.3 0.25
-0.1 0.1 -0.1
-0.0 0.1 0.2
--0.2 0.0 0.1
-20.3 20.1 19.9
-20.2 20.1 19.7
-18.9 20.0 19.7
-21.0 21.2 19.9
-20.0 19.1 20.0
diff --git a/examples/src/main/python/ml/bisecting_k_means_example.py b/examples/src/main/python/ml/bisecting_k_means_example.py
index d73b68e0ac039..bd59adfb5afb7 100644
--- a/examples/src/main/python/ml/bisecting_k_means_example.py
+++ b/examples/src/main/python/ml/bisecting_k_means_example.py
@@ -17,15 +17,12 @@
 
 from __future__ import print_function
 
-import sys
-import re
-
-import numpy as np
 from pyspark import SparkContext
+# $example on$
 from pyspark.ml.clustering import BisectingKMeans, BisectingKMeansModel
 from pyspark.mllib.linalg import VectorUDT, _convert_to_vector, Vectors
+# $example off$
 from pyspark.sql import SQLContext
-from pyspark.sql.types import Row, StructField, StructType
 
 """
 A simple example demonstrating a bisecting k-means clustering.
diff --git a/examples/src/main/python/mllib/bisecting_k_means_example.py b/examples/src/main/python/mllib/bisecting_k_means_example.py
index 337f478424422..c959eb9a189e6 100644
--- a/examples/src/main/python/mllib/bisecting_k_means_example.py
+++ b/examples/src/main/python/mllib/bisecting_k_means_example.py
@@ -28,11 +28,11 @@
 # $example off$
 
 if __name__ == "__main__":
-    sc = SparkContext(appName="BisectingKMeansExample")  # SparkContext
+    sc = SparkContext(appName="PythonBisectingKMeansExample")  # SparkContext
 
     # $example on$
     # Load and parse the data
-    data = sc.textFile("data/mllib/bisecting_kmeans_data.txt")
+    data = sc.textFile("data/mllib/kmeans_data.txt")
     parsedData = data.map(lambda line: array([float(x) for x in line.split(' ')]))
 
     # Build the model (cluster the data)
diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py
index 5a5bf59dd5fe3..23d118bd40900 100644
--- a/python/pyspark/mllib/clustering.py
+++ b/python/pyspark/mllib/clustering.py
@@ -142,6 +142,7 @@ class BisectingKMeans(object):
     .. versionadded:: 2.0.0
     """
 
+    @classmethod
     @since('2.0.0')
     def train(self, rdd, k=4, maxIterations=20, minDivisibleClusterSize=1.0, seed=-1888008604):
         """

From 399290cd9345bea981963a1df66c403727c82a7f Mon Sep 17 00:00:00 2001
From: Zheng RuiFeng <ruifengz@foxmail.com>
Date: Fri, 11 Mar 2016 11:12:19 +0800
Subject: [PATCH 09/12] add include_example

---
 docs/mllib-clustering.md                       |  6 ++++++
 .../python/ml/bisecting_k_means_example.py     | 18 ++++++++----------
 .../python/mllib/bisecting_k_means_example.py  |  7 +++----
 3 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/docs/mllib-clustering.md b/docs/mllib-clustering.md
index 44720147be054..6897ba4a5d57d 100644
--- a/docs/mllib-clustering.md
+++ b/docs/mllib-clustering.md
@@ -399,6 +399,12 @@ Refer to the [`BisectingKMeans` Java docs](api/java/org/apache/spark/mllib/clust
 
 {% include_example java/org/apache/spark/examples/mllib/JavaBisectingKMeansExample.java %}
 </div>
+
+<div data-lang="python" markdown="1">
+Refer to the [`BisectingKMeans` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.clustering.BisectingKMeans) and [`BisectingKMeansModel` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.clustering.BisectingKMeansModel) for more details on the API.
+
+{% include_example python/mllib/bisecting_k_means_example.py %}
+</div>
 </div>
 
 ## Streaming k-means
diff --git a/examples/src/main/python/ml/bisecting_k_means_example.py b/examples/src/main/python/ml/bisecting_k_means_example.py
index bd59adfb5afb7..c3b34d43c304f 100644
--- a/examples/src/main/python/ml/bisecting_k_means_example.py
+++ b/examples/src/main/python/ml/bisecting_k_means_example.py
@@ -21,6 +21,8 @@
 # $example on$
 from pyspark.ml.clustering import BisectingKMeans, BisectingKMeansModel
 from pyspark.mllib.linalg import VectorUDT, _convert_to_vector, Vectors
+from pyspark.mllib.linalg import Vectors
+from pyspark.sql.types import Row, StructField, StructType
 # $example off$
 from pyspark.sql import SQLContext
 
@@ -34,16 +36,12 @@
     sqlContext = SQLContext(sc)
 
     # $example on$
-    training = sqlContext.createDataFrame([
-        (0, Vectors.dense(0.1, 0.1, 0.1)),
-        (1, Vectors.dense(0.3, 0.3, 0.25)),
-        (2, Vectors.dense(0.1, 0.1, -0.1)),
-        (3, Vectors.dense(20.3, 20.1, 19.9)),
-        (4, Vectors.dense(20.2, 20.1, 19.7)),
-        (5, Vectors.dense(18.9, 20.0, 19.7))], ["id", "features"])
-
-    k = 2
-    kmeans = BisectingKMeans().setK(k).setSeed(1).setFeaturesCol("features")
+    data = sc.textFile("data/mllib/kmeans_data.txt")
+    parsedData = data.map(lambda line: Row(features=Vectors.dense([float(x) for x in line.split(' ')])))
+    schema = StructType([StructField("features", VectorUDT(), False)])
+    training = sqlContext.createDataFrame(parsedData, schema)
+
+    kmeans = BisectingKMeans().setK(2).setSeed(1).setFeaturesCol("features")
 
     model = kmeans.fit(training)
 
diff --git a/examples/src/main/python/mllib/bisecting_k_means_example.py b/examples/src/main/python/mllib/bisecting_k_means_example.py
index c959eb9a189e6..7f4d0402d620c 100644
--- a/examples/src/main/python/mllib/bisecting_k_means_example.py
+++ b/examples/src/main/python/mllib/bisecting_k_means_example.py
@@ -19,7 +19,6 @@
 
 # $example on$
 from numpy import array
-from math import sqrt
 # $example off$
 
 from pyspark import SparkContext
@@ -36,15 +35,15 @@
     parsedData = data.map(lambda line: array([float(x) for x in line.split(' ')]))
 
     # Build the model (cluster the data)
-    clusters = BisectingKMeans.train(parsedData, 2, maxIterations=5)
+    model = BisectingKMeans.train(parsedData, 2, maxIterations=5)
 
     # Evaluate clustering
-    cost = clusters.computeCost(parsedData)
+    cost = model.computeCost(parsedData)
     print("Bisecting K-means Cost = " + str(cost))
 
     # Save and load model
     path = "target/org/apache/spark/PythonBisectingKMeansExample/BisectingKMeansModel"
-    clusters.save(sc, path)
+    model.save(sc, path)
     sameModel = BisectingKMeansModel.load(sc, path)
     # $example off$
 

From 3ab75336ba854e1801e8c29fa8bd5a1a868a5743 Mon Sep 17 00:00:00 2001
From: Zheng RuiFeng <ruifengz@foxmail.com>
Date: Fri, 11 Mar 2016 11:45:46 +0800
Subject: [PATCH 10/12] Wrap long line; let createDataFrame infer the schema

---
 examples/src/main/python/ml/bisecting_k_means_example.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/src/main/python/ml/bisecting_k_means_example.py b/examples/src/main/python/ml/bisecting_k_means_example.py
index c3b34d43c304f..b0565e40b662c 100644
--- a/examples/src/main/python/ml/bisecting_k_means_example.py
+++ b/examples/src/main/python/ml/bisecting_k_means_example.py
@@ -37,9 +37,9 @@
 
     # $example on$
     data = sc.textFile("data/mllib/kmeans_data.txt")
-    parsedData = data.map(lambda line: Row(features=Vectors.dense([float(x) for x in line.split(' ')])))
-    schema = StructType([StructField("features", VectorUDT(), False)])
-    training = sqlContext.createDataFrame(parsedData, schema)
+    parsedData = data.map(lambda line: Row(features=Vectors.dense(
+            [float(x) for x in line.split(' ')])))
+    training = sqlContext.createDataFrame(parsedData)
 
     kmeans = BisectingKMeans().setK(2).setSeed(1).setFeaturesCol("features")
 

From d4415114a46791fe0ed959c1e8c5031bf743568d Mon Sep 17 00:00:00 2001
From: Zheng RuiFeng <ruifengz@foxmail.com>
Date: Fri, 11 Mar 2016 11:46:35 +0800
Subject: [PATCH 11/12] Remove unused StructField and StructType imports

---
 examples/src/main/python/ml/bisecting_k_means_example.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/src/main/python/ml/bisecting_k_means_example.py b/examples/src/main/python/ml/bisecting_k_means_example.py
index b0565e40b662c..cbd6bfb0c4c99 100644
--- a/examples/src/main/python/ml/bisecting_k_means_example.py
+++ b/examples/src/main/python/ml/bisecting_k_means_example.py
@@ -22,7 +22,7 @@
 from pyspark.ml.clustering import BisectingKMeans, BisectingKMeansModel
 from pyspark.mllib.linalg import VectorUDT, _convert_to_vector, Vectors
 from pyspark.mllib.linalg import Vectors
-from pyspark.sql.types import Row, StructField, StructType
+from pyspark.sql.types import Row
 # $example off$
 from pyspark.sql import SQLContext
 

From 165a4fe6f0d8cbc9eb5bcdf161dbfe48b2c1f8f9 Mon Sep 17 00:00:00 2001
From: Zheng RuiFeng <ruifengz@foxmail.com>
Date: Fri, 11 Mar 2016 11:53:51 +0800
Subject: [PATCH 12/12] fix python style

---
 examples/src/main/python/ml/bisecting_k_means_example.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/examples/src/main/python/ml/bisecting_k_means_example.py b/examples/src/main/python/ml/bisecting_k_means_example.py
index cbd6bfb0c4c99..e6f6bfd7e84ed 100644
--- a/examples/src/main/python/ml/bisecting_k_means_example.py
+++ b/examples/src/main/python/ml/bisecting_k_means_example.py
@@ -37,9 +37,8 @@
 
     # $example on$
     data = sc.textFile("data/mllib/kmeans_data.txt")
-    parsedData = data.map(lambda line: Row(features=Vectors.dense(
-            [float(x) for x in line.split(' ')])))
-    training = sqlContext.createDataFrame(parsedData)
+    parsed = data.map(lambda l: Row(features=Vectors.dense([float(x) for x in l.split(' ')])))
+    training = sqlContext.createDataFrame(parsed)
 
     kmeans = BisectingKMeans().setK(2).setSeed(1).setFeaturesCol("features")