From ec8a5a006385c0ecf25f1d982ace83bb3c41e052 Mon Sep 17 00:00:00 2001 From: Suneel Marthi Date: Tue, 24 Mar 2015 23:20:17 -0400 Subject: [PATCH] MAHOUT-1639: streamingkmeans doesn't properly validate estimatedNumMapClusters -km --- CHANGELOG | 4 +++- .../clustering/streaming/mapreduce/StreamingKMeansDriver.java | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 7b1aa092a1..deadda6c89 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,8 @@ Mahout Change Log -Release 1.0 - unreleased +Release 0.10.0 - unreleased + + MAHOUT-1639: Streamingkmeans doesn't properly validate estimatedNumMapClusters -km (smarthi) MAHOUT-1493: Port Naive Bayes to Scala DSL (apalumbo) diff --git a/mrlegacy/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansDriver.java b/mrlegacy/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansDriver.java index c8f061ff0a..73776b9a70 100644 --- a/mrlegacy/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansDriver.java +++ b/mrlegacy/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansDriver.java @@ -169,7 +169,7 @@ public int run(String[] args) throws Exception { addOption(ESTIMATED_NUM_MAP_CLUSTERS, "km", "The estimated number of clusters to use for the " + "Map phase of the job when running StreamingKMeans. This should be around k * log(n), " + "where k is the final number of clusters and n is the total number of data points to " - + "cluster."); + + "cluster.", String.valueOf(1)); addOption(ESTIMATED_DISTANCE_CUTOFF, "e", "The initial estimated distance cutoff between two " + "points for forming new clusters. If no value is given, it's estimated from the data set",