From 5b4e49be82ff5baaa36f5bf903c1736a170f6d20 Mon Sep 17 00:00:00 2001
From: Albert Chu
Date: Wed, 25 May 2016 16:48:34 -0700
Subject: [PATCH 1/2] Add -p option to hdfs mkdir in cluster-syntheticcontrol.sh

---
 examples/bin/cluster-syntheticcontrol.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/bin/cluster-syntheticcontrol.sh b/examples/bin/cluster-syntheticcontrol.sh
index 8f37cc60c9..800830e888 100755
--- a/examples/bin/cluster-syntheticcontrol.sh
+++ b/examples/bin/cluster-syntheticcontrol.sh
@@ -75,7 +75,7 @@ if [ "$HADOOP_HOME" != "" ] && [ "$MAHOUT_LOCAL" == "" ]; then
     echo "DFS is healthy... "
     echo "Uploading Synthetic control data to HDFS"
     $DFSRM ${WORK_DIR}/testdata
-    $DFS -mkdir ${WORK_DIR}/testdata
+    $DFS -mkdir -p ${WORK_DIR}/testdata
     $DFS -put ${WORK_DIR}/synthetic_control.data ${WORK_DIR}/testdata
     echo "Successfully Uploaded Synthetic control data to HDFS "
 

From e8d90295ee93d40ef0abdd133fea44677eabd5ca Mon Sep 17 00:00:00 2001
From: Albert Chu
Date: Wed, 25 May 2016 17:26:02 -0700
Subject: [PATCH 2/2] Pass appropriate options to the job to ensure cluster-syntheticcontrol.sh works by default

---
 examples/bin/cluster-syntheticcontrol.sh | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/examples/bin/cluster-syntheticcontrol.sh b/examples/bin/cluster-syntheticcontrol.sh
index 800830e888..39b2255b6b 100755
--- a/examples/bin/cluster-syntheticcontrol.sh
+++ b/examples/bin/cluster-syntheticcontrol.sh
@@ -79,7 +79,17 @@ if [ "$HADOOP_HOME" != "" ] && [ "$MAHOUT_LOCAL" == "" ]; then
     $DFS -put ${WORK_DIR}/synthetic_control.data ${WORK_DIR}/testdata
     echo "Successfully Uploaded Synthetic control data to HDFS "
 
-    ../../bin/mahout org.apache.mahout.clustering.syntheticcontrol."${clustertype}".Job
+    options="--input ${WORK_DIR}/testdata --output ${WORK_DIR}/output --maxIter 10 --convergenceDelta 0.5"
+
+    if [ "${clustertype}" == "kmeans" ]; then
+      options="${options} --numClusters 6"
+      # t1 & t2 are not used when --numClusters is specified, but the parser still requires values for them
+      options="${options} --t1 1 --t2 2"
+      ../../bin/mahout org.apache.mahout.clustering.syntheticcontrol."${clustertype}".Job ${options}
+    else
+      options="${options} --m 2.0f --t1 80 --t2 55"
+      ../../bin/mahout org.apache.mahout.clustering.syntheticcontrol."${clustertype}".Job ${options}
+    fi
   else
     echo " HADOOP is not running. Please make sure you hadoop is running. "
   fi