From 89e19c75e80fd9523af5eb208cc13674db7a847f Mon Sep 17 00:00:00 2001 From: Albert Chu Date: Mon, 16 Nov 2015 16:43:42 -0800 Subject: [PATCH 1/2] Support environment variable MAHOUT_WORK_DIR to allow users to specify an alternate temporary directory in examples. --- examples/bin/classify-20newsgroups.sh | 6 +++++- examples/bin/classify-wikipedia.sh | 6 +++++- examples/bin/cluster-reuters.sh | 6 +++++- examples/bin/cluster-syntheticcontrol.sh | 6 +++++- examples/bin/factorize-movielens-1M.sh | 9 +++++++-- examples/bin/factorize-netflix.sh | 6 +++++- 6 files changed, 32 insertions(+), 7 deletions(-) diff --git a/examples/bin/classify-20newsgroups.sh b/examples/bin/classify-20newsgroups.sh index c58e9a02f4..6d7ab45d29 100755 --- a/examples/bin/classify-20newsgroups.sh +++ b/examples/bin/classify-20newsgroups.sh @@ -36,7 +36,11 @@ START_PATH=`pwd` # Set commands for dfs source ${START_PATH}/set-dfs-commands.sh -WORK_DIR=/tmp/mahout-work-${USER} +if [[ -z "$MAHOUT_WORK_DIR" ]]; then + WORK_DIR=/tmp/mahout-work-${USER} +else + WORK_DIR=$MAHOUT_WORK_DIR +fi algorithm=( cnaivebayes-MapReduce naivebayes-MapReduce cnaivebayes-Spark naivebayes-Spark sgd clean) if [ -n "$1" ]; then choice=$1 diff --git a/examples/bin/classify-wikipedia.sh b/examples/bin/classify-wikipedia.sh index 68487dcc75..686c99dacb 100755 --- a/examples/bin/classify-wikipedia.sh +++ b/examples/bin/classify-wikipedia.sh @@ -42,7 +42,11 @@ START_PATH=`pwd` # Set commands for dfs source ${START_PATH}/set-dfs-commands.sh -WORK_DIR=/tmp/mahout-work-wiki +if [[ -z "$MAHOUT_WORK_DIR" ]]; then + WORK_DIR=/tmp/mahout-work-wiki +else + WORK_DIR=$MAHOUT_WORK_DIR +fi algorithm=( CBayes BinaryCBayes clean) if [ -n "$1" ]; then choice=$1 diff --git a/examples/bin/cluster-reuters.sh b/examples/bin/cluster-reuters.sh index d53aa00d2a..6c42ab9a76 100755 --- a/examples/bin/cluster-reuters.sh +++ b/examples/bin/cluster-reuters.sh @@ -43,7 +43,11 @@ if [ ! -e $MAHOUT ]; then exit 1 fi -WORK_DIR=/tmp/mahout-work-${USER} +if [[ -z "$MAHOUT_WORK_DIR" ]]; then + WORK_DIR=/tmp/mahout-work-${USER} +else + WORK_DIR=$MAHOUT_WORK_DIR +fi algorithm=( kmeans fuzzykmeans lda streamingkmeans clean) if [ -n "$1" ]; then diff --git a/examples/bin/cluster-syntheticcontrol.sh b/examples/bin/cluster-syntheticcontrol.sh index eab62be39f..5e1240f553 100755 --- a/examples/bin/cluster-syntheticcontrol.sh +++ b/examples/bin/cluster-syntheticcontrol.sh @@ -48,7 +48,11 @@ START_PATH=`pwd` # Set commands for dfs source ${START_PATH}/set-dfs-commands.sh -WORK_DIR=/tmp/mahout-work-${USER} +if [[ -z "$MAHOUT_WORK_DIR" ]]; then + WORK_DIR=/tmp/mahout-work-${USER} +else + WORK_DIR=$MAHOUT_WORK_DIR +fi echo "creating work directory at ${WORK_DIR}" mkdir -p ${WORK_DIR} diff --git a/examples/bin/factorize-movielens-1M.sh b/examples/bin/factorize-movielens-1M.sh index 735e425a3f..29730e1d80 100755 --- a/examples/bin/factorize-movielens-1M.sh +++ b/examples/bin/factorize-movielens-1M.sh @@ -43,7 +43,12 @@ fi export MAHOUT_LOCAL=true MAHOUT="$MAHOUT_HOME/bin/mahout" -WORK_DIR=/tmp/mahout-work-${USER} +if [[ -z "$MAHOUT_WORK_DIR" ]]; then + WORK_DIR=/tmp/mahout-work-${USER} +else + WORK_DIR=$MAHOUT_WORK_DIR +fi + echo "creating work directory at ${WORK_DIR}" mkdir -p ${WORK_DIR}/movielens @@ -77,4 +82,4 @@ shuf ${WORK_DIR}/recommendations/part-m-00000 |head echo -e "\n\n" echo "removing work directory" -rm -rf ${WORK_DIR} \ No newline at end of file +rm -rf ${WORK_DIR} diff --git a/examples/bin/factorize-netflix.sh b/examples/bin/factorize-netflix.sh index 856f775ac9..26faf66a53 100755 --- a/examples/bin/factorize-netflix.sh +++ b/examples/bin/factorize-netflix.sh @@ -45,7 +45,11 @@ fi MAHOUT="../../bin/mahout" -WORK_DIR=/tmp/mahout-work-${USER} +if [[ -z "$MAHOUT_WORK_DIR" ]]; then + WORK_DIR=/tmp/mahout-work-${USER} +else + WORK_DIR=$MAHOUT_WORK_DIR +fi START_PATH=`pwd` From 599101083ab7d31600b8c93b2efd8cea4c0a30bf Mon Sep 17 00:00:00 2001 From: Albert Chu Date: Thu, 19 Nov 2015 13:46:05 -0800 Subject: [PATCH 2/2] In examples, specify -p option to HDFS -mkdir option when potentially necessary. --- examples/bin/classify-20newsgroups.sh | 2 +- examples/bin/classify-wikipedia.sh | 2 +- examples/bin/cluster-reuters.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/bin/classify-20newsgroups.sh b/examples/bin/classify-20newsgroups.sh index 6d7ab45d29..f47d5c5249 100755 --- a/examples/bin/classify-20newsgroups.sh +++ b/examples/bin/classify-20newsgroups.sh @@ -109,7 +109,7 @@ if ( [ "x$alg" == "xnaivebayes-MapReduce" ] || [ "x$alg" == "xcnaivebayes-MapR echo "Copying 20newsgroups data to HDFS" set +e $DFSRM ${WORK_DIR}/20news-all - $DFS -mkdir ${WORK_DIR} + $DFS -mkdir -p ${WORK_DIR} $DFS -mkdir ${WORK_DIR}/20news-all set -e if [ $HVERSION -eq "1" ] ; then diff --git a/examples/bin/classify-wikipedia.sh b/examples/bin/classify-wikipedia.sh index 686c99dacb..8a7889f0a5 100755 --- a/examples/bin/classify-wikipedia.sh +++ b/examples/bin/classify-wikipedia.sh @@ -114,7 +114,7 @@ if [ "x$alg" == "xCBayes" ] || [ "x$alg" == "xBinaryCBayes" ] ; then echo "Copying wikipedia data to HDFS" set +e $DFSRM ${WORK_DIR}/wikixml - $DFS -mkdir ${WORK_DIR} + $DFS -mkdir -p ${WORK_DIR} set -e $DFS -put ${WORK_DIR}/wikixml ${WORK_DIR}/wikixml fi diff --git a/examples/bin/cluster-reuters.sh b/examples/bin/cluster-reuters.sh index 6c42ab9a76..49f6c94ccd 100755 --- a/examples/bin/cluster-reuters.sh +++ b/examples/bin/cluster-reuters.sh @@ -102,7 +102,7 @@ if [ ! -e ${WORK_DIR}/reuters-out-seqdir ]; then set +e $DFSRM ${WORK_DIR}/reuters-sgm $DFSRM ${WORK_DIR}/reuters-out - $DFS -mkdir ${WORK_DIR}/ + $DFS -mkdir -p ${WORK_DIR}/ $DFS -mkdir ${WORK_DIR}/reuters-sgm $DFS -mkdir ${WORK_DIR}/reuters-out $DFS -put ${WORK_DIR}/reuters-sgm ${WORK_DIR}/reuters-sgm