diff --git a/scripts/datagen/genRandData4DecisionTree.sh b/scripts/datagen/genRandData4DecisionTree.sh index 44978192fe2..6564d518f12 100644 --- a/scripts/datagen/genRandData4DecisionTree.sh +++ b/scripts/datagen/genRandData4DecisionTree.sh @@ -21,7 +21,7 @@ #------------------------------------------------------------- if [ "$1" == "" -o "$2" == "" ]; then echo "Usage: $0 e.g. $0 perftest SPARK" ; exit 1 ; fi -if [ "$2" == "SPARK" ]; then CMD="./sparkDML.sh "; DASH="-"; elif [ "$2" == "MR" ]; then CMD="hadoop jar SystemDS.jar " ; else CMD="echo " ; fi +# if [ "$2" == "SPARK" ]; then CMD="./sparkDML.sh "; DASH="-"; elif [ "$2" == "MR" ]; then CMD="hadoop jar SystemDS.jar " ; else CMD="echo " ; fi BASE=$1/trees diff --git a/scripts/perftest/MatrixMult.sh b/scripts/perftest/MatrixMult.sh index ca138994a5e..5aa4d0d374d 100755 --- a/scripts/perftest/MatrixMult.sh +++ b/scripts/perftest/MatrixMult.sh @@ -20,56 +20,77 @@ # #------------------------------------------------------------- -if [ "$(basename $PWD)" != "perftest" ]; -then +if [ "$(basename $PWD)" != "perftest" ]; then echo "Please execute scripts from directory 'perftest'" - exit 1; + exit 1 fi -if ! command -v perf &> /dev/null -then +if ! 
command -v perf &>/dev/null; then echo "Perf stat not installed for matrix operation benchmarks, see README" - exit 0; + exit 0 fi CMD=$1 +rep=2 +innerRep=300 +is=("100 1000 5000") +js=("100 1000 5000") +ks=("100 1000 5000") +spar=("1.0 0.35 0.1 0.01") +confs=("conf/std.xml conf/mkl.xml conf/openblas.xml") + +# is=("1000") +# js=("1000") +# ks=("1000") +# spar=("1.0 0.01") +# confs=("conf/mkl.xml") +# confs=("conf/openblas.xml") + # Logging output +mkdir -p logs LogName='logs/MM.log' -rm -f $LogName +rm -f $LogName # full log file +rm -f $LogName.log # Reduced log file + +echo "MATRIX MULTIPLICATION" >>results/times.txt + +for i in $is; do + for j in $js; do + for k in $ks; do + for con in $confs; do + + tstart=$(date +%s.%N) + + perf stat -d -d -d -r $rep \ + ${CMD} scripts/MM.dml \ + -config $con \ + -stats \ + -args $i $j $k 1.0 1.0 $innerRep \ + >>$LogName 2>&1 + ttrain=$(echo "$(date +%s.%N) - $tstart - .4" | bc) + echo "Matrix mult $i x $j %*% $j x $k $con:" $ttrain >>results/times.txt -tstart=$(date +%s.%N) -# Baseline -perf stat -d -d -d -r 5 \ - ${CMD} scripts/MM.dml \ - -config conf/std.xml \ - -stats \ - -args 5000 5000 5000 1.0 1.0 3 \ - >>$LogName 2>&1 -ttrain=$(echo "$(date +%s.%N) - $tstart - .4" | bc) -echo "Matrix mult 5000x5000 %*% 5000x5000 without mkl/openblas:" $ttrain >> results/times.txt + done + for sl in $spar; do + for sr in $spar; do + tstart=$(date +%s.%N) + perf stat -d -d -d -r $rep \ + ${CMD} scripts/MM.dml \ + -config conf/std.xml \ + -stats \ + -args $i $j $k $sl $sr $innerRep \ + >>$LogName 2>&1 + ttrain=$(echo "$(date +%s.%N) - $tstart - .4" | bc) + echo "Matrix mult $i x $j %*% $j x $k spL $sl spR $sr :" $ttrain >>results/times.txt -tstart=$(date +%s.%N) -# MKL -perf stat -d -d -d -r 5 \ - ${CMD} scripts/MM.dml \ - -config conf/mkl.xml \ - -stats \ - -args 5000 5000 5000 1.0 1.0 3 \ - >>$LogName 2>&1 -ttrain=$(echo "$(date +%s.%N) - $tstart - .4" | bc) -echo "Matrix mult 5000x5000 %*% 5000x5000 with mkl:" $ttrain >> 
results/times.txt + done + done + done + done +done -tstart=$(date +%s.%N) -# Open Blas -perf stat -d -d -d -r 5 \ - ${CMD} scripts/MM.dml \ - -config conf/openblas.xml \ - -stats \ - -args 5000 5000 5000 1.0 1.0 3 \ - >>$LogName 2>&1 -ttrain=$(echo "$(date +%s.%N) - $tstart - .4" | bc) -echo "Matrix mult 5000x5000 %*% 5000x5000 with openblas:" $ttrain >> results/times.txt +echo -e "\n\n" >>results/times.txt -cat $LogName | grep -E ' ba\+\* |Total elapsed time|-----------| instructions | cycles | CPUs utilized ' >> $LogName.log \ No newline at end of file +cat $LogName | grep -E ' ba\+\* |Total elapsed time|-----------| instructions | cycles | CPUs utilized ' >>$LogName.log diff --git a/scripts/perftest/MatrixTranspose.sh b/scripts/perftest/MatrixTranspose.sh index 50141bbd938..5ebd86168cb 100755 --- a/scripts/perftest/MatrixTranspose.sh +++ b/scripts/perftest/MatrixTranspose.sh @@ -102,6 +102,9 @@ perf stat -d -d -d -r $repeatScript \ ttrain=$(echo "$(date +%s.%N) - $tstart - .4" | bc) echo "Matrix transpose 15000000x30 matrix and sparsity 0.8: " $ttrain >> results/times.txt + +echo -e "\n\n" >>results/times.txt + cat $LogName | grep -E ' r. |Total elapsed time|-----------| instructions | cycles | CPUs utilized ' >> $LogName.log diff --git a/scripts/perftest/README.md b/scripts/perftest/README.md index 426f2e06053..44939391caf 100755 --- a/scripts/perftest/README.md +++ b/scripts/perftest/README.md @@ -28,14 +28,15 @@ There are a few prerequisites: - Setup OpenBlas: - Install Perf stat: -NOTE THE SCRIPT HAS TO BE RUN FROM THE PERFTEST FOLDER. +## NOTE THE SCRIPT HAS TO BE RUN FROM THE PERFTEST FOLDER Examples: + ```bash ./runAll.sh ``` Look inside the runAll script to see how to run individual tests. -Time calculations in the bash scripts additionally subtract a number, e.g. ".4". -This is done to accommodate for time lost by shell script and JVM startup overheads, to match the actual application runtime of SystemML. 
\ No newline at end of file +Time calculations in the bash scripts additionally subtract a number, e.g. ".4". +This is done to account for time lost by shell script and JVM startup overheads, to match the actual application runtime of SystemML. diff --git a/scripts/perftest/conf/SystemDS-config.xml b/scripts/perftest/conf/SystemDS-config.xml index a073707bcb7..e3f5c08d1e9 100755 --- a/scripts/perftest/conf/SystemDS-config.xml +++ b/scripts/perftest/conf/SystemDS-config.xml @@ -18,6 +18,5 @@ --> - mkl diff --git a/scripts/perftest/conf/env-variables b/scripts/perftest/conf/env-variables deleted file mode 100644 index 1549aa13534..00000000000 --- a/scripts/perftest/conf/env-variables +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -#------------------------------------------------------------- - -export LOG4JPROP='conf/log4j-off.properties' -export SYSDS_QUIET=1 - -# stratstats needs a large heap for datasize of 800MB -# export SYSTEMDS_STANDALONE_OPTS="-Xmx10g -Xms10g -Xmn2000m" \ No newline at end of file diff --git a/scripts/perftest/conf/log4j-off.properties b/scripts/perftest/conf/log4j-off.properties index 39f2cd48a5c..f31c73165f3 100755 --- a/scripts/perftest/conf/log4j-off.properties +++ b/scripts/perftest/conf/log4j-off.properties @@ -19,8 +19,9 @@ # #------------------------------------------------------------- -log4j.rootLogger=ALL, console +log4j.rootLogger=ERROR,console +log4j.logger.org.apache=OFF log4j.logger.org.apache.sysds=OFF log4j.logger.org.apache.spark=OFF log4j.logger.org.apache.hadoop=OFF diff --git a/scripts/perftest/conf/mkl.xml b/scripts/perftest/conf/mkl.xml index fd655d76112..4af18677735 100755 --- a/scripts/perftest/conf/mkl.xml +++ b/scripts/perftest/conf/mkl.xml @@ -17,6 +17,5 @@ * under the License. --> - true mkl \ No newline at end of file diff --git a/scripts/perftest/conf/openblas.xml b/scripts/perftest/conf/openblas.xml index 97d7c164953..68df930cabb 100755 --- a/scripts/perftest/conf/openblas.xml +++ b/scripts/perftest/conf/openblas.xml @@ -17,6 +17,5 @@ * under the License. --> - true openblas \ No newline at end of file diff --git a/scripts/perftest/conf/std.xml b/scripts/perftest/conf/std.xml index c465de55475..1b01bf5d32a 100755 --- a/scripts/perftest/conf/std.xml +++ b/scripts/perftest/conf/std.xml @@ -17,5 +17,4 @@ * under the License. 
--> - true \ No newline at end of file diff --git a/scripts/perftest/genALSData.sh b/scripts/perftest/datagen/genALSData.sh similarity index 98% rename from scripts/perftest/genALSData.sh rename to scripts/perftest/datagen/genALSData.sh index fef1eb463b2..3d1a22a6757 100755 --- a/scripts/perftest/genALSData.sh +++ b/scripts/perftest/datagen/genALSData.sh @@ -33,6 +33,8 @@ FORMAT="text" # can be csv, mm, text, binary DENSE_SP=0.9 SPARSE_SP=0.01 +echo "-- Generating ALS data." >> results/times.txt; + #generate XS scenarios (80MB) if [ $MAXMEM -ge 80 ]; then ${CMD} -f ../datagen/genRandData4ALS.dml --nvargs X=${DATADIR}/X10k_1k_dense rows=10000 cols=1000 rank=10 nnz=`echo "scale=0; 10000 * 1000 * $DENSE_SP" | bc` sigma=0.01 fmt=$FORMAT & diff --git a/scripts/perftest/genBinomialData.sh b/scripts/perftest/datagen/genBinomialData.sh similarity index 98% rename from scripts/perftest/genBinomialData.sh rename to scripts/perftest/datagen/genBinomialData.sh index a8027ae1adb..7bf3af96dd8 100755 --- a/scripts/perftest/genBinomialData.sh +++ b/scripts/perftest/datagen/genBinomialData.sh @@ -33,6 +33,8 @@ FORMAT="binary" # can be csv, mm, text, binary DENSE_SP=0.9 SPARSE_SP=0.01 +echo -e "\n\n-- Generating binomial data..." >> results/times.txt; + #generate XS scenarios (80MB) if [ $MAXMEM -ge 80 ]; then ${CMD} -f ../datagen/genRandData4LogisticRegression.dml --args 10000 1000 5 5 ${BASE}/w10k_1k_dense ${BASE}/X10k_1k_dense ${BASE}/y10k_1k_dense 1 0 $DENSE_SP $FORMAT 1 & pidDense80=$! diff --git a/scripts/perftest/genClusteringData.sh b/scripts/perftest/datagen/genClusteringData.sh similarity index 98% rename from scripts/perftest/genClusteringData.sh rename to scripts/perftest/datagen/genClusteringData.sh index 02df510e18f..9fb1e9db451 100755 --- a/scripts/perftest/genClusteringData.sh +++ b/scripts/perftest/datagen/genClusteringData.sh @@ -33,6 +33,8 @@ FORMAT="binary" DENSE_SP=0.9 SPARSE_SP=0.01 +echo "-- Generating clustering data..." 
>> results/times.txt; + #generate XS scenarios (80MB) if [ $MAXMEM -ge 80 ]; then ${CMD} -f ../datagen/genRandData4Kmeans.dml --nvargs nr=10000 nf=1000 nc=5 dc=10.0 dr=1.0 fbf=100.0 cbf=100.0 X=$BASE/X10k_1k_dense C=$BASE/C10k_1k_dense Y=$BASE/y10k_1k_dense YbyC=$BASE/YbyC10k_1k_dense fmt=$FORMAT & pidDense80=$! diff --git a/scripts/perftest/genDescriptiveStatisticsData.sh b/scripts/perftest/datagen/genDescriptiveStatisticsData.sh similarity index 100% rename from scripts/perftest/genDescriptiveStatisticsData.sh rename to scripts/perftest/datagen/genDescriptiveStatisticsData.sh diff --git a/scripts/perftest/genDimensionReductionData.sh b/scripts/perftest/datagen/genDimensionReductionData.sh similarity index 96% rename from scripts/perftest/genDimensionReductionData.sh rename to scripts/perftest/datagen/genDimensionReductionData.sh index 5f146548c0c..1207a0dc417 100755 --- a/scripts/perftest/genDimensionReductionData.sh +++ b/scripts/perftest/datagen/genDimensionReductionData.sh @@ -31,6 +31,8 @@ MAXMEM=$3 FORMAT="binary" +echo "-- Generating Dimension Reduction data." >> results/times.txt; + #generate XS scenarios (80MB) if [ $MAXMEM -ge 80 ]; then ${CMD} -f ../datagen/genRandData4PCA.dml --nvargs R=5000 C=2000 OUT=$BASE/pcaData5k_2k_dense FMT=$FORMAT & diff --git a/scripts/perftest/genL2SVMData.sh b/scripts/perftest/datagen/genL2SVMData.sh similarity index 100% rename from scripts/perftest/genL2SVMData.sh rename to scripts/perftest/datagen/genL2SVMData.sh diff --git a/scripts/perftest/genMultinomialData.sh b/scripts/perftest/datagen/genMultinomialData.sh similarity index 98% rename from scripts/perftest/genMultinomialData.sh rename to scripts/perftest/datagen/genMultinomialData.sh index e7ef1093de7..43dd6ea7ff3 100755 --- a/scripts/perftest/genMultinomialData.sh +++ b/scripts/perftest/datagen/genMultinomialData.sh @@ -33,6 +33,8 @@ FORMAT="binary" DENSE_SP=0.9 SPARSE_SP=0.01 +echo "-- Generating multinomial data..." 
>> results/times.txt; + #generate XS scenarios (80MB) if [ $MAXMEM -ge 80 ]; then ${CMD} -f ../datagen/genRandData4Multinomial.dml $DASH-args 10000 1000 $DENSE_SP 5 0 $BASE/X10k_1k_dense_k5 $BASE/y10k_1k_dense_k5 $FORMAT 1 & pidDense80=$! diff --git a/scripts/perftest/genStratStatisticsData.sh b/scripts/perftest/datagen/genStratStatisticsData.sh similarity index 97% rename from scripts/perftest/genStratStatisticsData.sh rename to scripts/perftest/datagen/genStratStatisticsData.sh index 7aa18e38348..19c38e3fc73 100755 --- a/scripts/perftest/genStratStatisticsData.sh +++ b/scripts/perftest/datagen/genStratStatisticsData.sh @@ -31,6 +31,8 @@ MAXMEM=$3 FORMAT="binary" +echo "-- Generating stats data..." >> results/times.txt; + #XS data 10K rows if [ $MAXMEM -ge 80 ]; then ${CMD} -f ../datagen/genRandData4StratStats.dml --explain --stats --nvargs nr=10000 nf=100 D=${BASE}/A_10k/data Xcid=${BASE}/A_10k/Xcid Ycid=${BASE}/A_10k/Ycid A=${BASE}/A_10k/A fmt=$FORMAT & diff --git a/scripts/perftest/runAll.sh b/scripts/perftest/runAll.sh index 67701a0b84d..db315597bf4 100755 --- a/scripts/perftest/runAll.sh +++ b/scripts/perftest/runAll.sh @@ -26,50 +26,94 @@ then exit 1; fi -# Optional argument that can be a folder name for where generated data is stored -TEMPFOLDER=$1 -if [ "$TEMPFOLDER" == "" ]; then TEMPFOLDER=temp ; fi - # Command to be executed CMD="systemds" -# CMD="./sparkDML.sh" +TEMPFOLDER="temp" # Max memory of data to be benchmarked -MAXMEM=80 # Possible values: 80/80MB, 800/800MB, 8000/8000MB/8GB, 80000/80000MB/80GB, 800000/800000MB/800GB -MAXMEM=${MAXMEM%"MB"}; MAXMEM=${MAXMEM/GB/"000"} +# Possible values: 80/80MB, 800/800MB, 8000/8000MB/8GB, 80000/80000MB/80GB, 800000/800000MB/800GB +MAXMEM=80 # Set properties -source ./conf/env-variables +export LOG4JPROP='conf/log4j-off.properties' +export SYSDS_QUIET=1 +export SYSDS_EXEC_MODE="hybrid" +export SYSTEMDS_STANDALONE_OPTS="-Xmx10g -Xms10g -Xmn2000m" +export SYSDS_DISTRIBUTED=0 + +if [ "$HOSTNAME" = "alpha" ]; then + 
# Just to make it easy to run on our machine without having to change anything. + export SYSTEMDS_STANDALONE_OPTS="-Xmx500g -Xms500g -Xmn50g" + export SYSDS_DISTRIBUTED=1 + export SYSTEMDS_DISTRIBUTED_OPTS="\ + --master yarn \ + --deploy-mode client \ + --driver-memory 500g \ + --conf spark.driver.extraJavaOptions=\"-Xms500g -Xmn50g -Dlog4j.configuration=file:$LOG4JPROP\" \ + --conf spark.executor.extraJavaOptions=\"-Dlog4j.configuration=file:$LOG4JPROP\" \ + --conf spark.executor.heartbeatInterval=100s \ + --files $LOG4JPROP \ + --conf spark.network.timeout=512s \ + --num-executors 6 \ + --executor-memory 105g \ + --executor-cores 32 \ + " + MAXMEM="80GB" +elif [ "$HOSTNAME" = "charlie" ]; then + export SYSTEMDS_STANDALONE_OPTS="-Xmx100g -Xms100g -Xmn10g" + export SYSDS_DISTRIBUTED=1 + export SYSTEMDS_DISTRIBUTED_OPTS="\ + --master yarn \ + --deploy-mode client \ + --driver-memory 100g \ + --conf spark.driver.extraJavaOptions=\"-Xms100g -Xmn10g -Dlog4j.configuration=file:$LOG4JPROP\" \ + --conf spark.executor.extraJavaOptions=\"-Dlog4j.configuration=file:$LOG4JPROP\" \ + --conf spark.executor.heartbeatInterval=100s \ + --files $LOG4JPROP \ + --conf spark.network.timeout=512s \ + --num-executors 6 \ + --executor-memory 105g \ + --executor-cores 32 \ + " + MAXMEM="80GB" +elif [ "$HOSTNAME" = "XPS-15-7590" ]; then + MAXMEM=800 +fi + +# Fix max mem to format. +MAXMEM=${MAXMEM%"MB"}; MAXMEM=${MAXMEM/GB/"000"} # Possible lines to initialize Intel MKL, depending on version and install location -# . ~/intel/bin/compilervars.sh intel64 -# . ~/intel/oneapi/setvars.sh intel64 -# . /opt/intel/bin/compilervars.sh intel64 +if [ -d ~/intel ] && [ -d ~/intel/bin ] && [ -f ~/intel/bin/compilervars.sh ]; then + . ~/intel/bin/compilervars.sh intel64 +elif [ -d /opt ] && [ -d /opt/intel ] && [ -d /opt/intel/bin ]; then + . /opt/intel/bin/compilervars.sh intel64 +fi + +# make dirs if not existing +mkdir -p logs +mkdir -p results +mkdir -p temp # init time measurement -if [ ! 
-d logs ]; then mkdir -p logs ; fi -if [ ! -d results ]; then mkdir -p results ; fi -if [ ! -d temp ]; then mkdir -p temp ; fi -date >> results/times.txt - -### Data Generation -echo "-- Generating binomial data..." >> results/times.txt; -./genBinomialData.sh ${CMD} ${TEMPFOLDER} ${MAXMEM} &> logs/genBinomialData.out -echo "-- Generating multinomial data..." >> results/times.txt; -./genMultinomialData.sh ${CMD} ${TEMPFOLDER} ${MAXMEM} &> logs/genMultinomialData.out -echo "-- Generating stats data..." >> results/times.txt; -./genDescriptiveStatisticsData.sh ${CMD} ${TEMPFOLDER} ${MAXMEM} &> logs/genStatsData.out -./genStratStatisticsData.sh ${CMD} ${TEMPFOLDER} ${MAXMEM} &> logs/genStratStatsData.out -echo "-- Generating clustering data..." >> results/times.txt; -./genClusteringData.sh ${CMD} ${TEMPFOLDER} ${MAXMEM} &> logs/genClusteringData.out -echo "-- Generating Dimension Reduction data." >> results/times.txt; -./genDimensionReductionData.sh ${CMD} ${TEMPFOLDER} ${MAXMEM} &> logs/genDimensionReductionData.out -echo "-- Generating ALS data." 
>> results/times.txt; -./genALSData.sh ${CMD} ${TEMPFOLDER} ${MAXMEM} &> logs/genALSData.out + +rm -f results/times.txt +date +"%Y-%m-%d-%T" >> results/times.txt +echo -e "\n$HOSTNAME" >> results/times.txt +echo -e "\n\n" >> results/times.txt + +## Data Gen +# ./datagen/genBinomialData.sh ${CMD} ${TEMPFOLDER} ${MAXMEM} &> logs/genBinomialData.out +# ./datagen/genMultinomialData.sh ${CMD} ${TEMPFOLDER} ${MAXMEM} &> logs/genMultinomialData.out +# ./datagen/genDescriptiveStatisticsData.sh ${CMD} ${TEMPFOLDER} ${MAXMEM} &> logs/genStatsData.out +# ./datagen/genStratStatisticsData.sh ${CMD} ${TEMPFOLDER} ${MAXMEM} &> logs/genStratStatsData.out +# ./datagen/genClusteringData.sh ${CMD} ${TEMPFOLDER} ${MAXMEM} &> logs/genClusteringData.out +# ./datagen/genDimensionReductionData.sh ${CMD} ${TEMPFOLDER} ${MAXMEM} &> logs/genDimensionReductionData.out +# ./datagen/genALSData.sh ${CMD} ${TEMPFOLDER} ${MAXMEM} &> logs/genALSData.out ### Micro Benchmarks: -./MatrixMult.sh ${CMD} -./MatrixTranspose.sh ${CMD} +#./MatrixMult.sh ${CMD} +#./MatrixTranspose.sh ${CMD} # Federate benchmark #./fed/runAllFed.sh ${CMD} ${TEMPFOLDER} ${MAXMEM} @@ -92,3 +136,5 @@ echo "-- Generating ALS data." 
>> results/times.txt; #./runAllSurvival.sh $CMD $TEMPFOLDER #KaplanMeier #Cox + +cp results/times.txt "results/times-$HOSTNAME-$(date +"%Y-%m-%d-%T").txt" diff --git a/scripts/perftest/runAllALS.sh b/scripts/perftest/runAllALS.sh index b0ac290b366..be3a53df690 100755 --- a/scripts/perftest/runAllALS.sh +++ b/scripts/perftest/runAllALS.sh @@ -53,3 +53,5 @@ do ./${f}.sh ${DATADIR}/X${d} $MAXITR $DATADIR ${CMD} 0.001 FALSE &> logs/${f}_${d}.out; done done + +echo -e "\n\n" >> results/times.txt diff --git a/scripts/perftest/runAllBinomial.sh b/scripts/perftest/runAllBinomial.sh index 3e52fcdcbd8..bd907f0d4ca 100755 --- a/scripts/perftest/runAllBinomial.sh +++ b/scripts/perftest/runAllBinomial.sh @@ -57,3 +57,5 @@ do ./${f}.sh ${BASE}/X${d} ${BASE}/y${d} 2 ${BASE} ${MAXITR} ${COMMAND} &> logs/${f}_${d}.out; done done + +echo -e "\n\n" >> results/times.txt diff --git a/scripts/perftest/runAllClustering.sh b/scripts/perftest/runAllClustering.sh index a5a5a2299ef..4856133c69e 100755 --- a/scripts/perftest/runAllClustering.sh +++ b/scripts/perftest/runAllClustering.sh @@ -53,3 +53,5 @@ do echo "-- Running Kmeans on "$d >> results/times.txt; ./runKmeans.sh ${BASE}/X${d} ${MAXITR} ${BASE} ${COMMAND} &> logs/runKmeans_${d}.out; done + +echo -e "\n\n" >> results/times.txt diff --git a/scripts/perftest/runAllDimensionReduction.sh b/scripts/perftest/runAllDimensionReduction.sh index fb13e44c0b2..e1549266890 100755 --- a/scripts/perftest/runAllDimensionReduction.sh +++ b/scripts/perftest/runAllDimensionReduction.sh @@ -51,3 +51,5 @@ do ./runPCA.sh ${BASE}/pcaData${d} ${BASE} ${COMMAND} &> logs/runPCA_${d}.out; done + +echo -e "\n\n" >> results/times.txt diff --git a/scripts/perftest/runAllMultinomial.sh b/scripts/perftest/runAllMultinomial.sh index d55a0b7b9c6..1078c20581a 100755 --- a/scripts/perftest/runAllMultinomial.sh +++ b/scripts/perftest/runAllMultinomial.sh @@ -66,3 +66,5 @@ do ./${f}.sh ${BASE}/X${d}_k5 ${BASE}/y${d}_k5 5 ${BASE} ${MAXITR} ${COMMAND} &> 
logs/${f}_${d}_k5.out; done done + +echo -e "\n\n" >> results/times.txt diff --git a/scripts/perftest/runAllRegression.sh b/scripts/perftest/runAllRegression.sh index 73fe7dae526..8c6495c503c 100755 --- a/scripts/perftest/runAllRegression.sh +++ b/scripts/perftest/runAllRegression.sh @@ -71,3 +71,5 @@ do ./${f}.sh ${BASE}/X${d} ${BASE}/y${d} ${BASE} ${MAXITR} ${COMMAND} &> logs/${f}_${d}.out; done done + +echo -e "\n\n" >> results/times.txt diff --git a/scripts/perftest/runAllStats.sh b/scripts/perftest/runAllStats.sh index d8f1314932b..c1710bf1ebc 100755 --- a/scripts/perftest/runAllStats.sh +++ b/scripts/perftest/runAllStats.sh @@ -59,3 +59,4 @@ do ./runStratStats.sh ${BASE3}/${d}/data ${BASE3}/${d}/Xcid ${BASE3}/${d}/Ycid ${BASE3} ${COMMAND} &> logs/runStrats-stats_${d}.out; done +echo -e "\n\n" >> results/times.txt diff --git a/scripts/perftest/scripts/alsCG.dml b/scripts/perftest/scripts/alsCG.dml index 913fbbb8d21..2e8115e5778 100644 --- a/scripts/perftest/scripts/alsCG.dml +++ b/scripts/perftest/scripts/alsCG.dml @@ -20,8 +20,8 @@ #------------------------------------------------------------- rank = ifdef($rank, 10); -reg = ifdef($reg, "L2"); -lambda = ifdef($lambda, 0.000001); +regType = ifdef($reg, "L2"); +reg = ifdef($lambda, 0.000001); maxiter = ifdef($maxiter, 50); thr = ifdef($thr, 0.0001); verbose = ifdef($verbose, TRUE); @@ -32,7 +32,7 @@ check = ifdef($check, TRUE); X = read($X); -[U, V] = alsCG(X=X, rank=rank, reg=reg, lambda=lambda, maxi=maxiter, check=check, thr=thr, verbose=verbose); +[U, V] = alsCG(X=X, rank=rank, regType=regType, reg=reg, maxi=maxiter, check=check, thr=thr, verbose=verbose); write(U, $modelU, format=fmt); write(V, $modelV, format=fmt); diff --git a/scripts/perftest/sparkDML.sh b/scripts/perftest/sparkDML.sh deleted file mode 100644 index 370e70c476a..00000000000 --- a/scripts/perftest/sparkDML.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash -#------------------------------------------------------------- -# -# Licensed to the 
Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -#set -x - - -# This script is a simplified version of sparkDML.sh in order to -# allow a simple drop-in replacement for 'hadoop jar' without -# the need to change any command line arguments. 
- -export SPARK_HOME=../spark-2.4.7-bin-hadoop2.7 -export HADOOP_CONF_DIR=/home/hadoop/hadoop-2.7.7/etc/hadoop - -$SPARK_HOME/bin/spark-submit \ - --master yarn \ - --deploy-mode client \ - --driver-memory 20g \ - --conf spark.driver.extraJavaOptions="-Xms20g -Dlog4j.configuration=file:/home/mboehm/perftest/conf/log4j.properties" \ - --conf spark.ui.showConsoleProgress=true \ - --conf spark.executor.heartbeatInterval=100s \ - --conf spark.network.timeout=512s \ - --num-executors 10 \ - --executor-memory 105g \ - --executor-cores 32 \ - SystemDS.jar "$@" \ No newline at end of file diff --git a/scripts/perftest/todo/genRandLogRegData_LTStats.sh b/scripts/perftest/todo/genRandLogRegData_LTStats.sh index 5b25e11a333..a4f4487c574 100755 --- a/scripts/perftest/todo/genRandLogRegData_LTStats.sh +++ b/scripts/perftest/todo/genRandLogRegData_LTStats.sh @@ -23,7 +23,7 @@ # ./genRandLogRegData_LTStats.sh myperftest SPARK 150 LOGISTIC &>> logs/genMultinomialData.out # ./genRandLogRegData_LTStats.sh myperftest SPARK 1 REGRESSION &>> logs/genRegressionData.out if [ "$1" == "" -o "$2" == "" ]; then echo "Usage: $0 e.g. 
$0 perftest SPARK" ; exit 1 ; fi -if [ "$2" == "SPARK" ]; then CMD="./sparkDML.sh "; DASH="-"; elif [ "$2" == "MR" ]; then CMD="hadoop jar SystemDS.jar " ; else CMD="echo " ; fi +# if [ "$2" == "SPARK" ]; then CMD="./sparkDML.sh "; DASH="-"; elif [ "$2" == "MR" ]; then CMD="hadoop jar SystemDS.jar " ; else CMD="echo " ; fi if [ "$3" == "1" ]; then BASE=$1/binomial ; else BASE=$1/multinomial ; fi if [ "$4" == "LOGISTIC" ]; then DATAGEN_SCRIPT=../datagen/genRandData4LogReg_LTstats.dml ; else DATAGEN_SCRIPT=../datagen/genRandData4LinearReg_LTstats.dml ; fi diff --git a/src/main/bash/sparkDML2.sh b/src/main/bash/sparkDML2.sh deleted file mode 100755 index dce35ce2918..00000000000 --- a/src/main/bash/sparkDML2.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -#set -x - -# This script is a simplified version of sparkDML.sh in order to -# allow a simple drop-in replacement for 'hadoop jar' without -# the need to change any command line arguments. 
- -#export HADOOP_CONF_DIR=/etc/hadoop/conf -#SPARK_HOME=../spark-2.3.1-bin-hadoop2.7 -#export HADOOP_HOME=${HADOOP_HOME:-/usr/hdp/2.5.0.0-1245/hadoop} -#HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/usr/hdp/2.5.0.0-1245/hadoop/conf} - -export SPARK_MAJOR_VERSION=2 - -#$SPARK_HOME/bin/spark-submit \ -spark-submit \ - --master yarn \ - --driver-memory 80g \ - --num-executors 1 \ - --executor-memory 60g \ - --executor-cores 19 \ - --conf "spark.yarn.am.extraJavaOptions -Dhdp.version=2.5.0.0-1245" \ - "$@" - -# # run spark submit locally -# spark-submit \ -# "$@"