Skip to content

Commit

Permalink
[MINOR] PerfTest cleanup and update
Browse files Browse the repository at this point in the history
Closes #1617
  • Loading branch information
Baunsgaard committed Jul 25, 2022
1 parent 1db2a0f commit f233c93
Show file tree
Hide file tree
Showing 30 changed files with 176 additions and 202 deletions.
2 changes: 1 addition & 1 deletion scripts/datagen/genRandData4DecisionTree.sh
Expand Up @@ -21,7 +21,7 @@
#-------------------------------------------------------------

if [ "$1" == "" -o "$2" == "" ]; then echo "Usage: $0 <hdfsDataDir> <MR | SPARK | ECHO> e.g. $0 perftest SPARK" ; exit 1 ; fi
if [ "$2" == "SPARK" ]; then CMD="./sparkDML.sh "; DASH="-"; elif [ "$2" == "MR" ]; then CMD="hadoop jar SystemDS.jar " ; else CMD="echo " ; fi
# if [ "$2" == "SPARK" ]; then CMD="./sparkDML.sh "; DASH="-"; elif [ "$2" == "MR" ]; then CMD="hadoop jar SystemDS.jar " ; else CMD="echo " ; fi

BASE=$1/trees

Expand Down
97 changes: 59 additions & 38 deletions scripts/perftest/MatrixMult.sh
Expand Up @@ -20,56 +20,77 @@
#
#-------------------------------------------------------------

if [ "$(basename $PWD)" != "perftest" ];
then
if [ "$(basename $PWD)" != "perftest" ]; then
echo "Please execute scripts from directory 'perftest'"
exit 1;
exit 1
fi

if ! command -v perf &> /dev/null
then
if ! command -v perf &>/dev/null; then
echo "Perf stat not installed for matrix operation benchmarks, see README"
exit 0;
exit 0
fi

CMD=$1

rep=2
innerRep=300
is=("100 1000 5000")
js=("100 1000 5000")
ks=("100 1000 5000")
spar=("1.0 0.35 0.1 0.01")
confs=("conf/std.xml conf/mkl.xml conf/openblas.xml")

# is=("1000")
# js=("1000")
# ks=("1000")
# spar=("1.0 0.01")
# confs=("conf/mkl.xml")
# confs=("conf/openblas.xml")

# Logging output
mkdir -p logs
LogName='logs/MM.log'
rm -f $LogName
rm -f $LogName # full log file
rm -f $LogName.log # Reduced log file

echo "MATRIX MULTIPLICATION" >>results/times.txt

for i in $is; do
for j in $js; do
for k in $ks; do
for con in $confs; do

tstart=$(date +%s.%N)

perf stat -d -d -d -r $rep \
${CMD} scripts/MM.dml \
-config $con \
-stats \
-args $i $j $k 1.0 1.0 $innerRep \
>>$LogName 2>&1
ttrain=$(echo "$(date +%s.%N) - $tstart - .4" | bc)
echo "Matrix mult $i x $j %*% $j x $k $con:" $ttrain >>results/times.txt

tstart=$(date +%s.%N)
# Baseline
perf stat -d -d -d -r 5 \
${CMD} scripts/MM.dml \
-config conf/std.xml \
-stats \
-args 5000 5000 5000 1.0 1.0 3 \
>>$LogName 2>&1
ttrain=$(echo "$(date +%s.%N) - $tstart - .4" | bc)
echo "Matrix mult 5000x5000 %*% 5000x5000 without mkl/openblas:" $ttrain >> results/times.txt
done
for sl in $spar; do
for sr in $spar; do
tstart=$(date +%s.%N)

perf stat -d -d -d -r $rep \
${CMD} scripts/MM.dml \
-config conf/std.xml \
-stats \
-args $i $j $k $sl $sr $innerRep \
>>$LogName 2>&1
ttrain=$(echo "$(date +%s.%N) - $tstart - .4" | bc)
echo "Matrix mult $i x $j %*% $j x $k spL $sl spR $sr :" $ttrain >>results/times.txt

tstart=$(date +%s.%N)
# MKL
perf stat -d -d -d -r 5 \
${CMD} scripts/MM.dml \
-config conf/mkl.xml \
-stats \
-args 5000 5000 5000 1.0 1.0 3 \
>>$LogName 2>&1
ttrain=$(echo "$(date +%s.%N) - $tstart - .4" | bc)
echo "Matrix mult 5000x5000 %*% 5000x5000 with mkl:" $ttrain >> results/times.txt
done
done
done
done
done

tstart=$(date +%s.%N)
# Open Blas
perf stat -d -d -d -r 5 \
${CMD} scripts/MM.dml \
-config conf/openblas.xml \
-stats \
-args 5000 5000 5000 1.0 1.0 3 \
>>$LogName 2>&1
ttrain=$(echo "$(date +%s.%N) - $tstart - .4" | bc)
echo "Matrix mult 5000x5000 %*% 5000x5000 with openblas:" $ttrain >> results/times.txt
echo -e "\n\n" >>results/times.txt

cat $LogName | grep -E ' ba\+\* |Total elapsed time|-----------| instructions | cycles | CPUs utilized ' >> $LogName.log
cat $LogName | grep -E ' ba\+\* |Total elapsed time|-----------| instructions | cycles | CPUs utilized ' >>$LogName.log
3 changes: 3 additions & 0 deletions scripts/perftest/MatrixTranspose.sh
Expand Up @@ -102,6 +102,9 @@ perf stat -d -d -d -r $repeatScript \
ttrain=$(echo "$(date +%s.%N) - $tstart - .4" | bc)
echo "Matrix transpose 15000000x30 matrix and sparsity 0.8: " $ttrain >> results/times.txt


echo -e "\n\n" >>results/times.txt

cat $LogName | grep -E ' r. |Total elapsed time|-----------| instructions | cycles | CPUs utilized ' >> $LogName.log


7 changes: 4 additions & 3 deletions scripts/perftest/README.md
Expand Up @@ -28,14 +28,15 @@ There are a few prerequisites:
- Setup OpenBlas: <https://github.com/xianyi/OpenBLAS/wiki/Precompiled-installation-packages>
- Install Perf stat: <https://linoxide.com/linux-how-to/install-perf-tool-centos-ubuntu/>

NOTE THE SCRIPT HAS TO BE RUN FROM THE PERFTEST FOLDER.
## NOTE THE SCRIPT HAS TO BE RUN FROM THE PERFTEST FOLDER

Examples:

```bash
./runAll.sh
```

Look inside the runAll script to see how to run individual tests.

Time calculations in the bash scripts additionally subtract a small constant, e.g. ".4".
This is done to account for time lost to shell-script and JVM startup overhead, so that the reported time matches the actual application runtime of SystemML.
Time calculations in the bash scripts additionally subtract a small constant, e.g. ".4".
This is done to account for time lost to shell-script and JVM startup overhead, so that the reported time matches the actual application runtime of SystemML.
1 change: 0 additions & 1 deletion scripts/perftest/conf/SystemDS-config.xml
Expand Up @@ -18,6 +18,5 @@
-->

<root>
<!-- enables native blas for matrix multiplication and convolution, experimental feature (options: auto, mkl, openblas, none) -->
<sysds.native.blas>mkl</sysds.native.blas>
</root>
27 changes: 0 additions & 27 deletions scripts/perftest/conf/env-variables

This file was deleted.

3 changes: 2 additions & 1 deletion scripts/perftest/conf/log4j-off.properties
Expand Up @@ -19,8 +19,9 @@
#
#-------------------------------------------------------------

log4j.rootLogger=ALL, console
log4j.rootLogger=ERROR,console

log4j.logger.org.apache=OFF
log4j.logger.org.apache.sysds=OFF
log4j.logger.org.apache.spark=OFF
log4j.logger.org.apache.hadoop=OFF
Expand Down
1 change: 0 additions & 1 deletion scripts/perftest/conf/mkl.xml
Expand Up @@ -17,6 +17,5 @@
* under the License.
-->
<root>
<sysds.cp.parallel.ops>true</sysds.cp.parallel.ops>
<sysds.native.blas>mkl</sysds.native.blas>
</root>
1 change: 0 additions & 1 deletion scripts/perftest/conf/openblas.xml
Expand Up @@ -17,6 +17,5 @@
* under the License.
-->
<root>
<sysds.cp.parallel.ops>true</sysds.cp.parallel.ops>
<sysds.native.blas>openblas</sysds.native.blas>
</root>
1 change: 0 additions & 1 deletion scripts/perftest/conf/std.xml
Expand Up @@ -17,5 +17,4 @@
* under the License.
-->
<root>
<sysds.cp.parallel.ops>true</sysds.cp.parallel.ops>
</root>
Expand Up @@ -33,6 +33,8 @@ FORMAT="text" # can be csv, mm, text, binary
DENSE_SP=0.9
SPARSE_SP=0.01

echo "-- Generating ALS data." >> results/times.txt;

#generate XS scenarios (80MB)
if [ $MAXMEM -ge 80 ]; then
${CMD} -f ../datagen/genRandData4ALS.dml --nvargs X=${DATADIR}/X10k_1k_dense rows=10000 cols=1000 rank=10 nnz=`echo "scale=0; 10000 * 1000 * $DENSE_SP" | bc` sigma=0.01 fmt=$FORMAT &
Expand Down
Expand Up @@ -33,6 +33,8 @@ FORMAT="binary" # can be csv, mm, text, binary
DENSE_SP=0.9
SPARSE_SP=0.01

echo -e "\n\n-- Generating binomial data..." >> results/times.txt;

#generate XS scenarios (80MB)
if [ $MAXMEM -ge 80 ]; then
${CMD} -f ../datagen/genRandData4LogisticRegression.dml --args 10000 1000 5 5 ${BASE}/w10k_1k_dense ${BASE}/X10k_1k_dense ${BASE}/y10k_1k_dense 1 0 $DENSE_SP $FORMAT 1 & pidDense80=$!
Expand Down
Expand Up @@ -33,6 +33,8 @@ FORMAT="binary"
DENSE_SP=0.9
SPARSE_SP=0.01

echo "-- Generating clustering data..." >> results/times.txt;

#generate XS scenarios (80MB)
if [ $MAXMEM -ge 80 ]; then
${CMD} -f ../datagen/genRandData4Kmeans.dml --nvargs nr=10000 nf=1000 nc=5 dc=10.0 dr=1.0 fbf=100.0 cbf=100.0 X=$BASE/X10k_1k_dense C=$BASE/C10k_1k_dense Y=$BASE/y10k_1k_dense YbyC=$BASE/YbyC10k_1k_dense fmt=$FORMAT & pidDense80=$!
Expand Down
Expand Up @@ -31,6 +31,8 @@ MAXMEM=$3

FORMAT="binary"

echo "-- Generating Dimension Reduction data." >> results/times.txt;

#generate XS scenarios (80MB)
if [ $MAXMEM -ge 80 ]; then
${CMD} -f ../datagen/genRandData4PCA.dml --nvargs R=5000 C=2000 OUT=$BASE/pcaData5k_2k_dense FMT=$FORMAT &
Expand Down
File renamed without changes.
Expand Up @@ -33,6 +33,8 @@ FORMAT="binary"
DENSE_SP=0.9
SPARSE_SP=0.01

echo "-- Generating multinomial data..." >> results/times.txt;

#generate XS scenarios (80MB)
if [ $MAXMEM -ge 80 ]; then
${CMD} -f ../datagen/genRandData4Multinomial.dml $DASH-args 10000 1000 $DENSE_SP 5 0 $BASE/X10k_1k_dense_k5 $BASE/y10k_1k_dense_k5 $FORMAT 1 & pidDense80=$!
Expand Down
Expand Up @@ -31,6 +31,8 @@ MAXMEM=$3

FORMAT="binary"

echo "-- Generating stats data..." >> results/times.txt;

#XS data 10K rows
if [ $MAXMEM -ge 80 ]; then
${CMD} -f ../datagen/genRandData4StratStats.dml --explain --stats --nvargs nr=10000 nf=100 D=${BASE}/A_10k/data Xcid=${BASE}/A_10k/Xcid Ycid=${BASE}/A_10k/Ycid A=${BASE}/A_10k/A fmt=$FORMAT &
Expand Down

0 comments on commit f233c93

Please sign in to comment.