## Reservation & deployment

In [1]:
from fabfile import *
from real_hpl import *

In [2]:
# Reservation settings shared by every cluster. They are defined once so that
# both reservations are guaranteed to use the same account, image, duration
# and node count.
nb_nodes = 4
USERNAME = 'tocornebize'
DEPLOY_IMAGE = 'debian9-x64-base'
WALLTIME_HOURS = 4


def reserve_cluster(site, cluster, queue):
    """Submit a deployment reservation of `nb_nodes` nodes on one cluster.

    Parameters:
        site: name of the site hosting the cluster (e.g. 'grenoble').
        cluster: name of the cluster to reserve nodes on (e.g. 'dahu').
        queue: name of the OAR queue to submit to (e.g. 'testing').

    Returns:
        Whatever `Job.oarsub_cluster` returns for this reservation.
    """
    # A fresh Time object is built for every reservation, as in the original
    # calls, so that no state is shared between the two jobs.
    return Job.oarsub_cluster(site=site, username=USERNAME, clusters=[cluster],
                              walltime=Time(hours=WALLTIME_HOURS), nb_nodes=nb_nodes,
                              deploy=DEPLOY_IMAGE, queue=queue)


# Submission order matters for the log output: dahu first, then grvingt.
dahu = reserve_cluster(site='grenoble', cluster='dahu', queue='testing')
gr20 = reserve_cluster(site='nancy', cluster='grvingt', queue='production')

alljobs = [dahu, gr20]
for job in alljobs:
    # Read the host list once and reuse it for both the display and the check.
    hostnames = job.hostnames
    print(hostnames)
    if len(hostnames) != nb_nodes:
        # NOTE(review): this only reports the problem, the cell keeps going
        # (assuming `logger` is a regular logging logger; TODO confirm).
        logger.critical('Bad number of nodes')

[32m[2018-07-23 10:55:46][INFO] [37m[frontend | /home/tocornebize] oarsub -n "☕" -q testing -t deploy  -l "{cluster in ('dahu')}/nodes=4,walltime=04:00:00" -r "2018-07-23 10:55:46"[0m
[32m[2018-07-23 10:55:48][INFO] [37m[frontend | /home/tocornebize] oarsub -n "🥐" -q production -t deploy  -l "{cluster in ('grvingt')}/nodes=4,walltime=04:00:00" -r "2018-07-23 10:55:48"[0m
[32m[2018-07-23 10:56:13][INFO] [37m[frontend | /home/tocornebize] oarstat -fJ -j 1806467[0m
[32m[2018-07-23 10:56:13][INFO] [37m[frontend | /home/tocornebize] oarstat -fJ -j 1619955[0m


['dahu-6.grenoble.grid5000.fr', 'dahu-7.grenoble.grid5000.fr', 'dahu-8.grenoble.grid5000.fr', 'dahu-9.grenoble.grid5000.fr']


[32m[2018-07-23 10:56:19][INFO] [37m[frontend | /home/tocornebize] oarstat -fJ -j 1619955[0m
[32m[2018-07-23 10:56:31][INFO] [37m[frontend | /home/tocornebize] oarstat -fJ -j 1619955[0m
[32m[2018-07-23 10:56:52][INFO] [37m[frontend | /home/tocornebize] oarstat -fJ -j 1619955[0m
[32m[2018-07-23 10:57:36][INFO] [37m[frontend | /home/tocornebize] oarstat -fJ -j 1619955[0m


['grvingt-5.nancy.grid5000.fr', 'grvingt-6.nancy.grid5000.fr', 'grvingt-7.nancy.grid5000.fr', 'grvingt-8.nancy.grid5000.fr']


In [3]:
# Deploy, for each reservation in turn, the system image that was requested
# when the job was submitted (stored in `job.deploy`).
for job in alljobs:
    job.kadeploy(job.deploy)

[32m[2018-07-23 10:57:36][INFO] [37m[frontend | /home/tocornebize] kadeploy3 -k -f /var/lib/oar/1806467 -e debian9-x64-base[0m
[32m[2018-07-23 11:01:03][INFO] [37m[frontend | /home/tocornebize] kadeploy3 -k -f /var/lib/oar/1619955 -e debian9-x64-base[0m


In [4]:
# Run the `install` helper on each reservation. Judging from the log output of
# this cell, it upgrades the system packages, installs the build toolchain and
# OpenMPI, builds OpenBLAS under /tmp and downloads HPL.
for job in alljobs:
    install(job)

[32m[2018-07-23 11:04:56][INFO] [37m[allnodes | /tmp] echo "hello world"[0m
[32m[2018-07-23 11:04:57][INFO] [37m[allnodes | /tmp] apt update && DEBIAN_FRONTEND=noninteractive apt upgrade -yq[0m
[32m[2018-07-23 11:05:48][INFO] [37m[allnodes | /tmp] DEBIAN_FRONTEND=noninteractive apt install -y build-essential zip make git time hwloc pciutils cmake cpufrequtils linux-cpupower openmpi-bin libopenmpi-dev net-tools[0m
[32m[2018-07-23 11:06:17][INFO] [37m[allnodes | /tmp] wget https://github.com/xianyi/OpenBLAS/archive/v0.3.1.zip -O openblas.zip[0m
[32m[2018-07-23 11:06:43][INFO] [37m[allnodes | /tmp] unzip openblas.zip && mv OpenBLAS-* openblas[0m
[32m[2018-07-23 11:06:48][INFO] [37m[allnodes | /tmp/openblas] make -j 64[0m
[32m[2018-07-23 11:06:56][INFO] [37m[allnodes | /tmp/openblas] make install PREFIX=/tmp[0m
[32m[2018-07-23 11:06:57][INFO] [37m[allnodes | /tmp] wget http://www.netlib.org/benchmark/hpl/hpl-2.2.tar.gz[0m
[32m[2018-07-23 11:06:59][INFO] [37m[allno

## Performance tuning, estimation of the peak

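We disable hyperthreading on every node. Pinning the CPU frequency (i.e. disabling DVFS) is currently skipped: the corresponding call fails on Dahu, which has no cpufreq driver.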

In [5]:
# Turn hyperthreading off on the nodes of every reservation.
for job in alljobs:
    job.nodes.disable_hyperthreading()
# The frequency-related calls below are deliberately left disabled. They sit
# outside the loop, so if re-enabled as-is they would only apply to the last
# job of `alljobs`.
#job.nodes.set_frequency_performance() # ← fail on Dahu, no cpufreq driver
#print(job.nodes.frequency_information)
#print(job.nodes.current_frequency_information)

[32m[2018-07-23 11:11:37][INFO] [37m[allnodes | /tmp] lstopo topology.xml && cat topology.xml[0m
[32m[2018-07-23 11:11:38][INFO] [37m[allnodes | /tmp] echo -n '0' | tee /sys/devices/system/cpu/cpu32/online /sys/devices/system/cpu/cpu34/online /sys/devices/system/cpu/cpu36/online /sys/devices/system/cpu/cpu38/online /sys/devices/system/cpu/cpu40/online /sys/devices/system/cpu/cpu42/online /sys/devices/system/cpu/cpu44/online /sys/devices/system/cpu/cpu46/online /sys/devices/system/cpu/cpu48/online /sys/devices/system/cpu/cpu50/online /sys/devices/system/cpu/cpu52/online /sys/devices/system/cpu/cpu54/online /sys/devices/system/cpu/cpu56/online /sys/devices/system/cpu/cpu58/online /sys/devices/system/cpu/cpu60/online /sys/devices/system/cpu/cpu62/online /sys/devices/system/cpu/cpu33/online /sys/devices/system/cpu/cpu35/online /sys/devices/system/cpu/cpu37/online /sys/devices/system/cpu/cpu39/online /sys/devices/system/cpu/cpu41/online /sys/devices/system/cpu/cpu43/online /sys/devices

In [6]:
# For each reservation, call `estimate_peak` ten times in a row and print the
# ten values, one per line.
for job in alljobs:
    # The generator keeps `_` out of the notebook namespace, where IPython
    # uses it for the last cell output.
    gflops = list(map(str, (estimate_peak(job) for _ in range(10))))
    print(*gflops, sep='\n')

[32m[2018-07-23 11:11:52][INFO] [37m[allnodes | /tmp] OMP_NUM_THREADS=32 LD_LIBRARY_PATH=/tmp/lib ./dgemm_test 8192 8192 8192 8192 8192 8192 [0m
[32m[2018-07-23 11:11:52][INFO] [37m[allnodes | /tmp] wget https://raw.githubusercontent.com/Ezibenroc/m2_internship_scripts/master/cblas_tests/dgemm_test.c[0m
[32m[2018-07-23 11:11:53][INFO] [37m[allnodes | /tmp] LD_LIBRARY_PATH=/tmp/lib gcc -DUSE_OPENBLAS ./dgemm_test.c -fopenmp -I /tmp/include                 /tmp/lib/libopenblas.so -O3 -o ./dgemm_test[0m
[32m[2018-07-23 11:11:53][INFO] [37m[allnodes | /tmp] OMP_NUM_THREADS=32 LD_LIBRARY_PATH=/tmp/lib ./dgemm_test 8192 8192 8192 8192 8192 8192 [0m
[32m[2018-07-23 11:11:57][INFO] [37m[allnodes | /tmp] OMP_NUM_THREADS=32 LD_LIBRARY_PATH=/tmp/lib ./dgemm_test 8192 8192 8192 8192 8192 8192 [0m
[32m[2018-07-23 11:12:01][INFO] [37m[allnodes | /tmp] OMP_NUM_THREADS=32 LD_LIBRARY_PATH=/tmp/lib ./dgemm_test 8192 8192 8192 8192 8192 8192 [0m
[32m[2018-07-23 11:12:05][INFO] [37m[al

3440.4337241631783
3419.637287371654
3412.8907622412216
3420.78499986291
3450.7596890995887
3463.428170821014
3431.4821949369093
3388.4932349673063
3378.4588613835463
3434.430790935229


[32m[2018-07-23 11:12:32][INFO] [37m[allnodes | /tmp] LD_LIBRARY_PATH=/tmp/lib gcc -DUSE_OPENBLAS ./dgemm_test.c -fopenmp -I /tmp/include                 /tmp/lib/libopenblas.so -O3 -o ./dgemm_test[0m
[32m[2018-07-23 11:12:32][INFO] [37m[allnodes | /tmp] OMP_NUM_THREADS=32 LD_LIBRARY_PATH=/tmp/lib ./dgemm_test 8192 8192 8192 8192 8192 8192 [0m
[32m[2018-07-23 11:12:36][INFO] [37m[allnodes | /tmp] OMP_NUM_THREADS=32 LD_LIBRARY_PATH=/tmp/lib ./dgemm_test 8192 8192 8192 8192 8192 8192 [0m
[32m[2018-07-23 11:12:40][INFO] [37m[allnodes | /tmp] OMP_NUM_THREADS=32 LD_LIBRARY_PATH=/tmp/lib ./dgemm_test 8192 8192 8192 8192 8192 8192 [0m
[32m[2018-07-23 11:12:44][INFO] [37m[allnodes | /tmp] OMP_NUM_THREADS=32 LD_LIBRARY_PATH=/tmp/lib ./dgemm_test 8192 8192 8192 8192 8192 8192 [0m
[32m[2018-07-23 11:12:48][INFO] [37m[allnodes | /tmp] OMP_NUM_THREADS=32 LD_LIBRARY_PATH=/tmp/lib ./dgemm_test 8192 8192 8192 8192 8192 8192 [0m
[32m[2018-07-23 11:12:51][INFO] [37m[allnodes | /tmp] 

3480.975447658493
3444.1009881868017
3479.924850208756
3464.6274065191783
3499.0209039146166
3451.9809701369422
3502.2524903285625
3471.5098882134944
3452.5792521703625
3513.2944242046865


## First (small) HPL run

In [7]:
for job in alljobs:
    send_key(job)
    %time time, gflops, output = run(job, size=2**14, block_size=512, proc_p=2, proc_q=2, bcast=3, pfact=2, rfact=2, depth=1)
    print(time, gflops)

[32m[2018-07-23 11:14:26][INFO] [37m[director | /root] ssh-keygen -b 2048 -t rsa -f .ssh/id_rsa -q -N ""[0m
[32m[2018-07-23 11:14:26][INFO] [37m[director] get: /root/.ssh/id_rsa.pub → /home/tom/Dropbox/Documents/Fac/phd/mpi_calibration/tmpxe91z9z7[0m
[32m[2018-07-23 11:14:26][INFO] [37m[orchestra] put: /home/tom/Dropbox/Documents/Fac/phd/mpi_calibration/tmpxe91z9z7 → /tmp/id_rsa.pub[0m
[32m[2018-07-23 11:14:27][INFO] [37m[orchestra | /root] cat /tmp/id_rsa.pub >> .ssh/authorized_keys[0m
[32m[2018-07-23 11:14:27][INFO] [37m[director | /root] ssh -o "StrictHostKeyChecking no" dahu-7.grenoble.grid5000.fr hostname[0m
[32m[2018-07-23 11:14:27][INFO] [37m[director | /root] ssh -o "StrictHostKeyChecking no" dahu-7 hostname[0m
[32m[2018-07-23 11:14:28][INFO] [37m[director | /root] ssh -o "StrictHostKeyChecking no" dahu-8.grenoble.grid5000.fr hostname[0m
[32m[2018-07-23 11:14:28][INFO] [37m[director | /root] ssh -o "StrictHostKeyChecking no" dahu-8 hostname[0m
[32m[2018

CPU times: user 285 ms, sys: 44.9 ms, total: 330 ms
Wall time: 9.37 s
3.84 764.0


[32m[2018-07-23 11:14:39][INFO] [37m[orchestra] put: /home/tom/Dropbox/Documents/Fac/phd/mpi_calibration/tmpu8ujwgjh → /tmp/id_rsa.pub[0m
[32m[2018-07-23 11:14:39][INFO] [37m[orchestra | /root] cat /tmp/id_rsa.pub >> .ssh/authorized_keys[0m
[32m[2018-07-23 11:14:39][INFO] [37m[director | /root] ssh -o "StrictHostKeyChecking no" grvingt-6.nancy.grid5000.fr hostname[0m
[32m[2018-07-23 11:14:40][INFO] [37m[director | /root] ssh -o "StrictHostKeyChecking no" grvingt-6 hostname[0m
[32m[2018-07-23 11:14:40][INFO] [37m[director | /root] ssh -o "StrictHostKeyChecking no" grvingt-7.nancy.grid5000.fr hostname[0m
[32m[2018-07-23 11:14:40][INFO] [37m[director | /root] ssh -o "StrictHostKeyChecking no" grvingt-7 hostname[0m
[32m[2018-07-23 11:14:41][INFO] [37m[director | /root] ssh -o "StrictHostKeyChecking no" grvingt-8.nancy.grid5000.fr hostname[0m
[32m[2018-07-23 11:14:41][INFO] [37m[director | /root] ssh -o "StrictHostKeyChecking no" grvingt-8 hostname[0m
[32m[2018-07-2

CPU times: user 263 ms, sys: 54.7 ms, total: 318 ms
Wall time: 9.42 s
3.93 746.4


## Larger HPL run

On this small run, the performance of `grvingt` and `dahu` is similar. Let's check whether this still holds with a larger problem size.

In [8]:
results = {}
%time results['dahu'] = run(dahu, size=2**15, block_size=128, proc_p=2, proc_q=2, bcast=3, pfact=2, rfact=2, depth=1)
%time results['gr20'] = run(gr20, size=2**15, block_size=128, proc_p=2, proc_q=2, bcast=3, pfact=2, rfact=2, depth=1)

print('dahu', results['dahu'][:-1])
print('gr20', results['gr20'][:-1])

[32m[2018-07-23 11:16:18][INFO] [37m[allnodes] put: /home/tom/Dropbox/Documents/Fac/phd/mpi_calibration/tmpc1lxcll0 → /tmp/hpl-2.2/bin/Debian/HPL.dat[0m
[32m[2018-07-23 11:16:19][INFO] [37m[director | /tmp/hpl-2.2/bin/Debian] mpirun --allow-run-as-root --bind-to none --timestamp-output -np 4 -x OMP_NUM_THREADS=32 -H dahu-6.grenoble.grid5000.fr,dahu-7.grenoble.grid5000.fr,dahu-8.grenoble.grid5000.fr,dahu-9.grenoble.grid5000.fr -x LD_LIBRARY_PATH=/tmp/lib ./xhpl[0m
[32m[2018-07-23 11:16:54][INFO] [37m[allnodes] put: /home/tom/Dropbox/Documents/Fac/phd/mpi_calibration/tmpl7uaxndm → /tmp/hpl-2.2/bin/Debian/HPL.dat[0m


CPU times: user 784 ms, sys: 168 ms, total: 952 ms
Wall time: 35.7 s


[32m[2018-07-23 11:16:55][INFO] [37m[director | /tmp/hpl-2.2/bin/Debian] mpirun --allow-run-as-root --bind-to none --timestamp-output -np 4 -x OMP_NUM_THREADS=32 -H grvingt-5.nancy.grid5000.fr,grvingt-6.nancy.grid5000.fr,grvingt-7.nancy.grid5000.fr,grvingt-8.nancy.grid5000.fr -x LD_LIBRARY_PATH=/tmp/lib ./xhpl[0m


CPU times: user 690 ms, sys: 136 ms, total: 825 ms
Wall time: 34.6 s
dahu (19.48, 1204.0)
gr20 (18.2, 1289.0)


Similar performance too, great!