In [1]:
# Plotting setup for the notebook: matplotlib (rendered inline) plus seaborn styling.
import matplotlib as mpl
from matplotlib import pyplot as plt
%matplotlib inline
# Default (width, height) for every figure created after this cell runs.
mpl.rcParams['figure.figsize'] = (12,6)
import seaborn as sns
# Seaborn "white" style, with the font family overridden to serif.
sns.set_style("white", {"font.family": "serif"})

In [2]:
import sys

# Extend the module search path with the project's local module directory
# (presumably where the `appanalysis` package imported further down lives).
# The membership guard keeps this cell idempotent: re-running it does not
# append a duplicate entry to sys.path each time.
if '../python-modules' not in sys.path:
    sys.path.append('../python-modules')

## GROMACS

GROMACS is the application that we currently have the widest set of data for. This is because it has versions that run on a wide variety of architectures and it is simple to port.

In [3]:
from appanalysis import gromacs

In [4]:
# Platforms in the GROMACS comparison; the tables below iterate this list,
# so its order is the row/column order of the printed output.
systems = [
    'ARCHER',
    'Peta4-Skylake',
    'Cirrus',
    'Isambard',
    'Tesseract',
    'Wilkes2-GPU',
    'JADE',
]

### Best performance per platform comparison

This section compares the best-performing configuration on a single node of each platform.

In [5]:
# GROMACS performance per platform, keyed by system name. Most values are
# read from a benchmark log via gromacs.getperf(); the Isambard and JADE
# figures are entered by hand (the JADE note records its source URL).
perf = {
    'ARCHER': gromacs.getperf('../apps/GROMACS/1400k-atoms/results/ARCHER/benchmark_1nodes_201808060910.log'),
    'Peta4-Skylake': gromacs.getperf('../apps/GROMACS/1400k-atoms/results/CSD3-Skylake/benchmark_1nodes_201808060933.log'),
    'Cirrus': gromacs.getperf('../apps/GROMACS/1400k-atoms/results/Cirrus/benchmark_1nodes36tasks2threads_201808202022.log'),
    'Isambard': 1.471,
    'Tesseract': gromacs.getperf('../apps/GROMACS/1400k-atoms/results/Tesseract/benchmark_1nodes_201808061330.log'),
    'Wilkes2-GPU': gromacs.getperf('../apps/GROMACS/1400k-atoms/results/CSD3-GPU/benchmark_12core4gpu_201808071403.log'),
    'JADE': 1.647,
}

# Optional free-text annotation per system (run configuration and/or data
# source). Systems without an entry simply have no note.
notes = {
    'Cirrus': '(36 tasks, 2 threads)',
    'Isambard': '(128 tasks, 2 threads)',
    'Wilkes2-GPU': '(12 core, 4 GPU)',
    'JADE': '(5 core, 1 GPU), http://www.hecbiosim.ac.uk/jade-benchmarks',
}

In [6]:
# Row layouts: three right-aligned columns for the header, and the same three
# columns followed by a free-text note for each data row.
formath = "{:>15s} {:>15s} {:>15s}"
formatp = "{:>15s} {:>15.3f} {:>15.3f} {:s}"

print("Performance improvement relative to ARCHER:\n")
for titles in (
    ('System', 'Perf. (ns/day)', 'Improvement'),
    ('======', '==============', '==========='),
):
    print(formath.format(*titles))

for system in systems:
    # A system with no recorded result is listed as 0.0 instead of aborting.
    tperf = perf.get(system, 0.0)
    # Improvement is the ratio to the ARCHER figure.
    improvement = tperf / perf['ARCHER']
    print(formatp.format(system, tperf, improvement, notes.get(system, '')))

Performance improvement relative to ARCHER:

         System  Perf. (ns/day)     Improvement
         ARCHER           1.024           1.000 
  Peta4-Skylake           2.005           1.958 
         Cirrus           1.673           1.634 (36 tasks, 2 threads)
       Isambard           1.471           1.437 (128 tasks, 2 threads)
      Tesseract           1.187           1.159 
    Wilkes2-GPU           2.420           2.363 (12 core, 4 GPU)
           JADE           1.647           1.608 (5 core, 1 GPU), http://www.hecbiosim.ac.uk/jade-benchmarks


In [7]:
# Pairwise comparison matrix: the entry in row R, column C is
# perf[R] / perf[C], i.e. the row system's performance relative to the
# column system's.
corner = "{:13s}".format('')
print(corner + "".join("{:>14s}".format(jsystem) for jsystem in systems))
for isystem in systems:
    ratios = [perf[isystem] / perf[jsystem] for jsystem in systems]
    print("{:13s}".format(isystem) + "".join("{:14.3f}".format(ratio) for ratio in ratios))

                     ARCHER Peta4-Skylake        Cirrus      Isambard     Tesseract   Wilkes2-GPU          JADE
ARCHER                1.000         0.511         0.612         0.696         0.863         0.423         0.622
Peta4-Skylake         1.958         1.000         1.198         1.363         1.689         0.829         1.217
Cirrus                1.634         0.834         1.000         1.137         1.409         0.691         1.016
Isambard              1.437         0.734         0.879         1.000         1.239         0.608         0.893
Tesseract             1.159         0.592         0.710         0.807         1.000         0.490         0.721
Wilkes2-GPU           2.363         1.207         1.447         1.645         2.039         1.000         1.469
JADE                  1.608         0.821         0.984         1.120         1.388         0.681         1.000


## OpenSBLI

In [18]:
from appanalysis import osbli

In [19]:
# Platforms in the OpenSBLI comparison; the tables below iterate this list,
# so its order is the row/column order of the printed output.
osbli_systems = [
    'ARCHER',
    'Peta4-Skylake',
    'Cirrus',
    'Isambard',
    'Tesseract',
    'Naples',
]

In [20]:
# One row per platform: (system name, node factor, benchmark output file).
# Performance is node_factor / osbli.gettiming(output file), i.e. the
# reciprocal of the reported timing, scaled by the node factor.
# The Naples factor is 2 because the AMD system has single-socket nodes; this
# assumes perfect intra-node scaling.
_osbli_runs = (
    ('ARCHER', 1.0, '../apps/OpenSBLI/TGV512ss/results/ARCHER/output_1nodes_201808020923.txt'),
    ('Peta4-Skylake', 1.0, '../apps/OpenSBLI/TGV512ss/results/CSD3-Skylake/output_1nodes_201808021001.txt'),
    ('Cirrus', 1.0, '../apps/OpenSBLI/TGV512ss/results/Cirrus/output_1nodes_201808020826.txt'),
    ('Isambard', 1.0, '../apps/OpenSBLI/TGV512ss/results/Isambard/output_1nodes_201808020732.txt'),
    ('Tesseract', 1.0, '../apps/OpenSBLI/TGV512ss/results/Tesseract/output_1nodes_201808021543.txt'),
    ('Naples', 2.0, '../apps/OpenSBLI/TGV512ss/results/AMD_Naples/output_1nodes_201808031517.txt'),
)

osbli_perf = {}
for _name, _node_factor, _output_file in _osbli_runs:
    osbli_perf[_name] = _node_factor / osbli.gettiming(_output_file)

In [21]:
# Row layouts: three right-aligned columns for the header and for each data row.
formath = "{:>15s} {:>15s} {:>15s}"
formatp = "{:>15s} {:>15.3f} {:>15.3f}"

# Baseline for the "Improvement" column. Indexing directly (rather than
# .get('ARCHER', 0.0)) means a missing ARCHER result fails straight away with
# a KeyError naming the key, instead of a ZeroDivisionError on the first row
# after the header has already been printed.
aperf = osbli_perf['ARCHER']

print("Performance improvement relative to ARCHER:\n")
print(formath.format('System', 'Perf. (iter/s)', 'Improvement'))
print(formath.format('======', '==============', '==========='))
for system in osbli_systems:
    # A non-baseline system with no recorded result is listed as 0.0 instead
    # of aborting the table.
    tperf = osbli_perf.get(system, 0.0)
    print(formatp.format(system, tperf, tperf / aperf))

Performance improvement relative to ARCHER:

         System  Perf. (iter/s)     Improvement
         ARCHER           0.100           1.000
  Peta4-Skylake           0.122           1.221
         Cirrus           0.097           0.970
       Isambard           0.178           1.777
      Tesseract           0.066           0.658
         Naples           0.088           0.880


In [22]:
# Pairwise comparison matrix: the entry in row R, column C is
# osbli_perf[R] / osbli_perf[C].
header_cells = ["{:13s}".format('')]
header_cells += ["{:>14s}".format(jsystem) for jsystem in osbli_systems]
print(*header_cells, sep='')

for isystem in osbli_systems:
    iperf = osbli_perf[isystem]
    row_cells = ["{:13s}".format(isystem)]
    row_cells += ["{:14.3f}".format(iperf / osbli_perf[jsystem]) for jsystem in osbli_systems]
    print(*row_cells, sep='')

                     ARCHER Peta4-Skylake        Cirrus      Isambard     Tesseract        Naples
ARCHER                1.000         0.819         1.031         0.563         1.520         1.136
Peta4-Skylake         1.221         1.000         1.259         0.687         1.856         1.387
Cirrus                0.970         0.794         1.000         0.546         1.474         1.102
Isambard              1.777         1.455         1.832         1.000         2.701         2.019
Tesseract             0.658         0.539         0.678         0.370         1.000         0.747
Naples                0.880         0.721         0.908         0.495         1.338         1.000


## CASTEP

In [23]:
from appanalysis import castep

In [24]:
# Platforms in the CASTEP comparison; the tables below iterate this list,
# so its order is the row/column order of the printed output.
castep_systems = [
    'ARCHER',
    'Peta4-Skylake',
    'Cirrus',
    'Isambard',
    'Tesseract',
    'Naples',
]

In [25]:
# CASTEP performance per platform, keyed by system name: the reciprocal of the
# value castep.getmeancycle() returns for that platform's output file.
castep_perf = {
    'ARCHER': 1.0 / castep.getmeancycle('../apps/CASTEP/al3x3/results/ARCHER/al3x3.castep.1nodes'),
    'Peta4-Skylake': 1.0 / castep.getmeancycle('../apps/CASTEP/al3x3/results/CSD3-Skylake/al3x3.castep.1nodes'),
    'Cirrus': 1.0 / castep.getmeancycle('../apps/CASTEP/al3x3/results/Cirrus/17.21_gcc620_impi17/al3x3.castep.1nodes'),
    'Isambard': 1.0 / castep.getmeancycle('../apps/CASTEP/al3x3/results/Isambard/al3x3.castep.1nodes_201806130634'),
    'Tesseract': 1.0 / castep.getmeancycle('../apps/CASTEP/al3x3/results/Tesseract/al3x3_1nodes_201808071417.castep'),
    # Doubled because the AMD system has single-socket nodes; this assumes
    # perfect intra-node scaling.
    'Naples': 2 * 1.0 / castep.getmeancycle('../apps/CASTEP/al3x3/results/AMD_Naples/al3x3.castep.1node'),
}

In [28]:
# Row layouts: three right-aligned columns for the header and for each data
# row (performance shown to five decimals because the values are small).
formath = "{:>15s} {:>15s} {:>15s}"
formatp = "{:>15s} {:>15.5f} {:>15.3f}"

# Baseline for the "Improvement" column. Indexing directly (rather than
# .get('ARCHER', 0.0)) means a missing ARCHER result fails straight away with
# a KeyError naming the key, instead of a ZeroDivisionError on the first row
# after the header has already been printed.
aperf = castep_perf['ARCHER']

print("Performance improvement relative to ARCHER:\n")
print(formath.format('System', 'Perf. (scf/s)', 'Improvement'))
print(formath.format('======', '==============', '==========='))
for system in castep_systems:
    # A non-baseline system with no recorded result is listed as 0.0 instead
    # of aborting the table.
    tperf = castep_perf.get(system, 0.0)
    print(formatp.format(system, tperf, tperf / aperf))

Performance improvement relative to ARCHER:

         System   Perf. (scf/s)     Improvement
         ARCHER         0.00543           1.000
  Peta4-Skylake         0.01641           3.023
         Cirrus         0.01109           2.043
       Isambard         0.00691           1.273
      Tesseract         0.00728           1.341
         Naples         0.00672           1.238


In [27]:
# Pairwise comparison matrix: the entry in row R, column C is
# castep_perf[R] / castep_perf[C].
row_label = "{:13s}".format
print(row_label('') + "".join(map("{:>14s}".format, castep_systems)))
for isystem in castep_systems:
    iperf = castep_perf[isystem]
    ratios = (iperf / castep_perf[jsystem] for jsystem in castep_systems)
    print(row_label(isystem) + "".join(map("{:14.3f}".format, ratios)))

                     ARCHER Peta4-Skylake        Cirrus      Isambard     Tesseract        Naples
ARCHER                1.000         0.331         0.489         0.786         0.746         0.808
Peta4-Skylake         3.023         1.000         1.479         2.375         2.254         2.441
Cirrus                2.043         0.676         1.000         1.605         1.524         1.650
Isambard              1.273         0.421         0.623         1.000         0.949         1.028
Tesseract             1.341         0.444         0.656         1.054         1.000         1.083
Naples                1.238         0.410         0.606         0.973         0.923         1.000
