# POSIX Statefulness' Impact on Performance

Analyze the results of an ensemble of IOR tests that demonstrate how the time it takes to open a file for parallel I/O scales linearly with the number of concurrent clients.  Each IOR job in the ensemble opened files for read, did no I/O, then closed them.  The jobs ran on the Cori system at NERSC using its Knights Landing nodes.

In [None]:
%matplotlib inline

In [None]:
import re
import glob
import pandas
import matplotlib
import matplotlib.pyplot as plt
matplotlib.rcParams.update({'font.size': 18})
import numpy as np

In [None]:
### Regular expressions used to pick lines out of the IOR output.
### Raw strings keep the backslash escapes (\s, \d) intact for the regex
### engine rather than relying on Python passing unknown escapes through.

# A line that starts with "read" and does not contain "POSIX" anywhere after it
REX_RESULT = re.compile(r'^read((?!POSIX).)*$')
# "clients = <total> ... <n> per node"; group 1 = total clients, group 2 = clients per node
REX_CLIENTS = re.compile(r'^\s*clients\s*=\s*(\d+)[^\d]+(\d+)\s+per node')
FILE_MODES = [ 'ssf', 'fpp' ]

In [None]:
### Parse all IOR output files
###
### Builds raw_data, keyed by node count (total clients // clients per node).
### Each entry holds running sums and sums of squares of the open and close
### times plus a measurement count, so that means and standard deviations can
### be derived afterwards without keeping every sample.
import warnings

raw_data = {}
for output_file in glob.glob('ior-r.*x64_*.out'):
    # Reset per file so that a file lacking a "clients = ..." line cannot
    # silently reuse the concurrency parsed from a previously read file.
    num_clients = None
    num_ppn = None
    with open(output_file, 'r') as fp:
        for line in fp:
            ### client concurrency
            match = REX_CLIENTS.search(line)
            if match is not None:
                num_clients = int(match.group(1))
                num_ppn = int(match.group(2))
                continue

            ### extract the performance for opens and closes
            if REX_RESULT.search(line) is None:
                continue
            if not num_ppn:
                # No usable concurrency seen yet in this file (missing line or
                # zero per node): the node count cannot be computed.
                warnings.warn("%s: result line found without a usable "
                              "'clients = ...' line; skipping it" % output_file)
                continue

            fields = line.split()
            t_open = float(fields[4])
            t_close = float(fields[6])
            # Integer division keeps the node count (and thus the DataFrame
            # index built from these keys) an int rather than a float.
            num_nodes = num_clients // num_ppn
            if num_nodes not in raw_data:
                raw_data[num_nodes] = {
                    'sum_open_time': 0.0,
                    'sum_close_time': 0.0,
                    'sum2_open_time': 0.0,
                    'sum2_close_time': 0.0,
                    'num_measurements': 0
                }
            totals = raw_data[num_nodes]
            totals['sum_open_time'] += t_open
            totals['sum_close_time'] += t_close
            totals['sum2_open_time'] += t_open * t_open
            totals['sum2_close_time'] += t_close * t_close
            totals['num_measurements'] += 1

In [None]:
### Compute per-node-count mean and standard deviation of the open and close
### times, then normalize both relative to the row with the smallest index
### (the lowest node count present in raw_data).
df = pandas.DataFrame.from_dict(data=raw_data, orient='index')
df.index.name = 'clients'
for op in 'open', 'close':
    avg = df['sum_'+op+'_time'] / df['num_measurements']
    # Population variance as E[x^2] - E[x]^2.  Floating-point rounding can
    # push this slightly below zero (e.g. with a single measurement), which
    # would turn the square root into NaN, so clamp it at zero first.
    variance = df['sum2_'+op+'_time'] / df['num_measurements'] - avg*avg
    stdev = variance.clip(lower=0.0)**0.5

    # Reference value everything is scaled by
    baseline = avg.loc[min(df.index)]

    df['avg_'+op+'_time'] = avg
    df['stdev_'+op+'_time'] = stdev
    df['rel_'+op+'_time'] = avg / baseline
    # The relative time is avg / baseline, so its spread in the same units is
    # stdev / baseline (not additionally divided by avg).
    df['rel_stdev_'+op+'_time'] = stdev / baseline

df.tail()

In [None]:
### Figure: relative open time versus node count, with a straight-line fit
fig, ax = plt.subplots()
fig.set_size_inches(10, 6)
fig.suptitle("Performance of Concurrent File Opens")

### only rows whose index is greater than 8 are fitted and drawn
above_8 = df.index > 8
df_plot = df.loc[above_8, 'rel_open_time']
df_err = df.loc[above_8, 'rel_stdev_open_time']

### first-degree least-squares fit, drawn as a dashed black line
fit = np.polyfit(df_plot.index, df_plot.values, deg=1)
slope, intercept = fit
ax.plot(df_plot.index,
        slope * df_plot.index + intercept,
        color='#000000',
        linestyle='--')

### error bars only (markersize 0 hides the markers), +/- one stdev
ax.errorbar(df_plot.index,
            df_plot.values,
            yerr=df_err.values,
            color='#00000044',
            capsize=4,
            markersize=0,
            fmt='o')

### the measured points themselves, drawn after the fit line
df_plot.plot(
        ax=ax,
        linestyle='',
        linewidth=3,
        marker='o',
        markersize=10)

### cosmetics: no legend, grid on, axis labels, ticks and ranges
ax.legend().remove()
ax.grid()
ax.set_xlabel("Concurrent Client Nodes")
ax.set_ylabel("Performance Relative to Single-Node")
ax.set_xticks([32, 128, 256, 512])
ax.set_xlim(0, 520)
ax.set_ylim(0, 35)


fig.savefig('open-performance.png', dpi=200)