# IO500 Results Analyzer

In [None]:
import os
import io
import glob
import datetime
import configparser

import pandas
import matplotlib
import matplotlib.pyplot

import numpy

In [None]:
from pprint import pprint

In [None]:
# Directory that holds one sub-directory per IO500 run.  The default is the
# original author's checkout; set the IO500_RESULTS_BASE environment variable
# to point the notebook at a different results tree without editing this cell.
RESULTS_BASE = os.environ.get(
    'IO500_RESULTS_BASE',
    '/global/homes/g/glock/src/git/io500/results')

In [None]:
def parse_io500_results(result_dir):
    """Converts a directory of IO500 results into a dictionary

    Reads ``result.txt`` (an INI-style file) for the per-test results and
    ``ior-easy-write.txt`` (``key : value`` lines) for the job geometry.

    Args:
        result_dir (str): Path to a directory containing the results of a single IO500 run

    Returns:
        dict: Empty if ``result_dir`` contains no ``result.txt``; otherwise
        contains results of the form::

            {
                "job": {"api", "clients per node", "nodes", "tasks", "xfersize"},
                "result": {"DEFAULT", "SCORE", "find", "header", "ior-easy-read", ...}
            }

        Option names under "result" are lower-cased by configparser.  Known
        numeric options are converted to int/float, ``t_start``/``t_end`` are
        converted to integer epoch seconds, and everything else stays a str.
        "xfersize" is expressed in bytes and is omitted if its unit is not
        recognized.

    Raises:
        FileNotFoundError: If ``result.txt`` exists but ``ior-easy-write.txt``
            does not.
    """
    def _to_epoch(text):
        # timestamps carry no timezone, so they are interpreted as local time
        return int(datetime.datetime.strptime(text, "%Y-%m-%d %H:%M:%S").timestamp())

    RESULT_CONVERTS = {
        "bw": float,
        "md": float,
        "score": float,
        "t_delta": float,
        "rate-stonewall": float,
        "throughput-stonewall": float,
        "total-files": int,
        "found": int,
        "t_end": _to_epoch,
        "t_start": _to_epoch,
    }
    results = {}

    result_file = os.path.join(result_dir, 'result.txt')
    if not os.path.isfile(result_file):
        return results

    # parse result file
    with open(result_file, 'r') as fp:
        result_text = fp.read()
    # result file has some keys lacking a header at the top, so give them a
    # synthetic section that configparser will accept
    config = configparser.ConfigParser()
    config.read_string("[header]\n" + result_text)
    # convert every option to its proper type in a single pass
    results['result'] = {
        section: {key: RESULT_CONVERTS.get(key, str)(value)
                  for key, value in items.items()}
        for section, items in config.items()
    }

    # parse ior for job geometry
    job = {}
    with open(os.path.join(result_dir, 'ior-easy-write.txt'), 'r') as fp:
        for line in fp:
            # lines without a colon carry no key/value pair; skipping them
            # avoids acting on an undefined or stale key from an earlier line
            if ':' not in line:
                continue
            key, val = (part.strip() for part in line.split(':', 1))
            if key in ('nodes', 'tasks', 'clients per node'):
                job[key] = int(val)
            elif key == 'api':
                job[key] = val
            elif key == 'xfersize':
                size = _parse_xfersize(val)
                if size is not None:
                    job[key] = size
    results['job'] = job

    return results


def _parse_xfersize(text):
    """Convert a human-readable size such as ``"2 MiB"`` into a byte count.

    The leading number may be fractional (e.g. ``"1.50 MiB"``).  Returns None
    if ``text`` does not end in KiB, MiB, GiB or bytes.
    """
    units = (('KiB', 2**10), ('MiB', 2**20), ('GiB', 2**30), ('bytes', 1))
    for suffix, multiplier in units:
        if text.endswith(suffix):
            return int(float(text.split()[0]) * multiplier)
    return None

## Load Results

Load all result directories found in the path given by `RESULTS_BASE` and construct a dataframe containing summary metrics from each IO500 run.

In [None]:
# Build one summary record per entry found directly under RESULTS_BASE.
# Any metric a run does not provide is recorded as -1 so it can be filtered
# out later.
records = []
for run_path in glob.glob(os.path.join(RESULTS_BASE, '*')):
    parsed = parse_io500_results(run_path)
    summary = parsed.get('result', {}).get('SCORE', {})
    geometry = parsed.get('job', {})

    record = {'result_id': os.path.basename(run_path)}
    # overall IO500 metrics
    for metric in ('score', 'bw', 'md'):
        record[metric] = summary.get(metric, -1)
    # job geometry taken from the IOR output
    for field in ('clients per node', 'nodes', 'xfersize'):
        record[field] = geometry.get(field, -1)
    records.append(record)

In [None]:
# One row per run, keyed by result directory name and ordered by concurrency
results_df = pandas.DataFrame.from_records(records, index='result_id')
results_df = results_df.sort_values('clients per node')
# Drop runs that produced no valid overall score (missing scores were set to -1)
has_valid_score = results_df['score'] > 0.0
results_df = results_df[has_valid_score]
results_df

## Plot Results

### Select runs to compare

In [None]:
# Transfer size, in bytes, of the runs to compare (128 MiB)
XFERSIZE = 128 * 2**20
# Human-readable titles for the summary metric columns
COLUMN_TITLES = dict(bw="Bandwidth", md="Metadata", score="Score")

In [None]:
# Candidate filter: only the runs performed at the selected transfer size.
filt = results_df['xfersize'] == XFERSIZE
# NOTE(review): this second assignment replaces the filter above, so XFERSIZE
# currently has no effect; every run with a nonzero xfersize is kept.
# Confirm which of the two filters is intended.
filt = results_df['xfersize'].astype(bool)

# Index the selected runs by concurrency and give the metric columns
# presentable titles; columns without a title keep their original name.
selected_runs = results_df[filt]
plot_df = selected_runs.set_index('clients per node')
plot_df = plot_df.rename(columns=lambda name: COLUMN_TITLES.get(name, name))
plot_df

### Plot IO500 Score Components

Plot both the bandwidth and metadata scores to see how they compare to the singular "score" that pops out at the end of IO500 as different tuning parameters change.  This plot shows that metadata and bandwidth performance can not only diverge wildly, but may also be anticorrelated as concurrency increases.

This indicates that the highest IO500 score isn't necessarily the best bandwidth or best metadata scores; rather, it's an arbitrary point of balance between increasing concurrency yielding increasing bandwidth and too much concurrency conflicting with metadata operations.

It is easy to envision a way to optimally design a file system that scores high on IO500; since metadata scores are weighted more highly than bandwidth by virtue of the fact that they are measured in kIOPS vs. GiB/s, a file system with a good IO500 score

1. posts the best possible metadata scores, probably by scaling wide and minimizing the amount of metadata contention that each client generates
2. posts a good ior-easy
3. doesn't matter what ior-hard scores, because the first two will drag the score up

In [None]:
fig, ax = matplotlib.pyplot.subplots()

# Grouped bars: one Bandwidth/Metadata pair per run
component_df = plot_df[['Bandwidth', 'Metadata']]
component_df.plot.bar(ax=ax, width=0.9, edgecolor='black', legend=False)

# Overlay each run's overall score as a short horizontal line spanning its
# pair of bars.  Only the first line is labeled so that the legend shows a
# single "Score" entry.
score_line_style = {
    'linestyle': '-',
    'linewidth': 4,
    'color': 'C2',
}
for position, score in enumerate(plot_df['Score']):
    ax.plot((position - 0.35, position + 0.35),
            (score, score),
            label="Score" if position == 0 else None,
            **score_line_style)

ax.set_axisbelow(True)
ax.yaxis.grid()
ax.set_title("10-node IO500 scores - DataWarp")
# Place the legend in a single row underneath the axes
legend_opts = {
    'bbox_to_anchor': (0.5, -0.40, 0, 0),
    'loc': 'lower center',
    'ncol': 3,
}
ax.legend(**legend_opts)

The following plot shows the range of bw/md scores that a benchmark can post to achieve the same overall score.  There are two orders of magnitude in variation for the metadata scores and three orders of magnitude change for bandwidth scores as a result of the imbalance of kIOPS and GiB/s.  This is because

${md} = \frac{score^2}{bw}$

In [None]:
# Upper axis limits used by the iso-score plot
DW_MAX_BW_10NODE = 80.0 # GiB/s
DW_MAX_MD_10NODE = 80.0 # kIOPS
# Highest overall score among the selected runs
DW_MAX_SCORE_10NODE = plot_df['Score'].max()

In [None]:
fig, ax = matplotlib.pyplot.subplots()
ax.set_axisbelow(True)
ax.grid()
ax.set_xlabel("GiB/s")
ax.set_ylabel("kIOPS")

# Draw one iso-score curve per target score: every (bw, md) point on a curve
# yields the same overall score because md = score^2 / bw.
iso_targets = (DW_MAX_SCORE_10NODE, )
for iso_score in iso_targets:
    bw_values = numpy.arange(1, DW_MAX_BW_10NODE)
    md_values = iso_score * iso_score / bw_values
    curve_label = "Score = %.1f" % iso_score
    ax.plot(bw_values, md_values, label=curve_label, color="black")
ax.legend()

# Linear axes clipped to the chosen bandwidth and metadata maxima
ax.set_xlim(0, DW_MAX_BW_10NODE)
ax.set_ylim(0, DW_MAX_MD_10NODE)
ax.set_title("Iso score")

From this we see that in order to maintain a constant score,

1. if bandwidths are very low, significantly higher IOPS are required
2. if bandwidths are very high, metadata improvements are small

I guess the above statements are trivial.  Where does a real file system land on this?

In [None]:
# Marker settings shared by the scatter points and their legend proxies
marker_opts = {'marker': 'o'}

# Start from the legend entries already on the axes, then add one color and
# one legend entry per distinct transfer size.
legend_handles, legend_labels = ax.get_legend_handles_labels()
color_map = {}
for color_idx, size in enumerate(plot_df['xfersize'].unique()):
    color_map[size] = 'C%d' % color_idx

    # empty line used purely as a legend proxy for this color
    proxy = matplotlib.lines.Line2D(
        [], [], linestyle="", color=color_map[size], **marker_opts)
    legend_handles.append(proxy)

    if size < 2**20:
        size_label = "blksize=%dK" % (size / 2**10)
    else:
        size_label = "blksize=%dM" % (size / 2**20)
    legend_labels.append(size_label)

# Overlay the measured (bandwidth, metadata) points, colored by transfer size
x = plot_df['Bandwidth'].values
y = plot_df['Metadata'].values
point_colors = plot_df['xfersize'].map(color_map.get)
ax.scatter(x, y, color=point_colors, **marker_opts)

# Label each point with its index value, offset slightly from the marker
for point_label, x_pos, y_pos in zip(plot_df.index.values, x, y):
    ax.annotate(point_label, (x_pos + 1.0, y_pos + 1.0))
ax.legend(legend_handles, legend_labels)
ax.get_figure()

On DataWarp, metadata scores are much less volatile than bandwidth scores.