In [1]:
root = '/Users/kz2249/fio_weka/'

files = ['weka_12nodes', 'weka_8nodes']
files_nobw = ['famx_central_nossd', 'famx_central_nossd2', 'famx_central_ssd',
              'central_nossd', 'central_ssd']

In [6]:
from collections import defaultdict
import json

def parse_benchmark_files(root, files_bw, files_nobw):
    results = {}
    for file in files_bw:
        results[file] = parse_benchmark_file(root + file, True)
    for file in files_nobw:
        results[file] = parse_benchmark_file(root + file, False)
    return results

def get_next_section(file):
    section = []
    start = False
    
    for line in file:
        if start:
            section.append(line.replace("'", '"'))
        if line.find("Starting tests on") != -1:
            start = True
            n = line.split(' ')[3]
        if line.find("Tests complete.") != -1:
            return section, n
    return False, 0

def fix_unit(value):
    split = value.split()
    if len(split) == 2:
        unit = split[1]
        if unit == 'us':
            result = str(float(split[0]) / 1000) + ' ms'
            return result
        elif unit == 'MiB':
            result = str(float(split[0]) / 1000) + ' GiB'
            if result == None:
                print(split)
            return result
        else:
            return value
    else:
        return value

def parse_benchmark_file(file, bw):
    results = defaultdict(lambda: list())
    with open(file) as f:
        section, n = get_next_section(f)
        while section:
            
            # Parse the section.
            if bw:
                result = parse_section(section)
            else:
                result = parse_section_nobw(section)
                
            # Update the results.
            results['n'].append(n)
            for key, value in result.items():
                results[key].append(fix_unit(value))
            
            # Get the next section of the file.
            section, n = get_next_section(f)      
    return dict(results)

def parse_section(section):
    
    bw = {**json.loads(section[5]), **json.loads(section[6])}
    bw = {'bw_' + key: value for key, value in bw.items()}
    keys = set(['bw_total_read_bandwidth', 'bw_avg_read_bandwidth', 'bw_total_bandwidth',
     'bw_avg_bandwidth', 'bw_avg_write_bandwidth', 'bw_total_write_bandwidth',
     'bw_total write iops', 'bw_total iops', 'bw_avg write iops',
     'bw_total read iops', 'bw_avg read iops', 'bw_avg iops'])
    assert bw.keys() == keys
        
    rw = {**json.loads(section[11]), **json.loads(section[12]), **json.loads(section[13])}
    rw = {'detrw_' + key: value for key, value in rw.items()}
    assert rw.keys() == set(['detrw_total_read_bandwidth', 'detrw_avg_read_bandwidth', 
     'detrw_total_bandwidth', 'detrw_avg_bandwidth', 'detrw_avg_write_bandwidth',
     'detrw_total_write_bandwidth', 'detrw_total write iops', 'detrw_total iops',
     'detrw_avg write iops', 'detrw_total read iops', 'detrw_avg read iops',
     'detrw_avg iops', 'detrw_write latency', 'detrw_read latency'])
    
    write = {**json.loads(section[18]), **json.loads(section[19]), **json.loads(section[20])}
    write = {'detwrite_' + key: value for key, value in write.items()}
    assert write.keys() == set(['detwrite_total_read_bandwidth', 'detwrite_avg_read_bandwidth', 'detwrite_total_bandwidth', 
     'detwrite_avg_bandwidth', 'detwrite_avg_write_bandwidth', 'detwrite_total_write_bandwidth',
     'detwrite_total write iops', 'detwrite_total iops', 'detwrite_avg write iops',
     'detwrite_total read iops', 'detwrite_avg read iops',
     'detwrite_avg iops', 'detwrite_write latency', 'detwrite_read latency'])
    
    iops = {**json.loads(section[25]), **json.loads(section[26])}
    iops = {'iops_' + key: value for key, value in iops.items()}
    assert iops.keys() == set(['iops_total_read_bandwidth', 'iops_avg_read_bandwidth', 'iops_total_bandwidth',
     'iops_avg_bandwidth', 'iops_avg_write_bandwidth', 'iops_total_write_bandwidth', 
     'iops_total write iops', 'iops_total iops', 'iops_avg write iops',
     'iops_total read iops', 'iops_avg read iops', 'iops_avg iops'])
    
    latency = json.loads(section[31])
    latency = {'latency_' + key: value for key, value in latency.items()}
    assert latency.keys() == set(['latency_write latency', 'latency_read latency'])

    return {**iops, **bw, **latency, **write, **rw}

def parse_section_nobw(section):
    
    rw = {**json.loads(section[5]), **json.loads(section[6]), **json.loads(section[7])}
    rw = {'detrw_' + key: value for key, value in rw.items()}
    assert rw.keys() == set(['detrw_total_read_bandwidth', 'detrw_avg_read_bandwidth', 
     'detrw_total_bandwidth', 'detrw_avg_bandwidth', 'detrw_avg_write_bandwidth',
     'detrw_total_write_bandwidth', 'detrw_total write iops', 'detrw_total iops',
     'detrw_avg write iops', 'detrw_total read iops', 'detrw_avg read iops',
     'detrw_avg iops', 'detrw_write latency', 'detrw_read latency'])

    write = {**json.loads(section[12]), **json.loads(section[13]), **json.loads(section[14])}
    write = {'detwrite_' + key: value for key, value in write.items()}
    assert write.keys() == set(['detwrite_total_read_bandwidth', 'detwrite_avg_read_bandwidth', 'detwrite_total_bandwidth', 
     'detwrite_avg_bandwidth', 'detwrite_avg_write_bandwidth', 'detwrite_total_write_bandwidth',
     'detwrite_total write iops', 'detwrite_total iops', 'detwrite_avg write iops',
     'detwrite_total read iops', 'detwrite_avg read iops',
     'detwrite_avg iops', 'detwrite_write latency', 'detwrite_read latency'])

    iops = {**json.loads(section[19]), **json.loads(section[20])}
    iops = {'iops_' + key: value for key, value in iops.items()}
    assert iops.keys() == set(['iops_total_read_bandwidth', 'iops_avg_read_bandwidth', 'iops_total_bandwidth',
     'iops_avg_bandwidth', 'iops_avg_write_bandwidth', 'iops_total_write_bandwidth', 
     'iops_total write iops', 'iops_total iops', 'iops_avg write iops',
     'iops_total read iops', 'iops_avg read iops', 'iops_avg iops'])
    
    latency = json.loads(section[25])
    latency = {'latency_' + key: value for key, value in latency.items()}
    assert latency.keys() == set(['latency_write latency', 'latency_read latency'])

    return {**iops, **latency, **write, **rw}

In [7]:
results = parse_benchmark_files(root, files, files_nobw)
print(results)
with open('result.json', 'w') as fp:
    json.dump(results, fp)

{'weka_12nodes': {'n': ['2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26'], 'iops_total_read_bandwidth': ['1.00 GiB', '1.49 GiB', '1.99 GiB', '2.47 GiB', '2.96 GiB', '3.46 GiB', '3.95 GiB', '4.42 GiB', '4.88 GiB', '5.36 GiB', '5.82 GiB', '6.30 GiB', '6.79 GiB', '7.30 GiB', '7.78 GiB', '8.21 GiB', '8.64 GiB', '9.04 GiB', '9.51 GiB', '9.94 GiB', '10.38 GiB', '10.86 GiB', '11.36 GiB', '11.82 GiB', '12.35 GiB'], 'iops_avg_read_bandwidth': ['0.51346 GiB', '0.5093799999999999 GiB', '0.50895 GiB', '0.50568 GiB', '0.50592 GiB', '0.5064 GiB', '0.50543 GiB', '0.50299 GiB', '0.49991 GiB', '0.49904000000000004 GiB', '0.49622000000000005 GiB', '0.49648000000000003 GiB', '0.49697 GiB', '0.49812 GiB', '0.49791 GiB', '0.49452999999999997 GiB', '0.49148000000000003 GiB', '0.48743000000000003 GiB', '0.48693000000000003 GiB', '0.48489 GiB', '0.48334 GiB', '0.48343 GiB', '0.48488 GiB', '0.48406 GiB', '0.48644 GiB'],

In [25]:
'1.00 GiB'.split()

['1.00', 'GiB']