In [None]:
import os
import re
from pathlib import Path

import pandas as pd
from tabulate import tabulate

# Root directory that holds the collected fio reports.
REPORTS_DIR = './reports'
# Expected layout below REPORTS_DIR:
#   1st level - environments
#   2nd level - experiments
#   3rd level - fio reports (files)
#
# reports
#  |-prod
#  |  |-test-001
#  |  |-test-002
#  |-staging
#  |  |-test-001
#  |  |-test-002


# File-name mask for fio report files, written as a regular expression.
# The previous value started with a bare "*", which `re` rejects ("nothing to
# repeat"), and relied on invalid "\." escapes in a non-raw string literal.
# NOTE(review): the (misspelled) name is kept so existing references keep working.
REPORTS_MAKS = r".*\.fio\.report"

def load_envs(reports_dir='./reports', envs=None):
    """Map environment names to their directories below ``reports_dir``.

    Args:
        reports_dir: Directory whose immediate sub-directories are environments.
        envs: Optional collection of environment names to keep. ``None`` or an
            empty collection keeps every sub-directory.

    Returns:
        dict mapping each selected sub-directory name to its joined path.
        Plain files inside ``reports_dir`` are ignored.
    """
    # The default used to be a shared mutable list ([]); None behaves the same
    # here because an empty filter already meant "keep everything".
    selected = {}
    for name in os.listdir(reports_dir):
        if envs and name not in envs:
            continue
        path = os.path.join(reports_dir, name)
        if os.path.isdir(path):
            selected[name] = path
    return selected

def load_fio_repors(exp_dir, mask):
    """Read the report files of one experiment directory.

    Args:
        exp_dir: Directory containing the report files.
        mask: Regular expression searched in each file name; only matching
            files are loaded. ``None`` or an empty string loads every file.
            Previously this argument was accepted but never applied.

    Returns:
        list of ``{'name': <file name>, 'content': <file text>}`` dicts, in the
        order returned by ``os.listdir``. Sub-directories are skipped.

    Raises:
        re.error: if ``mask`` is not a valid regular expression.
    """
    matcher = re.compile(mask) if mask else None
    reports = []
    for name in os.listdir(exp_dir):
        path = os.path.join(exp_dir, name)
        if not os.path.isfile(path):
            continue
        if matcher is not None and matcher.search(name) is None:
            continue
        reports.append({'name': name, 'content': Path(path).read_text()})
    return reports
    
def load_experiments(envs, mask = ".*"):
    """Collect every report of every experiment of the given environments.

    Args:
        envs: Mapping of environment name to environment directory.
        mask: Passed through unchanged to ``load_fio_repors``.

    Returns:
        Flat list of dicts with the keys ``env``, ``exp``, ``report`` and
        ``content``, one per report returned by ``load_fio_repors``.
    """
    collected = []
    for env_name, env_dir in envs.items():
        # Every sub-directory of an environment is treated as one experiment.
        experiment_names = [
            entry for entry in os.listdir(env_dir)
            if os.path.isdir(os.path.join(env_dir, entry))
        ]
        for exp_name in experiment_names:
            exp_dir = os.path.join(env_dir, exp_name)
            collected.extend(
                {
                    'env': env_name,
                    'exp': exp_name,
                    'report': found['name'],
                    'content': found['content'],
                }
                for found in load_fio_repors(exp_dir, mask)
            )
    return collected

def to_mbs(val):
    """Convert a bandwidth string such as ``"4936KiB/s"`` to megabytes per second.

    Decimal units (B/s, KB/s, kB/s, MB/s, GB/s) use powers of 1000 and binary
    units (KiB/s, MiB/s, GiB/s) use powers of 1024; the result is always in
    MB/s (10**6 bytes per second).

    Args:
        val: String made of a number immediately followed by a unit.

    Returns:
        float: The bandwidth in MB/s, or ``0.0`` when ``val`` is not a number
        followed by one of the known units.
    """
    bytes_per_unit = {
        "B/s": 1,
        "KB/s": 1000,
        "kB/s": 1000,
        "MB/s": 1000 ** 2,
        "GB/s": 1000 ** 3,
        "KiB/s": 1024,
        "MiB/s": 1024 ** 2,
        "GiB/s": 1024 ** 3,
    }
    # Split into number and unit explicitly: str.rstrip("MB/s") strips a *set*
    # of characters rather than a suffix, and the MB/s branch used to hand the
    # number back as a string instead of a float.
    parsed = re.match(r"\s*([0-9]+(?:\.[0-9]*)?|\.[0-9]+)\s*([A-Za-z]+/s)\s*$", val)
    if parsed is None:
        return 0.0
    factor = bytes_per_unit.get(parsed.group(2))
    if factor is None:
        return 0.0
    return float(parsed.group(1)) * factor / 1000 / 1000

In [None]:
class Fio2Parser(object):
    """Extract bandwidth and IOPS figures from the text of fio 2.x reports."""

    def __init__(self):
        self.Version = 2

    @staticmethod
    def _parse_pairs(pattern, line):
        """Return ``{key: value}`` for every ``key=value`` token of ``line`` matching ``pattern``."""
        info = {}
        for token in re.findall(pattern, line):
            kv = token.split('=')
            if len(kv) != 2:
                continue
            info[kv[0]] = kv[1]
        return info

    @staticmethod
    def get_report_summary_info_line(line):
        """Parse one line of the run-status summary.

        Returns:
            dict of ``key -> value`` where each value is a number followed by
            its unit, kept as a string (for example ``'10240KB/s'``).
        """
        # Raw string: "\." inside a normal string literal is an invalid escape.
        return Fio2Parser._parse_pairs(r"[a-z]+=[0-9\.]+[a-zA-Z/\.]+", line)

    @staticmethod
    def get_file_info_line(line):
        """Parse one per-job line, keeping only the leading digits of each value.

        Returns:
            dict of ``key -> digits`` (strings), for example ``{'iops': '2560'}``.
        """
        return Fio2Parser._parse_pairs(r"[a-z]+=[0-9]+", line)

    @staticmethod
    def get_report_summary_info(fname, c):
        """Parse the two lines following the ``Run status group 0`` header.

        Args:
            fname: Report name, used only in the diagnostic message.
            c: Full text of the report.

        Returns:
            ``{'read': {...}, 'write': {...}}``, or ``{}`` (after printing a
            message) when the header or the two lines after it are missing.
        """
        lines = c.splitlines()
        idx = next((i for i, e in enumerate(lines) if 'Run status group 0' in e), None)
        if idx is None or idx + 2 >= len(lines):
            print(("Invalid (summary) report: {}".format(fname)))
            return {}
        return {'read': Fio2Parser.get_report_summary_info_line(lines[idx + 1]),
                'write': Fio2Parser.get_report_summary_info_line(lines[idx + 2])}

    @staticmethod
    def get_iops_avg(fname, c):
        """Average the read and write IOPS over all job sections of a report.

        A job section starts at a line containing ``-file``, ``(groupid=`` and
        ``pid=``. The read figures are taken from the next line and the write
        figures from the line 12 rows below the header.

        Args:
            fname: Report name, used only in error messages.
            c: Full text of the report.

        Returns:
            ``{'read': <mean read IOPS>, 'write': <mean write IOPS>}``.

        Raises:
            ValueError: if the report has no job section, or a section is
                truncated or lacks an ``iops=`` value (these cases used to
                surface as ZeroDivisionError / IndexError / KeyError).
        """
        lines = c.splitlines()
        samples = []
        for i, e in enumerate(lines):
            if not ('-file' in e and '(groupid=' in e and 'pid=' in e):
                continue
            if i + 12 >= len(lines):
                raise ValueError("Invalid fio report - {}: truncated job section".format(fname))
            read_info = Fio2Parser.get_file_info_line(lines[i + 1])
            write_info = Fio2Parser.get_file_info_line(lines[i + 12])
            if 'iops' not in read_info or 'iops' not in write_info:
                raise ValueError("Invalid fio report - {}: iops value not found".format(fname))
            samples.append({'read': read_info['iops'], 'write': write_info['iops']})
        if not samples:
            raise ValueError("Invalid fio report - {}: no job sections".format(fname))
        read_iops = sum(int(r['read']) for r in samples) / len(samples)
        write_iops = sum(int(r['write']) for r in samples) / len(samples)
        return {'read': read_iops, 'write': write_iops}

    @staticmethod
    def get_summary(e, info=None):
        """Build one summary row for an experiment report.

        Args:
            e: dict with the keys ``env``, ``exp``, ``report`` and ``content``.
            info: Ignored; the summary is always re-parsed from ``e``. Kept
                (now optional) so existing two-argument calls keep working.

        Returns:
            ``[env, exp, report, read MB/s, write MB/s, read IOPS, write IOPS]``,
            or ``[]`` when the report cannot be parsed.
        """
        info = Fio2Parser.get_report_summary_info(e['report'], e['content'])
        if not info:
            return []
        try:
            iops = Fio2Parser.get_iops_avg(e['report'], e['content'])
            read_bw = info['read']['aggrb']
            write_bw = info['write']['aggrb']
        except (KeyError, ValueError) as err:
            # Treat an unparsable report like an invalid summary: report and skip.
            print("Invalid report: {} ({})".format(e['report'], err))
            return []
        return [e['env'], e['exp'], e['report'], to_mbs(read_bw), to_mbs(write_bw),
                iops['read'], iops['write']]


class Fio3Parser(object):
    """Extract bandwidth and IOPS figures from the text of fio 3.x reports."""

    def __init__(self):
        self.Version = 3

    @staticmethod
    def get_report_summary_info_line(line):
        """Parse one line of the run-status summary.

        Returns:
            dict of ``key -> value`` where the value is a number optionally
            followed by its unit, kept as a string (for example ``'4936KiB/s'``).
        """
        # The unit group must be non-capturing: with a capturing group
        # re.findall returns only the group text, so the "key=" part was lost
        # and kv[1] raised IndexError.
        pattern = r"[a-z]+=[0-9\.]+(?:[a-zA-Z/\.]+)?"
        info = {}
        for token in re.findall(pattern, line):
            kv = token.split('=')
            info[kv[0]] = kv[1]
        return info

    @staticmethod
    def get_report_summary_info(fname, c):
        """Parse the two lines following the ``Run status group 0`` header.

        Args:
            fname: Report name, used only in the diagnostic message.
            c: Full text of the report.

        Returns:
            ``{'read': {...}, 'write': {...}}``, or ``{}`` (after printing a
            message) when the header or the two lines after it are missing.
        """
        lines = c.splitlines()
        idx = next((i for i, e in enumerate(lines) if 'Run status group 0' in e), None)
        if idx is None or idx + 2 >= len(lines):
            print(("Invalid report: {}".format(fname)))
            return {}
        # Qualified calls: the bare function name is not visible from inside a
        # static method and raised NameError, which the old broad ``except``
        # turned into "Invalid report" for every file.
        return {'read': Fio3Parser.get_report_summary_info_line(lines[idx + 1]),
                'write': Fio3Parser.get_report_summary_info_line(lines[idx + 2])}

    @staticmethod
    def get_file_info_line(line):
        """Parse one per-job line, keeping only the leading digits of each value.

        Returns:
            dict of ``key -> digits`` (strings), for example ``{'IOPS': '1234'}``.
        """
        pattern = r"[a-zA-Z]+=[0-9]+"
        info = {}
        for token in re.findall(pattern, line):
            kv = token.split('=')
            if len(kv) != 2:
                continue
            info[kv[0]] = kv[1]
        return info

    @staticmethod
    def _parse_iops(line):
        """Return the ``IOPS=`` value of ``line`` as an int, or ``None`` if absent.

        Honors an SI suffix directly after the number (``k``, ``M``, ``G``),
        e.g. ``IOPS=10.5k`` -> 10500; a digits-only parse would read that as 10.
        """
        found = re.search(r"IOPS=([0-9]+(?:\.[0-9]+)?)([kKMG]?)", line)
        if found is None:
            return None
        scale = {'': 1, 'k': 1000, 'K': 1000, 'M': 1000 ** 2, 'G': 1000 ** 3}[found.group(2)]
        return int(round(float(found.group(1)) * scale))

    @staticmethod
    def get_iops_avg(fname, c):
        """Average the read and write IOPS over all job sections of a report.

        A job section starts at a line containing ``-file``, ``(groupid=`` and
        ``pid=``. The read figures are taken from the next line; the write
        figures from the line 13 rows below the header, or 14 rows below when
        row 13 carries no ``IOPS`` value.

        Args:
            fname: Report name, used only in error messages.
            c: Full text of the report.

        Returns:
            ``{'read': <mean read IOPS>, 'write': <mean write IOPS>}``.

        Raises:
            ValueError: if the report has no job section or the read/write
                IOPS of a section cannot be located.
        """
        lines = c.splitlines()
        samples = []
        for i, e in enumerate(lines):
            if not ('-file' in e and '(groupid=' in e and 'pid=' in e):
                continue
            read_iops = Fio3Parser._parse_iops(lines[i + 1]) if i + 1 < len(lines) else None
            # The original test was inverted: it raised exactly when the
            # expected row *did* contain IOPS. Try row 13, then row 14, and
            # fail only when neither holds the value.
            write_iops = None
            for offset in (13, 14):
                if i + offset < len(lines) and "IOPS" in lines[i + offset]:
                    write_iops = Fio3Parser._parse_iops(lines[i + offset])
                    break
            if read_iops is None or write_iops is None:
                raise ValueError("Invalid fio report - {}".format(fname))
            samples.append({'read': read_iops, 'write': write_iops})
        if not samples:
            raise ValueError("Invalid fio report - {}".format(fname))
        read_iops = sum(r['read'] for r in samples) / len(samples)
        write_iops = sum(r['write'] for r in samples) / len(samples)
        return {'read': read_iops, 'write': write_iops}

    @staticmethod
    def get_summary(e, info=None):
        """Build one summary row for an experiment report.

        Args:
            e: dict with the keys ``env``, ``exp``, ``report`` and ``content``.
            info: Ignored; the summary is always re-parsed from ``e``. Kept
                (now optional) so existing two-argument calls keep working.

        Returns:
            ``[env, exp, report, read MB/s, write MB/s, read IOPS, write IOPS]``,
            or ``[]`` when the report cannot be parsed.
        """
        info = Fio3Parser.get_report_summary_info(e['report'], e['content'])
        if not info:
            return []
        try:
            iops = Fio3Parser.get_iops_avg(e['report'], e['content'])
            read_bw = info['read']['bw']
            write_bw = info['write']['bw']
        except (KeyError, ValueError) as err:
            # Treat an unparsable report like an invalid summary: report and skip.
            print("Invalid report: {} ({})".format(e['report'], err))
            return []
        return [e['env'], e['exp'], e['report'], to_mbs(read_bw), to_mbs(write_bw),
                iops['read'], iops['write']]

def get_fio_parser(fname, content):
    """Pick the parser matching the fio version line found in a report.

    The report is scanned line by line; the first line that starts with
    ``fio-2.`` or ``fio-3.`` decides which parser is instantiated.

    Args:
        fname: Report name (not used by the lookup).
        content: Full text of the report.

    Returns:
        A new ``Fio2Parser`` or ``Fio3Parser`` instance.

    Raises:
        Exception: if no line carries a supported version prefix.
    """
    for report_line in content.splitlines():
        version = re.match(r"fio-([23])\.", report_line)
        if version is None:
            continue
        if version.group(1) == "2":
            return Fio2Parser()
        return Fio3Parser()
    raise Exception("Invalid fio report")

In [None]:
# Allow long result tables to be displayed without truncation.
pd.set_option('display.max_rows', 1000)

exps = load_experiments(load_envs())

# One row per successfully parsed report.
summary = []
for e in exps:
    try:
        parser = get_fio_parser(e['report'], e['content'])
    except Exception:
        # No supported fio version line in this file; it is not a usable report.
        print("Skipping {}".format(e['report']))
        continue

    # The parsers re-derive the summary from `e` and ignore their second
    # argument; the previous call passed an undefined name `info` (NameError).
    s = parser.get_summary(e, None)
    # get_summary returns [] for reports it cannot parse; keep those out of
    # the table instead of adding empty rows.
    if s:
        summary.append(s)

In [None]:
# Tabulate the collected rows, fastest reads (then fastest writes) first.
df = (
    pd.DataFrame(
        summary,
        columns=["env", "exp", "report", "read_mbps", "write_mbps", "iops_read", "iops_write"],
    )
    .sort_values(by=["read_mbps", "write_mbps"], ascending=False)
)
#df

In [None]:
#tabulate(df, tablefmt="pipe", headers="keys")