In [25]:
import json
import csv
import glob
import os
import sys
import math
from bokeh.plotting import figure
from bokeh.io import show, output_notebook, curdoc, output_file, save
from bokeh.models import Legend
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, Select, CustomJS


def read_json_file(path):
    """Load and return the JSON document stored at ``path``.

    Failures are reported on stdout and ``None`` is returned instead of
    raising, so callers must check the result before indexing into it.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON value, or ``None`` when the file is missing, does
        not contain valid JSON, or any other error occurs while reading it.
    """
    try:
        with open(path, 'r') as file:
            return json.load(file)
    except json.JSONDecodeError as e:
        # Report ``path``: the file handle is not a useful name, and it is
        # not even bound when ``open`` itself is what failed.
        print(f"Error decoding JSON: {e}, filename: {path}")
    except FileNotFoundError as e:
        print(f"File not found: {e},filename: {path}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    return None

def get_max_data_point(json_obj, interval):
    """Return the floored maximum of each ``interval``-sized window of samples.

    The samples are read from ``json_obj['data']['result'][0]['values']``;
    element ``[1]`` of every sample is converted to ``float``. The last
    window may hold fewer than ``interval`` samples.

    Args:
        json_obj: Decoded JSON object holding the sample list.
        interval: Number of consecutive samples per window.

    Returns:
        A list with one integer (``math.floor`` of the window maximum) per
        window, in input order.
    """
    samples = json_obj['data']['result'][0]['values']
    return [
        math.floor(max(float(sample[1]) for sample in samples[start:start + interval]))
        for start in range(0, len(samples), interval)
    ]

def get_latency_stat(json_obj, workload, percentile):
    """Return one ``clat_ns`` statistic of the first job.

    Args:
        json_obj: Decoded JSON object with a ``jobs`` list.
        workload: Key of the per-workload section inside the first job
            (for example the value returned by ``get_workload``).
        percentile: ``"mean"`` to read ``clat_ns['mean']``; any other value
            is used as a key into ``clat_ns['percentile']``.

    Returns:
        The selected value exactly as stored in the JSON.
    """
    clat = json_obj['jobs'][0][workload]['clat_ns']
    if percentile == "mean":
        return clat['mean']
    return clat['percentile'][percentile]

def get_workload(json_obj):
    """Classify the first job's ``rw`` option as ``"read"`` or ``"write"``.

    The check is a substring match, so values such as ``"randread"`` map to
    ``"read"`` and ``"randwrite"`` to ``"write"``. A value containing both
    words (e.g. ``"readwrite"``) is reported as ``"read"`` because that
    test runs first.

    Args:
        json_obj: Decoded JSON object with ``jobs[0]["job options"]['rw']``.

    Returns:
        ``"read"`` or ``"write"``.

    Raises:
        ValueError: If the ``rw`` value contains neither word. Previously the
            function printed a message and returned ``None``, which made
            every caller fail later with an unhelpful ``KeyError: None``.
    """
    workload = json_obj['jobs'][0]["job options"]['rw']
    if "read" in workload:
        return "read"
    if "write" in workload:
        return "write"
    raise ValueError(f"unknown workload: {workload!r}")
        
def get_iops(json_obj):
    """Return the ``iops`` value of the first job's workload section.

    The section key is whatever ``get_workload`` reports for ``json_obj``.
    """
    section_key = get_workload(json_obj)
    workload_stats = json_obj['jobs'][0][section_key]
    return workload_stats['iops']

def get_avg_clat(json_obj):
    """Return ``clat_ns['mean']`` of the first job's workload section.

    The section key is whatever ``get_workload`` reports for ``json_obj``.
    """
    section_key = get_workload(json_obj)
    clat = json_obj['jobs'][0][section_key]['clat_ns']
    return clat['mean']

def get_avg_slat(json_obj):
    """Return ``slat_ns['mean']`` of the first job's workload section.

    The section key is whatever ``get_workload`` reports for ``json_obj``.
    """
    section_key = get_workload(json_obj)
    slat = json_obj['jobs'][0][section_key]['slat_ns']
    return slat['mean']

def get_bandwidth(json_obj):
    """Return the ``bw`` value of the first job's workload section.

    The section key is whatever ``get_workload`` reports for ``json_obj``.
    """
    section_key = get_workload(json_obj)
    workload_stats = json_obj['jobs'][0][section_key]
    return workload_stats['bw']

def get_iodepth(json_obj):
    """Return the ``iodepth`` entry of the first job's "job options"."""
    job_options = json_obj['jobs'][0]["job options"]
    return job_options['iodepth']

def get_block_size(json_obj):
    """Return the ``bs`` entry of the first job's "job options"."""
    job_options = json_obj['jobs'][0]["job options"]
    return job_options['bs']

def get_numjobs(json_obj):
    """Return the ``numjobs`` entry of the first job's "job options"."""
    job_options = json_obj['jobs'][0]["job options"]
    return job_options['numjobs']

def get_fio_stats(json_obj):
    """Collect one summary row for the first job, print it and return it.

    Args:
        json_obj: Decoded JSON object as accepted by the ``get_*`` helpers.

    Returns:
        ``[workload, numjobs, block_size, iodepth, avg_slat, avg_clat, iops,
        bw]`` where the two latencies are the ``*_ns`` means divided by 1000
        and rounded to two decimals, ``iops`` is rounded to an integer and
        ``bw`` is the raw value divided by 1024, rounded to two decimals.
    """
    rw_kind = get_workload(json_obj)
    rounded_iops = round(get_iops(json_obj))
    depth = get_iodepth(json_obj)
    job_count = get_numjobs(json_obj)
    bs = get_block_size(json_obj)
    # The source fields are named *_ns, so dividing by 1000 yields microseconds.
    slat_us = round(get_avg_slat(json_obj) / 1000, 2)
    clat_us = round(get_avg_clat(json_obj) / 1000, 2)
    # The original author labels this value "MB unit"; presumably the raw
    # ``bw`` is in KiB/s -- confirm against the fio documentation.
    bw_mb = round(get_bandwidth(json_obj) / 1024, 2)

    row = [rw_kind, job_count, bs, depth, slat_us, clat_us, rounded_iops, bw_mb]
    print(*row)
    return row
    
def write_row(csv_path, arr):
    """Append ``arr`` as a single CSV record to the file at ``csv_path``.

    The file is opened in append mode, so it is created when missing and
    existing content is never truncated.
    """
    # newline='' lets the csv module control line endings itself.
    with open(csv_path, mode='a', newline='') as out:
        csv.writer(out).writerow(arr)

def create_csv_table(csv_path, data):
    """Append ``data`` to ``csv_path`` as a table with one column per block.

    Row ``i`` is built from ``data[blk][i]`` for every ``blk`` in the
    module-level ``blocks`` list, in that order. The number of rows is the
    length of the first block's sequence.

    Args:
        csv_path: CSV file that receives the rows (appended via ``write_row``).
        data: Mapping from each entry of ``blocks`` to an indexable sequence.
    """
    row_count = len(data[blocks[0]])
    for row_idx in range(row_count):
        write_row(csv_path, [data[blk][row_idx] for blk in blocks])
        
def generate_io_rate_range(min_rate, max_rate, step_divisor=30, vm_count=52):
    """Build a list of per-VM rates that sweep an aggregate range.

    Aggregate values start at ``min_rate // step_divisor`` and advance by
    that same amount while they stay below ``max_rate``; each one is then
    integer-divided by ``vm_count``.

    Args:
        min_rate: Aggregate rate that defines the step size.
        max_rate: Exclusive upper bound of the aggregate sweep.
        step_divisor: Divisor applied to ``min_rate`` to obtain the step.
            Defaults to 30, the value that used to be hard-coded.
        vm_count: Number the aggregate rate is split across. Defaults to 52,
            the value that used to be hard-coded.

    Returns:
        A list of integers, one per step of the sweep.

    Raises:
        ValueError: If the computed step is zero (``min_rate`` smaller than
            ``step_divisor``), since the sweep could never advance.
    """
    step = min_rate // step_divisor
    if step == 0:
        raise ValueError(
            f"min_rate ({min_rate}) must be at least step_divisor ({step_divisor})"
        )
    return [n // vm_count for n in range(step, max_rate, step)]

def generate_file_path(vmid, blk, rate):
    """Return the result-file path for one VM, block size and rate.

    The path is the module-level ``pg_256_dir`` prefix followed by a file
    name that embeds ``vmid``, ``blk`` and ``rate``.
    """
    prefix = f"{pg_256_dir}pg_256_2cpu_5g-rhel9-snap-"
    return prefix + f"{vmid}-{blk}-randread-depth-16-rate-{rate}.json"
    # Alternative naming scheme kept from the original for reference:
    #return f"{pg_256_dir}pg_256_io_fixed_randread_no_scrub_2_CPU_5G_RAM_4k_128k-vm-count-52-rhel9-snap-{vmid}-{blk}-randread-depth-16-rate-{rate}.json"


def aggregate_json_stat(workload, percentile, io_rate):
    """Aggregate one latency statistic across VMs for each block size and rate.

    For every entry of the module-level ``blocks`` list and every rate in
    ``io_rate``, the statistic is read from each VM's result file (VM ids
    1..52 except 44), summed, scaled and averaged.

    Args:
        workload: Workload section key passed to ``get_latency_stat``.
        percentile: ``"mean"`` or a percentile key, passed to
            ``get_latency_stat``.
        io_rate: Iterable of per-VM rates; each rate selects a result file.

    Returns:
        A dict keyed by ``"fio_<block>"`` whose values are
        ``{'x': [...], 'y': [...]}`` with one point per rate. Blocks for
        which no rate was processed are absent.
    """
    vm_ids = list(range(1, 53))
    vm_ids.remove(44)

    per_block = {}
    for blk in blocks:
        for rate in io_rate:
            latencies = [
                get_latency_stat(
                    read_json_file(generate_file_path(vmid, blk, rate)),
                    workload,
                    percentile,
                )
                for vmid in vm_ids
            ]
            series = per_block.setdefault(blk, {'x': [], 'y': []})
            # NOTE(review): the source values come from ``clat_ns``; dividing
            # by 10**5 does not give milliseconds (that would be 10**6) --
            # confirm the intended unit.
            series['y'].append(round(sum(latencies) / (10**5) / len(vm_ids), 2))
            # NOTE(review): x is scaled by 52 although only 51 VMs are read.
            series['x'].append(rate * 52 // 1000)
    return {f"fio_{blk}": series for blk, series in per_block.items()}

def create_interactive_plot(datasets):
    """Show a line plot with a dropdown that switches between datasets.

    Args:
        datasets: Mapping from a label to column data with ``'x'`` and ``'y'``
            entries. Each label is interpolated unquoted into the generated
            JavaScript and used as a callback argument name, so it has to be
            a valid JavaScript identifier.
    """
    output_notebook()
    labels = list(datasets.keys())
    print(labels)

    # The plot starts on the first dataset; the dropdown swaps the data later.
    source = ColumnDataSource(data=datasets[labels[0]])

    plot = figure(title="randread - example plot", x_axis_label="IOPS (K)", y_axis_label="ms")
    plot.line('x', 'y', source=source)

    # Every dataset is handed to the callback under its own label.
    callback_args = {'source': source, **datasets}

    # Build a JS object literal that maps each label to the argument of the
    # same name, then swap the source data to the selected entry.
    entries = ", ".join(f"'{label}': {label}" for label in datasets)
    js_code = (
        "\n        var data_sets = {" + entries + "};\n"
        "        source.data = data_sets[cb_obj.value];\n"
        "        source.change.emit();\n"
        "    "
    )

    print("js_code=",js_code)
    callback = CustomJS(args=callback_args, code=js_code)

    # Dropdown listing every dataset label; changing it triggers the callback.
    select = Select(title="Select block size", value=labels[0], options=labels)
    select.js_on_change('value', callback)

    # Lay out the widgets, register them with the current document and render.
    layout = column(select, plot)
    curdoc().add_root(layout)
    output_file("bokeh_plot.html")
    show(layout)


# pg_256_dir = "/home/guoqingli/work/data/cnv-odf/pg_256_default/"
# colors =["blue", "red", "yellow", "green", "orange", "purple"]
# blocks= ["4k", "8k", "16k", "32k", "64k", "128k"]
# # blocks= ["256k", "512k"]
# # blocks= ["1024k", "2048k"]

# # blk_size = {"256k": "indigo","512k":"darkslateblue"}
# # block 4k to 128k
# rw_rate_4k_to_128k = [32,65,97,130,162,195,227,260,292,325,357,390,422,455,487,520,552,585,617,650,682,715,747,780,812,845,877,910]
# rw_rate_256k_to_512k = [16,32,49,65,82,98,114,131,147,164,180,197,213,229,246,262,279,295,312,328,344,361,377,394,410,427,443,459]
# rw_rate_1024k_to_2048k = generate_io_rate_range(5938, 10312)
# rr_rate_4k_to_128k = generate_io_rate_range(188622, 390467)


# rr_rate_256_to_512k = generate_io_rate_range(90590, 143879)
# agg_stat = aggregate_json_stat("read", "99.000000", rr_rate_4k_to_128k)
# create_interactive_plot(agg_stat)
# sys.exit(1)
#table_path="/home/guoqingli/work/data/cnv-odf/randwrite_pg_256_2cpu_5g_1024_2048k.csv"

# write headers
#write_row(table_path, blocks)
#create_csv_table(table_path, agg_stat)

#sys.exit(1)
# p = figure(title="randwrite p99 latency 26 OSDs 2CPU 5G RAM", x_axis_label='IOPS', y_axis_label='ms', y_axis_type="log")
# legend_items = []
# for blk in blk_size.keys():
#     tmp_p = p.line(d[blk]['x_axis'], d[blk]['y_axis'],line_width=1.5, color=blk_size[blk])
#     legend_items.append([blk, [tmp_p]])

# legend = Legend(items=legend_items)
# p.add_layout(legend, 'right')
# show(p)




In [27]:
# Export the stats of the 4k sequential-read runs at each queue depth to a CSV file.
path = "/home/guoqingli/work/data/cnv-odf/rhel9-lvm/"
csv_path = f"{path}csv/lvm_read.csv"
header = [
    "workload",
    "numjobs",
    "block_size",
    "io_depth",
    "slat (us)",
    "clat (us)",
    "iops",
    "bandwidth (MB/s)",
]
# write_row appends, so re-running this cell adds the header and rows again.
write_row(csv_path, header)
# Queue depths are the powers of two from 1 to 1024.
for depth in [2 ** exponent for exponent in range(11)]:
    json_obj = read_json_file(f"{path}lvm-rhel9-lvm-4k-read-depth-{depth}-numjob-1-rate-50000000.json")
    write_row(csv_path, get_fio_stats(json_obj))
    

read 1 4k 1 4.16 34.54 25574 99.9
read 1 4k 2 3.0 48.55 38512 150.44
read 1 4k 4 3.03 62.94 60291 235.51
read 1 4k 8 2.67 95.98 80799 315.62
read 1 4k 16 2.48 150.44 104381 407.74
read 1 4k 32 2.17 193.63 163137 637.25
read 1 4k 64 2.13 250.94 252500 986.33
read 1 4k 128 2.05 387.84 327961 1281.1
read 1 4k 256 2.04 780.49 326975 1277.25
read 1 4k 512 2.04 1580.46 323447 1263.46
read 1 4k 1024 2.02 3085.97 331563 1295.17
