# How much faster is KILLER's per-operation metadata I/O than that of NOVA, SplitFS, and PMFS?

In [14]:
import pandas as pd
import numpy as np

# Per-operation divisors: one row per file system (see the trailing comments),
# one column per entry of `workloads`, in the same order.
# NOTE(review): presumably these are the operation counts of each benchmark
# run -- confirm against the benchmark configuration.
ops = [
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 47087 * 60, 264382 * 60, 502315 * 60, 263680 * 60], # NOVA
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 31838 * 60, 107583 * 60, 171224 * 60, 113574 * 60], # SplitFS
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 47205 * 60, 268363 * 60, 558425 * 60, 183316 * 60], # PMFS
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 79240 * 60, 437532 * 60, 580830 * 60, 344069 * 60], # KILLER
]

killer = "./performance-comparison-table-KILLER"
# Order must match the first three rows of `ops`.
files = ["./performance-comparison-table-NOVA", "./performance-comparison-table-SplitFS", "./performance-comparison-table-PMFS"]
workloads = ["write", "randwrite", "fileserver.f", "varmail.f", "webserver.f", "webproxy.f"]

# `sep=r"\s+"` is the supported spelling of the deprecated `delim_whitespace=True`.
with open(killer, "r") as f:
    df_killer = pd.read_csv(f, sep=r"\s+", engine='python')

for file_idx, file in enumerate(files):
    with open(file, "r") as f:
        print(file)
        df = pd.read_csv(f, sep=r"\s+", engine='python')

    # Start from +/- infinity so the reported extremes are always real
    # measurements, even when every improvement is negative.
    min_improvement = float("inf")
    max_improvement = float("-inf")
    for workload_idx, workload in enumerate(workloads):
        # Sum every numeric column of the workload's row. `numeric_only=True`
        # makes explicit what older pandas did implicitly (dropping the string
        # `workloads` column); newer pandas raises without it.
        # assumes every numeric column of the table is a time component -- TODO confirm
        baseline_row = df[df['workloads'] == workload]
        baseline_per_op = float(baseline_row.sum(axis=1, numeric_only=True).iloc[0]) / ops[file_idx][workload_idx]

        killer_row = df_killer[df_killer['workloads'] == workload]
        killer_per_op = float(killer_row.sum(axis=1, numeric_only=True).iloc[0]) / ops[3][workload_idx]

        # Work on scalars: subtracting two Series would align on row index and
        # yield NaN if the two tables list workloads in a different row order.
        improvement = (baseline_per_op - killer_per_op) / baseline_per_op
        min_improvement = min(min_improvement, improvement)
        max_improvement = max(max_improvement, improvement)

    print("min improvement: ", min_improvement)
    print("max improvement: ", max_improvement)
            

./performance-comparison-table-NOVA
min improvement:  0.13502484885108557
max improvement:  0.7659802171404732
./performance-comparison-table-SplitFS
min improvement:  0.3266643833390039
max improvement:  0.863983680565989
./performance-comparison-table-PMFS
min improvement:  0.05714856603924744
max improvement:  0.7483050327595694


# How much metadata I/O time do PMFS, SplitFS, and NOVA spend per operation?

In [5]:
import pandas as pd
import numpy as np

# Table with one row per file system (see the trailing comments).
# It is not referenced by the loop below; it is kept so the name `ops` stays defined as before.
ops = [
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 47087 * 60, 264382 * 60, 502315 * 60, 263680 * 60], # NOVA
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 31838 * 60, 107583 * 60, 171224 * 60, 113574 * 60], # SplitFS
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 47205 * 60, 268363 * 60, 558425 * 60, 183316 * 60], # PMFS
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 79240 * 60, 437532 * 60, 580830 * 60, 344069 * 60], # KILLER
]

files = ["./performance-comparison-table-NOVA", "./performance-comparison-table-SplitFS", "./performance-comparison-table-PMFS"]
workloads = ["write", "randwrite", "fileserver.f", "varmail.f", "webserver.f", "webproxy.f"]

# For every file and workload, print the fraction of the row total that is
# NOT in the 'D(ns)' column.
for file in files:
    with open(file, "r") as f:
        print(file)
        # `sep=r"\s+"` replaces the deprecated `delim_whitespace=True`.
        df = pd.read_csv(f, sep=r"\s+", engine='python')

    for workload in workloads:
        row = df[df['workloads'] == workload]
        # `numeric_only=True` spells out the old implicit behaviour of skipping
        # the string `workloads` column; newer pandas raises without it.
        # assumes every numeric column is a time component -- TODO confirm
        total_time = row.sum(axis=1, numeric_only=True).values[0]
        data_time = row['D(ns)'].values[0]
        print((total_time - data_time) / total_time)
            

./performance-comparison-table-NOVA
0.7549408732854028
0.689210469535136
0.38375878876593644
0.5627263532020492
0.11344770619189085
0.312858584258666
./performance-comparison-table-SplitFS
0.8189702302563908
0.8481609157439394
0.6383692402292489
0.9177163918181542
0.9738725626426723
0.8553746311198425
./performance-comparison-table-PMFS
0.7454992566479379
0.7250994300691181
0.5477058247651525
0.7654877982745656
0.22895784002635125
0.6974738293190695


# How much metadata commit I/O time do PMFS and SplitFS spend per operation?

In [2]:
import pandas as pd
import numpy as np

# Table with one row per file system (see the trailing comments).
# It is not referenced by the loop below; it is kept so the name `ops` stays defined as before.
ops = [
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 47087 * 60, 264382 * 60, 502315 * 60, 263680 * 60], # NOVA
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 31838 * 60, 107583 * 60, 171224 * 60, 113574 * 60], # SplitFS
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 47205 * 60, 268363 * 60, 558425 * 60, 183316 * 60], # PMFS
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 79240 * 60, 437532 * 60, 580830 * 60, 344069 * 60], # KILLER
]
# The NOVA table ("./performance-comparison-table-NOVA") is left out of this cell.
files = ["./performance-comparison-table-SplitFS", "./performance-comparison-table-PMFS"]
workloads = ["write", "randwrite", "fileserver.f", "varmail.f", "webserver.f", "webproxy.f"]

# For every file and workload, print the 'M(ns)' column as a fraction of the
# row total.
for file in files:
    with open(file, "r") as f:
        print(file)
        # `sep=r"\s+"` replaces the deprecated `delim_whitespace=True`.
        df = pd.read_csv(f, sep=r"\s+", engine='python')

    for workload in workloads:
        row = df[df['workloads'] == workload]
        # `numeric_only=True` spells out the old implicit behaviour of skipping
        # the string `workloads` column; newer pandas raises without it.
        # assumes every numeric column is a time component -- TODO confirm
        total_time = row.sum(axis=1, numeric_only=True).values[0]
        metadata_time = row['M(ns)'].values[0]
        print(metadata_time / total_time)
            

./performance-comparison-table-SplitFS
0.764041545274836
0.8481605520024479
0.5793954428103706
0.8550286027079538
0.9725710115136263
0.8154138574913822
./performance-comparison-table-PMFS
0.2535544791883322
0.26655897694226915
0.17162603794934797
0.49657636038614883
0.16163641936566145
0.5840928513696264


# NOVA Overheads

In [1]:
import pandas as pd
import numpy as np

# Table with one row per file system (see the trailing comments).
# It is not referenced by the loop below; it is kept so the name `ops` stays defined as before.
ops = [
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 47087 * 60, 264382 * 60, 502315 * 60, 263680 * 60], # NOVA
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 31838 * 60, 107583 * 60, 171224 * 60, 113574 * 60], # SplitFS
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 47205 * 60, 268363 * 60, 558425 * 60, 183316 * 60], # PMFS
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 79240 * 60, 437532 * 60, 580830 * 60, 344069 * 60], # KILLER
]
files = ["./performance-comparison-table-NOVA"]
workloads = ["write", "randwrite", "fileserver.f", "varmail.f", "webserver.f", "webproxy.f"]

# One full pass over the files per column: first every 'GC(ns)' share, then
# every 'JC(ns)' share. Each pass prints the file name followed by one ratio
# per workload, so the output layout matches two separate loops.
for column in ("GC(ns)", "JC(ns)"):
    for file in files:
        with open(file, "r") as f:
            print(file)
            # `sep=r"\s+"` replaces the deprecated `delim_whitespace=True`.
            df = pd.read_csv(f, sep=r"\s+", engine='python')

        for workload in workloads:
            row = df[df['workloads'] == workload]
            # `numeric_only=True` spells out the old implicit behaviour of
            # skipping the string `workloads` column; newer pandas raises
            # without it.
            # assumes every numeric column is a time component -- TODO confirm
            total_time = row.sum(axis=1, numeric_only=True).values[0]
            print(row[column].values[0] / total_time)
            

./performance-comparison-table-NOVA
0.37966368069495704
0.3515585842795654
0.01050808914507862
0.06578154465715007
0.03441292622070513
0.03009075179186438
./performance-comparison-table-NOVA
0.09340691273178603
0.08693168832007585
0.12091377175132445
0.2353601325831926
0.019075381642644475
0.13292187581780052


# PMFS Overheads

In [None]:
import pandas as pd
import numpy as np

# Table with one row per file system (see the trailing comments).
# It is not referenced by the loop below; it is kept so the name `ops` stays defined as before.
ops = [
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 47087 * 60, 264382 * 60, 502315 * 60, 263680 * 60], # NOVA
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 31838 * 60, 107583 * 60, 171224 * 60, 113574 * 60], # SplitFS
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 47205 * 60, 268363 * 60, 558425 * 60, 183316 * 60], # PMFS
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 79240 * 60, 437532 * 60, 580830 * 60, 344069 * 60], # KILLER
]
files = ["./performance-comparison-table-PMFS"]
workloads = ["write", "randwrite", "fileserver.f", "varmail.f", "webserver.f", "webproxy.f"]

# For every workload, print ('JM(ns)' + 'JC(ns)') as a fraction of the row total.
for file in files:
    with open(file, "r") as f:
        print(file)
        # `sep=r"\s+"` replaces the deprecated `delim_whitespace=True`.
        df = pd.read_csv(f, sep=r"\s+", engine='python')

    for workload in workloads:
        row = df[df['workloads'] == workload]
        # `numeric_only=True` spells out the old implicit behaviour of skipping
        # the string `workloads` column; newer pandas raises without it.
        # assumes every numeric column is a time component -- TODO confirm
        total_time = row.sum(axis=1, numeric_only=True).values[0]
        jm_time = row['JM(ns)'].values[0]
        jc_time = row['JC(ns)'].values[0]

        print((jm_time + jc_time) / total_time)

./performance-comparison-table-PMFS
0.4919447774596057
0.45854045312684905
0.37607978681580456
0.26891143788841676
0.0673214206606898
0.11338097794944309


# SplitFS Overheads

In [2]:
import pandas as pd
import numpy as np

# Table with one row per file system (see the trailing comments).
# It is not referenced by the loop below; it is kept so the name `ops` stays defined as before.
ops = [
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 47087 * 60, 264382 * 60, 502315 * 60, 263680 * 60], # NOVA
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 31838 * 60, 107583 * 60, 171224 * 60, 113574 * 60], # SplitFS
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 47205 * 60, 268363 * 60, 558425 * 60, 183316 * 60], # PMFS
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 79240 * 60, 437532 * 60, 580830 * 60, 344069 * 60], # KILLER
]
files = ["./performance-comparison-table-SplitFS"]
workloads = ["write", "randwrite", "fileserver.f", "varmail.f", "webserver.f", "webproxy.f"]

# For every workload, print the 'JM|JC(ns)' column as a fraction of the row total.
for file in files:
    with open(file, "r") as f:
        print(file)
        # `sep=r"\s+"` replaces the deprecated `delim_whitespace=True`.
        df = pd.read_csv(f, sep=r"\s+", engine='python')

    for workload in workloads:
        row = df[df['workloads'] == workload]
        # `numeric_only=True` spells out the old implicit behaviour of skipping
        # the string `workloads` column; newer pandas raises without it.
        # assumes every numeric column is a time component -- TODO confirm
        total_time = row.sum(axis=1, numeric_only=True).values[0]
        journal_time = row['JM|JC(ns)'].values[0]

        print(journal_time / total_time)

./performance-comparison-table-SplitFS
0.05492868498155478
3.6374149154090064e-07
0.058973797418878336
0.06268778911020048
0.0013015511290460431
0.03996077362846036


# Calculation of Metadata Amplification

In [3]:
import pandas as pd
import numpy as np

files = ["./performance-comparison-table-NOVA", "./performance-comparison-table-PMFS"]
workloads = ["write", "randwrite", "fileserver.f", "varmail.f", "webserver.f", "webproxy.f"]

# Per-workload results, one list per file system, in `workloads` order.
# These names are read again by later cells, so they are (re)created here.
real_meta_IOs_NOVA = []
real_meta_IOs_PMFS = []
software_meta_IO_percentages_NOVA = []
software_meta_IO_percentages_PMFS = []
meta_times_NOVA = []
meta_times_PMFS = []
meta_time_percentages_NOVA = []
meta_time_percentages_PMFS = []
WAs_NOVA = []
WAs_PMFS = []
RAs_NOVA = []
RAs_PMFS = []
IOAs_NOVA = []
IOAs_PMFS = []

for fid, file in enumerate(files):
    with open(file, "r") as f:
        # `sep=r"\s+"` replaces the deprecated `delim_whitespace=True`.
        df = pd.read_csv(f, sep=r"\s+", engine='python')

    # Time columns that add up to the total I/O time: the two data columns
    # first, then every column from position 14 onward (counted as metadata).
    breakdown_times = df.columns[14:]
    breakdown_times = breakdown_times.insert(0, "data_read_time(ns)")
    breakdown_times = breakdown_times.insert(1, "data_write_time(ns)")

    # files[0] feeds the *_NOVA lists; any other entry feeds the *_PMFS lists.
    if fid == 0:
        out_real_meta, out_sw_pct, out_meta_time, out_meta_time_pct = (
            real_meta_IOs_NOVA, software_meta_IO_percentages_NOVA,
            meta_times_NOVA, meta_time_percentages_NOVA)
        out_wa, out_ra, out_ioa = WAs_NOVA, RAs_NOVA, IOAs_NOVA
    else:
        out_real_meta, out_sw_pct, out_meta_time, out_meta_time_pct = (
            real_meta_IOs_PMFS, software_meta_IO_percentages_PMFS,
            meta_times_PMFS, meta_time_percentages_PMFS)
        out_wa, out_ra, out_ioa = WAs_PMFS, RAs_PMFS, IOAs_PMFS

    print("=====================================")
    print(file)
    for workload_idx, workload in enumerate(workloads):
        # Select the workload's row(s) once. Values are read with
        # `rows[col].iloc[0]` so they stay NumPy scalars (a zero denominator
        # then yields inf/nan with a warning instead of raising).
        rows = df[df['workloads'] == workload]

        total_time = 0
        meta_time = 0
        data_time = 0
        print(breakdown_times)
        for idx, breakdown_time in enumerate(breakdown_times):
            elapsed = rows[breakdown_time].iloc[0]
            total_time += elapsed
            if idx == 0 or idx == 1:
                # The first two entries are the data read/write time columns.
                data_time += elapsed
            else:
                meta_time += elapsed

        print("Total IO time of", workload, ": ", total_time / 1000000000)
        print("Meta IO time of", workload, ": ", meta_time / 1000000000)
        print("Data IO time of", workload, ": ", data_time / 1000000000)
        print("Meta IO time percentage of", workload, ": ", meta_time * 100 / total_time, "%")
        print("Data IO time percentage of", workload, ": ", data_time * 100 / total_time, "%")

        for breakdown_time in breakdown_times:
            elapsed = rows[breakdown_time].iloc[0]
            print(breakdown_time + ": " + str(elapsed / 1000000000) + "s")
            print(breakdown_time + ": " + str(elapsed * 100 / total_time) + "%")

        data_write = rows["data_write(bytes)"].iloc[0]
        data_read = rows["data_read(bytes)"].iloc[0]
        media_write = rows["media_write(byte)"].iloc[0]
        media_read = rows["media_read(byte)"].iloc[0]
        meta_write = rows["meta_write(bytes)"].iloc[0]
        meta_read = rows["meta_read(bytes)"].iloc[0]

        software_meta_IO = meta_write + meta_read
        # Media traffic not explained by data traffic, clamped at zero.
        # NOTE(review): `media_write` cancels out algebraically here, and the
        # stored read/IO amplification below also uses `media_read - media_write`.
        # Presumably the media read counter already includes writes -- confirm.
        media_minus_data = media_write + media_read - media_write - data_write - data_read
        real_meta_IO = 0 if media_minus_data < 0 else media_minus_data
        software_total_IO = software_meta_IO + data_write + data_read

        out_real_meta.append(real_meta_IO)

        print("workload: ", workload)
        print("software_meta_IO: ", software_meta_IO / 1024 / 1024 / 1024)
        print("real_meta_IO: ", real_meta_IO / 1024 / 1024 / 1024)
        print("software_meta_IO percentage: ", software_meta_IO * 100 / software_total_IO, "%")
        # NOTE(review): the printed read/IO amplification uses the raw
        # `media_read`, while the values stored below subtract `media_write`.
        print("write amplification: ", media_write / (meta_write + data_write))
        print("read amplification: ", media_read / (meta_read + data_read))
        print("IO amplification: ", (media_write + media_read) / (meta_write + data_write + meta_read + data_read))

        out_sw_pct.append(software_meta_IO * 100 / software_total_IO)
        out_meta_time.append(meta_time / 1000000000)
        out_meta_time_pct.append(meta_time * 100 / total_time)
        out_wa.append(media_write / (meta_write + data_write))
        out_ra.append((media_read - media_write) / (meta_read + data_read))
        out_ioa.append((media_write + media_read - media_write) / (meta_write + data_write + meta_read + data_read))


    # for workload_idx, workload in enumerate(workloads):

def format_nice(array):
    """Return a new list holding each element of ``array`` rounded to 2 decimals."""
    rounded = []
    for value in array:
        rounded.append(round(value, 2))
    return rounded

# Summary: rounded software-metadata-I/O and metadata-time percentages,
# NOVA first, then PMFS.
for label, values in (
    ("software_meta_IO_percentages_NOVA: ", software_meta_IO_percentages_NOVA),
    ("meta_time_percentages_NOVA: ", meta_time_percentages_NOVA),
    ("software_meta_IO_percentages_PMFS: ", software_meta_IO_percentages_PMFS),
    ("meta_time_percentages_PMFS: ", meta_time_percentages_PMFS),
):
    print(label, format_nice(values))
# print("meta_times_NOVA: ", meta_times_NOVA)
# print("meta_times_PMFS: ", meta_times_PMFS)

./performance-comparison-table-NOVA
Index(['data_read_time(ns)', 'data_write_time(ns)', 'update_entry_time(ns)',
       'update_inode_time(ns)', 'journal_time(ns)',
       'update_page_tail_time(ns)'],
      dtype='object')
Total IO time of write :  42.375228559
Meta IO time of write :  31.990792054
Data IO time of write :  10.384436505
Meta IO time percentage of write :  75.49408732854027 %
Data IO time percentage of write :  24.505912671459722 %
data_read_time(ns): 0.0s
data_read_time(ns): 0.0%
data_write_time(ns): 10.384436505s
data_write_time(ns): 24.505912671459722%
update_entry_time(ns): 11.944316366s
update_entry_time(ns): 28.187025231898527%
update_inode_time(ns): 3.958139137s
update_inode_time(ns): 9.340690945156773%
journal_time(ns): 1.306e-06s
journal_time(ns): 3.081989276309451e-06%
update_page_tail_time(ns): 16.088335245s
update_page_tail_time(ns): 37.9663680694957%
workload:  write
software_meta_IO:  4.1812749253585935
real_meta_IO:  11.62866735458374
software_meta_IO per

# KILLER

In [4]:
import pandas as pd
import numpy as np

files = ["./performance-comparison-table-KILLER"]
workloads = ["write", "randwrite", "fileserver.f", "varmail.f", "webserver.f", "webproxy.f"]
# Per-workload results for the rows whose `file_system` is "KILLER".
real_meta_IOs_of_KILLER = []
package_persist_time_percentages = []
WAs_KILLER = []
RAs_KILLER = []
IOAs_KILLER = []
meta_times_KILLER = []

for fid, file in enumerate(files):
    with open(file, "r") as f:
        # `sep=r"\s+"` replaces the deprecated `delim_whitespace=True`.
        df = pd.read_csv(f, sep=r"\s+", engine='python')

    for workload_idx, workload in enumerate(workloads):
        # Select the matching row(s) once. Values are read with
        # `rows[col].iloc[0]` so they stay NumPy scalars (a zero denominator
        # then yields inf with a warning instead of raising).
        rows = df[(df['workloads'] == workload) & (df['file_system'] == "KILLER")]

        data_write = rows["data_write(bytes)"].iloc[0]
        data_read = rows["data_read(bytes)"].iloc[0]
        media_write = rows["media_write(byte)"].iloc[0]
        media_read = rows["media_read(byte)"].iloc[0]
        meta_write = rows["meta_write(bytes)"].iloc[0]
        meta_read = rows["meta_read(bytes)"].iloc[0]

        data_time = rows["data_time(ns)"].iloc[0]
        meta_time = rows["meta_time(ns)"].iloc[0]
        total_time = data_time + meta_time

        update_bm_time = rows["update_bm_time(ns)"].iloc[0]
        update_package_time = rows["update_package_time(ns)"].iloc[0]

        software_meta_IO = meta_write + meta_read
        # Media traffic not explained by data traffic, clamped at zero.
        # NOTE(review): `media_write` cancels out algebraically here, and the
        # stored read/IO amplification below also uses `media_read - media_write`.
        # Presumably the media read counter already includes writes -- confirm.
        media_minus_data = media_write + media_read - media_write - data_write - data_read
        real_meta_IO = 0 if media_minus_data < 0 else media_minus_data
        software_total_IO = software_meta_IO + data_write + data_read

        real_meta_IOs_of_KILLER.append(real_meta_IO)
        meta_times_KILLER.append(meta_time / 1000000000)

        print("workload: ", workload)
        print("software_meta_IO: ", software_meta_IO / 1024 / 1024 / 1024)
        print("real_meta_IO: ", real_meta_IO / 1024 / 1024 / 1024)
        print("software_meta_IO percentage: ", software_meta_IO * 100 / software_total_IO, "%")
        print("Total IO time (seconds) of", workload, ": ", total_time / 1000000000)
        print("meta percentage: ", meta_time * 100 / total_time, "%")
        print("meta time: ", meta_time / 1000000000, "s")
        print("bm percentage: ", update_bm_time * 100 / total_time, "%")
        print("bm time: ", update_bm_time / 1000000000, "s")
        print("package percentage: ", update_package_time * 100 / total_time, "%")
        print("package time: ", update_package_time / 1000000000, "s")
        # NOTE(review): the printed read/IO amplification uses the raw
        # `media_read`, while the values stored below subtract `media_write`.
        print("write amplification: ", media_write / (meta_write + data_write))
        print("read amplification: ", media_read / (meta_read + data_read))
        print("IO amplification: ", (media_write + media_read) / (meta_write + data_write + meta_read + data_read))
        print("")
        package_persist_time_percentages.append(update_package_time * 100 / total_time)
        WAs_KILLER.append(media_write / (meta_write + data_write))
        RAs_KILLER.append((media_read - media_write) / (meta_read + data_read))
        IOAs_KILLER.append((media_write + media_read - media_write) / (meta_write + data_write + meta_read + data_read))

print("real_meta_IOs_of_KILLER: ", format_nice(real_meta_IOs_of_KILLER))
print("package_persist_time_percentages: ", format_nice(package_persist_time_percentages))

# `format_nice` and the *_NOVA / *_PMFS lists come from the
# "Calculation of Metadata Amplification" cell, which must have been run first.
print("WAs_NOVA: ", format_nice(WAs_NOVA))
print("RAs_NOVA: ", format_nice(RAs_NOVA))
print("IOAs_NOVA: ", format_nice(IOAs_NOVA))
print("WAs_PMFS: ", format_nice(WAs_PMFS))
print("RAs_PMFS: ", format_nice(RAs_PMFS))
print("IOAs_PMFS: ", format_nice(IOAs_PMFS))
print("WAs_KILLER: ", format_nice(WAs_KILLER))
print("RAs_KILLER: ", format_nice(RAs_KILLER))
print("IOAs_KILLER: ", format_nice(IOAs_KILLER))

workload:  write
software_meta_IO:  0.06335536669939756
real_meta_IO:  0.634427547454834
software_meta_IO percentage:  0.19759431280606915 %
Total IO time (seconds) of write :  9.916641786
meta percentage:  33.69344658306689 %
meta time:  3.341258403 s
bm percentage:  0.0023833169040517765 %
bm time:  0.000236345 s
package percentage:  33.691063266162836 %
package time:  3.341022058 s
write amplification:  1.0049284226749344
read amplification:  inf
IO amplification:  2.022739165631317

workload:  randwrite
software_meta_IO:  0.5001223925501108
real_meta_IO:  2.960953712463379
software_meta_IO percentage:  1.5388323357968403 %
Total IO time (seconds) of randwrite :  21.448647251
meta percentage:  46.913967632764624 %
meta time:  10.062411429 s
bm percentage:  0.25836513767746516 %
bm time:  0.055415827 s
package percentage:  46.65560249508716 %
package time:  10.006995602 s
write amplification:  1.02104277864672
read amplification:  inf
IO amplification:  2.096760380266502

workload:  



# Reduced Metadata I/Os per Operation

In [5]:
min_amps = []
max_amps = []

min_times = []
max_times = []

# Per-operation divisors, one column per workload. The loops below pair
# row 0 with the NOVA lists, row 1 with the PMFS lists and row 2 with the
# KILLER lists.
ops = [
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 47087 * 60, 264382 * 60, 502315 * 60, 263680 * 60],
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 47205 * 60, 268363 * 60, 558425 * 60, 183316 * 60],
    [32 * 1024 * 1024 / 4, 32 * 1024 * 1024 / 4, 79240 * 60, 437532 * 60, 580830 * 60, 344069 * 60]
]

# Ratio of per-operation metadata I/O: NOVA and PMFS relative to KILLER.
# Workloads where KILLER's value is zero are skipped.
for workload_pos, killer_meta_io in enumerate(real_meta_IOs_of_KILLER):
    if killer_meta_io != 0:
        killer_io_per_op = killer_meta_io / ops[2][workload_pos]
        nova_io_ratio = real_meta_IOs_NOVA[workload_pos] / killer_io_per_op / ops[0][workload_pos]
        pmfs_io_ratio = real_meta_IOs_PMFS[workload_pos] / killer_io_per_op / ops[1][workload_pos]
        min_amps.append(min(nova_io_ratio, pmfs_io_ratio))
        max_amps.append(max(nova_io_ratio, pmfs_io_ratio))

# Same ratio for per-operation metadata time; each pair is also printed.
for workload_pos, killer_meta_time in enumerate(meta_times_KILLER):
    if killer_meta_time != 0:
        killer_time_per_op = killer_meta_time / ops[2][workload_pos]
        nova_time_ratio = meta_times_NOVA[workload_pos] / killer_time_per_op / ops[0][workload_pos]
        pmfs_time_ratio = meta_times_PMFS[workload_pos] / killer_time_per_op / ops[1][workload_pos]
        min_times.append(min(nova_time_ratio, pmfs_time_ratio))
        max_times.append(max(nova_time_ratio, pmfs_time_ratio))
        print(nova_time_ratio, pmfs_time_ratio)

print("min_amps: ", min_amps)
print("max_amps: ", max_amps)
print("min-max: ", min(min_amps), max(max_amps))

print("min_times: ", min_times)
print("max_times: ", max_times)
print("min-max: ", min(min_times), max(max_times))

9.574474103911442 6.801746619954553
3.1232833648030707 2.4853654408250887
2.1707822456589856 2.349951860710183
1.8896971065053512 2.833243192564914
5.487208773440336 9.411168589012506
1.6300823579927044 6.468899230453607
min_amps:  [12.668330713999568, 4.409876770855309, 2.8908129858660234, 1.8336244927792262, 1.7039669067088856]
max_amps:  [18.329385918431615, 4.524832154239343, 3.7986896773779764, 2.890436322241741, 2.0068059460788397]
min-max:  1.7039669067088856 18.329385918431615
min_times:  [6.801746619954553, 2.4853654408250887, 2.1707822456589856, 1.8896971065053512, 5.487208773440336, 1.6300823579927044]
max_times:  [9.574474103911442, 3.1232833648030707, 2.349951860710183, 2.833243192564914, 9.411168589012506, 6.468899230453607]
min-max:  1.6300823579927044 9.574474103911442


# KILLER-NAIVE


In [6]:
import pandas as pd
import numpy as np

files = ["./performance-comparison-table-KILLER"]
workloads = ["write", "randwrite", "fileserver.f", "varmail.f", "webserver.f", "webproxy.f"]
# Per-workload results for the rows whose `file_system` is "KILLER-NAIVE".
# NOTE: these are the same list names the "KILLER" cell fills, so running this
# cell replaces the KILLER values. Run any cell that needs the KILLER values
# (e.g. the reduction-ratio cell) before this one.
real_meta_IOs_of_KILLER = []
package_persist_time_percentages = []
WAs_KILLER = []
RAs_KILLER = []
IOAs_KILLER = []
meta_times_KILLER = []

for fid, file in enumerate(files):
    with open(file, "r") as f:
        # `sep=r"\s+"` replaces the deprecated `delim_whitespace=True`.
        df = pd.read_csv(f, sep=r"\s+", engine='python')

    for workload_idx, workload in enumerate(workloads):
        # Select the matching row(s) once. Values are read with
        # `rows[col].iloc[0]` so they stay NumPy scalars (a zero denominator
        # then yields inf with a warning instead of raising).
        rows = df[(df['workloads'] == workload) & (df['file_system'] == "KILLER-NAIVE")]

        data_write = rows["data_write(bytes)"].iloc[0]
        data_read = rows["data_read(bytes)"].iloc[0]
        media_write = rows["media_write(byte)"].iloc[0]
        media_read = rows["media_read(byte)"].iloc[0]
        meta_write = rows["meta_write(bytes)"].iloc[0]
        meta_read = rows["meta_read(bytes)"].iloc[0]

        data_time = rows["data_time(ns)"].iloc[0]
        meta_time = rows["meta_time(ns)"].iloc[0]
        total_time = data_time + meta_time

        update_bm_time = rows["update_bm_time(ns)"].iloc[0]
        update_package_time = rows["update_package_time(ns)"].iloc[0]

        software_meta_IO = meta_write + meta_read
        # Media traffic not explained by data traffic, clamped at zero.
        # NOTE(review): `media_write` cancels out algebraically here, and the
        # stored read/IO amplification below also uses `media_read - media_write`.
        # Presumably the media read counter already includes writes -- confirm.
        media_minus_data = media_write + media_read - media_write - data_write - data_read
        real_meta_IO = 0 if media_minus_data < 0 else media_minus_data
        software_total_IO = software_meta_IO + data_write + data_read

        real_meta_IOs_of_KILLER.append(real_meta_IO)
        meta_times_KILLER.append(meta_time / 1000000000)

        print("workload: ", workload)
        print("software_meta_IO: ", software_meta_IO / 1024 / 1024 / 1024)
        print("real_meta_IO: ", real_meta_IO / 1024 / 1024 / 1024)
        print("software_meta_IO percentage: ", software_meta_IO * 100 / software_total_IO, "%")
        print("Total IO time (seconds) of", workload, ": ", total_time / 1000000000)
        print("meta percentage: ", meta_time * 100 / total_time, "%")
        print("meta time: ", meta_time / 1000000000, "s")
        print("bm percentage: ", update_bm_time * 100 / total_time, "%")
        print("bm time: ", update_bm_time / 1000000000, "s")
        print("package percentage: ", update_package_time * 100 / total_time, "%")
        print("package time: ", update_package_time / 1000000000, "s")
        # NOTE(review): the printed read/IO amplification uses the raw
        # `media_read`, while the values stored below subtract `media_write`.
        print("write amplification: ", media_write / (meta_write + data_write))
        print("read amplification: ", media_read / (meta_read + data_read))
        print("IO amplification: ", (media_write + media_read) / (meta_write + data_write + meta_read + data_read))
        print("")
        package_persist_time_percentages.append(update_package_time * 100 / total_time)
        WAs_KILLER.append(media_write / (meta_write + data_write))
        RAs_KILLER.append((media_read - media_write) / (meta_read + data_read))
        IOAs_KILLER.append((media_write + media_read - media_write) / (meta_write + data_write + meta_read + data_read))

print("real_meta_IOs_of_KILLER: ", format_nice(real_meta_IOs_of_KILLER))
print("package_persist_time_percentages: ", format_nice(package_persist_time_percentages))

# `format_nice` and the *_NOVA / *_PMFS lists come from the
# "Calculation of Metadata Amplification" cell, which must have been run first.
print("WAs_NOVA: ", format_nice(WAs_NOVA))
print("RAs_NOVA: ", format_nice(RAs_NOVA))
print("IOAs_NOVA: ", format_nice(IOAs_NOVA))
print("WAs_PMFS: ", format_nice(WAs_PMFS))
print("RAs_PMFS: ", format_nice(RAs_PMFS))
print("IOAs_PMFS: ", format_nice(IOAs_PMFS))
print("WAs_KILLER: ", format_nice(WAs_KILLER))
print("RAs_KILLER: ", format_nice(RAs_KILLER))
print("IOAs_KILLER: ", format_nice(IOAs_KILLER))

workload:  write
software_meta_IO:  0.5001225490123034
real_meta_IO:  2.6450347900390625
software_meta_IO percentage:  1.5388328098089044 %
Total IO time (seconds) of write :  20.038333614
meta percentage:  45.67877864157811 %
meta time:  9.153266055 s
bm percentage:  0.26156007784690033 %
bm time:  0.052412281 s
package percentage:  45.41721856373121 %
package time:  9.100853774 s
write amplification:  1.0174780699533537
read amplification:  inf
IO amplification:  2.0834751208196107

workload:  randwrite
software_meta_IO:  0.5001225490123034
real_meta_IO:  2.9326424598693848
software_meta_IO percentage:  1.5388328098089044 %
Total IO time (seconds) of randwrite :  21.235862864
meta percentage:  46.29368261586265 %
meta time:  9.830862955 s
bm percentage:  0.2566744113440419 %
bm time:  0.054507026 s
package percentage:  46.03700820451861 %
package time:  9.776355929 s
write amplification:  1.0205847550279368
read amplification:  inf
IO amplification:  2.0954312392893018

workload:  fi

