In [13]:
import re
import csv
import pandas as pd

# Build a summary table comparing metadata I/O against data I/O for the
# filebench and fio trace results, then export it as a LaTeX tabular.

# Columns shared by both result sets; also the column order of the final table.
common_cols = ['workload', 'file_system', 'meta_total(GiB)', 'meta_time(s)', 'data_total(GiB)', 'data_time(s)', 'meta_I/O_occupation(%)', 'meta_time_occupation(%)']

# Rank used to order table rows by workload abbreviation.
workload_order = {'FSR': 1, 'VML': 2, 'WSR': 3, 'WPY': 4, 'SW': 5, 'RW': 6}

_BYTES_PER_GIB = 1024 ** 3
_NS_PER_S = 1000000000


def read_results(path):
    """Read a whitespace-delimited results file at *path* into a DataFrame."""
    with open(path, 'r') as f:
        # sep=r'\s+' is the documented equivalent of the deprecated
        # delim_whitespace=True.
        return pd.read_csv(f, sep=r'\s+', index_col=None, engine='python')


def _sum_columns(frame, columns):
    """Add the named columns element-wise with `+` (so NaN propagates)."""
    total = frame[columns[0]]
    for name in columns[1:]:
        total = total + frame[name]
    return total


def add_metrics(raw, workload_names, data_byte_cols, data_time_cols):
    """Return a copy of *raw* with abbreviated workloads and derived columns.

    Args:
        raw: frame holding 'workload', 'meta_total(bytes)', 'meta_time(ns)'
            and every column named in *data_byte_cols* / *data_time_cols*.
        workload_names: mapping from raw workload name to its abbreviation;
            names missing from the mapping become NaN (``Series.map``).
        data_byte_cols: byte-count columns summed into 'data_total(GiB)'.
        data_time_cols: nanosecond columns summed into 'data_time(s)'.

    Returns:
        A new DataFrame; *raw* itself is left unmodified.
    """
    df = raw.copy()
    df["workload"] = df["workload"].map(workload_names)
    df["meta_total(GiB)"] = df['meta_total(bytes)'] / _BYTES_PER_GIB
    df["meta_time(s)"] = df['meta_time(ns)'] / _NS_PER_S
    df["data_total(GiB)"] = _sum_columns(df, data_byte_cols) / _BYTES_PER_GIB
    df["data_time(s)"] = _sum_columns(df, data_time_cols) / _NS_PER_S
    # Share of metadata in the total (metadata + data) volume and time.
    df["meta_I/O_occupation(%)"] = df['meta_total(GiB)'] * 100 / (df['data_total(GiB)'] + df['meta_total(GiB)'])
    df["meta_time_occupation(%)"] = df['meta_time(s)'] * 100 / (df['data_time(s)'] + df['meta_time(s)'])
    return df


def combine_results(frames):
    """Stack the per-benchmark frames and abbreviate the file-system names."""
    # DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
    # Only the populated frames are concatenated (no empty seed frame) so the
    # numeric columns keep their float dtype.
    combined = pd.concat(frames, ignore_index=True)
    combined["file_system"] = combined["file_system"].map({"NOVA": "NA", "PMFS": "PS"})
    return combined


def sort_by_workload(table):
    """Return *table* ordered by workload according to ``workload_order``."""
    # A stable sort keeps the incoming row order within each workload, so the
    # generated table is deterministic.
    return table.sort_values(by=['workload'], key=lambda names: names.map(workload_order), kind='stable')


# In a notebook or script run __name__ is "__main__", so the cell still
# executes; the guard only keeps a plain import from touching the files.
if __name__ == "__main__":
    # filebench reports reads and writes separately, for both bytes and time.
    df = add_metrics(
        read_results('./metadata-trace-filebench-results'),
        {"fileserver.f": "FSR", "varmail.f": "VML", "webserver.f": "WSR", "webproxy.f": "WPY"},
        ['data_write(bytes)', 'data_read(bytes)'],
        ['data_write_time(ns)', 'data_read_time(ns)'],
    )
    filebench_df = df[common_cols]

    # The fio results use only a write byte count and a single data-time column.
    df = add_metrics(
        read_results('./metadata-trace-fio-results'),
        {"write": "SW", "randwrite": "RW"},
        ['data_write(bytes)'],
        ['data_time(ns)'],
    )
    fio_df = df[common_cols]

    new_df = combine_results([filebench_df, fio_df])
    new_df_sort = sort_by_workload(new_df)
    # Raw strings keep the literal backslash for LaTeX's \% without relying on
    # an invalid Python escape sequence.
    new_df_sort.to_latex('./metadata-trace-results.tex', index=False, escape=False, column_format='c|c|cccc|cc', float_format="%.1f", header=["WL", "FS", "MIO", "MT", "DIO", "DT", r"IO/\%", r"T/\%"])