In [None]:
import os
import sys
import pandas as pd
import numpy as np
import pickle
sys.path.append('plotting_utils')
from plotting_utils.plotting import *

In [None]:
# Collect per-job results from the CSV files, one file per (policy, trace) pair.
file_dir = "new_data"
fname = "{}_{}_result.csv"  # filled in as fname.format(policy, trace)
pd_data = {}

# Uncomment to load every trace instead of a single one:
# all_traces = ["0e4a51","6214e9","6c71a0","b436b2","ee9e8c"]
all_traces = ["ee9e8c"]
all_policies = ["AFS", "SRSF", "THEMIS", "PCS_jct", "PCS_bal", "PCS_pred", "FIFO"]

# Expected result files that were not found on disk. They are skipped (not an
# error), but reported below so a missing policy is noticed here rather than
# as a KeyError in a later cell.
missing_files = []

for trace in all_traces:
    # pd_data[trace] maps policy name -> DataFrame, plus the special key
    # "min_avg_jct" -> smallest mean 'jct' over the policies loaded for this trace.
    pd_data[trace] = {}
    min_avg_jct = float('inf')
    for policy in all_policies:
        file_path = os.path.join(file_dir, fname.format(policy, trace))
        if not os.path.exists(file_path):
            missing_files.append(file_path)
            continue

        df = pd.read_csv(file_path)
        # Actual and predicted completion time, both measured from submission
        # (jct presumably stands for "job completion time").
        df['jct'] = df['end_time'] - df['submit_time']
        df['pred_jct'] = df['estimated_end_time'] - df['submit_time']
        # Absolute prediction error as a percentage of the predicted value.
        # NOTE: this is computed for every row; the error-plot cell filters out
        # rows whose estimated_* columns are -1 before aggregating it.
        df['error'] = 100.0 * (df['pred_jct'] - df['jct']).abs() / df['pred_jct']

        # Track the best (lowest) average across policies; used later as the
        # normalisation baseline for this trace.
        min_avg_jct = min(min_avg_jct, df['jct'].mean())
        pd_data[trace][policy] = df
    # Remains float('inf') when no result file was found for this trace.
    pd_data[trace]["min_avg_jct"] = min_avg_jct

if missing_files:
    print(f"Skipped {len(missing_files)} missing result file(s): " + ", ".join(missing_files))

In [None]:
# Flatten pd_data into two frames.
#
# Input: pd_data is a two-level dictionary, trace -> {policy -> DataFrame},
# where each inner dictionary also carries a 'min_avg_jct' entry.
#
# Output:
#   combined_df    - all per-policy DataFrames stacked, indexed by a
#                    (trace, policy) MultiIndex (every row of one input frame
#                    shares the same index tuple).
#   custom_keys_df - one row per trace with its 'min_avg_jct', indexed by trace.

dfs = []          # re-indexed copies of the per-policy DataFrames
custom_keys = []  # (trace, min_avg_jct) pairs

for trace, policies_dict in pd_data.items():
    for policy, df in policies_dict.items():
        # 'min_avg_jct' is a scalar stored alongside the policies, not a DataFrame.
        if policy == 'min_avg_jct':
            continue
        labels = pd.MultiIndex.from_tuples([(trace, policy)] * len(df), names=['trace', 'policy'])
        # set_axis returns a new DataFrame, so the frames stored in pd_data keep
        # their original index and this cell can be re-run safely.
        dfs.append(df.set_axis(labels, axis=0))
    custom_keys.append((trace, policies_dict['min_avg_jct']))

# pd.concat raises a terse "No objects to concatenate" on an empty list;
# fail with a message that points at the actual cause instead.
if not dfs:
    raise ValueError("No per-policy DataFrames found in pd_data; nothing to combine.")

# Concatenate all DataFrames into a single DataFrame.
combined_df = pd.concat(dfs)

# One row per trace, indexed by trace name.
custom_keys_df = pd.DataFrame(custom_keys, columns=['trace', 'min_avg_jct']).set_index('trace')

In [None]:
# Build the input expected by the plotting utils for fig7a:
#   {"traces": [...], "data": {policy: [one value per trace, in all_traces order]}}
# Each value is the policy's mean 'jct' on that trace divided by the trace's
# 'min_avg_jct' taken from custom_keys_df.

data = {
    "traces": list(pd_data.keys()),
    "data": {
        policy: [
            combined_df.loc[(trace, policy)]['jct'].mean() / custom_keys_df.loc[trace, 'min_avg_jct']
            for trace in all_traces
        ]
        for policy in all_policies
    },
}

ax = plot_fig7a(data=data)
# Last expression of the cell: renders the figure.
ax.figure

In [None]:
# Organize data in the format accepted by the plotting utils.
# fig7b: mean and 99th-percentile prediction error per policy, one value per trace.

avg_error_data = {"traces": list(pd_data.keys()), "data": {}}
p99_error_data = {"traces": list(pd_data.keys()), "data": {}}
for policy in all_policies:
    avg_error_data["data"][policy] = []
    p99_error_data["data"][policy] = []
    for trace in all_traces:
        df = combined_df.loc[(trace, policy)]
        # Keep only rows where both estimates are set; -1 in either estimated_*
        # column is treated as "no prediction available".
        valid_prediction = (df['estimated_end_time'] != -1) & (df['estimated_start_time'] != -1)
        filtered_df = df[valid_prediction]

        # The 'error' column already holds the percentage error
        # 100 * |pred_jct - jct| / pred_jct, so it is aggregated directly
        # rather than recomputed here.
        avg_error_data["data"][policy].append(filtered_df['error'].mean())
        p99_error_data["data"][policy].append(filtered_df['error'].quantile(0.99))

ax = plot_fig7b(data1=avg_error_data,data2=p99_error_data)
# Last expression of the cell: renders the figure.
ax.figure