In [1]:
import os
import sys
from os import path, listdir
from collections import namedtuple

import pickle
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd
import numpy as np

# Use simpson's rule for calculating AUC
from scipy.integrate import simpson

from process_results import recursive_file_search

# plt.rcParams['figure.constrained_layout.use'] = True
# plt.xlim(xmin=0.0)

# Root directory that is searched recursively for result and prediction files.
# The relative path is resolved against the current working directory.
RESULTS_DIR = path.abspath("../results")

def label_bars(bars, ax, customtxt = "", orientation="v"):
    """Annotate every bar in ``bars`` with its value or with a fixed text.

    Args:
        bars: iterable of bar patches exposing ``get_x``, ``get_y``,
            ``get_width`` and ``get_height`` (e.g. the container returned by
            ``ax.bar`` / ``ax.barh``).
        ax: axes-like object; its ``annotate`` method places the labels.
        customtxt: when non-empty, this text is used for every bar instead of
            the bar's own value.
        orientation: ``"v"`` labels vertical bars with their height, centred
            above the bar; any other value labels horizontal bars with their
            width, to the right of the bar.
    """
    # Labels are cut (not rounded) to this many characters. A value with more
    # than six characters in front of the decimal point therefore loses digits.
    max_chars = 6

    for bar in bars:
        # The anchor is computed for every bar, also when a custom text is
        # used; previously a custom text left the anchor variables unset.
        if orientation == "v":
            value = bar.get_height()
            xy = (bar.get_x() + bar.get_width() / 2, value)
            xytxt = (0, 1)
            va = "bottom"
            ha = "center"
        else:
            value = bar.get_width()
            xy = (value, bar.get_y() + bar.get_height() / 2)
            xytxt = (1, 0)
            va = "center"
            ha = "left"

        txt = customtxt if customtxt else value

        ax.annotate(f"{txt}"[:max_chars],
                    xy=xy,
                    xytext=xytxt,
                    textcoords="offset points",
                    ha=ha, va=va)
        
def my_bar_plot(ax, values, groups, ylabel, datalabel, title="my bar plot", orientation="v"):
    """Draw a labelled bar chart with one series or two side-by-side series.

    Args:
        ax: matplotlib axes to draw on.
        values: bar lengths. For a paired plot, a sequence of exactly two
            series (one value per group each); otherwise one value per group.
        groups: tick labels, one per bar position.
        ylabel: label of the value axis (the y axis for vertical bars, the
            x axis for horizontal bars).
        datalabel: legend label. A non-string sequence of two labels together
            with two value series selects the paired plot.
        title: axes title.
        orientation: ``"v"`` for vertical bars (``ax.bar``); any other value
            draws horizontal bars (``ax.barh``).

    Returns:
        The bar container, or a list of two bar containers for a paired plot.
    """
    bar_width = 0.4

    # One bar position per group
    x_pos = np.arange(len(groups))

    vertical = orientation == "v"
    barcall = ax.bar if vertical else ax.barh

    # A plain string is a single legend label, even if it happens to be two
    # characters long; only a real pair of labels selects the paired layout.
    paired = (not isinstance(datalabel, str)
              and len(values) == 2 and len(datalabel) == 2)

    if paired:
        # Shift both series half a bar width so that they sit next to each other
        my_bar = [
            barcall(x_pos - bar_width/2, values[0], bar_width, label=datalabel[0]),
            barcall(x_pos + bar_width/2, values[1], bar_width, label=datalabel[1]),
        ]
    else:
        my_bar = barcall(x_pos, values, bar_width, label=datalabel)

    if vertical:
        ax.set_ylabel(ylabel)
        ax.set_xticks(x_pos)
        ax.set_xticklabels(groups)
    else:
        # For horizontal bars the value axis is the x axis
        ax.set_xlabel(ylabel)
        ax.set_yticks(x_pos)
        ax.set_yticklabels(groups)
    ax.legend()
    ax.set_title(title)

    containers = my_bar if isinstance(my_bar, list) else [my_bar]
    for container in containers:
        label_bars(container, ax, orientation=orientation)

    return my_bar

def save_my_figure(fig, figtype: str, machine: str, workload: str):
    """Save ``fig`` as ``<base>/<machine>/<workload>/<figtype>.png``.

    ``<base>`` is the directory of the (resolved) path in ``sys.argv[1]``.
    When the process was started without arguments, the current working
    directory is used instead of failing with an ``IndexError``.

    Args:
        fig: figure-like object with a ``savefig`` method.
        figtype: file name (without extension) of the saved figure.
        machine: name of the first sub-directory level.
        workload: name of the second sub-directory level.
    """
    # NOTE(review): inside a Jupyter kernel sys.argv[1] is presumably the "-f"
    # flag rather than a real file, which would make this resolve to the
    # working directory as well. Confirm where the figures should end up.
    if len(sys.argv) > 1:
        base_dir = path.dirname(path.realpath(sys.argv[1]))
    else:
        base_dir = os.getcwd()

    figure_dir = path.join(base_dir, machine, workload)
    os.makedirs(figure_dir, exist_ok=True)

    figure_loc = path.join(figure_dir, figtype) + ".png"
    fig.savefig(figure_loc, bbox_inches="tight")
    

    

In [2]:
# Find all processed result files and all prediction files below RESULTS_DIR
result_filter = lambda x: "processed-" in x
prediction_filter = lambda x: "predictions-" in x

all_results = list(recursive_file_search(RESULTS_DIR, result_filter))
all_predictions = list(recursive_file_search(RESULTS_DIR, prediction_filter))

# commonprefix() compares character by character, so it can end in the middle
# of a directory name (".../c5nBATCH" and ".../c5nFIFO" share ".../c5n").
# Cut the prefix back to the last separator so it always names a directory.
result_prefix = path.commonprefix(all_results)
result_prefix = result_prefix[:result_prefix.rfind("/") + 1]

# Find all c5n directories
c5n_filter = lambda x: "c5n" in x
c5n_results = list(filter(c5n_filter, all_results))

# From here on both lists hold (directory, file name) tuples
c5n_results = list(map(lambda x: path.split(x), c5n_results))
all_results = list(map(lambda x: path.split(x), all_results))

c5n_per_workload = dict()

# machine -> workload -> {"results":     [(sched, results_df, sysmon_df or None), ...],
#                         "predictions": {sched: prediction file path or None}}
machine_workload_sched = dict()

# Set for constant-time lookup of the expected prediction file
known_predictions = set(all_predictions)

for (p, result) in all_results:
    workload = result.replace("processed-results-", "").replace(".txt", "")

    # The first directory below the common prefix encodes scheduler and
    # machine: the upper-case letters are the scheduler, the rest the machine.
    sched = p.replace(result_prefix, "")
    sched_machine = sched.split("/")[0]
    sched = "".join([c for c in sched_machine if c.isupper()])
    machine = sched_machine.replace(sched, "")

    # Directories without a machine part are attributed to "m510"
    if machine == "":
        machine = "m510"

    # The sysmon file is optional
    sysmon = path.join(p, f"sysmon-{workload}.txt")
    if not path.exists(sysmon):
        sysmon = None
    else:
        sysmon = pd.read_csv(sysmon, comment="#", skipinitialspace=True)

    workload_entry = (machine_workload_sched
                      .setdefault(machine, dict())
                      .setdefault(workload, {"results": [], "predictions": dict()}))

    # Already storing the dataframes
    # remove rows where tVM == tFC == 0, as that indicates a crashed Firecracker
    # TODO: move this filtering to process_results.py
    df = pd.read_csv(path.join(p, result), comment="#", skipinitialspace=True)
    df = df[ ~((df["tVM"] == 0) & (df["tFC"] == 0)) ]
    workload_entry["results"].append((sched, df, sysmon))

    # Store the path of the matching prediction file, or None when there is
    # none (previously the "missing" marker was the list [None]).
    expected_prediction = path.join(result_prefix, sched_machine, f"predictions-{workload}.txt")
    prediction = expected_prediction if expected_prediction in known_predictions else None

    workload_entry["predictions"][sched] = prediction

# Human-readable byte sizes (snippet adapted from Stack Overflow)
def sizeof_fmt(num, suffix='B'):
    """Format ``num`` with binary (1024-based) unit prefixes.

    Args:
        num: the quantity to format, e.g. a size in bytes.
        suffix: unit suffix appended after the prefix.

    Returns:
        A string with one decimal, such as ``"1.5KiB"``; values of 1024**8 and
        above are expressed in ``Yi``.
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    idx = 0
    while idx < len(prefixes):
        if abs(num) < 1024.0:
            return "%3.1f%s%s" % (num, prefixes[idx], suffix)
        # Too large for this prefix: scale down and try the next one
        num = num / 1024.0
        idx += 1
    return "%.1f%s%s" % (num, 'Yi', suffix)


# Report how large the collected results are once pickled: the structure is
# serialized with pickle and the size of the resulting bytes object is shown in
# human-readable form (displayed because it is the last expression of the cell).
from sys import getsizeof
sizeof_fmt(getsizeof(pickle.dumps(machine_workload_sched)))


'104.9MiB'

In [6]:
%matplotlib inline

all_figs = []
all_axes = []

HOUR_SEC  = 60*60
HOUR_MSEC = HOUR_SEC*1000

for machine in machine_workload_sched:
#     # Skip others for now
#     if machine != "c5n":# and machine != "apollo":
#         continue
        
    cur_axes = []
    cur_figs = []
    
    for workload in machine_workload_sched[machine]:
#         # Skip others for now
#         if workload != "poisson-12500-1hr-mem" and workload != "poisson-50000-1hr-cpu" and workload != "poisson-":
#             continue

        # lists for gathering the data
        deltavm = []
        deltafc = []
        auc_sys = []
        
        # store the auc's of total sys cpu time and user cpu time
        cpu_sys_user = []
        runtimes = []

        bar_width = 0.4

        for result in machine_workload_sched[machine][workload]["results"]:
            # collect some basic metrics
            deltavm.append((result[0], result[1]["d tVM"].mean()))
            deltafc.append((result[0], result[1]["d tFC"].mean()))
            runtimes.append([result[0], result[1]["end time"].max()])
            
            # sysmon is not always present
            if result[2] is not None:
                # Normalize the sysmon times
                result[2]["t"] = result[2]["t"] - result[2]["t"].min()
                
                # Use a tuple here, as sysmon readings are optional
                # so we must know whether the result+sched has a sysmon
                auc_sys.append((result[0], simpson(result[2]["cpu_percentage"], result[2]["t"])))
                cpu_sys_user.append((result[0], simpson(result[2]["cpu_system"]), simpson(result[2]["cpu_user"])))
#             else:
#                 auc_sys.append((result[0], "None"))
#                 cpu_sys_user.append(())
                
                
        #Create figures
        deltafig, deltaax = plt.subplots()
        cur_axes.append(deltaax)
        cur_figs.append(("time-delta", deltafig))
        
        my_bar_plot(deltaax, [ [x[1] for x in deltafc], [y[1] for y in deltavm] ], 
                    [s[0] for s in deltafc], "Time delta (ms)", ["delta tFC", "delta tVM"],
                    f"delta plot of {machine} {workload}")
        
        # Set all for the sysmon plot (not always present)
        if auc_sys:
            sysfig, sysax = plt.subplots()
            cur_axes.append(sysax)
            cur_figs.append(("AUC", sysfig))

            # Normalize the values by using the ideal
            y_values = [val[1] for val in auc_sys]
            
            y_max = max(y_values)
            
            # Append 'ideal' AUC (100% usage over 1hr
            y_ideal = 100.0 * HOUR_SEC
            y_values.append(y_ideal)
            auc_sys.append(("ideal", y_ideal, HOUR_SEC))

            my_bar_plot(sysax, [ y/y_max for y in y_values ],
                        [s[0] for s in auc_sys], "Normalized CPU utilization",
                        "AUC", f"cpu auc {machine} {workload}")
        
        # Create the bars with runtimes
        runfig, runax = plt.subplots()
        cur_figs.append(("runtime", runfig))
        cur_axes.append(runax)
        
        # collect predicted runtimes
        for i, lst in enumerate(runtimes):
            pred = pd.read_csv(machine_workload_sched[machine][workload]["predictions"][lst[0]], comment="#", skipinitialspace=True)
            
            pred_runtime = pred["pred. end time"].max()
            runtimes[i].append(pred_runtime)
            
        tmp = [runtime for _, runtime, _ in runtimes]
        shortest_runtime = min(tmp)/HOUR_MSEC
        longest_runtime = max(tmp)/HOUR_MSEC
        del tmp
        
              
        my_bar_plot(runax, [ [x[1]/HOUR_MSEC for x in runtimes], [y[2]/HOUR_MSEC for y in runtimes] ],
                    [s[0] for s in runtimes], "Time (h)", ["Runtime", "Prediction"],
                    f"runtimes {machine} {workload}", "h")
        
        runax.set_xlim(xmin=shortest_runtime*0.95)

        # Create the table with cpu ratio's
#         tablefig, tableax = plt.subplots()
#         cur_figs.append(tablefig)
#         cur_axes.append(tableax)
        
#         cols = ["Ratio"]
#         rows = [x[0] for x in cpu_sys_user]
#         cell = [[f"{round((x[2]/x[1]), 3)}"] for x in cpu_sys_user]
        
#         print(cpu_sys_user)
#         tableax.axis("off")
#         tableax.table(cellText=cell, rowLabels=rows, colLabels=cols, loc="top")

        cells = [["Scheduler", "Ratio"] ]
        cells = cells + [ [x[0], f"{round((x[2]/x[1]), 3)}"] for x in cpu_sys_user]
        
        
        for ax in cur_axes:
#             ax.margins(0.1)
            ax.autoscale_view()
            
        for figtype, fig in cur_figs:
            fig.tight_layout()
            save_my_figure(fig, figtype, machine, workload)
            plt.close(fig=fig)
        
        
        
    all_axes = all_axes + cur_axes
    all_figs = all_figs + [f[1] for f in cur_figs]
    


  fig.tight_layout()


In [3]:
%matplotlib notebook

from collections import namedtuple

data_tuple = namedtuple("data", ["scheduler", "variance_tFC", "variance_tVM", "cvar_tFC", "cvar_tVM", "ratio_tFC_tVM"])

tmp = dict()

for machine in machine_workload_sched:
    # Skip others for now
    if machine != "c5n":# and machine != "apollo":
        continue
        
    cur_axes = []
    cur_figs = []
    
    for workload in machine_workload_sched[machine]:
            
        if machine not in tmp:
            tmp[machine] = dict()
            
        data = []
        
        for sched, df, _ in machine_workload_sched[machine][workload]["results"]:
            # we pick a (workloadid, argument) tuple
            wid, arg = -1, -1
            if "mem" in workload:
                wid, arg = 2, 20
            elif "cpu" in workload:
                wid, arg = 0,5500000
                
            df = df[ (df["workloadID"] == wid) & (df["workload argument"] == arg)]
            #Calculate the variance between each execution of wid, arg
            var_tFC = df["tFC"].var()
            var_tVM = df["tVM"].var()
            
            std_tFC = df["tFC"].std()
            std_tVM = df["tVM"].std()
            mean_tFC = df["tFC"].mean()
            mean_tVM = df["tVM"].mean()
            
            cvar_tFC = std_tFC / mean_tFC
            cvar_tVM = std_tVM / mean_tVM

            ratio = df["tFC"].mean() / df["tVM"].mean()

            data.append(data_tuple(scheduler=sched, variance_tFC=var_tFC, variance_tVM=var_tVM, cvar_tFC=cvar_tFC, cvar_tVM=cvar_tVM, ratio_tFC_tVM=ratio))
            
        if len(data) >= 4:
            tmp[machine][workload] = data
    
tmp

for workload in tmp["c5n"]:
    print(f"{workload}")
    headers = data_tuple._fields
    print(" \t ".join(headers))

    for res in tmp["c5n"][workload]:
        for i, field in enumerate(res):
            #Not rounding properly, but I don't mind for this preliminary result
            s = str(field)[0:len(headers[i])]
            spaces = len(headers[i]) - len(s)
            if spaces > 0:
                s = s + "".join(" " for i in range(spaces))
            print(f"{s} \t ", end="")
        print("")
    

poisson-50000-1hr-cpu
scheduler 	 variance_tFC 	 variance_tVM 	 cvar_tFC 	 cvar_tVM 	 ratio_tFC_tVM
BATCH     	 10240.096298 	 1864.1198913 	 0.018419 	 0.021120 	 2.68734711939 	 
FIFO      	 1407099.0894 	 4058.9918296 	 0.196614 	 0.030883 	 2.92460374219 	 
CFS       	 6579.2898616 	 1926.5431434 	 0.014819 	 0.021460 	 2.67608849004 	 
RR        	 2004014.3563 	 3530.5431364 	 0.233018 	 0.028825 	 2.94721494716 	 
poisson-25000-1hr-75cpu25mem
scheduler 	 variance_tFC 	 variance_tVM 	 cvar_tFC 	 cvar_tVM 	 ratio_tFC_tVM
BATCH     	 6645.2483943 	 5621.3731217 	 0.019535 	 0.112041 	 6.23571840286 	 
FIFO      	 959807.59428 	 6877.9197385 	 0.229073 	 0.122148 	 6.29906354590 	 
CFS       	 6724.7190240 	 5741.9244859 	 0.019625 	 0.112395 	 6.19786974056 	 
RR        	 20628.448344 	 5624.5817023 	 0.034290 	 0.111330 	 6.21767977515 	 
poisson-12500-1hr-mem
scheduler 	 variance_tFC 	 variance_tVM 	 cvar_tFC 	 cvar_tVM 	 ratio_tFC_tVM
BATCH     	 585641.76483 	 272178.65285 	 0.1

In [4]:
%matplotlib notebook
# Plot runtimes of each equal (workloadID, workload_argument)

for machine in machine_workload_sched:
    # Skip others for now
    if machine != "c5n":# and machine != "apollo":
        continue
        
    cur_axes = []
    cur_figs = []
    
    for workload in machine_workload_sched[machine]:
        
            
        data = []
        
        # Aggregate all schedulers in one figure
        fig, (ax_vm, ax_fc) = plt.subplots(nrows=1, ncols=2)
        
        for sched, df, _ in machine_workload_sched[machine][workload]["results"]:
            
            wid, arg = -1, -1
            if "mem" in workload:
                wid, arg = 2, 20
            elif "cpu" in workload:
                wid, arg = 0,5500000
                
            df = df[ (df["workloadID"] == wid) & (df["workload argument"] == arg)]
            
            ax_vm.plot(df.index, df["tVM"], label=sched)
            ax_fc.plot(df.index, df["tFC"], label=sched)
            
            fig.suptitle(f"{workload} {machine}")
            ax_vm.set_title("VM runtimes")
            ax_fc.set_title("FC runtimes")
            
            ax_vm.legend()
            ax_fc.legend()
            
            fig.subplots_adjust(wspace=0.4)
            fig.set_size_inches(w=15.0, h=7.0)
            
plt.show()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>