# General Setup

Install the dependencies if not installed yet

In [None]:
using Pkg
Pkg.add("CSVFiles")
Pkg.add("VegaLite")
Pkg.add("DataFrames")

using Printf, Statistics  # standard library: `@sprintf` and `median` are used by later cells
using VegaLite, CSVFiles, DataFrames

# Compatibility
Read in the `.csv` data files that record which programs execute successfully and which do not

In [None]:
# Load the "executes once" reports (one CSV per setup) into DataFrames.
df_rgular_exec_once = load("./working-dir/executes-once-analysis-regular.csv") |> DataFrame
df_wasabi_exec_once = load("./working-dir/executes-once-analysis-wasabi.csv") |> DataFrame
df_wastrm_exec_once = load("./working-dir/executes-once-analysis-wastrumentation.csv") |> DataFrame
"Data files read"

In [None]:
# The `reason` column of each report (accessed without copying).
df_rgular_executes = df_rgular_exec_once.reason
df_wasabi_executes = df_wasabi_exec_once.reason
df_wastrm_executes = df_wastrm_exec_once.reason

# Return `true` when `report` is one of the two outcomes accepted for every setup:
# "success" or "timeout".
known_exception(report) = report == "success" || report == "timeout"

# Sanity checks: every report must be a known outcome. For Wasabi the additional outcome
# "error - Local count too large" is accepted as well.
@assert all(known_exception, df_rgular_executes)
@assert all(report -> known_exception(report) || report == "error - Local count too large", df_wasabi_executes)
@assert all(known_exception, df_wastrm_executes)

total_input_programs = nrow(df_rgular_exec_once) # Total number of input programs

# Uninstrumented runs.
regular_success = count(==("success"), df_rgular_executes)
regular_timeout = count(==("timeout"), df_rgular_executes)
regular_timeout_report = regular_timeout == 0 ? "" : " ($regular_timeout timed out)"

# Runs instrumented by Wasabi: besides timeouts these can also report an error.
wasabi_success = count(==("success"), df_wasabi_executes)
wasabi_timeout = count(==("timeout"), df_wasabi_executes)
wasabi_error_r = count(==("error - Local count too large"), df_wasabi_executes)
# Mention only the failure kinds that actually occurred, e.g. " (2 timed out, 1 errored)".
wasabi_unsuccesful_report = let failures = String[]
    wasabi_timeout > 0 && push!(failures, "$wasabi_timeout timed out")
    wasabi_error_r > 0 && push!(failures, "$wasabi_error_r errored")
    isempty(failures) ? "" : " (" * join(failures, ", ") * ")"
end

# Runs instrumented by Wastrumentation.
wastrm_success = count(==("success"), df_wastrm_executes)
wastrm_timeout = count(==("timeout"), df_wastrm_executes)
wastrm_timeout_report = wastrm_timeout == 0 ? "" : " ($wastrm_timeout timed out)"

# One-sentence summary; this is the value of the cell.
conclusion = "For the forward analysis, out of a total of $total_input_programs input programs, our benchmark harness successfully executed $regular_success programs uninstrumented$regular_timeout_report, $wastrm_success after instrumentation by Wastrumentation$wastrm_timeout_report and $wasabi_success after instrumentation by Wasabi$wasabi_unsuccesful_report."

Perform a selection of all programs that passed all executions:

In [None]:
# Programs whose `reason` is "success" in all three reports (regular, Wasabi, Wastrumentation).
success_programs_on_all = let passed = df -> select(df[df.reason .== "success", :], :input_program)
    innerjoin(
        passed(df_rgular_exec_once),
        passed(df_wasabi_exec_once),
        passed(df_wastrm_exec_once);
        on=[:input_program],
    )
end

# Code Size Study

In [None]:
# Load the code-size reports (one CSV per setup) into DataFrames.
df_rgular_code_size = load("./working-dir/code-size-analysis-regular.csv") |> DataFrame
df_wasabi_code_size = load("./working-dir/code-size-analysis-wasabi.csv") |> DataFrame
df_wastrm_code_size = load("./working-dir/code-size-analysis-wastrumentation.csv") |> DataFrame
"Data files read"

## Plot Code Size Increase

In [None]:
# Keep only the rows of the "forward" analysis for each instrumentation platform.
df_wasabi_code_size_forward = filter(:setup => ==("[wasabi - forward]"), df_wasabi_code_size)
df_wastrm_code_size_forward = filter(:setup => ==("[wastrumentation - forward]"), df_wastrm_code_size)

# Stack the uninstrumented sizes and both forward-instrumented sizes into one table.
code_size_forward = vcat(
    df_rgular_code_size,
    df_wasabi_code_size_forward,
    df_wastrm_code_size_forward,
)

# Grouped bar chart of absolute program size: one group per input program, one bar per
# setup (via `xOffset`), with a log-scaled y axis.
code_size_forward |>
@vlplot(
  width=500,
  :bar,
  encoding={
    color={
      field="setup",
      type="nominal",
    },
    xOffset={
      field="setup",
      type="nominal",
    },
    x={
      field="input_program",
      type="nominal",
      axis={
        title="Input program",
      },
    },
    y={
      field="size_bytes",
      type="quantitative",
      scale={
        type="log"
      },
      axis={
        title="Program size (bytes)",
        grid=false,
      },
    },
  },
  config={
    spacing=100,
    view={stroke=:transparent},
    axis={domainWidth=1}
  },
)
# The string below is the last expression of the cell, so it (and not the chart above)
# is the cell's resulting value.
"Absolute byte size left out of plots, not showing ..."

In [None]:
# Side-by-side table of absolute sizes: one row per input program, one size column per setup.
let sizes_as = (df, column) -> select(rename(df, :size_bytes => column), Not(:setup))
    innerjoin(
        sizes_as(df_rgular_code_size, :regular_size),
        sizes_as(df_wasabi_code_size_forward, :wasabi_size),
        sizes_as(df_wastrm_code_size_forward, :wastrm_size);
        on=:input_program,
    )
end

In [None]:
# Uninstrumented size per program; used as the denominator below.
baseline = rename(
    select(df_rgular_code_size, Not([:setup])),
    :size_bytes => :size_bytes_baseline,
)

# For each platform: join the instrumented sizes with the baseline and compute
# instrumented size / baseline size as `code_increase`.
df_wasabi_forward_code_incr, df_wastrm_forward_code_incr = map(
    (df_wasabi_code_size_forward, df_wastrm_code_size_forward),
) do instrumented
    select(
        innerjoin(baseline, instrumented; on=:input_program),
        :input_program,
        :setup,
        [:size_bytes, :size_bytes_baseline] =>
            ((instrumented_size, baseline_size) -> instrumented_size ./ baseline_size) =>
            :code_increase,
    )
end

"Relative code size increase computed"

In [None]:
# Replace the raw setup labels with display names for the plot legend;
# any other label is left untouched.
df_wasabi_forward_code_incr_renamed = transform(
    df_wasabi_forward_code_incr,
    :setup => ByRow(label -> label == "[wasabi - forward]" ? "Wasabi" : label) => :setup,
)

df_wastrm_forward_code_incr_renamed = transform(
    df_wastrm_forward_code_incr,
    :setup => ByRow(label -> label == "[wastrumentation - forward]" ? "Wastrumentation" : label) => :setup,
)

In [None]:
# Relative code-size increase per input program for both platforms: grouped bars plus a
# horizontal red rule at y = 1 (i.e. "same size as the uninstrumented program").
binary_size_plot = vcat(df_wasabi_forward_code_incr_renamed, df_wastrm_forward_code_incr_renamed) |>
@vlplot(
  width=500,
  layer=[
    {
      mark="bar",
      encoding={
        color={
          field="setup",
          type="nominal",
          legend={
            title="Instrumentation Platform",
            orient="top",
          }
        },
        xOffset={
          field="setup",
          type="nominal",
        },
        x={
          field="input_program",
          type="nominal",
          axis={labelAngle="45"},
          title="Input Program",
        },
        y={
          field="code_increase",
          type="quantitative",
          axis={
            title="Program Size Increase (X)",
            grid=false,
          },
        },
      },
    },
    {
      mark="rule",
      encoding={
        y={
          datum=1,
        },
        color={value="red"}, # Color for the line
        size={value=1} # Thickness of the line
      },
    },
  ],
  config={
    view={stroke=:transparent},
  },
)

# Write the chart to a PDF and also return it as the value of the cell.
binary_size_plot |> save("./working-dir/wasabi-wastrm-binary-size.pdf")
binary_size_plot

In [None]:
# Uninstrumented size per program, with `size_bytes` renamed to `size_bytes_baseline`
# and the `setup` column dropped.
df_rgular_code_size_baseline = select(
    df_rgular_code_size,
    Not([:size_bytes, :setup]),
    :size_bytes => :size_bytes_baseline,
)

# Attach the baseline size to every instrumented size (all analyses, both platforms).
# The outer joins keep programs that are present on only one side.
all_code_sizes = vcat(
    outerjoin(df_rgular_code_size_baseline, df_wasabi_code_size, on=[:input_program]),
    outerjoin(df_rgular_code_size_baseline, df_wastrm_code_size, on=[:input_program]),
)

# overhead = instrumented size / baseline size
all_code_sizes = transform(
    all_code_sizes,
    [:size_bytes, :size_bytes_baseline] =>
        ((size_bytes, size_bytes_baseline) -> size_bytes ./ size_bytes_baseline) =>
        :overhead,
)

all_code_sizes = select(all_code_sizes, Not([:size_bytes, :size_bytes_baseline]))

# Setup labels have the form "[<platform> - <analysis>]".
pattern = r"\[([\w-]+) - ([\w-]+)\]"

# Split `setup` into `platform` and `analysis`. A missing setup yields two missing values;
# a label that does not match `pattern` raises an ArgumentError instead of failing later
# with an opaque error on `nothing`.
all_code_sizes = transform(
    all_code_sizes,
    :setup => ByRow(function (setup)
        ismissing(setup) && return [missing, missing]
        m = match(pattern, setup)
        m === nothing &&
            throw(ArgumentError("setup label does not match \"[platform - analysis]\": $setup"))
        return [m.captures[1], m.captures[2]]
    end) => [:platform, :analysis],
)
all_code_sizes = select(all_code_sizes, Not([:setup]))
# Drop the rows that had no setup (and therefore no platform).
all_code_sizes = filter(row -> !ismissing(row.platform), all_code_sizes)

In [None]:
# Heatmap of the code-size overhead: one facet row per platform, analysis on the y axis,
# input program on the x axis. Each cell is coloured by `overhead` (log scale) and labelled
# with the first five characters of the overhead value.
all_code_sizes |> @vlplot(
  "transform"=[
    {
      "calculate"="substring(datum.overhead, 0, 5)",
      "as"="overhead_truncated"
    },
  ],
  facet={
    row={
      field="platform",
      type="nominal",
    },
  },
  spec={
    layer=[
      {
        mark="rect",
        encoding={
          y={
            field="analysis",
            type="nominal",
          },
          x={
            field="input_program",
            type="nominal",
            # One `axis` entry holding both properties: the original listed the `axis`
            # key twice in this encoding.
            axis={title="Input Program", labelAngle=-30},
          },
          color={
            field="overhead",
            type="quantitative",
            scale={
              type="log",
              scheme="blues",
            },
            legend={
              title="Code Size Increase (X)",
              orient="top",
            },
          },
        },
      },
      {
        mark={
          type="text",
          fontSize="6",
        },
        encoding={
          y={
            field="analysis",
            type="nominal",
          },
          x={
            field="input_program",
            type="nominal",
            axis={title="Input Program",},
          },
          text={
            field="overhead_truncated",
            # The label is a (truncated) string, so it is nominal, as in the other heatmaps.
            type="nominal",
          },
        },
      },
    ],
  },
  config={
    axis={
      grid=true,
      tickBand="extent",
    },
  },
)

In [None]:
# df_wasabi_wastrm_overhead = transform(
#   innerjoin(
#     rename(select(df_wasabi_instruction_overhead_single, Not(:setup)), :overhead => :overhead_wasabi),
#     rename(select(df_wastrm_instruction_overhead_single, Not(:setup)), :overhead => :overhead_wastrm),
#     on=[:input_program, :analysis]
#   ),
#   [:overhead_wasabi, :overhead_wastrm]
#     => ((overhead_wasabi, overhead_wastrm) -> overhead_wasabi ./ overhead_wastrm)
#     => :time_for_wasabi_per_time_for_wastrm,
# )

# outerjoin(
#     rename(
#         transform(
#             filter(row -> row.platform === "wasabi", all_code_sizes),
#             :platform => ByRow(platform -> "wasabi / wastrumentation") => :platform,
#         )
#         :overhead => :wasabi_overhead,
#     ),
#     # rename(
#     #     transform(
#     #         filter(row -> row.platform === "wastrumentation", all_code_sizes),
#     #         :platform => ByRow(platform -> "wasabi / wastrumentation") => :platform,
#     #     )
#     #     :overhead => :wastrm_overhead,
#     # ),
#     on=[:input_program, :analysis, platform]
# )

# Rows for the Wasabi platform. Compared by value (`isequal`) rather than identity (`===`):
# `platform` is filled from regex captures, which are `SubString`s, and a `SubString` is never
# `===` to a `String` literal, so an identity test would select no rows.
filter(row -> isequal(row.platform, "wasabi"), all_code_sizes)

In [None]:
df_rgular = DataFrame(load("./working-dir/runtime-analysis-regular.csv"))
df_wasabi = DataFrame(load("./working-dir/runtime-analysis-wasabi.csv"))
df_wastrm = DataFrame(load("./working-dir/runtime-analysis-wastrumentation.csv"))

# Raw `performance` entries that denote a failed measurement.
pattern_error   = r"^error$"
pattern_timeout = r"^timeout \d+$"

"""
    parse_performance(raw)

Convert one raw `performance` entry to a `Float64`.

Entries that are `missing`, or whose text matches `pattern_error` or `pattern_timeout`,
become `missing`. Every other entry is converted to text and parsed as `Float64`, so
`parse` throws an `ArgumentError` for text that is not a number.
"""
function parse_performance(raw)
    text = string(raw)  # string(missing) == "missing"
    failed = text == "missing" || occursin(pattern_error, text) || occursin(pattern_timeout, text)
    return failed ? missing : parse(Float64, text)
end

# Replace error and timeout entries with `missing` and parse the rest as Float64.
df_rgular = transform(df_rgular, :performance => ByRow(parse_performance) => :performance)
df_wasabi = transform(df_wasabi, :performance => ByRow(parse_performance) => :performance)
df_wastrm = transform(df_wastrm, :performance => ByRow(parse_performance) => :performance)

# Squash together rows that share the same program, runtime, iteration, time unit and setup,
# keeping the median performance (`missing` if the group contains a missing value).
df_rgular = combine(groupby(df_rgular, Cols(:input_program, :runtime, :runtime_iteration, "time-unit", :setup)), :performance => median => :performance)
df_wasabi = combine(groupby(df_wasabi, Cols(:input_program, :runtime, :runtime_iteration, "time-unit", :setup)), :performance => median => :performance)
df_wastrm = combine(groupby(df_wastrm, Cols(:input_program, :runtime, :runtime_iteration, "time-unit", :setup)), :performance => median => :performance)


## Plot Engine Warmup Over Time

In [None]:
df_rgular = DataFrame(load("./working-dir/runtime-analysis-regular.csv"))
df_wasabi = DataFrame(load("./working-dir/runtime-analysis-wasabi.csv"))
df_wastrm = DataFrame(load("./working-dir/runtime-analysis-wastrumentation.csv"))

# Squash together runtime iterations
# NOTE(review): the result of this `combine` is not assigned, so it does not affect the
# data frames used below.
combine(groupby(df_rgular, Cols(:input_program, :runtime, :runtime_iteration, "time-unit", :setup)), :performance => median => :performance)


# select only the programs that we know execute on all for the forward analysis!
# df_rgular = innerjoin(success_programs_on_all, df_rgular, on=[:input_program])
# df_wasabi = innerjoin(success_programs_on_all, df_wasabi, on=[:input_program])
# df_wastrm = innerjoin(success_programs_on_all, df_wastrm, on=[:input_program])

# The analysis `memory tracing` is buggy (for both the Wastrumentation & Wasabi), it crashed sometimes
# What we will do is remove combinations where an error popped up!
@assert isempty(filter(row -> row.performance == "error", df_rgular))

# Error entries of the instrumented runs are expected only at runtime iteration 0.
is_zero = x -> x == 0 && isa(x, Int64)
@assert all(is_zero, filter(row -> row.performance == "error", df_wasabi).runtime_iteration)
@assert all(is_zero, filter(row -> row.performance == "error", df_wastrm).runtime_iteration)

# Remove all error entries. All three filters test the `performance` column, the column
# the assertions above inspect (the original tested `input_program` for `df_rgular`).
df_rgular = filter(row -> row.performance != "error", df_rgular)
df_wasabi = filter(row -> row.performance != "error", df_wasabi)
df_wastrm = filter(row -> row.performance != "error", df_wastrm)

# Parse out timeouts -> TODO: should I not remove such an entry then entirely?
# Rows whose performance reads "timeout <n>" are dropped; this only applies while the
# column still holds strings.
pattern = r"^timeout \d+$"
if isa(df_rgular.performance, Vector{String}) df_rgular = df_rgular[.!occursin.(Ref(pattern), df_rgular.performance), :] end
if isa(df_wasabi.performance, Vector{String}) df_wasabi = df_wasabi[.!occursin.(Ref(pattern), df_wasabi.performance), :] end
if isa(df_wastrm.performance, Vector{String}) df_wastrm = df_wastrm[.!occursin.(Ref(pattern), df_wastrm.performance), :] end

# Parse the remaining string entries as Float64.
if isa(df_rgular.performance, Vector{String}) df_rgular = transform(df_rgular, :performance => ByRow((x) -> parse(Float64, x)) => :performance) end
if isa(df_wasabi.performance, Vector{String}) df_wasabi = transform(df_wasabi, :performance => ByRow((x) -> parse(Float64, x)) => :performance) end
if isa(df_wastrm.performance, Vector{String}) df_wastrm = transform(df_wastrm, :performance => ByRow((x) -> parse(Float64, x)) => :performance) end

"Data files read"

In [None]:
df_rgular = DataFrame(load("./working-dir/runtime-analysis-regular.csv"))
df_wasabi = DataFrame(load("./working-dir/runtime-analysis-wasabi.csv"))
df_wastrm = DataFrame(load("./working-dir/runtime-analysis-wastrumentation.csv"))

# Raw `performance` entries that denote a failed measurement.
pattern_error   = r"^error$"
pattern_timeout = r"^timeout \d+$"

"""
    parse_performance(raw)

Convert one raw `performance` entry to a `Float64`.

Entries that are `missing`, or whose text matches `pattern_error` or `pattern_timeout`,
become `missing`. Every other entry is converted to text and parsed as `Float64`, so
`parse` throws an `ArgumentError` for text that is not a number.
"""
function parse_performance(raw)
    text = string(raw)  # string(missing) == "missing"
    failed = text == "missing" || occursin(pattern_error, text) || occursin(pattern_timeout, text)
    return failed ? missing : parse(Float64, text)
end

# Replace error and timeout entries with `missing` and parse the rest as Float64.
df_rgular = transform(df_rgular, :performance => ByRow(parse_performance) => :performance)
df_wasabi = transform(df_wasabi, :performance => ByRow(parse_performance) => :performance)
df_wastrm = transform(df_wastrm, :performance => ByRow(parse_performance) => :performance)

# Squash together rows that share the same program, runtime, iteration, time unit and setup,
# keeping the median performance (`missing` if the group contains a missing value).
df_rgular = combine(groupby(df_rgular, Cols(:input_program, :runtime, :runtime_iteration, "time-unit", :setup)), :performance => median => :performance)
df_wasabi = combine(groupby(df_wasabi, Cols(:input_program, :runtime, :runtime_iteration, "time-unit", :setup)), :performance => median => :performance)
df_wastrm = combine(groupby(df_wastrm, Cols(:input_program, :runtime, :runtime_iteration, "time-unit", :setup)), :performance => median => :performance)

# Setup labels have the form "[<platform> - <analysis>]".
pattern = r"\[([\w-]+) - ([\w-]+)\]"

"""
    split_setup(setup)

Split a setup label of the form "[<platform> - <analysis>]" into `[platform, analysis]`.

A `missing` label yields `[missing, missing]`. A label that does not match `pattern`
throws an `ArgumentError`.
"""
split_setup(::Missing) = [missing, missing]
function split_setup(setup)
    m = match(pattern, setup)
    m === nothing &&
        throw(ArgumentError("setup label does not match \"[platform - analysis]\": $setup"))
    return [m.captures[1], m.captures[2]]
end

# Add `platform` and `analysis` columns to the instrumented measurements.
df_wasabi = transform(df_wasabi, :setup => ByRow(split_setup) => [:platform, :analysis])
df_wastrm = transform(df_wastrm, :setup => ByRow(split_setup) => [:platform, :analysis])


In [None]:
# Warm-up chart for the uninstrumented runs: median `performance` (log-scaled y) per
# `runtime_iteration` (x), one colour per input program.
# Note that a mark is given twice: `:line` positionally and an errorband via `mark=`.
df_rgular |>
@vlplot(
  :line,
  mark={
      :errorband,
      extent=:ci,
  },
  encoding={
    x={
      field="runtime_iteration",
      type="nominal",
      scale={
        "rangeStep"=12
      },
    },
    y={
      aggregate="median",
      field="performance",
      type="quantitative",
      scale={
        type="log"
      },
      title="Execution time (ms)",
    },
    color={
      field="input_program",
      type="nominal",
    },
  },
  config={
    line={
      point=true
    },
    scale={
      useUnaggregatedDomain=true
    },
  },
)


In [None]:
# Warm-up chart for the Wasabi "forward" setup only: median `performance` (log-scaled y)
# per `runtime_iteration` (x), one colour per input program.
# Note that a mark is given twice: `:line` positionally and an errorband via `mark=`.
df_wasabi[df_wasabi.setup .== "[wasabi - forward]", :] |>
@vlplot(
  :line,
  mark={
      :errorband,
      extent=:ci,
  },
  encoding={
    x={
      field="runtime_iteration",
      type="nominal",
      scale={
        "rangeStep"=12
      },
    },
    y={
      aggregate="median",
      field="performance",
      type="quantitative",
      scale={
        type="log"
      },
      title="Execution time (ms)",
    },
    color={
      field="input_program",
      type="nominal",
    },
  },
  config={
    line={
      point=true
    },
    scale={
      useUnaggregatedDomain=true
    },
  },
)


In [None]:
# Warm-up chart for the Wastrumentation "forward" setup only: median `performance`
# (log-scaled y) per `runtime_iteration` (x), one colour per input program.
# Note that a mark is given twice: `:line` positionally and an errorband via `mark=`.
df_wastrm[df_wastrm.setup .== "[wastrumentation - forward]", :] |>
@vlplot(
  :line,
  mark={
      :errorband,
      extent=:ci,
  },
  encoding={
    x={
      field="runtime_iteration",
      type="nominal",
      scale={
        "rangeStep"=12
      },
    },
    y={
      aggregate="median",
      field="performance",
      type="quantitative",
      scale={
        type="log"
      },
      title="Execution time (ms)",
    },
    color={
      field="input_program",
      type="nominal",
    },
  },
  config={
    line={
      point=true
    },
    scale={
      useUnaggregatedDomain=true
    },
  },
)

## Plot Runtime Performances

In [None]:
# The value being classified is n = (Wasabi overhead) / (Wastrumentation overhead).
#
# E.g. overhead wasabi: 10x
#      overhead wastrm: 50x
#
#      ==> wasabi faster (lower overhead; 10 <= 50)
#
#      ==> wasabi / wastrm = 0.2
#      ==> marked as "1. Wastrmnt >3 times slower"

# Ordered category labels; also used as the ordinal colour domain of the comparison plot.
performance_ordinal_domain = [
  "1. Wastrmnt >3 times slower",
  "2. Wastrmnt 3-1.05 times slower",
  "3. Wastrmnt comparable",
  "4. Wastrmnt 3-1.05 times faster",
  "5. Wastrmnt >3 times faster",
]

"""
    performance_comparison(n)

Map the ratio `n` to its category label:

    [0, 0.3] -> 1 | (0.3, 0.95] -> 2 | (0.95, 1.05] -> 3 | (1.05, 3] -> 4 | (3, 100) -> 5

Labels 1-5 come from `performance_ordinal_domain`. A ratio of 100 or more gets a separate
label that is not part of that domain. Negative and `NaN` ratios return
"Input is out of range". `missing` is passed through unchanged.

Any `Real` is accepted, not only `Float64`.
"""
function performance_comparison(n::Real)
    # `NaN >= 0` is false, so NaN is rejected here together with negative ratios.
    n >= 0 || return "Input is out of range"
    n <= 0.3 && return performance_ordinal_domain[1]
    n <= 0.95 && return performance_ordinal_domain[2]
    n <= 1.05 && return performance_ordinal_domain[3]
    n <= 3 && return performance_ordinal_domain[4]
    n < 100 && return performance_ordinal_domain[5]
    return "0. ❌ wasabi is INCREADIBLY SLOW"
end

performance_comparison(::Missing) = missing

In [None]:
# Uninstrumented measurements for runtime iterations after the fifth one.
# NOTE(review): in the visible code `baseline` is only read by commented-out joins - confirm
# whether it is still needed.
baseline =
    rename(
      select(
        subset(df_rgular, :runtime_iteration => i -> i .> 5),
        Not([:setup]),
      ),
      :performance => :performance_baseline,
    )

df_wasabi_timeout_computed = df_wasabi
df_wastrm_timeout_computed = df_wastrm

# Only relevant while `performance` still holds raw strings: a "timeout 300" entry is
# replaced by the value 10000 and everything is parsed as Float64.
if isa(df_wasabi.performance, Vector{String})
  df_wasabi_timeout_computed = transform(df_wasabi, :performance => ByRow((x) -> parse(Float64, x == "timeout 300" ? "10000" : x)) => :performance)
end
if isa(df_wastrm.performance, Vector{String})
  df_wastrm_timeout_computed = transform(df_wastrm, :performance => ByRow((x) -> parse(Float64, x == "timeout 300" ? "10000" : x)) => :performance)
end

# The column must be numeric by now. Checking the element type accepts both a column that
# contains `missing` entries and one that does not.
@assert eltype(df_wasabi_timeout_computed.performance) <: Union{Float64, Missing} "df_wasabi should be parsed to Float64"
@assert eltype(df_wastrm_timeout_computed.performance) <: Union{Float64, Missing} "df_wastrm should be parsed to Float64"

In [None]:
using Statistics

# Aggregate computations per 'run'!
# Median performance per (iteration, setup, runtime, program, time unit) for each platform.
df_wasabi_aggr, df_wastrm_aggr = map(
    (df_wasabi_timeout_computed, df_wastrm_timeout_computed),
) do measurements
    per_run = groupby(
        measurements,
        Cols(:runtime_iteration, :setup, :runtime, :input_program, "time-unit"),
    )
    combine(per_run, :performance => median => :performance)
end

# Move the analysis name out of the setup label into its own column and reduce `setup`
# to the bare platform name.
df_wasabi_sep_analyses = transform(
    df_wasabi_aggr,
    :setup => ByRow(label -> first(match(r"\[wasabi - (.+)\]", label).captures)) => :analysis,
    :setup => (_ -> "wasabi") => :setup,
)
df_wastrm_sep_analyses = transform(
    df_wastrm_aggr,
    :setup => ByRow(label -> first(match(r"\[wastrumentation - (.+)\]", label).captures)) => :analysis,
    :setup => (_ -> "wastrumentation") => :setup,
)


# df_wasabi_instruction_overhead = select(
#   outerjoin(baseline, df_wasabi_sep_analyses, on=[:input_program, :runtime, :runtime_iteration, "time-unit"]),
#   [:performance, :performance_baseline]
#     => ((performance, performance_baseline) -> performance ./ performance_baseline)
#     => :overhead,
#   # What to keep:
#   :input_program, :setup, :analysis,
# )

# df_wastrm_instruction_overhead = select(
#   outerjoin(baseline, df_wastrm_sep_analyses, on=[:input_program, :runtime, :runtime_iteration, "time-unit"]),
#   [:performance, :performance_baseline]
#     => ((performance, performance_baseline) -> performance ./ performance_baseline)
#     => :overhead,
#   # What to keep:
#   :input_program, :setup, :analysis,
# )

# Overhead per measurement = instrumented performance / uninstrumented performance, matched
# on runtime iteration and input program. The outer join keeps rows without a counterpart;
# their overhead (and, for unmatched baseline rows, `setup` / `analysis`) is missing.
df_wasabi_instruction_overhead, df_wastrm_instruction_overhead = map(
    (df_wasabi, df_wastrm),
) do instrumented
    reference = rename(
        select(df_rgular, :input_program, :performance, :runtime_iteration),
        :performance => :performance_baseline,
    )
    select(
        outerjoin(reference, instrumented, on=[:runtime_iteration, :input_program]),
        [:performance, :performance_baseline] =>
            ((measured, reference_time) -> measured ./ reference_time) =>
            :overhead,
        # What to keep:
        :input_program, :setup, :analysis,
    )
end


# vcat(df_wasabi_instruction_overhead, df_wastrm_instruction_overhead) |>
# @vlplot(
#   :boxplot,
#   encoding={
#     row={
#       field="analysis",
#       title="Analysis",
#     },
#     column={
#       field="input_program",
#       title="Input Program",
#     },  
#     color={
#       field="setup",
#       type="nominal",
#       title="Instrumentation Platform",
#       legend={
#         orient="buttom",
#         titleOrient="left",
#       }
#     },
#     x={
#       field="setup",
#       type="nominal",
#       axis="null",
#     },
#     y={
#       field="overhead",
#       type="quantitative",
#       axis={
#         title="Overhead (X)",
#         grid=false,
#       },
#       scale={
#         type="log",
#         base=10,
#       },
#     },
#   },
#   config={
#     spacing=100,
#     view={stroke=:transparent},
#     axis={domainWidth=1}
#   },
# )
"Not plotting since its huge"

In [None]:
# Aggregate all overhead
# Collapse the per-measurement overheads into one median per (program, setup, analysis).
df_wasabi_instruction_overhead_single, df_wastrm_instruction_overhead_single = map(
    (df_wasabi_instruction_overhead, df_wastrm_instruction_overhead),
) do overheads
    combine(
        groupby(overheads, Cols(:input_program, :setup, :analysis)),
        :overhead => median => :overhead,
    )
end
"Aggregated!"

In [None]:
"""
    limit_and_format(n::Float64)::String

Format `n` with two decimals, zero-padded on the left to at least five characters
(e.g. `1.5` becomes `"01.50"`). Longer values are returned unshortened
(e.g. `123.456` becomes `"123.46"`).
"""
function limit_and_format(n::Float64)::String
    # Width 5 (not 3): a width of 3 never pads, since two decimals already need 4 characters.
    return @sprintf("%05.2f", n)
end

In [None]:
# Heatmap of the Wasabi overhead (rows with a missing `analysis` are dropped first):
# analysis on the y axis, input program on the x axis, cell colour = overhead (log scale),
# cell label = first five characters of the overhead value.
wasabi_overhead_plot = filter(row -> row.analysis !== missing, df_wasabi_instruction_overhead_single) |>
@vlplot(
  "transform"=[
    {
      "calculate"="substring(datum.overhead, 0, 5)",
      "as"="overhead_truncated"
    },
  ],
  encoding={
    y={
      field="analysis",
      type="nominal",
    },
    x={
      field="input_program",
      type="nominal",
      axis={title="Input Program",},
    },
  },
  layer=[
    {
      mark="rect",
      encoding={
        color={
          field="overhead",
          type="quantitative",
          scale={
            type="log",
            scheme="blues",
          },
          legend={
            title="Overhead for Wasabi",
            orient="top",
          }
        },
      },
    },
    {
      mark={
        type="text",
        fontSize="6",
      },
      encoding={
        text={
          field="overhead_truncated",
          type="nominal",
        },
      },
    },
  ],
  config={
    axis={grid=true, tickBand="extent",}
  },
)

# Write the chart to a PDF and also return it as the value of the cell.
wasabi_overhead_plot |> save("./working-dir/wasabi-overhead.pdf")
wasabi_overhead_plot

In [None]:
# Heatmap of the Wastrumentation overhead: analysis on the y axis, input program on the
# x axis, cell colour = overhead (log scale), cell label = first five characters of the
# overhead value. Unlike the Wasabi heatmap, no rows are filtered out beforehand.
wastrm_overhead_plot = df_wastrm_instruction_overhead_single |>
@vlplot(
  "transform"=[
    {
      "calculate"="substring(datum.overhead, 0, 5)",
      "as"="overhead_truncated"
    },
  ],
  encoding={
    y={
      field="analysis",
      type="nominal",
    },
    x={
      field="input_program",
      type="nominal",
      axis={title="Input Program",},
    },
  },
  layer=[
    {
      mark="rect",
      encoding={
        color={
          field="overhead",
          type="quantitative",
          scale={
            type="log",
            scheme="blues",
          },
          legend={
            title="Overhead for Wastrumentation",
            orient="top",
          }
        },
      },
    },
    {
      mark={
        type="text",
        fontSize="6",
      },
      encoding={
        text={
          field="overhead_truncated",
          type="nominal",
        },
      },
    },
  ],
  config={
    axis={grid=true, tickBand="extent",}
  },
)

# Write the chart to a PDF and also return it as the value of the cell.
wastrm_overhead_plot |> save("./working-dir/wastrm-overhead.pdf")
wastrm_overhead_plot


# On my macbook, these are the tte:
# bullet     - 1m18s
# funky-kart - 1m36s
# guiicons   - 1m17s
# rfxgen     - 1m03s
# rguistyler - 1m20s

In [None]:
using Printf

# Pair the Wasabi and Wastrumentation overheads per (program, analysis) and compute
# their ratio: Wasabi overhead / Wastrumentation overhead.
df_wasabi_wastrm_overhead = let
    wasabi_side = filter(
        row -> row.analysis !== missing,
        rename(select(df_wasabi_instruction_overhead_single, Not(:setup)), :overhead => :overhead_wasabi),
    )
    wastrm_side = rename(
        select(df_wastrm_instruction_overhead_single, Not(:setup)),
        :overhead => :overhead_wastrm,
    )
    transform(
        outerjoin(wasabi_side, wastrm_side, on=[:input_program, :analysis]),
        [:overhead_wasabi, :overhead_wastrm] =>
            ((wasabi, wastrm) -> wasabi ./ wastrm) =>
            :time_for_wasabi_per_time_for_wastrm,
    )
end

# Classify every ratio into its comparison category.
df_wasabi_wastrm_overhead = transform(
    df_wasabi_wastrm_overhead,
    :time_for_wasabi_per_time_for_wastrm => ByRow(performance_comparison) => :time_comparison_category,
)

In [None]:
# Categorical heatmap comparing the two platforms: analysis on the y axis, input program on
# the x axis, cell colour = comparison category, cell label = first five characters of the
# ratio (Wasabi overhead / Wastrumentation overhead).
comparison_plot = df_wasabi_wastrm_overhead |>
@vlplot(
  "transform"=[
    {
      "calculate"="substring(datum.time_for_wasabi_per_time_for_wastrm, 0, 5)",
      "as"="relative_overhead_truncated"
    },
  ],
  encoding={
    y={
      field="analysis",
      type="nominal",
    },
    x={
      field="input_program",
      type="nominal",
      axis={title="Input Program",},
    },
  },
  layer=[
    {
      mark="rect",
      encoding={
        color={
          field="time_comparison_category",
          type="ordinal",
          scale={
            scheme="redyellowgreen",
            domain=performance_ordinal_domain,
          },
          legend={
            # Matches the plotted ratio, which is overhead_wasabi / overhead_wastrm
            # (the original title had numerator and denominator swapped).
            title="Wasabi Overh. / Wastrm. Overh.",
            orient="top",
            direction="vertical"
          }
        },
      },
    },
    {
      mark={
        type="text",
        fontSize="6",
      },
      encoding={
        text={
          field="relative_overhead_truncated",
          type="nominal",
        },
      },
    },
  ],
  config={
    axis={grid=true, tickBand="extent",}
  },
)
comparison_plot

In [None]:
using Printf

# Aggregate all overhead
# Aggregate all overhead: one median per (program, setup, analysis).
df_wasabi_instruction_overhead_single = combine(groupby(df_wasabi_instruction_overhead, Cols(:input_program, :setup, :analysis)), :overhead => median => :overhead)
df_wastrm_instruction_overhead_single = combine(groupby(df_wastrm_instruction_overhead, Cols(:input_program, :setup, :analysis)), :overhead => median => :overhead)

# Pair both platforms per (program, analysis) and compute Wasabi overhead / Wastrumentation
# overhead. Rows without an `analysis` are dropped first: `analysis` is a join key, and
# `innerjoin` rejects missing key values by default.
df_wasabi_wastrm_overhead = let with_analysis = df -> filter(row -> !ismissing(row.analysis), df)
    transform(
        innerjoin(
            rename(select(with_analysis(df_wasabi_instruction_overhead_single), Not(:setup)), :overhead => :overhead_wasabi),
            rename(select(with_analysis(df_wastrm_instruction_overhead_single), Not(:setup)), :overhead => :overhead_wastrm),
            on=[:input_program, :analysis],
        ),
        [:overhead_wasabi, :overhead_wastrm] =>
            ((overhead_wasabi, overhead_wastrm) -> overhead_wasabi ./ overhead_wastrm) =>
            :time_for_wasabi_per_time_for_wastrm,
    )
end

"""
    limit_and_format(n)

Format `n` with two decimals, zero-padded to exactly five characters (e.g. `1.5` becomes
`"01.50"`). Throws an error when the formatted text is not five characters long, which
happens for values of 100 or more. `missing` is passed through unchanged.
"""
function limit_and_format(n::Float64)::String
    # Round to two decimal places and pad with zeros to at least 5 characters
    formatted = @sprintf("%05.2f", n)

    # Ensure the length is exactly 5 characters
    if length(formatted) != 5
        error("Formatted string does not meet the 5 character length requirement")
    end

    return formatted
end

# The ratio column can hold `missing` values; keep them missing instead of failing with a
# MethodError (mirrors `performance_comparison(::Missing)`).
limit_and_format(::Missing) = missing

# Derive two columns from the ratio: its comparison category and its formatted text label.
df_wasabi_wastrm_overhead = let ratio = :time_for_wasabi_per_time_for_wastrm
    transform(
        df_wasabi_wastrm_overhead,
        ratio => ByRow(performance_comparison) => :time_comparison_category,
        ratio => ByRow(limit_and_format) => :time_comparison_label,
    )
end

# Heatmap of the ratio Wasabi overhead / Wastrumentation overhead: analysis on the y axis,
# input program on the x axis, diverging log colour scale centred on 1, and the formatted
# ratio as the cell label.
relative_overhead_plot = df_wasabi_wastrm_overhead |>
@vlplot(
  encoding={
    y={
      field="analysis",
      type="nominal",
    },
    x={
      field="input_program",
      type="nominal",
      axis={title="Input Program",},
    },
  },
  layer=[
    {
      mark="rect",
      encoding={
        color={
          field="time_for_wasabi_per_time_for_wastrm",
          type="quantitative",
          scale={
            domainMid=1,
            type="log",
            scheme="redblue",
          },
          legend={
            # Matches the plotted field, which is overhead_wasabi / overhead_wastrm
            # (the original title had numerator and denominator swapped).
            title="Wasabi Overh. / Wastrm. Overh.",
            orient="top",
          }
        },
      },
    },
    {
      mark={
        type="text",
        fontSize="6",
      },
      encoding={
        text={
          field="time_comparison_label",
          type="nominal",
        },
      },
    },
  ],
  config={
    axis={grid=true, tickBand="extent",},
  },
)
# Write the chart to a PDF and also return it as the value of the cell.
relative_overhead_plot |> save("./working-dir/relative-overhead-wastrm-wasabi.pdf")
relative_overhead_plot


## Let's do the same performance evaluation, but now using only the first run!

In [None]:
# Reference measurements: the rows of `df_rgular` whose runtime iteration is 1,
# without the `setup` column and with `performance` renamed so that it does not
# clash with the instrumented measurements when joined.
baseline = let first_iteration = subset(df_rgular, :runtime_iteration => ByRow(==(1)))
  rename(
    select(first_iteration, Not(:setup)),
    :performance => :performance_baseline,
  )
end

# Start from the raw measurements; they are only replaced below when the
# performance column was loaded as strings.
df_wasabi_timeout_computed = df_wasabi
df_wastrm_timeout_computed = df_wastrm

# A string-typed performance column can contain the marker "timeout 10s". Such
# rows are counted as 10000 (presumably the 10 s limit in milliseconds — confirm
# against the "time-unit" column); every other entry is parsed as Float64.
if isa(df_wasabi.performance, Vector{String})
  df_wasabi_timeout_computed = transform(
    df_wasabi,
    :performance => ByRow(x -> parse(Float64, x == "timeout 10s" ? "10000" : x)) => :performance,
  )
end
if isa(df_wastrm.performance, Vector{String})
  df_wastrm_timeout_computed = transform(
    df_wastrm,
    :performance => ByRow(x -> parse(Float64, x == "timeout 10s" ? "10000" : x)) => :performance,
  )
end

# Everything downstream does arithmetic on these columns; each message names the
# data frame that is actually being checked.
@assert isa(df_wasabi_timeout_computed.performance, Vector{Float64}) "df_wasabi should be parsed to Float64"
@assert isa(df_wastrm_timeout_computed.performance, Vector{Float64}) "df_wastrm should be parsed to Float64"

using Statistics

# Reduce repeated measurements of the same run (same iteration, setup, runtime,
# input program and time unit) to their median performance, for both tools.
df_wasabi_aggr, df_wastrm_aggr = map((df_wasabi_timeout_computed, df_wastrm_timeout_computed)) do measurements
  per_run = groupby(measurements, Cols(:runtime_iteration, :setup, :runtime, :input_program, "time-unit"))
  combine(per_run, :performance => median => :performance)
end

# The setup label has the form "[<tool> - <analysis>]": move the analysis name
# into its own column and replace the setup column by the bare tool name.
df_wasabi_sep_analyses = let setup_pattern = r"\[wasabi - (.+)\]"
  transform(
    df_wasabi_aggr,
    :setup => ByRow(label -> first(match(setup_pattern, label).captures)) => :analysis,
    :setup => (_ -> "wasabi") => :setup,
  )
end
df_wastrm_sep_analyses = let setup_pattern = r"\[wastrumentation - (.+)\]"
  transform(
    df_wastrm_aggr,
    :setup => ByRow(label -> first(match(setup_pattern, label).captures)) => :analysis,
    :setup => (_ -> "wastrumentation") => :setup,
  )
end

# Overhead = instrumented performance divided by the baseline performance of the
# matching run (same input program, runtime, runtime iteration and time unit).
# Only the overhead plus the identifying columns are kept.
df_wasabi_instruction_overhead = let
  with_baseline = innerjoin(
    baseline,
    df_wasabi_sep_analyses,
    on=[:input_program, :runtime, :runtime_iteration, "time-unit"],
  )
  select(
    with_baseline,
    [:performance, :performance_baseline] =>
      ((instrumented, reference) -> instrumented ./ reference) =>
      :overhead,
    :input_program,
    :setup,
    :analysis,
  )
end

df_wastrm_instruction_overhead = let
  with_baseline = innerjoin(
    baseline,
    df_wastrm_sep_analyses,
    on=[:input_program, :runtime, :runtime_iteration, "time-unit"],
  )
  select(
    with_baseline,
    [:performance, :performance_baseline] =>
      ((instrumented, reference) -> instrumented ./ reference) =>
      :overhead,
    :input_program,
    :setup,
    :analysis,
  )
end

# One median overhead per (input program, setup, analysis) for each tool.
df_wasabi_instruction_overhead_single = combine(
  groupby(df_wasabi_instruction_overhead, Cols(:input_program, :setup, :analysis)),
  :overhead => median => :overhead,
)
df_wastrm_instruction_overhead_single = combine(
  groupby(df_wastrm_instruction_overhead, Cols(:input_program, :setup, :analysis)),
  :overhead => median => :overhead,
)

# Match the two tools on (input program, analysis) and express the Wasabi
# overhead as a multiple of the Wastrumentation overhead.
df_wasabi_wastrm_overhead = let
  wasabi_overheads = rename(
    select(df_wasabi_instruction_overhead_single, Not(:setup)),
    :overhead => :overhead_wasabi,
  )
  wastrm_overheads = rename(
    select(df_wastrm_instruction_overhead_single, Not(:setup)),
    :overhead => :overhead_wastrm,
  )
  matched = innerjoin(wasabi_overheads, wastrm_overheads, on=[:input_program, :analysis])
  transform(
    matched,
    [:overhead_wasabi, :overhead_wastrm] =>
      ((wasabi, wastrm) -> wasabi ./ wastrm) =>
      :time_for_wasabi_per_time_for_wastrm,
  )
end

# Replace the numeric ratio by the category from `performance_comparison` and
# draw one coloured cell per (analysis, input program).
#
# NOTE(review): the whole pipeline is the right-hand side of the assignment, so
# after this cell `df_wasabi_wastrm_overhead` is bound to the resulting plot and
# no longer to a data frame.
df_wasabi_wastrm_overhead = let
  categorised = transform(
    df_wasabi_wastrm_overhead,
    :time_for_wasabi_per_time_for_wastrm => ByRow(performance_comparison) => :time_for_wasabi_per_time_for_wastrm,
  )
  categorised |> @vlplot(
    :rect,
    encoding={
      color={
        field="time_for_wasabi_per_time_for_wastrm",
        type="ordinal",
        scale={
          scheme="blueorange",
          domain=performance_ordinal_domain
        },
      },
      x={
        field="analysis",
        type="nominal",
      },
      y={
        field="input_program",
        type="nominal",
      },
    },
    config={
      spacing=100,
      view={stroke=:transparent},
      axis={domainWidth=1}
    },
  )
end
