# General Setup

Install the dependencies if they are not installed yet.

In [None]:
using Pkg
Pkg.add("CSVFiles")
Pkg.add("VegaLite")
Pkg.add("DataFrames")

using VegaLite, CSVFiles, DataFrames

Read in the `.csv` data files for the runtime, code-size and executes-once analyses

In [None]:
# Load every analysis CSV from ./working-dir into its own DataFrame.
# Name prefixes follow the file names: rgular = regular, wasabi = wasabi,
# wastrm = wastrumentation.

# Runtime measurements.
df_rgular = load("./working-dir/runtime-analysis-regular.csv") |> DataFrame
df_wasabi = load("./working-dir/runtime-analysis-wasabi.csv") |> DataFrame
df_wastrm = load("./working-dir/runtime-analysis-wastrumentation.csv") |> DataFrame

# Code-size measurements.
df_rgular_code_size = load("./working-dir/code-size-analysis-regular.csv") |> DataFrame
df_wasabi_code_size = load("./working-dir/code-size-analysis-wasabi.csv") |> DataFrame
df_wastrm_code_size = load("./working-dir/code-size-analysis-wastrumentation.csv") |> DataFrame

# Executes-once measurements.
df_rgular_exec_once = load("./working-dir/executes-once-analysis-regular.csv") |> DataFrame
df_wasabi_exec_once = load("./working-dir/executes-once-analysis-wasabi.csv") |> DataFrame
df_wastrm_exec_once = load("./working-dir/executes-once-analysis-wastrumentation.csv") |> DataFrame

# Value shown as the cell result once every file has loaded.
"Data files read"

## Plot Code Size Increase

In [None]:
# Keep only the rows of the "forward" analysis for each instrumentation platform.
df_wasabi_code_size_forward =
    filter(:setup => ==("[wasabi - forward]"), df_wasabi_code_size)
df_wastrm_code_size_forward =
    filter(:setup => ==("[wastrumentation - forward]"), df_wastrm_code_size)

# Stack the regular sizes and both "forward" variants into a single table.
code_size_forward =
    vcat(df_rgular_code_size, df_wasabi_code_size_forward, df_wastrm_code_size_forward)

# One facet column per input program; inside each facet one bar per setup.
code_size_forward |> @vlplot(
  :bar,
  column={field="input_program", type="nominal"},
  encoding={
    color={field="setup", type="nominal"},
    x={field="setup", type="nominal", axis={title="Input program"}},
    y={
      field="size_bytes",
      type="quantitative",
      scale={},
      axis={title="Program size (bytes)", grid=false},
    },
  },
  config={spacing=100, view={stroke=:transparent}, axis={domainWidth=1}},
)


In [None]:
# Side-by-side size table: one row per input program, with one renamed size
# column per setup (regular, wasabi forward, wastrumentation forward).
let size_column = (df, name) -> select(rename(df, :size_bytes => name), Not(:setup))
    innerjoin(
        size_column(df_rgular_code_size, :regular_size),
        size_column(df_wasabi_code_size_forward, :wasabi_size),
        size_column(df_wastrm_code_size_forward, :wastrm_size),
        on=:input_program,
    )
end


In [None]:
# Regular program sizes, renamed so they can sit next to the instrumented sizes.
baseline = rename(
    select(df_rgular_code_size, Not([:setup])),
    :size_bytes => :size_bytes_baseline,
)

# For each platform: join with the baseline per input program and express the
# instrumented size as a multiple of the regular size.
df_wasabi_forward_code_incr, df_wastrm_forward_code_incr =
    map((df_wasabi_code_size_forward, df_wastrm_code_size_forward)) do instrumented
        joined = innerjoin(baseline, instrumented, on=:input_program)
        select(
            joined,
            :input_program,
            :setup,
            [:size_bytes, :size_bytes_baseline] =>
                ((instrumented_size, regular_size) -> instrumented_size ./ regular_size) =>
                :code_increase,
        )
    end

# NOTE(review): both a positional :bar and mark="tick" are passed below;
# confirm which of the two VegaLite.jl actually applies.
vcat(df_wasabi_forward_code_incr, df_wastrm_forward_code_incr) |> @vlplot(
  :bar,
  mark="tick",
  column={field="input_program", type="nominal"},
  encoding={
    color={field="setup", type="nominal"},
    x={field="setup", type="nominal", axis={title="Input program"}},
    y={
      # TODO: check y axis log scale
      field="code_increase",
      type="quantitative",
      scale={},
      axis={title="Relative overhead (x)", grid=false},
    },
  },
  config={spacing=100, view={stroke=:transparent}, axis={domainWidth=1}},
)


## Plot Engine Warmup Over Time

In [None]:
# Warm-up of the regular setup: median "performance" per runtime iteration,
# coloured by input program, on a log-scaled y axis.
df_rgular |> @vlplot(
  :line,
  mark={:errorband, extent=:ci},
  encoding={
    x={field="runtime_iteration", type="nominal", scale={"rangeStep"=12}},
    y={
      aggregate="median",
      field="performance",
      type="quantitative",
      scale={type="log"},
      title="Execution time (ms)",
    },
    color={field="input_program", type="nominal"},
  },
  config={line={point=true}, scale={useUnaggregatedDomain=true}},
)


In [None]:
# Warm-up of the wasabi "forward" analysis: median "performance" per runtime
# iteration, coloured by input program, on a log-scaled y axis.
filter(:setup => ==("[wasabi - forward]"), df_wasabi) |> @vlplot(
  :line,
  mark={:errorband, extent=:ci},
  encoding={
    x={field="runtime_iteration", type="nominal", scale={"rangeStep"=12}},
    y={
      aggregate="median",
      field="performance",
      type="quantitative",
      scale={type="log"},
      title="Execution time (ms)",
    },
    color={field="input_program", type="nominal"},
  },
  config={line={point=true}, scale={useUnaggregatedDomain=true}},
)


In [None]:
# Warm-up of the wastrumentation "forward" analysis: median "performance" per
# runtime iteration, coloured by input program, on a log-scaled y axis.
filter(:setup => ==("[wastrumentation - forward]"), df_wastrm) |> @vlplot(
  :line,
  mark={:errorband, extent=:ci},
  encoding={
    x={field="runtime_iteration", type="nominal", scale={"rangeStep"=12}},
    y={
      aggregate="median",
      field="performance",
      type="quantitative",
      scale={type="log"},
      title="Execution time (ms)",
    },
    color={field="input_program", type="nominal"},
  },
  config={line={point=true}, scale={useUnaggregatedDomain=true}},
)


## Plot Runtime Performances

In [None]:
# Baseline for the runtime comparison: regular measurements of every iteration
# after the fifth, without the :setup column and with :performance renamed so
# it can be joined next to the instrumented measurements.
baseline =
    rename(
      select(
        subset(df_rgular, :runtime_iteration => i -> i .> 5),
        Not([:setup]),
      ),
      :performance => :performance_baseline,
    )

# Default: use the frames as loaded (the column is already numeric).
df_wasabi_timeout_computed = df_wasabi
df_wastrm_timeout_computed = df_wastrm

# If the performance column was read as strings, parse it to Float64 and
# replace the literal "timeout 10s" marker with 10000
# (presumably 10 s expressed in ms -- confirm against the benchmark harness).
if isa(df_wasabi.performance, Vector{String})
  df_wasabi_timeout_computed = transform(df_wasabi, :performance => ByRow((x) -> parse(Float64, x == "timeout 10s" ? "10000" : x)) => :performance)
end
if isa(df_wastrm.performance, Vector{String})
  df_wastrm_timeout_computed = transform(df_wastrm, :performance => ByRow((x) -> parse(Float64, x == "timeout 10s" ? "10000" : x)) => :performance)
end

# Each message names the frame it actually checks, so a failure points at the right file.
@assert isa(df_wasabi_timeout_computed.performance, Vector{Float64}) "df_wasabi should be parsed to Float64"
@assert isa(df_wastrm_timeout_computed.performance, Vector{Float64}) "df_wastrm should be parsed to Float64"


In [None]:
using Statistics

# Aggregate computations per 'run': one median performance value per
# (runtime iteration, setup, runtime, input program, time unit).
df_wasabi_aggr = combine(groupby(df_wasabi_timeout_computed, Cols(:runtime_iteration, :setup, :runtime, :input_program, "time-unit")), :performance => median => :performance)
df_wastrm_aggr = combine(groupby(df_wastrm_timeout_computed, Cols(:runtime_iteration, :setup, :runtime, :input_program, "time-unit")), :performance => median => :performance)

# Split a setup label such as "[wasabi - forward]" into the analysis name
# (new :analysis column) and overwrite :setup with the bare platform name.
df_wasabi_sep_analyses = transform(
  df_wasabi_aggr,
  :setup => ByRow(setup -> match(r"\[wasabi - (.+)\]", setup).captures[1]) => :analysis,
  :setup => (_ -> "wasabi") => :setup,
)
df_wastrm_sep_analyses = transform(
  df_wastrm_aggr,
  :setup => ByRow(setup -> match(r"\[wastrumentation - (.+)\]", setup).captures[1]) => :analysis,
  :setup => (_ -> "wastrumentation") => :setup,
)

# Overhead = instrumented performance / baseline performance, matched on
# input program, runtime, runtime iteration and time unit.
df_wasabi_instruction_overhead = select(
  innerjoin(baseline, df_wasabi_sep_analyses, on=[:input_program, :runtime, :runtime_iteration, "time-unit"]),
  [:performance, :performance_baseline]
    => ((performance, performance_baseline) -> performance ./ performance_baseline)
    => :overhead,
  # What to keep:
  :input_program, :setup, :analysis,
)

df_wastrm_instruction_overhead = select(
  innerjoin(baseline, df_wastrm_sep_analyses, on=[:input_program, :runtime, :runtime_iteration, "time-unit"]),
  [:performance, :performance_baseline]
    => ((performance, performance_baseline) -> performance ./ performance_baseline)
    => :overhead,
  # What to keep:
  :input_program, :setup, :analysis,
)

# Boxplot grid: one row per analysis, one column per input program,
# one box per instrumentation platform.
vcat(df_wasabi_instruction_overhead, df_wastrm_instruction_overhead) |>
@vlplot(
  :boxplot,
  encoding={
    row={
      field="analysis",
      title="Analysis",
    },
    column={
      field="input_program",
      title="Input Program",
    },
    color={
      field="setup",
      type="nominal",
      title="Instrumentation Platform",
      legend={
        orient="bottom",
        titleOrient="left",
      }
    },
    x={
      field="setup",
      type="nominal",
      # `nothing` is emitted as JSON null, which removes the x axis;
      # the colour legend already identifies the platform.
      axis=nothing,
    },
    y={
      field="overhead",
      type="quantitative",
      axis={
        title="Overhead (x)",
        grid=false,
      },
      scale={
        type="log",
        base=10,
      },
    },
  },
  config={
    spacing=100,
    view={stroke=:transparent},
    axis={domainWidth=1}
  },
)


In [None]:
# Collapse all overhead samples into a single median per
# (input program, setup, analysis).
df_wasabi_instruction_overhead_single = combine(
  groupby(df_wasabi_instruction_overhead, [:input_program, :setup, :analysis]),
  :overhead => median => :overhead,
)
df_wastrm_instruction_overhead_single = combine(
  groupby(df_wastrm_instruction_overhead, [:input_program, :setup, :analysis]),
  :overhead => median => :overhead,
)

# Pair both platforms per (input program, analysis) and add the ratio
# wasabi overhead / wastrumentation overhead.
df_wasabi_wastrm_overhead = let
  wasabi = rename(
    select(df_wasabi_instruction_overhead_single, Not(:setup)),
    :overhead => :overhead_wasabi,
  )
  wastrm = rename(
    select(df_wastrm_instruction_overhead_single, Not(:setup)),
    :overhead => :overhead_wastrm,
  )
  paired = innerjoin(wasabi, wastrm, on=[:input_program, :analysis])
  transform(
    paired,
    [:overhead_wasabi, :overhead_wastrm] =>
      ((wasabi_overhead, wastrm_overhead) -> wasabi_overhead ./ wastrm_overhead) =>
      :time_for_wasabi_per_time_for_wastrm,
  )
end

# E.g. overhead wasabi: 10x
#      overhead wastrm: 50x
#
#      ==> wasabi faster (lower overhead; 10 <= 50)
#
#      ==> wasabi / wastrm = 0.2
#      ==> marked as "1. wasabi is much faster"

# Ordered category labels; also used as the colour-scale domain of the heatmaps.
performance_ordinal_domain = [
  "1. wasabi is much faster",
  "2. wasabi faster",
  "3. comparable performance",
  "4. wasabi is slower",
  "5. wasabi is much slower",
]

# Category boundaries for n = wasabi overhead / wastrumentation overhead:
#
#   0 ==[1]== 0.3 ==[2]== 0.95 ==[3]== 1.05 ==[4]== 3 ==[5]== 100 ==[0.]== +∞
#
"""
    performance_comparison(n::Real)

Map the ratio `n` (wasabi overhead divided by wastrumentation overhead) to a
category label.

Ratios in `[0, 100)` map to one of the five entries of
`performance_ordinal_domain`; ratios of 100 or more get a separate "0. ..."
label that is not part of that domain. Anything else (negative values, `NaN`)
yields `"Input is out of range"`.

Accepts any `Real`, so integer or `Float32` ratios work as well as `Float64`.
"""
function performance_comparison(n::Real)
  if 0 <= n <= 0.3
      return performance_ordinal_domain[1]
  elseif 0.3 < n <= 0.95
      return performance_ordinal_domain[2]
  elseif 0.95 < n <= 1.05
      return performance_ordinal_domain[3]
  elseif 1.05 < n <= 3
      return performance_ordinal_domain[4]
  elseif 3 < n < 100
      return performance_ordinal_domain[5]
  elseif n >= 100
      return "0. ❌ wasabi is INCREADIBLY SLOW"
  else
      # Reached for negative ratios and NaN, which fail every comparison above.
      return "Input is out of range"
  end
end

# Replace the numeric ratio by its category label. The table is kept in its
# own statement so that `df_wasabi_wastrm_overhead` keeps holding a DataFrame
# instead of being bound to the plot object returned by `@vlplot`.
df_wasabi_wastrm_overhead = transform(
  df_wasabi_wastrm_overhead,
  :time_for_wasabi_per_time_for_wastrm
  =>
  ByRow(performance_comparison)
  =>
  :time_for_wasabi_per_time_for_wastrm,
)

# Heatmap: input program on x, analysis on y, colour = comparison category.
df_wasabi_wastrm_overhead |>
@vlplot(
  :rect,
  encoding={
    color={
      field="time_for_wasabi_per_time_for_wastrm",
      type="ordinal",
      scale={
        scheme="blueorange",
        domain=performance_ordinal_domain,
      }
    },
    x={
      field="input_program",
      type="nominal",
    },
    y={
      field="analysis",
      type="nominal",
    },
  },
  config={
    spacing=100,
    view={stroke=:transparent},
    axis={domainWidth=1}
  },
)


In [None]:
using Printf

# Collapse all overhead samples into a single median per
# (input program, setup, analysis).
df_wasabi_instruction_overhead_single = combine(
  groupby(df_wasabi_instruction_overhead, [:input_program, :setup, :analysis]),
  :overhead => median => :overhead,
)
df_wastrm_instruction_overhead_single = combine(
  groupby(df_wastrm_instruction_overhead, [:input_program, :setup, :analysis]),
  :overhead => median => :overhead,
)

# Pair both platforms per (input program, analysis) and add the ratio
# wasabi overhead / wastrumentation overhead.
df_wasabi_wastrm_overhead = let
  wasabi = rename(
    select(df_wasabi_instruction_overhead_single, Not(:setup)),
    :overhead => :overhead_wasabi,
  )
  wastrm = rename(
    select(df_wastrm_instruction_overhead_single, Not(:setup)),
    :overhead => :overhead_wastrm,
  )
  paired = innerjoin(wasabi, wastrm, on=[:input_program, :analysis])
  transform(
    paired,
    [:overhead_wasabi, :overhead_wastrm] =>
      ((wasabi_overhead, wastrm_overhead) -> wasabi_overhead ./ wastrm_overhead) =>
      :time_for_wasabi_per_time_for_wastrm,
  )
end

"""
    limit_and_format(n::Real)::String

Format `n` as a fixed-width, five-character label with two decimals
(for example `1.5` becomes `"01.50"`).

Values above `99.99` are capped at `99.99`, so a very large ratio yields the
label `"99.99"` instead of aborting the whole column transformation.

# Throws
- `ErrorException` if the result is still not five characters long
  (for example for values of `-10` or below).
"""
function limit_and_format(n::Real)::String
  # Largest value that still prints as five characters with "%05.2f".
  capped = float(min(n, 99.99))

  # Zero-padded to a width of five, two digits after the decimal point.
  formatted = @sprintf("%05.2f", capped)

  # Guard the fixed-width contract for the remaining inputs.
  if length(formatted) != 5
      error("Formatted string does not meet the 5 character length requirement")
  end

  return formatted
end

# Derive two display columns from the numeric ratio:
#   :time_comparison_category -- category label, drives the cell colour
#   :time_comparison_label    -- fixed-width number, drawn as text in the cell
df_wasabi_wastrm_overhead = transform(
  df_wasabi_wastrm_overhead,
  :time_for_wasabi_per_time_for_wastrm => ByRow(performance_comparison) => :time_comparison_category,
  :time_for_wasabi_per_time_for_wastrm => ByRow(limit_and_format) => :time_comparison_label,
)

# Two layers sharing the same x/y encoding: coloured rectangles below,
# the formatted ratio as text on top.
df_wasabi_wastrm_overhead |>
@vlplot(
  encoding={
    x={
      field="input_program",
      type="nominal",
    },
    y={
      field="analysis",
      type="nominal",
    },
  },
  layer=[
    {
      mark="rect",
      encoding={
        color={
          field="time_comparison_category",
          type="ordinal",
          scale={
            scheme="blueorange",
            domain=performance_ordinal_domain,
          },
        },
      },
    },
    {
      mark={
        type="text",
        # Font size is a number in the Vega-Lite schema, not a string.
        fontSize=6,
      },
      encoding={
        text={
          field="time_comparison_label",
          type="nominal",
        },
      },
    },
  ],
  config={
    axis={grid=true, tickBand="extent",}
  },
)


## Let's do the same performance evaluation, but now take the first run!

In [None]:
# Baseline for the first-run comparison: regular measurements of runtime
# iteration 1 only, without the :setup column and with :performance renamed so
# it can be joined next to the instrumented measurements.
baseline =
    rename(
      select(
        subset(df_rgular, :runtime_iteration => i -> i .== 1),
        Not([:setup]),
      ),
      :performance => :performance_baseline,
    )

# Default: use the frames as loaded (the column is already numeric).
df_wasabi_timeout_computed = df_wasabi
df_wastrm_timeout_computed = df_wastrm

# If the performance column was read as strings, parse it to Float64 and
# replace the literal "timeout 10s" marker with 10000
# (presumably 10 s expressed in ms -- confirm against the benchmark harness).
if isa(df_wasabi.performance, Vector{String})
  df_wasabi_timeout_computed = transform(df_wasabi, :performance => ByRow((x) -> parse(Float64, x == "timeout 10s" ? "10000" : x)) => :performance)
end
if isa(df_wastrm.performance, Vector{String})
  df_wastrm_timeout_computed = transform(df_wastrm, :performance => ByRow((x) -> parse(Float64, x == "timeout 10s" ? "10000" : x)) => :performance)
end

# Each message names the frame it actually checks, so a failure points at the right file.
@assert isa(df_wasabi_timeout_computed.performance, Vector{Float64}) "df_wasabi should be parsed to Float64"
@assert isa(df_wastrm_timeout_computed.performance, Vector{Float64}) "df_wastrm should be parsed to Float64"

using Statistics

# Aggregate computations per 'run': one median performance value per
# (runtime iteration, setup, runtime, input program, time unit).
df_wasabi_aggr, df_wastrm_aggr =
  map((df_wasabi_timeout_computed, df_wastrm_timeout_computed)) do measurements
    per_run = groupby(
      measurements,
      Cols(:runtime_iteration, :setup, :runtime, :input_program, "time-unit"),
    )
    combine(per_run, :performance => median => :performance)
  end

# Split a setup label such as "[wasabi - forward]" into the analysis name
# (new :analysis column) and overwrite :setup with the bare platform name.
df_wasabi_sep_analyses = transform(
  df_wasabi_aggr,
  :setup => ByRow(label -> match(r"\[wasabi - (.+)\]", label).captures[1]) => :analysis,
  :setup => (_ -> "wasabi") => :setup,
)
df_wastrm_sep_analyses = transform(
  df_wastrm_aggr,
  :setup => ByRow(label -> match(r"\[wastrumentation - (.+)\]", label).captures[1]) => :analysis,
  :setup => (_ -> "wastrumentation") => :setup,
)

# Overhead = instrumented performance / baseline performance, matched on
# input program, runtime, runtime iteration and time unit.
df_wasabi_instruction_overhead, df_wastrm_instruction_overhead =
  map((df_wasabi_sep_analyses, df_wastrm_sep_analyses)) do instrumented
    joined = innerjoin(
      baseline,
      instrumented,
      on=[:input_program, :runtime, :runtime_iteration, "time-unit"],
    )
    select(
      joined,
      [:performance, :performance_baseline] =>
        ((instrumented_time, regular_time) -> instrumented_time ./ regular_time) =>
        :overhead,
      # What to keep:
      :input_program, :setup, :analysis,
    )
  end

# Collapse all overhead samples into a single median per
# (input program, setup, analysis).
df_wasabi_instruction_overhead_single, df_wastrm_instruction_overhead_single =
  map((df_wasabi_instruction_overhead, df_wastrm_instruction_overhead)) do overheads
    combine(
      groupby(overheads, Cols(:input_program, :setup, :analysis)),
      :overhead => median => :overhead,
    )
  end

# Pair both platforms per (input program, analysis) and add the ratio
# wasabi overhead / wastrumentation overhead.
df_wasabi_wastrm_overhead = let
  wasabi = rename(
    select(df_wasabi_instruction_overhead_single, Not(:setup)),
    :overhead => :overhead_wasabi,
  )
  wastrm = rename(
    select(df_wastrm_instruction_overhead_single, Not(:setup)),
    :overhead => :overhead_wastrm,
  )
  paired = innerjoin(wasabi, wastrm, on=[:input_program, :analysis])
  transform(
    paired,
    [:overhead_wasabi, :overhead_wastrm] =>
      ((wasabi_overhead, wastrm_overhead) -> wasabi_overhead ./ wastrm_overhead) =>
      :time_for_wasabi_per_time_for_wastrm,
  )
end

# Replace the numeric ratio by its category label. The table is kept in its
# own statement so that `df_wasabi_wastrm_overhead` keeps holding a DataFrame
# instead of being bound to the plot object returned by `@vlplot`.
df_wasabi_wastrm_overhead = transform(
  df_wasabi_wastrm_overhead,
  :time_for_wasabi_per_time_for_wastrm
  =>
  ByRow(performance_comparison)
  =>
  :time_for_wasabi_per_time_for_wastrm,
)

# Heatmap: input program on x, analysis on y, colour = comparison category.
df_wasabi_wastrm_overhead |>
@vlplot(
  :rect,
  encoding={
    color={
      field="time_for_wasabi_per_time_for_wastrm",
      type="ordinal",
      scale={
        scheme="blueorange",
        domain=performance_ordinal_domain
      },
    },
    x={
      field="input_program",
      type="nominal",
    },
    y={
      field="analysis",
      type="nominal",
    },
  },
  config={
    spacing=100,
    view={stroke=:transparent},
    axis={domainWidth=1}
  },
)
