In [None]:
# Automatically re-import modules whose source has changed before each cell is run,
# so that edits to eval_functions take effect without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [None]:
import eval_functions
from eval_functions import *
import pandas as pd

# Make tables interactive: pandas DataFrames are displayed through itables.
from itables import init_notebook_mode
import itables.options as opt
# all_interactive=True applies the interactive rendering to every DataFrame output.
# NOTE(review): connected=True presumably loads the JavaScript libraries from the
# internet instead of embedding them in the notebook — confirm against the itables docs.
init_notebook_mode(all_interactive=True, connected=True)
# NOTE(review): maxBytes=0 presumably switches off itables' downsampling of large
# tables, so that all rows are shown — confirm against the itables docs.
opt.maxBytes=0

In [None]:
# Identifiers of the individual solvers (name plus version); the selected ones
# are collected in TOOLS below.
OLD_NOODLER = "z3-noodler-7525ba0-2756940"
CVC5 = "cvc5-1.1.2"
Z3 = "z3-4.13.0"
Z3STR4 = "z3str4"
OSTRICH = "ostrich-1.4pre"
Z3STR3RE = "z3strRE"
Z3TRAU = "z3-trau-1.1"
Z3ALPHA = "z3-alpha-smtcomp2024"

# The Noodler version under evaluation. Leave it empty to fall back to OLD_NOODLER.
NOODLER = ""
NOODLER = OLD_NOODLER if NOODLER == "" else NOODLER

# Tools to evaluate, in the order given and without duplicates (NOODLER and
# OLD_NOODLER coincide when the fallback above is taken). Only the first
# occurrence of each tool is kept.
TOOLS = []
for _tool in (
    NOODLER,
    OLD_NOODLER,
    CVC5,
    Z3,
    # Z3STR4,
    # Z3ALPHA,
    # OSTRICH,
    # Z3STR3RE,
    # Z3TRAU,

    # you can add more tools here directly if needed
):
    if _tool not in TOOLS:
        TOOLS.append(_tool)

# Combinations of tools to be merged into one additional "tool" each: when the
# results are loaded, every entry is passed to add_vbs and the combination is
# named by joining its members with "+". (VBS presumably stands for "virtual
# best solver" — confirm in eval_functions.) Uncomment entries to enable them.
VBS = [
    # [Z3, CVC5],
    # [NOODLER, CVC5],
    # [NOODLER, Z3],
    # [NOODLER, Z3, CVC5],
]


# Name of the benchmark group to evaluate; must be one of the keys of
# BENCH_SELECTIONS below.
bench_selection = (
  # Select one:
    "NORMAL"
    # "INT_CONVS"
    # "QF_S"
    # "QF_SLIA"
    # "QF_SNIA"
    # "ZALIGVINDER"
    # "REGEX"
)

# Benchmark sets belonging to each selectable group.
BENCH_SELECTIONS = {
    "NORMAL": [
        "sygus_qgen",
        "denghang",
        "automatark",
        "stringfuzz",
        "redos",

        "norn",
        "slog",
        "slent",
        "omark",
        "kepler",
        "woorpje",
        "webapp",
        "kaluza",

        "transducer_plus",
        "leetcode",
        "str_small_rw",
        "pyex",
        "full_str_int",

        "snia",
    ],
    # Only benchmarks with to_int/from_int
    "INT_CONVS": [
        "stringfuzz",
        "str_small_rw",
        "full_str_int",
    ],
    "QF_S": [
        "sygus_qgen",
        "automatark",

        "slog",
        "woorpje",
    ],
    "QF_SLIA": [
        "denghang",
        "stringfuzz",

        "norn",
        "slent",
        "transducer_plus",
        "kepler",
        "woorpje",
        "webapp",
        "kaluza",
        "redos",

        "leetcode",
        "str_small_rw",
        "pyex",
        "full_str_int",
    ],
    "QF_SNIA": [
        "snia",
    ],
    "ZALIGVINDER": [
        "zaligvinder",
    ],
    "REGEX": [
        "regex",
    ],
}

# Fail loudly on an unknown selection. Silently skipping the assignment would
# leave BENCHES undefined on a fresh kernel, or, worse, keep the stale value of
# an earlier run of this cell.
if bench_selection not in BENCH_SELECTIONS:
    raise ValueError(
        f"Unknown bench_selection {bench_selection!r}; "
        f"expected one of {sorted(BENCH_SELECTIONS)}"
    )

# Copy the list so that later changes to BENCHES do not alter BENCH_SELECTIONS.
BENCHES = list(BENCH_SELECTIONS[bench_selection])

In [None]:
# Load the data for the selected benchmarks and tools (see eval_functions.load_benches).
df_all = load_benches(BENCHES, TOOLS, bench_selection)

# TODO VBS are ugly for now, will fix it
# Every tool combination in VBS is added to df_all under the name "<tool>+<tool>+..."
# and registered in TOOLS so that the evaluation cells pick it up as well.
for vbs in VBS:
  name = "+".join(vbs)
  df_all = add_vbs(df_all, vbs, name)
  # df_all is rebuilt from scratch whenever this cell runs, but TOOLS is not:
  # without this guard, re-running the cell would add the same name again.
  if name not in TOOLS:
    TOOLS.append(name)
  # tool_names_mapping[name] = " + ".join([tool_names_mapping[tool] for tool in vbs])

### Evaluation

In [None]:
# Print the summary table twice: first with separately=False, then with
# separately=True (see eval_functions.simple_table for the exact meaning).
for per_benchmark in (False, True):
    print(simple_table(df_all, TOOLS, BENCHES, separately=per_benchmark))

In [None]:
# The `start` argument of the cactus plot is set to 90 % of the number of rows
# in df_all, truncated to an integer.
cactus_start = int(len(df_all) * 0.9)
cactus_plot(
    df_all,
    TOOLS,
    start=cactus_start,
    height=4,
    width=4.2,
    put_legend_outside=False,
    logarithmic_y_axis=True,
    # tool_names=tool_names_mapping,
    # file_name_to_save="cactus",
    # num_of_x_ticks=6,
)

# One scatter plot of NOODLER against each of the other tools.
for other_tool in TOOLS:
    if other_tool == NOODLER:
        continue
    print(scatter_plot(
        df_all,
        NOODLER,
        other_tool,
        # xname=tool_names_mapping[NOODLER], yname=tool_names_mapping[other_tool],
        # file_name_to_save=f"{tool_names_mapping[other_tool]}_vs_{tool_names_mapping[NOODLER]}", show_legend=False, transparent=True
    ))


### More detailed evaluation

In [None]:
# Sanity check: make sure NOODLER never returns a result that differs from the
# other solvers' results (i.e. a wrong sat/unsat answer).
sanity_check(df_all, NOODLER, [tool for tool in TOOLS if tool!=NOODLER])

In [None]:
# List all formulae for which NOODLER's result is none of sat/unsat/unknown/TO/ERR.
get_invalid(df_all, NOODLER)

In [None]:
# Presumably the formulae on which NOODLER ended with an error (ERR) — see eval_functions.get_errors.
get_errors(df_all, NOODLER)

In [None]:
# Presumably the formulae on which NOODLER timed out (TO) — see eval_functions.get_timeouts.
get_timeouts(df_all, NOODLER)

In [None]:
# Presumably the formulae on which NOODLER answered unknown — see eval_functions.get_unknowns.
get_unknowns(df_all, NOODLER)

In [None]:
# Presumably the formulae NOODLER solved (answered sat or unsat) — see eval_functions.get_solved.
get_solved(df_all, NOODLER)

In [None]:
# Presumably the formulae on which NOODLER answered sat — see eval_functions.get_sat.
get_sat(df_all, NOODLER)

In [None]:
# Presumably the formulae on which NOODLER answered unsat — see eval_functions.get_unsat.
get_unsat(df_all, NOODLER)

### TODO: For papers (tables and figures with nicer tool names)

In [None]:
# Human-readable tool names for tables and figures.
#
# When NOODLER == OLD_NOODLER (the fallback taken if NOODLER is left empty) both
# constants are the same dictionary key, and the value of the later entry
# (OLD_NOODLER) is the one that is kept.
#
# TODO: Z3ALPHA has no entry in either mapping yet; add one before enabling it
# in TOOLS together with these mappings.
tool_names_mapping = {
    NOODLER : "Z3-Noodler",
    CVC5 : "cvc5",
    Z3 : "Z3",
    Z3STR4 : "Z3str4",  # fixed typo: was "Z3stsr4"
    OSTRICH : "OSTRICH",
    Z3STR3RE : "Z3str3RE",
    Z3TRAU : "Z3-Trau",
    OLD_NOODLER : "Z3-Noodler",
}

# LaTeX macro for each tool. The same duplicate-key rule applies here: if
# NOODLER == OLD_NOODLER, the key maps to "\\ziiinoodlerold".
tool_latex_mapping = {
    NOODLER : "\\ziiinoodler",
    CVC5 : "\\cvcv",
    Z3 : "\\ziii",
    Z3STR4 : "\\ziiistriv",
    OSTRICH : "\\ostrich",
    Z3STR3RE : "\\ziiistriiire",
    Z3TRAU : "\\ziiitrau",
    OLD_NOODLER : "\\ziiinoodlerold",
}

# TODO add table generation for latex