## Initialization (#init)

In [1]:
import pandas
import tabulate
import pickle
import seaborn
import matplotlib.pyplot as plt
import matplotlib as mpl
import os
import csv
import re
import numpy as np
import itertools
from statistics import mean
from collections import defaultdict
from enum import Enum
from matplotlib.lines import Line2D
from matplotlib.patches import Rectangle

ClassifyBy = Enum('ClassifyBy', ('Levels', 'Stats', 'Group', 'StatsOnly'))
ALL_CLASSIFICATIONS = (
    ClassifyBy.Levels, 
    ClassifyBy.Stats, 
    ClassifyBy.StatsOnly, 
    ClassifyBy.Group
)
# Classification method
DEFAULT_CLASSIFICATION = ClassifyBy.Group

from IPython.core.display import display, HTML
display(HTML("<style>div.output_area pre {white-space: pre;}</style>"))

PREFIX = "/home/debian/bench/"
PREFIX2 = "/home/debian/home1/bench/"
PREFIX3 = "/home1/debian/bench/"

db = None
def load_db():
    global db
    with open('db.pickle', 'rb') as handle:
        db = pickle.load(handle)
load_db()

# Directory, where figures are stored
FIGS_DIR = "figs"
if not os.path.exists(FIGS_DIR):
    os.mkdir(FIGS_DIR)

def to_range(i):
    i_range = len(str(i))
    if i_range < 11:
        return f"{10**(i_range-1)}-{10**i_range - 1}"
    else:
        return f"{10**10}+"
          
def is_real(x):
    try:
        float(x)
        return True
    except ValueError:
        return False        

def value_to_float(val, timeout):
    if val.startswith('TO') or val == 'ERR':
        return timeout
    else:
        return np.NaN
    
def to_float_with_timeout(series, timeout):
    return [float(f) if is_real(f) else value_to_float(f, timeout) for f in series]

def df_to_float(df, timeout, allow_timeouts=False):
    return df.apply(lambda x: to_float_with_timeout(x, timeout)).dropna(how='all', axis='columns')

def unify_tool(tool):
    return tool_to_output[
        tool.replace('-runtime','').replace('-backward','-bwd').replace('-nt','').replace('-min','').replace('-incl','')
    ]
        
def get_benchmark_name(bench, classification=DEFAULT_CLASSIFICATION):
    if classification == ClassifyBy.Stats:
        if bench not in db.keys() or 'max_square' not in db[bench].keys() or 'afa_transitions' not in db[bench].keys(): 
            return "?" 	
        level = get_benchmark_name(bench, ClassifyBy.Levels)
        square = to_range(db[bench]['max_square'])
        trans = to_range(db[bench]['afa_transitions'])
        return f"{level}: {str(trans).rjust(10)} transitions, {str(square).rjust(10)} size"
    elif classification == ClassifyBy.StatsOnly:
        if bench not in db.keys() or 'max_square' not in db[bench].keys() or 'afa_transitions' not in db[bench].keys(): 
            return "?" 	
        afa_terms = to_range(db[bench].get('afa-minterms', '?'))
        square = to_range(db[bench]['max_square'])
        trans = to_range(db[bench]['afa_transitions'])
        trans_square = to_range(db[bench]['afa_transitions']*db[bench]['max_square'])
        #return f"{str(trans).rjust(10)} transitions, {str(square).rjust(10)} size"
        return f"{afa_terms}"
    elif classification == ClassifyBy.Levels:
        d = os.path.dirname(bench).split(os.sep)
        return os.path.join(*d[:{
            'armc-inclusion': 2, 
            'bool_comb': 3, 
            'email_filter': 1, 
            'noodler_cut': 2, 
            'noodler_hard': 2,
            'ltl_afa': 3,
            'stranger_afa': 1,
        }.get(d[0], 1)])
    elif classification == ClassifyBy.Group:
        if 'bool_comb/ere/QF_SLIA_Norn' in bench or 'bool_comb/ere/QF_S_sygus_qgen' in bench:
            return 'b-smt'
        elif 'email_filter' in bench:
            return 'b-regex'
        elif 'bool_comb/ere/boolean_and_loops' in bench or 'bool_comb/ere/date' in bench or 'bool_comb/ere/det_blowup' in bench or 'bool_comb/ere/password' in bench:
            return 'b-hand-made'
        elif 'armc-inclusion' in bench or "automata_inclusion" in bench:
            return 'b-armc-incl'
        elif 'bool_comb/cox' in bench or 'bool_comb/intersect' in bench:
            return 'b-param'
        elif 'ltl_afa/random_ltl' in bench:
            return 'a-ltl-rand'
        elif 'ltl_afa/parametric_ltl' in bench:
            return 'a-ltl-param'
        elif 'ltl_afa/created_ltl/nasa' in bench:
            # might include ltl_afa/created_ltl more
            return 'a-ltl-spec'
        elif 'ltl_afa/created_ltl/LTLf' in bench:
            # temporary include ltl_afa/created_ltl more
            return 'a-ltlf-patterns'
        elif 'ltl_afa/created_ltl/LTL-' in bench:
            # temporary include ltl_afa/created_ltl more
            return 'a-ltl'
        elif 'stranger_afa' in bench:
            return 'a-sloth'
        elif 'noodler' in bench:
            return 'a-noodler'
        else:
            d = os.path.dirname(bench).split(os.sep)
            return f"? {'_'.join(d[:2])}"

def save_figure(fig, ext=".png"):
    tgt = os.path.join(FIGS_DIR, fig + ext)    
    print(f"Saving to {tgt}")
    if ext == ".png":
        plt.savefig(tgt, backend="cairo", bbox_inches="tight", pad_inches=0.2)
    else:
        plt.savefig(tgt, bbox_inches="tight", pad_inches=0.2)

  from IPython.core.display import display, HTML


In [2]:
tool_to_latex = {
    'bwIC3': '\\abc',
    'Antisat': '\\minisat',
    'Automata': '\\dotnet',
    'Bisim': '\\bisim',
    'Brics': '\\brics',
    'CVC5': '\\cvc',
    'JaltImpact': '\\jaltimpact' ,
    'eNfa': '\\ours',
    'Mona': '\mona',
    'VATA': '\\vata',
    'Z3': '\\zthree'
}
tool_to_output = {
    'abc': 'bwIC3',
    'afaminisat': 'Antisat',
    'afaminisat-nt': 'Antisat',
    'automata': 'Automata',
    'bisim': 'Bisim',
    'bricks': 'Brics',
    'cvc5': 'CVC5',
    'jaltimpact': 'JaltImpact' ,
    'mata-nfa': 'eNfa',
    'mona': 'Mona',
    'vata': 'VATA',
    'z3': 'Z3'
}

In [3]:
def to_pandas(src: str, limit_columns_to: list[str], limit_tools_to=None, timeout=60) -> pandas.DataFrame:
    df = pandas.read_csv(src, delimiter=';')
    columns = [
        col for col in df.columns 
        if (col == 'name' or any(col.endswith(limit) for limit in limit_columns_to))
        and not col.startswith('mata-afa')
        and not col.startswith('cvc4')
        and (not col.startswith('afaminisat') or col.startswith('afaminisat-nt'))
        and (not col.startswith('bricks') or col.startswith('bricks-min'))
    ]
    df = df[columns]
    df['name'] = [name.replace(PREFIX, '').replace(PREFIX2, '').replace(PREFIX3, '') for name in df['name']]
    def rounder(x):
        if x == 'TO' or (is_real(x) and float(x) > timeout):
            return f'TO{timeout}'
        else:
            return x
    df = df.applymap(rounder)
    return df

def df_to_plottable(df, classification=DEFAULT_CLASSIFICATION, timeout=60):
    columns = ['benchmark', 'file', 'tool', 'duration']
    data = []
    for index, row in df.iterrows():
        file = row['name']
        bench = get_benchmark_name(file, classification)
        for tool, duration in [r for r in row.items() if r[0] != 'name']:
            if is_real(duration):
                data.append([bench, file, unify_tool(tool), float(duration)])
            elif duration == 'MISSING':
                continue
            else:
                data.append([bench, file, unify_tool(tool), timeout])
    return pandas.DataFrame(data, columns=columns)

def df_to_times_only(df, timeout):
    times = df[[c for c in df.columns if c.endswith('runtime')]]
    times = df_to_float(df, timeout=timeout, allow_timeouts=True)
    times.rename(columns={k: unify_tool(k) for k in times.columns}, inplace=True)
    return times

In [4]:
benchmarks = [
    ('ArmcInclusion', 60, os.path.join('data', 'automata_inclusion-timeout-60.csv'), ),
    ('BoolComb', 60, os.path.join('data', 'bool_comb-timeout-60.csv'), ),
    ('EmailFilter', 60, os.path.join('data', 'email_filter-timeout-60.csv'), ),
    ('LtlAfa', 60, os.path.join('data', 'ltl_afa-timeout-60.csv'), ),
    ('Noodler', 60, os.path.join('data', 'noodler-timeout-60.csv'), ),
    ('StrangerAfa', 60, os.path.join('data', 'stranger_afa-timeout-60.csv'), ),
]

## Input dataframe (#dataframe)

In [5]:
dataframe_map = {}
times_map = {}
plottable_map_per_level = {}
plottable_map_per_grp = {}
plottable_map_per_stats = {}

for bench, timeout, src in benchmarks:
    key = f"{bench}#{timeout}"
    df = to_pandas(src, limit_columns_to=['runtime'], timeout=timeout)
    dataframe_map[key] = df
    plottable_map_per_level[key] = df_to_plottable(df, ClassifyBy.Levels, timeout=timeout)
    plottable_map_per_grp[key] = df_to_plottable(df, ClassifyBy.Group, timeout=timeout)
    plottable_map_per_stats[key] = df_to_plottable(df, ClassifyBy.Stats, timeout=timeout)
    times_map[key] = df_to_times_only(df, timeout)
    
overall_df = pandas.concat([df for key, df in dataframe_map.items() ])
overall_times = df_to_times_only(overall_df, 300)
overall_plottable_per_group = df_to_plottable(overall_df, ClassifyBy.Group)
overall_plottable_per_level = df_to_plottable(overall_df, ClassifyBy.Levels)
overall_plottable_per_stats = df_to_plottable(overall_df, ClassifyBy.Stats)
overall_plottable_per_stats_only = df_to_plottable(overall_df, ClassifyBy.StatsOnly)

In [6]:
tools = sorted([unify_tool(t) for t in overall_df.columns if t != 'name' and t != 'bench'])
tool_len = len(tools)
color_map = {
    t: c for (t, c) in zip(tools, mpl.colormaps['tab20'].resampled(tool_len).colors)
}
color_map

{'Antisat': array([0.12156863, 0.46666667, 0.70588235, 1.        ]),
 'Automata': array([1.        , 0.49803922, 0.05490196, 1.        ]),
 'Bisim': array([0.17254902, 0.62745098, 0.17254902, 1.        ]),
 'Brics': array([0.83921569, 0.15294118, 0.15686275, 1.        ]),
 'CVC5': array([0.58039216, 0.40392157, 0.74117647, 1.        ]),
 'JaltImpact': array([0.54901961, 0.3372549 , 0.29411765, 1.        ]),
 'Mona': array([0.89019608, 0.46666667, 0.76078431, 1.        ]),
 'VATA': array([0.49803922, 0.49803922, 0.49803922, 1.        ]),
 'Z3': array([0.7372549 , 0.74117647, 0.13333333, 1.        ]),
 'bwIC3': array([0.09019608, 0.74509804, 0.81176471, 1.        ]),
 'eNfa': array([0.61960784, 0.85490196, 0.89803922, 1.        ])}

## Summary of statistics (#stats)

In [7]:
cols = 3
def count_short_stats(series):
    global cols
    vals = [float(v) if is_real(v) else value_to_float(v, 60) for v in series] or [-1]
    if cols == 3:
        to_len = len([s for s in series if str(s).startswith('TO')])
        err_len = len([s for s in series if str(s).startswith('ERR')])
        return (
            round(np.nanmean(vals), 1), 
            round(np.nanmedian(vals), 1), 
            f"{to_len}" if err_len == 0 else f"{to_len}({err_len})"
        )
    else:
        return (round(np.nanmean(vals), 1), len([s for s in series if str(s).startswith('TO')]))
grp_df = overall_df.copy()
grp_df['benchmark'] = [get_benchmark_name(b, ClassifyBy.Group) for b in grp_df['name']]
grp_df = grp_df[[c for c in grp_df.columns if c != 'name']]
#grp_df.groupby('benchmark').agg(['mean', 'median', count_timeouts])
b_param = 'b-param'
benchmark_names = sorted(list(set(grp_df['benchmark'])))
benchmark_names = [b for b in benchmark_names if b != b_param] + [b_param]
headers = ['tool'] + ["\\multicolumn{{{1}}}{{c}}{{{0}}}".format(b, cols) for b in benchmark_names]
data = []
def is_win(c):
    if is_real(c) and float(c) < 10:
        return '{{{0}}}'.format(c)
    else:
        return str(c)
def to_cell(stats, n):
    cell = stats.get(n, '-')
    if cell == '-' or (np.isnan(cell[0]) and (cell[-1] == 0 or cell[-1] == '0')):
        return '\\multicolumn{{{0}}}{{c|}}{{-}}'.format(cols)
    else:
        #return f"{cell[0]} | {cell[1]} | {cell[2]}"
        if cols == 3:
            return f"{is_win(cell[0])} & {is_win(cell[1])} & {is_win(cell[2])}"
        else:
            return f"{is_win(cell[0])} & {is_win(cell[1])}"
grp_items = grp_df.groupby('benchmark').agg(count_short_stats).items()
for group in grp_items:
    if group[0] == 'bench':
        continue
    tool = tool_to_latex[unify_tool(group[0])]
    stats = group[1].to_dict()
    data.append([tool] + [to_cell(stats, n) for n in benchmark_names])
data = sorted(data, key=lambda x: x[0])
dheaders = headers[:7]
ddata = [d[:7] for d in [data[0], data[1], data[5], data[6]]]
print(tabulate.tabulate(ddata, headers=dheaders, tablefmt='latex_raw'))
with open(os.path.join('figs', 'afa-stats.html'), 'w') as stats_handle:
    stats_handle.write(tabulate.tabulate(ddata, headers=dheaders, tablefmt='html'))
with open(os.path.join('figs', f'afa-stats{"-mean-only" if cols == 2 else ""}.tex'), 'w') as stats_handle:
    stats_handle.write("\n".join(tabulate.tabulate(ddata, headers=dheaders, tablefmt='latex_raw').split("\n")[2:-1]))
dheaders = ['tool'] + headers[7:]
ddata = [[d[0]] + d[7:] for d in data]
print(ddata)
with open(os.path.join('figs', 'bre-stats.html'), 'w') as stats_handle:
    stats_handle.write(tabulate.tabulate(ddata, headers=dheaders, tablefmt='html'))
with open(os.path.join('figs', f'bre-stats{"-mean-only" if cols == 2 else ""}.tex'), 'w') as stats_handle:
    stats_handle.write("\n".join(tabulate.tabulate(ddata, headers=dheaders, tablefmt='latex_raw').split("\n")[2:-1]))
print(tabulate.tabulate(ddata, headers=dheaders))

  round(np.nanmean(vals), 1),
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  round(np.nanmean(vals), 1),
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  round(np.nanmean(vals), 1),
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  round(np.nanmean(vals), 1),
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  round(np.nanmean(vals), 1),
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  round(np.nanmean(vals), 1),
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


\begin{tabular}{lllllll}
\hline
 tool        & \multicolumn{3}{c}{a-ltl-param}   & \multicolumn{3}{c}{a-ltl-rand}   & \multicolumn{3}{c}{a-ltl-spec}   & \multicolumn{3}{c}{a-ltlf-patterns}   & \multicolumn{3}{c}{a-noodler}   & \multicolumn{3}{c}{a-sloth}   \\
\hline
 \abc        & 25.4 & {0.6} & 134                & {0.1} & {0.1} & {0}              & {0.1} & {0.1} & {0}              & {0.1} & {0.1} & {0}                   & {0.1} & {0.1} & {3}             & {1.3} & {0.1} & 34            \\
 \bisim      & 58.2 & 60.0 & 308                 & {4.4} & {1.0} & {8}              & 32.9 & 60.0 & 32                 & 37.0 & 60.0 & 1013                    & 31.6 & 26.4 & 6644(8)           & 17.5 & {1.5} & 1087(10)       \\
 \jaltimpact & 47.0 & 60.0 & 205                 & {7.9} & {2.3} & 12               & {2.4} & {1.4} & 0(1)             & {4.0} & {2.8} & {0}                   & {3.8} & {1.8} & 186             & 24.1 & 15.4 & 958             \\
 \minisat    & 58.3 & 60.0 & 310                 

  round(np.nanmean(vals), 1),
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


## Lineplot (#line)

In [8]:
def sum_generator(series, timeout):
    sum = 0
    for num in series:
        if num >= timeout:
            yield None
        else:
            sum += num
            yield sum

In [9]:
dash_map = {
    'bwIC3': (0, ()),
    'Bisim': (0, (1, 1)),
    'Brics': (0, ()),
    'Automata': (0, (1, 1)),
    'JaltImpact': (0, ()),
    'Mona': (0, (1, 1)),
    'VATA': (0, ()),
    'Z3': (0, (1, 1)),
    'Antisat': (0, ()),
    'CVC5': (0, (1, 1)),
    'eNfa': (0, ()),
}

In [None]:
BEST_RATE = 0.2

fig, ax = plt.subplots(3, 3, figsize=(4*5, 3*6))
plt.subplots_adjust(top = 0.99, bottom=0.01, hspace=0.1, wspace=0.1)
bench_list = ['a-ltlf-patterns', 'a-ltl-rand', 'a-sloth', 'a-noodler', 'a-ltl-spec', 'b-hand-made', 'b-armc-incl', 'b-regex', 'b-smt']
bench_to_y = {
    'a-ltlf-patterns': 10**4, 'a-ltl-rand': 10**3, 'a-sloth': 10**4, 'a-noodler': 10**4, 'a-ltl-spec': 10**2,
    'b-hand-made': 10**1, 'b-armc-incl': 10**2, 'b-regex': 10**2, 'b-smt': 10**2}

for key, pdf in plottable_map_per_grp.items():
    bench, timeout = key.split('#')
    timeout = int(timeout)
    subgroups = set(list(pdf['benchmark']))
    for grp in subgroups:
        data = pdf[pdf['benchmark'] == grp]
        if grp.endswith('-param'):
            continue
        i = bench_list.index(grp)
        sums = defaultdict(list)
        grp_name = f"{grp}"

        for _, row in data.iterrows():
            sums[row['tool']].append(row['duration'])
        vdata = {}
        for k in sorted(sums.keys()):
            v = sums[k]
            values = list(sum_generator(sorted(v), timeout))
            val_len = len(values)
            to_len = len([a for a in values if a == None])
            if (val_len - to_len) / val_len > BEST_RATE:
                vdata[k] = values
        g = seaborn.lineplot(
            vdata, linewidth=3.5, ax=ax[i // 3, i % 3], palette=color_map
        )
        #g.set_ylim([0, bench_to_y[grp]])
        g.set(yscale="symlog")
        g.set_xticklabels(g.get_xticklabels(), rotation=30)
        g.set_title(f"{grp_name}", x=0.05)    
        if i % 3 == 0:
            g.set_ylabel("time [s]")
        if i // 3 == 2:
            g.set_xlabel("benchmark")
        seaborn.move_legend(g, "upper left", bbox_to_anchor=(0., 1), frameon=True)
        i += 1

#save_figure(f"cactus_plots_log_scale")
save_figure(f"cactus_plots")
save_figure(f"cactus", ext=".pdf")

  g.set_xticklabels(g.get_xticklabels(), rotation=30)
  g.set_xticklabels(g.get_xticklabels(), rotation=30)
  g.set_xticklabels(g.get_xticklabels(), rotation=30)
  g.set_xticklabels(g.get_xticklabels(), rotation=30)
  g.set_xticklabels(g.get_xticklabels(), rotation=30)
  g.set_xticklabels(g.get_xticklabels(), rotation=30)
  g.set_xticklabels(g.get_xticklabels(), rotation=30)


## Infering Parametric models

In [None]:
columns = ['bench', 'k', 'tool', 'duration']
def to_models(df):
    data = []
    for index, row in df.iterrows():
        if 'parametric_ltl' in row['name']:
            bench_parts = row['name'].split(os.sep)
        else:
            bench_parts = os.path.dirname(row['name']).split(os.sep)
        bench = os.path.join(*bench_parts[:3])
        k = bench_parts[-1]
        if is_real(k):
            k = int(k)
            for col, v in row.items():
                if col == 'name':
                    continue
                tool = unify_tool(col)
                if v == 'ERR':
                    time = np.NaN
                elif is_real(v):
                    time = float(v)
                elif v.startswith('TO'):
                    #time = np.NaN
                    time = int(v[2:])
                else:
                    continue
                data.append([bench, k, tool, time])
    tmp_data = sorted(data, key=lambda x: x[1])
    to_map = defaultdict(list)
    data = []
    for val in tmp_data:
        key = f"{val[0]}:{val[2]}"
        if val[-1] >= 60:
            # it's timeout:
            if not to_map[key] or to_map[key][-1] < 60:
                data.append(val)
        else:
            data.append(val) 
        to_map[key].append(val[-1])
    models = pandas.DataFrame(data, columns=columns)
    return models
for key, df in dataframe_map.items():
    models = to_models(df)

In [None]:
bench_to_paper_tag = {
    'bool_comb/intersect/longstrings': 'b-param(1)',
    'bool_comb/intersect/expbranching': 'b-param(2)',
    'bool_comb/intersect/exppaths1': "b-param(3?)",
    'bool_comb/intersect/exppaths2': "b-param(3?)",
    'bool_comb/cox/diff_sat': "b-param(4)",
    'bool_comb/cox/diff_unsat': "b-param(5)",
    'bool_comb/cox/inter_sat': "b-param(6)",
    'bool_comb/cox/inter_unsat': "b-param(7)",
    'ltl_afa/created_ltl/LTLf-specific': '??',
    'ltl_afa/parametric_ltl/lift_afas': 'a-param(Lift)',
    'ltl_afa/parametric_ltl/counter_afas': 'a-param(Counter)',
}
bench_to_paper = {
    'bool_comb/intersect/longstrings': 'longstrings',
    'bool_comb/intersect/expbranching': 'expbranching',
    'bool_comb/intersect/exppaths1': "exppaths1",
    'bool_comb/intersect/exppaths2': "exppaths2",
    'bool_comb/cox/diff_sat': "diff_sat",
    'bool_comb/cox/diff_unsat': "diff_unsat",
    'bool_comb/cox/inter_sat': "inter_sat",
    'bool_comb/cox/inter_unsat': "inter_unsat",
    'ltl_afa/created_ltl/LTLf-specific': 'LTLf-specific',
    'ltl_afa/parametric_ltl/lift_afas': 'lift_afas',
    'ltl_afa/parametric_ltl/counter_afas': 'counter_afas',
}


f, axs = plt.subplots(4, 3, figsize=(20, 6))
plt.subplots_adjust(top = 0.99, bottom=0.01, hspace=0.5, wspace=0.0)
i = 0
for key in sorted(dataframe_map.keys()):
    df = dataframe_map[key]
    models = to_models(df)
    for bench in sorted(set(models['bench'])):
        if bench in ('ltl_afa/created_ltl/LTLf-specific',):
            continue
        #data_df = models[models['bench'] == bench].dropna()
        data_df = models[models['bench'] == bench]
        #g = seaborn.pointplot(
        g = seaborn.pointplot(
            data_df, x="k", y="duration", hue="tool", errorbar=None, 
            ax=axs[i // 3, i % 3], palette=color_map, markers='o',
            #linewidth=3, 
        )
        g.legend([],[], frameon=False)
        g.set_ylim([0, 60])
        g.set_xticklabels(g.get_xticklabels(), rotation=30)
        tick_rate = len(g.get_xticklabels()) // 10
        if tick_rate != 0:
            for index, label in enumerate(g.get_xticklabels()):
                if index % tick_rate == 0:
                    label.set_visible(True)
                else:
                    label.set_visible(False)
        #g.set_title(f"{grp_name}", x=0.05)  
        #g.set(title=f"{bench_to_paper[bench]}", x=0.05)
        g.set_title(f"{bench_to_paper[bench]}", x=0.05)
        if i % 3 != 0:
            g.set(yticklabels=[])
            g.set(ylabel=None)
        seaborn.move_legend(g, "upper right", bbox_to_anchor=(1, 1), ncols=2)
        i += 1
        if i == 4 or i == 7:
            i += 1
i = 4
axs[i // 3, i % 3].set(xlabel=None)
axs[i // 3, i % 3].set(yticklabels=[])
axs[i // 3, i % 3].set(xticklabels=[])
axs[i // 3, i % 3].set(xticks=[])
axs[i // 3, i % 3].set(yticks=[])
axs[i // 3, i % 3].spines['top'].set_visible(False)
axs[i // 3, i % 3].spines['bottom'].set_visible(False)

i = 7
axs[i // 3, i % 3].legend(
    handles=[
        Line2D(
            [0], [0], color='w', marker='o', markerfacecolor=color_map[tool], label=f"{tool}", 
            markersize=10,
        )
        for tool in sorted(color_map.keys())
    ], ncols=2, loc='lower center', fontsize='18'
)
axs[i // 3, i % 3].set(xlabel=None)
axs[i // 3, i % 3].set(yticklabels=[])
axs[i // 3, i % 3].set(xticklabels=[])
axs[i // 3, i % 3].set(xticks=[])
axs[i // 3, i % 3].set(yticks=[])
axs[i // 3, i % 3].spines['top'].set_visible(False)
axs[i // 3, i % 3].spines['bottom'].set_visible(False)
save_figure(f'models_overview')
save_figure(f'models', ext='.pdf')

In [None]:
overall_df['bench'] = [get_benchmark_name(n) for n in overall_df['name']]
benches = sorted([b for b in sorted(list(set(overall_df['bench']))) if not b.endswith('-param')])
print(f"{len(benches)}: {benches}")

winners = {
    'a-ltl': ['abc', 'jaltimpact', 'afaminisat-nt'], 
    'a-ltlf-patterns': ['abc', 'jaltimpact', 'afaminisat-nt'], 
    'a-ltl-param': ['abc', 'jaltimpact', 'bisim'], 
    'a-ltl-rand': ['abc', 'bisim', 'jaltimpact'], 
    #'a-sloth': ['abc', 'afaminisat-nt', 'bisim'],  # by means
    'a-sloth': ['abc', 'afaminisat-nt', 'jaltimpact'], # by timeouts
    'a-noodler': ['abc', 'afaminisat-nt', 'jaltimpact'], 
    'a-ltl-spec': ['afaminisat-nt', 'abc', 'jaltimpact'], 
    'b-hand-made': ['automata', 'mata-nfa', 'vata'], # remove abc
    'b-armc-incl': ['vata', 'mata-nfa', 'automata'], 
    'b-param': ['z3', 'afaminisat-nt', 'vata'],  #by to
    'b-regex': ['automata', 'abc', 'mata-nfa'], 
    'b-smt': ['afaminisat-nt', 'mata-nfa', 'vata']
}
indices = [(1, 2), (1, 3), (2, 3)]
bench_colors = mpl.colormaps['tab10'].resampled(len(benches)).colors
fig = plt.figure(constrained_layout=True, figsize=(4*5, 3*2.5))
plt.subplots_adjust(top = 0.99, bottom=0.01, hspace=0.1, wspace=0.3)
subfigs = fig.subfigures(3, 3)
for i, subfig in enumerate(subfigs.flat):
    if i == len(benches):
        break
    subfig.suptitle(f"{benches[i]}")
    axs = subfig.subplots(1, 3)
    bench = benches[i]
    bench_df = df_to_float(overall_df[overall_df['bench'] == bench], 60, allow_timeouts=True)
    for ii, ax in enumerate(axs.flat):
        lhs, rhs = indices[ii]
        lhs, rhs = winners[bench][lhs-1], winners[bench][rhs-1]
        ax.grid(True, which='both', linestyle='--')
        ax.set_xlabel(f"{tool_to_output[lhs]}")
        ax.set_ylabel(f"{tool_to_output[rhs]}")
        ax.scatter(
            bench_df[f"{lhs}-runtime"], bench_df[f"{rhs}-runtime"], marker= 'x', c=[bench_colors[i]]
        )
        #pt = min(min(ax.get_xlim()[1], ax.get_ylim()[1]), 60)
        pt = min(max(ax.get_xlim()[1], ax.get_ylim()[1]), 60)
        ax.plot([0, pt], [0, pt], linestyle='--', color='red')
save_figure("scatter_plots")
save_figure("scatter", ext=".pdf")
plt.show()

In [None]:
print("Processing experiments complete")