# Buchi automata complementation

In [3]:
# PREAMBLE
import altair as alt
import altair_saver as alt_saver
import pandas as pd
import numpy as np
import re as re
import tabulate as tab

# Path of the benchmark results file that is loaded through read_file().
#FILE_INPUT = "results/random-all-to300-new-new.csv"
FILE_INPUT = "results/results-2020-09-06/random-all-to300-merged.csv"

# Timeout of a single run, in seconds.
TIMEOUT = 300
# Value substituted for missing runtimes (NaN) when the data is sanitised;
# it is also the upper bound of the runtime plot domain.
TIMEOUT_VAL = TIMEOUT * 1.1
# Smallest runtime kept in the data: lower values are raised to it so that
# they can be drawn on a logarithmic axis.
TIME_MIN = 0.01


# Disable Altair's maximum-rows check (we do not care about the limit).
alt.data_transformers.disable_max_rows()
# Alternative renderers, kept for switching the notebook frontend by hand.
#alt.renderers.enable('mimetype')
#alt.renderers.enable('altair_viewer')
#alt.renderers.enable('html')
#alt.renderers.enable('notebook')
#alt.renderers.enable('mimebundle')




# For reading in files
def read_file(filename):
    """Load a semicolon-separated results file into a pandas DataFrame.

    Everything following a ``#`` on a line is ignored as a comment, and the
    cell values ``ERR``, ``TO`` and ``MISSING`` are parsed as NaN.
    """
    missing_markers = ['ERR', 'TO', 'MISSING']
    return pd.read_csv(filename, sep=";", comment="#", na_values=missing_markers)


# For printing scatter plots
def scatter_plot(df, xcol, ycol, domain, xname=None, yname=None, log=False, width=600, height=600, tickCount=5):
    """Build a scatter plot comparing two quantitative columns of ``df``.

    The returned chart layers three things: the points themselves, dashed
    rules at the upper end of the domain on both axes, and a dashed diagonal
    running from ``(domain[0], domain[0])`` to ``(domain[1], domain[1])``.

    Args:
        df: data frame holding the columns ``xcol`` and ``ycol``.
        xcol: name of the column plotted on the x axis.
        ycol: name of the column plotted on the y axis.
        domain: two-element sequence ``[low, high]`` used as the scale domain
            of both axes; values outside it are clamped.
        xname: x-axis title; defaults to ``xcol``.
        yname: y-axis title; defaults to ``ycol``.
        log: use base-10 logarithmic scales instead of linear ones.
        width: chart width passed to ``properties``.
        height: chart height passed to ``properties``.
        tickCount: requested number of axis ticks.

    Returns:
        The layered Altair chart.
    """
    assert len(domain) == 2

    # Identity comparison: None is a singleton, "== None" is an anti-idiom.
    if xname is None:
        xname = xcol
    if yname is None:
        yname = ycol

    plot_type = "log" if log else "linear"
    scatter = alt.Chart(df).mark_point(size=10, filled=True).encode(
        x=alt.X(xcol + ':Q', axis=alt.Axis(title=xname, tickCount=tickCount), scale=alt.Scale(type=plot_type, base=10, domain=domain, clamp=True)),
        y=alt.Y(ycol + ':Q', axis=alt.Axis(title=yname, tickCount=tickCount), scale=alt.Scale(type=plot_type, base=10, domain=domain, clamp=True))
    )

    # Dashed horizontal and vertical rules at the upper end of the domain.
    rules = (alt.Chart(pd.DataFrame({'y': [domain[1]]})).mark_rule(strokeDash=[3,1]).encode(y='y') +
             alt.Chart(pd.DataFrame({'x': [domain[1]]})).mark_rule(strokeDash=[3,1]).encode(x='x'))

    # Dashed diagonal x == y across the whole domain.
    diag = alt.Chart(pd.DataFrame({'x': domain, 'y': domain})).mark_line(color='black', strokeDash=[3,1], size=1).encode(x='x', y='y')

    res = scatter + rules + diag
    res = res.properties(
        width=width, height=height
    )

    return res


# Print a matrix of plots
def matrix_plot(list_of_plots, cols):
    """Arrange plots into a grid with ``cols`` plots per row.

    Plots are placed row by row in the given order; within a row they are
    combined with ``|`` and finished rows are stacked with ``&``. The last
    row may hold fewer than ``cols`` plots.

    Args:
        list_of_plots: non-empty sequence of objects supporting ``|`` and ``&``.
        cols: number of plots per row; must be positive.

    Returns:
        The combined grid (the single plot itself if there is only one).
    """
    assert len(list_of_plots) > 0
    # cols == 0 used to pass the check and then fail with ZeroDivisionError.
    assert cols > 0

    grid = None
    for start in range(0, len(list_of_plots), cols):
        row = list_of_plots[start]
        for plot in list_of_plots[start + 1:start + cols]:
            # Non-augmented "|" so the caller's objects are never mutated.
            row = row | plot

        # Compare against None explicitly: a finished row may be falsy.
        grid = row if grid is None else grid & row

    return grid

In [8]:

df = read_file(FILE_INPUT)
#for i in df.columns:
#  print(i)

print(f"# of automata: {len(df)}")

# some data wrangling

# keep only rows whose 'ranker-Transitions' is not 0 and whose
# 'ranker-autfilt-States' is not 1
keep = (df['ranker-Transitions'] != 0) & (df['ranker-autfilt-States'] != 1)
df = df[keep]


print(f"# of automata with > 0 transitions and > 1 states: {len(df)}")


def _column_summary(series):
    """Return the summary statistics of one column as a dict."""
    return {
        'max': series.max(),
        'min': series.min(),
        'mean': series.mean(),
        'median': series.median(),
        'std': series.std(),
        # ERR/TO/MISSING were all loaded as NaN, so this counts every one of them
        'timeouts': series.isna().sum(),
    }


# one summary per column holding a state count or a runtime
summary_states = {
    col: _column_summary(df[col])
    for col in df.columns
    if re.search('-States$', col) or re.search('-runtime$', col)
}

# rows = measured columns, columns = statistics
df_summary_states = pd.DataFrame(summary_states).transpose()
print(df_summary_states)

print("============================================")
print("###########    Interesting data     ########")

headers = ["name", "min", "max", "mean", "median", "std", "timeouts"]


def _summary_rows(names, suffix):
    """Collect one table row per name from the ``name + suffix`` summary entry."""
    rows = []
    for name in names:
        stats = df_summary_states.loc[name + suffix]
        # the row is the name followed by the statistics in header order
        rows.append([name] + [stats[key] for key in headers[1:]])
    return rows


# LaTeX table of the state-count statistics
interesting = ["ranker-nopost",
               "ranker-tight-nopost",
               "schewe",
               "ranker-autfilt",
               "ranker-composition-autfilt",
               "safra-autfilt",
               "piterman-autfilt",
               "fribourg-autfilt",
               "spot-autfilt",
               "seminator-autfilt",
               "roll-autfilt",
               ]

tab_interesting = _summary_rows(interesting, '-States')
print(tab.tabulate(tab_interesting, headers=headers, tablefmt="latex"))


# LaTeX table of the runtime statistics ("ranker" appears twice in this list,
# so its row is printed twice)
interesting = ["ranker",
               "ranker-tight",
               "schewe",
               "ranker",
               "ranker-composition",
               "safra",
               "piterman",
               "fribourg",
               "spot",
               "seminator",
               "roll",
               ]

tab_interesting = _summary_rows(interesting, '-runtime')
print(tab.tabulate(tab_interesting, headers=headers, tablefmt="latex"))








states_min = 1
# Largest state count over the '-States' rows of the summary only; the
# summary also holds '-runtime' rows, which must not influence this bound.
is_states_row = df_summary_states.index.str.endswith('-States')
states_max = df_summary_states.loc[is_states_row, 'max'].max()
# Value substituted for missing state counts; 10 % above the largest one.
states_timeout = states_max * 1.1

# sanitizing NAs
# Work on an independent copy: df is the result of row filtering, and the
# columns are reassigned below.
df = df.copy()
for col in df.columns:
  if re.search('-States$', col):
    # Assign the result back instead of calling fillna/replace with
    # inplace=True on df[col]: in-place methods on a column selection are
    # chained assignment and do not update df under pandas Copy-on-Write.
    # The replace removes 0 (in case of log graph).
    df[col] = df[col].fillna(states_timeout).replace(0, states_min)

  if re.search('-runtime$', col):
    df[col] = df[col].fillna(TIMEOUT_VAL)
    df.loc[df[col] < TIME_MIN, col] = TIME_MIN   # to remove 0 (in case of log graph)

print("States min: {}".format(states_min))
print("States max: {}".format(states_max))


# rows whose 'ranker-composition-Engine' value contains "GOAL"
engine_is_goal = df["ranker-composition-Engine"].str.contains("GOAL", na=False)
backoff = df[engine_is_goal]
print(f"backoff num = {len(backoff)}")

compare_methods = [("ranker-nopost-States", "ranker-tight-nopost-States"),
                   ("ranker-nopost-States", "schewe-States"),
                   ("ranker-autfilt-States", "piterman-autfilt-States"),
                   ("ranker-autfilt-States", "safra-autfilt-States"),
                   ("ranker-autfilt-States", "spot-autfilt-States"),
                   ("ranker-autfilt-States", "fribourg-autfilt-States"),
                   ("ranker-autfilt-States", "seminator-autfilt-States"),
                   ("ranker-autfilt-States", "roll-autfilt-States"),
                  ]


def _win_counts(winner, loser):
    """Count rows with ``winner < loser`` and, among them, rows where ``loser`` equals ``states_timeout``."""
    won = df[df[winner] < df[loser]]
    return len(won), len(won[won[loser] == states_timeout])


# one row per comparison: name of the right method, then wins/losses of the left one
tab_wins = []
for left, right in compare_methods:
  wins, wins_timeouts = _win_counts(left, right)
  loses, loses_timeouts = _win_counts(right, left)
  tab_wins.append([right, wins, wins_timeouts, loses, loses_timeouts])

headers_wins = ["name", "wins", "wins-timeouts", "loses", "loses-timeouts"]
print(tab.tabulate(tab_wins, headers=headers_wins, tablefmt="latex"))

# smallest state count reached by any of the other tools, per row
competitors = ['safra-autfilt-States',
               'piterman-autfilt-States',
               'spot-autfilt-States',
               'fribourg-autfilt-States',
               'seminator-autfilt-States',
               'roll-autfilt-States']
df['other_min'] = df[competitors].min(axis=1)

ranker_best = df[df['ranker-autfilt-States'] < df['other_min']]
ranker_not_best = df[df['ranker-autfilt-States'] > df['other_min']]

print(f"ranker stricly best = {len(ranker_best)}")
print(f"ranker not best = {len(ranker_not_best)}")
print(f"ranker non-strictly best = {len(df) - len(ranker_not_best)}")






# of automata: 10938
# of automata with > 0 transitions and > 1 states: 4079
                                         max   min         mean   median  \
seminator-runtime                     295.57  0.02     6.578337    0.090   
seminator-States                    27509.00  3.00   470.333138   88.500   
seminator-autfilt-States             1422.00  2.00    73.649590   24.000   
spot-runtime                          267.44  0.01     0.797683    0.040   
spot-States                          4219.00  2.00   177.930336   78.000   
spot-autfilt-States                  2029.00  2.00    61.357213   23.000   
safra-runtime                         612.05  2.78    10.403400    5.315   
safra-autfilt-States                 1771.00  1.00    67.332824   23.500   
safra-States                        73912.00  1.00  1149.836901   83.000   
goal-default-runtime                  178.88  2.78     5.884489    4.830   
goal-default-autfilt-States          1322.00  1.00    60.966323   24.000   
goal-defaul

In [16]:
import os

# Pairs (x, y) of tool-name prefixes to compare.
# NOTE: this first selection is overwritten by the assignment right below it;
# it is kept as the ranker-centred alternative.
to_cmp = [
  ('ranker', 'seminator'),
  ('ranker', 'safra'),
#  ('ranker', 'goal-default'),            # -- this is Piterman
  ('ranker', 'piterman'),
  ('ranker', 'schewe'),
  ('ranker', 'fribourg'),
  ('ranker', 'spot'),
  ('ranker', 'roll'),
  ('ranker', 'ranker-tight'),
]

to_cmp = [
  ('seminator', 'ranker'),
  ('seminator', 'ranker-tight'),
  ('seminator', 'safra'),
#  ('ranker', 'goal-default'),            # -- this is Piterman
  ('seminator', 'piterman'),
  ('seminator', 'schewe'),
  ('seminator', 'fribourg'),
  ('seminator', 'spot'),
  ('seminator', 'roll'),
]


# Plot descriptions: dicts with the mandatory keys 'x' and 'y' (column-name
# prefixes, '-States' is appended when plotting) and the optional keys
# 'xname', 'yname', 'max' and 'tickCount'.
to_cmp2 = []
to_cmp2.append({'x': 'seminator', 'y': 'ranker-nopost'})
to_cmp2.append({'x': 'seminator', 'y': 'ranker-tight-nopost'})
for x, y in to_cmp:
  #to_cmp2.append({'x': x + '-nopost', 'y': y})
  to_cmp2.append({'x': x, 'y': y})
  to_cmp2.append({'x': x + '-autfilt', 'y': y + '-autfilt'})
 # to_cmp2.append({'x': x + '-composition-autfilt', 'y': y + '-autfilt'})

# Disabled alternative: a hand-picked selection with explicit axis titles.
if False:
  to_cmp2 = [{'x':"ranker-nopost", 'y':"ranker-tight-nopost", 'xname':'Ranker-MaxR', 'yname':'Ranker-RRestr'},
             #{'x':"ranker-tight-nopost", 'y':"schewe", 'xname': "Ranker-RankRestr", 'yname': 'Schewe'},
             {'x':"ranker-nopost", 'y':"schewe", 'xname': "Ranker-MaxR", 'yname': "Schewe-RedAvgOut"},
             #{'x':"piterman-autfilt", 'y':"safra-autfilt", 'xname': "Piterman+SPOT", 'yname': "Safra+SPOT"}
             {'x':"ranker-autfilt", 'y':"seminator-autfilt", 'xname': "Ranker-MaxR+PP", 'yname': "Seminator 2+PP", 'max': 10000, 'tickCount': 3},
             #{'x':"ranker-autfilt", 'y':"spot-autfilt", 'xname': "Ranker-MaxRank+SPOT", 'yname': "SPOT+SPOT"},   # comparable to Piterman
             {'x':"ranker-autfilt", 'y':"roll-autfilt", 'xname': "Ranker-MaxR+PP", 'yname': "ROLL+PP", 'max': 10000, 'tickCount': 3},
             {'x':"ranker-autfilt", 'y':"piterman-autfilt", 'xname': "Ranker-MaxR+PP", 'yname': "Piterman+PP", 'max': 10000, 'tickCount': 3},
             {'x':"ranker-autfilt", 'y':"fribourg-autfilt", 'xname': "Ranker-MaxR+PP", 'yname': "Fribourg+PP", 'max': 10000, 'tickCount': 3},
             #{'x':"roll-autfilt", 'y':"piterman-autfilt", 'xname': None, 'yname': "Piterman+SPOT"},
             #{'x':"fribourg-autfilt", 'y':"piterman-autfilt", 'xname': None, 'yname': "Piterman+SPOT"},
             #{'x':"spot-autfilt", 'y':"safra-autfilt", 'xname': None, 'yname': None},
            ]



# fill in the optional keys where not present
for params in to_cmp2:
  params.setdefault('xname', None)
  params.setdefault('yname', None)
  params.setdefault('max', states_timeout)
  params.setdefault('tickCount', 5)

#to_cmp = [
#  ('seminat', 'seminat'),
#  ('seminat', 'safra'),
#  ('seminat', 'goal-df'),
#  ('seminat', 'piterman'),
#  ('seminat', 'schewe'),
#  ('seminat', 'spot'),
#  ('seminat', 'roll'),
#  ('seminat', 'frib'),
#  ('seminat', 'rankern'),
#]

# One log-scale scatter plot of state counts per description, kept together
# with the names used to build its output file name.
size = 250
plot_list = [(params['x'], params['y'], scatter_plot(df,
                                 xcol=params['x'] + '-States', ycol=params['y'] + '-States',
                                 xname=params['xname'], yname=params['yname'],
                                 domain=[states_min, params['max']],
                                 tickCount=params['tickCount'],
                                 log=True, width=size, height=size)) for params in to_cmp2]

# Make sure the output directory exists before anything is saved into it.
os.makedirs("plots", exist_ok=True)
for x, y, plot in plot_list:
  print(f"x: {x}, y: {y}")
  alt_saver.save(plot, f"plots/{x}_VS_{y}.pdf")


mplot = matrix_plot([plot for x,y,plot in plot_list], cols=2)

#alt_saver.save(mplot, "graphs.pdf", method="node")
#mplot.save("graphs.pdf")

# NOTE: this bare expression is not the last statement of the cell, so the
# notebook does not display the chart here.
mplot
alt_saver.save(mplot, "seminator.pdf")


x: seminator, y: ranker-nopost
x: seminator, y: ranker-tight-nopost
x: seminator, y: ranker
x: seminator-autfilt, y: ranker-autfilt
x: seminator, y: ranker-tight
x: seminator-autfilt, y: ranker-tight-autfilt
x: seminator, y: safra
x: seminator-autfilt, y: safra-autfilt
x: seminator, y: piterman
x: seminator-autfilt, y: piterman-autfilt
x: seminator, y: schewe
x: seminator-autfilt, y: schewe-autfilt
x: seminator, y: fribourg
x: seminator-autfilt, y: fribourg-autfilt
x: seminator, y: spot
x: seminator-autfilt, y: spot-autfilt
x: seminator, y: roll
x: seminator-autfilt, y: roll-autfilt


In [13]:
# Log-scale runtime scatter plot for every compared pair of tools, on a
# domain spanning from the smallest kept runtime to the timeout substitute.
runtime_domain = [TIME_MIN, TIMEOUT_VAL]
plot_list = [
  scatter_plot(
    df,
    xcol=x + '-runtime',
    ycol=y + '-runtime',
    domain=runtime_domain,
    log=True,
  )
  for x, y in to_cmp
]

# two plots per row
mplot = matrix_plot(plot_list, cols=2)

mplot

In [4]:
#(df['ranker+piterman'] == states_timeout).sum()
# number of rows whose 'ranker-States' value equals the timeout substitute
ranker_timeouts = (df['ranker-States'] == states_timeout).sum()
print("old: {}".format(ranker_timeouts))




old: 383
