In [18]:
%matplotlib inline
import json
import plotly.express as px
import plotly.graph_objects as go
import os
import pandas as pd
from statistics import mean, median, stdev, variance
import qgrid

In [19]:
# Build the summary DataFrame `df`: one row per (solver, extension, result file)
# found under `results_folder`, with timing shares and hitting-set / grow sizes
# read from the JSON result files.

# folder paths
results_folder = 'results/2020_05_14v3/'
gurobi_folder = 'Gurobi/'
ortools_folder = 'OrTools/'
gurobi_cold_folder = 'GurobiCold/'
extension1 = 'ext1/'
extension2 = 'ext2/'
extension3 = 'ext3/'
extension4 = 'ext4/'

# solver label -> sub-folder of `results_folder`
solvers = {'or-tools': ortools_folder, 'gurobi-warm': gurobi_folder, 'gurobi-cold': gurobi_cold_folder}

# extension numbers; results live in sub-folders "ext<N>/"
extensions = [1, 2, 3, 4]


def share_of_total(part, total):
    """Return `part` as a percentage of `total`, rounded to 2 decimals.

    Returns NaN when `total` is 0 so that a run without any recorded time
    does not abort the whole cell with a ZeroDivisionError.
    """
    if not total:
        return float('nan')
    return round(100 * part / total, 2)


def rounded_mean(values):
    """Return the mean of `values` rounded to 2 decimals, or NaN if `values` is empty."""
    if not values:
        return float('nan')
    return round(mean(values), 2)


# problem names: every *.json file name seen in any solver/extension folder
p = set()
for solver_folder in solvers.values():
    for extension in extensions:
        folder_path_extension = results_folder + solver_folder + f"ext{extension}/"
        # isdir (not exists) so a stray file with that name never reaches listdir
        if os.path.isdir(folder_path_extension):
            p.update(f for f in os.listdir(folder_path_extension) if f.endswith('.json'))

# result file paths: f_paths[solver label][extension][file name] -> path,
# only for files that actually exist for that combination
f_paths = {}
for s, solver_folder in solvers.items():
    f_paths[s] = {}
    for ext in extensions:
        f_paths[s][ext] = {}
        # sorted() makes the row construction order reproducible between runs
        for problem in sorted(p):
            f_path = results_folder + solver_folder + f"ext{ext}/" + problem
            if os.path.isfile(f_path):
                f_paths[s][ext][problem] = f_path

# column name -> list of values (one entry per result file)
data = {'p': [],
        'solver': [],
        'ext': [],
        'clauses': [],
        'steps': [],
        'total time [s]': [],
        '% hs [s]': [],
        '% sat [s]': [],
        '% grow [s]': [],
        's_hs': [],
        's_grow': [],
        'avg_s_hs': [],
        'avg_s_grow': []
        }

for solver in f_paths:
    for ext in f_paths[solver]:
        for problem, f_path in f_paths[solver][ext].items():

            # data parameters: some file names carry a variant suffix, which is
            # mapped to an extension label (2a/2b/2c) and stripped from the name
            if '_random.json' in problem:
                data['ext'].append('2a')
                data['p'].append(problem.replace("_random", ''))
            elif 'bestliteral.json' in problem:
                data['ext'].append('2b')
                data['p'].append(problem.replace("bestliteral", ''))
            elif 'bestliteral_neg.json' in problem:
                data['ext'].append('2c')
                data['p'].append(problem.replace("bestliteral_neg", ''))
            else:
                data['ext'].append(str(ext))
                data['p'].append(problem)

            data['solver'].append(solver)

            with open(f_path) as f:
                parsed_json = json.load(f)

            # data results: total time is the sum of the three recorded phases
            t_hs = sum(parsed_json['t_hitting_set'])
            t_sat = sum(parsed_json['t_sat_check'])
            t_grow = sum(parsed_json['t_grow'])
            tot_time = t_hs + t_sat + t_grow

            data['clauses'].append(parsed_json['clauses'])
            data['steps'].append(parsed_json['steps'])
            data['total time [s]'].append(tot_time)
            data['% hs [s]'].append(share_of_total(t_hs, tot_time))
            data['% sat [s]'].append(share_of_total(t_sat, tot_time))
            data['% grow [s]'].append(share_of_total(t_grow, tot_time))
            data['s_hs'].append(parsed_json['s_hs'])
            data['s_grow'].append(parsed_json['s_grow'])
            data['avg_s_hs'].append(rounded_mean(parsed_json['s_hs']))
            data['avg_s_grow'].append(rounded_mean(parsed_json['s_grow']))

# largest instances first, then fewest steps, then fastest
df = pd.DataFrame(data, columns=list(data))
df = df.sort_values(["clauses", "steps", "total time [s]"], ascending=[False, True, True])

# Results
## RQ1: for or-tools as HS solver, extension 1 vs 2a, 2b, 2c vs 3

- For OR-tools as optimal hitting set solver, `extension 3` requires the largest number of steps in most cases.
- `Extension 2b` has both a low execution time and the lowest number of steps in most cases.
- **Extension 1** provides results in a considerably longer time even for small instances...

**Conclusion RQ1**

The best extension with or-tools, although not by a clear margin, is `extension 2b`.

In [20]:
# Columns shown in the RQ1 table.
selected_columns = ['p','solver', 'ext', 'clauses', 'steps', 'total time [s]', 'avg_s_hs', 'avg_s_grow']

# Keep only the or-tools runs of extensions 2a/2b/2c/3, in a single pass.
rq1_extensions = ['2a', '2b', '2c', '3']
rq1_rows = df['ext'].isin(rq1_extensions) & (df['solver'] == 'or-tools')

# Order by decreasing clause count, then problem name, then total time.
df_ortools_ext123 = df.loc[rq1_rows, selected_columns].sort_values(
    by=["clauses", "p", "total time [s]"],
    ascending=[False, True, True],
)

# display table
df_ortools_ext123

Unnamed: 0,p,solver,ext,clauses,steps,total time [s],avg_s_hs,avg_s_grow
11,zebra_v155_c1135.json,or-tools,2c,1160,280,416.613658,3.22,59.33
8,zebra_v155_c1135.json,or-tools,2a,1160,309,466.198889,3.28,58.79
4,zebra_v155_c1135.json,or-tools,2b,1160,306,498.169732,3.29,59.07
26,zebra_v155_c1135.json,or-tools,3,1160,334,540.472964,3.33,58.18
15,par8-1-c.json,or-tools,2c,250,2,0.007377,1.0,56.0
6,par8-1-c.json,or-tools,2b,250,2,0.00791,1.0,56.0
17,par8-1-c.json,or-tools,2a,250,2,0.008004,1.0,56.0
25,par8-1-c.json,or-tools,3,250,2,0.015177,1.0,56.0
0,dubois22.json,or-tools,2b,176,179,10.396245,87.11,1.03
13,dubois22.json,or-tools,2a,176,193,11.620985,84.37,1.18


## RQ2: for the best of RQ1, compare or-tools vs gurobi

We compare the results of `extension 2b`, `extension 2c` & `extension 3` for the 3 solvers:

- `Or-Tools`: Or-tools optimal hitting set solver
- `Gurobi Warm Start`: The model is built on all variables, and for every new Minimum Correction Set found we add a new constraint corresponding to a new set in the collection of sets to hit.
- `Gurobi Cold Start`: The full model is rebuilt for every optimal hitting set call

**Results are sorted by decreasing number of clauses, then by problem name, then by increasing total solving time.**

- `extension 3` has more steps than `2b` and `2c`, but for the largest instance extension 3 has the smallest total execution time. 

**Conclusion RQ2**

Best combo for larger instances = `gurobi warm start + extension 3`

In [21]:
# Columns shown in the RQ2 table.
selected_columns = ['p','solver', 'ext', 'clauses', 'steps', 'total time [s]', 'avg_s_hs', 'avg_s_grow']

# Rows for extensions 2b/2c/3 across the three hitting-set solver setups.
rq2_extensions = ['2b', '2c','3']
rq2_solvers = ['or-tools', 'gurobi-warm', 'gurobi-cold']
rq2_rows = df['ext'].isin(rq2_extensions) & df['solver'].isin(rq2_solvers)

# Order by decreasing clause count, then problem name, then total time.
df_solver_ext3 = df.loc[rq2_rows, selected_columns].sort_values(
    by=["clauses", "p", "total time [s]"],
    ascending=[False, True, True],
)

# display table
df_solver_ext3

Unnamed: 0,p,solver,ext,clauses,steps,total time [s],avg_s_hs,avg_s_grow
42,zebra_v155_c1135.json,gurobi-warm,3,1160,282,67.451495,3.21,58.28
36,zebra_v155_c1135.json,gurobi-warm,2c,1160,276,84.726095,3.29,58.64
35,zebra_v155_c1135.json,gurobi-warm,2b,1160,276,85.371624,3.29,58.64
57,zebra_v155_c1135.json,gurobi-cold,3,1160,282,215.948875,3.21,58.28
11,zebra_v155_c1135.json,or-tools,2c,1160,280,416.613658,3.22,59.33
4,zebra_v155_c1135.json,or-tools,2b,1160,306,498.169732,3.29,59.07
26,zebra_v155_c1135.json,or-tools,3,1160,334,540.472964,3.33,58.18
15,par8-1-c.json,or-tools,2c,250,2,0.007377,1.0,56.0
6,par8-1-c.json,or-tools,2b,250,2,0.00791,1.0,56.0
41,par8-1-c.json,gurobi-warm,3,250,2,0.008474,1.0,56.0


## RQ3: for the best of RQ2, compare  with maxsat

We compare the results :
- `Gurobi warm start + extension 2b` vs `Gurobi warm start + extension 3`
- `Gurobi warm start + Max Sat` 

We see that for the medium instances, extension 3 takes too much time (> 1 day).

1. bf0432-007

For smaller instances, we see that the MaxSat solution:
- takes fewer steps
- is faster for cases where the grow size is 1

For all instances, we see that the average size of grow is always the best for maxsat.

**Conclusion**

Overall the maxsat solution implemented by https://pysathq.github.io/docs/html/api/examples/rc2.html:

    Alexey Ignatiev, António Morgado, Joao Marques-Silva. RC2: An Efficient MaxSAT Solver. MaxSAT Evaluation 2018. JSAT 11. 2019. pp. 53-64

shows very good results and performs better on medium instances.

**Best combo:**
- Solver: **Gurobi with warm start**
- Extension : **Maxsat**

In [22]:
# Columns shown in the RQ3 table.
selected_columns = ['p','solver', 'ext', 'clauses', 'steps', 'total time [s]', 'avg_s_hs', 'avg_s_grow']

# Gurobi warm-start runs only, restricted to extensions 3, 2b and 4.
rq3_extensions = ['3', '2b', '4']
rq3_rows = (df['solver'] == 'gurobi-warm') & df['ext'].isin(rq3_extensions)

# Order by decreasing clause count, then problem name, then total time.
df_warm_start = df.loc[rq3_rows, selected_columns].sort_values(
    by=["clauses", "p", "total time [s]"],
    ascending=[False, True, True],
)

# display table
df_warm_start

Unnamed: 0,p,solver,ext,clauses,steps,total time [s],avg_s_hs,avg_s_grow
50,bf0432-007.json,gurobi-warm,4,3667,1274,111.693958,628.62,1.17
42,zebra_v155_c1135.json,gurobi-warm,3,1160,282,67.451495,3.21,58.28
35,zebra_v155_c1135.json,gurobi-warm,2b,1160,276,85.371624,3.29,58.64
49,zebra_v155_c1135.json,gurobi-warm,4,1160,544,136.553974,3.17,32.03
41,par8-1-c.json,gurobi-warm,3,250,2,0.008474,1.0,56.0
48,par8-1-c.json,gurobi-warm,4,250,2,0.016155,1.0,56.0
46,dubois22.json,gurobi-warm,4,176,176,0.442118,88.0,1.0
39,dubois22.json,gurobi-warm,3,176,1312,8.8051,47.19,4.94
45,dubois21.json,gurobi-warm,4,168,168,0.401662,84.0,1.0
38,dubois21.json,gurobi-warm,3,168,717,4.296818,51.92,4.45


In [23]:
# Gurobi warm-start runs of the zebra instance for extensions 3, 2b and 2c,
# selected with one combined boolean mask.
zebra_rows = (
    (df['p'] == 'zebra_v155_c1135.json')
    & df['ext'].isin(['3', '2b', '2c'])
    & (df['solver'] == 'gurobi-warm')
)
zebra = df[zebra_rows]

In [34]:
# Overlay, for the zebra instance, the hitting-set size per step of
# extensions 3 and 2b in a single figure (one line trace per extension).
fig = go.Figure()
for ext in ['3', '2b']:
    # First matching run for this extension; raises IndexError if there is none.
    zebra_run = zebra[zebra['ext'] == ext].iloc[0]

    y1 = zebra_run['s_hs']
    x1 = list(range(len(y1)))
    # Read the scalar from the row: int() on a one-element Series is deprecated in pandas.
    n_clauses = int(zebra_run['clauses'])
    fig.add_trace(go.Scatter(x=x1, y=y1, mode='lines', name=f"{n_clauses} clauses - size of hs ext{ext}"))
fig.update_layout(
    autosize=False,
    width=800,
    title="1160 clauses - size of hs",
    xaxis_title='step number',
    yaxis_title='size of hs')
fig.show()

In [40]:

# Scatter plot of the grow size (`s_grow`) per step for the zebra instance, extension 3.
for ext in ['3']:
    # First matching run for this extension; raises IndexError if there is none.
    zebra_run = zebra[zebra['ext'] == ext].iloc[0]

    y2 = zebra_run['s_grow']
    x2 = list(range(len(y2)))
    # Read the scalar from the row: int() on a one-element Series is deprecated in pandas.
    n_clauses = int(zebra_run['clauses'])

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=x2, y=y2, mode='markers', name=f"{n_clauses} clauses - size of C ext{ext}"))
    # Edit the layout
    fig.update_layout(
        autosize=False,
        width=800,
        title=f"{n_clauses} clauses - size of C ext{ext}",
        xaxis_title='step number',
        yaxis_title=f'size of C ext{ext}')

    # Shown inside the loop so every extension listed above gets its own figure.
    fig.show()

In [36]:

# Scatter plot of the grow size (`s_grow`) per step for the zebra instance, extension 2b.
for ext in ['2b']:
    # First matching run for this extension; raises IndexError if there is none.
    zebra_run = zebra[zebra['ext'] == ext].iloc[0]

    y2 = zebra_run['s_grow']
    x2 = list(range(len(y2)))
    # Read the scalar from the row: int() on a one-element Series is deprecated in pandas.
    n_clauses = int(zebra_run['clauses'])

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=x2, y=y2, mode='markers', name=f"{n_clauses} clauses - size of C ext{ext}"))
    # Edit the layout
    fig.update_layout(
        autosize=False,
        width=800,
        title=f"{n_clauses} clauses - size of C ext{ext}",
        xaxis_title='step number',
        yaxis_title=f'size of C ext{ext}')

    fig.show()