# Results analysis

## Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Loading csv

In [None]:
# Load the results table; the path is relative to the notebook's working directory.
df = pd.read_csv("../Dataset/results.csv")

## Analysis

In [None]:
# Preview the first rows of the loaded table.
df.head()

In [None]:
# Timeout statistics per policy: number of runs flagged SavileRowTimeOut==1 and
# SolverTimeOut==1, and their share of all runs of that policy.
policy_sets_timeout_tab = len(df.query("Policy == 'Sets' and SavileRowTimeOut==1"))
policy_sets_timeout_solving = len(df.query("Policy == 'Sets' and SolverTimeOut==1"))
num_policy_sets = len(df.query("Policy == 'Sets'"))
print("Number of Policy=Sets timeout tabulation: ", policy_sets_timeout_tab)
# Fixed: this percentage used the solver-timeout count instead of the tabulation one.
print("Percentage of Policy=Sets timeout tabulation: ", 100*policy_sets_timeout_tab/num_policy_sets, "%")
print("Number of Policy=Sets timeout solver: ", policy_sets_timeout_solving)
print("Percentage of Policy=Sets timeout solver: ", 100*policy_sets_timeout_solving/num_policy_sets, "%")
print()
policy_tab2_timeout_tab = len(df.query("Policy == '2' and SavileRowTimeOut==1"))
policy_tab2_timeout_solving = len(df.query("Policy == '2' and SolverTimeOut==1"))
num_policy_tab2 = len(df.query("Policy == '2'"))
print("Number of Policy=2 timeout tabulation: ", policy_tab2_timeout_tab)
print("Percentage of Policy=2 timeout tabulation: ", 100*policy_tab2_timeout_tab/num_policy_tab2, "%")
print("Number of Policy=2 timeout solver: ", policy_tab2_timeout_solving)
print("Percentage of Policy=2 timeout solver: ", 100*policy_tab2_timeout_solving/num_policy_tab2, "%")
print()
policy_base_timeout_savile = len(df.query("Policy == 'baseline' and SavileRowTimeOut==1"))
policy_base_timeout_solving = len(df.query("Policy == 'baseline' and SolverTimeOut==1"))
num_policy_base = len(df.query("Policy == 'baseline'"))
print("Number of Policy=baseline timeout savilerow: ", policy_base_timeout_savile)
print("Percentage of Policy=baseline timeout savilerow: ", 100*policy_base_timeout_savile/num_policy_base, "%")
print("Number of Policy=baseline timeout solver: ", policy_base_timeout_solving)
print("Percentage of Policy=baseline timeout solver: ", 100*policy_base_timeout_solving/num_policy_base, "%")

In [None]:
# Per-policy subsets of runs that finished in time.
#   *_not_timeout_tab     : SavileRowTimeOut == 0
#   *_not_timeout_solving : additionally SolverTimeOut == 0
# (baseline is only filtered on SolverTimeOut.)
sets_not_timeout_tab = df[(df['Policy'] == 'Sets') & (df['SavileRowTimeOut'] == 0)]
sets_not_timeout_solving = sets_not_timeout_tab[sets_not_timeout_tab['SolverTimeOut'] == 0]
num_sets = sets_not_timeout_solving.shape[0]

tab2_not_timeout_tab = df[(df['Policy'] == '2') & (df['SavileRowTimeOut'] == 0)]
tab2_not_timeout_solving = tab2_not_timeout_tab[tab2_not_timeout_tab['SolverTimeOut'] == 0]
num_tab2 = tab2_not_timeout_solving.shape[0]

base_not_timeout_solving = df[(df['Policy'] == 'baseline') & (df['SolverTimeOut'] == 0)]
num_base = base_not_timeout_solving.shape[0]

In [None]:
# Print mean and standard deviation of the tabulation and solving times for
# each policy, using the timeout-free subsets.
_time_summaries = [
    ("tab", "Sets", sets_not_timeout_tab['TabulationTime']),
    ("tab", "2", tab2_not_timeout_tab['TabulationTime']),
    ("solving", "Sets", sets_not_timeout_solving['SolverTotalTime']),
    ("solving", "2", tab2_not_timeout_solving['SolverTotalTime']),
    ("solving", "baseline", base_not_timeout_solving['SolverTotalTime']),
]
for _pos, (_kind, _policy, _times) in enumerate(_time_summaries):
    if _pos:
        print()  # blank line between consecutive summaries
    print("Mean {} time Policy={}: ".format(_kind, _policy), _times.mean(), "s")
    print("Std {} time Policy={}: ".format(_kind, _policy), _times.std(), "s")

Solving time distribution

In [None]:
# KDE of SolverTotalTime for runs that did not time out.
# Sets contributes the per-problem minimum; Tab2 and baseline contribute every run.
fig, ax = plt.subplots(figsize=(18, 6))
sets_not_timeout_solving.groupby('Problem')['SolverTotalTime'].min().plot.density(ax=ax)
tab2_not_timeout_solving['SolverTotalTime'].plot.density(ax=ax)
base_not_timeout_solving['SolverTotalTime'].plot.density(ax=ax)
ax.legend(['Sets', 'Tab2', 'Baseline'])
ax.set_xlabel("Time (s)")
ax.set_ylabel("# instances")
ax.set_title("Solve time KDE for sets, tab2 and baseline")
ax.set_xlim(left=0)
fig.tight_layout();

Tabulation time distribution

In [None]:
# KDE of TabulationTime for runs whose Savile Row phase did not time out.
# Sets contributes the per-problem minimum; Tab2 contributes every run.
fig, ax = plt.subplots(figsize=(18, 6))
sets_not_timeout_tab.groupby('Problem')['TabulationTime'].min().plot.density(ax=ax)
tab2_not_timeout_tab['TabulationTime'].plot.density(ax=ax)
ax.legend(['Sets', 'Tab2'])
ax.set_xlabel("Time (s)")
ax.set_ylabel("# instances")
ax.set_title("Tabulation time KDE for sets and tab2")
ax.set_xlim(left=0)
fig.tight_layout();

In [None]:
# Non-baseline runs whose Savile Row phase completed, grouped by
# (Problem, Policy, Num_cons).
group_prob = (
    df[(df['Policy'] != 'baseline') & (df['SavileRowTimeOut'] == 0)]
    .groupby(["Problem", "Policy", "Num_cons"])
)

## Nodes 
No Sets better than tab2

In [None]:
def _print_ansi(code, skk):
    """Print ``skk`` wrapped in the ANSI escape ``\\033[<code>m`` and a reset."""
    print("\033[{}m{}\033[00m".format(code, skk))


def prRed(skk):
    """Print ``skk`` with ANSI code 91."""
    _print_ansi(91, skk)


def prGreen(skk):
    """Print ``skk`` with ANSI code 92."""
    _print_ansi(92, skk)


def prYellow(skk):
    """Print ``skk`` with ANSI code 93."""
    _print_ansi(93, skk)


def prLightPurple(skk):
    """Print ``skk`` with ANSI code 94."""
    _print_ansi(94, skk)


def prPurple(skk):
    """Print ``skk`` with ANSI code 95."""
    _print_ansi(95, skk)


def prCyan(skk):
    """Print ``skk`` with ANSI code 96."""
    _print_ansi(96, skk)


def prLightGray(skk):
    """Print ``skk`` with ANSI code 97."""
    _print_ansi(97, skk)


def prBlack(skk):
    """Print ``skk`` with ANSI code 98."""
    _print_ansi(98, skk)

In [None]:
# For every problem, keep the fastest run of each policy and compare Tab2
# against Sets on total time (SolverTotalTime + SavileRowTotalTime) and on
# SolverNodes.
#
# Layout of res:
#   res[problem]['Sets'][num_cons] = [time, nodes]
#   res[problem][other_policy]     = [time, nodes]
# A time of 100000 stands for a run whose solver timed out.
res = dict()
for name, group in group_prob:
    problem, policy, n_cons = name
    total_time = group['SolverTotalTime'] + group['SavileRowTotalTime']
    index_min = total_time.argmin()
    nodes_at_min = group['SolverNodes'].values[index_min]
    # Fixed: read the timeout flag of the selected row (index_min). The flag of
    # the group's first row was used before, which can belong to another run.
    if group['SolverTimeOut'].values[index_min] == 1:
        value = [100000, nodes_at_min]
    else:
        value = [total_time.values[index_min], nodes_at_min]

    per_problem = res.setdefault(problem, dict())
    if policy == 'Sets':
        per_problem.setdefault('Sets', dict())[n_cons] = value
    else:
        per_problem[policy] = value

for key in res.keys():
    # Defaults used when a policy has no entry for this problem.
    num_cons = -1
    time_sets = time_2 = 100000
    nodes_sets = nodes_2 = 100000
    if 'Sets' in res[key]:
        sets_runs = res[key]['Sets']
        cons_keys = list(sets_runs.keys())
        # Num_cons whose best run has the smallest time.
        num_cons = cons_keys[np.argmin([sets_runs[c][0] for c in cons_keys])]
        time_sets, nodes_sets = sets_runs[num_cons]
    if '2' in res[key]:
        time_2, nodes_2 = res[key]['2']

    # Pick the printer (colour) from the nodes/time relation; None prints nothing.
    if nodes_2 > nodes_sets and num_cons != -1 and time_2 < time_sets:
        report = prRed       # tab2: more nodes, less time
    elif nodes_2 > nodes_sets and num_cons != -1 and time_2 > time_sets:
        report = prGreen     # tab2: more nodes, more time
    elif nodes_2 < nodes_sets and time_2 > time_sets:
        report = prYellow    # tab2: fewer nodes, more time
    elif nodes_2 == nodes_sets:
        report = print       # same number of nodes
    elif nodes_2 < nodes_sets and time_2 < time_sets:
        report = prCyan      # tab2: fewer nodes, less time
    else:
        report = None

    if report is not None:
        report("Key: " + str(key) + " num_cons: " + str(num_cons))
        report("Time tab2: " + str(time_2) + " time sets: " + str(time_sets))
        report("Nodes tab2: " + str(nodes_2) + " nodes sets: " + str(nodes_sets))
        print()

## Time 
Sets better than tab2

## Filter num cons

In [None]:
# Timeout statistics for Policy=Sets, one report per Num_cons in 1..3.
# Percentages are relative to all Policy=Sets runs (num_policy_sets).
# Loop-local names are used so the overall counters computed for Policy=Sets
# (policy_sets_timeout_tab / policy_sets_timeout_solving) are not overwritten.
for i in range(1, 4):
    sets_i = df.query("Policy == 'Sets' and Num_cons=="+str(i))
    timeout_tab_i = len(sets_i.query("SavileRowTimeOut==1"))
    timeout_solving_i = len(sets_i.query("SolverTimeOut==1"))
    print(str(i)+" number of Policy=Sets timeout tabulation: ", timeout_tab_i)
    # Fixed: this percentage used the solver-timeout count instead of the tabulation one.
    print(str(i)+" percentage of Policy=Sets timeout tabulation: ", 100*timeout_tab_i/num_policy_sets, "%")
    print(str(i)+" number of Policy=Sets timeout solver: ", timeout_solving_i)
    print(str(i)+" percentage of Policy=Sets timeout solver: ", 100*timeout_solving_i/num_policy_sets, "%")
    print()

In [None]:
# Mean/std of tabulation and solving time for Policy=Sets, one report per
# Num_cons in 1..3. Loop-local names are used so the notebook-wide frames
# sets_not_timeout_tab / sets_not_timeout_solving are not overwritten with the
# subset of the last iteration.
for i in range(1, 4):
    tabulated_i = df.query("Policy == 'Sets' and SavileRowTimeOut==0 and Num_cons=="+str(i))
    # tabulated_i is already restricted to Num_cons == i.
    solved_i = tabulated_i.query("SolverTimeOut==0")
    print(str(i)+" mean tab time Policy=Sets: ", tabulated_i['TabulationTime'].mean(), "s")
    print(str(i)+" std tab time Policy=Sets: ", tabulated_i['TabulationTime'].std(), "s")
    print()
    print(str(i)+" mean solving time Policy=Sets: ", solved_i['SolverTotalTime'].mean(), "s")
    print(str(i)+" std solving time Policy=Sets: ", solved_i['SolverTotalTime'].std(), "s")
    print()

Solving Time

In [None]:
# Figure 1: KDE of SolverTotalTime for Sets (one curve per Num_cons) and Tab2,
# restricted to runs whose solver did not time out.
plt.figure(figsize=(18, 6));
for n_cons in (1, 2, 3):
    df.query("Policy == 'Sets' and SolverTimeOut==0 and Num_cons=="+str(n_cons))['SolverTotalTime'].plot.density();
# Fixed: Tab2 was drawn from SolverSolveTime while the Sets curves use
# SolverTotalTime; all curves now use the same column.
tab2_not_timeout_solving['SolverTotalTime'].plot.density();
# NOTE: the baseline curve is currently not drawn, so it has no legend entry.
plt.legend(['Sets1', 'Sets2', 'Sets3', 'Tab2']);
plt.xlabel("Time (s)");
plt.ylabel("# instances");
plt.title("Solve time KDE for each set, tab2 and baseline not timed out while solving");
plt.xlim(left=0);
plt.tight_layout();
plt.show()

# Figure 2: same quantity for Sets only, restricted to runs whose Savile Row
# phase did not time out.
plt.figure(figsize=(18, 6));
for n_cons in (1, 2, 3):
    df.query("Policy == 'Sets' and SavileRowTimeOut==0 and Num_cons=="+str(n_cons))['SolverTotalTime'].plot.density();
# NOTE: the Tab2 and baseline curves are currently not drawn, so they have no legend entry.
plt.legend(['Sets1', 'Sets2', 'Sets3']);
plt.xlabel("Time (s)");
plt.ylabel("# instances");
plt.title("Solve time KDE for each set, tab2 and baseline not timed out while tabulation");
plt.xlim(left=0);
plt.tight_layout();
plt.show()

Tabulation Time

In [None]:
# KDE of TabulationTime for Policy=Sets, one curve per Num_cons, using runs
# whose Savile Row phase did not time out.
plt.figure(figsize=(18, 6))
for n_cons in (1, 2, 3):
    tabulated = df.query("Policy == 'Sets' and SavileRowTimeOut==0 and Num_cons==" + str(n_cons))
    tabulated['TabulationTime'].plot.density()
# The Tab2 curve (tab2_not_timeout_tab['TabulationTime']) is disabled, so only
# the first three legend labels are matched to curves.
plt.legend(['Sets1', 'Sets2', 'Sets3', 'Tab2'])
plt.gca().set(
    xlabel="Time (s)",
    ylabel="Num instances",
    title="Tabulation time KDE for each set and tab2",
)
plt.xlim(left=0)
plt.tight_layout();

In [None]:
# KDE of SolverNodes for Policy=Sets, one curve per Num_cons, using runs whose
# solver did not time out.
plt.figure(figsize=(18, 6))
for n_cons in (1, 2, 3):
    solved = df.query("Policy == 'Sets' and SolverTimeOut==0 and Num_cons==" + str(n_cons))
    solved['SolverNodes'].plot.density()
# The Tab2 and baseline curves are disabled, so only the first three legend
# labels are matched to curves.
plt.legend(['Sets1', 'Sets2', 'Sets3', 'Tab2', 'Baseline'])
plt.gca().set(
    xlabel="# Nodes",
    ylabel="# instances",
    title="Number of nodes KDE for each set, tab2 and baseline",
)
plt.xlim(left=0)
plt.tight_layout();

## Num_cons & Threshold

In [None]:
# Share (in % of all Policy=Sets runs) of Sets runs with SavileRowTimeOut==1,
# per (Num_cons, thresh_overlap).
print("Number of out-of-time during tabulation in percentage:")
(
    df[(df['Policy'] == 'Sets') & (df['SavileRowTimeOut'] == 1)]
    .groupby(["Num_cons", "thresh_overlap"])['Policy']
    .count()
    / num_policy_sets * 100
)

In [None]:
# Share (in % of all Policy=Sets runs) of Sets runs with SolverTimeOut==1,
# per (Num_cons, thresh_overlap).
print("Number of out-of-time while solving in percentage:")
(
    df[(df['Policy'] == 'Sets') & (df['SolverTimeOut'] == 1)]
    .groupby(["Num_cons", "thresh_overlap"])['Policy']
    .count()
    / num_policy_sets * 100
)

In [None]:
# Share (in % of all Policy=Sets runs) of Sets runs with either timeout flag
# set, per (Num_cons, thresh_overlap).
print("Number of out-of-time while solving or tabulating in percentage:")
(
    df[(df['Policy'] == 'Sets') & ((df['SolverTimeOut'] == 1) | (df['SavileRowTimeOut'] == 1))]
    .groupby(["Num_cons", "thresh_overlap"])['Policy']
    .count()
    / num_policy_sets * 100
)

In [None]:
# KDE of SolverNodes for Policy=Sets runs with SavileRowTimeOut==0, one curve
# per (Num_cons, thresh_overlap) group.
plt.figure(figsize=(18, 6))
(
    df[(df['Policy'] == 'Sets') & (df['SavileRowTimeOut'] == 0)]
    .groupby(["Num_cons", "thresh_overlap"])["SolverNodes"]
    .plot.density(legend=True)
)
plt.gca().set(
    xlabel="# Nodes",
    ylabel="# instances",
    title="Number of nodes KDE for groupby num_cons & thresholds",
)
plt.xlim(left=0)
plt.tight_layout();

In [None]:
# KDE of SolverTotalTime for Policy=Sets runs with SolverTimeOut==0, one curve
# per (Num_cons, thresh_overlap) group.
plt.figure(figsize=(18, 6))
(
    df[(df['Policy'] == 'Sets') & (df['SolverTimeOut'] == 0)]
    .groupby(["Num_cons", "thresh_overlap"])["SolverTotalTime"]
    .plot.density(legend=True)
)
plt.gca().set(
    xlabel="Time (s)",
    ylabel="# instances",
    title="Solve time KDE for groupby num_cons & thresholds",
)
plt.xlim(left=0)
plt.tight_layout();

In [None]:
# One figure per Num_cons (1..3): KDE of SolverTotalTime for Policy=Sets runs
# with SolverTimeOut==0, one curve per thresh_overlap value.
for num_cons in range(1, 4):
    plt.figure(figsize=(18, 6))
    solved_sets = df.query("Policy == 'Sets' and SolverTimeOut==0 and Num_cons==" + str(num_cons))
    solved_sets.groupby(by=["thresh_overlap"])["SolverTotalTime"].plot.density(legend=True)
    plt.gca().set(
        xlabel="Time (s)",
        ylabel="# instances",
        title="Solve time KDE for num_cons= " + str(num_cons) + " and groupby thresholds",
    )
    plt.xlim(left=0)
    plt.tight_layout()
    plt.show()

In [None]:
def _best_total_time(frame, condition):
    """Per Problem, the smallest SolverTotalTime + SavileRowTotalTime among rows matching ``condition``.

    The two times are added per run before taking the minimum, so the result
    always corresponds to one actual run.
    """
    runs = frame.query(condition)
    total = runs['SolverTotalTime'] + runs['SavileRowTotalTime']
    return total.groupby(runs['Problem']).min()


def _solved_within(best_times, limits):
    """For each value in ``limits``, count the entries of ``best_times`` strictly below it."""
    ordered = np.sort(best_times.dropna().to_numpy())
    # side='left' gives the number of elements strictly smaller than each limit.
    return np.searchsorted(ordered, limits, side='left').tolist()


# Time grid: 10 points per unit of the largest SolverTotalTime in the table.
max_x = int(df['SolverTotalTime'].max())
x = np.linspace(0, max_x, max_x*10)

# Fixed: the best time per problem was min(SolverTotalTime) + min(SavileRowTotalTime),
# which can combine two different runs; it is now the minimum of the per-run sum.
tab_2 = _best_total_time(df, "Policy=='2' and SolverTimeOut==0")
baseline = _best_total_time(df, "Policy=='baseline' and SolverTimeOut==0")
sets_1 = _best_total_time(df, "Policy=='Sets' and Num_cons==1 and SolverTimeOut==0")
sets_2 = _best_total_time(df, "Policy=='Sets' and Num_cons==2 and SolverTimeOut==0")
sets_3 = _best_total_time(df, "Policy=='Sets' and Num_cons==3 and SolverTimeOut==0")

# y[k][j] = number of problems whose best time is below x[j].
y = {
    '2': _solved_within(tab_2, x),
    'Sets_1': _solved_within(sets_1, x),
    'Sets_2': _solved_within(sets_2, x),
    'Sets_3': _solved_within(sets_3, x),
    'baseline': _solved_within(baseline, x),
}

In [None]:
# Draw the "instances solved over time" curves held in y against the grid x.
plt.figure(figsize=(18, 6))
for curve_key in ('2', 'baseline', 'Sets_1', 'Sets_2', 'Sets_3'):
    plt.plot(x, y[curve_key])
plt.legend(['Tab2', 'Baseline', 'Sets1', 'Sets2', 'Sets3'])
plt.gca().set(
    title="Number of instances solved over time.",
    xlabel="Time (s)",
    ylabel="# instances",
)
# Optional zoom: plt.xlim(left=25); plt.ylim(bottom=120)
plt.tight_layout()
plt.show()

In [None]:
def _best_total_time(frame, condition):
    """Per Problem, the smallest SolverTotalTime + SavileRowTotalTime among rows matching ``condition``.

    The two times are added per run before taking the minimum, so the result
    always corresponds to one actual run.
    """
    runs = frame.query(condition)
    total = runs['SolverTotalTime'] + runs['SavileRowTotalTime']
    return total.groupby(runs['Problem']).min()


def _solved_within(best_times, limits):
    """For each value in ``limits``, count the entries of ``best_times`` strictly below it."""
    ordered = np.sort(best_times.dropna().to_numpy())
    # side='left' gives the number of elements strictly smaller than each limit.
    return np.searchsorted(ordered, limits, side='left').tolist()


# Time grid: 10 points per unit of the largest SolverTotalTime in the table.
max_x = int(df['SolverTotalTime'].max())
x = np.linspace(0, max_x, max_x*10)

# One figure per Num_cons; one curve per thresh_overlap value.
for num_cons in [1, 2, 3]:
    y = {}
    for thresh_key in ['0.0', '0.25', '0.5', '0.75', '1.0']:
        # Fixed: best time is the minimum of the per-run sum, not the sum of
        # two independently taken minima.
        best = _best_total_time(
            df,
            "Policy=='Sets' and SolverTimeOut==0 and thresh_overlap==" + thresh_key
            + " and Num_cons==" + str(num_cons),
        )
        y[thresh_key] = _solved_within(best, x)

    plt.figure(figsize=(18, 6));
    for thresh_key in ['0.0', '0.25', '0.5', '0.75', '1.0']:
        plt.plot(x, y[thresh_key])
    plt.legend(['0.00', '0.25', '0.50', '0.75', '1.00'])
    plt.title("Number of instances solved over time with num_cons="+str(num_cons));
    plt.xlabel("Time (s)")
    plt.ylabel("# instances")
    plt.tight_layout();
    plt.show()

In [None]:
def _best_total_time(frame, condition):
    """Per Problem, the smallest SolverTotalTime + SavileRowTotalTime among rows matching ``condition``.

    The two times are added per run before taking the minimum, so the result
    always corresponds to one actual run.
    """
    runs = frame.query(condition)
    total = runs['SolverTotalTime'] + runs['SavileRowTotalTime']
    return total.groupby(runs['Problem']).min()


# Problems for which Sets timed out in Savile Row on exactly 5 runs of a given
# Num_cons (presumably one run per thresh_overlap value -- TODO confirm).
not_tabulated = set()
print("Number of instances failed during tabulation with Sets")
for i in range(1, 4):
    policy_not_tab = df.query("Policy=='Sets' and SavileRowTimeOut==1 and Num_cons=="+str(i)).groupby("Problem")["Problem"].count()==5
    not_tabulated.update(policy_not_tab[policy_not_tab].index)
    sum_failed = policy_not_tab.sum()
    print("\tNum cons ", i, ": ", sum_failed)
print("Number of unique instances not tabulated: ", len(not_tabulated))
print("\nTotal number of instances: ", len(df['Problem'].unique()))
print()

# Problems solved in under 3600 total; the baseline count does not depend on
# Num_cons, so it is computed once.
# Fixed: best time is the minimum of the per-run sum, not the sum of two
# independently taken minima.
solved_baseline = (_best_total_time(df, "Policy=='baseline' and SolverTimeOut==0") < 3600).sum()
for i in range(1, 4):
    solved_sets = (_best_total_time(df, "Policy=='Sets' and SolverTimeOut==0 and Num_cons=="+str(i)) < 3600).sum()
    print("Differences in instances solved by baseline and Sets with num_cons="+str(i)+": ", solved_baseline-solved_sets)

In [None]:
from collections import Counter

# Count instances per problem family, where the family is the part of the
# Problem name before the first underscore. Keys are kept in sorted order.
tmp = [x.split("_")[0] for x in df['Problem'].unique()]
problems_count = dict(sorted(Counter(tmp).items()))
print(problems_count)

In [None]:
# Replace missing solver times with 3600 (presumably the time limit in
# seconds -- TODO confirm). Assigning the result back avoids the chained
# in-place call, which does not modify df when pandas Copy-on-Write is active.
df['SolverTotalTime'] = df['SolverTotalTime'].fillna(3600)

In [None]:
# List the column names available in the results table.
df.columns

In [None]:
def _best_total_time(frame, condition):
    """Per Problem, the smallest SolverTotalTime + SavileRowTotalTime among rows matching ``condition``.

    The two times are added per run before taking the minimum, so the result
    always corresponds to one actual run.
    """
    runs = frame.query(condition)
    total = runs['SolverTotalTime'] + runs['SavileRowTotalTime']
    return total.groupby(runs['Problem']).min()


# Scatter of best total time per problem: Sets (x) against Tab2 (y).
# Fixed: the two series are joined on Problem so every point pairs the same
# problem; scatter() pairs values by position and ignores the index.
paired = pd.concat(
    [_best_total_time(df, "Policy=='Sets'"), _best_total_time(df, "Policy=='2'")],
    axis=1, join="inner", keys=["x", "y"],
)
plt.figure(figsize=(8, 8));
ax = plt.gca()
ax.scatter(paired["x"], paired["y"], c="black", marker="x");
ax.set_yscale('symlog')
ax.set_xscale('symlog')
plt.plot([0, 3600+5],[0, 3600+5], c="black");  # diagonal: equal time on both axes
plt.title("Solver time for two sets together");
plt.xlabel("Constraints' sets (s)");
plt.ylabel("Tabulate 2 (s)");
plt.tight_layout();
plt.xlim(left=0, right=3600);
plt.ylim(bottom=0, top=3600);
plt.show();

In [None]:
def _best_total_time(frame, condition):
    """Per Problem, the smallest SolverTotalTime + SavileRowTotalTime among rows matching ``condition``.

    The two times are added per run before taking the minimum, so the result
    always corresponds to one actual run.
    """
    runs = frame.query(condition)
    total = runs['SolverTotalTime'] + runs['SavileRowTotalTime']
    return total.groupby(runs['Problem']).min()


# Scatter of best total time per problem: Sets (x) against baseline (y).
# Fixed: the two series are joined on Problem so every point pairs the same
# problem; scatter() pairs values by position and ignores the index.
paired = pd.concat(
    [_best_total_time(df, "Policy=='Sets'"), _best_total_time(df, "Policy=='baseline'")],
    axis=1, join="inner", keys=["x", "y"],
)
plt.figure(figsize=(8, 8));
ax = plt.gca()
ax.scatter(paired["x"], paired["y"], c="black", marker="x");
ax.set_yscale('symlog')
ax.set_xscale('symlog')
plt.plot([0, 3600+5], [0, 3600+5], c="black");  # diagonal: equal time on both axes
plt.title("Solver time for two sets together");
plt.xlabel("Constraints' sets (s)");
plt.ylabel("Baseline (s)");
plt.tight_layout();
plt.xlim(left=0, right=3600);
plt.ylim(bottom=0, top=3600);
plt.show();

In [None]:
def _best_total_time(frame, condition):
    """Per Problem, the smallest SolverTotalTime + SavileRowTotalTime among rows matching ``condition``.

    The two times are added per run before taking the minimum, so the result
    always corresponds to one actual run.
    """
    runs = frame.query(condition)
    total = runs['SolverTotalTime'] + runs['SavileRowTotalTime']
    return total.groupby(runs['Problem']).min()


# Scatter of best total time per problem: Tab2 (x) against baseline (y).
# Fixed: the two series are joined on Problem so every point pairs the same
# problem; scatter() pairs values by position and ignores the index.
paired = pd.concat(
    [_best_total_time(df, "Policy=='2'"), _best_total_time(df, "Policy=='baseline'")],
    axis=1, join="inner", keys=["x", "y"],
)
plt.figure(figsize=(8, 8));
ax = plt.gca()
ax.scatter(paired["x"], paired["y"], c="black", marker="x");
ax.set_yscale('symlog')
ax.set_xscale('symlog')
plt.plot([0, 3600+5], [0, 3600+5], c="black");  # diagonal: equal time on both axes
plt.title("Solver time for two sets together");
plt.xlabel("Tabulate 2 (s)");
plt.ylabel("Baseline (s)");
plt.tight_layout();
plt.xlim(left=0, right=3600);
plt.ylim(bottom=0, top=3600);
plt.show();

## First set of problems

In [None]:
# Name fragments identifying the problems of the first set.
journal_problems = [
    "AccordionTable",
    "bibd",
    "blackHole",
    "bpmp",
    "carSequencing",
    "coprime",
    "handball7",
    "JPEncoding",
    "killerSudoku16",
    "knights",
    "knights2",
    "langford",
    "nlinkedopt",
    "nlinkedseq",
    "paqueens1",
    "sedfnofix",
    "sportsScheduling",
    "stilllife",
]

In [None]:
# Instances whose Problem name contains one of the journal_problems fragments,
# in journal_problems order.
# Fixed: an instance matching several fragments (e.g. "knights" and "knights2")
# was listed once per match, which duplicated its rows in journal_df;
# dict.fromkeys keeps the first occurrence only.
_all_instances = df.Problem.unique()
journal_instances = list(dict.fromkeys(
    inst for prob in journal_problems for inst in _all_instances if prob in inst
))
# A single concat replaces the repeated pairwise concats; an empty selection
# yields an empty frame with df's columns instead of raising IndexError.
if journal_instances:
    journal_df = pd.concat([df[df['Problem'] == inst] for inst in journal_instances])
else:
    journal_df = df.iloc[0:0]

In [None]:
def _best_total_time(frame, condition):
    """Per Problem, the smallest SolverTotalTime + SavileRowTotalTime among rows matching ``condition``.

    The two times are added per run before taking the minimum, so the result
    always corresponds to one actual run.
    """
    runs = frame.query(condition)
    total = runs['SolverTotalTime'] + runs['SavileRowTotalTime']
    return total.groupby(runs['Problem']).min()


def _solved_within(best_times, limits):
    """For each value in ``limits``, count the entries of ``best_times`` strictly below it."""
    ordered = np.sort(best_times.dropna().to_numpy())
    # side='left' gives the number of elements strictly smaller than each limit.
    return np.searchsorted(ordered, limits, side='left').tolist()


# Time grid: 10 points per unit of the largest SolverTotalTime in journal_df.
max_x = int(journal_df['SolverTotalTime'].max())
x = np.linspace(0, max_x, max_x*10)

# Fixed: the best time per problem was min(SolverTotalTime) + min(SavileRowTotalTime),
# which can combine two different runs; it is now the minimum of the per-run sum.
tab_2 = _best_total_time(journal_df, "Policy=='2' and SolverTimeOut==0")
baseline = _best_total_time(journal_df, "Policy=='baseline' and SolverTimeOut==0")
sets_1 = _best_total_time(journal_df, "Policy=='Sets' and Num_cons==1 and SolverTimeOut==0")
sets_2 = _best_total_time(journal_df, "Policy=='Sets' and Num_cons==2 and SolverTimeOut==0")
sets_3 = _best_total_time(journal_df, "Policy=='Sets' and Num_cons==3 and SolverTimeOut==0")

# y[k][j] = number of problems whose best time is below x[j].
y = {
    '2': _solved_within(tab_2, x),
    'Sets_1': _solved_within(sets_1, x),
    'Sets_2': _solved_within(sets_2, x),
    'Sets_3': _solved_within(sets_3, x),
    'baseline': _solved_within(baseline, x),
}
plt.figure(figsize=(18, 6));
plt.plot(x, y['2'])
plt.plot(x, y['baseline'])
plt.plot(x, y['Sets_1'])
plt.plot(x, y['Sets_2'])
plt.plot(x, y['Sets_3'])
plt.legend(['Tab2', 'Baseline', 'Sets1', 'Sets2', 'Sets3'])
plt.title("Number of instances solved over time, first set of problems");
plt.xlabel("Time (s)")
plt.ylabel("# instances")
plt.tight_layout();
plt.show()

In [None]:
def _best_total_time(frame, condition):
    """Per Problem, the smallest SolverTotalTime + SavileRowTotalTime among rows matching ``condition``.

    The two times are added per run before taking the minimum, so the result
    always corresponds to one actual run.
    """
    runs = frame.query(condition)
    total = runs['SolverTotalTime'] + runs['SavileRowTotalTime']
    return total.groupby(runs['Problem']).min()


def _solved_within(best_times, limits):
    """For each value in ``limits``, count the entries of ``best_times`` strictly below it."""
    ordered = np.sort(best_times.dropna().to_numpy())
    # side='left' gives the number of elements strictly smaller than each limit.
    return np.searchsorted(ordered, limits, side='left').tolist()


# Time grid: 10 points per unit of the largest SolverTotalTime in journal_df.
max_x = int(journal_df['SolverTotalTime'].max())
x = np.linspace(0, max_x, max_x*10)

# One figure per Num_cons; one curve per thresh_overlap value.
for num_cons in [1, 2, 3]:
    y = {}
    for thresh_key in ['0.0', '0.25', '0.5', '0.75', '1.0']:
        # Fixed: best time is the minimum of the per-run sum, not the sum of
        # two independently taken minima.
        best = _best_total_time(
            journal_df,
            "Policy=='Sets' and SolverTimeOut==0 and thresh_overlap==" + thresh_key
            + " and Num_cons==" + str(num_cons),
        )
        y[thresh_key] = _solved_within(best, x)

    plt.figure(figsize=(18, 6));
    for thresh_key in ['0.0', '0.25', '0.5', '0.75', '1.0']:
        plt.plot(x, y[thresh_key])
    plt.legend(['0.00', '0.25', '0.50', '0.75', '1.00'])
    plt.title("Number of instances solved over time with num_cons="+str(num_cons)+", first set of problems");
    plt.xlabel("Time (s)")
    plt.ylabel("# instances")
    plt.tight_layout();
    plt.show()

In [None]:
def _best_total_time(frame, condition):
    """Per Problem, the smallest SolverTotalTime + SavileRowTotalTime among rows matching ``condition``.

    The two times are added per run before taking the minimum, so the result
    always corresponds to one actual run.
    """
    runs = frame.query(condition)
    total = runs['SolverTotalTime'] + runs['SavileRowTotalTime']
    return total.groupby(runs['Problem']).min()


# Scatter of best total time per problem in journal_df: Sets (x) against Tab2 (y).
# Fixed: the two series are joined on Problem so every point pairs the same
# problem; scatter() pairs values by position and ignores the index.
paired = pd.concat(
    [_best_total_time(journal_df, "Policy=='Sets'"), _best_total_time(journal_df, "Policy=='2'")],
    axis=1, join="inner", keys=["x", "y"],
)
plt.figure(figsize=(8, 8));
ax = plt.gca()
ax.scatter(paired["x"], paired["y"], c="black", marker="x");
ax.set_yscale('symlog')
ax.set_xscale('symlog')
plt.plot([0, 3600+5], [0, 3600+5], c="black");  # diagonal: equal time on both axes
plt.title("Solver time for all instances, first set of problems");
plt.xlabel("Constraints' sets (s)");
plt.ylabel("Tabulate 2 (s)");
plt.tight_layout();
plt.xlim(left=0, right=3600);
plt.ylim(bottom=0, top=3600);
plt.show();

In [None]:
def _best_total_time(frame, condition):
    """Per Problem, the smallest SolverTotalTime + SavileRowTotalTime among rows matching ``condition``.

    The two times are added per run before taking the minimum, so the result
    always corresponds to one actual run.
    """
    runs = frame.query(condition)
    total = runs['SolverTotalTime'] + runs['SavileRowTotalTime']
    return total.groupby(runs['Problem']).min()


# Scatter of best total time per problem in journal_df: Sets (x) against baseline (y).
# Fixed: the two series are joined on Problem so every point pairs the same
# problem; scatter() pairs values by position and ignores the index.
paired = pd.concat(
    [_best_total_time(journal_df, "Policy=='Sets'"), _best_total_time(journal_df, "Policy=='baseline'")],
    axis=1, join="inner", keys=["x", "y"],
)
plt.figure(figsize=(8, 8));
ax = plt.gca()
ax.scatter(paired["x"], paired["y"], c="black", marker="x");
ax.set_yscale('symlog')
ax.set_xscale('symlog')
plt.plot([0, 3600+5], [0, 3600+5], c="black");  # diagonal: equal time on both axes
plt.title("Solver time for all instances, first set of problems");
plt.xlabel("Constraints' sets (s)");
plt.ylabel("Baseline (s)");
plt.tight_layout();
plt.xlim(left=0, right=3600);
plt.ylim(bottom=0, top=3600);
plt.show();

In [None]:
def _best_total_time(frame, condition):
    """Per Problem, the smallest SolverTotalTime + SavileRowTotalTime among rows matching ``condition``.

    The two times are added per run before taking the minimum, so the result
    always corresponds to one actual run.
    """
    runs = frame.query(condition)
    total = runs['SolverTotalTime'] + runs['SavileRowTotalTime']
    return total.groupby(runs['Problem']).min()


# Scatter of best total time per problem in journal_df: Tab2 (x) against baseline (y).
# Fixed: the two series are joined on Problem so every point pairs the same
# problem; scatter() pairs values by position and ignores the index.
paired = pd.concat(
    [_best_total_time(journal_df, "Policy=='2'"), _best_total_time(journal_df, "Policy=='baseline'")],
    axis=1, join="inner", keys=["x", "y"],
)
plt.figure(figsize=(8, 8));
ax = plt.gca()
ax.scatter(paired["x"], paired["y"], c="black", marker="x");
ax.set_yscale('symlog')
ax.set_xscale('symlog')
plt.plot([0, 3600+5], [0, 3600+5], c="black");  # diagonal: equal time on both axes
plt.title("Solver time for all instances, first set of problems");
plt.xlabel("Tabulate 2 (s)");
plt.ylabel("Baseline (s)");
plt.tight_layout();
plt.xlim(left=0, right=3600);
plt.ylim(bottom=0, top=3600);
plt.show();

## Second set of problems

In [None]:
# Instances whose Problem name contains none of the journal_problems fragments.
other_instances = [
    instance for instance in df.Problem.unique()
    if not any(prob in instance for prob in journal_problems)
]
# A single concat replaces the repeated pairwise concats; an empty selection
# yields an empty frame with df's columns instead of raising IndexError.
if other_instances:
    other_df = pd.concat([df[df['Problem'] == instance] for instance in other_instances])
else:
    other_df = df.iloc[0:0]

In [None]:
def _best_total_time(frame, condition):
    """Per Problem, the smallest SolverTotalTime + SavileRowTotalTime among rows matching ``condition``.

    The two times are added per run before taking the minimum, so the result
    always corresponds to one actual run.
    """
    runs = frame.query(condition)
    total = runs['SolverTotalTime'] + runs['SavileRowTotalTime']
    return total.groupby(runs['Problem']).min()


def _solved_within(best_times, limits):
    """For each value in ``limits``, count the entries of ``best_times`` strictly below it."""
    ordered = np.sort(best_times.dropna().to_numpy())
    # side='left' gives the number of elements strictly smaller than each limit.
    return np.searchsorted(ordered, limits, side='left').tolist()


# Time grid: 10 points per unit of the largest SolverTotalTime in other_df.
max_x = int(other_df['SolverTotalTime'].max())
x = np.linspace(0, max_x, max_x*10)

# Fixed: the best time per problem was min(SolverTotalTime) + min(SavileRowTotalTime),
# which can combine two different runs; it is now the minimum of the per-run sum.
tab_2 = _best_total_time(other_df, "Policy=='2' and SolverTimeOut==0")
baseline = _best_total_time(other_df, "Policy=='baseline' and SolverTimeOut==0")
sets_1 = _best_total_time(other_df, "Policy=='Sets' and Num_cons==1 and SolverTimeOut==0")
sets_2 = _best_total_time(other_df, "Policy=='Sets' and Num_cons==2 and SolverTimeOut==0")
sets_3 = _best_total_time(other_df, "Policy=='Sets' and Num_cons==3 and SolverTimeOut==0")

# y[k][j] = number of problems whose best time is below x[j].
y = {
    '2': _solved_within(tab_2, x),
    'Sets_1': _solved_within(sets_1, x),
    'Sets_2': _solved_within(sets_2, x),
    'Sets_3': _solved_within(sets_3, x),
    'baseline': _solved_within(baseline, x),
}
plt.figure(figsize=(18, 6));
plt.plot(x, y['2'])
plt.plot(x, y['baseline'])
plt.plot(x, y['Sets_1'])
plt.plot(x, y['Sets_2'])
plt.plot(x, y['Sets_3'])
plt.legend(['Tab2', 'Baseline', 'Sets1', 'Sets2', 'Sets3'])
plt.title("Number of instances solved over time, second set of problems");
plt.xlabel("Time (s)")
plt.ylabel("# instances")
plt.tight_layout();
plt.show()

In [None]:
# Cumulative "instances solved over time" curves for the 'Sets' policy on the
# second set of problems: one figure per Num_cons value, one curve per
# thresh_overlap value.
# The x axis spans 0 .. the largest SolverTotalTime in other_df, sampled at
# roughly ten points per time unit.
max_x = int(other_df['SolverTotalTime'].max())
x = np.linspace(0, max_x, max_x*10)

# 'Sets' runs where the solver did not time out, each with its own total
# (solver + SavileRow). The per-problem minimum is taken over this per-run
# total, so it never mixes the solver time of one run with the SavileRow time
# of another.
sets_solved = other_df.query("Policy=='Sets' and SolverTimeOut==0")
sets_solved = sets_solved.assign(
    TotalTime=sets_solved['SolverTotalTime'] + sets_solved['SavileRowTotalTime']
)

# y-dictionary key -> thresh_overlap value, in plotting order.
overlap_by_key = {'0.0': 0.0, '0.25': 0.25, '0.5': 0.50, '0.75': 0.75, '1.0': 1.0}

for num_cons in [1, 2, 3]:
    runs = sets_solved[sets_solved['Num_cons'] == num_cons]

    y = {}
    for key, overlap in overlap_by_key.items():
        # Best total time per problem for this (num_cons, overlap) setting.
        best = runs[runs['thresh_overlap'] == overlap].groupby("Problem")['TotalTime'].min()
        # Number of problems with best time strictly below each sample point.
        y[key] = np.searchsorted(np.sort(best.dropna().to_numpy()), x, side='left').tolist()

    plt.figure(figsize=(18, 6))
    for key in overlap_by_key:
        plt.plot(x, y[key])
    plt.legend(['0.00', '0.25', '0.50', '0.75', '1.00'])
    plt.title("Number of instances solved over time with num_cons="+str(num_cons)+", second set of problems")
    plt.xlabel("Time (s)")
    plt.ylabel("# instances")
    plt.tight_layout()
    #plt.xlim(left=25)
    #plt.ylim(bottom=120)
    plt.show()

In [None]:
# Scatter plot comparing, per problem, the best total time (solver + SavileRow)
# of the 'Sets' policy (x axis) against the '2' policy (y axis).

# Total time of every run; the per-problem minimum is taken over this per-run
# total rather than adding two minima that may belong to different runs.
totals = other_df.assign(TotalTime=other_df['SolverTotalTime'] + other_df['SavileRowTotalTime'])
best_sets = totals.query("Policy=='Sets'").groupby("Problem")['TotalTime'].min()
best_tab2 = totals.query("Policy=='2'").groupby("Problem")['TotalTime'].min()

# Pair the two policies on the Problem index so each point compares the same
# problem; a problem missing for one policy is left out instead of breaking
# the scatter call with differently sized inputs.
paired = pd.concat([best_sets, best_tab2], axis=1, keys=['sets', 'tab2'], join='inner')

plt.figure(figsize=(8, 8))
ax = plt.gca()
ax.scatter(paired['sets'], paired['tab2'], c="black", marker="x")
ax.set_yscale('symlog')
ax.set_xscale('symlog')
# Diagonal reference line: points above it are problems where 'Sets' was faster.
plt.plot([0, 3600+5], [0, 3600+5], c="black")
plt.title("Solver time for all instances, second set of problems")
plt.xlabel("Constraints' sets (s)")
plt.ylabel("Tabulate 2 (s)")
plt.tight_layout()
plt.xlim(left=0, right=3600)
plt.ylim(bottom=0, top=3600)
plt.show()

In [None]:
# Scatter plot comparing, per problem, the best total time (solver + SavileRow)
# of the 'Sets' policy (x axis) against the 'baseline' policy (y axis).

# Total time of every run; the per-problem minimum is taken over this per-run
# total rather than adding two minima that may belong to different runs.
totals = other_df.assign(TotalTime=other_df['SolverTotalTime'] + other_df['SavileRowTotalTime'])
best_sets = totals.query("Policy=='Sets'").groupby("Problem")['TotalTime'].min()
best_baseline = totals.query("Policy=='baseline'").groupby("Problem")['TotalTime'].min()

# Pair the two policies on the Problem index so each point compares the same
# problem; a problem missing for one policy is left out instead of breaking
# the scatter call with differently sized inputs.
paired = pd.concat([best_sets, best_baseline], axis=1, keys=['sets', 'baseline'], join='inner')

plt.figure(figsize=(8, 8))
ax = plt.gca()
ax.scatter(paired['sets'], paired['baseline'], c="black", marker="x")
ax.set_yscale('symlog')
ax.set_xscale('symlog')
# Diagonal reference line: points above it are problems where 'Sets' was faster.
plt.plot([0, 3600+5], [0, 3600+5], c="black")
plt.title("Solver time for all instances, second set of problems")
plt.xlabel("Constraints' sets (s)")
plt.ylabel("Baseline (s)")
plt.tight_layout()
plt.xlim(left=0, right=3600)
plt.ylim(bottom=0, top=3600)
plt.show()

In [None]:
# Scatter plot comparing, per problem, the best total time (solver + SavileRow)
# of the '2' policy (x axis) against the 'baseline' policy (y axis).

# Total time of every run; the per-problem minimum is taken over this per-run
# total rather than adding two minima that may belong to different runs.
totals = other_df.assign(TotalTime=other_df['SolverTotalTime'] + other_df['SavileRowTotalTime'])
best_tab2 = totals.query("Policy=='2'").groupby("Problem")['TotalTime'].min()
best_baseline = totals.query("Policy=='baseline'").groupby("Problem")['TotalTime'].min()

# Pair the two policies on the Problem index so each point compares the same
# problem; a problem missing for one policy is left out instead of breaking
# the scatter call with differently sized inputs.
paired = pd.concat([best_tab2, best_baseline], axis=1, keys=['tab2', 'baseline'], join='inner')

plt.figure(figsize=(8, 8))
ax = plt.gca()
ax.scatter(paired['tab2'], paired['baseline'], c="black", marker="x")
ax.set_yscale('symlog')
ax.set_xscale('symlog')
# Diagonal reference line: points above it are problems where policy '2' was faster.
plt.plot([0, 3600+5], [0, 3600+5], c="black")
plt.title("Solver time for all instances, second set of problems")
plt.xlabel("Tabulate 2 (s)")
plt.ylabel("Baseline (s)")
plt.tight_layout()
plt.xlim(left=0, right=3600)
plt.ylim(bottom=0, top=3600)
plt.show()