# Analysis of the results

In [9]:
import json
import pandas as pd
# Lift pandas' display limits so wide/long frames are shown in full
# (None = unlimited; use e.g. 1000 instead to cap the output).
for option_name in ("display.max_columns", "display.max_rows"):
    pd.set_option(option_name, None)

In [10]:
# Solver tag found in the CSV -> suffix given to that solver's columns in the
# merged frame. The order matters: it is the merge (and hence column) order.
SOLVER_SUFFIXES = {"global": "ilp", "bap_task": "bap-task", "bap_pair": "bap-pair"}


def _solver_columns(stats, solver, suffix):
    """Return the rows of ``stats`` belonging to one solver, columns suffixed.

    Every column gets ``_<suffix>`` appended, except ``instance_name``, which
    is restored to its plain name so it can be used as the merge key.
    """
    rows = stats[stats["solver"] == solver]
    return rows.add_suffix("_" + suffix).rename(
        columns={"instance_name_" + suffix: "instance_name"}
    )


# dfs[size] holds one row per instance with the results of all three solvers
# side by side (columns suffixed _ilp, _bap-task and _bap-pair).
dfs = {}
for i in ["05", "10", "15"]:
    stats = pd.read_csv("stats-{:s}.csv".format(i))

    # "instance" is dot-separated: the first component is the instance name;
    # the second, minus its first five characters, is the solver tag
    # (presumably a fixed-length prefix -- confirm against the CSV files).
    parts = stats["instance"].str.split(".")
    stats["instance_name"] = parts.str[0]
    stats["solver"] = parts.str[1].str[5:]

    # Inner joins on instance_name: an instance missing a result for any of
    # the three solvers is dropped from the merged frame.
    df = None
    for solver, suffix in SOLVER_SUFFIXES.items():
        solver_frame = _solver_columns(stats, solver, suffix)
        df = solver_frame if df is None else pd.merge(df, solver_frame, on="instance_name")

    dfs[i] = df

In [11]:
bap_method = "bap-task"
print("BAP method:", bap_method)


def _print_time_stats(frame, methods, indent=""):
    """Print mean and standard deviation of each method's runtime.

    Reads column ``time_<method>`` of ``frame`` and divides both statistics
    by 1000 (milliseconds to seconds). Each line is prefixed with ``indent``
    and formatted as a LaTeX math snippet.
    """
    for method in methods:
        avg = frame["time_" + method].mean() / 1000  # to s
        std = frame["time_" + method].std() / 1000
        print("{:s}time {:s} ${:4.2f} \\pm {:4.2f}$".format(indent, method, avg, std))


def _report_subset(subset, label, bap_method):
    """Print size, runtime statistics and BAP node counts of ``subset``.

    ``label`` names the method that was faster on these instances. The node
    counts are read from the ``number_of_nodes`` field of the JSON stored in
    column ``metadata_<bap_method>``.
    """
    print(" - {:s} was faster {:d} times".format(label, subset["instance_name"].count()))
    _print_time_stats(subset, ["ilp", bap_method], indent="   ")
    # Parse the metadata once and reuse it for all three statistics.
    nodes = subset["metadata_" + bap_method].apply(
        lambda raw: int(json.loads(raw)["number_of_nodes"])
    )
    print("   bap nodes avg", nodes.mean(), "min", nodes.min(), "max", nodes.max())


for size in ["05", "10", "15"]:
    print()
    print(size)

    df = dfs[size]

    print("Overall performance:")
    _print_time_stats(df, ["ilp", bap_method])

    # Strict comparisons: instances where both methods took exactly the same
    # time are counted in neither group.
    ilp_faster = df["time_ilp"] < df["time_" + bap_method]
    bap_faster = df["time_ilp"] > df["time_" + bap_method]

    _report_subset(df[ilp_faster], "ilp", bap_method)
    _report_subset(df[bap_faster], "bap", bap_method)

BAP method: bap-task

05
Overall performance:
time ilp $0.03 \pm 0.02$
time bap-task $0.16 \pm 0.20$
 - ilp was faster 96 times
   time ilp $0.02 \pm 0.02$
   time bap-task $0.16 \pm 0.21$
   bap nodes avg 3.5416666666666665 min 0 max 24
 - bap was faster 4 times
   time ilp $0.07 \pm 0.02$
   time bap-task $0.05 \pm 0.02$
   bap nodes avg 1.0 min 1 max 1

10
Overall performance:
time ilp $0.20 \pm 0.11$
time bap-task $3.58 \pm 7.41$
 - ilp was faster 94 times
   time ilp $0.19 \pm 0.10$
   time bap-task $3.79 \pm 7.60$
   bap nodes avg 15.904255319148936 min 0 max 214
 - bap was faster 6 times
   time ilp $0.38 \pm 0.21$
   time bap-task $0.30 \pm 0.20$
   bap nodes avg 0.8333333333333334 min 0 max 1

15
Overall performance:
time ilp $0.95 \pm 1.12$
time bap-task $24.32 \pm 34.45$
 - ilp was faster 97 times
   time ilp $0.83 \pm 0.70$
   time bap-task $25.03 \pm 34.73$
   bap nodes avg 57.95876288659794 min 1 max 311
 - bap was faster 3 times
   time ilp $4.99 \pm 3.65$
   time bap-ta

In [12]:
# Count, per solver, the 15-task instances whose recorded time exceeds 600000
# (the time columns are divided by 1000 to obtain seconds elsewhere in this
# notebook, so this threshold corresponds to 600 s).
df = dfs["15"]
print("number of timeouts on 15-tasks instances:")
for solver_label, time_column in [
    ("ILP", "time_ilp"),
    ("BAP-pair", "time_bap-pair"),
    ("BAP-task", "time_bap-task"),
]:
    timed_out = df[time_column] > 600000
    print(solver_label, df.loc[timed_out, "instance_name"].count())

number of timeouts on 15-tasks instances:
ILP 0
BAP-pair 3
BAP-task 0


In [13]:
# Number of 15-task instances on which the ILP took longer than BAP-pair.
# The frame is bound explicitly so the cell does not depend on whatever `df`
# an earlier cell happened to leave behind.
# NOTE: the printed label says "bap", but the comparison is against BAP-pair.
df = dfs["15"]
ilp_slower = df["time_ilp"] > df["time_bap-pair"]
print("ILP slower than bap", df.loc[ilp_slower, "instance_name"].count())

ILP slower than bap 5


In [14]:
bap_method = "bap-task"
# Time spent in BAP: for each instance size, the mean over instances of the
# share (in %) of the total BAP runtime taken by each phase.
# NOTE: the metadata also carries a "time_get_pair" field that is not reported.
TIME_COMPONENTS = [
    "time_masters_init",
    "time_masters_solving",
    "time_sub_init",
    "time_sub_solving",
    "time_global",
    "time_recovery",
]

for size in ["05", "10", "15"]:
    print()
    print(size)

    df = dfs[size]
    # Decode every metadata JSON string once rather than once per component.
    metadata = df["metadata_" + bap_method].apply(json.loads)
    total_time = df["time_" + bap_method]

    for component in TIME_COMPONENTS:
        # The metadata value is scaled by 1000 before dividing by the total
        # time, i.e. it is treated as seconds against a total in milliseconds.
        component_time = metadata.apply(lambda meta, key=component: float(meta[key]))
        share = (1000 * component_time / total_time).mean()
        print("   " + component, round(100 * share, 2))
        


05
   time_masters_init 3.17
   time_masters_solving 0.43
   time_sub_init 20.0
   time_sub_solving 40.4
   time_global 0.8
   time_recovery 31.77

10
   time_masters_init 3.01
   time_masters_solving 0.71
   time_sub_init 11.19
   time_sub_solving 72.18
   time_global 1.14
   time_recovery 7.93

15
   time_masters_init 2.88
   time_masters_solving 0.8
   time_sub_init 8.71
   time_sub_solving 76.41
   time_global 2.61
   time_recovery 5.35


In [7]:
# Names of the 15-task instances whose BAP-task time exceeds 600000
# (the same threshold used for the timeout counts).
df = dfs["15"]
over_limit = df["time_bap-task"] > 600000
df.loc[over_limit, "instance_name"]

Series([], Name: instance_name, dtype: object)

In [8]:
# Show the raw BAP-task metadata JSON recorded for instance IN_079 of the
# 15-task set. The frame is bound explicitly so the cell does not rely on
# `df` left over from the previous cell.
df = dfs["15"]
is_in_079 = df["instance_name"] == "IN_079"
print(df.loc[is_in_079, "metadata_bap-task"].values)

['{"objective": "0.7552394869288354", "number_of_nodes": "31", "time_masters_init": "0.25", "time_masters_solving": "0.07", "time_sub_init": "0.62", "time_sub_solving": "6.88", "time_global": "0.59", "time_get_pair": "0.00", "time_recovery": "0.43", "master_relaxation": "0.752333", "patterns_generated_avg": "6.58", "optimal": "True"}']
