# Analysis of the results

In [1]:
import json
import pandas as pd
# Display configuration: passing None removes pandas' limit, so displayed frames
# are never truncated column- or row-wise. Use a finite limit (e.g. 1000) instead
# if full dumps make the notebook output too large.
pd.set_option('display.max_columns', None)  # or 1000
pd.set_option('display.max_rows', None)  # or 1000

In [11]:
# Load the statistics of each data set ("05", "10", "15") and put the results of
# the two solvers side by side, one row per instance. The merged frames are
# stored in `dfs`, keyed by the data-set label.
dfs = {}
for i in ["05", "10", "15"]:
    df = pd.read_csv("stats-{:s}.csv".format(i))

    # "instance" has the form "<name>.json-<solver>.out"
    # (e.g. "IN_000.json-bap_task.out"): the first dot-separated field is the
    # instance name and the second one is "json-<solver>".
    parts = df["instance"].str.split(".")
    df["instance_name"] = parts.str[0]
    df["solver"] = parts.str[1].str[5:]  # strip the 5-character "json-" prefix

    bap = df[df["solver"] == "bap_task"]
    ilp = df[df["solver"] == "global"]

    # Merge to separate columns by instance. pd.merge defaults to an inner join,
    # so only instances that have a row for both solvers are kept; the
    # solver-specific columns get the "_bap" / "_ilp" suffixes.
    df = pd.merge(bap, ilp, on="instance_name", suffixes=["_bap", "_ilp"])

    dfs[i] = df

{'05':                 instance_bap solver_bap  time_bap  objective-reported_bap  \
 0   IN_000.json-bap_task.out   bap_task        31               -1.000000   
 1   IN_001.json-bap_task.out   bap_task        76                1.146634   
 2   IN_002.json-bap_task.out   bap_task       419                1.028355   
 3   IN_003.json-bap_task.out   bap_task       251                0.633783   
 4   IN_004.json-bap_task.out   bap_task       128                1.018899   
 5   IN_005.json-bap_task.out   bap_task       355                2.003834   
 6   IN_006.json-bap_task.out   bap_task       286                1.315118   
 7   IN_007.json-bap_task.out   bap_task         7               -1.000000   
 8   IN_008.json-bap_task.out   bap_task       119                0.535914   
 9   IN_009.json-bap_task.out   bap_task       297                1.785361   
 10  IN_010.json-bap_task.out   bap_task        51                1.085376   
 11  IN_011.json-bap_task.out   bap_task        68        

In [15]:
def _print_time_stats(frame, template):
    """Print mean and standard deviation of the ILP and BaP run times.

    Args:
        frame: DataFrame with "time_ilp" and "time_bap" columns.
        template: format string receiving (method name, mean, std).

    The values are divided by 1000 before printing; the original comment says
    this converts "to s", so the columns are presumably in milliseconds.
    """
    for method in ["ilp", "bap"]:
        times = frame["time_" + method]
        print(template.format(method, times.mean() / 1000, times.std() / 1000))


def _print_subset_report(frame, winner):
    """Print size, run-time statistics and BaP node counts of a subset.

    Args:
        frame: subset of a merged frame (needs "instance_name", "time_ilp",
            "time_bap" and "metadata_bap" columns).
        winner: label of the method that was faster on this subset
            ("ilp" or "bap"); only used in the printed headline.
    """
    print(" - {:s} was faster {:d} times".format(winner, frame["instance_name"].count()))
    _print_time_stats(frame, "   time {:s} ${:4.2f} \\pm {:4.2f}$")

    # "metadata_bap" holds a JSON document per row; parse it once and reuse the
    # resulting node counts for all three aggregates.
    nodes = frame["metadata_bap"].apply(lambda raw: int(json.loads(raw)["number_of_nodes"]))
    print("   bap nodes avg", nodes.mean(), "min", nodes.min(), "max", nodes.max())


for size in ["05", "10", "15"]:
    print()
    print(size)

    df = dfs[size]

    print("Overall performance:")
    # "\\pm" is written with an escaped backslash: a bare "\p" is an invalid
    # escape sequence in a normal string literal and triggers a warning.
    _print_time_stats(df, "time {:s} ${:4.2f} \\pm {:4.2f}$")

    # Both comparisons are strict, so instances on which the two solvers took
    # exactly the same time are counted in neither subset.

    # ILP was faster
    df_cur = df[df["time_ilp"] < df["time_bap"]]
    _print_subset_report(df_cur, "ilp")

    # BaP was faster
    df_cur = df[df["time_ilp"] > df["time_bap"]]
    _print_subset_report(df_cur, "bap")


05
Overall performance:
time ilp $0.02 \pm 0.03$
time bap $0.15 \pm 0.21$
 - ilp was faster 86 times
   time ilp $0.01 \pm 0.02$
   time bap $0.16 \pm 0.23$
   bap nodes avg 3.872093023255814 min 0 max 33
 - bap was faster 14 times
   time ilp $0.08 \pm 0.04$
   time bap $0.05 \pm 0.02$
   bap nodes avg 0.9285714285714286 min 0 max 1

10
Overall performance:
time ilp $1.29 \pm 2.47$
time bap $2.75 \pm 4.13$
 - ilp was faster 64 times
   time ilp $0.31 \pm 0.47$
   time bap $3.87 \pm 4.79$
   bap nodes avg 22.1875 min 0 max 125
 - bap was faster 36 times
   time ilp $3.05 \pm 3.46$
   time bap $0.75 \pm 0.79$
   bap nodes avg 3.7777777777777777 min 1 max 33

15
Overall performance:
time ilp $37.93 \pm 98.05$
time bap $22.39 \pm 38.04$
 - ilp was faster 65 times
   time ilp $4.46 \pm 7.94$
   time bap $29.02 \pm 41.91$
   bap nodes avg 59.83076923076923 min 1 max 225
 - bap was faster 35 times
   time ilp $100.07 \pm 147.51$
   time bap $10.07 \pm 25.84$
   bap nodes avg 10.085714285714

In [17]:
# Count the instances of the "05" data set whose run time exceeds 60, first for
# BaP, then for ILP.
# NOTE(review): the summary cell divides time_* by 1000 to obtain seconds, so the
# threshold here is presumably 60 ms rather than 60 s -- confirm the intended unit.
df = dfs["05"]
bap_over_threshold = df["time_bap"] > 60
ilp_over_threshold = df["time_ilp"] > 60
print(df.loc[bap_over_threshold, "instance_name"].count())
print(df.loc[ilp_over_threshold, "instance_name"].count())


53
12


In [18]:
# Select the merged BaP/ILP results of the "15" data set for closer inspection.
df_cur = dfs["15"]

In [25]:
# Display the merged BaP/ILP row(s) of instance IN_006.
df_cur.loc[df_cur["instance_name"] == "IN_006"]

Unnamed: 0,instance_bap,solver_bap,time_bap,objective-reported_bap,objective_bap,length_bap,metadata_bap,instance_name,instance_ilp,solver_ilp,time_ilp,objective-reported_ilp,objective_ilp,length_ilp,metadata_ilp
6,IN_006.json-bap_task.out,bap_task,11244,0.850214,0.850214,388,"{""objective"": ""0.8502136624960206"", ""number_of...",IN_006,IN_006.json-global.out,global,4950,0.850214,0.850214,388,"{""objective"": ""0.8502136624960124""}"
