## Table Aggregation: BGP Synthesis Time

In this notebook, we aggregate the raw result files into the tables of BGP/OSPF synthesis times that are used in our figures.

In [1]:
import os
def get_neural_files(basedir):
    """Map each requirement count to the matching result file in ``basedir``.

    Only directory entries whose name starts with ``bgp-reqs`` are considered.
    The requirement count is the third ``-``-separated token of the file name
    (e.g. ``bgp-reqs-2-result-...`` -> ``2``).

    Parameters
    ----------
    basedir : str
        Directory to scan; a trailing path separator is optional.

    Returns
    -------
    dict[int, str]
        Requirement count -> path of the result file (``basedir`` joined with
        the file name).

    Raises
    ------
    ValueError
        If the third token of a matching file name is not an integer.
    """
    # os.listdir() returns entries in arbitrary order. Sorting makes the choice
    # deterministic when several files share a requirement count: the
    # lexicographically last name wins.
    return {
        int(name.split("-")[2]): os.path.join(basedir, name)
        for name in sorted(os.listdir(basedir))
        if name.startswith("bgp-reqs")
    }
# Neural synthesizer result files, keyed by number of requirements.
neural_files = get_neural_files("results/")
# NetComplete result files, keyed by number of requirements. Here the third
# "-"-separated token of the file name carries a trailing "result" suffix that
# is stripped before the integer conversion.
nc_files = {
    int(name.split("-")[2][:-len("result")]): "./netcomplete/results/" + name
    for name in os.listdir("./netcomplete/results/")
    if name.startswith("bgp-reqs")
}

In [2]:
import pandas as pd

In [3]:
# Sentinel stored in the "time" column of a results file for a timed-out run.
TIMEOUT_VALUE = 999
# Time charged to a timed-out run when computing per-bucket statistics.
TIMEOUT_PENALTY = 1500

def read_results_file(f, with_consistency=False):
    """Aggregate one raw results file into per-bucket statistics.

    The file is read as a header-less, ``;``-separated table with the columns
    ``prefix`` and ``time`` (plus ``consistency`` when ``with_consistency`` is
    set). The node count of a sample is the integer following the first five
    characters (``bgp-n``) of its ``prefix``. Samples are grouped into the
    buckets ``"0 Small"`` (< 8 nodes), ``"1 Medium"`` (< 16 nodes) and
    ``"2 Large"`` (everything else).

    Parameters
    ----------
    f : str or file-like
        Anything accepted by ``pandas.read_csv``.
    with_consistency : bool, default False
        Whether the file has a third ``consistency`` column.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``bucket`` with the columns ``time`` (mean, timed-out runs
        counted as ``TIMEOUT_PENALTY``), ``num_timeouts``, ``samples``,
        ``min_nodes``, ``max_nodes`` and ``time-std``. With
        ``with_consistency`` it additionally has ``consistency`` (mean),
        ``full_match`` (fraction of samples whose consistency equals 1.0) and
        ``consistency-std``.
    """
    column_names = ["prefix", "time"] + (["consistency"] if with_consistency else [])
    df = pd.read_csv(f, sep=";", names=column_names)

    df["num_nodes"] = [int(p[len("bgp-n"):]) for p in df["prefix"].values]
    # Width (in number of nodes) of the Small and Medium buckets.
    nodes_per_bucket = 8

    def bucket(n):
        if n < nodes_per_bucket: return "0 Small"
        if n < nodes_per_bucket*2: return "1 Medium"
        return "2 Large"
    df["bucket"] = [bucket(n) for n in df["num_nodes"]]

    # Record timeouts before the sentinel is overwritten; stored as integers so
    # that the per-bucket sum is an integer count.
    timed_out = df["time"] == TIMEOUT_VALUE
    df["timed_out"] = timed_out.astype(int)
    df.loc[timed_out, "time"] = TIMEOUT_PENALTY

    if with_consistency:
        df["full_match"] = df["consistency"].values == 1.0

    # Aggregate only the columns that are needed: reducing the whole frame would
    # also hit the string "prefix" column, which recent pandas versions reject
    # instead of silently dropping.
    grouped = df.groupby("bucket")
    mean_columns = ["time"] + (["consistency"] if with_consistency else [])
    res = grouped[mean_columns].mean()
    res["num_timeouts"] = grouped["timed_out"].sum()

    if with_consistency:
        res["full_match"] = grouped["full_match"].mean()
        res["consistency-std"] = grouped["consistency"].std()

    res["samples"] = grouped["num_nodes"].count()
    res["min_nodes"] = grouped["num_nodes"].min()
    res["max_nodes"] = grouped["num_nodes"].max()
    res["time-std"] = grouped["time"].std()

    return res
# Disabled spot check against one specific neural result file:
#print(read_results_file("bgp/neural/bgp-reqs-2-result-bgp-2021-09-15_11:25:18.csv", with_consistency=True))
# Sanity check: print the per-bucket aggregate of the 2-requirement NetComplete results.
print(read_results_file(nc_files[2]))

                 time  num_timeouts  samples  min_nodes  max_nodes    time-std
bucket                                                                        
0 Small     18.072854             0        8          0          7   14.545376
1 Medium    60.864079             0        8          8         15   33.394428
2 Large   1389.484829             7        8         16         23  312.584108


In [4]:
# Column names used for the neural synthesizer in the merged comparison tables.
system_name = "Neural"
consistency_col = "Consistency (Neural)"
full_match_col = "Full Match (Neural)"

def merge(df_netcomplete, df_neural):
    """Combine the per-bucket statistics of both systems into one table.

    Both inputs are expected to be indexed by ``bucket`` (as returned by
    ``read_results_file``); columns are aligned on that index.

    Parameters
    ----------
    df_netcomplete : pandas.DataFrame
        Must provide the columns ``time``, ``num_timeouts`` and ``time-std``.
    df_neural : pandas.DataFrame
        Must provide the columns ``time``, ``time-std``, ``consistency``,
        ``consistency-std`` and ``full_match``.

    Returns
    -------
    pandas.DataFrame
        One row per bucket with ``bucket`` as a regular column, the mean time
        and its standard deviation for both systems, the NetComplete timeout
        count, the neural consistency statistics and a ``Speedup`` column
        holding ``NetComplete / Neural`` mean time formatted like ``"28.2x"``.
        Neither input frame is modified.
    """
    # Start from an empty frame sharing only the bucket index, so that the
    # caller's df_netcomplete is never mutated.
    res = pd.DataFrame(index=df_netcomplete.index)

    res["NetComplete"] = df_netcomplete["time"]
    res["NetComplete Timeouts"] = df_netcomplete["num_timeouts"]
    res["NetComplete-std"] = df_netcomplete["time-std"]
    res[system_name] = df_neural["time"]
    res[system_name + "-std"] = df_neural["time-std"]
    res[consistency_col] = df_neural["consistency"]
    res[consistency_col + "-std"] = df_neural["consistency-std"]
    res[full_match_col] = df_neural["full_match"]

    speedup_factors = res["NetComplete"].values / res[system_name].values
    res["Speedup"] = ["%.1fx" % f for f in speedup_factors]

    res = res.reset_index()

    # Select via the same constants that were used to create the columns, so
    # that renaming system_name cannot break the lookup.
    return res[[
        "bucket",
        "NetComplete",
        system_name,
        "Speedup",
        "NetComplete Timeouts",
        consistency_col,
        full_match_col,
        "NetComplete-std",
        system_name + "-std",
        consistency_col + "-std",
    ]]

## 2 Requirements

In [5]:
# Comparison table for the 2-requirement runs: NetComplete vs. neural results.
req2 = merge(
    df_netcomplete=read_results_file(nc_files[2]),
    df_neural=read_results_file(neural_files[2], with_consistency=True),
)
req2

Unnamed: 0,bucket,NetComplete,Neural,Speedup,NetComplete Timeouts,Consistency (Neural),Full Match (Neural),NetComplete-std,Neural-std,Consistency (Neural)-std
0,0 Small,18.072854,0.640729,28.2x,0,0.96875,0.875,14.545376,0.38059,0.088388
1,1 Medium,60.864079,2.747783,22.2x,0,0.939236,0.75,33.394428,3.288096,0.13279
2,2 Large,1389.484829,22.300895,62.3x,7,0.988636,0.875,312.584108,26.862637,0.032141


## 8 Requirements

In [6]:
# Comparison table for the 8-requirement runs: NetComplete vs. neural results.
req8 = merge(
    df_netcomplete=read_results_file(nc_files[8]),
    df_neural=read_results_file(neural_files[8], with_consistency=True),
)
req8

Unnamed: 0,bucket,NetComplete,Neural,Speedup,NetComplete Timeouts,Consistency (Neural),Full Match (Neural),NetComplete-std,Neural-std,Consistency (Neural)-std
0,0 Small,247.690651,1.065172,232.5x,0,0.96,0.75,436.900863,0.837845,0.07709
1,1 Medium,1500.0,3.470269,432.2x,8,0.967839,0.5,0.0,3.336769,0.035469
2,2 Large,1500.0,30.961408,48.4x,8,0.969444,0.75,0.0,28.179626,0.056889


## 16 Requirements

In [7]:
# Comparison table for the 16-requirement runs: NetComplete vs. neural results.
req16 = merge(
    df_netcomplete=read_results_file(nc_files[16]),
    df_neural=read_results_file(neural_files[16], with_consistency=True),
)
req16

Unnamed: 0,bucket,NetComplete,Neural,Speedup,NetComplete Timeouts,Consistency (Neural),Full Match (Neural),NetComplete-std,Neural-std,Consistency (Neural)-std
0,0 Small,1416.828196,2.527836,560.5x,7,0.92577,0.25,235.245388,1.850095,0.069861
1,1 Medium,1500.0,5.481253,273.7x,8,0.947115,0.25,0.0,3.903356,0.047182
2,2 Large,1500.0,69.092142,21.7x,8,0.958699,0.25,0.0,108.170768,0.033228


## Format as LaTeX table

In [8]:
import numpy as np

reqs_counts = [2, 8, 16]

# LaTeX abbreviations of the bucket names; unknown names are printed verbatim.
bucket_labels = {"0 Small": "S", "1 Medium": "M", "2 Large": "L"}

# Print one LaTeX table row per (requirement count, bucket) pair. Each group of
# rows is preceded by \midrule and only its first row carries the "N reqs." label.
for nreqs, reqs in zip(reqs_counts, [req2, req8, req16]):
    print("\\midrule")
    for i, row_data in enumerate(reqs.to_dict("records")):
        num_timeouts = row_data["NetComplete Timeouts"]
        cells = [bucket_labels.get(row_data["bucket"], str(row_data["bucket"]))]

        # NetComplete cell. A mean of exactly 1500 is reported as ">25m" without
        # a standard deviation (1500 is the value substituted for timed-out runs
        # in read_results_file, so this means every run in the bucket timed out).
        nc_mean = row_data["NetComplete"]
        nc_cell = ">25m" if nc_mean == 1500 else "%.2f" % nc_mean
        if nc_mean < 1500:
            # NOTE(review): the unit "s" sits inside math mode here but outside
            # of it in the Neural cell below; kept as-is to preserve the output.
            nc_cell += "$s\\pm$" + "%.2f" % row_data["NetComplete-std"]
        if num_timeouts > 0:
            # NOTE(review): "/8" assumes 8 samples per bucket -- confirm.
            nc_cell += "\\tiny{" + "  {}/8 TO".format(num_timeouts) + "}"
        cells.append(nc_cell)

        # Neural cell: mean and standard deviation of the synthesis time.
        neural_time = "%.2f" % row_data[system_name]
        neural_std = "%.2f" % row_data[system_name + "-std"]
        cells.append(neural_time + "s$\\pm$" + neural_std)

        cells.append("\\textbf{" + row_data["Speedup"] + "}")
        cells.append(
            "%.2f" % row_data["Consistency (Neural)"]
            + "$\\pm$"
            + "%.2f" % row_data["Consistency (Neural)-std"]
        )
        # Fraction of fully matching samples, shown as a count out of 8.
        cells.append("%d/8" % int(8 * row_data["Full Match (Neural)"]))

        leading = "{} reqs. & ".format(nreqs) if i == 0 else " & "
        print(leading + " & ".join(cells) + "\\\\")

\midrule
2 reqs. & S & 18.07$s\pm$14.55 & 0.64s$\pm$0.38 & \textbf{28.2x} & 0.97$\pm$0.09 & 7/8\\
 & M & 60.86$s\pm$33.39 & 2.75s$\pm$3.29 & \textbf{22.2x} & 0.94$\pm$0.13 & 6/8\\
 & L & 1389.48$s\pm$312.58\tiny{  7/8 TO} & 22.30s$\pm$26.86 & \textbf{62.3x} & 0.99$\pm$0.03 & 7/8\\
\midrule
8 reqs. & S & 247.69$s\pm$436.90 & 1.07s$\pm$0.84 & \textbf{232.5x} & 0.96$\pm$0.08 & 6/8\\
 & M & >25m\tiny{  8/8 TO} & 3.47s$\pm$3.34 & \textbf{432.2x} & 0.97$\pm$0.04 & 4/8\\
 & L & >25m\tiny{  8/8 TO} & 30.96s$\pm$28.18 & \textbf{48.4x} & 0.97$\pm$0.06 & 6/8\\
\midrule
16 reqs. & S & 1416.83$s\pm$235.25\tiny{  7/8 TO} & 2.53s$\pm$1.85 & \textbf{560.5x} & 0.93$\pm$0.07 & 2/8\\
 & M & >25m\tiny{  8/8 TO} & 5.48s$\pm$3.90 & \textbf{273.7x} & 0.95$\pm$0.05 & 2/8\\
 & L & >25m\tiny{  8/8 TO} & 69.09s$\pm$108.17 & \textbf{21.7x} & 0.96$\pm$0.03 & 2/8\\


## GPU

In [9]:
# Neural result files found under the bgp-gpu results directory, keyed by
# number of requirements.
neural_gpu_files = get_neural_files("./results/bgp-gpu/")

# One merged comparison table per requirement count, built in the order 2, 8, 16
# against the same NetComplete results as the tables above.
req2_gpu, req8_gpu, req16_gpu = (
    merge(
        read_results_file(nc_files[num_reqs]),
        read_results_file(neural_gpu_files[num_reqs], with_consistency=True),
    )
    for num_reqs in (2, 8, 16)
)

import numpy as np

reqs_counts = [2, 8, 16]

# LaTeX abbreviations of the bucket names; unknown names are printed verbatim.
bucket_labels = {"0 Small": "S", "1 Medium": "M", "2 Large": "L"}

# Print the GPU variant of the LaTeX table. The row format is the same as for
# the table printed by the previous formatting cell, except that rows of the
# Large bucket are omitted.
for nreqs, reqs in zip(reqs_counts, [req2_gpu, req8_gpu, req16_gpu]):
    print("\\midrule")
    for i, row_data in enumerate(reqs.to_dict("records")):
        bucket_label = bucket_labels.get(row_data["bucket"], str(row_data["bucket"]))
        if bucket_label == "L":
            continue

        num_timeouts = row_data["NetComplete Timeouts"]
        cells = [bucket_label]

        # NetComplete cell. A mean of exactly 1500 is reported as ">25m" without
        # a standard deviation (1500 is the value substituted for timed-out runs
        # in read_results_file, so this means every run in the bucket timed out).
        nc_mean = row_data["NetComplete"]
        nc_cell = ">25m" if nc_mean == 1500 else "%.2f" % nc_mean
        if nc_mean < 1500:
            # NOTE(review): the unit "s" sits inside math mode here but outside
            # of it in the Neural cell below; kept as-is to preserve the output.
            nc_cell += "$s\\pm$" + "%.2f" % row_data["NetComplete-std"]
        if num_timeouts > 0:
            # NOTE(review): "/8" assumes 8 samples per bucket -- confirm.
            nc_cell += "\\tiny{" + "  {}/8 TO".format(num_timeouts) + "}"
        cells.append(nc_cell)

        # Neural cell: mean and standard deviation of the synthesis time.
        neural_time = "%.2f" % row_data[system_name]
        neural_std = "%.2f" % row_data[system_name + "-std"]
        cells.append(neural_time + "s$\\pm$" + neural_std)

        cells.append("\\textbf{" + row_data["Speedup"] + "}")
        cells.append(
            "%.2f" % row_data["Consistency (Neural)"]
            + "$\\pm$"
            + "%.2f" % row_data["Consistency (Neural)-std"]
        )
        # Fraction of fully matching samples, shown as a count out of 8.
        cells.append("%d/8" % int(8 * row_data["Full Match (Neural)"]))

        leading = "{} reqs. & ".format(nreqs) if i == 0 else " & "
        print(leading + " & ".join(cells) + "\\\\")

\midrule
2 reqs. & S & 18.07$s\pm$14.55 & 0.50s$\pm$0.41 & \textbf{36.4x} & 0.97$\pm$0.09 & 7/8\\
 & M & 60.86$s\pm$33.39 & 0.68s$\pm$0.53 & \textbf{90.1x} & 0.94$\pm$0.13 & 6/8\\
\midrule
8 reqs. & S & 247.69$s\pm$436.90 & 0.72s$\pm$0.48 & \textbf{344.7x} & 0.96$\pm$0.08 & 6/8\\
 & M & >25m\tiny{  8/8 TO} & 1.05s$\pm$0.57 & \textbf{1424.3x} & 0.97$\pm$0.04 & 4/8\\
\midrule
16 reqs. & S & 1416.83$s\pm$235.25\tiny{  7/8 TO} & 1.30s$\pm$0.40 & \textbf{1093.0x} & 0.92$\pm$0.06 & 1/8\\
 & M & >25m\tiny{  8/8 TO} & 1.38s$\pm$0.40 & \textbf{1086.6x} & 0.94$\pm$0.05 & 1/8\\
