## Table Aggregation: OSPF Synthesis Time

In this notebook, we aggregate the raw result files into the OSPF synthesis-time tables used in our figures.

In [1]:
import os
import os
def get_neural_files(basedir):
    """Index the "ospf-reqs*" files found directly inside `basedir`.

    The key of each entry is the third "-"-separated token of the file name,
    parsed as an int (e.g. ``ospf-reqs-8-...`` -> ``8``); the value is the
    path of the file inside `basedir`.

    Paths are built with ``os.path.join`` so `basedir` may be given with or
    without a trailing separator.

    Raises:
        ValueError: if the third token of a matching file name is not an int.
        OSError: if `basedir` cannot be listed.
    """
    return {
        int(name.split("-")[2]): os.path.join(basedir, name)
        for name in os.listdir(basedir)
        if name.startswith("ospf-reqs")
    }
# Result files of the neural synthesizer, keyed by the integer parsed from the file name.
neural_files = get_neural_files("results/")

# NetComplete result files. Here the third "-"-separated token of the file name
# ends in the literal suffix "result", which is stripped before parsing the key.
nc_files = {
    int(name.split("-")[2][:-len("result")]): "./netcomplete/results/" + name
    for name in os.listdir("./netcomplete/results/")
    if name.startswith("ospf-reqs")
}

In [2]:
import pandas as pd

In [3]:
# Sentinel in the "time" column: rows whose time equals this value are counted as timeouts.
TIMEOUT_VALUE = 999


def read_results_file(f, with_consistency=False, timeout_penalty=1500, nodes_per_bucket=8):
    """Read a ';'-separated results file and aggregate it per topology-size bucket.

    Each input row is ``prefix;time`` (plus ``;consistency`` when
    `with_consistency` is set). The prefix must look like ``ospf-n<nodes>``;
    the integer after ``ospf-n`` is taken as the number of nodes.

    Rows are grouped into three buckets by node count:
    ``"0 Small"`` (< nodes_per_bucket), ``"1 Medium"`` (< 2 * nodes_per_bucket)
    and ``"2 Large"`` (everything else).

    Args:
        f: path or file-like object accepted by ``pd.read_csv``.
        with_consistency: whether the file has a third "consistency" column.
        timeout_penalty: time substituted for rows whose time equals
            ``TIMEOUT_VALUE`` before averaging.
        nodes_per_bucket: width (in nodes) of the Small and Medium buckets.

    Returns:
        DataFrame indexed by "bucket" with columns
        ``time`` (mean), ``num_timeouts`` (count of timed-out rows),
        ``samples``, ``min_nodes``, ``max_nodes`` and ``time-std``; with
        `with_consistency` also ``consistency`` (mean), ``full_match``
        (fraction of rows with consistency == 1.0) and ``consistency-std``.
    """
    column_names = ["prefix", "time"] + (["consistency"] if with_consistency else [])
    df = pd.read_csv(f, sep=";", names=column_names)

    df["num_nodes"] = [int(p[len("ospf-n"):]) for p in df["prefix"].values]

    def bucket(n):
        # The numeric prefix of the label keeps the buckets sorted Small < Medium < Large.
        if n < nodes_per_bucket:
            return "0 Small"
        if n < nodes_per_bucket * 2:
            return "1 Medium"
        return "2 Large"

    df["bucket"] = [bucket(n) for n in df["num_nodes"]]

    # Flag timeouts first, then replace the sentinel by the penalty so that
    # timed-out runs still contribute to the mean/std of "time".
    timed_out = df["time"] == TIMEOUT_VALUE
    df["num_timeouts"] = timed_out
    df["time"] = df["time"].where(~timed_out, timeout_penalty)

    if with_consistency:
        # Stored as float so the per-bucket mean is always a fraction in [0, 1].
        df["full_match"] = (df["consistency"] == 1.0).astype(float)

    # Aggregate column by column: calling mean()/std() on the whole frame would
    # also hit the string "prefix" column, which raises on pandas >= 2.0.
    grouped = df.groupby("bucket")

    res = pd.DataFrame({"time": grouped["time"].mean()})
    if with_consistency:
        res["consistency"] = grouped["consistency"].mean()
    res["num_timeouts"] = grouped["num_timeouts"].sum()
    if with_consistency:
        res["full_match"] = grouped["full_match"].mean()
        res["consistency-std"] = grouped["consistency"].std()

    res["samples"] = grouped["num_nodes"].count()
    res["min_nodes"] = grouped["num_nodes"].min()
    res["max_nodes"] = grouped["num_nodes"].max()
    res["time-std"] = grouped["time"].std()

    return res
# Disabled call on a BGP result file with the consistency column; kept for reference only.
#print(read_results_file("bgp/neural/bgp-reqs-2-result-bgp-2021-09-15_11:25:18.csv", with_consistency=True))
# Sanity check: print the per-bucket aggregate of the NetComplete file stored under key 2.
print(read_results_file(nc_files[2]))

              time  num_timeouts  samples  min_nodes  max_nodes  time-std
bucket                                                                   
0 Small   0.265297             0        8          0          7  0.089825
1 Medium  0.403998             0        8          8         15  0.089433
2 Large   0.943937             0        8         16         23  0.334002


In [4]:
# Column names used for the neural system in the merged table.
system_name = "Neural"
consistency_col = "Consistency (Neural)"
full_match_col = "Full Match (Neural)"


def merge(df_netcomplete, df_neural):
    """Combine the per-bucket aggregates of NetComplete and the neural system.

    Both inputs are frames indexed by "bucket". `df_netcomplete` must provide
    the columns ``time``, ``num_timeouts`` and ``time-std``; `df_neural` must
    provide ``time``, ``time-std``, ``consistency``, ``consistency-std`` and
    ``full_match``. Neural columns are aligned on NetComplete's index.

    The inputs are not modified.

    Returns:
        DataFrame with one row per bucket and the columns
        ``bucket``, ``NetComplete``, ``<system_name>``, ``Speedup``
        (NetComplete time / neural time, formatted like ``"2.9x"``),
        ``NetComplete Timeouts``, the consistency and full-match columns, and
        the three ``-std`` columns.
    """
    # Build a fresh frame on NetComplete's index. Wrapping `df_netcomplete`
    # itself (pd.DataFrame(df)) does not copy, so adding/deleting columns
    # would leak back into the caller's frame.
    res = pd.DataFrame(index=df_netcomplete.index)

    res["NetComplete"] = df_netcomplete["time"]
    res["NetComplete Timeouts"] = df_netcomplete["num_timeouts"]
    res["NetComplete-std"] = df_netcomplete["time-std"]
    res[system_name] = df_neural["time"]
    res[system_name + "-std"] = df_neural["time-std"]
    res[consistency_col] = df_neural["consistency"]
    res[consistency_col + "-std"] = df_neural["consistency-std"]
    res[full_match_col] = df_neural["full_match"]

    speedup_factors = res["NetComplete"].values / res[system_name].values
    res["Speedup"] = ["%.1fx" % f for f in speedup_factors]

    # Turn the "bucket" index into a regular column of the output table.
    res = res.reset_index()

    return res[[
        "bucket",
        "NetComplete",
        system_name,
        "Speedup",
        "NetComplete Timeouts",
        consistency_col,
        full_match_col,
        "NetComplete-std",
        system_name + "-std",
        consistency_col + "-std",
    ]]

## 2 Requirements

In [5]:
# Side-by-side table for the 2-requirement runs (NetComplete vs. neural).
req2 = merge(
    df_netcomplete=read_results_file(nc_files[2]),
    df_neural=read_results_file(neural_files[2], with_consistency=True),
)
req2

Unnamed: 0,bucket,NetComplete,Neural,Speedup,NetComplete Timeouts,Consistency (Neural),Full Match (Neural),NetComplete-std,Neural-std,Consistency (Neural)-std
0,0 Small,0.265297,0.09114,2.9x,0,1.0,True,0.089825,0.002574,0.0
1,1 Medium,0.403998,0.09794,4.1x,0,1.0,True,0.089433,0.001602,0.0
2,2 Large,0.943937,0.140039,6.7x,0,1.0,True,0.334002,0.078821,0.0


## 8 Requirements

In [16]:
# Side-by-side table for the 8-requirement runs (NetComplete vs. neural).
req8 = merge(
    df_netcomplete=read_results_file(nc_files[8]),
    df_neural=read_results_file(neural_files[8], with_consistency=True),
)
req8

Unnamed: 0,bucket,NetComplete,Neural,Speedup,NetComplete Timeouts,Consistency (Neural),Full Match (Neural),NetComplete-std,Neural-std,Consistency (Neural)-std
0,0 Small,0.876088,0.164134,5.3x,0,0.982413,0.75,0.176679,0.122562,0.035177
1,1 Medium,1.517269,0.103424,14.7x,0,1.0,1.0,0.401317,0.00912,0.0
2,2 Large,3.504761,0.316284,11.1x,0,0.980968,0.5,1.112488,0.150805,0.025535


## 16 Requirements

In [17]:
# Side-by-side table for the 16-requirement runs (NetComplete vs. neural).
req16 = merge(
    df_netcomplete=read_results_file(nc_files[16]),
    df_neural=read_results_file(neural_files[16], with_consistency=True),
)
req16

Unnamed: 0,bucket,NetComplete,Neural,Speedup,NetComplete Timeouts,Consistency (Neural),Full Match (Neural),NetComplete-std,Neural-std,Consistency (Neural)-std
0,0 Small,1.650322,0.209984,7.9x,0,0.991073,0.625,0.372218,0.142982,0.012466
1,1 Medium,2.645937,0.245172,10.8x,0,0.989153,0.5,0.696411,0.158519,0.01216
2,2 Large,566.635635,0.233142,2430.4x,3,0.986919,0.875,772.90041,0.11045,0.037


In [20]:
import numpy as np

reqs_counts = [2, 8, 16]

# Denominator printed in the "k/8" cells (timeouts and full matches).
SAMPLES_PER_BUCKET = 8
# A mean NetComplete time equal to this value is rendered as ">25m"; it matches
# the 1500 that read_results_file substitutes for timed-out runs.
ALL_TIMEOUT_TIME = 1500


def str_v(v, i):
    """Format a single table value `v` that sits in column position `i`.

    Bucket labels are abbreviated to S/M/L. ``np.float64`` values are printed
    with up to three decimals (trailing zeros stripped), get an "s" suffix in
    columns 1 and 2, and are wrapped in ``\\textbf{}`` in column 2. Anything
    else is passed through ``str``.
    """
    if v == "0 Small": return "S"
    if v == "1 Medium": return "M"
    if v == "2 Large": return "L"

    if type(v) is np.float64:
        highlighted_indices = set([2])
        res = ""
        if i in highlighted_indices: res += "\\textbf{"
        res += ("%.3f" % v).rstrip("0").rstrip(".")
        if i == 1 or i == 2: res += "s"
        if i in highlighted_indices: res += "}"
        return res
    return str(v)


def format_latex_row(row_data, nreqs, is_first_row):
    """Render one row of a merged table as the cells of a LaTeX table line.

    `row_data` maps column name -> value for one bucket. The first row of a
    requirement group carries the "<nreqs> reqs." label in its leading cell;
    the following rows leave that cell empty. The trailing "\\\\" is not added.
    """
    cells = [str_v(row_data["bucket"], 0)]

    # NetComplete: mean time, std (only below the all-timeout value) and timeout count.
    nc_mean = row_data["NetComplete"]
    nc_cell = ">25m" if nc_mean == ALL_TIMEOUT_TIME else "%.2f" % nc_mean
    if nc_mean < ALL_TIMEOUT_TIME:
        # NOTE(review): the unit sits inside math mode here ("$s\pm$") but outside
        # it in the neural cell below ("s$\pm$"); kept as-is so the emitted table
        # is unchanged.
        nc_cell += "$s\\pm$" + ("%.2f" % row_data["NetComplete-std"])
    num_timeouts = row_data["NetComplete Timeouts"]
    if num_timeouts > 0:
        nc_cell += "\\tiny{" + "  {}/{} TO".format(num_timeouts, SAMPLES_PER_BUCKET) + "}"
    cells.append(nc_cell)

    # Neural system: mean time and std.
    neural_time = "%.2f" % row_data[system_name]
    neural_std = "%.2f" % row_data[system_name + "-std"]
    cells.append(neural_time + "s$\\pm$" + neural_std)

    cells.append("\\textbf{" + row_data["Speedup"] + "}")
    cells.append(
        ("%.2f" % row_data[consistency_col])
        + "$\\pm$"
        + ("%.2f" % row_data[consistency_col + "-std"])
    )
    # The full-match column holds a fraction; scale it back to a count out of 8.
    cells.append("%d/%d" % (int(SAMPLES_PER_BUCKET * row_data[full_match_col]), SAMPLES_PER_BUCKET))

    leading_cell = "{} reqs.".format(nreqs) if is_first_row else ""
    return " & ".join([leading_cell] + cells)


for nreqs, reqs in zip(reqs_counts, [req2, req8, req16]):
    print("\\midrule")
    for i in range(len(reqs)):
        row_data = dict(zip(reqs.columns, reqs.iloc[i].values))
        print(format_latex_row(row_data, nreqs, i == 0) + "\\\\")

\midrule
2 reqs. & S & 0.27$s\pm$0.09 & 0.09s$\pm$0.00 & \textbf{2.9x} & 1.00$\pm$0.00 & 8/8\\
 & M & 0.40$s\pm$0.09 & 0.10s$\pm$0.00 & \textbf{4.1x} & 1.00$\pm$0.00 & 8/8\\
 & L & 0.94$s\pm$0.33 & 0.14s$\pm$0.08 & \textbf{6.7x} & 1.00$\pm$0.00 & 8/8\\
\midrule
8 reqs. & S & 0.88$s\pm$0.18 & 0.16s$\pm$0.12 & \textbf{5.3x} & 0.98$\pm$0.04 & 6/8\\
 & M & 1.52$s\pm$0.40 & 0.10s$\pm$0.01 & \textbf{14.7x} & 1.00$\pm$0.00 & 8/8\\
 & L & 3.50$s\pm$1.11 & 0.32s$\pm$0.15 & \textbf{11.1x} & 0.98$\pm$0.03 & 4/8\\
\midrule
16 reqs. & S & 1.65$s\pm$0.37 & 0.21s$\pm$0.14 & \textbf{7.9x} & 0.99$\pm$0.01 & 5/8\\
 & M & 2.65$s\pm$0.70 & 0.25s$\pm$0.16 & \textbf{10.8x} & 0.99$\pm$0.01 & 4/8\\
 & L & 566.64$s\pm$772.90\tiny{  3/8 TO} & 0.23s$\pm$0.11 & \textbf{2430.4x} & 0.99$\pm$0.04 & 7/8\\
