In [None]:
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from pathlib import Path
from pprint import pprint
sns.set()

### Analyse the data

In [None]:
# Root directory that the result folders used in the cells below are resolved against.
# NOTE: with BASE_DIR = ".", `BASE_PATH.parent` is still Path("."), so the
# `BASE_PATH.parent.parent / ...` expressions in later cells resolve relative to the
# current working directory rather than two levels above it.
BASE_DIR = "."
BASE_PATH = Path(BASE_DIR)

In [None]:
def get_data(base_path, dataset, normal_case=False, constraint_comp=False, l1l2=False, compare_dot_product=False, num_layers_list=None, table=None):
    """Load one result CSV per (norm, depth, method) combination into a single frame.

    The experiment to load is selected by the flags, checked in this order of
    precedence for the method list: ``table == 2``, ``constraint_comp``,
    ``compare_dot_product``, ``l1l2``, ``normal_case``.

    Parameters
    ----------
    base_path : str or pathlib.Path
        Directory that is globbed (non-recursively) for result CSV files.
    dataset : str
        Dataset tag used in the file names. ``"sst"`` and ``"sstSubset"`` map to
        the ``sst`` directory tag; anything else maps to ``yelp``.
    normal_case, constraint_comp, l1l2, compare_dot_product : bool
        Experiment selectors (see above). ``l1l2`` (like ``table == 2``) also
        switches the network tag from ``bert_small`` to ``bert_smaller``.
    num_layers_list : sequence of str or int, optional
        Network depths to load. Defaults to ``["3", "6", "12"]``.
    table : int, optional
        ``2`` selects the "smaller network" experiment.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the loaded CSVs with the extra columns ``p``,
        ``num_layers`` (int) and ``Method``; a ``memory`` column filled with
        ``-1`` is added to files that lack one. ``sentence`` and ``position``
        are converted with ``pd.to_numeric``.

    Raises
    ------
    NotImplementedError
        If no experiment selector is set.
    ValueError
        If no CSV could be loaded at all.
    """
    base_path = Path(base_path)
    uses_smaller_net = (table == 2 or l1l2)

    if compare_dot_product:
        relevant_norms = ["1", "2"]
    elif uses_smaller_net:
        relevant_norms = ["1", "2"] if l1l2 else ["inf"]
    else:
        relevant_norms = ["1", "2", "inf"]

    if uses_smaller_net:
        net_name, size = "bert_smaller", "smaller"
    else:
        net_name, size = "bert_small", "small"

    data_dir = "sst" if dataset in ("sstSubset", "sst") else "yelp"

    # Honour the caller's depth selection; previously the argument was overwritten.
    if num_layers_list is None:
        num_layers_list = ["3", "6", "12"]

    if table == 2:
        methods_and_labels = [("zonotope", "DeepT-Fast"), ("zonotopeSlow", "DeepT-Precise"), ("backward", "CROWN-Backward")]
    elif constraint_comp:
        methods_and_labels = [("zonotope", "DeepT-Fast-With-Constraint"), ("zonotopeNoConstraint", "DeepT-Fast-Without-Constraint")]
    elif compare_dot_product:
        methods_and_labels = [("zonotope", "DeepT-Fast-Linf-First"), ("zonotopeOtherDotProduct", "DeepT-Fast-Lp-First")]
    elif l1l2:
        methods_and_labels = [("zonotope", "DeepT-Fast"), ("baf", "CROWN-BaF"), ("backward", "CROWN-Backward")]
    elif normal_case:
        methods_and_labels = [("zonotope", "DeepT-Fast"), ("baf", "CROWN-BaF")]
    else:
        raise NotImplementedError()

    all_data = []
    for p in relevant_norms:
        for num_layers in num_layers_list:
            # Part of the file name shared by every method of this configuration.
            prefix = f"*{dataset}_{data_dir}_{net_name}_{num_layers}_{size}"
            for method, method_label in methods_and_labels:
                if method == "baf" or method == "backward":
                    glob_path = f"{prefix}_{method}_{p}_*.csv"
                elif method_label == "DeepT-Precise":
                    glob_path = f"{prefix}_{method}_{p}_box_7000_WithConstraint_*.csv"
                elif method_label in ["DeepT-Fast", "DeepT-Fast-Linf-First", "DeepT-Fast-With-Constraint"]:
                    glob_path = f"{prefix}_zonotope_{p}_box_14000_WithConstraint_*.csv"
                elif method_label == "DeepT-Fast-Without-Constraint":
                    glob_path = f"{prefix}_zonotope_{p}_box_14000_NoConstraint*.csv"
                elif method_label == "DeepT-Fast-Lp-First":
                    glob_path = f"{prefix}_zonotope_{p}_box_14000_WithConstraintOtherDotProductOrder_*.csv"
                else:
                    # Guards against silently reusing the previous iteration's pattern.
                    raise ValueError(f"No file pattern defined for method_label={method_label}")

                all_files = list(base_path.glob(glob_path))
                if len(all_files) == 0:
                    print(f"No data for method={method} method_label={method_label} p={p} num_layers={num_layers} glob_path={glob_path}")
                    continue

                # Lexicographically last match; presumably the newest run if the
                # file names end in a sortable timestamp (not verifiable from here).
                latest_file = max(all_files)
                print(method_label, ":", latest_file)

                try:
                    df = pd.read_csv(latest_file)
                except pd.errors.EmptyDataError:
                    print(f"Empty CSV file for method={method} p={p} num_layers={num_layers}")
                    continue

                df['p'] = p
                df['num_layers'] = int(num_layers)
                df['Method'] = method_label

                # Files without a memory measurement get the sentinel -1.
                if 'memory' not in df.columns:
                    df["memory"] = -1

                all_data.append(df)

    if not all_data:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError(f"No result CSV could be loaded from {base_path} for dataset={dataset}")

    all_data_df = pd.concat(all_data, ignore_index=True)
    all_data_df["sentence"] = pd.to_numeric(all_data_df["sentence"])
    all_data_df["position"] = pd.to_numeric(all_data_df["position"])
    return all_data_df

In [None]:
# Table 1 (scaled-down run): load the `normal_case=True` results, i.e. the
# DeepT-Fast and CROWN-BaF files for the `bert_small` network tag.
print(BASE_PATH)  # Table 1 _scaled_down
all_data_df = get_data(BASE_PATH.parent.parent / "normal_case_scaled_down" , dataset="sstSubset", normal_case=True)

In [None]:
# Table 2 (scaled-down run): `table=2` selects the `bert_smaller` network tag,
# the "inf" norm only, and the DeepT-Fast / DeepT-Precise / CROWN-Backward files.
SMALLER_NETWORK_RESULTS_PATH = BASE_PATH.parent.parent / 'smaller_network_results_scaled_down'  # Table 2 _scaled_down
print(SMALLER_NETWORK_RESULTS_PATH.absolute())
all_data_smaller_df = get_data(SMALLER_NETWORK_RESULTS_PATH, dataset="sstSubset", table=2)
# Trailing semicolon suppresses the rich display of the raw frame.
all_data_smaller_df;

In [None]:
# Table 3 (scaled-down run): `l1l2=True` selects the `bert_smaller` network tag,
# the "1" and "2" norms, and the DeepT-Fast / CROWN-BaF / CROWN-Backward files.
L1L2_RESULTS_PATH = BASE_PATH.parent.parent / "l1l2_scaled_down"  # Table 3 _scaled_down
print(L1L2_RESULTS_PATH.absolute())
all_data_l1l2_df = get_data(L1L2_RESULTS_PATH, dataset="sstSubset", l1l2=True)
# Trailing semicolon suppresses the rich display of the raw frame.
all_data_l1l2_df;

In [None]:
# Table 4 (scaled-down run): `constraint_comp=True` loads the DeepT-Fast files
# produced with and without the constraint ("WithConstraint" vs "NoConstraint").
NO_CONSTRAINT_RESULTS_PATH = BASE_PATH.parent.parent / "no_constraint_results_scaled_down"  # Table 4 _scaled_down
print(NO_CONSTRAINT_RESULTS_PATH.absolute())
all_data_constraint_comp_df = get_data(NO_CONSTRAINT_RESULTS_PATH, dataset="sstSubset", constraint_comp=True)
# Trailing semicolon suppresses the rich display of the raw frame.
all_data_constraint_comp_df;

In [None]:
# Table 5 (scaled-down run): `compare_dot_product=True` loads the "1" and "2" norm
# DeepT-Fast files for both dot-product orders (Linf-first vs Lp-first labels).
OTHER_DOT_PRODUCT_RESULTS_PATH = BASE_PATH.parent.parent / "other_dot_product_results_scaled_down"  # Table 5 _scaled_down
print(OTHER_DOT_PRODUCT_RESULTS_PATH.absolute())
all_data_other_dot_product_df = get_data(OTHER_DOT_PRODUCT_RESULTS_PATH, dataset="sstSubset", compare_dot_product=True)
# Trailing semicolon suppresses the rich display of the raw frame.
all_data_other_dot_product_df;

In [None]:
def process_data(data_df):
    """Summarise raw results per (num_layers, p, Method) group.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Frame with at least the columns ``p``, ``num_layers``, ``Method``,
        ``eps``, ``timing`` and ``memory``. Any other columns are ignored.

    Returns
    -------
    pandas.DataFrame or None
        One row per group, sorted by ``num_layers`` then ``p``, with the columns
        ``num_layers``, ``p``, ``Method`` (all categorical), ``min_eps``
        (group minimum of ``eps``), ``avg_eps`` (group mean of ``eps``),
        ``timing`` and ``memory`` (group means). ``None`` is returned, after
        printing the reason, if the frame cannot be processed.
    """
    group_keys = ['p', 'num_layers', 'Method']
    try:
        # A single named aggregation touches only the columns that are reported,
        # so unrelated non-numeric columns cannot break the mean, and min/avg are
        # computed on the same rows instead of being re-aligned by index.
        summary = data_df.groupby(group_keys, as_index=False).agg(
            min_eps=("eps", "min"),
            avg_eps=("eps", "mean"),
            timing=("timing", "mean"),
            memory=("memory", "mean"),
        )

        summary = (
            summary
            .sort_values(['num_layers', 'p'])
            .astype({'p': "category", "Method": "category", "num_layers": "category"})
            .reset_index(drop=True)
        )

        return summary[["num_layers", "p", "Method", "min_eps", "avg_eps", "timing", "memory"]]
    except Exception as e:
        # Deliberately best-effort so one bad table does not stop the notebook.
        print(f"Couldn't process dataframe. Exception {e}")
        return None

# Summarise each loaded result frame; the summaries are displayed one per cell below.
# process_data prints a message instead of raising when a frame cannot be processed.
data_normal = process_data(all_data_df)
data_smaller =  process_data(all_data_smaller_df)
data_l1l2 = process_data(all_data_l1l2_df)
data_constraint = process_data(all_data_constraint_comp_df)
data_dot_product = process_data(all_data_other_dot_product_df)

In [None]:
# Summary of `all_data_df` (normal-case results).
data_normal  # Table 1

In [None]:
# Summary of `all_data_smaller_df` (smaller-network results).
data_smaller  # Table 2

In [None]:
# Summary of `all_data_l1l2_df` (norms "1" and "2").
data_l1l2  # Table 3

In [None]:
# Summary of `all_data_constraint_comp_df` (with vs without constraint).
data_constraint  # Table 4

In [None]:
# Summary of `all_data_other_dot_product_df` (dot-product order comparison).
data_dot_product    # Table 5