In [None]:
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from pathlib import Path
from pprint import pprint
sns.set()

### Analyse the data

In [None]:
# Directory that the result folders loaded below are located relative to.
BASE_DIR = "."
# NOTE(review): pathlib does not resolve "."; Path(".").parent is Path("."), so
# the `BASE_PATH.parent.parent / "<folder>"` expressions used by the loading
# cells evaluate to "<folder>" inside the working directory rather than two
# levels above it. If climbing two directories is intended, this would need
# Path(BASE_DIR).resolve() — confirm against where the result folders live.
BASE_PATH = Path(BASE_DIR)

In [None]:
def get_data(base_path, dataset, normal_case=False, constraint_comp=False, l1l2=False, compare_dot_product=False, num_layers_list=None, table=None):
    """Load the newest result CSV for every (norm, depth, method) combination.

    For each combination a glob pattern is built from the arguments and matched
    against the direct children of ``base_path``. When several files match, the
    one with the most recent modification time is loaded. Combinations without
    a matching file, or whose file is empty, are reported on stdout and skipped.

    Args:
        base_path: ``pathlib.Path`` of the directory containing the CSV files.
        dataset: Dataset identifier used in the file names. ``"sst"`` and
            ``"sstSubset"`` use the ``sst`` directory component; anything else
            uses ``yelp``. ``"sstSubset"`` additionally changes the default
            norms, network and method selection (see below).
        normal_case: Only relevant for ``"sstSubset"``: when true the
            ``bert_small`` network is used instead of ``bert_smaller`` and the
            methods are Zonotope-Fast, Zonotope-Slow and BaF.
        constraint_comp: Compare Zonotope-Fast with and without constraint.
        l1l2: Only relevant for ``"sstSubset"``: load norms ``"1"`` and ``"2"``
            instead of ``"inf"`` and compare Zonotope-Fast, BaF and Backward.
        compare_dot_product: Load norms ``"1"`` and ``"2"`` and compare
            Zonotope-Fast with its "OtherDotProduct" variant.
        num_layers_list: Iterable of layer counts (strings or ints) to load.
            Defaults to ``["3", "6", "12"]`` when ``None``.
        table: When equal to ``2``, forces the method set Zonotope-Fast,
            Zonotope-Slow and Backward, taking precedence over the flags above.

    Returns:
        A single ``DataFrame`` concatenating all loaded CSVs, with the added
        columns ``p`` (norm, str), ``num_layers`` (int), ``Method`` (label) and
        ``memory`` (``-1`` when the CSV has no such column). The ``sentence``
        and ``position`` columns are converted to numeric dtypes.

    Raises:
        ValueError: If no CSV could be loaded at all.
        KeyError: If the loaded data lacks a ``sentence`` or ``position`` column.
    """
    # Norm identifiers that appear in the file names.
    if compare_dot_product:
        relevant_norms = ["1", "2"]
    elif dataset == "sstSubset":
        relevant_norms = ["1", "2"] if l1l2 else ["inf"]
    else:
        relevant_norms = ["1", "2", "inf"]

    if dataset == "sstSubset" and not normal_case:
        net_name, size = "bert_smaller", "smaller"
    else:
        net_name, size = "bert_small", "small"

    data_dir = "sst" if dataset in ("sstSubset", "sst") else "yelp"

    # Honour the caller's selection; fall back to all depths only when omitted.
    if num_layers_list is None:
        num_layers_list = ["3", "6", "12"]
    else:
        # Materialise so that one-shot iterables survive the nested loops below.
        num_layers_list = list(num_layers_list)

    # (file-name method id, display label) pairs; the first matching rule wins.
    if table == 2:
        methods_and_labels = [("zonotope", "Zonotope-Fast"), ("zonotopeSlow", "Zonotope-Slow"), ("backward", "Backward")]
    elif constraint_comp:
        methods_and_labels = [("zonotopeNoConstraint", "Zonotope-Fast-No-Constraint"), ("zonotope", "Zonotope-Fast")]
    elif compare_dot_product:
        methods_and_labels = [("zonotopeOtherDotProduct", "Zonotope-Fast-OtherDotProduct"), ("zonotope", "Zonotope-Fast")]
    elif dataset == "sstSubset":
        if l1l2:
            methods_and_labels = [("zonotope", "Zonotope-Fast"), ("baf", "BaF"), ("backward", "Backward")]
        elif normal_case:
            methods_and_labels = [("zonotope", "Zonotope-Fast"), ("zonotopeSlow", "Zonotope-Slow"), ("baf", "BaF")]
        else:
            methods_and_labels = [("zonotope", "Zonotope-Fast"), ("zonotopeSlow", "Zonotope-Slow"), ("backward", "Backward"), ("baf", "BaF")]
    else:
        methods_and_labels = [("baf", "BaF"), ("zonotope", "Zonotope-Fast")]

    all_data = []
    for p in relevant_norms:
        for num_layers in num_layers_list:
            # Part of the file name shared by every method of this configuration.
            prefix = f"*{dataset}_{data_dir}_{net_name}_{num_layers}_{size}"

            for method, method_label in methods_and_labels:
                # All Zonotope-Fast variants are stored under the "zonotope"
                # method id and are told apart by the file-name suffix only.
                if method in ("baf", "backward"):
                    glob_path = f"{prefix}_{method}_{p}_*.csv"
                elif method_label == "Zonotope-Slow":
                    glob_path = f"{prefix}_{method}_{p}_box_7000_WithConstraint_*.csv"
                elif method_label == "Zonotope-Fast":
                    glob_path = f"{prefix}_zonotope_{p}_box_14000_WithConstraint_*.csv"
                elif method_label == "Zonotope-Fast-No-Constraint":
                    glob_path = f"{prefix}_zonotope_{p}_box_14000_NoConstraint*.csv"
                elif method_label == "Zonotope-Fast-OtherDotProduct":
                    glob_path = f"{prefix}_zonotope_{p}_box_14000_WithConstraintOtherDotProductOrder_*.csv"
                else:
                    # Guard against silently reusing the previous iteration's pattern.
                    raise ValueError(f"No file pattern known for method={method} method_label={method_label}")

                all_files = list(base_path.glob(glob_path))
                if len(all_files) == 0:
                    print(f"No data for method={method} method_label={method_label} p={p} num_layers={num_layers} glob_path={glob_path}")
                    continue

                # Several runs may exist; keep only the most recently modified one.
                latest_file = sorted(all_files, key=lambda path: path.stat().st_mtime)[-1]
                print(method_label, ":", latest_file)

                try:
                    df = pd.read_csv(latest_file)
                except pd.errors.EmptyDataError:
                    print(f"Empty CSV file for method={method} p={p} num_layers={num_layers}")
                    continue

                df['p'] = p
                df['num_layers'] = int(num_layers)
                df['Method'] = method_label

                # Not every result file records memory usage; -1 marks "unknown".
                if 'memory' not in df.columns:
                    df["memory"] = -1

                all_data.append(df)

    if not all_data:
        # pd.concat([]) would raise an uninformative "No objects to concatenate".
        raise ValueError(f"No result CSV could be loaded from {base_path} for dataset={dataset}")

    all_data_df = pd.concat(all_data, ignore_index=True)
    all_data_df["sentence"] = pd.to_numeric(all_data_df["sentence"])
    all_data_df["position"] = pd.to_numeric(all_data_df["position"])
    return all_data_df

In [None]:
# Table 1: load the "sst" results stored in the "normal_case" folder.
print(BASE_PATH)
all_data_df = get_data(BASE_PATH.parent.parent / "normal_case" , dataset="sst")

In [None]:
# Table 2: load the "sstSubset" results stored in "smaller_network_results".
SMALLER_NETWORK_RESULTS_PATH = BASE_PATH.parent.parent / 'smaller_network_results'
# Print the absolute location so a wrong working directory is easy to spot.
print(SMALLER_NETWORK_RESULTS_PATH.absolute())
# table=2 makes get_data load Zonotope-Fast, Zonotope-Slow and Backward.
all_data_smaller_df = get_data(SMALLER_NETWORK_RESULTS_PATH, dataset="sstSubset", table=2)
# The trailing ';' suppresses the rich display of the frame.
all_data_smaller_df;

In [None]:
# Table 3: load the "sstSubset" results stored in the "l1l2" folder.
L1L2_RESULTS_PATH = BASE_PATH.parent.parent / "l1l2"
# Print the absolute location so a wrong working directory is easy to spot.
print(L1L2_RESULTS_PATH.absolute())
# l1l2=True makes get_data load norms "1" and "2" for Zonotope-Fast, BaF and Backward.
all_data_l1l2_df = get_data(L1L2_RESULTS_PATH, dataset="sstSubset", l1l2=True)
# No trailing ';' here: unlike the other loading cells, the whole frame is
# displayed as this cell's output.
all_data_l1l2_df

In [None]:
# Table 4: load the "sst" results stored in "no_constraint_results".
NO_CONSTRAINT_RESULTS_PATH = BASE_PATH.parent.parent / "no_constraint_results"
# Print the absolute location so a wrong working directory is easy to spot.
print(NO_CONSTRAINT_RESULTS_PATH.absolute())
# constraint_comp=True makes get_data load Zonotope-Fast with and without constraint.
all_data_constraint_comp_df = get_data(NO_CONSTRAINT_RESULTS_PATH, dataset="sst", constraint_comp=True)
# The trailing ';' suppresses the rich display of the frame.
all_data_constraint_comp_df;

In [None]:
# Table 5: load the "sst" results stored in "other_dot_product_results".
OTHER_DOT_PRODUCT_RESULTS_PATH = BASE_PATH.parent.parent / "other_dot_product_results"
# Print the absolute location so a wrong working directory is easy to spot.
print(OTHER_DOT_PRODUCT_RESULTS_PATH.absolute())
# compare_dot_product=True makes get_data load norms "1" and "2" for
# Zonotope-Fast and its "OtherDotProduct" variant.
all_data_other_dot_product_df = get_data(OTHER_DOT_PRODUCT_RESULTS_PATH, dataset="sst", compare_dot_product=True)
# The trailing ';' suppresses the rich display of the frame.
all_data_other_dot_product_df;

In [None]:
def process_data(data_df):
    """Aggregate raw results into per-configuration mean and minimum tables.

    Rows are grouped by ``(p, num_layers, Method)``. The ``sentence`` and
    ``position`` columns are dropped from both results, the rows are sorted by
    the grouping keys, and the grouping keys are converted to categoricals.

    Args:
        data_df: ``DataFrame`` containing at least the columns ``p``,
            ``num_layers``, ``Method``, ``sentence`` and ``position``.

    Returns:
        ``(data_avg, data_min)``: the group-wise mean of the numeric columns
        and the group-wise minimum of all columns. If aggregation fails for
        any reason the error is printed and ``(None, None)`` is returned, so
        that one broken experiment does not prevent building the other tables.
    """
    group_keys = ['p', 'num_layers', 'Method']
    key_dtypes = {key: "category" for key in group_keys}

    def _tidy(aggregated):
        # Shared post-processing for both aggregates.
        return (
            aggregated
            .drop(columns=["sentence", "position"])
            .sort_values(group_keys)
            .astype(key_dtypes)
        )

    try:
        grouped = data_df.groupby(group_keys, as_index=False)
        # numeric_only=True: since pandas 2.0 mean() raises TypeError on
        # non-numeric columns instead of silently leaving them out, which
        # would make this function return (None, None) for such frames.
        data_avg = _tidy(grouped.mean(numeric_only=True))
        data_min = _tidy(grouped.min())
        return data_avg, data_min
    except Exception as e:
        # Deliberately best-effort: report the failure and let the caller go on.
        print(f"Couldn't process dataframe. Exception {e}")
        return None, None

# Build the (mean, min) summary pair for each loaded experiment. Each pair is
# (None, None) if process_data could not aggregate the corresponding frame.
data_avg, data_min = process_data(all_data_df)  # Table 1
data_avg_smaller, data_min_smaller =  process_data(all_data_smaller_df)  # Table 2
data_avg_l1l2, data_min_l1l2 =  process_data(all_data_l1l2_df)  # Table 3
data_avg_constraint, data_min_constraint =  process_data(all_data_constraint_comp_df)  # Table 4
data_avg_dot_product, data_min_dot_product =  process_data(all_data_other_dot_product_df)  # Table 5

In [None]:
data_avg  # Table 1: mean per (p, num_layers, Method) of all_data_df

In [None]:
data_min  # Table 1: minimum per (p, num_layers, Method) of all_data_df

In [None]:
data_avg_smaller  # Table 2: mean per (p, num_layers, Method) of all_data_smaller_df

In [None]:
data_min_smaller  # Table 2: minimum per (p, num_layers, Method) of all_data_smaller_df

In [None]:
data_avg_l1l2  # Table 3: mean per (p, num_layers, Method) of all_data_l1l2_df

In [None]:
data_min_l1l2  # Table 3: minimum per (p, num_layers, Method) of all_data_l1l2_df

In [None]:
data_avg_constraint  # Table 4: mean per (p, num_layers, Method) of all_data_constraint_comp_df

In [None]:
data_min_constraint  # Table 4: minimum per (p, num_layers, Method) of all_data_constraint_comp_df

In [None]:
data_avg_dot_product    # Table 5: mean per (p, num_layers, Method) of all_data_other_dot_product_df

In [None]:
data_min_dot_product    # Table 5: minimum per (p, num_layers, Method) of all_data_other_dot_product_df