In [None]:
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from pathlib import Path
from pprint import pprint
sns.set()

### Analyse the data

In [None]:
# Root from which all result directories are derived; "." is relative to the
# kernel's working directory.
BASE_DIR = "."
# NOTE(review): Path(".").parent is Path(".") itself, so with the default
# BASE_DIR every `BASE_PATH.parent` / `BASE_PATH.parent.parent` used in later
# cells still points at the current directory -- confirm this is intended.
BASE_PATH = Path(BASE_DIR)

In [None]:
def get_data(base_path, dataset, normal_case=False, big=False, standard_layer_norm=False, constraint_comp=False, l1l2=False, compare_dot_product=False, num_layers_list=None, table=None):
    """Load the most recent result CSV for every (norm, depth, method) combination.

    For each combination a glob pattern is built from the arguments and matched
    against the files directly inside ``base_path``. When several files match,
    the one with the latest modification time is read. Combinations without a
    matching file are reported on stdout and skipped.

    Parameters
    ----------
    base_path : pathlib.Path
        Directory searched with ``glob``.
    dataset : str
        Dataset tag used in the file names. "sst" and "sstSubset" use the
        directory tag "sst"; any other value uses "yelp".
    normal_case : bool
        Only consulted for ``dataset == "sstSubset"`` without ``l1l2``: when
        true, CROWN-Backward is left out of the method list.
    big : bool
        Selects the "bert_big" network (size tag "big") unless
        ``standard_layer_norm`` is set.
    standard_layer_norm : bool
        Selects the "bert_standard_layer_norm" network (size tag "big"); takes
        precedence over ``big``.
    constraint_comp : bool
        Compare DeepT-Fast with and without constraint.
    l1l2 : bool
        For ``dataset == "sstSubset"``: use the norms "1" and "2" instead of
        "inf" and the matching method list.
    compare_dot_product : bool
        Use the norms "1" and "2" and compare the two dot-product orders.
    num_layers_list : list of str, optional
        Network depths to load. Defaults to ``["3", "6", "12"]``.
    table : int, optional
        Only the value ``2`` is recognised; it selects the four-method
        comparison and takes precedence over the other method switches.

    Returns
    -------
    pandas.DataFrame
        Concatenation of all loaded CSVs with the extra columns ``p``,
        ``num_layers`` (int) and ``Method``; ``memory`` is filled with -1 when
        a CSV does not provide it. ``sentence`` and ``position`` are converted
        with ``pd.to_numeric``.

    Raises
    ------
    ValueError
        If not a single CSV could be loaded.
    """
    if compare_dot_product:
        relevant_norms = ["1", "2"]
    elif dataset == "sstSubset":
        relevant_norms = ["1", "2"] if l1l2 else ["inf"]
    else:
        relevant_norms = ["1", "2", "inf"]

    if standard_layer_norm:
        net_name, size = "bert_standard_layer_norm", "big"
    elif big:
        net_name, size = "bert_big", "big"
    elif dataset == "sstSubset":
        net_name, size = "bert_smaller", "smaller"
    else:
        net_name, size = "bert_small", "small"

    dir_tag = "sst" if dataset in ("sstSubset", "sst") else "yelp"

    # Honour the caller's choice; previously the argument was always overwritten.
    if num_layers_list is None:
        num_layers_list = ["3", "6", "12"]

    if table == 2:
        methods_and_labels = [("zonotope", "DeepT-Fast"), ("zonotopeSlow", "DeepT-Precise"), ("backward", "CROWN-Backward"), ("baf", "CROWN-BaF")]
    elif constraint_comp:
        methods_and_labels = [("zonotopeNoConstraint", "DeepT-Fast-No-Constraint"), ("zonotope", "DeepT-Fast")]
    elif compare_dot_product:
        methods_and_labels = [("zonotopeOtherDotProduct", "DeepT-Fast-Lp-First"), ("zonotope", "DeepT-Fast-Linf-First")]
    elif dataset == "sstSubset":
        if l1l2:
            methods_and_labels = [("zonotope", "DeepT-Fast"), ("baf", "CROWN-BaF"), ("backward", "CROWN-Backward")]
        elif normal_case:
            methods_and_labels = [("zonotope", "DeepT-Fast"), ("zonotopeSlow", "DeepT-Precise"), ("baf", "CROWN-BaF")]
        else:
            methods_and_labels = [("zonotope", "DeepT-Fast"), ("zonotopeSlow", "DeepT-Precise"), ("backward", "CROWN-Backward"), ("baf", "CROWN-BaF")]
    else:
        methods_and_labels = [("baf", "CROWN-BaF"), ("zonotope", "DeepT-Fast")]

    # File-name tail (after the norm) of the zonotope runs, keyed by label.
    zonotope_suffix_by_label = {
        "DeepT-Fast": "box_14000_WithConstraint_*.csv",
        "DeepT-Fast-Linf-First": "box_14000_WithConstraint_*.csv",
        "DeepT-Fast-No-Constraint": "box_14000_NoConstraint*.csv",
        "DeepT-Fast-Lp-First": "box_14000_WithConstraintOtherDotProductOrder_*.csv",
    }

    all_data = []
    for p in relevant_norms:
        for num_layers in num_layers_list:
            prefix = f"*{dataset}_{dir_tag}_{net_name}_{num_layers}_{size}"
            for method, method_label in methods_and_labels:
                if method == "baf" or method == "backward":
                    glob_path = f"{prefix}_{method}_{p}_*.csv"
                elif method_label == "DeepT-Precise":
                    glob_path = f"{prefix}_{method}_{p}_box_7000_WithConstraint_*.csv"
                else:
                    glob_path = f"{prefix}_zonotope_{p}_{zonotope_suffix_by_label[method_label]}"

                all_files = list(base_path.glob(glob_path))
                if len(all_files) == 0:
                    print(f"No data for method={method} method_label={method_label} p={p} num_layers={num_layers} glob_path={glob_path}")
                    continue

                # Several runs may exist; use the most recently modified file.
                sorted_files = list(sorted(all_files, key=lambda x: x.stat().st_mtime))
                latest_file = sorted_files[-1]
                print(method_label, ":", latest_file)

                try:
                    df = pd.read_csv(latest_file)
                    df['p'] = p
                    df['num_layers'] = int(num_layers)
                    df['Method'] = method_label

                    # -1 marks "memory not recorded" for CSVs without that column.
                    if 'memory' not in df.columns:
                        df["memory"] = -1

                    all_data.append(df)
                except pd.errors.EmptyDataError:
                    print(f"Empty CSV file for method={method} p={p} num_layers={num_layers}")

    if not all_data:
        # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
        raise ValueError(f"No result CSV could be loaded from {base_path} for dataset={dataset}")

    all_data_df = pd.concat(all_data, ignore_index=True)
    all_data_df["sentence"] = pd.to_numeric(all_data_df["sentence"])
    all_data_df["position"] = pd.to_numeric(all_data_df["position"])
    return all_data_df

In [None]:
# Table 1
# Print the directory that is actually searched (as the other load cells do)
# rather than BASE_PATH itself.
NORMAL_CASE_RESULTS_PATH = BASE_PATH.parent.parent / "normal_case"
print(NORMAL_CASE_RESULTS_PATH.absolute())
all_data_df = get_data(NORMAL_CASE_RESULTS_PATH, dataset="sst")

In [None]:
# Table 2
# Results of the "bert_smaller" network on the SST subset, using the
# four-method comparison selected by table=2.
SMALLER_NETWORK_RESULTS_PATH = BASE_PATH.parent.parent.joinpath('smaller_network_results/')
print(SMALLER_NETWORK_RESULTS_PATH.absolute())
all_data_smaller_df = get_data(
    SMALLER_NETWORK_RESULTS_PATH,
    dataset="sstSubset",
    table=2,
)
all_data_smaller_df;  # the trailing ';' suppresses the cell output

In [None]:
# Table 3
# SST-subset results for the norms "1" and "2" (l1l2=True).
L1L2_RESULTS_PATH = BASE_PATH.parent.parent.joinpath("l1l2")
print(L1L2_RESULTS_PATH.absolute())
all_data_l1l2_df = get_data(
    L1L2_RESULTS_PATH,
    dataset="sstSubset",
    l1l2=True,
)
all_data_l1l2_df;  # the trailing ';' suppresses the cell output

In [None]:
# Table 4
# DeepT-Fast with and without constraint on "sst" (constraint_comp=True).
NO_CONSTRAINT_RESULTS_PATH = BASE_PATH.parent.parent.joinpath("no_constraint_results")
print(NO_CONSTRAINT_RESULTS_PATH.absolute())
all_data_constraint_comp_df = get_data(
    NO_CONSTRAINT_RESULTS_PATH,
    dataset="sst",
    constraint_comp=True,
)
all_data_constraint_comp_df;  # the trailing ';' suppresses the cell output

In [None]:
# Table 5
# Comparison of the two dot-product orders on "sst" (compare_dot_product=True).
OTHER_DOT_PRODUCT_RESULTS_PATH = BASE_PATH.parent.parent.joinpath("other_dot_product_results")
print(OTHER_DOT_PRODUCT_RESULTS_PATH.absolute())
all_data_other_dot_product_df = get_data(
    OTHER_DOT_PRODUCT_RESULTS_PATH,
    dataset="sst",
    compare_dot_product=True,
)
all_data_other_dot_product_df;  # the trailing ';' suppresses the cell output

In [None]:
# Results for the "bert_big" network on "sst".
data_big_df = get_data(BASE_PATH.parent.parent / "big" , dataset="sst", big=True)
# Keep 12-layer rows only for the zonotope method. get_data labels that method
# "DeepT-Fast"; the previous comparison against "Zonotope-Fast" never matched,
# so every 12-layer row was dropped.
data_big_df = data_big_df[(data_big_df['num_layers'] != 12) | (data_big_df['Method'] == 'DeepT-Fast')]

In [None]:
# Results for the "yelp" dataset with the default method list (CROWN-BaF and DeepT-Fast).
data_yelp_df = get_data(BASE_PATH.parent.parent / "yelp" , dataset="yelp")

In [None]:
# Results for the "bert_standard_layer_norm" network on "sst".
data_standard_layer_norm_df = get_data(BASE_PATH.parent.parent / "standard_layer_norm" , dataset="sst", standard_layer_norm=True)

In [None]:
def process_data(data_df):
    """Summarise raw results per (num_layers, p, Method).

    Parameters
    ----------
    data_df : pandas.DataFrame
        Frame with at least the columns ``p``, ``num_layers``, ``Method``,
        ``eps``, ``timing`` and ``memory``.

    Returns
    -------
    pandas.DataFrame or None
        One row per group, sorted by ``num_layers`` then ``p``, with the
        columns ``num_layers``, ``p``, ``Method`` (all categorical),
        ``min_eps``, ``avg_eps`` and the group means of ``timing`` and
        ``memory``. ``None`` is returned (after printing the reason) when the
        frame cannot be processed.
    """
    group_keys = ['p', 'num_layers', 'Method']
    try:
        # Aggregate only the reported columns in a single pass: unrelated
        # (possibly non-numeric) columns cannot break the mean, and min/mean
        # of eps cannot get misaligned between two separately built frames.
        summary = (
            data_df
            .groupby(group_keys, as_index=False)
            .agg(
                min_eps=("eps", "min"),
                avg_eps=("eps", "mean"),
                timing=("timing", "mean"),
                memory=("memory", "mean"),
            )
            .sort_values(['num_layers', 'p'])
            .astype({'p': "category", "Method": "category", "num_layers": "category"})
            .reset_index(drop=True)
        )
        return summary[["num_layers", "p", "Method", "min_eps", "avg_eps", "timing", "memory"]]
    except Exception as e:
        # Best effort: keep the notebook running, but return a single value so
        # the failure path has the same shape as the success path.
        print(f"Couldn't process dataframe. Exception {e}")
        return None

# Summaries (one row per num_layers / p / Method) of every raw frame loaded above.
data_normal = process_data(all_data_df)
data_smaller =  process_data(all_data_smaller_df)
data_l1l2 = process_data(all_data_l1l2_df)
data_constraint = process_data(all_data_constraint_comp_df)
data_dot_product = process_data(all_data_other_dot_product_df)

data_big = process_data(data_big_df)
data_yelp =  process_data(data_yelp_df)
data_standard_layer_norm = process_data(data_standard_layer_norm_df)

In [None]:
# Summary of the "normal_case" results (dataset "sst").
data_normal  # Table 1

In [None]:
# Summary of the "yelp" results.
data_yelp # Table 2

In [None]:
# Summary of the big-network results (after the 12-layer filter applied when loading).
data_big # Table 3

In [None]:
# Summary of the smaller-network results on the SST subset.
# NOTE(review): the cell that loads this data is tagged with a different table
# number -- confirm which numbering is current.
data_smaller  # Table 4 / Table 12

In [None]:
# Summary of the SST-subset results for the norms "1" and "2".
# NOTE(review): the cell that loads this data is tagged with a different table
# number -- confirm which numbering is current.
data_l1l2  # Table 5

In [None]:
# Summary of the dot-product-order comparison (Lp-first vs. Linf-first).
# NOTE(review): the cell that loads this data is tagged with a different table
# number -- confirm which numbering is current.
data_dot_product  # Table 6

In [None]:
# Relative change (in percent) of avg_eps for DeepT-Fast-Linf-First compared
# with DeepT-Fast-Lp-First, per network depth and norm.
for m in [3, 6, 12]:
    for p in ['1', '2']:
        d = data_dot_product
        d = d[(d['num_layers'] == m) & (d['p'] == p)]
        linf_first = d.loc[d['Method'] == 'DeepT-Fast-Linf-First', "avg_eps"]
        lp_first = d.loc[d['Method'] == 'DeepT-Fast-Lp-First', "avg_eps"]
        if linf_first.empty or lp_first.empty:
            # A result file was missing when loading; report it instead of crashing.
            print(f"p={p.ljust(3)} {m: 3.0f} layers - Improvement: n/a (missing data)")
            continue
        # Extract the scalar explicitly: float() on a one-element Series is
        # deprecated in pandas. process_data yields one row per group.
        a = float(linf_first.iloc[0])
        b = float(lp_first.iloc[0])
        print(f"p={p.ljust(3)} {m: 3.0f} layers - Improvement: {(a/b - 1)*100:.2f}%")
    print()

In [None]:
# Summary of the standard-layer-norm network results.
data_standard_layer_norm # Table 7

In [None]:
# List the contents of the synonym results directory -- presumably to look up
# the CSV file names that are hard-coded in the next cell.
list((BASE_PATH.parent / 'synonym').glob("*"))

#data_synonym # Table 8

In [None]:
SYNONYM_DIR = BASE_PATH.parent / 'synonym'  # Table 8

# Result files of the two synonym-attack runs (fixed file names).
baf = 'resultsSynonym_sstSubset_small3_big_baf_inf_None_None_NoConstraint_Mar04_15-17-22.csv'
deepT = 'resultsSynonym_sstSubset_small3_big_zonotope_inf_box_7000_WithConstraint_Mar04_15-28-11.csv'

baf_results = pd.read_csv(SYNONYM_DIR / baf)
deepT_results = pd.read_csv(SYNONYM_DIR / deepT)

# Each method is reported against the number of rows in its own file, so the
# ratio stays correct even if the two runs cover a different number of sentences.
num_sentences = len(deepT_results)
num_sentences_baf = len(baf_results)

num_verified_sentences_baf = baf_results.isSafe.sum()
time_baf = baf_results.timing.mean()

num_verified_sentences_deepT = deepT_results.isSafe.sum()
time_deepT = deepT_results.timing.mean()

print("Synonym results")
print(f"Baf:   {num_verified_sentences_baf}/{num_sentences_baf} sentences verified (avg time: {time_baf:.2f}s)")
print(f"DeepT: {num_verified_sentences_deepT}/{num_sentences} sentences verified (avg time: {time_deepT:.2f}s)")

In [None]:
VISION_DIR = BASE_PATH.parent / 'vit'  # Table 11

print("Vision Transformers results")

# Files tagged p_100 are reported as the "inf" norm.
for p in [1, 2, 100]:
    # As in get_data: use the most recently modified matching file, and report
    # a missing result instead of failing with an IndexError.
    candidates = sorted(VISION_DIR.glob(f"resultsVit_p_{p}_*.csv"), key=lambda f: f.stat().st_mtime)
    if not candidates:
        print(f"No data for p={p} in {VISION_DIR}")
        continue
    filename = candidates[-1]

    data = pd.read_csv(filename)
    min_eps = float(data.eps.min())
    avg_eps = float(data.eps.mean())
    avg_time = float(data.timing.mean())

    # Pad the short norm names so the printed columns line up with "inf".
    norm = 'inf' if p == 100 else f'{p}  '

    print(f"p={norm}    min_eps={min_eps:.3f}   avg_eps={avg_eps:.3f}   avg_time={avg_time:.3f}s")

In [None]:
# Summary of DeepT-Fast with vs. without constraint.
# NOTE(review): the cell that loads this data is tagged with a different table
# number -- confirm which numbering is current.
data_constraint  # Table 13

In [None]:
# Relative change (in percent) of avg_eps for DeepT-Fast compared with
# DeepT-Fast-No-Constraint, per norm and network depth.
for p in ['1', '2', 'inf']:
    for m in [3, 6, 12]:
        d = data_constraint
        d = d[(d['num_layers'] == m) & (d['p'] == p)]
        with_constraint = d.loc[d['Method'] == 'DeepT-Fast', "avg_eps"]
        without_constraint = d.loc[d['Method'] == 'DeepT-Fast-No-Constraint', "avg_eps"]
        if with_constraint.empty or without_constraint.empty:
            # A result file was missing when loading; report it instead of crashing.
            print(f"p={p.ljust(3)} {m: 3.0f} layers - Improvement: n/a (missing data)")
            continue
        # Extract the scalar explicitly: float() on a one-element Series is
        # deprecated in pandas. process_data yields one row per group.
        a = float(with_constraint.iloc[0])
        b = float(without_constraint.iloc[0])
        print(f"p={p.ljust(3)} {m: 3.0f} layers - Improvement: {(a/b - 1)*100:.2f}%")
    print()