In [315]:
import pandas as pd 
import numpy as np
import hashlib
from csv import reader
import re
import os
# pd.set_option('display.max_colwidth', None)

In [316]:
def hash_str(to_hash):
    """Return the SHA-256 digest of ``to_hash`` as a lowercase hex string.

    ``to_hash`` is a ``str``; it is encoded with ``str.encode()``'s default
    encoding before hashing.
    """
    digest = hashlib.sha256()
    digest.update(to_hash.encode())
    return digest.hexdigest()

In [317]:
def process_lbl(line_list, filename):
    """Turn the raw lines of one results file into CSV-parsed records.

    A line that is immediately followed by a dashed separator line is treated
    as the current "name" line. Separator lines themselves are skipped. Every
    other line is prefixed with ``filename`` and the most recent name line
    (comma-joined) and parsed with ``csv.reader``.

    Parameters
    ----------
    line_list : list of str
        Lines of the file, in order (trailing newlines allowed).
    filename : str
        Value placed in the first field of every record.

    Returns
    -------
    list of list of str
        One list of fields per data line.
    """
    separator = '---------,---------,---------,---------'
    total = len(line_list)
    records = []
    current_name = ''
    for idx, raw in enumerate(line_list):
        next_is_separator = idx + 1 < total and separator in line_list[idx + 1]
        if next_is_separator:
            # The line right above a separator names the section that follows.
            current_name = raw.strip('\n')
            continue
        if separator in raw:
            continue
        csv_text = filename + ',' + current_name + ',' + raw.strip()
        records.extend(reader([csv_text]))
    return records

In [318]:
# Load the three LaTeX table templates ("shells").
# Iterating a text file yields lines that still end in '\n', so joining them
# with '\n' doubles every line break; the cells that print the tables collapse
# '\n\n' back to '\n' after substitution, so that doubling is kept here.
# `with` guarantees each handle is closed even if reading raises.
with open("fig5_shell", "r") as fig5_file:
    fig5 = '\n'.join(fig5_file)

with open("overall_table_shell", "r") as overall_file:
    overall = '\n'.join(overall_file)

with open("constraint_table_shell", "r") as constraint_file:
    constraint = '\n'.join(constraint_file)

In [319]:
data = []
directory = 'tests/results'

# Parse every regular file directly inside `directory` whose name does not
# contain 'Data', and collect the resulting records in `data`.
for filename in os.listdir(directory):
    filepath = os.path.join(directory, filename)
    if 'Data' not in filename and os.path.isfile(filepath):
        # Context manager closes the handle even if reading fails part-way.
        with open(filepath, "r") as f:
            lbl = list(f)
        data.extend(process_lbl(lbl,filename))

In [320]:
# Build the results frame from the parsed records in `data`, using the 12
# column names listed below.
df = pd.DataFrame(data, columns = ['app','doc','setup','time','regex','n_transitions','n_states',
                                    'test_type','component','test','val','metric'])
num_cols = ['time','n_transitions','n_states','val']
# Convert the numeric columns row by row (axis=1); values that cannot be parsed
# become NaN because of errors='coerce'.
df[num_cols] = df[num_cols].apply(pd.to_numeric,errors='coerce', axis=1)

# For the 'naive' setup, combine the 'witness_generation' and 'prove_0' rows of
# each (time, regex, app, doc, setup, n_transitions, n_states, test_type) group
# into a single summed row.
# NOTE(review): .sum() is applied to every non-key column, including the string
# columns component/test/metric; the three assignments below overwrite those
# columns afterwards. How string columns are summed depends on the installed
# pandas version -- confirm this behaves as intended.
naive_sub = df.loc[(df.setup=='naive') 
       & (df.test.isin(['witness_generation','prove_0']))
      ].groupby(['time', 'regex','app','doc','setup','n_transitions','n_states','test_type']).sum().reset_index()
naive_sub['metric']='μs'
naive_sub['component'] = 'T'
naive_sub['test'] = 'prove+wit'
# Append the combined rows to the original rows.
df = pd.concat([df,naive_sub])
# Short benchmark identifier: first 5 hex characters of hash_str(doc + regex + app).
df['hash_id'] = df.apply(lambda x: hash_str(x.doc+x.regex+x.app)[:5], axis=1)
# Tag every 'prove+wit' row with component 'T'; the row-building functions
# filter out component 'T'.
df.loc[df.test=='prove+wit','component'] = 'T'
# concat leaves repeated index labels; reset_index replaces them with a fresh
# 0..n-1 index and keeps the old labels in a new 'index' column.
df.reset_index(inplace=True)

In [321]:
def create_timings_row(setup, hash_id, df): 
    """Format one '&'-separated table row of sizes and timings for a benchmark.

    Rows of ``df`` are filtered to the given ``setup`` and ``hash_id``, with
    component 'T' excluded. The returned row contains, in order: number of
    states, number of transitions, R1CS constraint count, number of distinct
    ``prove_*`` tests, then the C (plus CG), S, P and V timings and the size.

    Parameters
    ----------
    setup : str
        Value matched against ``df.setup``.
    hash_id : str
        Value matched against ``df.hash_id``.
    df : pandas.DataFrame
        Must provide the columns setup, hash_id, component, time, test_type,
        n_transitions, n_states, test and val.

    Returns
    -------
    str
        A single line produced by ``DataFrame.to_string`` with every field but
        the last followed by ' &'.

    Raises
    ------
    IndexError
        If there is no 'NOC'/'step_circuit' row or no test_type 'S' row for
        the selection.
    KeyError
        If one of the components C, CG, S, P, V has no test_type 'R' rows.
    """
    wanted = ['time','test_type','n_transitions','n_states','component','test','val','hash_id']
    selected = (df.setup==setup) & (df.hash_id==hash_id) & (df.component!='T')
    sub = df.loc[selected, wanted]

    sub_time = sub.loc[sub.test_type=='R']

    # Aggregate only the numeric 'val' column. Summing/averaging the whole
    # frame also touches the string columns (test_type, test, hash_id), which
    # pandas >= 2.0 no longer drops silently and which makes .mean() raise.
    per_run_total = sub_time.groupby(by=['time','component'])['val'].sum()
    # Average the per-run totals of each component; the 1e6 divisor presumably
    # converts microseconds to seconds (NOTE(review): confirm the unit).
    mean_per_component = per_run_total.groupby(level='component').mean() / 1e6
    time_data = pd.DataFrame(mean_per_component).transpose().reset_index()

    r1cs_constraints = sub.loc[(sub.test_type=='NOC') & (sub.test=='step_circuit')].val.unique()[0]

    n_transitions = sub.n_transitions.unique()[0]
    n_states = sub.n_states.unique()[0]

    n_steps = sub_time.loc[sub_time.test.str.contains('prove_')].test.nunique()

    # Total of the test_type 'S' values for the first run (groupby sorts by
    # time), scaled by 1e3 (NOTE(review): presumably bytes -> kB; confirm).
    size = sub.loc[sub.test_type=='S'].groupby(by=['time'])['val'].sum().unique()[0]/1e3

    time_data['const'] = r1cs_constraints
    time_data['ns'] = n_states
    time_data['nt'] = n_transitions
    time_data['steps'] = n_steps
    time_data['size'] = size
    # The reported C column is the sum of the C and CG components.
    time_data['C'] = time_data['C'] + time_data['CG']

    return time_data[['ns','nt','const','steps','C','S','P','V','size']].to_string(
        header=False,
        index=False,
        formatters={"ns": "{:,} &".format, 
                    "nt": "{:,} &".format, 
                    "const": "{:,} &".format, 
                    "steps": "{:,} &".format, 
                    "C": "{:,.3f} &".format, 
                    "S": "{:,.3f} &".format, 
                    "P": "{:,.3f} &".format, 
                    "V": "{:,.3f} &".format, 
                    "size": "{:,.3f}".format, 
        }
    )

In [322]:
def create_big_timings_row(setup, hash_id, df): 
    """Format one '&'-separated row of the overall timings table.

    Rows of ``df`` are filtered to the given ``setup`` and ``hash_id``, with
    component 'T' excluded. The returned row contains, in order: R1CS
    constraint count, document size, number of distinct ``prove_*`` tests,
    then the C (plus CG), S, P and V timings and the size.

    Parameters
    ----------
    setup : str
        Value matched against ``df.setup``.
    hash_id : str
        Value matched against ``df.hash_id``.
    df : pandas.DataFrame
        Must provide the columns setup, hash_id, doc, component, time,
        test_type, n_transitions, n_states, test and val.

    Returns
    -------
    str
        A single line produced by ``DataFrame.to_string`` with every field but
        the last followed by ' &'.

    Raises
    ------
    IndexError
        If there is no 'NOC'/'step_circuit' row or no test_type 'S' row for
        the selection, or no row at all for ``hash_id``.
    KeyError
        If one of the components C, CG, S, P, V has no test_type 'R' rows.
    ValueError
        If the text after the last '_' of the doc name is not an integer.
    """
    wanted = ['time','test_type','n_transitions','n_states','component','test','val','hash_id']
    selected = (df.setup==setup) & (df.hash_id==hash_id) & (df.component!='T')
    sub = df.loc[selected, wanted]

    sub_time = sub.loc[sub.test_type=='R']

    # Aggregate only the numeric 'val' column. Summing/averaging the whole
    # frame also touches the string columns (test_type, test, hash_id), which
    # pandas >= 2.0 no longer drops silently and which makes .mean() raise.
    per_run_total = sub_time.groupby(by=['time','component'])['val'].sum()
    # Average the per-run totals of each component; the 1e6 divisor presumably
    # converts microseconds to seconds (NOTE(review): confirm the unit).
    mean_per_component = per_run_total.groupby(level='component').mean() / 1e6
    time_data = pd.DataFrame(mean_per_component).transpose().reset_index()

    r1cs_constraints = sub.loc[(sub.test_type=='NOC') & (sub.test=='step_circuit')].val.unique()[0]

    n_steps = sub_time.loc[sub_time.test.str.contains('prove_')].test.nunique()

    # Total of the test_type 'S' values for the first run (groupby sorts by
    # time), scaled by 1e3 (NOTE(review): presumably bytes -> kB; confirm).
    size = sub.loc[sub.test_type=='S'].groupby(by=['time'])['val'].sum().unique()[0]/1e3
    # The document size is read from the integer after the last '_' in the doc
    # name; this lookup uses every row for hash_id, regardless of setup.
    doc_size = int(df.loc[df.hash_id==hash_id].doc.unique()[0].split('_')[-1])

    time_data['const'] = r1cs_constraints
    time_data['steps'] = n_steps
    time_data['ds'] = doc_size
    time_data['size'] = size
    # The reported C column is the sum of the C and CG components.
    time_data['C'] = time_data['C'] + time_data['CG']

    return time_data[['const','ds','steps','C','S','P','V','size']].to_string(
        header=False,
        index=False,
        formatters={
                    "const": "{:,} &".format, 
                    "ds": "{:,} &".format, 
                    "steps": "{:,} &".format, 
                    "C": "{:,.3f} &".format, 
                    "S": "{:,.3f} &".format, 
                    "P": "{:,.3f} &".format, 
                    "V": "{:,.3f} &".format, 
                    "size": "{:,.3f}".format, 
        }
    )

In [323]:
# Format one row per benchmark that has 'reef' rows, for both timing tables.
fig5_dict = {}
timings_dict = {}
reef_hash_ids = df.loc[df.setup=='reef'].hash_id.unique()
for hash_id in reef_hash_ids:
    fig5_dict[hash_id] = create_timings_row('reef', hash_id, df)
    timings_dict[hash_id] = create_big_timings_row('reef', hash_id, df)

# Each template marks a row with '%<hash_id>'; swap the marker for the row text.
for key, row_text in fig5_dict.items():
    fig5 = fig5.replace('%' + key, row_text)

for key, row_text in timings_dict.items():
    overall = overall.replace('%' + key, row_text)

# Undo the doubled line breaks introduced when the templates were loaded.
fig5 = fig5.replace('\n\n', '\n')
overall = overall.replace('\n\n', '\n')
print(fig5)
print(overall)

\begin{figure*}[t]
    {\footnotesize
    \centering
        \begin{tabularx}{\textwidth}{
            | >{\centering\arraybackslash}l|
            >{\centering\arraybackslash}X
            >{\centering\arraybackslash}X 
            >{\centering\arraybackslash}X 
            >{\centering\arraybackslash}X 
            >{\centering\arraybackslash}X 
            >{\centering\arraybackslash}X 
            >{\centering\arraybackslash}X 
            >{\centering\arraybackslash}X 
            >{\centering\arraybackslash}X
            >{\centering\arraybackslash}X
            ||} 
          \textbf{Application} &
            \textbf{Document size} (B) &
            \textbf{\# states} &
            \textbf{\mbox{\# transitions}} &
            \textbf{R1CS \mbox{constraints}} &
            \textbf{\# Steps} &
            \textbf{Compiler time} (s) &
            \textbf{Solver time} (s) &
            \textbf{Prover time} (s) &
            \textbf{Verifier time} (s) &
            \textbf{Proof Siz

In [324]:
def create_constraints_row(hash_id, df): 
    """Format one '&'-separated row of constraint counts, one field per setup.

    Selects the test_type 'NOC' rows of ``hash_id`` whose test is
    'step_circuit' or 'circuit', and lays their ``val`` out in the fixed
    column order naive, nwr, safa+nlookup, reef. A setup with no matching row
    is rendered as '---' instead of raising ``KeyError``.

    Parameters
    ----------
    hash_id : str
        Value matched against ``df.hash_id``.
    df : pandas.DataFrame
        Must provide the columns hash_id, test_type, test, setup and val.

    Returns
    -------
    str
        A single line produced by ``DataFrame.to_string`` with every field but
        the last followed by ' &'.
    """
    setup_order = ['naive','nwr','safa+nlookup','reef']
    sub = df.loc[
    (df.hash_id==hash_id)
    & (df.test_type=='NOC')
    & (df.test.isin(['step_circuit','circuit']))][['setup','val']].drop_duplicates()

    # One row ('val') with one column per setup that has data.
    sub = sub.set_index('setup').transpose().reset_index()

    # Back-fill every setup that has no data, not only 'nwr' and 'naive', so
    # the column selection below cannot fail on a missing setup.
    for setup_name in setup_order:
        if setup_name not in sub.columns:
            sub[setup_name] = np.nan
    # Depending on the pandas version a NaN cell is rendered through na_rep or
    # through the formatter (as 'nan &'); the final replace covers the latter.
    return sub[setup_order].to_string(
    header=False,
    index=False,
    na_rep = '--- &',
    formatters={"naive": "{:,} &".format, 
                "nwr": "{:,} &".format, 
                "safa+nlookup": "{:,} &".format, 
                "reef": "{:,}".format,     
        }
    ).replace('nan','---')

In [325]:
# Format one constraint-count row per benchmark that has 'reef' rows.
constraints_dict = {}
reef_hash_ids = df.loc[df.setup=='reef'].hash_id.unique()
for hash_id in reef_hash_ids:
    constraints_dict[hash_id] = create_constraints_row(hash_id, df)

# The template marks each row with '%<hash_id>'; swap the marker for the row text.
for key, row_text in constraints_dict.items():
    constraint = constraint.replace('%' + key, row_text)

# Undo the doubled line breaks introduced when the template was loaded.
constraint = constraint.replace('\n\n', '\n')
print(constraint)

\begin{figure*}[t]
    {\footnotesize
    \centering
      \begin{tabularx}{\textwidth}{ | >{\centering\arraybackslash}l|
            >{\centering\arraybackslash}X
            >{\centering\arraybackslash}X
            >{\centering\arraybackslash}X
            >{\centering\arraybackslash}X
            >{\centering\arraybackslash}X
            >{\centering\arraybackslash}X||}
            \textbf{Application} & \textbf{Document ID} & \textbf{Regex ID} &
            \textbf{DFA} & \textbf{DFA+Recursion} & \textbf{SAFA+nlookup} &  \textbf{Reef} \\
      \toprule
        \textbf{Redactions} & & & & & &\\
        & Small Email & r1 & --- & --- & 54,679 & 52,631\\
        & Large Email  & r2 & --- & --- & 57,268 & 54,636\\
       \midrule
        \textbf{ODoH} & & & & & & \\
        & 5f558 & r3 & 1,552,754 & 24,131 & 22,573 & 18,437\\
        & 25424 & r4 & 553,295 & 16,288 & 25,129 & 22,692\\
        & 55824 & r5 & 422,219 & 15,260 & 25,576 & 23,148\\
        & 21d97 & r6 & 192,831 & 13,456 