In [63]:
import pandas as pd 
import numpy as np
import hashlib
import math
from csv import reader
import re
import os
import seaborn as sns
import matplotlib.pyplot as plt
# Font type 42 makes matplotlib embed fonts as TrueType in PDF/PS output
# (rather than the default Type 3), so text in saved figures stays editable.
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42
# NOTE(review): `re` is already imported earlier in this cell; this repeat is redundant.
import re
# Render every row when a DataFrame is displayed (no truncation).
pd.set_option('display.max_rows', None)

In [84]:
def hash_str(to_hash):
    """Return the hexadecimal SHA-256 digest of a string.

    Parameters
    ----------
    to_hash : str
        Text to hash; it is encoded with ``str.encode()`` (UTF-8 by default)
        before hashing.

    Returns
    -------
    str
        The 64-character lowercase hex digest.
    """
    hasher = hashlib.sha256()
    hasher.update(to_hash.encode())
    return hasher.hexdigest()

In [85]:
def process_lbl(line_list, filename):
    """Turn the lines of one timing-results file into CSV-parsed rows.

    A line that is immediately followed by a dashed separator line is treated
    as a section header: it is remembered (with its trailing newline removed)
    and not emitted. Separator lines themselves are skipped. Every other line
    is emitted as one row, parsed by ``csv.reader`` from the text
    ``filename,<current header>,<stripped line>``.

    Parameters
    ----------
    line_list : list of str
        Raw lines of the file, in order.
    filename : str
        Name of the file; becomes the leading field(s) of every row.

    Returns
    -------
    list of list of str
        One list of fields per data line.
    """
    rule = '---------,---------,---------,---------'
    rows = []
    header = ''
    total = len(line_list)
    for idx, current in enumerate(line_list):
        # A header is recognised by looking ahead at the following line.
        if idx + 1 < total and rule in line_list[idx + 1]:
            header = current.strip('\n')
            continue
        if rule in current:
            continue
        rows.extend(reader([filename + ',' + header + ',' + current.strip()]))
    return rows

In [86]:
def process_mem(line_list, filename):
    """Extract memory measurements from the lines of one results file.

    The lines are scanned in order. A line containing ``Command being timed:``
    updates the current context (setup, regex, app, doc label); a line
    containing ``Maximum resident set size`` emits one row using the context
    seen so far. All other lines are ignored.

    Example of a command line this parser expects:
    'Command being timed: "./target/release/reef --e2e --cmt-name cmt --proof-name proof --doc q1w2e3r4 --metrics ./tests/results/timings/bad_pass --re ^(?=.*[A-Z].*[A-Z])(?=.*[!%^@#$&*])(?=.*[0-9].*[0-9])(?=.*[a-z].*[a-z].*[a-z]).{12}$ -n -y ascii"'

    Parameters
    ----------
    line_list : list of str
        Raw lines of the file, in order.
    filename : str
        Name of the file; the setup (and, for ``email_dkim``, the label
        suffix) is derived from substrings of this name.

    Returns
    -------
    list of list
        Rows of ``[app, setup, regex, doc, mem_usage]`` where ``mem_usage`` is
        the reported integer multiplied by ``1e3`` (a float).

    Raises
    ------
    IndexError
        If a command line lacks the ``--re``, ``--metrics`` or ``--doc``
        pattern the regular expressions look for.
    """
    # Apps whose label is the first 10 characters of the document followed by
    # a fixed suffix (presumably the document length -- confirm).
    fixed_suffix = {
        'good_pass': "_12",
        'pihole': "_128",
        'brca1_var1_match': "_43054295",
        'brca1_var1_nonmatch1': "_43054295",
        'brca1_var1_nonmatch2': "_43054295",
        'brca1_var2_match': "_43054295",
        'brca1_var2_nonmatch1': "_43054295",
        'brca1_var2_nonmatch2': "_43054295",
        'brca2_var1_match': "_32325508",
        'brca2_var1_nonmatch': "_32325508",
    }

    rows = []
    setup = ''
    app = ''
    doc = ''
    regex = ''
    for current in line_list:
        if 'Command being timed:' in current:
            # The first matching substring wins, in this order.
            if 'reef' in filename:
                setup = 'reef'
            elif 'safa' in filename:
                setup = 'safa+nlookup'
            elif 'nwr' in filename:
                setup = 'nwr'
            else:
                setup = 'naive'

            regex = re.findall("--re (.*?) (-n )?(-p )?(-y )?(ascii|dna)", current)[0][0]
            metrics_path = re.findall("--metrics (.*?) --", current)[0]
            app = metrics_path.split('/')[-1]
            doc = re.findall("--doc (.*?) --", current)[0]

            if app == 'bad_pass':
                # Only this app uses the real length of the document.
                doc = doc[:10] + "_" + str(len(doc))
            elif app == 'email_dkim':
                size = 415 if 'small' in filename else 1000
                doc = "Message-ID_" + str(size)
            elif app in fixed_suffix:
                doc = doc[:10] + fixed_suffix[app]
            # Any other app keeps the raw --doc value.
        elif "Maximum resident set size" in current:
            reported = int(current.split(':')[1].strip())
            rows.append([app, setup, regex, doc, reported * 1e3])
    return rows

In [88]:
mem_data = []
mem_directory = 'tests/results/memory'

# Parse every regular file in the memory-results directory, skipping entries
# whose name contains 'Data' or 'DS_Store'. Each file contributes the rows
# returned by process_mem.
for filename in os.listdir(mem_directory):
    filepath = os.path.join(mem_directory, filename)
    if 'Data' not in filename and 'DS_Store' not in filename and os.path.isfile(filepath):
        print(filepath)
        # The context manager closes the handle even if reading raises.
        with open(filepath, "r") as f:
            lbl = f.readlines()
        mem_data.extend(process_mem(lbl,filename))

tests/results/memory/pw_bad_reef
tests/results/memory/email_med_reef
tests/results/memory/pw_bad_safa_nlookup
tests/results/memory/email_small_safa_nlookup
tests/results/memory/email_med_nwr
tests/results/memory/pihole_nwr
tests/results/memory/dna_reef
tests/results/memory/email_small_naive
tests/results/memory/pihole_reef
tests/results/memory/email_small_nwr
tests/results/memory/pihole_naive
tests/results/memory/email_med_naive
tests/results/memory/email_med_safa_nlookup
tests/results/memory/pihole_safa_nlookup
tests/results/memory/email_small_reef
tests/results/memory/pw_good_reef
tests/results/memory/dna_safa_nlookup
tests/results/memory/pw_good_safa_nlookup


In [89]:
time_data = []
time_directory = 'tests/results/timings'

# Parse every regular file in the timing-results directory, skipping entries
# whose name contains 'Data' or 'DS_Store'. Each file contributes the rows
# returned by process_lbl.
for filename in os.listdir(time_directory):
    filepath = os.path.join(time_directory, filename)
    if 'Data' not in filename and 'DS_Store' not in filename and os.path.isfile(filepath):
        print(filepath)
        # The context manager closes the handle even if reading raises.
        with open(filepath, "r") as f:
            lbl = f.readlines()
        time_data.extend(process_lbl(lbl,filename))

tests/results/timings/brca1_var2_nonmatch1
tests/results/timings/brca1_var1_nonmatch2
tests/results/timings/brca2_var1_match
tests/results/timings/brca1_var1_match
tests/results/timings/brca1_var2_nonmatch2
tests/results/timings/good_pass
tests/results/timings/bad_pass
tests/results/timings/brca1_var2_match
tests/results/timings/brca1_var1_nonmatch1
tests/results/timings/email_dkim
tests/results/timings/pihole
tests/results/timings/brca2_var1_nonmatch


In [90]:
# One row per memory measurement: [app, setup, regex, doc, val].
mem_df = pd.DataFrame(mem_data, columns = ['app','setup','regex','doc','val'])
num_cols = ['val']
mem_df[num_cols] = mem_df[num_cols].apply(pd.to_numeric,errors='coerce', axis=1)
# Short identifier for a (doc, regex, app) combination: first 5 hex characters
# of the SHA-256 of their concatenation. Built with a comprehension rather than
# DataFrame.apply(axis=1), which returns a DataFrame (not a Series) on an empty
# frame and would make this assignment fail when no results were parsed.
mem_df['hash_id'] = [
    hash_str(doc_label + regex_text + app_name)[:5]
    for doc_label, regex_text, app_name in zip(mem_df['doc'], mem_df['regex'], mem_df['app'])
]

In [92]:
# One row per timing measurement parsed from the timing files.
df = pd.DataFrame(time_data, columns = ['app','doc','setup','time','regex','n_transitions','n_states',
                                    'test_type','component','test','val','metric'])
num_cols = ['time','n_transitions','n_states','val']
# Kept row-wise on purpose: switching to column-wise conversion could change
# the resulting column dtypes (int vs float) that later cells may rely on.
df[num_cols] = df[num_cols].apply(pd.to_numeric,errors='coerce', axis=1)

# For the naive setup, add a synthetic 'prove+wit' measurement per run whose
# value is the sum of the 'witness_generation' and 'prove_0' values. Only 'val'
# is summed; the string columns are assigned explicitly below instead of being
# aggregated and then overwritten.
naive_sub = (
    df.loc[(df.setup=='naive')
           & (df.test.isin(['witness_generation','prove_0']))]
    .groupby(['time', 'regex','app','doc','setup','n_transitions','n_states','test_type'])['val']
    .sum()
    .reset_index()
)
# 'μs' (Greek small letter mu, U+03BC); the previous literal was the same text
# mis-decoded as Latin-1. NOTE(review): assumes the parsed rows carry the
# correctly encoded unit -- confirm against the timing files.
naive_sub['metric'] = '\u03bcs'
naive_sub['component'] = 'T'
naive_sub['test'] = 'prove+wit'
df = pd.concat([df,naive_sub])
# Short identifier for a (doc, regex, app) combination: first 5 hex characters
# of the SHA-256 of their concatenation. A comprehension is used because
# DataFrame.apply(axis=1) returns a DataFrame on an empty frame, which would
# make this assignment fail when no timing rows were parsed.
df['hash_id'] = [
    hash_str(doc_label + regex_text + app_name)[:5]
    for doc_label, regex_text, app_name in zip(df['doc'], df['regex'], df['app'])
]
df.loc[df.test=='prove+wit','component'] = 'T'
# The concatenated frame has duplicate index labels; renumber it, keeping the
# old labels in an 'index' column exactly as reset_index(inplace=True) did.
df = df.reset_index()