### Edge coverage Table  
This notebook simply reads the edge-coverage CSV files and extracts their maximum value.

In [2]:
import numpy as np
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu

In [3]:
def convert_unixtime_deltaseconds(df, unix=True, timecolumn='# unix_time'):
    """
    Add a ``date`` column holding ``timecolumn`` parsed as pandas datetimes.

    When ``unix`` is true the values are interpreted as seconds since the
    epoch; otherwise they are passed to ``pd.to_datetime`` as they are.
    The frame is modified in place and is also returned.
    """
    raw_times = df[timecolumn]
    parsed = pd.to_datetime(raw_times, unit='s') if unix else pd.to_datetime(raw_times)
    df['date'] = parsed
    return df

#https://stackoverflow.com/questions/25234941/python-regularise-irregular-time-series-with-linear-interpolation
def resample_timeseries(df, resample_freq, resample_col, time_series_col='date', path=True):
    """
    Regularise an irregular time series onto a fixed ``resample_freq`` grid.

    A sentinel row 24 hours after the first timestamp is added (carrying the
    last known value forward), rows with duplicated timestamps are dropped,
    the series is interpolated at one-second resolution and finally sampled
    at ``resample_freq``. Any remaining gaps are filled with 0.

    Parameters
    ----------
    df : DataFrame containing ``time_series_col`` and ``resample_col``.
    resample_freq : pandas frequency string for the output grid.
    resample_col : name of the value column to resample.
    time_series_col : name of the datetime column.
    path : unused; kept so existing callers keep working.

    Returns
    -------
    DataFrame with the columns ``time_series_col`` and ``resample_col``.
    The input frame is not modified.
    """
    series = df[[time_series_col, resample_col]]

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # replacement. The sentinel row has no value, so it picks up the last
    # observed one through the forward fill below.
    end_marker = pd.DataFrame(
        {time_series_col: [series[time_series_col].iloc[0] + pd.Timedelta(hours=24)]}
    )
    series = pd.concat([series, end_marker], ignore_index=True)
    series[resample_col] = series[resample_col].ffill()

    # Keep only the first row for each timestamp before it becomes the index.
    series = series[~series[time_series_col].duplicated()]
    series = (
        series.set_index(time_series_col)
        .resample('s')
        .interpolate()
        .resample(resample_freq)
        .asfreq()
        .fillna(0)
    )
    return series.reset_index()

def utest(dffinal, x_var='delta_time', y_var='cur_path', event1='AFL', event2='Curious_AFL'):
    """
    Mann-Whitney U test between two events on per-``x_var`` averages.

    The frame is averaged over every (``x_var``, ``event``) pair, then the
    ``y_var`` values belonging to ``event1`` and ``event2`` are compared.
    Returns the p-value rounded to six decimals.
    """
    averaged = dffinal.groupby([x_var, 'event']).mean().reset_index()
    is_first = averaged['event'] == event1
    is_second = averaged['event'] == event2
    _, p_value = mannwhitneyu(averaged.loc[is_first, y_var], averaged.loc[is_second, y_var])
    return np.round(p_value, 6)

In [4]:
def df_coverage_resample(df, event, field='total_coverage_own_finds', neuzz=False, rename_field='cur_path'):
    """
    Resample one coverage log onto a 15-minute grid of elapsed seconds.

    Parameters
    ----------
    df : DataFrame with an ``m_timestamp`` column and the ``field`` column;
        a ``filename`` column is also required when ``neuzz`` is true.
    event : label written to the ``event`` column of the result.
    field : name of the coverage column to resample.
    neuzz : when true, shift the timestamps of all rows whose filename does
        not start with ``id:00`` so they follow directly after the last
        ``id:00`` row.
    rename_field : name given to the coverage column in the result.

    Returns
    -------
    DataFrame with the columns ``delta_time`` (seconds since the first
    resampled timestamp), ``rename_field`` and ``event``.
    """
    df = convert_unixtime_deltaseconds(df, unix=False, timecolumn='m_timestamp')

    # already done when recording files: df['date'].iloc[0] = df['date'].iloc[1]

    if neuzz:
        # for neuzz mtime is different for initial training set, this needs
        # adjustment for graph: close the gap between the last 'id:00' row and
        # the first row after it.
        is_initial = df['filename'].str.startswith('id:00')
        afl_neuzz_delta = df.loc[~is_initial, 'date'].iloc[0] - df.loc[is_initial, 'date'].iloc[-1]
        print(afl_neuzz_delta)
        df.loc[~is_initial, 'date'] = df.loc[~is_initial, 'date'] - afl_neuzz_delta

    # '15min' is the current spelling of the deprecated '15T' alias.
    df = resample_timeseries(df, '15min', field, time_series_col='date', path=True)

    # running time over fuzzing campaign
    df['delta_time'] = round((df['date'] - df['date'].iloc[0]).dt.total_seconds(), 0)

    df = df[['delta_time', field]]
    df = df.rename(columns={field: rename_field})

    df['event'] = event
    return df

In [26]:
# Programs under test; only the keys are used by the loading loop below.
results = {
    'objdump': pd.DataFrame(),
    'nm': pd.DataFrame(),
    'ffmpeg': pd.DataFrame(),
    'mupdf': pd.DataFrame(),
    'tcpdump': pd.DataFrame(),
    'gif2png': pd.DataFrame(),
    'size': pd.DataFrame(),
    'readelf': pd.DataFrame()
}
empty_seed = False

resultspath = './edgecoverage/'

# One entry per successfully read trial, kept as three parallel lists.
coverage_results = {
    'program': [],
    'fuzzer': [],
    'edges': []
}

# Directory and file names carry a 'pre' infix unless the empty seed was used.
if empty_seed:
    seed = ''
else:
    seed = 'pre'

# (tag used in the csv file name, label stored in the table, label used in
# the "not available" message)
fuzzers = [
    ('afl', 'AFL', 'AFL'),
    ('aflfast', 'AFLFast', 'AFL Fast'),
    ('curiousmutation', 'CuriousAFL', 'Curious'),
    ('neuzz', 'NEUZZ', 'NEUZZ'),
]

for exec_name in results:
    for file_tag, fuzzer_name, message_name in fuzzers:
        # The try wraps the whole trial loop on purpose: the first trial that
        # cannot be read ends the loop for this fuzzer, as before.
        try:
            for trial in range(1, 6):
                resultpath = os.path.join(
                    resultspath + exec_name + seed,
                    exec_name + seed + file_tag + str(trial) + '.csv',
                )
                df = pd.read_csv(resultpath, sep=';')
                # The last recorded value is taken as the final edge coverage.
                coverage_results['edges'].append(df['total_coverage_own_finds'].iloc[-1])
                coverage_results['fuzzer'].append(fuzzer_name)
                coverage_results['program'].append(exec_name)
        except Exception:
            # Exception instead of a bare except, so that KeyboardInterrupt
            # and SystemExit still stop the notebook.
            print(message_name + ' file not available: ' + exec_name + ' trial: ' + str(trial))

coverage_df = pd.DataFrame(coverage_results)


In [None]:
def u_test(df1, df2):
    """Return the Mann-Whitney U p-value for two samples, rounded to 6 decimals."""
    p_value = mannwhitneyu(df1, df2)[1]
    return np.round(p_value, 6)

# Summary table: one row per program with the mean final edge count of each
# fuzzer and the U-test p-value between CuriousAFL and NEUZZ.
# Rows are collected in a list and turned into a frame once, because
# DataFrame.append was removed in pandas 2.0.
summary_rows = []
for program in coverage_df['program'].unique():
    program_df = coverage_df.loc[coverage_df.program == program]
    row = {'program': program}
    for fuzzer in ('AFL', 'AFLFast', 'CuriousAFL', 'NEUZZ'):
        row['paths_' + fuzzer] = program_df.loc[program_df.fuzzer == fuzzer, 'edges'].mean()

    curious_edges = program_df.loc[program_df.fuzzer == 'CuriousAFL', 'edges']
    neuzz_edges = program_df.loc[program_df.fuzzer == 'NEUZZ', 'edges']
    # A fuzzer may have no trials for a program when its result files were
    # missing; report NaN instead of running the test on an empty sample.
    if len(curious_edges) > 0 and len(neuzz_edges) > 0:
        row['p'] = u_test(curious_edges, neuzz_edges)
    else:
        row['p'] = np.nan
    summary_rows.append(row)

df = pd.DataFrame(summary_rows)
df