# AFL Edge Coverage Analysis  
Convert a fuzzing run into a CSV of analysis data  
This automates afl-showmap and calculates Edge coverage against AFL, AFLFast, NEUZZ

In [6]:
import os
import platform
import datetime
from tqdm import tqdm
import subprocess
import numpy as np
import datetime
import seaborn as sb
import matplotlib.pyplot as plt
import pandas as pd
import csv
import io

In [11]:
def creation_date(path_to_file):
    """
    Try to get the date that a file was created, falling back to when it was
    last modified if that isn't possible.
    See http://stackoverflow.com/a/39501288/1709587 for explanation.
    """
    if platform.system() == 'Windows':
        return os.path.getctime(path_to_file)
    else:
        stat = os.stat(path_to_file)
        try:
            return stat.st_birthtime
        except AttributeError:
            # We're probably on Linux. No easy way to get creation dates here,
            # so we'll settle for when its content was last modified.
            return datetime.datetime.fromtimestamp(stat.st_mtime)

def reading_files(seed_dir):
    rows_list = []
    for filename in sorted(os.listdir(seed_dir)):
        if filename not in ['.state', '.cur_input']: 
            #file_created_date = creation_date(seed_dir + filename)
            dict1 = {
            'filename': filename,
            'm_timestamp': pd.to_datetime(os.path.getmtime(seed_dir + filename), unit='s')
            }
            rows_list.append(dict1)
            
    df_files = pd.DataFrame(rows_list)
    return pd.DataFrame(rows_list)

def call_for_precoverage(files_in, program_call):
    pre_coverage = []

    for file in os.listdir(files_in):
        if file not in ['.state', '.cur_input']: 

            out = subprocess.check_output(['afl-showmap', '-q', '-e', '-o', '/dev/stdout'] + program_call + [files_in + file])
            for line in out.splitlines():
                # PY 3.7 - added deencoding for compatibility
                edge = line.decode('utf-8').split(':')[0]
                pre_coverage.append(edge)
    return pre_coverage

def call_program_for_coverage(df_files, pre_coverage, program_call, seed_folder, save_location):
    tmp_list = []
    raw_bitmap = {}
    df_files['total_coverage'] = -1
    df_files['current_coverage'] = -1
    df_files['total_coverage_own_finds'] = -1

    with open('./' + save_location + '.csv', 'a') as file_:
        writer = csv.DictWriter(file_, df_files.reset_index().columns[1:], delimiter=';')

        if file_.tell() == 0:
            writer.writeheader()

        for row in tqdm(df_files.itertuples(index=False)):    
            out = subprocess.check_output(['afl-showmap', '-q', '-e', '-o', '/dev/stdout'] + program_call + [seed_folder + row[0]])

            tmp_cnt = []
            for line in out.splitlines():
                # PY 3.7 - added deencoding for compatibility
                edge = line.decode('utf-8').split(':')[0]
                tmp_cnt.append(edge)
                tmp_list.append(edge)

            row_dict = {
            'filename': row[0],
            'm_timestamp': row[1],
            #'running_s': row[2],
            #'pretty_mtime': row[3],
            'current_coverage': len(np.unique(tmp_cnt)),
            'total_coverage': len(np.unique(tmp_list)),
            'total_coverage_own_finds': len(np.unique(list(set(tmp_list) - set(pre_coverage))))
            }
            writer.writerow(row_dict)

            tmp_list = list(np.unique(tmp_list))

In [12]:
fuzz_programs = {
    'objdump'   : '/home/deif/gits/fuzztestbench/objdumppre/'
,    'nm'       : '/home/deif/gits/fuzztestbench/nmpre/'
,    'ffmpeg'   : '/home/deif/gits/fuzztestbench/ffmpegpre/'
,    'harfbuzz' : '/home/deif/gits/fuzztestbench/harfbuzzpre/'
,    'readelf'  : '/home/deif/gits/fuzztestbench/readelfpre/'
,    'gif2png'  : '/home/deif/gits/fuzztestbench/gif2pngpre/'
,    'mupdf'    : '/home/deif/gits/fuzztestbench/mupdfpre/'
,    'size'     : '/home/deif/gits/fuzztestbench/sizepre/'
}

program_arguments = {
    'objdump'   : 'objdump -D'
,    'nm'       : 'nm-new -C'
,    'ffmpeg'   : 'ffmpeg -i'
,    'gif2png'  : 'gif2png'
,    'mupdf'    : 'mutool show'
,    'size'     : 'size'
,    'harfbuzz' : 'hb-fuzzer' 
,    'readelf'  : 'readelf -a'
}

algorithms = ['afl', 'aflfast', 'curious', 'neuzz']

In [45]:
program = 'nm'
for algorithm in algorithms:
    for i in range(1,6):
        base_path = fuzz_programs[program]

        if algorithm == 'neuzz':
            seed_path = '/home/deif/gits/neuzz/programs/'+program + str(i)+'/seeds/'
        else:
            seed_path = base_path + program + 'pre' + algorithm + str(i) + '/afl_out/queue/'

        save_location = 'edgecoverage/' + program + 'pre/' + program + 'pre' + algorithm + str(i)

        inseed_paths = [base_path + program + 'pre' + algorithm + "1" + '/afl_in/']

        if len(os.listdir(seed_path)) > 1:
            if ' ' in str(program_arguments[program]):
                program_call = str(fuzz_programs[program]+program_arguments[program]).split(' ')
            else:
                program_call = [fuzz_programs[program]+program_arguments[program]]

            df_files = reading_files(seed_path)
            if False: #algorithm == 'neuzz':
                #replace mtime of neuzz starter with real mtime
                print("neuzz m_time replacement")
                df_files = df_files.set_index("filename")
                for filename in tqdm(sorted(os.listdir(inseed_paths[1]))):
                    if filename not in ['.state', '.cur_input']: 
                        df_files.loc[filename, 'm_timestamp'] = pd.to_datetime(os.path.getmtime(inseed_paths[1] + filename), unit='s')
                df_files = df_files.reset_index()
            # Running Time
            #creation_date = df_files.iloc[1]['m_timestamp']
            #df_files = df_files.loc[df_files['m_timestamp']<(creation_date + pd.Timedelta(days=1))]

            #df_files['running_s'] = df_files['m_timestamp'] - creation_date
            #df_files = df_files.sort_values('m_timestamp')
            #df_files['running_s'] = df_files['running_s'].apply(lambda x: x if x.total_seconds()>0.0 else 0.0) 
            df_files['m_timestamp'].iloc[0] = df_files['m_timestamp'].iloc[1]
            #df_files = df_files.sort_values('m_timestamp')
            # Calling for pre-coverage (what coverage does the input have?)
            pre_coverage = call_for_precoverage(inseed_paths[0], program_call)
            #for inseed_path in inseed_paths:
            #   pre_coverage += call_for_precoverage(inseed_path, program_call)
            #pre_coverage = np.unique(pre_coverage)
            call_program_for_coverage(df_files, pre_coverage, program_call, seed_path, save_location)
 #   except:
 #       continue



  0%|          | 0/514 [00:00<?, ?it/s][A[A

100%|██████████| 514/514 [00:00<00:00, 3039.65it/s][A[A


0it [00:00, ?it/s][A[A

neuzz m_time replacement




26it [00:00, 252.31it/s][A[A

50it [00:00, 245.48it/s][A[A

73it [00:00, 239.95it/s][A[A

96it [00:00, 234.48it/s][A[A

119it [00:00, 230.75it/s][A[A

141it [00:00, 227.38it/s][A[A

163it [00:00, 224.46it/s][A[A

185it [00:00, 221.86it/s][A[A

208it [00:00, 221.35it/s][A[A

230it [00:01, 219.10it/s][A[A

252it [00:01, 216.29it/s][A[A

274it [00:01, 211.57it/s][A[A

295it [00:01, 209.54it/s][A[A

316it [00:01, 209.04it/s][A[A

337it [00:01, 208.83it/s][A[A

358it [00:01, 208.89it/s][A[A

379it [00:01, 207.76it/s][A[A

400it [00:01, 205.45it/s][A[A

421it [00:01, 202.48it/s][A[A

442it [00:02, 200.15it/s][A[A

463it [00:02, 199.23it/s][A[A

483it [00:02, 197.27it/s][A[A

503it [00:02, 195.24it/s][A[A

523it [00:02, 195.81it/s][A[A

543it [00:02, 194.01it/s][A[A

563it [00:02, 192.30it/s][A[A

583it [00:02, 189.53it/s][A[A

602it [00:02, 187.24it/s][A[A

621it [00:03, 187.10it/s][A[A

640it [00:03, 187.02it/s][A[A

659it [00:03

neuzz m_time replacement



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


FileNotFoundError: [Errno 2] No such file or directory: '/home/deif/gits/fuzztestbench/sizepre/sizepreneuzz2/afl_in/'

# NEUZZ special treatment  
because the folder structure and seed handling make no sense at all

In [28]:
program = 'harfbuzz'

In [31]:
df = pd.read_csv('./edgecoverage/' + program + 'pre/' + program + 'preneuzz1.csv', sep=";")
df['m_timestamp'] = pd.to_datetime(df['m_timestamp'])

### usual treatment - adjust initial seed timestamp to beginning of fuzzing

In [32]:
df['m_timestamp'].iloc[0] = df['m_timestamp'].iloc[1]

### special treatment - adjust neuzz's seeds to stop of AFL's seed corpus stop

In [33]:
#find timedelta
afl_neuzz_delta = df.loc[~df['filename'].str.startswith('id:00')].iloc[0]['m_timestamp'] - df.loc[df['filename'].str.startswith('id:00')].iloc[-1]['m_timestamp']

In [34]:
df.loc[~df['filename'].str.startswith('id:00')]['m_timestamp'] = df.loc[~df['filename'].str.startswith('id:00')]['m_timestamp'] - afl_neuzz_delta

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


### phew that was ... strange

In [35]:
df['running_s'] = round((df['m_timestamp'] - df['m_timestamp'].iloc[0]).dt.total_seconds(), 0)

In [27]:
df.to_csv('./edgecoverage/' + program + 'pre/' + program + 'preneuzz1.csv')

In [12]:
project_dir = '/home/david/Documents/gits/MA-Scripts/SeedFolderToCoverageData/objdumppreafl1/'
seed_dir = project_dir + 'afl_out/queue/'
exec_dir = project_dir + 'objdump'
in_dir = project_dir + 'afl_in/'
args = ['-D']

initial_seed = os.listdir(seed_dir)[0]

In [37]:
df_files = df_files.sort_values(by=['m_timestamp'])
#df_files['modification_date'] = df_files['modification_date'].dt.round('1s')
#df_files['modification_date'] = df_files['modification_date'].apply(lambda x: x.time())
creation_date = df_files.iloc[1]['m_timestamp']
df_files['running_s'] = df_files['m_timestamp'] - creation_date
df_files['running_s'] = df_files['running_s'].apply(lambda x: x if x>0 else 0.0) 
df_files['pretty_mtime'] = df_files['m_timestamp'].apply(lambda x: datetime.datetime.fromtimestamp(x))

df_files = df_files.set_index('filename')

df_files.reset_index().columns