In [1]:
import itertools
import os

import numpy as np
import pandas as pd

In [2]:
# Fuzzing techniques under evaluation; each name is a directory below results/<experiment>/.
fuzzers = [
    'bedivfuzz-havoc',
    'bedivfuzz-simple',
    'bedivfuzz-structure',
    'bedivfuzz-split',
    'quickcheck',
    'rlcheck',
    'zest',
]

# Benchmark subjects; each name is a directory below a fuzzer directory.
subjects = [
    'ant',
    'bcel',
    'chocopy',
    'closure',
    'maven',
    'nashorn',
    'pngj',
    'rhino',
    'tomcat',
]

num_trials = 30  # trials are numbered 1..num_trials
campaign_timeout = 180  # campaign duration in minutes

In [3]:
def coverage_trial_df(experiment_name: str, fuzzer: str, subject: str, trial: int, timeout: int, resolution: int):
    """Load the ``plot_data`` of one trial and resample it onto a fixed time grid.

    Args:
        experiment_name: Directory of the experiment below ``results``.
        fuzzer: Fuzzer name (directory below the experiment directory).
        subject: Subject name (directory below the fuzzer directory).
        trial: Trial number; data is read from ``trial-<trial>``.
        timeout: Campaign duration in minutes.
        resolution: Number of data points per minute in the result.

    Returns:
        A DataFrame with ``timeout * resolution`` rows containing the identifying
        columns (``fuzzer``, ``subject``, ``trial``, ``time`` in seconds), the input
        counters, the derived ``validity_rate`` and the coverage columns.
        ``unique_valid_paths`` is included only if the plot data provides it.
    """
    trial_dir = os.path.join('results', experiment_name, fuzzer, subject, f'trial-{trial}')
    df = pd.read_csv(os.path.join(trial_dir, 'plot_data'), skipinitialspace=True)

    if fuzzer == 'bedivfuzz-split':
        # The data stored under zest-results is placed in front of the trial's own plot_data
        zest_plot_data = pd.read_csv(
            os.path.join(trial_dir, 'zest-results', 'plot_data'),
            skipinitialspace=True
        )
        df = pd.concat([zest_plot_data, df], ignore_index=True)

    # resolution refers to the number of data points per minute
    n_timestamps = timeout * resolution

    # Select n_timestamps evenly spaced rows (row 0 is skipped). The copy makes the
    # column assignments below operate on an independent frame.
    df = df.loc[np.linspace(1, len(df) - 1, n_timestamps, dtype=np.int64)].copy()

    # Time in seconds of the k-th data point: floor(k * 60 / resolution). Computing it per
    # point keeps the column length equal to n_timestamps even if 60 is not a multiple of
    # resolution (a fixed integer step would produce a different number of timestamps).
    df['time'] = np.arange(1, n_timestamps + 1, dtype=np.int64) * 60 // resolution
    df['trial'] = trial
    df['fuzzer'] = fuzzer
    df['subject'] = subject
    df['validity_rate'] = df['valid_inputs'] / (df['valid_inputs'] + df['invalid_inputs'])

    columns = [
        'fuzzer', 'subject', 'trial', 'time',
        'valid_inputs', 'invalid_inputs', 'validity_rate', 'unique_paths',
    ]
    if 'unique_valid_paths' in df.columns:
        columns.append('unique_valid_paths')
    columns += ['num_coverage_probes', 'num_semantic_probes', 'b0', 'b1', 'b2']

    return df[columns]

In [4]:
def check_missing_trials(experiment_name, fuzzer, subject, n_trials=None):
    """Return True if any expected trial directory of a fuzzer/subject pair is absent.

    Args:
        experiment_name: Directory of the experiment below ``results``.
        fuzzer: Fuzzer name (directory below the experiment directory).
        subject: Subject name (directory below the fuzzer directory).
        n_trials: Number of expected trials (``trial-1`` .. ``trial-<n_trials>``).
            Defaults to the module-level ``num_trials``.

    Returns:
        True if at least one ``trial-<t>`` directory does not exist, False otherwise.
    """
    if n_trials is None:
        n_trials = num_trials

    # Only the trials of the requested pair are inspected; iterating over every
    # fuzzer/subject combination would merely repeat the same checks.
    pair_dir = os.path.join('results', experiment_name, fuzzer, subject)
    return any(
        not os.path.isdir(os.path.join(pair_dir, f"trial-{t}"))
        for t in range(1, n_trials + 1)
    )

def coverage_results_to_csv(experiment_name, timeout, resolution):
    """Collect the resampled coverage data of all available trials and store it as CSV.

    Every fuzzer/subject pair from the module-level ``fuzzers`` and ``subjects`` lists is
    visited; pairs with missing trial directories are reported and the existing trials
    are still loaded. The combined frame is written to
    ``results/<experiment_name>/coverage-data.csv`` and returned.
    """
    experiment_dir = os.path.join('results', experiment_name)
    per_trial = []

    for fuzzer in fuzzers:
        for subject in subjects:
            print(f"Loading results for: {fuzzer}-{subject}")
            if check_missing_trials(experiment_name, fuzzer, subject):
                print(f"\tMissing trials for: {fuzzer}-{subject}")

            # Load only the trials whose directory is actually present
            per_trial.extend(
                coverage_trial_df(experiment_name, fuzzer, subject, trial, timeout, resolution)
                for trial in range(1, num_trials + 1)
                if os.path.isdir(os.path.join(experiment_dir, fuzzer, subject, f"trial-{trial}"))
            )

    trials = pd.concat(per_trial)
    trials.to_csv(os.path.join(experiment_dir, 'coverage-data.csv'), index=False)
    return trials

In [5]:
# Full-length campaigns, sampled once per minute
eval_diversity_default = coverage_results_to_csv(
    experiment_name='eval-bedivfuzz-split-3h/default',
    timeout=campaign_timeout,
    resolution=1,
)
eval_diversity_semantic = coverage_results_to_csv(
    experiment_name='eval-bedivfuzz-split-3h/semantic',
    timeout=campaign_timeout,
    resolution=1,
)

Loading results for: bedivfuzz-havoc-ant
Loading results for: bedivfuzz-havoc-bcel
Loading results for: bedivfuzz-havoc-chocopy
Loading results for: bedivfuzz-havoc-closure
Loading results for: bedivfuzz-havoc-maven
Loading results for: bedivfuzz-havoc-nashorn
Loading results for: bedivfuzz-havoc-pngj
Loading results for: bedivfuzz-havoc-rhino
Loading results for: bedivfuzz-havoc-tomcat
Loading results for: bedivfuzz-simple-ant
Loading results for: bedivfuzz-simple-bcel
Loading results for: bedivfuzz-simple-chocopy
Loading results for: bedivfuzz-simple-closure
Loading results for: bedivfuzz-simple-maven
Loading results for: bedivfuzz-simple-nashorn
Loading results for: bedivfuzz-simple-pngj
Loading results for: bedivfuzz-simple-rhino
Loading results for: bedivfuzz-simple-tomcat
Loading results for: bedivfuzz-structure-ant
Loading results for: bedivfuzz-structure-bcel
Loading results for: bedivfuzz-structure-chocopy
Loading results for: bedivfuzz-structure-closure
Loading results for: b

In [6]:
# Five-minute campaigns, sampled 20 times per minute (one data point every 3 seconds)
eval_diversity_default_5min = coverage_results_to_csv(
    experiment_name='eval-bedivfuzz-split-5min/default',
    timeout=5,
    resolution=20,
)
eval_diversity_semantic_5min = coverage_results_to_csv(
    experiment_name='eval-bedivfuzz-split-5min/semantic',
    timeout=5,
    resolution=20,
)

Loading results for: bedivfuzz-havoc-ant
Loading results for: bedivfuzz-havoc-bcel
Loading results for: bedivfuzz-havoc-chocopy
Loading results for: bedivfuzz-havoc-closure
Loading results for: bedivfuzz-havoc-maven
Loading results for: bedivfuzz-havoc-nashorn
Loading results for: bedivfuzz-havoc-pngj
Loading results for: bedivfuzz-havoc-rhino
Loading results for: bedivfuzz-havoc-tomcat
Loading results for: bedivfuzz-simple-ant
Loading results for: bedivfuzz-simple-bcel
Loading results for: bedivfuzz-simple-chocopy
Loading results for: bedivfuzz-simple-closure
Loading results for: bedivfuzz-simple-maven
Loading results for: bedivfuzz-simple-nashorn
Loading results for: bedivfuzz-simple-pngj
Loading results for: bedivfuzz-simple-rhino
Loading results for: bedivfuzz-simple-tomcat
Loading results for: bedivfuzz-structure-ant
Loading results for: bedivfuzz-structure-bcel
Loading results for: bedivfuzz-structure-chocopy
Loading results for: bedivfuzz-structure-closure
Loading results for: b

In [7]:
def assign_crash_id(subject, exception_class, location, stack_trace):
    """Map a reproduced failure to the label of a known bug.

    Args:
        subject: Benchmark subject the failure was found in.
        exception_class: Fully qualified name of the thrown exception.
        location: Location string of the failure. May be a non-string (e.g. the float
            NaN pandas produces for an empty CSV cell).
        stack_trace: Stack trace with frames joined by ``-``. May be a non-string.

    Returns:
        The bug label (e.g. ``'B1'``, ``'C3'``, ``'N8'``, ``'R6'``), or ``-1`` for the
        known Nashorn false positive.

    Raises:
        ValueError: If the failure does not match any known bug.
    """
    # Prefix checks need real strings. Non-string values (missing CSV cells) are mapped to
    # '' so that they fall through to the "Unknown bug" error instead of raising an
    # AttributeError on .startswith().
    location_text = location if isinstance(location, str) else ''
    stack_trace_text = stack_trace if isinstance(stack_trace, str) else ''

    if subject == 'bcel':
        if exception_class == 'java.lang.OutOfMemoryError':
            if str(location) == 'nan' or location_text.startswith('org.apache.bcel.generic.TABLESWITCH.initFromFile'):
                return 'B1'
        elif exception_class == 'org.apache.bcel.verifier.exc.AssertionViolatedException':
            if location == 'org.apache.bcel.util.ClassPath.getClassFile(ClassPath.java:656)':
                # INTERNAL ERROR: Missing class: java.lang.ClassNotFoundException:
                # Exception while looking for class example.A: java.io.IOException: Couldn't find: example/A.class
                return 'B2'
            elif location == 'java.base/jdk.internal.util.Preconditions.outOfBounds(Preconditions.java:64)':
                # INTERNAL ERROR: Some RuntimeException occurred while verify()ing class ...
                # Additional message: Index -1 out of bounds for length 0
                return 'B3'
            elif location == 'org.apache.bcel.verifier.structurals.ControlFlowGraph$InstructionContextImpl.getOutFrame(ControlFlowGraph.java:285)':
                # INTERNAL ERROR: Some RuntimeException occurred while verify()ing class
                # Additional message: outFrame not set!
                return 'B4'
    elif subject == 'closure':
        if exception_class == 'java.lang.RuntimeException':
            if location == 'com.google.javascript.jscomp.jarjar.com.google.common.base.Preconditions.checkState(Preconditions.java:502)':
                # INTERNAL COMPILER ERROR.
                # (Caused by an IllegalStateException)
                return 'C1'
            elif location == 'com.google.javascript.jscomp.InlineObjectLiterals$InliningBehavior.afterExitScope(InlineObjectLiterals.java:87)':
                # INTERNAL COMPILER ERROR.
                # (Caused by a NPE)
                return 'C2'
            elif location == 'com.google.javascript.jscomp.VarCheck.handleUndeclaredVariableRef(VarCheck.java:289)':
                # INTERNAL COMPILER ERROR.
                # (Caused by an IllegalStateException)
                # Additional message: Unexpected variable a
                return 'C3'
        elif exception_class == 'java.lang.IllegalStateException':
            if location == 'com.google.javascript.jscomp.jarjar.com.google.common.base.Preconditions.checkState(Preconditions.java:590)':
                # Cannot format source excerpt; unexpected start character for error: JSC_PARSE_ERROR
                return 'C4'
        elif exception_class == 'java.lang.NullPointerException':
            if location == 'com.google.javascript.jscomp.jarjar.com.google.common.base.Preconditions.checkNotNull(Preconditions.java:907)':
                return 'C5'
    elif subject == 'nashorn':
        if exception_class == 'java.lang.RuntimeException':
            if location == 'org.openjdk.nashorn.internal.codegen.Lower.throwNotImplementedYet(Lower.java:875)':
                return -1 # False positive, exception due to not yet implemented feature
        elif exception_class == 'java.lang.AssertionError':
            if location == 'org.openjdk.nashorn.internal.parser.Lexer.scanTemplate(Lexer.java:1072)':
                return 'N1'
            elif location == 'org.openjdk.nashorn.internal.parser.ParserContext.pop(ParserContext.java:91)':
                return 'N2'
            elif location == 'org.openjdk.nashorn.internal.codegen.MethodEmitter.load(MethodEmitter.java:985)':
                # Failed generating bytecode for <eval>:1
                # Additional message: Attempted load of uninitialized slot 3 (as type boolean)
                return 'N3'
            elif location == 'org.openjdk.nashorn.internal.codegen.MethodEmitter.pushType(MethodEmitter.java:250)':
                # Failed generating bytecode for <eval>:1
                # (Caused by a NPE)
                return 'N4'
            elif location == 'org.openjdk.nashorn.internal.codegen.CodeGenerator$2.enterDefault(CodeGenerator.java:1226)':
                # Failed generating bytecode for <eval>:1
                # Additional message: org.openjdk.nashorn.internal.ir.BinaryNode
                return 'N5'
            elif location == 'org.openjdk.nashorn.internal.codegen.CodeGenerator$TypeBounds.<init>(CodeGenerator.java:761)':
                # Failed generating bytecode for <eval>:1
                # Additional message: object<type=Undefined>
                return 'N6'
            elif location == 'org.openjdk.nashorn.internal.codegen.CodeGenerator.storeIdentWithCatchConversion(CodeGenerator.java:3363)':
                # Failed generating bytecode for <eval>:1
                return 'N7'
        elif exception_class == 'java.lang.VerifyError':
            if location == 'java.base/jdk.internal.misc.Unsafe.defineAnonymousClass0(Native Method)':
                # Stack map does not match the one at exception handler X
                return 'N8'
    elif subject == 'rhino':
        if exception_class == 'java.lang.IllegalStateException':
            # Both assertion failures are thrown from Kit.codeBug, so the first two frames
            # of the stack trace are compared instead of the location
            if stack_trace_text.startswith('org.mozilla.javascript.Kit.codeBug(Kit.java:366)-org.mozilla.javascript.IRFactory.decompile(IRFactory.java:2498)'):
                # FAILED ASSERTION: unexpected token: LP
                return 'R1'
            elif stack_trace_text.startswith('org.mozilla.javascript.Kit.codeBug(Kit.java:353)-org.mozilla.javascript.TokenStream.ungetChar(TokenStream.java:1990)'):
                # FAILED ASSERTION
                return 'R2'
        elif exception_class == 'java.lang.ArrayIndexOutOfBoundsException':
            if location == 'org.mozilla.javascript.TokenStream.ungetChar(TokenStream.java:1991)':
                return 'R3'
        elif exception_class == 'java.lang.OutOfMemoryError':
            if location == 'java.base/java.util.Arrays.copyOf(Arrays.java:3745)':
                return 'R4'
        elif exception_class == 'java.lang.NullPointerException':
            if location == 'org.mozilla.javascript.TokenStream.stringToKeywordForJS(TokenStream.java:161)':
                return 'R5'
        elif exception_class == 'java.lang.ClassCastException':
            # class org.mozilla.javascript.Node cannot be cast to class org.mozilla.javascript.ast.(ArrayLiteral|ObjectLiteral)
            if location_text.startswith('org.mozilla.javascript.Parser.destructuringAssignmentHelper'):
                return 'R6'
    raise ValueError(f"Unknown bug: {subject} {exception_class} {location}")

In [27]:
def crash_trial_df(experiment_name: str, fuzzer: str, subject: str, trial: int):
    """Load the reproduced failures of one trial together with their time-to-exposure.

    Reads ``failure_log.csv`` (one row per reproduced failure) and ``failure_info.csv``
    (one TTE value per failure) from the trial directory. For ``quickcheck`` and
    ``rlcheck`` the trial directory is located below ``dry-run``.

    Args:
        experiment_name: Directory of the experiment below ``results``.
        fuzzer: Fuzzer name.
        subject: Subject name.
        trial: Trial number.

    Returns:
        A DataFrame with the columns ``fuzzer``, ``subject``, ``trial``, ``tte``,
        ``exception_class``, ``location``, ``message``, ``stack_trace`` and ``crash_id``,
        where ``crash_id`` is the bug label from ``assign_crash_id`` and ``tte`` is ``-1``
        for failures without a TTE entry. A header-only ``failure_log.csv`` yields an
        empty frame with these columns.
    """
    if fuzzer in ('quickcheck', 'rlcheck'):
        trial_dir = os.path.join('results', experiment_name, fuzzer, 'dry-run', subject, f"trial-{trial}")
    else:
        trial_dir = os.path.join('results', experiment_name, fuzzer, subject, f"trial-{trial}")

    # Read crash information from repro
    df = pd.read_csv(
        os.path.join(trial_dir, 'failure_log.csv'),
        skipinitialspace=True
    ).rename(columns={'# crash_id': 'crash_id'})

    # Retrieve failure TTE from failure_info.csv file
    crash_info = pd.read_csv(
        os.path.join(trial_dir, 'failure_info.csv'),
        skipinitialspace=True
    ).rename(columns={'# ttd': 'tte'})

    if fuzzer == 'bedivfuzz-split':
        zest_crash_info = pd.read_csv(
            os.path.join(trial_dir, 'zest-results', 'failure_info.csv'),
            skipinitialspace=True
        ).rename(columns={'# ttd': 'tte'})

        # BeDivFuzz-Split resets the timer after switching guidances, so we need to correct the TTE values
        crash_info['tte'] = crash_info['tte'] + (campaign_timeout/2 * 60 * 1000) # convert min to ms

        if len(zest_crash_info) > 0:
            crash_info = pd.concat([crash_info, zest_crash_info], ignore_index=True)

    # The n-th TTE entry belongs to the failure with id "id_<n, zero-padded to 6 digits>"
    crash_id_to_tte = {
        f"id_{crash_idx:06}": tte
        for crash_idx, tte in enumerate(crash_info['tte'])
    }

    df['fuzzer'] = fuzzer
    df['subject'] = subject
    df['trial'] = trial

    # Plain comprehensions instead of DataFrame.apply(axis=1): a row-wise apply on a frame
    # without rows does not reliably return a Series, which breaks the column assignment
    # for a failure_log.csv that only contains the header.
    df['tte'] = [crash_id_to_tte.get(crash_id, -1) for crash_id in df['crash_id']]
    df['crash_id'] = [
        # exception_class is stored with a leading word; the class name is the second token
        assign_crash_id(subject, exception_class.split(' ')[1], location, stack_trace)
        for exception_class, location, stack_trace
        in zip(df['exception_class'], df['location'], df['stack_trace'])
    ]

    return df[['fuzzer', 'subject', 'trial', 'tte', 'exception_class', 'location', 'message', 'stack_trace', 'crash_id']]

In [28]:
def check_failure_repro(trial_dir):
    """Tell whether the repro step of a trial recorded at least one failure.

    Returns False if ``failure_log.csv`` does not exist in ``trial_dir`` (repro was not
    run) or if it holds nothing beyond its header line; True otherwise.
    """
    log_path = os.path.join(trial_dir, 'failure_log.csv')

    if os.path.isfile(log_path):
        with open(log_path, 'r') as log:
            # The first line is the header, every further line counts as an entry
            return sum(1 for _ in log) > 1

    return False

In [31]:
def crash_results_to_csv(experiment_name: str):
    """Collect the reproduced crashes of all trials of an experiment and store them as CSV.

    Every combination of the module-level ``fuzzers``, ``subjects`` and trial numbers
    ``1..num_trials`` is visited; trials without reproduced failures are skipped. The
    combined frame is written to ``results/<experiment_name>/crash-data.csv`` and returned.
    If no trial has reproduced failures, an empty frame with the usual columns is
    written and returned.
    """
    trial_crashes = []
    for fuzzer, subject, trial in itertools.product(fuzzers, subjects, range(1, num_trials+1)):
        if fuzzer in ('quickcheck', 'rlcheck'):
            trial_dir = os.path.join('results', experiment_name, fuzzer, 'dry-run', subject, f"trial-{trial}")
        else:
            trial_dir = os.path.join('results', experiment_name, fuzzer, subject, f"trial-{trial}")

        # Check if we were able to repro crashes; for bedivfuzz-split, failures recorded
        # under zest-results count as well.
        # NOTE(review): crash_trial_df always reads <trial_dir>/failure_log.csv, so a split
        # trial whose only failure_log.csv lives under zest-results would fail there --
        # confirm that this layout cannot occur.
        failures_found = check_failure_repro(trial_dir) or (
            fuzzer == 'bedivfuzz-split'
            and check_failure_repro(os.path.join(trial_dir, 'zest-results'))
        )
        if failures_found:
            trial_crashes.append(crash_trial_df(experiment_name, fuzzer, subject, trial))

    if trial_crashes:
        crash_data = pd.concat(trial_crashes)
    else:
        # pd.concat raises on an empty list; an experiment without any reproduced crash
        # still gets a (header-only) crash-data.csv
        crash_data = pd.DataFrame(columns=[
            'fuzzer', 'subject', 'trial', 'tte', 'exception_class',
            'location', 'message', 'stack_trace', 'crash_id'
        ])

    crash_data.to_csv(
        os.path.join('results', experiment_name, 'crash-data.csv'),
        index=False
    )
    return crash_data

In [32]:
# Crash data of the three-hour campaigns, one CSV per configuration
default_crashes = crash_results_to_csv(experiment_name='eval-bedivfuzz-split-3h/default')
semantic_crashes = crash_results_to_csv(experiment_name='eval-bedivfuzz-split-3h/semantic')

# Process ICSE22 Results

In [17]:
import shutil

In [20]:
# Reorganizes the directory structure of the original artifact
move_dirs = False
experiment = 'eval-icse22-crashes'


def reorganize_artifact_dirs(experiment):
    """Move the flat result directories of the original artifact into a nested layout.

    Each directory ``results/<experiment>/java-data/<tech>-<benchmark>-<trial...>`` is
    moved to ``results/<experiment>/<tech>/<benchmark>/trial-<trial...>``. If the second
    name component is ``simple`` or ``structure`` it is treated as part of the technique
    name (``<tech>-simple`` / ``<tech>-structure``). Plain files and directories whose
    name has too few ``-``-separated components are left untouched.
    """
    base_dir = os.path.join('results', experiment, 'java-data')
    for d in os.listdir(base_dir):
        results_dir = os.path.join(base_dir, d)
        if not os.path.isdir(results_dir):
            continue

        tokens = d.split('-')
        # Number of leading tokens that form the technique name
        n_tech_tokens = 2 if len(tokens) > 1 and tokens[1] in ['simple', 'structure'] else 1
        if len(tokens) <= n_tech_tokens:
            # No benchmark component: not a result directory
            continue

        tech = '-'.join(tokens[:n_tech_tokens])
        benchmark = tokens[n_tech_tokens]
        trial_id = f"trial-{'-'.join(tokens[n_tech_tokens + 1:])}"

        # The parent of the move target must exist, otherwise the directory cannot simply
        # be renamed into place
        benchmark_dir = os.path.join('results', experiment, tech, benchmark)
        os.makedirs(benchmark_dir, exist_ok=True)

        shutil.move(results_dir, os.path.join(benchmark_dir, trial_id))


if move_dirs:
    reorganize_artifact_dirs(experiment)

In [4]:
# Configuration of the ICSE22 experiments.
# NOTE: this rebinds the module-level `fuzzers`, `subjects` and `num_trials`; every function
# that reads these globals uses the ICSE22 values once this cell has been executed.
fuzzers = [
    'bediv-simple',
    'bediv-structure',
    'quickcheck',
    'rl',
    'zest',
]
subjects = [
    'ant',
    'closure',
    'maven',
    'nashorn',
    'rhino',
    'tomcat',
]
num_trials = 30  # trials are numbered 1..num_trials
timeout = 60  # campaign duration in minutes

In [5]:
def icse_coverage_trial_df(fuzzer: str, subject: str, trial: int):
    """Load the ``plot_data`` of one ICSE22 coverage trial, resampled to two points per minute.

    For ``quickcheck`` and ``rl`` the data is read from the ``trial-<trial>-replay``
    directory, for all other fuzzers from ``trial-<trial>``. The result has
    ``2 * timeout`` rows (``timeout`` being the module-level duration in minutes) with a
    ``time`` column running from 0.5 to ``timeout`` minutes.
    """
    trial_dirname = f'trial-{trial}-replay' if fuzzer in ['quickcheck', 'rl'] else f'trial-{trial}'
    plot_data = os.path.join('results', 'eval-icse22-coverage', fuzzer, subject, trial_dirname, 'plot_data')

    # The header line of the file is skipped and replaced by these column names
    column_names = [
        'unix_time', 'unique_crashes', 'total_cov', 'valid_cov', 'total_inputs', 'valid_inputs',
        'unique_valid_paths', 'unique_valid_branch_sets', 'unique_valid_inputs', 'b0', 'b1', 'b2',
    ]
    raw = pd.read_csv(plot_data, names=column_names, header=None, skiprows=1, skipinitialspace=True)

    # two datapoints per minute
    sampled_rows = np.linspace(0, len(raw)-1, 2*timeout, endpoint=True, dtype=np.int64)
    resampled = raw.loc[sampled_rows].assign(
        time=np.arange(0.5, timeout + 0.5, 0.5),
        trial=trial,
        fuzzer=fuzzer,
        subject=subject,
        validity_rate=lambda frame: frame['valid_inputs'] / frame['total_inputs'],
    )

    output_columns = [
        'fuzzer', 'subject', 'trial', 'time',
        'valid_inputs', 'total_inputs', 'validity_rate', 'unique_valid_paths', 'b0', 'b1', 'b2',
    ]
    return resampled[output_columns]

In [6]:
# Resample every ICSE22 coverage trial and store the combined data as a single CSV
dfs = [
    icse_coverage_trial_df(fuzzer=f, subject=s, trial=t)
    for f, s, t in itertools.product(fuzzers, subjects, range(1, num_trials+1))
]

trials = pd.concat(dfs)
trials.to_csv(os.path.join('results', 'eval-icse22-coverage', 'coverage-data.csv'), index=False)

In [73]:
def icse_crash_trial_df(fuzzer: str, subject: str, trial: int):
    """Extract the crashes logged in the ``fuzz.log`` of one ICSE22 crash trial.

    Every log line containing ``Found crash`` is split on single spaces: token 0 is the
    time of the crash in ms, token 1 the failure id, token 5 the exception class and the
    tokens from index 7 on the message. The stack trace is read from
    ``failure_info/<failure id>.stacktrace``; its first frame is used as location.

    Args:
        fuzzer: Fuzzer name.
        subject: Subject name.
        trial: Trial number.

    Returns:
        A DataFrame with the columns ``fuzzer``, ``subject``, ``trial``, ``tte`` (in
        seconds), ``exception_class``, ``location``, ``message``, ``stack_trace`` and
        ``crash_id``. The frame is empty (but has these columns) if ``fuzz.log`` does not
        exist or does not report any crash.
    """
    columns = ['fuzzer', 'subject', 'trial', 'tte', 'exception_class', 'location', 'message', 'stack_trace', 'crash_id']

    def read_stack_trace(file):
        with open(file, 'r') as f:
            return '-'.join([line.strip() for line in f][1:]) # skip exception class

    trial_dir = os.path.join('results', 'eval-icse22-crashes', fuzzer, subject, f"trial-{trial}")
    failure_directory = os.path.join(trial_dir, 'failure_info')
    fuzz_log = os.path.join(trial_dir, 'fuzz.log')

    # Return empty df if there is no fuzz.log
    if not os.path.exists(fuzz_log):
        return pd.DataFrame(columns=columns)

    rows = []
    with open(fuzz_log, 'r') as f:
        for line in f:
            if 'Found crash' not in line:
                continue
            # Drop the line terminator first, otherwise it ends up in the last token
            # (the message, or the exception class if there is no message)
            tokens = line.rstrip('\n').split(" ")
            tte = int(int(tokens[0]) / 1000) # convert ms to s
            failure_id = tokens[1]
            exception_class = tokens[5]
            message = " ".join(tokens[7:]) if len(tokens) > 7 else 'no message'
            stack_trace = read_stack_trace(os.path.join(failure_directory, f"{failure_id}.stacktrace"))
            location = stack_trace.split('-')[0]

            rows.append({
                'fuzzer': fuzzer,
                'subject': subject,
                'trial': trial,
                'tte': tte,
                'exception_class': exception_class,
                'location': location,
                'message': message,
                'stack_trace': stack_trace,
                'crash_id': deduplicate_icse_crash(exception_class, location)
            })

    # Passing the columns keeps the schema identical even if no crash line was found
    return pd.DataFrame(rows, columns=columns)

In [75]:
# Note: generating the ICSE crash data takes *a lot* of time (because quickcheck produces so many crashes).
# Set the following flag to True to (re-)generate the crash data CSV; while it is False,
# the generation step guarded by this flag is skipped.
generate_icse_crash_data = False

# Maps (exception class, location) to the numeric id assigned to that crash
icse_crash_to_id = {}
def deduplicate_icse_crash(exception_class, location):
    """Return the numeric id of a crash, registering a new id on first sight.

    Crashes are identified by the pair (exception_class, location). Ids are handed out
    consecutively, starting at 0, in the order in which new pairs are encountered.
    """
    # setdefault only stores the next free id if the pair is not registered yet
    return icse_crash_to_id.setdefault((exception_class, location), len(icse_crash_to_id))

# Generate ICSE crash data csv (skipped unless generate_icse_crash_data is set)
if generate_icse_crash_data:
    trial_crashes = [
        icse_crash_trial_df(fuzzer=f, subject=s, trial=t)
        for f, s, t in itertools.product(fuzzers, subjects, range(1, num_trials+1))
    ]

    crash_data = pd.concat(trial_crashes)
    crash_data.to_csv(os.path.join('results', 'eval-icse22-crashes', 'crash-data.csv'), index=False)

In [79]:
# Display the (exception class, location) -> numeric id mapping filled by deduplicate_icse_crash
icse_crash_to_id

{('java.lang.RuntimeException',
  'com.google.javascript.jscomp.jarjar.com.google.common.base.Preconditions.checkArgument(Preconditions.java)'): 0,
 ('java.lang.StringIndexOutOfBoundsException',
  'java.base/java.lang.StringLatin1.charAt(StringLatin1.java:47)'): 1,
 ('java.lang.RuntimeException',
  'com.google.javascript.jscomp.InlineObjectLiterals$InliningBehavior.afterExitScope(InlineObjectLiterals.java)'): 2,
 ('java.lang.RuntimeException', ''): 3,
 ('java.lang.NullPointerException',
  'com.google.javascript.jscomp.jarjar.com.google.common.base.Preconditions.checkNotNull(Preconditions.java)'): 4,
 ('java.lang.RuntimeException',
  'com.google.javascript.jscomp.VarCheck.handleUndeclaredVariableRef(VarCheck.java)'): 5,
 ('java.lang.RuntimeException',
  'com.google.javascript.jscomp.PeepholeRemoveDeadCode.tryFoldLabel(PeepholeRemoveDeadCode.java)'): 6,
 ('java.lang.AssertionError',
  'jdk.scripting.nashorn/jdk.nashorn.internal.parser.ParserContext.pop(ParserContext.java:91)'): 7,
 ('jav