In [10]:
#!/usr/bin/env python
# coding: utf-8

# In[25]:


import pandas as pd

# Only the coverage snapshot recorded at this value of the `time` column is analysed.
# NOTE(review): presumably seconds (82800 s = 23 h) -- confirm against the experiment CSVs.
TIME = 82800

# Important note!
# To compare the libafl-based fuzzers, exclude aflpp, honggfuzz and libfuzzer from the fuzzer list;
# to compare aflpp, honggfuzz and libfuzzer, exclude the libafl-based fuzzers instead.
# Mixing the two groups does not really make sense because, as we wrote in the paper,
# "rank is relative": fuzzers should be compared against the same baseline.
# We did present comparison results for afl++, honggfuzz and libfuzzer, but the main purpose
# of that was to confirm that our study aligns with Dylan's work.

# Columns kept from every raw experiment CSV.
_KEPT_COLUMNS = ['fuzzer', 'benchmark', 'edges_covered', 'time']


def _snapshot_at_time(raw, fuzzers):
    """Return the rows of `raw` whose fuzzer is in `fuzzers` and whose time equals TIME.

    Only the columns listed in `_KEPT_COLUMNS` are returned; the original row
    index and row order of `raw` are preserved.
    """
    wanted = raw['fuzzer'].isin(fuzzers) & (raw['time'] == TIME)
    return raw.loc[wanted, _KEPT_COLUMNS]


# Show every row when a DataFrame is displayed.
pd.set_option('display.max_rows', None)

# Fuzzers taken from the 22nd May 2023 experiment.
fuzzer_list_1 = [
    # These three are disabled to see the libafl-based fuzzers' rank correlation
    # "aflplusplus",
    # "honggfuzz",
    # "libfuzzer",
    "libafl_fuzzbench_cov_accounting",
    "libafl_fuzzbench_explore",
    "libafl_fuzzbench_mopt",
    "libafl_fuzzbench_value_profile",
    "libafl_fuzzbench_weighted",
    "libafl_fuzzbench_cmplog",
    "libafl_fuzzbench_naive",
    "libafl_fuzzbench_fast",
    "libafl_fuzzbench_rand_scheduler",
]
# Some experiments were not complete, so they were run again on 2nd June 2023.
fuzzer_list_2 = [
    "libafl_fuzzbench_ngram4",
    "libafl_fuzzbench_ngram8",
    "libafl_fuzzbench_naive_ctx",
]
# Lastly, a fix was added to the grimoire fuzzer, so it was rerun on 25th September 2023.
fuzzer_list_3 = [
    "libafl_fuzzbench_grimoire",
]
fuzzer_list = fuzzer_list_1 + fuzzer_list_2 + fuzzer_list_3

df = pd.read_csv("./experiments/libafl0522.csv", engine='python')
selected = _snapshot_at_time(df, fuzzer_list_1)

# The benchmark set is taken from the first experiment only.
benchmarks = selected.benchmark.unique()

dff = pd.read_csv("./experiments/libafl0602.csv", engine='python')
selectedd = _snapshot_at_time(dff, fuzzer_list_2)

dfff = pd.read_csv("./experiments/libafl0925.csv", engine='python')
selecteddd = _snapshot_at_time(dfff, fuzzer_list_3)

# One frame holding the final snapshot of every fuzzer, also persisted for inspection.
result = pd.concat([selected, selectedd, selecteddd])
result.to_csv("result.csv")

import matplotlib.pyplot as plt

import json
import numpy as np



In [11]:
# Spearman
import warnings
# Blanket filter: suppress every warning raised from here on.
warnings.filterwarnings("ignore")
# mode = "PERCENTILE"
# Turn off pandas' chained-assignment warning.
pd.options.mode.chained_assignment = None

# Accumulator filled by run_analysis:
#   fuzzer name -> list of (file, (spearman_r, kendall_tau, pvalue))
corr_result = {}

# Set to True to enable the debug prints below.
printf_debug = False
# to color the graphs
def get_cmap(n, name='hsv'):
    '''Return the colormap `name` resampled to `n` entries.

    The returned object maps each index in 0, 1, ..., n-1 to an RGB color;
    the keyword argument name must be a standard mpl colormap name.
    '''
    # matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9.
    # pyplot.get_cmap takes the same (name, lut) arguments and is still available.
    return plt.get_cmap(name, n)

def run_analysis(file, property_data, filename):
    """Correlate each fuzzer's coverage rank with a benchmark property rank.

    For every benchmark in the global `benchmarks`, all rows of the global
    `result` frame for that benchmark are ranked by `edges_covered` (ties get
    the average rank). For every fuzzer in the global `fuzzer_list`, the ranks
    of its own rows are paired with the benchmark's property rank, and the
    Spearman and Kendall correlations over all pairs are computed.

    Args:
        file: Label stored next to the statistics in `corr_result`.
        property_data: Mapping from benchmark name to a two-element sequence
            whose first element is the property rank; the second element is
            not used here.
        filename: Used only in the title of the scatter plot.

    Returns:
        None. One `(file, (spearman_r, kendall_tau, pvalue))` entry per fuzzer
        is appended to the global `corr_result` once all fuzzers succeeded.

    Raises:
        KeyError: if a benchmark is missing from `property_data`.
        ValueError: if a fuzzer has no rows in `result` for any benchmark.
    """
    # Local import, done once per call instead of once per fuzzer.
    from scipy import stats

    if printf_debug:
        print("Loading", file)

    # The ranking only depends on the benchmark, not on the fuzzer being
    # analysed, so compute it once per benchmark. Working on the Series
    # directly also avoids writing a new column into a filtered slice.
    ranked_benchmarks = []
    for benchmark in benchmarks:
        property_rank, _property_value = property_data[benchmark]
        rows = result[result['benchmark'] == benchmark]
        coverage_ranks = rows['edges_covered'].rank(method='average')
        ranked_benchmarks.append((property_rank, rows['fuzzer'], coverage_ranks))

    res_spearman = dict()
    for FUZZER in fuzzer_list:
        # X: property rank of the benchmark, y: rank of one of FUZZER's rows.
        X = []
        y = []
        for property_rank, fuzzer_names, coverage_ranks in ranked_benchmarks:
            own_ranks = coverage_ranks[fuzzer_names == FUZZER].tolist()
            y.extend(own_ranks)
            X.extend([property_rank] * len(own_ranks))

        if not X:
            # Fail fast with a clear message instead of an opaque error from
            # a downstream numeric routine.
            raise ValueError("no data points for fuzzer {!r}".format(FUZZER))

        plt.scatter(X, y)
        plt.xlabel("property rank")
        plt.ylabel("fuzzer rank")
        plt.title("{} {}".format(filename, FUZZER))

        # Spearman
        spe = stats.spearmanr(X, y)
        spearman_r = spe.statistic
        pvalue = spe.pvalue
        # Kendall
        tau = stats.kendalltau(X, y).statistic

        res_spearman[FUZZER] = (spearman_r, tau, pvalue)
        plt.annotate(text='r = {}, p = {}'.format(spearman_r, pvalue), xy=(0.03, 0.03), xycoords='figure fraction')
        # The figure is cleared right away, so nothing is displayed unless a
        # plt.show()/plt.savefig() call is added before this line.
        plt.clf()

    # Publish only after every fuzzer was processed, so a failure above does
    # not leave partial results in corr_result.
    for key, statistics in res_spearman.items():
        corr_result.setdefault(key, []).append((file, statistics))
import os
# Collect (path, basename) for every file under '../data'.
# Place the data in '../data' to load.
file_list = []
for root, _subdirs, files in os.walk("../data"):
    for name in files:
        file_list.append((os.path.join(root, name), name))

# Sorting is not required for the analysis; it just keeps debugging deterministic.
file_list.sort()
for abs_path, file in file_list:
    with open(abs_path) as f:
        property_data = json.load(f)
    # Each property file must contain exactly 23 entries.
    assert len(property_data) == 23
    # Pass this iteration's path. The previous code passed the variable left
    # over from the os.walk loop, which always named the last file found, so
    # every corr_result entry carried the same label.
    run_analysis(abs_path, property_data, file)

# Order each fuzzer's results by Spearman coefficient (ascending).
for k, v in corr_result.items():
    v.sort(key=lambda entry: entry[1][0])
    for nnn, (r, tau, _pvalue) in v:
        if printf_debug:
            print(k, nnn, r, tau)


<Figure size 640x480 with 0 Axes>