# Coverage Monitor

Problem statement

Approach

Who are we?

## Constants

In [1]:
import os

# benchmark directory; set the FUZZ_BENCHMARK_LOC environment variable to point
# the notebook at another checkout instead of editing this absolute path
benchmark_loc = os.environ.get(
    "FUZZ_BENCHMARK_LOC",
    "/Users/IceCream/Documents/hw22_fuzzing/benchmark",
)

# studied project
proj = "libpng-1.6.37-setting7"
proj_loc = os.path.join(benchmark_loc, proj)

# output directory (created on first run, left untouched when it already exists)
out_loc = os.path.join(proj_loc, "analysis")
os.makedirs(out_loc, exist_ok=True)

## Launching AFL

In [None]:
# afl driver goes here

## Monitoring executions

#### 1. Collecting coverage

In [None]:
%run cov_tracker.ipynb

def process_cov(proj_loc, out_loc):
    '''
    Build the coverage spectra and the per-trace execution counts of a project.

    The raw execution counts are cached in `<out_loc>/raw/exec_c.csv`: coverage
    is only collected (via `collect_cov`) when that file does not exist yet,
    otherwise the cached file is reused.

    Parameters:
        proj_loc: project directory handed to `collect_cov`.
        out_loc:  analysis output directory; the `raw` sub-directory is created
                  inside it when missing.

    Returns:
        (df_cov, df_exec_c) where
        - df_cov is the result of `compute_cov_spectra` on the raw counts
          (presumably indexed by "cid" with columns ["file", "line", "fn"] --
          TODO confirm against cov_tracker.ipynb);
        - df_exec_c holds the columns ["trace_id", "cid", "exec"], sorted by
          trace_id and then cid.

    Side effects:
        Rebinds the module-level `df_exec_c` and may write `raw/exec_c.csv`.
    '''
    global df_exec_c

    raw_data_loc = os.path.join(out_loc, 'raw')
    exec_c_csv = os.path.join(raw_data_loc, 'exec_c.csv')
    os.makedirs(raw_data_loc, exist_ok=True)

    if not os.path.exists(exec_c_csv):
        # collect the code coverage; collect_cov is expected to leave the raw
        # counts in the global `df_exec_c` (defined outside this cell -- verify)
        print("Collecting coverage results...")
        collect_cov(proj_loc)
        # store exec_c.csv so later runs can skip the collection step
        df_exec_c.to_csv(exec_c_csv, encoding='utf-8', index=False)
        print("Stored the execution counts as exec_c.csv")

    # always reload from disk so fresh and cached runs go through the same path
    df_exec_c = pd.read_csv(exec_c_csv)
    # format the code coverage
    df_cov = compute_cov_spectra(df_exec_c)

    # format the execution count: attach the statement id (`cid`) of each
    # (line, file, fn) triple, then keep only the id-based columns
    df_exec_c = pd.merge(df_exec_c, df_cov.reset_index(), how='outer', on=['line', 'file', 'fn'])
    df_exec_c = df_exec_c[['trace_id', 'cid', 'exec']].sort_values(by=['trace_id', 'cid'])

    return df_cov, df_exec_c

# module-level placeholder for the raw coverage data; process_cov declares
# `df_exec_c` as a global and rebinds it
df_exec_c = []
df_cov, df_exec_c = process_cov(proj_loc, out_loc)

# destinations of the two analysis artifacts
cov_csv = os.path.join(out_loc, 'spectra.csv')
exec_csv = os.path.join(out_loc, 'execution.csv')

# output 1: the coverage spectra [line, fn, file] covered by tests
df_cov.to_csv(cov_csv)

# output 2: the execution count on each statement `cid` for each trace id [trace_id, cid, exec]
df_exec_c.to_csv(exec_csv, index=False)

Collecting coverage results...


#### 2. Computing response time

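The response time is estimated from the creation times of the coverage files: for each trace, it is the difference between the creation time of its coverage file and that of the preceding trace.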

In [68]:
%run cov_tracker.ipynb

import os
import glob
import datetime

def get_response_time(proj_loc, fuzz_out='isort-seeds-out'):
    '''
    Estimate the response time of each trace from coverage-file timestamps.

    The response time of trace `i` is the number of seconds between the
    creation of its coverage file and the creation of the file of trace `i-1`.

    Parameters:
        proj_loc: project directory containing the fuzzing output.
        fuzz_out: name of the fuzzing output directory under `proj_loc` that
                  holds `cov/lcov`; defaults to the previously hard-coded name.

    Returns:
        DataFrame with the columns ["trace_id", "response_time"], one row per
        trace id >= -1, in descending trace-id order.

    Raises:
        FileNotFoundError: a coverage file that must be inspected is missing.
        KeyError: the trace ids are not contiguous (trace `i` needs `i-1`).
    '''
    print("Computing the response time...")
    lcov_loc = os.path.join(proj_loc, fuzz_out, 'cov', 'lcov')
    # get list of all coverage files
    test_files = glob.glob(os.path.join(lcov_loc, '*_trace.lcov_info_final'))
    id_dict = {get_trace_id(test_file): test_file for test_file in test_files}

    # info_final
    id_dict[-1] = os.path.join(lcov_loc, 'trace.lcov_info_final')
    # base lcov
    id_dict[-2] = os.path.join(lcov_loc, 'trace.lcov_base')

    # collect creation time for all other trace_id
    # [1:] skips the highest trace id, which might contain no instrumentation
    # when the fuzzing run is manually interrupted
    c_time = {}
    for trace_id in sorted(id_dict, reverse=True)[1:]:
        stat = os.stat(id_dict[trace_id])
        # st_birthtime is not available on every platform (e.g. Linux); fall
        # back to the last-modification time instead of raising AttributeError
        c_timestamp = getattr(stat, 'st_birthtime', stat.st_mtime)
        c_time[trace_id] = datetime.datetime.fromtimestamp(c_timestamp)

    # compute difference in creation time as the response time; -2 (the base
    # file) only serves as the reference point for trace -1
    response_time = {
        i: (created - c_time[i - 1]).total_seconds()
        for i, created in c_time.items()
        if i >= -1
    }
    df_c_time = pd.DataFrame(list(response_time.items()), columns=['trace_id', 'response_time'])

    return df_c_time

# output: response time based on the creation time of the coverage file
response_time_csv = os.path.join(out_loc, 'response_time.csv')

df_c_time = get_response_time(proj_loc)
df_c_time.to_csv(response_time_csv, index=False)

Computing the response time...


#### 3. Looking for patterns

In [69]:
import numpy as np

def group_tests():
    '''
    Group the tests with similar data flow patterns.

    Placeholder: nothing is implemented yet, so the function returns None.
    '''
    return None

def input_sensitive(row):
    '''
    Look for statements that are input sensitive.

    Placeholder: `row` is ignored for now and the function returns None.
    '''
    return None

def get_preprocessed_data(out_loc):
    '''
    Load the three CSV artifacts stored in the analysis directory.

    Reads `spectra.csv`, `execution.csv` and `response_time.csv` from `out_loc`
    and returns them, in that order, as a tuple of DataFrames.
    '''
    csv_names = ('spectra.csv', 'execution.csv', 'response_time.csv')
    spectra, exec_counts, response_time = (
        pd.read_csv(os.path.join(out_loc, csv_name)) for csv_name in csv_names
    )
    return spectra, exec_counts, response_time

df_spectra, df_exec_c, df_response_time = get_preprocessed_data(out_loc)

# distinct execution counts observed for each statement `cid` across all traces.
# .tolist() turns the NumPy scalars into plain Python numbers, so the string
# form of each list stays "[64]"; with NumPy >= 2 a list of NumPy scalars
# would otherwise read "[np.int64(64)]" once converted to text
df_cid_executed = (
    df_exec_c.groupby('cid')['exec']
    .apply(lambda row: np.unique(row).tolist())
    .reset_index(name='exec_c_list')
)
display(df_cid_executed)
cid_executed_csv = os.path.join(out_loc, 'cid_executed.csv')
df_cid_executed.to_csv(cid_executed_csv, index=False)
# df_exec_c['input_sensitive'] = df_exec_c.apply(lambda row: input_sensitive(row))
# likely to be changed by more tests, but less execution counts
# likely to be changed by less tests, but more execution counts

Unnamed: 0,cid,exec_c_list
0,0,[64]
1,1,[63]
2,2,[63]
3,3,"[799, 832, 847, 854, 861, 877, 887, 893, 897, ..."
4,4,"[736, 769, 784, 791, 798, 814, 824, 830, 834, ..."
5,5,"[736, 769, 784, 791, 798, 814, 824, 830, 834, ..."
6,6,"[736, 769, 784, 791, 798, 814, 824, 830, 834, ..."
7,7,[63]
8,8,[1]
9,9,[65]
