Skip to content

Commit

Permalink
Differential graphs on report (#657)
Browse files Browse the repository at this point in the history
  • Loading branch information
Shadoom7 committed Aug 15, 2020
1 parent e4004fa commit d27bae7
Show file tree
Hide file tree
Showing 13 changed files with 556 additions and 122 deletions.
83 changes: 77 additions & 6 deletions analysis/benchmark_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@
import functools

from analysis import data_utils
from analysis import coverage_data_utils
from analysis import stat_tests


class BenchmarkResults: # pylint: disable=too-many-public-methods
# pylint: disable=too-many-public-methods, too-many-arguments
class BenchmarkResults:
"""Represents results of various analysis done on benchmark data.
NOTE: Do not create this class manually! Instead, use the |benchmarks|
Expand All @@ -31,11 +33,12 @@ class BenchmarkResults: # pylint: disable=too-many-public-methods
template, properties are computed on demand and only once.
"""

def __init__(self, benchmark_name, experiment_df, output_directory,
plotter):
def __init__(self, benchmark_name, experiment_df, coverage_dict,
output_directory, plotter):
self.name = benchmark_name

self._experiment_df = experiment_df
self._coverage_dict = coverage_dict
self._output_directory = output_directory
self._plotter = plotter

Expand All @@ -45,6 +48,19 @@ def _prefix_with_benchmark(self, filename):
def _get_full_path(self, filename):
return os.path.join(self._output_directory, filename)

def _get_experiment_filestore_path(self, fuzzer_name):
    """Returns the filestore path that |coverage_data_utils| derives for
    |fuzzer_name| from this benchmark's data frame."""
    benchmark_df = self._benchmark_df
    return coverage_data_utils.get_fuzzer_filestore_path(
        benchmark_df, fuzzer_name)

def get_filestore_name(self, fuzzer_name):
"""Returns the filestore name of the |fuzzer_name|."""
filestore_path = self._get_experiment_filestore_path(fuzzer_name)
gcs_prefix = 'gs://'
gcs_http_prefix = 'https://storage.googleapis.com/'
if filestore_path.startswith(gcs_prefix):
filestore_path = filestore_path.replace(gcs_prefix, gcs_http_prefix)
return filestore_path

@property
@functools.lru_cache()
# TODO(lszekeres): With python3.8+, replace above two decorators with:
Expand All @@ -53,16 +69,44 @@ def _benchmark_df(self):
exp_df = self._experiment_df
return exp_df[exp_df.benchmark == self.name]

@property
@functools.lru_cache()
def fuzzer_names(self):
    """Unique values of the fuzzer column in this benchmark's data."""
    fuzzer_column = self._benchmark_df.fuzzer
    return fuzzer_column.unique()

@property
@functools.lru_cache()
def _benchmark_snapshot_df(self):
    """Snapshot that |data_utils| extracts from this benchmark's data."""
    benchmark_df = self._benchmark_df
    return data_utils.get_benchmark_snapshot(benchmark_df)

@property
@functools.lru_cache()
def fuzzers(self):
    """Fuzzers with valid trials on this benchmark."""
    return self._benchmark_df.fuzzer.unique()

# The decorators are required here: other members read this as an
# attribute (self._benchmark_coverage_dict), not as a method call.
@property
@functools.lru_cache()
def _benchmark_coverage_dict(self):
    """Covered regions of each fuzzer on this benchmark."""
    return coverage_data_utils.get_benchmark_cov_dict(
        self._coverage_dict, self.name)

@property
@functools.lru_cache()
def _benchmark_aggregated_coverage_df(self):
    """Per-fuzzer aggregated coverage counts for this benchmark, as a
    data frame."""
    coverage_by_fuzzer = self._benchmark_coverage_dict
    return coverage_data_utils.get_benchmark_aggregated_cov_df(
        coverage_by_fuzzer)

@property
@functools.lru_cache()
def _unique_region_dict(self):
    """Unique regions of this benchmark, each with the fuzzers covering
    it."""
    coverage_by_fuzzer = self._benchmark_coverage_dict
    return coverage_data_utils.get_unique_region_dict(coverage_by_fuzzer)

@property
@functools.lru_cache()
def _unique_region_cov_df(self):
    """Data frame of fuzzers and how many unique regions each covers."""
    unique_regions = self._unique_region_dict
    names = self.fuzzer_names
    return coverage_data_utils.get_unique_region_cov_df(
        unique_regions, names)

@property
def fuzzers_with_not_enough_samples(self):
Expand Down Expand Up @@ -236,3 +280,30 @@ def better_than_plot(self):
self._plotter.write_better_than_plot(better_than_table,
self._get_full_path(plot_filename))
return plot_filename

@property
def unique_coverage_ranking_plot(self):
    """Writes the unique coverage ranking plot and returns its filename."""
    plot_filename = self._prefix_with_benchmark('ranking_unique_region.svg')
    # Join unique-region counts with aggregated coverage, one row per
    # fuzzer present in both frames.
    unique_counts_df = self._unique_region_cov_df
    aggregated_df = self._benchmark_aggregated_coverage_df
    combined_df = unique_counts_df.merge(aggregated_df, on='fuzzer')
    destination = self._get_full_path(plot_filename)
    self._plotter.write_unique_coverage_ranking_plot(combined_df,
                                                     destination)
    return plot_filename

@property
@functools.lru_cache()
def pairwise_unique_coverage_table(self):
    """Table of unique coverage between every pair of fuzzers."""
    coverage_by_fuzzer = self._benchmark_coverage_dict
    return coverage_data_utils.get_pairwise_unique_coverage_table(
        coverage_by_fuzzer)

@property
def pairwise_unique_coverage_plot(self):
    """Writes the pairwise unique coverage heatmap and returns its
    filename."""
    plot_filename = self._prefix_with_benchmark(
        'pairwise_unique_coverage_plot.svg')
    table = self.pairwise_unique_coverage_table
    destination = self._get_full_path(plot_filename)
    self._plotter.write_pairwise_unique_coverage_heatmap_plot(
        table, destination)
    return plot_filename
159 changes: 159 additions & 0 deletions analysis/coverage_data_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions for coverage data calculation."""

import collections
import json
import os
import posixpath
import tempfile
import pandas as pd

from common import filestore_utils


def get_fuzzer_benchmark_key(fuzzer: str, benchmark: str):
    """Returns the coverage dict key of a fuzzer-benchmark pair: both names
    separated by a single space."""
    return ' '.join((fuzzer, benchmark))


def get_fuzzer_filestore_path(benchmark_df, fuzzer):
    """Returns the experiment directory inside the filestore for |fuzzer|.

    The first experiment_filestore value and the first experiment value
    among the rows of |fuzzer| in |benchmark_df| are joined with '/'.
    """
    fuzzer_rows = benchmark_df.loc[benchmark_df.fuzzer == fuzzer]
    filestore_root = fuzzer_rows.experiment_filestore.unique()[0]
    experiment_name = fuzzer_rows.experiment.unique()[0]
    return posixpath.join(filestore_root, experiment_name)


def get_covered_regions_dict(experiment_df):
    """Returns a dictionary of covered regions for every fuzzer-benchmark
    pair in |experiment_df|, keyed by |get_fuzzer_benchmark_key| and loaded
    from each pair's json file."""
    regions_by_key = {}
    for benchmark_name in experiment_df.benchmark.unique():
        benchmark_rows = experiment_df[experiment_df.benchmark ==
                                       benchmark_name]
        for fuzzer_name in benchmark_rows.fuzzer.unique():
            regions = get_fuzzer_covered_regions(benchmark_rows,
                                                 benchmark_name, fuzzer_name)
            pair_key = get_fuzzer_benchmark_key(fuzzer_name, benchmark_name)
            regions_by_key[pair_key] = regions
    return regions_by_key


def get_fuzzer_covered_regions(benchmark_df, benchmark, fuzzer):
    """Returns the parsed covered_regions.json of |fuzzer| on |benchmark|.

    The file is copied from the filestore into a temporary directory,
    which is removed again before returning.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        local_copy = os.path.join(scratch_dir, 'tmp.json')
        experiment_path = get_fuzzer_filestore_path(benchmark_df, fuzzer)
        remote_json = posixpath.join(experiment_path, 'coverage', 'data',
                                     benchmark, fuzzer,
                                     'covered_regions.json')
        filestore_utils.cp(remote_json, local_copy)
        with open(local_copy) as local_json:
            covered_regions = json.load(local_json)
    return covered_regions


def get_unique_region_dict(benchmark_coverage_dict, threshold_count=1):
    """Returns a dictionary mapping each unique region to the list of
    fuzzers that cover it.

    A region is unique when at most |threshold_count| fuzzers cover it.
    With the default of 1, only regions covered by exactly one fuzzer are
    returned.

    Args:
        benchmark_coverage_dict: Maps each fuzzer to an iterable of the
            (hashable) regions it covers.
        threshold_count: Largest number of covering fuzzers for which a
            region still counts as unique.
    """
    covering_fuzzers = collections.defaultdict(list)
    for fuzzer, regions in benchmark_coverage_dict.items():
        for region in regions:
            covering_fuzzers[region].append(fuzzer)
    return {
        region: fuzzers
        for region, fuzzers in covering_fuzzers.items()
        if len(fuzzers) <= threshold_count
    }


def get_unique_region_cov_df(unique_region_dict, fuzzer_names):
    """Returns a DataFrame with one row per name in |fuzzer_names|: the
    fuzzer and the number of regions in |unique_region_dict| it covers
    (zero when it covers none)."""
    unique_counts = collections.Counter(
        fuzzer for covering_fuzzers in unique_region_dict.values()
        for fuzzer in covering_fuzzers)
    names = list(fuzzer_names)
    return pd.DataFrame({
        'fuzzer': names,
        'unique_regions_covered': [unique_counts[name] for name in names],
    })


def get_benchmark_cov_dict(coverage_dict, benchmark):
    """Returns a dictionary mapping each fuzzer that has an entry for
    |benchmark| in |coverage_dict| to its covered regions, stored as a set
    of tuples."""
    regions_by_fuzzer = {}
    for pair_key, regions in coverage_dict.items():
        # Keys are "<fuzzer> <benchmark>"; see get_fuzzer_benchmark_key.
        fuzzer_name, benchmark_name = pair_key.split()
        if benchmark_name != benchmark:
            continue
        regions_by_fuzzer[fuzzer_name] = {tuple(region) for region in regions}
    return regions_by_fuzzer


def get_benchmark_aggregated_cov_df(benchmark_coverage_dict):
    """Returns a dataframe with one row per fuzzer, holding the fuzzer and
    the size of its entry in |benchmark_coverage_dict|."""
    fuzzer_names = list(benchmark_coverage_dict)
    covered_counts = [
        len(benchmark_coverage_dict[name]) for name in fuzzer_names
    ]
    return pd.DataFrame({
        'fuzzer': fuzzer_names,
        'aggregated_edges_covered': covered_counts,
    })


def get_pairwise_unique_coverage_table(benchmark_coverage_dict):
    """Returns the pairwise unique coverage table of the fuzzers.

    The table is a square DataFrame with one row and one column per
    fuzzer. Each cell holds the value |get_unique_covered_percentage|
    returns for that pair: the number of regions covered by the column's
    fuzzer but not by the row's fuzzer.
    """
    fuzzers = benchmark_coverage_dict.keys()
    table_rows = [[
        get_unique_covered_percentage(benchmark_coverage_dict[row_fuzzer],
                                      benchmark_coverage_dict[col_fuzzer])
        for col_fuzzer in fuzzers
    ]
                  for row_fuzzer in fuzzers]
    return pd.DataFrame(table_rows, index=fuzzers, columns=fuzzers)


def get_unique_covered_percentage(fuzzer_row_covered_regions,
                                  fuzzer_col_covered_regions):
    """Returns how many regions covered by the column's fuzzer are missing
    from the regions covered by the row's fuzzer. Despite the function
    name, the result is a count, not a percentage."""
    return sum(1 for region in fuzzer_col_covered_regions
               if region not in fuzzer_row_covered_regions)
3 changes: 2 additions & 1 deletion analysis/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ def validate_data(experiment_df):
def drop_uninteresting_columns(experiment_df):
    """Returns table with only interesting columns."""
    # 'experiment' and 'experiment_filestore' are kept because the coverage
    # utilities read them to locate each fuzzer's data in the filestore.
    interesting_columns = [
        'benchmark', 'fuzzer', 'trial_id', 'time', 'edges_covered',
        'experiment', 'experiment_filestore'
    ]
    return experiment_df[interesting_columns]


Expand Down
18 changes: 12 additions & 6 deletions analysis/experiment_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from analysis import stat_tests


class ExperimentResults:
class ExperimentResults: # pylint: disable=too-many-instance-attributes
"""Provides the main interface for getting various analysis results and
plots about an experiment, represented by |experiment_df|.
Expand All @@ -31,11 +31,13 @@ class ExperimentResults:
template, only the properties needed for the given report will be computed.
"""

def __init__(self,
experiment_df,
output_directory,
plotter,
experiment_name=None):
def __init__( # pylint: disable=too-many-arguments
self,
experiment_df,
coverage_dict,
output_directory,
plotter,
experiment_name=None):
if experiment_name:
self.name = experiment_name
else:
Expand Down Expand Up @@ -63,6 +65,9 @@ def __init__(self,

self._plotter = plotter

# Dictionary to store the full coverage data.
self._coverage_dict = coverage_dict

def _get_full_path(self, filename):
return os.path.join(self._output_directory, filename)

Expand All @@ -87,6 +92,7 @@ def benchmarks(self):
benchmark_names = self._experiment_df.benchmark.unique()
return [
benchmark_results.BenchmarkResults(name, self._experiment_df,
self._coverage_dict,
self._output_directory,
self._plotter)
for name in sorted(benchmark_names)
Expand Down
Loading

0 comments on commit d27bae7

Please sign in to comment.