Skip to content

Commit

Permalink
Merge pull request #44 from Pennycook/optional-coverage-key
Browse files Browse the repository at this point in the history
Simplify divergence usage by making cov optional
  • Loading branch information
Pennycook committed Apr 26, 2024
2 parents e365cc1 + 736f549 commit 76b269b
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 27 deletions.
40 changes: 22 additions & 18 deletions p3/metrics/_divergence.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ def _coverage_to_divergence(maps):
for entry in coverage:
unique_fn = (entry["file"], entry["id"])
for region in entry["lines"]:

# If a region is a single integer, it represents one line.
if isinstance(region, int):
line = region
Expand All @@ -82,7 +81,7 @@ def _coverage_string_to_json(string):
return _validate_coverage_json(string)


def divergence(df, cov):
def divergence(df, cov=None):
r"""
Calculate code divergence.
Expand Down Expand Up @@ -120,12 +119,14 @@ def divergence(df, cov):
----------
df: DataFrame
A pandas DataFrame storing performance data. The following columns are
required: "problem", "platform", "application", "coverage_key".
required: "problem", "platform", "application".
Values of the "coverage_key" column are used as keys, to identify
corresponding coverage traces stored in the `cov` DataFrame.
If `cov` is None, a "coverage" column is required. Values of the
"coverage" column must be coverage traces adhering to the P3 Analysis
Library coverage schema. Otherwise, a "coverage_key" column is
required.
cov: DataFrame
cov: DataFrame, optional
A pandas DataFrame storing coverage data. The following columns are
required: "coverage_key", "coverage".
Expand All @@ -136,27 +137,30 @@ def divergence(df, cov):
-------
DataFrame
A new pandas DataFrame storing the code divergence values calculated
from the coverage data provided in `cov` and the configuration data
provided in `df`.
from the configuration and coverage data provided.
Raises
------
ValueError
If any of the required columns are missing from `df`.
If any value in the "coverage" column of `cov` fails to validate
against the P3 coverage schema.
If any of the required columns are missing.
If any coverage string fails to validate against the P3 coverage
schema.
TypeError
If any value in the "coverage" column of `cov` is not a JSON string.
If any value in the "coverage" column is not a JSON string.
"""
_require_columns(
df, ["problem", "platform", "application", "coverage_key"]
)
_require_columns(cov, ["coverage_key", "coverage"])
_require_columns(df, ["problem", "platform", "application"])
if cov is None:
# The original df must already contain coverage information
_require_columns(df, ["coverage"])
p3df = df.copy()
else:
# Expand the original df by replacing each coverage key with its coverage string
_require_columns(df, ["coverage_key"])
_require_columns(cov, ["coverage_key", "coverage"])
p3df = df.join(cov.set_index("coverage_key"), on="coverage_key")

# Expand the original df by substituting the sha for its coverage string
p3df = df.join(cov.set_index("coverage_key"), on="coverage_key")
p3df["coverage"] = p3df["coverage"].apply(_coverage_string_to_json)

key = ["problem", "application"]
Expand Down
27 changes: 18 additions & 9 deletions p3/report/_snapshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def index_function(row):
return df


def snapshot(df, cov, directory=None):
def snapshot(df, cov=None, directory=None):
"""
Generate an HTML report representing a snapshot of P3 characteristics.
Expand All @@ -68,10 +68,15 @@ def snapshot(df, cov, directory=None):
df: DataFrame
A pandas DataFrame storing performance efficiency data.
The following columns are always required: "problem", "platform",
"application", "coverage_key". At least one of the following columns
"application". At least one of the following columns
is required: "app eff" or "arch eff".
cov: DataFrame
If `cov` is None, a "coverage" column is required. Values of the
"coverage" column must be coverage traces adhering to the P3 Analysis
Library coverage schema. Otherwise, a "coverage_key" column is
required.
cov: DataFrame, optional
A pandas DataFrame storing coverage data. The following columns are
required: "coverage_key", "coverage".
Expand All @@ -86,13 +91,13 @@ def snapshot(df, cov, directory=None):
Raises
------
ValueError
If any of the required columns are missing from `df` or `cov`.
If any value in the "coverage" column of `cov` fails to validate
against the P3 coverage schema.
If any of the required columns are missing.
If any coverage string fails to validate against the P3 coverage
schema.
TypeError
If any of the values in the "fom" column of `df` are non-numeric.
If any of the values in the "coverage" column of `cov` are not strings.
If any value in the "coverage" column is not a JSON string.
PermissionError
If the directory specified by `directory` or any of the files generated
Expand All @@ -103,9 +108,13 @@ def snapshot(df, cov, directory=None):
"""
_require_columns(
df,
["problem", "platform", "application", "coverage_key"],
["problem", "platform", "application"],
)
_require_columns(cov, ["coverage_key", "coverage"])
if cov is None:
_require_columns(df, ["coverage"])
else:
_require_columns(df, ["coverage_key"])
_require_columns(cov, ["coverage_key", "coverage"])

if len(df["problem"].unique()) > 1:
raise NotImplementedError(
Expand Down

0 comments on commit 76b269b

Please sign in to comment.