Skip to content

Commit

Permalink
Merge pull request #31 from Pennycook/coverage-0.3.0
Browse files Browse the repository at this point in the history
Update coverage schema to 0.3.0
  • Loading branch information
Pennycook committed Apr 22, 2024
2 parents 69aaa92 + 9973052 commit e365cc1
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 36 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
include p3/data/coverage-0.1.0.schema
include p3/data/coverage-0.2.0.schema
include p3/data/coverage-0.3.0.schema
4 changes: 2 additions & 2 deletions p3/data/_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def _validate_coverage_json(json_string: str) -> object:

instance = json.loads(json_string)

schema_string = pkgutil.get_data(__name__, "coverage-0.2.0.schema")
schema_string = pkgutil.get_data(__name__, "coverage-0.3.0.schema")
if not schema_string:
msg = "Could not locate coverage schema file"
raise RuntimeError(msg)
Expand All @@ -46,7 +46,7 @@ def _validate_coverage_json(json_string: str) -> object:
msg = "Coverage string failed schema validation"
raise ValueError(msg)
except jsonschema.exceptions.SchemaError:
msg = "coverage-0.1.0.schema is not a valid schema"
msg = "coverage-0.3.0.schema is not a valid schema"
raise RuntimeError(msg)

return instance
41 changes: 41 additions & 0 deletions p3/data/coverage-0.3.0.schema
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://raw.githubusercontent.com/intel/p3-analysis-library/main/p3/data/coverage-0.3.0.schema",
"title": "Coverage",
"description": "Lines of code used in each file of a code base.",
"type": "array",
"items": {
"type": "object",
"properties": {
"file": {
"type": "string"
},
"id": {
"type": "string"
},
"lines": {
"type": "array",
"items": {
"oneOf": [
{
"type": "integer"
},
{
"type": "array",
"contains": {
"type": "integer"
},
"minContains": 2,
"maxContains": 2
}
]
}
}
},
"required": [
"file",
"id",
"lines"
]
}
}
19 changes: 13 additions & 6 deletions p3/metrics/_divergence.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,22 @@ def _coverage_to_divergence(maps):
linemap = collections.defaultdict(set)
for p, coverage in enumerate(maps):
for entry in coverage:
fn = entry["file"]
for region in entry["regions"]:
linemap[(fn, tuple(region))].add(p)
unique_fn = (entry["file"], entry["id"])
for region in entry["lines"]:

# If a region is a single integer, it represents one line.
if isinstance(region, int):
line = region
linemap[(unique_fn, line)].add(p)

# If a region is a list, it represents a [start, end] pair.
if isinstance(region, list):
for line in range(region[0], region[1]):
linemap[(unique_fn, line)].add(p)

setmap = collections.defaultdict(int)
for key, platforms in linemap.items():
fn, triple = key
start, end, num_lines = triple
setmap[frozenset(platforms)] += num_lines
setmap[frozenset(platforms)] += 1

return _average_distance(setmap)

Expand Down
8 changes: 4 additions & 4 deletions tests/data/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,19 @@ class TestValidation(unittest.TestCase):

def test_coverage_json_valid(self):
"""p3.data.validation.coverage_json_valid"""
json_string = '[{"file": "id", "regions": [[1,2,3]]}]'
json_string = '[{"file": "path", "id": "sha", "lines": [1, 2, [3, 5]]}]'
result_object = _validate_coverage_json(json_string)
expected_object = [{"file": "id", "regions": [[1, 2, 3]]}]
expected_object = [{"file": "path", "id": "sha", "lines": [1, 2, [3, 5]]}]
self.assertTrue(result_object == expected_object)

def test_coverage_json_invalid(self):
"""p3.data.validation.coverage_json_invalid"""
json_string = '[{"file": "id", "regions": [["1"]]}]'
json_string = '[{"file": "path", "id": "sha", "lines": [["1"]]}]'
with self.assertRaises(ValueError):
_validate_coverage_json(json_string)

with self.assertRaises(TypeError):
json_object = [{"file": "id", "regions": [[1, 2, 3]]}]
json_object = [{"file": "path", "id": "sha", "lines": [["1"]]}]
_validate_coverage_json(json_object)


Expand Down
94 changes: 70 additions & 24 deletions tests/metrics/test_divergence.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@ class TestDivergence(unittest.TestCase):
"""

def test_required_columns(self):
"""p3.data.divergence.required_columns"""
"""Check that divergence() validates required columns."""
df = pd.DataFrame()
cov = pd.DataFrame()

with self.assertRaises(ValueError):
divergence(df, cov)

def test_side_effects(self):
"""p3.data.divergence.side_effects"""
"""Check that divergence() has no side effects."""
key = 0
data = {
"problem": ["test"] * 2,
Expand All @@ -33,10 +33,9 @@ def test_side_effects(self):
json_string = json.dumps(
[
{
"file": "0",
"regions": [
[0, 1, 1],
],
"file": "file.cpp",
"id": "0",
"lines": [0],
}
]
)
Expand All @@ -54,7 +53,7 @@ def test_side_effects(self):
pd.testing.assert_frame_equal(cov_before, cov_after)

def test_divergence(self):
"""p3.data.divergence"""
"""Check that divergence() produces expected results for valid data."""
data = {
"problem": ["test"] * 2,
"platform": ["A", "B"],
Expand All @@ -66,27 +65,24 @@ def test_divergence(self):
source1_json_string = json.dumps(
[
{
"file": "0",
"regions": [
[0, 10, 10],
],
"file": "foo.cpp",
"id": "0",
"lines": [[0, 9]],
}
]
)

source2_json_string = json.dumps(
[
{
"file": "0",
"regions": [
[0, 10, 10],
],
"file": "foo.cpp",
"id": "0",
"lines": [[0, 9]],
},
{
"file": "1",
"regions": [
[0, 10, 10],
],
"file": "bar.cpp",
"id": "1",
"lines": [[0, 9]],
},
]
)
Expand All @@ -110,7 +106,7 @@ def test_divergence(self):
pd.testing.assert_frame_equal(result, expected_result)

def test_divergence_single(self):
"""p3.data.divergence.single"""
"""Check that divergence() does not fail with only one platform."""
key = 0
data = {
"problem": ["test"],
Expand All @@ -123,10 +119,9 @@ def test_divergence_single(self):
json_string = json.dumps(
[
{
"file": "0",
"regions": [
[0, 1, 1],
],
"file": "file.cpp",
"id": "0",
"lines": [0],
}
]
)
Expand All @@ -143,6 +138,57 @@ def test_divergence_single(self):

pd.testing.assert_frame_equal(result, expected_df)

def test_divergence_duplicate(self):
    """Check that divergence() uses both file and id for uniqueness."""
    # Two platforms, each with its own coverage blob.
    df = pd.DataFrame(
        {
            "problem": ["test", "test"],
            "platform": ["A", "B"],
            "application": ["latest", "latest"],
            "coverage_key": ["source1", "source2"],
        }
    )

    # Both coverage entries use the path "foo.cpp", but with different
    # ids ("0" and "1"), so they must be treated as distinct files.
    coverage_strings = [
        json.dumps([{"file": "foo.cpp", "id": str(i), "lines": [[0, 9]]}])
        for i in range(2)
    ]
    cov = pd.DataFrame(
        {
            "coverage_key": ["source1", "source2"],
            "coverage": coverage_strings,
        }
    )

    result = divergence(df, cov)

    # Disjoint (file, id) pairs means the platforms share no lines,
    # so divergence is maximal.
    expected_result = pd.DataFrame(
        {
            "problem": ["test"],
            "application": ["latest"],
            "divergence": [1.0],
        }
    )
    pd.testing.assert_frame_equal(result, expected_result)


if __name__ == "__main__":
unittest.main()

0 comments on commit e365cc1

Please sign in to comment.