Skip to content

Commit

Permalink
Merge pull request #22 from Pennycook/drop-duplicate-results
Browse files Browse the repository at this point in the history
Drop duplicate results
  • Loading branch information
Pennycook committed Dec 18, 2023
2 parents fec67d5 + 28461a7 commit 720ffdb
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 0 deletions.
6 changes: 6 additions & 0 deletions p3/metrics/_pp.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ def pp(df):
if not df[eff].fillna(0).between(0, 1).all():
raise ValueError("%s must in range [0, 1]" % eff)

# Keep only the highest efficiency for each (problem, platform, application).
key = ["problem", "platform", "application"]
groups = df[key + efficiencies].groupby(key)
df = groups.agg(max)
df.reset_index(inplace=True)

# Add a "did not run" value for applications that did not run
rows = []
combination_keys = ["problem", "platform", "application"]
Expand Down
6 changes: 6 additions & 0 deletions p3/plot/_cascade.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,12 @@ def cascade(df, eff=None, size=(6, 5), **kwargs):
raise ValueError(msg % (eff_column))
_require_numeric(df, [eff_column])

# Keep only the highest efficiency for each (problem, platform, application).
key = ["problem", "platform", "application"]
groups = df[key + [eff_column]].groupby(key)
df = groups.agg(max)
df.reset_index(inplace=True)

platforms = df["platform"].unique()
applications = df["application"].unique()

Expand Down
25 changes: 25 additions & 0 deletions tests/metrics/test_pp.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,31 @@ def test_pp_single(self):

pd.testing.assert_frame_equal(result, expected_df)

def test_pp_duplicates(self):
    """p3.data.pp.duplicates

    Regression check for input in which the same
    (problem, platform, application) combination is reported more
    than once: pp() is expected to return a single row for the
    (problem, application) pair.
    """
    # Every platform appears twice. The first entry for each platform
    # carries the lower efficiencies, and platform A's first entry also
    # has a NaN "fom" value.
    nan = float("NaN")
    duplicated = pd.DataFrame(
        {
            "problem": ["test", "test", "test", "test"],
            "platform": ["A"] * 2 + ["B"] * 2,
            "application": ["latest", "latest", "latest", "latest"],
            "fom": [nan, 25.0, 1.0, 2.0],
            "app eff": [0, 1.0, 0.5, 1.0],
            "arch eff": [0, 0.5, 0.25, 0.5],
        }
    )

    # The expected values match the higher-efficiency entry of each
    # platform (app eff 1.0, arch eff 0.5 on both A and B).
    expected = pd.DataFrame(
        {
            "problem": ["test"],
            "application": ["latest"],
            "app pp": [1.0],
            "arch pp": [0.5],
        }
    )

    actual = pp(duplicated)

    pd.testing.assert_frame_equal(actual, expected)

# Run the unittest test runner when this module is executed directly as a
# script (not when it is imported).
if __name__ == "__main__":
    unittest.main()

0 comments on commit 720ffdb

Please sign in to comment.