Merge pull request #22 from Pennycook/drop-duplicate-results

Drop duplicate results
intel · Dec 18, 2023 · 720ffdb · 720ffdb
2 parents fec67d5 + 28461a7
commit 720ffdb
Show file tree

Hide file tree

Showing 3 changed files with 37 additions and 0 deletions.
diff --git a/p3/metrics/_pp.py b/p3/metrics/_pp.py
@@ -83,6 +83,12 @@ def pp(df):
         if not df[eff].fillna(0).between(0, 1).all():
             raise ValueError("%s must in range [0, 1]" % eff)
 
+    # Keep the max of each efficiency per (problem, platform, application).
+    key = ["problem", "platform", "application"]
+    groups = df[key + efficiencies].groupby(key)
+    df = groups.agg(max)
+    df.reset_index(inplace=True)
+
     # Add a "did not run" value for applications that did not run
     rows = []
     combination_keys = ["problem", "platform", "application"]

diff --git a/p3/plot/_cascade.py b/p3/plot/_cascade.py
@@ -347,6 +347,12 @@ def cascade(df, eff=None, size=(6, 5), **kwargs):
             raise ValueError(msg % (eff_column))
     _require_numeric(df, [eff_column])
 
+    # Keep the max efficiency per (problem, platform, application).
+    key = ["problem", "platform", "application"]
+    groups = df[key + [eff_column]].groupby(key)
+    df = groups.agg(max)
+    df.reset_index(inplace=True)
+
     platforms = df["platform"].unique()
     applications = df["application"].unique()
 

diff --git a/tests/metrics/test_pp.py b/tests/metrics/test_pp.py
@@ -134,6 +134,31 @@ def test_pp_single(self):
 
         pd.testing.assert_frame_equal(result, expected_df)
 
+    def test_pp_duplicates(self):
+        """p3.data.pp.duplicates"""
+
+        # Regression test: duplicate (problem, platform, application) rows.
+        data = {
+            "problem": ["test"] * 4,
+            "platform": ["A", "A", "B", "B"],
+            "application": ["latest"] * 4,
+            "fom": [float("NaN"), 25.0, 1.0, 2.0],
+            "app eff": [0, 1.0, 0.5, 1.0],
+            "arch eff": [0, 0.5, 0.25, 0.5],
+        }
+        df = pd.DataFrame(data)
+
+        result = pp(df)
+
+        expected_data = {
+            "problem": ["test"],
+            "application": ["latest"],
+            "app pp": [1.0],
+            "arch pp": [0.5],
+        }
+        expected_df = pd.DataFrame(expected_data)
+
+        pd.testing.assert_frame_equal(result, expected_df)
 
 if __name__ == "__main__":
     unittest.main()