diff --git a/haystack/evaluation/eval_run_result.py b/haystack/evaluation/eval_run_result.py
index 3b9c0e82aa..6149ad1b9c 100644
--- a/haystack/evaluation/eval_run_result.py
+++ b/haystack/evaluation/eval_run_result.py
@@ -131,7 +131,8 @@ def aggregated_report(
             JSON or DataFrame with aggregated scores, in case the output is set to a CSV file, a message
             confirming the successful write or an error message.
         """
-        results = {k: v["score"] for k, v in self.results.items()}
+
+        results = {k: str(v["score"]) for k, v in self.results.items()}
         data = {"metrics": list(results.keys()), "score": list(results.values())}
         return self._handle_output(data, output_format, csv_file)
diff --git a/test/evaluation/test_eval_run_result.py b/test/evaluation/test_eval_run_result.py
index 055b1d7186..a171bed14d 100644
--- a/test/evaluation/test_eval_run_result.py
+++ b/test/evaluation/test_eval_run_result.py
@@ -100,7 +100,7 @@ def test_score_report():
                 "faithfulness",
                 "semantic_answer_similarity",
             ],
-            "score": [0.476932, 0.75, 0.46428375, 0.58177975, 0.40585375, 0.53757075],
+            "score": ["0.476932", "0.75", "0.46428375", "0.58177975", "0.40585375", "0.53757075"],
         }
     )
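
For context, a minimal standalone sketch of the behavioural change in `aggregated_report`: scores are now cast to `str` before the report payload is built. The `results_store` dict below is a made-up stand-in for `self.results` (values borrowed from the expected DataFrame in `test_score_report`); it is not part of the patch.

```python
# Hypothetical stand-in for self.results; metric names and values are taken
# from the test's expected DataFrame, not from a real pipeline run.
results_store = {
    "reciprocal_rank": {"score": 0.476932},
    "single_hit": {"score": 0.75},
}

# Before this patch: scores stay floats.
before = {k: v["score"] for k, v in results_store.items()}

# After this patch: scores are cast to str before building the report payload.
after = {k: str(v["score"]) for k, v in results_store.items()}

data = {"metrics": list(after.keys()), "score": list(after.values())}
print(data)  # {'metrics': ['reciprocal_rank', 'single_hit'], 'score': ['0.476932', '0.75']}
```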