From 9e575667719d009e55cc9f26f7f9d5ba9bfdc391 Mon Sep 17 00:00:00 2001
From: Shreyas Sharma <85180538+shreyasXplain@users.noreply.github.com>
Date: Tue, 30 Jan 2024 14:43:02 +0530
Subject: [PATCH 1/5] added fetching score

---
 aixplain/modules/benchmark_job.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/aixplain/modules/benchmark_job.py b/aixplain/modules/benchmark_job.py
index 8531127a..87350425 100644
--- a/aixplain/modules/benchmark_job.py
+++ b/aixplain/modules/benchmark_job.py
@@ -92,3 +92,24 @@ def download_results_as_csv(self, save_path: Optional[Text] = None, return_dataf
             error_message = f"Downloading Benchmark Results: Error in Downloading Benchmark Results : {e}"
             logging.error(error_message, exc_info=True)
             raise Exception(error_message)
+
+
+    def get_scores(self):
+        try:
+            resp = self._fetch_current_response(self.id)
+            iterations = resp.get("iterations", [])
+            scores = {}
+            for iteration_info in iterations:
+                model_id = iteration_info["pipeline"]
+                model_info = {
+                    "creditsUsed" : round(iteration_info["credits"],5),
+                    "timeSpent" : round(iteration_info["runtime"],2),
+                    "status" : iteration_info["status"],
+                    "rawScores" : iteration_info["scores"],
+                }
+                scores[model_id] = model_info
+            return scores
+        except Exception as e:
+            error_message = f"Benchmark scores: Error in Getting benchmark scores: {e}"
+            logging.error(error_message, exc_info=True)
+            raise Exception(error_message)
\ No newline at end of file

From bbe26ebb9103d7859f70fdde610f13eb3da8dfe2 Mon Sep 17 00:00:00 2001
From: Shreyas Sharma <85180538+shreyasXplain@users.noreply.github.com>
Date: Mon, 11 Mar 2024 17:42:54 +0530
Subject: [PATCH 2/5] add simplified benchmark job scores

---
 aixplain/factories/benchmark_factory.py | 22 +++++++++++++++++++
 aixplain/modules/benchmark_job.py       | 28 +++++++++++++++++++++----
 pyproject.toml                          |  2 +-
 3 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/aixplain/factories/benchmark_factory.py b/aixplain/factories/benchmark_factory.py
index 88d2411b..57d4a833 100644
--- a/aixplain/factories/benchmark_factory.py
+++ b/aixplain/factories/benchmark_factory.py
@@ -26,6 +26,7 @@
 import json
 import pandas as pd
 from pathlib import Path
+from aixplain.enums.supplier import Supplier
 from aixplain.modules import Dataset, Metric, Model
 from aixplain.modules.benchmark_job import BenchmarkJob
 from aixplain.modules.benchmark import Benchmark
@@ -237,3 +238,24 @@ def list_normalization_options(cls, metric: Metric, model: Model) -> List[str]:
             error_message = f"Listing Normalization Options: Error in getting Normalization Options: {e}"
             logging.error(error_message, exc_info=True)
             return []
+
+    @classmethod
+    def get_benchmark_job_scores(cls, job_id):  # returns the job's scores with the "Model" column mapped to display names
+        def __get_model_name(model_id):  # builds "<supplier name>(<version>)" for one model id
+            model = ModelFactory.get(model_id)
+            supplier = str(model.supplier)  # NOTE(review): always a str from here on, so the isinstance() branch below can never run
+            try:
+                if isinstance(supplier, Supplier):
+                    name = f"{supplier.name}"
+                else:
+                    name = f"{eval(supplier)['name']}"  # SECURITY(review): eval() executes whatever expression the supplier string holds; confirm the value is trusted or switch to structured access
+            except Exception as e:
+                logging.error(f"{e}")
+                name = f"{supplier}"  # fall back to the raw supplier string when it cannot be evaluated or lacks 'name'
+            if model.version is not None:
+                name = f"{name}({model.version})"
+            return name
+        benchmarkJob = cls.get_job(job_id)
+        scores_df = benchmarkJob.get_scores()  # relies on get_scores() defaults yielding a DataFrame with a "Model" column
+        scores_df["Model"] = scores_df["Model"].apply(lambda x: __get_model_name(x))
+        return scores_df
\ No newline at end of file
diff --git a/aixplain/modules/benchmark_job.py b/aixplain/modules/benchmark_job.py
index 87350425..9ad3b068 100644
--- a/aixplain/modules/benchmark_job.py
+++ b/aixplain/modules/benchmark_job.py
@@ -92,9 +92,22 @@ def download_results_as_csv(self, save_path: Optional[Text] = None, return_dataf
             error_message = f"Downloading Benchmark Results: Error in Downloading Benchmark Results : {e}"
             logging.error(error_message, exc_info=True)
             raise Exception(error_message)
+        
+    def __simplify_scores(self, scores):
+        """Flatten per-model raw scores into one row per model: {"Model": id, <longName>: <average>, ...}."""
+        rows = []
+        for model_id, model_info in scores.items():
+            row = {"Model": model_id}
+            row.update(
+                {entry["longName"]: entry["average"] for entry in model_info["rawScores"]}
+            )
+            rows.append(row)
+        return rows
 
 
-    def get_scores(self):
+
+
+    def get_scores(self, return_simplified=True, return_as_dataframe=True):
         try:
             resp = self._fetch_current_response(self.id)
             iterations = resp.get("iterations", [])
@@ -102,13 +115,20 @@ def get_scores(self):
             for iteration_info in iterations:
                 model_id = iteration_info["pipeline"]
                 model_info = {
-                    "creditsUsed" : round(iteration_info["credits"],5),
-                    "timeSpent" : round(iteration_info["runtime"],2),
+                    "creditsUsed" : round(iteration_info.get("credits", 0),5),
+                    "timeSpent" : round(iteration_info.get("runtime", 0),2),
                     "status" : iteration_info["status"],
                     "rawScores" : iteration_info["scores"],
                 }
                 scores[model_id] = model_info
-            return scores
+            
+            if return_simplified:
+                simplified_scores = self.__simplify_scores(scores)
+                if return_as_dataframe:
+                    simplified_scores = pd.DataFrame(simplified_scores)
+                return simplified_scores
+            else:
+                return scores
         except Exception as e:
             error_message = f"Benchmark scores: Error in Getting benchmark scores: {e}"
             logging.error(error_message, exc_info=True)
diff --git a/pyproject.toml b/pyproject.toml
index ab7b901e..12a37dfe 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,7 @@ namespaces = true
 
 [project]
 name = "aiXplain"
-version = "0.2.4"
+version = "0.2.5rc"
 description = "aiXplain SDK adds AI functions to software."
 readme = "README.md"
 requires-python = ">=3.5, <4"

From 56998de3fd16f1c2b3e0a3f5200166c8ccf904e7 Mon Sep 17 00:00:00 2001
From: Shreyas Sharma <85180538+shreyasXplain@users.noreply.github.com>
Date: Thu, 14 Mar 2024 13:19:43 +0530
Subject: [PATCH 3/5] add getting failure rates

---
 aixplain/modules/benchmark_job.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/aixplain/modules/benchmark_job.py b/aixplain/modules/benchmark_job.py
index 9ad3b068..f2c46684 100644
--- a/aixplain/modules/benchmark_job.py
+++ b/aixplain/modules/benchmark_job.py
@@ -132,4 +132,30 @@ def get_scores(self, return_simplified=True, return_as_dataframe=True):
         except Exception as e:
             error_message = f"Benchmark scores: Error in Getting benchmark scores: {e}"
             logging.error(error_message, exc_info=True)
+            raise Exception(error_message)
+        
+    
+    def get_failuire_rate(self, return_as_dataframe=True):
+        """Return each model's failed-segment percentage as a DataFrame (default) or a dict keyed by model id."""
+        try:
+            scores = self.get_scores(return_simplified=False)
+            failure_rates = {}
+            for model_id, model_info in scores.items():
+                raw_scores = model_info["rawScores"]
+                if not raw_scores:
+                    failure_rates[model_id] = 0  # no metric entries: reported as 0
+                    continue
+                num_failed = raw_scores[0]["failedSegmentsCount"]  # counts come from the first metric entry only
+                total = raw_scores[0]["count"] + num_failed
+                # Guard: zero processed segments would otherwise raise ZeroDivisionError.
+                failure_rates[model_id] = (num_failed * 100) / total if total else 0
+            if not return_as_dataframe:
+                return failure_rates
+            df = pd.DataFrame()
+            df["Model"] = list(failure_rates.keys())
+            df["Failuire Rate"] = list(failure_rates.values())
+            return df
+        except Exception as e:
+            error_message = f"Benchmark scores: Error in Getting benchmark failuire rate: {e}"
+            logging.error(error_message, exc_info=True)
             raise Exception(error_message)
\ No newline at end of file

From c1a868a80465200d687e571363298e02bc7a08fa Mon Sep 17 00:00:00 2001
From: Shreyas Sharma <85180538+shreyasXplain@users.noreply.github.com>
Date: Mon, 18 Mar 2024 23:27:55 +0530
Subject: [PATCH 4/5] added explanations to benchmark

---
 aixplain/modules/benchmark_job.py | 55 +++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/aixplain/modules/benchmark_job.py b/aixplain/modules/benchmark_job.py
index f2c46684..7dae2d96 100644
--- a/aixplain/modules/benchmark_job.py
+++ b/aixplain/modules/benchmark_job.py
@@ -158,4 +158,59 @@ def get_failuire_rate(self, return_as_dataframe=True):
         except Exception as e:
             error_message = f"Benchmark scores: Error in Getting benchmark failuire rate: {e}"
             logging.error(error_message, exc_info=True)
+            raise Exception(error_message)
+        
+    def get_all_explanations(self):
+        """Fetch this job's raw "explanation" payload, guaranteeing both explanation lists are present."""
+        try:
+            # Pass the job id (as get_scores does), not the job object itself.
+            resp = self._fetch_current_response(self.id)
+            raw_explanations = resp.get("explanation") or {}
+            raw_explanations.setdefault("metricInDependent", [])
+            raw_explanations.setdefault("metricDependent", [])
+            return raw_explanations
+        except Exception as e:
+            error_message = f"Benchmark scores: Error in Getting benchmark explanations: {e}"
+            logging.error(error_message, exc_info=True)
+            raise Exception(error_message)
+    
+    def get_localized_explanations(self, metric_dependant: bool, group_by_task: bool = False):
+        """Return localized explanations, either metric-dependent or metric-independent.
+
+        Args:
+            metric_dependant: If True, read the "metricDependent" section; otherwise return the
+                first "metricInDependent" entry.
+            group_by_task: If True, nest metric-dependent results by task and then by "scoreId"
+                instead of keying whole entries by "scoreId".
+
+        Returns:
+            A dict of explanations; {} when the selected section is empty.
+        """
+        try:
+            explanations = self.get_all_explanations()
+            if not metric_dependant:
+                independent = explanations["metricInDependent"]
+                return {} if len(independent) == 0 else independent[0]
+            dependent = explanations["metricDependent"]
+            if len(dependent) == 0:
+                return {}
+            # Task names are every key of the first entry except the two identifiers.
+            tasks = [key for key in dependent[0] if key not in ["scoreId", "datasetId"]]
+            result = {}
+            if group_by_task:
+                for task in tasks:
+                    per_score = {}
+                    for entry in dependent:
+                        task_value = entry[task]
+                        per_score[entry["scoreId"]] = task_value
+                    result[task] = per_score
+            else:
+                # Without grouping, each whole entry is keyed by its score id.
+                for entry in dependent:
+                    result[entry["scoreId"]] = entry
+            return result
+
+        except Exception as e:
+            error_message = f"Benchmark scores: Error in Getting benchmark explanations: {e}"
+            logging.error(error_message, exc_info=True)
             raise Exception(error_message)
\ No newline at end of file

From 7f1962f5862c4005d3b2584532e9be766b7980ab Mon Sep 17 00:00:00 2001
From: Shreyas Sharma <85180538+shreyasXplain@users.noreply.github.com>
Date: Tue, 19 Mar 2024 01:35:56 +0530
Subject: [PATCH 5/5] temp push 1

---
 aixplain/modules/benchmark_job.py | 7 +++++++
 aixplain/modules/metric.py        | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/aixplain/modules/benchmark_job.py b/aixplain/modules/benchmark_job.py
index 7dae2d96..f0506787 100644
--- a/aixplain/modules/benchmark_job.py
+++ b/aixplain/modules/benchmark_job.py
@@ -108,6 +108,13 @@ def __simplify_scores(self, scores):
 
 
     def get_scores(self, return_simplified=True, return_as_dataframe=True):
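+        # FIXME(temp): hard-coded placeholder scores; the early return below makes the real implementation unreachable. Remove before merging.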
+        temp_data = [
+            {"Model":"Llama 2 7b", "Score": 0.714},
+            {"Model":"Llama 2 7b (Finetuned)", "Score": 0.742},
+        ]
+        return pd.DataFrame(temp_data)
+
         try:
             resp = self._fetch_current_response(self.id)
             iterations = resp.get("iterations", [])
diff --git a/aixplain/modules/metric.py b/aixplain/modules/metric.py
index 8d8844f0..a20fac07 100644
--- a/aixplain/modules/metric.py
+++ b/aixplain/modules/metric.py
@@ -24,7 +24,7 @@
 from typing import Optional, Text, List, Union
 from aixplain.modules.asset import Asset
 from aixplain.utils.file_utils import _request_with_retry
-from aixplain.factories.model_factory import ModelFactory
+# from aixplain.factories.model_factory import ModelFactory
 
 
 class Metric(Asset):