From 9e575667719d009e55cc9f26f7f9d5ba9bfdc391 Mon Sep 17 00:00:00 2001 From: Shreyas Sharma <85180538+shreyasXplain@users.noreply.github.com> Date: Tue, 30 Jan 2024 14:43:02 +0530 Subject: [PATCH 1/5] added fetching score --- aixplain/modules/benchmark_job.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/aixplain/modules/benchmark_job.py b/aixplain/modules/benchmark_job.py index 8531127a..87350425 100644 --- a/aixplain/modules/benchmark_job.py +++ b/aixplain/modules/benchmark_job.py @@ -92,3 +92,24 @@ def download_results_as_csv(self, save_path: Optional[Text] = None, return_dataf error_message = f"Downloading Benchmark Results: Error in Downloading Benchmark Results : {e}" logging.error(error_message, exc_info=True) raise Exception(error_message) + + + def get_scores(self): + try: + resp = self._fetch_current_response(self.id) + iterations = resp.get("iterations", []) + scores = {} + for iteration_info in iterations: + model_id = iteration_info["pipeline"] + model_info = { + "creditsUsed" : round(iteration_info["credits"],5), + "timeSpent" : round(iteration_info["runtime"],2), + "status" : iteration_info["status"], + "rawScores" : iteration_info["scores"], + } + scores[model_id] = model_info + return scores + except Exception as e: + error_message = f"Benchmark scores: Error in Getting benchmark scores: {e}" + logging.error(error_message, exc_info=True) + raise Exception(error_message) \ No newline at end of file From bbe26ebb9103d7859f70fdde610f13eb3da8dfe2 Mon Sep 17 00:00:00 2001 From: Shreyas Sharma <85180538+shreyasXplain@users.noreply.github.com> Date: Mon, 11 Mar 2024 17:42:54 +0530 Subject: [PATCH 2/5] add simplified benchmark job scores --- aixplain/factories/benchmark_factory.py | 22 +++++++++++++++++++ aixplain/modules/benchmark_job.py | 28 +++++++++++++++++++++---- pyproject.toml | 2 +- 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/aixplain/factories/benchmark_factory.py b/aixplain/factories/benchmark_factory.py index 88d2411b..57d4a833 100644 --- a/aixplain/factories/benchmark_factory.py +++ b/aixplain/factories/benchmark_factory.py @@ -26,6 +26,7 @@ import json import pandas as pd from pathlib import Path +from aixplain.enums.supplier import Supplier from aixplain.modules import Dataset, Metric, Model from aixplain.modules.benchmark_job import BenchmarkJob from aixplain.modules.benchmark import Benchmark @@ -237,3 +238,24 @@ def list_normalization_options(cls, metric: Metric, model: Model) -> List[str]: error_message = f"Listing Normalization Options: Error in getting Normalization Options: {e}" logging.error(error_message, exc_info=True) return [] + + @classmethod + def get_benchmark_job_scores(cls, job_id): + def __get_model_name(model_id): + model = ModelFactory.get(model_id) + supplier = str(model.supplier) + try: + if isinstance(supplier, Supplier): + name = f"{supplier.name}" + else: + name = f"{eval(supplier)['name']}" + except Exception as e: + logging.error(f"{e}") + name = f"{supplier}" + if model.version is not None: + name = f"{name}({model.version})" + return name + benchmarkJob = cls.get_job(job_id) + scores_df = benchmarkJob.get_scores() + scores_df["Model"] = scores_df["Model"].apply(lambda x: __get_model_name(x)) + return scores_df \ No newline at end of file diff --git a/aixplain/modules/benchmark_job.py b/aixplain/modules/benchmark_job.py index 87350425..9ad3b068 100644 --- a/aixplain/modules/benchmark_job.py +++ b/aixplain/modules/benchmark_job.py @@ -92,9 +92,22 @@ def download_results_as_csv(self, save_path: Optional[Text] = None, return_dataf error_message = f"Downloading Benchmark Results: Error in Downloading Benchmark Results : {e}" logging.error(error_message, exc_info=True) raise Exception(error_message) + + def __simplify_scores(self, scores): + simplified_score_list = [] + for model_id, model_info in scores.items(): + model_scores = model_info["rawScores"] + # model = Mode + row = {"Model": model_id} + for score_info in model_scores: + row[score_info["longName"]] = score_info["average"] + simplified_score_list.append(row) + return simplified_score_list - def get_scores(self): + + + def get_scores(self, return_simplified=True, return_as_dataframe=True): try: resp = self._fetch_current_response(self.id) iterations = resp.get("iterations", []) @@ -102,13 +115,20 @@ def get_scores(self): for iteration_info in iterations: model_id = iteration_info["pipeline"] model_info = { - "creditsUsed" : round(iteration_info["credits"],5), - "timeSpent" : round(iteration_info["runtime"],2), + "creditsUsed" : round(iteration_info.get("credits", 0),5), + "timeSpent" : round(iteration_info.get("runtime", 0),2), "status" : iteration_info["status"], "rawScores" : iteration_info["scores"], } scores[model_id] = model_info - return scores + + if return_simplified: + simplified_scores = self.__simplify_scores(scores) + if return_as_dataframe: + simplified_scores = pd.DataFrame(simplified_scores) + return simplified_scores + else: + return scores except Exception as e: error_message = f"Benchmark scores: Error in Getting benchmark scores: {e}" logging.error(error_message, exc_info=True) diff --git a/pyproject.toml b/pyproject.toml index ab7b901e..12a37dfe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ namespaces = true [project] name = "aiXplain" -version = "0.2.4" +version = "0.2.5rc" description = "aiXplain SDK adds AI functions to software." readme = "README.md" requires-python = ">=3.5, <4" From 56998de3fd16f1c2b3e0a3f5200166c8ccf904e7 Mon Sep 17 00:00:00 2001 From: Shreyas Sharma <85180538+shreyasXplain@users.noreply.github.com> Date: Thu, 14 Mar 2024 13:19:43 +0530 Subject: [PATCH 3/5] add getting failuire rates --- aixplain/modules/benchmark_job.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/aixplain/modules/benchmark_job.py b/aixplain/modules/benchmark_job.py index 9ad3b068..f2c46684 100644 --- a/aixplain/modules/benchmark_job.py +++ b/aixplain/modules/benchmark_job.py @@ -132,4 +132,30 @@ def get_scores(self, return_simplified=True, return_as_dataframe=True): except Exception as e: error_message = f"Benchmark scores: Error in Getting benchmark scores: {e}" logging.error(error_message, exc_info=True) + raise Exception(error_message) + + + def get_failuire_rate(self, return_as_dataframe=True): + try: + scores = self.get_scores(return_simplified=False) + failure_rates = {} + for model_id, model_info in scores.items(): + if len(model_info["rawScores"]) == 0: + failure_rates[model_id] = 0 + continue + score_info = model_info["rawScores"][0] + num_succesful = score_info["count"] + num_failed = score_info["failedSegmentsCount"] + failuire_rate = (num_failed * 100) / (num_succesful+num_failed) + failure_rates[model_id] = failuire_rate + if return_as_dataframe: + df = pd.DataFrame() + df["Model"] = list(failure_rates.keys()) + df["Failuire Rate"] = list(failure_rates.values()) + return df + else: + return failure_rates + except Exception as e: + error_message = f"Benchmark scores: Error in Getting benchmark failuire rate: {e}" + logging.error(error_message, exc_info=True) raise Exception(error_message) \ No newline at end of file From c1a868a80465200d687e571363298e02bc7a08fa Mon Sep 17 00:00:00 2001 From: Shreyas Sharma <85180538+shreyasXplain@users.noreply.github.com> Date: Mon, 18 Mar 2024 23:27:55 +0530 Subject: [PATCH 4/5] added explanations to benchmark --- aixplain/modules/benchmark_job.py | 55 +++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/aixplain/modules/benchmark_job.py b/aixplain/modules/benchmark_job.py index f2c46684..7dae2d96 100644 --- a/aixplain/modules/benchmark_job.py +++ b/aixplain/modules/benchmark_job.py @@ -158,4 +158,59 @@ def get_failuire_rate(self, return_as_dataframe=True): except Exception as e: error_message = f"Benchmark scores: Error in Getting benchmark failuire rate: {e}" logging.error(error_message, exc_info=True) + raise Exception(error_message) + + def get_all_explanations(self): + try: + resp = self._fetch_current_response(self) + raw_explanations = resp.get("explanation", {}) + if "metricInDependent" not in raw_explanations: + raw_explanations["metricInDependent"] = [] + if "metricDependent" not in raw_explanations: + raw_explanations["metricDependent"] = [] + return raw_explanations + except Exception as e: + error_message = f"Benchmark scores: Error in Getting benchmark explanations: {e}" + logging.error(error_message, exc_info=True) + raise Exception(error_message) + + def get_localized_explanations(self, metric_dependant: bool, group_by_task: bool = False): + try: + raw_explanations = self.get_all_explanations() + if metric_dependant: + localized_explanations = raw_explanations["metricDependent"] + if len(localized_explanations) == 0: + localized_explanations = {} + else: + grouped_explanations = {} + task_list = [] + first_explanation = localized_explanations[0] + for task in first_explanation: + if task not in ["scoreId", "datasetId"]: + task_list.append(task) + + if group_by_task: + for task in task_list: + task_explanation = {} + for explanation_item in localized_explanations: + item_task_explanation = explanation_item[task] + identifier = explanation_item["scoreId"] + task_explanation[identifier] = item_task_explanation + grouped_explanations[task] = task_explanation + else: + for explanation_item in localized_explanations: + identifier = explanation_item["scoreId"] + grouped_explanations[identifier] = explanation_item + localized_explanations = grouped_explanations + else: + localized_explanations = raw_explanations["metricInDependent"] + if len(localized_explanations) == 0: + localized_explanations = {} + else: + localized_explanations = localized_explanations[0] + return localized_explanations + + except Exception as e: + error_message = f"Benchmark scores: Error in Getting benchmark explanations: {e}" + logging.error(error_message, exc_info=True) raise Exception(error_message) \ No newline at end of file From 7f1962f5862c4005d3b2584532e9be766b7980ab Mon Sep 17 00:00:00 2001 From: Shreyas Sharma <85180538+shreyasXplain@users.noreply.github.com> Date: Tue, 19 Mar 2024 01:35:56 +0530 Subject: [PATCH 5/5] temp push 1 --- aixplain/modules/benchmark_job.py | 7 +++++++ aixplain/modules/metric.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/aixplain/modules/benchmark_job.py b/aixplain/modules/benchmark_job.py index 7dae2d96..f0506787 100644 --- a/aixplain/modules/benchmark_job.py +++ b/aixplain/modules/benchmark_job.py @@ -108,6 +108,13 @@ def __simplify_scores(self, scores): def get_scores(self, return_simplified=True, return_as_dataframe=True): + ## Temp + temp_data = [ + {"Model":"Llama 2 7b", "Score": 0.714}, + {"Model":"Llama 2 7b (Finetuned)", "Score": 0.742}, + ] + return pd.DataFrame(temp_data) + try: resp = self._fetch_current_response(self.id) iterations = resp.get("iterations", []) diff --git a/aixplain/modules/metric.py b/aixplain/modules/metric.py index 8d8844f0..a20fac07 100644 --- a/aixplain/modules/metric.py +++ b/aixplain/modules/metric.py @@ -24,7 +24,7 @@ from typing import Optional, Text, List, Union from aixplain.modules.asset import Asset from aixplain.utils.file_utils import _request_with_retry -from aixplain.factories.model_factory import ModelFactory +# from aixplain.factories.model_factory import ModelFactory class Metric(Asset):