22 changes: 22 additions & 0 deletions aixplain/factories/benchmark_factory.py
@@ -26,6 +26,7 @@
import json
import pandas as pd
from pathlib import Path
from aixplain.enums.supplier import Supplier
from aixplain.modules import Dataset, Metric, Model
from aixplain.modules.benchmark_job import BenchmarkJob
from aixplain.modules.benchmark import Benchmark
@@ -237,3 +238,24 @@ def list_normalization_options(cls, metric: Metric, model: Model) -> List[str]:
error_message = f"Listing Normalization Options: Error in getting Normalization Options: {e}"
logging.error(error_message, exc_info=True)
return []

    @classmethod
    def get_benchmark_job_scores(cls, job_id):
        def __get_model_name(model_id):
            model = ModelFactory.get(model_id)
            supplier = model.supplier
            try:
                if isinstance(supplier, Supplier):
Contributor:
Supplier is a tricky field. Check the different ways we handled it here to avoid bugs:

https://github.com/aixplain/aiXplain/blob/main/aixplain/modules/asset.py#L53C9-L61C14

Collaborator (Author):

I saw a case where the supplier was a dict but in the form of a string, hence the extra condition. Since we do not work on raw data from the BE API, the cases handled in the model factory ensure that supplier is an instance of Supplier or, in the worst case, a string, so the benchmark factory does not need to handle the same cases again. (A standalone sketch of this fallback order follows this file's diff.)

name = f"{supplier.name}"
else:
name = f"{eval(supplier)['name']}"
except Exception as e:
logging.error(f"{e}")
name = f"{supplier}"
if model.version is not None:
name = f"{name}({model.version})"
return name
benchmarkJob = cls.get_job(job_id)
scores_df = benchmarkJob.get_scores()
scores_df["Model"] = scores_df["Model"].apply(lambda x: __get_model_name(x))
return scores_df
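
To make the review thread above concrete, the following is a minimal, self-contained sketch of the fallback order the author describes: supplier is expected to arrive as a Supplier enum member or, at worst, as a string, which may be a dict serialized into string form. The Supplier member and the sample values are hypothetical, and ast.literal_eval is used as a safer stand-in for the eval() call in the diff.

import ast
import logging
from enum import Enum


class Supplier(Enum):
    # Hypothetical stand-in for aixplain.enums.supplier.Supplier; the member is illustrative.
    OPENAI = "openai"


def resolve_supplier_name(supplier) -> str:
    """Return a display name whether supplier is a Supplier member or a string."""
    if isinstance(supplier, Supplier):
        return supplier.name
    try:
        # Handles a dict serialized into a string, e.g. "{'id': 1, 'name': 'OpenAI'}".
        return ast.literal_eval(supplier)["name"]
    except Exception as e:
        logging.error(f"{e}")
        return str(supplier)


print(resolve_supplier_name(Supplier.OPENAI))                 # OPENAI
print(resolve_supplier_name("{'id': 1, 'name': 'OpenAI'}"))   # OpenAI
print(resolve_supplier_name("OpenAI"))                        # OpenAI
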
129 changes: 129 additions & 0 deletions aixplain/modules/benchmark_job.py
@@ -92,3 +92,132 @@ def download_results_as_csv(self, save_path: Optional[Text] = None, return_dataf
error_message = f"Downloading Benchmark Results: Error in Downloading Benchmark Results : {e}"
logging.error(error_message, exc_info=True)
raise Exception(error_message)

    def __simplify_scores(self, scores):
        simplified_score_list = []
        for model_id, model_info in scores.items():
            model_scores = model_info["rawScores"]
            row = {"Model": model_id}
            for score_info in model_scores:
                row[score_info["longName"]] = score_info["average"]
Contributor:
Make sure these fields are always there. Does it make sense to add a try/except to handle the case when they are not?

Collaborator (Author):

These are mandatory fields and will always be there. If there is any issue, their values are sent as None instead, but the keys will always be present. (A defensive variant is sketched right after __simplify_scores below.)

            simplified_score_list.append(row)
        return simplified_score_list
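
Following the review note above about the "longName" and "average" fields, here is a hedged sketch of a defensive variant of the flattening logic, not the PR's implementation: it tolerates a missing key or a None value per score entry. The helper name and the sample payload are illustrative only.

import pandas as pd


def simplify_scores_defensive(scores: dict) -> pd.DataFrame:
    """Illustrative variant of __simplify_scores that tolerates missing keys or None values."""
    rows = []
    for model_id, model_info in scores.items():
        row = {"Model": model_id}
        for score_info in model_info.get("rawScores", []):
            metric_name = score_info.get("longName") or "Unknown metric"
            row[metric_name] = score_info.get("average")  # may be None when a segment failed
        rows.append(row)
    return pd.DataFrame(rows)


# Hypothetical payload with a None average, as described in the author's reply.
sample = {"model-1": {"rawScores": [{"longName": "BLEU", "average": None}]}}
print(simplify_scores_defensive(sample))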




    def get_scores(self, return_simplified=True, return_as_dataframe=True):
        try:
            resp = self._fetch_current_response(self.id)
            iterations = resp.get("iterations", [])
            scores = {}
            for iteration_info in iterations:
                model_id = iteration_info["pipeline"]
                model_info = {
                    "creditsUsed": round(iteration_info.get("credits", 0), 5),
                    "timeSpent": round(iteration_info.get("runtime", 0), 2),
                    "status": iteration_info["status"],
                    "rawScores": iteration_info["scores"],
                }
                scores[model_id] = model_info

            if return_simplified:
                simplified_scores = self.__simplify_scores(scores)
                if return_as_dataframe:
                    simplified_scores = pd.DataFrame(simplified_scores)
                return simplified_scores
            else:
                return scores
        except Exception as e:
            error_message = f"Benchmark scores: Error in Getting benchmark scores: {e}"
            logging.error(error_message, exc_info=True)
            raise Exception(error_message)


    def get_failure_rate(self, return_as_dataframe=True):
        try:
            scores = self.get_scores(return_simplified=False)
            failure_rates = {}
            for model_id, model_info in scores.items():
                if len(model_info["rawScores"]) == 0:
                    failure_rates[model_id] = 0
                    continue
                score_info = model_info["rawScores"][0]
                num_successful = score_info["count"]
                num_failed = score_info["failedSegmentsCount"]
                failure_rate = (num_failed * 100) / (num_successful + num_failed)
                failure_rates[model_id] = failure_rate
            if return_as_dataframe:
                df = pd.DataFrame()
                df["Model"] = list(failure_rates.keys())
                df["Failure Rate"] = list(failure_rates.values())
                return df
            else:
                return failure_rates
        except Exception as e:
            error_message = f"Benchmark scores: Error in Getting benchmark failure rate: {e}"
            logging.error(error_message, exc_info=True)
            raise Exception(error_message)

    def get_all_explanations(self):
        try:
            resp = self._fetch_current_response(self.id)
            raw_explanations = resp.get("explanation", {})
            if "metricInDependent" not in raw_explanations:
                raw_explanations["metricInDependent"] = []
            if "metricDependent" not in raw_explanations:
                raw_explanations["metricDependent"] = []
            return raw_explanations
        except Exception as e:
            error_message = f"Benchmark scores: Error in Getting benchmark explanations: {e}"
            logging.error(error_message, exc_info=True)
            raise Exception(error_message)

    def get_localized_explanations(self, metric_dependant: bool, group_by_task: bool = False):
        try:
            raw_explanations = self.get_all_explanations()
            if metric_dependant:
                localized_explanations = raw_explanations["metricDependent"]
                if len(localized_explanations) == 0:
                    localized_explanations = {}
                else:
                    grouped_explanations = {}
                    task_list = []
                    first_explanation = localized_explanations[0]
                    for task in first_explanation:
                        if task not in ["scoreId", "datasetId"]:
                            task_list.append(task)

                    if group_by_task:
                        for task in task_list:
                            task_explanation = {}
                            for explanation_item in localized_explanations:
                                item_task_explanation = explanation_item[task]
                                identifier = explanation_item["scoreId"]
                                task_explanation[identifier] = item_task_explanation
                            grouped_explanations[task] = task_explanation
                    else:
                        for explanation_item in localized_explanations:
                            identifier = explanation_item["scoreId"]
                            grouped_explanations[identifier] = explanation_item
                    localized_explanations = grouped_explanations
            else:
                localized_explanations = raw_explanations["metricInDependent"]
                if len(localized_explanations) == 0:
                    localized_explanations = {}
                else:
                    localized_explanations = localized_explanations[0]
            return localized_explanations
        except Exception as e:
            error_message = f"Benchmark scores: Error in Getting benchmark explanations: {e}"
            logging.error(error_message, exc_info=True)
            raise Exception(error_message)
2 changes: 1 addition & 1 deletion aixplain/modules/metric.py
@@ -24,7 +24,7 @@
from typing import Optional, Text, List, Union
from aixplain.modules.asset import Asset
from aixplain.utils.file_utils import _request_with_retry
from aixplain.factories.model_factory import ModelFactory
# from aixplain.factories.model_factory import ModelFactory


class Metric(Asset):
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -10,7 +10,7 @@ namespaces = true

[project]
name = "aiXplain"
version = "0.2.4"
version = "0.2.5rc"
description = "aiXplain SDK adds AI functions to software."
readme = "README.md"
requires-python = ">=3.5, <4"