M 5905660469 enhance benchmark job response #145
Changes from all commits: 9e57566, bbe26eb, 56998de, c1a868a, 7f1962f
@@ -92,3 +92,132 @@ def download_results_as_csv(self, save_path: Optional[Text] = None, return_dataf
            error_message = f"Downloading Benchmark Results: Error in Downloading Benchmark Results : {e}"
            logging.error(error_message, exc_info=True)
            raise Exception(error_message)

    def __simplify_scores(self, scores):
        """Flatten the per-model score dictionary into one row per model."""
        simplified_score_list = []
        for model_id, model_info in scores.items():
            model_scores = model_info["rawScores"]
            row = {"Model": model_id}
            for score_info in model_scores:
                row[score_info["longName"]] = score_info["average"]
Contributor: Make sure these fields are always there. Does it make sense to add a try/catch to handle the cases when they are not?

Collaborator (Author): These are mandatory fields and will always be there. In case of any issue, they are sent as None instead, but the key will always be present.
            simplified_score_list.append(row)
        return simplified_score_list
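Following up on the thread above: if one did want to guard against score fields arriving as None, a standalone defensive variant could look like the sketch below. The helper name and the skip-on-None policy are illustrative assumptions; only the field names rawScores, longName, and average come from the diff.

def simplify_scores_defensive(scores):
    # Sketch only: tolerate missing or None score fields instead of raising.
    simplified = []
    for model_id, model_info in scores.items():
        row = {"Model": model_id}
        for score_info in model_info.get("rawScores") or []:
            name = score_info.get("longName")
            average = score_info.get("average")
            if name is None or average is None:
                continue  # mandatory field came back as None; skip this score
            row[name] = average
        simplified.append(row)
    return simplified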
    def get_scores(self, return_simplified=True, return_as_dataframe=True):
        """Fetch per-model benchmark scores, optionally simplified and as a DataFrame."""
        try:
            resp = self._fetch_current_response(self.id)
            iterations = resp.get("iterations", [])
            scores = {}
            for iteration_info in iterations:
                model_id = iteration_info["pipeline"]
                model_info = {
                    "creditsUsed": round(iteration_info.get("credits", 0), 5),
                    "timeSpent": round(iteration_info.get("runtime", 0), 2),
                    "status": iteration_info["status"],
                    "rawScores": iteration_info["scores"],
                }
                scores[model_id] = model_info

            if return_simplified:
                simplified_scores = self.__simplify_scores(scores)
                if return_as_dataframe:
                    simplified_scores = pd.DataFrame(simplified_scores)
                return simplified_scores
            else:
                return scores
        except Exception as e:
            error_message = f"Benchmark scores: Error in getting benchmark scores: {e}"
            logging.error(error_message, exc_info=True)
            raise Exception(error_message)
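A hypothetical usage sketch for the method above (benchmark_job is an assumed, already-created benchmark job object; only the method name and its flags come from the diff):

# Simplified scores as a pandas DataFrame (one row per model):
df = benchmark_job.get_scores()
# Full, unsimplified dict keyed by model id, with credits, runtime, status and raw scores:
raw = benchmark_job.get_scores(return_simplified=False)
print(df.head())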
    def get_failure_rate(self, return_as_dataframe=True):
        """Compute, per model, the percentage of segments that failed."""
        try:
            scores = self.get_scores(return_simplified=False)
            failure_rates = {}
            for model_id, model_info in scores.items():
                if len(model_info["rawScores"]) == 0:
                    failure_rates[model_id] = 0
                    continue
                score_info = model_info["rawScores"][0]
                num_successful = score_info["count"]
                num_failed = score_info["failedSegmentsCount"]
                failure_rate = (num_failed * 100) / (num_successful + num_failed)
                failure_rates[model_id] = failure_rate
            if return_as_dataframe:
                df = pd.DataFrame()
                df["Model"] = list(failure_rates.keys())
                df["Failure Rate"] = list(failure_rates.values())
                return df
            else:
                return failure_rates
        except Exception as e:
            error_message = f"Benchmark scores: Error in getting benchmark failure rate: {e}"
            logging.error(error_message, exc_info=True)
            raise Exception(error_message)
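The rate is failed / (successful + failed) × 100, taken from the first rawScores entry: for example, 5 failed segments alongside 45 successful ones gives (5 × 100) / 50 = 10.0. A hypothetical call (object name assumed as above):

rates = benchmark_job.get_failure_rate()  # DataFrame with "Model" and "Failure Rate" columns
print(rates)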
    def get_all_explanations(self):
        """Return all benchmark explanations, with both explanation keys always present."""
        try:
            resp = self._fetch_current_response(self.id)
            raw_explanations = resp.get("explanation", {})
            if "metricInDependent" not in raw_explanations:
                raw_explanations["metricInDependent"] = []
            if "metricDependent" not in raw_explanations:
                raw_explanations["metricDependent"] = []
            return raw_explanations
        except Exception as e:
            error_message = f"Benchmark scores: Error in getting benchmark explanations: {e}"
            logging.error(error_message, exc_info=True)
            raise Exception(error_message)
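Because both keys are backfilled with empty lists, callers can index them directly. A hypothetical call (object name assumed):

explanations = benchmark_job.get_all_explanations()
print(len(explanations["metricDependent"]))    # per-metric explanation items
print(len(explanations["metricInDependent"]))  # metric-independent explanation items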
    def get_localized_explanations(self, metric_dependant: bool, group_by_task: bool = False):
        """Return metric-dependent explanations grouped by scoreId or by task, or the metric-independent explanations."""
        try:
            raw_explanations = self.get_all_explanations()
            if metric_dependant:
                localized_explanations = raw_explanations["metricDependent"]
                if len(localized_explanations) == 0:
                    localized_explanations = {}
                else:
                    grouped_explanations = {}
                    task_list = []
                    first_explanation = localized_explanations[0]
                    for task in first_explanation:
                        if task not in ["scoreId", "datasetId"]:
                            task_list.append(task)

                    if group_by_task:
                        for task in task_list:
                            task_explanation = {}
                            for explanation_item in localized_explanations:
                                item_task_explanation = explanation_item[task]
                                identifier = explanation_item["scoreId"]
                                task_explanation[identifier] = item_task_explanation
                            grouped_explanations[task] = task_explanation
                    else:
                        for explanation_item in localized_explanations:
                            identifier = explanation_item["scoreId"]
                            grouped_explanations[identifier] = explanation_item
                    localized_explanations = grouped_explanations
            else:
                localized_explanations = raw_explanations["metricInDependent"]
                if len(localized_explanations) == 0:
                    localized_explanations = {}
                else:
                    localized_explanations = localized_explanations[0]
            return localized_explanations
        except Exception as e:
            error_message = f"Benchmark scores: Error in getting benchmark explanations: {e}"
            logging.error(error_message, exc_info=True)
            raise Exception(error_message)
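A usage sketch for the two grouping modes (benchmark_job is again an assumed object name; the parameters come from the diff):

# Metric-dependent explanations keyed by scoreId (default grouping):
by_score = benchmark_job.get_localized_explanations(metric_dependant=True)
# The same explanations regrouped as {task: {scoreId: explanation}}:
by_task = benchmark_job.get_localized_explanations(metric_dependant=True, group_by_task=True)
# Metric-independent explanations (the first item, or {} when none exist):
metric_free = benchmark_job.get_localized_explanations(metric_dependant=False)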
Contributor: Supplier is a tricky field. Check the different ways we handled it here to avoid bugs:
https://github.com/aixplain/aiXplain/blob/main/aixplain/modules/asset.py#L53C9-L61C14

Collaborator (Author): I saw a case where supplier was a dict but in the form of a string, hence the extra condition. Since we do not work on raw data from the backend API, all the cases handled in the model factory make sure that supplier is an instance of Supplier or, in the worst case, a string. So the benchmark factory does not need to do the same again.
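A sketch of the kind of supplier normalization the linked asset.py comment is about might look like the helper below. This is not a copy of the linked code: the function name, the enum handling via .value, and the fallback order are all assumptions; only the "dict in the form of a string" case comes from the thread above.

import ast

def normalize_supplier(supplier):
    # Accept a dict payload, a plain string, a dict serialized as a string,
    # or an enum-like object, and return a simple identifier.
    if isinstance(supplier, dict):
        return supplier.get("code") or supplier.get("name")
    if isinstance(supplier, str):
        try:
            parsed = ast.literal_eval(supplier)  # the "dict as a string" case
            if isinstance(parsed, dict):
                return parsed.get("code") or parsed.get("name")
        except (ValueError, SyntaxError):
            pass
        return supplier
    return getattr(supplier, "value", supplier)  # e.g. a Supplier enum member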