generated from amazon-archives/__template_Apache-2.0
-
Notifications
You must be signed in to change notification settings - Fork 11
Add errors metadata in agent debug info with granular sdk client error metrics #32
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
a659e33
Add errors metadata in agent debug info with granular sdk client erro…
pandpara 8f30db7
Fix error count calculation for Rnfe errors
pandpara a33c28c
Add back synchronization util
pandpara 3626cfd
Add back synchronization util
pandpara b2c8b89
Incorporating review comments of revision 2
pandpara 2fda871
Using utcfromtimestamp to get to_iso time to make sure local timezone…
pandpara File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
124 changes: 124 additions & 0 deletions
124
codeguru_profiler_agent/agent_metadata/agent_debug_info.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,124 @@ | ||
| import logging | ||
| import os | ||
|
|
||
| from codeguru_profiler_agent.utils.synchronization import synchronized | ||
| from codeguru_profiler_agent.utils.time import to_iso | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
class ErrorsMetadata:
    """Counters for the errors hit by the agent, split per SDK API call.

    Every counter starts at zero (see ``reset``) and is exposed under a
    camelCase key by ``serialize_to_json``.
    """

    # (attribute name, key used in the serialized errors-count document),
    # listed in the order the keys are emitted.
    _COUNTER_SCHEMA = (
        ("errors_count", "errorsCount"),
        ("sdk_client_errors", "sdkClientErrors"),
        ("configure_agent_errors", "configureAgentErrors"),
        ("configure_agent_rnfe_auto_create_enabled_errors",
         "configureAgentRnfeAutoCreateEnabledErrors"),
        ("create_profiling_group_errors", "createProfilingGroupErrors"),
        ("post_agent_profile_errors", "postAgentProfileErrors"),
        ("post_agent_profile_rnfe_auto_create_enabled_errors",
         "postAgentProfileRnfeAutoCreateEnabledErrors"),
    )

    # API-level counters bumped for each recognised error type. The "Rnfe"
    # (ResourceNotFoundException) variants are also counted against their
    # parent API counter, e.g. a configureAgentRnfeAutoCreateEnabledErrors
    # is also a configureAgentErrors.
    _API_COUNTERS_BY_ERROR_TYPE = {
        "configureAgentErrors": ("configure_agent_errors",),
        "configureAgentRnfeAutoCreateEnabledErrors": (
            "configure_agent_errors",
            "configure_agent_rnfe_auto_create_enabled_errors",
        ),
        "createProfilingGroupErrors": ("create_profiling_group_errors",),
        "postAgentProfileErrors": ("post_agent_profile_errors",),
        "postAgentProfileRnfeAutoCreateEnabledErrors": (
            "post_agent_profile_errors",
            "post_agent_profile_rnfe_auto_create_enabled_errors",
        ),
    }

    def __init__(self):
        self.reset()

    def reset(self):
        """
        Set every counter back to zero.

        API call errors are tracked granularly. ResourceNotFoundException
        errors get their own counters because they are expected with the
        auto-create feature, and we want to monitor how often the agent fails
        to create the profiling group and keeps receiving that exception.
        """
        for attribute, _ in self._COUNTER_SCHEMA:
            setattr(self, attribute, 0)

    def serialize_to_json(self):
        """
        Return the counters as a dict keyed by their camelCase names.

        This needs to be compliant with errors count schema.
        """
        return {key: getattr(self, attribute) for attribute, key in self._COUNTER_SCHEMA}

    @synchronized
    def increment_sdk_error(self, error_type):
        """
        Count one SDK client error of the given type.

        ErrorsCount is the umbrella for every kind of error we capture;
        currently it only contains SdkClientErrors, which in turn is made up
        of API-level errors such as ConfigureAgentErrors,
        PostAgentProfileErrors and CreateProfilingGroupErrors.

        An ``error_type`` that is not one of the recognised names still
        increases the two umbrella totals, but no API-level counter.

        :param error_type: The type of API level error that we want to capture.
        """
        self.errors_count += 1
        self.sdk_client_errors += 1

        for attribute in self._API_COUNTERS_BY_ERROR_TYPE.get(error_type, ()):
            setattr(self, attribute, getattr(self, attribute) + 1)

    def record_sdk_error(self, error_type):
        """Record one SDK error; delegates to ``increment_sdk_error``."""
        self.increment_sdk_error(error_type)
|
|
||
|
|
||
class AgentDebugInfo:
    """Debug information about the running agent, serializable to a dict.

    Holds the process id (looked up at construction time), an optional errors
    metadata object, an optional agent start time and an optional timer whose
    ``metrics`` mapping is summarised in the output.
    """

    def __init__(self, errors_metadata=None, agent_start_time=None, timer=None):
        self.process_id = get_process_id()
        self.errors_metadata = errors_metadata
        self.agent_start_time = agent_start_time
        self.timer = timer

    def serialize_to_json(self):
        """
        Build the debug-info dict; entries with no data are left out.

        This needs to be compliant with agent debug info schema.
        """
        payload = {}
        contributors = (
            self.add_agent_start_time,
            self.add_process_id,
            self.add_errors_metadata,
            self.add_generic_metrics,
        )
        for contribute in contributors:
            contribute(payload)
        return payload

    def add_agent_start_time(self, json):
        """Store the start time under "agentStartTime" (via ``to_iso``) when it is set."""
        if self.agent_start_time is None:
            return
        json["agentStartTime"] = to_iso(self.agent_start_time)

    def add_errors_metadata(self, json):
        """Store the serialized errors metadata under "errorsCount" when it is set."""
        if self.errors_metadata is None:
            return
        json["errorsCount"] = self.errors_metadata.serialize_to_json()

    def add_process_id(self, json):
        """Store the process id under "processId" when it is known."""
        if self.process_id is None:
            return
        json["processId"] = self.process_id

    def add_generic_metrics(self, json):
        """
        Store max and average of every timer metric under "genericMetrics".

        Nothing is added when there is no timer or the timer has no metrics.
        """
        if self.timer is None or not self.timer.metrics:
            return

        summary = {}
        for name, stats in self.timer.metrics.items():
            summary[name + "_timings_max"] = stats.max
            summary[name + "_timings_average"] = stats.average()

        if summary:
            json["genericMetrics"] = summary
|
|
||
|
|
||
def get_process_id():
    """Return the id of the current process, or None if it cannot be obtained.

    Best effort: any failure of ``os.getpid()`` is logged at INFO level with
    the traceback attached, and None is returned instead of raising.
    """
    try:
        return os.getpid()
    except Exception:
        # Broad on purpose: a missing pid must never break the caller.
        logger.info("Failed to get the process id", exc_info=True)
        return None
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| import functools | ||
| import threading | ||
|
|
||
|
|
||
def synchronized(wrapped):
    """Decorate ``wrapped`` so each call runs while holding a re-entrant lock.

    A single ``threading.RLock`` is created when the decorator is applied and
    is shared by every call to the returned wrapper. Because the lock is
    re-entrant, the decorated callable may call itself from the same thread.

    Based on "the missing @synchronized decorator": https://git.io/vydTA
    """
    guard = threading.RLock()

    def _wrapper(*args, **kwargs):
        guard.acquire()
        try:
            return wrapped(*args, **kwargs)
        finally:
            guard.release()

    # Copy name, docstring and other metadata from the wrapped callable.
    return functools.wraps(wrapped)(_wrapper)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.