Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion codeguru_profiler_agent/agent_metadata/aws_lambda.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,17 @@
import logging
import uuid

from unittest.mock import MagicMock
from codeguru_profiler_agent.agent_metadata.fleet_info import FleetInfo
from codeguru_profiler_agent.aws_lambda.lambda_context import LambdaContext

logger = logging.getLogger(__name__)

LAMBDA_MEMORY_SIZE_ENV = "AWS_LAMBDA_FUNCTION_MEMORY_SIZE"
LAMBDA_EXECUTION_ENV = "AWS_EXECUTION_ENV"
HANDLER_ENV_NAME_FOR_CODEGURU_KEY = "HANDLER_ENV_NAME_FOR_CODEGURU"
LAMBDA_TASK_ROOT = "LAMBDA_TASK_ROOT"
LAMBDA_RUNTIME_DIR = "LAMBDA_RUNTIME_DIR"

# Those are used for the configure agent call:
# See https://docs.aws.amazon.com/codeguru/latest/profiler-api/API_ConfigureAgent.html
Expand Down Expand Up @@ -100,7 +104,12 @@ def get_metadata_for_configure_agent_call(self, lambda_context=None):
as_map[LAMBDA_MEMORY_LIMIT_IN_MB_KEY] = str(self.memory_limit_mb)
if self.execution_env:
as_map[EXECUTION_ENVIRONMENT_KEY] = self.execution_env
if lambda_context.context is not None:

'''
Adding a specific condition to ignore MagicMock instances from being added to the metadata since
it causes boto to raise a ParamValidationError, similar to https://github.com/boto/botocore/issues/2063.
'''
if lambda_context.context is not None and not isinstance(lambda_context.context, MagicMock):
as_map[AWS_REQUEST_ID_KEY] = lambda_context.context.aws_request_id
as_map[LAMBDA_REMAINING_TIME_IN_MILLISECONDS_KEY] = \
str(lambda_context.context.get_remaining_time_in_millis())
Expand Down
7 changes: 3 additions & 4 deletions codeguru_profiler_agent/aws_lambda/lambda_handler.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
import os
import logging
from codeguru_profiler_agent.aws_lambda.profiler_decorator import with_lambda_profiler

from codeguru_profiler_agent.agent_metadata.aws_lambda import HANDLER_ENV_NAME_FOR_CODEGURU_KEY
HANDLER_ENV_NAME = "_HANDLER"
HANDLER_ENV_NAME_FOR_CODEGURU = "HANDLER_ENV_NAME_FOR_CODEGURU"
logger = logging.getLogger(__name__)


def restore_handler_env(original_handler, env=os.environ):
    """
    Store the given handler name under the HANDLER_ENV_NAME key of the environment.

    :param original_handler: value to write under the HANDLER_ENV_NAME key
    :param env: mapping that receives the value, default is os.environ
    """
    env[HANDLER_ENV_NAME] = original_handler


def load_handler(bootstrap_module, env=os.environ, original_handler_env_key=HANDLER_ENV_NAME_FOR_CODEGURU):
def load_handler(bootstrap_module, env=os.environ, original_handler_env_key=HANDLER_ENV_NAME_FOR_CODEGURU_KEY):
try:
original_handler_name = env.get(original_handler_env_key)
if not original_handler_name:
raise ValueError("Could not find module and function name from " + HANDLER_ENV_NAME_FOR_CODEGURU
raise ValueError("Could not find module and function name from " + HANDLER_ENV_NAME_FOR_CODEGURU_KEY
+ " environment variable")

# Delegate to the lambda code to load the customer's module.
Expand Down
3 changes: 2 additions & 1 deletion codeguru_profiler_agent/aws_lambda/profiler_decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ def _create_lambda_profiler(profiling_group_name, region_name, environment_overr
from codeguru_profiler_agent.agent_metadata.aws_lambda import AWSLambda
override = {'agent_metadata': AgentMetadata(AWSLambda.look_up_metadata(context))}
override.update(environment_override)
profiler = build_profiler(pg_name=profiling_group_name, region_name=region_name, override=override, env=env)
profiler = build_profiler(pg_name=profiling_group_name, region_name=region_name, override=override, env=env,
should_autocreate_profiling_group=True)
if profiler is None:
return _EmptyProfiler()
return profiler
Expand Down
14 changes: 13 additions & 1 deletion codeguru_profiler_agent/local_aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from codeguru_profiler_agent.metrics.with_timer import with_timer
from codeguru_profiler_agent.model.profile import Profile
from codeguru_profiler_agent.utils.time import current_milli_time
from codeguru_profiler_agent.sdk_reporter.sdk_reporter import SdkReporter

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -100,13 +101,24 @@ def refresh_configuration(self):
self.reporter.refresh_configuration()

def _report_profile(self, now):
    """
    Attempt to report the current profile through the reporter.

    :param now: timestamp stored in last_report_attempted for this attempt.
    :return: False when the profile is empty and nothing is sent; otherwise the value returned
        by self.reporter.report().
    """
    previous_last_report_attempted_value = self.last_report_attempted
    self.last_report_attempted = now
    self._add_overhead_metric_to_profile()
    logger.info("Attempting to report profile data: " + str(self.profile))
    if self.profile.is_empty():
        logger.info("Report was cancelled because it was empty")
        return False
    is_reporting_successful = self.reporter.report(self.profile)
    # If we attempted to create a Profiling Group in the report() call, we do not want to keep the
    # updated last_report_attempted value, since we did not actually report a profile.
    # This will occur only in the case of profiling using CodeGuru Profiler Python agent Lambda layer.
    #
    # check_create_pg_called_during_submit_profile is a classmethod and has to be called:
    # comparing the method object itself with True never succeeds, which would leave the
    # timestamp un-restored and the flag permanently set.
    if SdkReporter.check_create_pg_called_during_submit_profile():
        self.last_report_attempted = previous_last_report_attempted_value
        SdkReporter.reset_check_create_pg_called_during_submit_profile_flag()
    return is_reporting_successful

def _is_under_min_reporting_time(self, now):
return AgentConfiguration.get().is_under_min_reporting_time(now - self.last_report_attempted)
Expand Down
16 changes: 12 additions & 4 deletions codeguru_profiler_agent/profiler_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
CREDENTIAL_PATH = "AWS_CODEGURU_PROFILER_CREDENTIAL_PATH"
ENABLED_ENV = "AWS_CODEGURU_PROFILER_ENABLED"

# Environment variables provided by AWS Lambda
AWS_LAMBDA_FUNCTION_NAME_ENV_VAR_KEY = "AWS_LAMBDA_FUNCTION_NAME"

# non documented parameters
SAMPLING_INTERVAL = "AWS_CODEGURU_PROFILER_SAMPLING_INTERVAL_MS"
REPORTING_INTERVAL = "AWS_CODEGURU_PROFILER_REPORTING_INTERVAL_MS"
Expand Down Expand Up @@ -111,7 +114,8 @@ def _check_credential_through_environment(env=os.environ):


def build_profiler(pg_name=None, region_name=None, credential_profile=None,
env=os.environ, session_factory=boto3.session.Session, profiler_factory=None, override=None):
env=os.environ, session_factory=boto3.session.Session, profiler_factory=None, override=None,
should_autocreate_profiling_group=False):
"""
Creates a Profiler object from given parameters or environment variables
:param pg_name: given profiling group name, default is None
Expand All @@ -120,6 +124,7 @@ def build_profiler(pg_name=None, region_name=None, credential_profile=None,
:param env: environment variables are used if parameters are not provided, default is os.environ
:param session_factory: (For testing) function for creating boto3.session.Session, default is boto3.session.Session
:param override: a dictionary with possible extra parameters to override default values
:param should_autocreate_profiling_group: True when Compute Platform is AWS Lambda. False otherwise
:return: a Profiler object or None, this function does not throw exceptions
"""
if profiler_factory is None:
Expand All @@ -137,9 +142,12 @@ def build_profiler(pg_name=None, region_name=None, credential_profile=None,
name_from_arn, region_from_arn, _account_id = _read_profiling_group_arn(env)
profiling_group_name = _get_profiling_group_name(pg_name, name_from_arn, env)
if not profiling_group_name:
logger.info("Could not find a profiling group name to start the CodeGuru Profiler agent. "
+ "Add command line argument or environment variable. e.g. " + PG_ARN_ENV)
return None
if should_autocreate_profiling_group:
profiling_group_name = "aws-lambda-" + env.get(AWS_LAMBDA_FUNCTION_NAME_ENV_VAR_KEY)
else:
logger.info("Could not find a profiling group name to start the CodeGuru Profiler agent. "
+ "Add command line argument or environment variable. e.g. " + PG_ARN_ENV)
return None
region = _get_region(region_name, region_from_arn, env)
session = session_factory(region_name=region, profile_name=credential_profile)

Expand Down
80 changes: 76 additions & 4 deletions codeguru_profiler_agent/sdk_reporter/sdk_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,25 @@

import logging
import io
import os

from botocore.exceptions import ClientError
from codeguru_profiler_agent.utils.log_exception import log_exception
from codeguru_profiler_agent.reporter.reporter import Reporter
from codeguru_profiler_agent.metrics.with_timer import with_timer
from codeguru_profiler_agent.sdk_reporter.profile_encoder import ProfileEncoder
from codeguru_profiler_agent.agent_metadata.aws_lambda import HANDLER_ENV_NAME_FOR_CODEGURU_KEY, \
LAMBDA_TASK_ROOT, LAMBDA_RUNTIME_DIR

logger = logging.getLogger(__name__)

AWS_EXECUTION_ENV_KEY = "AWS_EXECUTION_ENV"

class SdkReporter(Reporter):
"""
Handles communication with the CodeGuru Profiler Service backend.
Encodes profiles using the ProfilerEncoder and reports them using the CodeGuru profiler SDK.
"""

is_create_pg_called_during_submit_profile = False
def __init__(self, environment):
"""
:param environment: dependency container dictionary for the current profiler.
Expand Down Expand Up @@ -51,6 +54,11 @@ def setup(self):
def refresh_configuration(self):
"""
Refresh the agent configuration by calling the profiler backend service.

Note:
For an agent running on AWS Lambda, if the environment variables for Profiling using
Lambda layers are set, it tries to create a Profiling Group whenever a ResourceNotFoundException
is encountered.
"""
try:
fleet_instance_id = self.metadata.fleet_info.get_fleet_instance_id()
Expand All @@ -67,9 +75,18 @@ def refresh_configuration(self):
# whole process because the customer may fix this on their side by creating/changing the profiling group.
# We handle service exceptions like this in boto3
# see https://boto3.amazonaws.com/v1/documentation/api/latest/guide/error-handling.html
if error.response['Error']['Code'] in ['ResourceNotFoundException', 'ValidationException']:
if error.response['Error']['Code'] == 'ValidationException':
self.agent_config_merger.disable_profiling()
self._log_request_failed(operation="configure_agent", exception=error)
self._log_request_failed(operation="configure_agent", exception=error)
if error.response['Error']['Code'] == 'ResourceNotFoundException':
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question here: what configuration will be used in case of ResourceNotFoundException and ValidationException?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default configuration defined here

if self.should_auto_create_profiling_group():
logger.info(
"Profiling group not found. Will try to create a profiling group "
"with name = {} and compute platform = {} and retry calling configure agent after 5 minutes"
.format(self.profiling_group_name, 'AWSLambda'))
self.create_profiling_group()
else:
self.agent_config_merger.disable_profiling()
except Exception as e:
self._log_request_failed(operation="configure_agent", exception=e)

Expand All @@ -80,6 +97,11 @@ def report(self, profile):

:param profile: Profile to be encoded and reported to the profiler backend service.
:return: True if profile gets reported successfully; False otherwise.

Note:
For an agent running on AWS Lambda, if the environment variables for Profiling using
Lambda layers are set, it tries to create a Profiling Group whenever a ResourceNotFoundException
is encountered.
"""
try:
profile_stream = self._encode_profile(profile)
Expand All @@ -90,11 +112,61 @@ def report(self, profile):
)
logger.info("Reported profile successfully")
return True
except ClientError as error:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please update the documentation of this function to say that it creates a PG, in the case of using Lambda layers, if it throws ResourceNotFoundException? It would be clearer when reading the code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done!

if error.response['Error']['Code'] == 'ResourceNotFoundException':
if self.should_auto_create_profiling_group():
self.__class__.is_create_pg_called_during_submit_profile = True
logger.info(
"Profiling group not found. Will try to create a profiling group "
"with name = {} and compute platform = {}".format(self.profiling_group_name, 'AWSLambda'))
self.create_profiling_group()
return False
except Exception as e:
self._log_request_failed(operation="post_agent_profile", exception=e)
return False

@with_timer("createProfilingGroup", measurement="wall-clock-time")
def create_profiling_group(self):
"""
Create a Profiling Group for the AWS Lambda function.
"""
try:
self.codeguru_client_builder.codeguru_client.create_profiling_group(
profilingGroupName=self.profiling_group_name,
computePlatform='AWSLambda'
)
logger.info("Created Lambda Profiling Group with name " + str(self.profiling_group_name))
except ClientError as error:
if error.response['Error']['Code'] == 'ConflictException':
logger.info("Profiling Group with name {} already exists. Please use a different name."
.format(self.profiling_group_name))
except Exception as e:
Copy link
Contributor

@pandpara pandpara Mar 31, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Handle ConflictException too by logging an appropriate message and setting is_profiling_group_created_during_execution if required.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

self._log_request_failed(operation="create_profiling_group", exception=e)

def should_auto_create_profiling_group(self):
    """
    Tell whether a Profiling Group should be created automatically.

    For now the only condition checked is that the Compute Platform is AWS Lambda.
    Additional checks could be placed inside this method in the future.
    """
    running_on_lambda = self.is_compute_platform_lambda()
    return running_on_lambda

def is_compute_platform_lambda(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a wrapper method should_auto_create_profiling_group which checks if the compute platform is lambda for now. Use that wrapper method while deciding to call create_profiling_group in report and refresh_configuration methods.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

"""
Check if the compute platform is AWS Lambda.
"""
does_lambda_task_root_exist = os.environ.get(LAMBDA_TASK_ROOT)
does_lambda_runtime_dir_exist = os.environ.get(LAMBDA_RUNTIME_DIR)
return bool(does_lambda_task_root_exist) and bool(does_lambda_runtime_dir_exist)

@staticmethod
def _log_request_failed(operation, exception):
    """
    Log a failed call to the CodeGuru Profiler service.

    :param operation: name of the operation that failed, inserted into the logged message
    :param exception: the exception that was raised; its string form is inserted into the message
    """
    template = "Failed to call the CodeGuru Profiler service for the {} operation: {}"
    message = template.format(operation, str(exception))
    log_exception(logger, message)

@classmethod
def check_create_pg_called_during_submit_profile(cls):
    """
    Return the current value of the class-level is_create_pg_called_during_submit_profile flag.
    """
    create_pg_was_called = cls.is_create_pg_called_during_submit_profile
    return create_pg_was_called

@classmethod
def reset_check_create_pg_called_during_submit_profile_flag(cls):
    """
    Set the class-level is_create_pg_called_during_submit_profile flag back to False.
    """
    setattr(cls, "is_create_pg_called_during_submit_profile", False)
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import os

from datetime import timedelta
from mock import patch
from unittest.mock import patch
from pathlib import Path

from codeguru_profiler_agent.profiler import Profiler
Expand Down
5 changes: 4 additions & 1 deletion test/acceptance/test_end_to_end_profiling.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from botocore.stub import Stubber, ANY
from datetime import timedelta
from mock import patch
from unittest.mock import patch
from test.pytestutils import before

from codeguru_profiler_agent.profiler import Profiler
Expand Down Expand Up @@ -33,6 +33,9 @@ def test_report_when_stopped(self):
with \
patch(
"codeguru_profiler_agent.reporter.agent_configuration.AgentConfiguration.is_under_min_reporting_time",
return_value=False), \
patch(
"codeguru_profiler_agent.sdk_reporter.sdk_reporter.SdkReporter.check_create_pg_called_during_submit_profile",
return_value=False):
with self.client_stubber:
self.profiler.start()
Expand Down
6 changes: 5 additions & 1 deletion test/acceptance/test_live_profiling.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import time

from datetime import timedelta
from mock import patch
from unittest.mock import patch

from codeguru_profiler_agent.reporter.agent_configuration import AgentConfiguration
from codeguru_profiler_agent.sdk_reporter.sdk_reporter import SdkReporter
from codeguru_profiler_agent.profiler import Profiler
from codeguru_profiler_agent.agent_metadata.agent_metadata import AgentMetadata, DefaultFleetInfo
from test.help_utils import DUMMY_TEST_PROFILING_GROUP_NAME
Expand All @@ -16,6 +17,9 @@ def test_live_profiling(self):
patch(
"codeguru_profiler_agent.reporter.agent_configuration.AgentConfiguration.is_under_min_reporting_time",
return_value=False), \
patch(
"codeguru_profiler_agent.sdk_reporter.sdk_reporter.SdkReporter.check_create_pg_called_during_submit_profile",
return_value=False), \
patch(
"codeguru_profiler_agent.reporter.agent_configuration.AgentConfiguration._is_reporting_interval_smaller_than_minimum_allowed",
return_value=False):
Expand Down
2 changes: 1 addition & 1 deletion test/unit/agent_metadata/test_aws_lambda.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest
from test.pytestutils import before
from mock import Mock
from unittest.mock import Mock
from datetime import timedelta
from codeguru_profiler_agent.agent_metadata.aws_lambda import AWSLambda
from codeguru_profiler_agent.aws_lambda.lambda_context import LambdaContext
Expand Down
4 changes: 2 additions & 2 deletions test/unit/aws_lambda/test_profiler_decorator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest
import codeguru_profiler_agent.aws_lambda.profiler_decorator

from mock import MagicMock, patch
from unittest.mock import MagicMock
from codeguru_profiler_agent.reporter.agent_configuration import AgentConfiguration
from codeguru_profiler_agent import with_lambda_profiler
from codeguru_profiler_agent import Profiler
Expand Down Expand Up @@ -71,7 +71,7 @@ def around(self):
self.context = MagicMock()
self.context.invoked_function_arn = "the_lambda_function_arn"
self.env = {"AWS_LAMBDA_FUNCTION_MEMORY_SIZE": "1024",
"AWS_EXECUTION_ENV": "AWS_Lambda_python3.6"}
"AWS_EXECUTION_ENV_KEY": "AWS_Lambda_python3.6"}

# define a handler function with the profiler decorator and parameters
@with_lambda_profiler(profiling_group_name="pg_name", region_name="eu-north-1",
Expand Down
4 changes: 2 additions & 2 deletions test/unit/file_reporter/test_file_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import pytest
import shutil

from mock import MagicMock
from mock import ANY
from unittest.mock import MagicMock
from unittest.mock import ANY
from pathlib import Path

from codeguru_profiler_agent.file_reporter.file_reporter import FileReporter
Expand Down
2 changes: 1 addition & 1 deletion test/unit/model/test_call_graph_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from codeguru_profiler_agent.model.frame import Frame
from test.pytestutils import before
from mock import MagicMock
from unittest.mock import MagicMock

from codeguru_profiler_agent.model.call_graph_node import CallGraphNode
from codeguru_profiler_agent.model.memory_counter import MemoryCounter
Expand Down
2 changes: 1 addition & 1 deletion test/unit/model/test_profile.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from mock import Mock
from unittest.mock import Mock

from codeguru_profiler_agent.model.frame import Frame
from test.pytestutils import before
Expand Down
2 changes: 1 addition & 1 deletion test/unit/sdk_reporter/test_sdk_profile_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import platform

import pytest
from mock import MagicMock
from unittest.mock import MagicMock

from codeguru_profiler_agent.agent_metadata.agent_metadata import AgentMetadata
from codeguru_profiler_agent.agent_metadata.aws_ec2_instance import AWSEC2Instance
Expand Down
Loading