aws · dangmaul-amazon · Apr 1, 2021 · Mar 25, 2021 · Mar 26, 2021 · Mar 31, 2021
diff --git a/codeguru_profiler_agent/agent_metadata/aws_lambda.py b/codeguru_profiler_agent/agent_metadata/aws_lambda.py
@@ -2,13 +2,17 @@
 import logging
 import uuid
 
+from unittest.mock import MagicMock
 from codeguru_profiler_agent.agent_metadata.fleet_info import FleetInfo
 from codeguru_profiler_agent.aws_lambda.lambda_context import LambdaContext
 
 logger = logging.getLogger(__name__)
 
 LAMBDA_MEMORY_SIZE_ENV = "AWS_LAMBDA_FUNCTION_MEMORY_SIZE"
 LAMBDA_EXECUTION_ENV = "AWS_EXECUTION_ENV"
+HANDLER_ENV_NAME_FOR_CODEGURU_KEY = "HANDLER_ENV_NAME_FOR_CODEGURU"
+LAMBDA_TASK_ROOT = "LAMBDA_TASK_ROOT"
+LAMBDA_RUNTIME_DIR = "LAMBDA_RUNTIME_DIR"
 
 # Those are used for the configure agent call:
 # See https://docs.aws.amazon.com/codeguru/latest/profiler-api/API_ConfigureAgent.html
@@ -100,7 +104,12 @@ def get_metadata_for_configure_agent_call(self, lambda_context=None):
             as_map[LAMBDA_MEMORY_LIMIT_IN_MB_KEY] = str(self.memory_limit_mb)
         if self.execution_env:
             as_map[EXECUTION_ENVIRONMENT_KEY] = self.execution_env
-        if lambda_context.context is not None:
+
+        '''
+        Adding a specific condition to ignore MagicMock instances from being added to the metadata since
+        it causes boto to raise a ParamValidationError, similar to https://github.com/boto/botocore/issues/2063.
+        '''
+        if lambda_context.context is not None and not isinstance(lambda_context.context, MagicMock):
             as_map[AWS_REQUEST_ID_KEY] = lambda_context.context.aws_request_id
             as_map[LAMBDA_REMAINING_TIME_IN_MILLISECONDS_KEY] = \
                 str(lambda_context.context.get_remaining_time_in_millis())

diff --git a/codeguru_profiler_agent/aws_lambda/lambda_handler.py b/codeguru_profiler_agent/aws_lambda/lambda_handler.py
@@ -1,21 +1,20 @@
 import os
 import logging
 from codeguru_profiler_agent.aws_lambda.profiler_decorator import with_lambda_profiler
-
+from codeguru_profiler_agent.agent_metadata.aws_lambda import HANDLER_ENV_NAME_FOR_CODEGURU_KEY
 HANDLER_ENV_NAME = "_HANDLER"
-HANDLER_ENV_NAME_FOR_CODEGURU = "HANDLER_ENV_NAME_FOR_CODEGURU"
 logger = logging.getLogger(__name__)
 
 
 def restore_handler_env(original_handler, env=os.environ):
+    """
+    Write original_handler into the HANDLER_ENV_NAME ("_HANDLER") entry of env.
+
+    :param original_handler: value to store under HANDLER_ENV_NAME.
+    :param env: mapping that gets updated, default is os.environ
+    """
     env[HANDLER_ENV_NAME] = original_handler
 
 
-def load_handler(bootstrap_module, env=os.environ, original_handler_env_key=HANDLER_ENV_NAME_FOR_CODEGURU):
+def load_handler(bootstrap_module, env=os.environ, original_handler_env_key=HANDLER_ENV_NAME_FOR_CODEGURU_KEY):
     try:
         original_handler_name = env.get(original_handler_env_key)
         if not original_handler_name:
-            raise ValueError("Could not find module and function name from " + HANDLER_ENV_NAME_FOR_CODEGURU
+            raise ValueError("Could not find module and function name from " + HANDLER_ENV_NAME_FOR_CODEGURU_KEY
                              + " environment variable")
 
         # Delegate to the lambda code to load the customer's module.

diff --git a/codeguru_profiler_agent/aws_lambda/profiler_decorator.py b/codeguru_profiler_agent/aws_lambda/profiler_decorator.py
@@ -15,7 +15,8 @@ def _create_lambda_profiler(profiling_group_name, region_name, environment_overr
     from codeguru_profiler_agent.agent_metadata.aws_lambda import AWSLambda
     override = {'agent_metadata': AgentMetadata(AWSLambda.look_up_metadata(context))}
     override.update(environment_override)
-    profiler = build_profiler(pg_name=profiling_group_name, region_name=region_name, override=override, env=env)
+    profiler = build_profiler(pg_name=profiling_group_name, region_name=region_name, override=override, env=env,
+                              should_autocreate_profiling_group=True)
     if profiler is None:
         return _EmptyProfiler()
     return profiler

diff --git a/codeguru_profiler_agent/local_aggregator.py b/codeguru_profiler_agent/local_aggregator.py
@@ -6,6 +6,7 @@
 from codeguru_profiler_agent.metrics.with_timer import with_timer
 from codeguru_profiler_agent.model.profile import Profile
 from codeguru_profiler_agent.utils.time import current_milli_time
+from codeguru_profiler_agent.sdk_reporter.sdk_reporter import SdkReporter
 
 logger = logging.getLogger(__name__)
 
@@ -100,13 +101,24 @@ def refresh_configuration(self):
         self.reporter.refresh_configuration()
 
     def _report_profile(self, now):
+        """
+        Try to report the current profile and record the time of the attempt.
+
+        :param now: time of this attempt; stored in self.last_report_attempted.
+        :return: False if the profile is empty, otherwise what self.reporter.report returns.
+        """
+        previous_last_report_attempted_value = self.last_report_attempted
         self.last_report_attempted = now
         self._add_overhead_metric_to_profile()
         logger.info("Attempting to report profile data: " + str(self.profile))
         if self.profile.is_empty():
             logger.info("Report was cancelled because it was empty")
             return False
-        return self.reporter.report(self.profile)
+        is_reporting_successful = self.reporter.report(self.profile)
+        # If report() attempted to create a Profiling Group, no profile was actually reported, so
+        # restore the previous last_report_attempted value and clear the class-level flag.
+        # This will occur only in the case of profiling using CodeGuru Profiler Python agent Lambda layer.
+        #
+        # check_create_pg_called_during_submit_profile is a classmethod and has to be called:
+        # comparing the method object itself to True is always False.
+        if SdkReporter.check_create_pg_called_during_submit_profile():
+            self.last_report_attempted = previous_last_report_attempted_value
+            SdkReporter.reset_check_create_pg_called_during_submit_profile_flag()
+        return is_reporting_successful
 
     def _is_under_min_reporting_time(self, now):
+        """
+        :param now: current time; assumed to use the same unit as self.last_report_attempted
+                    (presumably milliseconds -- TODO confirm).
+        :return: what AgentConfiguration.is_under_min_reporting_time returns for the time
+                 elapsed since the last report attempt.
+        """
         return AgentConfiguration.get().is_under_min_reporting_time(now - self.last_report_attempted)

diff --git a/codeguru_profiler_agent/profiler_builder.py b/codeguru_profiler_agent/profiler_builder.py
@@ -17,6 +17,9 @@
 CREDENTIAL_PATH = "AWS_CODEGURU_PROFILER_CREDENTIAL_PATH"
 ENABLED_ENV = "AWS_CODEGURU_PROFILER_ENABLED"
 
+# Environment variables provided by AWS Lambda
+AWS_LAMBDA_FUNCTION_NAME_ENV_VAR_KEY = "AWS_LAMBDA_FUNCTION_NAME"
+
 # non documented parameters
 SAMPLING_INTERVAL = "AWS_CODEGURU_PROFILER_SAMPLING_INTERVAL_MS"
 REPORTING_INTERVAL = "AWS_CODEGURU_PROFILER_REPORTING_INTERVAL_MS"
@@ -111,7 +114,8 @@ def _check_credential_through_environment(env=os.environ):
 
 
 def build_profiler(pg_name=None, region_name=None, credential_profile=None,
-                   env=os.environ, session_factory=boto3.session.Session, profiler_factory=None, override=None):
+                   env=os.environ, session_factory=boto3.session.Session, profiler_factory=None, override=None,
+                   should_autocreate_profiling_group=False):
     """
     Creates a Profiler object from given parameters or environment variables
     :param pg_name: given profiling group name, default is None
@@ -120,6 +124,7 @@ def build_profiler(pg_name=None, region_name=None, credential_profile=None,
     :param env: environment variables are used if parameters are not provided, default is os.environ
     :param session_factory: (For testing) function for creating boto3.session.Session, default is boto3.session.Session
     :param override: a dictionary with possible extra parameters to override default values
+    :param should_autocreate_profiling_group: True when Compute Platform is AWS Lambda. False otherwise
     :return: a Profiler object or None, this function does not throw exceptions
     """
     if profiler_factory is None:
@@ -137,9 +142,12 @@ def build_profiler(pg_name=None, region_name=None, credential_profile=None,
         name_from_arn, region_from_arn, _account_id = _read_profiling_group_arn(env)
         profiling_group_name = _get_profiling_group_name(pg_name, name_from_arn, env)
         if not profiling_group_name:
-            logger.info("Could not find a profiling group name to start the CodeGuru Profiler agent. "
-                        + "Add command line argument or environment variable. e.g. " + PG_ARN_ENV)
-            return None
+            if should_autocreate_profiling_group:
+                profiling_group_name = "aws-lambda-" + env.get(AWS_LAMBDA_FUNCTION_NAME_ENV_VAR_KEY)
+            else:
+                logger.info("Could not find a profiling group name to start the CodeGuru Profiler agent. "
+                            + "Add command line argument or environment variable. e.g. " + PG_ARN_ENV)
+                return None
         region = _get_region(region_name, region_from_arn, env)
         session = session_factory(region_name=region, profile_name=credential_profile)
 

diff --git a/codeguru_profiler_agent/sdk_reporter/sdk_reporter.py b/codeguru_profiler_agent/sdk_reporter/sdk_reporter.py
@@ -2,22 +2,25 @@
 
 import logging
 import io
+import os
 
 from botocore.exceptions import ClientError
 from codeguru_profiler_agent.utils.log_exception import log_exception
 from codeguru_profiler_agent.reporter.reporter import Reporter
 from codeguru_profiler_agent.metrics.with_timer import with_timer
 from codeguru_profiler_agent.sdk_reporter.profile_encoder import ProfileEncoder
+from codeguru_profiler_agent.agent_metadata.aws_lambda import HANDLER_ENV_NAME_FOR_CODEGURU_KEY, \
+    LAMBDA_TASK_ROOT, LAMBDA_RUNTIME_DIR
 
 logger = logging.getLogger(__name__)
-
+AWS_EXECUTION_ENV_KEY = "AWS_EXECUTION_ENV"
 
 class SdkReporter(Reporter):
     """
     Handles communication with the CodeGuru Profiler Service backend.
     Encodes profiles using the ProfilerEncoder and reports them using the CodeGuru profiler SDK.
     """
-
+    is_create_pg_called_during_submit_profile = False
     def __init__(self, environment):
         """
         :param environment: dependency container dictionary for the current profiler.
@@ -51,6 +54,11 @@ def setup(self):
     def refresh_configuration(self):
         """
         Refresh the agent configuration by calling the profiler backend service.
+
+        Note:
+        For an agent running on AWS Lambda, if the environment variables for Profiling using
+        Lambda layers are set, it tries to create a Profiling Group whenever a ResourceNotFoundException
+        is encountered.
         """
         try:
             fleet_instance_id = self.metadata.fleet_info.get_fleet_instance_id()
@@ -67,9 +75,18 @@ def refresh_configuration(self):
             # whole process because the customer may fix this on their side by creating/changing the profiling group.
             # We handle service exceptions like this in boto3
             # see https://boto3.amazonaws.com/v1/documentation/api/latest/guide/error-handling.html
-            if error.response['Error']['Code'] in ['ResourceNotFoundException', 'ValidationException']:
+            if error.response['Error']['Code'] == 'ValidationException':
                 self.agent_config_merger.disable_profiling()
-            self._log_request_failed(operation="configure_agent", exception=error)
+                self._log_request_failed(operation="configure_agent", exception=error)
+            if error.response['Error']['Code'] == 'ResourceNotFoundException':
+                if self.should_auto_create_profiling_group():
+                    logger.info(
+                        "Profiling group not found. Will try to create a profiling group "
+                        "with name = {} and compute platform = {} and retry calling configure agent after 5 minutes"
+                        .format(self.profiling_group_name, 'AWSLambda'))
+                    self.create_profiling_group()
+                else:
+                    self.agent_config_merger.disable_profiling()
         except Exception as e:
             self._log_request_failed(operation="configure_agent", exception=e)
 
@@ -80,6 +97,11 @@ def report(self, profile):
 
         :param profile: Profile to be encoded and reported to the profiler backend service.
         :return: True if profile gets reported successfully; False otherwise.
+
+        Note:
+        For an agent running on AWS Lambda, if the environment variables for Profiling using
+        Lambda layers are set, it tries to create a Profiling Group whenever a ResourceNotFoundException
+        is encountered.
         """
         try:
             profile_stream = self._encode_profile(profile)
@@ -90,11 +112,61 @@ def report(self, profile):
             )
             logger.info("Reported profile successfully")
             return True
+        except ClientError as error:
+            if error.response['Error']['Code'] == 'ResourceNotFoundException':
+                if self.should_auto_create_profiling_group():
+                    self.__class__.is_create_pg_called_during_submit_profile = True
+                    logger.info(
+                        "Profiling group not found. Will try to create a profiling group "
+                        "with name = {} and compute platform = {}".format(self.profiling_group_name, 'AWSLambda'))
+                    self.create_profiling_group()
+            return False
         except Exception as e:
             self._log_request_failed(operation="post_agent_profile", exception=e)
             return False
 
+    @with_timer("createProfilingGroup", measurement="wall-clock-time")
+    def create_profiling_group(self):
+        """
+        Create a Profiling Group for the AWS Lambda function.
+
+        The group is created with self.profiling_group_name as its name and 'AWSLambda' as its
+        compute platform. Failures are logged and never raised: a ConflictException is reported
+        at info level, every other error goes through _log_request_failed.
+        """
+        try:
+            self.codeguru_client_builder.codeguru_client.create_profiling_group(
+                profilingGroupName=self.profiling_group_name,
+                computePlatform='AWSLambda'
+            )
+            logger.info("Created Lambda Profiling Group with name " + str(self.profiling_group_name))
+        except ClientError as error:
+            if error.response['Error']['Code'] == 'ConflictException':
+                logger.info("Profiling Group with name {} already exists. Please use a different name."
+                            .format(self.profiling_group_name))
+            else:
+                # Any other service error (access denied, throttling, ...) must not vanish silently;
+                # log it the same way as every other failed service call.
+                self._log_request_failed(operation="create_profiling_group", exception=error)
+        except Exception as e:
+            self._log_request_failed(operation="create_profiling_group", exception=e)
+
+    def should_auto_create_profiling_group(self):
+        """
+        Tell whether a missing Profiling Group should be created automatically.
+
+        Currently the only condition we check is to verify that the Compute Platform is AWS Lambda.
+        In the future, other checks could be placed inside this method.
+
+        :return: the result of is_compute_platform_lambda()
+        """
+        return self.is_compute_platform_lambda()
+
+    def is_compute_platform_lambda(self):
+        """
+        Check if the compute platform is AWS Lambda.
+
+        :return: True only when both the LAMBDA_TASK_ROOT and LAMBDA_RUNTIME_DIR environment
+                 variables are set to a non-empty value; False otherwise.
+        """
+        required_env_vars = (LAMBDA_TASK_ROOT, LAMBDA_RUNTIME_DIR)
+        return all(os.environ.get(env_var) for env_var in required_env_vars)
+
     @staticmethod
     def _log_request_failed(operation, exception):
+        """
+        Log a failed call to the CodeGuru Profiler service through log_exception.
+
+        :param operation: name of the service operation that failed, e.g. "configure_agent".
+        :param exception: the exception that was caught; its str() is included in the message.
+        """
         log_exception(logger, "Failed to call the CodeGuru Profiler service for the {} operation: {}"
                       .format(operation, str(exception)))
+
+    @classmethod
+    def check_create_pg_called_during_submit_profile(cls):
+        """
+        :return: the class-level is_create_pg_called_during_submit_profile flag, which report()
+                 sets to True when it attempts to create a profiling group.
+        """
+        return cls.is_create_pg_called_during_submit_profile
+
+    @classmethod
+    def reset_check_create_pg_called_during_submit_profile_flag(cls):
+        """
+        Set the class-level is_create_pg_called_during_submit_profile flag back to False.
+        """
+        cls.is_create_pg_called_during_submit_profile = False
diff --git a/test/acceptance/test_end_to_end_profile_and_save_to_file.py b/test/acceptance/test_end_to_end_profile_and_save_to_file.py
@@ -5,7 +5,7 @@
 import os
 
 from datetime import timedelta
-from mock import patch
+from unittest.mock import patch
 from pathlib import Path
 
 from codeguru_profiler_agent.profiler import Profiler

diff --git a/test/acceptance/test_end_to_end_profiling.py b/test/acceptance/test_end_to_end_profiling.py
@@ -1,6 +1,6 @@
 from botocore.stub import Stubber, ANY
 from datetime import timedelta
-from mock import patch
+from unittest.mock import patch
 from test.pytestutils import before
 
 from codeguru_profiler_agent.profiler import Profiler
@@ -33,6 +33,9 @@ def test_report_when_stopped(self):
         with \
                 patch(
                     "codeguru_profiler_agent.reporter.agent_configuration.AgentConfiguration.is_under_min_reporting_time",
+                    return_value=False), \
+                patch(
+                    "codeguru_profiler_agent.sdk_reporter.sdk_reporter.SdkReporter.check_create_pg_called_during_submit_profile",
                     return_value=False):
             with self.client_stubber:
                 self.profiler.start()

diff --git a/test/acceptance/test_live_profiling.py b/test/acceptance/test_live_profiling.py
@@ -1,9 +1,10 @@
 import time
 
 from datetime import timedelta
-from mock import patch
+from unittest.mock import patch
 
 from codeguru_profiler_agent.reporter.agent_configuration import AgentConfiguration
+from codeguru_profiler_agent.sdk_reporter.sdk_reporter import SdkReporter
 from codeguru_profiler_agent.profiler import Profiler
 from codeguru_profiler_agent.agent_metadata.agent_metadata import AgentMetadata, DefaultFleetInfo
 from test.help_utils import DUMMY_TEST_PROFILING_GROUP_NAME
@@ -16,6 +17,9 @@ def test_live_profiling(self):
                 patch(
                     "codeguru_profiler_agent.reporter.agent_configuration.AgentConfiguration.is_under_min_reporting_time",
                     return_value=False), \
+                patch(
+                    "codeguru_profiler_agent.sdk_reporter.sdk_reporter.SdkReporter.check_create_pg_called_during_submit_profile",
+                    return_value=False), \
                 patch(
                     "codeguru_profiler_agent.reporter.agent_configuration.AgentConfiguration._is_reporting_interval_smaller_than_minimum_allowed",
                     return_value=False):

diff --git a/test/unit/agent_metadata/test_aws_lambda.py b/test/unit/agent_metadata/test_aws_lambda.py
@@ -1,6 +1,6 @@
 import pytest
 from test.pytestutils import before
-from mock import Mock
+from unittest.mock import Mock
 from datetime import timedelta
 from codeguru_profiler_agent.agent_metadata.aws_lambda import AWSLambda
 from codeguru_profiler_agent.aws_lambda.lambda_context import LambdaContext

diff --git a/test/unit/aws_lambda/test_profiler_decorator.py b/test/unit/aws_lambda/test_profiler_decorator.py
@@ -1,7 +1,7 @@
 import pytest
 import codeguru_profiler_agent.aws_lambda.profiler_decorator
 
-from mock import MagicMock, patch
+from unittest.mock import MagicMock
 from codeguru_profiler_agent.reporter.agent_configuration import AgentConfiguration
 from codeguru_profiler_agent import with_lambda_profiler
 from codeguru_profiler_agent import Profiler
@@ -71,7 +71,7 @@ def around(self):
         self.context = MagicMock()
         self.context.invoked_function_arn = "the_lambda_function_arn"
         self.env = {"AWS_LAMBDA_FUNCTION_MEMORY_SIZE": "1024",
-                    "AWS_EXECUTION_ENV": "AWS_Lambda_python3.6"}
+                    "AWS_EXECUTION_ENV_KEY": "AWS_Lambda_python3.6"}
 
         # define a handler function with the profiler decorator and parameters
         @with_lambda_profiler(profiling_group_name="pg_name", region_name="eu-north-1",

diff --git a/test/unit/file_reporter/test_file_reporter.py b/test/unit/file_reporter/test_file_reporter.py
@@ -2,8 +2,8 @@
 import pytest
 import shutil
 
-from mock import MagicMock
-from mock import ANY
+from unittest.mock import MagicMock
+from unittest.mock import ANY
 from pathlib import Path
 
 from codeguru_profiler_agent.file_reporter.file_reporter import FileReporter

diff --git a/test/unit/model/test_call_graph_node.py b/test/unit/model/test_call_graph_node.py
@@ -2,7 +2,7 @@
 
 from codeguru_profiler_agent.model.frame import Frame
 from test.pytestutils import before
-from mock import MagicMock
+from unittest.mock import MagicMock
 
 from codeguru_profiler_agent.model.call_graph_node import CallGraphNode
 from codeguru_profiler_agent.model.memory_counter import MemoryCounter

diff --git a/test/unit/model/test_profile.py b/test/unit/model/test_profile.py
@@ -1,5 +1,5 @@
 import pytest
-from mock import Mock
+from unittest.mock import Mock
 
 from codeguru_profiler_agent.model.frame import Frame
 from test.pytestutils import before

diff --git a/test/unit/sdk_reporter/test_sdk_profile_encoder.py b/test/unit/sdk_reporter/test_sdk_profile_encoder.py
@@ -2,7 +2,7 @@
 import platform
 
 import pytest
-from mock import MagicMock
+from unittest.mock import MagicMock
 
 from codeguru_profiler_agent.agent_metadata.agent_metadata import AgentMetadata
 from codeguru_profiler_agent.agent_metadata.aws_ec2_instance import AWSEC2Instance