Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 24 additions & 3 deletions codeguru_profiler_agent/sampling_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
import linecache
import threading
import traceback

import re
from codeguru_profiler_agent.model.frame import Frame

# Matches a file path ending in botocore/client.py, with either "/" or "\" as the
# path separator. The dot is escaped so that only a literal "client.py" matches
# (an unescaped "." would also accept names such as "client_py").
BOTO_CLIENT_PATH = re.compile(r"[/\\]botocore[/\\]client\.py$")
# Synthetic frame named "<Truncated>".
# NOTE(review): presumably used to mark a stack that was cut at the maximum depth — confirm at its use sites.
TRUNCATED_FRAME = Frame(name="<Truncated>")

# Synthetic frame named "<Sleep>".
# NOTE(review): presumably used to mark a thread that is sleeping — confirm at its use sites.
TIME_SLEEP_FRAME = Frame(name="<Sleep>")
Expand Down Expand Up @@ -64,15 +65,35 @@ def _extract_stack(stack, max_depth):
"""
result = []
for raw_frame, line_no in stack:
_maybe_add_boto_operation_name(raw_frame, result)
co = raw_frame.f_code
result.append(
Frame(name=co.co_name, class_name=_extract_class(raw_frame.f_locals), line_no=line_no,
file_path=co.co_filename)
)
if len(stack) < max_depth:
if len(result) < max_depth:
last_frame, last_frame_line_no = stack[-1]
_maybe_append_synthetic_frame(result, last_frame, last_frame_line_no)
return result
return result[:max_depth]


def _maybe_add_boto_operation_name(raw_frame, result):
    """
    Append a frame carrying the boto operation name when the given frame is a boto API call.

    boto handles API calls in a very generic way, so by default a sample only shows that
    an api call is being made, not which operation it is. When raw_frame is
    botocore/client.py:_api_call and its locals hold a non-empty 'py_operation_name',
    a frame named after that operation is appended to result. Otherwise result is untouched.
    :param raw_frame: the raw frame to inspect
    :param result: the list of frames collected so far; it is modified in place
    """
    code = raw_frame.f_code
    if code.co_name != '_api_call':
        return
    if BOTO_CLIENT_PATH.search(code.co_filename) is None:
        return

    frame_locals = raw_frame.f_locals
    if not frame_locals or 'py_operation_name' not in frame_locals.keys():
        return
    operation_name = frame_locals.get('py_operation_name')
    if not operation_name:
        # the key is present but empty or None: nothing meaningful to report
        return

    result.append(
        Frame(name=operation_name,
              class_name=_extract_class(frame_locals),
              file_path=code.co_filename)
    )


def _maybe_append_synthetic_frame(result, frame, line_no):
Expand Down
19 changes: 19 additions & 0 deletions test/help_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import threading
import time
from queue import Queue
import boto3
from botocore.client import Config

# Account id and profiling group name constants.
# NOTE(review): judging by their names these are used by the integration tests — confirm against that suite.
INTEGRATION_TEST_ACCOUNT_ID = "519630429520"
MY_PROFILING_GROUP_NAME_FOR_INTEG_TESTS = "MyProfilingGroupForIntegrationTests"
Expand Down Expand Up @@ -48,6 +50,23 @@ def dummy_parent_method(self):
"""
self.dummy_method()

def make_boto_api_call(self, boto_client):
    """
    Call put_metric_data on the given client and print any exception instead of raising it.
    :param boto_client: a client object exposing put_metric_data
    """
    request = {"Namespace": "any_namespace", "MetricData": []}
    try:
        boto_client.put_metric_data(**request)
    except Exception as error:
        # the failure is expected here, so it is only reported
        print("This should be a ConnectTimeoutError", error)

def new_thread_sending_boto_api_call(self, timeout_seconds=1, thread_name="test-boto-api-call"):
    """
    Start a daemon thread that runs make_boto_api_call with a client pointing at a fake endpoint.

    The started thread is stored in self.boto_thread.
    :param timeout_seconds: connect timeout passed to the client configuration
    :param thread_name: name given to the started thread
    """
    # fake credentials, because we do not want boto to look for real ones
    fake_session = boto3.Session(
        region_name="us-east-1",
        aws_access_key_id="fake_id",
        aws_secret_access_key="fake_key",
    )
    client_config = Config(connect_timeout=timeout_seconds, retries={'max_attempts': 0})
    # the endpoint is fake because no real call should be made; the only goal is to have
    # a thread inside an api call, trying to connect, long enough for a sample to be taken
    no_target_client = fake_session.client(
        'cloudwatch',
        endpoint_url='https://notExisting.com/',
        config=client_config,
    )
    self.boto_thread = threading.Thread(
        name=thread_name,
        target=self.make_boto_api_call,
        daemon=True,
        kwargs={"boto_client": no_target_client},
    )
    self.boto_thread.start()

def wait_for(condition, timeout_seconds=1.0, poll_interval_seconds=0.01):
"""
Expand Down
58 changes: 58 additions & 0 deletions test/unit/test_sampling_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,20 @@
import sys

from test import help_utils
from collections import namedtuple

from codeguru_profiler_agent.sampling_utils import get_stacks

# Same string as the name of TRUNCATED_FRAME in codeguru_profiler_agent/sampling_utils.py.
DEFAULT_TRUNCATED_FRAME_NAME = "<Truncated>"

# Lightweight stand-ins exposing only the attributes listed in their field names:
# a code object, a frame, and a (frame, line number) stack entry.
test_code = namedtuple('code', ['co_filename', 'co_name'])
test_frame = namedtuple('frame', ['f_code', 'f_locals'])
test_tb = namedtuple('tb', ['tb_frame', 'tb_lineno'])


def make_frame(path, method, line_nbr, f_locals=None):
    """
    Build a fake stack entry made of a fake frame and its line number.
    :param path: value exposed as f_code.co_filename
    :param method: value exposed as f_code.co_name
    :param line_nbr: value exposed as tb_lineno
    :param f_locals: mapping exposed as the frame's f_locals; a new empty dict is used when omitted
    :return: a test_tb namedtuple (tb_frame, tb_lineno)
    """
    if f_locals is None:
        # create a fresh dict per call: a `{}` default would be one object shared by
        # every frame built without explicit locals
        f_locals = {}
    return test_tb(test_frame(test_code(path, method), f_locals), line_nbr)


def is_frame_in_stacks(stacks, target_frame):
for stack in stacks:
Expand Down Expand Up @@ -88,3 +97,52 @@ def test_it_does_not_include_zombie_threads(self):

assert not is_frame_in_stacks(
stacks, "dummy_parent_method")

def test_it_adds_operation_name_frame_for_boto(self):
    # a botocore _api_call frame placed between two ordinary frames
    boto_frame = make_frame(
        'site-packages/botocore/client.py', '_api_call', 3, {'py_operation_name': 'boto_api_call'})
    raw_stack = [
        make_frame('path/to/foo.py', 'foo', 3),
        boto_frame,
        make_frame('path/to/bar.py', 'bar', 3),
    ]

    def fake_walk_stack(end_frame):
        return raw_stack

    with mock.patch("traceback.walk_stack", side_effect=fake_walk_stack):
        stacks = get_stacks(
            threads_to_sample=sys._current_frames().items(),
            excluded_threads=set(),
            max_depth=100,
        )
        # the 3 raw frames plus the added operation name frame
        assert len(stacks[0]) == 4
        assert is_frame_in_stacks(stacks, "boto_api_call")

def test_adding_boto_frame_does_not_exceed_maximum_depth(self):
    # 100 filler frames, named bar99 down to bar0, placed before the boto frame
    filler_frames = [
        make_frame('path/to/foo.py', 'bar' + str(index), 1)
        for index in reversed(range(100))
    ]
    raw_stack = filler_frames + [
        make_frame('site-packages/botocore/client.py', '_api_call', 34, {'py_operation_name': 'boto_api_call'}),
        make_frame('path/to/foo.py', 'bar', 12),
    ]

    def fake_walk_stack(end_frame):
        return raw_stack

    with mock.patch("traceback.walk_stack", side_effect=fake_walk_stack):
        stacks = get_stacks(
            threads_to_sample=sys._current_frames().items(),
            excluded_threads=set(),
            max_depth=100,
        )
        # the extra operation name frame must not push the stack over max_depth
        assert len(stacks[0]) == 100
        assert is_frame_in_stacks(stacks, "boto_api_call")

def test_it_adds_operation_name_frame_for_real_boto_call(self):
    # The helper starts a thread that tries a boto3 api call (put_metric_data on a
    # cloudwatch client) for 1 second and then fails with a log, so a sample taken
    # now by get_stacks should capture the operation name.
    self.helper.new_thread_sending_boto_api_call(timeout_seconds=1)
    sampled_threads = sys._current_frames().items()
    stacks = get_stacks(
        threads_to_sample=sampled_threads,
        excluded_threads=set(),
        max_depth=100,
    )
    assert is_frame_in_stacks(stacks, "put_metric_data")