Skip to content

Commit

Permalink
Log duration of calls to S3
Browse files Browse the repository at this point in the history
 ## Summary
We think we have some performance issues with calls to S3 on certain
views. This replaces the existing logs for these calls and inserts the
duration of the call so that we can start to investigate the issue more
thoroughly.

It also adds logs/durations to any calls out to S3 that were missing
them, with a more restrictive condition that the call must be 'slow' or
sampled.

This PR also defines a 'slow' request to an external provider as one
that takes 250ms or longer.
  • Loading branch information
Samuel Williams authored and Samuel Williams committed May 17, 2018
1 parent 437515c commit 381fdf8
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 34 deletions.
2 changes: 1 addition & 1 deletion dmutils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
import flask_featureflags # noqa


__version__ = '36.10.1'
__version__ = '36.11.0'
93 changes: 67 additions & 26 deletions dmutils/s3.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,24 @@
from __future__ import absolute_import
import os

import boto3
import datetime
import mimetypes
import logging
from dateutil.parser import parse as parse_time
import logging
import mimetypes
import os
import sys

# a bit of a lie here - retains compatibility with consumers that were importing boto2's S3ResponseError from here. this
# is the exception boto3 raises in (mostly) the same situations.
from botocore.exceptions import ClientError as S3ResponseError

from .formats import DATETIME_FORMAT
from .timing import (
different_message_for_success_or_error,
exceeds_slow_external_call_threshold,
logged_duration,
request_is_sampled,
)

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -56,22 +64,28 @@ def save(self, path, file_, acl='public-read', timestamp=None, download_filename
# to strip non-ascii chars..
str(download_filename).encode("ascii", errors="ignore").decode(),
)
obj.put(
ACL=acl,
Body=file_,
ContentType=self._get_mimetype(path),
# using a custom "timestamp" field allows us to manually override it if necessary
Metadata={"timestamp": timestamp.strftime(DATETIME_FORMAT)},
**extra_kwargs

log_duration_message = different_message_for_success_or_error(
success_message='Uploaded file {filepath} of size {filesize} and acl {fileacl} in {duration_real}s',
error_message='Exception during file upload ({filepath} of size {filesize} and acl {fileacl}) '
'with {exc_info[0]} after {duration_real}s'
)
logger.info(
"Uploaded file {filepath} of size {filesize} with acl {fileacl}",
extra={

with logged_duration(message=log_duration_message, condition=None) as log_context:
log_context.update({
"filepath": path,
"filesize": filesize,
"fileacl": acl,
},
)
})

obj.put(
ACL=acl,
Body=file_,
ContentType=self._get_mimetype(path),
# using a custom "timestamp" field allows us to manually override it if necessary
Metadata={"timestamp": timestamp.strftime(DATETIME_FORMAT)},
**extra_kwargs
)

return self._format_key(obj)

Expand All @@ -91,18 +105,24 @@ def copy(self, src_bucket, src_key, target_key, acl=None):
raise ValueError('Target key already exists in S3.')

extra_args = {'ACL': acl} if acl else {}
self._bucket.copy(CopySource={"Bucket": src_bucket, "Key": src_key}, Key=target_key, ExtraArgs=extra_args)

logger.info(
"Created a copy of {src_bucket}/{src_key} at {target_bucket}/{target_key}{set_acl}",
extra={
log_duration_message = different_message_for_success_or_error(
success_message="Copied {src_bucket}/{src_key} to {target_bucket}/{target_key}{set_acl} in "
"{duration_real}s",
error_message="Failed to copy key ({src_bucket}/{src_key} to "
"{target_bucket}/{target_key}{set_acl}) with {exc_info[0].__name__} after "
"{duration_real}s"
)
with logged_duration(message=log_duration_message, condition=None) as log_context:
log_context.update({
"src_bucket": src_bucket,
"src_key": src_key,
"target_bucket": self.bucket_name,
"target_key": target_key,
"set_acl": f" with '{acl} ACL" if acl else ""
},
)
})

self._bucket.copy(CopySource={"Bucket": src_bucket, "Key": src_key}, Key=target_key, ExtraArgs=extra_args)

return self._format_key(self._bucket.Object(target_key))

Expand Down Expand Up @@ -165,11 +185,32 @@ def list(self, prefix='', delimiter='', load_timestamps=False):
:return: list
"""
prefix = self._normalize_path(prefix)
return sorted((
self._format_key(obj_s, with_timestamp=load_timestamps)
for obj_s in self._bucket.objects.filter(Prefix=prefix, Delimiter=delimiter)
if not (obj_s.size == 0 and obj_s.key[-1] == '/')
), key=lambda obj_s: (obj_s.get("last_modified") or "", obj_s["path"],))

def slow_call_or_sampled_request_or_error(log_context):
return (
exceeds_slow_external_call_threshold(log_context)
or request_is_sampled(log_context) or
sys.exc_info()[0]
)

filtered_objects = self._bucket.objects.filter(Prefix=prefix, Delimiter=delimiter)

with logged_duration(
message=different_message_for_success_or_error(
success_message='Retrieved objects from S3 in {duration_real}s',
error_message='Failed to retrieve objects from S3 with {exc_info[0].__name__} '
'after {duration_real}s'
),
condition=slow_call_or_sampled_request_or_error
):
# Consume the `filtered_objects` generator to memory and prepare for sorting
objects_for_sorting = [
self._format_key(obj_s, with_timestamp=load_timestamps)
for obj_s in filtered_objects
if not (obj_s.size == 0 and obj_s.key[-1] == '/')
]

return sorted(objects_for_sorting, key=lambda obj_s: (obj_s.get("last_modified") or "", obj_s["path"]))

def _format_key(self, obj, with_timestamp=True):
"""
Expand Down
24 changes: 24 additions & 0 deletions dmutils/timing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,30 @@
from flask.ctx import has_request_context


SLOW_EXTERNAL_CALL_THRESHOLD = 0.25


# General public conditions that may be useful
def exceeds_slow_external_call_threshold(log_context):
    """Public `logged_duration` condition: return True when the recorded real-time duration is
    above the threshold we have defined as acceptable for calls to external services
    (e.g. Notify, Mailchimp, S3, etc)."""
    duration = log_context['duration_real']
    return duration > SLOW_EXTERNAL_CALL_THRESHOLD


def request_is_sampled(log_context):
    """Public `logged_duration` condition: return True when the current request has the
    X-B3-Sampled flag set in its headers. While this is the default condition for
    `logged_duration`, exposing it publicly allows it to be easily combined with other
    conditions."""
    if not has_request_context():
        return False
    return getattr(request, "is_sampled", False)
# End public conditions ----------------------


def different_message_for_success_or_error(success_message, error_message):
    """Can be passed into `logged_duration` as `message=different_message_for_success_or_error(x, y)`
    in order to generate different log messages depending on whether the block completed
    successfully or raised an exception."""
    def _choose_message(log_context):
        # An exception currently being handled (per sys.exc_info) means the block failed.
        if sys.exc_info()[0] is not None:
            return error_message
        return success_message
    return _choose_message


def _logged_duration_default_message(log_context):
return "Block {} in {{duration_real}}s of real-time".format(
"executed" if sys.exc_info()[0] is None else "raised {}".format(sys.exc_info()[0].__name__)
Expand Down
47 changes: 40 additions & 7 deletions tests/test_timing.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,16 +110,44 @@ def _default_and_no_exception(log_context):
(
timing.logged_duration.default_message,
(
AnyStringMatching(r"Block (executed|raised \w+) in \{duration_real\}s of real-time"),
AnyStringMatching(r"Block (executed|raised \w+) in [0-9eE.-]+s of real-time"),
{
'success': "Block executed in {duration_real}s of real-time",
'error': AnyStringMatching(r"Block raised \w+ in \{duration_real\}s of real-time"),
},
{
'success': AnyStringMatching(r"Block executed in [0-9eE.-]+s of real-time"),
'error': AnyStringMatching(r"Block raised \w+ in [0-9eE.-]+s of real-time"),
},
),
),
(
timing.different_message_for_success_or_error(
success_message='Block succeeded in {duration_real}s',
error_message='Block raised {exc_info[0]} in {duration_real}s',
),
(
{
'success': "Block succeeded in {duration_real}s",
'error': "Block raised {exc_info[0]} in {duration_real}s",
},
{
'success': AnyStringMatching(r"Block succeeded in [0-9eE.-]+s"),
'error': AnyStringMatching(r"Block raised \w+ in [0-9eE.-]+s"),
},
),
),
(
"{name}: {street} - {duration_process}s",
(
"{name}: {street} - {duration_process}s",
AnyStringMatching(r"conftest\.foobar: (\{.*\}|eccles) - [0-9eE.-]+s"),
)
{
'success': "{name}: {street} - {duration_process}s",
'error': "{name}: {street} - {duration_process}s",
},
{
'success': AnyStringMatching(r"conftest\.foobar: (\{.*\}|eccles) - [0-9eE.-]+s"),
'error': AnyStringMatching(r"conftest\.foobar: (\{.*\}|eccles) - [0-9eE.-]+s"),
},
),
),
))

Expand All @@ -146,6 +174,8 @@ def _default_and_no_exception(log_context):
timing.logged_duration.default_condition,
_duration_real_gt_075,
_default_and_no_exception,
timing.exceeds_slow_external_call_threshold,
timing.request_is_sampled
),
( # raise_exception values
None,
Expand Down Expand Up @@ -174,6 +204,9 @@ def _expect_log(
(condition is timing.logged_duration.default_condition and is_sampled)
or (condition is _duration_real_gt_075 and sleep_time >= 0.08)
or (condition is _default_and_no_exception and is_sampled and raise_exception is None)
or (condition is timing.exceeds_slow_external_call_threshold
and sleep_time >= timing.SLOW_EXTERNAL_CALL_THRESHOLD)
or (condition is timing.request_is_sampled and is_sampled)
or (condition in (True, None,))
)

Expand Down Expand Up @@ -204,7 +237,7 @@ def _expect_log(
[ # expected_call_args_list
mock.call(
log_level,
_messages_expected[message][0],
_messages_expected[message][0].get('error' if raise_exception else 'success'),
exc_info=bool(raise_exception),
extra={
"duration_real": MalleableAny(
Expand Down Expand Up @@ -382,7 +415,7 @@ def test_logged_duration_mock_logger(
AnySupersetOf({
"name": "conftest.foobar",
"levelname": logging.getLevelName(log_level),
"message": _messages_expected[message][1],
"message": _messages_expected[message][1].get('error' if raise_exception else 'success'),
"duration_real": MalleableAny(
# a double-closure here to get around python's weird behaviour when capturing iterated
# variables (in this case `sleep_time`)
Expand Down

0 comments on commit 381fdf8

Please sign in to comment.