Skip to content

Commit

Permalink
use shell globs on file name for include/exclude patterns (#137)
Browse files Browse the repository at this point in the history
The previously used module path was very brittle. Especially in the
case of Django, where the default `include_path` was the set of
`INSTALLED_APPS`, this would have a lot of false positives, namely
all 3rd party Django apps.

Using the `lib/python` exclude patterns should work for all cases that
use virtualenvs or the system python.

closes #137
  • Loading branch information
beniwohli committed Jan 24, 2018
1 parent ae3655a commit afb445c
Show file tree
Hide file tree
Showing 14 changed files with 108 additions and 51 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ https://github.com/elastic/apm-agent-python/compare/v1.0.0\...master[Check the H
** `app_name` arguments to API calls in the whole code base changed to `service_name`.
** `context.request.url.raw` has been renamed to `context.request.url.full` ({pull}121[#121])
* BREAKING: added `elasticapm.set_custom_context` in favor of the more generic `set_custom_data` function ({pull}133[#133])
* BREAKING: `include_patterns` and `exclude_patterns` now use shell globs instead of regular expressions, and
are matched against the full path file path of the module, not against the module name ({pull}137[#137])

[[release-1.0.0]]
[float]
Expand Down
27 changes: 16 additions & 11 deletions docs/configuration.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -405,33 +405,38 @@ WARNING: We recommend to always include the default set of validators if you cus

|============
| Environment | Django/Flask | Default
| `ELASTIC_APM_INCLUDE_PATHS` | `INCLUDE_PATHS` | Depending on framework
| `ELASTIC_APM_INCLUDE_PATHS` | `INCLUDE_PATHS` | `[]`
| multiple values separated by commas, without spaces |||
|============

A set of module paths that should be considered when detecting if a stacktrace frame is a library frame or an "in-app" frame.
For Django, the default set is the list of `INSTALLED_APPS`, but without `django.contrib` apps.
For Flask, it's the app module.
A set of paths, optionally using shell globs
(see https://docs.python.org/3/library/fnmatch.html[`fnmatch`] for a description of the syntax).
These are matched against the absolute filename of every frame, and if a pattern matches, the frame is considered
to be an "in-app frame".

NOTE: for a given module path, all sub-modules will also match. E.g. `foo.bar` also matches for `foo.bar.baz`.
`include_paths` *takes precedence* over `exclude_paths`.

[float]
[[config-exclude-paths]]
==== `exclude_paths`

|============
| Environment | Django/Flask | Default
| `ELASTIC_APM_EXCLUDE_PATHS` | `EXCLUDE_PATHS` | Depending on framework
| `ELASTIC_APM_EXCLUDE_PATHS` | `EXCLUDE_PATHS` | Varies on Python version and implementation
| multiple values separated by commas, without spaces |||
|============

A set of module paths that should be considered when excluding a frame from being detected as an in-app frame.
`exclude_paths` *takes precedence* over `include_paths`.
A set of paths, optionally using shell globs
(see https://docs.python.org/3/library/fnmatch.html[`fnmatch`] for a description of the syntax).
These are matched against the absolute filename of every frame, and if a pattern matches, the frame is considered
to be a "library frame".

For Django, the default is set to `{'elasticapm', 'django'}`.
Everywhere else, the default is `set(['elasticapm'])`.
`include_paths` *takes precedence* over `exclude_paths`.

NOTE: for a given module path, all sub-modules will also match. E.g. `foo.bar` also matches for `foo.bar.baz`.
The default value varies based on your Python version and implementation, e.g.:

* PyPy3: `['\*/lib-python/3/*', '\*/site-packages/*']`
* CPython 2.7: `['\*/lib/python2.7/*', '\*/lib64/python2.7/*']`

[float]
[[config-debug]]
Expand Down
9 changes: 2 additions & 7 deletions elasticapm/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import logging
import os
import platform
import re
import socket
import sys
import threading
Expand Down Expand Up @@ -147,8 +146,8 @@ def __init__(self, config=None, **defaults):
max_queue_length=self.config.max_event_queue_length,
ignore_patterns=self.config.transactions_ignore_patterns,
)
self.include_paths_re = self._get_path_regex(self.config.include_paths) if self.config.include_paths else None
self.exclude_paths_re = self._get_path_regex(self.config.exclude_paths) if self.config.exclude_paths else None
self.include_paths_re = stacks.get_path_regex(self.config.include_paths) if self.config.include_paths else None
self.exclude_paths_re = stacks.get_path_regex(self.config.exclude_paths) if self.config.exclude_paths else None
compat.atexit_register(self.close)

def get_handler(self, name):
Expand Down Expand Up @@ -327,10 +326,6 @@ def _send_remote(self, url, data, headers=None):
url = transport.send(data, headers, timeout=self.config.timeout)
self.handle_transport_success(url=url)

def _get_path_regex(self, paths):
paths = '|'.join(map(re.escape, paths))
return re.compile('^(?:{})(?:.|$)'.format(paths))

def get_service_info(self):
language_version = platform.python_version()
if hasattr(sys, 'pypy_version_info'):
Expand Down
2 changes: 1 addition & 1 deletion elasticapm/conf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ class Config(_ConfigBase):
server_url = _ConfigValue('SERVER_URL', default='http://localhost:8200', required=True)
verify_server_cert = _BoolConfigValue('VERIFY_SERVER_CERT', default=True)
include_paths = _ListConfigValue('INCLUDE_PATHS')
exclude_paths = _ListConfigValue('EXCLUDE_PATHS', default=['elasticapm'])
exclude_paths = _ListConfigValue('EXCLUDE_PATHS', default=compat.get_default_library_patters())
filter_exception_types = _ListConfigValue('FILTER_EXCEPTION_TYPES')
timeout = _ConfigValue('TIMEOUT', type=float, default=5)
hostname = _ConfigValue('HOSTNAME', default=socket.gethostname())
Expand Down
9 changes: 1 addition & 8 deletions elasticapm/contrib/django/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,20 +49,13 @@ def get_client(client=None):

if _client[0] != client:
client_class = import_string(client)
instance = client_class(**get_client_default_config())
instance = client_class()
if not tmp_client:
_client = (client, instance)
return instance
return _client[1]


def get_client_default_config():
return dict(
include_paths=_get_installed_apps_paths(),
exclude_paths={'django', 'elasticapm'}
)


class DjangoClient(Client):
logger = logging.getLogger('elasticapm.errors.client.django')

Expand Down
8 changes: 3 additions & 5 deletions elasticapm/contrib/django/management/commands/elasticapm.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
from django.core.management.color import color_style
from django.utils import termcolors

from elasticapm.contrib.django.client import (DjangoClient,
get_client_default_config)
from elasticapm.contrib.django.client import DjangoClient

try:
from django.core.management.base import OutputWrapper
Expand Down Expand Up @@ -123,7 +122,7 @@ def handle(self, *args, **options):
def handle_test(self, command, **options):
"""Send a test error to APM Server"""
self.write(LOGO, cyan)
config = get_client_default_config()
config = {}
# can't be async for testing
config['async_mode'] = False
for key in ('service_name', 'secret_token'):
Expand Down Expand Up @@ -161,8 +160,7 @@ def handle_check(self, command, **options):
"""Check your settings for common misconfigurations"""
self.write(LOGO, cyan)
passed = True
config = get_client_default_config()
client = DjangoClient(**config)
client = DjangoClient()
# check if org/app and token are set:
is_set = lambda x: x and x != 'None'
values = [client.config.service_name, client.config.secret_token]
Expand Down
1 change: 0 additions & 1 deletion elasticapm/contrib/flask/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
def make_client(client_cls, app, **defaults):
config = app.config.get('ELASTIC_APM', {})

defaults.setdefault('include_paths', {app.import_name})
if 'framework_name' not in defaults:
defaults['framework_name'] = 'flask'
defaults['framework_version'] = getattr(flask, '__version__', '<0.7')
Expand Down
23 changes: 23 additions & 0 deletions elasticapm/utils/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import atexit
import functools
import operator
import platform
import sys
import types

Expand Down Expand Up @@ -91,3 +92,25 @@ def iterkeys(d, **kwargs):

def iteritems(d, **kwargs):
return iter(d.items(**kwargs))


def get_default_library_patters():
"""
Returns library paths depending on the used platform.
TODO: ensure that this works correctly on Windows
:return: a list of glob paths
"""
python_version = platform.python_version_tuple()
python_implementation = platform.python_implementation()
system = platform.system()
if python_implementation == 'PyPy':
if python_version[0] == '2':
return ['*/lib-python/%s.%s/*' % python_version[:2], '*/site-packages/*']
else:
return ['*/lib-python/%s/*' % python_version[0], '*/site-packages/*']
else:
if system == 'Windows':
return [r'*\lib\*']
return ['*/lib/python%s.%s/*' % python_version[:2], '*/lib64/python%s.%s/*' % python_version[:2]]
27 changes: 21 additions & 6 deletions elasticapm/utils/stacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
:copyright: (c) 2010 by the Sentry Team, see AUTHORS for more details.
:license: BSD, see LICENSE for more details.
"""
import fnmatch
import inspect
import os
import re
Expand Down Expand Up @@ -200,7 +201,7 @@ def get_frame_info(frame, lineno, with_source_context=True, with_locals=True,
'module': module_name,
'function': function,
'lineno': lineno + 1,
'library_frame': _is_library_frame(module_name, include_paths_re, exclude_paths_re)
'library_frame': is_library_frame(abs_path, include_paths_re, exclude_paths_re)
}

if with_source_context:
Expand Down Expand Up @@ -275,8 +276,22 @@ def _walk_stack(frame):


@lru_cache(512)
def _is_library_frame(module_name, include_paths_re, exclude_paths_re):
return not(module_name and bool(
(include_paths_re.match(module_name) if include_paths_re else False) and
not (exclude_paths_re.match(module_name) if exclude_paths_re else False)
))
def is_library_frame(abs_file_path, include_paths_re, exclude_paths_re):
if not abs_file_path:
return True
if include_paths_re and include_paths_re.match(abs_file_path):
# frame is in-app, return False
return False
elif exclude_paths_re and exclude_paths_re.match(abs_file_path):
return True
# if neither excluded nor included, assume it's an in-app frame
return False


def get_path_regex(paths):
"""
compiles a list of path globs into a single pattern that matches any of the given paths
:param paths: a list of strings representing fnmatch path globs
:return: a compiled regex
"""
return re.compile('|'.join(map(fnmatch.translate, paths)))
7 changes: 4 additions & 3 deletions tests/client/client_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ def test_exception_event(elasticapm_client):


@pytest.mark.parametrize('elasticapm_client', [{
'include_paths': ('tests',),
'include_paths': ('*/tests/*',),
'local_var_max_length': 20,
'local_var_list_max_length': 10,
}], indirect=True)
Expand All @@ -350,7 +350,8 @@ def test_message_event(elasticapm_client):
assert 'timestamp' in event
assert 'stacktrace' in event['log']
# check that only frames from `tests` module are not marked as library frames
assert all(frame['library_frame'] or frame['module'].startswith('tests') for frame in event['log']['stacktrace'])
for frame in event['log']['stacktrace']:
assert frame['library_frame'] or frame['module'].startswith(('tests', '__main__')), (frame['module'], frame['abs_path'])
frame = event['log']['stacktrace'][0]
assert frame['vars']['a_local_var'] == 1
assert len(frame['vars']['a_long_local_var']) == 20
Expand Down Expand Up @@ -592,7 +593,7 @@ def test_collect_source_transactions(should_collect, elasticapm_client):
elasticapm_client.end_transaction('test', 'ok')
transaction = elasticapm_client.instrumentation_store.get_all()[0]
in_app_frame = transaction['spans'][0]['stacktrace'][5]
library_frame = transaction['spans'][0]['stacktrace'][1]
library_frame = transaction['spans'][0]['stacktrace'][6]
assert not in_app_frame['library_frame']
assert library_frame['library_frame']
if mode in ('transactions', 'all'):
Expand Down
2 changes: 1 addition & 1 deletion tests/contrib/celery/flask_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def failing_task():
assert t.status == 'FAILURE'
assert len(apm_client.events[0]['errors']) == 1
error = apm_client.events[0]['errors'][0]
assert error['culprit'] == 'tests.contrib.flask.fixtures.__call__'
assert error['culprit'] == 'tests.contrib.celery.flask_tests.failing_task'
assert error['exception']['message'] == 'ValueError: foo'
assert error['exception']['handled'] is False

Expand Down
4 changes: 2 additions & 2 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def elasticapm_client(request):
client_config = getattr(request, 'param', {})
client_config.setdefault('service_name', 'myapp')
client_config.setdefault('secret_token', 'test_key')
client_config.setdefault('include_paths', ('tests',))
client_config.setdefault('include_paths', ('*/tests/*',))
client = TempStoreClient(**client_config)
yield client
client.close()
Expand All @@ -91,7 +91,7 @@ def sending_elasticapm_client(request, validating_httpserver):
client_config.setdefault('service_name', 'myapp')
client_config.setdefault('secret_token', 'test_key')
client_config.setdefault('transport_class', 'elasticapm.transport.http.Transport')
client_config.setdefault('include_paths', ('tests',))
client_config.setdefault('include_paths', ('*/tests/*',))
client = Client(**client_config)
client.httpserver = validating_httpserver
yield client
Expand Down
23 changes: 23 additions & 0 deletions tests/utils/compat_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import mock

from elasticapm.utils import compat


@mock.patch('platform.system')
@mock.patch('platform.python_implementation')
@mock.patch('platform.python_version_tuple')
def test_default_library_paths(version_tuple, python_implementation, system):
cases = (
('Linux', ('3', '5', '1'), 'CPython', ['*/lib/python3.5/*', '*/lib64/python3.5/*']),
('Linux', ('2', '7', '9'), 'CPython', ['*/lib/python2.7/*', '*/lib64/python2.7/*']),
('Windows', ('3', '5', '1'), 'CPython', ['*\\lib\\*']),
('Windows', ('2', '7', '9'), 'CPython', ['*\\lib\\*']),
('Linux', ('3', '6', '3'), 'PyPy', ['*/lib-python/3/*', '*/site-packages/*']),
('Linux', ('2', '7', '9'), 'PyPy', ['*/lib-python/2.7/*', '*/site-packages/*']),
)
for system_name, version, implementation, expected in cases:
system.return_value = system_name
version_tuple.return_value = version
python_implementation.return_value = implementation

assert compat.get_default_library_patters() == expected
15 changes: 9 additions & 6 deletions tests/utils/stacks/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,22 +78,25 @@ def get_me_a_filtered_frame(hide=True):


@pytest.mark.parametrize('elasticapm_client', [{
'include_paths': ('a.b.c', 'c.d'),
'exclude_paths': ('c',)
'include_paths': ('/a/b/c/*', '/c/d/*'),
'exclude_paths': ('/c/*',)
}], indirect=True)
def test_library_frames(elasticapm_client):
include = elasticapm_client.include_paths_re
exclude = elasticapm_client.exclude_paths_re
frame1 = Mock(f_globals={'__name__': 'a.b.c'})
frame2 = Mock(f_globals={'__name__': 'a.b.c.d'})
frame3 = Mock(f_globals={'__name__': 'c.d'})
frame1 = Mock(f_code=Mock(co_filename='/a/b/c/d.py'))
frame2 = Mock(f_code=Mock(co_filename='/a/b/c/d/e.py'))
frame3 = Mock(f_code=Mock(co_filename='/c/d/e.py'))
frame4 = Mock(f_code=Mock(co_filename='/c/e.py'))

info1 = stacks.get_frame_info(frame1, 1, False, False, include_paths_re=include, exclude_paths_re=exclude)
info2 = stacks.get_frame_info(frame2, 1, False, False, include_paths_re=include, exclude_paths_re=exclude)
info3 = stacks.get_frame_info(frame3, 1, False, False, include_paths_re=include, exclude_paths_re=exclude)
info4 = stacks.get_frame_info(frame4, 1, False, False, include_paths_re=include, exclude_paths_re=exclude)
assert not info1['library_frame']
assert not info2['library_frame']
assert info3['library_frame']
assert not info3['library_frame']
assert info4['library_frame']


def test_get_frame_info():
Expand Down

0 comments on commit afb445c

Please sign in to comment.