2 changes: 1 addition & 1 deletion requirements-base.txt
@@ -62,7 +62,7 @@ sqlparse>=0.1.16,<0.2.0
 statsd>=3.1.0,<3.2.0
 strict-rfc3339>=0.7
 structlog==16.1.0
-symbolic>=5.5.0,<6.0.0
+symbolic>=5.5.3,<6.0.0
 toronado>=0.0.11,<0.1.0
 ua-parser>=0.6.1,<0.8.0
 # for bitbucket client
6 changes: 5 additions & 1 deletion src/sentry/coreapi.py
@@ -200,7 +200,7 @@ def insert_data_to_database(self, data, start_time=None,
         data = dict(data.items())
 
         cache_timeout = 3600
-        cache_key = u'e:{1}:{0}'.format(data['project'], data['event_id'])
+        cache_key = cache_key_for_event(data)
         default_cache.set(cache_key, data, cache_timeout)
 
         # Attachments will be empty or None if the "event-attachments" feature
@@ -257,6 +257,10 @@ def auth_from_request(self, request):
         return auth
 
 
+def cache_key_for_event(data):
+    return u'e:{1}:{0}'.format(data['project'], data['event_id'])
+
+
 def decompress_deflate(encoded_data):
     try:
         return zlib.decompress(encoded_data).decode("utf-8")
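The extracted cache_key_for_event helper keeps the exact key format previously built inline in insert_data_to_database, so the attachment cache and the new CFI reprocessor below can address the same entry. A minimal sketch of the resulting key, using made-up payload values:

# Hypothetical payload values, for illustration only.
data = {'project': 42, 'event_id': 'ab1e4f67c0d34b5a9e1fbe2c9d3a7f10'}

# Mirrors cache_key_for_event(data): note that {1} (the event_id) comes first.
key = u'e:{1}:{0}'.format(data['project'], data['event_id'])
assert key == u'e:ab1e4f67c0d34b5a9e1fbe2c9d3a7f10:42'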
1 change: 1 addition & 0 deletions src/sentry/interfaces/schemas.py
@@ -109,6 +109,7 @@ def apierror(message="Invalid data"):
         'in_app': {'type': 'boolean', 'default': False},
         'instruction_addr': {},
         'instruction_offset': {},
+        'trust': {'type': 'string'},
         'lineno': {'type': ['number', 'string']},
         'module': {
             'type': 'string',
3 changes: 3 additions & 0 deletions src/sentry/interfaces/stacktrace.py
@@ -372,6 +372,7 @@ def to_python(cls, data, raw=False):
             'symbol': trim(symbol, 256),
             'symbol_addr': to_hex_addr(data.get('symbol_addr')),
             'instruction_addr': to_hex_addr(data.get('instruction_addr')),
+            'trust': trim(data.get('trust'), 16),
             'in_app': in_app,
             'context_line': context_line,
             # TODO(dcramer): trim pre/post_context
@@ -477,6 +478,7 @@ def get_api_context(self, is_public=False, pad_addr=None):
             'lineNo': self.lineno,
             'colNo': self.colno,
             'inApp': self.in_app,
+            'trust': self.trust,
             'errors': self.errors,
         }
         if not is_public:
@@ -522,6 +524,7 @@ def get_meta_context(self, meta, is_public=False):
             'lineNo': meta.get('lineno'),
             'colNo': meta.get('colno'),
             'inApp': meta.get('in_app'),
+            'trust': meta.get('trust'),
             'errors': meta.get('errors'),
         }
 
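With the schema and interface changes above, frames can carry an optional trust value describing how the stackwalker derived them; it is trimmed to 16 characters on ingestion and exposed as trust in the API context. An illustrative frame payload (values are made up):

# Illustrative frame payload only; values are made up.
frame = {
    'function': 'main',
    'instruction_addr': '0x7f8a12c40d10',
    'in_app': False,
    # New optional field: how the stackwalker found this frame,
    # e.g. 'context', 'cfi', 'cfi-scan', 'fp' or 'scan'.
    'trust': 'cfi',
}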
270 changes: 270 additions & 0 deletions src/sentry/lang/native/cfi.py
@@ -0,0 +1,270 @@
from __future__ import absolute_import

import logging
import six

from symbolic import FrameInfoMap, FrameTrust, ObjectLookup

from sentry.attachments import attachment_cache
from sentry.coreapi import cache_key_for_event
from sentry.lang.native.minidump import process_minidump, frames_from_minidump_thread, \
    MINIDUMP_ATTACHMENT_TYPE
from sentry.lang.native.utils import rebase_addr
from sentry.models import Project, ProjectDebugFile
from sentry.utils.cache import cache
from sentry.utils.hashlib import hash_values


logger = logging.getLogger(__name__)

# Frame trust values achieved through the use of CFI
CFI_TRUSTS = ('cfi', 'cfi-scan')

# Minimum frame trust value that we require to omit CFI reprocessing
MIN_TRUST = FrameTrust.fp

# Placeholder used to indicate that no CFI could be used to stackwalk a thread
NO_CFI_PLACEHOLDER = '__no_cfi__'


class ThreadRef(object):
    """Cacheable and mutable reference to stack frames of an event thread."""

    def __init__(self, frames, modules):
        self.raw_frames = frames
        self.modules = modules
        self.resolved_frames = None

    def _get_frame_key(self, frame):
        module = self.modules.find_object(frame['instruction_addr'])

        # If we cannot resolve a module for this frame, this means we're dealing
        # with an absolute address here. Since this address changes with every
        # crash and would poison our cache, we skip it for the key calculation.
        if not module:
            return None

        return (
            module.id,
            rebase_addr(frame['instruction_addr'], module)
        )

    @property
    def _cache_key(self):
        values = [self._get_frame_key(f) for f in self.raw_frames]
        # XXX: The seed is hard coded for a future refactor
        return 'st:%s' % hash_values(values, seed='MinidumpCfiProcessor')

    def _frame_from_cache(self, entry):
        debug_id, offset, trust = entry[:3]
        module = self.modules.get_object(debug_id)

        # The debug_id can be None or refer to a missing module. If the module
        # was missing, the stored offset was absolute as well. Otherwise, we
        # have no choice but to assume an absolute address. In practice, the
        # latter hopefully never happens.
        addr = module.addr + offset if module else offset

        return module, {
            'instruction_addr': '0x%x' % addr,
            'function': '<unknown>',  # Required by interface
            'module': module.name if module else None,
            'trust': trust,
        }

    def load_from_cache(self):
        """Attempts to load the reprocessed stack trace from the cache. The
        return value is ``True`` for a cache hit, and ``False`` for a miss.
        The loaded addresses are rebased to the provided code modules.
        """

        cached = cache.get(self._cache_key)
        if cached is None:
            return False

        if cached == NO_CFI_PLACEHOLDER:
            self.resolved_frames = NO_CFI_PLACEHOLDER
        else:
            self.resolved_frames = [self._frame_from_cache(c) for c in cached]

        return True

    def save_to_cache(self):
        """Stores the reprocessed stack trace to the cache. For frames with
        known code modules only relative offsets are stored, otherwise the
        absolute address as fallback."""
        if self.resolved_frames is None:
            raise RuntimeError('save_to_cache called before resolving frames')

        if self.resolved_frames == NO_CFI_PLACEHOLDER:
            cache.set(self._cache_key, NO_CFI_PLACEHOLDER)
            return

        values = []
        for module, frame in self.resolved_frames:
            module_id = module and module.id
            addr = frame['instruction_addr']
            if module:
                addr = '0x%x' % rebase_addr(addr, module)
            values.append((module_id, addr, frame['trust']))

        cache.set(self._cache_key, values)

    def load_from_minidump(self, thread):
        """Loads the stack trace from a minidump process state thread."""

        # Convert the entire thread into frames conforming to the `Frame`
        # interface. Note that this is done with the same function as the
        # initial ingestion to avoid normalization conflicts.
        frames = frames_from_minidump_thread(thread)

        # Filter out stack traces that did not improve during reprocessing. For
        # these cases we only store a marker. This also prevents us from
        # destroying absolute addresses when restoring from the cache. Stack
        # traces containing CFI frames are mapped to their modules and stored.
        if any(frame['trust'] in CFI_TRUSTS for frame in frames):
            self.resolved_frames = [(self.modules.find_object(f['instruction_addr']), f)
                                    for f in frames]
        else:
            self.resolved_frames = NO_CFI_PLACEHOLDER

    def apply_to_event(self):
        """Writes the loaded stack trace back to the event's payload. Returns
        ``True`` if the payload was changed, otherwise ``False``."""
        if self.resolved_frames is None:
            raise RuntimeError('apply_to_event called before resolving frames')

        if self.resolved_frames == NO_CFI_PLACEHOLDER:
            return False

        self.raw_frames[:] = [frame for module, frame in self.resolved_frames]
        return True

    @property
    def needs_cfi(self):
        """Indicates whether this thread requires reprocessing with CFI due to
        scanned stack frames."""
        return any(
            getattr(FrameTrust, f.get('trust', ''), 0) < MIN_TRUST
            for f in self.raw_frames
        )


class ThreadProcessingHandle(object):
    """Helper object for processing all event threads.

    This class offers a view on all threads in the given event payload,
    including the crashing exception thread. Use ``iter_threads`` to iterate
    pointers to the original threads' stack traces. Likewise, ``iter_modules``
    returns references to all modules (images) loaded into the process.

    The handle keeps track of changes to the original data. To signal mutation,
    call ``indicate_change``. Finally, ``result`` returns the changed data or
    None if it was not changed.
    """

    def __init__(self, data):
        self.data = data
        self.modules = self._get_modules()
        self.changed = False

    def _get_modules(self):
        modules = self.data.get('debug_meta', {}).get('images', [])
        return ObjectLookup(modules)

    def iter_modules(self):
        """Returns an iterator over all code modules (images) loaded by the
        process at the time of the crash. The values are of type ``ObjectRef``.
        """
        return self.modules.iter_objects()

    def iter_threads(self):
        """Returns an iterator over all threads of the process at the time of
        the crash, including the crashing thread. The values are of type
        ``ThreadRef``."""
        for thread in self.data.get('threads', {}).get('values', []):
            if thread.get('crashed'):
                # XXX: Assumes that the full list of threads is present in the
                # original crash report. This is guaranteed by KSCrash and our
                # minidump utility.
                exceptions = self.data.get('exception', {}).get('values', [])
                exception = exceptions[0] if exceptions else {}
                frames = exception.get('stacktrace', {}).get('frames')
            else:
                frames = thread.get('stacktrace', {}).get('frames')

            tid = thread.get('id')
            if tid and frames:
                yield tid, ThreadRef(frames, self.modules)

    def indicate_change(self):
        """Signals mutation of the data."""
        self.changed = True

    def result(self):
        """Returns ``data`` if ``indicate_change`` was called, otherwise None.
        """
        if self.changed:
            return self.data


def reprocess_minidump_with_cfi(data):
    """Reprocesses a minidump event if CFI (call frame information) is
    available and viable. The event is only processed if there are stack
    traces that contain scanned frames.
    """

    handle = ThreadProcessingHandle(data)

    # Check stacktrace caches first and skip all that do not need CFI. This is
    # the case either if a thread is trusted (i.e. it does not contain scanned
    # frames) or if it can be fetched from the cache.
    threads = {}
    for tid, thread in handle.iter_threads():
        if not thread.needs_cfi:
            continue

        if thread.load_from_cache():
            if thread.apply_to_event():
                handle.indicate_change()
            continue

        threads[tid] = thread

    if not threads:
        return handle.result()

    # Check if we have a minidump to reprocess
    cache_key = cache_key_for_event(data)
    attachments = attachment_cache.get(cache_key) or []
    minidump = next((a for a in attachments if a.type == MINIDUMP_ATTACHMENT_TYPE), None)
    if not minidump:
        return handle.result()

    # Determine modules loaded into the process during the crash
    debug_ids = [module.id for module in handle.iter_modules()]
    if not debug_ids:
        return handle.result()

    # Load CFI caches for all loaded modules (even unreferenced ones)
    project = Project.objects.get_from_cache(id=data['project'])
    cficaches = ProjectDebugFile.difcache.get_cficaches(project, debug_ids)
    if not cficaches:
        return handle.result()

    # Reprocess the minidump with CFI
    cfi_map = FrameInfoMap.new()
    for debug_id, cficache in six.iteritems(cficaches):
        cfi_map.add(debug_id, cficache)
    state = process_minidump(minidump.data, cfi=cfi_map)

    # Merge existing stack traces with new ones from the minidump
    for minidump_thread in state.threads():
        thread = threads.get(minidump_thread.thread_id)
        if thread:
            thread.load_from_minidump(minidump_thread)
            thread.save_to_cache()
            if thread.apply_to_event():
                handle.indicate_change()

    return handle.result()
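As a usage note: per its docstring and the result() contract, reprocess_minidump_with_cfi returns the mutated event payload only when at least one stack trace changed, and None otherwise. A minimal sketch of a hypothetical call site (the wiring into Sentry's native processing pipeline is not shown in this excerpt):

from sentry.lang.native.cfi import reprocess_minidump_with_cfi

def apply_cfi_if_needed(data):
    # Hypothetical helper, not part of this PR: run CFI reprocessing and
    # keep the modified event payload only if the stack traces changed.
    reprocessed = reprocess_minidump_with_cfi(data)
    return reprocessed if reprocessed is not None else data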