-
-
Notifications
You must be signed in to change notification settings - Fork 4.5k
feat(minidump): Support CFI for minidumps #9344
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
20 commits
Select commit
Hold shift + click to select a range
c19428f
ref(minidump): Move minidump logic to its own module
jan-auer cf930e4
ref: Add a constant for the minidump attachment type
jan-auer eff66c5
feat(minidump): Expand the merge_minidump_event interface
jan-auer 038f7a4
feat(minidump): Add a util to detect minidump events
jan-auer 020c663
feat: Add an event enhancer hook to Plugin v2
jan-auer 39ec9dd
feat: Optionally pass CFI to minidump processing
jan-auer 9583c17
ref: Move hashing helper from stacktraces to utils.hashlib
jan-auer fdb5ed0
feat: Add an optional stackframe trust
jan-auer f1012d3
feat: Emit the stackframe trust for minidump events
jan-auer 1ae5248
fix: Add frame trust to schema
jan-auer 0eca33e
feat(native): Reprocess minidumps with CFI
jan-auer 7d0c32b
meta: Add more description and comments to CFI processing
jan-auer 382fe6c
fix: Fix a doc code example
jan-auer 50aa573
feat: Show scanned frames in the UI
jan-auer 00fee11
feat: Refactor event cache keys into a function
jan-auer 305c92f
fix: Correctly detect outdated CFI caches
jan-auer baf1721
fix: Correctly detect unchanged stacktraces
jan-auer 14812bd
test: Add tests for CFI reprocessing
jan-auer 0987e51
build: Bump symbolic to 5.5.3
jan-auer 8871539
meta: Add suggested comment
jan-auer File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,270 @@ | ||
| from __future__ import absolute_import | ||
|
|
||
| import logging | ||
| import six | ||
|
|
||
| from symbolic import FrameInfoMap, FrameTrust, ObjectLookup | ||
|
|
||
| from sentry.attachments import attachment_cache | ||
| from sentry.coreapi import cache_key_for_event | ||
| from sentry.lang.native.minidump import process_minidump, frames_from_minidump_thread, \ | ||
| MINIDUMP_ATTACHMENT_TYPE | ||
| from sentry.lang.native.utils import rebase_addr | ||
| from sentry.models import Project, ProjectDebugFile | ||
| from sentry.utils.cache import cache | ||
| from sentry.utils.hashlib import hash_values | ||
|
|
||
|
|
||
logger = logging.getLogger(__name__)

# Trust values a frame carries when it was recovered with the help of CFI.
CFI_TRUSTS = ('cfi', 'cfi-scan')

# Frames trusted at least this much do not trigger CFI reprocessing.
MIN_TRUST = FrameTrust.fp

# Marker stored in place of a stack trace when CFI could not be used to
# stackwalk a thread.
NO_CFI_PLACEHOLDER = '__no_cfi__'
|
|
||
|
|
||
class ThreadRef(object):
    """Cacheable and mutable reference to stack frames of an event thread.

    ``raw_frames`` is the frame list taken from the event payload; it is
    replaced in place by ``apply_to_event``. ``modules`` is the lookup used to
    map addresses and debug ids to code modules. ``resolved_frames`` stays
    ``None`` until a stack trace has been loaded; afterwards it is either
    ``NO_CFI_PLACEHOLDER`` or a list of ``(module, frame)`` tuples.
    """

    def __init__(self, frames, modules):
        self.raw_frames = frames
        self.modules = modules
        self.resolved_frames = None

    def _get_frame_key(self, frame):
        """Returns the cache key component for a single raw frame, which is
        ``(module id, module-relative address)`` or ``None`` if no module
        contains the frame's instruction address."""
        module = self.modules.find_object(frame['instruction_addr'])

        # If we cannot resolve a module for this frame, this means we're dealing
        # with an absolute address here. Since this address changes with every
        # crash and would poison our cache, we skip it for the key calculation.
        if not module:
            return None

        return (
            module.id,
            rebase_addr(frame['instruction_addr'], module)
        )

    @property
    def _cache_key(self):
        """Cache key derived from the per-frame keys of all raw frames."""
        values = [self._get_frame_key(f) for f in self.raw_frames]
        # XXX: The seed is hard coded for a future refactor
        return 'st:%s' % hash_values(values, seed='MinidumpCfiProcessor')

    def _frame_from_cache(self, entry):
        """Converts one cached ``(debug_id, offset, trust)`` entry back into a
        ``(module, frame)`` tuple with an absolute instruction address."""
        debug_id, offset, trust = entry[:3]
        module = self.modules.get_object(debug_id)

        # ``save_to_cache`` stores addresses as hex strings ('0x...'). They
        # have to be parsed before doing arithmetic or ``%x`` formatting,
        # otherwise every cache hit raises a TypeError. Entries that already
        # hold an integer make ``int(value, 16)`` raise TypeError and are
        # used unchanged.
        try:
            offset = int(offset, 16)
        except TypeError:
            pass

        # The debug_id can be None or refer to a missing module. If the module
        # was missing, the stored offset was absolute as well. Otherwise, we
        # have no choice but to assume an absolute address. In practice, the
        # latter hopefully never happens.
        addr = module.addr + offset if module else offset

        return module, {
            'instruction_addr': '0x%x' % addr,
            'function': '<unknown>',  # Required by interface
            'module': module.name if module else None,
            'trust': trust,
        }

    def load_from_cache(self):
        """Attempts to load the reprocessed stack trace from the cache. The
        return value is ``True`` for a cache hit, and ``False`` for a miss.
        The loaded addresses are rebased to the provided code modules.
        """
        cached = cache.get(self._cache_key)
        if cached is None:
            return False

        if cached == NO_CFI_PLACEHOLDER:
            self.resolved_frames = NO_CFI_PLACEHOLDER
        else:
            self.resolved_frames = [self._frame_from_cache(c) for c in cached]

        return True

    def save_to_cache(self):
        """Stores the reprocessed stack trace to the cache. For frames with
        known code modules only relative offsets are stored, otherwise the
        absolute address as fallback.

        Raises ``RuntimeError`` if no stack trace has been loaded yet.
        """
        if self.resolved_frames is None:
            raise RuntimeError('save_to_cache called before resolving frames')

        if self.resolved_frames == NO_CFI_PLACEHOLDER:
            cache.set(self._cache_key, NO_CFI_PLACEHOLDER)
            return

        values = []
        for module, frame in self.resolved_frames:
            module_id = module and module.id
            addr = frame['instruction_addr']
            if module:
                # Stored as a hex string; ``_frame_from_cache`` parses it back.
                addr = '0x%x' % rebase_addr(addr, module)
            values.append((module_id, addr, frame['trust']))

        cache.set(self._cache_key, values)

    def load_from_minidump(self, thread):
        """Loads the stack trace from a minidump process state thread."""

        # Convert the entire thread into frames conforming to the `Frame`
        # interface. Note that this is done with the same function as the
        # initial ingestion to avoid normalization conflicts.
        frames = frames_from_minidump_thread(thread)

        # Filter out stack traces that did not improve during reprocessing. For
        # these cases we only store a marker. This also prevents us from
        # destroying absolute addresses when restoring from the cache. Stack
        # traces containing CFI frames are mapped to their modules and stored.
        if any(frame['trust'] in CFI_TRUSTS for frame in frames):
            self.resolved_frames = [(self.modules.find_object(f['instruction_addr']), f)
                                    for f in frames]
        else:
            self.resolved_frames = NO_CFI_PLACEHOLDER

    def apply_to_event(self):
        """Writes the loaded stack trace back to the event's payload. Returns
        ``True`` if the payload was changed, otherwise ``False``.

        Raises ``RuntimeError`` if no stack trace has been loaded yet.
        """
        if self.resolved_frames is None:
            raise RuntimeError('apply_to_event called before resolving frames')

        if self.resolved_frames == NO_CFI_PLACEHOLDER:
            return False

        # Slice assignment keeps the list object referenced by the event.
        self.raw_frames[:] = [frame for module, frame in self.resolved_frames]
        return True

    @property
    def needs_cfi(self):
        """Indicates whether this thread requires reprocessing with CFI due to
        scanned stack frames."""
        # Frames without a (known) trust value count as 0.
        return any(
            getattr(FrameTrust, f.get('trust', ''), 0) < MIN_TRUST
            for f in self.raw_frames
        )
|
|
||
|
|
||
class ThreadProcessingHandle(object):
    """View on all threads and code modules of an event payload.

    ``iter_threads`` walks the stack traces of every thread in the payload,
    including the crashing one, and ``iter_modules`` walks the modules
    (images) that were loaded into the process.

    The handle also records whether the payload was modified: callers invoke
    ``indicate_change`` after mutating it, and ``result`` hands back the
    payload only if that happened.
    """

    def __init__(self, data):
        self.data = data
        self.modules = self._get_modules()
        self.changed = False

    def _get_modules(self):
        """Builds the module lookup from the payload's ``debug_meta`` images."""
        debug_meta = self.data.get('debug_meta', {})
        return ObjectLookup(debug_meta.get('images', []))

    def iter_modules(self):
        """Iterates over the code modules (images) known to the module lookup.
        The values are of type ``ObjectRef``.
        """
        return self.modules.iter_objects()

    def iter_threads(self):
        """Yields ``(thread id, ThreadRef)`` pairs for every thread that has
        both an id and stack frames. For the crashed thread, the frames are
        taken from the first exception instead of the thread itself."""
        all_threads = self.data.get('threads', {}).get('values', [])
        for thread in all_threads:
            if thread.get('crashed'):
                # XXX: Assumes that the full list of threads is present in the
                # original crash report. This is guaranteed by KSCrash and our
                # minidump utility.
                exceptions = self.data.get('exception', {}).get('values', [])
                frame_source = exceptions[0] if exceptions else {}
            else:
                frame_source = thread

            frames = frame_source.get('stacktrace', {}).get('frames')
            tid = thread.get('id')
            # NOTE: a falsy thread id (e.g. 0) is skipped as well.
            if tid and frames:
                yield tid, ThreadRef(frames, self.modules)

    def indicate_change(self):
        """Marks the payload as modified."""
        self.changed = True

    def result(self):
        """Returns ``data`` if ``indicate_change`` was called, otherwise None.
        """
        return self.data if self.changed else None
|
|
||
|
|
||
def reprocess_minidump_with_cfi(data):
    """Reprocesses a minidump event with CFI (call frame information) where
    that is available and viable. Only threads whose stack traces need CFI
    are considered, and cached results are preferred over reprocessing.

    Returns the event payload if it was changed, otherwise ``None``.
    """
    handle = ThreadProcessingHandle(data)

    # Sort out every thread that is trusted already or can be restored from
    # the stack trace cache. Whatever remains has to be reprocessed.
    pending = {}
    for tid, thread in handle.iter_threads():
        if not thread.needs_cfi:
            continue

        if not thread.load_from_cache():
            pending[tid] = thread
        elif thread.apply_to_event():
            handle.indicate_change()

    if not pending:
        return handle.result()

    # Reprocessing needs the original minidump attachment.
    minidump = None
    for attachment in attachment_cache.get(cache_key_for_event(data)) or []:
        if attachment.type == MINIDUMP_ATTACHMENT_TYPE:
            minidump = attachment
            break
    if not minidump:
        return handle.result()

    # Collect the debug ids of all modules loaded during the crash.
    debug_ids = [module.id for module in handle.iter_modules()]
    if not debug_ids:
        return handle.result()

    # Fetch CFI caches for all loaded modules (even unreferenced ones).
    project = Project.objects.get_from_cache(id=data['project'])
    cficaches = ProjectDebugFile.difcache.get_cficaches(project, debug_ids)
    if not cficaches:
        return handle.result()

    # Run the stackwalker again, this time with CFI.
    cfi_map = FrameInfoMap.new()
    for debug_id, cficache in six.iteritems(cficaches):
        cfi_map.add(debug_id, cficache)
    state = process_minidump(minidump.data, cfi=cfi_map)

    # Replace the pending stack traces with the ones from the minidump.
    for minidump_thread in state.threads():
        thread = pending.get(minidump_thread.thread_id)
        if not thread:
            continue

        thread.load_from_minidump(minidump_thread)
        thread.save_to_cache()
        if thread.apply_to_event():
            handle.indicate_change()

    return handle.result()
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.