From 86513720e9c776f8622ed69b220251adb5b1107a Mon Sep 17 00:00:00 2001 From: Michael Hayes Date: Fri, 4 Oct 2013 14:30:45 -0700 Subject: [PATCH 1/9] added suport for inlined base64 source maps --- src/sentry/tasks/fetch_source.py | 25 +++++++++++++++++-------- src/sentry/utils/sourcemaps.py | 20 +++++++++++++++----- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/src/sentry/tasks/fetch_source.py b/src/sentry/tasks/fetch_source.py index 495281f8165349..92668000973fd2 100644 --- a/src/sentry/tasks/fetch_source.py +++ b/src/sentry/tasks/fetch_source.py @@ -12,6 +12,7 @@ import re import urllib2 import zlib +import base64 from collections import namedtuple from urlparse import urljoin @@ -161,11 +162,15 @@ def fetch_url(url): def fetch_sourcemap(url): - result = fetch_url(url) - if result == BAD_SOURCE: - return + if url.startswith('data:application/json;base64,'): + body = base64.b64decode(url[29:]) + else: + result = fetch_url(url) + if result == BAD_SOURCE: + return + + body = result.body - body = result.body # According to spec (https://docs.google.com/document/d/1U1RGAehQwRypUTovF1KRlpiOFze0b-_2gc6fAH0KY0k/edit#heading=h.h7yy76c5il9v) # A SourceMap may be prepended with ")]}'" to cause a Javascript error. # If the file starts with that string, ignore the entire first line. @@ -255,7 +260,8 @@ def expand_javascript_source(data, **kwargs): # TODO: we're currently running splitlines twice if sourcemap: logger.debug('Found sourcemap %r for minified script %r', sourcemap, result.url) - elif sourcemap in sourmap_idxs or not sourcemap: + + if sourcemap in sourmap_idxs or not sourcemap: continue # pull down sourcemap @@ -270,7 +276,11 @@ def expand_javascript_source(data, **kwargs): for source in index.sources: next_filename = urljoin(result.url, source) if next_filename not in done_file_list: - pending_file_list.add(next_filename) + if index.content: + source_code[next_filename] = (index.content[source], None) + done_file_list.add(next_filename) + else: + pending_file_list.add(next_filename) has_changes = False for frame in frames: @@ -283,8 +293,7 @@ def expand_javascript_source(data, **kwargs): # may have had a failure pulling down the sourcemap previously if sourcemap in sourmap_idxs and frame.colno is not None: state = find_source(sourmap_idxs[sourcemap], frame.lineno, frame.colno) - # TODO: is this urljoin right? (is it relative to the sourcemap or the originating file) - abs_path = urljoin(sourcemap, state.src) + abs_path = urljoin(result.url, state.src) logger.debug('Mapping compressed source %r to mapping in %r', frame.abs_path, abs_path) try: source, _ = source_code[abs_path] diff --git a/src/sentry/utils/sourcemaps.py b/src/sentry/utils/sourcemaps.py index ba21bc2d999af4..965198df1a4085 100644 --- a/src/sentry/utils/sourcemaps.py +++ b/src/sentry/utils/sourcemaps.py @@ -17,7 +17,7 @@ SourceMap = namedtuple('SourceMap', ['dst_line', 'dst_col', 'src', 'src_line', 'src_col', 'name']) -SourceMapIndex = namedtuple('SourceMapIndex', ['states', 'keys', 'sources']) +SourceMapIndex = namedtuple('SourceMapIndex', ['states', 'keys', 'sources', 'content']) # Mapping of base64 letter -> integer value. B64 = dict( @@ -59,12 +59,11 @@ def parse_vlq(segment): return values -def parse_sourcemap(sourcemap): +def parse_sourcemap(smap): """ Given a file-like object, yield SourceMap objects as they are read from it. """ - smap = json.loads(sourcemap) sources = smap['sources'] sourceRoot = smap.get('sourceRoot') names = smap['names'] @@ -108,16 +107,27 @@ def parse_sourcemap(sourcemap): def sourcemap_to_index(sourcemap): + smap = json.loads(sourcemap) + state_list = [] key_list = [] src_list = set() + content = None + + if(smap['sourcesContent']): + content = {} + for idx, source in enumerate(smap['sources']): + if smap['sourcesContent'][idx]: + content[source] = smap['sourcesContent'][idx].splitlines() + else: + content[source] = [] - for state in parse_sourcemap(sourcemap): + for state in parse_sourcemap(smap): state_list.append(state) key_list.append((state.dst_line, state.dst_col)) src_list.add(state.src) - return SourceMapIndex(state_list, key_list, src_list) + return SourceMapIndex(state_list, key_list, src_list, content) def find_source(indexed_sourcemap, lineno, colno): From 3d461407b4ba9d6b430279288df426f79cd73248 Mon Sep 17 00:00:00 2001 From: Michael Hayes Date: Fri, 4 Oct 2013 15:51:37 -0700 Subject: [PATCH 2/9] cleanup --- src/sentry/tasks/fetch_source.py | 5 +++-- src/sentry/utils/sourcemaps.py | 5 ++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/sentry/tasks/fetch_source.py b/src/sentry/tasks/fetch_source.py index 92668000973fd2..4f54074244ced1 100644 --- a/src/sentry/tasks/fetch_source.py +++ b/src/sentry/tasks/fetch_source.py @@ -28,6 +28,7 @@ LINES_OF_CONTEXT = 5 CHARSET_RE = re.compile(r'charset=(\S+)') DEFAULT_ENCODING = 'utf-8' +BASE64_SOURCEMAP_PREAMBLE = 'data:application/json;base64,' UrlResult = namedtuple('UrlResult', ['url', 'headers', 'body']) @@ -162,8 +163,8 @@ def fetch_url(url): def fetch_sourcemap(url): - if url.startswith('data:application/json;base64,'): - body = base64.b64decode(url[29:]) + if url.startswith(BASE64_SOURCEMAP_PREAMBLE): + body = base64.b64decode(url[len(BASE64_SOURCEMAP_PREAMBLE):]) else: result = fetch_url(url) if result == BAD_SOURCE: diff --git a/src/sentry/utils/sourcemaps.py b/src/sentry/utils/sourcemaps.py index 965198df1a4085..065781ec1b2872 100644 --- a/src/sentry/utils/sourcemaps.py +++ b/src/sentry/utils/sourcemaps.py @@ -61,9 +61,8 @@ def parse_vlq(segment): def parse_sourcemap(smap): """ - Given a file-like object, yield SourceMap objects as they are read from it. + Given a sourcemap json object, yield SourceMap objects as they are read from it. """ - sources = smap['sources'] sourceRoot = smap.get('sourceRoot') names = smap['names'] @@ -114,7 +113,7 @@ def sourcemap_to_index(sourcemap): src_list = set() content = None - if(smap['sourcesContent']): + if 'sourcesContent' in smap: content = {} for idx, source in enumerate(smap['sources']): if smap['sourcesContent'][idx]: From 0d716f8f6f2d2c379de6508ad9751cfd24f9544a Mon Sep 17 00:00:00 2001 From: Michael Hayes Date: Fri, 4 Oct 2013 17:27:36 -0700 Subject: [PATCH 3/9] updated parse sourcemap test --- tests/sentry/utils/sourcemaps/tests.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/sentry/utils/sourcemaps/tests.py b/tests/sentry/utils/sourcemaps/tests.py index 03efc3b5eb01e2..3908813ce2b3b7 100644 --- a/tests/sentry/utils/sourcemaps/tests.py +++ b/tests/sentry/utils/sourcemaps/tests.py @@ -6,6 +6,8 @@ find_source) from sentry.testutils import TestCase +from sentry.utils import json + sourcemap = """{"version":3,"file":"file.min.js","sources":["file1.js","file2.js"],"names":["add","a","b","multiply","divide","c","e","Raven","captureException"],"mappings":"AAAA,QAASA,KAAIC,EAAGC,GACf,YACA,OAAOD,GAAIC,ECFZ,QAASC,UAASF,EAAGC,GACpB,YACA,OAAOD,GAAIC,EAEZ,QAASE,QAAOH,EAAGC,GAClB,YACA,KACC,MAAOC,UAASH,IAAIC,EAAGC,GAAID,EAAGC,GAAKG,EAClC,MAAOC,GACRC,MAAMC,iBAAiBF"}""" @@ -30,7 +32,8 @@ def test_simple(self): class ParseSourcemapTest(TestCase): def test_basic(self): - states = list(parse_sourcemap(sourcemap)) + smap = json.loads(sourcemap) + states = list(parse_sourcemap(smap)) assert states == [ SourceMap(dst_line=0, dst_col=0, src='file1.js', src_line=0, src_col=0, name=None), From bfdfe1fcf15aaf02f834fde4b694fb8a13c2e298 Mon Sep 17 00:00:00 2001 From: Michael Hayes Date: Tue, 8 Oct 2013 16:14:00 -0700 Subject: [PATCH 4/9] hashing souremap urls because the could potentially be huge(hundreds of kb) and eat up a lot of memory --- src/sentry/tasks/fetch_source.py | 22 ++++++++++++++++------ tests/sentry/tasks/fetch_source/tests.py | 12 +++++++++++- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/src/sentry/tasks/fetch_source.py b/src/sentry/tasks/fetch_source.py index 4f54074244ced1..fbbfed6436e7a7 100644 --- a/src/sentry/tasks/fetch_source.py +++ b/src/sentry/tasks/fetch_source.py @@ -29,6 +29,7 @@ CHARSET_RE = re.compile(r'charset=(\S+)') DEFAULT_ENCODING = 'utf-8' BASE64_SOURCEMAP_PREAMBLE = 'data:application/json;base64,' +BASE64_PREAMBLE_LENGTH = len(BASE64_SOURCEMAP_PREAMBLE) UrlResult = namedtuple('UrlResult', ['url', 'headers', 'body']) @@ -163,8 +164,8 @@ def fetch_url(url): def fetch_sourcemap(url): - if url.startswith(BASE64_SOURCEMAP_PREAMBLE): - body = base64.b64decode(url[len(BASE64_SOURCEMAP_PREAMBLE):]) + if is_data_uri(url): + body = base64.b64decode(url[BASE64_PREAMBLE_LENGTH:]) else: result = fetch_url(url) if result == BAD_SOURCE: @@ -185,6 +186,10 @@ def fetch_sourcemap(url): return index +def is_data_uri(url): + return url[:BASE64_PREAMBLE_LENGTH] == BASE64_SOURCEMAP_PREAMBLE + + def expand_javascript_source(data, **kwargs): """ Attempt to fetch source code for javascript frames. @@ -256,7 +261,8 @@ def expand_javascript_source(data, **kwargs): continue sourcemap = discover_sourcemap(result) - source_code[filename] = (result.body.splitlines(), sourcemap) + sourcemap_key = hashlib.md5(sourcemap).hexdigest() + source_code[filename] = (result.body.splitlines(), sourcemap_key) # TODO: we're currently running splitlines twice if sourcemap: @@ -271,7 +277,10 @@ def expand_javascript_source(data, **kwargs): logger.debug('Failed parsing sourcemap index: %r', sourcemap[:15]) continue - sourmap_idxs[sourcemap] = index + if is_data_uri(sourcemap): + sourmap_idxs[sourcemap_key] = (index, result.url) + else: + sourmap_idxs[sourcemap_key] = (index, sourcemap) # queue up additional source files for download for source in index.sources: @@ -293,8 +302,9 @@ def expand_javascript_source(data, **kwargs): # may have had a failure pulling down the sourcemap previously if sourcemap in sourmap_idxs and frame.colno is not None: - state = find_source(sourmap_idxs[sourcemap], frame.lineno, frame.colno) - abs_path = urljoin(result.url, state.src) + index, relative_to = sourmap_idxs[sourcemap] + state = find_source(index, frame.lineno, frame.colno) + abs_path = urljoin(relative_to, state.src) logger.debug('Mapping compressed source %r to mapping in %r', frame.abs_path, abs_path) try: source, _ = source_code[abs_path] diff --git a/tests/sentry/tasks/fetch_source/tests.py b/tests/sentry/tasks/fetch_source/tests.py index d5498c40e45607..4aad909aa7ef0e 100644 --- a/tests/sentry/tasks/fetch_source/tests.py +++ b/tests/sentry/tasks/fetch_source/tests.py @@ -3,9 +3,10 @@ from __future__ import absolute_import import mock +import base64 from sentry.tasks.fetch_source import ( - UrlResult, expand_javascript_source, discover_sourcemap) + UrlResult, expand_javascript_source, discover_sourcemap, fetch_sourcemap) from sentry.testutils import TestCase @@ -82,3 +83,12 @@ def test_simple(self, fetch_sourcemap, fetch_url, update): assert frame['pre_context'] == [] assert frame['context_line'] == 'h' assert frame['post_context'] == ['e', 'l', 'l', 'o', ' '] + + +class FetchBase64SourcemapTest(TestCase): + @mock.patch('sentry.utils.sourcemaps.sourcemap_to_index') + def test_simple(self, sourcemap_to_index): + sourcemap_to_index.return_value = None + url = 'data:application/json;base64,' + base64.b64encode('hello, World!') + fetch_sourcemap(url) + sourcemap_to_index.assert_called_once_with('hello, World!') From 0ee28cf21c1e62586635a130f124d8c0a454509a Mon Sep 17 00:00:00 2001 From: Michael Hayes Date: Tue, 8 Oct 2013 18:11:37 -0700 Subject: [PATCH 5/9] fixed my test --- tests/sentry/tasks/fetch_source/tests.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tests/sentry/tasks/fetch_source/tests.py b/tests/sentry/tasks/fetch_source/tests.py index 4aad909aa7ef0e..01726a65b5d955 100644 --- a/tests/sentry/tasks/fetch_source/tests.py +++ b/tests/sentry/tasks/fetch_source/tests.py @@ -3,12 +3,14 @@ from __future__ import absolute_import import mock -import base64 from sentry.tasks.fetch_source import ( UrlResult, expand_javascript_source, discover_sourcemap, fetch_sourcemap) +from sentry.utils.sourcemaps import (SourceMap, SourceMapIndex) from sentry.testutils import TestCase +base64_sourcemap = 'data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiZ2VuZXJhdGVkLmpzIiwic291cmNlcyI6WyIvdGVzdC5qcyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiO0FBQUEiLCJzb3VyY2VzQ29udGVudCI6WyJjb25zb2xlLmxvZyhcImhlbGxvLCBXb3JsZCFcIikiXX0=' + class DiscoverSourcemapTest(TestCase): # discover_sourcemap(result) @@ -86,9 +88,11 @@ def test_simple(self, fetch_sourcemap, fetch_url, update): class FetchBase64SourcemapTest(TestCase): - @mock.patch('sentry.utils.sourcemaps.sourcemap_to_index') - def test_simple(self, sourcemap_to_index): - sourcemap_to_index.return_value = None - url = 'data:application/json;base64,' + base64.b64encode('hello, World!') - fetch_sourcemap(url) - sourcemap_to_index.assert_called_once_with('hello, World!') + def test_simple(self): + index = fetch_sourcemap(base64_sourcemap) + states = [SourceMap(1, 0, '/test.js', 0, 0, None)] + sources = set(['/test.js']) + keys = [(1, 0)] + content = {'/test.js': ['console.log("hello, World!")']} + + assert index == SourceMapIndex(states, keys, sources, content) From 02b92a8b0a282d91c304fc90e623ef7c62f71d9d Mon Sep 17 00:00:00 2001 From: Michael Hayes Date: Tue, 8 Oct 2013 18:35:36 -0700 Subject: [PATCH 6/9] only hashing if there is a source map --- src/sentry/tasks/fetch_source.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/sentry/tasks/fetch_source.py b/src/sentry/tasks/fetch_source.py index fbbfed6436e7a7..0f3e020c121efc 100644 --- a/src/sentry/tasks/fetch_source.py +++ b/src/sentry/tasks/fetch_source.py @@ -261,14 +261,18 @@ def expand_javascript_source(data, **kwargs): continue sourcemap = discover_sourcemap(result) - sourcemap_key = hashlib.md5(sourcemap).hexdigest() - source_code[filename] = (result.body.splitlines(), sourcemap_key) # TODO: we're currently running splitlines twice - if sourcemap: - logger.debug('Found sourcemap %r for minified script %r', sourcemap, result.url) + if not sourcemap: + source_code[filename] = (result.body.splitlines(), None) + continue + else: + logger.debug('Found sourcemap %r for minified script %r', sourcemap[:256], result.url) + + sourcemap_key = hashlib.md5(sourcemap).hexdigest() + source_code[filename] = (result.body.splitlines(), sourcemap_key) - if sourcemap in sourmap_idxs or not sourcemap: + if sourcemap in sourmap_idxs: continue # pull down sourcemap From 48433a5ee15f67ee903df3b166d498142a34f298 Mon Sep 17 00:00:00 2001 From: Michael Hayes Date: Wed, 9 Oct 2013 09:55:17 -0700 Subject: [PATCH 7/9] added an extra mock to expand_javascript test --- tests/sentry/tasks/fetch_source/tests.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/sentry/tasks/fetch_source/tests.py b/tests/sentry/tasks/fetch_source/tests.py index 01726a65b5d955..59bcf3f42fece3 100644 --- a/tests/sentry/tasks/fetch_source/tests.py +++ b/tests/sentry/tasks/fetch_source/tests.py @@ -45,7 +45,8 @@ class ExpandJavascriptSourceTest(TestCase): @mock.patch('sentry.models.Event.update') @mock.patch('sentry.tasks.fetch_source.fetch_url') @mock.patch('sentry.tasks.fetch_source.fetch_sourcemap') - def test_simple(self, fetch_sourcemap, fetch_url, update): + @mock.patch('sentry.tasks.fetch_source.discover_sourcemap') + def test_simple(self, discover_sourcemap, fetch_sourcemap, fetch_url, update): data = { 'sentry.interfaces.Exception': { 'values': [{ @@ -68,6 +69,7 @@ def test_simple(self, fetch_sourcemap, fetch_url, update): }], } } + discover_sourcemap.return_value = None fetch_sourcemap.return_value = None fetch_url.return_value.body = '\n'.join('hello world') From c08d391b4d82b723c5d9c1e0315a2a6e4d78027a Mon Sep 17 00:00:00 2001 From: Michael Hayes Date: Wed, 9 Oct 2013 10:22:58 -0700 Subject: [PATCH 8/9] added test that covers expanding a sourcemap with sourcesContent --- tests/sentry/tasks/fetch_source/tests.py | 32 ++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/sentry/tasks/fetch_source/tests.py b/tests/sentry/tasks/fetch_source/tests.py index 59bcf3f42fece3..2e97bf3aada5d1 100644 --- a/tests/sentry/tasks/fetch_source/tests.py +++ b/tests/sentry/tasks/fetch_source/tests.py @@ -88,6 +88,38 @@ def test_simple(self, discover_sourcemap, fetch_sourcemap, fetch_url, update): assert frame['context_line'] == 'h' assert frame['post_context'] == ['e', 'l', 'l', 'o', ' '] + @mock.patch('sentry.models.Event.update') + @mock.patch('sentry.tasks.fetch_source.fetch_url') + @mock.patch('sentry.tasks.fetch_source.discover_sourcemap') + def test_inlined_sources(self, discover_sourcemap, fetch_url, update): + data = { + 'sentry.interfaces.Exception': { + 'values': [{ + 'stacktrace': { + 'frames': [ + { + 'abs_path': 'http://example.com/test.js', + 'filename': 'test.js', + 'lineno': 1, + 'colno': 0, + }, + ], + }, + }], + } + } + discover_sourcemap.return_value = base64_sourcemap + fetch_url.return_value.body = '\n'.join('') + + expand_javascript_source(data) + fetch_url.assert_called_once_with('http://example.com/test.js') + + frame_list = data['sentry.interfaces.Exception']['values'][0]['stacktrace']['frames'] + frame = frame_list[0] + assert frame['pre_context'] == [] + assert frame['context_line'] == 'console.log("hello, World!")' + assert frame['post_context'] == [] + class FetchBase64SourcemapTest(TestCase): def test_simple(self): From b27c1923519498f175c1a133ef69632631235e56 Mon Sep 17 00:00:00 2001 From: Michael Hayes Date: Wed, 9 Oct 2013 10:50:27 -0700 Subject: [PATCH 9/9] merged master --- src/sentry/static/sentry/bootstrap | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/static/sentry/bootstrap b/src/sentry/static/sentry/bootstrap index d9b502dfb876c4..de683e9003b5ec 160000 --- a/src/sentry/static/sentry/bootstrap +++ b/src/sentry/static/sentry/bootstrap @@ -1 +1 @@ -Subproject commit d9b502dfb876c40b0735008bac18049c7ee7b6d2 +Subproject commit de683e9003b5ec6860a1fcb768416356dd685ef5