Skip to content

Commit

Permalink
Add support for imports of UTF-16 encoded text w/ Byte Order Mark (BOM) (#314)
Browse files Browse the repository at this point in the history

Before this PR, we blindly assumed that all text imported into speedscope was UTF-8 encoded. This, unsurprisingly, is not always true. After this PR, we support text that's UTF-16 encoded, with either the little-endian or big-endian byte-order-mark.

Fixed #291
  • Loading branch information
jlfwong committed Sep 29, 2020
1 parent f3a1c09 commit f758130
Show file tree
Hide file tree
Showing 7 changed files with 146 additions and 9 deletions.
Binary file added sample/profiles/stackcollapse/simple-utf16-be.txt
Binary file not shown.
Binary file added sample/profiles/stackcollapse/simple-utf16-le.txt
Binary file not shown.
108 changes: 108 additions & 0 deletions src/import/__snapshots__/bg-flamegraph.test.ts.snap
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,114 @@ exports[`importFromBGFlameGraph with CRLF: indexToView 1`] = `0`;

exports[`importFromBGFlameGraph with CRLF: profileGroup.name 1`] = `"simple-crlf.txt"`;

exports[`importFromBGFlameGraph with UTF-16, Big Endian 1`] = `
Object {
"frames": Array [
Frame {
"col": undefined,
"file": undefined,
"key": "a",
"line": undefined,
"name": "a",
"selfWeight": 0,
"totalWeight": 14,
},
Frame {
"col": undefined,
"file": undefined,
"key": "b",
"line": undefined,
"name": "b",
"selfWeight": 5,
"totalWeight": 14,
},
Frame {
"col": undefined,
"file": undefined,
"key": "c",
"line": undefined,
"name": "c",
"selfWeight": 5,
"totalWeight": 5,
},
Frame {
"col": undefined,
"file": undefined,
"key": "d",
"line": undefined,
"name": "d",
"selfWeight": 4,
"totalWeight": 4,
},
],
"name": "simple-utf16-be.txt",
"stacks": Array [
"a;b;c 2",
"a;b;d 4",
"a;b;c 3",
"a;b 5",
],
}
`;

exports[`importFromBGFlameGraph with UTF-16, Big Endian: indexToView 1`] = `0`;

exports[`importFromBGFlameGraph with UTF-16, Big Endian: profileGroup.name 1`] = `"simple-utf16-be.txt"`;

exports[`importFromBGFlameGraph with UTF-16, Little Endian 1`] = `
Object {
"frames": Array [
Frame {
"col": undefined,
"file": undefined,
"key": "a",
"line": undefined,
"name": "a",
"selfWeight": 0,
"totalWeight": 14,
},
Frame {
"col": undefined,
"file": undefined,
"key": "b",
"line": undefined,
"name": "b",
"selfWeight": 5,
"totalWeight": 14,
},
Frame {
"col": undefined,
"file": undefined,
"key": "c",
"line": undefined,
"name": "c",
"selfWeight": 5,
"totalWeight": 5,
},
Frame {
"col": undefined,
"file": undefined,
"key": "d",
"line": undefined,
"name": "d",
"selfWeight": 4,
"totalWeight": 4,
},
],
"name": "simple-utf16-le.txt",
"stacks": Array [
"a;b;c 2",
"a;b;d 4",
"a;b;c 3",
"a;b 5",
],
}
`;

exports[`importFromBGFlameGraph with UTF-16, Little Endian: indexToView 1`] = `0`;

exports[`importFromBGFlameGraph with UTF-16, Little Endian: profileGroup.name 1`] = `"simple-utf16-le.txt"`;

exports[`importFromBGFlameGraph: indexToView 1`] = `0`;

exports[`importFromBGFlameGraph: profileGroup.name 1`] = `"simple.txt"`;
4 changes: 2 additions & 2 deletions src/import/__snapshots__/safari.test.ts.snap
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ Object {
"totalWeight": 0.0005174240213818848,
},
],
"name": "Grabación de Control temporal 1",
"name": "Grabación de Control temporal 1",
"stacks": Array [
"injectModule;(anonymous) 1.00ms",
" 39.93ms",
Expand All @@ -98,4 +98,4 @@ Object {

exports[`importFromSafari: indexToView 1`] = `0`;

exports[`importFromSafari: profileGroup.name 1`] = `"Grabación de Control temporal 1"`;
exports[`importFromSafari: profileGroup.name 1`] = `"Grabación de Control temporal 1"`;
8 changes: 8 additions & 0 deletions src/import/bg-flamegraph.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,11 @@ test('importFromBGFlameGraph', async () => {
test('importFromBGFlameGraph with CRLF', async () => {
await checkProfileSnapshot('./sample/profiles/stackcollapse/simple-crlf.txt')
})

// Snapshot test for the stackcollapse fixture whose file name marks it as
// UTF-16 little-endian. `checkProfileSnapshot` is defined outside this
// excerpt; presumably it imports the file and compares the result against the
// stored Jest snapshot (verify against its definition). The test title is
// used as the snapshot key, so it must not be changed casually.
test('importFromBGFlameGraph with UTF-16, Little Endian', async () => {
await checkProfileSnapshot('./sample/profiles/stackcollapse/simple-utf16-le.txt')
})

// Snapshot test for the stackcollapse fixture whose file name marks it as
// UTF-16 big-endian. `checkProfileSnapshot` is defined outside this excerpt;
// presumably it imports the file and compares the result against the stored
// Jest snapshot (verify against its definition). The test title is used as
// the snapshot key, so it must not be changed casually.
test('importFromBGFlameGraph with UTF-16, Big Endian', async () => {
await checkProfileSnapshot('./sample/profiles/stackcollapse/simple-utf16-be.txt')
})
25 changes: 20 additions & 5 deletions src/import/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,30 @@ export class MaybeCompressedDataReader implements ProfileDataSource {

async readAsText(): Promise<string> {
const buffer = await this.readAsArrayBuffer()
let ret: string = ''

// By default, we assume the file is utf-8 encoded.
let encoding = 'utf-8'

const array = new Uint8Array(buffer)
if (array.length > 2) {
if (array[0] === 0xff && array[1] === 0xfe) {
// UTF-16, Little Endian encoding
encoding = 'utf-16le'
} else if (array[0] === 0xfe && array[1] === 0xff) {
// UTF-16, Big Endian encoding
encoding = 'utf-16be'
}
}

if (typeof TextDecoder !== 'undefined') {
const decoder = new TextDecoder()
const decoder = new TextDecoder(encoding)
return decoder.decode(buffer)
} else {
// JavaScript strings are UTF-16 encoded, but we're reading data
// from disk that we're going to assume is UTF-8 encoded.
const array = new Uint8Array(buffer)
// JavaScript strings are UTF-16 encoded, but we're reading data from disk
// that we're going to blindly assume is ASCII encoded. This codepath
// only exists for older browser support.
console.warn('This browser does not support TextDecoder. Decoding text as ASCII.')
let ret: string = ''
for (let i = 0; i < array.length; i++) {
ret += String.fromCharCode(array[i])
}
Expand Down
10 changes: 8 additions & 2 deletions src/jest-setup.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// Because we're not going to use this in our actual build, it's okay for this
// to be inefficient.

(function () {
;(function () {
const nodeVersion = process.versions.node
const versionParts = nodeVersion.split('.')
const majorVersion = parseInt(versionParts[0], 10)
Expand Down Expand Up @@ -48,4 +48,10 @@
this.splice(0, this.length, ...arrayWithIndices.map(x => x[0]))
return this
}
})()
})()
;(function () {
  // Browsers expose TextDecoder as a global, whereas node provides it through
  // the built-in 'util' module. Copy it onto the global object so that code
  // under test can reference TextDecoder exactly as it would in a browser.
  const { TextDecoder } = require('util')
  global.TextDecoder = TextDecoder
})()

0 comments on commit f758130

Please sign in to comment.