Skip to content

Commit

Permalink
Improve splitLines: return iterator instead (#434)
Browse files Browse the repository at this point in the history
The current behavior of splitLines is to eagerly split all the lines and return an array of strings.

This PR improves on that by returning an iterator instead, which emits lines lazily. This lets callers decide how best to consume the splitLines output (e.g. enumerating over lines one at a time).

Relates to #433
  • Loading branch information
Goose97 committed Jun 28, 2023
1 parent bb063e4 commit 26884c1
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 19 deletions.
2 changes: 1 addition & 1 deletion src/import/callgrind.ts
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ class CallgrindParser {
private savedFunctionNames: {[id: string]: string} = {}

constructor(contents: TextFileContent, private importedFileName: string) {
  // splitLines() is lazy (Iterable<string>); materialize it up front
  // because this parser indexes into lines via lineNum and needs
  // random access to the full line list.
  this.lines = [...contents.splitLines()]
  this.lineNum = 0
}

Expand Down
2 changes: 1 addition & 1 deletion src/import/instruments.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import {FileSystemDirectoryEntry, FileSystemEntry, FileSystemFileEntry} from './
import {MaybeCompressedDataReader, TextFileContent} from './utils'

function parseTSV<T>(contents: TextFileContent): T[] {
const lines = contents.splitLines().map(l => l.split('\t'))
const lines = [...contents.splitLines()].map(l => l.split('\t'))

const headerLine = lines.shift()
if (!headerLine) return []
Expand Down
6 changes: 3 additions & 3 deletions src/import/papyrus.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ export function importFromPapyrus(papyrusProfile: TextFileContent): Profile {
const profile = new CallTreeProfileBuilder()
profile.setValueFormatter(new TimeFormatter('milliseconds'))

const papyrusProfileLines = papyrusProfile
.splitLines()
.filter(line => !/^$|^Log closed$|log opened/.exec(line))
const papyrusProfileLines = [...papyrusProfile.splitLines()].filter(
line => !/^$|^Log closed$|log opened/.exec(line),
)

let startValue = -1
const firstLineParsed = parseLine(papyrusProfileLines[0])
Expand Down
31 changes: 20 additions & 11 deletions src/import/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ export interface ProfileDataSource {
}

export interface TextFileContent {
  // Lazily yields the content one line at a time. Spread the result
  // ([...content.splitLines()]) when random access to lines is needed.
  splitLines(): Iterable<string>
  // Returns the first decoded chunk of the content as a string.
  firstChunk(): string
  // Parses the entire content as JSON.
  parseAsJSON(): any
}
Expand All @@ -19,7 +19,7 @@ export interface TextFileContent {
//
// We provide a simple splitLines() which returns simple strings under the
// assumption that most extremely large text profiles will be broken into many
// lines. This isn't true in the general case, but will be true for most common
// large files.
//
// See: https://github.com/v8/v8/blob/8b663818fc311217c2cdaaab935f020578bfb7a8/src/objects/string.h#L479-L483
Expand Down Expand Up @@ -151,17 +151,26 @@ export class BufferBackedTextFileContent implements TextFileContent {
}
}

/**
 * Lazily split the buffered text chunks into lines.
 *
 * Returns an Iterable rather than an eager string[] so callers can
 * enumerate lines without materializing every line in memory at once.
 * Each call to [Symbol.iterator] starts a fresh generator, so the
 * returned Iterable is safely re-iterable.
 */
splitLines(): Iterable<string> {
  const chunks = this.chunks
  return {
    *[Symbol.iterator]() {
      // A line may straddle a chunk boundary, so carry the trailing
      // partial line of each chunk into the next one.
      let lineBuffer = ''
      for (const chunk of chunks) {
        const fragments = chunk.split('\n')
        lineBuffer += fragments[0]
        for (let i = 1; i < fragments.length; i++) {
          yield lineBuffer
          lineBuffer = fragments[i]
        }
      }
      // Emit the final line (an empty string if the content ends
      // with a newline), matching String.prototype.split semantics.
      yield lineBuffer
    },
  }
}

firstChunk(): string {
Expand Down
13 changes: 10 additions & 3 deletions src/lib/utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -247,11 +247,18 @@ test('BufferBackedTextFileContent.firstChunk', async () => {
})

test('BufferBackedTextFileContent.splitLines', async () => {
await withMockedFileChunkSizeForTests(2, () => {
const str = 'may\nyour\nrope\nbe\nlong'
function testWithString(str: string) {
const buffer = new TextEncoder().encode(str).buffer
const content = new BufferBackedTextFileContent(buffer)
expect(content.splitLines()).toEqual(['may', 'your', 'rope', 'be', 'long'])
expect([...content.splitLines()]).toEqual(str.split('\n'))
}

await withMockedFileChunkSizeForTests(2, () => {
testWithString('may\nyour\nrope\nbe\nlong')
testWithString('one-single-line')
testWithString('multiple\n\nnew\n\nlines')
testWithString('end-with-new-line\n')
testWithString('')
})
})

Expand Down

0 comments on commit 26884c1

Please sign in to comment.