Skip to content

Commit

Permalink
Improve splitLines: return iterator instead (#434)
Browse files Browse the repository at this point in the history
The current behavior of splitLines is to eagerly split all the lines and return an array of strings.

This PR improves on that by returning an iterator instead, which emits lines lazily. This lets callers decide how best to consume the splitLines output (e.g. enumerating over lines one at a time).

Relates to #433
  • Loading branch information
Goose97 committed Jun 28, 2023
1 parent bb063e4 commit 26884c1
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 19 deletions.
2 changes: 1 addition & 1 deletion src/import/callgrind.ts
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ class CallgrindParser {
private savedFunctionNames: {[id: string]: string} = {}

constructor(contents: TextFileContent, private importedFileName: string) {
  // splitLines() is lazy (Iterable<string>); materialize it up front
  // because this parser indexes into lines via lineNum and needs
  // random access to the full line list.
  this.lines = [...contents.splitLines()]
  this.lineNum = 0
}

Expand Down
2 changes: 1 addition & 1 deletion src/import/instruments.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import {FileSystemDirectoryEntry, FileSystemEntry, FileSystemFileEntry} from './
import {MaybeCompressedDataReader, TextFileContent} from './utils'

function parseTSV<T>(contents: TextFileContent): T[] {
const lines = contents.splitLines().map(l => l.split('\t'))
const lines = [...contents.splitLines()].map(l => l.split('\t'))

const headerLine = lines.shift()
if (!headerLine) return []
Expand Down
6 changes: 3 additions & 3 deletions src/import/papyrus.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ export function importFromPapyrus(papyrusProfile: TextFileContent): Profile {
const profile = new CallTreeProfileBuilder()
profile.setValueFormatter(new TimeFormatter('milliseconds'))

const papyrusProfileLines = papyrusProfile
.splitLines()
.filter(line => !/^$|^Log closed$|log opened/.exec(line))
const papyrusProfileLines = [...papyrusProfile.splitLines()].filter(
line => !/^$|^Log closed$|log opened/.exec(line),
)

let startValue = -1
const firstLineParsed = parseLine(papyrusProfileLines[0])
Expand Down
31 changes: 20 additions & 11 deletions src/import/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ export interface ProfileDataSource {
}

export interface TextFileContent {
  // Lazily yields the content one line at a time. Spread the result
  // ([...content.splitLines()]) when random access to lines is needed.
  splitLines(): Iterable<string>
  // Returns the first decoded chunk of the content as a string.
  firstChunk(): string
  // Parses the entire content as JSON.
  parseAsJSON(): any
}
Expand All @@ -19,7 +19,7 @@ export interface TextFileContent {
//
// We provide a simple splitLines() which returns simple strings under the
// assumption that most extremely large text profiles will be broken into many
// lines. This isn't true in the general case, but will be true for most common
// large files.
//
// See: https://github.com/v8/v8/blob/8b663818fc311217c2cdaaab935f020578bfb7a8/src/objects/string.h#L479-L483
Expand Down Expand Up @@ -151,17 +151,26 @@ export class BufferBackedTextFileContent implements TextFileContent {
}
}

/**
 * Lazily split the buffered text chunks into lines.
 *
 * Returns an Iterable rather than an eager string[] so callers can
 * enumerate lines without materializing every line in memory at once.
 * Each call to [Symbol.iterator] starts a fresh generator, so the
 * returned Iterable is safely re-iterable.
 */
splitLines(): Iterable<string> {
  const chunks = this.chunks
  return {
    *[Symbol.iterator]() {
      // A line may straddle a chunk boundary, so carry the trailing
      // partial line of each chunk into the next one.
      let lineBuffer = ''
      for (const chunk of chunks) {
        const fragments = chunk.split('\n')
        lineBuffer += fragments[0]
        for (let i = 1; i < fragments.length; i++) {
          yield lineBuffer
          lineBuffer = fragments[i]
        }
      }
      // Emit the final line (an empty string if the content ends
      // with a newline), matching String.prototype.split semantics.
      yield lineBuffer
    },
  }
}

firstChunk(): string {
Expand Down
13 changes: 10 additions & 3 deletions src/lib/utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -247,11 +247,18 @@ test('BufferBackedTextFileContent.firstChunk', async () => {
})

test('BufferBackedTextFileContent.splitLines', async () => {
await withMockedFileChunkSizeForTests(2, () => {
const str = 'may\nyour\nrope\nbe\nlong'
function testWithString(str: string) {
const buffer = new TextEncoder().encode(str).buffer
const content = new BufferBackedTextFileContent(buffer)
expect(content.splitLines()).toEqual(['may', 'your', 'rope', 'be', 'long'])
expect([...content.splitLines()]).toEqual(str.split('\n'))
}

await withMockedFileChunkSizeForTests(2, () => {
testWithString('may\nyour\nrope\nbe\nlong')
testWithString('one-single-line')
testWithString('multiple\n\nnew\n\nlines')
testWithString('end-with-new-line\n')
testWithString('')
})
})

Expand Down

0 comments on commit 26884c1

Please sign in to comment.