Skip to content
This repository has been archived by the owner on Mar 3, 2023. It is now read-only.

Commit

Permalink
Process unicode results from ripgrep correctly
Browse files Browse the repository at this point in the history
  • Loading branch information
rafeca committed May 22, 2019
1 parent 411e2a9 commit 6748b84
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 1 deletion.
22 changes: 22 additions & 0 deletions spec/workspace-spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -2647,6 +2647,28 @@ describe('Workspace', () => {
})
}

it('returns results on lines with unicode strings', async () => {
  // Accumulates every result object handed to the scan callback.
  const found = []
  const collect = result => found.push(result)

  await scan(/line with unico/, {}, collect)

  // Exactly one file is expected to contain the pattern.
  expect(found.length).toBe(1)

  const [onlyResult] = found
  const { filePath, matches } = onlyResult
  const projectDirectory = atom.project.getDirectories()[0]
  expect(filePath).toBe(projectDirectory.resolve('file-with-unicode'))
  expect(matches).toHaveLength(1)

  // The range columns are string offsets, so the multi-byte prefix must not
  // shift the match further right than its 19 leading characters.
  const expectedMatch = {
    matchText: 'line with unico',
    lineText: 'ДДДДДДДДДДДДДДДДДД line with unicode',
    lineTextOffset: 0,
    range: [[0, 19], [0, 34]],
    leadingContextLines: [],
    trailingContextLines: []
  }
  expect(matches[0]).toEqual(expectedMatch)
})

describe('when the core.excludeVcsIgnoredPaths config is truthy', () => {
let projectPath
let ignoredPath
Expand Down
32 changes: 31 additions & 1 deletion src/ripgrep-directory-searcher.js
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,35 @@ function getPositionFromColumn (lines, column) {
return [currentLine - 1, column - previousLength]
}

// Rewrites, in place, the `start` and `end` of every submatch of a ripgrep
// match so that they are JavaScript string offsets (as counted by
// `String.prototype.length`) instead of the UTF-8 byte offsets that ripgrep
// reports.
//
// `match` is expected to have the shape
// `{ lines: { text }, submatches: [{ start, end, ... }] }`.
// Nothing is returned; the submatch objects are mutated.
function processUnicodeMatch (match) {
  const text = match.lines.text

  if (text.length === Buffer.byteLength(text)) {
    // fast codepath for lines that only contain characters of 1 byte length:
    // byte offsets and string offsets are already the same.
    return
  }

  const lineBuffer = Buffer.from(text)
  let currentLength = 0
  let previousPosition = 0

  // Converts a byte offset into a string offset. Only the bytes between the
  // previously converted offset and `position` are decoded, and both offsets
  // index into the untouched `lineBuffer`, so the running total stays correct
  // across any number of calls.
  function convertPosition (position) {
    currentLength += lineBuffer.toString('utf8', previousPosition, position).length
    previousPosition = position

    return currentLength
  }

  // Iterate over all the submatches to convert their start and end values
  // (which come as bytes from ripgrep) to character positions.
  // We can do this incrementally because submatches come ordered by position.
  for (const submatch of match.submatches) {
    submatch.start = convertPosition(submatch.start)
    submatch.end = convertPosition(submatch.end)
  }
}

// This function processes a ripgrep submatch to create the correct
// range. This is mostly needed for multi-line results, since the range
// will have different start and end rows and we need to calculate these
Expand Down Expand Up @@ -247,7 +276,6 @@ module.exports = class RipgrepDirectorySearcher {
buffer = lines.pop()
for (const line of lines) {
const message = JSON.parse(line)

updateTrailingContexts(message, pendingTrailingContexts, options)

if (message.type === 'begin') {
Expand All @@ -261,6 +289,8 @@ module.exports = class RipgrepDirectorySearcher {
const trailingContextLines = []
pendingTrailingContexts.add(trailingContextLines)

processUnicodeMatch(message.data)

for (const submatch of message.data.submatches) {
const { lineText, range } = processSubmatch(
submatch,
Expand Down

0 comments on commit 6748b84

Please sign in to comment.