From 61ed5637ceaf64d5b4c62d389ebb6b7fec6d50fe Mon Sep 17 00:00:00 2001 From: Jason Fields Date: Thu, 16 Sep 2021 03:53:48 -0400 Subject: [PATCH] Significant refactor of search system and `:s[ubstitute]` Central to this refactor is a rationalization of pattern parsing - we now have an immutable `Pattern` class with its own Parsimmon parser; it's now used everywhere. A few enhancements, such as supporting `/{pattern}/` as a line specifier in a line range. And some bugs fixed: - `\n` now expands to `\r?\n` (to work with CRLF files) - `\<` and `\>` (word boundaries) are now mapped to `\b` Refs #3996 --- src/actions/commands/actions.ts | 2 +- src/actions/commands/commandLine.ts | 12 +- src/actions/commands/search.ts | 32 +- src/actions/motion.ts | 10 +- src/cmd_line/commands/history.ts | 2 +- src/cmd_line/commands/substitute.ts | 68 ++-- src/cmd_line/subparsers/substitute.ts | 291 ++++++----------- src/state/globalState.ts | 3 +- src/state/searchState.ts | 345 +++++---------------- src/state/substituteState.ts | 6 +- src/statusBar.ts | 5 +- src/vimscript/exCommandParser.ts | 4 +- src/vimscript/lineRange.ts | 57 +++- src/vimscript/pattern.ts | 295 ++++++++++++++++++ test/cmd_line/subparser.substitute.test.ts | 17 +- test/cmd_line/substitute.test.ts | 5 +- test/register/register.test.ts | 4 +- test/vimscript/lineRangeParse.test.ts | 52 +++- test/vimscript/lineRangeResolve.test.ts | 14 + test/vimscript/searchOffset.test.ts | 43 +++ 20 files changed, 699 insertions(+), 568 deletions(-) create mode 100644 src/vimscript/pattern.ts create mode 100644 test/vimscript/searchOffset.test.ts diff --git a/src/actions/commands/actions.ts b/src/actions/commands/actions.ts index f1cfe7f5258..9150e3c4d91 100644 --- a/src/actions/commands/actions.ts +++ b/src/actions/commands/actions.ts @@ -20,7 +20,6 @@ import { visualBlockGetBottomRightPosition, } from './../../mode/mode'; import { Register, RegisterMode } from './../../register/register'; -import { SearchDirection } from 
'./../../state/searchState'; import { EditorScrollByUnit, EditorScrollDirection, TextEditor } from './../../textEditor'; import { isTextTransformation, Transformation } from './../../transformations/transformations'; import { RegisterAction, BaseCommand } from './../base'; @@ -35,6 +34,7 @@ import { Position } from 'vscode'; import { WriteQuitCommand } from '../../cmd_line/commands/writequit'; import { shouldWrapKey } from '../wrapping'; import { ErrorCode, VimError } from '../../error'; +import { SearchDirection } from '../../vimscript/pattern'; /** * A very special snowflake. diff --git a/src/actions/commands/commandLine.ts b/src/actions/commands/commandLine.ts index dba35d937ae..69fa6414d04 100644 --- a/src/actions/commands/commandLine.ts +++ b/src/actions/commands/commandLine.ts @@ -14,11 +14,11 @@ import { StatusBar } from '../../statusBar'; import { getPathDetails, readDirectory } from '../../util/path'; import { Clipboard } from '../../util/clipboard'; import { VimError, ErrorCode } from '../../error'; -import { SearchDirection } from '../../state/searchState'; import { scrollView } from '../../util/util'; import { getWordLeftInText, getWordRightInText, WordType } from '../../textobject/word'; import { Position } from 'vscode'; import { builtinExCommands } from '../../vimscript/exCommandParser'; +import { SearchDirection } from '../../vimscript/pattern'; /** * Commands that are only relevant when entering a command or search @@ -421,7 +421,7 @@ class CommandInsertInSearchMode extends BaseCommand { searchState.searchString.slice(vimState.statusBarCursorCharacterPos); vimState.statusBarCursorCharacterPos = Math.max(vimState.statusBarCursorCharacterPos - 1, 0); } else if (key === '') { - await new CommandShowSearchHistory(searchState.searchDirection).exec(position, vimState); + await new CommandShowSearchHistory(searchState.direction).exec(position, vimState); } else if (key === '') { searchState.searchString = searchState.searchString.slice( 
vimState.statusBarCursorCharacterPos @@ -473,7 +473,7 @@ class CommandInsertInSearchMode extends BaseCommand { StatusBar.displayError( vimState, VimError.fromCode( - searchState.searchDirection === SearchDirection.Backward + searchState.direction === SearchDirection.Backward ? ErrorCode.SearchHitTop : ErrorCode.SearchHitBottom, searchState.searchString @@ -790,9 +790,9 @@ class CommandCtrlLInSearchMode extends BaseCommand { const nextMatch = globalState.searchState.getNextSearchMatchRange(vimState.editor, position); if (nextMatch) { - const line = vimState.document.lineAt(nextMatch.end).text; - if (nextMatch.end.character < line.length) { - globalState.searchState.searchString += line[nextMatch.end.character]; + const line = vimState.document.lineAt(nextMatch.range.end).text; + if (nextMatch.range.end.character < line.length) { + globalState.searchState.searchString += line[nextMatch.range.end.character]; vimState.statusBarCursorCharacterPos++; } } diff --git a/src/actions/commands/search.ts b/src/actions/commands/search.ts index ec5d8020574..a7eaa5ea8f5 100644 --- a/src/actions/commands/search.ts +++ b/src/actions/commands/search.ts @@ -1,17 +1,20 @@ import * as _ from 'lodash'; -import { Position, Selection } from 'vscode'; +import { escapeRegExp } from 'lodash'; +import {} from 'vscode'; +import { Position, Range, Selection } from 'vscode'; import { sorted } from '../../common/motion/position'; import { configuration } from '../../configuration/configuration'; import { VimError, ErrorCode } from '../../error'; import { Mode } from '../../mode/mode'; import { Register } from '../../register/register'; import { globalState } from '../../state/globalState'; -import { SearchDirection, SearchState } from '../../state/searchState'; +import { SearchState } from '../../state/searchState'; import { VimState } from '../../state/vimState'; import { StatusBar } from '../../statusBar'; import { TextEditor } from '../../textEditor'; import { TextObject } from 
'../../textobject/textobject'; import { reportSearch } from '../../util/statusBarTextUtils'; +import { SearchDirection } from '../../vimscript/pattern'; import { RegisterAction, BaseCommand } from '../base'; import { failedMovement, IMovement } from '../baseMotion'; @@ -84,7 +87,7 @@ async function searchCurrentSelection(vimState: VimState, direction: SearchDirec * Used by [g]* and [g]# */ async function createSearchStateAndMoveToMatch(args: { - needle?: string | undefined; + needle: string; vimState: VimState; direction: SearchDirection; isExact: boolean; @@ -92,18 +95,18 @@ async function createSearchStateAndMoveToMatch(args: { }): Promise { const { needle, vimState, isExact } = args; - if (needle === undefined || needle.length === 0) { + if (needle.length === 0) { return; } - const searchString = isExact ? `\\b${needle}\\b` : needle; + const searchString = isExact ? `\\<${escapeRegExp(needle)}\\>` : escapeRegExp(needle); // Start a search for the given term. globalState.searchState = new SearchState( args.direction, vimState.cursorStopPosition, searchString, - { isRegex: isExact, ignoreSmartcase: true }, + { ignoreSmartcase: true }, vimState.currentMode ); Register.setReadonlyRegister('/', globalState.searchState.searchString); @@ -238,7 +241,7 @@ class CommandSearchForwards extends BaseCommand { SearchDirection.Forward, vimState.cursorStopPosition, '', - { isRegex: true }, + {}, vimState.currentMode ); await vimState.setCurrentMode(Mode.SearchInProgressMode); @@ -263,7 +266,7 @@ class CommandSearchBackwards extends BaseCommand { SearchDirection.Backward, vimState.cursorStopPosition, '', - { isRegex: true }, + {}, vimState.currentMode ); await vimState.setCurrentMode(Mode.SearchInProgressMode); @@ -287,14 +290,13 @@ abstract class SearchObject extends TextObject { this.direction, vimState.cursorStopPosition, searchState.searchString, - { isRegex: true }, + {}, vimState.currentMode ); let result: | { - start: Position; - end: Position; + range: Range; index: 
number; } | undefined; @@ -302,11 +304,11 @@ abstract class SearchObject extends TextObject { // At first, try to search for current word, and stop searching if matched. // Try to search for the next word if not matched or // if the cursor is at the end of a match string in visual-mode. - result = newSearchState.getSearchMatchRangeOf(vimState.editor, vimState.cursorStopPosition); + result = newSearchState.findContainingMatchRange(vimState.editor, vimState.cursorStopPosition); if ( result && vimState.currentMode === Mode.Visual && - vimState.cursorStopPosition.isEqual(result.end.getLeftThroughLineBreaks()) + vimState.cursorStopPosition.isEqual(result.range.end.getLeftThroughLineBreaks()) ) { result = undefined; } @@ -322,8 +324,8 @@ abstract class SearchObject extends TextObject { reportSearch(result.index, searchState.getMatchRanges(vimState.editor).length, vimState); let [start, stop] = [ - vimState.currentMode === Mode.Normal ? result.start : vimState.cursorStopPosition, - result.end.getLeftThroughLineBreaks(), + vimState.currentMode === Mode.Normal ? 
result.range.start : vimState.cursorStopPosition, + result.range.end.getLeftThroughLineBreaks(), ]; if (vimState.recordedState.operator) { diff --git a/src/actions/motion.ts b/src/actions/motion.ts index f14f73cd632..374dd97faec 100755 --- a/src/actions/motion.ts +++ b/src/actions/motion.ts @@ -18,7 +18,6 @@ import { globalState } from '../state/globalState'; import { reportSearch } from '../util/statusBarTextUtils'; import { SneakForward, SneakBackward } from './plugins/sneak'; import { Notation } from '../configuration/notation'; -import { SearchDirection } from '../state/searchState'; import { StatusBar } from '../statusBar'; import { clamp } from '../util/util'; import { getCurrentParagraphBeginning, getCurrentParagraphEnd } from '../textobject/paragraph'; @@ -27,6 +26,7 @@ import { Position } from 'vscode'; import { sorted } from '../common/motion/position'; import { WordType } from '../textobject/word'; import { CommandInsertAtCursor } from './commands/actions'; +import { SearchDirection } from '../vimscript/pattern'; /** * A movement is something like 'h', 'k', 'w', 'b', 'gg', etc. @@ -505,7 +505,7 @@ class CommandNextSearchMatch extends BaseMovement { // we have to handle a special case here: searching for $ or \n, // which we approximate by positionIsEOL. In that case (but only when searching forward) // we need to "offset" by getRight for searching the next match, otherwise we get stuck. - const searchForward = searchState.searchDirection === SearchDirection.Forward; + const searchForward = searchState.direction === SearchDirection.Forward; const positionIsEOL = position.getRight().isEqual(position.getLineEnd()); const nextMatch = positionIsEOL && searchForward @@ -516,7 +516,7 @@ class CommandNextSearchMatch extends BaseMovement { StatusBar.displayError( vimState, VimError.fromCode( - searchState.searchDirection === SearchDirection.Forward + searchState.direction === SearchDirection.Forward ? 
ErrorCode.SearchHitBottom : ErrorCode.SearchHitTop, searchState.searchString @@ -557,7 +557,7 @@ class CommandPreviousSearchMatch extends BaseMovement { return failedMovement(vimState); } - const searchForward = searchState.searchDirection === SearchDirection.Forward; + const searchForward = searchState.direction === SearchDirection.Forward; const positionIsEOL = position.getRight().isEqual(position.getLineEnd()); // see implementation of n, above. @@ -578,7 +578,7 @@ class CommandPreviousSearchMatch extends BaseMovement { StatusBar.displayError( vimState, VimError.fromCode( - searchState.searchDirection === SearchDirection.Forward + searchState.direction === SearchDirection.Forward ? ErrorCode.SearchHitTop : ErrorCode.SearchHitBottom, searchState.searchString diff --git a/src/cmd_line/commands/history.ts b/src/cmd_line/commands/history.ts index cc4aa3a428a..39a9dae57f0 100644 --- a/src/cmd_line/commands/history.ts +++ b/src/cmd_line/commands/history.ts @@ -3,8 +3,8 @@ import { CommandShowSearchHistory, CommandShowCommandHistory, } from '../../actions/commands/actions'; -import { SearchDirection } from '../../state/searchState'; import { ExCommand } from '../../vimscript/exCommand'; +import { SearchDirection } from '../../vimscript/pattern'; export enum HistoryCommandType { Cmd, diff --git a/src/cmd_line/commands/substitute.ts b/src/cmd_line/commands/substitute.ts index 7eecbcb8046..c24fcdf6e71 100644 --- a/src/cmd_line/commands/substitute.ts +++ b/src/cmd_line/commands/substitute.ts @@ -1,8 +1,6 @@ -/* tslint:disable:no-bitwise */ - import * as vscode from 'vscode'; import { Jump } from '../../jumps/jump'; -import { SearchState, SearchDirection } from '../../state/searchState'; +import { SearchState } from '../../state/searchState'; import { SubstituteState } from '../../state/substituteState'; import { VimError, ErrorCode } from '../../error'; import { VimState } from '../../state/vimState'; @@ -13,6 +11,7 @@ import { Position } from 'vscode'; import { StatusBar 
} from '../../statusBar'; import { LineRange } from '../../vimscript/lineRange'; import { ExCommand } from '../../vimscript/exCommand'; +import { Pattern, SearchDirection } from '../../vimscript/pattern'; /** * NOTE: for "pattern", undefined is different from an empty string. @@ -21,9 +20,9 @@ import { ExCommand } from '../../vimscript/exCommand'; * and replace with whatever's set by "replace" (even an empty string). */ export interface ISubstituteCommandArguments { - pattern: string | undefined; + pattern: Pattern | undefined; replace: string; - flags: number; + flags: SubstituteFlags; count?: number; } @@ -44,20 +43,19 @@ export interface ISubstituteCommandArguments { * [r] When the search pattern is empty, use the previously used search pattern * instead of the search pattern from the last substitute or ":global". */ -export enum SubstituteFlags { - None = 0, - KeepPreviousFlags = 0x1, // TODO: use this flag - ConfirmEach = 0x2, - SuppressError = 0x4, // TODO: use this flag - ReplaceAll = 0x8, - IgnoreCase = 0x10, - NoIgnoreCase = 0x20, // TODO: use this flag - PrintCount = 0x40, +export interface SubstituteFlags { + keepPreviousFlags?: true; // TODO: use this flag + confirmEach?: true; + suppressError?: true; // TODO: use this flag + replaceAll?: true; + ignoreCase?: true; + noIgnoreCase?: true; // TODO: use this flag + printCount?: true; // TODO: use the following flags: - PrintLastMatchedLine = 0x80, - PrintLastMatchedLineWithNumber = 0x100, - PrintLastMatchedLineWithList = 0x200, - UsePreviousPattern = 0x400, + printLastMatchedLine?: true; + printLastMatchedLineWithNumber?: true; + printLastMatchedLineWithList?: true; + usePreviousPattern?: true; } /** @@ -96,25 +94,24 @@ export class SubstituteCommand extends ExCommand { public override neovimCapable(): boolean { // We need to use VSCode's quickpick capabilities to do confirmation - return (this.arguments.flags & SubstituteFlags.ConfirmEach) === 0; + return !this.arguments.flags.confirmEach; } - 
getRegex(args: ISubstituteCommandArguments, vimState: VimState) { + private getRegex(args: ISubstituteCommandArguments, vimState: VimState) { let jsRegexFlags = ''; - if (configuration.gdefault || configuration.substituteGlobalFlag) { // the gdefault flag is on, then /g if on by default and /g negates that - if (!(args.flags & SubstituteFlags.ReplaceAll)) { + if (!args.flags.replaceAll) { jsRegexFlags += 'g'; } } else { // the gdefault flag is off, then /g means replace all - if (args.flags & SubstituteFlags.ReplaceAll) { + if (args.flags.replaceAll) { jsRegexFlags += 'g'; } } - if (args.flags & SubstituteFlags.IgnoreCase) { + if (args.flags.ignoreCase) { jsRegexFlags += 'i'; } @@ -122,33 +119,36 @@ export class SubstituteCommand extends ExCommand { // If no pattern is entered, use previous SUBSTITUTION state and don't update search state // i.e. :s const prevSubstituteState = globalState.substituteState; - if (prevSubstituteState === undefined || prevSubstituteState.searchPattern === '') { + if ( + prevSubstituteState === undefined || + prevSubstituteState.searchPattern.patternString === '' + ) { throw VimError.fromCode(ErrorCode.NoPreviousSubstituteRegularExpression); } else { args.pattern = prevSubstituteState.searchPattern; args.replace = prevSubstituteState.replaceString; } } else { - if (args.pattern === '') { + if (args.pattern.patternString === '') { // If an explicitly empty pattern is entered, use previous search state (including search with * and #) and update both states // e.g :s/ or :s/// const prevSearchState = globalState.searchState; if (prevSearchState === undefined || prevSearchState.searchString === '') { throw VimError.fromCode(ErrorCode.NoPreviousRegularExpression); } else { - args.pattern = prevSearchState.searchString; + args.pattern = prevSearchState.pattern; } } globalState.substituteState = new SubstituteState(args.pattern, args.replace); globalState.searchState = new SearchState( SearchDirection.Forward, vimState.cursorStopPosition, - 
args.pattern, - { isRegex: true }, + args.pattern?.patternString, + {}, vimState.currentMode ); } - return new RegExp(args.pattern, jsRegexFlags); + return new RegExp(args.pattern.regex.source, jsRegexFlags); } /** @@ -164,9 +164,9 @@ export class SubstituteCommand extends ExCommand { let count = 0; - if (this.arguments.flags & SubstituteFlags.PrintCount) { + if (this.arguments.flags.printCount) { return matches.length; - } else if (this.arguments.flags & SubstituteFlags.ConfirmEach) { + } else if (this.arguments.flags.confirmEach) { // Loop through each match on this line and get confirmation before replacing let newContent = originalContent; @@ -270,7 +270,7 @@ export class SubstituteCommand extends ExCommand { if (selection === 'q' || selection === 'l' || !selection) { this.abort = true; } else if (selection === 'a') { - this.arguments.flags = this.arguments.flags & ~SubstituteFlags.ConfirmEach; + this.arguments.flags.confirmEach = undefined; } return selection === 'y' || selection === 'a' || selection === 'l'; @@ -328,7 +328,7 @@ export class SubstituteCommand extends ExCommand { ) { if (substitutions === 0) { StatusBar.displayError(vimState, VimError.fromCode(ErrorCode.PatternNotFound, regex.source)); - } else if (this.arguments.flags & SubstituteFlags.PrintCount) { + } else if (this.arguments.flags.printCount) { StatusBar.setText( vimState, `${substitutions} match${substitutions > 1 ? 
'es' : ''} on ${lines} line${ diff --git a/src/cmd_line/subparsers/substitute.ts b/src/cmd_line/subparsers/substitute.ts index 6a52349d253..83668ad8377 100644 --- a/src/cmd_line/subparsers/substitute.ts +++ b/src/cmd_line/subparsers/substitute.ts @@ -1,227 +1,120 @@ -/* tslint:disable:no-bitwise */ - -import { Scanner } from '../scanner'; -import * as error from '../../error'; import { SubstituteCommand, SubstituteFlags } from '../commands/substitute'; - -function isValidDelimiter(char: string): boolean { - return !!/^[^\w\s\\|"]{1}$/g.exec(char); -} - -function parsePattern(scanner: Scanner, delimiter: string): [string, boolean] { - let pattern = ''; - while (!scanner.isAtEof) { - let currentChar = scanner.next(); - - if (currentChar === delimiter) { - return [pattern, true]; // found second delimiter - } else if (currentChar === '\\') { - if (!scanner.isAtEof) { - currentChar = scanner.next(); - if (currentChar === delimiter) { - pattern += delimiter; +import { + alt, + any, + noneOf, + oneOf, + optWhitespace, + Parser, + regexp, + seq, + string, + whitespace, +} from 'parsimmon'; +import { Pattern, SearchDirection } from '../../vimscript/pattern'; +import { numberParser } from '../../vimscript/parserUtils'; + +// TODO: `:help sub-replace-special` +// TODO: `:help sub-replace-expression` +const replaceStringParser = (delimiter: string): Parser => + alt( + string('\\').then( + any.fallback(undefined).map((escaped) => { + if (escaped === undefined || escaped === '\\') { + return '\\'; + } else if (escaped === '/') { + return '/'; + } else if (escaped === 'b') { + return '\b'; + } else if (escaped === 'r') { + return '\r'; + } else if (escaped === 'n') { + return '\n'; + } else if (escaped === 't') { + return '\t'; + } else if (/[&0-9]/.test(escaped)) { + return `$${escaped}`; } else { - pattern += '\\' + currentChar; + return `\\${escaped}`; } - } else { - pattern += '\\\\'; // :s/\ is treated like :s/\\ - } - } else { - pattern += currentChar; - } + }) + ), + 
noneOf(delimiter) + ) + .many() + .map((chars) => chars.join('')); + +const substituteFlagsParser: Parser = seq( + string('&').fallback(undefined), + oneOf('cegiInp#lr').many() +).map(([amp, flagChars]) => { + const flags: SubstituteFlags = {}; + if (amp === '&') { + flags.keepPreviousFlags = true; } - return [pattern, false]; -} - -// See Vim's sub-replace-special documentation -// TODO: \u, \U, \l, \L, \e, \E -const replaceEscapes = { - b: '\b', - r: '\r', - n: '\n', - t: '\t', - '&': '$&', - '0': '$0', - '1': '$1', - '2': '$2', - '3': '$3', - '4': '$4', - '5': '$5', - '6': '$6', - '7': '$7', - '8': '$8', - '9': '$9', -}; - -function parseReplace(scanner: Scanner, delimiter: string): string { - let replace = ''; - while (!scanner.isAtEof) { - let currentChar = scanner.next(); - - if (currentChar === delimiter) { - return replace; // found second delimiter - } else if (currentChar === '\\') { - if (!scanner.isAtEof) { - currentChar = scanner.next(); - if (currentChar === delimiter) { - replace += delimiter; - } else if (replaceEscapes.hasOwnProperty(currentChar)) { - replace += replaceEscapes[currentChar]; - } else { - replace += currentChar; - } - } else { - replace += '\\'; // :s/.../\ is treated like :s/.../\\ - } - } else { - replace += currentChar; - } - } - return replace; -} - -function parseSubstituteFlags(scanner: Scanner): number { - let flags: number = 0; - let index = 0; - while (true) { - if (scanner.isAtEof) { - break; - } - - const c = scanner.next(); - switch (c) { - case '&': - if (index === 0) { - flags |= SubstituteFlags.KeepPreviousFlags; - } else { - // Raise Error - return SubstituteFlags.None; - } - break; + for (const flag of flagChars) { + switch (flag) { case 'c': - flags |= SubstituteFlags.ConfirmEach; + flags.confirmEach = true; break; case 'e': - flags |= SubstituteFlags.SuppressError; + flags.suppressError = true; break; case 'g': - flags |= SubstituteFlags.ReplaceAll; + flags.replaceAll = true; break; case 'i': - flags |= 
SubstituteFlags.IgnoreCase; + flags.ignoreCase = true; break; case 'I': - flags |= SubstituteFlags.NoIgnoreCase; + flags.noIgnoreCase = true; break; case 'n': - flags |= SubstituteFlags.PrintCount; + flags.printCount = true; break; case 'p': - flags |= SubstituteFlags.PrintLastMatchedLine; + flags.printLastMatchedLine = true; break; case '#': - flags |= SubstituteFlags.PrintLastMatchedLineWithNumber; + flags.printLastMatchedLineWithNumber = true; break; case 'l': - flags |= SubstituteFlags.PrintLastMatchedLineWithList; + flags.printLastMatchedLineWithList = true; break; case 'r': - flags |= SubstituteFlags.UsePreviousPattern; + flags.usePreviousPattern = true; break; - default: - scanner.backup(); - return flags; } - - index++; } - return flags; -} - -function parseCount(scanner: Scanner): number { - let countStr = ''; - - while (true) { - if (scanner.isAtEof) { - break; - } - countStr += scanner.next(); - } - - const count = Number.parseInt(countStr, 10); - - // TODO: If count is not valid number, raise error - return Number.isInteger(count) ? count : -1; -} -/** - * Substitute - * :[range]s[ubstitute]/{pattern}/{string}/[flags] [count] - * For each line in [range] replace a match of {pattern} with {string}. - * {string} can be a literal string, or something special; see |sub-replace-special|. 
- */ -export function parseSubstituteCommandArgs(args: string): SubstituteCommand { - try { - let searchPattern: string | undefined; - let replaceString: string; - let flags: number; - let count: number; - - if (!args || !args.trim()) { - // special case for :s - return new SubstituteCommand({ - pattern: undefined, - replace: '', // ignored in this context - flags: SubstituteFlags.None, - }); - } - let scanner: Scanner; - - const delimiter = args[0]; - - if (isValidDelimiter(delimiter)) { - if (args.length === 1) { - // special case for :s/ or other delimiters - return new SubstituteCommand({ - pattern: '', - replace: '', - flags: SubstituteFlags.None, - }); - } - - let secondDelimiterFound: boolean; - - scanner = new Scanner(args.substr(1, args.length - 1)); - [searchPattern, secondDelimiterFound] = parsePattern(scanner, delimiter); - - if (!secondDelimiterFound) { - // special case for :s/search - return new SubstituteCommand({ - pattern: searchPattern, +}); + +const countParser: Parser = whitespace.then(numberParser).fallback(undefined); + +export const substituteCommandArgs: Parser = optWhitespace.then( + alt( + // :s[ubstitute]/{pattern}/{string}/[flags] [count] + regexp(/[^\w\s\\|"]{1}/).chain((delimiter) => + seq( + Pattern.parser({ direction: SearchDirection.Forward, delimiter }), + replaceStringParser(delimiter), + string(delimiter).then(substituteFlagsParser).fallback({}), + countParser + ).map( + ([pattern, replace, flags, count]) => + new SubstituteCommand({ pattern, replace, flags, count }) + ) + ), + + // :s[ubstitute] [flags] [count] + seq(substituteFlagsParser, countParser).map( + ([flags, count]) => + new SubstituteCommand({ + pattern: undefined, replace: '', - flags: SubstituteFlags.None, - }); - } - replaceString = parseReplace(scanner, delimiter); - } else { - // if it's not a valid delimiter, it must be flags, so start parsing from here - searchPattern = undefined; - replaceString = ''; - scanner = new Scanner(args); - } - - 
scanner.skipWhiteSpace(); - flags = parseSubstituteFlags(scanner); - scanner.skipWhiteSpace(); - count = parseCount(scanner); - - return new SubstituteCommand({ - pattern: searchPattern, - replace: replaceString, - flags, - count, - }); - } catch (e) { - throw error.VimError.fromCode(error.ErrorCode.PatternNotFound); - } -} + flags, + count, + }) + ) + ) +); diff --git a/src/state/globalState.ts b/src/state/globalState.ts index 33380d79165..598917f8c38 100644 --- a/src/state/globalState.ts +++ b/src/state/globalState.ts @@ -3,10 +3,11 @@ import { JumpTracker } from '../jumps/jumpTracker'; import { Mode } from '../mode/mode'; import { RecordedState } from './../state/recordedState'; import { SearchHistory } from '../history/historyFile'; -import { SearchState, SearchDirection } from './searchState'; +import { SearchState } from './searchState'; import { SubstituteState } from './substituteState'; import { configuration } from '../configuration/configuration'; import { Position } from 'vscode'; +import { SearchDirection } from '../vimscript/pattern'; /** * State which stores global state (across editors) diff --git a/src/state/searchState.ts b/src/state/searchState.ts index 3ba64f62a43..e9f8a5a4df8 100644 --- a/src/state/searchState.ts +++ b/src/state/searchState.ts @@ -1,60 +1,69 @@ -import * as vscode from 'vscode'; -import { Position } from 'vscode'; +import { Position, Range, TextEditor } from 'vscode'; import { configuration } from '../configuration/configuration'; -import { PositionDiff } from './../common/motion/position'; +import { Pattern, SearchDirection, SearchOffset, searchStringParser } from '../vimscript/pattern'; import { Mode } from './../mode/mode'; -export enum SearchDirection { - Forward = 1, - Backward = -1, -} - -// Older browsers don't support lookbehind - in this case, use an inferior regex rather than crashing -let supportsLookbehind = true; -try { - // tslint:disable-next-line - new RegExp('(?<=x)'); -} catch { - supportsLookbehind = false; 
-} - /** * State involved with beginning a search (/). */ export class SearchState { - private static readonly MAX_SEARCH_RANGES = 1000; + constructor( + direction: SearchDirection, + startPosition: Position, + searchString = '', + { ignoreSmartcase = false } = {}, + currentMode: Mode + ) { + this._searchString = searchString; - private static readonly specialCharactersRegex = /[\-\[\]{}()*+?.,\\\^$|#\s]/g; - // c or C with an odd number of preceding \'s triggers "case override" - private static readonly caseOverrideRegex = supportsLookbehind - ? new RegExp('(?<=(?:^|[^\\\\])(?:\\\\\\\\)*)\\\\[Cc]', 'g') - : /\\[Cc]/g; - private static readonly notEscapedSlashRegex = supportsLookbehind - ? new RegExp('(?<=[^\\\\])\\/', 'g') - : /\//g; - private static readonly notEscapedQuestionMarkRegex = supportsLookbehind - ? new RegExp('(?<=[^\\\\])\\?', 'g') - : /\?/g; - private static readonly searchOffsetBeginRegex = /b(\+-)?[0-9]*/; - private static readonly searchOffsetEndRegex = /e(\+-)?[0-9]*/; + const { pattern, offset } = searchStringParser({ direction, ignoreSmartcase }).tryParse( + searchString + ); + this.pattern = pattern; + this.offset = offset; + + this.cursorStartPosition = startPosition; + this.ignoreSmartcase = ignoreSmartcase; + this.previousMode = currentMode; + } + + private _searchString: string; + public pattern: Pattern; + private offset?: SearchOffset; public readonly previousMode: Mode; - public readonly searchDirection: SearchDirection; public readonly cursorStartPosition: Position; + public get searchString(): string { + return this._searchString; + } + public set searchString(str: string) { + this._searchString = str; + const { pattern, offset } = searchStringParser({ + direction: this.pattern.direction, + ignoreSmartcase: this.ignoreSmartcase, + }).tryParse(str); + if (pattern.patternString !== this.pattern.patternString) { + this.pattern = pattern; + this.matchRanges.clear(); + } + this.offset = offset; + } + + public get direction(): 
SearchDirection { + return this.pattern.direction; + } + /** * Every range in the document that matches the search string. + * + * This might not be 100% complete - @see Pattern::MAX_SEARCH_RANGES */ - public getMatchRanges(editor: vscode.TextEditor): vscode.Range[] { + public getMatchRanges(editor: TextEditor): Range[] { return this.recalculateSearchRanges(editor); } - private matchRanges: Map = new Map(); - - /** - * Whether the needle should be interpreted as a regular expression - */ - private readonly isRegex: boolean; + private matchRanges: Map = new Map(); /** * If true, an all-lowercase needle will not be treated as case-insensitive, even if smartcase is enabled. @@ -62,123 +71,8 @@ export class SearchState { */ private readonly ignoreSmartcase: boolean; - /** - * The string being searched for - */ - private needle = ''; - - // How to adjust the cursor's position after going to a match - // Some examples: - // /abc/3 will jump to the third character after finding abc - // /abc/b-2 will go 2 characters to the left after finding abc - // /abc/e2 will go 2 characters to the right from the end of abc after finding it - // TODO: support the ; offset (see http://vimdoc.sourceforge.net/htmldoc/pattern.html) - private offset?: { - type: 'line' | 'beginning' | 'end'; - num: number; - }; - - /** - * The raw string being searched for, including both the needle and search offset - */ - private _searchString = ''; - public get searchString(): string { - return this._searchString; - } - - public set searchString(search: string) { - if (this._searchString !== search) { - this._searchString = search; - - const oldNeedle = this.needle; - this.needle = search; - this.offset = undefined; - - const needleSegments = - this.searchDirection === SearchDirection.Backward - ? 
search.split(SearchState.notEscapedQuestionMarkRegex) - : search.split(SearchState.notEscapedSlashRegex); - if (needleSegments.length > 1) { - this.needle = needleSegments[0]; - const num = Number(needleSegments[1]); - if (isNaN(num)) { - if (SearchState.searchOffsetBeginRegex.test(needleSegments[1])) { - this.offset = { - type: 'beginning', - num: Number(needleSegments[1].slice(1)), - }; - } else if (SearchState.searchOffsetEndRegex.test(needleSegments[1])) { - this.offset = { - type: 'end', - num: Number(needleSegments[1].slice(1)), - }; - } - } else { - this.offset = { - type: 'line', - num, - }; - } - } - - if (this.needle !== oldNeedle) { - // Invalidate all cached results - this.matchRanges.clear(); - - this._needleRegex = undefined; - } - } - } - - private _needleRegex: RegExp | undefined; - private get needleRegex(): RegExp { - if (this._needleRegex) { - return this._needleRegex; - } - - /* - * Decide whether the search is case sensitive. - * If ignorecase is false, the search is case sensitive. - * If ignorecase is true, the search should be case insensitive. - * If both ignorecase and smartcase are true, the search is case sensitive only when the search string contains UpperCase character. - */ - let ignorecase = configuration.ignorecase; - if ( - ignorecase && - configuration.smartcase && - !this.ignoreSmartcase && - /[A-Z]/.test(this.needle) - ) { - ignorecase = false; - } - - let searchRE = this.needle; - const ignorecaseOverride = this.needle.match(SearchState.caseOverrideRegex); - if (ignorecaseOverride) { - // Vim strips all \c's but uses the behavior of the first one. - searchRE = this.needle.replace(SearchState.caseOverrideRegex, ''); - ignorecase = ignorecaseOverride[0][1] === 'c'; - } - - if (!this.isRegex) { - searchRE = this.needle.replace(SearchState.specialCharactersRegex, '\\$&'); - } - - const regexFlags = ignorecase ? 
'gim' : 'gm'; - - try { - this._needleRegex = new RegExp(searchRE, regexFlags); - } catch (err) { - // Couldn't compile the regexp, try again with special characters escaped - searchRE = this.needle.replace(SearchState.specialCharactersRegex, '\\$&'); - this._needleRegex = new RegExp(searchRE, regexFlags); - } - - return this._needleRegex; - } - - private recalculateSearchRanges(editor: vscode.TextEditor): vscode.Range[] { - if (this.needle === '') { + private recalculateSearchRanges(editor: TextEditor): Range[] { + if (this.searchString === '') { return []; } @@ -189,67 +83,22 @@ export class SearchState { return cached.ranges; } - // We store the entire text file as a string inside text, and run the - // regex against it many times to find all of our matches. - const text = document.getText(); - const selection = editor.selection; - const startOffset = document.offsetAt(selection.active); - const regex = this.needleRegex; - regex.lastIndex = startOffset; - - let result: RegExpExecArray | null; - let wrappedOver = false; - const matchRanges = [] as vscode.Range[]; - while (true) { - result = regex.exec(text); - - if (result) { - if (wrappedOver && result.index >= startOffset) { - // We've found our first match again - break; - } - - matchRanges.push( - new vscode.Range( - document.positionAt(result.index), - document.positionAt(result.index + result[0].length) - ) - ); - - if (matchRanges.length >= SearchState.MAX_SEARCH_RANGES) { - break; - } - - // This happens when you find a zero-length match - if (result.index === regex.lastIndex) { - regex.lastIndex++; - } - } else if (!wrappedOver) { - // We need to wrap around to the back if we reach the end. - regex.lastIndex = 0; - wrappedOver = true; - } else { - break; - } - } + // TODO: It's weird to use the active selection for this... 
+ const matchRanges = this.pattern.allMatches(editor.document, editor.selection.active); - // TODO: we know the order of matches; this sort is lazy and could become a bottleneck if we increase the max # of matches - matchRanges.sort((x, y) => (x.start.isBefore(y.start) ? -1 : 1)); this.matchRanges.set(document.fileName, { version: document.version, ranges: matchRanges, }); + return matchRanges; } /** - * The position of the next search. - * match == false if there is no match. - * - * Pass in -1 as direction to reverse the direction we search. + * @returns The start of the next match range, after applying the search offset */ public getNextSearchMatchPosition( - editor: vscode.TextEditor, + editor: TextEditor, startPosition: Position, direction = SearchDirection.Forward ): { pos: Position; index: number } | undefined { @@ -257,52 +106,36 @@ export class SearchState { if (nextMatch === undefined) { return undefined; } - const { start, end, index } = nextMatch; + const { range, index } = nextMatch; - let pos = start; - if (this.offset) { - if (this.offset.type === 'line') { - pos = start.add( - editor.document, - PositionDiff.exactCharacter({ lineOffset: this.offset.num, character: 0 }) - ); - } else if (this.offset.type === 'beginning') { - pos = start.getOffsetThroughLineBreaks(this.offset.num); - } else if (this.offset.type === 'end') { - pos = end.getOffsetThroughLineBreaks(this.offset.num - 1); - } - } - - return { pos, index }; + return { pos: this.offset ? this.offset.apply(range) : range.start, index }; } /** - * The position of the next search. - * match == false if there is no match. + * @returns The next match range from the given position and its rank in the document's matches * - * Pass in -1 as direction to reverse the direction we search. 
+ * @param direction If `SearchDirection.Backward`, this will search in the opposite of the pattern's direction * - * end is exclusive; which means the index is start + matchedString.length + * NOTE: This method does not take the search offset into account */ public getNextSearchMatchRange( - editor: vscode.TextEditor, - startPosition: Position, + editor: TextEditor, + fromPosition: Position, direction = SearchDirection.Forward - ): { start: Position; end: Position; index: number } | undefined { + ): { range: Range; index: number } | undefined { const matchRanges = this.recalculateSearchRanges(editor); if (matchRanges.length === 0) { return undefined; } - const effectiveDirection = (direction * this.searchDirection) as SearchDirection; + const effectiveDirection = (direction * this.pattern.direction) as SearchDirection; if (effectiveDirection === SearchDirection.Forward) { - for (const [index, matchRange] of matchRanges.entries()) { - if (matchRange.start.isAfter(startPosition)) { + for (const [index, range] of matchRanges.entries()) { + if (range.start.isAfter(fromPosition)) { return { - start: matchRange.start, - end: matchRange.end, + range, index, }; } @@ -311,19 +144,17 @@ export class SearchState { if (configuration.wrapscan) { const range = matchRanges[0]; return { - start: range.start, - end: range.end, + range, index: 0, }; } else { return undefined; } } else { - for (const [index, matchRange] of matchRanges.slice(0).reverse().entries()) { - if (matchRange.end.isBeforeOrEqual(startPosition)) { + for (const [index, range] of matchRanges.slice(0).reverse().entries()) { + if (range.end.isBeforeOrEqual(fromPosition)) { return { - start: matchRange.start, - end: matchRange.end, + range, index: matchRanges.length - index - 1, }; } @@ -333,8 +164,7 @@ export class SearchState { if (configuration.wrapscan) { const range = matchRanges[matchRanges.length - 1]; return { - start: range.start, - end: range.end, + range, index: matchRanges.length - 1, }; } else { @@ 
-343,21 +173,23 @@ export class SearchState { } } - public getSearchMatchRangeOf( - editor: vscode.TextEditor, + /** + * @returns the match range which contains the given Position, or undefined if none exists + */ + public findContainingMatchRange( + editor: TextEditor, pos: Position - ): { start: Position; end: Position; index: number } | undefined { + ): { range: Range; index: number } | undefined { const matchRanges = this.recalculateSearchRanges(editor); if (matchRanges.length === 0) { return undefined; } - for (const [index, matchRange] of matchRanges.entries()) { - if (matchRange.start.isBeforeOrEqual(pos) && matchRange.end.isAfter(pos)) { + for (const [index, range] of matchRanges.entries()) { + if (range.start.isBeforeOrEqual(pos) && range.end.isAfter(pos)) { return { - start: matchRange.start, - end: matchRange.end, + range, index, }; } @@ -365,19 +197,4 @@ export class SearchState { return undefined; } - - constructor( - direction: SearchDirection, - startPosition: Position, - searchString = '', - { isRegex = false, ignoreSmartcase = false } = {}, - currentMode: Mode - ) { - this.searchDirection = direction; - this.cursorStartPosition = startPosition; - this.isRegex = isRegex; - this.ignoreSmartcase = ignoreSmartcase; - this.searchString = searchString; - this.previousMode = currentMode; - } } diff --git a/src/state/substituteState.ts b/src/state/substituteState.ts index db67292da5d..76a6d767e1c 100644 --- a/src/state/substituteState.ts +++ b/src/state/substituteState.ts @@ -1,3 +1,5 @@ +import { Pattern } from '../vimscript/pattern'; + /** * State involved with Substitution commands (:s). 
*/ @@ -5,14 +7,14 @@ export class SubstituteState { /** * The last pattern searched for in the substitution */ - public searchPattern: string; + public searchPattern: Pattern; /** * The last replacement string in the substitution */ public replaceString: string; - constructor(searchPattern: string, replaceString: string) { + constructor(searchPattern: Pattern, replaceString: string) { this.searchPattern = searchPattern; this.replaceString = replaceString; } diff --git a/src/statusBar.ts b/src/statusBar.ts index adc9a77918c..e4654e72cab 100644 --- a/src/statusBar.ts +++ b/src/statusBar.ts @@ -1,11 +1,11 @@ import * as vscode from 'vscode'; import { Mode } from './mode/mode'; import { globalState } from './state/globalState'; -import { SearchDirection } from './state/searchState'; import { configuration } from './configuration/configuration'; import { VimState } from './state/vimState'; import { Logger } from './util/logger'; import { VimError } from './error'; +import { SearchDirection } from './vimscript/pattern'; class StatusBarImpl implements vscode.Disposable { // Displays the current state (mode, recording macro, etc.) and messages to the user @@ -198,8 +198,7 @@ export function statusBarText(vimState: VimState) { logger.warn(`globalState.searchState is undefined in SearchInProgressMode.`); return ''; } - const leadingChar = - globalState.searchState.searchDirection === SearchDirection.Forward ? '/' : '?'; + const leadingChar = globalState.searchState.direction === SearchDirection.Forward ? 
'/' : '?'; const searchWithCursor = globalState.searchState.searchString.split(''); searchWithCursor.splice(vimState.statusBarCursorCharacterPos, 0, cursorChar); diff --git a/src/vimscript/exCommandParser.ts b/src/vimscript/exCommandParser.ts index d467d4d1a2e..2b02aebccdd 100644 --- a/src/vimscript/exCommandParser.ts +++ b/src/vimscript/exCommandParser.ts @@ -25,7 +25,7 @@ import { parseReadCommandArgs } from '../cmd_line/subparsers/read'; import { parseRegisterCommandArgs } from '../cmd_line/subparsers/register'; import { parseOptionsCommandArgs } from '../cmd_line/subparsers/setoptions'; import { parseSortCommandArgs } from '../cmd_line/subparsers/sort'; -import { parseSubstituteCommandArgs } from '../cmd_line/subparsers/substitute'; +import { substituteCommandArgs } from '../cmd_line/subparsers/substitute'; import * as tabCmd from '../cmd_line/subparsers/tab'; import { parseWallCommandArgs } from '../cmd_line/subparsers/wall'; import { parseWriteCommandArgs } from '../cmd_line/subparsers/write'; @@ -433,7 +433,7 @@ export const builtinExCommands: ReadonlyArray<[[string, string], ArgParser | und [['rubyd', 'o'], undefined], [['rubyf', 'ile'], undefined], [['rund', 'o'], undefined], - [['s', 'ubstitute'], parseSubstituteCommandArgs], + [['s', 'ubstitute'], (args: string) => substituteCommandArgs.tryParse(args)], [['sN', 'ext'], undefined], [['sa', 'rgument'], undefined], [['sal', 'l'], undefined], diff --git a/src/vimscript/lineRange.ts b/src/vimscript/lineRange.ts index 80ebb3d6160..33e8ac474f9 100644 --- a/src/vimscript/lineRange.ts +++ b/src/vimscript/lineRange.ts @@ -1,10 +1,11 @@ -import { alt, any, optWhitespace, Parser, regexp, seq, string, succeed } from 'parsimmon'; +import { alt, any, optWhitespace, Parser, seq, string, succeed } from 'parsimmon'; import { Position, Range } from 'vscode'; import { ErrorCode, VimError } from '../error'; import { globalState } from '../state/globalState'; -import { SearchDirection, SearchState } from 
'../state/searchState'; +import { SearchState } from '../state/searchState'; import { VimState } from '../state/vimState'; import { numberParser } from './parserUtils'; +import { Pattern, SearchDirection } from './pattern'; /** * Specifies the start or end of a line range. @@ -40,12 +41,12 @@ type LineSpecifier = | { // /{pattern}[/] type: 'pattern_next'; - pattern: string; + pattern: Pattern; } | { // ?{pattern}[?] type: 'pattern_prev'; - pattern: string; + pattern: Pattern; } | { // \/ @@ -73,8 +74,22 @@ const lineSpecifierParser: Parser = alt( .map((mark) => { return { type: 'mark', mark }; }), - // TODO: pattern_next - // TODO: pattern_prev + string('/') + .then(Pattern.parser({ direction: SearchDirection.Forward })) + .map((pattern) => { + return { + type: 'pattern_next', + pattern, + }; + }), + string('?') + .then(Pattern.parser({ direction: SearchDirection.Backward })) + .map((pattern) => { + return { + type: 'pattern_prev', + pattern, + }; + }), string('\\/').result({ type: 'last_search_pattern_next' }), string('\\?').result({ type: 'last_search_pattern_prev' }), string('\\&').result({ type: 'last_substitute_pattern_next' }) @@ -134,9 +149,21 @@ export class Address { } return mark.position.line; case 'pattern_next': - throw new Error('Using a pattern in a line range is not yet supported'); // TODO + const m = this.specifier.pattern.nextMatch( + vimState.document, + vimState.cursorStopPosition + ); + if (m === undefined) { + // TODO: throw proper errors for nowrapscan + throw VimError.fromCode( + ErrorCode.PatternNotFound, + this.specifier.pattern.patternString + ); + } else { + return m.start.line; + } case 'pattern_prev': - throw new Error('Using a pattern in a line range is not yet supported'); // TODO + throw new Error('Using a backward pattern in a line range is not yet supported'); // TODO case 'last_search_pattern_next': if (!globalState.searchState) { throw VimError.fromCode(ErrorCode.NoPreviousRegularExpression); @@ -148,7 +175,10 @@ export class 
Address { ); if (nextMatch === undefined) { // TODO: throw proper errors for nowrapscan - throw VimError.fromCode(ErrorCode.PatternNotFound); + throw VimError.fromCode( + ErrorCode.PatternNotFound, + globalState.searchState.searchString + ); } return nextMatch.pos.line; case 'last_search_pattern_prev': @@ -162,7 +192,10 @@ export class Address { ); if (prevMatch === undefined) { // TODO: throw proper errors for nowrapscan - throw VimError.fromCode(ErrorCode.PatternNotFound); + throw VimError.fromCode( + ErrorCode.PatternNotFound, + globalState.searchState.searchString + ); } return prevMatch.pos.line; case 'last_substitute_pattern_next': @@ -172,7 +205,7 @@ export class Address { const searchState = new SearchState( SearchDirection.Forward, vimState.cursorStopPosition, - globalState.substituteState.searchPattern, + globalState.substituteState.searchPattern.patternString, {}, vimState.currentMode ); @@ -182,7 +215,7 @@ export class Address { ); if (match === undefined) { // TODO: throw proper errors for nowrapscan - throw VimError.fromCode(ErrorCode.PatternNotFound); + throw VimError.fromCode(ErrorCode.PatternNotFound, searchState.searchString); } return match.pos.line; default: diff --git a/src/vimscript/pattern.ts b/src/vimscript/pattern.ts new file mode 100644 index 00000000000..da41b3dfe19 --- /dev/null +++ b/src/vimscript/pattern.ts @@ -0,0 +1,295 @@ +import { escapeRegExp } from 'lodash'; +import { alt, any, lazy, noneOf, oneOf, Parser, seq, string } from 'parsimmon'; +import { Position, Range, TextDocument } from 'vscode'; +import { configuration } from '../configuration/configuration'; +import { numberParser } from './parserUtils'; + +export function searchStringParser(args: { + direction: SearchDirection; + ignoreSmartcase?: boolean; +}): Parser<{ + pattern: Pattern; + offset: SearchOffset | undefined; +}> { + return seq( + Pattern.parser(args), + lazy(() => SearchOffset.parser.fallback(undefined)) + ).map(([pattern, offset]) => { + return { pattern, offset 
}; + }); +} + +export enum SearchDirection { + Forward = 1, + Backward = -1, +} + +/** + * See `:help pattern` + * + * TODO(#3996): Currently, this is a thin wrapper around JavaScript's regex engine. + * We should either re-implement Vim's regex engine from scratch or (more likely) + * implement a best-effort translation from Vim's syntax to JavaScript's. + */ +export class Pattern { + public readonly patternString: string; + public readonly direction: SearchDirection; + public readonly regex: RegExp; + public readonly ignorecase: boolean | undefined; + + private static readonly MAX_SEARCH_RANGES = 1000; + + public nextMatch(document: TextDocument, fromPosition: Position): Range | undefined { + const haystack = document.getText(); + this.regex.lastIndex = document.offsetAt(fromPosition) + 1; + const match = this.regex.exec(haystack); + return match + ? new Range(document.positionAt(match.index), document.positionAt(match.index + match.length)) + : undefined; + } + + /** + * Every range in the document that matches the search string. + * + * This might not be 100% complete - @see Pattern::MAX_SEARCH_RANGES + */ + public allMatches(document: TextDocument, fromPosition: Position): Range[] { + const haystack = document.getText(); + const startOffset = document.offsetAt(fromPosition); + + const matchRanges = { + beforeWrapping: [] as Range[], + afterWrapping: [] as Range[], + }; + let wrappedOver = false; + while (true) { + const match = this.regex.exec(haystack); + + if (match) { + if (wrappedOver && match.index >= startOffset) { + // We've found our first match again + break; + } + + const matchRange = new Range( + document.positionAt(match.index), + document.positionAt(match.index + match[0].length) + ); + + (wrappedOver ? matchRanges.afterWrapping : matchRanges.beforeWrapping).push(matchRange); + + if ( + matchRanges.beforeWrapping.length + matchRanges.afterWrapping.length >= + Pattern.MAX_SEARCH_RANGES + ) { + // TODO: Vim uses a timeout... 
we probably should too + break; + } + + // When we find a zero-length match, nudge the search position forward to avoid getting stuck + if (matchRange.start.isEqual(matchRange.end)) { + this.regex.lastIndex++; + } + } else if (!wrappedOver) { + // We need to wrap around to the back if we reach the end. + this.regex.lastIndex = 0; + wrappedOver = true; + } else { + break; + } + } + + return matchRanges.afterWrapping.concat(matchRanges.beforeWrapping); + } + + public static fromLiteralString( + input: string, + direction: SearchDirection, + wordBoundaries: boolean + ): Pattern { + const patternString = input.replace(escapeRegExp(input), '\\$&'); + if (wordBoundaries) { + return new Pattern( + `\\<${patternString}\\>`, + direction, + new RegExp(`\b${patternString}\b`, configuration.ignorecase ? 'gim' : 'gm') + ); + } else { + return new Pattern( + patternString, + direction, + new RegExp(patternString, configuration.ignorecase ? 'gim' : 'gm') + ); + } + } + + public static parser(args: { + direction: SearchDirection; + ignoreSmartcase?: boolean; + delimiter?: string; + }): Parser { + const delimiter = args.delimiter + ? args.delimiter + : args.direction === SearchDirection.Forward + ? 
'/' + : '?'; + // TODO: Some escaped characters need special treatment + return alt( + string('\\') + .then(any.fallback(undefined)) + .map((escaped) => { + if (escaped === undefined) { + return '\\\\'; + } else if (escaped === 'c') { + return { ignorecase: true }; + } else if (escaped === 'C') { + return { ignorecase: false }; + } else if (escaped === '<' || escaped === '>') { + // TODO: not QUITE the same + return '\\b'; + } else if (escaped === 'n') { + return '\\r?\\n'; + } + return '\\' + escaped; + }), + noneOf(delimiter) + ) + .many() + .skip(string(delimiter).fallback(undefined)) + .map((atoms) => { + let patternString = ''; + let caseOverride: boolean | undefined; + for (const atom of atoms) { + if (typeof atom === 'string') { + patternString += atom; + } else { + if (atom.ignorecase) { + caseOverride = true; + } else if (caseOverride === undefined) { + caseOverride = false; + } + } + } + return { + patternString, + caseOverride, + }; + }) + .map(({ patternString, caseOverride }) => { + const flags = Pattern.getIgnoreCase(patternString, { + caseOverride, + ignoreSmartcase: args.ignoreSmartcase ?? false, + }) + ? 'gim' + : 'gm'; + return new Pattern(patternString, args.direction, RegExp(patternString, flags)); + }); + } + + private static getIgnoreCase( + patternString: string, + flags: { caseOverride?: boolean; ignoreSmartcase: boolean } + ): boolean { + if (flags.caseOverride !== undefined) { + return flags.caseOverride; + } else if (configuration.smartcase && !flags.ignoreSmartcase && /[A-Z]/.test(patternString)) { + return false; + } + return configuration.ignorecase; + } + + private constructor(patternString: string, direction: SearchDirection, regex: RegExp) { + this.patternString = patternString; + this.direction = direction; + // TODO: Recalculate ignorecase if relevant config changes? 
+ this.regex = regex; + } +} + +type SearchOffsetData = + | { + type: 'lines' | 'chars_from_start' | 'chars_from_end'; + delta: number; + } + | { + type: 'pattern'; + direction: SearchDirection; + pattern: Pattern; + offset?: SearchOffset; + }; + +const searchOffsetTypeParser = oneOf('esb') + .fallback(undefined) + .map((esb) => { + if (esb === undefined) { + return 'lines'; + } else { + return esb === 'e' ? 'chars_from_end' : 'chars_from_start'; + } + }); + +/** + * See `:help search-offset` + */ +export class SearchOffset { + private readonly data: SearchOffsetData; + + public static parser: Parser = alt( + seq(searchOffsetTypeParser, oneOf('+-').fallback('+'), numberParser).map( + ([type, sign, num]) => + new SearchOffset({ + type, + delta: sign === '-' ? -num : num, + }) + ), + seq(searchOffsetTypeParser, oneOf('+-')).map( + ([type, sign]) => + new SearchOffset({ + type, + delta: sign === '-' ? -1 : 1, + }) + ), + seq(searchOffsetTypeParser).map(([type]) => new SearchOffset({ type, delta: 0 })), + string(';/') + .then(searchStringParser({ direction: SearchDirection.Forward })) + .map(({ pattern, offset }) => { + return new SearchOffset({ + type: 'pattern', + direction: SearchDirection.Forward, + pattern, + offset, + }); + }), + string(';?') + .then(searchStringParser({ direction: SearchDirection.Backward })) + .map(({ pattern, offset }) => { + return new SearchOffset({ + type: 'pattern', + direction: SearchDirection.Backward, + pattern, + offset, + }); + }) + ); + + public constructor(data: SearchOffsetData) { + this.data = data; + } + + public apply(match: Range): Position { + switch (this.data.type) { + case 'lines': + return this.data.delta === 0 + ? 
match.start + : new Position(match.end.line + this.data.delta, 0); + case 'chars_from_start': + return match.start.getOffsetThroughLineBreaks(this.data.delta); + case 'chars_from_end': + return match.end.getOffsetThroughLineBreaks(this.data.delta - 1); + case 'pattern': // TODO(#3919): Support `;` offset (`:help //;`) + default: + const guard: unknown = this.data; + throw new Error('Unexpected SearchOffset type'); + } + } +} diff --git a/test/cmd_line/subparser.substitute.test.ts b/test/cmd_line/subparser.substitute.test.ts index 704a52e1b94..78aa883982e 100644 --- a/test/cmd_line/subparser.substitute.test.ts +++ b/test/cmd_line/subparser.substitute.test.ts @@ -7,9 +7,9 @@ suite(':substitute args parser', () => { test('can parse pattern, replace, and flags', () => { const args = subParser('/a/b/g').arguments; - assert.strictEqual(args.pattern, 'a'); + assert.strictEqual(args.pattern?.patternString, 'a'); assert.strictEqual(args.replace, 'b'); - assert.strictEqual(args.flags, 8); + assert.deepStrictEqual(args.flags, { replaceAll: true }); }); test('can parse count', () => { @@ -19,31 +19,32 @@ suite(':substitute args parser', () => { test('can parse custom delimiter', () => { const args = subParser('#a#b#g').arguments; - assert.strictEqual(args.pattern, 'a'); + assert.strictEqual(args.pattern?.patternString, 'a'); assert.strictEqual(args.replace, 'b'); - assert.strictEqual(args.flags, 8); + assert.deepStrictEqual(args.flags, { replaceAll: true }); }); test('can escape delimiter', () => { const args = subParser('/\\/\\/a/b/').arguments; - assert.strictEqual(args.pattern, '//a'); + assert.strictEqual(args.pattern?.patternString, '\\/\\/a'); + assert.strictEqual(args.pattern?.regex.source, '\\/\\/a'); assert.strictEqual(args.replace, 'b'); }); test('can use pattern escapes', () => { const args = subParser('/\\ba/b/').arguments; - assert.strictEqual(args.pattern, '\\ba'); + assert.strictEqual(args.pattern?.patternString, '\\ba'); assert.strictEqual(args.replace, 'b'); 
}); test('can escape replacement', () => { const args = subParser('/a/\\b/').arguments; - assert.strictEqual(args.pattern, 'a'); + assert.strictEqual(args.pattern?.patternString, 'a'); assert.strictEqual(args.replace, '\b'); }); test('can parse flag KeepPreviousFlags', () => { const args = subParser('/a/b/&').arguments; - assert.strictEqual(args.flags, 1); + assert.deepStrictEqual(args.flags, { keepPreviousFlags: true }); }); }); diff --git a/test/cmd_line/substitute.test.ts b/test/cmd_line/substitute.test.ts index 498b59f40cd..254c24a6e31 100644 --- a/test/cmd_line/substitute.test.ts +++ b/test/cmd_line/substitute.test.ts @@ -9,8 +9,9 @@ function sub( args?: { lineRange?: string; flags?: string; count?: number } ): string { const lineRange = args?.lineRange ?? ''; - const flags = args ? `/${args.flags}` : ''; - const count = args ? ` ${args.count}` : ''; + const flags = args?.flags !== undefined ? `/${args.flags}` : ''; + const count = args?.count !== undefined ? ` ${args.count}` : ''; + console.log(`:${lineRange}s/${pattern}/${replace}${flags}${count}\n`); return `:${lineRange}s/${pattern}/${replace}${flags}${count}\n`; } diff --git a/test/register/register.test.ts b/test/register/register.test.ts index af72b7adcac..2244851ca60 100644 --- a/test/register/register.test.ts +++ b/test/register/register.test.ts @@ -199,13 +199,13 @@ suite('register', () => { ); await modeHandler.handleKeyEvent('*'); - assert.strictEqual((await Register.get('/'))?.text, '\\bWake\\b'); + assert.strictEqual((await Register.get('/'))?.text, '\\'); await modeHandler.handleMultipleKeyEvents(['g', '*']); assert.strictEqual((await Register.get('/'))?.text, 'Wake'); await modeHandler.handleKeyEvent('#'); - assert.strictEqual((await Register.get('/'))?.text, '\\bWake\\b'); + assert.strictEqual((await Register.get('/'))?.text, '\\'); await modeHandler.handleMultipleKeyEvents(['g', '#']); assert.strictEqual((await Register.get('/'))?.text, 'Wake'); diff --git 
a/test/vimscript/lineRangeParse.test.ts b/test/vimscript/lineRangeParse.test.ts index 7ff436051ef..b4bc61e04bd 100644 --- a/test/vimscript/lineRangeParse.test.ts +++ b/test/vimscript/lineRangeParse.test.ts @@ -1,5 +1,6 @@ import assert = require('assert'); import { Address, LineRange } from '../../src/vimscript/lineRange'; +import { Pattern, SearchDirection } from '../../src/vimscript/pattern'; function parseTest(name: string, input: string, output: LineRange) { test(name, () => { @@ -17,17 +18,46 @@ suite('LineRange parsing', () => { parseTest("mark ('a)", "'a", new LineRange(new Address({ type: 'mark', mark: 'a' }))); parseTest("mark ('A)", "'A", new LineRange(new Address({ type: 'mark', mark: 'A' }))); parseTest("mark ('<)", "'<", new LineRange(new Address({ type: 'mark', mark: '<' }))); - // TODO: uncomment these - // parseTest( - // 'pattern_next (no closing /)', - // '/abc', - // new LineRange(new Address({ type: 'pattern_next', pattern: 'abc' })) - // ); - // parseTest( - // 'pattern_next (closing /)', - // '/abc/', - // new LineRange(new Address({ type: 'pattern_next', pattern: 'abc' })) - // ); + parseTest( + 'pattern_next (no closing /)', + '/abc', + new LineRange( + new Address({ + type: 'pattern_next', + pattern: Pattern.parser({ direction: SearchDirection.Forward }).tryParse('abc'), + }) + ) + ); + parseTest( + 'pattern_next (closing /)', + '/abc/', + new LineRange( + new Address({ + type: 'pattern_next', + pattern: Pattern.parser({ direction: SearchDirection.Forward }).tryParse('abc'), + }) + ) + ); + parseTest( + 'pattern_prev (no closing ?)', + '?abc', + new LineRange( + new Address({ + type: 'pattern_prev', + pattern: Pattern.parser({ direction: SearchDirection.Backward }).tryParse('abc'), + }) + ) + ); + parseTest( + 'pattern_prev (closing ?)', + '?abc?', + new LineRange( + new Address({ + type: 'pattern_prev', + pattern: Pattern.parser({ direction: SearchDirection.Backward }).tryParse('abc'), + }) + ) + ); parseTest( 'last_search_pattern_next', 
'\\/', diff --git a/test/vimscript/lineRangeResolve.test.ts b/test/vimscript/lineRangeResolve.test.ts index 6dc6e0adaab..36471037e80 100644 --- a/test/vimscript/lineRangeResolve.test.ts +++ b/test/vimscript/lineRangeResolve.test.ts @@ -106,6 +106,20 @@ suite('LineRange resolving', () => { }, }); + resolveTest({ + title: 'Explicit pattern (forward)', + start: ['ap|ple', 'banana', 'carrot', 'dragonfruit', 'eggplant'], + keysPressed: '', + end: ['ap|ple', 'banana', 'carrot', 'dragonfruit', 'eggplant'], + lineRanges: { + '/carrot': [2, 2], + '/carrot/': [2, 2], + '/carrot/,/dragonfruit': [2, 3], + '/carrot/,/dragonfruit/': [2, 3], + '/(an){2}/,/[^a]g/': [1, 4], + }, + }); + resolveTest({ title: 'Last searched pattern', start: ['apple', 'banana', '|carrot', 'dragonfruit', 'eggplant'], diff --git a/test/vimscript/searchOffset.test.ts b/test/vimscript/searchOffset.test.ts new file mode 100644 index 00000000000..df493800493 --- /dev/null +++ b/test/vimscript/searchOffset.test.ts @@ -0,0 +1,43 @@ +import assert = require('assert'); +import { SearchOffset } from '../../src/vimscript/pattern'; + +function parseTest(name: string, input: string, output: SearchOffset) { + test(name, () => { + assert.deepStrictEqual(SearchOffset.parser.tryParse(input), output); + }); +} + +suite('SearchOffset parsing', () => { + parseTest('+', '+', new SearchOffset({ type: 'lines', delta: 1 })); + parseTest('-', '-', new SearchOffset({ type: 'lines', delta: -1 })); + + parseTest('[num]', '123', new SearchOffset({ type: 'lines', delta: 123 })); + parseTest('+[num]', '+123', new SearchOffset({ type: 'lines', delta: 123 })); + parseTest('-[num]', '-123', new SearchOffset({ type: 'lines', delta: -123 })); + + parseTest('e', 'e', new SearchOffset({ type: 'chars_from_end', delta: 0 })); + parseTest('e+', 'e+', new SearchOffset({ type: 'chars_from_end', delta: 1 })); + parseTest('e-', 'e-', new SearchOffset({ type: 'chars_from_end', delta: -1 })); + parseTest('e[num]', 'e123', new SearchOffset({ type: 
'chars_from_end', delta: 123 })); + parseTest('e+[num]', 'e+123', new SearchOffset({ type: 'chars_from_end', delta: 123 })); + parseTest('e-[num]', 'e-123', new SearchOffset({ type: 'chars_from_end', delta: -123 })); + + parseTest('s', 's', new SearchOffset({ type: 'chars_from_start', delta: 0 })); + parseTest('s+', 's+', new SearchOffset({ type: 'chars_from_start', delta: 1 })); + parseTest('s-', 's-', new SearchOffset({ type: 'chars_from_start', delta: -1 })); + parseTest('s[num]', 's123', new SearchOffset({ type: 'chars_from_start', delta: 123 })); + parseTest('s+[num]', 's+123', new SearchOffset({ type: 'chars_from_start', delta: 123 })); + parseTest('s-[num]', 's-123', new SearchOffset({ type: 'chars_from_start', delta: -123 })); + + parseTest('b', 'b', new SearchOffset({ type: 'chars_from_start', delta: 0 })); + parseTest('b+', 'b+', new SearchOffset({ type: 'chars_from_start', delta: 1 })); + parseTest('b-', 'b-', new SearchOffset({ type: 'chars_from_start', delta: -1 })); + parseTest('b[num]', 'b123', new SearchOffset({ type: 'chars_from_start', delta: 123 })); + parseTest('b+[num]', 'b+123', new SearchOffset({ type: 'chars_from_start', delta: 123 })); + parseTest('b-[num]', 'b-123', new SearchOffset({ type: 'chars_from_start', delta: -123 })); + + // TODO: ;{pattern} +}); + +// TODO: Write these tests +// suite('SearchOffset application', () => {});