Skip to content

Commit

Permalink
Reduce change
Browse files Browse the repository at this point in the history
  • Loading branch information
z2oh committed Feb 1, 2024
1 parent d522fbd commit 9ab9744
Show file tree
Hide file tree
Showing 5 changed files with 436 additions and 466 deletions.
1 change: 0 additions & 1 deletion Sources/SKCore/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ add_library(SKCore STATIC
LanguageServer.swift
MainFilesProvider.swift
PathPrefixMapping.swift
SplitShellCommand.swift
Toolchain.swift
ToolchainRegistry.swift
XCToolchainPlist.swift)
Expand Down
333 changes: 333 additions & 0 deletions Sources/SKCore/CompilationDatabase.swift
Original file line number Diff line number Diff line change
Expand Up @@ -239,3 +239,336 @@ extension CompilationDatabase.Command: Codable {
try container.encodeIfPresent(output, forKey: .output)
}
}

fileprivate struct UnixCommandParser {
var content: Substring
var i: Substring.UTF8View.Index
var result: [String] = []

var ch: UInt8 { self.content.utf8[i] }
var done: Bool { self.content.endIndex == i }

init(_ string: Substring) {
self.content = string
self.i = self.content.utf8.startIndex
}

mutating func next() {
i = content.utf8.index(after: i)
}

mutating func next(expect c: UInt8) {
assert(c == ch)
next()
}

mutating func parse() -> [String] {
while !done {
switch ch {
case UInt8(ascii: " "): next()
default: parseString()
}
}
return result
}

mutating func parseString() {
var str = ""
STRING: while !done {
switch ch {
case UInt8(ascii: " "): break STRING
case UInt8(ascii: "\""): parseDoubleQuotedString(into: &str)
case UInt8(ascii: "\'"): parseSingleQuotedString(into: &str)
default: parsePlainString(into: &str)
}
}
result.append(str)
}

mutating func parseDoubleQuotedString(into str: inout String) {
next(expect: UInt8(ascii: "\""))
var start = i
while !done {
switch ch {
case UInt8(ascii: "\""):
str += content[start..<i]
next()
return
case UInt8(ascii: "\\"):
str += content[start..<i]
next()
start = i
if !done { fallthrough }
default:
next()
}
}
str += content[start..<i]
}

mutating func parseSingleQuotedString(into str: inout String) {
next(expect: UInt8(ascii: "\'"))
let start = i
while !done {
switch ch {
case UInt8(ascii: "\'"):
str += content[start..<i]
next()
return
default:
next()
}
}
str += content[start..<i]
}

mutating func parsePlainString(into str: inout String) {
var start = i
while !done {
let _ch = ch
switch _ch {
case UInt8(ascii: "\""), UInt8(ascii: "\'"), UInt8(ascii: " "):
str += content[start..<i]
return
case UInt8(ascii: "\\"):
str += content[start..<i]
next()
start = i
if !done { fallthrough }
default:
next()
}
}
str += content[start..<i]
}
}

/// Split and unescape a shell-escaped command line invocation.
///
/// Examples:
///
/// ```
/// abc def -> ["abc", "def"]
/// abc\ def -> ["abc def"]
/// abc"\""def -> ["abc\"def"]
/// abc'\"'def -> ["abc\\"def"]
/// ```
///
/// See clang's `unescapeCommandLine()`.
public func splitShellEscapedCommand(_ cmd: String) -> [String] {
var parser = UnixCommandParser(cmd[...])
return parser.parse()
}

// MARK: - Windows

fileprivate extension Character {
var isWhitespace: Bool {
switch self {
case " ", "\t":
return true
default:
return false
}
}

var isWhitespaceOrNull: Bool {
return self.isWhitespace || self == "\0"
}

func isWindowsSpecialChar(inCommandName: Bool) -> Bool {
if isWhitespace {
return true
}
if self == #"""# {
return true
}
if !inCommandName && self == #"\"# {
return true
}
return false
}
}

fileprivate struct WindowsCommandParser {
/// The content of the entire command that shall be parsed.
private let content: String

/// Whether we are parsing the initial command name. In this mode `\` is not treated as escaping the quote
/// character.
private var parsingCommandName: Bool

/// An index into `content`, pointing to the character that we are currently parsing.
private var currentCharacterIndex: String.UTF8View.Index

/// The split command line arguments.
private var result: [String] = []

/// The character that is currently being parsed.
///
/// `nil` if we have reached the end of `content`.
private var currentCharacter: Character? {
guard currentCharacterIndex < content.endIndex else {
return nil
}
return self.content[currentCharacterIndex]
}

/// The character after `currentCharacter`.
///
/// `nil` if we have reached the end of `content`.
private var peek: Character? {
let nextIndex = content.index(after: currentCharacterIndex)
if nextIndex < content.endIndex {
return content[nextIndex]
} else {
return nil
}
}

init(_ string: String, initialCommandName: Bool) {
self.content = string
self.currentCharacterIndex = self.content.startIndex
self.parsingCommandName = initialCommandName
}

/// Designated entry point to split a Windows command line invocation.
mutating func parse() -> [String] {
while let currentCharacter {
if currentCharacter.isWhitespaceOrNull {
// Consume any whitespace separating arguments.
_ = consume()
} else {
result.append(parseSingleArgument())
}
}
return result
}

/// Consume the current character.
private mutating func consume() -> Character {
guard let character = currentCharacter else {
preconditionFailure("Nothing to consume")
}
currentCharacterIndex = content.index(after: currentCharacterIndex)
return character
}

/// Consume the current character, asserting that it is `expectedCharacter`
private mutating func consume(expect expectedCharacter: Character) {
assert(currentCharacter == expectedCharacter)
_ = consume()
}

/// Parses a single argument, consuming its characters and returns the parsed arguments with all escaping unfolded
/// (e.g. `\"` gets returned as `"`)
///
/// Afterwards the parser points to the character after the argument.
mutating func parseSingleArgument() -> String {
var str = ""
while let currentCharacter {
if !currentCharacter.isWindowsSpecialChar(inCommandName: parsingCommandName) {
str.append(consume())
continue
}
if currentCharacter.isWhitespaceOrNull {
parsingCommandName = false
return str
} else if currentCharacter == "\"" {
str += parseQuoted()
} else if currentCharacter == #"\"# {
assert(!parsingCommandName, "else we'd have treated it as a normal char");
str.append(parseBackslash())
} else {
preconditionFailure("unexpected special character");
}
}
return str
}

/// Assuming that we are positioned at a `"`, parse a quoted string and return the string contents without the
/// quotes.
mutating func parseQuoted() -> String {
// Discard the opening quote. Its not part of the unescaped text.
consume(expect: "\"")

var str = ""
while let currentCharacter {
switch currentCharacter {
case "\"":
if peek == "\"" {
// Two adjacent quotes inside a quoted string are an escaped single quote. For example
// `" a "" b "`
// represents the string
// ` a " b `
consume(expect: "\"")
consume(expect: "\"")
str += "\""
} else {
// We have found the closing quote. Discard it and return.
consume(expect: "\"")
return str
}
case "\\" where !parsingCommandName:
str.append(parseBackslash())
default:
str.append(consume())
}
}
return str
}

/// Backslashes are interpreted in a rather complicated way in the Windows-style
/// command line, because backslashes are used both to separate path and to
/// escape double quote. This method consumes runs of backslashes as well as the
/// following double quote if it's escaped.
///
/// * If an even number of backslashes is followed by a double quote, one
/// backslash is output for every pair of backslashes, and the last double
/// quote remains unconsumed. The double quote will later be interpreted as
/// the start or end of a quoted string in the main loop outside of this
/// function.
///
/// * If an odd number of backslashes is followed by a double quote, one
/// backslash is output for every pair of backslashes, and a double quote is
/// output for the last pair of backslash-double quote. The double quote is
/// consumed in this case.
///
/// * Otherwise, backslashes are interpreted literally.
mutating func parseBackslash() -> String {
var str: String = ""

let firstNonBackslashIndex = content[currentCharacterIndex...].firstIndex(where: { $0 != "\\" }) ?? content.endIndex
let numberOfBackslashes = content.distance(from: currentCharacterIndex, to: firstNonBackslashIndex)

if firstNonBackslashIndex != content.endIndex && content[firstNonBackslashIndex] == "\"" {
str += String(repeating: "\\", count: numberOfBackslashes / 2)
if numberOfBackslashes.isMultiple(of: 2) {
// We have an even number of backslashes. Just add the escaped backslashes to `str` and return to parse the
// quote in the outer function.
currentCharacterIndex = firstNonBackslashIndex
} else {
// We have an odd number of backslashes. The last backslash escapes the quote.
str += "\""
currentCharacterIndex = content.index(after: firstNonBackslashIndex)
}
return str
}

// The sequence of backslashes is not followed by quotes. Interpret them literally.
str += String(repeating: "\\", count: numberOfBackslashes)
currentCharacterIndex = firstNonBackslashIndex
return str
}
}

// Sometimes, this function will be handling a full command line including an
// executable pathname at the start. In that situation, the initial pathname
// needs different handling from the following arguments, because when
// CreateProcess or cmd.exe scans the pathname, it doesn't treat \ as
// escaping the quote character, whereas when libc scans the rest of the
// command line, it does.
public func splitWindowsCommandLine(_ cmd: String, initialCommandName: Bool) -> [String] {
var parser = WindowsCommandParser(cmd, initialCommandName: initialCommandName)
return parser.parse()
}

0 comments on commit 9ab9744

Please sign in to comment.