From e6494535b93f0dfc3113eb5f0315ac1dd9ce99be Mon Sep 17 00:00:00 2001 From: "Bryan C. Mills" Date: Wed, 20 Sep 2023 15:40:28 -0400 Subject: [PATCH] windows: convert TestCommandLineRecomposition to a fuzz test and fix discrepancies Notably, this fixes the escaping of the first argument when it contains quoted spaces, and fixes a panic in DecomposeCommandLine when it contains more than 8192 arguments. Fixes golang/go#58817. For golang/go#17149. For golang/go#63236. Change-Id: Ib72913b8182998adc1420d73ee0f9dc017dfbf32 Reviewed-on: https://go-review.googlesource.com/c/sys/+/530275 LUCI-TryBot-Result: Go LUCI Reviewed-by: Quim Muntal Reviewed-by: Than McIntosh Auto-Submit: Bryan Mills --- windows/exec_windows.go | 78 ++++++++++--- windows/syscall_windows_test.go | 189 +++++++++++++++++++++----------- 2 files changed, 192 insertions(+), 75 deletions(-) diff --git a/windows/exec_windows.go b/windows/exec_windows.go index a52e0331d..d1baeb244 100644 --- a/windows/exec_windows.go +++ b/windows/exec_windows.go @@ -22,7 +22,7 @@ import ( // but only if there is space or tab inside s. func EscapeArg(s string) string { if len(s) == 0 { - return "\"\"" + return `""` } n := len(s) hasSpace := false @@ -35,7 +35,7 @@ func EscapeArg(s string) string { } } if hasSpace { - n += 2 + n += 2 // Reserve space for quotes. } if n == len(s) { return s @@ -82,20 +82,68 @@ func EscapeArg(s string) string { // in CreateProcess's CommandLine argument, CreateService/ChangeServiceConfig's BinaryPathName argument, // or any program that uses CommandLineToArgv. func ComposeCommandLine(args []string) string { - var commandLine string - for i := range args { - if i > 0 { - commandLine += " " + if len(args) == 0 { + return "" + } + + // Per https://learn.microsoft.com/en-us/windows/win32/api/shellapi/nf-shellapi-commandlinetoargvw: + // “This function accepts command lines that contain a program name; the + // program name can be enclosed in quotation marks or not.” + // + // Unfortunately, it provides no means of escaping interior quotation marks + // within that program name, and we have no way to report them here. + prog := args[0] + mustQuote := len(prog) == 0 + for i := 0; i < len(prog); i++ { + c := prog[i] + if c <= ' ' || (c == '"' && i == 0) { + // Force quotes for not only the ASCII space and tab as described in the + // MSDN article, but also ASCII control characters. + // The documentation for CommandLineToArgvW doesn't say what happens when + // the first argument is not a valid program name, but it empirically + // seems to drop unquoted control characters. + mustQuote = true + break + } + } + var commandLine []byte + if mustQuote { + commandLine = make([]byte, 0, len(prog)+2) + commandLine = append(commandLine, '"') + for i := 0; i < len(prog); i++ { + c := prog[i] + if c == '"' { + // This quote would interfere with our surrounding quotes. + // We have no way to report an error, so just strip out + // the offending character instead. + continue + } + commandLine = append(commandLine, c) + } + commandLine = append(commandLine, '"') + } else { + if len(args) == 1 { + // args[0] is a valid command line representing itself. + // No need to allocate a new slice or string for it. + return prog } - commandLine += EscapeArg(args[i]) + commandLine = []byte(prog) } - return commandLine + + for _, arg := range args[1:] { + commandLine = append(commandLine, ' ') + // TODO(bcmills): since we're already appending to a slice, it would be nice + // to avoid the intermediate allocations of EscapeArg. + // Perhaps we can factor out an appendEscapedArg function. + commandLine = append(commandLine, EscapeArg(arg)...) + } + return string(commandLine) } // DecomposeCommandLine breaks apart its argument command line into unescaped parts using CommandLineToArgv, // as gathered from GetCommandLine, QUERY_SERVICE_CONFIG's BinaryPathName argument, or elsewhere that // command lines are passed around. -// DecomposeCommandLine returns error if commandLine contains NUL. +// DecomposeCommandLine returns an error if commandLine contains NUL. func DecomposeCommandLine(commandLine string) ([]string, error) { if len(commandLine) == 0 { return []string{}, nil @@ -105,14 +153,18 @@ func DecomposeCommandLine(commandLine string) ([]string, error) { return nil, errorspkg.New("string with NUL passed to DecomposeCommandLine") } var argc int32 - argv, err := CommandLineToArgv(&utf16CommandLine[0], &argc) + argv8192, err := CommandLineToArgv(&utf16CommandLine[0], &argc) if err != nil { return nil, err } - defer LocalFree(Handle(unsafe.Pointer(argv))) + defer LocalFree(Handle(unsafe.Pointer(argv8192))) + var args []string - for _, v := range (*argv)[:argc] { - args = append(args, UTF16ToString((*v)[:])) + // Note: CommandLineToArgv hard-codes an incorrect return type + // (see https://go.dev/issue/63236). + // We use an unsafe.Pointer conversion here to work around it. + for _, p := range unsafe.Slice((**uint16)(unsafe.Pointer(argv8192)), argc) { + args = append(args, UTF16PtrToString(p)) } return args, nil } diff --git a/windows/syscall_windows_test.go b/windows/syscall_windows_test.go index f89964925..a8e7be43b 100644 --- a/windows/syscall_windows_test.go +++ b/windows/syscall_windows_test.go @@ -10,7 +10,6 @@ import ( "debug/pe" "errors" "fmt" - "math/rand" "os" "path/filepath" "runtime" @@ -19,6 +18,7 @@ import ( "syscall" "testing" "time" + "unicode/utf8" "unsafe" "golang.org/x/sys/windows" @@ -562,78 +562,143 @@ func TestResourceExtraction(t *testing.T) { } } -func TestCommandLineRecomposition(t *testing.T) { - const ( - maxCharsPerArg = 35 - maxArgsPerTrial = 80 - doubleQuoteProb = 4 - singleQuoteProb = 1 - backSlashProb = 3 - spaceProb = 1 - trials = 1000 - ) - randString := func(l int) []rune { - s := make([]rune, l) - for i := range s { - s[i] = rand.Int31() - } - return s - } - mungeString := func(s []rune, char rune, timesInTen int) { - if timesInTen < rand.Intn(10)+1 || len(s) == 0 { - return - } - s[rand.Intn(len(s))] = char - } - argStorage := make([]string, maxArgsPerTrial+1) - for i := 0; i < trials; i++ { - args := argStorage[:rand.Intn(maxArgsPerTrial)+2] - args[0] = "valid-filename-for-arg0" - for j := 1; j < len(args); j++ { - arg := randString(rand.Intn(maxCharsPerArg + 1)) - mungeString(arg, '"', doubleQuoteProb) - mungeString(arg, '\'', singleQuoteProb) - mungeString(arg, '\\', backSlashProb) - mungeString(arg, ' ', spaceProb) - args[j] = string(arg) +func FuzzComposeCommandLine(f *testing.F) { + f.Add(`C:\foo.exe /bar /baz "-bag qux"`) + f.Add(`"C:\Program Files\Go\bin\go.exe" env`) + f.Add(`C:\"Program Files"\Go\bin\go.exe env`) + f.Add(`C:\"Program Files"\Go\bin\go.exe env`) + f.Add(`C:\"Pro"gram Files\Go\bin\go.exe env`) + f.Add(``) + f.Add(` `) + f.Add(`W\"0`) + f.Add("\"\f") + f.Add("\f") + f.Add("\x16") + f.Add(`"" ` + strings.Repeat("a", 8193)) + f.Add(strings.Repeat(`"" `, 8193)) + + f.Add("\x00abcd") + f.Add("ab\x00cd") + f.Add("abcd\x00") + f.Add("\x00abcd\x00") + f.Add("\x00ab\x00cd\x00") + f.Add("\x00\x00\x00") + f.Add("\x16\x00\x16") + f.Add(`C:\Program Files\Go\bin\go.exe` + "\x00env") + f.Add(`"C:\Program Files\Go\bin\go.exe"` + "\x00env") + f.Add(`C:\"Program Files"\Go\bin\go.exe` + "\x00env") + f.Add(`C:\"Pro"gram Files\Go\bin\go.exe` + "\x00env") + f.Add("\x00" + strings.Repeat("a", 8192)) + f.Add(strings.Repeat("\x00"+strings.Repeat("a", 8192), 4)) + + f.Fuzz(func(t *testing.T, s string) { + // DecomposeCommandLine is the “control” for our experiment: + // if it returns a particular list of arguments, then we know + // it must be possible to create an input string that produces + // exactly those arguments. + // + // However, DecomposeCommandLine returns an error if the string + // contains a NUL byte. In that case, we will fall back to + // strings.Split, and be a bit more permissive about the results. + args, err := windows.DecomposeCommandLine(s) + argsFromSplit := false + + if err == nil { + if testing.Verbose() { + t.Logf("DecomposeCommandLine(%#q) = %#q", s, args) + } + } else { + t.Logf("DecomposeCommandLine: %v", err) + if !strings.Contains(s, "\x00") { + // The documentation for CommandLineToArgv takes for granted that + // the first argument is a valid file path, and doesn't describe any + // specific behavior for malformed arguments. Empirically it seems to + // tolerate anything we throw at it, but if we discover cases where it + // actually returns an error we might need to relax this check. + t.Fatal("(error unexpected)") + } + + // Since DecomposeCommandLine can't handle this string, + // interpret it as the raw arguments to ComposeCommandLine. + args = strings.Split(s, "\x00") + argsFromSplit = true + for i, arg := range args { + if !utf8.ValidString(arg) { + // We need to encode the arguments as UTF-16 to pass them to + // CommandLineToArgvW, so skip inputs that are not valid: they might + // have one or more runes converted to replacement characters. + t.Skipf("skipping: input %d is not valid UTF-8", i) + } + if len(arg) > 8192 { + // CommandLineToArgvW seems to truncate each argument after 8192 + // UTF-16 code units, although this behavior is not documented. Since + // it isn't documented, we shouldn't rely on it one way or the other, + // so skip the input to tell the fuzzer to try a different approach. + enc, _ := windows.UTF16FromString(arg) + if len(enc) > 8192 { + t.Skipf("skipping: input %d encodes to more than 8192 UTF-16 code units", i) + } + } + } + if testing.Verbose() { + t.Logf("using input: %#q", args) + } } + + // It's ok if we compose a different command line than what was read. + // Just check that we are able to compose something that round-trips + // to the same results as the original. commandLine := windows.ComposeCommandLine(args) - decomposedArgs, err := windows.DecomposeCommandLine(commandLine) + t.Logf("ComposeCommandLine(_) = %#q", commandLine) + + got, err := windows.DecomposeCommandLine(commandLine) if err != nil { - t.Errorf("Unable to decompose %#q made from %v: %v", commandLine, args, err) - continue + t.Fatalf("DecomposeCommandLine: unexpected error: %v", err) } - if len(decomposedArgs) != len(args) { - t.Errorf("Incorrect decomposition length from %v to %#q to %v", args, commandLine, decomposedArgs) - continue + if testing.Verbose() { + t.Logf("DecomposeCommandLine(_) = %#q", got) } - badMatches := make([]int, 0, len(args)) + + var badMatches []int for i := range args { - if args[i] != decomposedArgs[i] { + if i >= len(got) { + badMatches = append(badMatches, i) + continue + } + want := args[i] + if got[i] != want { + if i == 0 && argsFromSplit { + // It is possible that args[0] cannot be encoded exactly, because + // CommandLineToArgvW doesn't unescape that argument in the same way + // as the others: since the first argument is assumed to be the name + // of the program itself, we only have the option of quoted or not. + // + // If args[0] contains a space or control character, we must quote it + // to avoid it being split into multiple arguments. + // If args[0] already starts with a quote character, we have no way + // to indicate that that character is part of the literal argument. + // In either case, if the string already contains a quote character + // we must avoid misinterpriting that character as the end of the + // quoted argument string. + // + // Unfortunately, ComposeCommandLine does not return an error, so we + // can't report existing quote characters as errors. + // Instead, we strip out the problematic quote characters from the + // argument, and quote the remainder. + // For paths like C:\"Program Files"\Go\bin\go.exe that is arguably + // what the caller intended anyway, and for other strings it seems + // less harmful than corrupting the subsequent arguments. + if got[i] == strings.ReplaceAll(want, `"`, ``) { + continue + } + } badMatches = append(badMatches, i) } } if len(badMatches) != 0 { - t.Errorf("Incorrect decomposition at indices %v from %v to %#q to %v", badMatches, args, commandLine, decomposedArgs) - continue + t.Errorf("Incorrect decomposition at indices: %v", badMatches) } - } - - // check that windows.DecomposeCommandLine returns error for strings with NUL - testsWithNUL := []string{ - "\x00abcd", - "ab\x00cd", - "abcd\x00", - "\x00abcd\x00", - "\x00ab\x00cd\x00", - "\x00\x00\x00", - } - for _, test := range testsWithNUL { - _, err := windows.DecomposeCommandLine(test) - if err == nil { - t.Errorf("Failed to return error while decomposing %#q string with NUL inside", test) - } - } + }) } func TestWinVerifyTrust(t *testing.T) {