From deef8c5e4c9b636e731006fad651adc80606f489 Mon Sep 17 00:00:00 2001 From: jonathanmetzman <31354670+jonathanmetzman@users.noreply.github.com> Date: Mon, 17 Jun 2024 21:06:12 -0400 Subject: [PATCH] POC: Speed up compilation by freezing container during compilation. (#11940) Instead of rebuilding the entire project every time we want to compile a single fuzz target, a better workflow is to build the project once and somehow compile the target against the already compiled project code. This POC does that by interrupting the build when it detects it is compiling the fuzz target. On detection it does the following: 1. Writes the command to /out/statefile TODO: write the cwd. 2. Commits the current container as "frozen" for use later. TODO: make this changeable. 3. Returns 1 so compilation stops. TODO: It would be better to exit the container. This step may be important to prevent clean up of the environment. Then the frozen container can be used to compile fuzz targets against the project without recompiling the project in its entirety. TODO: 1. Support this in oss-fuzz-gen 2. Install docker command line tool in base-builder (or use sneaky inheritance) because it must be used within the container. 3. Automate the compilation of the new fuzz target --- infra/base-images/base-builder/Dockerfile | 6 +- .../base-builder/jcc/build_jcc.bash | 6 +- infra/base-images/base-builder/jcc/jcc.go | 64 +-- infra/base-images/base-builder/jcc/jcc2.go | 401 ++++++++++++++++++ projects/skcms/Dockerfile | 6 + 5 files changed, 416 insertions(+), 67 deletions(-) create mode 100644 infra/base-images/base-builder/jcc/jcc2.go diff --git a/infra/base-images/base-builder/Dockerfile b/infra/base-images/base-builder/Dockerfile index 8dcbdce6cc5c..6424e1aaa441 100644 --- a/infra/base-images/base-builder/Dockerfile +++ b/infra/base-images/base-builder/Dockerfile @@ -170,8 +170,10 @@ COPY bazel_build_fuzz_tests \ # TODO: Build this as part of a multi-stage build. 
ADD https://commondatastorage.googleapis.com/clusterfuzz-builds/jcc/clang-jcc /usr/local/bin/ -ADD https://commondatastorage.googleapis.com/clusterfuzz-builds/jcc/clang++-jcc /usr/local/bin/ -RUN chmod +x /usr/local/bin/clang-jcc && chmod +x /usr/local/bin/clang++-jcc +ADD https://commondatastorage.googleapis.com/clusterfuzz-builds/jcc/clang++-jcc /usr/local/bin +ADD https://commondatastorage.googleapis.com/clusterfuzz-builds/jcc/clang-jcc2 /usr/local/bin/ +ADD https://commondatastorage.googleapis.com/clusterfuzz-builds/jcc/clang++-jcc2 /usr/local/bin +RUN chmod +x /usr/local/bin/clang-jcc /usr/local/bin/clang++-jcc /usr/local/bin/clang-jcc2 /usr/local/bin/clang++-jcc2 COPY llvmsymbol.diff $SRC COPY detect_repo.py /opt/cifuzz/ diff --git a/infra/base-images/base-builder/jcc/build_jcc.bash b/infra/base-images/base-builder/jcc/build_jcc.bash index 7b20ab87aa21..6a62c22890f0 100755 --- a/infra/base-images/base-builder/jcc/build_jcc.bash +++ b/infra/base-images/base-builder/jcc/build_jcc.bash @@ -17,7 +17,9 @@ ################################################################################ go build jcc.go -cp jcc clang -cp jcc clang++ +go build jcc2.go gsutil cp jcc gs://clusterfuzz-builds/jcc/clang++-jcc gsutil cp jcc gs://clusterfuzz-builds/jcc/clang-jcc + +gsutil cp jcc2 gs://clusterfuzz-builds/jcc/clang++-jcc2 +gsutil cp jcc2 gs://clusterfuzz-builds/jcc/clang-jcc2 diff --git a/infra/base-images/base-builder/jcc/jcc.go b/infra/base-images/base-builder/jcc/jcc.go index d11dfef9a4ff..0101831e64e4 100644 --- a/infra/base-images/base-builder/jcc/jcc.go +++ b/infra/base-images/base-builder/jcc/jcc.go @@ -25,7 +25,6 @@ import ( "os/exec" "path/filepath" "regexp" - "slices" "strings" ) @@ -169,63 +168,6 @@ func CorrectMissingHeaders(bin string, cmd []string) ([]string, bool, error) { return cmd, false, nil } -func EnsureDir(dirPath string) { - // Checks if a path is an existing directory, otherwise create one. 
- if pathInfo, err := os.Stat(dirPath); err == nil { - if isDir := pathInfo.IsDir(); !isDir { - panic(dirPath + " exists but is not a directory.") - } - } else if errors.Is(err, fs.ErrNotExist) { - if err := os.MkdirAll(dirPath, 0755); err != nil { - panic("Failed to create directory: " + dirPath + ".") - } - fmt.Println("Created directory: " + dirPath + ".") - } else { - panic("An error occurred in os.Stat(" + dirPath + "): " + err.Error()) - } -} - -func GenerateAST(bin string, args []string, filePath string) { - // Generates AST. - outFile, err := os.Create(filePath) - if err != nil { - fmt.Println(err) - } - defer outFile.Close() - - cmd := exec.Command(bin, args...) - cmd.Stdout = outFile - cmd.Run() -} - -func GenerateASTs(bin string, args []string, astDir string) { - // Generates an AST for each C/CPP file in the command. - // Cannot save AST when astDir is not available. - EnsureDir(astDir) - - // Target file suffixes. - suffixes := []string{".cpp", ".cc", ".cxx", ".c++", ".c", ".h", ".hpp"} - // C/CPP targets in the command. - targetFiles := []string{} - // Flags to generate AST. - flags := []string{"-Xclang", "-ast-dump=json", "-fsyntax-only"} - for _, arg := range args { - targetFileExt := strings.ToLower(filepath.Ext(arg)) - if slices.Contains(suffixes, targetFileExt) { - targetFiles = append(targetFiles, arg) - continue - } - flags = append(flags, arg) - } - - // Generate an AST for each target file. Skips AST generation when a - // command has no target file (e.g., during linking). - for _, targetFile := range targetFiles { - filePath := filepath.Join(astDir, fmt.Sprintf("%s.ast", filepath.Base(targetFile))) - GenerateAST(bin, append(flags, targetFile), filePath) - } -} - func ExecBuildCommand(bin string, args []string) (int, string, string) { // Executes the original command. cmd := exec.Command(bin, args...) 
@@ -238,10 +180,6 @@ func ExecBuildCommand(bin string, args []string) (int, string, string) { } func Compile(bin string, args []string) (int, string, string) { - // Generate ASTs f we define this ENV var. - if astDir := os.Getenv("JCC_GENERATE_AST_DIR"); astDir != "" { - GenerateASTs(bin, args, astDir) - } // Run the actual command. return ExecBuildCommand(bin, args) } @@ -360,7 +298,7 @@ func WriteStdErrOut(args []string, outstr string, errstr string) { fmt.Print(outstr) fmt.Fprint(os.Stderr, errstr) // Record what compile args produced the error and the error itself in log file. - AppendStringToFile("/workspace/err.log", fmt.Sprintf("%s\n", args) + errstr) + AppendStringToFile("/workspace/err.log", fmt.Sprintf("%s\n", args)+errstr) } func main() { diff --git a/infra/base-images/base-builder/jcc/jcc2.go b/infra/base-images/base-builder/jcc/jcc2.go new file mode 100644 index 000000000000..a2d9e73ea2f5 --- /dev/null +++ b/infra/base-images/base-builder/jcc/jcc2.go @@ -0,0 +1,401 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "io/fs" + "io/ioutil" + "log" + "os" + "os/exec" + "path/filepath" + "regexp" + "strings" +) + +var MaxMissingHeaderFiles = 10 +var CppifyHeadersMagicString = "\n/* JCCCppifyHeadersMagicString */\n" + +func CopyFile(src string, dst string) { + contents, err := ioutil.ReadFile(src) + if err != nil { + panic(err) + } + err = ioutil.WriteFile(dst, contents, 0644) + if err != nil { + panic(err) + } +} + +func TryFixCCompilation(cmdline []string) ([]string, int, string, string) { + var newFile string = "" + for i, arg := range cmdline { + if !strings.HasSuffix(arg, ".c") { + continue + } + if _, err := os.Stat(arg); errors.Is(err, os.ErrNotExist) { + continue + } + newFile = strings.TrimSuffix(arg, ".c") + newFile += ".cpp" + CopyFile(arg, newFile) + CppifyHeaderIncludesFromFile(newFile) + cmdline[i] = newFile + break + } + if newFile == "" { + return []string{}, 1, "", "" + } + cppBin := "clang++" + newCmdline := []string{"-stdlib=libc++"} + newCmdline = append(cmdline, newCmdline...) + newFullArgs := append([]string{cppBin}, newCmdline...) + + retcode, out, err := Compile(cppBin, newCmdline) + if retcode == 0 { + return newFullArgs, retcode, out, err + } + correctedCmdline, corrected, _ := CorrectMissingHeaders(cppBin, newCmdline) + if corrected { + return append([]string{cppBin}, correctedCmdline...), 0, "", "" + } + return newFullArgs, retcode, out, err +} + +func ExtractMissingHeader(compilerOutput string) (string, bool) { + r := regexp.MustCompile(`fatal error: ['|<](?P
[a-zA-z0-9\/\.]+)['|>] file not found`) + matches := r.FindStringSubmatch(compilerOutput) + if len(matches) == 0 { + return "", false + } + return matches[1], true +} + +func ReplaceMissingHeaderInFile(srcFilename, curHeader, replacementHeader string) error { + srcFile, err := os.Open(srcFilename) + if err != nil { + return err + } + srcBytes, err := ioutil.ReadAll(srcFile) + if err != nil { + return err + } + src := string(srcBytes) + newSrc := ReplaceMissingHeader(src, curHeader, replacementHeader) + b := []byte(newSrc) + err = ioutil.WriteFile(srcFilename, b, 0644) + if err != nil { + return err + } + return nil +} + +func ReplaceMissingHeader(src, curHeader, replacementHeader string) string { + re := regexp.MustCompile(`#include ["|<]` + curHeader + `["|>]\n`) + replacement := "#include \"" + replacementHeader + "\"\n" + return re.ReplaceAllString(src, replacement) +} + +func GetHeaderCorrectedFilename(compilerErr string) (string, string, bool) { + re := regexp.MustCompile(`(?P[a-z\/\-\_0-9A-z\.]+):.* fatal error: .* file not found`) + matches := re.FindStringSubmatch(compilerErr) + if len(matches) < 2 { + return "", "", false + } + oldFilename := matches[1] + base := filepath.Base(oldFilename) + root := filepath.Dir(oldFilename) + newFilename := root + "/jcc-corrected-" + base + return oldFilename, newFilename, true +} + +func GetHeaderCorrectedCmd(cmd []string, compilerErr string) ([]string, string, error) { + oldFilename, newFilename, success := GetHeaderCorrectedFilename(compilerErr) + if !success { + return cmd, "", errors.New("Couldn't find buggy file") + } + // Make new cmd. 
+ newCmd := make([]string, len(cmd)) + for i, part := range cmd { + newCmd[i] = part + } + found := false + for i, filename := range newCmd { + if filename == oldFilename { + newCmd[i] = newFilename + found = true + break + } + } + CopyFile(oldFilename, newFilename) + if found { + return newCmd, newFilename, nil + } + return cmd, "", errors.New("Couldn't find file") +} + +func CorrectMissingHeaders(bin string, cmd []string) ([]string, bool, error) { + + _, _, stderr := Compile(bin, cmd) + cmd, correctedFilename, err := GetHeaderCorrectedCmd(cmd, stderr) + if err != nil { + return cmd, false, err + } + for i := 0; i < MaxMissingHeaderFiles; i++ { + fixed, hasBrokenHeaders := TryCompileAndFixHeadersOnce(bin, cmd, correctedFilename) + if fixed { + return cmd, true, nil + } + if !hasBrokenHeaders { + return cmd, false, nil + } + } + return cmd, false, nil +} + +func ExecBuildCommand(bin string, args []string) (int, string, string) { + // Executes the original command. + cmd := exec.Command(bin, args...) + var outb, errb bytes.Buffer + cmd.Stdout = &outb + cmd.Stderr = &errb + cmd.Stdin = os.Stdin + cmd.Run() + return cmd.ProcessState.ExitCode(), outb.String(), errb.String() +} + +func Compile(bin string, args []string) (int, string, string) { + // Run the actual command. 
+ return ExecBuildCommand(bin, args) +} + +func TryCompileAndFixHeadersOnce(bin string, cmd []string, filename string) (fixed, hasBrokenHeaders bool) { + retcode, _, err := Compile(bin, cmd) + if retcode == 0 { + fixed = true + hasBrokenHeaders = false + return + } + missingHeader, isMissing := ExtractMissingHeader(err) + if !isMissing { + fixed = false + hasBrokenHeaders = false + return + } + + newHeaderPath, found := FindMissingHeader(missingHeader) + if !found { + fixed = false + hasBrokenHeaders = true + return false, true + } + ReplaceMissingHeaderInFile(filename, missingHeader, newHeaderPath) + return false, true +} + +func FindMissingHeader(missingHeader string) (string, bool) { + envVar := "JCC_MISSING_HEADER_SEARCH_PATH" + var searchPath string + searchPath, exists := os.LookupEnv(envVar) + if !exists { + searchPath = "/src" + } + searchPath, _ = filepath.Abs(searchPath) + var headerLocation string + missingHeader = "/" + missingHeader + find := func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if d.IsDir() { + return nil + } + if strings.HasSuffix(path, missingHeader) { + headerLocation = path + return nil + } + return nil + } + filepath.WalkDir(searchPath, find) + if headerLocation == "" { + return "", false + } + return headerLocation, true +} + +func CppifyHeaderIncludesFromFile(srcFile string) error { + contentsBytes, err := ioutil.ReadFile(srcFile) + if err != nil { + return err + } + contents := string(contentsBytes[:]) + contents, err = CppifyHeaderIncludes(contents) + if err != nil { + return err + } + b := []byte(contents) + err = ioutil.WriteFile(srcFile, b, 0644) + return err +} + +func CppifyHeaderIncludes(contents string) (string, error) { + shouldCppify, exists := os.LookupEnv("JCC_CPPIFY_PROJECT_HEADERS") + if !exists || strings.Compare(shouldCppify, "0") == 0 { + return contents, nil + } + if strings.Contains(contents, CppifyHeadersMagicString) { + return contents, nil + } + re := 
regexp.MustCompile(`\#include \"(?P
.+)\"\n`) + matches := re.FindAllStringSubmatch(contents, -1) + if len(matches) == 0 { + return "", nil // !!! + } + for i, match := range matches { + if i == 0 { + // So we don't cppify twice. + contents += CppifyHeadersMagicString + } + oldStr := match[0] + replacement := "extern \"C\" {\n#include \"" + match[1] + "\"\n}\n" + contents = strings.Replace(contents, oldStr, replacement, 1) + if strings.Compare(contents, "") == 0 { + panic("Failed to replace") + } + } + return contents, nil +} + +func AppendStringToFile(filepath, new_content string) error { + // Appends |new_content| to the content of |filepath|. + file, err := os.OpenFile(filepath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return err + } + defer file.Close() + + _, err = file.WriteString(new_content) + return err +} + +func WriteStdErrOut(args []string, outstr string, errstr string) { + // Prints |outstr| to stdout, prints |errstr| to stderr, and saves |errstr| to err.log. + fmt.Print(outstr) + fmt.Fprint(os.Stderr, errstr) + // Record what compile args produced the error and the error itself in log file. + AppendStringToFile("/workspace/err.log", fmt.Sprintf("%s\n", args)+errstr) +} + +func main() { + f, err := os.OpenFile("/tmp/jcc.log", os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + log.Println(err) + } + defer f.Close() + if _, err := f.WriteString(fmt.Sprintf("%s\n", os.Args)); err != nil { + log.Println(err) + } + + args := os.Args[1:] + if args[0] == "unfreeze" { + fmt.Println("unfreeze") + unfreeze() + } + basename := filepath.Base(os.Args[0]) + isCPP := basename == "clang++-jcc" + newArgs := append(args, "-w") + + var bin string + if isCPP { + bin = "clang++" + newArgs = append(args, "-stdlib=libc++") + } else { + bin = "clang" + } + fullCmdArgs := append([]string{bin}, newArgs...) 
+ if IsCompilingTarget(fullCmdArgs) { + WriteTargetArgsAndCommitImage(fullCmdArgs) + os.Exit(0) + } + retcode, out, errstr := Compile(bin, newArgs) + WriteStdErrOut(fullCmdArgs, out, errstr) + os.Exit(retcode) +} + +type BuildCommand struct { + CWD string `json:"CWD"` + CMD []string `json:"CMD"` +} + +func WriteTargetArgsAndCommitImage(cmdline []string) { + log.Println("WRITE COMMAND") + f, _ := os.OpenFile("/out/statefile.json", os.O_CREATE|os.O_WRONLY, 0644) + wd, _ := os.Getwd() + buildcmd := BuildCommand{ + CWD: wd, + CMD: cmdline, + } + jsonData, _ := json.Marshal(buildcmd) + f.Write(jsonData) + f.Close() + hostname, _ := os.Hostname() + dockerArgs := []string{"commit", hostname, "frozen"} + cmd := exec.Command("docker", dockerArgs...) + var outb, errb bytes.Buffer + cmd.Stdout = &outb + cmd.Stderr = &errb + cmd.Stdin = os.Stdin + cmd.Run() + fmt.Println(outb.String(), errb.String()) + fmt.Println("COMMIT IMAGE") +} + +func IsCompilingTarget(cmdline []string) bool { + for _, arg := range cmdline { + // This can fail if people do crazy things they aren't supposed + // to such as using some other means to link in libFuzzer. 
+ if arg == "-fsanitize=fuzzer" { + return true + } + if arg == "-lFuzzingEngine" { + return true + } + } + return false +} + +func parseCommand(command string) (string, []string) { + args := strings.Fields(command) + commandBin := args[0] + commandArgs := args[1:] + return commandBin, commandArgs +} + +func unfreeze() { + content, err := ioutil.ReadFile("/out/statefile.json") + if err != nil { + log.Fatal(err) + } + var command BuildCommand + json.Unmarshal(content, &command) + bin, args := parseCommand(strings.Join(command.CMD, " ")) + os.Chdir(command.CWD) + ExecBuildCommand(bin, args) + os.Exit(0) +} diff --git a/projects/skcms/Dockerfile b/projects/skcms/Dockerfile index f280e2806ca7..8ef43481d629 100644 --- a/projects/skcms/Dockerfile +++ b/projects/skcms/Dockerfile @@ -24,6 +24,12 @@ RUN wget -O $SRC/skcms/iccprofile_seed_corpus.zip https://storage.googleapis.com # current directory for build script WORKDIR skcms +RUN apt-get update && \ + apt-get install -y systemd && \ + wget https://download.docker.com/linux/ubuntu/dists/focal/pool/stable/amd64/docker-ce-cli_20.10.8~3-0~ubuntu-focal_amd64.deb -O /tmp/docker-ce.deb && \ + dpkg -i /tmp/docker-ce.deb && \ + rm /tmp/docker-ce.deb + COPY build.sh $SRC/ COPY iccprofile.options iccprofile.dict $SRC/skcms/ \ No newline at end of file