From f9e7003e9596d9c88eb46c056eb0cdf0ab0d8ccc Mon Sep 17 00:00:00 2001 From: phi-go Date: Fri, 22 Sep 2023 10:53:18 +0200 Subject: [PATCH 01/69] fix typo --- experiment/build/local_build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiment/build/local_build.py b/experiment/build/local_build.py index 068f059bc..aa42ebce6 100644 --- a/experiment/build/local_build.py +++ b/experiment/build/local_build.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Module for building things on Google Cloud Build for use in trials.""" +"""Module for building things locally for use in trials.""" import os from typing import Tuple From aaa049c471bff1f082f460f9cea1a2566dd1a7bd Mon Sep 17 00:00:00 2001 From: phi-go Date: Tue, 26 Sep 2023 13:30:18 +0200 Subject: [PATCH 02/69] add mutation_analysis image with gllvm --- docker/generate_makefile.py | 5 +- fuzzers/mutation_analysis/builder.Dockerfile | 126 +++++++++++++++++++ fuzzers/mutation_analysis/fuzzer.py | 47 +++++++ fuzzers/mutation_analysis/main.cc | 46 +++++++ 4 files changed, 222 insertions(+), 2 deletions(-) create mode 100644 fuzzers/mutation_analysis/builder.Dockerfile create mode 100644 fuzzers/mutation_analysis/fuzzer.py create mode 100644 fuzzers/mutation_analysis/main.cc diff --git a/docker/generate_makefile.py b/docker/generate_makefile.py index fff26cd92..7ae467f62 100755 --- a/docker/generate_makefile.py +++ b/docker/generate_makefile.py @@ -163,11 +163,12 @@ def generate_makefile(): for name, image in buildable_images.items(): makefile += get_rules_for_image(name, image) - # Print build targets for all fuzzer-benchmark pairs (including coverage). + # Print build targets for all fuzzer-benchmark pairs (including coverage and mutation_analysis). 
fuzzers.append('coverage') + fuzzers.append('mutation_analysis') for fuzzer in fuzzers: image_type = 'runner' - if 'coverage' in fuzzer: + if any(ss in fuzzer for ss in ['coverage', 'mutation_analysis']): image_type = 'builder' for benchmark in benchmarks: makefile += (f'build-{fuzzer}-{benchmark}: ' + diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile new file mode 100644 index 000000000..af9780e51 --- /dev/null +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -0,0 +1,126 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +ARG parent_image + + +FROM gcr.io/fuzzbench/base-image AS base-image + + +FROM $parent_image + + +# WORKDIR /home/ +# RUN mkdir -p downloads +# WORKDIR /home/downloads +# RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py +# RUN python3 get-pip.py + +RUN pip3 install wllvm + +# ENV PATH "/root/toolchains/build/llvm+clang-901-x86_64-linux-gnu_build/bin/:$PATH" +ENV LLVM_COMPILER "clang" + +RUN mkdir -p /tmp/gradle && \ + cd /tmp/gradle && \ + wget -q https://services.gradle.org/distributions/gradle-6.8-bin.zip && \ + unzip gradle-6.8-bin.zip && \ + mv gradle-6.8 /usr/local/gradle && \ + rm -r /tmp/gradle + +ENV PATH "/usr/local/gradle/bin/:$PATH" + +#### install gllvm +# WORKDIR /root/ + +# RUN wget -q -c https://dl.google.com/go/go1.16.15.linux-amd64.tar.gz -O - | tar -xz -C /usr/local + +# ENV PATH="${PATH}:/root/.cargo/bin:/usr/local/go/bin:/root/go/bin" + +# RUN go get github.com/SRI-CSL/gllvm/cmd/... + +ENV PATH="${PATH}:/root/.cargo/bin:/usr/local/go/bin:/root/go/bin" + +RUN mkdir /tmp/gllvm/ && \ + cd /tmp/gllvm/ && \ + wget -q -c https://dl.google.com/go/go1.16.15.linux-amd64.tar.gz -O - | tar -xz -C /usr/local && \ + go get github.com/SRI-CSL/gllvm/cmd/... 
&& \ + rm -r /tmp/gllvm/ + +# TODO remove +# copy main.cc to /home/mutator/programs/common/main.cc while framework is not done +COPY main.cc /home/mutator/dockerfiles/programs/common/main.cc + +# mutator + + +# WORKDIR /home/ + +# # RUN mkdir mutator +# WORKDIR /home/mutator + +# ARG DEBIAN_FRONTEND=noninteractive +# RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y openjdk-11-jdk zlib1g-dev + # cmake \ + # binutils-dev \ + # libcurl4-openssl-dev \ + # zlib1g-dev \ + # libdw-dev \ + # libiberty-dev \ + # libssl-dev \ + # libelf-dev \ + # libdw-dev \ + # libidn2-dev \ + # libidn2-0 \ + # idn2 \ + # libstdc++6 + +# RUN git clone https://github.com/CISPA-SysSec/mua_fuzzer_bench mutator +# COPY mua_fuzzer_bench /mutator + +# COPY modules /home/mutator/modules +# COPY build.gradle /home/mutator/ +# COPY run_mutation.py /home/mutator/ +# RUN chmod +x run_mutation.py +# COPY settings.gradle /home/mutator +# RUN cd /mutator && \ +# echo "llvmBinPath=/usr/local/bin/" > gradle.properties && \ +# gradle clean && \ +# gradle build + + +# # set library paths for used shared libraries s.t. the system finds them +# ENV LD_LIBRARY_PATH /home/mutator/build/install/LLVM_Mutation_Tool/lib/ +# # For all subjects provide the path to the default main here. This is based on oss-fuzz convention. +# ENV LIB_FUZZING_ENGINE="/home/mutator/programs/common/main.cc" +# ENV CC=gclang +# ENV CXX=gclang++ + +######## + +# ENV LF_PATH /tmp/libfuzzer.zip + +# # Use a libFuzzer version that supports clang source-based coverage. +# # This libfuzzer is 0b5e6b11c358e704384520dc036eddb5da1c68bf with +# # https://github.com/google/fuzzbench/blob/cf86138081ec705a47ce0a4bab07b5737292e7e0/fuzzers/coverage/patch.diff +# # applied. 
+ +# RUN wget https://storage.googleapis.com/fuzzbench-artifacts/libfuzzer-coverage.zip -O $LF_PATH && \ +# echo "cc78179f6096cae4b799d0cc9436f000cc0be9b1fb59500d16b14b1585d46b61 $LF_PATH" | sha256sum --check --status && \ +# mkdir /tmp/libfuzzer && \ +# cd /tmp/libfuzzer && \ +# unzip $LF_PATH && \ +# bash build.sh && \ +# cp libFuzzer.a /usr/lib && \ +# rm -rf /tmp/libfuzzer $LF_PATH \ No newline at end of file diff --git a/fuzzers/mutation_analysis/fuzzer.py b/fuzzers/mutation_analysis/fuzzer.py new file mode 100644 index 000000000..c7fe53244 --- /dev/null +++ b/fuzzers/mutation_analysis/fuzzer.py @@ -0,0 +1,47 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Integration code for clang source-based coverage builds.""" + +import os + +from fuzzers import utils + + +def build(): + # """Build benchmark.""" + cflags = [ + # '-fprofile-instr-generate', '-fcoverage-mapping', '-gline-tables-only' + ] + utils.append_flags('CFLAGS', cflags) + utils.append_flags('CXXFLAGS', cflags) + + os.environ['CC'] = 'gclang' + os.environ['CXX'] = 'gclang++' + os.environ['FUZZER_LIB'] = '/home/mutator/dockerfiles/programs/common/main.cc' + + # fuzzer_lib = env['FUZZER_LIB'] + # env['LIB_FUZZING_ENGINE'] = fuzzer_lib + # if os.path.exists(fuzzer_lib): + # # Make /usr/lib/libFuzzingEngine.a point to our library for OSS-Fuzz + # # so we can build projects that are using -lFuzzingEngine. 
+ # shutil.copy(fuzzer_lib, OSS_FUZZ_LIB_FUZZING_ENGINE_PATH) + + build_script = os.path.join(os.environ['SRC'], 'build.sh') + print(f"build_script: {build_script}") + + benchmark = os.getenv('BENCHMARK') + fuzzer = os.getenv('FUZZER') + print(f'Building benchmark {benchmark} with fuzzer {fuzzer}') + + utils.build_benchmark() diff --git a/fuzzers/mutation_analysis/main.cc b/fuzzers/mutation_analysis/main.cc new file mode 100644 index 000000000..13da28162 --- /dev/null +++ b/fuzzers/mutation_analysis/main.cc @@ -0,0 +1,46 @@ +#include +#include +#include +#include +#include + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size); + +int main(int argc, char** argv) { + FILE *fp; + int fd; + size_t size; + + if (argc < 1) { + fprintf(stderr, "Not enough arguments"); + exit(EXIT_FAILURE); + } + + fp = fopen(argv[1], "r"); + if (fp == NULL) { + perror("Could not open file\n."); + exit(EXIT_FAILURE); + } + + fseek(fp, 0L, SEEK_END); + size = ftell(fp); + + + fd = fileno(fp); + if (size == 0) { + printf("zero size: %zu\n", size); + const uint8_t data[1] = {0}; + LLVMFuzzerTestOneInput(data, 0); + return 0; + } + auto data = static_cast(mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0)); + if (data == (uint8_t*)-1) { + perror("Could not mmap file\n."); + exit(EXIT_FAILURE); + } + + /* Call function to be fuzzed, e.g.: */ + LLVMFuzzerTestOneInput(data, size); + + return 0; +} From e90cd5321a079c3dfffa83751e7bec6474203db5 Mon Sep 17 00:00:00 2001 From: phi-go Date: Mon, 2 Oct 2023 17:02:43 +0200 Subject: [PATCH 03/69] [WIP] mua framework integration --- .gitmodules | 4 ++ fuzzers/mutation_analysis/builder.Dockerfile | 56 +++++++++----------- fuzzers/mutation_analysis/fuzzer.py | 17 ++++-- fuzzers/mutation_analysis/main.cc | 46 ---------------- 4 files changed, 44 insertions(+), 79 deletions(-) delete mode 100644 fuzzers/mutation_analysis/main.cc diff --git a/.gitmodules b/.gitmodules index e69de29bb..5f5542b38 100644 --- a/.gitmodules +++ b/.gitmodules @@ 
-0,0 +1,4 @@ +[submodule "fuzzers/mutation_analysis/mua_fuzzer_bench"] + path = fuzzers/mutation_analysis/mua_fuzzer_bench + url = https://github.com/phi-go/mua_fuzzer_bench + branch = sbft diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index af9780e51..18b893ca8 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -20,6 +20,14 @@ FROM gcr.io/fuzzbench/base-image AS base-image FROM $parent_image +RUN apt-get update && apt-get install -y \ + lsb-release wget software-properties-common gnupg +RUN mkdir /llvm && \ + cd /llvm && \ + bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" && \ + wget https://apt.llvm.org/llvm.sh && \ + chmod +x llvm.sh && \ + ./llvm.sh 15 # WORKDIR /home/ # RUN mkdir -p downloads @@ -27,10 +35,10 @@ FROM $parent_image # RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py # RUN python3 get-pip.py -RUN pip3 install wllvm +RUN pip3 install wllvm # ENV PATH "/root/toolchains/build/llvm+clang-901-x86_64-linux-gnu_build/bin/:$PATH" -ENV LLVM_COMPILER "clang" +# ENV LLVM_COMPILER "clang" RUN mkdir -p /tmp/gradle && \ cd /tmp/gradle && \ @@ -42,36 +50,14 @@ RUN mkdir -p /tmp/gradle && \ ENV PATH "/usr/local/gradle/bin/:$PATH" #### install gllvm -# WORKDIR /root/ - -# RUN wget -q -c https://dl.google.com/go/go1.16.15.linux-amd64.tar.gz -O - | tar -xz -C /usr/local - -# ENV PATH="${PATH}:/root/.cargo/bin:/usr/local/go/bin:/root/go/bin" - -# RUN go get github.com/SRI-CSL/gllvm/cmd/... - ENV PATH="${PATH}:/root/.cargo/bin:/usr/local/go/bin:/root/go/bin" - RUN mkdir /tmp/gllvm/ && \ cd /tmp/gllvm/ && \ wget -q -c https://dl.google.com/go/go1.16.15.linux-amd64.tar.gz -O - | tar -xz -C /usr/local && \ go get github.com/SRI-CSL/gllvm/cmd/... 
&& \ rm -r /tmp/gllvm/ -# TODO remove -# copy main.cc to /home/mutator/programs/common/main.cc while framework is not done -COPY main.cc /home/mutator/dockerfiles/programs/common/main.cc - -# mutator - - -# WORKDIR /home/ - -# # RUN mkdir mutator -# WORKDIR /home/mutator - -# ARG DEBIAN_FRONTEND=noninteractive -# RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y openjdk-11-jdk zlib1g-dev +RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y openjdk-11-jdk zlib1g-dev file # cmake \ # binutils-dev \ # libcurl4-openssl-dev \ @@ -87,17 +73,27 @@ COPY main.cc /home/mutator/dockerfiles/programs/common/main.cc # libstdc++6 # RUN git clone https://github.com/CISPA-SysSec/mua_fuzzer_bench mutator -# COPY mua_fuzzer_bench /mutator +COPY mua_fuzzer_bench /mutator # COPY modules /home/mutator/modules # COPY build.gradle /home/mutator/ # COPY run_mutation.py /home/mutator/ # RUN chmod +x run_mutation.py # COPY settings.gradle /home/mutator -# RUN cd /mutator && \ -# echo "llvmBinPath=/usr/local/bin/" > gradle.properties && \ -# gradle clean && \ -# gradle build +RUN cd /mutator && \ + echo "llvmBinPath=/usr/lib/llvm-15/bin/" > gradle.properties +RUN cd /mutator && gradle clean && gradle build +# RUN ldconfig /mutator/build/install/LLVM_Mutation_Tool/lib/ + +RUN ln /usr/bin/llvm-link-15 /bin/llvm-link + +RUN apt-get update && apt-get install -y pipx python3.8-venv +RUN pipx install hatch + +RUN ln -s /mutator/exec-recorder.py /exec-recorder.py +RUN ln -s /exec-recorder.py /bin/gclang-wrap +RUN ln -s /exec-recorder.py /bin/gclang++-wrap +RUN ln -s /mutator/mua_build_benchmark.py /bin/mua_build_benchmark # # set library paths for used shared libraries s.t. 
the system finds them diff --git a/fuzzers/mutation_analysis/fuzzer.py b/fuzzers/mutation_analysis/fuzzer.py index c7fe53244..51f451fb1 100644 --- a/fuzzers/mutation_analysis/fuzzer.py +++ b/fuzzers/mutation_analysis/fuzzer.py @@ -14,21 +14,30 @@ """Integration code for clang source-based coverage builds.""" import os +import subprocess from fuzzers import utils +MUA_RECORDING_DB = '/tmp/execs.sqlite' + def build(): # """Build benchmark.""" cflags = [ # '-fprofile-instr-generate', '-fcoverage-mapping', '-gline-tables-only' + '-fPIE', ] utils.append_flags('CFLAGS', cflags) utils.append_flags('CXXFLAGS', cflags) - os.environ['CC'] = 'gclang' - os.environ['CXX'] = 'gclang++' - os.environ['FUZZER_LIB'] = '/home/mutator/dockerfiles/programs/common/main.cc' + os.environ['CC'] = 'gclang-wrap' + os.environ['CXX'] = 'gclang++-wrap' + os.environ['LLVM_COMPILER_PATH'] = '/usr/lib/llvm-15/bin/' + os.environ['FUZZER_LIB'] = '/mutator/dockerfiles/programs/common/main.cc' + os.environ['MUA_RECORDING_DB'] = MUA_RECORDING_DB + + if os.path.exists(MUA_RECORDING_DB): + os.unlink(MUA_RECORDING_DB) # fuzzer_lib = env['FUZZER_LIB'] # env['LIB_FUZZING_ENGINE'] = fuzzer_lib @@ -45,3 +54,5 @@ def build(): print(f'Building benchmark {benchmark} with fuzzer {fuzzer}') utils.build_benchmark() + + # subprocess.check_call(['/bin/mua_build_benchmark']) diff --git a/fuzzers/mutation_analysis/main.cc b/fuzzers/mutation_analysis/main.cc deleted file mode 100644 index 13da28162..000000000 --- a/fuzzers/mutation_analysis/main.cc +++ /dev/null @@ -1,46 +0,0 @@ -#include -#include -#include -#include -#include - -extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size); - -int main(int argc, char** argv) { - FILE *fp; - int fd; - size_t size; - - if (argc < 1) { - fprintf(stderr, "Not enough arguments"); - exit(EXIT_FAILURE); - } - - fp = fopen(argv[1], "r"); - if (fp == NULL) { - perror("Could not open file\n."); - exit(EXIT_FAILURE); - } - - fseek(fp, 0L, SEEK_END); - size = ftell(fp); 
- - - fd = fileno(fp); - if (size == 0) { - printf("zero size: %zu\n", size); - const uint8_t data[1] = {0}; - LLVMFuzzerTestOneInput(data, 0); - return 0; - } - auto data = static_cast(mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0)); - if (data == (uint8_t*)-1) { - perror("Could not mmap file\n."); - exit(EXIT_FAILURE); - } - - /* Call function to be fuzzed, e.g.: */ - LLVMFuzzerTestOneInput(data, size); - - return 0; -} From a0e2b1fd8d8c8e33937f89544429bb5a0f47f8af Mon Sep 17 00:00:00 2001 From: phi-go Date: Wed, 4 Oct 2023 17:09:31 +0200 Subject: [PATCH 04/69] [WIP] update-alternatives in mua image --- fuzzers/mutation_analysis/builder.Dockerfile | 34 ++++++++++++++++++-- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index 18b893ca8..90bc057a2 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -82,10 +82,35 @@ COPY mua_fuzzer_bench /mutator # COPY settings.gradle /home/mutator RUN cd /mutator && \ echo "llvmBinPath=/usr/lib/llvm-15/bin/" > gradle.properties -RUN cd /mutator && gradle clean && gradle build +# RUN cd /mutator && gradle clean && gradle build # RUN ldconfig /mutator/build/install/LLVM_Mutation_Tool/lib/ -RUN ln /usr/bin/llvm-link-15 /bin/llvm-link +# RUN ln /usr/bin/llvm-link-15 /bin/llvm-link +RUN update-alternatives --install \ + /usr/local/bin/llvm-config llvm-config /usr/lib/llvm-15/bin/llvm-config 200 \ + --slave /usr/local/bin/llvm-ar llvm-ar /usr/lib/llvm-15/bin/llvm-ar \ + --slave /usr/local/bin/llvm-as llvm-as /usr/lib/llvm-15/bin/llvm-as \ + --slave /usr/local/bin/llvm-bcanalyzer llvm-bcanalyzer /usr/lib/llvm-15/bin/llvm-bcanalyzer \ + --slave /usr/local/bin/llvm-cov llvm-cov /usr/lib/llvm-15/bin/llvm-cov \ + --slave /usr/local/bin/llvm-diff llvm-diff /usr/lib/llvm-15/bin/llvm-diff \ + --slave /usr/local/bin/llvm-dis llvm-dis /usr/lib/llvm-15/bin/llvm-dis \ + --slave 
/usr/local/bin/llvm-dwarfdump llvm-dwarfdump /usr/lib/llvm-15/bin/llvm-dwarfdump \ + --slave /usr/local/bin/llvm-extract llvm-extract /usr/lib/llvm-15/bin/llvm-extract \ + --slave /usr/local/bin/llvm-link llvm-link /usr/lib/llvm-15/bin/llvm-link \ + --slave /usr/local/bin/llvm-mc llvm-mc /usr/lib/llvm-15/bin/llvm-mc \ + --slave /usr/local/bin/llvm-mcmarkup llvm-mcmarkup /usr/lib/llvm-15/bin/llvm-mcmarkup \ + --slave /usr/local/bin/llvm-nm llvm-nm /usr/lib/llvm-15/bin/llvm-nm \ + --slave /usr/local/bin/llvm-objdump llvm-objdump /usr/lib/llvm-15/bin/llvm-objdump \ + --slave /usr/local/bin/llvm-ranlib llvm-ranlib /usr/lib/llvm-15/bin/llvm-ranlib \ + --slave /usr/local/bin/llvm-readobj llvm-readobj /usr/lib/llvm-15/bin/llvm-readobj \ + --slave /usr/local/bin/llvm-rtdyld llvm-rtdyld /usr/lib/llvm-15/bin/llvm-rtdyld \ + --slave /usr/local/bin/llvm-size llvm-size /usr/lib/llvm-15/bin/llvm-size \ + --slave /usr/local/bin/llvm-stress llvm-stress /usr/lib/llvm-15/bin/llvm-stress \ + --slave /usr/local/bin/llvm-symbolizer llvm-symbolizer /usr/lib/llvm-15/bin/llvm-symbolizer \ + --slave /usr/local/bin/llvm-tblgen llvm-tblgen /usr/lib/llvm-15/bin/llvm-tblgen \ + --slave /usr/local/bin/lld lld /usr/lib/llvm-15/bin/lld \ + --slave /usr/local/bin/clang clang /usr/lib/llvm-15/bin/clang \ + --slave /usr/local/bin/clang++ clang++ /usr/lib/llvm-15/bin/clang++ RUN apt-get update && apt-get install -y pipx python3.8-venv RUN pipx install hatch @@ -119,4 +144,7 @@ RUN ln -s /mutator/mua_build_benchmark.py /bin/mua_build_benchmark # unzip $LF_PATH && \ # bash build.sh && \ # cp libFuzzer.a /usr/lib && \ -# rm -rf /tmp/libfuzzer $LF_PATH \ No newline at end of file +# rm -rf /tmp/libfuzzer $LF_PATH + + +# clear && fuzzer_build && mua_build_benchmark && pushd /mutator && gradle build && ldconfig /mutator/build/install/LLVM_Mutation_Tool/lib/ && pipx run hatch run src/mua_fuzzer_benchmark/eval.py locator_local --config-path /tmp/config.json --result-path /tmp/test/ ; popd \ No newline at end 
of file From aeee7a6601c2589be4156e00ae0eb8ae2c31f88c Mon Sep 17 00:00:00 2001 From: qy53zuny Date: Thu, 12 Oct 2023 15:38:19 +0200 Subject: [PATCH 05/69] [WIP] integrate mutation analysis into fuzzbench, build mua image --- experiment/build/build_utils.py | 4 +++ experiment/build/builder.py | 13 ++++++++ experiment/build/local_build.py | 42 ++++++++++++++++++++++++++ experiment/measurer/measure_manager.py | 30 ++++++++++++++++++ fuzzers/mutation_analysis/fuzzer.py | 3 ++ 5 files changed, 92 insertions(+) diff --git a/experiment/build/build_utils.py b/experiment/build/build_utils.py index 94f1c1b7e..db7516adb 100644 --- a/experiment/build/build_utils.py +++ b/experiment/build/build_utils.py @@ -37,6 +37,10 @@ def get_coverage_binaries_dir(): """Return coverage binaries directory.""" return exp_path.path('coverage-binaries') +def get_mua_binaries_dir(): + """Return mua finder binaries directory.""" + return exp_path.path('mua-binaries') + def get_build_logs_dir(): """Return build logs directory.""" diff --git a/experiment/build/builder.py b/experiment/build/builder.py index bdfbd6c71..526416b5b 100644 --- a/experiment/build/builder.py +++ b/experiment/build/builder.py @@ -104,6 +104,17 @@ def build_measurer(benchmark: str) -> bool: except Exception: # pylint: disable=broad-except logger.error('Failed to build measurer for %s.', benchmark) return False + +def build_mua(benchmark: str) -> bool: + """Do a mutation analysis build for a benchmark.""" + try: + logger.info('Building mua measurer for benchmark: %s.', benchmark) + buildlib.build_mua(benchmark) + logs.info('Done building mua measurer for benchmark: %s.', benchmark) + return True + except Exception: # pylint: disable=broad-except + logger.error('Failed to build mua measurer for %s.', benchmark) + return False def build_all_measurers(benchmarks: List[str]) -> List[str]: @@ -113,6 +124,8 @@ def build_all_measurers(benchmarks: List[str]) -> List[str]: 
filesystem.recreate_directory(build_utils.get_coverage_binaries_dir()) build_measurer_args = [(benchmark,) for benchmark in benchmarks] successful_calls = retry_build_loop(build_measurer, build_measurer_args) + # build mua measurer + retry_build_loop(build_mua, build_measurer_args) logger.info('Done building measurers.') # Return list of benchmarks (like the list we were passed as an argument) # instead of returning a list of tuples each containing a benchmark. diff --git a/experiment/build/local_build.py b/experiment/build/local_build.py index aa42ebce6..9ce1b9851 100644 --- a/experiment/build/local_build.py +++ b/experiment/build/local_build.py @@ -43,6 +43,10 @@ def get_shared_coverage_binaries_dir(): experiment_filestore_path = experiment_utils.get_experiment_filestore_path() return os.path.join(experiment_filestore_path, 'coverage-binaries') +def get_shared_mua_binaries_dir(): + """Returns the shared mua binaries directory.""" + experiment_filestore_path = experiment_utils.get_experiment_filestore_path() + return os.path.join(experiment_filestore_path, 'mua-binaries') def make_shared_coverage_binaries_dir(): """Make the shared coverage binaries directory.""" @@ -51,6 +55,14 @@ def make_shared_coverage_binaries_dir(): return os.makedirs(shared_coverage_binaries_dir) +def make_shared_mua_binaries_dir(): + """Make the shared mua binaries directory.""" + shared_mua_binaries_dir = get_shared_mua_binaries_dir() + if os.path.exists(shared_mua_binaries_dir): + return + os.makedirs(shared_mua_binaries_dir) + + def build_coverage(benchmark): """Build (locally) coverage image for benchmark.""" @@ -62,6 +74,16 @@ def build_coverage(benchmark): copy_coverage_binaries(benchmark) return result +def build_mua(benchmark): + """Build (locally) mua image for benchmark.""" + image_name = f'.mutation_analysis-{benchmark}-builder' + result = make([image_name]) + if result.retcode: + return result + make_shared_mua_binaries_dir() + copy_mua_binaries(benchmark) + return result + def 
copy_coverage_binaries(benchmark): """Copy coverage binaries in a local experiment.""" @@ -80,6 +102,26 @@ def copy_coverage_binaries(benchmark): command ]) +def copy_mua_binaries(benchmark): + """Copy mua binaries in a local experiment.""" + shared_mua_binaries_dir = get_shared_mua_binaries_dir() + mount_arg = f'{shared_mua_binaries_dir}:{shared_mua_binaries_dir}' + builder_image_url = benchmark_utils.get_builder_image_url( + benchmark, 'mutation_analysis', environment.get('DOCKER_REGISTRY')) + mua_build_archive = f'mutation-analysis-build-{benchmark}.tar.gz' + mua_build_archive_shared_dir_path = os.path.join( + shared_mua_binaries_dir, mua_build_archive) + command = ( + '(cd /mutator; ' + f'tar -czvf {mua_build_archive_shared_dir_path} /mutator)') + logger.info('MUA tar command:'+str(command)) + logger.info('MUA builder_image_url:'+str(builder_image_url)) + logger.info('MUA DOCKER_REGISTRY:'+str(environment.get('DOCKER_REGISTRY'))) + return new_process.execute([ + 'docker', 'run', '-v', mount_arg, builder_image_url, '/bin/bash', '-c', + command + ]) + def build_fuzzer_benchmark(fuzzer: str, benchmark: str) -> bool: """Builds |benchmark| for |fuzzer|.""" diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index f10e556c3..202852a56 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -120,6 +120,7 @@ def measure_loop(experiment: str, with multiprocessing.Pool( *pool_args) as pool, multiprocessing.Manager() as manager: set_up_coverage_binaries(pool, experiment) + set_up_mua_binaries(pool, experiment) # Using Multiprocessing.Queue will fail with a complaint about # inheriting queue. 
# pytype: disable=attribute-error @@ -657,6 +658,35 @@ def set_up_coverage_binaries(pool, experiment): filesystem.create_directory(coverage_binaries_dir) pool.map(set_up_coverage_binary, benchmarks) +def set_up_mua_binaries(pool, experiment): + """Set up mua finder binaries for all benchmarks in |experiment|.""" + # Use set comprehension to select distinct benchmarks. + with db_utils.session_scope() as session: + benchmarks = [ + benchmark_tuple[0] + for benchmark_tuple in session.query(models.Trial.benchmark). + distinct().filter(models.Trial.experiment == experiment) + ] + + mua_binaries_dir = build_utils.get_mua_binaries_dir() + filesystem.create_directory(mua_binaries_dir) + pool.map(set_up_mua_binary, benchmarks) + +def set_up_mua_binary(benchmark): + """Set up mua finder binaries for |benchmark|.""" + initialize_logs() + mua_binaries_dir = build_utils.get_mua_binaries_dir() + benchmark_mua_binary_dir = mua_binaries_dir / benchmark + filesystem.create_directory(benchmark_mua_binary_dir) + archive_name = f'mutation-analysis-build-{benchmark}.tar.gz' + archive_filestore_path = exp_path.filestore(mua_binaries_dir / + archive_name) + filestore_utils.cp(archive_filestore_path, + str(benchmark_mua_binary_dir)) + archive_path = benchmark_mua_binary_dir / archive_name + with tarfile.open(archive_path, 'r:gz') as tar: + tar.extractall(benchmark_mua_binary_dir) + os.remove(archive_path) def set_up_coverage_binary(benchmark): """Set up coverage binaries for |benchmark|.""" diff --git a/fuzzers/mutation_analysis/fuzzer.py b/fuzzers/mutation_analysis/fuzzer.py index 51f451fb1..5825d4f54 100644 --- a/fuzzers/mutation_analysis/fuzzer.py +++ b/fuzzers/mutation_analysis/fuzzer.py @@ -35,6 +35,7 @@ def build(): os.environ['LLVM_COMPILER_PATH'] = '/usr/lib/llvm-15/bin/' os.environ['FUZZER_LIB'] = '/mutator/dockerfiles/programs/common/main.cc' os.environ['MUA_RECORDING_DB'] = MUA_RECORDING_DB + os.environ['llvmBinPath'] = '/usr/local/bin/' if os.path.exists(MUA_RECORDING_DB): 
os.unlink(MUA_RECORDING_DB) @@ -53,6 +54,8 @@ def build(): fuzzer = os.getenv('FUZZER') print(f'Building benchmark {benchmark} with fuzzer {fuzzer}') + os.system("touch /test.txt") + utils.build_benchmark() # subprocess.check_call(['/bin/mua_build_benchmark']) From e994f656a8c413acf6be7b0694e2329997859f3f Mon Sep 17 00:00:00 2001 From: qy53zuny Date: Fri, 13 Oct 2023 12:44:03 +0200 Subject: [PATCH 06/69] [WIP] mua integration fuzzbench --- experiment/build/local_build.py | 6 +++--- fuzzers/mutation_analysis/fuzzer.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/experiment/build/local_build.py b/experiment/build/local_build.py index 9ce1b9851..0e75e11ff 100644 --- a/experiment/build/local_build.py +++ b/experiment/build/local_build.py @@ -114,9 +114,9 @@ def copy_mua_binaries(benchmark): command = ( '(cd /mutator; ' f'tar -czvf {mua_build_archive_shared_dir_path} /mutator)') - logger.info('MUA tar command:'+str(command)) - logger.info('MUA builder_image_url:'+str(builder_image_url)) - logger.info('MUA DOCKER_REGISTRY:'+str(environment.get('DOCKER_REGISTRY'))) + # logger.info('MUA tar command:'+str(command)) + # logger.info('MUA builder_image_url:'+str(builder_image_url)) + # logger.info('MUA DOCKER_REGISTRY:'+str(environment.get('DOCKER_REGISTRY'))) return new_process.execute([ 'docker', 'run', '-v', mount_arg, builder_image_url, '/bin/bash', '-c', command diff --git a/fuzzers/mutation_analysis/fuzzer.py b/fuzzers/mutation_analysis/fuzzer.py index 5825d4f54..b2139593e 100644 --- a/fuzzers/mutation_analysis/fuzzer.py +++ b/fuzzers/mutation_analysis/fuzzer.py @@ -59,3 +59,31 @@ def build(): utils.build_benchmark() # subprocess.check_call(['/bin/mua_build_benchmark']) + prepare() + + +def prepare(): + # executed when benchmark is already present, but no fuzzer selected + subprocess.check_call(['/bin/mua_build_benchmark']) + subprocess.check_call(['cd /mutator && gradle build']) + subprocess.check_call(['ldconfig 
/mutator/build/install/LLVM_Mutation_Tool/lib/ ']) + subprocess.check_call(['pipx run hatch run src/mua_fuzzer_benchmark/eval.py locator_local --config-path /tmp/config.json --result-path /tmp/test/']) + #subprocess.check_call(['']) + #subprocess.check_call(['']) + + # build tooling + # load libs + #build location executables + + + # fuzzer_build # runs fuzzer.py build +# mua_build_benchmark # builds bitcode to /out/filename.bc and config to /tmp/config + +# cd /mutator && gradle build #baut tooling +# ldconfig /mutator/build/install/LLVM_Mutation_Tool/lib/ +# pipx run hatch run src/mua_fuzzer_benchmark/eval.py locator_local --config-path /tmp/config.json --result-path /tmp/test/ # stores infos in /tmp/test + + +# /tmp/test/progs/xml/xml.locator /benchmark.yaml #create a list of all possible mutations +# cd /mutator && python locator_signal_to_mutation_list.py --trigger-signal-dir /tmp/trigger_signal/ --prog xml --out /tmp/mualist.json && cat /tmp/mualist.json +# cd /mutator && MUT_NUM_CPUS=24 pipx run hatch run src/mua_fuzzer_benchmark/eval.py locator_mutants_local --result-path /tmp/mutants_$(date +"%Y%m%d_%H%M%S") --statsdb /tmp/test/stats.db --mutation-list /tmp/mualist.json From ffbf1e410ffd0adb8abfd4e61a531b33fc949309 Mon Sep 17 00:00:00 2001 From: Joschua Schilling Date: Sat, 28 Oct 2023 21:58:22 +0200 Subject: [PATCH 07/69] [WIP] mua integration recover changes --- common/experiment_utils.py | 2 +- experiment/build/local_build.py | 126 +++++++++++++++---- experiment/dispatcher.py | 2 +- experiment/measurer/measure_manager.py | 50 +++++--- experiment/scheduler.py | 4 +- fuzzers/mutation_analysis/builder.Dockerfile | 4 + fuzzers/mutation_analysis/fuzzer.py | 21 +--- 7 files changed, 152 insertions(+), 57 deletions(-) diff --git a/common/experiment_utils.py b/common/experiment_utils.py index 604d0218f..1431a5bd8 100644 --- a/common/experiment_utils.py +++ b/common/experiment_utils.py @@ -20,7 +20,7 @@ from common import environment from common import 
experiment_path as exp_path -DEFAULT_SNAPSHOT_SECONDS = 15 * 60 # Seconds. +DEFAULT_SNAPSHOT_SECONDS = 5#15 * 60 # Seconds. CONFIG_DIR = 'config' diff --git a/experiment/build/local_build.py b/experiment/build/local_build.py index 0e75e11ff..80aa26b49 100644 --- a/experiment/build/local_build.py +++ b/experiment/build/local_build.py @@ -15,6 +15,7 @@ """Module for building things locally for use in trials.""" import os +import shutil from typing import Tuple from common import benchmark_utils @@ -81,9 +82,112 @@ def build_mua(benchmark): if result.retcode: return result make_shared_mua_binaries_dir() - copy_mua_binaries(benchmark) + prepare_mua_binaries(benchmark) return result +def create_dir(dir): + if(not os.path.exists(dir)): + os.makedirs(dir) + return os.path.exists(dir) + +def initialize_mua(benchmark, trial_num, fuzzer, corpus_dir): + # find correct container and start it + container_name = 'mutation_analysis_'+benchmark+'_container' + + docker_start_command = 'docker start '+container_name + new_process.execute(docker_start_command.split(' ')) + + shared_mua_binaries_dir = get_shared_mua_binaries_dir() + + + # craft command, which is executed in mua container + command = '' + + # copy corpus from self.corpus_dir into container + command += '(touch /awesomeItWorks; )' + + corpi_dir = shared_mua_binaries_dir+'/corpi' + fuzzer_corpi_dir = corpi_dir + '/' + fuzzer + trial_corpi_dir = fuzzer_corpi_dir + '/' + str(trial_num) + create_dir(corpi_dir) + create_dir(fuzzer_corpi_dir) + #create_dir(trial_corpi_dir) + shutil.copytree(corpus_dir, trial_corpi_dir, dirs_exist_ok=True) + + # check which mutation are covered => these mutants are needed + # check if needed mutants are in mutant storage + # if mutants are in storage, copy into mutant directory + # if mutants are not in storage, build mutants and add to storage + + + + # execute command on container + #command += '"' + + docker_exec_command = 'docker exec -t '+container_name+' /bin/bash -c' + logger.info('mua 
initialize command:'+str(docker_exec_command)) + docker_exec_command_formated = docker_exec_command.split(" ") + docker_exec_command_formated.append(command) + print(docker_exec_command_formated) + new_process.execute(docker_exec_command_formated) + + +def prepare_mua_binaries(benchmark): + """Run commands on mua container to prepare it""" + project = benchmark_utils.get_project(benchmark) + fuzz_target = benchmark_utils.get_fuzz_target(benchmark) + + shared_mua_binaries_dir = get_shared_mua_binaries_dir() + mount_arg = f'{shared_mua_binaries_dir}:{shared_mua_binaries_dir}' + builder_image_url = benchmark_utils.get_builder_image_url( + benchmark, 'mutation_analysis', environment.get('DOCKER_REGISTRY')) + + mua_build_archive = f'mutation-analysis-build-{benchmark}.tar.gz' + mua_build_archive_shared_dir_path = os.path.join( + shared_mua_binaries_dir, mua_build_archive) + + container_name = 'mutation_analysis_'+benchmark+'_container' + #new_image_name = builder_image_url+'_prepared' + + command = ( + '(python3 /mutator/mua_idle.py; ' + 'touch /out/testentry; ' + 'cd /src/'+project+' && /bin/mua_build_benchmark; ' + 'cd /mutator && gradle build; ' + 'ldconfig /mutator/build/install/LLVM_Mutation_Tool/lib/; ' + 'pipx run hatch run src/mua_fuzzer_benchmark/eval.py locator_local --config-path /tmp/config.json --result-path /tmp/test/; ' + 'cd /tmp && /tmp/test/progs/'+fuzz_target+'/'+fuzz_target+'.locator /benchmark.yaml; ' + 'cd /mutator && python locator_signal_to_mutation_list.py --trigger-signal-dir /tmp/trigger_signal/ --prog xml --out /out/mua_all_list.json; ' + 'cp /tmp/test/progs/'+fuzz_target+'/'+fuzz_target+'.locator /out/'+fuzz_target+'.locator; ' + 'cp /tmp/config.json /out/config.json; ' + 'tar -czvf '+mua_build_archive_shared_dir_path+' /out;' + ')' + ) + + logger.info('mua prepare command:'+str(command)) + docker_rm_command = 'docker rm -f '+container_name + try: + #print("docker rm") + #print(docker_rm_command) + 
new_process.execute(docker_rm_command.split(" ")) + except: + pass + + new_process.execute([ + 'docker', 'run', '--name', container_name, '-v', mount_arg, + '-e', 'FUZZ_OUTSIDE_EXPERIMENT=1', + '-e', 'FORCE_LOCAL=1', + '-e', 'TRIAL_ID=1', + '-e', 'FUZZER=mutation_analysis', + '-e', 'DEBUG_BUILDER=1', + builder_image_url, '/bin/bash', '-c', + command + ]) + + #docker_commit_command = 'docker commit '+container_name+' '+new_image_name + #new_process.execute(docker_commit_command.split(' ')) + + def copy_coverage_binaries(benchmark): """Copy coverage binaries in a local experiment.""" @@ -102,26 +206,6 @@ def copy_coverage_binaries(benchmark): command ]) -def copy_mua_binaries(benchmark): - """Copy mua binaries in a local experiment.""" - shared_mua_binaries_dir = get_shared_mua_binaries_dir() - mount_arg = f'{shared_mua_binaries_dir}:{shared_mua_binaries_dir}' - builder_image_url = benchmark_utils.get_builder_image_url( - benchmark, 'mutation_analysis', environment.get('DOCKER_REGISTRY')) - mua_build_archive = f'mutation-analysis-build-{benchmark}.tar.gz' - mua_build_archive_shared_dir_path = os.path.join( - shared_mua_binaries_dir, mua_build_archive) - command = ( - '(cd /mutator; ' - f'tar -czvf {mua_build_archive_shared_dir_path} /mutator)') - # logger.info('MUA tar command:'+str(command)) - # logger.info('MUA builder_image_url:'+str(builder_image_url)) - # logger.info('MUA DOCKER_REGISTRY:'+str(environment.get('DOCKER_REGISTRY'))) - return new_process.execute([ - 'docker', 'run', '-v', mount_arg, builder_image_url, '/bin/bash', '-c', - command - ]) - def build_fuzzer_benchmark(fuzzer: str, benchmark: str) -> bool: """Builds |benchmark| for |fuzzer|.""" diff --git a/experiment/dispatcher.py b/experiment/dispatcher.py index 796c796b8..00980bd18 100755 --- a/experiment/dispatcher.py +++ b/experiment/dispatcher.py @@ -36,7 +36,7 @@ from experiment import scheduler from experiment import stop_experiment -LOOP_WAIT_SECONDS = 5 * 60 +LOOP_WAIT_SECONDS = 5 * 60 #2 # 
TODO(metzman): Convert more uses of os.path.join to exp_path.path. diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index 202852a56..9d023e336 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -33,6 +33,7 @@ from sqlalchemy import orm from common import benchmark_utils +from common import environment from common import experiment_utils from common import experiment_path as exp_path from common import filesystem @@ -48,6 +49,11 @@ from experiment.measurer import run_crashes from experiment import scheduler +if not experiment_utils.is_local_experiment(): + import experiment.build.gcb_build as buildlib +else: + import experiment.build.local_build as buildlib + logger = logs.Logger() SnapshotMeasureRequest = collections.namedtuple( @@ -55,7 +61,7 @@ NUM_RETRIES = 3 RETRY_DELAY = 3 -FAIL_WAIT_SECONDS = 30 +FAIL_WAIT_SECONDS = 5#30 SNAPSHOT_QUEUE_GET_TIMEOUT = 1 SNAPSHOTS_BATCH_SAVE_SIZE = 100 @@ -120,7 +126,7 @@ def measure_loop(experiment: str, with multiprocessing.Pool( *pool_args) as pool, multiprocessing.Manager() as manager: set_up_coverage_binaries(pool, experiment) - set_up_mua_binaries(pool, experiment) + #set_up_mua_binaries(pool, experiment) # Using Multiprocessing.Queue will fail with a complaint about # inheriting queue. # pytype: disable=attribute-error @@ -131,6 +137,7 @@ def measure_loop(experiment: str, # races. 
all_trials_ended = scheduler.all_trials_ended(experiment) + if not measure_all_trials(experiment, max_total_time, pool, multiprocessing_queue, region_coverage): @@ -165,13 +172,13 @@ def measure_all_trials(experiment: str, max_total_time: int, pool, if not unmeasured_snapshots: return False - measure_trial_coverage_args = [ + measure_trial_args = [ (unmeasured_snapshot, max_cycle, multiprocessing_queue, region_coverage) for unmeasured_snapshot in unmeasured_snapshots ] - result = pool.starmap_async(measure_trial_coverage, - measure_trial_coverage_args) + result = pool.starmap_async(measure_trial, + measure_trial_args) # Poll the queue for snapshots and save them in batches until the pool is # done processing each unmeasured snapshot. Then save any remaining @@ -201,7 +208,7 @@ def save_snapshots(): # unmeasured_snapshot. Since it is finished and the queue is # empty, we can stop checking the queue for more snapshots. logger.debug( - 'Finished call to map with measure_trial_coverage.') + 'Finished call to map with measure_trial.') break if len(snapshots) >= SNAPSHOTS_BATCH_SAVE_SIZE * .75: @@ -247,10 +254,10 @@ def _query_unmeasured_trials(experiment: str): with db_utils.session_scope() as session: trial_query = session.query(models.Trial) - no_snapshots_filter = ~models.Trial.id.in_(ids_of_trials_with_snapshots) - started_trials_filter = ~models.Trial.time_started.is_(None) - nonpreempted_trials_filter = ~models.Trial.preempted - experiment_trials_filter = models.Trial.experiment == experiment + no_snapshots_filter = ~models.Trial.id.in_(ids_of_trials_with_snapshots) # trial has no snapshot + started_trials_filter = ~models.Trial.time_started.is_(None) # trial already started + nonpreempted_trials_filter = ~models.Trial.preempted # trial not preempted + experiment_trials_filter = models.Trial.experiment == experiment # trial matches the current experiment return trial_query.filter(experiment_trials_filter, no_snapshots_filter, started_trials_filter, 
nonpreempted_trials_filter) @@ -398,10 +405,18 @@ def get_profraw_files(self): def initialize_measurement_dirs(self): """Initialize directories that will be needed for measuring coverage.""" + for directory in [self.corpus_dir, self.coverage_dir, self.crashes_dir]: filesystem.recreate_directory(directory) filesystem.create_directory(self.report_dir) + def initialize_mua_environment(self): + buildlib.initialize_mua(self.benchmark, self.trial_num, self.fuzzer, self.corpus_dir) + + def process_mua(self): + """runs mua measurement""" + # run all needed mutants in container + def run_cov_new_units(self): """Run the coverage binary on new units.""" coverage_binary = coverage_utils.get_coverage_binary(self.benchmark) @@ -553,7 +568,7 @@ def get_fuzzer_stats(stats_filestore_path): return json.loads(stats_str) -def measure_trial_coverage(measure_req, max_cycle: int, +def measure_trial(measure_req, max_cycle: int, multiprocessing_queue: multiprocessing.Queue, region_coverage) -> models.Snapshot: """Measure the coverage obtained by |trial_num| on |benchmark| using @@ -564,7 +579,7 @@ def measure_trial_coverage(measure_req, max_cycle: int, # Add 1 to ensure we measure the last cycle. 
for cycle in range(min_cycle, max_cycle + 1): try: - snapshot = measure_snapshot_coverage(measure_req.fuzzer, + snapshot = measure_snapshot(measure_req.fuzzer, measure_req.benchmark, measure_req.trial_id, cycle, region_coverage) @@ -582,10 +597,10 @@ def measure_trial_coverage(measure_req, max_cycle: int, logger.debug('Done measuring trial: %d.', measure_req.trial_id) -def measure_snapshot_coverage( # pylint: disable=too-many-locals +def measure_snapshot( # pylint: disable=too-many-locals fuzzer: str, benchmark: str, trial_num: int, cycle: int, region_coverage: bool) -> models.Snapshot: - """Measure coverage of the snapshot for |cycle| for |trial_num| of |fuzzer| + """Measure coverage and mua of the snapshot for |cycle| for |trial_num| of |fuzzer| and |benchmark|.""" snapshot_logger = logs.Logger( default_extras={ @@ -617,6 +632,9 @@ def measure_snapshot_coverage( # pylint: disable=too-many-locals snapshot_measurer.initialize_measurement_dirs() snapshot_measurer.extract_corpus(corpus_archive_dst) + + snapshot_measurer.initialize_mua_environment() + # Don't keep corpus archives around longer than they need to be. os.remove(corpus_archive_dst) @@ -638,12 +656,14 @@ def measure_snapshot_coverage( # pylint: disable=too-many-locals fuzzer_stats=fuzzer_stats_data, crashes=crashes) + snapshot_measurer.process_mua() + measuring_time = round(time.time() - measuring_start_time, 2) snapshot_logger.info('Measured cycle: %d in %f seconds.', cycle, measuring_time) + return snapshot - def set_up_coverage_binaries(pool, experiment): """Set up coverage binaries for all benchmarks in |experiment|.""" # Use set comprehension to select distinct benchmarks. diff --git a/experiment/scheduler.py b/experiment/scheduler.py index 0d9da0b22..fa17da4b9 100644 --- a/experiment/scheduler.py +++ b/experiment/scheduler.py @@ -37,9 +37,9 @@ # Give the trial runner a little extra time to shut down and account for how # long it can take to actually start running once an instance is started. 
5 # minutes is an arbitrary amount of time. -GRACE_TIME_SECONDS = 5 * 60 +GRACE_TIME_SECONDS = 5 * 60 #10 -FAIL_WAIT_SECONDS = 10 * 60 +FAIL_WAIT_SECONDS = 10 * 60 #10 logger = logs.Logger() # pylint: disable=invalid-name diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index 90bc057a2..0e97eee19 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -121,6 +121,10 @@ RUN ln -s /exec-recorder.py /bin/gclang++-wrap RUN ln -s /mutator/mua_build_benchmark.py /bin/mua_build_benchmark +#RUN echo "transfering control flow to mua_idle.py" +#RUN python3 /mutator/mua_idle.py + + # # set library paths for used shared libraries s.t. the system finds them # ENV LD_LIBRARY_PATH /home/mutator/build/install/LLVM_Mutation_Tool/lib/ # # For all subjects provide the path to the default main here. This is based on oss-fuzz convention. diff --git a/fuzzers/mutation_analysis/fuzzer.py b/fuzzers/mutation_analysis/fuzzer.py index b2139593e..a4e66a9ff 100644 --- a/fuzzers/mutation_analysis/fuzzer.py +++ b/fuzzers/mutation_analysis/fuzzer.py @@ -15,6 +15,7 @@ import os import subprocess +import time from fuzzers import utils @@ -22,6 +23,7 @@ def build(): + # """Build benchmark.""" cflags = [ # '-fprofile-instr-generate', '-fcoverage-mapping', '-gline-tables-only' @@ -54,29 +56,14 @@ def build(): fuzzer = os.getenv('FUZZER') print(f'Building benchmark {benchmark} with fuzzer {fuzzer}') - os.system("touch /test.txt") - utils.build_benchmark() - # subprocess.check_call(['/bin/mua_build_benchmark']) - prepare() - + # while(True): time.sleep(1) -def prepare(): - # executed when benchmark is already present, but no fuzzer selected - subprocess.check_call(['/bin/mua_build_benchmark']) - subprocess.check_call(['cd /mutator && gradle build']) - subprocess.check_call(['ldconfig /mutator/build/install/LLVM_Mutation_Tool/lib/ ']) - subprocess.check_call(['pipx run hatch run 
src/mua_fuzzer_benchmark/eval.py locator_local --config-path /tmp/config.json --result-path /tmp/test/']) - #subprocess.check_call(['']) - #subprocess.check_call(['']) - # build tooling - # load libs - #build location executables - # fuzzer_build # runs fuzzer.py build +# fuzzer_build # runs fuzzer.py build # mua_build_benchmark # builds bitcode to /out/filename.bc and config to /tmp/config # cd /mutator && gradle build #baut tooling From 40cad5e52ad864799e7daa9ed4a5df9473d5d360 Mon Sep 17 00:00:00 2001 From: Joschua Schilling Date: Mon, 6 Nov 2023 15:26:52 +0100 Subject: [PATCH 08/69] fuzzbench integration: build mutants --- experiment/build/local_build.py | 36 +++++++++++++++++---------------- run_mua.sh | 5 +++++ 2 files changed, 24 insertions(+), 17 deletions(-) create mode 100644 run_mua.sh diff --git a/experiment/build/local_build.py b/experiment/build/local_build.py index 80aa26b49..e6afaf254 100644 --- a/experiment/build/local_build.py +++ b/experiment/build/local_build.py @@ -87,7 +87,7 @@ def build_mua(benchmark): def create_dir(dir): if(not os.path.exists(dir)): - os.makedirs(dir) + os.makedirs(dir, exist_ok=True) return os.path.exists(dir) def initialize_mua(benchmark, trial_num, fuzzer, corpus_dir): @@ -100,30 +100,32 @@ def initialize_mua(benchmark, trial_num, fuzzer, corpus_dir): shared_mua_binaries_dir = get_shared_mua_binaries_dir() - # craft command, which is executed in mua container - command = '' - - # copy corpus from self.corpus_dir into container - command += '(touch /awesomeItWorks; )' + + # create corpi directory entry corpi_dir = shared_mua_binaries_dir+'/corpi' fuzzer_corpi_dir = corpi_dir + '/' + fuzzer trial_corpi_dir = fuzzer_corpi_dir + '/' + str(trial_num) - create_dir(corpi_dir) create_dir(fuzzer_corpi_dir) - #create_dir(trial_corpi_dir) - shutil.copytree(corpus_dir, trial_corpi_dir, dirs_exist_ok=True) - # check which mutation are covered => these mutants are needed - # check if needed mutants are in mutant storage - # if mutants 
are in storage, copy into mutant directory - # if mutants are not in storage, build mutants and add to storage + # create covered_mutants directory entry (contains ids) + mutants_ids_dir_entry = shared_mua_binaries_dir+'/mutant_ids'+'/'+fuzzer+'/'+str(trial_num) + create_dir(mutants_ids_dir_entry) + + # create mutants directory + mutants_dir_entry = shared_mua_binaries_dir+'/mutants'+'/' + create_dir(mutants_dir_entry) + # copy corpus from self.corpus_dir into container + shutil.copytree(corpus_dir, trial_corpi_dir, dirs_exist_ok=True) + # get additional info from commons + experiment_name = experiment_utils.get_experiment_name() + fuzz_target = benchmark_utils.get_fuzz_target(benchmark) # execute command on container - #command += '"' - + command = '(python3 /mutator/mua_build_ids.py '+fuzz_target+' '+experiment_name+' '+fuzzer+' '+str(trial_num)+'; )' + docker_exec_command = 'docker exec -t '+container_name+' /bin/bash -c' logger.info('mua initialize command:'+str(docker_exec_command)) docker_exec_command_formated = docker_exec_command.split(" ") @@ -150,7 +152,7 @@ def prepare_mua_binaries(benchmark): #new_image_name = builder_image_url+'_prepared' command = ( - '(python3 /mutator/mua_idle.py; ' + '(' 'touch /out/testentry; ' 'cd /src/'+project+' && /bin/mua_build_benchmark; ' 'cd /mutator && gradle build; ' @@ -161,7 +163,7 @@ def prepare_mua_binaries(benchmark): 'cp /tmp/test/progs/'+fuzz_target+'/'+fuzz_target+'.locator /out/'+fuzz_target+'.locator; ' 'cp /tmp/config.json /out/config.json; ' 'tar -czvf '+mua_build_archive_shared_dir_path+' /out;' - ')' + 'python3 /mutator/mua_idle.py; )' ) logger.info('mua prepare command:'+str(command)) diff --git a/run_mua.sh b/run_mua.sh new file mode 100644 index 000000000..52b7a42ae --- /dev/null +++ b/run_mua.sh @@ -0,0 +1,5 @@ +#! /bin/bash +source .venv/bin/activate + +PYTHONPATH=. 
python3 experiment/run_experiment.py --experiment-config /home/joschua/Desktop/CISPA/Projekte/SBFT/experiment/config.yaml --benchmarks bloaty_fuzz_target --experiment-name testrun01-fuzzers afl libfuzzer -a + From 396140e6db7fe3dba36cec9ce9289177de1675ed Mon Sep 17 00:00:00 2001 From: Joschua Schilling Date: Mon, 6 Nov 2023 17:11:37 +0100 Subject: [PATCH 09/69] add mua command line option --- experiment/measurer/measure_manager.py | 27 +++++++++++++++----------- experiment/run_experiment.py | 15 +++++++++++--- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index 9d023e336..a5c1939ed 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -83,8 +83,9 @@ def measure_main(experiment_config): measurers_cpus = experiment_config['measurers_cpus'] runners_cpus = experiment_config['runners_cpus'] region_coverage = experiment_config['region_coverage'] + mutation_analysis = experiment_config['mutation_analysis'] measure_loop(experiment, max_total_time, measurers_cpus, runners_cpus, - region_coverage) + region_coverage, mutation_analysis) # Clean up resources. gc.collect() @@ -106,7 +107,8 @@ def measure_loop(experiment: str, max_total_time: int, measurers_cpus=None, runners_cpus=None, - region_coverage=False): + region_coverage=False, + mutation_analysis=False): """Continuously measure trials for |experiment|.""" logger.info('Start measure_loop.') @@ -126,7 +128,8 @@ def measure_loop(experiment: str, with multiprocessing.Pool( *pool_args) as pool, multiprocessing.Manager() as manager: set_up_coverage_binaries(pool, experiment) - #set_up_mua_binaries(pool, experiment) + if(mutation_analysis): + set_up_mua_binaries(pool, experiment) # Using Multiprocessing.Queue will fail with a complaint about # inheriting queue. 
# pytype: disable=attribute-error @@ -140,7 +143,7 @@ def measure_loop(experiment: str, if not measure_all_trials(experiment, max_total_time, pool, multiprocessing_queue, - region_coverage): + region_coverage, mutation_analysis): # We didn't measure any trials. if all_trials_ended: # There are no trials producing snapshots to measure. @@ -156,7 +159,7 @@ def measure_loop(experiment: str, def measure_all_trials(experiment: str, max_total_time: int, pool, - multiprocessing_queue, region_coverage) -> bool: + multiprocessing_queue, region_coverage, mutation_analysis) -> bool: """Get coverage data (with coverage runs) for all active trials. Note that this should not be called unless multiprocessing.set_start_method('spawn') was called first. Otherwise it will use fork which breaks logging.""" @@ -173,7 +176,7 @@ def measure_all_trials(experiment: str, max_total_time: int, pool, return False measure_trial_args = [ - (unmeasured_snapshot, max_cycle, multiprocessing_queue, region_coverage) + (unmeasured_snapshot, max_cycle, multiprocessing_queue, region_coverage, mutation_analysis) for unmeasured_snapshot in unmeasured_snapshots ] @@ -570,7 +573,7 @@ def get_fuzzer_stats(stats_filestore_path): def measure_trial(measure_req, max_cycle: int, multiprocessing_queue: multiprocessing.Queue, - region_coverage) -> models.Snapshot: + region_coverage, mutation_analysis) -> models.Snapshot: """Measure the coverage obtained by |trial_num| on |benchmark| using |fuzzer|.""" initialize_logs() @@ -582,7 +585,7 @@ def measure_trial(measure_req, max_cycle: int, snapshot = measure_snapshot(measure_req.fuzzer, measure_req.benchmark, measure_req.trial_id, cycle, - region_coverage) + region_coverage, mutation_analysis) if not snapshot: break multiprocessing_queue.put(snapshot) @@ -599,7 +602,7 @@ def measure_trial(measure_req, max_cycle: int, def measure_snapshot( # pylint: disable=too-many-locals fuzzer: str, benchmark: str, trial_num: int, cycle: int, - region_coverage: bool) -> 
models.Snapshot: + region_coverage: bool, mutation_analysis: bool) -> models.Snapshot: """Measure coverage and mua of the snapshot for |cycle| for |trial_num| of |fuzzer| and |benchmark|.""" snapshot_logger = logs.Logger( @@ -633,7 +636,8 @@ def measure_snapshot( # pylint: disable=too-many-locals snapshot_measurer.initialize_measurement_dirs() snapshot_measurer.extract_corpus(corpus_archive_dst) - snapshot_measurer.initialize_mua_environment() + if(mutation_analysis): + snapshot_measurer.initialize_mua_environment() # Don't keep corpus archives around longer than they need to be. os.remove(corpus_archive_dst) @@ -656,7 +660,8 @@ def measure_snapshot( # pylint: disable=too-many-locals fuzzer_stats=fuzzer_stats_data, crashes=crashes) - snapshot_measurer.process_mua() + if(mutation_analysis): + snapshot_measurer.process_mua() measuring_time = round(time.time() - measuring_start_time, 2) snapshot_logger.info('Measured cycle: %d in %f seconds.', cycle, diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py index 4771dfebf..9ea85f68c 100644 --- a/experiment/run_experiment.py +++ b/experiment/run_experiment.py @@ -309,7 +309,8 @@ def start_experiment( # pylint: disable=too-many-arguments measurers_cpus: Optional[int] = None, runners_cpus: Optional[int] = None, region_coverage: bool = False, - custom_seed_corpus_dir: Optional[str] = None): + custom_seed_corpus_dir: Optional[str] = None, + mutation_analysis: bool = False): """Start a fuzzer benchmarking experiment.""" if not allow_uncommitted_changes: check_no_uncommitted_changes() @@ -344,7 +345,8 @@ def start_experiment( # pylint: disable=too-many-arguments if config['custom_seed_corpus_dir']: validate_custom_seed_corpus(config['custom_seed_corpus_dir'], benchmarks) - + + config['mutation_analysis'] = mutation_analysis return start_experiment_from_full_config(config) @@ -703,6 +705,12 @@ def run_experiment_main(args=None): required=False, default=False, action='store_true') + parser.add_argument('-ma', + 
'--mutation-analysis', + help='Run integrated mutation analysis.', + required=False, + default=False, + action='store_true') args = parser.parse_args(args) fuzzers = args.fuzzers or all_fuzzers @@ -753,7 +761,8 @@ def run_experiment_main(args=None): measurers_cpus=measurers_cpus, runners_cpus=runners_cpus, region_coverage=args.region_coverage, - custom_seed_corpus_dir=args.custom_seed_corpus_dir) + custom_seed_corpus_dir=args.custom_seed_corpus_dir, + mutation_analysis=args.mutation_analysis) return 0 From b070a60d35383e8c20090968893f8e0e987567d6 Mon Sep 17 00:00:00 2001 From: Joschua Schilling Date: Tue, 14 Nov 2023 09:52:23 +0100 Subject: [PATCH 10/69] move mua code from local only to local+gcb --- experiment/build/local_build.py | 49 ----------------------- experiment/measurer/measure_manager.py | 54 +++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 55 deletions(-) diff --git a/experiment/build/local_build.py b/experiment/build/local_build.py index e6afaf254..e9507825e 100644 --- a/experiment/build/local_build.py +++ b/experiment/build/local_build.py @@ -15,7 +15,6 @@ """Module for building things locally for use in trials.""" import os -import shutil from typing import Tuple from common import benchmark_utils @@ -85,54 +84,6 @@ def build_mua(benchmark): prepare_mua_binaries(benchmark) return result -def create_dir(dir): - if(not os.path.exists(dir)): - os.makedirs(dir, exist_ok=True) - return os.path.exists(dir) - -def initialize_mua(benchmark, trial_num, fuzzer, corpus_dir): - # find correct container and start it - container_name = 'mutation_analysis_'+benchmark+'_container' - - docker_start_command = 'docker start '+container_name - new_process.execute(docker_start_command.split(' ')) - - shared_mua_binaries_dir = get_shared_mua_binaries_dir() - - - - - # create corpi directory entry - corpi_dir = shared_mua_binaries_dir+'/corpi' - fuzzer_corpi_dir = corpi_dir + '/' + fuzzer - trial_corpi_dir = fuzzer_corpi_dir + '/' + str(trial_num) - 
create_dir(fuzzer_corpi_dir) - - # create covered_mutants directory entry (contains ids) - mutants_ids_dir_entry = shared_mua_binaries_dir+'/mutant_ids'+'/'+fuzzer+'/'+str(trial_num) - create_dir(mutants_ids_dir_entry) - - # create mutants directory - mutants_dir_entry = shared_mua_binaries_dir+'/mutants'+'/' - create_dir(mutants_dir_entry) - - # copy corpus from self.corpus_dir into container - shutil.copytree(corpus_dir, trial_corpi_dir, dirs_exist_ok=True) - - # get additional info from commons - experiment_name = experiment_utils.get_experiment_name() - fuzz_target = benchmark_utils.get_fuzz_target(benchmark) - - # execute command on container - command = '(python3 /mutator/mua_build_ids.py '+fuzz_target+' '+experiment_name+' '+fuzzer+' '+str(trial_num)+'; )' - - docker_exec_command = 'docker exec -t '+container_name+' /bin/bash -c' - logger.info('mua initialize command:'+str(docker_exec_command)) - docker_exec_command_formated = docker_exec_command.split(" ") - docker_exec_command_formated.append(command) - print(docker_exec_command_formated) - new_process.execute(docker_exec_command_formated) - def prepare_mua_binaries(benchmark): """Run commands on mua container to prepare it""" diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index a5c1939ed..1ce17acf5 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -21,6 +21,7 @@ import os import pathlib import posixpath +import shutil import sys import tempfile import tarfile @@ -40,6 +41,7 @@ from common import fuzzer_stats from common import filestore_utils from common import logs +from common import new_process from common import utils from database import utils as db_utils from database import models @@ -49,11 +51,6 @@ from experiment.measurer import run_crashes from experiment import scheduler -if not experiment_utils.is_local_experiment(): - import experiment.build.gcb_build as buildlib -else: - import 
experiment.build.local_build as buildlib - logger = logs.Logger() SnapshotMeasureRequest = collections.namedtuple( @@ -413,8 +410,53 @@ def initialize_measurement_dirs(self): filesystem.recreate_directory(directory) filesystem.create_directory(self.report_dir) + def create_dir(self, dir): + if(not os.path.exists(dir)): + os.makedirs(dir, exist_ok=True) + return os.path.exists(dir) + def initialize_mua_environment(self): - buildlib.initialize_mua(self.benchmark, self.trial_num, self.fuzzer, self.corpus_dir) + """build all covered mutants""" + + # find correct container and start it + container_name = 'mutation_analysis_'+self.benchmark+'_container' + + docker_start_command = 'docker start '+container_name + new_process.execute(docker_start_command.split(' ')) + + experiment_filestore_path = experiment_utils.get_experiment_filestore_path() + shared_mua_binaries_dir = os.path.join(experiment_filestore_path, 'mua-binaries') + + # create corpi directory entry + corpi_dir = shared_mua_binaries_dir+'/corpi' + fuzzer_corpi_dir = corpi_dir + '/' + self.fuzzer + trial_corpi_dir = fuzzer_corpi_dir + '/' + str(self.trial_num) + self.create_dir(fuzzer_corpi_dir) + + # create covered_mutants directory entry (contains ids) + mutants_ids_dir_entry = shared_mua_binaries_dir+'/mutant_ids'+'/'+self.fuzzer+'/'+str(self.trial_num) + self.create_dir(mutants_ids_dir_entry) + + # create mutants directory + mutants_dir_entry = shared_mua_binaries_dir+'/mutants'+'/' + self.create_dir(mutants_dir_entry) + + # copy corpus from self.corpus_dir into container + shutil.copytree(self.corpus_dir, trial_corpi_dir, dirs_exist_ok=True) + + # get additional info from commons + experiment_name = experiment_utils.get_experiment_name() + fuzz_target = benchmark_utils.get_fuzz_target(self.benchmark) + + # execute command on container + command = '(python3 /mutator/mua_build_ids.py '+fuzz_target+' '+experiment_name+' '+self.fuzzer+' '+str(self.trial_num)+'; )' + + docker_exec_command = 'docker exec -t 
'+container_name+' /bin/bash -c' + logger.info('mua initialize command:'+str(docker_exec_command)) + docker_exec_command_formated = docker_exec_command.split(" ") + docker_exec_command_formated.append(command) + print(docker_exec_command_formated) + new_process.execute(docker_exec_command_formated) def process_mua(self): """runs mua measurement""" From f1e8c26bc255b154891434716a5e6e1ed21261ac Mon Sep 17 00:00:00 2001 From: Joschua Schilling Date: Wed, 15 Nov 2023 13:19:55 +0100 Subject: [PATCH 11/69] add process_mua MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Philipp Görz --- experiment/measurer/measure_manager.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index 1ce17acf5..0bcafee5e 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -455,12 +455,26 @@ def initialize_mua_environment(self): logger.info('mua initialize command:'+str(docker_exec_command)) docker_exec_command_formated = docker_exec_command.split(" ") docker_exec_command_formated.append(command) - print(docker_exec_command_formated) + logger.info(docker_exec_command_formated) new_process.execute(docker_exec_command_formated) def process_mua(self): """runs mua measurement""" + # get necessary info + container_name = 'mutation_analysis_'+self.benchmark+'_container' + experiment_name = experiment_utils.get_experiment_name() + fuzz_target = benchmark_utils.get_fuzz_target(self.benchmark) + + # run all needed mutants in container + command = '(python3 /mutator/mua_run_mutants.py '+fuzz_target+' '+experiment_name+' '+self.fuzzer+' '+str(self.trial_num)+'; )' + + docker_exec_command = 'docker exec -t '+container_name+' /bin/bash -c' + logger.info('mua process command:'+str(docker_exec_command)) + docker_exec_command_formated = docker_exec_command.split(" ") + 
docker_exec_command_formated.append(command) + logger.info(docker_exec_command_formated) + new_process.execute(docker_exec_command_formated, write_to_stdout=True) def run_cov_new_units(self): """Run the coverage binary on new units.""" From 8a76d1f5767c198358997d45727007b9dcc9efdf Mon Sep 17 00:00:00 2001 From: phi-go Date: Thu, 16 Nov 2023 16:41:48 +0100 Subject: [PATCH 12/69] faster starts for dev --- .dockerignore | 4 +- common/fuzzer_utils.py | 3 +- common/logs.py | 7 ++- docker/image_types.yaml | 24 ++++++++++ experiment/build/builder.py | 4 +- experiment/build/local_build.py | 49 +++++++++++++------- experiment/measurer/measure_manager.py | 3 +- experiment/run_experiment.py | 12 ++++- fuzzers/mutation_analysis/.dockerignore | 2 + fuzzers/mutation_analysis/builder.Dockerfile | 29 ++++++------ fuzzers/mutation_analysis/fuzzer.py | 2 + run_mua.sh | 3 +- 12 files changed, 101 insertions(+), 41 deletions(-) create mode 100644 fuzzers/mutation_analysis/.dockerignore mode change 100644 => 100755 run_mua.sh diff --git a/.dockerignore b/.dockerignore index dedcec93d..1603d3695 100644 --- a/.dockerignore +++ b/.dockerignore @@ -7,4 +7,6 @@ .venv **__pycache__* docs -report* \ No newline at end of file +report* +fuzzers/mutation_analysis/mua_fuzzer_bench/.git +fuzzers/mutation_analysis/mua_fuzzer_bench/fuzzbench_mapped_dir/ \ No newline at end of file diff --git a/common/fuzzer_utils.py b/common/fuzzer_utils.py index 049ba8ce3..1a6aa11b4 100644 --- a/common/fuzzer_utils.py +++ b/common/fuzzer_utils.py @@ -30,6 +30,7 @@ FUZZERS_DIR = os.path.join(utils.ROOT_DIR, 'fuzzers') COVERAGE_TOOLS = {'coverage', 'coverage_source_based'} +MUA_TOOLS = {'mutation_analysis'} class FuzzerDirectory: @@ -135,7 +136,7 @@ def get_fuzzer_names(): for fuzzer in os.listdir(fuzzers_dir): if not os.path.isfile(os.path.join(fuzzers_dir, fuzzer, 'fuzzer.py')): continue - if fuzzer in COVERAGE_TOOLS: + if fuzzer in (COVERAGE_TOOLS | MUA_TOOLS): continue fuzzers.append(fuzzer) diff --git 
a/common/logs.py b/common/logs.py index 668982e28..48e043621 100644 --- a/common/logs.py +++ b/common/logs.py @@ -163,7 +163,12 @@ def log(logger, severity, message, *args, extras=None): if utils.is_local(): if extras: - message += ' Extras: ' + str(extras) + message += f' Extras: ' + if isinstance(extras, dict): + for key, value in extras.items(): + message += f'\n {key}: {value}' + else: + message += str(extras) logging.log(severity, message) return diff --git a/docker/image_types.yaml b/docker/image_types.yaml index e292224bd..ff3ca0ec9 100644 --- a/docker/image_types.yaml +++ b/docker/image_types.yaml @@ -55,6 +55,30 @@ tag: 'builders/coverage/{benchmark}' type: 'coverage' +'mutation_analysis-{benchmark}-builder-intermediate': + build_arg: + - 'parent_image=gcr.io/fuzzbench/builders/benchmark/{benchmark}' + - 'map_mua=false' + depends_on: + - '{benchmark}-project-builder' + dockerfile: 'fuzzers/mutation_analysis/builder.Dockerfile' + context: 'fuzzers/mutation_analysis' + tag: 'builders/mutation_analysis/{benchmark}-intermediate' + type: 'mutation_analysis' + +'mutation_analysis-{benchmark}-builder': + build_arg: + - 'benchmark={benchmark}' + - 'fuzzer=mutation_analysis' + - 'parent_image=gcr.io/fuzzbench/builders/mutation_analysis/{benchmark}-intermediate' + depends_on: + - 'mutation_analysis-{benchmark}-builder-intermediate' + - 'base-image' + dockerfile: 'docker/benchmark-builder/Dockerfile' + context: '.' 
+ tag: 'builders/mutation_analysis/{benchmark}' + type: 'mutation_analysis' + '{fuzzer}-{benchmark}-builder-intermediate': build_arg: - 'parent_image=gcr.io/fuzzbench/builders/benchmark/{benchmark}' diff --git a/experiment/build/builder.py b/experiment/build/builder.py index 526416b5b..4a39b323a 100644 --- a/experiment/build/builder.py +++ b/experiment/build/builder.py @@ -97,9 +97,9 @@ def build_base_images() -> Tuple[int, str]: def build_measurer(benchmark: str) -> bool: """Do a coverage build for a benchmark.""" try: - logger.info('Building measurer for benchmark: %s.', benchmark) + logger.info('Building coverage measurer for benchmark: %s.', benchmark) buildlib.build_coverage(benchmark) - logs.info('Done building measurer for benchmark: %s.', benchmark) + logs.info('Done building coverage measurer for benchmark: %s.', benchmark) return True except Exception: # pylint: disable=broad-except logger.error('Failed to build measurer for %s.', benchmark) diff --git a/experiment/build/local_build.py b/experiment/build/local_build.py index e9507825e..58a198070 100644 --- a/experiment/build/local_build.py +++ b/experiment/build/local_build.py @@ -74,9 +74,11 @@ def build_coverage(benchmark): copy_coverage_binaries(benchmark) return result +MUTATION_ANALYSIS_IMAGE_NAME = 'mutation_analysis' + def build_mua(benchmark): """Build (locally) mua image for benchmark.""" - image_name = f'.mutation_analysis-{benchmark}-builder' + image_name = f'.{MUTATION_ANALYSIS_IMAGE_NAME}-{benchmark}-builder' result = make([image_name]) if result.retcode: return result @@ -93,31 +95,36 @@ def prepare_mua_binaries(benchmark): shared_mua_binaries_dir = get_shared_mua_binaries_dir() mount_arg = f'{shared_mua_binaries_dir}:{shared_mua_binaries_dir}' builder_image_url = benchmark_utils.get_builder_image_url( - benchmark, 'mutation_analysis', environment.get('DOCKER_REGISTRY')) + benchmark, MUTATION_ANALYSIS_IMAGE_NAME, environment.get('DOCKER_REGISTRY')) mua_build_archive = 
f'mutation-analysis-build-{benchmark}.tar.gz' mua_build_archive_shared_dir_path = os.path.join( shared_mua_binaries_dir, mua_build_archive) - container_name = 'mutation_analysis_'+benchmark+'_container' + container_name = MUTATION_ANALYSIS_IMAGE_NAME + '_' + benchmark + '_container' #new_image_name = builder_image_url+'_prepared' + host_mua_mapped_dir = os.environ.get('HOST_MUA_MAPPED_DIR') + command = ( '(' - 'touch /out/testentry; ' - 'cd /src/'+project+' && /bin/mua_build_benchmark; ' - 'cd /mutator && gradle build; ' - 'ldconfig /mutator/build/install/LLVM_Mutation_Tool/lib/; ' - 'pipx run hatch run src/mua_fuzzer_benchmark/eval.py locator_local --config-path /tmp/config.json --result-path /tmp/test/; ' - 'cd /tmp && /tmp/test/progs/'+fuzz_target+'/'+fuzz_target+'.locator /benchmark.yaml; ' - 'cd /mutator && python locator_signal_to_mutation_list.py --trigger-signal-dir /tmp/trigger_signal/ --prog xml --out /out/mua_all_list.json; ' - 'cp /tmp/test/progs/'+fuzz_target+'/'+fuzz_target+'.locator /out/'+fuzz_target+'.locator; ' - 'cp /tmp/config.json /out/config.json; ' - 'tar -czvf '+mua_build_archive_shared_dir_path+' /out;' + f'echo {host_mua_mapped_dir}; ' + 'ls -la /mapped_dir; ' + 'cat /mapped_dir/test.txt; ' + # 'touch /out/testentry; ' + # 'cd /src/'+project+' && /bin/mua_build_benchmark; ' + # 'cd /mutator && gradle build; ' + # 'ldconfig /mutator/build/install/LLVM_Mutation_Tool/lib/; ' + # 'pipx run hatch run src/mua_fuzzer_benchmark/eval.py locator_local --config-path /tmp/config.json --result-path /tmp/test/; ' + # 'cd /tmp && /tmp/test/progs/'+fuzz_target+'/'+fuzz_target+'.locator /benchmark.yaml; ' + # 'cd /mutator && python locator_signal_to_mutation_list.py --trigger-signal-dir /tmp/trigger_signal/ --prog xml --out /out/mua_all_list.json; ' + # 'cp /tmp/test/progs/'+fuzz_target+'/'+fuzz_target+'.locator /out/'+fuzz_target+'.locator; ' + # 'cp /tmp/config.json /out/config.json; ' + f'tar -czvf {mua_build_archive_shared_dir_path} /out; ' 'python3 
/mutator/mua_idle.py; )' ) - logger.info('mua prepare command:'+str(command)) + logger.info('mua prepare command:'+str(command)) docker_rm_command = 'docker rm -f '+container_name try: #print("docker rm") @@ -125,17 +132,25 @@ def prepare_mua_binaries(benchmark): new_process.execute(docker_rm_command.split(" ")) except: pass - - new_process.execute([ + + mua_run_cmd = [ 'docker', 'run', '--name', container_name, '-v', mount_arg, '-e', 'FUZZ_OUTSIDE_EXPERIMENT=1', '-e', 'FORCE_LOCAL=1', '-e', 'TRIAL_ID=1', '-e', 'FUZZER=mutation_analysis', '-e', 'DEBUG_BUILDER=1', + *( + [] + if host_mua_mapped_dir is None + else ['-v', f'{host_mua_mapped_dir}:/mapped_dir'] + ), builder_image_url, '/bin/bash', '-c', command - ]) + ] + + logger.info('mua run command:'+str(mua_run_cmd)) + new_process.execute(mua_run_cmd, write_to_stdout=True) #docker_commit_command = 'docker commit '+container_name+' '+new_image_name #new_process.execute(docker_commit_command.split(' ')) diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index 0bcafee5e..5b2a960ee 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -46,6 +46,7 @@ from database import utils as db_utils from database import models from experiment.build import build_utils +from experiment.build.local_build import MUTATION_ANALYSIS_IMAGE_NAME from experiment.measurer import coverage_utils from experiment.measurer import run_coverage from experiment.measurer import run_crashes @@ -419,7 +420,7 @@ def initialize_mua_environment(self): """build all covered mutants""" # find correct container and start it - container_name = 'mutation_analysis_'+self.benchmark+'_container' + container_name = MUTATION_ANALYSIS_IMAGE_NAME + '_' + self.benchmark + '_container' docker_start_command = 'docker start '+container_name new_process.execute(docker_start_command.split(' ')) diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py index 
9ea85f68c..7baac7c8a 100644 --- a/experiment/run_experiment.py +++ b/experiment/run_experiment.py @@ -155,6 +155,8 @@ def read_and_validate_experiment_config(config_filename: str) -> Dict: config_requirements = { 'experiment_filestore': Requirement(True, str, True, '/' if local_experiment else 'gs://'), + 'host_mua_mapped_dir': + Requirement(False, str, False, '/'), 'report_filestore': Requirement(True, str, True, '/' if local_experiment else 'gs://'), 'docker_registry': @@ -370,6 +372,7 @@ def start_dispatcher(config: Dict, config_dir: str): """Start the dispatcher instance and run the dispatcher code on it.""" dispatcher = get_dispatcher(config) # Is dispatcher code being run manually (useful for debugging)? + os.environ['HOST_MUA_MAPPED_DIR'] = config.get('host_mua_mapped_dir') copy_resources_to_bucket(config_dir, config) if not os.getenv('MANUAL_EXPERIMENT'): dispatcher.start() @@ -496,6 +499,7 @@ def start(self): f'CONCURRENT_BUILDS={self.config["concurrent_builds"]}') set_worker_pool_name_arg = ( f'WORKER_POOL_NAME={self.config["worker_pool_name"]}') + mua_mapped_dir = os.environ['HOST_MUA_MAPPED_DIR'] environment_args = [ '-e', 'LOCAL_EXPERIMENT=True', @@ -517,6 +521,10 @@ def start(self): set_concurrent_builds_arg, '-e', set_worker_pool_name_arg, + *( + ['-e', f'HOST_MUA_MAPPED_DIR={mua_mapped_dir}'] + if mua_mapped_dir else [] + ), ] command = [ 'docker', @@ -529,6 +537,8 @@ def start(self): shared_experiment_filestore_arg, '-v', shared_report_filestore_arg, + '-v', # Just to make repeated run starts faster. 
+ "/tmp/dispatcher_venv:/work/src/.venv/lib/python3.10/site-packages", ] + environment_args + [ '--shm-size=2g', '--cap-add=SYS_PTRACE', @@ -539,7 +549,7 @@ def start(self): '-c', 'rsync -r ' '"${EXPERIMENT_FILESTORE}/${EXPERIMENT}/input/" ${WORK} && ' - 'mkdir ${WORK}/src && ' + 'mkdir -p ${WORK}/src && ' 'tar -xvzf ${WORK}/src.tar.gz -C ${WORK}/src && ' 'PYTHONPATH=${WORK}/src python3 ' '${WORK}/src/experiment/dispatcher.py || ' diff --git a/fuzzers/mutation_analysis/.dockerignore b/fuzzers/mutation_analysis/.dockerignore new file mode 100644 index 000000000..90dcdc5b2 --- /dev/null +++ b/fuzzers/mutation_analysis/.dockerignore @@ -0,0 +1,2 @@ +mua_fuzzer_bench/.git +mua_fuzzer_bench/fuzzbench_mapped_dir/ \ No newline at end of file diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index 0e97eee19..0dc40b5fb 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -13,7 +13,7 @@ # limitations under the License. ARG parent_image - +ARG map_mua FROM gcr.io/fuzzbench/base-image AS base-image @@ -57,7 +57,7 @@ RUN mkdir /tmp/gllvm/ && \ go get github.com/SRI-CSL/gllvm/cmd/... 
&& \ rm -r /tmp/gllvm/ -RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y openjdk-11-jdk zlib1g-dev file +RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y openjdk-11-jdk zlib1g-dev file pipx python3.8-venv # cmake \ # binutils-dev \ # libcurl4-openssl-dev \ @@ -71,21 +71,11 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y openjdk- # libidn2-0 \ # idn2 \ # libstdc++6 +RUN pipx install hatch # RUN git clone https://github.com/CISPA-SysSec/mua_fuzzer_bench mutator COPY mua_fuzzer_bench /mutator -# COPY modules /home/mutator/modules -# COPY build.gradle /home/mutator/ -# COPY run_mutation.py /home/mutator/ -# RUN chmod +x run_mutation.py -# COPY settings.gradle /home/mutator -RUN cd /mutator && \ - echo "llvmBinPath=/usr/lib/llvm-15/bin/" > gradle.properties -# RUN cd /mutator && gradle clean && gradle build -# RUN ldconfig /mutator/build/install/LLVM_Mutation_Tool/lib/ - -# RUN ln /usr/bin/llvm-link-15 /bin/llvm-link RUN update-alternatives --install \ /usr/local/bin/llvm-config llvm-config /usr/lib/llvm-15/bin/llvm-config 200 \ --slave /usr/local/bin/llvm-ar llvm-ar /usr/lib/llvm-15/bin/llvm-ar \ @@ -112,14 +102,23 @@ RUN update-alternatives --install \ --slave /usr/local/bin/clang clang /usr/lib/llvm-15/bin/clang \ --slave /usr/local/bin/clang++ clang++ /usr/lib/llvm-15/bin/clang++ -RUN apt-get update && apt-get install -y pipx python3.8-venv -RUN pipx install hatch +RUN cd /mutator && \ + echo "llvmBinPath=/usr/lib/llvm-15/bin/" > gradle.properties RUN ln -s /mutator/exec-recorder.py /exec-recorder.py RUN ln -s /exec-recorder.py /bin/gclang-wrap RUN ln -s /exec-recorder.py /bin/gclang++-wrap RUN ln -s /mutator/mua_build_benchmark.py /bin/mua_build_benchmark +# COPY modules /home/mutator/modules +# COPY build.gradle /home/mutator/ +# COPY run_mutation.py /home/mutator/ +# RUN chmod +x run_mutation.py +# COPY settings.gradle /home/mutator +# RUN cd /mutator && gradle clean && gradle build +# RUN 
ldconfig /mutator/build/install/LLVM_Mutation_Tool/lib/ + +# RUN ln /usr/bin/llvm-link-15 /bin/llvm-link #RUN echo "transfering control flow to mua_idle.py" #RUN python3 /mutator/mua_idle.py diff --git a/fuzzers/mutation_analysis/fuzzer.py b/fuzzers/mutation_analysis/fuzzer.py index a4e66a9ff..5dfd54454 100644 --- a/fuzzers/mutation_analysis/fuzzer.py +++ b/fuzzers/mutation_analysis/fuzzer.py @@ -58,6 +58,8 @@ def build(): utils.build_benchmark() + subprocess.call(['/mutator/fuzzbench_build.sh']) + # while(True): time.sleep(1) diff --git a/run_mua.sh b/run_mua.sh old mode 100644 new mode 100755 index 52b7a42ae..2b6773df5 --- a/run_mua.sh +++ b/run_mua.sh @@ -1,5 +1,4 @@ #! /bin/bash source .venv/bin/activate -PYTHONPATH=. python3 experiment/run_experiment.py --experiment-config /home/joschua/Desktop/CISPA/Projekte/SBFT/experiment/config.yaml --benchmarks bloaty_fuzz_target --experiment-name testrun01-fuzzers afl libfuzzer -a - +PYTHONPATH=. python3 experiment/run_experiment.py --experiment-config /tmp/experiment_conf.yaml --benchmarks libxml2_xml --experiment-name mua-test-$(date +"%Y%m%d-%H%M%S") -f afl libfuzzer -a From e2768cb4d20e2ca9c0552a3fa52c332dda559b8a Mon Sep 17 00:00:00 2001 From: phi-go Date: Tue, 12 Dec 2023 14:28:38 +0000 Subject: [PATCH 13/69] pass presubmit checks --- common/experiment_utils.py | 2 +- common/logs.py | 2 +- docker/generate_makefile.py | 3 +- experiment/build/build_utils.py | 1 + experiment/build/builder.py | 6 +- experiment/build/local_build.py | 88 ++++------ experiment/build/test_docker_images.py | 48 ++++-- experiment/dispatcher.py | 2 +- experiment/measurer/measure_manager.py | 167 +++++++++++-------- experiment/measurer/test_measure_manager.py | 33 ++-- experiment/run_experiment.py | 16 +- experiment/scheduler.py | 4 +- fuzzers/mutation_analysis/builder.Dockerfile | 133 +++++---------- fuzzers/mutation_analysis/fuzzer.py | 34 +--- run_mua.sh | 4 - 15 files changed, 244 insertions(+), 299 deletions(-) delete mode 100755 
run_mua.sh diff --git a/common/experiment_utils.py b/common/experiment_utils.py index 1431a5bd8..604d0218f 100644 --- a/common/experiment_utils.py +++ b/common/experiment_utils.py @@ -20,7 +20,7 @@ from common import environment from common import experiment_path as exp_path -DEFAULT_SNAPSHOT_SECONDS = 5#15 * 60 # Seconds. +DEFAULT_SNAPSHOT_SECONDS = 15 * 60 # Seconds. CONFIG_DIR = 'config' diff --git a/common/logs.py b/common/logs.py index 48e043621..5842d2f42 100644 --- a/common/logs.py +++ b/common/logs.py @@ -163,7 +163,7 @@ def log(logger, severity, message, *args, extras=None): if utils.is_local(): if extras: - message += f' Extras: ' + message += ' Extras: ' if isinstance(extras, dict): for key, value in extras.items(): message += f'\n {key}: {value}' diff --git a/docker/generate_makefile.py b/docker/generate_makefile.py index 7ae467f62..c43e7ec3e 100755 --- a/docker/generate_makefile.py +++ b/docker/generate_makefile.py @@ -163,7 +163,8 @@ def generate_makefile(): for name, image in buildable_images.items(): makefile += get_rules_for_image(name, image) - # Print build targets for all fuzzer-benchmark pairs (including coverage and mutation_analysis). + # Print build targets for all fuzzer-benchmark pairs + # (including coverage and mutation_analysis). 
fuzzers.append('coverage') fuzzers.append('mutation_analysis') for fuzzer in fuzzers: diff --git a/experiment/build/build_utils.py b/experiment/build/build_utils.py index db7516adb..f0ebabfa6 100644 --- a/experiment/build/build_utils.py +++ b/experiment/build/build_utils.py @@ -37,6 +37,7 @@ def get_coverage_binaries_dir(): """Return coverage binaries directory.""" return exp_path.path('coverage-binaries') + def get_mua_binaries_dir(): """Return mua finder binaries directory.""" return exp_path.path('mua-binaries') diff --git a/experiment/build/builder.py b/experiment/build/builder.py index 4a39b323a..c1178f750 100644 --- a/experiment/build/builder.py +++ b/experiment/build/builder.py @@ -99,12 +99,14 @@ def build_measurer(benchmark: str) -> bool: try: logger.info('Building coverage measurer for benchmark: %s.', benchmark) buildlib.build_coverage(benchmark) - logs.info('Done building coverage measurer for benchmark: %s.', benchmark) + logs.info('Done building coverage measurer for benchmark: %s.', + benchmark) return True except Exception: # pylint: disable=broad-except logger.error('Failed to build measurer for %s.', benchmark) return False - + + def build_mua(benchmark: str) -> bool: """Do a mutation analysis build for a benchmark.""" try: diff --git a/experiment/build/local_build.py b/experiment/build/local_build.py index 58a198070..03782c2f6 100644 --- a/experiment/build/local_build.py +++ b/experiment/build/local_build.py @@ -15,6 +15,7 @@ """Module for building things locally for use in trials.""" import os +import subprocess from typing import Tuple from common import benchmark_utils @@ -43,11 +44,13 @@ def get_shared_coverage_binaries_dir(): experiment_filestore_path = experiment_utils.get_experiment_filestore_path() return os.path.join(experiment_filestore_path, 'coverage-binaries') + def get_shared_mua_binaries_dir(): """Returns the shared mua binaries directory.""" experiment_filestore_path = experiment_utils.get_experiment_filestore_path() return 
os.path.join(experiment_filestore_path, 'mua-binaries') + def make_shared_coverage_binaries_dir(): """Make the shared coverage binaries directory.""" shared_coverage_binaries_dir = get_shared_coverage_binaries_dir() @@ -55,6 +58,7 @@ def make_shared_coverage_binaries_dir(): return os.makedirs(shared_coverage_binaries_dir) + def make_shared_mua_binaries_dir(): """Make the shared mua binaries directory.""" shared_mua_binaries_dir = get_shared_mua_binaries_dir() @@ -63,7 +67,6 @@ def make_shared_mua_binaries_dir(): os.makedirs(shared_mua_binaries_dir) - def build_coverage(benchmark): """Build (locally) coverage image for benchmark.""" image_name = f'build-coverage-{benchmark}' @@ -74,8 +77,10 @@ def build_coverage(benchmark): copy_coverage_binaries(benchmark) return result + MUTATION_ANALYSIS_IMAGE_NAME = 'mutation_analysis' + def build_mua(benchmark): """Build (locally) mua image for benchmark.""" image_name = f'.{MUTATION_ANALYSIS_IMAGE_NAME}-{benchmark}-builder' @@ -89,73 +94,46 @@ def build_mua(benchmark): def prepare_mua_binaries(benchmark): """Run commands on mua container to prepare it""" - project = benchmark_utils.get_project(benchmark) - fuzz_target = benchmark_utils.get_fuzz_target(benchmark) - shared_mua_binaries_dir = get_shared_mua_binaries_dir() mount_arg = f'{shared_mua_binaries_dir}:{shared_mua_binaries_dir}' builder_image_url = benchmark_utils.get_builder_image_url( - benchmark, MUTATION_ANALYSIS_IMAGE_NAME, environment.get('DOCKER_REGISTRY')) - + benchmark, MUTATION_ANALYSIS_IMAGE_NAME, + environment.get('DOCKER_REGISTRY')) + mua_build_archive = f'mutation-analysis-build-{benchmark}.tar.gz' - mua_build_archive_shared_dir_path = os.path.join( - shared_mua_binaries_dir, mua_build_archive) - - container_name = MUTATION_ANALYSIS_IMAGE_NAME + '_' + benchmark + '_container' - #new_image_name = builder_image_url+'_prepared' + mua_build_archive_shared_dir_path = os.path.join(shared_mua_binaries_dir, + mua_build_archive) + + container_name = 
f'{MUTATION_ANALYSIS_IMAGE_NAME}_{benchmark}_container' host_mua_mapped_dir = os.environ.get('HOST_MUA_MAPPED_DIR') - command = ( - '(' - f'echo {host_mua_mapped_dir}; ' - 'ls -la /mapped_dir; ' - 'cat /mapped_dir/test.txt; ' - # 'touch /out/testentry; ' - # 'cd /src/'+project+' && /bin/mua_build_benchmark; ' - # 'cd /mutator && gradle build; ' - # 'ldconfig /mutator/build/install/LLVM_Mutation_Tool/lib/; ' - # 'pipx run hatch run src/mua_fuzzer_benchmark/eval.py locator_local --config-path /tmp/config.json --result-path /tmp/test/; ' - # 'cd /tmp && /tmp/test/progs/'+fuzz_target+'/'+fuzz_target+'.locator /benchmark.yaml; ' - # 'cd /mutator && python locator_signal_to_mutation_list.py --trigger-signal-dir /tmp/trigger_signal/ --prog xml --out /out/mua_all_list.json; ' - # 'cp /tmp/test/progs/'+fuzz_target+'/'+fuzz_target+'.locator /out/'+fuzz_target+'.locator; ' - # 'cp /tmp/config.json /out/config.json; ' - f'tar -czvf {mua_build_archive_shared_dir_path} /out; ' - 'python3 /mutator/mua_idle.py; )' - ) - - logger.info('mua prepare command:'+str(command)) - docker_rm_command = 'docker rm -f '+container_name + command = ('(' + f'echo {host_mua_mapped_dir}; ' + 'ls -la /mapped_dir; ' + 'cat /mapped_dir/test.txt; ' + f'tar -czvf {mua_build_archive_shared_dir_path} /out; ' + 'python3 /mutator/mua_idle.py; ' + ')') + + logger.info('mua prepare command:' + str(command)) try: - #print("docker rm") - #print(docker_rm_command) - new_process.execute(docker_rm_command.split(" ")) - except: + new_process.execute(['docker', 'rm', '-f', container_name]) + except subprocess.CalledProcessError: pass mua_run_cmd = [ - 'docker', 'run', '--name', container_name, '-v', mount_arg, - '-e', 'FUZZ_OUTSIDE_EXPERIMENT=1', - '-e', 'FORCE_LOCAL=1', - '-e', 'TRIAL_ID=1', - '-e', 'FUZZER=mutation_analysis', - '-e', 'DEBUG_BUILDER=1', - *( - [] - if host_mua_mapped_dir is None - else ['-v', f'{host_mua_mapped_dir}:/mapped_dir'] - ), - builder_image_url, '/bin/bash', '-c', - command + 'docker', 
'run', '--name', container_name, '-v', mount_arg, '-e', + 'FUZZ_OUTSIDE_EXPERIMENT=1', '-e', 'FORCE_LOCAL=1', '-e', 'TRIAL_ID=1', + '-e', 'FUZZER=mutation_analysis', '-e', 'DEBUG_BUILDER=1', + *([] if host_mua_mapped_dir is None else + ['-v', f'{host_mua_mapped_dir}:/mapped_dir']), builder_image_url, + '/bin/bash', '-c', command ] - - logger.info('mua run command:'+str(mua_run_cmd)) + + logger.info('mua run command:' + str(mua_run_cmd)) new_process.execute(mua_run_cmd, write_to_stdout=True) - - #docker_commit_command = 'docker commit '+container_name+' '+new_image_name - #new_process.execute(docker_commit_command.split(' ')) - - + def copy_coverage_binaries(benchmark): """Copy coverage binaries in a local experiment.""" diff --git a/experiment/build/test_docker_images.py b/experiment/build/test_docker_images.py index 1e9101624..3c8925447 100644 --- a/experiment/build/test_docker_images.py +++ b/experiment/build/test_docker_images.py @@ -23,21 +23,41 @@ def test_images_to_build_list(): benchmarks = ['libxml', 'libpng'] all_images = docker_images.get_images_to_build(fuzzers, benchmarks) assert set(all_images.keys()) == set([ - 'base-image', 'worker', 'dispatcher-image', 'libxml-project-builder', - 'libpng-project-builder', 'afl-libxml-builder-intermediate', - 'afl-libxml-intermediate-runner', 'afl-libxml-builder', - 'afl-libxml-builder-debug', 'coverage-libxml-builder', - 'afl-libpng-builder', 'afl-libpng-builder-debug', - 'afl-libpng-intermediate-runner', 'afl-libpng-builder-intermediate', - 'afl-libpng-runner', 'libfuzzer-libxml-builder-intermediate', - 'libfuzzer-libxml-builder', 'libfuzzer-libxml-builder-debug', + 'base-image', + 'worker', + 'dispatcher-image', + 'libxml-project-builder', + 'libpng-project-builder', + 'afl-libxml-builder-intermediate', + 'afl-libxml-intermediate-runner', + 'afl-libxml-builder', + 'afl-libxml-builder-debug', + 'coverage-libxml-builder', + 'afl-libpng-builder', + 'afl-libpng-builder-debug', + 'afl-libpng-intermediate-runner', + 
'afl-libpng-builder-intermediate', + 'afl-libpng-runner', + 'libfuzzer-libxml-builder-intermediate', + 'libfuzzer-libxml-builder', + 'libfuzzer-libxml-builder-debug', 'libfuzzer-libpng-builder-intermediate', - 'libfuzzer-libxml-intermediate-runner', 'libfuzzer-libxml-runner', - 'libfuzzer-libpng-builder', 'libfuzzer-libpng-builder-debug', - 'libfuzzer-libpng-intermediate-runner', 'libfuzzer-libpng-runner', - 'coverage-libxml-builder-intermediate', 'coverage-libpng-builder', - 'coverage-libxml-builder-intermediate', 'afl-libxml-runner', - 'coverage-libpng-builder-intermediate' + 'libfuzzer-libxml-intermediate-runner', + 'libfuzzer-libxml-runner', + 'libfuzzer-libpng-builder', + 'libfuzzer-libpng-builder-debug', + 'libfuzzer-libpng-intermediate-runner', + 'libfuzzer-libpng-runner', + 'coverage-libxml-builder-intermediate', + 'coverage-libpng-builder', + 'coverage-libxml-builder-intermediate', + 'afl-libxml-runner', + 'coverage-libpng-builder-intermediate', + # mutation testing images + 'mutation_analysis-libpng-builder', + 'mutation_analysis-libpng-builder-intermediate', + 'mutation_analysis-libxml-builder', + 'mutation_analysis-libxml-builder-intermediate', ]) diff --git a/experiment/dispatcher.py b/experiment/dispatcher.py index 00980bd18..796c796b8 100755 --- a/experiment/dispatcher.py +++ b/experiment/dispatcher.py @@ -36,7 +36,7 @@ from experiment import scheduler from experiment import stop_experiment -LOOP_WAIT_SECONDS = 5 * 60 #2 +LOOP_WAIT_SECONDS = 5 * 60 # TODO(metzman): Convert more uses of os.path.join to exp_path.path. 
diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index 5b2a960ee..79dcbb226 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -21,6 +21,7 @@ import os import pathlib import posixpath +import shlex import shutil import sys import tempfile @@ -28,13 +29,13 @@ import time from typing import List import queue +from pathlib import Path import psutil from sqlalchemy import func from sqlalchemy import orm from common import benchmark_utils -from common import environment from common import experiment_utils from common import experiment_path as exp_path from common import filesystem @@ -59,7 +60,7 @@ NUM_RETRIES = 3 RETRY_DELAY = 3 -FAIL_WAIT_SECONDS = 5#30 +FAIL_WAIT_SECONDS = 30 SNAPSHOT_QUEUE_GET_TIMEOUT = 1 SNAPSHOTS_BATCH_SAVE_SIZE = 100 @@ -101,12 +102,13 @@ def _process_init(cores_queue): psutil.Process().cpu_affinity([cpu]) -def measure_loop(experiment: str, - max_total_time: int, - measurers_cpus=None, - runners_cpus=None, - region_coverage=False, - mutation_analysis=False): +def measure_loop( # pylint: disable=too-many-arguments + experiment: str, + max_total_time: int, + measurers_cpus=None, + runners_cpus=None, + region_coverage=False, + mutation_analysis=False): """Continuously measure trials for |experiment|.""" logger.info('Start measure_loop.') @@ -126,19 +128,19 @@ def measure_loop(experiment: str, with multiprocessing.Pool( *pool_args) as pool, multiprocessing.Manager() as manager: set_up_coverage_binaries(pool, experiment) - if(mutation_analysis): + if mutation_analysis: set_up_mua_binaries(pool, experiment) # Using Multiprocessing.Queue will fail with a complaint about # inheriting queue. # pytype: disable=attribute-error multiprocessing_queue = manager.Queue() + # pytype: enable=attribute-error while True: try: # Get whether all trials have ended before we measure to prevent # races. 
all_trials_ended = scheduler.all_trials_ended(experiment) - if not measure_all_trials(experiment, max_total_time, pool, multiprocessing_queue, region_coverage, mutation_analysis): @@ -156,8 +158,9 @@ def measure_loop(experiment: str, logger.info('Finished measure loop.') -def measure_all_trials(experiment: str, max_total_time: int, pool, - multiprocessing_queue, region_coverage, mutation_analysis) -> bool: +def measure_all_trials( # pylint: disable=too-many-arguments + experiment: str, max_total_time: int, pool, multiprocessing_queue, + region_coverage, mutation_analysis) -> bool: """Get coverage data (with coverage runs) for all active trials. Note that this should not be called unless multiprocessing.set_start_method('spawn') was called first. Otherwise it will use fork which breaks logging.""" @@ -174,12 +177,11 @@ def measure_all_trials(experiment: str, max_total_time: int, pool, return False measure_trial_args = [ - (unmeasured_snapshot, max_cycle, multiprocessing_queue, region_coverage, mutation_analysis) - for unmeasured_snapshot in unmeasured_snapshots + (unmeasured_snapshot, max_cycle, multiprocessing_queue, region_coverage, + mutation_analysis) for unmeasured_snapshot in unmeasured_snapshots ] - result = pool.starmap_async(measure_trial, - measure_trial_args) + result = pool.starmap_async(measure_trial, measure_trial_args) # Poll the queue for snapshots and save them in batches until the pool is # done processing each unmeasured snapshot. Then save any remaining @@ -208,8 +210,7 @@ def save_snapshots(): # If "ready" that means pool has finished calling on each # unmeasured_snapshot. Since it is finished and the queue is # empty, we can stop checking the queue for more snapshots. 
- logger.debug( - 'Finished call to map with measure_trial.') + logger.debug('Finished call to map with measure_trial.') break if len(snapshots) >= SNAPSHOTS_BATCH_SAVE_SIZE * .75: @@ -255,10 +256,14 @@ def _query_unmeasured_trials(experiment: str): with db_utils.session_scope() as session: trial_query = session.query(models.Trial) - no_snapshots_filter = ~models.Trial.id.in_(ids_of_trials_with_snapshots) # trial has no snapshot - started_trials_filter = ~models.Trial.time_started.is_(None) # trial already started - nonpreempted_trials_filter = ~models.Trial.preempted # trial not preempted - experiment_trials_filter = models.Trial.experiment == experiment # trial matches the current experiment + no_snapshots_filter = ~models.Trial.id.in_( + ids_of_trials_with_snapshots) # trial has no snapshot + started_trials_filter = ~models.Trial.time_started.is_( + None) # trial already started + # trial not preempted + nonpreempted_trials_filter = ~models.Trial.preempted + # trial matches the current experiment + experiment_trials_filter = models.Trial.experiment == experiment return trial_query.filter(experiment_trials_filter, no_snapshots_filter, started_trials_filter, nonpreempted_trials_filter) @@ -411,35 +416,40 @@ def initialize_measurement_dirs(self): filesystem.recreate_directory(directory) filesystem.create_directory(self.report_dir) - def create_dir(self, dir): - if(not os.path.exists(dir)): - os.makedirs(dir, exist_ok=True) - return os.path.exists(dir) - + def create_dir(self, directory): + """Create directory if it does not exist, + also creates parent directories.""" + if not os.path.exists(directory): + os.makedirs(directory, exist_ok=True) + return os.path.exists(directory) + def initialize_mua_environment(self): - """build all covered mutants""" + """Build all covered mutants.""" # find correct container and start it - container_name = MUTATION_ANALYSIS_IMAGE_NAME + '_' + self.benchmark + '_container' + container_name = \ + 
f'{MUTATION_ANALYSIS_IMAGE_NAME}_{self.benchmark}_container' - docker_start_command = 'docker start '+container_name + docker_start_command = 'docker start ' + container_name new_process.execute(docker_start_command.split(' ')) - experiment_filestore_path = experiment_utils.get_experiment_filestore_path() - shared_mua_binaries_dir = os.path.join(experiment_filestore_path, 'mua-binaries') - + experiment_filestore_path = \ + Path(experiment_utils.get_experiment_filestore_path()) + shared_mua_binaries_dir = experiment_filestore_path / 'mua-binaries' + # create corpi directory entry - corpi_dir = shared_mua_binaries_dir+'/corpi' - fuzzer_corpi_dir = corpi_dir + '/' + self.fuzzer - trial_corpi_dir = fuzzer_corpi_dir + '/' + str(self.trial_num) + corpi_dir = Path(shared_mua_binaries_dir) / 'corpi' + fuzzer_corpi_dir = corpi_dir / self.fuzzer + trial_corpi_dir = fuzzer_corpi_dir / str(self.trial_num) self.create_dir(fuzzer_corpi_dir) # create covered_mutants directory entry (contains ids) - mutants_ids_dir_entry = shared_mua_binaries_dir+'/mutant_ids'+'/'+self.fuzzer+'/'+str(self.trial_num) + mutants_ids_dir_entry = (shared_mua_binaries_dir / 'mutant_ids' / + self.fuzzer / str(self.trial_num)) self.create_dir(mutants_ids_dir_entry) # create mutants directory - mutants_dir_entry = shared_mua_binaries_dir+'/mutants'+'/' + mutants_dir_entry = shared_mua_binaries_dir / 'mutants' self.create_dir(mutants_dir_entry) # copy corpus from self.corpus_dir into container @@ -450,33 +460,42 @@ def initialize_mua_environment(self): fuzz_target = benchmark_utils.get_fuzz_target(self.benchmark) # execute command on container - command = '(python3 /mutator/mua_build_ids.py '+fuzz_target+' '+experiment_name+' '+self.fuzzer+' '+str(self.trial_num)+'; )' - - docker_exec_command = 'docker exec -t '+container_name+' /bin/bash -c' - logger.info('mua initialize command:'+str(docker_exec_command)) - docker_exec_command_formated = docker_exec_command.split(" ") - 
docker_exec_command_formated.append(command) - logger.info(docker_exec_command_formated) - new_process.execute(docker_exec_command_formated) + command = [ + 'python3', '/mutator/mua_build_ids.py', fuzz_target, + experiment_name, self.fuzzer, + str(self.trial_num) + ] + + docker_exec_command = [ + 'docker', 'exec', '-t', container_name, '/bin/bash', '-c', + shlex.join(command) + ] + + logger.info('mua initialize command:' + str(docker_exec_command)) + logger.info(docker_exec_command) + new_process.execute(docker_exec_command) def process_mua(self): """runs mua measurement""" # get necessary info - container_name = 'mutation_analysis_'+self.benchmark+'_container' + container_name = 'mutation_analysis_' + self.benchmark + '_container' experiment_name = experiment_utils.get_experiment_name() - fuzz_target = benchmark_utils.get_fuzz_target(self.benchmark) - + fuzz_target = benchmark_utils.get_fuzz_target(self.benchmark) # run all needed mutants in container - command = '(python3 /mutator/mua_run_mutants.py '+fuzz_target+' '+experiment_name+' '+self.fuzzer+' '+str(self.trial_num)+'; )' - - docker_exec_command = 'docker exec -t '+container_name+' /bin/bash -c' - logger.info('mua process command:'+str(docker_exec_command)) - docker_exec_command_formated = docker_exec_command.split(" ") - docker_exec_command_formated.append(command) - logger.info(docker_exec_command_formated) - new_process.execute(docker_exec_command_formated, write_to_stdout=True) - + command = [ + 'python3', '/mutator/mua_run_mutants.py', fuzz_target, + experiment_name, self.fuzzer, + str(self.trial_num) + ] + + docker_exec_command = [ + 'docker', 'exec', '-t', container_name, '/bin/bash', '-c', + shlex.join(command) + ] + logger.info('mua process command:' + str(docker_exec_command)) + new_process.execute(docker_exec_command, write_to_stdout=True) + def run_cov_new_units(self): """Run the coverage binary on new units.""" coverage_binary = coverage_utils.get_coverage_binary(self.benchmark) @@ -629,8 
+648,8 @@ def get_fuzzer_stats(stats_filestore_path): def measure_trial(measure_req, max_cycle: int, - multiprocessing_queue: multiprocessing.Queue, - region_coverage, mutation_analysis) -> models.Snapshot: + multiprocessing_queue: multiprocessing.Queue, region_coverage, + mutation_analysis) -> models.Snapshot: """Measure the coverage obtained by |trial_num| on |benchmark| using |fuzzer|.""" initialize_logs() @@ -640,9 +659,9 @@ def measure_trial(measure_req, max_cycle: int, for cycle in range(min_cycle, max_cycle + 1): try: snapshot = measure_snapshot(measure_req.fuzzer, - measure_req.benchmark, - measure_req.trial_id, cycle, - region_coverage, mutation_analysis) + measure_req.benchmark, + measure_req.trial_id, cycle, + region_coverage, mutation_analysis) if not snapshot: break multiprocessing_queue.put(snapshot) @@ -657,11 +676,11 @@ def measure_trial(measure_req, max_cycle: int, logger.debug('Done measuring trial: %d.', measure_req.trial_id) -def measure_snapshot( # pylint: disable=too-many-locals +def measure_snapshot( # pylint: disable=too-many-locals,too-many-arguments fuzzer: str, benchmark: str, trial_num: int, cycle: int, region_coverage: bool, mutation_analysis: bool) -> models.Snapshot: - """Measure coverage and mua of the snapshot for |cycle| for |trial_num| of |fuzzer| - and |benchmark|.""" + """Measure coverage and mua of the snapshot for |cycle| for |trial_num| + of |fuzzer| and |benchmark|.""" snapshot_logger = logs.Logger( default_extras={ 'fuzzer': fuzzer, @@ -693,7 +712,7 @@ def measure_snapshot( # pylint: disable=too-many-locals snapshot_measurer.initialize_measurement_dirs() snapshot_measurer.extract_corpus(corpus_archive_dst) - if(mutation_analysis): + if mutation_analysis: snapshot_measurer.initialize_mua_environment() # Don't keep corpus archives around longer than they need to be. 
@@ -717,15 +736,16 @@ def measure_snapshot( # pylint: disable=too-many-locals fuzzer_stats=fuzzer_stats_data, crashes=crashes) - if(mutation_analysis): + if mutation_analysis: snapshot_measurer.process_mua() measuring_time = round(time.time() - measuring_start_time, 2) snapshot_logger.info('Measured cycle: %d in %f seconds.', cycle, measuring_time) - + return snapshot + def set_up_coverage_binaries(pool, experiment): """Set up coverage binaries for all benchmarks in |experiment|.""" # Use set comprehension to select distinct benchmarks. @@ -740,6 +760,7 @@ def set_up_coverage_binaries(pool, experiment): filesystem.create_directory(coverage_binaries_dir) pool.map(set_up_coverage_binary, benchmarks) + def set_up_mua_binaries(pool, experiment): """Set up mua finder binaries for all benchmarks in |experiment|.""" # Use set comprehension to select distinct benchmarks. @@ -754,6 +775,7 @@ def set_up_mua_binaries(pool, experiment): filesystem.create_directory(mua_binaries_dir) pool.map(set_up_mua_binary, benchmarks) + def set_up_mua_binary(benchmark): """Set up mua finder binaries for |benchmark|.""" initialize_logs() @@ -761,15 +783,14 @@ def set_up_mua_binary(benchmark): benchmark_mua_binary_dir = mua_binaries_dir / benchmark filesystem.create_directory(benchmark_mua_binary_dir) archive_name = f'mutation-analysis-build-{benchmark}.tar.gz' - archive_filestore_path = exp_path.filestore(mua_binaries_dir / - archive_name) - filestore_utils.cp(archive_filestore_path, - str(benchmark_mua_binary_dir)) + archive_filestore_path = exp_path.filestore(mua_binaries_dir / archive_name) + filestore_utils.cp(archive_filestore_path, str(benchmark_mua_binary_dir)) archive_path = benchmark_mua_binary_dir / archive_name with tarfile.open(archive_path, 'r:gz') as tar: tar.extractall(benchmark_mua_binary_dir) os.remove(archive_path) + def set_up_coverage_binary(benchmark): """Set up coverage binaries for |benchmark|.""" initialize_logs() diff --git 
a/experiment/measurer/test_measure_manager.py b/experiment/measurer/test_measure_manager.py index 69e6400a6..d8ce1d0cb 100644 --- a/experiment/measurer/test_measure_manager.py +++ b/experiment/measurer/test_measure_manager.py @@ -168,21 +168,20 @@ def test_generate_summary(mocked_get_coverage_binary, mocked_execute, @mock.patch('common.logs.error') @mock.patch('experiment.measurer.measure_manager.initialize_logs') @mock.patch('multiprocessing.Queue') -@mock.patch('experiment.measurer.measure_manager.measure_snapshot_coverage') -def test_measure_trial_coverage(mocked_measure_snapshot_coverage, mocked_queue, - _, __): - """Tests that measure_trial_coverage works as expected.""" +@mock.patch('experiment.measurer.measure_manager.measure_snapshot') +def test_measure_trial(mocked_measure_snapshot, mocked_queue, _, __): + """Tests that measure_trial works as expected.""" min_cycle = 1 max_cycle = 10 measure_request = measure_manager.SnapshotMeasureRequest( FUZZER, BENCHMARK, TRIAL_NUM, min_cycle) - measure_manager.measure_trial_coverage(measure_request, max_cycle, - mocked_queue(), False) + measure_manager.measure_trial(measure_request, max_cycle, mocked_queue(), + False, True) expected_calls = [ - mock.call(FUZZER, BENCHMARK, TRIAL_NUM, cycle, False) + mock.call(FUZZER, BENCHMARK, TRIAL_NUM, cycle, False, True) for cycle in range(min_cycle, max_cycle + 1) ] - assert mocked_measure_snapshot_coverage.call_args_list == expected_calls + assert mocked_measure_snapshot.call_args_list == expected_calls @mock.patch('common.filestore_utils.ls') @@ -192,7 +191,7 @@ def test_measure_all_trials_not_ready(mocked_rsync, mocked_ls, experiment): mocked_ls.return_value = new_process.ProcessResult(1, '', False) assert measure_manager.measure_all_trials( experiment_utils.get_experiment_name(), MAX_TOTAL_TIME, - test_utils.MockPool(), queue.Queue(), False) + test_utils.MockPool(), queue.Queue(), False, False) assert not mocked_rsync.called @@ -209,7 +208,7 @@ def 
test_measure_all_trials_no_more(mocked_directories_have_same_files, mock_pool = test_utils.MockPool() assert not measure_manager.measure_all_trials( experiment_utils.get_experiment_name(), MAX_TOTAL_TIME, mock_pool, - queue.Queue(), False) + queue.Queue(), False, False) @mock.patch('common.new_process.execute') @@ -284,9 +283,9 @@ class TestIntegrationMeasurement: # portable binary. @pytest.mark.skipif(not os.getenv('FUZZBENCH_TEST_INTEGRATION'), reason='Not running integration tests.') - def test_measure_snapshot_coverage( # pylint: disable=too-many-locals + def test_measure_snapshot( # pylint: disable=too-many-locals self, db, experiment, tmp_path): - """Integration test for measure_snapshot_coverage.""" + """Integration test for measure_snapshot.""" # WORK is set by experiment to a directory that only makes sense in a # fakefs. A directory containing necessary llvm tools is also added to # PATH. @@ -295,8 +294,8 @@ def test_measure_snapshot_coverage( # pylint: disable=too-many-locals os.environ['WORK'] = str(tmp_path) # Set up the coverage binary. benchmark = 'freetype2_ftfuzzer' - coverage_binary_src = get_test_data_path( - 'test_measure_snapshot_coverage', benchmark + '-coverage') + coverage_binary_src = get_test_data_path('test_measure_snapshot', + benchmark + '-coverage') benchmark_cov_binary_dir = os.path.join( build_utils.get_coverage_binaries_dir(), benchmark) @@ -320,7 +319,7 @@ def test_measure_snapshot_coverage( # pylint: disable=too-many-locals # Set up the snapshot archive. 
cycle = 1 - archive = get_test_data_path('test_measure_snapshot_coverage', + archive = get_test_data_path('test_measure_snapshot', f'corpus-archive-{cycle:04d}.tar.gz') corpus_dir = os.path.join(snapshot_measurer.trial_dir, 'corpus') os.makedirs(corpus_dir) @@ -330,9 +329,9 @@ def test_measure_snapshot_coverage( # pylint: disable=too-many-locals mocked_cp.return_value = new_process.ProcessResult(0, '', False) # TODO(metzman): Create a system for using actual buckets in # integration tests. - snapshot = measure_manager.measure_snapshot_coverage( + snapshot = measure_manager.measure_snapshot( snapshot_measurer.fuzzer, snapshot_measurer.benchmark, - snapshot_measurer.trial_num, cycle, False) + snapshot_measurer.trial_num, cycle, False, True) assert snapshot assert snapshot.time == cycle * experiment_utils.get_snapshot_seconds() assert snapshot.edges_covered == 4629 diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py index 7baac7c8a..94f8c9e89 100644 --- a/experiment/run_experiment.py +++ b/experiment/run_experiment.py @@ -347,7 +347,7 @@ def start_experiment( # pylint: disable=too-many-arguments if config['custom_seed_corpus_dir']: validate_custom_seed_corpus(config['custom_seed_corpus_dir'], benchmarks) - + config['mutation_analysis'] = mutation_analysis return start_experiment_from_full_config(config) @@ -521,10 +521,8 @@ def start(self): set_concurrent_builds_arg, '-e', set_worker_pool_name_arg, - *( - ['-e', f'HOST_MUA_MAPPED_DIR={mua_mapped_dir}'] - if mua_mapped_dir else [] - ), + *(['-e', f'HOST_MUA_MAPPED_DIR={mua_mapped_dir}'] + if mua_mapped_dir else []), ] command = [ 'docker', @@ -537,8 +535,12 @@ def start(self): shared_experiment_filestore_arg, '-v', shared_report_filestore_arg, - '-v', # Just to make repeated run starts faster. 
- "/tmp/dispatcher_venv:/work/src/.venv/lib/python3.10/site-packages", + # To avoid having the dispatcher image reinstall the same python + # packages with every run the site-packages folder can be mapped + # to a volume. This reduces the time needed when repeating local + # starts by several minutes. + '-v', + '/tmp/dispatcher_venv:/work/src/.venv/lib/python3.10/site-packages', ] + environment_args + [ '--shm-size=2g', '--cap-add=SYS_PTRACE', diff --git a/experiment/scheduler.py b/experiment/scheduler.py index fa17da4b9..0d9da0b22 100644 --- a/experiment/scheduler.py +++ b/experiment/scheduler.py @@ -37,9 +37,9 @@ # Give the trial runner a little extra time to shut down and account for how # long it can take to actually start running once an instance is started. 5 # minutes is an arbitrary amount of time. -GRACE_TIME_SECONDS = 5 * 60 #10 +GRACE_TIME_SECONDS = 5 * 60 -FAIL_WAIT_SECONDS = 10 * 60 #10 +FAIL_WAIT_SECONDS = 10 * 60 logger = logs.Logger() # pylint: disable=invalid-name diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index 0dc40b5fb..a2a63b79e 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -20,8 +20,20 @@ FROM gcr.io/fuzzbench/base-image AS base-image FROM $parent_image -RUN apt-get update && apt-get install -y \ - lsb-release wget software-properties-common gnupg +# Required packages +RUN DEBIAN_FRONTEND=noninteractive \ + apt-get update && \ + apt-get install -y \ + lsb-release \ + wget \ + software-properties-common gnupg \ + openjdk-11-jdk \ + zlib1g-dev \ + file \ + pipx \ + python3.8-venv + +# llvm 15 RUN mkdir /llvm && \ cd /llvm && \ bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" && \ @@ -29,53 +41,6 @@ RUN mkdir /llvm && \ chmod +x llvm.sh && \ ./llvm.sh 15 -# WORKDIR /home/ -# RUN mkdir -p downloads -# WORKDIR /home/downloads -# RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py -# RUN python3 get-pip.py - -RUN 
pip3 install wllvm - -# ENV PATH "/root/toolchains/build/llvm+clang-901-x86_64-linux-gnu_build/bin/:$PATH" -# ENV LLVM_COMPILER "clang" - -RUN mkdir -p /tmp/gradle && \ - cd /tmp/gradle && \ - wget -q https://services.gradle.org/distributions/gradle-6.8-bin.zip && \ - unzip gradle-6.8-bin.zip && \ - mv gradle-6.8 /usr/local/gradle && \ - rm -r /tmp/gradle - -ENV PATH "/usr/local/gradle/bin/:$PATH" - -#### install gllvm -ENV PATH="${PATH}:/root/.cargo/bin:/usr/local/go/bin:/root/go/bin" -RUN mkdir /tmp/gllvm/ && \ - cd /tmp/gllvm/ && \ - wget -q -c https://dl.google.com/go/go1.16.15.linux-amd64.tar.gz -O - | tar -xz -C /usr/local && \ - go get github.com/SRI-CSL/gllvm/cmd/... && \ - rm -r /tmp/gllvm/ - -RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y openjdk-11-jdk zlib1g-dev file pipx python3.8-venv - # cmake \ - # binutils-dev \ - # libcurl4-openssl-dev \ - # zlib1g-dev \ - # libdw-dev \ - # libiberty-dev \ - # libssl-dev \ - # libelf-dev \ - # libdw-dev \ - # libidn2-dev \ - # libidn2-0 \ - # idn2 \ - # libstdc++6 -RUN pipx install hatch - -# RUN git clone https://github.com/CISPA-SysSec/mua_fuzzer_bench mutator -COPY mua_fuzzer_bench /mutator - RUN update-alternatives --install \ /usr/local/bin/llvm-config llvm-config /usr/lib/llvm-15/bin/llvm-config 200 \ --slave /usr/local/bin/llvm-ar llvm-ar /usr/lib/llvm-15/bin/llvm-ar \ @@ -102,52 +67,38 @@ RUN update-alternatives --install \ --slave /usr/local/bin/clang clang /usr/lib/llvm-15/bin/clang \ --slave /usr/local/bin/clang++ clang++ /usr/lib/llvm-15/bin/clang++ -RUN cd /mutator && \ - echo "llvmBinPath=/usr/lib/llvm-15/bin/" > gradle.properties - -RUN ln -s /mutator/exec-recorder.py /exec-recorder.py -RUN ln -s /exec-recorder.py /bin/gclang-wrap -RUN ln -s /exec-recorder.py /bin/gclang++-wrap -RUN ln -s /mutator/mua_build_benchmark.py /bin/mua_build_benchmark - -# COPY modules /home/mutator/modules -# COPY build.gradle /home/mutator/ -# COPY run_mutation.py /home/mutator/ -# RUN chmod +x 
run_mutation.py -# COPY settings.gradle /home/mutator -# RUN cd /mutator && gradle clean && gradle build -# RUN ldconfig /mutator/build/install/LLVM_Mutation_Tool/lib/ - -# RUN ln /usr/bin/llvm-link-15 /bin/llvm-link - -#RUN echo "transfering control flow to mua_idle.py" -#RUN python3 /mutator/mua_idle.py - +# wllvm +RUN pip3 install wllvm -# # set library paths for used shared libraries s.t. the system finds them -# ENV LD_LIBRARY_PATH /home/mutator/build/install/LLVM_Mutation_Tool/lib/ -# # For all subjects provide the path to the default main here. This is based on oss-fuzz convention. -# ENV LIB_FUZZING_ENGINE="/home/mutator/programs/common/main.cc" -# ENV CC=gclang -# ENV CXX=gclang++ +# gradle +RUN mkdir -p /tmp/gradle && \ + cd /tmp/gradle && \ + wget -q https://services.gradle.org/distributions/gradle-6.8-bin.zip && \ + unzip gradle-6.8-bin.zip && \ + mv gradle-6.8 /usr/local/gradle && \ + rm -r /tmp/gradle -######## +ENV PATH "/usr/local/gradle/bin/:$PATH" -# ENV LF_PATH /tmp/libfuzzer.zip +# gllvm +ENV PATH="${PATH}:/root/.cargo/bin:/usr/local/go/bin:/root/go/bin" +RUN mkdir /tmp/gllvm/ && \ + cd /tmp/gllvm/ && \ + wget -q -c https://dl.google.com/go/go1.16.15.linux-amd64.tar.gz -O - | tar -xz -C /usr/local && \ + go get github.com/SRI-CSL/gllvm/cmd/... && \ + rm -r /tmp/gllvm/ -# # Use a libFuzzer version that supports clang source-based coverage. -# # This libfuzzer is 0b5e6b11c358e704384520dc036eddb5da1c68bf with -# # https://github.com/google/fuzzbench/blob/cf86138081ec705a47ce0a4bab07b5737292e7e0/fuzzers/coverage/patch.diff -# # applied. 
+# hatch +RUN pipx install hatch -# RUN wget https://storage.googleapis.com/fuzzbench-artifacts/libfuzzer-coverage.zip -O $LF_PATH && \ -# echo "cc78179f6096cae4b799d0cc9436f000cc0be9b1fb59500d16b14b1585d46b61 $LF_PATH" | sha256sum --check --status && \ -# mkdir /tmp/libfuzzer && \ -# cd /tmp/libfuzzer && \ -# unzip $LF_PATH && \ -# bash build.sh && \ -# cp libFuzzer.a /usr/lib && \ -# rm -rf /tmp/libfuzzer $LF_PATH +# mua_fuzzer_bench +RUN git clone https://github.com/phi-go/mua_fuzzer_bench mutator && \ + git checkout d3d361092067423dc02ed4e9d82eefe694179ab5 +RUN cd /mutator && \ + echo "llvmBinPath=/usr/lib/llvm-15/bin/" > gradle.properties -# clear && fuzzer_build && mua_build_benchmark && pushd /mutator && gradle build && ldconfig /mutator/build/install/LLVM_Mutation_Tool/lib/ && pipx run hatch run src/mua_fuzzer_benchmark/eval.py locator_local --config-path /tmp/config.json --result-path /tmp/test/ ; popd \ No newline at end of file +RUN ln -s /mutator/exec-recorder.py /exec-recorder.py && \ + ln -s /exec-recorder.py /bin/gclang-wrap && \ + ln -s /exec-recorder.py /bin/gclang++-wrap && \ + ln -s /mutator/mua_build_benchmark.py /bin/mua_build_benchmark diff --git a/fuzzers/mutation_analysis/fuzzer.py b/fuzzers/mutation_analysis/fuzzer.py index 5dfd54454..4684152fa 100644 --- a/fuzzers/mutation_analysis/fuzzer.py +++ b/fuzzers/mutation_analysis/fuzzer.py @@ -11,11 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Integration code for clang source-based coverage builds.""" +"""Integration code for mua_fuzzer_bench measurer builds.""" import os import subprocess -import time from fuzzers import utils @@ -23,8 +22,7 @@ def build(): - - # """Build benchmark.""" + """Build benchmark.""" cflags = [ # '-fprofile-instr-generate', '-fcoverage-mapping', '-gline-tables-only' '-fPIE', @@ -34,7 +32,7 @@ def build(): os.environ['CC'] = 'gclang-wrap' os.environ['CXX'] = 'gclang++-wrap' - os.environ['LLVM_COMPILER_PATH'] = '/usr/lib/llvm-15/bin/' + os.environ['LLVM_COMPILER_PATH'] = '/usr/lib/llvm-15/bin/' os.environ['FUZZER_LIB'] = '/mutator/dockerfiles/programs/common/main.cc' os.environ['MUA_RECORDING_DB'] = MUA_RECORDING_DB os.environ['llvmBinPath'] = '/usr/local/bin/' @@ -42,15 +40,8 @@ def build(): if os.path.exists(MUA_RECORDING_DB): os.unlink(MUA_RECORDING_DB) - # fuzzer_lib = env['FUZZER_LIB'] - # env['LIB_FUZZING_ENGINE'] = fuzzer_lib - # if os.path.exists(fuzzer_lib): - # # Make /usr/lib/libFuzzingEngine.a point to our library for OSS-Fuzz - # # so we can build projects that are using -lFuzzingEngine. 
- # shutil.copy(fuzzer_lib, OSS_FUZZ_LIB_FUZZING_ENGINE_PATH) - build_script = os.path.join(os.environ['SRC'], 'build.sh') - print(f"build_script: {build_script}") + print(f'build_script: {build_script}') benchmark = os.getenv('BENCHMARK') fuzzer = os.getenv('FUZZER') @@ -59,20 +50,3 @@ def build(): utils.build_benchmark() subprocess.call(['/mutator/fuzzbench_build.sh']) - - # while(True): time.sleep(1) - - - - -# fuzzer_build # runs fuzzer.py build -# mua_build_benchmark # builds bitcode to /out/filename.bc and config to /tmp/config - -# cd /mutator && gradle build #baut tooling -# ldconfig /mutator/build/install/LLVM_Mutation_Tool/lib/ -# pipx run hatch run src/mua_fuzzer_benchmark/eval.py locator_local --config-path /tmp/config.json --result-path /tmp/test/ # stores infos in /tmp/test - - -# /tmp/test/progs/xml/xml.locator /benchmark.yaml #create a list of all possible mutations -# cd /mutator && python locator_signal_to_mutation_list.py --trigger-signal-dir /tmp/trigger_signal/ --prog xml --out /tmp/mualist.json && cat /tmp/mualist.json -# cd /mutator && MUT_NUM_CPUS=24 pipx run hatch run src/mua_fuzzer_benchmark/eval.py locator_mutants_local --result-path /tmp/mutants_$(date +"%Y%m%d_%H%M%S") --statsdb /tmp/test/stats.db --mutation-list /tmp/mualist.json diff --git a/run_mua.sh b/run_mua.sh deleted file mode 100755 index 2b6773df5..000000000 --- a/run_mua.sh +++ /dev/null @@ -1,4 +0,0 @@ -#! /bin/bash -source .venv/bin/activate - -PYTHONPATH=. 
python3 experiment/run_experiment.py --experiment-config /tmp/experiment_conf.yaml --benchmarks libxml2_xml --experiment-name mua-test-$(date +"%Y%m%d-%H%M%S") -f afl libfuzzer -a From 02039ab0cad690bcbdb6a6b3efcb7d954f3db2fa Mon Sep 17 00:00:00 2001 From: phi-go Date: Tue, 12 Dec 2023 15:22:23 +0000 Subject: [PATCH 14/69] impl build_mua for gcb_build (untested) --- experiment/build/gcb_build.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/experiment/build/gcb_build.py b/experiment/build/gcb_build.py index f553848c7..23b0900ab 100644 --- a/experiment/build/gcb_build.py +++ b/experiment/build/gcb_build.py @@ -67,6 +67,21 @@ def build_coverage(benchmark): _build(config, config_name) +def build_mua(benchmark): + """Build mutation testing image for benchmark on GCB.""" + buildable_images = _get_buildable_images(benchmark=benchmark) + image_templates = { + image_name: image_specs + for image_name, image_specs in buildable_images.items() + if (image_name == (benchmark + '-project-builder') or + image_specs['type'] == 'mutation_analysis') + } + config = generate_cloudbuild.create_cloudbuild_spec( + image_templates, benchmark=benchmark, fuzzer='mutation_analysis') + config_name = f'benchmark-{benchmark}-mutation_analysis' + _build(config, config_name) + + def _build( config: Dict, config_name: str, From 07a45f56706765ab610b8463a01ae7e97f910e66 Mon Sep 17 00:00:00 2001 From: phi-go Date: Fri, 22 Dec 2023 18:15:12 +0000 Subject: [PATCH 15/69] changes fixing performance and cloud issues --- .dockerignore | 2 +- .gitignore | 1 + analysis/benchmark_results.py | 7 +- analysis/data_utils.py | 6 + analysis/experiment_results.py | 5 +- analysis/generate_report.py | 51 +- analysis/report_templates/with_mua.html | 492 +++++++++++++++++++ experiment/build/builder.py | 10 +- experiment/build/local_build.py | 22 +- experiment/build/test_builder.py | 10 +- experiment/measurer/measure_manager.py | 245 +++++++-- experiment/measurer/test_measure_manager.py | 17 +- 
experiment/reporter.py | 10 +- experiment/run_experiment.py | 12 +- experiment/runner.py | 31 ++ experiment/test_data/experiment-config.yaml | 1 + experiment/test_reporter.py | 5 +- experiment/test_runner.py | 1 - fuzzers/mutation_analysis/.dockerignore | 2 +- fuzzers/mutation_analysis/builder.Dockerfile | 7 +- fuzzers/mutation_analysis/fuzzer.py | 2 +- service/gcbrun_experiment.py | 1 + test_experiment.yaml | 24 + 23 files changed, 885 insertions(+), 79 deletions(-) create mode 100644 analysis/report_templates/with_mua.html create mode 100644 test_experiment.yaml diff --git a/.dockerignore b/.dockerignore index 1603d3695..b9f8034f9 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,4 +9,4 @@ docs report* fuzzers/mutation_analysis/mua_fuzzer_bench/.git -fuzzers/mutation_analysis/mua_fuzzer_bench/fuzzbench_mapped_dir/ \ No newline at end of file +fuzzers/mutation_analysis/fuzzbench_mapped_dir/ \ No newline at end of file diff --git a/.gitignore b/.gitignore index e752b94a2..30f64c4e3 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,4 @@ docker/generated.mk # Vim backup files. 
.*.swp +fuzzers/mutation_analysis/fuzzbench_mapped_dir/ diff --git a/analysis/benchmark_results.py b/analysis/benchmark_results.py index a9dd8f918..7922a2677 100644 --- a/analysis/benchmark_results.py +++ b/analysis/benchmark_results.py @@ -36,13 +36,14 @@ class BenchmarkResults: """ def __init__(self, benchmark_name, experiment_df, coverage_dict, - output_directory, plotter): + output_directory, plotter, mua_results): self.name = benchmark_name self._experiment_df = experiment_df self._coverage_dict = coverage_dict self._output_directory = output_directory self._plotter = plotter + self.mua_results = mua_results def _prefix_with_benchmark(self, filename): return self.name + '_' + filename @@ -56,6 +57,10 @@ def get_coverage_report_path(self, fuzzer_name, benchmark_name): fuzzer_name, benchmark_name, self._benchmark_df) return filestore_utils.get_user_facing_path(filestore_path) + def get_mua_report_data(self, _fuzzer_name, _benchmark_name): + """Returns results as string""" + return 'TODO: Not Implemented Yet' + @property @functools.lru_cache() def type(self): diff --git a/analysis/data_utils.py b/analysis/data_utils.py index f6a31218e..27c06f0b4 100644 --- a/analysis/data_utils.py +++ b/analysis/data_utils.py @@ -147,6 +147,12 @@ def is_unique_crash(crash_group): unique_crashes.add(crash_state) is_firsts.append(is_unique) crash_group['firsts'] = is_firsts + print(crash_group.head()) + print(crash_group.index) + #crash_group.index = list(crash_group.index) + #crash_group.reset_index(inplace=True) + #print(crash_group.head()) + #print(crash_group.index) return crash_group.firsts diff --git a/analysis/experiment_results.py b/analysis/experiment_results.py index a60d6d9b5..eb1d1462c 100644 --- a/analysis/experiment_results.py +++ b/analysis/experiment_results.py @@ -59,6 +59,7 @@ def __init__( # pylint: disable=too-many-arguments coverage_dict, output_directory, plotter, + mua_results=None, experiment_name=None): if experiment_name: self.name = experiment_name @@ 
-66,6 +67,8 @@ def __init__( # pylint: disable=too-many-arguments # Take name from first row. self.name = experiment_df.experiment.iloc[0] + self.mua_results = mua_results + # FuzzBench repo commit hash. self.git_hash = None if 'git_hash' in experiment_df.columns: @@ -135,7 +138,7 @@ def benchmarks(self): benchmark_results.BenchmarkResults(name, self._experiment_df, self._coverage_dict, self._output_directory, - self._plotter) + self._plotter, self.mua_results) for name in sorted(benchmark_names) ] diff --git a/analysis/generate_report.py b/analysis/generate_report.py index 188ed3c2d..a483d153c 100644 --- a/analysis/generate_report.py +++ b/analysis/generate_report.py @@ -16,6 +16,7 @@ import argparse import os import sys +import sqlite3 import pandas as pd @@ -142,6 +143,7 @@ def get_experiment_data(experiment_names, logger.info('Reading experiment data from db.') experiment_df = queries.get_experiment_data(experiment_names, main_experiment_benchmarks) + experiment_df.to_csv('/tmp/experiment-data/experiment_data.csv') logger.info('Done reading experiment data from db.') description = queries.get_experiment_description(main_experiment_name) return experiment_df, description @@ -186,6 +188,41 @@ def modify_experiment_data_if_requested( # pylint: disable=too-many-arguments return experiment_df +def get_mua_results(experiment_name, fuzzers, _benchmarks, experiment_df): + """Get mutation analysis results for each fuzzer in each trial to use in + the report.""" + + #get relationship between trial_id and benchmark from df + trial_dict = experiment_df.set_index('trial_id')['benchmark'].to_dict() + + for fuzzer in fuzzers: + for trial in trial_dict.keys(): + + _benchmark = trial_dict[trial] + + mua_result_db_file = f'/workspace/mua_out/{experiment_name}/' \ + f'mua_binaries/corpus_run_results/{fuzzer}/{trial}/' \ + 'results.sqlite' + con = sqlite3.connect(mua_result_db_file) + cur = con.cursor() + + covered_mutants = cur.execute(""" + SELECT DISTINCT mut_id FROM results + 
JOIN timestamps ON results.input_file = timestamps.hashname + WHERE killed == 0 ORDER BY mut_id + """) + covered_mutants.fetchall() + + killed_mutants = cur.execute(""" + SELECT DISTINCT mut_id + FROM results JOIN timestamps + ON results.input_file = timestamps.hashname + WHERE killed == 1 + ORDER BY mut_id + """) + killed_mutants.fetchall() + + # pylint: disable=too-many-arguments,too-many-locals def generate_report(experiment_names, report_directory, @@ -202,7 +239,8 @@ def generate_report(experiment_names, merge_with_clobber=False, merge_with_clobber_nonprivate=False, coverage_report=False, - experiment_benchmarks=None): + experiment_benchmarks=None, + mutation_analysis=False): """Generate report helper.""" if merge_with_clobber_nonprivate: experiment_names = ( @@ -231,6 +269,9 @@ def generate_report(experiment_names, experiment_df, experiment_names, benchmarks, fuzzers, label_by_experiment, end_time, merge_with_clobber) + experiment_df.to_csv('/tmp/experiment-data/out.csv') + + #TODO: make this work again # Add |bugs_covered| column prior to export. 
experiment_df = data_utils.add_bugs_covered_column(experiment_df) @@ -247,6 +288,13 @@ def generate_report(experiment_names, experiment_df) logger.info('Finished generating coverage report info.') + if mutation_analysis: + # TODO get_mua_results(main_experiment_name, fuzzers, + # experiment_benchmarks, experiment_df) + mua_results = None + else: + mua_results = None + fuzzer_names = experiment_df.fuzzer.unique() plotter = plotting.Plotter(fuzzer_names, quick, log_scale) experiment_ctx = experiment_results.ExperimentResults( @@ -254,6 +302,7 @@ def generate_report(experiment_names, coverage_dict, report_directory, plotter, + mua_results=mua_results, experiment_name=report_name) template = report_type + '.html' diff --git a/analysis/report_templates/with_mua.html b/analysis/report_templates/with_mua.html new file mode 100644 index 000000000..f1c24b30c --- /dev/null +++ b/analysis/report_templates/with_mua.html @@ -0,0 +1,492 @@ + + + + + + FuzzBench: {{ experiment.name }} report + {% if in_progress %} + (running) + {% endif %} + + + + + + + + + +
+ +

+ FuzzBench: {{ experiment.name }} report +

+ {% if in_progress %} +
+ (experiment incomplete/still running...) +
+ {% endif %} +
+

experiment summary

+ + {% if experiment.rank_by_median_and_average_rank.size < 2 %} + + No aggregate ranking as the data contains a single fuzzer. + + {% elif experiment.benchmarks|length < 2 %} + + No aggregate ranking as the data contains a single benchmark. + + {% else %} + + {% block top_level_ranking %} + + We show two different aggregate (cross-benchmark) rankings of fuzzers. + The first is based on the average of per-benchmark scores, where + the score represents the percentage of the highest reached median + {{ experiment.type }}-coverage on a given benchmark (higher value is better). + + The second ranking shows the average rank of fuzzers, after we rank + them on each benchmark according to their median reached + {{ experiment.type }}-coverages (lower value is better). + +
+
+
By avg. score
+ {{ experiment.linkify_names( + experiment.rank_by_median_and_average_normalized_score.round(2).to_frame() + ).to_html(escape=False) + }} +
+
+
By avg. rank
+ {{ experiment.linkify_names( + experiment.rank_by_median_and_average_rank.round(2).to_frame() + ).to_html(escape=False) + }} +
+
+ +
    +
  • +
    + Critical difference diagram +
    +
    + + + {% if experiment.rank_by_median_and_average_rank.size > 20 %} + + Too many fuzzers to render the diagram. The critical difference plot + currently only supports up to 20 fuzzers. + + {% else %} + + The diagram visualizes the average rank of fuzzers (second ranking + above) while showing the significance of the differences as well. + What is considered a "critical difference" (CD) is based on the + Friedman/Nemenyi post-hoc test. See more in the + documentation. + +
    + +
    +
    + +
    +
    + + Note: If a fuzzer does not support all benchmarks, its ranking as + shown in this diagram can be lower than it should be. So please + check the list of supported benchmarks for the fuzzer(s) of your interest. + The list could be specified in the fuzzer's README.md like + this. + + {% endif %} {# show critical difference diagram #} + +
    +
  • +
+ + {% endblock %} {# top_level_ranking #} + + {% endif %} {# data is available for top level ranking #} + +
    +
  • +
    + Median relative code-coverages on each benchmark +
    +
    +

    + Note: The relative coverage summary table shows the median + relative performance of each fuzzer to the experiment maximum. Thus the + highest relative performance may not be 100%.
    + trial_relative_coverage = trial_coverage / experiment_max_coverage
    +

    + {{ experiment.relative_code_summary_table.render() }} +
      +
    • Fuzzers are sorted by "FuzzerMean" (average median relative coverage), highest on the left.
    • +
    • Green background = highest relative median coverage.
    • +
    • Blue gradient background = greater than 95% relative median coverage.
    • +
    +
    +
  • +
+ + {% if experiment.type == 'bug' %} +
    +
  • +
    + Median relative bug-coverages on each benchmark +
    +
    +

    + Note: The relative coverage summary table shows the median + relative performance of each fuzzer to the experiment maximum. Thus the + highest relative performance may not be 100%.
    + trial_relative_coverage = trial_coverage / experiment_max_coverage
    + {{ experiment.relative_bug_summary_table.render() }} +

      +
    • Fuzzers are sorted by "FuzzerMean" (average median relative coverage), highest on the left.
    • +
    • Green background = highest relative median coverage.
    • +
    • Blue gradient background = greater than 95% relative median coverage.
    • +
    +
    +
  • +
+ +
    +
  • +
    + Total unique bugs found on each benchmark +
    +
    + {{ experiment.found_bugs_summary_table.render() }} +
      +
    • Fuzzers are sorted by "FuzzerSum", highest on the left.
    • +
    • Green background = most unique bugs found.
    • +
    • *note: This table represents unique bugs found across all trials.
    • +
    +
    +
  • +
+ {% endif %} + +
+ + {% for benchmark in experiment.benchmarks %} + +
+ +

{{ benchmark.name }} summary

+ +
+
+ {% if benchmark.type == 'bug' %} +
Discovered bug coverage distribution
+ + {% else %} +
Ranking by median reached code coverage
+ + {% endif %} +
+
+
Reached code coverage distribution
+ +
+
+
+
+ +
+
+
Mean code coverage growth over time
+ +
+
+
Mean code coverage growth over time
+ +
+ {% if benchmark.type == 'bug' %} +
+
Mean bug coverage growth over time
+ +
+
+
Mean bug coverage growth over time
+ +
+ {% endif %} +
+ * The error bands show the 95% confidence interval + around the mean code coverage. +
+
+ + {% if benchmark.fuzzers_with_not_enough_samples and not in_progress %} +
+
+
+ error +
+
+ + The following fuzzers do not have enough samples: + + {{ ', '.join(benchmark.fuzzers_with_not_enough_samples) }}. + + +
+
+
+ {% endif %} + + {% if benchmark.type == 'bug' %} +
    +
  • +
    + Sample statistics and statistical significance (bugs covered) +
    +
    + +
    Bug coverage sample statistics
    + {{ benchmark.bug_summary_table.to_html() }} +
    + +
    +
    +
    Vargha-Delaney A12 measure
    + + The table summarizes the A12 values from the + pairwise Vargha-Delaney A measure of effect size. + Green cells indicate the probability the fuzzer in the + row will outperform the fuzzer in the column. +
    + +
    +
    Mann-Whitney U test
    + + The table summarizes the p values of + pairwise Mann-Whitney U tests. + Green cells indicate that the reached + coverage distribution of a given fuzzer pair + is significantly different. +
    +
    + +
    +
  • +
+ {% endif %} + +
    +
  • +
    + Sample statistics and statistical significance (code coverage) +
    +
    + +
    Code coverage sample statistics
    + {{ benchmark.summary_table.to_html() }} +
    + +
    +
    +
    Vargha-Delaney A12 measure
    + + The table summarizes the A12 values from the + pairwise Vargha-Delaney A measure of effect size. + Green cells indicate the probability the fuzzer in the + row will outperform the fuzzer in the column. +
    + +
    +
    Mann-Whitney U test
    + + The table summarizes the p values of + pairwise Mann-Whitney U tests. + Green cells indicate that the reached + coverage distribution of a given fuzzer pair + is significantly different. +
    +
    + +
    +
  • +
+ + {% if coverage_report %} +
    +
  • +
    + Unique code coverage plots +
    +
    + +
    +
    +
    Ranking by unique code branches covered
    + + Each bar shows the total number of code branches found by a given fuzzer. + The colored area shows the number of unique code branches + (i.e., branches that were not covered by any other fuzzers). +
    +
    + +
    +
    +
    Pairwise unique code coverage
    + + Each cell represents the number of code branches covered by the fuzzer + of the column but not by the fuzzer of the row +
    +
    + +
    +
  • +
+ +
    +
  • +
    + Code coverage reports for each fuzzer on this benchmark +
    +
    +
    + {% for fuzzer in benchmark.fuzzer_names %} + {{ fuzzer }} + {% endfor %} +
    +
    +
  • +
+ {% endif %} +
    +
  • +
    + Mutation analysis reports for each fuzzer on this benchmark +
    +
    +
    + {% for fuzzer in benchmark.fuzzer_names %} +
    {{ fuzzer }} {{ benchmark.get_mua_report_data(fuzzer, benchmark.name) }}
    + {% endfor %} +
    +
    +
  • +
+ +
+ {% endfor %} + +
+

experiment data

+ You can download the raw data for this report here. + +

+ Check out the documentation on how to create customized reports using this data. + Also see some example Colab notebooks for doing custom analysis on the data here. + + {% if experiment.git_hash %} +

+ The experiment was conducted using this FuzzBench commit: + {{ experiment.git_hash }} + +

+ To reproduce this experiment run the following commands in your FuzzBench repo:
+ + # Check out the right commit.
+ git checkout {{ experiment.git_hash }}
+ # Download the internal config file.
+ curl https://storage.googleapis.com/{{ experiment.experiment_filestore}}/{{ experiment.name }}/{{ experiment_config_relative_path }} > /tmp/experiment-config.yaml
+ make install-dependencies
+ # Launch the experiment using parameters from the internal config file.
+ PYTHONPATH=. python experiment/reproduce_experiment.py -c /tmp/experiment-config.yaml -e <new_experiment_name> +
+ + {% endif %} + + {% if description %} +

+ Experiment Description:

+ {{ description }} + {% endif %} +
+ +
+ +
+ +
+ +
+
+ + + + + + + diff --git a/experiment/build/builder.py b/experiment/build/builder.py index c1178f750..1c869f0d1 100644 --- a/experiment/build/builder.py +++ b/experiment/build/builder.py @@ -94,7 +94,7 @@ def build_base_images() -> Tuple[int, str]: return buildlib.build_base_images() -def build_measurer(benchmark: str) -> bool: +def build_coverage_measurer(benchmark: str) -> bool: """Do a coverage build for a benchmark.""" try: logger.info('Building coverage measurer for benchmark: %s.', benchmark) @@ -107,7 +107,7 @@ def build_measurer(benchmark: str) -> bool: return False -def build_mua(benchmark: str) -> bool: +def build_mua_measurer(benchmark: str) -> bool: """Do a mutation analysis build for a benchmark.""" try: logger.info('Building mua measurer for benchmark: %s.', benchmark) @@ -124,10 +124,12 @@ def build_all_measurers(benchmarks: List[str]) -> List[str]: Returns a list of benchmarks built successfully.""" logger.info('Building measurers.') filesystem.recreate_directory(build_utils.get_coverage_binaries_dir()) + filesystem.recreate_directory(build_utils.get_mua_binaries_dir()) build_measurer_args = [(benchmark,) for benchmark in benchmarks] - successful_calls = retry_build_loop(build_measurer, build_measurer_args) + successful_calls = retry_build_loop(build_coverage_measurer, + build_measurer_args) # build mua measurer - retry_build_loop(build_mua, build_measurer_args) + retry_build_loop(build_mua_measurer, build_measurer_args) logger.info('Done building measurers.') # Return list of benchmarks (like the list we were passed as an argument) # instead of returning a list of tuples each containing a benchmark. 
diff --git a/experiment/build/local_build.py b/experiment/build/local_build.py index 03782c2f6..3aacaa70e 100644 --- a/experiment/build/local_build.py +++ b/experiment/build/local_build.py @@ -30,8 +30,10 @@ def make(targets): """Invoke |make| with |targets| and return the result.""" - command = ['make', '-j'] + targets - return new_process.execute(command, cwd=utils.ROOT_DIR) + command = ['make', '--debug=j', '-j'] + targets + return new_process.execute(command, + write_to_stdout=True, + cwd=utils.ROOT_DIR) def build_base_images() -> Tuple[int, str]: @@ -94,8 +96,12 @@ def build_mua(benchmark): def prepare_mua_binaries(benchmark): """Run commands on mua container to prepare it""" - shared_mua_binaries_dir = get_shared_mua_binaries_dir() - mount_arg = f'{shared_mua_binaries_dir}:{shared_mua_binaries_dir}' + experiment_name = experiment_utils.get_experiment_name() + shared_mua_binaries_dir = f'/workspace/mua_out/{experiment_name}' + docker_mua_binaries_dir = f'/mapped/{experiment_name}' + mount_arg = f'{shared_mua_binaries_dir}:{docker_mua_binaries_dir}' + os.makedirs(shared_mua_binaries_dir, exist_ok=True) + builder_image_url = benchmark_utils.get_builder_image_url( benchmark, MUTATION_ANALYSIS_IMAGE_NAME, environment.get('DOCKER_REGISTRY')) @@ -109,14 +115,12 @@ def prepare_mua_binaries(benchmark): host_mua_mapped_dir = os.environ.get('HOST_MUA_MAPPED_DIR') command = ('(' - f'echo {host_mua_mapped_dir}; ' - 'ls -la /mapped_dir; ' - 'cat /mapped_dir/test.txt; ' + f'mkdir -p {shared_mua_binaries_dir}; ' f'tar -czvf {mua_build_archive_shared_dir_path} /out; ' 'python3 /mutator/mua_idle.py; ' ')') - logger.info('mua prepare command:' + str(command)) + logger.debug('mua prepare command:' + str(command)) try: new_process.execute(['docker', 'rm', '-f', container_name]) except subprocess.CalledProcessError: @@ -131,7 +135,7 @@ def prepare_mua_binaries(benchmark): '/bin/bash', '-c', command ] - logger.info('mua run command:' + str(mua_run_cmd)) + logger.debug('mua run 
command:' + str(mua_run_cmd)) new_process.execute(mua_run_cmd, write_to_stdout=True) diff --git a/experiment/build/test_builder.py b/experiment/build/test_builder.py index 5f9e79153..09538210a 100644 --- a/experiment/build/test_builder.py +++ b/experiment/build/test_builder.py @@ -65,8 +65,8 @@ def get_benchmarks_or_fuzzers(benchmarks_or_fuzzers_directory, filename, @pytest.mark.skipif(sys.version_info.minor > 10, reason='Test can stop responding on versions greater than ' '3.10') -@pytest.mark.parametrize('build_measurer_return_value', [True, False]) -@mock.patch('experiment.build.builder.build_measurer') +@pytest.mark.parametrize('build_coverage_measurer_return_value', [True, False]) +@mock.patch('experiment.build.builder.build_coverage_measurer') @mock.patch('experiment.build.builder.time') @mock.patch('experiment.build.builder.filesystem') @mock.patch('experiment.build.builder.build_utils') @@ -74,13 +74,13 @@ def get_benchmarks_or_fuzzers(benchmarks_or_fuzzers_directory, filename, {'CONCURRENT_BUILDS': str(DEFAULT_CONCURRENT_BUILDS)}) def test_build_all_measurers(mocked_build_utils, mocked_fs, mocked_time, mocked_build_measurer, - build_measurer_return_value): + build_coverage_measurer_return_value): """Tests that build_all_measurers works as intendend when build_measurer calls fail.""" - mocked_build_measurer.return_value = build_measurer_return_value + mocked_build_measurer.return_value = build_coverage_measurer_return_value benchmarks = get_regular_benchmarks() result = builder.build_all_measurers(benchmarks) - if build_measurer_return_value: + if build_coverage_measurer_return_value: assert result == benchmarks else: assert not result diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index 79dcbb226..827f3dfea 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -14,6 +14,7 @@ """Module for measuring snapshots from trial runners.""" import collections +import datetime 
import gc import glob import multiprocessing @@ -23,13 +24,15 @@ import posixpath import shlex import shutil +import sqlite3 import sys import tempfile import tarfile import time -from typing import List +from typing import List, Optional, Tuple import queue from pathlib import Path +import uuid import psutil from sqlalchemy import func @@ -52,12 +55,19 @@ from experiment.measurer import run_coverage from experiment.measurer import run_crashes from experiment import scheduler +from experiment.runner import UNIQUE_TIMESTAMP_FILENAME logger = logs.Logger() SnapshotMeasureRequest = collections.namedtuple( 'SnapshotMeasureRequest', ['fuzzer', 'benchmark', 'trial_id', 'cycle']) +# Exec id is used to identify the current run, if the dispatcher container +# is preempted the exec id will change. This allows us to identify which actions +# were performed by earlier runs and which were performed by the current run. +# We use this to identify which mutants builds were interrupted by a +# preemption. +EXEC_ID = uuid.uuid4() NUM_RETRIES = 3 RETRY_DELAY = 3 FAIL_WAIT_SECONDS = 30 @@ -65,6 +75,59 @@ SNAPSHOTS_BATCH_SAVE_SIZE = 100 +def add_timestamps_to_mua_results_db(timestamp_info, + trial_start_time: datetime.datetime, cycle, + corpus_dir, db_path): + """Add timestamp info to the mua results sqlite db.""" + conn = sqlite3.connect(db_path) + cur = conn.cursor() + cur.execute(''' + CREATE TABLE IF NOT EXISTS timestamps ( + hashname TEXT PRIMARY KEY, + input_file TEXT, + timestamp FLOAT + ) + ''') + conn.commit() + + cur = conn.cursor() + for corpus_file in os.listdir(corpus_dir): + cur.execute( + ''' + SELECT hashname FROM timestamps WHERE hashname = ? + LIMIT 1 + ''', (corpus_file,)) + if cur.fetchone() is None: + # Corpus file has no associated timestamp yet. + # Try to get it from the timestamp_info dict. 
+ timestamp = timestamp_info.get(corpus_file) + if timestamp is None: + # No timestamp found, so use the trial start time but only + # for the first cycle as it contains the seed corpus. + # If the timestamp_info dict does not have an entry for + # this corpus file, then it is assumed that the corpus file + # is a seed input and was not generated by the fuzzer. + if cycle != 0: + # logger.debug('No timestamp found for %s, this should not ' + # 'happen for non-seed inputs.', corpus_file) + continue + if trial_start_time is None: + logger.error( + 'No trial start time found, this should not happen for ' + 'started trials.') + continue + timestamp = trial_start_time.timestamp() + input_file = '' + logger.debug( + f'Using trial start time {timestamp} for {corpus_file}') + else: + input_file = timestamp_info[corpus_file]['filename'] + timestamp = timestamp_info[corpus_file]['timestamp'] + cur.execute('''INSERT INTO timestamps VALUES (?, ?, ?)''', + (corpus_file, input_file, timestamp)) + conn.commit() + + def exists_in_experiment_filestore(path: pathlib.Path) -> bool: """Returns True if |path| exists in the experiment_filestore.""" return filestore_utils.ls(exp_path.filestore(path), @@ -269,6 +332,14 @@ def _query_unmeasured_trials(experiment: str): nonpreempted_trials_filter) +def _query_trial_start_time(trial_id: int): + """Returns the start time of the trial with id |trial_id|.""" + with db_utils.session_scope() as session: + trial_query = session.query(models.Trial) + trial = trial_query.filter(models.Trial.id == trial_id).one() + return trial.time_started + + def _get_unmeasured_first_snapshots( experiment: str) -> List[SnapshotMeasureRequest]: """Returns a list of unmeasured SnapshotMeasureRequests that are the first @@ -342,9 +413,39 @@ def get_unmeasured_snapshots(experiment: str, return unmeasured_first_snapshots + unmeasured_latest_snapshots -def extract_corpus(corpus_archive: str, output_directory: str): +def enrich_timestamp_info(timestamp_info, 
member_to_filename): + """Enrich timestamp info with the filename of the corpus entry.""" + # Replace filenames with hashnames but keep the original filenames + # for reference. + full_timestamp_info = {} + for member, timestamp in timestamp_info.items(): + try: + filename = member_to_filename[member] + except KeyError: + # The file was not extracted, so we don't have a hashname for it + # This file will not need to be measured so we can skip it. + continue + # If there are multiple files with the same hashname, keep the one + # with the lowest timestamp. + if filename in full_timestamp_info: + if timestamp < full_timestamp_info[filename]['timestamp']: + full_timestamp_info[filename] = { + 'timestamp': timestamp, + 'filename': member + } + else: + full_timestamp_info[filename] = { + 'timestamp': timestamp, + 'filename': member + } + + +def extract_corpus(corpus_archive: str, + output_directory: str) -> Optional[List[Tuple[str, int]]]: """Extract a corpus from |corpus_archive| to |output_directory|.""" pathlib.Path(output_directory).mkdir(exist_ok=True) + timestamp_info = None + member_to_filename = {} with tarfile.open(corpus_archive, 'r:gz') as tar: for member in tar.getmembers(): @@ -353,6 +454,25 @@ def extract_corpus(corpus_archive: str, output_directory: str): # So skip if not a file. 
continue + if member.name == UNIQUE_TIMESTAMP_FILENAME: + timestamp_file_handle = tar.extractfile(member) + if not timestamp_file_handle: + logger.info('Failed to get timestamp file handle to %s.', + member) + continue + timestamp_json_string = timestamp_file_handle.read().decode() + if len(timestamp_json_string) == 0: + logger.info( + 'Empty timestamp json file, this is expected for empty ' + 'corpus.') + continue + try: + timestamp_info = json.loads(timestamp_json_string) + except json.decoder.JSONDecodeError: + logger.error('Failed to decode timestamp json file: ' + f'{timestamp_json_string}') + continue + member_file_handle = tar.extractfile(member) if not member_file_handle: logger.info('Failed to get handle to %s.', member) @@ -363,6 +483,7 @@ def extract_corpus(corpus_archive: str, output_directory: str): member_contents = member_file_handle.read() filename = utils.string_hash(member_contents) file_path = os.path.join(output_directory, filename) + member_to_filename[member.name] = filename if os.path.exists(file_path): # Don't write out duplicates in the archive. 
@@ -370,6 +491,10 @@ def extract_corpus(corpus_archive: str, output_directory: str): filesystem.write(file_path, member_contents, 'wb') + if timestamp_info is not None: + return enrich_timestamp_info(timestamp_info, member_to_filename) + return None + class SnapshotMeasurer(coverage_utils.TrialCoverage): # pylint: disable=too-many-instance-attributes """Class used for storing details needed to measure coverage of a particular @@ -423,9 +548,45 @@ def create_dir(self, directory): os.makedirs(directory, exist_ok=True) return os.path.exists(directory) - def initialize_mua_environment(self): + def initialize_mua_environment(self, timestamp_info, + trial_start_time: datetime.datetime, cycle): """Build all covered mutants.""" + def initialize_mua_directories(): + experiment_name = experiment_utils.get_experiment_name() + experiment_filestore_path = Path('/workspace/mua_out') + shared_mua_binaries_dir = \ + experiment_filestore_path / experiment_name / 'mua-binaries' + + # create corpi directory entry + corpi_dir = Path(shared_mua_binaries_dir) / 'corpi' + fuzzer_corpi_dir = corpi_dir / self.fuzzer + trial_corpi_dir = fuzzer_corpi_dir / str(self.trial_num) + self.create_dir(fuzzer_corpi_dir) + + # create covered_mutants directory entry (contains json files with + # covered mutant ids for each corpus entry) + mutants_ids_dir_entry = (shared_mua_binaries_dir / 'mutant_ids' / + self.fuzzer / str(self.trial_num)) + self.create_dir(mutants_ids_dir_entry) + + # create corpus_run_results directory entry (contains json files + # with covered and killed mutant ids for each corpus entry) + mutants_ids_dir_entry = (shared_mua_binaries_dir / + 'corpus_run_results' / self.fuzzer / + str(self.trial_num)) + self.create_dir(mutants_ids_dir_entry) + + # create mutants directory + mutants_dir_entry = shared_mua_binaries_dir / 'mutants' + self.create_dir(mutants_dir_entry) + + # copy corpus from self.corpus_dir into container + shutil.copytree(self.corpus_dir, + trial_corpi_dir, + 
dirs_exist_ok=True) + return mutants_ids_dir_entry + # find correct container and start it container_name = \ f'{MUTATION_ANALYSIS_IMAGE_NAME}_{self.benchmark}_container' @@ -433,27 +594,29 @@ def initialize_mua_environment(self): docker_start_command = 'docker start ' + container_name new_process.execute(docker_start_command.split(' ')) - experiment_filestore_path = \ - Path(experiment_utils.get_experiment_filestore_path()) - shared_mua_binaries_dir = experiment_filestore_path / 'mua-binaries' + mutants_ids_dir_entry = initialize_mua_directories() - # create corpi directory entry - corpi_dir = Path(shared_mua_binaries_dir) / 'corpi' - fuzzer_corpi_dir = corpi_dir / self.fuzzer - trial_corpi_dir = fuzzer_corpi_dir / str(self.trial_num) - self.create_dir(fuzzer_corpi_dir) + corpus_run_result_db = mutants_ids_dir_entry / 'results.sqlite' + if timestamp_info is None: + logs.info('No timestamp info found.') + timestamp_info = {} - # create covered_mutants directory entry (contains ids) - mutants_ids_dir_entry = (shared_mua_binaries_dir / 'mutant_ids' / - self.fuzzer / str(self.trial_num)) - self.create_dir(mutants_ids_dir_entry) + add_timestamps_to_mua_results_db(timestamp_info, trial_start_time, + cycle, self.corpus_dir, + corpus_run_result_db) - # create mutants directory - mutants_dir_entry = shared_mua_binaries_dir / 'mutants' - self.create_dir(mutants_dir_entry) + corpus_run_stats_db = mutants_ids_dir_entry / 'stats.sqlite' - # copy corpus from self.corpus_dir into container - shutil.copytree(self.corpus_dir, trial_corpi_dir, dirs_exist_ok=True) + if not corpus_run_stats_db.is_file(): + logger.info( + f'Copying stats db from container to: {corpus_run_stats_db}') + + copy_stats_db_command = [ + 'docker', 'cp', f'{container_name}:/mua_build/build/stats.db', + str(corpus_run_stats_db) + ] + logger.info(f'mua copy stats db command: {copy_stats_db_command}') + new_process.execute(copy_stats_db_command, write_to_stdout=True) # get additional info from commons 
experiment_name = experiment_utils.get_experiment_name() @@ -461,9 +624,9 @@ def initialize_mua_environment(self): # execute command on container command = [ - 'python3', '/mutator/mua_build_ids.py', fuzz_target, - experiment_name, self.fuzzer, - str(self.trial_num) + 'python3', '/mutator/mua_build_ids.py', + str(EXEC_ID), fuzz_target, experiment_name, self.fuzzer, + str(self.trial_num), '--debug_num_mutants=10' ] docker_exec_command = [ @@ -471,9 +634,8 @@ def initialize_mua_environment(self): shlex.join(command) ] - logger.info('mua initialize command:' + str(docker_exec_command)) - logger.info(docker_exec_command) - new_process.execute(docker_exec_command) + logger.info(f'mua_build_ids command: {docker_exec_command}') + new_process.execute(docker_exec_command, write_to_stdout=True) def process_mua(self): """runs mua measurement""" @@ -493,7 +655,7 @@ def process_mua(self): 'docker', 'exec', '-t', container_name, '/bin/bash', '-c', shlex.join(command) ] - logger.info('mua process command:' + str(docker_exec_command)) + logger.info('mua_run_mutants command:' + str(docker_exec_command)) new_process.execute(docker_exec_command, write_to_stdout=True) def run_cov_new_units(self): @@ -571,14 +733,16 @@ def generate_coverage_information(self, cycle: int): return self.generate_summary(cycle) - def extract_corpus(self, corpus_archive_path) -> bool: + def extract_corpus( + self, corpus_archive_path + ) -> Tuple[bool, Optional[List[Tuple[str, int]]]]: """Extract the corpus archive for this cycle if it exists.""" if not os.path.exists(corpus_archive_path): self.logger.warning('Corpus not found: %s.', corpus_archive_path) - return False + return False, None - extract_corpus(corpus_archive_path, self.corpus_dir) - return True + timestamp_info = extract_corpus(corpus_archive_path, self.corpus_dir) + return True, timestamp_info def save_crash_files(self, cycle): """Save crashes in per-cycle crash archive.""" @@ -710,10 +874,13 @@ def measure_snapshot( # pylint: 
disable=too-many-locals,too-many-arguments return None snapshot_measurer.initialize_measurement_dirs() - snapshot_measurer.extract_corpus(corpus_archive_dst) + _extract_success, timestamp_info = snapshot_measurer.extract_corpus( + corpus_archive_dst) if mutation_analysis: - snapshot_measurer.initialize_mua_environment() + trial_start_time = _query_trial_start_time(trial_num) + snapshot_measurer.initialize_mua_environment(timestamp_info, + trial_start_time, cycle) # Don't keep corpus archives around longer than they need to be. os.remove(corpus_archive_dst) @@ -776,19 +943,9 @@ def set_up_mua_binaries(pool, experiment): pool.map(set_up_mua_binary, benchmarks) -def set_up_mua_binary(benchmark): +def set_up_mua_binary(_benchmark): """Set up mua finder binaries for |benchmark|.""" initialize_logs() - mua_binaries_dir = build_utils.get_mua_binaries_dir() - benchmark_mua_binary_dir = mua_binaries_dir / benchmark - filesystem.create_directory(benchmark_mua_binary_dir) - archive_name = f'mutation-analysis-build-{benchmark}.tar.gz' - archive_filestore_path = exp_path.filestore(mua_binaries_dir / archive_name) - filestore_utils.cp(archive_filestore_path, str(benchmark_mua_binary_dir)) - archive_path = benchmark_mua_binary_dir / archive_name - with tarfile.open(archive_path, 'r:gz') as tar: - tar.extractall(benchmark_mua_binary_dir) - os.remove(archive_path) def set_up_coverage_binary(benchmark): diff --git a/experiment/measurer/test_measure_manager.py b/experiment/measurer/test_measure_manager.py index d8ce1d0cb..a0e96a4da 100644 --- a/experiment/measurer/test_measure_manager.py +++ b/experiment/measurer/test_measure_manager.py @@ -25,6 +25,9 @@ from database import models from database import utils as db_utils from experiment.build import build_utils +from experiment.build.gcb_build import build_mua +from experiment.build.local_build import (make_shared_mua_binaries_dir, + prepare_mua_binaries) from experiment.measurer import measure_manager from test_libs import utils as 
test_utils @@ -294,8 +297,8 @@ def test_measure_snapshot( # pylint: disable=too-many-locals os.environ['WORK'] = str(tmp_path) # Set up the coverage binary. benchmark = 'freetype2_ftfuzzer' - coverage_binary_src = get_test_data_path('test_measure_snapshot', - benchmark + '-coverage') + coverage_binary_src = get_test_data_path( + 'test_measure_snapshot_coverage', benchmark + '-coverage') benchmark_cov_binary_dir = os.path.join( build_utils.get_coverage_binaries_dir(), benchmark) @@ -303,6 +306,10 @@ def test_measure_snapshot( # pylint: disable=too-many-locals coverage_binary_dst_dir = os.path.join(benchmark_cov_binary_dir, 'ftfuzzer') + # shared_mua_binaries_dir = '/workspace/mua_out' + # os.makedirs(shared_mua_binaries_dir, exist_ok=True) + # mua_binary_dst_dir = os.path.join(shared_mua_binaries_dir, 'ftfuzzer') + shutil.copy(coverage_binary_src, coverage_binary_dst_dir) # Set up entities in database so that the snapshot can be created. @@ -319,12 +326,16 @@ def test_measure_snapshot( # pylint: disable=too-many-locals # Set up the snapshot archive. 
cycle = 1 - archive = get_test_data_path('test_measure_snapshot', + archive = get_test_data_path('test_measure_snapshot_coverage', f'corpus-archive-{cycle:04d}.tar.gz') corpus_dir = os.path.join(snapshot_measurer.trial_dir, 'corpus') os.makedirs(corpus_dir) shutil.copy(archive, corpus_dir) + build_mua(benchmark) + make_shared_mua_binaries_dir() + prepare_mua_binaries(benchmark) + with mock.patch('common.filestore_utils.cp') as mocked_cp: mocked_cp.return_value = new_process.ProcessResult(0, '', False) # TODO(metzman): Create a system for using actual buckets in diff --git a/experiment/reporter.py b/experiment/reporter.py index 69f2a7304..24adcb127 100644 --- a/experiment/reporter.py +++ b/experiment/reporter.py @@ -70,6 +70,12 @@ def output_report(experiment_config: dict, logger.info('Is merging with nonprivate: %s.', merge_with_nonprivate) experiment_benchmarks = set(experiment_config['benchmarks']) + mutation_analysis = experiment_config['mutation_analysis'] + if mutation_analysis: + report_type = 'with_mua' + else: + report_type = 'default' + try: logger.debug('Generating report.') filesystem.recreate_directory(reports_dir) @@ -78,10 +84,12 @@ def output_report(experiment_config: dict, str(reports_dir), report_name=experiment_name, fuzzers=fuzzers, + report_type=report_type, in_progress=in_progress, merge_with_clobber_nonprivate=merge_with_nonprivate, coverage_report=coverage_report, - experiment_benchmarks=experiment_benchmarks) + experiment_benchmarks=experiment_benchmarks, + mutation_analysis=mutation_analysis) filestore_utils.rsync( str(reports_dir), web_filestore_path, diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py index 94f8c9e89..5eeb61d64 100644 --- a/experiment/run_experiment.py +++ b/experiment/run_experiment.py @@ -372,7 +372,9 @@ def start_dispatcher(config: Dict, config_dir: str): """Start the dispatcher instance and run the dispatcher code on it.""" dispatcher = get_dispatcher(config) # Is dispatcher code being run 
manually (useful for debugging)? - os.environ['HOST_MUA_MAPPED_DIR'] = config.get('host_mua_mapped_dir') + host_mua_mapped_dir = config.get('host_mua_mapped_dir') + if host_mua_mapped_dir is not None: + os.environ['HOST_MUA_MAPPED_DIR'] = host_mua_mapped_dir copy_resources_to_bucket(config_dir, config) if not os.getenv('MANUAL_EXPERIMENT'): dispatcher.start() @@ -499,7 +501,7 @@ def start(self): f'CONCURRENT_BUILDS={self.config["concurrent_builds"]}') set_worker_pool_name_arg = ( f'WORKER_POOL_NAME={self.config["worker_pool_name"]}') - mua_mapped_dir = os.environ['HOST_MUA_MAPPED_DIR'] + mua_mapped_dir = os.environ.get('HOST_MUA_MAPPED_DIR') environment_args = [ '-e', 'LOCAL_EXPERIMENT=True', @@ -522,7 +524,7 @@ def start(self): '-e', set_worker_pool_name_arg, *(['-e', f'HOST_MUA_MAPPED_DIR={mua_mapped_dir}'] - if mua_mapped_dir else []), + if mua_mapped_dir is not None else []), ] command = [ 'docker', @@ -541,6 +543,10 @@ def start(self): # starts by several minutes. '-v', '/tmp/dispatcher_venv:/work/src/.venv/lib/python3.10/site-packages', + # To share files between the dispatcher and mutation testing + # container we need to map a shared host directory to a volume. 
+ '-v', + '/workspace/mua_out:/workspace/mua_out', ] + environment_args + [ '--shm-size=2g', '--cap-add=SYS_PTRACE', diff --git a/experiment/runner.py b/experiment/runner.py index a4efc5b6f..0a9d0641a 100644 --- a/experiment/runner.py +++ b/experiment/runner.py @@ -23,8 +23,10 @@ import subprocess import sys import tarfile +import tempfile import threading import time +import traceback import zipfile from common import benchmark_config @@ -53,6 +55,8 @@ CORPUS_DIRNAME = 'corpus' RESULTS_DIRNAME = 'results' CORPUS_ARCHIVE_DIRNAME = 'corpus-archives' +UNIQUE_TIMESTAMP_FILENAME = \ + 'timestamps_8a3bb4ff-0dca-4e2d-a54d-db8c4e8bf5af.json' def _clean_seed_corpus(seed_corpus_dir): @@ -368,6 +372,8 @@ def archive_corpus(self): self.corpus_archives_dir, experiment_utils.get_corpus_archive_name(self.cycle)) + file_timestamps = {} + with tarfile.open(archive, 'w:gz') as tar: new_archive_time = self.last_archive_time for file_path in get_corpus_elements(self.output_corpus): @@ -379,6 +385,13 @@ def archive_corpus(self): new_archive_time = max(new_archive_time, last_modified_time) arcname = os.path.relpath(file_path, self.output_corpus) tar.add(file_path, arcname=arcname) + try: + file_timestamp = stat_info.st_mtime + file_timestamps[arcname] = file_timestamp + except Exception: # pylint: disable=broad-except + e_msg = traceback.format_exc() + logs.warning( + f'Failed to get timestamp for {arcname}: {e_msg}') except (FileNotFoundError, OSError): # We will get these errors if files or directories are being # deleted from |directory| as we archive it. Don't bother @@ -387,6 +400,23 @@ def archive_corpus(self): pass except Exception: # pylint: disable=broad-except logs.error('Unexpected exception occurred when archiving.') + + # Add a timestamp file to the archive, taking care to not overwrite + # any existing file in the corpus. 
+ try: + with tempfile.NamedTemporaryFile(mode='wt') as temp_file: + logs.debug( + f'Writing timestamp file to archive: {temp_file}') + temp_file.write(json.dumps(file_timestamps)) + temp_file.flush() + tar.add(temp_file.name, + arcname=UNIQUE_TIMESTAMP_FILENAME, + recursive=False) + except Exception: # pylint: disable=broad-except + e_msg = traceback.format_exc() + logs.warning( + f'Failed to write timestamp file to archive: {e_msg}') + self.last_archive_time = new_archive_time return archive @@ -450,6 +480,7 @@ def experiment_main(): """Do a trial as part of an experiment.""" logs.info('Doing trial as part of experiment.') try: + logs.error('Do something') runner = TrialRunner() runner.conduct_trial() except Exception as error: # pylint: disable=broad-except diff --git a/experiment/test_data/experiment-config.yaml b/experiment/test_data/experiment-config.yaml index 4cecd0e6e..5047c863c 100644 --- a/experiment/test_data/experiment-config.yaml +++ b/experiment/test_data/experiment-config.yaml @@ -40,3 +40,4 @@ measurers_cpus: null runner_num_cpu_cores: 1 runner_machine_type: 'n1-standard-1' private: false +mutation_analysis: false diff --git a/experiment/test_reporter.py b/experiment/test_reporter.py index f3f3b9096..5eacaa98f 100644 --- a/experiment/test_reporter.py +++ b/experiment/test_reporter.py @@ -78,8 +78,11 @@ def test_output_report_filestore(experiment_fuzzers, expected_merged_fuzzers, [experiment_name], reports_dir, report_name=experiment_name, + report_type='default', fuzzers=expected_merged_fuzzers, in_progress=False, merge_with_clobber_nonprivate=False, coverage_report=False, - experiment_benchmarks=experiment_benchmarks) + experiment_benchmarks=experiment_benchmarks, + mutation_analysis=False, + ) diff --git a/experiment/test_runner.py b/experiment/test_runner.py index 6cb9be7ce..f16d7044b 100644 --- a/experiment/test_runner.py +++ b/experiment/test_runner.py @@ -347,7 +347,6 @@ def test_integration_runner(self, mocked_error, tmp_path, environ): 
parallel=True) archive_size = os.path.getsize(local_gcs_corpus_dir_copy / 'corpus-archive-0001.tar.gz') - assert archive_size > 500 assert len(os.listdir(output_corpus_dir)) > 5 diff --git a/fuzzers/mutation_analysis/.dockerignore b/fuzzers/mutation_analysis/.dockerignore index 90dcdc5b2..9b4875d72 100644 --- a/fuzzers/mutation_analysis/.dockerignore +++ b/fuzzers/mutation_analysis/.dockerignore @@ -1,2 +1,2 @@ mua_fuzzer_bench/.git -mua_fuzzer_bench/fuzzbench_mapped_dir/ \ No newline at end of file +fuzzbench_mapped_dir/ \ No newline at end of file diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index a2a63b79e..2d52f46eb 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -92,8 +92,9 @@ RUN mkdir /tmp/gllvm/ && \ RUN pipx install hatch # mua_fuzzer_bench -RUN git clone https://github.com/phi-go/mua_fuzzer_bench mutator && \ - git checkout d3d361092067423dc02ed4e9d82eefe694179ab5 +RUN git clone https://github.com/phi-go/mua_fuzzer_bench /mutator && \ + cd /mutator && \ + git checkout b7eb1793459605f358e0fde82d56509735bd6bd2 RUN cd /mutator && \ echo "llvmBinPath=/usr/lib/llvm-15/bin/" > gradle.properties @@ -102,3 +103,5 @@ RUN ln -s /mutator/exec-recorder.py /exec-recorder.py && \ ln -s /exec-recorder.py /bin/gclang-wrap && \ ln -s /exec-recorder.py /bin/gclang++-wrap && \ ln -s /mutator/mua_build_benchmark.py /bin/mua_build_benchmark + +RUN mkdir /mua_build/ && chmod 777 /mua_build/ diff --git a/fuzzers/mutation_analysis/fuzzer.py b/fuzzers/mutation_analysis/fuzzer.py index 4684152fa..71e2240b4 100644 --- a/fuzzers/mutation_analysis/fuzzer.py +++ b/fuzzers/mutation_analysis/fuzzer.py @@ -18,7 +18,7 @@ from fuzzers import utils -MUA_RECORDING_DB = '/tmp/execs.sqlite' +MUA_RECORDING_DB = '/mua_build/execs.sqlite' def build(): diff --git a/service/gcbrun_experiment.py b/service/gcbrun_experiment.py index f19ab493d..47a27577f 100644 --- 
a/service/gcbrun_experiment.py +++ b/service/gcbrun_experiment.py @@ -16,6 +16,7 @@ """Entrypoint for gcbrun into run_experiment. This script will get the command from the last PR comment containing "/gcbrun" and pass it to run_experiment.py which will run an experiment.""" +# Happy holidays! import logging import os diff --git a/test_experiment.yaml b/test_experiment.yaml new file mode 100644 index 000000000..f9d3cf3e5 --- /dev/null +++ b/test_experiment.yaml @@ -0,0 +1,24 @@ +# The number of trials of a fuzzer-benchmark pair. +trials: 1 + +# The amount of time in seconds that each trial is run for. +# 1 day = 24 * 60 * 60 = 86400 +max_total_time: 120 #86400 + +# The location of the docker registry. +# FIXME: Support custom docker registry. +# See https://github.com/google/fuzzbench/issues/777 +docker_registry: gcr.io/fuzzbench + +# The local experiment folder that will store most of the experiment data. +# Please use an absolute path. +experiment_filestore: /tmp/experiment-data + +# The local report folder where HTML reports and summary data will be stored. +# Please use an absolute path. +report_filestore: /tmp/report-data + +# Flag that indicates this is a local experiment. 
+local_experiment: true + +host_mua_mapped_dir: "/home/pgoerz/fuzzbench/fuzzers/mutation_analysis/mua_fuzzer_bench/fuzzbench_mapped_dir/" \ No newline at end of file From 7ab3ef85b278380231b44338b43f21ecc26daca2 Mon Sep 17 00:00:00 2001 From: Dongge Liu Date: Sat, 23 Dec 2023 16:09:10 +1100 Subject: [PATCH 16/69] Fix OpenH264 based on OSS-Fuzz (cherry picked from commit 1a31072a3df2002eea0a97499414672ff7f005a8) --- benchmarks/openh264_decoder_fuzzer/Dockerfile | 21 ++++++++----------- benchmarks/openh264_decoder_fuzzer/build.sh | 7 +------ 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/benchmarks/openh264_decoder_fuzzer/Dockerfile b/benchmarks/openh264_decoder_fuzzer/Dockerfile index e92837ffd..519ea338e 100644 --- a/benchmarks/openh264_decoder_fuzzer/Dockerfile +++ b/benchmarks/openh264_decoder_fuzzer/Dockerfile @@ -14,16 +14,13 @@ # ################################################################################ -FROM gcr.io/oss-fuzz-base/base-builder@sha256:87ca1e9e19235e731fac8de8d1892ebe8d55caf18e7aa131346fc582a2034fdd -MAINTAINER twsmith@mozilla.com - -RUN dpkg --add-architecture i386 && \ - apt-get update && \ - apt-get install -y \ - libstdc++-9-dev libstdc++-9-dev:i386 nasm subversion - -RUN git clone \ - https://github.com/cisco/openh264.git - -WORKDIR openh264 +FROM gcr.io/oss-fuzz-base/base-builder +RUN apt-get update && \ + apt-get install -y ffmpeg libstdc++-9-dev libstdc++-9-dev:i386 nasm subversion +RUN git clone --depth 1 https://github.com/cisco/openh264.git openh264 +RUN python3 -m pip install corpus-replicator +RUN corpus-replicator -o corpus video_h264_264_libx264.yml video +RUN mv openh264/res/*.264 corpus/ +RUN zip -j0r decoder_fuzzer_seed_corpus.zip corpus/ +WORKDIR /src/openh264 COPY build.sh decoder_fuzzer.cpp $SRC/ diff --git a/benchmarks/openh264_decoder_fuzzer/build.sh b/benchmarks/openh264_decoder_fuzzer/build.sh index 814113860..ead85c82e 100755 --- a/benchmarks/openh264_decoder_fuzzer/build.sh +++ 
b/benchmarks/openh264_decoder_fuzzer/build.sh @@ -15,16 +15,11 @@ # ################################################################################ -# prepare corpus -svn export https://github.com/mozillasecurity/fuzzdata.git/trunk/samples/h264 corpus/ -mv ./res/*.264 ./corpus/ -zip -j0r ${OUT}/decoder_fuzzer_seed_corpus.zip ./corpus/ - # build if [[ $CXXFLAGS = *sanitize=memory* ]]; then ASM_BUILD=No else ASM_BUILD=Yes fi -make -j$(nproc) ARCH=$ARCHITECTURE USE_ASM=$ASM_BUILD BUILDTYPE=Debug libraries +make -j$(nproc) ARCH=$ARCHITECTURE USE_ASM=$ASM_BUILD BUILDTYPE=Debug libopenh264.a $CXX $CXXFLAGS -o $OUT/decoder_fuzzer -I./codec/api/wels -I./codec/console/common/inc -I./codec/common/inc -L. $LIB_FUZZING_ENGINE $SRC/decoder_fuzzer.cpp libopenh264.a From 12a54f9112113200a7b0480f1201f7a02f1743d9 Mon Sep 17 00:00:00 2001 From: phi-go Date: Mon, 25 Dec 2023 07:10:13 +0000 Subject: [PATCH 17/69] just test snapshot_coverage without mua --- experiment/measurer/test_measure_manager.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/experiment/measurer/test_measure_manager.py b/experiment/measurer/test_measure_manager.py index a0e96a4da..455892f7f 100644 --- a/experiment/measurer/test_measure_manager.py +++ b/experiment/measurer/test_measure_manager.py @@ -14,6 +14,7 @@ """Tests for measure_manager.py.""" import os +from pathlib import Path import shutil from unittest import mock import queue @@ -286,7 +287,7 @@ class TestIntegrationMeasurement: # portable binary. 
@pytest.mark.skipif(not os.getenv('FUZZBENCH_TEST_INTEGRATION'), reason='Not running integration tests.') - def test_measure_snapshot( # pylint: disable=too-many-locals + def test_measure_snapshot_coverage( # pylint: disable=too-many-locals self, db, experiment, tmp_path): """Integration test for measure_snapshot.""" # WORK is set by experiment to a directory that only makes sense in a @@ -306,10 +307,6 @@ def test_measure_snapshot( # pylint: disable=too-many-locals coverage_binary_dst_dir = os.path.join(benchmark_cov_binary_dir, 'ftfuzzer') - # shared_mua_binaries_dir = '/workspace/mua_out' - # os.makedirs(shared_mua_binaries_dir, exist_ok=True) - # mua_binary_dst_dir = os.path.join(shared_mua_binaries_dir, 'ftfuzzer') - shutil.copy(coverage_binary_src, coverage_binary_dst_dir) # Set up entities in database so that the snapshot can be created. @@ -332,17 +329,13 @@ def test_measure_snapshot( # pylint: disable=too-many-locals os.makedirs(corpus_dir) shutil.copy(archive, corpus_dir) - build_mua(benchmark) - make_shared_mua_binaries_dir() - prepare_mua_binaries(benchmark) - with mock.patch('common.filestore_utils.cp') as mocked_cp: mocked_cp.return_value = new_process.ProcessResult(0, '', False) # TODO(metzman): Create a system for using actual buckets in # integration tests. 
snapshot = measure_manager.measure_snapshot( snapshot_measurer.fuzzer, snapshot_measurer.benchmark, - snapshot_measurer.trial_num, cycle, False, True) + snapshot_measurer.trial_num, cycle, False, False) assert snapshot assert snapshot.time == cycle * experiment_utils.get_snapshot_seconds() assert snapshot.edges_covered == 4629 From d1f5cefb191289b3c96d78db857bccfbe513758c Mon Sep 17 00:00:00 2001 From: phi-go Date: Mon, 25 Dec 2023 11:04:03 +0000 Subject: [PATCH 18/69] write mua results to experiment store --- analysis/generate_report.py | 4 +- experiment/build/build_utils.py | 47 +++++++++- experiment/build/builder.py | 2 +- experiment/build/local_build.py | 2 +- experiment/measurer/measure_manager.py | 99 +++++++------------- experiment/measurer/run_mua.py | 90 ++++++++++++++++++ experiment/measurer/test_measure_manager.py | 4 - fuzzers/mutation_analysis/builder.Dockerfile | 2 +- 8 files changed, 176 insertions(+), 74 deletions(-) create mode 100644 experiment/measurer/run_mua.py diff --git a/analysis/generate_report.py b/analysis/generate_report.py index a483d153c..9a155c06d 100644 --- a/analysis/generate_report.py +++ b/analysis/generate_report.py @@ -143,7 +143,7 @@ def get_experiment_data(experiment_names, logger.info('Reading experiment data from db.') experiment_df = queries.get_experiment_data(experiment_names, main_experiment_benchmarks) - experiment_df.to_csv('/tmp/experiment-data/experiment_data.csv') + # experiment_df.to_csv('/tmp/experiment-data/experiment_data.csv') logger.info('Done reading experiment data from db.') description = queries.get_experiment_description(main_experiment_name) return experiment_df, description @@ -269,7 +269,7 @@ def generate_report(experiment_names, experiment_df, experiment_names, benchmarks, fuzzers, label_by_experiment, end_time, merge_with_clobber) - experiment_df.to_csv('/tmp/experiment-data/out.csv') + # experiment_df.to_csv('/tmp/experiment-data/out.csv') #TODO: make this work again # Add |bugs_covered| column 
prior to export. diff --git a/experiment/build/build_utils.py b/experiment/build/build_utils.py index f0ebabfa6..fe10569a4 100644 --- a/experiment/build/build_utils.py +++ b/experiment/build/build_utils.py @@ -13,6 +13,7 @@ # limitations under the License. """Module for utility code shared by build submodules.""" +import os import tempfile from common import experiment_path as exp_path @@ -33,14 +34,56 @@ def store_build_logs(build_config, build_result): exp_path.filestore(get_build_logs_dir() / build_log_filename)) +def store_mua_stats_db(stats_db, benchmark): + """Save mua stats_db in the mua bucket.""" + stats_db = str(stats_db) + filestore_utils.cp( + stats_db, + exp_path.filestore(get_mua_results_dir() / 'base_build' / benchmark / + 'stats.sqlite')) + + +def store_mua_results_db(results_db, benchmark, fuzzer, cycle): + """Save mua stats_db in the mua bucket.""" + results_db = str(results_db) + filestore_utils.cp( + results_db, + exp_path.filestore(get_mua_results_dir() / 'results' / benchmark / + fuzzer / f'{cycle}.sqlite')) + + +def store_mua_build_log(build_output, benchmark, fuzzer, cycle): + """Save mua stats_db in the mua bucket.""" + with tempfile.NamedTemporaryFile(mode='w') as tmp: + tmp.write(build_output) + tmp.flush() + os.chmod(tmp.name, 0o666) + filestore_utils.cp( + tmp.name, + exp_path.filestore(get_mua_results_dir() / 'mua_build' / benchmark / + fuzzer / f'{cycle}.log')) + + +def store_mua_run_log(run_output, benchmark, fuzzer, cycle): + """Save mua stats_db in the mua bucket.""" + with tempfile.NamedTemporaryFile(mode='w') as tmp: + tmp.write(run_output) + tmp.flush() + os.chmod(tmp.name, 0o666) + filestore_utils.cp( + tmp.name, + exp_path.filestore(get_mua_results_dir() / 'mua_run' / benchmark / + fuzzer / f'{cycle}.log')) + + def get_coverage_binaries_dir(): """Return coverage binaries directory.""" return exp_path.path('coverage-binaries') -def get_mua_binaries_dir(): +def get_mua_results_dir(): """Return mua finder binaries directory.""" - 
return exp_path.path('mua-binaries') + return exp_path.path('mua-results') def get_build_logs_dir(): diff --git a/experiment/build/builder.py b/experiment/build/builder.py index 1c869f0d1..7a659312b 100644 --- a/experiment/build/builder.py +++ b/experiment/build/builder.py @@ -124,7 +124,7 @@ def build_all_measurers(benchmarks: List[str]) -> List[str]: Returns a list of benchmarks built successfully.""" logger.info('Building measurers.') filesystem.recreate_directory(build_utils.get_coverage_binaries_dir()) - filesystem.recreate_directory(build_utils.get_mua_binaries_dir()) + filesystem.recreate_directory(build_utils.get_mua_results_dir()) build_measurer_args = [(benchmark,) for benchmark in benchmarks] successful_calls = retry_build_loop(build_coverage_measurer, build_measurer_args) diff --git a/experiment/build/local_build.py b/experiment/build/local_build.py index 3aacaa70e..525b39473 100644 --- a/experiment/build/local_build.py +++ b/experiment/build/local_build.py @@ -50,7 +50,7 @@ def get_shared_coverage_binaries_dir(): def get_shared_mua_binaries_dir(): """Returns the shared mua binaries directory.""" experiment_filestore_path = experiment_utils.get_experiment_filestore_path() - return os.path.join(experiment_filestore_path, 'mua-binaries') + return os.path.join(experiment_filestore_path, 'mua-results') def make_shared_coverage_binaries_dir(): diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index 827f3dfea..620c99f90 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -32,7 +32,6 @@ from typing import List, Optional, Tuple import queue from pathlib import Path -import uuid import psutil from sqlalchemy import func @@ -50,11 +49,12 @@ from database import utils as db_utils from database import models from experiment.build import build_utils -from experiment.build.local_build import MUTATION_ANALYSIS_IMAGE_NAME from experiment.measurer import coverage_utils from 
experiment.measurer import run_coverage from experiment.measurer import run_crashes from experiment import scheduler +from experiment.measurer.run_mua import (copy_mua_stats_db, run_mua_build_ids, + start_mua_container) from experiment.runner import UNIQUE_TIMESTAMP_FILENAME logger = logs.Logger() @@ -62,12 +62,6 @@ SnapshotMeasureRequest = collections.namedtuple( 'SnapshotMeasureRequest', ['fuzzer', 'benchmark', 'trial_id', 'cycle']) -# Exec id is used to identify the current run, if the dispatcher container -# is preempted the exec id will change. This allows us to identify which actions -# were performed by earlier runs and which were performed by the current run. -# We use this to identify which mutants builds were interrupted by a -# preemption. -EXEC_ID = uuid.uuid4() NUM_RETRIES = 3 RETRY_DELAY = 3 FAIL_WAIT_SECONDS = 30 @@ -548,15 +542,26 @@ def create_dir(self, directory): os.makedirs(directory, exist_ok=True) return os.path.exists(directory) + def mua_run_result_dir(self): + """Return the directory where mua results are stored.""" + experiment_name = experiment_utils.get_experiment_name() + experiment_filestore_path = Path('/workspace/mua_out') + shared_mua_binaries_dir = \ + experiment_filestore_path / experiment_name / 'mua-results' + mua_run_results_dir = (shared_mua_binaries_dir / 'corpus_run_results' / + self.fuzzer / str(self.trial_num)) + return mua_run_results_dir + def initialize_mua_environment(self, timestamp_info, - trial_start_time: datetime.datetime, cycle): + trial_start_time: datetime.datetime, + benchmark, cycle): """Build all covered mutants.""" def initialize_mua_directories(): experiment_name = experiment_utils.get_experiment_name() experiment_filestore_path = Path('/workspace/mua_out') shared_mua_binaries_dir = \ - experiment_filestore_path / experiment_name / 'mua-binaries' + experiment_filestore_path / experiment_name / 'mua-results' # create corpi directory entry corpi_dir = Path(shared_mua_binaries_dir) / 'corpi' @@ -564,18 
+569,12 @@ def initialize_mua_directories(): trial_corpi_dir = fuzzer_corpi_dir / str(self.trial_num) self.create_dir(fuzzer_corpi_dir) - # create covered_mutants directory entry (contains json files with - # covered mutant ids for each corpus entry) mutants_ids_dir_entry = (shared_mua_binaries_dir / 'mutant_ids' / self.fuzzer / str(self.trial_num)) self.create_dir(mutants_ids_dir_entry) - # create corpus_run_results directory entry (contains json files - # with covered and killed mutant ids for each corpus entry) - mutants_ids_dir_entry = (shared_mua_binaries_dir / - 'corpus_run_results' / self.fuzzer / - str(self.trial_num)) - self.create_dir(mutants_ids_dir_entry) + mua_results_dir = self.mua_run_result_dir() + self.create_dir(mua_results_dir) # create mutants directory mutants_dir_entry = shared_mua_binaries_dir / 'mutants' @@ -585,18 +584,13 @@ def initialize_mua_directories(): shutil.copytree(self.corpus_dir, trial_corpi_dir, dirs_exist_ok=True) - return mutants_ids_dir_entry + return mua_results_dir - # find correct container and start it - container_name = \ - f'{MUTATION_ANALYSIS_IMAGE_NAME}_{self.benchmark}_container' + start_mua_container(self.benchmark) - docker_start_command = 'docker start ' + container_name - new_process.execute(docker_start_command.split(' ')) + mua_results_dir = initialize_mua_directories() - mutants_ids_dir_entry = initialize_mua_directories() - - corpus_run_result_db = mutants_ids_dir_entry / 'results.sqlite' + corpus_run_result_db = mua_results_dir / 'results.sqlite' if timestamp_info is None: logs.info('No timestamp info found.') timestamp_info = {} @@ -605,39 +599,11 @@ def initialize_mua_directories(): cycle, self.corpus_dir, corpus_run_result_db) - corpus_run_stats_db = mutants_ids_dir_entry / 'stats.sqlite' - - if not corpus_run_stats_db.is_file(): - logger.info( - f'Copying stats db from container to: {corpus_run_stats_db}') - - copy_stats_db_command = [ - 'docker', 'cp', f'{container_name}:/mua_build/build/stats.db', - 
str(corpus_run_stats_db) - ] - logger.info(f'mua copy stats db command: {copy_stats_db_command}') - new_process.execute(copy_stats_db_command, write_to_stdout=True) - - # get additional info from commons - experiment_name = experiment_utils.get_experiment_name() - fuzz_target = benchmark_utils.get_fuzz_target(self.benchmark) - - # execute command on container - command = [ - 'python3', '/mutator/mua_build_ids.py', - str(EXEC_ID), fuzz_target, experiment_name, self.fuzzer, - str(self.trial_num), '--debug_num_mutants=10' - ] - - docker_exec_command = [ - 'docker', 'exec', '-t', container_name, '/bin/bash', '-c', - shlex.join(command) - ] + copy_mua_stats_db(benchmark, mua_results_dir) - logger.info(f'mua_build_ids command: {docker_exec_command}') - new_process.execute(docker_exec_command, write_to_stdout=True) + run_mua_build_ids(benchmark, self.trial_num, self.fuzzer, cycle) - def process_mua(self): + def process_mua(self, cycle): """runs mua measurement""" # get necessary info container_name = 'mutation_analysis_' + self.benchmark + '_container' @@ -656,7 +622,13 @@ def process_mua(self): shlex.join(command) ] logger.info('mua_run_mutants command:' + str(docker_exec_command)) - new_process.execute(docker_exec_command, write_to_stdout=True) + mua_run_res = new_process.execute(docker_exec_command) + logger.info('mua_run_mutants result:' + str(mua_run_res)) + build_utils.store_mua_run_log(mua_run_res.output, self.benchmark, + self.fuzzer, cycle) + results_db = self.mua_run_result_dir() / 'results.sqlite' + build_utils.store_mua_results_db(results_db, self.benchmark, + self.fuzzer, cycle) def run_cov_new_units(self): """Run the coverage binary on new units.""" @@ -880,7 +852,8 @@ def measure_snapshot( # pylint: disable=too-many-locals,too-many-arguments if mutation_analysis: trial_start_time = _query_trial_start_time(trial_num) snapshot_measurer.initialize_mua_environment(timestamp_info, - trial_start_time, cycle) + trial_start_time, + benchmark, cycle) # Don't keep 
corpus archives around longer than they need to be. os.remove(corpus_archive_dst) @@ -904,7 +877,7 @@ def measure_snapshot( # pylint: disable=too-many-locals,too-many-arguments crashes=crashes) if mutation_analysis: - snapshot_measurer.process_mua() + snapshot_measurer.process_mua(cycle) measuring_time = round(time.time() - measuring_start_time, 2) snapshot_logger.info('Measured cycle: %d in %f seconds.', cycle, @@ -938,8 +911,8 @@ def set_up_mua_binaries(pool, experiment): distinct().filter(models.Trial.experiment == experiment) ] - mua_binaries_dir = build_utils.get_mua_binaries_dir() - filesystem.create_directory(mua_binaries_dir) + mua_results_dir = build_utils.get_mua_results_dir() + filesystem.create_directory(mua_results_dir) pool.map(set_up_mua_binary, benchmarks) diff --git a/experiment/measurer/run_mua.py b/experiment/measurer/run_mua.py new file mode 100644 index 000000000..183f5145b --- /dev/null +++ b/experiment/measurer/run_mua.py @@ -0,0 +1,90 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module for mutation testing measurer functionality.""" + +import shlex +import uuid +from common import logs +from common import benchmark_utils +from common import experiment_utils +from common import new_process +from experiment.build import build_utils + + +logger = logs.Logger() + +# Exec id is used to identify the current run, if the dispatcher container +# is preempted the exec id will change. 
This allows us to identify which actions +# were performed by earlier runs and which were performed by the current run. +# We use this to identify which mutants builds were interrupted by a +# preemption. +EXEC_ID = uuid.uuid4() + + +def get_container_name(benchmark): + """Return the container name for the given benchmark.""" + return f'mutation_analysis_{benchmark}_container' + + +def start_mua_container(benchmark): + """Start the mutation analysis container for the benchmark.""" + # find correct container and start it + container_name = get_container_name(benchmark) + + docker_start_command = 'docker start ' + container_name + new_process.execute(docker_start_command.split(' ')) + + +def copy_mua_stats_db(benchmark, mua_results_dir): + """Copy the stats db from the container to the mua results dir.""" + container_name = get_container_name(benchmark) + corpus_run_stats_db = mua_results_dir / 'stats.sqlite' + + if not corpus_run_stats_db.is_file(): + logger.info( + f'Copying stats db from container to: {corpus_run_stats_db}') + + copy_stats_db_command = [ + 'docker', 'cp', f'{container_name}:/mua_build/build/stats.db', + str(corpus_run_stats_db) + ] + logger.info(f'mua copy stats db command: {copy_stats_db_command}') + new_process.execute(copy_stats_db_command, write_to_stdout=True) + build_utils.store_mua_stats_db(corpus_run_stats_db, benchmark) + + +def run_mua_build_ids(benchmark, trial_num, fuzzer, cycle): + """Run mua_build_ids.py on the container.""" + container_name = get_container_name(benchmark) + # get additional info from commons + experiment_name = experiment_utils.get_experiment_name() + fuzz_target = benchmark_utils.get_fuzz_target(benchmark) + + # execute command on container + command = [ + 'python3', '/mutator/mua_build_ids.py', + str(EXEC_ID), fuzz_target, experiment_name, fuzzer, + str(trial_num), '--debug_num_mutants=10' + ] + + docker_exec_command = [ + 'docker', 'exec', '-t', container_name, '/bin/bash', '-c', + shlex.join(command) + ] + + 
logger.info(f'mua_build_ids command: {docker_exec_command}') + mua_build_res = new_process.execute(docker_exec_command) + logger.info(f'mua_build_ids result: {mua_build_res}') + build_utils.store_mua_build_log(mua_build_res.output, benchmark, + fuzzer, cycle) diff --git a/experiment/measurer/test_measure_manager.py b/experiment/measurer/test_measure_manager.py index 455892f7f..0ad0843ae 100644 --- a/experiment/measurer/test_measure_manager.py +++ b/experiment/measurer/test_measure_manager.py @@ -14,7 +14,6 @@ """Tests for measure_manager.py.""" import os -from pathlib import Path import shutil from unittest import mock import queue @@ -26,9 +25,6 @@ from database import models from database import utils as db_utils from experiment.build import build_utils -from experiment.build.gcb_build import build_mua -from experiment.build.local_build import (make_shared_mua_binaries_dir, - prepare_mua_binaries) from experiment.measurer import measure_manager from test_libs import utils as test_utils diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index 2d52f46eb..d5b92d915 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -94,7 +94,7 @@ RUN pipx install hatch # mua_fuzzer_bench RUN git clone https://github.com/phi-go/mua_fuzzer_bench /mutator && \ cd /mutator && \ - git checkout b7eb1793459605f358e0fde82d56509735bd6bd2 + git checkout 4640b4966eff1d7f5f941af28dde691f102c70a9 RUN cd /mutator && \ echo "llvmBinPath=/usr/lib/llvm-15/bin/" > gradle.properties From 67ce1d746c6a21dcd925c09365fbcd349f82a26b Mon Sep 17 00:00:00 2001 From: phi-go Date: Tue, 26 Dec 2023 07:38:49 +0000 Subject: [PATCH 19/69] store report errors in experiment-data --- experiment/build/build_utils.py | 19 +++++++++++++++++++ experiment/reporter.py | 4 ++++ 2 files changed, 23 insertions(+) diff --git a/experiment/build/build_utils.py b/experiment/build/build_utils.py index 
fe10569a4..747c7d9e0 100644 --- a/experiment/build/build_utils.py +++ b/experiment/build/build_utils.py @@ -13,6 +13,7 @@ # limitations under the License. """Module for utility code shared by build submodules.""" +import datetime import os import tempfile @@ -76,6 +77,19 @@ def store_mua_run_log(run_output, benchmark, fuzzer, cycle): fuzzer / f'{cycle}.log')) +def store_report_error_log(report_error): + """Save mua stats_db in the mua bucket.""" + timestamp_filename = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S") + with tempfile.NamedTemporaryFile(mode='w') as tmp: + tmp.write(report_error) + tmp.flush() + os.chmod(tmp.name, 0o666) + filestore_utils.cp( + tmp.name, + exp_path.filestore(get_report_errors_logs_dir() / + f'{timestamp_filename}.log')) + + def get_coverage_binaries_dir(): """Return coverage binaries directory.""" return exp_path.path('coverage-binaries') @@ -89,3 +103,8 @@ def get_mua_results_dir(): def get_build_logs_dir(): """Return build logs directory.""" return exp_path.path('build-logs') + + +def get_report_errors_logs_dir(): + """Return report errors logs directory.""" + return exp_path.path('report-errors-logs') diff --git a/experiment/reporter.py b/experiment/reporter.py index 24adcb127..c54373a01 100644 --- a/experiment/reporter.py +++ b/experiment/reporter.py @@ -16,6 +16,7 @@ reports.""" import os import posixpath +import traceback from common import experiment_utils from common import experiment_path as exp_path @@ -26,6 +27,7 @@ from common import yaml_utils from analysis import generate_report from analysis import data_utils +from experiment.build.build_utils import store_report_error_log CORE_FUZZERS_YAML = os.path.join(utils.ROOT_DIR, 'service', 'core-fuzzers.yaml') @@ -101,4 +103,6 @@ def output_report(experiment_config: dict, except data_utils.EmptyDataError: logs.warning('No snapshot data.') except Exception: # pylint: disable=broad-except + error_msg = traceback.format_exc() + store_report_error_log(f'Error generating HTML 
report:\n{error_msg}') logger.error('Error generating HTML report.') From fc812a6fd21955e47caa62ceb2a572737c9e22de Mon Sep 17 00:00:00 2001 From: phi-go Date: Tue, 26 Dec 2023 07:42:37 +0000 Subject: [PATCH 20/69] fix presubmit checks --- experiment/build/build_utils.py | 2 +- experiment/measurer/run_mua.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/experiment/build/build_utils.py b/experiment/build/build_utils.py index 747c7d9e0..f73deedb7 100644 --- a/experiment/build/build_utils.py +++ b/experiment/build/build_utils.py @@ -79,7 +79,7 @@ def store_mua_run_log(run_output, benchmark, fuzzer, cycle): def store_report_error_log(report_error): """Save mua stats_db in the mua bucket.""" - timestamp_filename = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S") + timestamp_filename = datetime.datetime.now().strftime('%Y_%m_%d-%H_%M_%S') with tempfile.NamedTemporaryFile(mode='w') as tmp: tmp.write(report_error) tmp.flush() diff --git a/experiment/measurer/run_mua.py b/experiment/measurer/run_mua.py index 183f5145b..f10210dc6 100644 --- a/experiment/measurer/run_mua.py +++ b/experiment/measurer/run_mua.py @@ -21,7 +21,6 @@ from common import new_process from experiment.build import build_utils - logger = logs.Logger() # Exec id is used to identify the current run, if the dispatcher container @@ -86,5 +85,5 @@ def run_mua_build_ids(benchmark, trial_num, fuzzer, cycle): logger.info(f'mua_build_ids command: {docker_exec_command}') mua_build_res = new_process.execute(docker_exec_command) logger.info(f'mua_build_ids result: {mua_build_res}') - build_utils.store_mua_build_log(mua_build_res.output, benchmark, - fuzzer, cycle) + build_utils.store_mua_build_log(mua_build_res.output, benchmark, fuzzer, + cycle) From 800ebd9334b839f515718ef25375cb0ad8b7c200 Mon Sep 17 00:00:00 2001 From: phi-go Date: Wed, 27 Dec 2023 09:47:51 +0100 Subject: [PATCH 21/69] actually fail when docker build for mua fails --- fuzzers/mutation_analysis/fuzzer.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzzers/mutation_analysis/fuzzer.py b/fuzzers/mutation_analysis/fuzzer.py index 71e2240b4..dfdc97f44 100644 --- a/fuzzers/mutation_analysis/fuzzer.py +++ b/fuzzers/mutation_analysis/fuzzer.py @@ -49,4 +49,4 @@ def build(): utils.build_benchmark() - subprocess.call(['/mutator/fuzzbench_build.sh']) + subprocess.check_call(['/mutator/fuzzbench_build.sh']) From 7cdd014cfeb2656b5f374372e3012c26b1d43905 Mon Sep 17 00:00:00 2001 From: phi-go Date: Wed, 27 Dec 2023 10:31:19 +0000 Subject: [PATCH 22/69] use more robust mua_build version --- fuzzers/mutation_analysis/builder.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index d5b92d915..71253ea49 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -94,7 +94,7 @@ RUN pipx install hatch # mua_fuzzer_bench RUN git clone https://github.com/phi-go/mua_fuzzer_bench /mutator && \ cd /mutator && \ - git checkout 4640b4966eff1d7f5f941af28dde691f102c70a9 + git checkout 7fa82307e8cab44d625f53a6b89753ee9a20e5d0 RUN cd /mutator && \ echo "llvmBinPath=/usr/lib/llvm-15/bin/" > gradle.properties From 9ba0b606659061f0597932f14025f59e1a2440a8 Mon Sep 17 00:00:00 2001 From: phi-go Date: Thu, 28 Dec 2023 07:33:32 +0000 Subject: [PATCH 23/69] update mua build scripts --- fuzzers/mutation_analysis/builder.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index 71253ea49..3cd6d08b8 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -94,7 +94,7 @@ RUN pipx install hatch # mua_fuzzer_bench RUN git clone https://github.com/phi-go/mua_fuzzer_bench /mutator && \ cd /mutator && \ - git checkout 7fa82307e8cab44d625f53a6b89753ee9a20e5d0 + git 
checkout 37e460f66dbc2500736ac02d91dcf4fee18278ba RUN cd /mutator && \ echo "llvmBinPath=/usr/lib/llvm-15/bin/" > gradle.properties From f5b382bff943aa018c1ace02e13e165745bd1317 Mon Sep 17 00:00:00 2001 From: phi-go Date: Thu, 28 Dec 2023 07:37:17 +0000 Subject: [PATCH 24/69] remove spammy log --- experiment/runner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/experiment/runner.py b/experiment/runner.py index 0a9d0641a..bc599b8d5 100644 --- a/experiment/runner.py +++ b/experiment/runner.py @@ -480,7 +480,6 @@ def experiment_main(): """Do a trial as part of an experiment.""" logs.info('Doing trial as part of experiment.') try: - logs.error('Do something') runner = TrialRunner() runner.conduct_trial() except Exception as error: # pylint: disable=broad-except From 33089c3b82de1df5da4f2cfd16258ad64a3053b6 Mon Sep 17 00:00:00 2001 From: phi-go Date: Thu, 28 Dec 2023 08:13:01 +0000 Subject: [PATCH 25/69] improve logging output --- experiment/measurer/measure_manager.py | 3 +++ experiment/measurer/run_mua.py | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index 620c99f90..079f45260 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -624,6 +624,9 @@ def process_mua(self, cycle): logger.info('mua_run_mutants command:' + str(docker_exec_command)) mua_run_res = new_process.execute(docker_exec_command) logger.info('mua_run_mutants result:' + str(mua_run_res)) + logger.info(f'mua_run_mutants result: {mua_run_res.retcode} ' + + f'timed_out: {mua_run_res.timed_out}\n' + + f'{mua_run_res.output}') build_utils.store_mua_run_log(mua_run_res.output, self.benchmark, self.fuzzer, cycle) results_db = self.mua_run_result_dir() / 'results.sqlite' diff --git a/experiment/measurer/run_mua.py b/experiment/measurer/run_mua.py index f10210dc6..6a49d4159 100644 --- a/experiment/measurer/run_mua.py +++ b/experiment/measurer/run_mua.py @@ 
-84,6 +84,8 @@ def run_mua_build_ids(benchmark, trial_num, fuzzer, cycle): logger.info(f'mua_build_ids command: {docker_exec_command}') mua_build_res = new_process.execute(docker_exec_command) - logger.info(f'mua_build_ids result: {mua_build_res}') + logger.info(f'mua_build_ids result: {mua_build_res.retcode} ' + + f'timed_out: {mua_build_res.timed_out}\n' + + f'{mua_build_res.output}') build_utils.store_mua_build_log(mua_build_res.output, benchmark, fuzzer, cycle) From 5bd783f162d83670e3fb8adc35bfd29c4b52765d Mon Sep 17 00:00:00 2001 From: phi-go Date: Thu, 28 Dec 2023 10:26:26 +0000 Subject: [PATCH 26/69] escape db password in dispatcher startup script --- experiment/resources/dispatcher-startup-script-template.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiment/resources/dispatcher-startup-script-template.sh b/experiment/resources/dispatcher-startup-script-template.sh index 3a6c7b465..0a324d254 100644 --- a/experiment/resources/dispatcher-startup-script-template.sh +++ b/experiment/resources/dispatcher-startup-script-template.sh @@ -24,7 +24,7 @@ docker run --rm \ -e EXPERIMENT={{experiment}} \ -e CLOUD_PROJECT={{cloud_project}} \ -e EXPERIMENT_FILESTORE={{experiment_filestore}} \ - -e POSTGRES_PASSWORD={{postgres_password}} \ + -e POSTGRES_PASSWORD="{{postgres_password}}" \ -e CLOUD_SQL_INSTANCE_CONNECTION_NAME={{cloud_sql_instance_connection_name}} \ -e DOCKER_REGISTRY={{docker_registry}} \ -e CONCURRENT_BUILDS={{concurrent_builds}} \ From e5482c043423676ec354a389544b96e2bcfea18e Mon Sep 17 00:00:00 2001 From: phi-go Date: Thu, 28 Dec 2023 12:21:24 +0000 Subject: [PATCH 27/69] log build_mua config --- experiment/build/gcb_build.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/experiment/build/gcb_build.py b/experiment/build/gcb_build.py index 23b0900ab..07c508f26 100644 --- a/experiment/build/gcb_build.py +++ b/experiment/build/gcb_build.py @@ -14,6 +14,7 @@ """Module for building things on Google Cloud Build for use in 
trials.""" import os +import pprint import subprocess import tempfile from typing import Dict @@ -79,6 +80,7 @@ def build_mua(benchmark): config = generate_cloudbuild.create_cloudbuild_spec( image_templates, benchmark=benchmark, fuzzer='mutation_analysis') config_name = f'benchmark-{benchmark}-mutation_analysis' + logger.info(f"build_mua gcb_build.py: {config_name}, {config}") _build(config, config_name) From 019da8297b40eaa2a1098dc2d824181f07006387 Mon Sep 17 00:00:00 2001 From: phi-go Date: Thu, 28 Dec 2023 12:29:43 +0000 Subject: [PATCH 28/69] log config for base images --- experiment/build/gcb_build.py | 1 + 1 file changed, 1 insertion(+) diff --git a/experiment/build/gcb_build.py b/experiment/build/gcb_build.py index 07c508f26..0f50aa628 100644 --- a/experiment/build/gcb_build.py +++ b/experiment/build/gcb_build.py @@ -49,6 +49,7 @@ def build_base_images(): benchmark=None, fuzzer=None, build_base_images=True) + logger.info(f"build_base_images gcb_build.py: {config}") _build(config, 'base-images') From db334f873f05ba7a8c16177d59bbc9a16f4d2ede Mon Sep 17 00:00:00 2001 From: phi-go Date: Thu, 28 Dec 2023 12:55:54 +0000 Subject: [PATCH 29/69] no need to log gcb build configs --- experiment/build/gcb_build.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/experiment/build/gcb_build.py b/experiment/build/gcb_build.py index 0f50aa628..5d742010b 100644 --- a/experiment/build/gcb_build.py +++ b/experiment/build/gcb_build.py @@ -49,7 +49,6 @@ def build_base_images(): benchmark=None, fuzzer=None, build_base_images=True) - logger.info(f"build_base_images gcb_build.py: {config}") _build(config, 'base-images') @@ -81,7 +80,6 @@ def build_mua(benchmark): config = generate_cloudbuild.create_cloudbuild_spec( image_templates, benchmark=benchmark, fuzzer='mutation_analysis') config_name = f'benchmark-{benchmark}-mutation_analysis' - logger.info(f"build_mua gcb_build.py: {config_name}, {config}") _build(config, config_name) From a2358c2af6f548157ab715f40a91343d37f73574 Mon 
Sep 17 00:00:00 2001 From: phi-go Date: Thu, 28 Dec 2023 16:03:55 +0000 Subject: [PATCH 30/69] make host_mua_out_dir configurable use it for the gcb dispatcher --- .gitignore | 3 + analysis/generate_report.py | 5 +- experiment/build/local_build.py | 52 +-------------- experiment/measurer/measure_manager.py | 10 +-- experiment/measurer/run_mua.py | 66 ++++++++++++++++++- .../dispatcher-startup-script-template.sh | 2 + experiment/run_experiment.py | 22 ++++++- 7 files changed, 101 insertions(+), 59 deletions(-) diff --git a/.gitignore b/.gitignore index 30f64c4e3..12c6ec5a7 100644 --- a/.gitignore +++ b/.gitignore @@ -44,4 +44,7 @@ docker/generated.mk # Vim backup files. .*.swp + +# mua related files and directories. fuzzers/mutation_analysis/fuzzbench_mapped_dir/ +mua_out/ diff --git a/analysis/generate_report.py b/analysis/generate_report.py index 9a155c06d..08f44a09c 100644 --- a/analysis/generate_report.py +++ b/analysis/generate_report.py @@ -28,6 +28,7 @@ from analysis import rendering from common import filesystem from common import logs +from experiment.measurer.run_mua import get_dispatcher_mua_out_dir logger = logs.Logger() @@ -200,7 +201,9 @@ def get_mua_results(experiment_name, fuzzers, _benchmarks, experiment_df): _benchmark = trial_dict[trial] - mua_result_db_file = f'/workspace/mua_out/{experiment_name}/' \ + mua_out_dir = get_dispatcher_mua_out_dir() + + mua_result_db_file = f'/{mua_out_dir}/{experiment_name}/' \ f'mua_binaries/corpus_run_results/{fuzzer}/{trial}/' \ 'results.sqlite' con = sqlite3.connect(mua_result_db_file) diff --git a/experiment/build/local_build.py b/experiment/build/local_build.py index 525b39473..44c7a327b 100644 --- a/experiment/build/local_build.py +++ b/experiment/build/local_build.py @@ -24,6 +24,8 @@ from common import logs from common import new_process from common import utils +from experiment.measurer.run_mua import (MUTATION_ANALYSIS_IMAGE_NAME, + stop_mua_container) logger = logs.Logger() # pylint: disable=invalid-name 
@@ -80,65 +82,17 @@ def build_coverage(benchmark): return result -MUTATION_ANALYSIS_IMAGE_NAME = 'mutation_analysis' - - def build_mua(benchmark): """Build (locally) mua image for benchmark.""" + stop_mua_container(benchmark) image_name = f'.{MUTATION_ANALYSIS_IMAGE_NAME}-{benchmark}-builder' result = make([image_name]) if result.retcode: return result make_shared_mua_binaries_dir() - prepare_mua_binaries(benchmark) return result -def prepare_mua_binaries(benchmark): - """Run commands on mua container to prepare it""" - experiment_name = experiment_utils.get_experiment_name() - shared_mua_binaries_dir = f'/workspace/mua_out/{experiment_name}' - docker_mua_binaries_dir = f'/mapped/{experiment_name}' - mount_arg = f'{shared_mua_binaries_dir}:{docker_mua_binaries_dir}' - os.makedirs(shared_mua_binaries_dir, exist_ok=True) - - builder_image_url = benchmark_utils.get_builder_image_url( - benchmark, MUTATION_ANALYSIS_IMAGE_NAME, - environment.get('DOCKER_REGISTRY')) - - mua_build_archive = f'mutation-analysis-build-{benchmark}.tar.gz' - mua_build_archive_shared_dir_path = os.path.join(shared_mua_binaries_dir, - mua_build_archive) - - container_name = f'{MUTATION_ANALYSIS_IMAGE_NAME}_{benchmark}_container' - - host_mua_mapped_dir = os.environ.get('HOST_MUA_MAPPED_DIR') - - command = ('(' - f'mkdir -p {shared_mua_binaries_dir}; ' - f'tar -czvf {mua_build_archive_shared_dir_path} /out; ' - 'python3 /mutator/mua_idle.py; ' - ')') - - logger.debug('mua prepare command:' + str(command)) - try: - new_process.execute(['docker', 'rm', '-f', container_name]) - except subprocess.CalledProcessError: - pass - - mua_run_cmd = [ - 'docker', 'run', '--name', container_name, '-v', mount_arg, '-e', - 'FUZZ_OUTSIDE_EXPERIMENT=1', '-e', 'FORCE_LOCAL=1', '-e', 'TRIAL_ID=1', - '-e', 'FUZZER=mutation_analysis', '-e', 'DEBUG_BUILDER=1', - *([] if host_mua_mapped_dir is None else - ['-v', f'{host_mua_mapped_dir}:/mapped_dir']), builder_image_url, - '/bin/bash', '-c', command - ] - - 
logger.debug('mua run command:' + str(mua_run_cmd)) - new_process.execute(mua_run_cmd, write_to_stdout=True) - - def copy_coverage_binaries(benchmark): """Copy coverage binaries in a local experiment.""" shared_coverage_binaries_dir = get_shared_coverage_binaries_dir() diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index 079f45260..1512250d3 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -53,8 +53,8 @@ from experiment.measurer import run_coverage from experiment.measurer import run_crashes from experiment import scheduler -from experiment.measurer.run_mua import (copy_mua_stats_db, run_mua_build_ids, - start_mua_container) +from experiment.measurer.run_mua import (copy_mua_stats_db, get_dispatcher_mua_out_dir, run_mua_build_ids, + ensure_mua_container_running) from experiment.runner import UNIQUE_TIMESTAMP_FILENAME logger = logs.Logger() @@ -545,7 +545,7 @@ def create_dir(self, directory): def mua_run_result_dir(self): """Return the directory where mua results are stored.""" experiment_name = experiment_utils.get_experiment_name() - experiment_filestore_path = Path('/workspace/mua_out') + experiment_filestore_path = get_dispatcher_mua_out_dir() shared_mua_binaries_dir = \ experiment_filestore_path / experiment_name / 'mua-results' mua_run_results_dir = (shared_mua_binaries_dir / 'corpus_run_results' / @@ -559,7 +559,7 @@ def initialize_mua_environment(self, timestamp_info, def initialize_mua_directories(): experiment_name = experiment_utils.get_experiment_name() - experiment_filestore_path = Path('/workspace/mua_out') + experiment_filestore_path = get_dispatcher_mua_out_dir() shared_mua_binaries_dir = \ experiment_filestore_path / experiment_name / 'mua-results' @@ -586,7 +586,7 @@ def initialize_mua_directories(): dirs_exist_ok=True) return mua_results_dir - start_mua_container(self.benchmark) + ensure_mua_container_running(self.benchmark) mua_results_dir = 
initialize_mua_directories() diff --git a/experiment/measurer/run_mua.py b/experiment/measurer/run_mua.py index 6a49d4159..015b5586e 100644 --- a/experiment/measurer/run_mua.py +++ b/experiment/measurer/run_mua.py @@ -13,12 +13,16 @@ # limitations under the License. """Module for mutation testing measurer functionality.""" +import os +from pathlib import Path import shlex +import subprocess import uuid from common import logs from common import benchmark_utils from common import experiment_utils from common import new_process +from common import environment from experiment.build import build_utils logger = logs.Logger() @@ -31,18 +35,74 @@ EXEC_ID = uuid.uuid4() +MUTATION_ANALYSIS_IMAGE_NAME = 'mutation_analysis' + + def get_container_name(benchmark): """Return the container name for the given benchmark.""" return f'mutation_analysis_{benchmark}_container' -def start_mua_container(benchmark): +def get_host_mua_out_dir(): + """Return the host directory where mua_out is mapped.""" + return Path(os.environ.get('HOST_MUA_OUT_DIR')).absolute() + + +def get_dispatcher_mua_out_dir(): + return Path('/mua_out/') + + +def stop_mua_container(benchmark): + """Stop the mua container for the benchmark.""" + container_name = get_container_name(benchmark) + try: + new_process.execute(['docker', 'rm', '-f', container_name]) + except subprocess.CalledProcessError: + pass + + +def run_mua_container(benchmark): + """Run commands on mua container to prepare it""" + experiment_name = experiment_utils.get_experiment_name() + host_mua_out_dir = get_host_mua_out_dir() + shared_mua_binaries_dir = host_mua_out_dir / experiment_name + docker_mua_binaries_dir = f'/mapped/{experiment_name}' + mount_arg = f'{shared_mua_binaries_dir}:{docker_mua_binaries_dir}' + os.makedirs(shared_mua_binaries_dir, exist_ok=True) + + builder_image_url = benchmark_utils.get_builder_image_url( + benchmark, MUTATION_ANALYSIS_IMAGE_NAME, + environment.get('DOCKER_REGISTRY')) + + container_name = 
get_container_name(benchmark) + + host_mua_mapped_dir = os.environ.get('HOST_MUA_MAPPED_DIR') + + mua_run_cmd = [ + 'docker', 'run', '--init', '-it', '--detach', '--name', container_name, + '-v', mount_arg, + *([] if host_mua_mapped_dir is None else + ['-v', f'{host_mua_mapped_dir}:/mapped_dir']), builder_image_url, + '/bin/bash', '-c', 'sleep infinity' + ] + + logger.info('mua container run command:' + str(mua_run_cmd)) + mua_run_res = new_process.execute(mua_run_cmd) + logger.info(f'mua container run result: {mua_run_res.retcode} ' + + f'timed_out: {mua_run_res.timed_out}\n' + + f'{mua_run_res.output}') + + +def ensure_mua_container_running(benchmark): """Start the mutation analysis container for the benchmark.""" # find correct container and start it container_name = get_container_name(benchmark) - docker_start_command = 'docker start ' + container_name - new_process.execute(docker_start_command.split(' ')) + docker_start_command = ['docker', 'start', container_name] + res = new_process.execute(docker_start_command, expect_zero=False) + if res.retcode != 0: + logger.info('Could not start mua container, using run instead.') + run_mua_container(benchmark) def copy_mua_stats_db(benchmark, mua_results_dir): diff --git a/experiment/resources/dispatcher-startup-script-template.sh b/experiment/resources/dispatcher-startup-script-template.sh index 0a324d254..6f47da4f9 100644 --- a/experiment/resources/dispatcher-startup-script-template.sh +++ b/experiment/resources/dispatcher-startup-script-template.sh @@ -30,6 +30,8 @@ docker run --rm \ -e CONCURRENT_BUILDS={{concurrent_builds}} \ -e WORKER_POOL_NAME={{worker_pool_name}} \ -e PRIVATE={{private}} \ + -e MUTATION_ANALYSIS={{mutation_analysis}} \ + {{mua_mapped_dir}} \ --cap-add=SYS_PTRACE --cap-add=SYS_NICE \ -v /var/run/docker.sock:/var/run/docker.sock --name=dispatcher-container \ {{docker_registry}}/dispatcher-image /work/startup-dispatcher.sh &> /tmp/dispatcher.log diff --git a/experiment/run_experiment.py 
b/experiment/run_experiment.py index 5eeb61d64..a4091d52e 100644 --- a/experiment/run_experiment.py +++ b/experiment/run_experiment.py @@ -17,6 +17,7 @@ import argparse import os +from pathlib import Path import re import subprocess import sys @@ -501,7 +502,17 @@ def start(self): f'CONCURRENT_BUILDS={self.config["concurrent_builds"]}') set_worker_pool_name_arg = ( f'WORKER_POOL_NAME={self.config["worker_pool_name"]}') + + # TODO(mua): Only pass env and volumes if mua is enabled. + mua_mapped_dir = os.environ.get('HOST_MUA_MAPPED_DIR') + + host_mua_out_dir = str(Path( + os.environ.get('HOST_MUA_OUT_DIR', Path.cwd()/'mua_out') + ).absolute()) + os.environ['HOST_MUA_OUT_DIR'] = host_mua_out_dir + logs.debug(f'Setting HOST_MUA_OUT_DIR to {host_mua_out_dir}') + environment_args = [ '-e', 'LOCAL_EXPERIMENT=True', @@ -525,6 +536,8 @@ def start(self): set_worker_pool_name_arg, *(['-e', f'HOST_MUA_MAPPED_DIR={mua_mapped_dir}'] if mua_mapped_dir is not None else []), + *(['-e', f'HOST_MUA_OUT_DIR={host_mua_out_dir}'] + if host_mua_out_dir is not None else []), ] command = [ 'docker', @@ -546,7 +559,7 @@ def start(self): # To share files between the dispatcher and mutation testing # container we need to map a shared host directory to a volume. 
'-v', - '/workspace/mua_out:/workspace/mua_out', + f'{host_mua_out_dir}:/mua_out', ] + environment_args + [ '--shm-size=2g', '--cap-add=SYS_PTRACE', @@ -608,6 +621,12 @@ def _render_startup_script(self): 'worker_pool_name': self.config['worker_pool_name'], 'private': self.config['private'], } + if self.config['mutation_analysis']: + kwargs['mutation_analysis'] = True + kwargs['mua_mapped_dir'] = '-v /home/chronos/mua_out/:/mua_out' + else: + kwargs['mutation_analysis'] = False + kwargs['mua_mapped_dir'] = '' if 'worker_pool_name' in self.config: kwargs['worker_pool_name'] = self.config['worker_pool_name'] return template.render(**kwargs) @@ -615,6 +634,7 @@ def _render_startup_script(self): def write_startup_script(self, startup_script_file): """Get the startup script to start the experiment on the dispatcher.""" startup_script = self._render_startup_script() + print(startup_script) startup_script_file.write(startup_script) startup_script_file.flush() From 6f2d549fa492cc19af8dcac81ec8d6f8d2644d67 Mon Sep 17 00:00:00 2001 From: phi-go Date: Thu, 28 Dec 2023 16:28:34 +0000 Subject: [PATCH 31/69] build mua measurer images only if needed --- experiment/build/builder.py | 5 +++-- experiment/dispatcher.py | 8 +++++--- .../resources/dispatcher-startup-script-template.sh | 1 - experiment/run_experiment.py | 3 --- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/experiment/build/builder.py b/experiment/build/builder.py index 7a659312b..b98ab9b1b 100644 --- a/experiment/build/builder.py +++ b/experiment/build/builder.py @@ -119,7 +119,7 @@ def build_mua_measurer(benchmark: str) -> bool: return False -def build_all_measurers(benchmarks: List[str]) -> List[str]: +def build_all_measurers(benchmarks: List[str], mutation_analysis: bool) -> List[str]: """Build measurers for each benchmark in |benchmarks| in parallel Returns a list of benchmarks built successfully.""" logger.info('Building measurers.') @@ -129,7 +129,8 @@ def build_all_measurers(benchmarks: List[str]) 
-> List[str]: successful_calls = retry_build_loop(build_coverage_measurer, build_measurer_args) # build mua measurer - retry_build_loop(build_mua_measurer, build_measurer_args) + if mutation_analysis: + retry_build_loop(build_mua_measurer, build_measurer_args) logger.info('Done building measurers.') # Return list of benchmarks (like the list we were passed as an argument) # instead of returning a list of tuples each containing a benchmark. diff --git a/experiment/dispatcher.py b/experiment/dispatcher.py index 796c796b8..00e82192d 100755 --- a/experiment/dispatcher.py +++ b/experiment/dispatcher.py @@ -105,7 +105,8 @@ def __init__(self, experiment_config_filepath: str): def build_images_for_trials(fuzzers: List[str], benchmarks: List[str], num_trials: int, - preemptible: bool) -> List[models.Trial]: + preemptible: bool, + mutation_analysis) -> List[models.Trial]: """Builds the images needed to run |experiment| and returns a list of trials that can be run for experiment. This is the number of trials specified in experiment times each pair of fuzzer+benchmark that builds successfully.""" @@ -114,7 +115,7 @@ def build_images_for_trials(fuzzers: List[str], benchmarks: List[str], builder.build_base_images() # Only build fuzzers for benchmarks whose measurers built successfully. 
- benchmarks = builder.build_all_measurers(benchmarks) + benchmarks = builder.build_all_measurers(benchmarks, mutation_analysis) build_successes = builder.build_all_fuzzer_benchmarks(fuzzers, benchmarks) experiment_name = experiment_utils.get_experiment_name() trials = [] @@ -147,7 +148,8 @@ def dispatcher_main(): trials = build_images_for_trials(experiment.fuzzers, experiment.benchmarks, experiment.num_trials, - experiment.preemptible) + experiment.preemptible, + experiment.config['mutation_analysis']) _initialize_trials_in_db(trials) create_work_subdirs(['experiment-folders', 'measurement-folders']) diff --git a/experiment/resources/dispatcher-startup-script-template.sh b/experiment/resources/dispatcher-startup-script-template.sh index 6f47da4f9..2b74d8b8a 100644 --- a/experiment/resources/dispatcher-startup-script-template.sh +++ b/experiment/resources/dispatcher-startup-script-template.sh @@ -30,7 +30,6 @@ docker run --rm \ -e CONCURRENT_BUILDS={{concurrent_builds}} \ -e WORKER_POOL_NAME={{worker_pool_name}} \ -e PRIVATE={{private}} \ - -e MUTATION_ANALYSIS={{mutation_analysis}} \ {{mua_mapped_dir}} \ --cap-add=SYS_PTRACE --cap-add=SYS_NICE \ -v /var/run/docker.sock:/var/run/docker.sock --name=dispatcher-container \ diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py index a4091d52e..297fc4c7b 100644 --- a/experiment/run_experiment.py +++ b/experiment/run_experiment.py @@ -511,7 +511,6 @@ def start(self): os.environ.get('HOST_MUA_OUT_DIR', Path.cwd()/'mua_out') ).absolute()) os.environ['HOST_MUA_OUT_DIR'] = host_mua_out_dir - logs.debug(f'Setting HOST_MUA_OUT_DIR to {host_mua_out_dir}') environment_args = [ '-e', @@ -622,10 +621,8 @@ def _render_startup_script(self): 'private': self.config['private'], } if self.config['mutation_analysis']: - kwargs['mutation_analysis'] = True kwargs['mua_mapped_dir'] = '-v /home/chronos/mua_out/:/mua_out' else: - kwargs['mutation_analysis'] = False kwargs['mua_mapped_dir'] = '' if 'worker_pool_name' in 
self.config: kwargs['worker_pool_name'] = self.config['worker_pool_name'] From ec1095e2e4aacfded1ad16b0088f8fe124abed8c Mon Sep 17 00:00:00 2001 From: phi-go Date: Thu, 28 Dec 2023 17:29:28 +0000 Subject: [PATCH 32/69] cloud shared dir and error message mua run --- experiment/measurer/run_mua.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/experiment/measurer/run_mua.py b/experiment/measurer/run_mua.py index 015b5586e..0fbb44231 100644 --- a/experiment/measurer/run_mua.py +++ b/experiment/measurer/run_mua.py @@ -45,7 +45,12 @@ def get_container_name(benchmark): def get_host_mua_out_dir(): """Return the host directory where mua_out is mapped.""" - return Path(os.environ.get('HOST_MUA_OUT_DIR')).absolute() + if experiment_utils.is_local_experiment(): + return Path( + os.environ.get('HOST_MUA_OUT_DIR', '/tmp/mua_out') + ).absolute() + else: + return Path('/home/chronos/mua_out/') def get_dispatcher_mua_out_dir(): @@ -86,11 +91,15 @@ def run_mua_container(benchmark): '/bin/bash', '-c', 'sleep infinity' ] - logger.info('mua container run command:' + str(mua_run_cmd)) - mua_run_res = new_process.execute(mua_run_cmd) - logger.info(f'mua container run result: {mua_run_res.retcode} ' + - f'timed_out: {mua_run_res.timed_out}\n' + - f'{mua_run_res.output}') + mua_run_res = new_process.execute(mua_run_cmd, expect_zero=False) + if mua_run_res.retcode != 0: + logger.error( + f'could not run mua container:\n' + + f'command: {mua_run_cmd}\n' + + f'returncode: {mua_run_res.retcode}\n' + + f'timed_out: {mua_run_res.timed_out}\n' + + f'{mua_run_res.output}') + raise Exception('Could not run mua container.') def ensure_mua_container_running(benchmark): From ed29c4af91502df380e4f5917fb9aea1b93a886e Mon Sep 17 00:00:00 2001 From: phi-go Date: Thu, 28 Dec 2023 19:41:15 +0000 Subject: [PATCH 33/69] pass presubmit --- experiment/build/builder.py | 3 ++- experiment/build/gcb_build.py | 1 - experiment/build/local_build.py | 1 - 
experiment/build/test_builder.py | 4 ++-- experiment/dispatcher.py | 3 +-- experiment/measurer/measure_manager.py | 4 +++- experiment/measurer/run_mua.py | 28 +++++++++++--------------- experiment/run_experiment.py | 6 +++--- experiment/test_dispatcher.py | 10 +++++---- 9 files changed, 29 insertions(+), 31 deletions(-) diff --git a/experiment/build/builder.py b/experiment/build/builder.py index b98ab9b1b..fe531ceaf 100644 --- a/experiment/build/builder.py +++ b/experiment/build/builder.py @@ -119,7 +119,8 @@ def build_mua_measurer(benchmark: str) -> bool: return False -def build_all_measurers(benchmarks: List[str], mutation_analysis: bool) -> List[str]: +def build_all_measurers(benchmarks: List[str], + mutation_analysis: bool) -> List[str]: """Build measurers for each benchmark in |benchmarks| in parallel Returns a list of benchmarks built successfully.""" logger.info('Building measurers.') diff --git a/experiment/build/gcb_build.py b/experiment/build/gcb_build.py index 5d742010b..23b0900ab 100644 --- a/experiment/build/gcb_build.py +++ b/experiment/build/gcb_build.py @@ -14,7 +14,6 @@ """Module for building things on Google Cloud Build for use in trials.""" import os -import pprint import subprocess import tempfile from typing import Dict diff --git a/experiment/build/local_build.py b/experiment/build/local_build.py index 44c7a327b..103674943 100644 --- a/experiment/build/local_build.py +++ b/experiment/build/local_build.py @@ -15,7 +15,6 @@ """Module for building things locally for use in trials.""" import os -import subprocess from typing import Tuple from common import benchmark_utils diff --git a/experiment/build/test_builder.py b/experiment/build/test_builder.py index 09538210a..9c1e4a418 100644 --- a/experiment/build/test_builder.py +++ b/experiment/build/test_builder.py @@ -79,7 +79,7 @@ def test_build_all_measurers(mocked_build_utils, mocked_fs, mocked_time, calls fail.""" mocked_build_measurer.return_value = build_coverage_measurer_return_value 
benchmarks = get_regular_benchmarks() - result = builder.build_all_measurers(benchmarks) + result = builder.build_all_measurers(benchmarks, False) if build_coverage_measurer_return_value: assert result == benchmarks else: @@ -130,7 +130,7 @@ def test_integration_build_oss_fuzz_project_measurers( def _test_build_measurers_benchmarks(benchmarks): """Asserts that measurers for each benchmark in |benchmarks| can build.""" - assert benchmarks == builder.build_all_measurers(benchmarks) + assert benchmarks == builder.build_all_measurers(benchmarks, False) def _test_build_fuzzers_benchmarks(fuzzers, benchmarks): diff --git a/experiment/dispatcher.py b/experiment/dispatcher.py index 00e82192d..4d87005cf 100755 --- a/experiment/dispatcher.py +++ b/experiment/dispatcher.py @@ -104,8 +104,7 @@ def __init__(self, experiment_config_filepath: str): def build_images_for_trials(fuzzers: List[str], benchmarks: List[str], - num_trials: int, - preemptible: bool, + num_trials: int, preemptible: bool, mutation_analysis) -> List[models.Trial]: """Builds the images needed to run |experiment| and returns a list of trials that can be run for experiment. 
This is the number of trials specified in diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index 1512250d3..4fd6985ca 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -53,7 +53,9 @@ from experiment.measurer import run_coverage from experiment.measurer import run_crashes from experiment import scheduler -from experiment.measurer.run_mua import (copy_mua_stats_db, get_dispatcher_mua_out_dir, run_mua_build_ids, +from experiment.measurer.run_mua import (copy_mua_stats_db, + get_dispatcher_mua_out_dir, + run_mua_build_ids, ensure_mua_container_running) from experiment.runner import UNIQUE_TIMESTAMP_FILENAME diff --git a/experiment/measurer/run_mua.py b/experiment/measurer/run_mua.py index 0fbb44231..8a5726b05 100644 --- a/experiment/measurer/run_mua.py +++ b/experiment/measurer/run_mua.py @@ -34,7 +34,6 @@ # preemption. EXEC_ID = uuid.uuid4() - MUTATION_ANALYSIS_IMAGE_NAME = 'mutation_analysis' @@ -46,14 +45,13 @@ def get_container_name(benchmark): def get_host_mua_out_dir(): """Return the host directory where mua_out is mapped.""" if experiment_utils.is_local_experiment(): - return Path( - os.environ.get('HOST_MUA_OUT_DIR', '/tmp/mua_out') - ).absolute() - else: - return Path('/home/chronos/mua_out/') + return Path(os.environ.get('HOST_MUA_OUT_DIR', + '/tmp/mua_out')).absolute() + return Path('/home/chronos/mua_out/') def get_dispatcher_mua_out_dir(): + """Return the dispatcher directory where mua_out is mapped to.""" return Path('/mua_out/') @@ -85,20 +83,18 @@ def run_mua_container(benchmark): mua_run_cmd = [ 'docker', 'run', '--init', '-it', '--detach', '--name', container_name, - '-v', mount_arg, - *([] if host_mua_mapped_dir is None else - ['-v', f'{host_mua_mapped_dir}:/mapped_dir']), builder_image_url, - '/bin/bash', '-c', 'sleep infinity' + '-v', mount_arg, *([] if host_mua_mapped_dir is None else + ['-v', f'{host_mua_mapped_dir}:/mapped_dir']), + builder_image_url, 
'/bin/bash', '-c', 'sleep infinity' ] mua_run_res = new_process.execute(mua_run_cmd, expect_zero=False) if mua_run_res.retcode != 0: - logger.error( - f'could not run mua container:\n' + - f'command: {mua_run_cmd}\n' + - f'returncode: {mua_run_res.retcode}\n' + - f'timed_out: {mua_run_res.timed_out}\n' + - f'{mua_run_res.output}') + logger.error('could not run mua container:\n' + + f'command: {mua_run_cmd}\n' + + f'returncode: {mua_run_res.retcode}\n' + + f'timed_out: {mua_run_res.timed_out}\n' + + f'{mua_run_res.output}') raise Exception('Could not run mua container.') diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py index 297fc4c7b..870046040 100644 --- a/experiment/run_experiment.py +++ b/experiment/run_experiment.py @@ -507,9 +507,9 @@ def start(self): mua_mapped_dir = os.environ.get('HOST_MUA_MAPPED_DIR') - host_mua_out_dir = str(Path( - os.environ.get('HOST_MUA_OUT_DIR', Path.cwd()/'mua_out') - ).absolute()) + host_mua_out_dir = str( + Path(os.environ.get('HOST_MUA_OUT_DIR', + Path.cwd() / 'mua_out')).absolute()) os.environ['HOST_MUA_OUT_DIR'] = host_mua_out_dir environment_args = [ diff --git a/experiment/test_dispatcher.py b/experiment/test_dispatcher.py index 168add2bc..c5f340894 100644 --- a/experiment/test_dispatcher.py +++ b/experiment/test_dispatcher.py @@ -97,7 +97,8 @@ def test_build_images_for_trials_base_images_fail(dispatcher_experiment): dispatcher.build_images_for_trials(dispatcher_experiment.fuzzers, dispatcher_experiment.benchmarks, dispatcher_experiment.num_trials, - dispatcher_experiment.preemptible) + dispatcher_experiment.preemptible, + False) @mock.patch('experiment.build.builder.build_base_images') @@ -114,7 +115,7 @@ def test_build_images_for_trials_build_success(_, dispatcher_experiment): trials = dispatcher.build_images_for_trials( dispatcher_experiment.fuzzers, dispatcher_experiment.benchmarks, dispatcher_experiment.num_trials, - dispatcher_experiment.preemptible) + dispatcher_experiment.preemptible, False) 
trial_fuzzer_benchmarks = [ (trial.fuzzer, trial.benchmark) for trial in trials ] @@ -146,7 +147,7 @@ def mocked_build_all_fuzzer_benchmarks(fuzzers, benchmarks): trials = dispatcher.build_images_for_trials( dispatcher_experiment.fuzzers, dispatcher_experiment.benchmarks, dispatcher_experiment.num_trials, - dispatcher_experiment.preemptible) + dispatcher_experiment.preemptible, False) for trial in trials: assert trial.benchmark == successful_benchmark @@ -181,7 +182,8 @@ def mocked_build_all_fuzzer_benchmarks(fuzzers, benchmarks): with mock.patch('experiment.build.builder.build_all_fuzzer_benchmarks', side_effect=mocked_build_all_fuzzer_benchmarks): trials = dispatcher.build_images_for_trials(fuzzers, benchmarks, - num_trials, False) + num_trials, False, + False) trial_fuzzer_benchmarks = [ (trial.fuzzer, trial.benchmark) for trial in trials From 9830393abbaa355bb9587df62b458e2d040db2c3 Mon Sep 17 00:00:00 2001 From: phi-go Date: Fri, 29 Dec 2023 08:41:31 +0000 Subject: [PATCH 34/69] log more for permission errors --- fuzzers/mutation_analysis/builder.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index 3cd6d08b8..faf3effec 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -94,7 +94,7 @@ RUN pipx install hatch # mua_fuzzer_bench RUN git clone https://github.com/phi-go/mua_fuzzer_bench /mutator && \ cd /mutator && \ - git checkout 37e460f66dbc2500736ac02d91dcf4fee18278ba + git checkout 290037f33a4e0838945d7ba6959332e5aa456d46 RUN cd /mutator && \ echo "llvmBinPath=/usr/lib/llvm-15/bin/" > gradle.properties From 56bb28c66f49cb7d781ea68822006d721e0b85ae Mon Sep 17 00:00:00 2001 From: phi-go Date: Fri, 29 Dec 2023 10:04:23 +0000 Subject: [PATCH 35/69] debugging mua_run_mutants --- fuzzers/mutation_analysis/builder.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index faf3effec..af38bbdcd 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -94,7 +94,7 @@ RUN pipx install hatch # mua_fuzzer_bench RUN git clone https://github.com/phi-go/mua_fuzzer_bench /mutator && \ cd /mutator && \ - git checkout 290037f33a4e0838945d7ba6959332e5aa456d46 + git checkout 29341e3b450f59145b81ba119c88eda4fee2dfaa RUN cd /mutator && \ echo "llvmBinPath=/usr/lib/llvm-15/bin/" > gradle.properties From bbbd3a3ada46766f1cdda2e2923f44b8d738f516 Mon Sep 17 00:00:00 2001 From: phi-go Date: Fri, 29 Dec 2023 10:43:04 +0000 Subject: [PATCH 36/69] fix for subprocess run --- fuzzers/mutation_analysis/builder.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index af38bbdcd..edd05c747 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -94,7 +94,7 @@ RUN pipx install hatch # mua_fuzzer_bench RUN git clone https://github.com/phi-go/mua_fuzzer_bench /mutator && \ cd /mutator && \ - git checkout 29341e3b450f59145b81ba119c88eda4fee2dfaa + git checkout 9f9266f64ffe0adb562980b69e73f5574cf7ff15 RUN cd /mutator && \ echo "llvmBinPath=/usr/lib/llvm-15/bin/" > gradle.properties From 4ca3f04d31adf994baffb2f4230bd238bd50a2a1 Mon Sep 17 00:00:00 2001 From: phi-go Date: Fri, 29 Dec 2023 11:20:24 +0000 Subject: [PATCH 37/69] logging for permission error --- experiment/measurer/run_mua.py | 34 ++++++++++++++++++-- fuzzers/mutation_analysis/builder.Dockerfile | 2 +- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/experiment/measurer/run_mua.py b/experiment/measurer/run_mua.py index 8a5726b05..8548eeb3b 100644 --- a/experiment/measurer/run_mua.py +++ b/experiment/measurer/run_mua.py @@ -17,6 +17,7 @@ from pathlib import Path import 
shlex import subprocess +import time import uuid from common import logs from common import benchmark_utils @@ -90,6 +91,9 @@ def run_mua_container(benchmark): mua_run_res = new_process.execute(mua_run_cmd, expect_zero=False) if mua_run_res.retcode != 0: + if 'Conflict. The container name' in mua_run_res.output: + logger.debug('mua container already running') + return logger.error('could not run mua container:\n' + f'command: {mua_run_cmd}\n' + f'returncode: {mua_run_res.retcode}\n' + @@ -98,8 +102,27 @@ def run_mua_container(benchmark): raise Exception('Could not run mua container.') +def mua_container_is_running(benchmark): + """Return true if the mua container is started.""" + container_name = get_container_name(benchmark) + try: + res = new_process.execute([ + 'docker', 'inspect', '-f', '{{.State.Running}}', container_name + ], expect_zero=False) + if res.retcode != 0: + return False + if res.output.strip() == 'true': + return True + return False + except subprocess.CalledProcessError: + return False + + def ensure_mua_container_running(benchmark): """Start the mutation analysis container for the benchmark.""" + if mua_container_is_running(benchmark): + return + # find correct container and start it container_name = get_container_name(benchmark) @@ -112,6 +135,13 @@ def ensure_mua_container_running(benchmark): def copy_mua_stats_db(benchmark, mua_results_dir): """Copy the stats db from the container to the mua results dir.""" + # Wait a bit if the container was just started + for _ in range(10): + if mua_container_is_running(benchmark): + break + logger.debug('Waiting for mua container to start.') + time.sleep(1) + container_name = get_container_name(benchmark) corpus_run_stats_db = mua_results_dir / 'stats.sqlite' @@ -123,7 +153,7 @@ def copy_mua_stats_db(benchmark, mua_results_dir): 'docker', 'cp', f'{container_name}:/mua_build/build/stats.db', str(corpus_run_stats_db) ] - logger.info(f'mua copy stats db command: {copy_stats_db_command}') + logger.debug(f'mua 
copy stats db command: {copy_stats_db_command}') new_process.execute(copy_stats_db_command, write_to_stdout=True) build_utils.store_mua_stats_db(corpus_run_stats_db, benchmark) @@ -147,7 +177,7 @@ def run_mua_build_ids(benchmark, trial_num, fuzzer, cycle): shlex.join(command) ] - logger.info(f'mua_build_ids command: {docker_exec_command}') + logger.debug(f'mua_build_ids command: {docker_exec_command}') mua_build_res = new_process.execute(docker_exec_command) logger.info(f'mua_build_ids result: {mua_build_res.retcode} ' + f'timed_out: {mua_build_res.timed_out}\n' + diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index edd05c747..2fc31f0c4 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -94,7 +94,7 @@ RUN pipx install hatch # mua_fuzzer_bench RUN git clone https://github.com/phi-go/mua_fuzzer_bench /mutator && \ cd /mutator && \ - git checkout 9f9266f64ffe0adb562980b69e73f5574cf7ff15 + git checkout 6bd8059aa1b3b441a1e725348790d90a538d0520 RUN cd /mutator && \ echo "llvmBinPath=/usr/lib/llvm-15/bin/" > gradle.properties From 1edbb02d036b261349c43c2244b7ef9a6a5ab6f1 Mon Sep 17 00:00:00 2001 From: phi-go Date: Fri, 29 Dec 2023 13:18:45 +0000 Subject: [PATCH 38/69] use different mua out dir on cloud --- experiment/measurer/run_mua.py | 4 +++- experiment/run_experiment.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/experiment/measurer/run_mua.py b/experiment/measurer/run_mua.py index 8548eeb3b..bea7396b4 100644 --- a/experiment/measurer/run_mua.py +++ b/experiment/measurer/run_mua.py @@ -37,6 +37,8 @@ MUTATION_ANALYSIS_IMAGE_NAME = 'mutation_analysis' +GOOGLE_CLOUD_MUA_MAPPED_DIR = '/var/lib/toolbox/mua_out/' + def get_container_name(benchmark): """Return the container name for the given benchmark.""" @@ -48,7 +50,7 @@ def get_host_mua_out_dir(): if experiment_utils.is_local_experiment(): return 
Path(os.environ.get('HOST_MUA_OUT_DIR', '/tmp/mua_out')).absolute() - return Path('/home/chronos/mua_out/') + return Path(GOOGLE_CLOUD_MUA_MAPPED_DIR) def get_dispatcher_mua_out_dir(): diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py index 870046040..668faa9dd 100644 --- a/experiment/run_experiment.py +++ b/experiment/run_experiment.py @@ -40,6 +40,7 @@ from common import new_process from common import utils from common import yaml_utils +from experiment.measurer.run_mua import GOOGLE_CLOUD_MUA_MAPPED_DIR BENCHMARKS_DIR = os.path.join(utils.ROOT_DIR, 'benchmarks') FUZZERS_DIR = os.path.join(utils.ROOT_DIR, 'fuzzers') @@ -621,7 +622,7 @@ def _render_startup_script(self): 'private': self.config['private'], } if self.config['mutation_analysis']: - kwargs['mua_mapped_dir'] = '-v /home/chronos/mua_out/:/mua_out' + kwargs['mua_mapped_dir'] = f'-v {GOOGLE_CLOUD_MUA_MAPPED_DIR}:/mua_out' else: kwargs['mua_mapped_dir'] = '' if 'worker_pool_name' in self.config: From a20201849be8ebb455e041924b1353de3948e28f Mon Sep 17 00:00:00 2001 From: phi-go Date: Fri, 29 Dec 2023 13:31:21 +0000 Subject: [PATCH 39/69] use another mua out dir --- experiment/measurer/run_mua.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiment/measurer/run_mua.py b/experiment/measurer/run_mua.py index bea7396b4..dbc92c8b4 100644 --- a/experiment/measurer/run_mua.py +++ b/experiment/measurer/run_mua.py @@ -37,7 +37,7 @@ MUTATION_ANALYSIS_IMAGE_NAME = 'mutation_analysis' -GOOGLE_CLOUD_MUA_MAPPED_DIR = '/var/lib/toolbox/mua_out/' +GOOGLE_CLOUD_MUA_MAPPED_DIR = '/etc/mua_out/' def get_container_name(benchmark): From 636991b78843923103d7f071c29b22966735a8ce Mon Sep 17 00:00:00 2001 From: phi-go Date: Fri, 29 Dec 2023 14:16:34 +0000 Subject: [PATCH 40/69] vacuum sqlite db instead of simple cp --- experiment/build/build_utils.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/experiment/build/build_utils.py 
b/experiment/build/build_utils.py index f73deedb7..abec8b3ce 100644 --- a/experiment/build/build_utils.py +++ b/experiment/build/build_utils.py @@ -15,6 +15,7 @@ import datetime import os +import sqlite3 import tempfile from common import experiment_path as exp_path @@ -37,20 +38,29 @@ def store_build_logs(build_config, build_result): def store_mua_stats_db(stats_db, benchmark): """Save mua stats_db in the mua bucket.""" - stats_db = str(stats_db) - filestore_utils.cp( - stats_db, - exp_path.filestore(get_mua_results_dir() / 'base_build' / benchmark / - 'stats.sqlite')) + with tempfile.NamedTemporaryFile(mode='w') as tmp: + with sqlite3.connect(stats_db) as conn: + conn.execute('VACUUM INTO ?', (tmp.name, )) + tmp.flush() + os.chmod(tmp.name, 0o666) + filestore_utils.cp( + tmp.name, + exp_path.filestore( + get_mua_results_dir() / 'base_build' / benchmark / + 'stats.sqlite')) def store_mua_results_db(results_db, benchmark, fuzzer, cycle): """Save mua stats_db in the mua bucket.""" - results_db = str(results_db) - filestore_utils.cp( - results_db, - exp_path.filestore(get_mua_results_dir() / 'results' / benchmark / - fuzzer / f'{cycle}.sqlite')) + with tempfile.NamedTemporaryFile(mode='w') as tmp: + with sqlite3.connect(results_db) as conn: + conn.execute('VACUUM INTO ?', (tmp.name, )) + tmp.flush() + os.chmod(tmp.name, 0o666) + filestore_utils.cp( + tmp.name, + exp_path.filestore(get_mua_results_dir() / 'results' / benchmark / + fuzzer / f'{cycle}.sqlite')) def store_mua_build_log(build_output, benchmark, fuzzer, cycle): From d3649ca0bbef79aa8e5b549d6293a871c6d4c448 Mon Sep 17 00:00:00 2001 From: phi-go Date: Fri, 29 Dec 2023 14:16:51 +0000 Subject: [PATCH 41/69] improve logging a bit --- experiment/measurer/measure_manager.py | 7 +++---- fuzzers/mutation_analysis/builder.Dockerfile | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index 4fd6985ca..d1995bef9 100644 --- 
a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -114,8 +114,8 @@ def add_timestamps_to_mua_results_db(timestamp_info, continue timestamp = trial_start_time.timestamp() input_file = '' - logger.debug( - f'Using trial start time {timestamp} for {corpus_file}') + # logger.debug( + # f'Using trial start time {timestamp} for {corpus_file}') else: input_file = timestamp_info[corpus_file]['filename'] timestamp = timestamp_info[corpus_file]['timestamp'] @@ -623,9 +623,8 @@ def process_mua(self, cycle): 'docker', 'exec', '-t', container_name, '/bin/bash', '-c', shlex.join(command) ] - logger.info('mua_run_mutants command:' + str(docker_exec_command)) + logger.debug('mua_run_mutants command:' + str(docker_exec_command)) mua_run_res = new_process.execute(docker_exec_command) - logger.info('mua_run_mutants result:' + str(mua_run_res)) logger.info(f'mua_run_mutants result: {mua_run_res.retcode} ' + f'timed_out: {mua_run_res.timed_out}\n' + f'{mua_run_res.output}') diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index 2fc31f0c4..b7791b060 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -94,7 +94,7 @@ RUN pipx install hatch # mua_fuzzer_bench RUN git clone https://github.com/phi-go/mua_fuzzer_bench /mutator && \ cd /mutator && \ - git checkout 6bd8059aa1b3b441a1e725348790d90a538d0520 + git checkout d5bf98b022ea7dd37df5dc9b5770d65148a77e90 RUN cd /mutator && \ echo "llvmBinPath=/usr/lib/llvm-15/bin/" > gradle.properties From be4d7d90bc5f607cf066c90e75075edacce45058 Mon Sep 17 00:00:00 2001 From: phi-go Date: Fri, 29 Dec 2023 14:26:33 +0000 Subject: [PATCH 42/69] pass presubmit --- experiment/build/build_utils.py | 11 +++++------ experiment/measurer/run_mua.py | 6 +++--- experiment/run_experiment.py | 3 ++- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/experiment/build/build_utils.py 
b/experiment/build/build_utils.py index abec8b3ce..71ac8d764 100644 --- a/experiment/build/build_utils.py +++ b/experiment/build/build_utils.py @@ -40,27 +40,26 @@ def store_mua_stats_db(stats_db, benchmark): """Save mua stats_db in the mua bucket.""" with tempfile.NamedTemporaryFile(mode='w') as tmp: with sqlite3.connect(stats_db) as conn: - conn.execute('VACUUM INTO ?', (tmp.name, )) + conn.execute('VACUUM INTO ?', (tmp.name,)) tmp.flush() os.chmod(tmp.name, 0o666) filestore_utils.cp( tmp.name, - exp_path.filestore( - get_mua_results_dir() / 'base_build' / benchmark / - 'stats.sqlite')) + exp_path.filestore(get_mua_results_dir() / 'base_build' / + benchmark / 'stats.sqlite')) def store_mua_results_db(results_db, benchmark, fuzzer, cycle): """Save mua stats_db in the mua bucket.""" with tempfile.NamedTemporaryFile(mode='w') as tmp: with sqlite3.connect(results_db) as conn: - conn.execute('VACUUM INTO ?', (tmp.name, )) + conn.execute('VACUUM INTO ?', (tmp.name,)) tmp.flush() os.chmod(tmp.name, 0o666) filestore_utils.cp( tmp.name, exp_path.filestore(get_mua_results_dir() / 'results' / benchmark / - fuzzer / f'{cycle}.sqlite')) + fuzzer / f'{cycle}.sqlite')) def store_mua_build_log(build_output, benchmark, fuzzer, cycle): diff --git a/experiment/measurer/run_mua.py b/experiment/measurer/run_mua.py index dbc92c8b4..ed894d3da 100644 --- a/experiment/measurer/run_mua.py +++ b/experiment/measurer/run_mua.py @@ -108,9 +108,9 @@ def mua_container_is_running(benchmark): """Return true if the mua container is started.""" container_name = get_container_name(benchmark) try: - res = new_process.execute([ - 'docker', 'inspect', '-f', '{{.State.Running}}', container_name - ], expect_zero=False) + res = new_process.execute( + ['docker', 'inspect', '-f', '{{.State.Running}}', container_name], + expect_zero=False) if res.retcode != 0: return False if res.output.strip() == 'true': diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py index 668faa9dd..34ffc7852 
100644 --- a/experiment/run_experiment.py +++ b/experiment/run_experiment.py @@ -622,7 +622,8 @@ def _render_startup_script(self): 'private': self.config['private'], } if self.config['mutation_analysis']: - kwargs['mua_mapped_dir'] = f'-v {GOOGLE_CLOUD_MUA_MAPPED_DIR}:/mua_out' + kwargs[ + 'mua_mapped_dir'] = f'-v {GOOGLE_CLOUD_MUA_MAPPED_DIR}:/mua_out' else: kwargs['mua_mapped_dir'] = '' if 'worker_pool_name' in self.config: From 6e3f73c45a0a8d68c8c7884bd8d0fd833920579a Mon Sep 17 00:00:00 2001 From: Joschua Schilling Date: Sat, 30 Dec 2023 13:24:46 +0100 Subject: [PATCH 43/69] fix benchmark re2_fuzzer cflags --- fuzzers/mutation_analysis/fuzzer.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fuzzers/mutation_analysis/fuzzer.py b/fuzzers/mutation_analysis/fuzzer.py index dfdc97f44..15c22db6b 100644 --- a/fuzzers/mutation_analysis/fuzzer.py +++ b/fuzzers/mutation_analysis/fuzzer.py @@ -23,10 +23,16 @@ def build(): """Build benchmark.""" + benchmark = os.getenv('BENCHMARK') + cflags = [ # '-fprofile-instr-generate', '-fcoverage-mapping', '-gline-tables-only' '-fPIE', ] + if benchmark == "re2_fuzzer": + cflags = [ + '', + ] utils.append_flags('CFLAGS', cflags) utils.append_flags('CXXFLAGS', cflags) @@ -43,7 +49,6 @@ def build(): build_script = os.path.join(os.environ['SRC'], 'build.sh') print(f'build_script: {build_script}') - benchmark = os.getenv('BENCHMARK') fuzzer = os.getenv('FUZZER') print(f'Building benchmark {benchmark} with fuzzer {fuzzer}') From 54a2d2b630b4bd825cc229ffbe0a5b3f08b97ac8 Mon Sep 17 00:00:00 2001 From: phi-go Date: Sat, 30 Dec 2023 22:10:28 +0000 Subject: [PATCH 44/69] improve mua run / build scripts --- experiment/dispatcher.py | 4 +++ experiment/exec_id.py | 32 ++++++++++++++++++ experiment/measurer/measure_manager.py | 5 +-- experiment/measurer/run_mua.py | 34 ++++++++++++-------- fuzzers/mutation_analysis/builder.Dockerfile | 2 +- 5 files changed, 60 insertions(+), 17 deletions(-) create mode 100644 
experiment/exec_id.py diff --git a/experiment/dispatcher.py b/experiment/dispatcher.py index 4d87005cf..3adf9d27f 100755 --- a/experiment/dispatcher.py +++ b/experiment/dispatcher.py @@ -23,6 +23,7 @@ import threading import time from typing import List +import uuid from common import experiment_path as exp_path from common import experiment_utils @@ -35,6 +36,7 @@ from experiment import reporter from experiment import scheduler from experiment import stop_experiment +from experiment.exec_id import write_exec_id LOOP_WAIT_SECONDS = 5 * 60 @@ -133,6 +135,8 @@ def dispatcher_main(): """Do the experiment and report results.""" logs.info('Starting experiment.') + write_exec_id() + # Set this here because we get failures if we do it in measurer for some # reason. multiprocessing.set_start_method('spawn') diff --git a/experiment/exec_id.py b/experiment/exec_id.py new file mode 100644 index 000000000..d3914699c --- /dev/null +++ b/experiment/exec_id.py @@ -0,0 +1,32 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Exec id is used to identify the current run, if the dispatcher container +# is preempted the exec id will change. This allows us to identify which actions +# were performed by earlier runs and which were performed by the current run. +# We use this to identify which mutants builds were interrupted by a +# preemption. 
+import uuid + + +def write_exec_id(): + """Write the exec id to a file.""" + with open('/tmp/exec_id', 'wb') as file_handle: + file_handle.write(uuid.uuid4().bytes) + + +def read_exec_id(): + """Read the exec id from a file.""" + with open('/tmp/exec_id', 'rb') as file_handle: + return uuid.UUID(bytes=file_handle.read()) diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index d1995bef9..ddbabc2d8 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -572,7 +572,8 @@ def initialize_mua_directories(): self.create_dir(fuzzer_corpi_dir) mutants_ids_dir_entry = (shared_mua_binaries_dir / 'mutant_ids' / - self.fuzzer / str(self.trial_num)) + self.benchmark / self.fuzzer / + str(self.trial_num)) self.create_dir(mutants_ids_dir_entry) mua_results_dir = self.mua_run_result_dir() @@ -615,7 +616,7 @@ def process_mua(self, cycle): # run all needed mutants in container command = [ 'python3', '/mutator/mua_run_mutants.py', fuzz_target, - experiment_name, self.fuzzer, + self.benchmark, experiment_name, self.fuzzer, str(self.trial_num) ] diff --git a/experiment/measurer/run_mua.py b/experiment/measurer/run_mua.py index ed894d3da..a344d549f 100644 --- a/experiment/measurer/run_mua.py +++ b/experiment/measurer/run_mua.py @@ -18,27 +18,22 @@ import shlex import subprocess import time -import uuid from common import logs from common import benchmark_utils from common import experiment_utils from common import new_process from common import environment from experiment.build import build_utils +from experiment.exec_id import read_exec_id logger = logs.Logger() -# Exec id is used to identify the current run, if the dispatcher container -# is preempted the exec id will change. This allows us to identify which actions -# were performed by earlier runs and which were performed by the current run. -# We use this to identify which mutants builds were interrupted by a -# preemption. 
-EXEC_ID = uuid.uuid4() - MUTATION_ANALYSIS_IMAGE_NAME = 'mutation_analysis' GOOGLE_CLOUD_MUA_MAPPED_DIR = '/etc/mua_out/' +EXEC_ID = None + def get_container_name(benchmark): """Return the container name for the given benchmark.""" @@ -162,6 +157,11 @@ def copy_mua_stats_db(benchmark, mua_results_dir): def run_mua_build_ids(benchmark, trial_num, fuzzer, cycle): """Run mua_build_ids.py on the container.""" + global EXEC_ID + if EXEC_ID is None: + EXEC_ID = read_exec_id() + logger.debug('Setting EXEC_ID to %s', EXEC_ID) + container_name = get_container_name(benchmark) # get additional info from commons experiment_name = experiment_utils.get_experiment_name() @@ -169,9 +169,14 @@ def run_mua_build_ids(benchmark, trial_num, fuzzer, cycle): # execute command on container command = [ - 'python3', '/mutator/mua_build_ids.py', - str(EXEC_ID), fuzz_target, experiment_name, fuzzer, - str(trial_num), '--debug_num_mutants=10' + 'python3', + '/mutator/mua_build_ids.py', + str(EXEC_ID), + fuzz_target, + benchmark, + experiment_name, + fuzzer, + str(trial_num), ] docker_exec_command = [ @@ -180,9 +185,10 @@ def run_mua_build_ids(benchmark, trial_num, fuzzer, cycle): ] logger.debug(f'mua_build_ids command: {docker_exec_command}') - mua_build_res = new_process.execute(docker_exec_command) + mua_build_res = new_process.execute(docker_exec_command, + write_to_stdout=True) logger.info(f'mua_build_ids result: {mua_build_res.retcode} ' + f'timed_out: {mua_build_res.timed_out}\n' + f'{mua_build_res.output}') - build_utils.store_mua_build_log(mua_build_res.output, benchmark, fuzzer, - cycle) + build_utils.store_mua_build_log(mua_build_res.output or '', benchmark, + fuzzer, cycle) diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index b7791b060..ea06e4995 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -94,7 +94,7 @@ RUN pipx install hatch # mua_fuzzer_bench RUN git 
clone https://github.com/phi-go/mua_fuzzer_bench /mutator && \ cd /mutator && \ - git checkout d5bf98b022ea7dd37df5dc9b5770d65148a77e90 + git checkout 9689cd03b5a37224e0f7afdb664f92155df79bdf RUN cd /mutator && \ echo "llvmBinPath=/usr/lib/llvm-15/bin/" > gradle.properties From 5f33adf7431b80a506689a025be3b3f9894c4586 Mon Sep 17 00:00:00 2001 From: phi-go Date: Tue, 2 Jan 2024 12:30:10 +0000 Subject: [PATCH 45/69] improve performance of mua measurer --- experiment/build/build_utils.py | 15 ++++--- experiment/measurer/measure_manager.py | 44 +++++++++----------- experiment/measurer/run_mua.py | 6 +-- experiment/runner.py | 8 ++-- fuzzers/mutation_analysis/builder.Dockerfile | 2 +- test_experiment.yaml | 24 ----------- 6 files changed, 33 insertions(+), 66 deletions(-) delete mode 100644 test_experiment.yaml diff --git a/experiment/build/build_utils.py b/experiment/build/build_utils.py index 71ac8d764..71462de28 100644 --- a/experiment/build/build_utils.py +++ b/experiment/build/build_utils.py @@ -49,7 +49,7 @@ def store_mua_stats_db(stats_db, benchmark): benchmark / 'stats.sqlite')) -def store_mua_results_db(results_db, benchmark, fuzzer, cycle): +def store_mua_results_db(results_db, trial, cycle): """Save mua stats_db in the mua bucket.""" with tempfile.NamedTemporaryFile(mode='w') as tmp: with sqlite3.connect(results_db) as conn: @@ -58,11 +58,10 @@ def store_mua_results_db(results_db, benchmark, fuzzer, cycle): os.chmod(tmp.name, 0o666) filestore_utils.cp( tmp.name, - exp_path.filestore(get_mua_results_dir() / 'results' / benchmark / - fuzzer / f'{cycle}.sqlite')) + exp_path.filestore(get_mua_results_dir() / 'results' / str(trial) / f'{cycle}.sqlite')) -def store_mua_build_log(build_output, benchmark, fuzzer, cycle): +def store_mua_build_log(build_output, benchmark, fuzzer, trial, cycle): """Save mua stats_db in the mua bucket.""" with tempfile.NamedTemporaryFile(mode='w') as tmp: tmp.write(build_output) @@ -71,10 +70,10 @@ def store_mua_build_log(build_output, 
benchmark, fuzzer, cycle): filestore_utils.cp( tmp.name, exp_path.filestore(get_mua_results_dir() / 'mua_build' / benchmark / - fuzzer / f'{cycle}.log')) + fuzzer / str(trial) / f'{cycle}.log')) -def store_mua_run_log(run_output, benchmark, fuzzer, cycle): +def store_mua_run_log(run_output, trial, cycle): """Save mua stats_db in the mua bucket.""" with tempfile.NamedTemporaryFile(mode='w') as tmp: tmp.write(run_output) @@ -82,8 +81,8 @@ def store_mua_run_log(run_output, benchmark, fuzzer, cycle): os.chmod(tmp.name, 0o666) filestore_utils.cp( tmp.name, - exp_path.filestore(get_mua_results_dir() / 'mua_run' / benchmark / - fuzzer / f'{cycle}.log')) + exp_path.filestore(get_mua_results_dir() / 'mua_run' + / str(trial) / f'{cycle}.log')) def store_report_error_log(report_error): diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index ddbabc2d8..b0375d74f 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -84,6 +84,9 @@ def add_timestamps_to_mua_results_db(timestamp_info, timestamp FLOAT ) ''') + cur.execute(''' + CREATE INDEX IF NOT EXISTS timestamps_hashname_index ON timestamps (hashname, timestamp) + ''') conn.commit() cur = conn.cursor() @@ -96,29 +99,20 @@ def add_timestamps_to_mua_results_db(timestamp_info, if cur.fetchone() is None: # Corpus file has no associated timestamp yet. # Try to get it from the timestamp_info dict. - timestamp = timestamp_info.get(corpus_file) - if timestamp is None: - # No timestamp found, so use the trial start time but only - # for the first cycle as it contains the seed corpus. - # If the timestamp_info dict does not have an entry for - # this corpus file, then it is assumed that the corpus file - # is a seed input and was not generated by the fuzzer. 
- if cycle != 0: - # logger.debug('No timestamp found for %s, this should not ' - # 'happen for non-seed inputs.', corpus_file) - continue - if trial_start_time is None: - logger.error( - 'No trial start time found, this should not happen for ' - 'started trials.') - continue + if cycle == 0: + # This is the first cycle, containing only seed inputs, + # so use the trial start time. timestamp = trial_start_time.timestamp() input_file = '' - # logger.debug( - # f'Using trial start time {timestamp} for {corpus_file}') else: - input_file = timestamp_info[corpus_file]['filename'] - timestamp = timestamp_info[corpus_file]['timestamp'] + timestamp = timestamp_info.get(corpus_file) + if timestamp is None: + if cycle != 0: + logger.warning('No timestamp found for %s.', corpus_file) + continue + else: + input_file = timestamp_info[corpus_file]['filename'] + timestamp = timestamp_info[corpus_file]['timestamp'] cur.execute('''INSERT INTO timestamps VALUES (?, ?, ?)''', (corpus_file, input_file, timestamp)) conn.commit() @@ -434,6 +428,7 @@ def enrich_timestamp_info(timestamp_info, member_to_filename): 'timestamp': timestamp, 'filename': member } + return full_timestamp_info def extract_corpus(corpus_archive: str, @@ -451,8 +446,9 @@ def extract_corpus(corpus_archive: str, continue if member.name == UNIQUE_TIMESTAMP_FILENAME: + logger.info('Found timestamp file %s.', member.name) timestamp_file_handle = tar.extractfile(member) - if not timestamp_file_handle: + if timestamp_file_handle is None: logger.info('Failed to get timestamp file handle to %s.', member) continue @@ -629,11 +625,9 @@ def process_mua(self, cycle): logger.info(f'mua_run_mutants result: {mua_run_res.retcode} ' + f'timed_out: {mua_run_res.timed_out}\n' + f'{mua_run_res.output}') - build_utils.store_mua_run_log(mua_run_res.output, self.benchmark, - self.fuzzer, cycle) + build_utils.store_mua_run_log(mua_run_res.output, self.trial_num, cycle) results_db = self.mua_run_result_dir() / 'results.sqlite' - 
build_utils.store_mua_results_db(results_db, self.benchmark, - self.fuzzer, cycle) + build_utils.store_mua_results_db(results_db, self.trial_num, cycle) def run_cov_new_units(self): """Run the coverage binary on new units.""" diff --git a/experiment/measurer/run_mua.py b/experiment/measurer/run_mua.py index a344d549f..2e5b32bda 100644 --- a/experiment/measurer/run_mua.py +++ b/experiment/measurer/run_mua.py @@ -177,6 +177,7 @@ def run_mua_build_ids(benchmark, trial_num, fuzzer, cycle): experiment_name, fuzzer, str(trial_num), + # "--debug_num_mutants=200" ] docker_exec_command = [ @@ -185,10 +186,9 @@ def run_mua_build_ids(benchmark, trial_num, fuzzer, cycle): ] logger.debug(f'mua_build_ids command: {docker_exec_command}') - mua_build_res = new_process.execute(docker_exec_command, - write_to_stdout=True) + mua_build_res = new_process.execute(docker_exec_command) logger.info(f'mua_build_ids result: {mua_build_res.retcode} ' + f'timed_out: {mua_build_res.timed_out}\n' + f'{mua_build_res.output}') build_utils.store_mua_build_log(mua_build_res.output or '', benchmark, - fuzzer, cycle) + fuzzer, trial_num, cycle) diff --git a/experiment/runner.py b/experiment/runner.py index bc599b8d5..d6868b377 100644 --- a/experiment/runner.py +++ b/experiment/runner.py @@ -390,7 +390,7 @@ def archive_corpus(self): file_timestamps[arcname] = file_timestamp except Exception: # pylint: disable=broad-except e_msg = traceback.format_exc() - logs.warning( + logs.debug( f'Failed to get timestamp for {arcname}: {e_msg}') except (FileNotFoundError, OSError): # We will get these errors if files or directories are being @@ -405,13 +405,11 @@ def archive_corpus(self): # any existing file in the corpus. 
try: with tempfile.NamedTemporaryFile(mode='wt') as temp_file: - logs.debug( - f'Writing timestamp file to archive: {temp_file}') + logs.debug(f"timestamp archiving: {len(file_timestamps)}") temp_file.write(json.dumps(file_timestamps)) temp_file.flush() tar.add(temp_file.name, - arcname=UNIQUE_TIMESTAMP_FILENAME, - recursive=False) + arcname=UNIQUE_TIMESTAMP_FILENAME) except Exception: # pylint: disable=broad-except e_msg = traceback.format_exc() logs.warning( diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index ea06e4995..9991358ae 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -94,7 +94,7 @@ RUN pipx install hatch # mua_fuzzer_bench RUN git clone https://github.com/phi-go/mua_fuzzer_bench /mutator && \ cd /mutator && \ - git checkout 9689cd03b5a37224e0f7afdb664f92155df79bdf + git checkout 6dc110191a512b2c240467a16a1aaac5446e12de RUN cd /mutator && \ echo "llvmBinPath=/usr/lib/llvm-15/bin/" > gradle.properties diff --git a/test_experiment.yaml b/test_experiment.yaml deleted file mode 100644 index f9d3cf3e5..000000000 --- a/test_experiment.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# The number of trials of a fuzzer-benchmark pair. -trials: 1 - -# The amount of time in seconds that each trial is run for. -# 1 day = 24 * 60 * 60 = 86400 -max_total_time: 120 #86400 - -# The location of the docker registry. -# FIXME: Support custom docker registry. -# See https://github.com/google/fuzzbench/issues/777 -docker_registry: gcr.io/fuzzbench - -# The local experiment folder that will store most of the experiment data. -# Please use an absolute path. -experiment_filestore: /tmp/experiment-data - -# The local report folder where HTML reports and summary data will be stored. -# Please use an absolute path. -report_filestore: /tmp/report-data - -# Flag that indicates this is a local experiment. 
-local_experiment: true - -host_mua_mapped_dir: "/home/pgoerz/fuzzbench/fuzzers/mutation_analysis/mua_fuzzer_bench/fuzzbench_mapped_dir/" \ No newline at end of file From 0702bbbcad10c63c4fd32e546582affcabc85b40 Mon Sep 17 00:00:00 2001 From: phi-go Date: Wed, 3 Jan 2024 13:10:42 +0000 Subject: [PATCH 46/69] improve mua run perf --- fuzzers/mutation_analysis/builder.Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index 9991358ae..29287ce1d 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -68,7 +68,7 @@ RUN update-alternatives --install \ --slave /usr/local/bin/clang++ clang++ /usr/lib/llvm-15/bin/clang++ # wllvm -RUN pip3 install wllvm +RUN pip3 install wllvm py-spy # gradle RUN mkdir -p /tmp/gradle && \ @@ -94,7 +94,7 @@ RUN pipx install hatch # mua_fuzzer_bench RUN git clone https://github.com/phi-go/mua_fuzzer_bench /mutator && \ cd /mutator && \ - git checkout 6dc110191a512b2c240467a16a1aaac5446e12de + git checkout 13b77966064b60700e714930ca636dd79e992cd4 RUN cd /mutator && \ echo "llvmBinPath=/usr/lib/llvm-15/bin/" > gradle.properties From 7f7b67e6095221d60346b083ab61a49d48998933 Mon Sep 17 00:00:00 2001 From: phi-go Date: Wed, 3 Jan 2024 13:11:59 +0000 Subject: [PATCH 47/69] dispatcher local and mua wait for trials --- experiment/dispatcher.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/experiment/dispatcher.py b/experiment/dispatcher.py index 3adf9d27f..d2da920c2 100755 --- a/experiment/dispatcher.py +++ b/experiment/dispatcher.py @@ -149,10 +149,13 @@ def dispatcher_main(): _initialize_experiment_in_db(experiment.config) + use_mutation_analysis = experiment.config['mutation_analysis'] + is_local_experiment = experiment_utils.is_local_experiment() + trials = build_images_for_trials(experiment.fuzzers, experiment.benchmarks, 
experiment.num_trials, experiment.preemptible, - experiment.config['mutation_analysis']) + use_mutation_analysis) _initialize_trials_in_db(trials) create_work_subdirs(['experiment-folders', 'measurement-folders']) @@ -162,6 +165,16 @@ def dispatcher_main(): args=(experiment.config,)) scheduler_loop_thread.start() + if is_local_experiment and use_mutation_analysis: + # Mutation analysis just takes all cpu available, further work needs to + # be done to make it work nicely in parallel with trial runners for a + # local experiment. This is not a problem for remote experiments because + # the trials are run on a seperate VM. + + # Wait for trials to end before starting measurer. + logs.info('Waiting for trials to end.') + scheduler_loop_thread.join() + measurer_main_process = multiprocessing.Process( target=measure_manager.measure_main, args=(experiment.config,)) From fe9fd38885dfbade9315b275edd13523ba6b4da2 Mon Sep 17 00:00:00 2001 From: phi-go Date: Wed, 3 Jan 2024 13:32:50 +0000 Subject: [PATCH 48/69] pass presubmit --- experiment/build/build_utils.py | 7 +++--- experiment/dispatcher.py | 3 +-- experiment/exec_id.py | 13 +++++----- experiment/measurer/measure_manager.py | 34 +++++++++++++++----------- experiment/runner.py | 13 +++++----- fuzzers/mutation_analysis/fuzzer.py | 4 +-- 6 files changed, 41 insertions(+), 33 deletions(-) diff --git a/experiment/build/build_utils.py b/experiment/build/build_utils.py index 71462de28..19f17f29a 100644 --- a/experiment/build/build_utils.py +++ b/experiment/build/build_utils.py @@ -58,7 +58,8 @@ def store_mua_results_db(results_db, trial, cycle): os.chmod(tmp.name, 0o666) filestore_utils.cp( tmp.name, - exp_path.filestore(get_mua_results_dir() / 'results' / str(trial) / f'{cycle}.sqlite')) + exp_path.filestore(get_mua_results_dir() / 'results' / str(trial) / + f'{cycle}.sqlite')) def store_mua_build_log(build_output, benchmark, fuzzer, trial, cycle): @@ -81,8 +82,8 @@ def store_mua_run_log(run_output, trial, cycle): 
os.chmod(tmp.name, 0o666) filestore_utils.cp( tmp.name, - exp_path.filestore(get_mua_results_dir() / 'mua_run' - / str(trial) / f'{cycle}.log')) + exp_path.filestore(get_mua_results_dir() / 'mua_run' / str(trial) / + f'{cycle}.log')) def store_report_error_log(report_error): diff --git a/experiment/dispatcher.py b/experiment/dispatcher.py index d2da920c2..84820573a 100755 --- a/experiment/dispatcher.py +++ b/experiment/dispatcher.py @@ -23,7 +23,6 @@ import threading import time from typing import List -import uuid from common import experiment_path as exp_path from common import experiment_utils @@ -167,7 +166,7 @@ def dispatcher_main(): if is_local_experiment and use_mutation_analysis: # Mutation analysis just takes all cpu available, further work needs to - # be done to make it work nicely in parallel with trial runners for a + # be done to make it work nicely in parallel with trial runners for a # local experiment. This is not a problem for remote experiments because # the trials are run on a seperate VM. diff --git a/experiment/exec_id.py b/experiment/exec_id.py index d3914699c..56f76f4b0 100644 --- a/experiment/exec_id.py +++ b/experiment/exec_id.py @@ -11,12 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -# Exec id is used to identify the current run, if the dispatcher container -# is preempted the exec id will change. This allows us to identify which actions -# were performed by earlier runs and which were performed by the current run. -# We use this to identify which mutants builds were interrupted by a -# preemption. +""" +Exec id is used to identify the current run, if the dispatcher container +is preempted the exec id will change. This allows us to identify which actions +were performed by earlier runs and which were performed by the current run. 
+We use this to identify which mutants builds were interrupted by a +preemption. +""" import uuid diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index b0375d74f..43aab5358 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -29,7 +29,7 @@ import tempfile import tarfile import time -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple import queue from pathlib import Path import psutil @@ -90,6 +90,7 @@ def add_timestamps_to_mua_results_db(timestamp_info, conn.commit() cur = conn.cursor() + num_timestamp_not_found = 0 for corpus_file in os.listdir(corpus_dir): cur.execute( ''' @@ -107,14 +108,17 @@ def add_timestamps_to_mua_results_db(timestamp_info, else: timestamp = timestamp_info.get(corpus_file) if timestamp is None: - if cycle != 0: - logger.warning('No timestamp found for %s.', corpus_file) + num_timestamp_not_found += 1 continue - else: - input_file = timestamp_info[corpus_file]['filename'] - timestamp = timestamp_info[corpus_file]['timestamp'] + input_file = timestamp_info[corpus_file]['filename'] + timestamp = timestamp_info[corpus_file]['timestamp'] cur.execute('''INSERT INTO timestamps VALUES (?, ?, ?)''', (corpus_file, input_file, timestamp)) + + if num_timestamp_not_found > 0: + logger.info('Failed to find timestamp info for %d corpus entries.', + num_timestamp_not_found) + conn.commit() @@ -403,7 +407,8 @@ def get_unmeasured_snapshots(experiment: str, return unmeasured_first_snapshots + unmeasured_latest_snapshots -def enrich_timestamp_info(timestamp_info, member_to_filename): +def enrich_timestamp_info(timestamp_info, + member_to_filename) -> Dict[str, Dict[str, Any]]: """Enrich timestamp info with the filename of the corpus entry.""" # Replace filenames with hashnames but keep the original filenames # for reference. 
@@ -431,8 +436,9 @@ def enrich_timestamp_info(timestamp_info, member_to_filename): return full_timestamp_info -def extract_corpus(corpus_archive: str, - output_directory: str) -> Optional[List[Tuple[str, int]]]: +def extract_corpus( + corpus_archive: str, + output_directory: str) -> Optional[Dict[str, Dict[str, Any]]]: """Extract a corpus from |corpus_archive| to |output_directory|.""" pathlib.Path(output_directory).mkdir(exist_ok=True) timestamp_info = None @@ -483,9 +489,9 @@ def extract_corpus(corpus_archive: str, filesystem.write(file_path, member_contents, 'wb') - if timestamp_info is not None: - return enrich_timestamp_info(timestamp_info, member_to_filename) - return None + if timestamp_info is None: + return None + return enrich_timestamp_info(timestamp_info, member_to_filename) class SnapshotMeasurer(coverage_utils.TrialCoverage): # pylint: disable=too-many-instance-attributes @@ -705,8 +711,8 @@ def generate_coverage_information(self, cycle: int): self.generate_summary(cycle) def extract_corpus( - self, corpus_archive_path - ) -> Tuple[bool, Optional[List[Tuple[str, int]]]]: + self, corpus_archive_path + ) -> Tuple[bool, Optional[Dict[str, Dict[str, Any]]]]: """Extract the corpus archive for this cycle if it exists.""" if not os.path.exists(corpus_archive_path): self.logger.warning('Corpus not found: %s.', corpus_archive_path) diff --git a/experiment/runner.py b/experiment/runner.py index d6868b377..9ea99c146 100644 --- a/experiment/runner.py +++ b/experiment/runner.py @@ -389,9 +389,10 @@ def archive_corpus(self): file_timestamp = stat_info.st_mtime file_timestamps[arcname] = file_timestamp except Exception: # pylint: disable=broad-except - e_msg = traceback.format_exc() - logs.debug( - f'Failed to get timestamp for {arcname}: {e_msg}') + # e_msg = traceback.format_exc() + # logs.debug( + # f'Failed to get timestamp for {arcname}: {e_msg}') + pass except (FileNotFoundError, OSError): # We will get these errors if files or directories are being # deleted 
from |directory| as we archive it. Don't bother @@ -405,11 +406,11 @@ def archive_corpus(self): # any existing file in the corpus. try: with tempfile.NamedTemporaryFile(mode='wt') as temp_file: - logs.debug(f"timestamp archiving: {len(file_timestamps)}") + logs.debug('timestamp archive num entries: ' + f'{len(file_timestamps)}') temp_file.write(json.dumps(file_timestamps)) temp_file.flush() - tar.add(temp_file.name, - arcname=UNIQUE_TIMESTAMP_FILENAME) + tar.add(temp_file.name, arcname=UNIQUE_TIMESTAMP_FILENAME) except Exception: # pylint: disable=broad-except e_msg = traceback.format_exc() logs.warning( diff --git a/fuzzers/mutation_analysis/fuzzer.py b/fuzzers/mutation_analysis/fuzzer.py index 15c22db6b..386198024 100644 --- a/fuzzers/mutation_analysis/fuzzer.py +++ b/fuzzers/mutation_analysis/fuzzer.py @@ -29,9 +29,9 @@ def build(): # '-fprofile-instr-generate', '-fcoverage-mapping', '-gline-tables-only' '-fPIE', ] - if benchmark == "re2_fuzzer": + if benchmark == 're2_fuzzer': cflags = [ - '', + '', ] utils.append_flags('CFLAGS', cflags) utils.append_flags('CXXFLAGS', cflags) From 72458de89a7d09115fec6576d8c4a2af5143786a Mon Sep 17 00:00:00 2001 From: phi-go Date: Wed, 3 Jan 2024 14:58:34 +0000 Subject: [PATCH 49/69] extra logging for local run --- experiment/scheduler.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/experiment/scheduler.py b/experiment/scheduler.py index 0d9da0b22..4db0c5502 100644 --- a/experiment/scheduler.py +++ b/experiment/scheduler.py @@ -101,8 +101,21 @@ def all_trials_ended(experiment: str) -> bool: """Return a bool if there are any trials in |experiment| that have not started.""" try: - return not get_experiment_trials(experiment).filter( + active_trials = get_experiment_trials(experiment).filter( models.Trial.time_ended.is_(None)).all() + if experiment_utils.is_local_experiment(): + # Do some extra logging for local runs + logs.info(f'Active trials: {len(active_trials)}') + for trial in 
active_trials: + if trial.time_started is None: + runtime_info = 'not started' + else: + runtime_info = datetime.datetime.now() - trial.time_started + runtime_info = f'running for {runtime_info}' + logs.info('Active trial: ' + + f'{trial.id} {trial.fuzzer} {trial.benchmark}: ' + + f'{runtime_info}') + return not active_trials except RuntimeError: logger.error('Failed to check whether all trials ended.') return False From 392b981bef3f4b6fe1e42e439afdbe27e8f2c3f6 Mon Sep 17 00:00:00 2001 From: Joschua Schilling Date: Thu, 4 Jan 2024 12:21:24 +0100 Subject: [PATCH 50/69] fix libFuzzingEngineMutation support for openssl --- benchmarks/openssl_x509/build.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/benchmarks/openssl_x509/build.sh b/benchmarks/openssl_x509/build.sh index 65faf0509..27892f01b 100755 --- a/benchmarks/openssl_x509/build.sh +++ b/benchmarks/openssl_x509/build.sh @@ -24,6 +24,10 @@ fi if [ "$FUZZER" = "centipede" ] then WITH_FUZZER_LIB="$FUZZER_LIB" +elif [ "$FUZZER" = "mutation_analysis" ] +then + clang++ -c /mutator/dockerfiles/programs/common/main.cc -o /usr/lib/libFuzzingEngineMutation.a + WITH_FUZZER_LIB="/usr/lib/libFuzzingEngineMutation" else WITH_FUZZER_LIB='/usr/lib/libFuzzingEngine' fi From c879b4354e570689284acc84936dbcf69fe004fa Mon Sep 17 00:00:00 2001 From: Joschua Schilling Date: Thu, 4 Jan 2024 12:22:01 +0100 Subject: [PATCH 51/69] fix libFuzzingEngineMutation.a not compiled lib bug --- fuzzers/mutation_analysis/fuzzer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fuzzers/mutation_analysis/fuzzer.py b/fuzzers/mutation_analysis/fuzzer.py index 386198024..940411449 100644 --- a/fuzzers/mutation_analysis/fuzzer.py +++ b/fuzzers/mutation_analysis/fuzzer.py @@ -39,10 +39,13 @@ def build(): os.environ['CC'] = 'gclang-wrap' os.environ['CXX'] = 'gclang++-wrap' os.environ['LLVM_COMPILER_PATH'] = '/usr/lib/llvm-15/bin/' - os.environ['FUZZER_LIB'] = '/mutator/dockerfiles/programs/common/main.cc' 
os.environ['MUA_RECORDING_DB'] = MUA_RECORDING_DB os.environ['llvmBinPath'] = '/usr/local/bin/' + # build FUZZER_LIB + subprocess.check_call(['clang++', '-c', '/mutator/dockerfiles/programs/common/main.cc', '-o', '/usr/lib/libFuzzingEngineMutation.a']) + os.environ['FUZZER_LIB'] = '/usr/lib/libFuzzingEngineMutation.a' + if os.path.exists(MUA_RECORDING_DB): os.unlink(MUA_RECORDING_DB) From 724cbe01007bf029bb4d001b37f64c965750980e Mon Sep 17 00:00:00 2001 From: Joschua Schilling Date: Thu, 4 Jan 2024 12:23:04 +0100 Subject: [PATCH 52/69] fix pip not found problem + mua version bump --- fuzzers/mutation_analysis/builder.Dockerfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index 29287ce1d..6fb454c40 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -33,6 +33,9 @@ RUN DEBIAN_FRONTEND=noninteractive \ pipx \ python3.8-venv +#fix pip issue +RUN ln -s /usr/local/bin/pip3 /usr/local/bin/pip + # llvm 15 RUN mkdir /llvm && \ cd /llvm && \ @@ -94,7 +97,8 @@ RUN pipx install hatch # mua_fuzzer_bench RUN git clone https://github.com/phi-go/mua_fuzzer_bench /mutator && \ cd /mutator && \ - git checkout 13b77966064b60700e714930ca636dd79e992cd4 + git checkout 731b9cf404001e32a96765444e03d109b2fbda33 + RUN cd /mutator && \ echo "llvmBinPath=/usr/lib/llvm-15/bin/" > gradle.properties From a69b5c75c270e4e8c83202fcaf412b4dbf6f155c Mon Sep 17 00:00:00 2001 From: Joschua Schilling Date: Thu, 4 Jan 2024 15:05:30 +0100 Subject: [PATCH 53/69] set main.cc as FUZZER_LIB again --- fuzzers/mutation_analysis/fuzzer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fuzzers/mutation_analysis/fuzzer.py b/fuzzers/mutation_analysis/fuzzer.py index 940411449..8c8841f6c 100644 --- a/fuzzers/mutation_analysis/fuzzer.py +++ b/fuzzers/mutation_analysis/fuzzer.py @@ -43,8 +43,10 @@ def build(): 
os.environ['llvmBinPath'] = '/usr/local/bin/' # build FUZZER_LIB - subprocess.check_call(['clang++', '-c', '/mutator/dockerfiles/programs/common/main.cc', '-o', '/usr/lib/libFuzzingEngineMutation.a']) - os.environ['FUZZER_LIB'] = '/usr/lib/libFuzzingEngineMutation.a' + #subprocess.check_call(['clang++', '-c', '/mutator/dockerfiles/programs/common/main.cc', '-o', '/usr/lib/libFuzzingEngineMutation.a']) + #os.environ['FUZZER_LIB'] = '/usr/lib/libFuzzingEngineMutation.a' + + os.environ['FUZZER_LIB'] = '/mutator/dockerfiles/programs/common/main.cc' if os.path.exists(MUA_RECORDING_DB): os.unlink(MUA_RECORDING_DB) From 67797539658a7ba191cc7badefc8349f54d0367c Mon Sep 17 00:00:00 2001 From: phi-go Date: Thu, 4 Jan 2024 17:01:39 +0000 Subject: [PATCH 54/69] mua run perf improvement and compress result dbs --- experiment/build/build_utils.py | 40 ++++++++++++++------------ experiment/measurer/measure_manager.py | 13 ++++++--- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/experiment/build/build_utils.py b/experiment/build/build_utils.py index 19f17f29a..8287f907a 100644 --- a/experiment/build/build_utils.py +++ b/experiment/build/build_utils.py @@ -17,6 +17,7 @@ import os import sqlite3 import tempfile +import lzma from common import experiment_path as exp_path from common import filestore_utils @@ -36,30 +37,33 @@ def store_build_logs(build_config, build_result): exp_path.filestore(get_build_logs_dir() / build_log_filename)) +def _store_db(db_path, dest): + """Save db in the mua bucket.""" + with tempfile.NamedTemporaryFile() as tmp_uncompressed: + with tempfile.NamedTemporaryFile() as tmp_compressed: + with sqlite3.connect(db_path) as conn: + conn.execute('VACUUM INTO ?', (tmp_uncompressed.name,)) + tmp_uncompressed.flush() + os.chmod(tmp_uncompressed.name, 0o666) + with lzma.open(tmp_compressed.name, 'wb') as compressed: + compressed.write(tmp_uncompressed.read()) + tmp_compressed.flush() + os.chmod(tmp_compressed.name, 0o666) + 
filestore_utils.cp(tmp_compressed.name, dest) + + def store_mua_stats_db(stats_db, benchmark): """Save mua stats_db in the mua bucket.""" - with tempfile.NamedTemporaryFile(mode='w') as tmp: - with sqlite3.connect(stats_db) as conn: - conn.execute('VACUUM INTO ?', (tmp.name,)) - tmp.flush() - os.chmod(tmp.name, 0o666) - filestore_utils.cp( - tmp.name, - exp_path.filestore(get_mua_results_dir() / 'base_build' / - benchmark / 'stats.sqlite')) + _store_db(stats_db, + exp_path.filestore(get_mua_results_dir() / 'base_build' / + benchmark / 'stats.sqlite.lzma')) def store_mua_results_db(results_db, trial, cycle): """Save mua stats_db in the mua bucket.""" - with tempfile.NamedTemporaryFile(mode='w') as tmp: - with sqlite3.connect(results_db) as conn: - conn.execute('VACUUM INTO ?', (tmp.name,)) - tmp.flush() - os.chmod(tmp.name, 0o666) - filestore_utils.cp( - tmp.name, - exp_path.filestore(get_mua_results_dir() / 'results' / str(trial) / - f'{cycle}.sqlite')) + _store_db(results_db, + exp_path.filestore(get_mua_results_dir() / 'results' / + str(trial) / f'{cycle}.sqlite.lzma')) def store_mua_build_log(build_output, benchmark, fuzzer, trial, cycle): diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index 43aab5358..51601a05d 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -79,13 +79,18 @@ def add_timestamps_to_mua_results_db(timestamp_info, cur = conn.cursor() cur.execute(''' CREATE TABLE IF NOT EXISTS timestamps ( - hashname TEXT PRIMARY KEY, + input_file_id INTEGER PRIMARY KEY AUTOINCREMENT, + hashname TEXT, input_file TEXT, - timestamp FLOAT + timestamp FLOAT, + UNIQUE(hashname) ) ''') cur.execute(''' - CREATE INDEX IF NOT EXISTS timestamps_hashname_index ON timestamps (hashname, timestamp) + CREATE INDEX IF NOT EXISTS timestamps_hashname_index ON timestamps (hashname) + ''') + cur.execute(''' + CREATE INDEX IF NOT EXISTS timestamps_id_timestamp_index ON timestamps 
(input_file_id, timestamp) ''') conn.commit() @@ -112,7 +117,7 @@ def add_timestamps_to_mua_results_db(timestamp_info, continue input_file = timestamp_info[corpus_file]['filename'] timestamp = timestamp_info[corpus_file]['timestamp'] - cur.execute('''INSERT INTO timestamps VALUES (?, ?, ?)''', + cur.execute('''INSERT INTO timestamps (hashname, input_file, timestamp) VALUES (?, ?, ?)''', (corpus_file, input_file, timestamp)) if num_timestamp_not_found > 0: From 59c41ae243a6a9da2dbd67d6003af3231a6a477c Mon Sep 17 00:00:00 2001 From: phi-go Date: Thu, 4 Jan 2024 17:06:27 +0000 Subject: [PATCH 55/69] pass presubmit and update mua_fuzzer_bench version --- experiment/build/build_utils.py | 14 ++++++++------ experiment/measurer/measure_manager.py | 13 +++++++++---- fuzzers/mutation_analysis/builder.Dockerfile | 2 +- fuzzers/mutation_analysis/fuzzer.py | 4 +++- 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/experiment/build/build_utils.py b/experiment/build/build_utils.py index 8287f907a..074b1488e 100644 --- a/experiment/build/build_utils.py +++ b/experiment/build/build_utils.py @@ -54,16 +54,18 @@ def _store_db(db_path, dest): def store_mua_stats_db(stats_db, benchmark): """Save mua stats_db in the mua bucket.""" - _store_db(stats_db, - exp_path.filestore(get_mua_results_dir() / 'base_build' / - benchmark / 'stats.sqlite.lzma')) + _store_db( + stats_db, + exp_path.filestore(get_mua_results_dir() / 'base_build' / benchmark / + 'stats.sqlite.lzma')) def store_mua_results_db(results_db, trial, cycle): """Save mua stats_db in the mua bucket.""" - _store_db(results_db, - exp_path.filestore(get_mua_results_dir() / 'results' / - str(trial) / f'{cycle}.sqlite.lzma')) + _store_db( + results_db, + exp_path.filestore(get_mua_results_dir() / 'results' / str(trial) / + f'{cycle}.sqlite.lzma')) def store_mua_build_log(build_output, benchmark, fuzzer, trial, cycle): diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index 
51601a05d..0f65c9342 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -87,10 +87,12 @@ def add_timestamps_to_mua_results_db(timestamp_info, ) ''') cur.execute(''' - CREATE INDEX IF NOT EXISTS timestamps_hashname_index ON timestamps (hashname) + CREATE INDEX IF NOT EXISTS timestamps_hashname_index + ON timestamps (hashname) ''') cur.execute(''' - CREATE INDEX IF NOT EXISTS timestamps_id_timestamp_index ON timestamps (input_file_id, timestamp) + CREATE INDEX IF NOT EXISTS timestamps_id_timestamp_index + ON timestamps (input_file_id, timestamp) ''') conn.commit() @@ -117,8 +119,11 @@ def add_timestamps_to_mua_results_db(timestamp_info, continue input_file = timestamp_info[corpus_file]['filename'] timestamp = timestamp_info[corpus_file]['timestamp'] - cur.execute('''INSERT INTO timestamps (hashname, input_file, timestamp) VALUES (?, ?, ?)''', - (corpus_file, input_file, timestamp)) + cur.execute( + ''' + INSERT INTO timestamps (hashname, input_file, timestamp) + VALUES (?, ?, ?) 
+ ''', (corpus_file, input_file, timestamp)) if num_timestamp_not_found > 0: logger.info('Failed to find timestamp info for %d corpus entries.', diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index 6fb454c40..f6f320af2 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -97,7 +97,7 @@ RUN pipx install hatch # mua_fuzzer_bench RUN git clone https://github.com/phi-go/mua_fuzzer_bench /mutator && \ cd /mutator && \ - git checkout 731b9cf404001e32a96765444e03d109b2fbda33 + git checkout e5c871da036c5a82441cbee777a6deb347929cb9 RUN cd /mutator && \ diff --git a/fuzzers/mutation_analysis/fuzzer.py b/fuzzers/mutation_analysis/fuzzer.py index 8c8841f6c..84fb0332d 100644 --- a/fuzzers/mutation_analysis/fuzzer.py +++ b/fuzzers/mutation_analysis/fuzzer.py @@ -43,7 +43,9 @@ def build(): os.environ['llvmBinPath'] = '/usr/local/bin/' # build FUZZER_LIB - #subprocess.check_call(['clang++', '-c', '/mutator/dockerfiles/programs/common/main.cc', '-o', '/usr/lib/libFuzzingEngineMutation.a']) + #subprocess.check_call(['clang++', '-c', + #'/mutator/dockerfiles/programs/common/main.cc', '-o', + #'/usr/lib/libFuzzingEngineMutation.a']) #os.environ['FUZZER_LIB'] = '/usr/lib/libFuzzingEngineMutation.a' os.environ['FUZZER_LIB'] = '/mutator/dockerfiles/programs/common/main.cc' From 72ed9c6a63b6ddbea28cb5ac7a7523ecf1726297 Mon Sep 17 00:00:00 2001 From: phi-go Date: Fri, 5 Jan 2024 13:02:29 +0000 Subject: [PATCH 56/69] ulimit in mua run and better error logs --- experiment/measurer/measure_manager.py | 10 +++++++++- experiment/measurer/run_mua.py | 11 ++++++++++- fuzzers/mutation_analysis/builder.Dockerfile | 2 +- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index 0f65c9342..654a6578c 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py 
@@ -25,10 +25,12 @@ import shlex import shutil import sqlite3 +import subprocess import sys import tempfile import tarfile import time +import traceback from typing import Any, Dict, List, Optional, Tuple import queue from pathlib import Path @@ -637,7 +639,13 @@ def process_mua(self, cycle): shlex.join(command) ] logger.debug('mua_run_mutants command:' + str(docker_exec_command)) - mua_run_res = new_process.execute(docker_exec_command) + try: + mua_run_res = new_process.execute(docker_exec_command) + except subprocess.CalledProcessError as error: + trace_msg = traceback.format_exc() + error_msg = f'mua_run_mutants failed: {error}\n{trace_msg}' + build_utils.store_mua_run_log(error_msg, self.trial_num, cycle) + raise error logger.info(f'mua_run_mutants result: {mua_run_res.retcode} ' + f'timed_out: {mua_run_res.timed_out}\n' + f'{mua_run_res.output}') diff --git a/experiment/measurer/run_mua.py b/experiment/measurer/run_mua.py index 2e5b32bda..a0e60e021 100644 --- a/experiment/measurer/run_mua.py +++ b/experiment/measurer/run_mua.py @@ -18,6 +18,7 @@ import shlex import subprocess import time +import traceback from common import logs from common import benchmark_utils from common import experiment_utils @@ -186,7 +187,15 @@ def run_mua_build_ids(benchmark, trial_num, fuzzer, cycle): ] logger.debug(f'mua_build_ids command: {docker_exec_command}') - mua_build_res = new_process.execute(docker_exec_command) + try: + mua_build_res = new_process.execute(docker_exec_command) + except subprocess.CalledProcessError as err: + logger.error(f'mua_build_ids failed: {err}') + trace_msg = traceback.format_exc() + error_msg = f'mua_build_ids failed: {err}\n{trace_msg}' + build_utils.store_mua_build_log(error_msg, benchmark, fuzzer, trial_num, + cycle) + raise err logger.info(f'mua_build_ids result: {mua_build_res.retcode} ' + f'timed_out: {mua_build_res.timed_out}\n' + f'{mua_build_res.output}') diff --git a/fuzzers/mutation_analysis/builder.Dockerfile 
b/fuzzers/mutation_analysis/builder.Dockerfile index f6f320af2..cdc8c2b85 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -97,7 +97,7 @@ RUN pipx install hatch # mua_fuzzer_bench RUN git clone https://github.com/phi-go/mua_fuzzer_bench /mutator && \ cd /mutator && \ - git checkout e5c871da036c5a82441cbee777a6deb347929cb9 + git checkout f912893b13584ad2a25c43b7b6a6c822676297f2 RUN cd /mutator && \ From 11da65212b7c333f8ff46139d8df4fa02a4654ab Mon Sep 17 00:00:00 2001 From: phi-go Date: Tue, 9 Jan 2024 11:58:49 +0000 Subject: [PATCH 57/69] improve mua startup and resource allocation --- experiment/measurer/run_mua.py | 31 +++++++++++++++++++- fuzzers/mutation_analysis/builder.Dockerfile | 2 +- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/experiment/measurer/run_mua.py b/experiment/measurer/run_mua.py index a0e60e021..e5bf3b28b 100644 --- a/experiment/measurer/run_mua.py +++ b/experiment/measurer/run_mua.py @@ -99,6 +99,25 @@ def run_mua_container(benchmark): f'{mua_run_res.output}') raise Exception('Could not run mua container.') + # Run pipx once to set up environment + command = [ + 'pipx', 'run', 'hatch', 'run', 'python', '-c', + 'import pathlib; pathlib.Path("/tmp/mua_started").touch()' + ] + + docker_exec_command = [ + 'docker', 'exec', '-w', '/mutator/', '-t', container_name, '/bin/bash', + '-c', + shlex.join(command) + ] + + logger.info(f'mua run pipx command: {docker_exec_command}') + try: + new_process.execute(docker_exec_command, write_to_stdout=True) + except subprocess.CalledProcessError as err: + logger.error(f'mua pipx run failed: {err}') + raise err + def mua_container_is_running(benchmark): """Return true if the mua container is started.""" @@ -127,9 +146,19 @@ def ensure_mua_container_running(benchmark): docker_start_command = ['docker', 'start', container_name] res = new_process.execute(docker_start_command, expect_zero=False) if res.retcode != 0: - logger.info('Could 
not start mua container, using run instead.') run_mua_container(benchmark) + while True: + check_mua_prepared_command = [ + 'docker', 'exec', '-t', container_name, '/bin/bash', '-c', + 'test -f /tmp/mua_started' + ] + res = new_process.execute(check_mua_prepared_command, expect_zero=False) + if res.retcode == 0: + logger.info('mua container is prepared') + break + time.sleep(1) + def copy_mua_stats_db(benchmark, mua_results_dir): """Copy the stats db from the container to the mua results dir.""" diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index cdc8c2b85..d57dcdd3e 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -97,7 +97,7 @@ RUN pipx install hatch # mua_fuzzer_bench RUN git clone https://github.com/phi-go/mua_fuzzer_bench /mutator && \ cd /mutator && \ - git checkout f912893b13584ad2a25c43b7b6a6c822676297f2 + git checkout 25705394bc93d1d4febe3b71126a40ee2a655a8a RUN cd /mutator && \ From cc85e8bccc90f75e8c3fac567965c3c4376bd947 Mon Sep 17 00:00:00 2001 From: phi-go Date: Wed, 10 Jan 2024 15:31:32 +0000 Subject: [PATCH 58/69] perf improvements --- experiment/measurer/measure_manager.py | 68 ++++++------ experiment/measurer/run_mua.py | 107 +++++++++++++++++++ fuzzers/mutation_analysis/builder.Dockerfile | 2 +- 3 files changed, 144 insertions(+), 33 deletions(-) diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index 654a6578c..58eba4e87 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -57,7 +57,7 @@ from experiment import scheduler from experiment.measurer.run_mua import (copy_mua_stats_db, get_dispatcher_mua_out_dir, - run_mua_build_ids, + get_measure_spot, run_mua_build_ids, ensure_mua_container_running) from experiment.runner import UNIQUE_TIMESTAMP_FILENAME @@ -618,40 +618,44 @@ def initialize_mua_directories(): copy_mua_stats_db(benchmark, 
mua_results_dir) - run_mua_build_ids(benchmark, self.trial_num, self.fuzzer, cycle) + with get_measure_spot() as _measure_spot: + run_mua_build_ids(benchmark, self.trial_num, self.fuzzer, cycle) def process_mua(self, cycle): """runs mua measurement""" - # get necessary info - container_name = 'mutation_analysis_' + self.benchmark + '_container' - experiment_name = experiment_utils.get_experiment_name() - fuzz_target = benchmark_utils.get_fuzz_target(self.benchmark) - - # run all needed mutants in container - command = [ - 'python3', '/mutator/mua_run_mutants.py', fuzz_target, - self.benchmark, experiment_name, self.fuzzer, - str(self.trial_num) - ] - - docker_exec_command = [ - 'docker', 'exec', '-t', container_name, '/bin/bash', '-c', - shlex.join(command) - ] - logger.debug('mua_run_mutants command:' + str(docker_exec_command)) - try: - mua_run_res = new_process.execute(docker_exec_command) - except subprocess.CalledProcessError as error: - trace_msg = traceback.format_exc() - error_msg = f'mua_run_mutants failed: {error}\n{trace_msg}' - build_utils.store_mua_run_log(error_msg, self.trial_num, cycle) - raise error - logger.info(f'mua_run_mutants result: {mua_run_res.retcode} ' + - f'timed_out: {mua_run_res.timed_out}\n' + - f'{mua_run_res.output}') - build_utils.store_mua_run_log(mua_run_res.output, self.trial_num, cycle) - results_db = self.mua_run_result_dir() / 'results.sqlite' - build_utils.store_mua_results_db(results_db, self.trial_num, cycle) + with get_measure_spot() as _measure_spot: + # get necessary info + container_name = \ + 'mutation_analysis_' + self.benchmark + '_container' + experiment_name = experiment_utils.get_experiment_name() + fuzz_target = benchmark_utils.get_fuzz_target(self.benchmark) + + # run all needed mutants in container + command = [ + 'python3', '/mutator/mua_run_mutants.py', fuzz_target, + self.benchmark, experiment_name, self.fuzzer, + str(self.trial_num) + ] + + docker_exec_command = [ + 'docker', 'exec', '-t', container_name, 
'/bin/bash', '-c', + shlex.join(command) + ] + logger.debug('mua_run_mutants command:' + str(docker_exec_command)) + try: + mua_run_res = new_process.execute(docker_exec_command) + except subprocess.CalledProcessError as error: + trace_msg = traceback.format_exc() + error_msg = f'mua_run_mutants failed: {error}\n{trace_msg}' + build_utils.store_mua_run_log(error_msg, self.trial_num, cycle) + raise error + logger.info(f'mua_run_mutants result: {mua_run_res.retcode} ' + + f'timed_out: {mua_run_res.timed_out}\n' + + f'{mua_run_res.output}') + build_utils.store_mua_run_log(mua_run_res.output, self.trial_num, + cycle) + results_db = self.mua_run_result_dir() / 'results.sqlite' + build_utils.store_mua_results_db(results_db, self.trial_num, cycle) def run_cov_new_units(self): """Run the coverage binary on new units.""" diff --git a/experiment/measurer/run_mua.py b/experiment/measurer/run_mua.py index e5bf3b28b..e7ca3a4d9 100644 --- a/experiment/measurer/run_mua.py +++ b/experiment/measurer/run_mua.py @@ -13,9 +13,12 @@ # limitations under the License. 
"""Module for mutation testing measurer functionality.""" +from contextlib import contextmanager import os from pathlib import Path +import random import shlex +import sqlite3 import subprocess import time import traceback @@ -32,6 +35,7 @@ MUTATION_ANALYSIS_IMAGE_NAME = 'mutation_analysis' GOOGLE_CLOUD_MUA_MAPPED_DIR = '/etc/mua_out/' +MAX_PARALLEL_MEASURE_RUNS = 2 EXEC_ID = None @@ -230,3 +234,106 @@ def run_mua_build_ids(benchmark, trial_num, fuzzer, cycle): f'{mua_build_res.output}') build_utils.store_mua_build_log(mua_build_res.output or '', benchmark, fuzzer, trial_num, cycle) + + +class MeasureRunsDB: + """Class for managing the measure_runs database, which is used to limit + concurrently ran mutation measurments.""" + + def __init__(self, db_file): + self.db_file = db_file + self.conn = sqlite3.connect(self.db_file, + check_same_thread=False, + timeout=300) + + @contextmanager + def cur(self): + """Return a cursor to the database.""" + with self.conn as conn: + cur = conn.cursor() + yield cur + cur.close() + + @contextmanager + def transaction(self, transaction_type): + """Return a cursor to the database, with a started transaction of the + given type.""" + while True: + try: + with self.cur() as cur: + cur.execute(f'BEGIN {transaction_type} TRANSACTION') + yield cur + return + except sqlite3.OperationalError as err: + if 'database is locked' in str(err): + time.sleep(random.random() * 10) + else: + raise + raise Exception('Could not begin transaction.') + + def initialize(self): + """Initialize the database.""" + with self.cur() as cur: + cur.execute('PRAGMA journal_mode=WAL') + cur.execute('PRAGMA synchronous=NORMAL') + cur.execute(''' + CREATE TABLE IF NOT EXISTS instances ( + spot INTEGER PRIMARY KEY, + in_use INTEGER + ) + ''') + + cur.execute(''' + CREATE INDEX IF NOT EXISTS idx_cpus_used ON instances + (in_use, spot) + ''') + + with self.transaction('EXCLUSIVE') as cur: + for idx in range(MAX_PARALLEL_MEASURE_RUNS): + cur.execute( + ''' + INSERT OR 
IGNORE INTO instances (spot, in_use) + VALUES (?, 0) + ''', (idx,)) + + def get_free_spot(self): + """Return a free spot, or None if none are available.""" + with self.transaction('EXCLUSIVE') as cur: + cur.execute(''' + SELECT spot FROM instances WHERE in_use = 0 + ORDER BY spot ASC LIMIT 1 + ''') + row = cur.fetchone() + if row: + cur.execute('UPDATE instances SET in_use = 1 WHERE spot = ?', + (row[0],)) + return row[0] + return None + + def release_spot(self, spot): + """Release a spot.""" + with self.transaction('IMMEDIATE') as cur: + cur.execute('UPDATE instances SET in_use = 0 WHERE spot = ?', + (spot,)) + + +@contextmanager +def get_measure_spot(): + """Context manager for getting a free measure spot.""" + measure_db_path = '/tmp/measure_runs.sqlite' + measure_db = MeasureRunsDB(measure_db_path) + measure_db.initialize() + start_wait = time.time() + while True: + new_cpu = measure_db.get_free_spot() + if new_cpu is not None: + logger.info( + f'Got spot {new_cpu} after {time.time() - start_wait:.2f}s') + try: + yield new_cpu + finally: + measure_db.release_spot(new_cpu) + break + time.sleep(random.random() * 2) + else: # no break + yield None diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index d57dcdd3e..b521199e4 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -97,7 +97,7 @@ RUN pipx install hatch # mua_fuzzer_bench RUN git clone https://github.com/phi-go/mua_fuzzer_bench /mutator && \ cd /mutator && \ - git checkout 25705394bc93d1d4febe3b71126a40ee2a655a8a + git checkout d83b09b688a111aaaf16a2e2f553f52ee2fe872a RUN cd /mutator && \ From 1c5a5a6b9384dc01819b894098ec9b100610d781 Mon Sep 17 00:00:00 2001 From: phi-go Date: Wed, 10 Jan 2024 15:51:22 +0000 Subject: [PATCH 59/69] less spam in mua runs --- fuzzers/mutation_analysis/builder.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index b521199e4..78ed4f17f 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -97,7 +97,7 @@ RUN pipx install hatch # mua_fuzzer_bench RUN git clone https://github.com/phi-go/mua_fuzzer_bench /mutator && \ cd /mutator && \ - git checkout d83b09b688a111aaaf16a2e2f553f52ee2fe872a + git checkout 5acfb1a30e50740c13e55d8c4b394c33a775847f RUN cd /mutator && \ From d078a9dc7ec7821e3c08f4bd2ec3d1c3a8f998dd Mon Sep 17 00:00:00 2001 From: phi-go Date: Wed, 10 Jan 2024 15:57:42 +0000 Subject: [PATCH 60/69] overwrite results db on store --- experiment/build/build_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experiment/build/build_utils.py b/experiment/build/build_utils.py index 074b1488e..5a97400eb 100644 --- a/experiment/build/build_utils.py +++ b/experiment/build/build_utils.py @@ -60,12 +60,12 @@ def store_mua_stats_db(stats_db, benchmark): 'stats.sqlite.lzma')) -def store_mua_results_db(results_db, trial, cycle): +def store_mua_results_db(results_db, trial, _cycle): """Save mua stats_db in the mua bucket.""" _store_db( results_db, exp_path.filestore(get_mua_results_dir() / 'results' / str(trial) / - f'{cycle}.sqlite.lzma')) + 'results.sqlite.lzma')) def store_mua_build_log(build_output, benchmark, fuzzer, trial, cycle): From a126e951dbefec5932c3e01a3ce843b519948d82 Mon Sep 17 00:00:00 2001 From: phi-go Date: Thu, 11 Jan 2024 20:17:31 +0000 Subject: [PATCH 61/69] only eval median trial --- experiment/measurer/measure_manager.py | 52 ++--- experiment/measurer/run_mua.py | 211 ++++++++++++++++++- experiment/measurer/test_measure_manager.py | 6 +- fuzzers/mutation_analysis/builder.Dockerfile | 2 +- 4 files changed, 239 insertions(+), 32 deletions(-) diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index 58eba4e87..9df0a3624 100644 --- 
a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -55,10 +55,11 @@ from experiment.measurer import run_coverage from experiment.measurer import run_crashes from experiment import scheduler -from experiment.measurer.run_mua import (copy_mua_stats_db, - get_dispatcher_mua_out_dir, - get_measure_spot, run_mua_build_ids, - ensure_mua_container_running) +from experiment.measurer.run_mua import ( + add_measure_run, add_measure_run_failed, copy_mua_stats_db, + get_covered_mutants, get_dispatcher_mua_out_dir, get_measure_spot, + get_mua_results_path, init_measure_db, run_mua_build_ids, + ensure_mua_container_running, wait_if_median_run) from experiment.runner import UNIQUE_TIMESTAMP_FILENAME logger = logs.Logger() @@ -152,8 +153,9 @@ def measure_main(experiment_config): runners_cpus = experiment_config['runners_cpus'] region_coverage = experiment_config['region_coverage'] mutation_analysis = experiment_config['mutation_analysis'] + num_trials = experiment_config['trials'] measure_loop(experiment, max_total_time, measurers_cpus, runners_cpus, - region_coverage, mutation_analysis) + num_trials, region_coverage, mutation_analysis) # Clean up resources. gc.collect() @@ -176,6 +178,7 @@ def measure_loop( # pylint: disable=too-many-arguments max_total_time: int, measurers_cpus=None, runners_cpus=None, + num_trials=None, region_coverage=False, mutation_analysis=False): """Continuously measure trials for |experiment|.""" @@ -198,7 +201,7 @@ def measure_loop( # pylint: disable=too-many-arguments *pool_args) as pool, multiprocessing.Manager() as manager: set_up_coverage_binaries(pool, experiment) if mutation_analysis: - set_up_mua_binaries(pool, experiment) + set_up_mua_binaries(pool, experiment, num_trials) # Using Multiprocessing.Queue will fail with a complaint about # inheriting queue. # pytype: disable=attribute-error @@ -821,10 +824,12 @@ def measure_trial(measure_req, max_cycle: int, # Add 1 to ensure we measure the last cycle. 
for cycle in range(min_cycle, max_cycle + 1): try: + is_last_cycle = cycle == max_cycle snapshot = measure_snapshot(measure_req.fuzzer, measure_req.benchmark, measure_req.trial_id, cycle, - region_coverage, mutation_analysis) + is_last_cycle, region_coverage, + mutation_analysis) if not snapshot: break multiprocessing_queue.put(snapshot) @@ -841,7 +846,8 @@ def measure_trial(measure_req, max_cycle: int, def measure_snapshot( # pylint: disable=too-many-locals,too-many-arguments fuzzer: str, benchmark: str, trial_num: int, cycle: int, - region_coverage: bool, mutation_analysis: bool) -> models.Snapshot: + is_last_cycle: bool, region_coverage: bool, + mutation_analysis: bool) -> models.Snapshot: """Measure coverage and mua of the snapshot for |cycle| for |trial_num| of |fuzzer| and |benchmark|.""" snapshot_logger = logs.Logger( @@ -870,6 +876,7 @@ def measure_snapshot( # pylint: disable=too-many-locals,too-many-arguments corpus_archive_dst, expect_zero=False).retcode: snapshot_logger.warning('Corpus not found for cycle: %d.', cycle) + add_measure_run_failed(benchmark, fuzzer, trial_num) return None snapshot_measurer.initialize_measurement_dirs() @@ -904,7 +911,15 @@ def measure_snapshot( # pylint: disable=too-many-locals,too-many-arguments crashes=crashes) if mutation_analysis: - snapshot_measurer.process_mua(cycle) + if is_last_cycle: + num_covered_muts = get_covered_mutants(trial_num) + logger.info( + f'Trial {trial_num} covered {num_covered_muts} mutants.') + add_measure_run(benchmark, fuzzer, trial_num, num_covered_muts) + if wait_if_median_run(benchmark, fuzzer, trial_num): + logger.info( + f'The median trial is {trial_num}, get mua results.') + snapshot_measurer.process_mua(cycle) measuring_time = round(time.time() - measuring_start_time, 2) snapshot_logger.info('Measured cycle: %d in %f seconds.', cycle, @@ -928,24 +943,13 @@ def set_up_coverage_binaries(pool, experiment): pool.map(set_up_coverage_binary, benchmarks) -def set_up_mua_binaries(pool, experiment): 
+def set_up_mua_binaries(_pool, _experiment, num_trials): """Set up mua finder binaries for all benchmarks in |experiment|.""" - # Use set comprehension to select distinct benchmarks. - with db_utils.session_scope() as session: - benchmarks = [ - benchmark_tuple[0] - for benchmark_tuple in session.query(models.Trial.benchmark). - distinct().filter(models.Trial.experiment == experiment) - ] - mua_results_dir = build_utils.get_mua_results_dir() filesystem.create_directory(mua_results_dir) - pool.map(set_up_mua_binary, benchmarks) - - -def set_up_mua_binary(_benchmark): - """Set up mua finder binaries for |benchmark|.""" - initialize_logs() + mua_results_path = get_mua_results_path() + filesystem.create_directory(mua_results_path) + init_measure_db(num_trials) def set_up_coverage_binary(benchmark): diff --git a/experiment/measurer/run_mua.py b/experiment/measurer/run_mua.py index e7ca3a4d9..0cb6affd4 100644 --- a/experiment/measurer/run_mua.py +++ b/experiment/measurer/run_mua.py @@ -37,6 +37,8 @@ GOOGLE_CLOUD_MUA_MAPPED_DIR = '/etc/mua_out/' MAX_PARALLEL_MEASURE_RUNS = 2 +DISPATCHER_MUA_OUT_DIR = Path('/mua_out/') + EXEC_ID = None @@ -53,9 +55,20 @@ def get_host_mua_out_dir(): return Path(GOOGLE_CLOUD_MUA_MAPPED_DIR) +def get_mua_results_path(): + """Return the path to the mua results directory.""" + experiment_name = experiment_utils.get_experiment_name() + return Path(DISPATCHER_MUA_OUT_DIR / experiment_name / 'mua-results') + + +def get_measure_db_path(): + """Return the path to the measure_runs database.""" + return get_mua_results_path() / 'measure_runs.sqlite' + + def get_dispatcher_mua_out_dir(): """Return the dispatcher directory where mua_out is mapped to.""" - return Path('/mua_out/') + return DISPATCHER_MUA_OUT_DIR def stop_mua_container(benchmark): @@ -271,8 +284,9 @@ def transaction(self, transaction_type): raise raise Exception('Could not begin transaction.') - def initialize(self): + def initialize(self, num_trials): """Initialize the database.""" + 
logger.info(f'Initializing measure_runs database: {self.db_file}') with self.cur() as cur: cur.execute('PRAGMA journal_mode=WAL') cur.execute('PRAGMA synchronous=NORMAL') @@ -288,6 +302,46 @@ def initialize(self): (in_use, spot) ''') + cur.execute(''' + CREATE TABLE IF NOT EXISTS meta ( + num_trials INTEGER + ) + ''') + + cur.execute( + ''' + INSERT OR IGNORE INTO meta (num_trials) + VALUES (?) + ''', (num_trials,)) + + cur.execute(''' + CREATE TABLE IF NOT EXISTS measure_runs ( + fuzzer TEXT, + benchmark TEXT, + run_idx INTEGER, + done INTEGER, + trial_num INTEGER, + covered_mutants INTEGER, + PRIMARY KEY (fuzzer, benchmark, run_idx) + )''') + + cur.execute(''' + CREATE INDEX IF NOT EXISTS idx_measure_runs ON measure_runs ( + fuzzer, benchmark, run_idx, done + )''') + + cur.execute(''' + CREATE TABLE IF NOT EXISTS covered_muts ( + trial_id INTEGER, + covered_mut INTEGER, + PRIMARY KEY (trial_id, covered_mut) + )''') + + cur.execute(''' + CREATE INDEX IF NOT EXISTS idx_covered_muts ON covered_muts ( + trial_id, covered_mut + )''') + with self.transaction('EXCLUSIVE') as cur: for idx in range(MAX_PARALLEL_MEASURE_RUNS): cur.execute( @@ -316,13 +370,162 @@ def release_spot(self, spot): cur.execute('UPDATE instances SET in_use = 0 WHERE spot = ?', (spot,)) + def get_num_trials(self): + """Return the number of trials.""" + with self.cur() as cur: + cur.execute('SELECT num_trials FROM meta') + row = cur.fetchone() + if row: + return row[0] + return None + + def ensure_measure_runs(self, benchmark, fuzzer, num_trials): + """Ensure that there are measure runs for the given benchmark and + fuzzer.""" + with self.transaction('EXCLUSIVE') as cur: + for run_idx in range(num_trials): + cur.execute( + ''' + INSERT OR IGNORE INTO measure_runs ( + fuzzer, benchmark, run_idx, done + ) VALUES (?, ?, ?, 0) + ''', (fuzzer, benchmark, run_idx)) + + def add_measure_run(self, benchmark, fuzzer, trial_num, covered_mutants): + """Add a measure run.""" + with self.transaction('EXCLUSIVE') 
as cur: + cur.execute( + ''' + UPDATE measure_runs SET + done = 1, + trial_num = ?, + covered_mutants = ? + WHERE rowid = ( + SELECT MIN(rowid) + FROM measure_runs + WHERE fuzzer = ? AND benchmark = ? AND done = 0 + ) + ''', (trial_num, covered_mutants, fuzzer, benchmark)) + + def add_measure_run_failed(self, benchmark, fuzzer, trial_num): + """Add a measure run.""" + with self.transaction('EXCLUSIVE') as cur: + cur.execute( + ''' + UPDATE measure_runs SET + done = 2, + trial_num = ? + WHERE rowid = ( + SELECT MIN(rowid) + FROM measure_runs + WHERE fuzzer = ? AND benchmark = ? AND done = 0 + ) + ''', (trial_num, fuzzer, benchmark)) + + def wait_for_other_trials_to_complete(self, benchmark, fuzzer): + """Wait for other trials to complete.""" + num_trials = self.get_num_trials() + if num_trials is None: + raise Exception('Could not get number of trials from database.') + while True: + with self.cur() as cur: + cur.execute( + ''' + SELECT COUNT(*) FROM measure_runs + WHERE fuzzer = ? AND benchmark = ? AND done = 0 + ''', (fuzzer, benchmark)) + row = cur.fetchone() + if row: + if row[0] == 0: + return + logger.info( + f'Waiting on other trials for {benchmark} {fuzzer} ' + + f'to complete, {row[0]} remaining.') + else: + logger.error('Could not get number of remaining trials.') + time.sleep(10) + + def get_median_run(self, benchmark, fuzzer): + """Return the median run for the given benchmark and fuzzer.""" + num_trials = self.get_num_trials() + if num_trials is None: + raise Exception('Could not get number of trials from database.') + with self.cur() as cur: + cur.execute( + ''' + SELECT trial_num FROM measure_runs + WHERE fuzzer = ? AND benchmark = ? AND done = 1 + ORDER BY covered_mutants, rowid DESC LIMIT 1 OFFSET ? 
+ ''', (fuzzer, benchmark, num_trials // 2)) + row = cur.fetchone() + if row: + return row[0] + return None + + def get_num_covered_mutants(self, trial_num): + """Return the number of covered mutants for the given trial.""" + with self.cur() as cur: + cur.execute( + ''' + SELECT COUNT(*) FROM covered_muts + WHERE trial_id = ? + ''', (trial_num,)) + row = cur.fetchone() + if row: + return row[0] + return None + + +def init_measure_db(num_trials): + """Initialize the measure_runs database.""" + measure_db_path = get_measure_db_path() + logger.warning(f'Initializing measure_runs database: {measure_db_path}') + measure_db = MeasureRunsDB(measure_db_path) + measure_db.initialize(num_trials) + + +def get_covered_mutants(trial_num): + """Return the number of covered mutants for the given trial.""" + measure_db_path = get_measure_db_path() + measure_db = MeasureRunsDB(measure_db_path) + return measure_db.get_num_covered_mutants(trial_num) + + +def add_measure_run_failed(benchmark, fuzzer, trial_num): + """Add that the measure run failed and should not be waited for nor used as + a candidate for the median run.""" + measure_db_path = get_measure_db_path() + measure_db = MeasureRunsDB(measure_db_path) + num_trials = measure_db.get_num_trials() + measure_db.ensure_measure_runs(benchmark, fuzzer, num_trials) + measure_db.add_measure_run_failed(benchmark, fuzzer, trial_num) + + +def add_measure_run(benchmark, fuzzer, trial_num, covered_mutants): + """Add the covered mutants for a measure run, this indicates that the + trial is done.""" + measure_db_path = get_measure_db_path() + measure_db = MeasureRunsDB(measure_db_path) + num_trials = measure_db.get_num_trials() + measure_db.ensure_measure_runs(benchmark, fuzzer, num_trials) + measure_db.add_measure_run(benchmark, fuzzer, trial_num, covered_mutants) + + +def wait_if_median_run(benchmark, fuzzer, trial_id): + """Wait until all trials for benchmark fuzzer are done and return + True if the trial_id is the median run.""" + 
measure_db_path = get_measure_db_path() + measure_db = MeasureRunsDB(measure_db_path) + measure_db.wait_for_other_trials_to_complete(benchmark, fuzzer) + median_run_trial_id = measure_db.get_median_run(benchmark, fuzzer) + return trial_id == median_run_trial_id + @contextmanager def get_measure_spot(): """Context manager for getting a free measure spot.""" - measure_db_path = '/tmp/measure_runs.sqlite' + measure_db_path = get_measure_db_path() measure_db = MeasureRunsDB(measure_db_path) - measure_db.initialize() start_wait = time.time() while True: new_cpu = measure_db.get_free_spot() diff --git a/experiment/measurer/test_measure_manager.py b/experiment/measurer/test_measure_manager.py index 0ad0843ae..5f060b923 100644 --- a/experiment/measurer/test_measure_manager.py +++ b/experiment/measurer/test_measure_manager.py @@ -178,8 +178,8 @@ def test_measure_trial(mocked_measure_snapshot, mocked_queue, _, __): measure_manager.measure_trial(measure_request, max_cycle, mocked_queue(), False, True) expected_calls = [ - mock.call(FUZZER, BENCHMARK, TRIAL_NUM, cycle, False, True) - for cycle in range(min_cycle, max_cycle + 1) + mock.call(FUZZER, BENCHMARK, TRIAL_NUM, cycle, max_cycle == cycle, + False, True) for cycle in range(min_cycle, max_cycle + 1) ] assert mocked_measure_snapshot.call_args_list == expected_calls @@ -331,7 +331,7 @@ def test_measure_snapshot_coverage( # pylint: disable=too-many-locals # integration tests. 
snapshot = measure_manager.measure_snapshot( snapshot_measurer.fuzzer, snapshot_measurer.benchmark, - snapshot_measurer.trial_num, cycle, False, False) + snapshot_measurer.trial_num, cycle, True, False, False) assert snapshot assert snapshot.time == cycle * experiment_utils.get_snapshot_seconds() assert snapshot.edges_covered == 4629 diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index 78ed4f17f..97120dfb3 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -97,7 +97,7 @@ RUN pipx install hatch # mua_fuzzer_bench RUN git clone https://github.com/phi-go/mua_fuzzer_bench /mutator && \ cd /mutator && \ - git checkout 5acfb1a30e50740c13e55d8c4b394c33a775847f + git checkout 5ac9af6ee3dfac3293e5705fa2e3b6520a41a9b9 RUN cd /mutator && \ From 93ef805308ba3a490cf84314e621364d28b24dba Mon Sep 17 00:00:00 2001 From: phi-go Date: Fri, 12 Jan 2024 13:23:15 +0000 Subject: [PATCH 62/69] less spam when waiting on trials --- experiment/measurer/run_mua.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/experiment/measurer/run_mua.py b/experiment/measurer/run_mua.py index 0cb6affd4..30c03aca3 100644 --- a/experiment/measurer/run_mua.py +++ b/experiment/measurer/run_mua.py @@ -427,6 +427,7 @@ def wait_for_other_trials_to_complete(self, benchmark, fuzzer): num_trials = self.get_num_trials() if num_trials is None: raise Exception('Could not get number of trials from database.') + first = True while True: with self.cur() as cur: cur.execute( @@ -438,9 +439,11 @@ def wait_for_other_trials_to_complete(self, benchmark, fuzzer): if row: if row[0] == 0: return - logger.info( - f'Waiting on other trials for {benchmark} {fuzzer} ' + - f'to complete, {row[0]} remaining.') + if first: + first = False + logger.info('Waiting on other trials for ' + f'{benchmark} {fuzzer} ' + + f'to complete, {row[0]} remaining.') else: logger.error('Could not get number 
of remaining trials.') time.sleep(10) From 5239d94cb6ba13ca8839f7acd5469bcf6836223f Mon Sep 17 00:00:00 2001 From: Joschua Schilling Date: Fri, 12 Jan 2024 16:28:05 +0100 Subject: [PATCH 63/69] Integrate mutation testing into fuzzbench reporting --- analysis/benchmark_results.py | 23 ++- analysis/generate_report.py | 207 ++++++++++++++++++++---- analysis/plotting.py | 49 ++++++ analysis/report_templates/with_mua.html | 15 +- 4 files changed, 259 insertions(+), 35 deletions(-) diff --git a/analysis/benchmark_results.py b/analysis/benchmark_results.py index 7922a2677..f6c41c0dd 100644 --- a/analysis/benchmark_results.py +++ b/analysis/benchmark_results.py @@ -21,6 +21,9 @@ from analysis import stat_tests from common import benchmark_utils from common import filestore_utils +from common import logs + +logger = logs.Logger() # pylint: disable=too-many-public-methods, too-many-arguments @@ -59,7 +62,25 @@ def get_coverage_report_path(self, fuzzer_name, benchmark_name): def get_mua_report_data(self, _fuzzer_name, _benchmark_name): """Returns results as string""" - return 'TODO: Not Implemented Yet' + return 'TODO: Not Implemented Yet' #TODO: implement this or delete + + @property + def mutation_analysis_plot(self): + """plot for mutation analysis.""" + plot_filename = self._prefix_with_benchmark('mutation_analysis.svg') + if self.mua_results is None: + logger.info( + 'mutation_analysis_plot not rendered, due to missing data') + return None + + (num_trials, fuzzer_pds) = self.mua_results + + num_fuzzers = len(fuzzer_pds) + + self._plotter.write_mutation_analysis_plot( + fuzzer_pds, num_fuzzers, num_trials, + self._get_full_path(plot_filename)) + return plot_filename @property @functools.lru_cache() diff --git a/analysis/generate_report.py b/analysis/generate_report.py index 08f44a09c..dc89e20ba 100644 --- a/analysis/generate_report.py +++ b/analysis/generate_report.py @@ -14,9 +14,13 @@ """Report generator tool.""" import argparse +import lzma import os import sys import 
sqlite3 +import tempfile + +from collections import defaultdict import pandas as pd @@ -28,7 +32,7 @@ from analysis import rendering from common import filesystem from common import logs -from experiment.measurer.run_mua import get_dispatcher_mua_out_dir +from common import experiment_utils logger = logs.Logger() @@ -46,7 +50,7 @@ def get_arg_parser(): parser.add_argument( '-t', '--report-type', - choices=['default', 'experimental'], + choices=['default', 'experimental', 'with_mua'], default='default', help='Type of the report (which template to use). Default: default.') parser.add_argument( @@ -60,6 +64,11 @@ def get_arg_parser(): action='store_true', default=False, help='If set, plots are created faster, but contain less details.') + parser.add_argument('-mua', + '--mutation-analysis', + action='store_true', + default=False, + help='If set, mutation analysis report is created.') parser.add_argument( '--log-scale', action='store_true', @@ -81,6 +90,11 @@ def get_arg_parser(): '--fuzzers', nargs='*', help='Names of the fuzzers to include in the report.') + parser.add_argument( + '-xb', + '--experiment-benchmarks', + nargs='*', + help='Names of the benchmarks to include in the report.') parser.add_argument( '-cov', '--coverage-report', @@ -189,41 +203,173 @@ def modify_experiment_data_if_requested( # pylint: disable=too-many-arguments return experiment_df -def get_mua_results(experiment_name, fuzzers, _benchmarks, experiment_df): +def normalized_timestamps(timestamps): + """Normalize timestamps.""" + print(timestamps[0]) + seed_timestamp_file = next( + (tt for tt in timestamps if tt[2] == ''), None) + try: + min_timestamp_file = min( + (tt for tt in timestamps if tt[2] != ''), + key=lambda x: x[3]) + except ValueError: + min_timestamp_file = seed_timestamp_file + try: + max_timestamp_file = max( + (tt for tt in timestamps if tt[2] != ''), + key=lambda x: x[3]) + except ValueError: + max_timestamp_file = seed_timestamp_file + # print(len(timestamps)) + # 
print('min_timestamp', min_timestamp_file) + # print('max_timestamp', max_timestamp_file) + min_timestamp = min_timestamp_file[3] + max_timestamp = max_timestamp_file[3] + print('max_timestamp - min_timestamp', max_timestamp - min_timestamp) + timestamps_normalized = {} + for _hashname, input_file_id, input_file, timestamp in timestamps: + if input_file == '': + timestamps_normalized[input_file_id] = 0 + else: + timestamps_normalized[input_file_id] = timestamp - min_timestamp + + timespan = max_timestamp - min_timestamp + return timestamps_normalized, timespan + + +def get_first_covered_killed(results, timestamps_map): + """Get first covered and killed mutant.""" + ordered_inputs = sorted(results, key=lambda x: timestamps_map[x[0]]) + mut_result_times = defaultdict(lambda: {'seen': None, 'killed': None}) + for ordered_input in ordered_inputs: + input_file_id, mut_id, skipped, killed = ordered_input[:4] + if skipped: + continue + if mut_id not in mut_result_times: + mut_result_times[mut_id]['seen'] = timestamps_map[input_file_id] + if killed: + assert mut_id in mut_result_times + if mut_result_times[mut_id]['killed'] is None: + mut_result_times[mut_id]['killed'] = timestamps_map[ + input_file_id] + return mut_result_times + + +def get_timeline(time_covered_killed, timespan, fuzz_target, benchmark, + fuzzer_name, trial_num, cycle): + """Create timeline regarding covering and killing of mutants.""" + if timespan == 0: + max_time_base = 1 + else: + max_time_base = 16 + normalized_time_elem = timespan / (max_time_base**2) + time = 'time' + count_seen = 'seen' + count_killed = 'killed' + print(f'{time:<10} {count_seen:<7} {count_killed:<7}') + res = [] + for time_base in range(1, max_time_base + 1): + time = normalized_time_elem * (time_base**2) + count_seen = 0 + count_killed = 0 + for _mut_id, times in time_covered_killed.items(): + if times['seen'] is not None and times['seen'] <= time: + count_seen += 1 + if times['killed'] is not None and times['killed'] <= time: + 
count_killed += 1 + print(f'{time:8.2f}s: {count_seen:>7} {count_killed:>7}') + res.append((fuzz_target, benchmark, fuzzer_name, trial_num, cycle, time, + count_seen, count_killed)) + return res + + +def load_result_db(res_db_path): + """Load result.sqlite database.""" + with tempfile.NamedTemporaryFile() as tmp_file: + with lzma.open(res_db_path) as res_db: + tmp_file.write(res_db.read()) + tmp_file.flush() + tmp_file.seek(0) + with sqlite3.connect(tmp_file.name) as conn: + run_info = conn.execute( + 'SELECT benchmark, fuzz_target, fuzzer, trial_num FROM run_info' + ).fetchall() + results = conn.execute('''SELECT + input_file_id, + mut_id, + skipped, + killed, + orig_retcode, + mutant_retcode, + orig_runtime, + mutant_runtime, + orig_timed_out, + mutant_timed_out + FROM results''').fetchall() + timestamps = conn.execute( + '''SELECT hashname, input_file_id, input_file, timestamp + FROM timestamps''').fetchall() + return run_info, results, timestamps + + +def get_mua_results(experiment_df): """Get mutation analysis results for each fuzzer in each trial to use in the report.""" #get relationship between trial_id and benchmark from df trial_dict = experiment_df.set_index('trial_id')['benchmark'].to_dict() - for fuzzer in fuzzers: - for trial in trial_dict.keys(): + #logger.info(f'trial_dict: {trial_dict}') + + experiment_data_dir = experiment_utils.get_experiment_filestore_path() + results_data_dir = f'{experiment_data_dir}/mua-results/results' - _benchmark = trial_dict[trial] + if not os.path.isdir(results_data_dir): + logger.warning('''mua-results/results dir does not exist, + stopping mua report creation''') + return None - mua_out_dir = get_dispatcher_mua_out_dir() + fuzzer_pds = defaultdict(list) - mua_result_db_file = f'/{mua_out_dir}/{experiment_name}/' \ - f'mua_binaries/corpus_run_results/{fuzzer}/{trial}/' \ - 'results.sqlite' - con = sqlite3.connect(mua_result_db_file) - cur = con.cursor() + for trial in trial_dict.keys(): - covered_mutants = 
cur.execute(""" - SELECT DISTINCT mut_id FROM results - JOIN timestamps ON results.input_file = timestamps.hashname - WHERE killed == 0 ORDER BY mut_id - """) - covered_mutants.fetchall() + print(experiment_data_dir) + mua_result_db_file = f'{results_data_dir}/{trial}/' \ + 'results.sqlite.lzma' + logger.info('mua_result_db_file:') + logger.info(mua_result_db_file) + run_info, results, timestamps = load_result_db(mua_result_db_file) + assert len(run_info) == 1 + benchmark, fuzz_target, fuzzer, trial_num = run_info[0] + print(benchmark, fuzz_target, fuzzer, trial_num, trial) + timestamps_map, timespan = normalized_timestamps(timestamps) - killed_mutants = cur.execute(""" - SELECT DISTINCT mut_id - FROM results JOIN timestamps - ON results.input_file = timestamps.hashname - WHERE killed == 1 - ORDER BY mut_id - """) - killed_mutants.fetchall() + results = [ + rr for rr in results if timestamps_map.get(rr[0]) is not None + ] + time_covered_killed = get_first_covered_killed(results, timestamps_map) + timeline = get_timeline(time_covered_killed, timespan, fuzz_target, + benchmark, fuzzer, trial_num, trial) + pd_timeline = pd.DataFrame(timeline, + columns=[ + 'fuzz_target', 'benchmark', 'fuzzer', + 'trial_num', 'cycle', 'time', 'seen', + 'killed' + ]) + fuzzer_pds[fuzzer].append(pd_timeline) + + num_trials = None + for fuzzer in fuzzer_pds.keys(): + if num_trials is None: + num_trials = len(fuzzer_pds[fuzzer]) + else: + assert num_trials == len(fuzzer_pds[fuzzer]) + + num_fuzzers = len(fuzzer_pds) + + print(num_trials, num_fuzzers) + + return (num_trials, fuzzer_pds) # pylint: disable=too-many-arguments,too-many-locals @@ -274,7 +420,7 @@ def generate_report(experiment_names, # experiment_df.to_csv('/tmp/experiment-data/out.csv') - #TODO: make this work again + #TODO: make this work with a single fuzzer selected # Add |bugs_covered| column prior to export. 
experiment_df = data_utils.add_bugs_covered_column(experiment_df) @@ -294,7 +440,8 @@ def generate_report(experiment_names, if mutation_analysis: # TODO get_mua_results(main_experiment_name, fuzzers, # experiment_benchmarks, experiment_df) - mua_results = None + #fuzzers = ['afl', 'libfuzzer'] + mua_results = get_mua_results(experiment_df) else: mua_results = None @@ -338,7 +485,9 @@ def main(): from_cached_data=args.from_cached_data, end_time=args.end_time, merge_with_clobber=args.merge_with_clobber, - coverage_report=args.coverage_report) + coverage_report=args.coverage_report, + experiment_benchmarks=args.experiment_benchmarks, + mutation_analysis=args.mutation_analysis) if __name__ == '__main__': diff --git a/analysis/plotting.py b/analysis/plotting.py index 2838e8688..95d3d0608 100644 --- a/analysis/plotting.py +++ b/analysis/plotting.py @@ -13,6 +13,8 @@ # limitations under the License. """Plotting functions.""" +from itertools import chain + import numpy as np import Orange import seaborn as sns @@ -20,6 +22,9 @@ from matplotlib import colors from matplotlib import pyplot as plt from analysis import data_utils +from common import logs + +logger = logs.Logger() _DEFAULT_TICKS_COUNT = 12 _DEFAULT_LABEL_ROTATION = 30 @@ -542,6 +547,50 @@ def write_unique_coverage_ranking_plot(self, unique_branch_cov_df_combined, image_path, wide=True) + def write_mutation_analysis_plot(self, fuzzer_pds, num_fuzzers, num_trials, + image_path): + """Writes mutation analysis plot.""" + + df_list = list(chain(*fuzzer_pds.values())) + fig, axes = plt.subplots(num_fuzzers, + num_trials, + sharex=True, + sharey=True) + + #logger.info(f'df_list: {df_list}') + #logger.info(f'fuzzer_pds: {fuzzer_pds}') + #logger.info(f'num_fuzzers: {num_fuzzers}') + #logger.info(f'num_trials: {num_trials}') + + # plot counter + count = 0 + for trial_num in range(num_trials): + for fuzzer_num in range(num_fuzzers): + if num_trials == 1: + plt_ax = axes[fuzzer_num] + else: + plt_ax = axes[fuzzer_num, 
trial_num] + df = df_list[count] + #fuzz_target = df.iloc[0]['fuzz_target'] + benchmark = df.iloc[0]['benchmark'] + fuzzer = df.iloc[0]['fuzzer'] + trial_num = df.iloc[0]['trial_num'] + #cycle = df.iloc[0]['cycle'] + df_list[count].plot.line( + ax=plt_ax, + x='time', + y=['seen', 'killed'], + title=f'{benchmark} - {fuzzer}', + ) + plt_ax.title.set_fontsize('small') + plt_ax.set_xlim(xmin=0) + plt_ax.set_ylim(ymin=0) + count += 1 + + fig.tight_layout() + fig.savefig(image_path, bbox_inches='tight') + plt.close(fig) + def pairwise_unique_coverage_heatmap_plot(self, pairwise_unique_coverage_table, axes=None): diff --git a/analysis/report_templates/with_mua.html b/analysis/report_templates/with_mua.html index f1c24b30c..a84410331 100644 --- a/analysis/report_templates/with_mua.html +++ b/analysis/report_templates/with_mua.html @@ -407,11 +407,16 @@
Pairwise unique code coverage
Mutation analysis reports for each fuzzer on this benchmark
-
- {% for fuzzer in benchmark.fuzzer_names %} -
{{ fuzzer }} {{ benchmark.get_mua_report_data(fuzzer, benchmark.name) }}
- {% endfor %} -
+ +
+
+
Covered and killed mutants
+ + The graphs show the number of covered and killed mutants for a given fuzzer. +
+
+
From 29539db1eb8fadaf330f31fab39be08bb86c7faf Mon Sep 17 00:00:00 2001 From: phi-go Date: Fri, 12 Jan 2024 15:46:14 +0000 Subject: [PATCH 64/69] pass presubmit --- analysis/data_utils.py | 6 ----- analysis/generate_report.py | 44 +++++++++++++------------------------ 2 files changed, 15 insertions(+), 35 deletions(-) diff --git a/analysis/data_utils.py b/analysis/data_utils.py index 27c06f0b4..f6a31218e 100644 --- a/analysis/data_utils.py +++ b/analysis/data_utils.py @@ -147,12 +147,6 @@ def is_unique_crash(crash_group): unique_crashes.add(crash_state) is_firsts.append(is_unique) crash_group['firsts'] = is_firsts - print(crash_group.head()) - print(crash_group.index) - #crash_group.index = list(crash_group.index) - #crash_group.reset_index(inplace=True) - #print(crash_group.head()) - #print(crash_group.index) return crash_group.firsts diff --git a/analysis/generate_report.py b/analysis/generate_report.py index dc89e20ba..9172f1530 100644 --- a/analysis/generate_report.py +++ b/analysis/generate_report.py @@ -205,7 +205,6 @@ def modify_experiment_data_if_requested( # pylint: disable=too-many-arguments def normalized_timestamps(timestamps): """Normalize timestamps.""" - print(timestamps[0]) seed_timestamp_file = next( (tt for tt in timestamps if tt[2] == ''), None) try: @@ -220,12 +219,8 @@ def normalized_timestamps(timestamps): key=lambda x: x[3]) except ValueError: max_timestamp_file = seed_timestamp_file - # print(len(timestamps)) - # print('min_timestamp', min_timestamp_file) - # print('max_timestamp', max_timestamp_file) min_timestamp = min_timestamp_file[3] max_timestamp = max_timestamp_file[3] - print('max_timestamp - min_timestamp', max_timestamp - min_timestamp) timestamps_normalized = {} for _hashname, input_file_id, input_file, timestamp in timestamps: if input_file == '': @@ -255,18 +250,17 @@ def get_first_covered_killed(results, timestamps_map): return mut_result_times -def get_timeline(time_covered_killed, timespan, fuzz_target, benchmark, - 
fuzzer_name, trial_num, cycle): +def get_timeline(time_covered_killed, timespan, meta): # pylint: disable=too-many-locals """Create timeline regarding covering and killing of mutants.""" if timespan == 0: max_time_base = 1 else: max_time_base = 16 + fuzz_target, benchmark, fuzzer_name, trial_num, cycle = meta normalized_time_elem = timespan / (max_time_base**2) time = 'time' count_seen = 'seen' count_killed = 'killed' - print(f'{time:<10} {count_seen:<7} {count_killed:<7}') res = [] for time_base in range(1, max_time_base + 1): time = normalized_time_elem * (time_base**2) @@ -277,7 +271,6 @@ def get_timeline(time_covered_killed, timespan, fuzz_target, benchmark, count_seen += 1 if times['killed'] is not None and times['killed'] <= time: count_killed += 1 - print(f'{time:8.2f}s: {count_seen:>7} {count_killed:>7}') res.append((fuzz_target, benchmark, fuzzer_name, trial_num, cycle, time, count_seen, count_killed)) return res @@ -312,15 +305,13 @@ def load_result_db(res_db_path): return run_info, results, timestamps -def get_mua_results(experiment_df): +def get_mua_results(experiment_df): # pylint: disable=too-many-locals """Get mutation analysis results for each fuzzer in each trial to use in the report.""" #get relationship between trial_id and benchmark from df trial_dict = experiment_df.set_index('trial_id')['benchmark'].to_dict() - #logger.info(f'trial_dict: {trial_dict}') - experiment_data_dir = experiment_utils.get_experiment_filestore_path() results_data_dir = f'{experiment_data_dir}/mua-results/results' @@ -332,24 +323,26 @@ def get_mua_results(experiment_df): fuzzer_pds = defaultdict(list) for trial in trial_dict.keys(): - - print(experiment_data_dir) mua_result_db_file = f'{results_data_dir}/{trial}/' \ 'results.sqlite.lzma' - logger.info('mua_result_db_file:') - logger.info(mua_result_db_file) + if not os.path.isfile(mua_result_db_file): + logger.debug( + 'mua_result_db_file does not exist, this is expected ' + + 'if only median trial is evaluated: ' + 
f'{mua_result_db_file}') + continue + logger.info(f'found mua_result_db_file: {mua_result_db_file}') run_info, results, timestamps = load_result_db(mua_result_db_file) assert len(run_info) == 1 benchmark, fuzz_target, fuzzer, trial_num = run_info[0] - print(benchmark, fuzz_target, fuzzer, trial_num, trial) timestamps_map, timespan = normalized_timestamps(timestamps) results = [ rr for rr in results if timestamps_map.get(rr[0]) is not None ] time_covered_killed = get_first_covered_killed(results, timestamps_map) - timeline = get_timeline(time_covered_killed, timespan, fuzz_target, - benchmark, fuzzer, trial_num, trial) + meta = fuzz_target, benchmark, fuzzer, trial_num, trial + timeline = get_timeline(time_covered_killed, timespan, meta) pd_timeline = pd.DataFrame(timeline, columns=[ 'fuzz_target', 'benchmark', 'fuzzer', @@ -359,15 +352,11 @@ def get_mua_results(experiment_df): fuzzer_pds[fuzzer].append(pd_timeline) num_trials = None - for fuzzer in fuzzer_pds.keys(): + for fuzzer, fuzzer_pd in fuzzer_pds.items(): if num_trials is None: - num_trials = len(fuzzer_pds[fuzzer]) + num_trials = len(fuzzer_pd) else: - assert num_trials == len(fuzzer_pds[fuzzer]) - - num_fuzzers = len(fuzzer_pds) - - print(num_trials, num_fuzzers) + assert num_trials == len(fuzzer_pd) return (num_trials, fuzzer_pds) @@ -438,9 +427,6 @@ def generate_report(experiment_names, logger.info('Finished generating coverage report info.') if mutation_analysis: - # TODO get_mua_results(main_experiment_name, fuzzers, - # experiment_benchmarks, experiment_df) - #fuzzers = ['afl', 'libfuzzer'] mua_results = get_mua_results(experiment_df) else: mua_results = None From fcf342c70790f93b0f401f1ca6afd52dd7130005 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20G=C3=B6rz?= Date: Mon, 15 Jan 2024 10:58:23 +0000 Subject: [PATCH 65/69] fix running without mutation-analysis flag --- experiment/measurer/measure_manager.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git 
a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index 9df0a3624..a9ea107a3 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -876,7 +876,8 @@ def measure_snapshot( # pylint: disable=too-many-locals,too-many-arguments corpus_archive_dst, expect_zero=False).retcode: snapshot_logger.warning('Corpus not found for cycle: %d.', cycle) - add_measure_run_failed(benchmark, fuzzer, trial_num) + if mutation_analysis: + add_measure_run_failed(benchmark, fuzzer, trial_num) return None snapshot_measurer.initialize_measurement_dirs() @@ -893,7 +894,11 @@ def measure_snapshot( # pylint: disable=too-many-locals,too-many-arguments os.remove(corpus_archive_dst) # Run coverage on the new corpus units. - snapshot_measurer.run_cov_new_units() + if mutation_analysis: + with get_measure_spot(): + snapshot_measurer.run_cov_new_units() + else: + snapshot_measurer.run_cov_new_units() # Generate profdata and transform it into json form. snapshot_measurer.generate_coverage_information(cycle) From 72926c0bdf8614f16adaef2b4cd658e1908f6186 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20G=C3=B6rz?= Date: Mon, 15 Jan 2024 10:58:39 +0000 Subject: [PATCH 66/69] set merge_with_nonprivate to false --- service/experiment-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/experiment-config.yaml b/service/experiment-config.yaml index b9acb09f8..53885721b 100644 --- a/service/experiment-config.yaml +++ b/service/experiment-config.yaml @@ -15,7 +15,7 @@ preemptible_runners: true # This experiment should generate a report that is combined with other public # "production" experiments. -merge_with_nonprivate: true +merge_with_nonprivate: false # This experiment should be merged with other reports in later experiments. 
private: false From 47af5a9893a5a755147a2ece5333ad283913b275 Mon Sep 17 00:00:00 2001 From: Philipp Goerz Date: Wed, 17 Jan 2024 17:15:45 +0000 Subject: [PATCH 67/69] store build logs for local runs --- experiment/build/local_build.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/experiment/build/local_build.py b/experiment/build/local_build.py index 103674943..afb40919a 100644 --- a/experiment/build/local_build.py +++ b/experiment/build/local_build.py @@ -15,6 +15,7 @@ """Module for building things locally for use in trials.""" import os +import shlex from typing import Tuple from common import benchmark_utils @@ -25,6 +26,7 @@ from common import utils from experiment.measurer.run_mua import (MUTATION_ANALYSIS_IMAGE_NAME, stop_mua_container) +from experiment.build import build_utils logger = logs.Logger() # pylint: disable=invalid-name @@ -32,14 +34,16 @@ def make(targets): """Invoke |make| with |targets| and return the result.""" command = ['make', '--debug=j', '-j'] + targets + logger.info(f'Running: {shlex.join(command)}') return new_process.execute(command, - write_to_stdout=True, cwd=utils.ROOT_DIR) def build_base_images() -> Tuple[int, str]: """Build base images locally.""" - return make(['base-image', 'worker']) + result = make(['base-image', 'worker']) + build_utils.store_build_logs('base-images', result) + return result def get_shared_coverage_binaries_dir(): @@ -74,6 +78,7 @@ def build_coverage(benchmark): """Build (locally) coverage image for benchmark.""" image_name = f'build-coverage-{benchmark}' result = make([image_name]) + build_utils.store_build_logs(image_name, result) if result.retcode: return result make_shared_coverage_binaries_dir() @@ -86,6 +91,7 @@ def build_mua(benchmark): stop_mua_container(benchmark) image_name = f'.{MUTATION_ANALYSIS_IMAGE_NAME}-{benchmark}-builder' result = make([image_name]) + build_utils.store_build_logs(image_name, result) if result.retcode: return result 
make_shared_mua_binaries_dir() @@ -113,4 +119,5 @@ def copy_coverage_binaries(benchmark): def build_fuzzer_benchmark(fuzzer: str, benchmark: str) -> bool: """Builds |benchmark| for |fuzzer|.""" image_name = f'build-{fuzzer}-{benchmark}' - make([image_name]) + result = make([image_name]) + build_utils.store_build_logs(image_name, result) From b8672c820b910b1500be385cb4333f42c3a93efe Mon Sep 17 00:00:00 2001 From: Philipp Goerz Date: Wed, 17 Jan 2024 17:16:22 +0000 Subject: [PATCH 68/69] allow build scripts compiling same binary twice --- fuzzers/mutation_analysis/builder.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzzers/mutation_analysis/builder.Dockerfile b/fuzzers/mutation_analysis/builder.Dockerfile index 97120dfb3..4b58a54f1 100644 --- a/fuzzers/mutation_analysis/builder.Dockerfile +++ b/fuzzers/mutation_analysis/builder.Dockerfile @@ -97,7 +97,7 @@ RUN pipx install hatch # mua_fuzzer_bench RUN git clone https://github.com/phi-go/mua_fuzzer_bench /mutator && \ cd /mutator && \ - git checkout 5ac9af6ee3dfac3293e5705fa2e3b6520a41a9b9 + git checkout de67ec6f816362bbba58d9a3f7541a873ae6b4e2 RUN cd /mutator && \ From 0452521f2c82728526746a1b8f61d482ca8992da Mon Sep 17 00:00:00 2001 From: Philipp Goerz Date: Wed, 17 Jan 2024 17:26:33 +0000 Subject: [PATCH 69/69] allow unlimited log size for gcb_build --- common/new_process.py | 11 +++++++++-- experiment/build/gcb_build.py | 3 ++- experiment/build/local_build.py | 3 +-- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/common/new_process.py b/common/new_process.py index fd54c5cab..fb39ae019 100644 --- a/common/new_process.py +++ b/common/new_process.py @@ -78,6 +78,7 @@ def execute( # pylint: disable=too-many-locals,too-many-branches output_file: Optional[int] = None, # Not True by default because we can't always set group on processes. 
kill_children: bool = False, + limit_log_size: bool = True, **kwargs) -> ProcessResult: """Execute |command| and return the returncode and the output""" if write_to_stdout: @@ -110,12 +111,18 @@ def execute( # pylint: disable=too-many-locals,too-many-branches retcode = process.returncode - command_log_str = ' '.join(command)[:LOG_LIMIT_FIELD] + if limit_log_size: + command_log_str = ' '.join(command)[:LOG_LIMIT_FIELD] + else: + command_log_str = ' '.join(command) log_message = 'Executed command: "%s" returned: %d.' if output is not None: output = output.decode('utf-8', errors='ignore') - output_for_log = output[-LOG_LIMIT_FIELD:] + if limit_log_size: + output_for_log = output[-LOG_LIMIT_FIELD:] + else: + output_for_log = output log_extras = {'output': output_for_log} else: log_extras = None diff --git a/experiment/build/gcb_build.py b/experiment/build/gcb_build.py index 23b0900ab..75e2bffa0 100644 --- a/experiment/build/gcb_build.py +++ b/experiment/build/gcb_build.py @@ -116,7 +116,8 @@ def _build( write_to_stdout=False, kill_children=True, timeout=timeout_seconds, - expect_zero=False) + expect_zero=False, + limit_log_size=False) # TODO(metzman): Refactor code so that local_build stores logs as well. build_utils.store_build_logs(config_name, result) if result.retcode != 0: diff --git a/experiment/build/local_build.py b/experiment/build/local_build.py index afb40919a..5a49da8be 100644 --- a/experiment/build/local_build.py +++ b/experiment/build/local_build.py @@ -35,8 +35,7 @@ def make(targets): """Invoke |make| with |targets| and return the result.""" command = ['make', '--debug=j', '-j'] + targets logger.info(f'Running: {shlex.join(command)}') - return new_process.execute(command, - cwd=utils.ROOT_DIR) + return new_process.execute(command, cwd=utils.ROOT_DIR) def build_base_images() -> Tuple[int, str]: