diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/.DS_Store differ diff --git a/.github/scripts/annotate_errors.py b/.github/scripts/annotate_errors.py new file mode 100644 index 0000000..886182d --- /dev/null +++ b/.github/scripts/annotate_errors.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +import re +import sys + +def annotate_errors(log_file): + with open(log_file, 'r') as file: + for line in file: + # Match error patterns. For GCC/Clang: "error:"; for MSVC: "error Cxxxx:" + match = re.search(r'(.+):(\d+):(\d+): (error: .+)|(.+)\((\d+)\) ?: (error C[0-9]+: .+)', line) + if match: + if match.group(1): + # GCC/Clang style error + file_path, line_num, col_num, message = match.group(1), match.group(2), match.group(3), match.group(4) + print(f"::error file={file_path},line={line_num},col={col_num}::{message}") + elif match.group(5): + # MSVC style error + file_path, line_num, message = match.group(5), match.group(6), match.group(7) + print(f"::error file={file_path},line={line_num}::{message}") + +if __name__ == "__main__": + if len(sys.argv) != 2: + print("Usage: annotate_errors.py ") + sys.exit(1) + + log_file = sys.argv[1] + try: + annotate_errors(log_file) + except: + print("Failed to read the file") diff --git a/.github/scripts/annotate_warnings.py b/.github/scripts/annotate_warnings.py new file mode 100644 index 0000000..0c7b323 --- /dev/null +++ b/.github/scripts/annotate_warnings.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +import re +import sys + +def annotate_warnings(log_file): + with open(log_file, 'r') as file: + for line in file: + match = re.search(r'(.+):(\d+):(\d+): warning: (.+)', line) + if match: + file_path, line_num, col_num, message = match.groups() + print(f"::warning file={file_path},line={line_num},col={col_num}::{message}") + +if __name__ == "__main__": + if len(sys.argv) != 2: + print("Usage: annotate_warnings.py ") + sys.exit(1) + + log_file = sys.argv[1] + try: + annotate_warnings(log_file) + except: + print("Failed to read the file") diff --git a/.github/scripts/generate_coverage_reports.js b/.github/scripts/generate_coverage_reports.js new file mode 100644 index 0000000..25f9313 --- /dev/null +++ b/.github/scripts/generate_coverage_reports.js @@ -0,0 +1,140 @@ +module.exports = ({ github, context }) => { + const fs = require('fs'); + const path = require('path'); + + // Recursively get all coverage-*/coverage.json files from the directory + function getAllCoverageFiles(dir) { + let files = []; + + fs.readdirSync(dir).forEach(file => { + const fullPath = path.join(dir, file); + console.log(dir, file, '=', fullPath); + if (file.startsWith('coverage-')) { + files.push(fullPath); + console.log('Approved:', fullPath); + } + }); + + return files; + } + + // Get all coverage files from the directory + const coverageFiles = getAllCoverageFiles('coverage-files/'); + console.log('Coverage files:', coverageFiles); + + // Extract unique uncovered lines and annotations + function getUniqueCoverageData(coverageFiles) { + const aggregated_data = { + "files": new Set(), + "functions": new Set(), + "totals": new Set() + }; + + coverageFiles.forEach(file => { + const data = JSON.parse(fs.readFileSync(file + "/coverage.json", 'utf8')); + data['data'][0]['files'].forEach(file => aggregated_data["files"].add(file)); + data['data'][0]['functions'].forEach(function_data => aggregated_data["functions"].add(function_data)); + aggregated_data["totals"].add(data['data'][0]['totals']); + }); + + return aggregated_data; + } + + const aggregated_data = getUniqueCoverageData(coverageFiles); + + // Extract unique annotations + function getUniqueUncoveredLines(aggregated_data) { + const uncoveredLinesSet = new Set(); + const annotations = []; + + aggregated_data.files.forEach(file => { + if (file.segments && Array.isArray(file.segments)) { + const fileName = file.filename; + file.segments.forEach(segment => { + const [line, col, count] = segment; + const key = `${fileName}:${line}`; // Unique key based on filename and line number + if (count === 0 && !uncoveredLinesSet.has(key)) { + uncoveredLinesSet.add(key); + annotations.push({ + path: fileName, + start_line: line, + end_line: line, + annotation_level: 'warning', + message: 'Uncovered line' + }); + } + }); + } + }); + + return annotations; + } + + // Get unique uncovered lines from the combined coverage + const annotations = getUniqueUncoveredLines(aggregated_data); + + // Send annotations to the pull request (once per uncovered line) + annotations.forEach(annotation => { + github.rest.checks.create({ + owner: context.repo.owner, + repo: context.repo.repo, + name: 'Code Coverage Check', + head_sha: context.sha, + output: { + title: 'Code Coverage Report', + summary: 'Found uncovered lines', + annotations: [annotation], + }, + }); + }); + + // Function to calculate overall and file-specific coverage + function calculateCoverage(aggregated_data) { + let fileCoverage = []; + let totalLines = 0; + let coveredLines = 0; + let processedFiles = new Set(); // To avoid duplicate files + + aggregated_data.files.forEach(file => { + if (!processedFiles.has(file.filename)) { + const fileSummary = file.summary.lines; + + // Extract per-file coverage percentage + fileCoverage.push({ + filename: file.filename, + coveragePercentage: fileSummary.percent.toFixed(2), + totalLines: fileSummary.count, + coveredLines: fileSummary.covered + }); + + totalLines += fileSummary.count; + coveredLines += fileSummary.covered; + processedFiles.add(file.filename); // Mark file as processed + } + }); + + const overallCoverage = (coveredLines / totalLines) * 100; + + return { overallCoverage: overallCoverage.toFixed(2), fileCoverage }; + } + + // Calculate overall and per-file coverage + const { overallCoverage, fileCoverage } = calculateCoverage(aggregated_data); + + // Build the coverage summary message + let summary = `### Code Coverage Summary\n`; + summary += `**Overall Coverage**: ${overallCoverage}%\n\n`; + summary += `#### File-by-file Coverage:\n`; + + fileCoverage.forEach(file => { + summary += `- **${file.filename}**: ${file.coveragePercentage}% (${file.coveredLines}/${file.totalLines} lines covered)\n`; + }); + + // Post the summary as a comment on the pull request + github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.payload.pull_request.number, + body: summary, + }); +} \ No newline at end of file diff --git a/.github/workflows/auto-clang-format.yml b/.github/workflows/auto-clang-format.yml deleted file mode 100644 index b114b60..0000000 --- a/.github/workflows/auto-clang-format.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: auto-clang-format -on: [pull_request] - -jobs: - build: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - uses: DoozyX/clang-format-lint-action@v0.13 - with: - source: '.' - exclude: './third_party ./external' - extensions: 'h,cpp,hpp' - clangFormatVersion: 12 - inplace: True - - uses: EndBug/add-and-commit@v4 - with: - author_name: Clang Robot - author_email: robot@clang - message: ':art: Committing clang-format changes' - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 675621f..23411c1 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -1,191 +1,185 @@ -name: ci +name: Build and Test CMake C++ Project + on: - pull_request: - release: - types: [published] push: - tags: branches: - main - - develop - -env: - # Conan cache environment variables - CONAN_SYSREQUIRES_MODE: enabled - CONAN_USER_HOME: "${{ github.workspace }}/conan-cache" - CONAN_USER_HOME_SHORT: "${{ github.workspace }}/conan-cache/short" - CLANG_TIDY_VERSION: "13.0.0" + pull_request: + branches: + - main jobs: - Test: - runs-on: ${{ matrix.os }} + build: strategy: fail-fast: false - - # Recommendations: - # * support at least 2 operating systems - # * support at least 2 compilers - # * make sure all supported configurations for your project are built - # - # Disable/enable builds in this list to meet the above recommendations - # and your own projects needs matrix: os: - - ubuntu-20.04 - - ubuntu-22.04 - - macos-11 - - macos-12 - - macos-13 - compiler: - # you can specify the version after `-` like "llvm-13.0.0". - - llvm-13.0.0 - - gcc-11 - generator: - - "Ninja Multi-Config" - build_type: - - Release - - Debug - developer_mode: - - ON - - OFF - - exclude: - # mingw is determined by this author to be too buggy to support - - os: macos-11 - compiler: gcc-11 - - os: macos-12 - compiler: gcc-11 - - os: macos-13 - compiler: gcc-11 - - os: ubuntu-20.04 - compiler: llvm-13.0.0 - - os: ubuntu-22.04 - compiler: llvm-13.0.0 - + [ + ubuntu-22.04, + ubuntu-24.04, + macos-13, + macos-14, + macos-15, + windows-2019, + windows-2022, + ] + c_compiler: [gcc, clang] + generator: [Ninja, "Unix Makefiles"] include: - # Add appropriate variables for gcov version required. This will intentionally break - # if you try to use a compiler that does not have gcov set - - compiler: gcc-11 - gcov_executable: gcov - - compiler: llvm-13.0.0 - gcov_executable: "llvm-cov gcov" - - # Set up preferred package generators, for given build configurations - - build_type: Release - developer_mode: OFF - package_generator: TBZ2 - - - os: windows-2022 - compiler: msvc + # MSVC is only available on Windows + - os: windows-2019 + c_compiler: cl + cxx_compiler: cl generator: "Visual Studio 17 2022" - build_type: Debug - developer_mode: On - - os: windows-2022 - compiler: msvc + c_compiler: cl + cxx_compiler: cl generator: "Visual Studio 17 2022" - build_type: Release - developer_mode: On - - - os: windows-2022 - compiler: msvc - generator: "Visual Studio 17 2022" - build_type: Debug - developer_mode: OFF - - - os: windows-2022 - compiler: msvc - generator: "Visual Studio 17 2022" - build_type: Release - developer_mode: OFF - package_generator: ZIP - + - c_compiler: clang + cxx_compiler: clang++ + - c_compiler: gcc + cxx_compiler: g++ + runs-on: ${{ matrix.os }} steps: - - name: Check for llvm version mismatches - if: ${{ contains(matrix.compiler, 'llvm') && !contains(matrix.compiler, env.CLANG_TIDY_VERSION) }} - uses: actions/github-script@v7 - with: - script: | - core.setFailed('There is a mismatch between configured llvm compiler and clang-tidy version chosen') - - - uses: actions/checkout@v4 + - name: Checkout repository + uses: actions/checkout@v4 - - name: Setup Cache - uses: ./.github/actions/setup_cache - with: - compiler: ${{ matrix.compiler }} - build_type: ${{ matrix.build_type }} - developer_mode: ${{ matrix.developer_mode }} - generator: ${{ matrix.generator }} + - name: Install dependencies (Ubuntu) + if: ${{ startsWith(matrix.os, 'ubuntu') }} + run: | + sudo apt-get install -y software-properties-common + sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y + sudo apt-get update + sudo apt-get install -y cmake ninja-build gcc-12 g++-12 llvm lcov python3 - - name: Setup Cpp - uses: aminya/setup-cpp@v1 - with: - compiler: ${{ matrix.compiler }} - vcvarsall: ${{ contains(matrix.os, 'windows' )}} + # Set gcc and g++ version 12 as default + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 60 + sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 60 - cmake: true - ninja: true - conan: true - vcpkg: false - ccache: true - clangtidy: ${{ env.CLANG_TIDY_VERSION }} + - name: Install dependencies (macOS) + if: ${{ startsWith(matrix.os, 'macos') }} + run: | + brew install llvm ninja python3 cmake + - name: Install dependencies (Windows) + if: ${{ startsWith(matrix.os, 'windows') }} + run: | + choco install cmake --installargs 'ADD_CMAKE_TO_PATH=System' -y + choco install python --version 3.9.0 -y + choco install mingw llvm ninja -y - cppcheck: true + - name: Configure CMake + if: ${{ matrix.cxx_compiler == 'cl' }} + run: | + cmake -S . -B build -DCODE_COVERAGE=OFF -A x64 -DCMAKE_CXX_COMPILER=${{ matrix.cxx_compiler }} -DCMAKE_C_COMPILER=${{ matrix.c_compiler }} - gcovr: true - opencppcoverage: true + - name: Configure CMake + if: ${{ matrix.cxx_compiler != 'cl' }} + run: | + cmake -S . -B build -G "${{ matrix.generator }}" -DCODE_COVERAGE=OFF -DCMAKE_CXX_COMPILER=${{ matrix.cxx_compiler }} -DCMAKE_C_COMPILER=${{ matrix.c_compiler }} - - name: Cleanup Conan system packages (they are not properly cached) + - name: Build Project + id: build_project + continue-on-error: true run: | - conan remove -f '*/system' + cmake --build build --config Release > compile.log 2>&1 - # make sure coverage is only enabled for Debug builds, since it sets -O0 to make sure coverage - # has meaningful results - - name: Configure CMake + - name: Run tests + if: ${{ steps.build_project.outcome == 'success' }} run: | - cmake -S . -B ./build -G "${{matrix.generator}}" -DCMAKE_BUILD_TYPE:STRING=${{matrix.build_type}} -DENABLE_DEVELOPER_MODE:BOOL=${{matrix.developer_mode}} -DOPT_ENABLE_COVERAGE:BOOL=${{ matrix.build_type == 'Debug' && matrix.developer_mode == 'OFF' }} -DGIT_SHA:STRING=${{ github.sha }} + cmake --build build + cd build + ctest -C Release - - name: Build - # Execute the build. You can specify a specific target with "--target " + - name: Annotate warnings and errors without duplicates + if: ${{ always() }} + shell: bash run: | - cmake --build ./build --config ${{matrix.build_type}} + python3 ./.github/scripts/annotate_warnings.py compile.log > annotations.txt + python3 ./.github/scripts/annotate_errors.py compile.log >> annotations.txt - - name: Unix - Test and coverage - if: runner.os != 'Windows' - working-directory: ./build - # Execute tests defined by the CMake configuration. - # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail + - name: Upload compile artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + name: "annotations-${{ matrix.os }}-${{ matrix.c_compiler }}-${{ matrix.generator }}" + path: annotations.txt + + gather_annotations: + needs: build + runs-on: ubuntu-latest + steps: + - name: Download annotations artifacts + uses: actions/download-artifact@v4 + with: + path: ./ + + - name: Filter and print unique annotations run: | - ctest -C ${{matrix.build_type}} - gcovr -j ${{env.nproc}} --delete --root ../ --print-summary --xml-pretty --xml coverage.xml . --gcov-executable '${{ matrix.gcov_executable }}' + cat ./annotations-*/annotations.txt | sort | uniq + + code_coverage: + strategy: + fail-fast: false + matrix: + os: [ubuntu-22.04, ubuntu-24.04, macos-13, macos-14, macos-15] + c_compiler: [clang] + generator: [Ninja, "Unix Makefiles"] + include: + # MSVC is only available on Windows + - c_compiler: clang + cxx_compiler: clang++ + runs-on: ${{ matrix.os }} + + steps: + - name: Checkout repository + uses: actions/checkout@v4 - - name: Windows - Test and coverage - if: runner.os == 'Windows' - working-directory: ./build + - name: Install dependencies (Ubuntu) + if: ${{ startsWith(matrix.os, 'ubuntu') }} run: | - OpenCppCoverage.exe --export_type cobertura:coverage.xml --cover_children -- ctest -C ${{matrix.build_type}} + sudo apt-get install -y software-properties-common + sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y + sudo apt-get update + sudo apt-get install -y cmake ninja-build llvm lcov python3 - - name: CPack - if: matrix.package_generator != '' - working-directory: ./build + - name: Install dependencies (macOS) + if: ${{ startsWith(matrix.os, 'macos') }} run: | - cpack -C ${{matrix.build_type}} -G ${{matrix.package_generator}} + brew install llvm lcov ninja python3 cmake + ln -sf "$(xcrun --find llvm-profdata)" /usr/local/bin/llvm-profdata + ln -sf "$(xcrun --find llvm-cov)" /usr/local/bin/llvm-cov - - name: Publish Tagged Release - uses: softprops/action-gh-release@v1 - if: ${{ startsWith(github.ref, 'refs/tags/') && matrix.package_generator != '' }} + - name: Prepare for code coverage + run: | + export LLVM_PROFILE_FILE="%p.profraw" + cmake -B build -G'${{ matrix.generator }}' -DCODE_COVERAGE=ON -DCMAKE_CXX_COMPILER=${{ matrix.cxx_compiler }} -DCMAKE_C_COMPILER=${{ matrix.c_compiler }} + cmake --build build --target ccov-all-export + cat ./build/ccov/coverage.json + echo "Coverage report generated in ./out directory" + + - name: Upload compile artifacts + uses: actions/upload-artifact@v4 with: - files: | - build/*-*${{ matrix.build_type }}*-*.* - + name: "coverage-${{ matrix.os }}-${{ matrix.c_compiler }}-${{ matrix.generator }}" + path: ./build/ccov/coverage.json - - name: Publish to codecov - uses: codecov/codecov-action@v4 + code-coverage-summery: + runs-on: ubuntu-latest + needs: [code_coverage] + steps: + - name: Checkout repository + uses: actions/checkout@v4 + # Download artifacts from previous jobs (assuming the artifacts contain coverage-*/coverage.json files) + - name: Download all coverage artifacts + uses: actions/download-artifact@v4 with: - token: ${{ secrets.CODECOV_TOKEN }} - flags: ${{ runner.os }} - name: ${{ runner.os }}-coverage - files: ./build/coverage.xml \ No newline at end of file + path: coverage-files + # Combine the coverage reports and post annotations using github-script + - name: Combine and process coverage reports + uses: actions/github-script@v7 + with: + script: | + const script = require('./.github/scripts/generate_coverage_reports.js') + console.log(script({github, context})) diff --git a/.gitignore b/.gitignore index 6f31401..02e2c31 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ build/ .vscode/ +.cache/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 771b4e9..57b3dac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,10 +1,11 @@ cmake_minimum_required(VERSION 3.16.3) -project(RegexMatcher VERSION 1.0.0.1) +project(RegexMatcher VERSION 1.0.0.2) -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_STANDARD_REQUIRED ON) + if(CMAKE_BUILD_TYPE STREQUAL "Debug") add_definitions(-DDEBUG) endif() @@ -14,7 +15,7 @@ if(NOT DEFINED CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT OR CMAKE_INSTALL_PREF endif() configure_file(RegexMatcherConfig.h.in ${CMAKE_BINARY_DIR}/config/RegexMatcherConfig.h) -add_subdirectory(core) +add_subdirectory(lib) include(CTest) enable_testing() @@ -35,4 +36,4 @@ set(CPACK_PACKAGE_FILE_NAME "${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}-${GIT_SHORT_SHA}-${CMAKE_SYSTEM_NAME}-${CMAKE_BUILD_TYPE}-${CMAKE_CXX_COMPILER_ID}-${CMAKE_CXX_COMPILER_VERSION}" ) -include(CPack) \ No newline at end of file +include(CPack) diff --git a/RegexMatcherConfig.h.in b/RegexMatcherConfig.h.in index 18ec233..05e3fc0 100644 --- a/RegexMatcherConfig.h.in +++ b/RegexMatcherConfig.h.in @@ -1,5 +1,9 @@ -// the configured options and settings for Tutorial -#define RegexMatcher_VERSION_MAJOR @RegexMatcher_VERSION_MAJOR@ -#define RegexMatcher_VERSION_MINOR @RegexMatcher_VERSION_MINOR@ -#define RegexMatcher_VERSION_PATCH @RegexMatcher_VERSION_PATCH@ -#define RegexMatcher_VERSION_TWEAK @RegexMatcher_VERSION_TWEAK@ \ No newline at end of file +namespace matcher +{ + + static constexpr short RegexMatcher_VERSION_MAJOR = @RegexMatcher_VERSION_MAJOR@; + static constexpr short RegexMatcher_VERSION_MINOR = @RegexMatcher_VERSION_MINOR@; + static constexpr short RegexMatcher_VERSION_PATCH = @RegexMatcher_VERSION_PATCH@; + static constexpr short RegexMatcher_VERSION_TWEAK = @RegexMatcher_VERSION_TWEAK@; + +} // namespace matcher diff --git a/cmake_tools/code-coverage.cmake b/cmake_tools/code-coverage.cmake new file mode 100644 index 0000000..0a4d933 --- /dev/null +++ b/cmake_tools/code-coverage.cmake @@ -0,0 +1,707 @@ +# +# Copyright (C) 2018-2024 by George Cave - gcave@stablecoder.ca +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +# USAGE: To enable any code coverage instrumentation/targets, the single CMake +# option of `CODE_COVERAGE` needs to be set to 'ON', either by GUI, ccmake, or +# on the command line. +# +# From this point, there are two primary methods for adding instrumentation to +# targets: 1 - A blanket instrumentation by calling `add_code_coverage()`, where +# all targets in that directory and all subdirectories are automatically +# instrumented. 2 - Per-target instrumentation by calling +# `target_code_coverage()`, where the target is given and thus only +# that target is instrumented. This applies to both libraries and executables. +# +# To add coverage targets, such as calling `make ccov` to generate the actual +# coverage information for perusal or consumption, call +# `target_code_coverage()` on an *executable* target. +# +# Example 1: All targets instrumented +# +# In this case, the coverage information reported will will be that of the +# `theLib` library target and `theExe` executable. +# +# 1a: Via global command +# +# ~~~ +# add_code_coverage() # Adds instrumentation to all targets +# +# add_library(theLib lib.cpp) +# +# add_executable(theExe main.cpp) +# target_link_libraries(theExe PRIVATE theLib) +# target_code_coverage(theExe) # As an executable target, adds the 'ccov-theExe' target (instrumentation already added via global anyways) for generating code coverage reports. +# ~~~ +# +# 1b: Via target commands +# +# ~~~ +# add_library(theLib lib.cpp) +# target_code_coverage(theLib) # As a library target, adds coverage instrumentation but no targets. +# +# add_executable(theExe main.cpp) +# target_link_libraries(theExe PRIVATE theLib) +# target_code_coverage(theExe) # As an executable target, adds the 'ccov-theExe' target and instrumentation for generating code coverage reports. +# ~~~ +# +# Example 2: Target instrumented, but with regex pattern of files to be excluded +# from report +# +# ~~~ +# add_executable(theExe main.cpp non_covered.cpp) +# target_code_coverage(theExe EXCLUDE non_covered.cpp test/*) # As an executable target, the reports will exclude the non-covered.cpp file, and any files in a test/ folder. +# ~~~ +# +# Example 3: Target added to the 'ccov' and 'ccov-all' targets +# +# ~~~ +# add_code_coverage_all_targets(EXCLUDE test/*) # Adds the 'ccov-all' target set and sets it to exclude all files in test/ folders. +# +# add_executable(theExe main.cpp non_covered.cpp) +# target_code_coverage(theExe AUTO ALL EXCLUDE non_covered.cpp test/*) # As an executable target, adds to the 'ccov' and ccov-all' targets, and the reports will exclude the non-covered.cpp file, and any files in a test/ folder. +# ~~~ + +# Options +option( + CODE_COVERAGE + "Builds targets with code coverage instrumentation. (Requires GCC or Clang)" + OFF) + +# Programs +find_program(LLVM_COV_PATH llvm-cov) +find_program(LLVM_PROFDATA_PATH llvm-profdata) +find_program(LCOV_PATH lcov) +find_program(GENHTML_PATH genhtml) +# Hide behind the 'advanced' mode flag for GUI/ccmake +mark_as_advanced(FORCE LLVM_COV_PATH LLVM_PROFDATA_PATH LCOV_PATH GENHTML_PATH) + +# Variables +set(CMAKE_COVERAGE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/ccov) +set_property(GLOBAL PROPERTY JOB_POOLS ccov_serial_pool=1) + +# Common initialization/checks +if(CODE_COVERAGE AND NOT CODE_COVERAGE_ADDED) + set(CODE_COVERAGE_ADDED ON) + + # Common Targets + file(MAKE_DIRECTORY ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}) + + if(CMAKE_C_COMPILER_ID MATCHES "(Apple)?[Cc]lang" + OR CMAKE_CXX_COMPILER_ID MATCHES "(Apple)?[Cc]lang") + + if(CMAKE_C_COMPILER_ID MATCHES "AppleClang" OR CMAKE_CXX_COMPILER_ID + MATCHES "AppleClang") + # When on macOS and using the Apple-provided toolchain, use the + # XCode-provided llvm toolchain via `xcrun` + message( + STATUS + "Building with XCode-provided llvm code coverage tools (via `xcrun`)") + set(LLVM_COV_PATH xcrun llvm-cov) + set(LLVM_PROFDATA_PATH xcrun llvm-profdata) + else() + # Use the regular llvm toolchain + message(STATUS "Building with llvm code coverage tools") + endif() + + if(NOT LLVM_COV_PATH) + message(FATAL_ERROR "llvm-cov not found! Aborting.") + else() + # Version number checking for 'EXCLUDE' compatibility + execute_process(COMMAND ${LLVM_COV_PATH} --version + OUTPUT_VARIABLE LLVM_COV_VERSION_CALL_OUTPUT) + string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" LLVM_COV_VERSION + ${LLVM_COV_VERSION_CALL_OUTPUT}) + + if(LLVM_COV_VERSION VERSION_LESS "7.0.0") + message( + WARNING + "target_code_coverage()/add_code_coverage_all_targets() 'EXCLUDE' option only available on llvm-cov >= 7.0.0" + ) + endif() + endif() + + # Targets + if(${CMAKE_VERSION} VERSION_LESS "3.17.0") + add_custom_target( + ccov-clean + COMMAND ${CMAKE_COMMAND} -E remove -f + ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/binaries.list + COMMAND ${CMAKE_COMMAND} -E remove -f + ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/profraw.list) + else() + add_custom_target( + ccov-clean + COMMAND ${CMAKE_COMMAND} -E rm -f + ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/binaries.list + COMMAND ${CMAKE_COMMAND} -E rm -f + ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/profraw.list) + endif() + + # Used to get the shared object file list before doing the main all- + # processing + add_custom_target( + ccov-libs + COMMAND ; + COMMENT "libs ready for coverage report.") + + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES + "GNU") + # Messages + message(STATUS "Building with lcov Code Coverage Tools") + + if(CMAKE_BUILD_TYPE) + string(TOUPPER ${CMAKE_BUILD_TYPE} upper_build_type) + if(NOT ${upper_build_type} STREQUAL "DEBUG") + message( + WARNING + "Code coverage results with an optimized (non-Debug) build may be misleading" + ) + endif() + else() + message( + WARNING + "Code coverage results with an optimized (non-Debug) build may be misleading" + ) + endif() + if(NOT LCOV_PATH) + message(FATAL_ERROR "lcov not found! Aborting...") + endif() + if(NOT GENHTML_PATH) + message(FATAL_ERROR "genhtml not found! Aborting...") + endif() + + # Targets + add_custom_target(ccov-clean COMMAND ${LCOV_PATH} --directory + ${CMAKE_BINARY_DIR} --zerocounters) + + else() + message(FATAL_ERROR "Code coverage requires Clang or GCC. Aborting.") + endif() +endif() + +# Adds code coverage instrumentation to a library, or instrumentation/targets +# for an executable target. +# ~~~ +# EXECUTABLE ADDED TARGETS: +# GCOV/LCOV: +# ccov : Generates HTML code coverage report for every target added with 'AUTO' parameter. +# ccov-${TARGET_NAME} : Generates HTML code coverage report for the associated named target. +# ccov-all : Generates HTML code coverage report, merging every target added with 'ALL' parameter into a single detailed report. +# +# LLVM-COV: +# ccov : Generates HTML code coverage report for every target added with 'AUTO' parameter. +# ccov-report : Generates HTML code coverage report for every target added with 'AUTO' parameter. +# ccov-${TARGET_NAME} : Generates HTML code coverage report. +# ccov-report-${TARGET_NAME} : Prints to command line summary per-file coverage information. +# ccov-export-${TARGET_NAME} : Exports the coverage report to a JSON file. +# ccov-show-${TARGET_NAME} : Prints to command line detailed per-line coverage information. +# ccov-all : Generates HTML code coverage report, merging every target added with 'ALL' parameter into a single detailed report. +# ccov-all-report : Prints summary per-file coverage information for every target added with ALL' parameter to the command line. +# ccov-all-export : Exports the coverage report to a JSON file. +# +# Required: +# TARGET_NAME - Name of the target to generate code coverage for. +# Optional: +# PUBLIC - Sets the visibility for added compile options to targets to PUBLIC instead of the default of PRIVATE. +# INTERFACE - Sets the visibility for added compile options to targets to INTERFACE instead of the default of PRIVATE. +# PLAIN - Do not set any target visibility (backward compatibility with old cmake projects) +# AUTO - Adds the target to the 'ccov' target so that it can be run in a batch with others easily. Effective on executable targets. +# ALL - Adds the target to the 'ccov-all' and 'ccov-all-report' targets, which merge several executable targets coverage data to a single report. Effective on executable targets. +# EXTERNAL - For GCC's lcov, allows the profiling of 'external' files from the processing directory +# COVERAGE_TARGET_NAME - For executables ONLY, changes the outgoing target name so instead of `ccov-${TARGET_NAME}` it becomes `ccov-${COVERAGE_TARGET_NAME}`. +# EXCLUDE - Excludes files of the patterns provided from coverage. Note that GCC/lcov excludes by glob pattern, and clang/LLVM excludes via regex! **These do not copy to the 'all' targets.** +# OBJECTS - For executables ONLY, if the provided targets are static or shared libraries, adds coverage information to the output +# PRE_ARGS - For executables ONLY, prefixes given arguments to the associated ccov-* executable call ($ ccov-*) +# ARGS - For executables ONLY, appends the given arguments to the associated ccov-* executable call (ccov-* $) +# ~~~ +function(target_code_coverage TARGET_NAME) + # Argument parsing + set(options AUTO ALL EXTERNAL PUBLIC INTERFACE PLAIN) + set(single_value_keywords COVERAGE_TARGET_NAME) + set(multi_value_keywords EXCLUDE OBJECTS PRE_ARGS ARGS) + cmake_parse_arguments( + target_code_coverage "${options}" "${single_value_keywords}" + "${multi_value_keywords}" ${ARGN}) + + # Set the visibility of target functions to PUBLIC, INTERFACE or default to + # PRIVATE. + if(target_code_coverage_PUBLIC) + set(TARGET_VISIBILITY PUBLIC) + set(TARGET_LINK_VISIBILITY PUBLIC) + elseif(target_code_coverage_INTERFACE) + set(TARGET_VISIBILITY INTERFACE) + set(TARGET_LINK_VISIBILITY INTERFACE) + elseif(target_code_coverage_PLAIN) + set(TARGET_VISIBILITY PUBLIC) + set(TARGET_LINK_VISIBILITY) + else() + set(TARGET_VISIBILITY PRIVATE) + set(TARGET_LINK_VISIBILITY PRIVATE) + endif() + + if(NOT target_code_coverage_COVERAGE_TARGET_NAME) + # If a specific name was given, use that instead. + set(target_code_coverage_COVERAGE_TARGET_NAME ${TARGET_NAME}) + endif() + + if(CODE_COVERAGE) + + # Add code coverage instrumentation to the target's linker command + if(CMAKE_C_COMPILER_ID MATCHES "(Apple)?[Cc]lang" + OR CMAKE_CXX_COMPILER_ID MATCHES "(Apple)?[Cc]lang") + target_compile_options(${TARGET_NAME} ${TARGET_VISIBILITY} + -fprofile-instr-generate -fcoverage-mapping) + target_link_options(${TARGET_NAME} ${TARGET_VISIBILITY} + -fprofile-instr-generate -fcoverage-mapping) + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES + "GNU") + target_compile_options( + ${TARGET_NAME} ${TARGET_VISIBILITY} -fprofile-arcs -ftest-coverage + $<$:-fno-elide-constructors> -fno-default-inline) + target_link_libraries(${TARGET_NAME} ${TARGET_LINK_VISIBILITY} gcov) + endif() + + # Targets + get_target_property(target_type ${TARGET_NAME} TYPE) + + # Add shared library to processing for 'all' targets + if(target_type STREQUAL "SHARED_LIBRARY" AND target_code_coverage_ALL) + if(CMAKE_C_COMPILER_ID MATCHES "(Apple)?[Cc]lang" + OR CMAKE_CXX_COMPILER_ID MATCHES "(Apple)?[Cc]lang") + add_custom_target( + ccov-run-${target_code_coverage_COVERAGE_TARGET_NAME} + COMMAND + ${CMAKE_COMMAND} -E echo "-object=$" >> + ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/binaries.list + DEPENDS ${TARGET_NAME}) + + if(NOT TARGET ccov-libs) + message( + FATAL_ERROR + "Calling target_code_coverage with 'ALL' must be after a call to 'add_code_coverage_all_targets'." + ) + endif() + + add_dependencies(ccov-libs + ccov-run-${target_code_coverage_COVERAGE_TARGET_NAME}) + endif() + endif() + + # For executables add targets to run and produce output + if(target_type STREQUAL "EXECUTABLE") + if(CMAKE_C_COMPILER_ID MATCHES "(Apple)?[Cc]lang" + OR CMAKE_CXX_COMPILER_ID MATCHES "(Apple)?[Cc]lang") + + # If there are static or shared objects to also work with, generate the + # string to add them here + foreach(LINK_OBJECT ${target_code_coverage_OBJECTS}) + # Check to see if the target is a shared object + if(TARGET ${LINK_OBJECT}) + get_target_property(LINK_OBJECT_TYPE ${LINK_OBJECT} TYPE) + if(${LINK_OBJECT_TYPE} STREQUAL "STATIC_LIBRARY" + OR ${LINK_OBJECT_TYPE} STREQUAL "SHARED_LIBRARY") + set(LINKED_OBJECTS ${LINKED_OBJECTS} + -object=$) + endif() + endif() + endforeach() + + # Run the executable, generating raw profile data Make the run data + # available for further processing. Separated to allow Windows to run + # this target serially. + add_custom_target( + ccov-run-${target_code_coverage_COVERAGE_TARGET_NAME} + COMMAND + ${CMAKE_COMMAND} -E env ${CMAKE_CROSSCOMPILING_EMULATOR} + ${target_code_coverage_PRE_ARGS} + LLVM_PROFILE_FILE=${target_code_coverage_COVERAGE_TARGET_NAME}.profraw + $ ${target_code_coverage_ARGS} + COMMAND + ${CMAKE_COMMAND} -E echo "-object=$" + ${LINKED_OBJECTS} >> + ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/binaries.list + COMMAND + ${CMAKE_COMMAND} -E echo + "${CMAKE_CURRENT_BINARY_DIR}/${target_code_coverage_COVERAGE_TARGET_NAME}.profraw" + >> ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/profraw.list + JOB_POOL ccov_serial_pool + DEPENDS ccov-libs ${TARGET_NAME}) + + # Merge the generated profile data so llvm-cov can process it + add_custom_target( + ccov-processing-${target_code_coverage_COVERAGE_TARGET_NAME} + COMMAND + ${LLVM_PROFDATA_PATH} merge -sparse + ${target_code_coverage_COVERAGE_TARGET_NAME}.profraw -o + ${target_code_coverage_COVERAGE_TARGET_NAME}.profdata + DEPENDS ccov-run-${target_code_coverage_COVERAGE_TARGET_NAME}) + + # Ignore regex only works on LLVM >= 7 + if(LLVM_COV_VERSION VERSION_GREATER_EQUAL "7.0.0") + foreach(EXCLUDE_ITEM ${target_code_coverage_EXCLUDE}) + set(EXCLUDE_REGEX ${EXCLUDE_REGEX} + -ignore-filename-regex='${EXCLUDE_ITEM}') + endforeach() + endif() + + # Print out details of the coverage information to the command line + add_custom_target( + ccov-show-${target_code_coverage_COVERAGE_TARGET_NAME} + COMMAND + ${LLVM_COV_PATH} show $ + -instr-profile=${target_code_coverage_COVERAGE_TARGET_NAME}.profdata + -show-line-counts-or-regions ${LINKED_OBJECTS} ${EXCLUDE_REGEX} + DEPENDS ccov-processing-${target_code_coverage_COVERAGE_TARGET_NAME}) + + # Print out a summary of the coverage information to the command line + add_custom_target( + ccov-report-${target_code_coverage_COVERAGE_TARGET_NAME} + COMMAND + ${LLVM_COV_PATH} report $ + -instr-profile=${target_code_coverage_COVERAGE_TARGET_NAME}.profdata + ${LINKED_OBJECTS} ${EXCLUDE_REGEX} + DEPENDS ccov-processing-${target_code_coverage_COVERAGE_TARGET_NAME}) + + # Export coverage information so continuous integration tools (e.g. + # Jenkins) can consume it + add_custom_target( + ccov-export-${target_code_coverage_COVERAGE_TARGET_NAME} + COMMAND + ${LLVM_COV_PATH} export $ + -instr-profile=${target_code_coverage_COVERAGE_TARGET_NAME}.profdata + -format="text" ${LINKED_OBJECTS} ${EXCLUDE_REGEX} > + ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/${target_code_coverage_COVERAGE_TARGET_NAME}.json + DEPENDS ccov-processing-${target_code_coverage_COVERAGE_TARGET_NAME}) + + # Generates HTML output of the coverage information for perusal + add_custom_target( + ccov-${target_code_coverage_COVERAGE_TARGET_NAME} + COMMAND + ${LLVM_COV_PATH} show $ + -instr-profile=${target_code_coverage_COVERAGE_TARGET_NAME}.profdata + -show-line-counts-or-regions + -output-dir=${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/${target_code_coverage_COVERAGE_TARGET_NAME} + -format="html" ${LINKED_OBJECTS} ${EXCLUDE_REGEX} + DEPENDS ccov-processing-${target_code_coverage_COVERAGE_TARGET_NAME}) + + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES + "GNU") + set(COVERAGE_INFO + "${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/${target_code_coverage_COVERAGE_TARGET_NAME}.info" + ) + + # Run the executable, generating coverage information + add_custom_target( + ccov-run-${target_code_coverage_COVERAGE_TARGET_NAME} + COMMAND + ${CMAKE_CROSSCOMPILING_EMULATOR} ${target_code_coverage_PRE_ARGS} + $ ${target_code_coverage_ARGS} + DEPENDS ${TARGET_NAME}) + + # Generate exclusion string for use + foreach(EXCLUDE_ITEM ${target_code_coverage_EXCLUDE}) + set(EXCLUDE_REGEX ${EXCLUDE_REGEX} --remove ${COVERAGE_INFO} + '${EXCLUDE_ITEM}') + endforeach() + + if(EXCLUDE_REGEX) + set(EXCLUDE_COMMAND ${LCOV_PATH} ${EXCLUDE_REGEX} --output-file + ${COVERAGE_INFO}) + else() + set(EXCLUDE_COMMAND ;) + endif() + + if(NOT ${target_code_coverage_EXTERNAL}) + set(EXTERNAL_OPTION --no-external) + endif() + + # Capture coverage data + if(${CMAKE_VERSION} VERSION_LESS "3.17.0") + add_custom_target( + ccov-capture-${target_code_coverage_COVERAGE_TARGET_NAME} + COMMAND ${CMAKE_COMMAND} -E remove -f ${COVERAGE_INFO} + COMMAND ${LCOV_PATH} --directory ${CMAKE_BINARY_DIR} --zerocounters + COMMAND + ${CMAKE_CROSSCOMPILING_EMULATOR} ${target_code_coverage_PRE_ARGS} + $ ${target_code_coverage_ARGS} + COMMAND + ${LCOV_PATH} --directory ${CMAKE_BINARY_DIR} --base-directory + ${CMAKE_SOURCE_DIR} --capture ${EXTERNAL_OPTION} --output-file + ${COVERAGE_INFO} + COMMAND ${EXCLUDE_COMMAND} + DEPENDS ${TARGET_NAME}) + else() + add_custom_target( + ccov-capture-${target_code_coverage_COVERAGE_TARGET_NAME} + COMMAND ${CMAKE_COMMAND} -E rm -f ${COVERAGE_INFO} + COMMAND ${LCOV_PATH} --directory ${CMAKE_BINARY_DIR} --zerocounters + COMMAND + ${CMAKE_CROSSCOMPILING_EMULATOR} ${target_code_coverage_PRE_ARGS} + $ ${target_code_coverage_ARGS} + COMMAND + ${LCOV_PATH} --directory ${CMAKE_BINARY_DIR} --base-directory + ${CMAKE_SOURCE_DIR} --capture ${EXTERNAL_OPTION} --output-file + ${COVERAGE_INFO} + COMMAND ${EXCLUDE_COMMAND} + DEPENDS ${TARGET_NAME}) + endif() + + # Generates HTML output of the coverage information for perusal + add_custom_target( + ccov-${target_code_coverage_COVERAGE_TARGET_NAME} + COMMAND + ${GENHTML_PATH} -o + ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/${target_code_coverage_COVERAGE_TARGET_NAME} + ${COVERAGE_INFO} + DEPENDS ccov-capture-${target_code_coverage_COVERAGE_TARGET_NAME}) + endif() + + add_custom_command( + TARGET ccov-${target_code_coverage_COVERAGE_TARGET_NAME} + POST_BUILD + COMMAND ; + COMMENT + "Open ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/${target_code_coverage_COVERAGE_TARGET_NAME}/index.html in your browser to view the coverage report." + ) + + # AUTO + if(target_code_coverage_AUTO) + if(NOT TARGET ccov) + add_custom_target(ccov) + endif() + add_dependencies(ccov ccov-${target_code_coverage_COVERAGE_TARGET_NAME}) + + if(NOT CMAKE_C_COMPILER_ID MATCHES "GNU" AND NOT CMAKE_CXX_COMPILER_ID + MATCHES "GNU") + if(NOT TARGET ccov-report) + add_custom_target(ccov-report) + endif() + add_dependencies( + ccov-report + ccov-report-${target_code_coverage_COVERAGE_TARGET_NAME}) + endif() + endif() + + # ALL + if(target_code_coverage_ALL) + if(NOT TARGET ccov-all-processing) + message( + FATAL_ERROR + "Calling target_code_coverage with 'ALL' must be after a call to 'add_code_coverage_all_targets'." + ) + endif() + + add_dependencies(ccov-all-processing + ccov-run-${target_code_coverage_COVERAGE_TARGET_NAME}) + endif() + endif() + endif() +endfunction() + +# Adds code coverage instrumentation to all targets in the current directory and +# any subdirectories. To add coverage instrumentation to only specific targets, +# use `target_code_coverage`. +function(add_code_coverage) + if(CODE_COVERAGE) + if(CMAKE_C_COMPILER_ID MATCHES "(Apple)?[Cc]lang" + OR CMAKE_CXX_COMPILER_ID MATCHES "(Apple)?[Cc]lang") + add_compile_options(-fprofile-instr-generate -fcoverage-mapping) + add_link_options(-fprofile-instr-generate -fcoverage-mapping) + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES + "GNU") + add_compile_options( + -fprofile-arcs -ftest-coverage + $<$:-fno-elide-constructors> -fno-default-inline) + link_libraries(gcov) + endif() + endif() +endfunction() + +# Adds the 'ccov-all' type targets that calls all targets added via +# `target_code_coverage` with the `ALL` parameter, but merges all the coverage +# data from them into a single large report instead of the numerous smaller +# reports. Also adds the ccov-all-capture Generates an all-merged.info file, for +# use with coverage dashboards (e.g. codecov.io, coveralls). +# ~~~ +# Optional: +# EXCLUDE - Excludes files of the patterns provided from coverage. Note that GCC/lcov excludes by glob pattern, and clang/LLVM excludes via regex! +# ~~~ +function(add_code_coverage_all_targets) + # Argument parsing + set(multi_value_keywords EXCLUDE) + cmake_parse_arguments(add_code_coverage_all_targets "" "" + "${multi_value_keywords}" ${ARGN}) + + if(CODE_COVERAGE) + if(CMAKE_C_COMPILER_ID MATCHES "(Apple)?[Cc]lang" + OR CMAKE_CXX_COMPILER_ID MATCHES "(Apple)?[Cc]lang") + + # Merge the profile data for all of the run executables + if(WIN32) + add_custom_target( + ccov-all-processing + COMMAND + powershell -Command $$FILELIST = Get-Content + ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/profraw.list\; llvm-profdata.exe + merge -o ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/all-merged.profdata + -sparse $$FILELIST) + else() + add_custom_target( + ccov-all-processing + COMMAND + ${LLVM_PROFDATA_PATH} merge -o + ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/all-merged.profdata -sparse `cat + ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/profraw.list`) + endif() + + # Regex exclude only available for LLVM >= 7 + if(LLVM_COV_VERSION VERSION_GREATER_EQUAL "7.0.0") + foreach(EXCLUDE_ITEM ${add_code_coverage_all_targets_EXCLUDE}) + set(EXCLUDE_REGEX ${EXCLUDE_REGEX} + -ignore-filename-regex='${EXCLUDE_ITEM}') + endforeach() + endif() + + # Print summary of the code coverage information to the command line + if(WIN32) + add_custom_target( + ccov-all-report + COMMAND + powershell -Command $$FILELIST = Get-Content + ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/binaries.list\; llvm-cov.exe + report $$FILELIST + -instr-profile=${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/all-merged.profdata + ${EXCLUDE_REGEX} + DEPENDS ccov-all-processing) + else() + add_custom_target( + ccov-all-report + COMMAND + ${LLVM_COV_PATH} report `cat + ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/binaries.list` + -instr-profile=${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/all-merged.profdata + ${EXCLUDE_REGEX} + DEPENDS ccov-all-processing) + endif() + + # Export coverage information so continuous integration tools (e.g. + # Jenkins) can consume it + if(WIN32) + add_custom_target( + ccov-all-export + COMMAND + powershell -Command $$FILELIST = Get-Content + ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/binaries.list\; llvm-cov.exe + export $$FILELIST + -instr-profile=${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/all-merged.profdata + -format="text" ${EXCLUDE_REGEX} > + ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/coverage.json + DEPENDS ccov-all-processing) + else() + add_custom_target( + ccov-all-export + COMMAND + ${LLVM_COV_PATH} export `cat + ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/binaries.list` + -instr-profile=${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/all-merged.profdata + -format="text" ${EXCLUDE_REGEX} > + ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/coverage.json + DEPENDS ccov-all-processing) + endif() + + # Generate HTML output of all added targets for perusal + if(WIN32) + add_custom_target( + ccov-all + COMMAND + powershell -Command $$FILELIST = Get-Content + ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/binaries.list\; llvm-cov.exe show + $$FILELIST + -instr-profile=${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/all-merged.profdata + -show-line-counts-or-regions + -output-dir=${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/all-merged + -format="html" ${EXCLUDE_REGEX} + DEPENDS ccov-all-processing) + else() + add_custom_target( + ccov-all + COMMAND + ${LLVM_COV_PATH} show `cat + ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/binaries.list` + -instr-profile=${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/all-merged.profdata + -show-line-counts-or-regions + -output-dir=${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/all-merged + -format="html" ${EXCLUDE_REGEX} + DEPENDS ccov-all-processing) + endif() + + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES + "GNU") + set(COVERAGE_INFO "${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/all-merged.info") + + # Nothing required for gcov + add_custom_target(ccov-all-processing COMMAND ;) + + # Exclusion regex string creation + set(EXCLUDE_REGEX) + foreach(EXCLUDE_ITEM ${add_code_coverage_all_targets_EXCLUDE}) + set(EXCLUDE_REGEX ${EXCLUDE_REGEX} --remove ${COVERAGE_INFO} + '${EXCLUDE_ITEM}') + endforeach() + + if(EXCLUDE_REGEX) + set(EXCLUDE_COMMAND ${LCOV_PATH} ${EXCLUDE_REGEX} --output-file + ${COVERAGE_INFO}) + else() + set(EXCLUDE_COMMAND ;) + endif() + + # Capture coverage data + if(${CMAKE_VERSION} VERSION_LESS "3.17.0") + add_custom_target( + ccov-all-capture + COMMAND ${CMAKE_COMMAND} -E remove -f ${COVERAGE_INFO} + COMMAND ${LCOV_PATH} --directory ${CMAKE_BINARY_DIR} --capture + --output-file ${COVERAGE_INFO} + COMMAND ${EXCLUDE_COMMAND} + DEPENDS ccov-all-processing) + else() + add_custom_target( + ccov-all-capture + COMMAND ${CMAKE_COMMAND} -E rm -f ${COVERAGE_INFO} + COMMAND ${LCOV_PATH} --directory ${CMAKE_BINARY_DIR} --capture + --output-file ${COVERAGE_INFO} + COMMAND ${EXCLUDE_COMMAND} + DEPENDS ccov-all-processing) + endif() + + # Generates HTML output of all targets for perusal + add_custom_target( + ccov-all + COMMAND ${GENHTML_PATH} -o ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/all-merged + ${COVERAGE_INFO} -p ${CMAKE_SOURCE_DIR} + DEPENDS ccov-all-capture) + + endif() + + add_custom_command( + TARGET ccov-all + POST_BUILD + COMMAND ; + COMMENT + "Open ${CMAKE_COVERAGE_OUTPUT_DIRECTORY}/all-merged/index.html in your browser to view the coverage report." + ) + endif() +endfunction() diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt deleted file mode 100644 index 39b8982..0000000 --- a/core/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -file(GLOB_RECURSE _HDRS "include/matcher/*.(h|hpp)") -file(GLOB_RECURSE _SRCS "include/matcher/impl/*.cpp") -add_library(regexmatcher_core STATIC) -target_sources(regexmatcher_core - PUBLIC ${_HDRS} ${CMAKE_BINARY_DIR}/config/RegexMatcherConfig.h - PRIVATE ${_SRCS} -) -target_include_directories(regexmatcher_core PUBLIC include ${CMAKE_BINARY_DIR}/config) - -install(TARGETS regexmatcher_core DESTINATION lib) -install(FILES ${_HDRS} ${CMAKE_BINARY_DIR}/config/RegexMatcherConfig.h DESTINATION include/matcher) - -add_library(RegexMatcher::core ALIAS regexmatcher_core) diff --git a/core/include/matcher/core.hpp b/core/include/matcher/core.hpp deleted file mode 100644 index d7a3e63..0000000 --- a/core/include/matcher/core.hpp +++ /dev/null @@ -1,349 +0,0 @@ -#pragma once - -#include "RegexMatcherConfig.h" - -#include -#include -#include -#include -#include -#include -#include - -#ifdef DEBUG -#include -#endif - -/** - * @brief Public classes of the library - * - */ -namespace matcher { - /** - * @brief Contains the interface for adding regexes and matching a string with the trie - * - * @tparam RegexData Type of associated data with each regex - * @tparam char_t Type of the indiviidual symbols in the strings and regexes - */ - template - class RegexMatcher; -} - -/** - * @brief Private helper functions and classes for the library - * - */ -namespace { - - /** - * @brief Class containing the min and max repeat of the same edge - * - */ - struct Limits; - - /** - * @brief Class containing the list of regexes using the given edge - * - * @tparam T type of the reference to the match data - */ - template - struct EdgeInfo; - - /** - * @brief Contains the characters and additional attributes for wildcard symbols and Null symbols - * - */ - template - struct symbol; - - /** - * @brief Node in the trie-grah - * - * @tparam RegexData Type of the data associated with each regex - * @tparam char_t Type of symbols used - */ - template - class Node; - - template - class SubTree; -} - -namespace { - struct Limits { - size_t min; - std::optional max; - - Limits() : Limits (0, std::nullopt) {} - - Limits(size_t min, std::nullopt_t) { - this->min = min; - this->max = std::nullopt; - } - - Limits(std::nullopt_t, size_t max) : Limits (0, max) {} - - Limits(size_t min, size_t max) { - this->min = min; - this->max = max; - } - - static const Limits common_edge; - static const Limits zero_or_once; - static const Limits once_or_more; - static const Limits zero_or_more; - - static std::string to_string(std::optional::iterator> it) { - if (!it.has_value()) { return ""; } - std::stringstream ss; - if (!it.value()->max.has_value()) { ss << "(" << it.value()->min << "+)"; } - else { ss << "(" << it.value()->min << "..." << it.value()->max.value() << ")"; } - return ss.str(); - } - - static std::string to_string(Limits a) { - std::stringstream ss; - if (!a.max.has_value()) { ss << "(" << a.min << "+)"; } - else { ss << "(" << a.min << "..." << a.max.value() << ")"; } - return ss.str(); - } - - Limits& operator--() { - if (min > 0) min --; - if (max.has_value() && max.value() > 0) max = max.value() - 1; - return *this; - } - - bool is_required() const { - return min > 0; - } - - bool is_allowed_to_repeat() const { - return !max.has_value() || (max.value() > 0 && this->min <= this->max.value()); - } - }; - - const Limits Limits::common_edge = Limits(); - const Limits Limits::zero_or_once = Limits(0, 1); - const Limits Limits::once_or_more = Limits(1, std::nullopt); - const Limits Limits::zero_or_more = Limits(0, std::nullopt); - - template - struct EdgeInfo { - std::map::iterator>> paths; // each path may have different requirements for how many times should the edge be repeated. - Node* to; - }; - - template - struct symbol { - char_t ch; - bool wildcard; - bool none; - - static const symbol Any; - static const symbol None; - static const symbol EOR; // end-of-regex - - symbol() : symbol(symbol::None) {} - symbol(char_t s) : ch(s), wildcard(false), none(false) {} - symbol(char_t s, bool w, bool n) : ch(s), wildcard(w), none(n) {} - - inline bool operator== (const symbol& s) const { - return (wildcard == s.wildcard) && (none == s.none) && (ch == s.ch); - } - inline bool operator!= (const symbol& s) const { - return (wildcard != s.wildcard) || (none != s.none) || (ch != s.ch); - } - - bool operator<(const symbol& s) const { - if (ch == s.ch) { - if (wildcard == s.wildcard) - return none < s.none; - return wildcard < s.wildcard; - } - return (ch < s.ch); - } - - inline std::basic_string to_string() const { - if (*this == symbol::Any) return "wildcard"; - if (*this == symbol::None) return "(empty)"; - if (*this == symbol::EOR) return "EOR"; - return std::basic_string(1, ch); - } - }; - - template - const symbol symbol::Any{'\0', true, false}; - - template - const symbol symbol::None{'-', false, true}; - - template - const symbol symbol::EOR{'\0', false, true}; - - template - class Node { - private: - static std::map, std::string> special_symbols; - static std::list all_limits; - - /** - * @brief All directly connected nodes - * - */ - std::map, EdgeInfo>> neighbours; - - /** - * @brief The current symbol that the regex would match - * - */ - symbol current_symbol; - - /** - * @brief Construct a new Node object - * - */ - Node () { - current_symbol = symbol::None; - } - - /** - * @brief Construct a new Node object - * - * @param ch Node's symbol - */ - Node (symbol ch) { - current_symbol = ch; - } - - /** - * @brief Represents the current node's symbol as string - * - * @return std::string string representation of the node's symbol - */ - std::string to_string() const { - if (current_symbol == symbol::Any) - return "wildcard"; - if (current_symbol == symbol::None) - return "(empty)"; - return std::string(1, current_symbol.ch); - } - - /** - * @brief Checks if there is a node with a given symbol in the neighbour list - * - * @param ch Symbol to be checked - * @return true if a node with this symbol is direct neighbour to this node - * @return false if there is no node with this symbol as direct neighbour to this node - */ - bool hasChild(symbol ch); - - /** - * @brief Get the Child object with the given symbol - * - * @param ch the symbol that's being looked for - * @return Node* the node correspondign to this symbol - */ - Node* getChild(symbol ch); - - /** - * @brief Adds a child node to the current one and marks the connection as part of a given regex match - * - * @param child Existing node - * @param regex Regex data that is being used to indentify the regex that the edge is part of - * @param limits Pointer to the shared limit of the edge (nullptr if no limit is applied) - */ - void connect_with(Node* child, RegexData regex, std::optional::iterator> limits = std::nullopt); - - /** - * @brief Matches a string with all regexes and returns the identified of the one that matches - * - * @param text string that is being tried to be matched with any of the added regexes - * @tparam ConstIterator const iterator in a container - * @return std::vector set of unique identifiers of the regexes that matches the string - */ - template - std::vector match(ConstIterator, ConstIterator); - - template - std::vector match_helper(ConstIterator, ConstIterator, const std::vector&, Node*); - - #ifdef DEBUG - void print_helper(size_t layer, std::set*>& traversed, std::map*, std::string>& nodes) const; - - void print() const; - #endif - - friend class matcher::RegexMatcher; - }; - - template - class SubTree { - public: - std::vector roots; - std::vector leafs; - - SubTree(std::vector a, std::vector b) : roots(a), leafs(b) {} - SubTree() : roots(), leafs() {} - - inline const std::vector& get_roots() const; - - inline const std::vector& get_leafs() const; - }; -} - -#include -#include - -namespace matcher { - template - class RegexMatcher { - Node root; - - template - static std::list::iterator processLimit(const SubTree>&, SubTree>&, RegexData, ConstIterator&); - - template - static SubTree> processSet(std::vector*>, RegexData, ConstIterator&); - - template - static SubTree> process(std::vector*>, RegexData, ConstIterator&, ConstIterator, const bool); - - public: - /** - * @brief Construct a new Regex Matcher object - * - */ - RegexMatcher() {} - - /** - * @brief Adds another regex to the set of regexes - * - * @tparam Iterable Set of characters for the regex. Must implement std::cbegin and std::cend - * @tparam RegexData Value that will be associated with the regex - */ - template - void add_regex(Iterable, RegexData); - - /** - * @brief Matches a string with all added regexes - * - * @tparam Iterable Set of characters. Must implement std::cbegin and std::cend - * @return std::vector Set of regexes' UIDs that match the text - */ - template - std::vector match(Iterable); - - #ifdef DEBUG - /** - * @brief Prints list of edges withing the pattern graph - * - */ - void print() const { - this->root.print(); - } - #endif - }; -} - -#include \ No newline at end of file diff --git a/core/include/matcher/impl/core.cpp b/core/include/matcher/impl/core.cpp deleted file mode 100644 index 56c1c77..0000000 --- a/core/include/matcher/impl/core.cpp +++ /dev/null @@ -1,211 +0,0 @@ -#ifndef CORE_IMPL -#define CORE_IMPL - -#include - -#include - -namespace matcher { - - template - template - std::list::iterator RegexMatcher::processLimit(const SubTree>& parent_of_latest, SubTree>& lastest, RegexData regex, ConstIterator& it) { - if (*it != '{') // not called at the beginning of a set - throw std::logic_error("The iterator doesn't start from a limit group."); - else - it++; - - std::list::iterator answer = Node::all_limits.insert(Node::all_limits.end(), Limits::common_edge); - bool min = true; - size_t number = 0; - - number = 0; - while(*it != '}') { - if (*it == ',') { - min = false; - answer->min = number; - number = 0; - } - else { // it is a digit - number = number * 10 + (*it - '0'); - } - it ++; - } - - if (!min && number != 0) - answer->max = number; - if (!min && number == 0) - answer->max = std::nullopt; - if (min) - answer->max = number; - - const size_t leafs = lastest.get_leafs().size(); - - if (answer->min == 0) { - for (auto root : parent_of_latest.get_leafs()) { - lastest.leafs.push_back(root); - } - answer->min = 1; - } - answer->min = answer->min - 1; - if (answer->max.has_value()) - answer->max = answer->max.value() - 1; - - if (answer->is_allowed_to_repeat()) { - for (size_t i = 0 ; i < leafs ; i ++) { - for (auto root : lastest.get_roots()) { - lastest.get_leafs()[i]->connect_with(root, regex, answer); - } - } - } - - return answer; - } - - template - template - SubTree> RegexMatcher::processSet(std::vector*> parents, [[maybe_unused]] RegexData regex, ConstIterator& it) { - if (*it != '[') // not called at the beginning of a set - throw std::logic_error("The iterator doesn't start from a set group."); - else - it++; - std::vector*> leafs; - ConstIterator prev; - bool takeTheNextSymbolLitterally = false; - while(*it != ']' || takeTheNextSymbolLitterally) { - if (!takeTheNextSymbolLitterally) { - if (*it == '\\') { // escape symbol is always followed by a reglar character - it ++; // so it is included no matter what - takeTheNextSymbolLitterally = true; - } - else if (*it == '-') { - it ++; - for (char ch = ((*prev) + 1) ; ch <= *it ; ch ++) { - Node* nextLeaf = nullptr; - for (auto parent : parents) - if (parent->hasChild(ch)) { - nextLeaf = parent->getChild(ch); - break; - } - if (nextLeaf == nullptr) { - nextLeaf = new Node(ch); - } - leafs.push_back(nextLeaf); - } - } - // TODO: implement not - else { - takeTheNextSymbolLitterally = true; - } - } - if (takeTheNextSymbolLitterally) - { - Node* nextLeaf = nullptr; - for (auto parent : parents) - if (parent->hasChild(*it)) { - nextLeaf = parent->getChild(*it); - break; - } - if (nextLeaf == nullptr) { - nextLeaf = new Node(*it); - } - leafs.push_back(nextLeaf); - takeTheNextSymbolLitterally = false; - } - prev = it; - it ++; - } - return {leafs, leafs}; - } - - template - template - SubTree> RegexMatcher::process(std::vector*> parents, RegexData regex, ConstIterator& it, ConstIterator end, const bool inBrackets) { - SubTree> answer = {{}, {}}; - std::vector>> nodeLayers = {{parents, parents}}; - for ( ; it != end ; it ++) { - if (*it == ')' && inBrackets) - break; - if (*it == '[') { // start of a set - const auto latest_parents = nodeLayers.back(); - SubTree> newNodes = processSet(latest_parents.get_leafs(), regex, it); - for (auto parent : latest_parents.get_leafs()) { - for (auto newNode : newNodes.get_leafs()) { - parent->connect_with(newNode, regex); - } - } - nodeLayers.push_back(newNodes); - } - else if (*it == '(') { // start of a regex in brackets - it ++; - SubTree> newLayer = process(nodeLayers.back().get_leafs(), regex, it, end, true); // leaves it at the closing bracket - nodeLayers.push_back(newLayer); - } - else if (*it == '|') { - answer.roots.insert(answer.roots.end(), nodeLayers[1].get_leafs().begin(), nodeLayers[1].get_leafs().end()); - answer.leafs.insert(answer.leafs.end(), nodeLayers.back().get_leafs().begin(), nodeLayers.back().get_leafs().end()); - nodeLayers.resize(1); - } - else if (*it == '{') { - [[maybe_unused]] std::list::iterator limits = processLimit(nodeLayers[nodeLayers.size() - 2], nodeLayers.back(), regex, it); - } - else if (auto special_regex = Node::special_symbols.find(*it); special_regex != Node::special_symbols.end()) { - auto tmp_it = special_regex->second.cbegin(); - [[maybe_unused]]std::list::iterator limits = processLimit(nodeLayers[nodeLayers.size() - 2], nodeLayers.back(), regex, tmp_it); - } - else { // normal character - symbol sym; - if (*it == '\\') { // skip escape symbol - it ++; - sym = symbol(*it); - } - else if (*it == '.') - sym = symbol::Any; - else - sym = symbol(*it); - Node* nextNode = nullptr; - for (auto parent : nodeLayers.back().get_leafs()) { - if (parent->neighbours.find(sym) != parent->neighbours.end()) { - nextNode = parent->neighbours[sym].to; - break; - } - } - if (nextNode == nullptr) { - nextNode = new Node(sym); - } - for (auto parent : nodeLayers.back().get_leafs()) { - parent->connect_with(nextNode, regex); - } - nodeLayers.push_back({{nextNode}, {nextNode}}); - } - } - answer.roots.insert(answer.roots.end(), nodeLayers[1].get_leafs().begin(), nodeLayers[1].get_leafs().end()); - answer.leafs.insert(answer.leafs.end(), nodeLayers.back().get_leafs().begin(), nodeLayers.back().get_leafs().end()); - if (it == end) { - Node* end_of_regex = new Node(symbol::EOR); - SubTree> final_answer = {answer.get_roots(), {end_of_regex}}; - for (auto parent : answer.leafs) { - parent->connect_with(end_of_regex, regex); - } - return final_answer; - } - - return answer; - } - - template - template - void RegexMatcher::add_regex(Iterable str, RegexData uid) { - auto it = std::cbegin(str); - process(std::vector{&root}, uid, it, std::cend(str), false); - } - - template - template - std::vector RegexMatcher::match(Iterable str) { - return root.match(std::cbegin(str), std::cend(str)); - } - -} - -#endif diff --git a/core/include/matcher/impl/node.cpp b/core/include/matcher/impl/node.cpp deleted file mode 100644 index c83c568..0000000 --- a/core/include/matcher/impl/node.cpp +++ /dev/null @@ -1,188 +0,0 @@ -#ifndef NODE_IMPL -#define NODE_IMPL - -#include - -namespace { - template - std::vector common_values(const std::vector& sorted, const std::map& paths) { - std::vector answer; - if (sorted.empty()) { - for (const auto [k, _] : paths) - answer.push_back(k); - return answer; - } - auto it = paths.cbegin(); - size_t ind = 0; - while (ind < sorted.size() && it != paths.cend()) { - if (it->first == sorted[ind]) { - answer.push_back(it->first); - it ++; - ind ++; - } else if (it->first < sorted[ind]) { - it ++; - } else { - ind ++; - } - } - return answer; - } - - template - std::list Node::all_limits = std::list(); - - template - std::map, std::string> Node::special_symbols = { - {'+', "{1,}"}, - {'*', "{0,}"}, - {'?', "{0,1}"} - }; - - template - bool Node::hasChild(symbol ch) { - return (this->neighbours.find(ch) != this->neighbours.end()); - } - - template - Node* Node::getChild(symbol ch) { - return this->neighbours.find(ch)->second.to; - } - - template - void Node::connect_with(Node* child, RegexData regex, std::optional::iterator> limit) { - if (auto existing_child = neighbours.find(child->current_symbol); existing_child != neighbours.end()) { - if (auto it = existing_child->second.paths.find(regex); it != existing_child->second.paths.end()) - { - if (!it->second.has_value() && limit == std::nullopt) { - it->second = Node::all_limits.insert(Node::all_limits.end(), Limits(1,1)); - } - else if (it->second.has_value() && limit == std::nullopt) { - (it->second.value()->min) ++; - if (it->second.value()->max.has_value()) { - (it->second.value()->max.value()) ++; - } - } - } - else if (this == child && limit == std::nullopt) { - neighbours[child->current_symbol].paths.emplace(regex, Node::all_limits.insert(Node::all_limits.end(), Limits(1,1))); - } - else - neighbours[child->current_symbol].paths.emplace(regex, limit); - return; - } - neighbours[child->current_symbol].paths.emplace(regex, limit); - neighbours[child->current_symbol].to = child; - } - - template - template - std::vector Node::match(ConstIterator begin, ConstIterator end) { - return match_helper(begin, end, {}, nullptr); - } - - template - template - std::vector Node::match_helper(ConstIterator begin, ConstIterator end, const std::vector& paths, Node* prev) { - if (begin == end) { - if (auto it = this->neighbours.find(symbol::EOR); it != this->neighbours.end()) { - std::vector answer; - std::vector potential_answer = common_values(paths, it->second.paths); - if (prev != nullptr) - { - for (RegexData pathId : potential_answer) { - bool to_include = true; - if (const auto knot = prev->neighbours.find(this->current_symbol); knot != prev->neighbours.end()) { - if (const auto knot_path = it->second.paths.find(pathId); knot_path != it->second.paths.end()) - if (knot_path->second.has_value()) { - to_include &= knot_path->second.value()->min == 0; - } - } - if (const auto knot = this->neighbours.find(this->current_symbol); knot != this->neighbours.end()) { - if (knot->second.paths.find(pathId) != knot->second.paths.end()) - if (knot->second.paths[pathId].has_value()) - to_include &= knot->second.paths[pathId].value()->min == 0; - } - if (to_include) { - answer.push_back(pathId); - } - } - return answer; - } - else - return potential_answer; - } - return {}; - } - std::vector answer; - const symbol current = symbol(*begin); - for (symbol to_test : {current, symbol::Any, symbol::None}) { - if (auto it = this->neighbours.find(to_test); it != this->neighbours.end()) { - size_t ind = 0; - std::vector new_paths; - new_paths.reserve(paths.size()); - std::map> current_paths; - for (const auto [pathId, limits_ptr] : it->second.paths) { - if (limits_ptr.has_value() && !limits_ptr.value()->is_allowed_to_repeat()) continue; - if (prev != nullptr) { - if (paths[ind] > pathId) continue; - while (ind < paths.size() && paths[ind] < pathId) ind ++; - if (ind == paths.size()) break; - } - if (prev == nullptr || paths[ind] == pathId) { - new_paths.push_back(pathId); - if (!limits_ptr.has_value()) continue; - current_paths.emplace(pathId, *limits_ptr.value()); - --(*limits_ptr.value()); - } - } - if (!new_paths.empty()) { - if (to_test != symbol::None) begin ++; - for (auto match : it->second.to->match_helper(begin, end, new_paths, this)) { - answer.push_back(match); - } - if (to_test != symbol::None) begin --; - for (const auto [pathId, old_limits] : current_paths) { - if(old_limits.has_value()) - (*it->second.paths[pathId].value()) = old_limits.value(); - } - } - } - } - return answer; - } - -#ifdef DEBUG - template - void Node::print_helper(size_t layer, std::set*>& traversed, std::map*, std::string>& nodes) const { - if (traversed.find(this) != traversed.end()) - return; - const std::basic_string layer_str = (std::basic_stringstream() << layer).str() + "_"; - const std::basic_string next_layer = (std::basic_stringstream() << (layer + 1)).str() + "_"; - traversed.emplace(this); - nodes.emplace(this, layer_str + current_symbol.to_string()); - for (auto child : neighbours) { - if (nodes.find(child.second.to) == nodes.end()) { - nodes.emplace(child.second.to, next_layer + child.second.to->current_symbol.to_string()); - } - std::cout << nodes[this] << " " << nodes[child.second.to] << " "; - std::cout << child.second.paths.begin()->first << Limits::to_string(child.second.paths.begin()->second); - for (auto it = std::next(child.second.paths.begin()) ; it != child.second.paths.end() ; it ++) { - std::cout << "," << it->first << Limits::to_string(it->second); - } - std::cout << std::endl; - if (nodes.find(child.second.to) != nodes.end()) { - child.second.to->print_helper(layer + 1, traversed, nodes); - } - } - } - - template - void Node::print() const { - std::set*> traversed; - std::map*, std::string> nodes; - print_helper(0, traversed, nodes); - } -#endif -} - -#endif diff --git a/core/include/matcher/impl/subtree.cpp b/core/include/matcher/impl/subtree.cpp deleted file mode 100644 index 51ab985..0000000 --- a/core/include/matcher/impl/subtree.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef SUBTREE_IMPL -#define SUBTREE_IMPL - -#include - -namespace { - template - inline const std::vector& SubTree::get_roots() const { - return roots; - } - - template - inline const std::vector& SubTree::get_leafs() const { - return leafs; - } -} - -#endif diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt new file mode 100644 index 0000000..7657080 --- /dev/null +++ b/lib/CMakeLists.txt @@ -0,0 +1,20 @@ +add_library(regexmatcher_core STATIC + src/node_info.cpp +) +target_sources(regexmatcher_core + PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR}/include + ${CMAKE_BINARY_DIR}/config/RegexMatcherConfig.h + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/src +) +target_include_directories(regexmatcher_core PUBLIC include ${CMAKE_BINARY_DIR}/config) + +install(TARGETS regexmatcher_core DESTINATION lib) +install( + DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include + DESTINATION include/ +) +install(FILES ${CMAKE_BINARY_DIR}/config/RegexMatcherConfig.h DESTINATION include/) + +add_library(RegexMatcher::core ALIAS regexmatcher_core) diff --git a/lib/include/node_info.hpp b/lib/include/node_info.hpp new file mode 100644 index 0000000..12ea8a8 --- /dev/null +++ b/lib/include/node_info.hpp @@ -0,0 +1,68 @@ +#pragma once + +#include "regex_matcher.hpp" +#include "range.hpp" + +namespace matcher::regex +{ + /** + * @brief Attributes of the characters managed by the current node + * + */ + enum class Attributes + { + Unset, + Start_of_regex, + PositiveCharacterSet, + NegativeCharacterSet, + Wildcard, + End_of_regex + }; + + /** + * @brief Wrapper for the content of the current node in the graph of regexes + * + * @tparam symbol_t Symbol type + */ + template struct NodeInfo + { + private: + Range charset; + Attributes attributes; + + public: + /** + * @brief Construct a new NodeInfo object from a single unset symbol + * + */ + NodeInfo(); + + /** + * @brief Construct a new NodeInfo object from a single symbol + * + * @param symbol Positive charset containing only the given symbol + */ + explicit NodeInfo(symbol_t symbol); + + /** + * @brief Construct a new NodeInfo object from special node + * + * @param attributes Special node info + */ + explicit NodeInfo(Attributes attributes); + + // Copy constructor + NodeInfo(const NodeInfo& other); + NodeInfo& operator=(const NodeInfo& other); + + // Move constructor + NodeInfo(NodeInfo&& other) noexcept; + NodeInfo& operator=(NodeInfo&& other) noexcept; + + /** + * @brief Comparison operator + * + */ + auto operator<=>(const NodeInfo& other) const; + }; +} // namespace matcher::regex diff --git a/lib/include/range.hpp b/lib/include/range.hpp new file mode 100644 index 0000000..66e9f03 --- /dev/null +++ b/lib/include/range.hpp @@ -0,0 +1,58 @@ +#ifndef REGEX_MATCHER_RANGE_H +#define REGEX_MATCHER_RANGE_H + +namespace matcher::regex +{ + /** + * @brief Wrapper class for symbol ranges of type [from, to] + * + * @tparam symbol_t Symbol type + */ + template class Range + { + // the range is [from, to] + symbol_t from; + symbol_t to; + + public: + /** + * @brief Construct an empty Range object + * + */ + Range(); + + /** + * @brief Construct a new Range object from a single character + * + * @param ch the character + */ + explicit Range(symbol_t ch); + + /** + * @brief Construct a new Range object with range [from, to] + * + * @param from starting character + * @param to ending character (incl. this one) + */ + explicit Range(symbol_t from, symbol_t to); + + /** + * @brief Checks if a given character is in the current range + * + * @param ch character in question + * @return true if the character is in the range [from, to] + * @return false if the character is not in the range [from, to] + */ + bool contains(symbol_t ch); + + /** + * @brief Comparison operator + * + */ + auto operator<=>(const Range& other) const; + }; +} // namespace matcher::regex + +#include "../src/range.cpp" + +#endif diff --git a/lib/include/regex_matcher.hpp b/lib/include/regex_matcher.hpp new file mode 100644 index 0000000..ca9bd9b --- /dev/null +++ b/lib/include/regex_matcher.hpp @@ -0,0 +1,66 @@ +#ifndef REGEX_MATCHER_H +#define REGEX_MATCHER_H + +#include +#include +#include +#include "RegexMatcherConfig.h" + +namespace matcher +{ + template class RegexMatcher + { + public: + // Constructor + RegexMatcher(); + + // Destructor + ~RegexMatcher(); + + // Deleted copy constructor and assignment operator to enforce unique ownership + RegexMatcher(const RegexMatcher&) = delete; + RegexMatcher& operator=(const RegexMatcher&) = delete; + + // Move constructor and assignment operator + RegexMatcher(RegexMatcher&&) noexcept; + RegexMatcher& operator=(RegexMatcher&&) noexcept; + + // Public methods + /** + * @brief Adds new regex to match against + * + * @tparam Iterable Iterable container of symbols + * @param regex Iterable container of the symbols of the regex + * @param user_data User data associated with the given regex + * @return int Number of new nodes created in the trie. When 0, the regex match 1-to-1 previously added regex + */ + template int add_regex(const Iterable& regex, const RegexData& user_data); + + /** + * @brief Matches given set of symbols against all previously added regexes + * + * @tparam Iterable Iterable container of the symbols of the string for matching + * @param regex Iterable container of the symbols of the string for matching + * @return std::vector All regexes that match the given string + */ + template std::vector match(const Iterable& regex); + + /** + * @brief Print the graph structure (for debug purposes) + * + * @param out output stream where the result should be printed + */ + void print(std::ostream& out); + + private: + // Forward declaration of RegexMatcher's implementation + class RegexMatcherImpl; + + // pImpl pointer to hide the implementation details + std::unique_ptr impl; + }; +} // namespace matcher + +#include "../src/regex_matcher.cpp" + +#endif diff --git a/lib/src/node_info.cpp b/lib/src/node_info.cpp new file mode 100644 index 0000000..7ad0d96 --- /dev/null +++ b/lib/src/node_info.cpp @@ -0,0 +1,45 @@ +#include "node_info.hpp" +#include + +namespace matcher::regex +{ + template NodeInfo::NodeInfo() : NodeInfo(Attributes::Unset) + { + } + + template NodeInfo::NodeInfo(symbol_t symbol) : charset(symbol), attributes(Attributes::PositiveCharacterSet) + { + } + + template NodeInfo::NodeInfo(Attributes attributes) : charset(), attributes(attributes) + { + } + + template NodeInfo::NodeInfo(const NodeInfo& other) : charset(other.charset), attributes(other.attributes) + { + } + + template NodeInfo& NodeInfo::operator=(const NodeInfo& other) + { + charset = other.charset; + attributes = other.attributes; + return *this; + } + + template + NodeInfo::NodeInfo(NodeInfo&& other) noexcept : charset(std::move(other.charset)), attributes(std::move(other.attributes)) + { + } + + template NodeInfo& NodeInfo::operator=(NodeInfo&& other) noexcept + { + charset = std::move(other.charset); + attributes = std::move(other.attributes); + return *this; + } + + template auto NodeInfo::operator<=>(const NodeInfo& other) const + { + return (other.attributes == attributes) ? (charset <=> other.charset) : (other.attributes <=> attributes); + } +} // namespace matcher::regex diff --git a/lib/src/range.cpp b/lib/src/range.cpp new file mode 100644 index 0000000..08f913b --- /dev/null +++ b/lib/src/range.cpp @@ -0,0 +1,26 @@ +#include "range.hpp" + +namespace matcher::regex +{ + template Range::Range() : Range(symbol_t() + 1, symbol_t()) + { + } + + template Range::Range(symbol_t ch) : from(ch), to(ch) + { + } + + template Range::Range(symbol_t from, symbol_t to) : from(from), to(to) + { + } + + template bool Range::contains(symbol_t ch) + { + return from <= ch && ch <= to; + } + + template auto Range::operator<=>(const Range& other) const + { + return from <=> other.from; + } +} // namespace matcher::regex diff --git a/lib/src/regex_matcher.cpp b/lib/src/regex_matcher.cpp new file mode 100644 index 0000000..d7457fa --- /dev/null +++ b/lib/src/regex_matcher.cpp @@ -0,0 +1,32 @@ +#include "regex_matcher.hpp" +#include "regex_matcher_impl.hpp" + +namespace matcher +{ + template RegexMatcher::RegexMatcher() : impl(std::make_unique()) + { + } + + template RegexMatcher::~RegexMatcher() = default; + + template + template + int RegexMatcher::add_regex(const Iterable& regex, const RegexData& user_data) + { + return 0; + } + + template + template + std::vector RegexMatcher::match(const Iterable& regex) + { + return {}; + } + + template void RegexMatcher::print(std::ostream& out) + { + out << "Sike!\n"; + return; + } + +} // namespace matcher diff --git a/lib/src/regex_matcher_impl.cpp b/lib/src/regex_matcher_impl.cpp new file mode 100644 index 0000000..3d55b0a --- /dev/null +++ b/lib/src/regex_matcher_impl.cpp @@ -0,0 +1,10 @@ +#include "regex_matcher.hpp" + +namespace matcher +{ + template class RegexMatcher::RegexMatcherImpl + { + public: + }; + +} // namespace matcher diff --git a/lib/src/regex_matcher_impl.hpp b/lib/src/regex_matcher_impl.hpp new file mode 100644 index 0000000..3d55b0a --- /dev/null +++ b/lib/src/regex_matcher_impl.hpp @@ -0,0 +1,10 @@ +#include "regex_matcher.hpp" + +namespace matcher +{ + template class RegexMatcher::RegexMatcherImpl + { + public: + }; + +} // namespace matcher diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index dd7396c..cc45257 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,140 +1,197 @@ -if(CMAKE_BUILD_TYPE STREQUAL "Debug") - add_executable(demo "src/demo.cpp") - target_link_libraries(demo regexmatcher_core) -endif() - -add_executable(matcher-tests "src/main.cpp") -target_link_libraries(matcher-tests regexmatcher_core) - -add_test(NAME matcher-urls - COMMAND $ - --add "/" "\\/([0-9a-z\\-]+)" - --match "/" "/12asdf-" -) - -add_test(NAME matcher-test-no-regex - COMMAND $ - --add "text1" "text2" "text3" - --match "text" "text1" "text2" "text3" "text4" -) - -add_test(NAME matcher-test-group - COMMAND $ - --add "text(1)" "text(2)" "text(3)" - --match "text" "text1" "text2" "text3" "text4" -) - -add_test(NAME matcher-test-global-or - COMMAND $ - --add "text1|test2|test3" - --match "text" "text1" "text2" "text3" "text4" +# Add the test executable +add_executable(playground + playground.cpp ) -add_test(NAME matcher-test-or-within-group - COMMAND $ - --add "text(1|2|3)" - --match "text" "text1" "text2" "text3" "text4" +# Link Google Test and the library under test +target_link_libraries(playground PRIVATE + RegexMatcher::core ) -add_test(NAME matcher-test-letter-asterix - COMMAND $ - --add "text1*" - --match "text" "text1" "text2" "text3" "text4" -) +include(CTest) +enable_testing() -add_test(NAME matcher-test-group-asterix - COMMAND $ - --add "text(1)*" - --match "text" "text1" "text2" "text3" "text4" +# Download and include Google Test +include(FetchContent) +FetchContent_Declare( + googletest + URL https://github.com/google/googletest/archive/refs/heads/main.zip + DOWNLOAD_EXTRACT_TIMESTAMP true ) +FetchContent_MakeAvailable(googletest) -add_test(NAME matcher-test-group-with-or-asterix - COMMAND $ - --add "text(1|2|3)*" - --match "text" "text1" "text2" "text3" "text4" -) +if (CMAKE_C_COMPILER_ID MATCHES "(Apple)?[Cc]lang" OR CMAKE_CXX_COMPILER_ID MATCHES "(Apple)?[Cc]lang" + OR CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "GNU") + # Includes code coverage CMake tool + list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake_tools) + include(code-coverage) + add_code_coverage() # Adds instrumentation to all targets + add_code_coverage_all_targets(EXCLUDE ${CMAKE_SOURCE_DIR}/tests/*) +endif() -add_test(NAME matcher-test-group-with-or-plus - COMMAND $ - --add "text(1|2|3)+" - --match "text" "text1" "text21" "text31" "text34" -) +# Register the test with CTest +include(GoogleTest) -add_test(NAME matcher-test-group-with-repeat - COMMAND $ - --add "text(1|2|3){1,2}" - --match "text" "text1" "text21" "text31" "text34" "text123123" -) +list(APPEND RegexMatcher_UnitTests ranges) +foreach(test IN LISTS RegexMatcher_UnitTests) + # Add the test executable + add_executable(test_${test} + ${test}.cpp + ) -add_test(NAME matcher-test-group-with-question-mark - COMMAND $ - --add "text(1|2|3)?" - --match "text" "text1" "text21" "text31" "text34" "text123123" -) + # Link Google Test and the library under test + target_link_libraries(test_${test} PRIVATE + gtest_main + RegexMatcher::core + ) + gtest_discover_tests(test_${test}) -add_test(NAME matcher-tests-many-regexes-many-matches - COMMAND $ - --add - "d(abc|def)*g+" - "d(abc)*g+" - "a?" - "b|c" - "(d|e)f" - "f[a-c]?d(ab|cd)*g+" - "a{1,3}a" - "aaa" - "aa" - --match - "a" - "aa" - "aaa" - "aaaa" - "a" - "b" - "c" - "d" - "df" - "e" - "ef" - "fdg" - "fdgg" - "fd" - "fdabgg" - "fdababgg" - "fdabcdgg" - "fdcdcdgg" - "fdacgg" - "fadabgg" - "fadababgg" - "fadabcdgg" - "fadcdcdgg" - "fadacgg" - "fbdabgg" - "fbdababgg" - "fbdabcdgg" - "fbdcdcdgg" - "fbdacgg" - "fcdabgg" - "fcdababgg" - "fcdabcdgg" - "fcdcdcdgg" - "fcdacgg" - "fccdabgg" - "fccdababgg" - "fccdabcdgg" - "fccdcdcdgg" - "fccdacgg" - "dab" - "dabcabc" - "dabc" - "ddefdef" - "dabcg" - "dg" - "dabcabcg" - "ddefabcg" - "ddefdefg" -) + if (CMAKE_C_COMPILER_ID MATCHES "(Apple)?[Cc]lang" OR CMAKE_CXX_COMPILER_ID MATCHES "(Apple)?[Cc]lang" + OR CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "GNU") + target_code_coverage(test_${test} AUTO ALL EXCLUDE test/*) + endif() +endforeach() -add_custom_target(tests COMMAND ${CMAKE_CTEST_COMMAND} -C ${CMAKE_BUILD_TYPE} --output-on-failure - DEPENDS matcher-tests) +# +# if(CMAKE_BUILD_TYPE STREQUAL "Debug") +# add_executable(demo "src/demo.cpp") +# target_link_libraries(demo regexmatcher_core) +# endif() +# +# add_executable(matcher-tests "src/main.cpp") +# target_link_libraries(matcher-tests regexmatcher_core) +# +# add_test(NAME matcher-urls +# COMMAND $ +# --add "/" "\\/([0-9a-z\\-]+)" +# --match "/" "/12asdf-" +# ) +# +# add_test(NAME matcher-test-no-regex +# COMMAND $ +# --add "text1" "text2" "text3" +# --match "text" "text1" "text2" "text3" "text4" +# ) +# +# add_test(NAME matcher-test-group +# COMMAND $ +# --add "text(1)" "text(2)" "text(3)" +# --match "text" "text1" "text2" "text3" "text4" +# ) +# +# add_test(NAME matcher-test-global-or +# COMMAND $ +# --add "text1|test2|test3" +# --match "text" "text1" "text2" "text3" "text4" +# ) +# +# add_test(NAME matcher-test-or-within-group +# COMMAND $ +# --add "text(1|2|3)" +# --match "text" "text1" "text2" "text3" "text4" +# ) +# +# add_test(NAME matcher-test-letter-asterix +# COMMAND $ +# --add "text1*" +# --match "text" "text1" "text2" "text3" "text4" +# ) +# +# add_test(NAME matcher-test-group-asterix +# COMMAND $ +# --add "text(1)*" +# --match "text" "text1" "text2" "text3" "text4" +# ) +# +# add_test(NAME matcher-test-group-with-or-asterix +# COMMAND $ +# --add "text(1|2|3)*" +# --match "text" "text1" "text2" "text3" "text4" +# ) +# +# add_test(NAME matcher-test-group-with-or-plus +# COMMAND $ +# --add "text(1|2|3)+" +# --match "text" "text1" "text21" "text31" "text34" +# ) +# +# add_test(NAME matcher-test-group-with-repeat +# COMMAND $ +# --add "text(1|2|3){1,2}" +# --match "text" "text1" "text21" "text31" "text34" "text123123" +# ) +# +# add_test(NAME matcher-test-group-with-question-mark +# COMMAND $ +# --add "text(1|2|3)?" +# --match "text" "text1" "text21" "text31" "text34" "text123123" +# ) +# +# +# add_test(NAME matcher-tests-many-regexes-many-matches +# COMMAND $ +# --add +# "d(abc|def)*g+" +# "d(abc)*g+" +# "a?" +# "b|c" +# "(d|e)f" +# "f[a-c]?d(ab|cd)*g+" +# "a{1,3}a" +# "aaa" +# "aa" +# --match +# "a" +# "aa" +# "aaa" +# "aaaa" +# "a" +# "b" +# "c" +# "d" +# "df" +# "e" +# "ef" +# "fdg" +# "fdgg" +# "fd" +# "fdabgg" +# "fdababgg" +# "fdabcdgg" +# "fdcdcdgg" +# "fdacgg" +# "fadabgg" +# "fadababgg" +# "fadabcdgg" +# "fadcdcdgg" +# "fadacgg" +# "fbdabgg" +# "fbdababgg" +# "fbdabcdgg" +# "fbdcdcdgg" +# "fbdacgg" +# "fcdabgg" +# "fcdababgg" +# "fcdabcdgg" +# "fcdcdcdgg" +# "fcdacgg" +# "fccdabgg" +# "fccdababgg" +# "fccdabcdgg" +# "fccdcdcdgg" +# "fccdacgg" +# "dab" +# "dabcabc" +# "dabc" +# "ddefdef" +# "dabcg" +# "dg" +# "dabcabcg" +# "ddefabcg" +# "ddefdefg" +# ) +# +# add_custom_target(tests COMMAND ${CMAKE_CTEST_COMMAND} -C ${CMAKE_BUILD_TYPE} --output-on-failure +# DEPENDS matcher-tests) +# \ No newline at end of file diff --git a/tests/playground.cpp b/tests/playground.cpp new file mode 100644 index 0000000..1aeabed --- /dev/null +++ b/tests/playground.cpp @@ -0,0 +1,77 @@ +#include +#include +#include +#include +#include +#include + +struct limit +{ + int min; + std::optional max; + + constexpr limit(int min, int max) : min(min), max(max) { } + constexpr limit(int min) : min(min), max(std::nullopt) { } + + static const limit once; + static const limit at_most_once; + static const limit at_least_once; +}; + +static const limit once = limit(1, 1); +static const limit at_most_once = limit(0, 1); +static const limit at_least_once = limit(1); + +struct edge_manager +{ + using edge_limit_ptr = std::list::iterator; + std::list limits; + + edge_limit_ptr prepare_edge_limit(const limit& x) + { + return limits.insert(limits.end(), x); + } + + struct matcher_edge_matcher + { + struct visits { + bool can_visit_again = true; + int counts = 0; + }; + std::map visited; + const edge_manager& manager; + matcher_edge_matcher(const edge_manager& manager) : manager(manager), visited() {} + matcher_edge_matcher(matcher_edge_matcher&& matcher_manager) : manager(matcher_manager.manager), visited(std::move(matcher_manager.visited)) {} + + }; + + +}; + +struct interval +{ + char from, to; +}; + +struct node; + +struct chidren_set +{ + std::vector> children; + +}; + +struct node { + interval current; + chidren_set children; +}; + +node* createTreeFrom(std::string str) { + +} + +int main() +{ + std::cout << "Hello world" << std::endl; + return 0; +} \ No newline at end of file diff --git a/tests/ranges.cpp b/tests/ranges.cpp new file mode 100644 index 0000000..ae00cd3 --- /dev/null +++ b/tests/ranges.cpp @@ -0,0 +1,39 @@ +#include +#include "node_info.hpp" +#include "range.hpp" + +// Test cases +TEST(CreateRange, SingleSymbol) +{ + auto range = matcher::regex::Range('a'); + EXPECT_EQ(range.contains('a'), true); + EXPECT_EQ(range.contains('b'), false); + EXPECT_EQ(range.contains('A'), false); +} + +TEST(CreateRange, Empty) +{ + matcher::regex::Range range; + EXPECT_EQ(range.contains('a'), false); + EXPECT_EQ(range.contains('b'), false); + EXPECT_EQ(range.contains('A'), false); + EXPECT_EQ(range.contains('\0'), false); +} + +TEST(CreateRange, FromAtoZ) +{ + auto range = matcher::regex::Range('A', 'Z'); + EXPECT_EQ(range.contains('a'), false); + EXPECT_EQ(range.contains('b'), false); + for (char ch = 'A'; ch <= 'Z'; ch++) + { + EXPECT_EQ(range.contains(ch), true); + } + EXPECT_EQ(range.contains('\0'), false); +} + +int main(int argc, char** argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tests/src/demo.cpp b/tests/src/demo.cpp index 674815a..f7b24c2 100644 --- a/tests/src/demo.cpp +++ b/tests/src/demo.cpp @@ -1,21 +1,16 @@ -#include +#include #include -#include -using std::chrono::high_resolution_clock; -using std::chrono::duration_cast; -using std::chrono::duration; -using std::chrono::milliseconds; -using namespace std::chrono_literals; +int main(int argc, char** argv) +{ + std::cout << "RegexMatcher VERSION: " << matcher::RegexMatcher_VERSION_MAJOR << "." << matcher::RegexMatcher_VERSION_MINOR << "." + << matcher::RegexMatcher_VERSION_PATCH << "." << matcher::RegexMatcher_VERSION_TWEAK << std::endl; -int main(int argc, char** argv) { - std::cout << "RegexMatcher VERSION: " << RegexMatcher_VERSION_MAJOR << "." << RegexMatcher_VERSION_MINOR << "." << RegexMatcher_VERSION_PATCH << "." << RegexMatcher_VERSION_TWEAK << std::endl; - - matcher::RegexMatcher root; - root.add_regex(std::string("a|b"), 0); - root.print(); - root.add_regex(std::string("a+"), 1); - root.print(); - const auto answer = root.match(std::string("ccc")); - return 0; -} \ No newline at end of file + matcher::RegexMatcher root; + root.add_regex(std::string("a|b"), 0); + root.print(std::cout); + root.add_regex(std::string("a+"), 1); + root.print(std::cout); + const auto answer = root.match(std::string("ccc")); + return 0; +} diff --git a/tests/src/main.cpp b/tests/src/main.cpp index f34a300..92d8899 100644 --- a/tests/src/main.cpp +++ b/tests/src/main.cpp @@ -1,89 +1,106 @@ -#include +#include #include #include #include -#define enable(x,y) x.push_back(y); -#define disable(x,y) +#define enable(x, y) x.push_back(y); +#define disable(x, y) #include using std::chrono::high_resolution_clock; -using std::chrono::duration_cast; using std::chrono::duration; using std::chrono::milliseconds; using namespace std::chrono_literals; -int main(int argc, char** argv) { - std::cout << "RegexMatcher VERSION: " << RegexMatcher_VERSION_MAJOR << "." << RegexMatcher_VERSION_MINOR << "." << RegexMatcher_VERSION_PATCH << "." << RegexMatcher_VERSION_TWEAK << std::endl; - - matcher::RegexMatcher root; - std::vector regexes; - int num = 0; - bool is_it_regex = false; - std::chrono::nanoseconds total = 0ns, alternative = 0ns; - - for (int i = 1 ; i < argc ; i ++) { - if (strcmp(argv[i], "--add") == 0) { - is_it_regex = true; - } - else if (strcmp(argv[i], "--match") == 0) { - is_it_regex = false; - } - else if (is_it_regex) { - const auto t1 = high_resolution_clock::now(); - root.add_regex(std::string(argv[i]), num++); - const auto t2 = high_resolution_clock::now(); - regexes.push_back(argv[i]); - const auto t3 = high_resolution_clock::now(); - total = total + (t2-t1); - alternative = alternative + (t3-t2); - } - else if (!is_it_regex) { - std::string text = std::string(argv[i]); - const auto t1 = high_resolution_clock::now(); - const auto answer = root.match(text); - const auto t2 = high_resolution_clock::now(); - std::vector test_result; - for (size_t i = 0 ; i < regexes.size() ; i ++) { - if (std::regex_search(text, std::regex("^(" + regexes[i] + ")$"))) { - test_result.push_back(i); - } - } - const auto t3 = high_resolution_clock::now(); +int main(int argc, char** argv) +{ + std::cout << "RegexMatcher VERSION: " << matcher::RegexMatcher_VERSION_MAJOR << "." << matcher::RegexMatcher_VERSION_MINOR << "." + << matcher::RegexMatcher_VERSION_PATCH << "." << matcher::RegexMatcher_VERSION_TWEAK << std::endl; - // Time capturing - total = total + (t2-t1); - alternative = alternative + (t3-t2); + matcher::RegexMatcher root; + std::vector regexes; + int num = 0; + bool is_it_regex = false; + std::chrono::nanoseconds total = 0ns, alternative = 0ns; - // Validity check - if (answer.size() != test_result.size()) { - std::cout << "\tFailed:\n"; - std::cout << "\t" << text << std::endl; - for (auto x : answer) { - std::cout << "\t\t matcher lib: " << x << ") " << regexes[x - 1] << std::endl; - } - std::cout << "\t\t-----------------------\n"; - for (auto x : test_result) { - std::cout << "\t\t std::regex: " << x << ") " << regexes[x - 1] << std::endl; - } - return 1; - } - else { - bool failed = false; - for (size_t i = 0 ; i < answer.size() ; i ++) { - if ((failed = (failed || answer[i] != test_result[i]))) { - std::cout << "\t" << text << std::endl; - std::cout << "\t\t Failed: " << answer[i] << ") " << regexes[answer[i] - 1] << " | " << test_result[i] << ") " << regexes[test_result[i] - 1] << std::endl; - return 1; - } - } - std::cout << "\t" << text << std::endl; - std::cout << "\t\t Success: " << duration(t2 - t1).count() << "ns (lib) vs " << duration(t3 - t2).count() << "ns (std::regex)" << std::endl; - } - } - } - if (total > alternative + alternative / 10) { - std::cout << "\t\t Success but slow: " << total.count() << "ns (lib) vs " << alternative.count() << "ns (std::regex)" << std::endl; - } - return 0; -} \ No newline at end of file + for (int i = 1; i < argc; i++) + { + if (strcmp(argv[i], "--add") == 0) + { + is_it_regex = true; + } + else if (strcmp(argv[i], "--match") == 0) + { + is_it_regex = false; + } + else if (is_it_regex) + { + const auto t1 = high_resolution_clock::now(); + root.add_regex(std::string(argv[i]), num++); + const auto t2 = high_resolution_clock::now(); + regexes.push_back(argv[i]); + const auto t3 = high_resolution_clock::now(); + total = total + (t2 - t1); + alternative = alternative + (t3 - t2); + } + else if (!is_it_regex) + { + std::string text = std::string(argv[i]); + const auto t1 = high_resolution_clock::now(); + const auto answer = root.match(text); + const auto t2 = high_resolution_clock::now(); + std::vector test_result; + for (size_t i = 0; i < regexes.size(); i++) + { + if (std::regex_search(text, std::regex("^(" + regexes[i] + ")$"))) + { + test_result.push_back(i); + } + } + const auto t3 = high_resolution_clock::now(); + + // Time capturing + total = total + (t2 - t1); + alternative = alternative + (t3 - t2); + + // Validity check + if (answer.size() != test_result.size()) + { + std::cout << "\tFailed:\n"; + std::cout << "\t" << text << std::endl; + for (auto x : answer) + { + std::cout << "\t\t matcher lib: " << x << ") " << regexes[x - 1] << std::endl; + } + std::cout << "\t\t-----------------------\n"; + for (auto x : test_result) + { + std::cout << "\t\t std::regex: " << x << ") " << regexes[x - 1] << std::endl; + } + return 1; + } + else + { + bool failed = false; + for (size_t i = 0; i < answer.size(); i++) + { + if ((failed = (failed || answer[i] != test_result[i]))) + { + std::cout << "\t" << text << std::endl; + std::cout << "\t\t Failed: " << answer[i] << ") " << regexes[answer[i] - 1] << " | " << test_result[i] << ") " + << regexes[test_result[i] - 1] << std::endl; + return 1; + } + } + std::cout << "\t" << text << std::endl; + std::cout << "\t\t Success: " << duration(t2 - t1).count() << "ns (lib) vs " + << duration(t3 - t2).count() << "ns (std::regex)" << std::endl; + } + } + } + if (total > alternative + alternative / 10) + { + std::cout << "\t\t Success but slow: " << total.count() << "ns (lib) vs " << alternative.count() << "ns (std::regex)" << std::endl; + } + return 0; +}