diff --git a/.gitignore b/.gitignore index 6d7eb1c..85f1ac8 100644 --- a/.gitignore +++ b/.gitignore @@ -28,5 +28,8 @@ Makefile cmake_install.cmake docopt-config-version.cmake +# build directory +build/ + # Files configured by CMake run_tests diff --git a/CMakeLists.txt b/CMakeLists.txt index feff32e..b088529 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,6 @@ include(GNUInstallDirs) #============================================================================ option(WITH_TESTS "Build tests." OFF) option(WITH_EXAMPLE "Build example." OFF) -option(USE_BOOST_REGEX "Replace std::regex with Boost.Regex" OFF) #============================================================================ # Internal compiler options @@ -16,8 +15,8 @@ option(USE_BOOST_REGEX "Replace std::regex with Boost.Regex" OFF) # C++ standard set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) -if(NOT CMAKE_CXX_STANDARD OR CMAKE_CXX_STANDARD LESS 11) - set(CMAKE_CXX_STANDARD 11) +if(NOT CMAKE_CXX_STANDARD OR CMAKE_CXX_STANDARD LESS 17) + set(CMAKE_CXX_STANDARD 17) endif() #============================================================================ @@ -31,6 +30,7 @@ set(docopt_HEADERS docopt_value.h ) +add_subdirectory(compile-time-regular-expressions EXCLUDE_FROM_ALL) #============================================================================ # Compile targets #============================================================================ @@ -43,11 +43,14 @@ if(MSVC OR XCODE) # See https://cmake.org/cmake/help/v3.0/command/add_library.html?highlight=add_library add_library(docopt SHARED ${docopt_SOURCES} ${docopt_HEADERS}) add_library(docopt_s STATIC ${docopt_SOURCES} ${docopt_HEADERS}) + target_link_libraries(docopt PRIVATE ctre::ctre) + target_link_libraries(docopt_s PRIVATE ctre::ctre) else() # If not using MSVC or Xcode, we will create an intermediate object target # to avoid compiling the source code twice. 
add_library(docopt_o OBJECT ${docopt_SOURCES} ${docopt_HEADERS})
     set_target_properties(docopt_o PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
+    target_link_libraries(docopt_o PRIVATE ctre::ctre)
 
     add_library(docopt SHARED $<TARGET_OBJECTS:docopt_o>)
     set_target_properties(docopt PROPERTIES
@@ -74,19 +77,6 @@ if(NOT MSVC)
     set_target_properties(docopt_s PROPERTIES OUTPUT_NAME docopt)
 endif()
 
-if(USE_BOOST_REGEX)
-    add_definitions("-DDOCTOPT_USE_BOOST_REGEX")
-    # This is needed on Linux, where linking a static library into docopt.so
-    # fails because boost static libs are not compiled with -fPIC
-    set(Boost_USE_STATIC_LIBS OFF)
-    find_package(Boost 1.53 REQUIRED COMPONENTS regex)
-    include_directories(${Boost_INCLUDE_DIRS})
-    target_link_libraries(docopt ${Boost_LIBRARIES})
-    if(WITH_STATIC)
-        target_link_libraries(docopt_s ${Boost_LIBRARIES})
-    endif()
-endif()
-
 #============================================================================
 # Examples
 #============================================================================
diff --git a/docopt.cpp b/docopt.cpp
index 63645e4..2ff1ad6 100644
--- a/docopt.cpp
+++ b/docopt.cpp
@@ -67,44 +67,33 @@ class Tokens {
     explicit operator bool() const { return fIndex < fTokens.size(); }
 
     static Tokens from_pattern(std::string const& source) {
-        static const std::regex re_separators{
-            "(?:\\s*)" // any spaces (non-matching subgroup)
-            "("
-            "[\\[\\]\\(\\)\\|]" // one character of brackets or parens or pipe character
-            "|"
-            "\\.\\.\\." // elipsis
-            ")"};
-
-        static const std::regex re_strings{"(?:\\s*)" // any spaces (non-matching subgroup)
-            "("
-            "\\S*<.*?>" // strings, but make sure to keep "< >" strings together
-            "|"
-            "[^<>\\s]+" // string without <>
-            ")"};
+        constexpr static auto re_separator =
+            ctll::fixed_string{R"((?:\s*))" // any leading spaces (non-capturing group)
+                               "("
+                               R"([\[\]\(\)\|])" // one character of brackets or parens or pipe character
+                               "|"
+                               R"(\.\.\.)" // ellipsis
+                               ")"};
+
+        constexpr static auto re_strings =
+            ctll::fixed_string{R"(\S*<.*?>)" // strings, but make sure to keep "< >" strings together
+                               "|"
+                               R"([^<>\s]+)"}; // string without <>
 
         // We do two stages of regex matching. The '[]()' and '...' are strong delimeters
         // and need to be split out anywhere they occur (even at the end of a token). We
         // first split on those, and then parse the stuff between them to find the string
-        // tokens. This is a little harder than the python version, since they have regex.split
-        // and we dont have anything like that.
+        // tokens. Python does this with regex.split; ctre::split fills that role here.
 
         std::vector<std::string> tokens;
-        std::for_each(std::sregex_iterator{source.begin(), source.end(), re_separators},
-                      std::sregex_iterator{},
-                      [&](std::smatch const& match) {
-                          // handle anything before the separator (this is the "stuff" between the delimeters)
-                          if (match.prefix().matched) {
-                              std::for_each(
-                                  std::sregex_iterator{match.prefix().first, match.prefix().second, re_strings},
-                                  std::sregex_iterator{},
-                                  [&](std::smatch const& m) { tokens.push_back(m[1].str()); });
-                          }
-
-                          // handle the delimter token itself
-                          if (match[1].matched) {
-                              tokens.push_back(match[1].str());
-                          }
-                      });
+        for (auto [first_stage, sep] : ctre::split<re_separator>(source)) { // stage 1: cut at the strong delimiters
+            for (auto sub_match : ctre::range<re_strings>(first_stage)) { // stage 2: string tokens in the text before the delimiter
+                tokens.emplace_back(sub_match.to_string());
+            }
+            if (sep) { // the delimiter token itself, when one was matched
+                tokens.emplace_back(sep.to_string());
+            }
+        }
 
         return Tokens(tokens, false);
     }
@@ -148,26 +137,34 @@ std::vector flat_filter(Pattern& pattern) {
     return ret;
 }
 
-static std::vector<std::string> parse_section(std::string const& name, std::string const& source) {
-    // ECMAScript regex only has "?=" for a non-matching lookahead. In order to make sure we always have
-    // a newline to anchor our matching, we have to avoid matching the final newline of each grouping.
-    // Therefore, our regex is adjusted from the docopt Python one to use ?= to match the newlines before
-    // the following lines, rather than after.
-    std::regex const re_section_pattern{
-        "(?:^|\\n)" // anchored at a linebreak (or start of string)
-        "("
-        "[^\\n]*" +
-        name +
-        "[^\\n]*(?=\\n?)" // a line that contains the name
-        "(?:\\n[ \\t].*?(?=\\n|$))*" // followed by any number of lines that are indented
-        ")",
-        std::regex::icase};
-
+static auto parse_usage(std::string_view source) -> std::vector<std::string> { // every "usage:" section of source, trimmed
+    constexpr static auto re_section_pattern =
+        ctll::fixed_string{R"((?:^|\n))" // anchored at a line-break (or start of string)
+                           "("
+                           R"([^\n]*)"
+                           "[uU][sS][aA][gG][eE]:"
+                           R"([^\n]*(?=\n?))" // a line that contains "usage:" (case insensitive)
+                           R"((?:\n[ \t].*?(?=\n|$))*)" // followed by any number of lines that are indented
+                           ")"};
     std::vector<std::string> ret;
-    std::for_each(std::sregex_iterator(source.begin(), source.end(), re_section_pattern),
-                  std::sregex_iterator(),
-                  [&](std::smatch const& match) { ret.push_back(trim(match[1].str())); });
-
+    for (auto match : ctre::range<re_section_pattern>(source)) { // one match per section
+        ret.emplace_back(trim(match.get<1>().to_view())); // capture 1 excludes the anchoring line break
+    }
+    return ret;
+}
+static auto parse_options(std::string_view source) -> std::vector<std::string> { // every "options:" section of source, trimmed
+    constexpr static auto re_section_pattern =
+        ctll::fixed_string{R"((?:^|\n))" // anchored at a line-break (or start of string)
+                           "("
+                           R"([^\n]*)"
+                           "[oO][pP][tT][iI][oO][nN][sS]:"
+                           R"([^\n]*(?=\n?))" // a line that contains "options:" (case insensitive)
+                           R"((?:\n[ \t].*?(?=\n|$))*)" // followed by any number of lines that are indented
+                           ")"};
+    std::vector<std::string> ret;
+    for (auto match : ctre::range<re_section_pattern>(source)) { // one match per section
+        ret.emplace_back(trim(match.get<1>().to_view())); // capture 1 excludes the anchoring line break
+    }
     return ret;
 }
 
@@ -487,16 +484,16 @@ static PatternList parse_argv(Tokens tokens, std::vector