Skip to content

Commit

Permalink
[feat/ctre] std::regex completely replaced with ctre, tests passed
Browse files Browse the repository at this point in the history
  • Loading branch information
andijcr committed May 24, 2021
1 parent 6a5e3d1 commit ca98f84
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 139 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,8 @@ Makefile
cmake_install.cmake
docopt-config-version.cmake

# build directory
build/

# Files configured by CMake
run_tests
22 changes: 6 additions & 16 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,15 @@ include(GNUInstallDirs)
#============================================================================
option(WITH_TESTS "Build tests." OFF)
option(WITH_EXAMPLE "Build example." OFF)
option(USE_BOOST_REGEX "Replace std::regex with Boost.Regex" OFF)

#============================================================================
# Internal compiler options
#============================================================================
# C++ standard
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
if(NOT CMAKE_CXX_STANDARD OR CMAKE_CXX_STANDARD LESS 11)
set(CMAKE_CXX_STANDARD 11)
if(NOT CMAKE_CXX_STANDARD OR CMAKE_CXX_STANDARD LESS 17)
set(CMAKE_CXX_STANDARD 17)
endif()

#============================================================================
Expand All @@ -31,6 +30,7 @@ set(docopt_HEADERS
docopt_value.h
)

add_subdirectory(compile-time-regular-expressions EXCLUDE_FROM_ALL)
#============================================================================
# Compile targets
#============================================================================
Expand All @@ -43,11 +43,14 @@ if(MSVC OR XCODE)
# See https://cmake.org/cmake/help/v3.0/command/add_library.html?highlight=add_library
add_library(docopt SHARED ${docopt_SOURCES} ${docopt_HEADERS})
add_library(docopt_s STATIC ${docopt_SOURCES} ${docopt_HEADERS})
target_link_libraries(docopt PRIVATE ctre::ctre)
target_link_libraries(docopt_s PRIVATE ctre::ctre)
else()
# If not using MSVC or Xcode, we will create an intermediate object target
# to avoid compiling the source code twice.
add_library(docopt_o OBJECT ${docopt_SOURCES} ${docopt_HEADERS})
set_target_properties(docopt_o PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
target_link_libraries(docopt_o PRIVATE ctre::ctre)

add_library(docopt SHARED $<TARGET_OBJECTS:docopt_o>)
set_target_properties(docopt PROPERTIES
Expand All @@ -74,19 +77,6 @@ if(NOT MSVC)
set_target_properties(docopt_s PROPERTIES OUTPUT_NAME docopt)
endif()

if(USE_BOOST_REGEX)
add_definitions("-DDOCTOPT_USE_BOOST_REGEX")
# This is needed on Linux, where linking a static library into docopt.so
# fails because boost static libs are not compiled with -fPIC
set(Boost_USE_STATIC_LIBS OFF)
find_package(Boost 1.53 REQUIRED COMPONENTS regex)
include_directories(${Boost_INCLUDE_DIRS})
target_link_libraries(docopt ${Boost_LIBRARIES})
if(WITH_STATIC)
target_link_libraries(docopt_s ${Boost_LIBRARIES})
endif()
endif()

#============================================================================
# Examples
#============================================================================
Expand Down
111 changes: 54 additions & 57 deletions docopt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,44 +67,33 @@ class Tokens {
explicit operator bool() const { return fIndex < fTokens.size(); }

static Tokens from_pattern(std::string const& source) {
static const std::regex re_separators{
"(?:\\s*)" // any spaces (non-matching subgroup)
"("
"[\\[\\]\\(\\)\\|]" // one character of brackets or parens or pipe character
"|"
"\\.\\.\\." // elipsis
")"};

static const std::regex re_strings{"(?:\\s*)" // any spaces (non-matching subgroup)
"("
"\\S*<.*?>" // strings, but make sure to keep "< >" strings together
"|"
"[^<>\\s]+" // string without <>
")"};
constexpr static auto re_separator =
ctll::fixed_string{R"((?:\s*))" // any spaces (non-matching subgroup)
"("
R"([\[\]\(\)\|])" // one character of brackets or parens or pipe character
"|"
R"(\.\.\.)" // ellipsis
")"};

constexpr static auto re_strings =
ctll::fixed_string{R"(\S*<.*?>)" // strings, but make sure to keep "< >" strings together
"|"
R"([^<>\s]+)"}; // string without <>

// We do two stages of regex matching. The '[]()' and '...' are strong delimiters
// and need to be split out anywhere they occur (even at the end of a token). We
// first split on those, and then parse the stuff between them to find the string
// tokens. This is a little harder than the python version, since they have regex.split
// and we dont have anything like that.
// tokens. This is a little harder than the python version

std::vector<std::string> tokens;
std::for_each(std::sregex_iterator{source.begin(), source.end(), re_separators},
std::sregex_iterator{},
[&](std::smatch const& match) {
// handle anything before the separator (this is the "stuff" between the delimeters)
if (match.prefix().matched) {
std::for_each(
std::sregex_iterator{match.prefix().first, match.prefix().second, re_strings},
std::sregex_iterator{},
[&](std::smatch const& m) { tokens.push_back(m[1].str()); });
}

// handle the delimter token itself
if (match[1].matched) {
tokens.push_back(match[1].str());
}
});
for (auto [first_stage, sep] : ctre::split<re_separator>(source)) {
for (auto sub_match : ctre::range<re_strings>(first_stage)) {
tokens.emplace_back(sub_match.to_string());
}
if (sep) {
tokens.emplace_back(sep.to_string());
}
}

return Tokens(tokens, false);
}
Expand Down Expand Up @@ -148,26 +137,34 @@ std::vector<T*> flat_filter(Pattern& pattern) {
return ret;
}

static std::vector<std::string> parse_section(std::string const& name, std::string const& source) {
// ECMAScript regex only has "?=" for a non-matching lookahead. In order to make sure we always have
// a newline to anchor our matching, we have to avoid matching the final newline of each grouping.
// Therefore, our regex is adjusted from the docopt Python one to use ?= to match the newlines before
// the following lines, rather than after.
std::regex const re_section_pattern{
"(?:^|\\n)" // anchored at a linebreak (or start of string)
"("
"[^\\n]*" +
name +
"[^\\n]*(?=\\n?)" // a line that contains the name
"(?:\\n[ \\t].*?(?=\\n|$))*" // followed by any number of lines that are indented
")",
std::regex::icase};

static auto parse_usage(std::string_view source) -> std::vector<std::string> {
constexpr static auto re_section_pattern =
ctll::fixed_string{R"((?:^|\n))" // anchored at a line-break (or start of string)
"("
R"([^\n]*)"
"[uU][sS][aA][gG][eE]:"
R"([^\n]*(?=\n?))" // a line that contains "usage:" (case insensitive)
R"((?:\n[ \t].*?(?=\n|$))*)" // followed by any number of lines that are indented
")"};
std::vector<std::string> ret;
std::for_each(std::sregex_iterator(source.begin(), source.end(), re_section_pattern),
std::sregex_iterator(),
[&](std::smatch const& match) { ret.push_back(trim(match[1].str())); });

for (auto match : ctre::range<re_section_pattern>(source)) {
ret.emplace_back(trim(match.get<1>().to_view()));
}
return ret;
}
// Collects every "options:" section of the doc text.
//
// A section is one line containing "options:" (matched case-insensitively via
// explicit character classes) plus any directly following lines that start with
// a space or tab. Each captured section is passed through trim() before being
// stored, and the sections are returned in the order they occur in `source`.
static auto parse_options(std::string_view source) -> std::vector<std::string> {
    constexpr static auto re_options_section =
        ctll::fixed_string{R"((?:^|\n))" // anchored at a line-break (or start of string)
                           "("
                           R"([^\n]*)"
                           "[oO][pP][tT][iI][oO][nN][sS]:"
                           R"([^\n]*(?=\n?))" // a line that contains "options:" (case insensitive)
                           R"((?:\n[ \t].*?(?=\n|$))*)" // followed by any number of lines that are indented
                           ")"};

    std::vector<std::string> sections;
    for (auto found : ctre::range<re_options_section>(source)) {
        // Capture group 1 is the section text without the leading line-break anchor.
        auto const section_text = found.get<1>().to_view();
        sections.push_back(trim(section_text));
    }
    return sections;
}

Expand Down Expand Up @@ -487,16 +484,16 @@ static PatternList parse_argv(Tokens tokens, std::vector<Option>& options, bool
static std::vector<Option> parse_defaults(std::string const& doc) {
// This pattern is a delimiter by which we split the options.
// The delimiter is a new line followed by optional whitespace and then one or two hyphens.
static std::regex const re_delimiter{
"(?:^|\\n)[ \\t]*" // a new line with leading whitespace
"(?=-{1,2})" // [split happens here] (positive lookahead) ... and followed by one or two hyphes
static constexpr auto re_delimiter = ctll::fixed_string{
R"((?:^|\n)[ \t]*)" // a new line with leading whitespace
"(?=-{1,2})" // [split happens here] (positive lookahead) ... and followed by one or two hyphes
};

std::vector<Option> defaults;
for (auto s : parse_section("options:", doc)) {
for (auto s : parse_options(doc)) {
s.erase(s.begin(), s.begin() + static_cast<std::ptrdiff_t>(s.find(':')) + 1); // get rid of "options:"

for (const auto& opt : regex_split(s, re_delimiter)) {
for (auto opt : ctre::split<re_delimiter>(s)) {
if (starts_with(opt, "-")) {
defaults.emplace_back(Option::parse(opt));
}
Expand Down Expand Up @@ -528,7 +525,7 @@ static void extras(bool help, bool version, PatternList const& options) {

// Parse the doc string and generate the Pattern tree
static std::pair<Required, std::vector<Option>> create_pattern_tree(std::string const& doc) {
auto usage_sections = parse_section("usage:", doc);
auto usage_sections = parse_usage(doc);
if (usage_sections.empty()) {
throw DocoptLanguageError("'usage:' (case-insensitive) not found.");
}
Expand Down
62 changes: 15 additions & 47 deletions docopt_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,12 @@
#ifndef docopt_docopt_private_h
#define docopt_docopt_private_h

#include "ctre.hpp"
#include <assert.h>
#include <memory>
#include <unordered_set>
#include <vector>

// Workaround GCC 4.8 not having std::regex
#if DOCTOPT_USE_BOOST_REGEX
#include <boost/regex.hpp>
namespace std {
using boost::regex;
using boost::regex_search;
using boost::smatch;
using boost::sregex_iterator;
namespace regex_constants {
using boost::regex_constants::match_not_null;
}
}
#else
#include <regex>
#endif

#include "docopt_util.h"
#include "docopt_value.h"

Expand Down Expand Up @@ -227,7 +212,7 @@ namespace docopt {

class Option final : public LeafPattern {
public:
static Option parse(std::string const& option_description);
static Option parse(std::string_view option_description);

Option(std::string shortOption, std::string longOption, int argcount = 0, value v = value{false})
: LeafPattern(longOption.empty() ? shortOption : longOption, std::move(v))
Expand Down Expand Up @@ -495,7 +480,7 @@ namespace docopt {
return ret;
}

inline Option Option::parse(std::string const& option_description) {
inline Option Option::parse(std::string_view option_description) {
std::string shortOption, longOption;
int argcount = 0;
value val{false};
Expand All @@ -506,41 +491,24 @@ namespace docopt {
options_end = option_description.begin() + static_cast<std::ptrdiff_t>(double_space);
}

static const std::regex pattern{"(-{1,2})?(.*?)([,= ]|$)"};
for (std::sregex_iterator
i{option_description.begin(), options_end, pattern, std::regex_constants::match_not_null},
e{};
i != e;
++i) {
std::smatch const& match = *i;
if (match[1].matched) { // [1] is optional.
if (match[1].length() == 1) {
shortOption = "-" + match[2].str();
static constexpr auto pattern = ctll::fixed_string{"(-{1,2})?(.*?)([,= ]|$)"};
for (auto match : ctre::range<pattern>(option_description.begin(), options_end)) {
auto [m, hyp, cont, _] = match;
if (hyp) { // [1] is optional.
if (hyp.size() == 1) {
shortOption = "-" + cont.to_string();
} else {
longOption = "--" + match[2].str();
longOption = "--" + cont.to_string();
}
} else if (match[2].length() > 0) { // [2] always matches.
std::string m = match[2];
} else if (cont.size() > 0) { // [2] always matches.
argcount = 1;
} else {
// delimeter
}

if (match[3].length() == 0) { // [3] always matches.
// Hit end of string. For some reason 'match_not_null' will let us match empty
// at the end, and then we'll spin in an infinite loop. So, if we hit an empty
// match, we know we must be at the end.
break;
}
}

if (argcount) {
std::smatch match;
if (std::regex_search(options_end,
option_description.end(),
match,
std::regex{"\\[default: (.*)\\]", std::regex::icase})) {
val = match[1].str();
// to support case-insensitive search for the pattern [default: .*]
constexpr static auto def_val = ctll::fixed_string{R"(\[[dD][eE][fF][aA][uU][lL][tT]: (.*)\])"};
if (auto [m, def_str] = ctre::search<def_val>(options_end, option_description.end()); m) {
val = def_str.to_string();
}
}

Expand Down
33 changes: 14 additions & 19 deletions docopt_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,19 @@
#ifndef docopt_docopt_util_h
#define docopt_docopt_util_h

#if DOCTOPT_USE_BOOST_REGEX
#include <boost/regex.hpp>
namespace std {
using boost::regex;
using boost::sregex_token_iterator;
}
#else
#include <regex>
#endif
#include <algorithm>
#include <string>
#include <string_view>
#include <tuple>
#include <vector>

#if 0
#pragma mark -
#pragma mark General utility
#endif

namespace {
bool starts_with(std::string const& str, std::string const& prefix) {
// Reports whether `str` begins with `prefix`.
// An empty prefix matches every string; a prefix longer than `str` never matches.
bool starts_with(std::string_view str, std::string_view prefix) {
    return str.size() >= prefix.size() && str.compare(0, prefix.size(), prefix) == 0;
}
Expand All @@ -41,7 +37,14 @@ namespace {
return std::move(str);
}

std::vector<std::string> split(std::string const& str, size_t pos = 0) {
// Returns a copy of `str` with every leading and trailing character that
// appears in `whitespace` removed. If `str` is empty or consists solely of
// such characters, the result is an empty string.
std::string trim(std::string_view str, std::string_view whitespace = " \t\n") {
    auto const first = str.find_first_not_of(whitespace);
    if (first == std::string_view::npos) {
        // Nothing but strippable characters (or nothing at all).
        return std::string{};
    }

    // A kept character exists, so the backwards search cannot fail.
    auto const last = str.find_last_not_of(whitespace);
    return std::string(str.substr(first, last - first + 1));
}
std::vector<std::string> split(std::string_view str, size_t pos = 0) {
const char* const anySpace = " \t\r\n\v\f";

std::vector<std::string> ret;
Expand Down Expand Up @@ -88,14 +91,6 @@ namespace {
return ret;
}

// Splits `text` on every match of `re` and returns the pieces between matches,
// in order of appearance.
std::vector<std::string> regex_split(std::string const& text, std::regex const& re) {
    std::vector<std::string> pieces;

    // Sub-match index -1 makes the token iterator yield the text between matches
    // rather than the matches themselves.
    std::sregex_token_iterator cur(text.begin(), text.end(), re, -1);
    std::sregex_token_iterator const last;
    while (cur != last) {
        pieces.push_back(cur->str());
        ++cur;
    }
    return pieces;
}
}

namespace docopt {
Expand Down

0 comments on commit ca98f84

Please sign in to comment.