Skip to content

Commit

Permalink
Support for regex grammar (needs to be checked).
Browse files Browse the repository at this point in the history
  • Loading branch information
Hana Dusíková committed Aug 20, 2018
1 parent 1611603 commit 90ac938
Show file tree
Hide file tree
Showing 10 changed files with 429 additions and 35 deletions.
15 changes: 13 additions & 2 deletions Makefile
@@ -1,9 +1,11 @@
.PHONY: default all clean
.PHONY: default all clean grammar

default: all

TARGETS := test.cpp

DESATOMAT := /www/root/desatomat/console/desatomat.php

CXXFLAGS := -std=c++17 -Iinclude -O3 -Wno-gnu-string-literal-operator-template
#-fconcepts

Expand All @@ -22,4 +24,13 @@ $(OBJECTS): %.o: %.cpp
-include $(DEPEDENCY_FILES)

clean:
rm -f $(TRUE_TARGETS) $(OBJECTS) $(DEPEDENCY_FILES)
rm -f $(TRUE_TARGETS) $(OBJECTS) $(DEPEDENCY_FILES)

# Grammar targets. `grammar` brings the generated header up to date;
# `regrammar` forces it to be regenerated. Both are commands, not files.
.PHONY: grammar regrammar

grammar: include/ctre/pcre.hpp

# Remove the generated header first, then rebuild it through a sub-make so
# the now-missing target is re-evaluated from scratch.
regrammar:
	rm -f include/ctre/pcre.hpp
	$(MAKE) grammar

# Generate the header from the grammar file with desatomat. Input, output
# directory and output file name are taken from the rule itself
# ($<, $(@D), $(@F)) so they cannot drift from the target/prerequisite names.
include/ctre/pcre.hpp: include/ctre/pcre.gram
	$(DESATOMAT) --ll --q --input=$< --output=$(@D)/ --generator=cpp_ctre_v2 --cfg:fname=$(@F) --cfg:namespace=ctre --cfg:guard=CTRE__PCRE__HPP --cfg:grammar_name=pcre
9 changes: 9 additions & 0 deletions future.cpp
@@ -0,0 +1,9 @@
#include <ctre.hpp>

// Sketch of the intended usage: return the "name" capture of sv when sv matches
// "^(name:[a-z]+):", otherwise std::nullopt.
// NOTE(review): regex<"..."> and get<"name">() are not defined in the parts of this
// commit shown here — presumably a planned API; confirm before relying on it.
std::optional<std::string_view> match(std::string_view sv) {
// The local `match` shadows the function name for the rest of this if-statement.
if (auto match = regex<"^(name:[a-z]+):">(sv); match) {
// NOTE(review): {true, ...} does not look like a valid initializer for
// std::optional<std::string_view> — verify what return value was intended.
return {true, match.get<"name">().first()};
} else {
return std::nullopt;
}
}
44 changes: 44 additions & 0 deletions gram.txt
@@ -0,0 +1,44 @@
backslash = {\\}
dot = {.}
dolar = {$}
open = {(}
close = {)}
questionmark = {?}
colon = {:}
sopen = {[}
sclose = {]}
plus = {+}
star = {*}
copen = {{}
cclose = {}}
num = {0,1,2,3,4,5,6,7,8,9}
comma = {\,}
pipe = {|}
caret = {^}
minus = {-}
hexdec = {0,1,2,3,4,5,6,7,8,9,A,B,C,D,E,F,a,b,c,d,e,f}
octal = {0,1,2,3,4,5,6,7}
nonspecial = {\,,-,0,1,2,3,4,5,6,7,8,9,:,A,B,C,D,E,F,X,],a,b,c,d,e,f,x}
nonspecial2 = {0,1,2,3,4,5,6,7,8,9,:,A,B,C,D,E,F,X,a,b,c,d,e,f,x}

S -> epsilon | [addchar],other,<repeat>,<string2>,<alt2> | [addchar],nonspecial,<repeat>,<string2>,<alt2> | \\,<B>,<repeat>,<string2>,<alt2> | dot,[any],<repeat>,<string2>,<alt2> | caret,[insert_begin],<repeat>,<string2>,<alt2> | dolar,[insert_end],<repeat>,<string2>,<alt2> | (,<C>,<repeat>,<string2>,<alt2> | [,<D>,<repeat>,<string2>,<alt2>
A -> [repeat_exactly],cclose | \,,<H>
alt2 -> pipe,<alt> | epsilon
alt -> [addchar],other,<repeat>,<string2>,<alt2> | [addchar],nonspecial,<repeat>,<string2>,<alt2> | \\,<B>,<repeat>,<string2>,<alt2> | dot,[any],<repeat>,<string2>,<alt2> | caret,[insert_begin],<repeat>,<string2>,<alt2> | dolar,[insert_end],<repeat>,<string2>,<alt2> | (,<C>,<repeat>,<string2>,<alt2> | [,<D>,<repeat>,<string2>,<alt2>
altB -> epsilon | [addchar],other,<repeat>,<string2>,<alt2> | [addchar],nonspecial,<repeat>,<string2>,<alt2> | \\,<B>,<repeat>,<string2>,<alt2> | dot,[any],<repeat>,<string2>,<alt2> | caret,[insert_begin],<repeat>,<string2>,<alt2> | dolar,[insert_end],<repeat>,<string2>,<alt2> | (,<C>,<repeat>,<string2>,<alt2> | [,<D>,<repeat>,<string2>,<alt2>
B -> a | 0,[odigit],octal,[odigit],octal,[odigit],octal | x,<G> | dot | sopen | copen | cclose | open | close | backslash | star | plus | questionmark | pipe | caret | dolar | d
C -> questionmark,colon,<altB>,close | [addchar],other,<repeat>,<string2>,<alt2>,[make_catch],close | [addchar],nonspecial,<repeat>,<string2>,<alt2>,[make_catch],close | \\,<B>,<repeat>,<string2>,<alt2>,[make_catch],close | dot,[any],<repeat>,<string2>,<alt2>,[make_catch],close | caret,[insert_begin],<repeat>,<string2>,<alt2>,[make_catch],close | dolar,[insert_end],<repeat>,<string2>,<alt2>,[make_catch],close | (,<C>,<repeat>,<string2>,<alt2>,[make_catch],close | [,<D>,<repeat>,<string2>,<alt2>,[make_catch],close | [empty],[make_catch],close
D -> caret,[set_neg_start],<set>,sclose | [addchar],other,<range>,<set2>,sclose | [addchar],nonspecial2,<range>,<set2>,sclose | [addchar],dot,<range>,<set2>,sclose | \\,<E>,<set2>,sclose
E -> b | a | 0,[odigit],octal,[odigit],octal,[odigit],octal | x,<G> | dot | sopen | copen | cclose | open | close | backslash | star | plus | questionmark | pipe | caret | dolar
F -> [insert_range],other | [insert_range],nonspecial2
G -> [hdigit],hexdec,[hdigit],hexdec | copen,[hdigit],hexdec,[hdigit],hexdec,[hdigit],hexdec,[hdigit],hexdec,cclose
H -> [repeat_at_least],cclose,<mod> | [digit],num,<number2>,[insert_number],[repeat_ab],cclose,<mod>
mod -> epsilon | questionmark | plus
number2 -> epsilon | [digit],num,<number2>
number -> [digit],num,<number2>
range -> epsilon | -,<F>
repeat -> epsilon | plus,[plus],<mod> | star,[star],<mod> | questionmark,[optional],<mod> | {,<number>,<A>
set2 -> epsilon | [addchar],other,<range>,[set_combine],<set2> | [addchar],nonspecial2,<range>,[set_combine],<set2> | [addchar],dot,<range>,[set_combine],<set2> | \\,<E>,[set_combine],<set2>
set -> [addchar],other,<range>,<set2> | [addchar],nonspecial2,<range>,<set2> | [addchar],dot,<range>,<set2> | \\,<E>,<set2>
string2 -> epsilon | [addchar],other,<repeat>,<string2> | [addchar],nonspecial,<repeat>,<string2> | \\,<B>,<repeat>,<string2> | dot,[any],<repeat>,<string2> | caret,[insert_begin],<repeat>,<string2> | dolar,[insert_end],<repeat>,<string2> | (,<C>,<repeat>,<string2> | [,<D>,<repeat>,<string2>

Empty file removed grammar.txt
Empty file.
8 changes: 6 additions & 2 deletions include/ctre/fixed_string.hpp
Expand Up @@ -17,8 +17,12 @@ template <typename CharT, size_t N> class basic_fixed_string {

// Number of characters held, not counting one trailing '\0' if the last
// element is a terminator. Returns 0 when N == 0.
constexpr size_t size() const noexcept {
	if constexpr (N > 0) {
		// if it's zero terminated string (from string literal) then size N - 1
		if (content[N-1] == '\0') return N - 1;
		else return N;
	} else {
		// N == 0: there is no last element, so content[N-1] must never be evaluated
		return 0;
	}
}
constexpr CharT operator[](size_t i) const noexcept {
return content[i];
Expand Down
15 changes: 14 additions & 1 deletion include/ctre/literals.hpp
Expand Up @@ -4,6 +4,7 @@
#include "fixed_string.hpp"
#include "parser.hpp"
#include "grammar.hpp"
#include "pcre.hpp"

namespace ctre {

Expand All @@ -19,11 +20,23 @@ namespace literals {

template <typename CharT, CharT... input> static inline constexpr auto string = basic_fixed_string<CharT, sizeof...(input)>({input...}, std::make_index_sequence<sizeof...(input)>());

template <typename CharT, CharT... input> constexpr auto operator""_ctre() noexcept {
template <typename CharT, CharT... input> constexpr auto operator""_expr() noexcept {
return ctre::parser<math_grammar_quick, string<CharT, input...>>::decide();
//return ctre::parser<math_grammar_quick, string<CharT, input...>>::decide(math_grammar_quick::subject_type());
}

template <typename CharT, CharT... input> constexpr auto operator""_ctre() noexcept {
auto expression = ctre::parser<pcre, string<CharT, input...>>::decide(ctre::empty_subject());
static_assert(expression, "Regular expression is not correct.");
return expression;
//return ctre::parser<math_grammar_quick, string<CharT, input...>>::decide(math_grammar_quick::subject_type());
}

template <typename CharT, CharT... input> constexpr auto operator""_pcre_test() noexcept {
return bool(ctre::parser<pcre, string<CharT, input...>>::decide(ctre::empty_subject()));
//return ctre::parser<math_grammar_quick, string<CharT, input...>>::decide(math_grammar_quick::subject_type());
}

} // literals

} // ctre
Expand Down
51 changes: 34 additions & 17 deletions include/ctre/parser.hpp
Expand Up @@ -5,8 +5,8 @@

// Helper macros for writing grammar definitions.
// RULE opens a rule declaration; NONTERM declares an empty nonterminal tag type.
#define RULE static constexpr auto rule
#define NONTERM(name) struct name { }
// START_NONTERM declares a nonterminal and records it as the grammar's `_start`;
// SUBJECT_TYPE records the grammar's default subject as `_subject_type`.
// The parser looks these up as G::_start and G::_subject_type.
#define START_NONTERM(name) struct name { }; using _start = name
#define SUBJECT_TYPE(name) using _subject_type = name

namespace ctre {

Expand Down Expand Up @@ -36,13 +36,14 @@ template <auto... Def> struct set {
template <auto V, typename = std::enable_if_t<((Def == V) || ... || false)>> constexpr set(term<V>) noexcept;
};


// Negated terminal set: constructible from term<V> only when V differs from
// every value listed in Def.
template <auto... Def> struct neg_set {
	constexpr neg_set() noexcept { }
	// concepts alternative: template <auto V> constexpr neg_set(term<V>) noexcept requires ((Def != V) && ...);
	template <auto V, typename = std::enable_if_t<((Def != V) && ...)>> constexpr neg_set(term<V>) noexcept;
};

template <auto... Def> struct anything {
struct anything {
constexpr anything() noexcept { };
template <auto V> constexpr anything(term<V>) noexcept;
};
Expand Down Expand Up @@ -80,7 +81,7 @@ template <typename T> struct IsAction {
// everything else can be used as a nonterminal

template <typename Grammar> struct augment_grammar: public Grammar {
using typename Grammar::start;
using typename Grammar::_start;
using Grammar::rule; // Grammar rules should have same priority

// default behaviour is reject if there is unexpected state
Expand All @@ -89,24 +90,37 @@ template <typename Grammar> struct augment_grammar: public Grammar {
// if there are two same terms on top of the stack and current input, you should move forward
template <auto A> static constexpr auto rule(term<A>, term<A>) -> pop_input;

//template <auto A, auto B, auto V> static constexpr auto rule(range<A,B>, term<V>) -> pop_input requires ((A <= V) && (V <= B));
template <auto A, auto B, auto V, typename = std::enable_if_t<((A <= V) && (V <= B))>> static constexpr auto rule(range<A,B>, term<V>) -> pop_input;


//template <auto... Def, auto V> static constexpr auto rule(set<Def...>, term<V>) -> pop_input requires ((V == Def) || ... || false);
template <auto... Def, auto V, typename = std::enable_if_t<((V == Def) || ... || false)>> static constexpr auto rule(set<Def...>, term<V>) -> pop_input;

template <auto V> static constexpr auto rule(anything, term<V>) -> pop_input;

template <auto... Def, auto V, typename = std::enable_if_t<((V != Def) && ... && true)>> static constexpr auto rule(neg_set<Def...>, term<V>) -> pop_input;


// empty stack and empty input means we are accepting
static constexpr auto rule(epsilon, epsilon) -> accept;
};

// Empty placeholder subject: used when a parse carries no subject data
// (it is the default Subject of the stepping decide() overload).
struct empty_subject { };

// Outcome of a parse. The accept/reject verdict is part of the type (Value), so it is
// available as a constant expression; the subject handed to the constructor is stored by value.
template <typename Subject, bool Value> struct parse_result {
	static constexpr bool correct{Value};
	Subject subject{};
	constexpr parse_result(Subject subject) noexcept: subject{subject} { }
	// Conversion reads only the static verdict, never the stored subject.
	constexpr operator bool() const noexcept {
		return correct;
	}
};

// Specialization for empty_subject: there is nothing to store, so only the
// compile-time verdict is kept.
template <bool Value> struct parse_result<empty_subject, Value> {
	static constexpr bool correct = Value;
	constexpr parse_result(empty_subject) noexcept { }
	constexpr operator bool() const noexcept { return correct; }
};

//template <typename G, FixedString input> struct parser { // in c++20
Expand All @@ -129,11 +143,14 @@ template <typename G, const auto & input> struct parser {
return epsilon();
}
}
template <typename A, typename B> static constexpr auto rule(A a,B b) {
return decltype(grammar().rule(a,b))();
}
template <size_t pos = 0, typename Head> static constexpr auto get_move(Head head) {
return decltype(grammar().rule(Head(), current<pos>()))();
return rule(Head(), current<pos>());
}
template <typename Subject = typename G::subject_type> static constexpr auto decide(const Subject subject = Subject{}) {
return decide(list<typename G::start>(), 1, subject);
template <typename Subject = typename G::_subject_type> static constexpr auto decide(const Subject subject = Subject{}) {
return decide(list<typename G::_start>(), 1, subject);
}
template <size_t pos = 0, typename Stack, typename Subject = empty_subject> static constexpr auto decide(Stack stack, unsigned step, Subject subject) {
auto head_of_stack = head(stack);
Expand All @@ -152,7 +169,7 @@ template <typename G, const auto & input> struct parser {
return decide<pos>(pop(stack), step+1, subject);
} else {
// else reject input
return parse_result<Subject>(false, step, subject);
return parse_result<Subject,false>(subject);
}
} else {
// "else" branch because return type deduction is not working with standalone "if constexpr"
Expand All @@ -163,7 +180,7 @@ template <typename G, const auto & input> struct parser {
return decide<pos+1>(pop_and_push_quick(m, stack), step+1, subject);
} else if constexpr (IsExplicitlyConvertibleToBool<decltype(m)>::value) {
// accept or reject state
return parse_result<Subject>(bool(m), step, subject);
return parse_result<Subject,bool(m)>(subject);
} else {
// move forward with parsing
// if decltype(m) is pop_char then move a char forward
Expand Down
68 changes: 68 additions & 0 deletions include/ctre/pcre.gram
@@ -0,0 +1,68 @@
backslash={\\}
dot={.}
dolar={$}
open={(}
close={)}
questionmark={?}
colon={:}
sopen={[}
sclose={]}
plus={+}
star={*}
copen={\{}
cclose={\}}
num={0,1,2,3,4,5,6,7,8,9}
comma={\,}
pipe={|}
caret={^}
minus={-}

hexdec={0,1,2,3,4,5,6,7,8,9,a,b,c,d,e,f,A,B,C,D,E,F}
octal={0,1,2,3,4,5,6,7}
nonspecial={a,b,c,d,e,f,A,B,C,D,E,F,x,X,:,],0,1,2,3,4,5,6,7,8,9,\,,-}
nonspecial2={a,b,c,d,e,f,A,B,C,D,E,F,x,X,:,0,1,2,3,4,5,6,7,8,9}

S-><alt> | epsilon

alt-><string>,<alt2>
altB-><string>,<alt2> | epsilon,[empty]
alt2->pipe,<alt>,[alternate] | epsilon

string-><atom_repeat>,<string2>
string2-><atom_repeat>,<string2>,[create_sequence] | epsilon

atom_repeat-><atom>,<repeat>

atom-><character>|<nonchar>|<block>|<character_class>

repeat->epsilon | plus,[plus],<mod>|star,[star],<mod>|questionmark,[optional],<mod>
repeat->copen,<number>,[repeat_exactly],cclose
repeat->copen,<number>,comma,[repeat_at_least],cclose,<mod>
repeat->copen,<number>,comma,<number>,[repeat_ab],cclose,<mod>
mod->epsilon | questionmark,[nongreedy] | plus,[possessive]

number->[digit],num,<number2>,[insert_number]
number2->epsilon | [digit],num,<number2>

block->open,<altB>,[make_catch],close | open,questionmark,colon,<altB>,close

character_class->sopen,[set_start],<set>,sclose,[set_finish]|sopen,caret,[set_neg_start],<set>,sclose,[set_finish]
set-><setitem>,<set2>
set2-><setitem>,[set_combine],<set2>|epsilon

setitem->[addchar],other,<range> | [addchar],nonspecial2,<range> | [addchar],dot,<range>
range->epsilon|minus,[insert_range],other|minus,[insert_range],nonspecial2

setitem->backslash,<backslash> | backslash,b,[backb]

backslash->[addchar],<special>
special->dot|sopen|copen|cclose|open|close|backslash|star|plus|questionmark|pipe|caret|dolar

backslash->a,[backa]
backslash->x,[hdigit],hexdec,[hdigit],hexdec,[hexdec2]
backslash->x,copen,[hdigit],hexdec,[hdigit],hexdec,[hdigit],hexdec,[hdigit],hexdec,cclose,[hexdec4]
backslash->0,[odigit],octal,[odigit],octal,[odigit],octal,[octal3]
backslash_char->d,[class_digit]

character->[addchar],other | [addchar],nonspecial | dot,[any] | backslash,<backslash> | backslash,<backslash_char>
nonchar->caret,[insert_begin] | dolar,[insert_end]

0 comments on commit 90ac938

Please sign in to comment.