Skip to content

Commit

Permalink
Split the different Tokenizers for the TurtleParser into separate files.
Browse files Browse the repository at this point in the history
* This cleans up the software architecture.
* Also refactor some of the TokenizerTests in the same way, so that they use less RAM during compilation
  (the CTRE module is very RAM-intensive).
  • Loading branch information
joka921 committed Jul 27, 2021
1 parent 98918e2 commit 0c74314
Show file tree
Hide file tree
Showing 12 changed files with 684 additions and 580 deletions.
2 changes: 1 addition & 1 deletion src/parser/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ add_library(parser
ContextFileParser.cpp ContextFileParser.h
ParallelParseBuffer.h
PropertyPathParser.h PropertyPathParser.cpp
SparqlLexer.h SparqlLexer.cpp)
SparqlLexer.h SparqlLexer.cpp TokenizerCtre.h TurtleTokenId.h)
target_link_libraries(parser rdfEscaping re2 absl::flat_hash_map)
2 changes: 2 additions & 0 deletions src/parser/SparqlLexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include "../util/StringUtils.h"
#include "ParseException.h"
#include "Tokenizer.h"
#include "./RdfEscaping.h"


const std::string SparqlToken::TYPE_NAMES[] = {
"IRI", "WS", "KEYWORD", "VARIABLE", "SYMBOL",
Expand Down
58 changes: 29 additions & 29 deletions src/parser/Tokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,63 +52,63 @@ std::tuple<bool, size_t, std::string> Tokenizer::getNextToken(
}

// ______________________________________________________________________________________________________
const RE2& Tokenizer::idToRegex(const TokId reg) {
const RE2& Tokenizer::idToRegex(const TurtleTokenId reg) {
switch (reg) {
case TokId::TurtlePrefix:
case TurtleTokenId::TurtlePrefix:
return _tokens.TurtlePrefix;
case TokId::SparqlPrefix:
case TurtleTokenId::SparqlPrefix:
return _tokens.SparqlPrefix;
case TokId::TurtleBase:
case TurtleTokenId::TurtleBase:
return _tokens.TurtleBase;
case TokId::SparqlBase:
case TurtleTokenId::SparqlBase:
return _tokens.SparqlBase;
case TokId::Dot:
case TurtleTokenId::Dot:
return _tokens.Dot;
case TokId::Comma:
case TurtleTokenId::Comma:
return _tokens.Comma;
case TokId::Semicolon:
case TurtleTokenId::Semicolon:
return _tokens.Semicolon;
case TokId::OpenSquared:
case TurtleTokenId::OpenSquared:
return _tokens.OpenSquared;
case TokId::CloseSquared:
case TurtleTokenId::CloseSquared:
return _tokens.CloseSquared;
case TokId::OpenRound:
case TurtleTokenId::OpenRound:
return _tokens.OpenRound;
case TokId::CloseRound:
case TurtleTokenId::CloseRound:
return _tokens.CloseRound;
case TokId::A:
case TurtleTokenId::A:
return _tokens.A;
case TokId::DoubleCircumflex:
case TurtleTokenId::DoubleCircumflex:
return _tokens.DoubleCircumflex;
case TokId::True:
case TurtleTokenId::True:
return _tokens.True;
case TokId::False:
case TurtleTokenId::False:
return _tokens.False;
case TokId::Langtag:
case TurtleTokenId::Langtag:
return _tokens.Langtag;
case TokId::Decimal:
case TurtleTokenId::Decimal:
return _tokens.Decimal;
case TokId::Exponent:
case TurtleTokenId::Exponent:
return _tokens.Exponent;
case TokId::Double:
case TurtleTokenId::Double:
return _tokens.Double;
case TokId::Iriref:
case TurtleTokenId::Iriref:
return _tokens.Iriref;
case TokId::PnameNS:
case TurtleTokenId::PnameNS:
return _tokens.PnameNS;
case TokId::PnameLN:
case TurtleTokenId::PnameLN:
return _tokens.PnameLN;
case TokId::PnLocal:
case TurtleTokenId::PnLocal:
return _tokens.PnLocal;
case TokId::BlankNodeLabel:
case TurtleTokenId::BlankNodeLabel:
return _tokens.BlankNodeLabel;
case TokId::WsMultiple:
case TurtleTokenId::WsMultiple:
return _tokens.WsMultiple;
case TokId::Anon:
case TurtleTokenId::Anon:
return _tokens.Anon;
case TokId::Comment:
case TurtleTokenId::Comment:
return _tokens.Comment;
case TokId::Integer:
case TurtleTokenId::Integer:
return _tokens.Integer;
}
throw std::runtime_error(
Expand Down

0 comments on commit 0c74314

Please sign in to comment.