diff --git a/src/fsqlf.d b/src/fsqlf.d index 8a5c81f..fa8928f 100644 --- a/src/fsqlf.d +++ b/src/fsqlf.d @@ -17,7 +17,7 @@ void main() void format_sql(Keyword[string] k, Keyword[string] i, string input, File output=std.stdio.stdout) { auto input_text = read_input(input); - auto tokens = lex(input_text, k, i); // split text into words, puntation/space chars and comments + auto tokens = preprocess(input_text, k, i); // split text into words, punctuation/space chars and comments auto keywords = parse(tokens, k); // recognise logical keywords like 'LEFT OUTER JOIN'; Also handle such cases as LEFT /*f */ JOIN auto kw_spaced = space_insert(keywords); // insert spaces simply by looking at the keywords auto kw_formed = space_adjust(kw_spaced); // adjust spacing by context @@ -30,7 +30,7 @@ auto read_input(in string input) { return input; } -ref auto lex(in string input, Keyword[string] keywordList, Keyword[string] ignoredByParser) +ref auto preprocess(in string input, Keyword[string] keywordList, Keyword[string] ignoredByParser) { auto start = 0; Token[] resultTokens; diff --git a/src/higher_types.d b/src/higher_types.d index 5c2a6f3..135674f 100644 --- a/src/higher_types.d +++ b/src/higher_types.d @@ -3,6 +3,8 @@ module higher_types; import types; +alias uint t_index; /* index type for positions within a Token[]; replaces ulong in the helpers changed below. NOTE(review): uint is presumably narrower than array .length on 64-bit targets - confirm callers */ + Keyword[string] keywordList, ignoredByParser, allOtherMatches; static this() { @@ -40,8 +42,6 @@ static this() ,"')'" : K("", S(0,0,0), S(0,0,0), ")" , "", true, `\)`) //&debug_p,&inc_RIGHTP,NULL ,NULL ,NULL,NULL ) ,"subquery '('" : K("", S(1,0,0), S(0,0,0), "(" , "", true, `\(`) //&debug_p,&inc_LEFTP ,NULL ,&begin_SUB,NULL,NULL ) ,"subquery ')'" : K("", S(1,0,0), S(1,0,0), ")" , "", true, `\)`) //&debug_p,&inc_RIGHTP,&end_SUB,NULL ,NULL,NULL ) - - //,"space" : K("", S(1,0,0), S(1,0,0), " " , "", true, `( |\n|\t)+`) ,"number" : K("", S(1,0,0), S(1,0,0), " " , "", true, `\d+`) ]; @@ -67,7 +67,7 @@ struct Token string text; ulong length; - auto toString() + pure auto toString() { return "(" ~ this.name ~ " : " 
~ this.text ~ ")"; } @@ -77,13 +77,13 @@ struct Token } - this(in string name, in string text) + pure this(in string name, in string text) { this.name = name; this.text = text; this.length = this.text.length; } - auto opEquals(Token b) + pure auto opEquals(Token b) { return this.name == b.name && this.text == b.text && this.length == b.length; } @@ -121,25 +121,25 @@ unittest } -/* Get Keeyword from the front of token[] container */ +/* Get Keyword from the front of token[] container */ auto getFrontKeyword(ref Token[] tokens, Keyword[string] keywordCollection) { import std.algorithm; if(tokens[0].text == "") return Token("EOF",tokens[0].text); foreach( kwname, kw ; keywordCollection) { - auto n = 9; // longest keyword seems to be 3 words. Add buffer 9 just in case :) //kw.getLongestWordCount(); - use this to optimise if needed later - ulong[] ix = nTokenIndexesByName(tokens, n, "keyword"); - string[] nextNWords = extractTokenTextsByIndexes(tokens, ix); + auto n = 9; // longest keyword seems to be 3 words. 
9 will surely enough //kw.getLongestWordCount(); - use this to optimise if needed later + t_index[] keywordIndexes = nTokenIndexesByName(tokens, n, "keyword"); + string[] nextNWords = extractTokenTextsByIndexes(tokens, keywordIndexes); - auto nbrOfMatchedWords = kw.matchKeyword(nextNWords); - if(nbrOfMatchedWords) + auto nbrOfMatchedWords = kw.matchedWordcount(nextNWords); + if(nbrOfMatchedWords > 0) { /* leave first - delete others words from input, because they are allready matched and should not be used by later matches */ for(auto i = 0 ; i < nbrOfMatchedWords ; i++) { - tokens[ix[i]].text = " "; - tokens[ix[i]].name = "space"; // TODO : do better deletion - probably move to linked lists + tokens[keywordIndexes[i]].text = " "; + tokens[keywordIndexes[i]].name = "space"; // TODO : do better deletion - probably move to linked lists } return Token(kwname, std_algorithm_joiner(nextNWords[0..nbrOfMatchedWords])); } @@ -168,12 +168,12 @@ unittest } -pure ref auto extractTokenTextsByIndexes(in Token[] tokens, in ulong[] indexes) +pure ref auto extractTokenTextsByIndexes(in Token[] tokens, in t_index[] indexes) { assert(indexes.length >= 0); assert(indexes.length <= tokens.length); string[] extractedTokens; - foreach(ulong i ; indexes) // get next 'n' 'keywords' which are not spaces nor comments + foreach(t_index i ; indexes) // get next 'n' 'keywords' which are not spaces nor comments { extractedTokens ~= tokens[i].text; } @@ -193,12 +193,12 @@ auto std_algorithm_joiner(string[] x, string separator = " ") //nIndexesOfFunctionalTokens -pure auto nTokenIndexesByName(in Token[] tokens, ulong n, string tokenName) +pure auto nTokenIndexesByName(in Token[] tokens, t_index n, string tokenName) { assert(n>0); assert(n<10); // can't think of any keyword containing that much - ulong[] result; // will be returned - ulong i = 0; + t_index[] result; // will be returned + t_index i = 0; import std.stdio; do @@ -236,11 +236,11 @@ bool isMember(T)(T item, T[] array) } -pure auto 
closestTokenByName(in Token[] tokens, in ulong currentIndex, in string tokenName) +pure auto closestTokenByName(in Token[] tokens, in t_index currentIndex, in string tokenName) { assert(tokens[currentIndex].name != "EOF"); assert(currentIndex < tokens.length); - ulong resultIndex = currentIndex; + t_index resultIndex = currentIndex; while(resultIndex+1 < tokens.length && tokens[resultIndex].name != tokenName) ++resultIndex; diff --git a/src/types.d b/src/types.d index dd901e9..b43b3e9 100644 --- a/src/types.d +++ b/src/types.d @@ -3,10 +3,10 @@ module types; struct Spacing { - int newLines=0; - int tabs=0; - int spaces=0; - string tab=" "; + int newLines = 0; + int tabs = 0; + int spaces = 0; + string tab = " "; /* Generate string which will be the output of this spacing configuration */ pure auto outputString() @@ -27,6 +27,49 @@ struct Spacing } +struct KeywordText +{ + import std.regex; + + string vLong; // Long version (e.g. "LEFT OUTER JOIN") + string vShort; // Short version (e.g. "LEFT JOIN") + Regex!(char) patern; // some keywords should be recognised using regex + + /* Describes what method to use during the matching */ + enum MatchMethod{ text, patern }; + MatchMethod matchMethod; + + this(string singleTextVersion) + { + this.vShort = singleTextVersion; + this.vLong = singleTextVersion; + this.matchMethod = MatchMethod.text; + } + this(string textShort,string textLong) + { + this.vShort = textShort; + this.vLong = textLong; + this.matchMethod = MatchMethod.text; + } + this(Regex!(char) patern, string text="") + { + this.patern = patern; + this.matchMethod = MatchMethod.patern; + this.vShort = text; // only for printing + this.vLong = text; // only for printing + } + + /* Keywords may contain many words. (NOTE(review): matchOneWord below never reads sqlText and has no return statement - looks unfinished.) 
Objective of the function is to match at least one word - prefferably longer */ + auto matchOneWord(string sqlText) + { + import std.algorithm:map; + import std.array:split; + auto paterns = std.algorithm.map!(std.regex.regex)(std.array.split(this.vLong)); + paterns ~= std.algorithm.map!(std.regex.regex)(std.array.split(this.vShort)); + } + +} + enum KeywordType { simple, composite }; @@ -87,28 +130,28 @@ struct Keyword /* get longest possible word count, taking into account short and long versions */ - auto getLongestWordCount() + pure auto getLongestWordCount() { - assert(this.getShortWordCount() <= this.getLongWordCount()); + assert(Keyword.wordCount(this.textShort) <= Keyword.wordCount(this.textLong)); switch(this.keywordType) { case KeywordType.simple: return 1; - default: return getLongWordCount(); + default: return Keyword.wordCount(this.textLong); } } - auto matchKeyword(string[] txt) + auto matchedWordcount(string[] txt) { //import std.stdio; //writeln("in matchKeyword: this.keywordType=", this.keywordType); switch(this.keywordType) { case KeywordType.simple: - return this.matchSimpleKeyword(txt); + return this.matchAgainstRegex(txt); break; case KeywordType.composite: - return this.matchCompositeKeyword(txt); + return this.matchAgainstText(txt); break; default: assert(0); } @@ -149,7 +192,7 @@ private: /* Choose default value for Keyword members */ - auto chooseDefaultValue() + pure auto chooseDefaultValue() { assert(this.textShort != "" || this.textLong != ""); if(this.textShort=="") @@ -166,7 +209,8 @@ private: } - auto matchSimpleKeyword(string[] inputWord) + /* Match token against predefined Keyword's regex */ + auto matchAgainstRegex(string[] inputWord) { assert(this.keywordType == KeywordType.simple); assert(inputWord.length>0); @@ -174,17 +218,18 @@ private: } - auto matchCompositeKeyword(string[] inputWords) + /* Match tokens against Keyword's text */ + auto matchAgainstText(string[] inputWords) { assert(this.keywordType == KeywordType.composite); 
import std.stdio; //if(this.textShort == "LEFT JOIN") writeln(inputWords); import std.array; - if(inputWords.length < this.getLongWordCount() - && inputWords.length < this.getShortWordCount()) return 0; - if( matchAgainstKeywordText(inputWords, this.textLong )) return this.getLongWordCount(); - else if(matchAgainstKeywordText(inputWords, this.textShort)) return this.getShortWordCount(); + if(inputWords.length < Keyword.wordCount(this.textLong) + && inputWords.length < Keyword.wordCount(this.textShort)) return 0; + if( matchAgainstKeywordText(inputWords, this.textLong )) return Keyword.wordCount(this.textLong); + else if(matchAgainstKeywordText(inputWords, this.textShort)) return Keyword.wordCount(this.textShort); else return 0; } unittest @@ -193,9 +238,9 @@ import std.stdio; kw.textLong = "LEFT OUTER JOIN"; kw.textShort = "LEFT JOIN"; kw.initDefaults(); - assert( kw.matchCompositeKeyword(["LeFT", "outer", "JOIN"]) ); - assert( kw.matchCompositeKeyword(["LeFT", "JOIN", "table"]) ); - assert(!kw.matchCompositeKeyword(["LeFT", "inner", "JOIN"])); + assert( kw.matchAgainstText(["LeFT", "outer", "JOIN"]) ); + assert( kw.matchAgainstText(["LeFT", "JOIN", "table"]) ); + assert(!kw.matchAgainstText(["LeFT", "inner", "JOIN"])); } @@ -222,16 +267,10 @@ import std.stdio; } - auto getShortWordCount() - { - import std.array:split; - return std.array.split(this.textShort).length; - } - - - auto getLongWordCount() + pure static auto wordCount(in string text) { import std.array; - return std.array.split(this.textLong).length; + return std.array.split(text).length; } + }