refactoring in progress

dnsmkl · May 1, 2012 · 3269f80 · 3269f80
1 parent 1d47fa9
commit 3269f80
Show file tree

Hide file tree

Showing 3 changed files with 90 additions and 51 deletions.
diff --git a/src/fsqlf.d b/src/fsqlf.d
@@ -17,7 +17,7 @@ void main()
 void format_sql(Keyword[string] k, Keyword[string] i, string input, File output=std.stdio.stdout)
 {
     auto input_text  = read_input(input);
-    auto tokens      = lex(input_text, k, i);   // split text into words, puntation/space chars and comments
+    auto tokens      = preprocess(input_text, k, i);   // split text into words, punctuation/space chars and comments
     auto keywords    = parse(tokens, k);        // recognise logical keywords like 'LEFT OUTER JOIN'; Also handle such cases as LEFT /*f */ JOIN
     auto kw_spaced   = space_insert(keywords);  // insert spaces simply by looking at the keywords
     auto kw_formed   = space_adjust(kw_spaced); // adjust spacing by context
@@ -30,7 +30,7 @@ auto read_input(in string input) { return input; }
 
 
 
-ref auto lex(in string input, Keyword[string] keywordList, Keyword[string] ignoredByParser)
+ref auto preprocess(in string input, Keyword[string] keywordList, Keyword[string] ignoredByParser)
 {
     auto start = 0;
     Token[] resultTokens;

diff --git a/src/higher_types.d b/src/higher_types.d
@@ -3,6 +3,8 @@ module higher_types;
 
 import types;
 
+alias uint t_index;
+
 Keyword[string] keywordList, ignoredByParser, allOtherMatches;
 static this()
 {
@@ -40,8 +42,6 @@ static this()
         ,"')'"          : K("",   S(0,0,0), S(0,0,0), ")"        , "",  true,      `\)`) //&debug_p,&inc_RIGHTP,NULL    ,NULL      ,NULL,NULL )
         ,"subquery '('" : K("",   S(1,0,0), S(0,0,0), "("        , "",  true,      `\(`) //&debug_p,&inc_LEFTP ,NULL    ,&begin_SUB,NULL,NULL )
         ,"subquery ')'" : K("",   S(1,0,0), S(1,0,0), ")"        , "",  true,      `\)`) //&debug_p,&inc_RIGHTP,&end_SUB,NULL      ,NULL,NULL )
-
-        //,"space"      : K("",   S(1,0,0), S(1,0,0), " "          , "",  true, `( |\n|\t)+`)
         ,"number"     : K("",   S(1,0,0), S(1,0,0), " "          , "",  true, `\d+`)
     ];
 
@@ -67,7 +67,7 @@ struct Token
     string text;
     ulong length;
 
-    auto toString()
+    pure auto toString()
     {
         return "(" ~ this.name ~ " : " ~ this.text ~ ")";
     }
@@ -77,13 +77,13 @@ struct Token
     }
 
 
-    this(in string name, in string text)
+    pure this(in string name, in string text)
     {
         this.name = name;
         this.text = text;
         this.length = this.text.length;
     }
-    auto opEquals(Token b)
+    pure auto opEquals(Token b)
     {
         return this.name == b.name && this.text == b.text && this.length == b.length;
     }
@@ -121,25 +121,25 @@ unittest
 }
 
 
-/* Get Keeyword from the front of token[] container */
+/* Get Keyword from the front of token[] container */
 auto getFrontKeyword(ref Token[] tokens, Keyword[string] keywordCollection)
 {
     import std.algorithm;
     if(tokens[0].text == "") return Token("EOF",tokens[0].text);
     foreach( kwname, kw ; keywordCollection)
     {
-        auto n = 9; // longest keyword seems to be 3 words. Add buffer 9 just in case :) //kw.getLongestWordCount(); - use this to optimise if needed later
-        ulong[] ix = nTokenIndexesByName(tokens, n, "keyword");
-        string[] nextNWords = extractTokenTextsByIndexes(tokens, ix);
+        auto n = 9; // longest keyword seems to be 3 words. 9 will surely be enough //kw.getLongestWordCount(); - use this to optimise if needed later
+        t_index[] keywordIndexes = nTokenIndexesByName(tokens, n, "keyword");
+        string[] nextNWords = extractTokenTextsByIndexes(tokens, keywordIndexes);
 
-        auto nbrOfMatchedWords = kw.matchKeyword(nextNWords);
-        if(nbrOfMatchedWords)
+        auto nbrOfMatchedWords = kw.matchedWordcount(nextNWords);
+        if(nbrOfMatchedWords > 0)
         {
             /* leave first - delete others words from input, because they are allready matched and should not be used by later matches */
             for(auto i = 0 ; i < nbrOfMatchedWords ; i++) 
             {
-                tokens[ix[i]].text = " ";
-                tokens[ix[i]].name = "space"; // TODO : do better deletion - probably move to linked lists
+                tokens[keywordIndexes[i]].text = " ";
+                tokens[keywordIndexes[i]].name = "space"; // TODO : do better deletion - probably move to linked lists
             }
             return Token(kwname, std_algorithm_joiner(nextNWords[0..nbrOfMatchedWords]));
         }
@@ -168,12 +168,12 @@ unittest
 }
 
 
-pure ref auto extractTokenTextsByIndexes(in Token[] tokens, in ulong[] indexes)
+pure ref auto extractTokenTextsByIndexes(in Token[] tokens, in t_index[] indexes)
 {
     assert(indexes.length >= 0);
     assert(indexes.length <= tokens.length);
     string[] extractedTokens;
-    foreach(ulong i ; indexes) // get next 'n' 'keywords' which are not spaces nor comments
+    foreach(t_index i ; indexes) // get next 'n' 'keywords' which are not spaces nor comments
     {
         extractedTokens ~= tokens[i].text;
     }
@@ -193,12 +193,12 @@ auto std_algorithm_joiner(string[] x, string separator = " ")
 
 
 //nIndexesOfFunctionalTokens
-pure auto nTokenIndexesByName(in Token[] tokens, ulong n, string tokenName)
+pure auto nTokenIndexesByName(in Token[] tokens, t_index n, string tokenName)
 {
     assert(n>0);
     assert(n<10); // can't think of any keyword containing that much
-    ulong[] result; // will be returned
-    ulong i = 0;
+    t_index[] result; // will be returned
+    t_index i = 0;
     import std.stdio;
 
     do
@@ -236,11 +236,11 @@ bool isMember(T)(T item, T[] array)
 }
 
 
-pure auto closestTokenByName(in Token[] tokens, in ulong currentIndex, in string tokenName)
+pure auto closestTokenByName(in Token[] tokens, in t_index currentIndex, in string tokenName)
 {
     assert(tokens[currentIndex].name != "EOF");
     assert(currentIndex < tokens.length);
-    ulong resultIndex = currentIndex;
+    t_index resultIndex = currentIndex;
 
     while(resultIndex+1 < tokens.length && tokens[resultIndex].name != tokenName) ++resultIndex;
 

diff --git a/src/types.d b/src/types.d
@@ -3,10 +3,10 @@ module types;
 
 struct Spacing
 {
-    int newLines=0;
-    int tabs=0;
-    int spaces=0;
-    string tab="    ";
+    int newLines = 0;
+    int tabs = 0;
+    int spaces = 0;
+    string tab = "    ";
 
     /* Generate string which will be the output of this spacing configuration */
     pure auto outputString()
@@ -27,6 +27,49 @@ struct Spacing
 }
 
 
+struct KeywordText
+{
+    import std.regex;
+
+    string vLong;        // Long version (e.g. "LEFT OUTER JOIN")
+    string vShort;       // Short version (e.g. "LEFT JOIN")
+    Regex!(char) patern; // some keywords should be recognised using regex
+
+    /* Describes what method to use during the matching */
+    enum MatchMethod{ text, patern };
+    MatchMethod matchMethod;
+
+    this(string singleTextVersion)
+    {
+        this.vShort = singleTextVersion;
+        this.vLong  = singleTextVersion;
+        this.matchMethod = MatchMethod.text;
+    }
+    this(string textShort,string textLong)
+    {
+        this.vShort = textShort;
+        this.vLong  = textLong;
+        this.matchMethod = MatchMethod.text;
+    }
+    this(Regex!(char) patern, string text="")
+    {
+        this.patern = patern;
+        this.matchMethod = MatchMethod.patern;
+        this.vShort = text; // only for printing
+        this.vLong  = text; // only for printing
+    }
+
+    /* Keywords may contain many words. Objective of the function is to match at least one word - preferably longer */
+    auto matchOneWord(string sqlText)
+    {
+        import std.algorithm:map;
+        import std.array:split;
+        auto paterns = std.algorithm.map!(std.regex.regex)(std.array.split(this.vLong));
+        paterns     ~= std.algorithm.map!(std.regex.regex)(std.array.split(this.vShort));
+    }
+
+}
+
 enum KeywordType { simple, composite };
 
 
@@ -87,28 +130,28 @@ struct Keyword
 
 
     /* get longest possible word count, taking into account short and long versions */
-    auto getLongestWordCount()
+    pure auto getLongestWordCount()
     {
-        assert(this.getShortWordCount() <= this.getLongWordCount());
+        assert(Keyword.wordCount(this.textShort) <= Keyword.wordCount(this.textLong));
         switch(this.keywordType)
         {
             case KeywordType.simple: return 1;
-            default:                 return getLongWordCount();
+            default:                 return Keyword.wordCount(this.textLong);
         }
     }
 
 
-    auto matchKeyword(string[] txt)
+    auto matchedWordcount(string[] txt)
     {
 //import std.stdio;
 //writeln("in matchKeyword: this.keywordType=", this.keywordType);
         switch(this.keywordType)
         {
             case KeywordType.simple:
-                return this.matchSimpleKeyword(txt);
+                return this.matchAgainstRegex(txt);
                 break;
             case KeywordType.composite:
-                return this.matchCompositeKeyword(txt);
+                return this.matchAgainstText(txt);
                 break;
             default: assert(0);
         }
@@ -149,7 +192,7 @@ private:
 
 
     /* Choose default value for Keyword members */
-    auto chooseDefaultValue()
+    pure auto chooseDefaultValue()
     {
         assert(this.textShort != "" || this.textLong != "");
         if(this.textShort=="")
@@ -166,25 +209,27 @@ private:
     }
 
 
-    auto matchSimpleKeyword(string[] inputWord)
+    /* Match token against predefined Keyword's regex */
+    auto matchAgainstRegex(string[] inputWord)
     {
         assert(this.keywordType == KeywordType.simple);
         assert(inputWord.length>0);
         return this.matchTokens(inputWord[0]) ? 1 : 0;
     }
 
 
-    auto matchCompositeKeyword(string[] inputWords)
+    /* Match tokens against Keyword's text */
+    auto matchAgainstText(string[] inputWords)
     {
         assert(this.keywordType == KeywordType.composite);
 import std.stdio;
 //if(this.textShort == "LEFT JOIN") writeln(inputWords);
 
         import std.array;
-        if(inputWords.length < this.getLongWordCount()
-            && inputWords.length < this.getShortWordCount()) return 0;
-        if(     matchAgainstKeywordText(inputWords, this.textLong )) return this.getLongWordCount();
-        else if(matchAgainstKeywordText(inputWords, this.textShort)) return this.getShortWordCount();
+        if(inputWords.length < Keyword.wordCount(this.textLong)
+            && inputWords.length < Keyword.wordCount(this.textShort)) return 0;
+        if(     matchAgainstKeywordText(inputWords, this.textLong )) return Keyword.wordCount(this.textLong);
+        else if(matchAgainstKeywordText(inputWords, this.textShort)) return Keyword.wordCount(this.textShort);
         else return 0;
     }
     unittest
@@ -193,9 +238,9 @@ import std.stdio;
         kw.textLong  = "LEFT OUTER JOIN";
         kw.textShort = "LEFT JOIN";
         kw.initDefaults();
-        assert( kw.matchCompositeKeyword(["LeFT", "outer", "JOIN"]) );
-        assert( kw.matchCompositeKeyword(["LeFT", "JOIN", "table"]) );
-        assert(!kw.matchCompositeKeyword(["LeFT", "inner", "JOIN"]));
+        assert( kw.matchAgainstText(["LeFT", "outer", "JOIN"]) );
+        assert( kw.matchAgainstText(["LeFT", "JOIN", "table"]) );
+        assert(!kw.matchAgainstText(["LeFT", "inner", "JOIN"]));
     }
 
 
@@ -222,16 +267,10 @@ import std.stdio;
     }
 
 
-    auto getShortWordCount()
-    {
-        import std.array:split;
-        return std.array.split(this.textShort).length;
-    }
-
-
-    auto getLongWordCount()
+    pure static auto wordCount(in string text)
     {
         import std.array;
-        return std.array.split(this.textLong).length;
+        return std.array.split(text).length;
     }
+
 }