Skip to content

Commit

Permalink
refactoring in progress
Browse files Browse the repository at this point in the history
  • Loading branch information
dnsmkl committed May 1, 2012
1 parent 1d47fa9 commit 3269f80
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 51 deletions.
4 changes: 2 additions & 2 deletions src/fsqlf.d
Expand Up @@ -17,7 +17,7 @@ void main()
void format_sql(Keyword[string] k, Keyword[string] i, string input, File output=std.stdio.stdout)
{
auto input_text = read_input(input);
auto tokens = lex(input_text, k, i); // split text into words, puntation/space chars and comments
auto tokens = preprocess(input_text, k, i); // split text into words, puntation/space chars and comments
auto keywords = parse(tokens, k); // recognise logical keywords like 'LEFT OUTER JOIN'; Also handle such cases as LEFT /*f */ JOIN
auto kw_spaced = space_insert(keywords); // insert spaces simply by looking at the keywords
auto kw_formed = space_adjust(kw_spaced); // adjust spacing by context
Expand All @@ -30,7 +30,7 @@ auto read_input(in string input) { return input; }



ref auto lex(in string input, Keyword[string] keywordList, Keyword[string] ignoredByParser)
ref auto preprocess(in string input, Keyword[string] keywordList, Keyword[string] ignoredByParser)
{
auto start = 0;
Token[] resultTokens;
Expand Down
40 changes: 20 additions & 20 deletions src/higher_types.d
Expand Up @@ -3,6 +3,8 @@ module higher_types;

import types;

alias uint t_index;

Keyword[string] keywordList, ignoredByParser, allOtherMatches;
static this()
{
Expand Down Expand Up @@ -40,8 +42,6 @@ static this()
,"')'" : K("", S(0,0,0), S(0,0,0), ")" , "", true, `\)`) //&debug_p,&inc_RIGHTP,NULL ,NULL ,NULL,NULL )
,"subquery '('" : K("", S(1,0,0), S(0,0,0), "(" , "", true, `\(`) //&debug_p,&inc_LEFTP ,NULL ,&begin_SUB,NULL,NULL )
,"subquery ')'" : K("", S(1,0,0), S(1,0,0), ")" , "", true, `\)`) //&debug_p,&inc_RIGHTP,&end_SUB,NULL ,NULL,NULL )

//,"space" : K("", S(1,0,0), S(1,0,0), " " , "", true, `( |\n|\t)+`)
,"number" : K("", S(1,0,0), S(1,0,0), " " , "", true, `\d+`)
];

Expand All @@ -67,7 +67,7 @@ struct Token
string text;
ulong length;

auto toString()
pure auto toString()
{
return "(" ~ this.name ~ " : " ~ this.text ~ ")";
}
Expand All @@ -77,13 +77,13 @@ struct Token
}


this(in string name, in string text)
pure this(in string name, in string text)
{
this.name = name;
this.text = text;
this.length = this.text.length;
}
auto opEquals(Token b)
pure auto opEquals(Token b)
{
return this.name == b.name && this.text == b.text && this.length == b.length;
}
Expand Down Expand Up @@ -121,25 +121,25 @@ unittest
}


/* Get Keeyword from the front of token[] container */
/* Get Keyword from the front of token[] container */
auto getFrontKeyword(ref Token[] tokens, Keyword[string] keywordCollection)
{
import std.algorithm;
if(tokens[0].text == "") return Token("EOF",tokens[0].text);
foreach( kwname, kw ; keywordCollection)
{
auto n = 9; // longest keyword seems to be 3 words. Add buffer 9 just in case :) //kw.getLongestWordCount(); - use this to optimise if needed later
ulong[] ix = nTokenIndexesByName(tokens, n, "keyword");
string[] nextNWords = extractTokenTextsByIndexes(tokens, ix);
auto n = 9; // longest keyword seems to be 3 words. 9 will surely enough //kw.getLongestWordCount(); - use this to optimise if needed later
t_index[] keywordIndexes = nTokenIndexesByName(tokens, n, "keyword");
string[] nextNWords = extractTokenTextsByIndexes(tokens, keywordIndexes);

auto nbrOfMatchedWords = kw.matchKeyword(nextNWords);
if(nbrOfMatchedWords)
auto nbrOfMatchedWords = kw.matchedWordcount(nextNWords);
if(nbrOfMatchedWords > 0)
{
/* leave first - delete other words from input, because they are already matched and should not be used by later matches */
for(auto i = 0 ; i < nbrOfMatchedWords ; i++)
{
tokens[ix[i]].text = " ";
tokens[ix[i]].name = "space"; // TODO : do better deletion - probably move to linked lists
tokens[keywordIndexes[i]].text = " ";
tokens[keywordIndexes[i]].name = "space"; // TODO : do better deletion - probably move to linked lists
}
return Token(kwname, std_algorithm_joiner(nextNWords[0..nbrOfMatchedWords]));
}
Expand Down Expand Up @@ -168,12 +168,12 @@ unittest
}


pure ref auto extractTokenTextsByIndexes(in Token[] tokens, in ulong[] indexes)
pure ref auto extractTokenTextsByIndexes(in Token[] tokens, in t_index[] indexes)
{
assert(indexes.length >= 0);
assert(indexes.length <= tokens.length);
string[] extractedTokens;
foreach(ulong i ; indexes) // get next 'n' 'keywords' which are not spaces nor comments
foreach(t_index i ; indexes) // get next 'n' 'keywords' which are not spaces nor comments
{
extractedTokens ~= tokens[i].text;
}
Expand All @@ -193,12 +193,12 @@ auto std_algorithm_joiner(string[] x, string separator = " ")


//nIndexesOfFunctionalTokens
pure auto nTokenIndexesByName(in Token[] tokens, ulong n, string tokenName)
pure auto nTokenIndexesByName(in Token[] tokens, t_index n, string tokenName)
{
assert(n>0);
assert(n<10); // can't think of any keyword containing that much
ulong[] result; // will be returned
ulong i = 0;
t_index[] result; // will be returned
t_index i = 0;
import std.stdio;

do
Expand Down Expand Up @@ -236,11 +236,11 @@ bool isMember(T)(T item, T[] array)
}


pure auto closestTokenByName(in Token[] tokens, in ulong currentIndex, in string tokenName)
pure auto closestTokenByName(in Token[] tokens, in t_index currentIndex, in string tokenName)
{
assert(tokens[currentIndex].name != "EOF");
assert(currentIndex < tokens.length);
ulong resultIndex = currentIndex;
t_index resultIndex = currentIndex;

while(resultIndex+1 < tokens.length && tokens[resultIndex].name != tokenName) ++resultIndex;

Expand Down
97 changes: 68 additions & 29 deletions src/types.d
Expand Up @@ -3,10 +3,10 @@ module types;

struct Spacing
{
int newLines=0;
int tabs=0;
int spaces=0;
string tab=" ";
int newLines = 0;
int tabs = 0;
int spaces = 0;
string tab = " ";

/* Generate string which will be the output of this spacing configuration */
pure auto outputString()
Expand All @@ -27,6 +27,49 @@ struct Spacing
}


/* Textual description of a keyword: a long and a short spelling plus an
   optional regex, together with a flag saying which of the two (text or
   regex) should be used when matching. */
struct KeywordText
{
import std.regex;

string vLong; // Long version (e.g. "LEFT OUTER JOIN")
string vShort; // Short version (e.g. "LEFT JOIN")
Regex!(char) patern; // some keywords should be recognised using regex

/* Describes what method to use during the matching */
enum MatchMethod{ text, patern };
MatchMethod matchMethod; // set by every constructor below

/* Keyword with a single spelling: short and long versions are the same text; matched as text */
this(string singleTextVersion)
{
this.vShort = singleTextVersion;
this.vLong = singleTextVersion;
this.matchMethod = MatchMethod.text;
}
/* Keyword with separate short and long spellings; matched as text */
this(string textShort,string textLong)
{
this.vShort = textShort;
this.vLong = textLong;
this.matchMethod = MatchMethod.text;
}
/* Keyword recognised by regex; 'text' is stored in both vShort and vLong and defaults to "" */
this(Regex!(char) patern, string text="")
{
this.patern = patern;
this.matchMethod = MatchMethod.patern;
this.vShort = text; // only for printing
this.vLong = text; // only for printing
}

/* Keywords may contain many words. Objective of the function is to match at least one word - preferably longer */
/* NOTE(review): looks unfinished - 'sqlText' is never read and there is no
   return statement, so nothing is matched or returned yet. */
auto matchOneWord(string sqlText)
{
import std.algorithm:map;
import std.array:split;
// split the long version into words and turn each word into a regex
auto paterns = std.algorithm.map!(std.regex.regex)(std.array.split(this.vLong));
// same for the short version, appended to the collection above
// NOTE(review): 'paterns' is the result of map, not an array - verify that '~=' is accepted here
paterns ~= std.algorithm.map!(std.regex.regex)(std.array.split(this.vShort));
}

}

enum KeywordType { simple, composite };


Expand Down Expand Up @@ -87,28 +130,28 @@ struct Keyword


/* get longest possible word count, taking into account short and long versions */
auto getLongestWordCount()
pure auto getLongestWordCount()
{
assert(this.getShortWordCount() <= this.getLongWordCount());
assert(Keyword.wordCount(this.textShort) <= Keyword.wordCount(this.textLong));
switch(this.keywordType)
{
case KeywordType.simple: return 1;
default: return getLongWordCount();
default: return Keyword.wordCount(this.textLong);
}
}


auto matchKeyword(string[] txt)
auto matchedWordcount(string[] txt)
{
//import std.stdio;
//writeln("in matchKeyword: this.keywordType=", this.keywordType);
switch(this.keywordType)
{
case KeywordType.simple:
return this.matchSimpleKeyword(txt);
return this.matchAgainstRegex(txt);
break;
case KeywordType.composite:
return this.matchCompositeKeyword(txt);
return this.matchAgainstText(txt);
break;
default: assert(0);
}
Expand Down Expand Up @@ -149,7 +192,7 @@ private:


/* Choose default value for Keyword members */
auto chooseDefaultValue()
pure auto chooseDefaultValue()
{
assert(this.textShort != "" || this.textLong != "");
if(this.textShort=="")
Expand All @@ -166,25 +209,27 @@ private:
}


auto matchSimpleKeyword(string[] inputWord)
/* Match token against predefined Keyword's regex */
auto matchAgainstRegex(string[] inputWord)
{
assert(this.keywordType == KeywordType.simple);
assert(inputWord.length>0);
return this.matchTokens(inputWord[0]) ? 1 : 0;
}


auto matchCompositeKeyword(string[] inputWords)
/* Match tokens against Keyword's text */
auto matchAgainstText(string[] inputWords)
{
assert(this.keywordType == KeywordType.composite);
import std.stdio;
//if(this.textShort == "LEFT JOIN") writeln(inputWords);

import std.array;
if(inputWords.length < this.getLongWordCount()
&& inputWords.length < this.getShortWordCount()) return 0;
if( matchAgainstKeywordText(inputWords, this.textLong )) return this.getLongWordCount();
else if(matchAgainstKeywordText(inputWords, this.textShort)) return this.getShortWordCount();
if(inputWords.length < Keyword.wordCount(this.textLong)
&& inputWords.length < Keyword.wordCount(this.textShort)) return 0;
if( matchAgainstKeywordText(inputWords, this.textLong )) return Keyword.wordCount(this.textLong);
else if(matchAgainstKeywordText(inputWords, this.textShort)) return Keyword.wordCount(this.textShort);
else return 0;
}
unittest
Expand All @@ -193,9 +238,9 @@ import std.stdio;
kw.textLong = "LEFT OUTER JOIN";
kw.textShort = "LEFT JOIN";
kw.initDefaults();
assert( kw.matchCompositeKeyword(["LeFT", "outer", "JOIN"]) );
assert( kw.matchCompositeKeyword(["LeFT", "JOIN", "table"]) );
assert(!kw.matchCompositeKeyword(["LeFT", "inner", "JOIN"]));
assert( kw.matchAgainstText(["LeFT", "outer", "JOIN"]) );
assert( kw.matchAgainstText(["LeFT", "JOIN", "table"]) );
assert(!kw.matchAgainstText(["LeFT", "inner", "JOIN"]));
}


Expand All @@ -222,16 +267,10 @@ import std.stdio;
}


auto getShortWordCount()
{
import std.array:split;
return std.array.split(this.textShort).length;
}


auto getLongWordCount()
pure static auto wordCount(in string text)
{
import std.array;
return std.array.split(this.textLong).length;
return std.array.split(text).length;
}

}

0 comments on commit 3269f80

Please sign in to comment.