Skip to content

Commit

Permalink
normal blanks now parse properly
Browse files (browse the repository at this point in the history)
  • Loading branch information
khannatanmai committed Jun 26, 2020
1 parent 73f6f5c commit ab7eb07
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 40 deletions.
101 changes: 61 additions & 40 deletions apertium/transfer.cc
Expand Up @@ -2067,8 +2067,37 @@ Transfer::readToken(FILE *in)
int val = fgetwc_unlocked(in);
if(feof(in) || (val == 0 && internal_null_flush))
{
in_wblank = false;
return input_buffer.add(TransferToken(content, tt_eof));
}
if(in_wblank)
{
content += L"[[";

while(true)
{
int val3 = fgetwc_unlocked(in);
if(val3 == L'\\')
{
content += L'\\';
content += wchar_t(fgetwc_unlocked(in));
}
else if(val3 == L'$') //[[..]]^..$ is the LU
{
in_wblank = false;
return input_buffer.add(TransferToken(content, tt_word));
}
else if(val3 == L'\0' && null_flush)
{
in_wblank = false;
fflush(output);
}
else
{
content += wchar_t(val3);
}
}
}
if(val == '\\')
{
content += L'\\';
Expand All @@ -2087,29 +2116,10 @@ Transfer::readToken(FILE *in)
}
else if(val2 == L'[')
{ //wordbound blank
content += L'[';
in_wblank = true;
content.pop_back();

while(true)
{
int val3 = fgetwc_unlocked(in);
if(val3 == L'\\')
{
content += L'\\';
content += wchar_t(fgetwc_unlocked(in));
}
else if(val3 == L'$') //[[..]]^..$ is the LU
{
return input_buffer.add(TransferToken(content, tt_word));
}
else if(val3 == L'\0' && null_flush)
{
fflush(output);
}
else
{
content += wchar_t(val3);
}
}
return input_buffer.add(TransferToken(content, tt_blank));
}
else if(val2 == L']')
{
Expand All @@ -2132,6 +2142,7 @@ Transfer::readToken(FILE *in)
}
else if(val == L'\0' && null_flush)
{
in_wblank = false;
fflush(output);
}
else
Expand Down Expand Up @@ -2344,28 +2355,45 @@ Transfer::transfer(FILE *in, FILE *out)
}
continue;
}
else if(*it == L'[' && *(it+1) == L'[')
else if(*it == L'[')
{
while(true)
if(*(it+1) == L'[') //wordbound blank
{
if(*it == L'\\')
while(true)
{
wblank.push_back(*it);
if(*it == L'\\')
{
wblank.push_back(*it);
it++;
wblank.push_back(*it);
}
else if(*it == L'^' && *(it-1) == L']' && *(it-2) == L']')
{
break;
}
else
{
wblank.push_back(*it);
}

it++;
wblank.push_back(*it);
}
else if(*it == L'^' && *(it-1) == L']' && *(it-2) == L']')
}
else
{
if(seenSlash == 0)
{
sl.push_back(*it);
}
else if(seenSlash == 1)
{
break;
tl.push_back(*it);
}
else
{
wblank.push_back(*it);
ref.push_back(*it);
}

it++;
}

continue;
}
else if(*it == L'/')
Expand Down Expand Up @@ -2527,14 +2555,7 @@ Transfer::applyRule()
}
else
{
if(tmpblank.size() < i-1)
{
blank[i-1] = new string(UtfConverter::toUtf8(*tmpblank[i-1]));
}
else
{
blank[i-1] = new string(UtfConverter::toUtf8(L""));
}
}

pair<wstring, int> tr;
Expand Down
2 changes: 2 additions & 0 deletions apertium/transfer.h
Expand Up @@ -70,6 +70,8 @@ class Transfer
map <string, string> var_secondary_tags; //map variable name to secondary tags of the word it takes lem/lemh from
map <string, bool> var_has_lemq; //map variable name to bool->true if variable clips lemq

bool in_wblank;

bool gettingLemmaFromWord(string attr);

FSTProcessor fstp;
Expand Down

0 comments on commit ab7eb07

Please sign in to comment.