Skip to content

Commit

Permalink
reparse individual non-parsed words after full sentence (#94)
Browse files Browse the repository at this point in the history
* Had to add std:: here to make it compile

* Reparse individual non-parsed words after full sentence

mitigates #80

We splice in outputQueueReparsed instead of just replacing the chunk, in case
the output rule changes the number of LUs that are output.

* Tests for reparse #80

* Note to self (don't edit run_tests.py)
  • Loading branch information
unhammer committed Sep 5, 2023
1 parent d1356f0 commit fc7fe18
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 0 deletions.
32 changes: 32 additions & 0 deletions src/rtx_processor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1796,6 +1796,38 @@ RTXProcessor::processGLR(UFILE *out)
currentBranch = parseGraph[0];
parseGraph[0]->getChunks(outputQueue, parseGraph[0]->length-1);
parseGraph.clear();

// We have now parsed input into a tree, and are ready to run
// output rules and do the output. But first: For every chunk
// that didn't get a parse, reparse it disregarding context, so
// we can at least use single-word rules on them.
{
// Reparse pass: walk outputQueue and, for every non-blank chunk that was
// left without a rule (rule == -1), parse that single chunk on its own and
// substitute the chunks produced by the reparse for the original one.
//
// currentBranch is saved here and restored after the loop, so whatever the
// reparse does to it is not visible to the code that follows.
// NOTE(review): presumably checkForReduce() may reassign currentBranch --
// confirm against its definition.
ParseNode* prevBranch = currentBranch;
// The iterator is advanced manually: the "reparse" arm gets its next
// position from erase(), the other arm uses ++it.
for(auto it = outputQueue.begin(); it != outputQueue.end();) {
Chunk* ch = *it;
if(ch->rule == -1 && !ch->isBlank) { // -1 means didn't get a parse
if(printingAll) cerr << "Reparsing chunk ^" << ch->source << "/" << ch->target << "$" << endl;
// Build a fresh parse node holding only this chunk, with a new branch
// id, copies of the current string/wblank variables and varCount empty
// chunk-variable slots.
ParseNode* temp = parsePool.next();
temp->init(mx, ch);
temp->id = ++newBranchId;
temp->stringVars = variables;
temp->wblankVars = wblank_variables;
temp->chunkVars = vector<Chunk*>(varCount, NULL);
checkForReduce(parseGraph, temp);

// Collect the chunks of the first resulting branch into a scratch list.
// NOTE(review): this indexes parseGraph[0] without checking that
// checkForReduce() left at least one node in parseGraph -- confirm that
// is guaranteed.
list<Chunk*> outputQueueReparsed;
parseGraph[0]->getChunks(outputQueueReparsed, parseGraph[0]->length-1);
// erase() returns the position following the removed chunk; splice()
// inserts the reparsed chunks before that position. The reparse result
// may contain a different number of chunks than the single one removed,
// and because `it` ends up past the spliced-in chunks they are not
// visited again by this loop.
it = outputQueue.erase(it); // skip current word since reparse includes it
outputQueue.splice(it, outputQueueReparsed);
// Empty parseGraph so the next reparse does not see this one's nodes.
parseGraph.clear();
}
else {
++it;
}
}
currentBranch = prevBranch;
}

outputAll(out);
variables = currentBranch->stringVars;
wblank_variables = currentBranch->wblankVars;
Expand Down
1 change: 1 addition & 0 deletions tests/Reparse.input
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
^ja<cnjcoo>/ja<cnjcoo>$ ^Jämtlánda<np><top><sg><gen><@→N>/Jämtlánnda<np><top><sg><gen><@→N>$ ^regiovdna<n><sem_plc><sg><gen><@→P>/regiåvnnå<n><sem_plc><sg><gen><@→P>$ ^dáfus<post><@ADVL>/gáktuj<post><@ADVL>$^.<sent>/.<sent>$
1 change: 1 addition & 0 deletions tests/Reparse.output
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
^ja<cnjcoo>$ ^Jämtlánnda<np><sg><gen>$ ^regiåvnnå<n><sg><gen>$ ^gáktuj<post>$^.<sent>$
42 changes: 42 additions & 0 deletions tests/Reparse.rtx
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
!!!!!!!!!!!!!!!
!! ATTRIBUTE CATEGORIES
!!!!!!!!!!!!!!!

function = "@→N" "@→P" "@ADVL" ;
number = sg du pl ;
case = acc nom gen loc ine ela com ess ill ;

!!!!!!!!!!!!!!!
!! OUTPUT PATTERNS
!!!!!!!!!!!!!!!

NP: _.number.case.function ;
PP: _ ;

Name: _.number.case.function ;
N: _.number.case.function ;
n: <n>.number.case;

post: _ ;

np: <np>.number.case ;


!!!!!!!!!!!!!!!
!! REDUCTION RULES
!!!!!!!!!!!!!!!

N -> "N:n" %n { %1 } ;

Name -> "N:np" %np { %1 }
| "NP:N Name" N %Name {1 _ %2 } !gonagas Harald
| "NP:N Name" np %Name {1 _ %2 } !
| "NP:N Name" Name %N {1 _ %2 } ! ! Verdens Gang aviisii
;

NP -> "NP: N" %N { %1 } ;


PP -> "PP N post" N %post { 1 _ %2 }
| "lone post" %post { 1 }
;
5 changes: 5 additions & 0 deletions tests/build_tests.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
#!/usr/bin/env python3
f = open('run_tests.py', 'w')
f.write('''#!/usr/bin/env python3
#####################################################
### run_tests.py IS A GENERATED FILE, DO NOT EDIT ###
#####################################################
import subprocess, unittest
class CompilerTest:
Expand Down

0 comments on commit fc7fe18

Please sign in to comment.