Skip to content

Commit

Permalink
🚑 Allow tags after "?" for unknowns, fix #48
Browse files Browse the repository at this point in the history
  • Loading branch information
unhammer committed Dec 7, 2021
1 parent e1ee7b2 commit 2d4086b
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 7 deletions.
19 changes: 13 additions & 6 deletions src/cgspell.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
namespace divvun {

static const string subreading_separator = "#";
- static const string unknown_analysis = " ?";
+ static const string tag_unknown = "?";

/**
* Return the size in bytes of the first complete UTF-8 codepoint in c,
Expand Down Expand Up @@ -230,11 +230,18 @@ void run_cgspell(std::istream& is,
}
else if (!result.empty() && result[5].length() != 0)
{
- c.unknown = (result[5] == unknown_analysis);
- if(c.unknown) {
- sent.n_unknowns += 1;
- }
- c.lines.push_back(line);
+ std::stringstream ana(result[5]);
+ std::string tag;
+ c.unknown = false;
+ while (ana >> tag) {
+ if(tag == tag_unknown) {
+ c.unknown = true;
+ }
+ }
+ if (c.unknown) {
+ sent.n_unknowns += 1;
+ }
+ c.lines.push_back(line);
}
else if(!result.empty() && result[7].length() != 0) {
// TODO: Can we ever get a flush in the middle of readings?
Expand Down
2 changes: 1 addition & 1 deletion src/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ namespace divvun {

const std::basic_regex<char> CG_LINE ("^"
"(\"<(.*)>\".*" // wordform, group 2
- "|(\t+)(\"[^\"]*\"\\S*)(\\s+\\S+)*\\s*" // reading, group 3, 4, 5
+ "|(\t+)(\"[^\"]*\"\\S*)((?:\\s+\\S+)*)\\s*" // reading, group 3, 4, 5
"|:(.*)" // blank, group 6
"|(<STREAMCMD:FLUSH>)" // flush, group 7
"|(;\t+.*)" // traced reading, group 8
Expand Down
6 changes: 6 additions & 0 deletions test/cgspell/expected.default
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,9 @@
"<ballat>"
"ballat" V TV Inf
"ballat" V TV Ind Prs Pl1
+ :
+ "<skuvlabufse>"
+ "skuvllabufse" ? <LastCohort> <NotFirstCohort>
+ "busse" N Sg <W:1> <WA:0> <spelled> "skuvlabusse"S
+ "skuvla" N Sg
+ :
4 changes: 4 additions & 0 deletions test/cgspell/input.default
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,7 @@
"<ballat>"
"ballat" V TV Inf
"ballat" V TV Ind Prs Pl1
+ :
+ "<skuvlabufse>"
+ "skuvllabufse" ? <LastCohort> <NotFirstCohort>
+ :

0 comments on commit 2d4086b

Please sign in to comment.