Skip to content

Commit

Permalink
New divvun-suggest mode -a/--autocorrect to auto-apply corrections
Browse files Browse the repository at this point in the history
Implements #37 - Autocorrect mode for divvun-checker
  • Loading branch information
unhammer committed Jan 30, 2020
1 parent 68d591c commit 1b8e5f6
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 10 deletions.
15 changes: 13 additions & 2 deletions src/main_suggest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ int main(int argc, char ** argv)

options.add_options()
("j,json", "Use JSON output format (default: CG)")
("a,autocorrect", "Autocorrect output format (default: CG)")
("g,generator", "Generator (HFSTOL format)", cxxopts::value<std::string>(), "BIN")
#ifdef HAVE_LIBPUGIXML
("m,messages", "ERROR messages (XML format)", cxxopts::value<std::string>(), "FILE")
Expand Down Expand Up @@ -84,7 +85,17 @@ int main(int argc, char ** argv)
}

const auto& genfile = options["generator"].as<std::string>();
bool json = options.count("j");
divvun::RunMode mode = divvun::RunCG;
if(options.count("j")) {
mode = divvun::RunJson;
if(options.count("a")) {
std::cerr << argv[0] << " ERROR: Pick just one of --json/--autocorrect" << std::endl;
return (EXIT_FAILURE);
}
};
if(options.count("a")) {
mode = divvun::RunAutoCorrect;
};
bool genall = options.count("A");
bool verbose = options.count("v");

Expand Down Expand Up @@ -123,7 +134,7 @@ int main(int argc, char ** argv)
#endif

divvun::Suggest s(t, m, locale, verbose, genall);
s.run(std::cin, std::cout, json);
s.run(std::cin, std::cout, mode);
}
catch (const cxxopts::OptionException& e)
{
Expand Down
2 changes: 1 addition & 1 deletion src/pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ SuggestCmd::SuggestCmd (const string& gen_path, const string& msg_path, const st
}
// Runs the wrapped suggester over `input`, writing its result to `output`.
void SuggestCmd::run(stringstream& input, stringstream& output) const
{
// NOTE(review): the two calls below look like the before/after lines of a
// diff (the boolean `true` argument replaced by the RunJson mode) — confirm
// that only the RunJson call is meant to remain.
suggest->run(input, output, true);
suggest->run(input, output, RunJson);
}
vector<Err> SuggestCmd::run_errs(stringstream& input) const
{
Expand Down
51 changes: 45 additions & 6 deletions src/suggest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -879,6 +879,39 @@ RunState Suggest::run_json(std::istream& is, std::ostream& os)
return sentence.runstate;
}

/**
 * Reads one sentence from `is` and writes its text to `os` with every
 * error span replaced by the first of that error's replacements.
 *
 * An error that has no replacements is written back as its own form.
 * An error whose span starts before text that has already been written
 * (overlapping or out-of-order errors), or whose span does not fit inside
 * the sentence text, is skipped: applying it would duplicate text or make
 * the trailing substr() throw std::out_of_range.
 *
 * When the sentence ended in the `flushing` state, a NUL byte is written
 * and the output stream is flushed and has its error state cleared.
 *
 * @param is stream the sentence is read from
 * @param os stream the corrected text is written to
 * @return the run state of the sentence that was read
 */
RunState Suggest::run_autocorrect(std::istream& is, std::ostream& os)
{
	json::sanity_test();
	Sentence sentence = run_sentence(is, *generator, msgs, generate_all_readings);
	const vector<Err> errs = mk_errs(sentence);
	const u16string text = fromUtf8(sentence.text.str());

	// Index into `text` of the first character not yet written to `os`.
	size_t offset = 0;
	for(const auto& e : errs) {
		// Only apply corrections that lie wholly inside the not-yet-written
		// part of the text; anything else cannot be applied consistently.
		if(e.beg < offset || e.end < e.beg || e.end > text.size()) {
			continue;
		}
		// Unchanged text between the previous correction and this one.
		if(e.beg > offset) {
			os << toUtf8(text.substr(offset, e.beg - offset));
		}
		// Auto-apply the first replacement; keep the original form if
		// the error offers none.
		if(e.rep.empty()) {
			os << toUtf8(e.form);
		}
		else {
			os << toUtf8(*e.rep.begin());
		}
		offset = e.end;
	}
	// Remainder of the sentence after the last applied correction.
	os << toUtf8(text.substr(offset));

	if(sentence.runstate == flushing) {
		os << '\0';
		os.flush();
		os.clear();
	}
	return sentence.runstate;
}


void print_cg_reading(const Casing& inputCasing, const string& readinglines, std::ostream& os, const hfst::HfstTransducer& t, bool generate_all_readings) {
os << readinglines;
Expand Down Expand Up @@ -938,7 +971,7 @@ void run_cg(std::istream& is, std::ostream& os, const hfst::HfstTransducer& t, b
}
}

void Suggest::run(std::istream& is, std::ostream& os, bool json)
void Suggest::run(std::istream& is, std::ostream& os, RunMode mode)
{
try {
auto _old = std::locale::global(std::locale(""));
Expand All @@ -947,11 +980,17 @@ void Suggest::run(std::istream& is, std::ostream& os, bool json)
{
std::cerr << "WARNING: Couldn't set global locale \"\" (locale-specific native environment): " << e.what() << std::endl;
}
if(json) {
while(run_json(is, os) == flushing);
}
else {
run_cg(is, os, *generator, generate_all_readings); // ignores ignores
switch(mode) {
case RunJson:
while(run_json(is, os) == flushing);
break;
case RunAutoCorrect:
while(run_autocorrect(is, os) == flushing);
break;
case RunCG:
default:
run_cg(is, os, *generator, generate_all_readings); // ignores ignores
break;
}
}

Expand Down
9 changes: 8 additions & 1 deletion src/suggest.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@ enum RunState {
eof
};

// Output mode selected by the caller of Suggest::run.
enum RunMode {
// CG output; Suggest::run dispatches this (and any unknown value) to run_cg.
RunCG,
// JSON output; Suggest::run loops over run_json while it reports flushing.
RunJson,
// Auto-applied corrections; Suggest::run loops over run_autocorrect while it reports flushing.
RunAutoCorrect
};

using rel_id = size_t;
using relations = std::unordered_map<string, rel_id>;

Expand Down Expand Up @@ -187,7 +193,7 @@ class Suggest {
Suggest(const string& gen_path, const string& locale, bool verbose);
~Suggest() = default;

void run(std::istream& is, std::ostream& os, bool json);
void run(std::istream& is, std::ostream& os, RunMode mode);

vector<Err> run_errs(std::istream& is);
void setIgnores(const std::set<ErrId>& ignores);
Expand All @@ -200,6 +206,7 @@ class Suggest {
private:
const SortedMsgLangs sortedmsglangs; // invariant: contains all and only the keys of msgs
RunState run_json(std::istream& is, std::ostream& os);
RunState run_autocorrect(std::istream& is, std::ostream& os);
std::unique_ptr<const hfst::HfstTransducer> generator;
std::set<ErrId> ignores;
bool generate_all_readings = false;
Expand Down

0 comments on commit 1b8e5f6

Please sign in to comment.