Skip to content

Commit

Permalink
tools: add rime_table_decompiler (rime#706)
Browse files Browse the repository at this point in the history
---------

Co-authored-by: Qijia Liu <liumeo@pku.edu.cn>
  • Loading branch information
2 people authored and graphemecluster committed Sep 4, 2023
1 parent cdb1b72 commit 70ca155
Show file tree
Hide file tree
Showing 4 changed files with 177 additions and 40 deletions.
39 changes: 0 additions & 39 deletions src/rime/dict/table.cc
Expand Up @@ -21,45 +21,6 @@ const int kTableFormatLowestCompatible = 4.0;
// Fixed prefix string; presumably the leading part of the format identifier
// stored in table files — TODO confirm against the code that reads/writes it.
const char kTableFormatPrefix[] = "Rime::Table/";
// Number of characters in the prefix. sizeof counts the terminating NUL of the
// array, hence the "- 1".
const size_t kTableFormatPrefixLen = sizeof(kTableFormatPrefix) - 1;

// Walks a table index that is split into up to four levels: a head index
// (lv1), two trunk indices (lv2, lv3) and a tail index (lv4).
// All non-trivial member functions are defined out of line; the notes below
// are limited to what this declaration shows.
class TableQuery {
public:
// Starts from `index` as the level-1 (head) index and calls Reset().
TableQuery(table::Index* index) : lv1_index_(index) { Reset(); }

// Returns a TableAccessor for `syllable_id`. Declared const, so the query
// state is left unchanged.
TableAccessor Access(SyllableId syllable_id, double credibility = 0.0) const;
// Takes `accessors` by non-const reference, presumably as the output list to
// fill — TODO confirm against the definition.
void AccessAll(vector<TableAccessor>& accessors, double credibility = 0.0);

// Moves down to the next level via `syllable_id`.
// NOTE(review): the bool result presumably reports whether the move was
// possible — confirm against the definition.
bool Advance(SyllableId syllable_id, double credibility = 0.0);

// Moves up one level (the counterpart of Advance).
bool Backdate();

// Returns to the root of the index.
void Reset();

// Current depth of the query.
size_t level() const { return level_; }

protected:
// Depth reached so far; 0 by default.
size_t level_ = 0;
// presumably the syllable ids followed from the root to the current level —
// TODO confirm.
Code index_code_;
// Credibility values kept by the query; presumably one per Advance() call —
// TODO confirm.
vector<double> credibility_;

private:
// Internal step helper used by the implementation (defined out of line).
bool Walk(SyllableId syllable_id);

// One pointer per index level. Only lv1 is set by the constructor; the others
// start as nullptr.
table::HeadIndex* lv1_index_ = nullptr;
table::TrunkIndex* lv2_index_ = nullptr;
table::TrunkIndex* lv3_index_ = nullptr;
table::TailIndex* lv4_index_ = nullptr;
};

// Work item that bundles a TableQuery with bookkeeping fields.
// NOTE(review): the field meanings below are inferred from their names only —
// confirm against the code that fills and consumes this struct.
struct QueryQueue {
// presumably the position this item refers to — TODO confirm.
size_t pos;
// Query state carried by this item (stored by value).
TableQuery query;
// presumably set when the path so far uses only regular spellings — TODO confirm.
bool isRegularSpelling;
// presumably set when no entry has been found for this item — TODO confirm.
bool hasNoEntry;
};

TableAccessor::TableAccessor(const Code& index_code,
const List<table::Entry>* list,
double credibility)
Expand Down
41 changes: 40 additions & 1 deletion src/rime/dict/table.h
Expand Up @@ -128,7 +128,38 @@ class TableAccessor {
// Result container: lists of accessors grouped under an int key.
// NOTE(review): the meaning of the key is not visible here — confirm at the
// point where the map is filled.
using TableQueryResult = map<int, vector<TableAccessor>>;

// Forward declarations; the full definitions live elsewhere.
struct SyllableGraph;
class TableQuery;

// Walks a table index that is split into up to four levels: a head index
// (lv1), two trunk indices (lv2, lv3) and a tail index (lv4).
// All non-trivial member functions are defined out of line; the notes below
// are limited to what this declaration shows.
class TableQuery {
public:
// Starts from `index` as the level-1 (head) index and calls Reset().
TableQuery(table::Index* index) : lv1_index_(index) { Reset(); }

// Returns a TableAccessor for `syllable_id`. Declared const, so the query
// state is left unchanged.
TableAccessor Access(SyllableId syllable_id, double credibility = 0.0) const;
// Takes `accessors` by non-const reference, presumably as the output list to
// fill — TODO confirm against the definition.
void AccessAll(vector<TableAccessor>& accessors, double credibility = 0.0);

// Moves down to the next level via `syllable_id`.
// NOTE(review): the bool result presumably reports whether the move was
// possible — confirm against the definition.
bool Advance(SyllableId syllable_id, double credibility = 0.0);

// Moves up one level (the counterpart of Advance).
bool Backdate();

// Returns to the root of the index.
void Reset();

// Current depth of the query.
size_t level() const { return level_; }

protected:
// Depth reached so far; 0 by default.
size_t level_ = 0;
// presumably the syllable ids followed from the root to the current level —
// TODO confirm.
Code index_code_;
// Credibility values kept by the query; presumably one per Advance() call —
// TODO confirm.
vector<double> credibility_;

private:
// Internal step helper used by the implementation (defined out of line).
bool Walk(SyllableId syllable_id);

// One pointer per index level. Only lv1 is set by the constructor; the others
// start as nullptr.
table::HeadIndex* lv1_index_ = nullptr;
table::TrunkIndex* lv2_index_ = nullptr;
table::TrunkIndex* lv3_index_ = nullptr;
table::TailIndex* lv4_index_ = nullptr;
};

class Table : public MappedFile {
public:
Expand All @@ -153,6 +184,7 @@ class Table : public MappedFile {
RIME_API string GetEntryText(const table::Entry& entry);

uint32_t dict_file_checksum() const;
table::Metadata* metadata() const { return metadata_; }

private:
table::Index* BuildIndex(const Vocabulary& vocabulary, size_t num_syllables);
Expand Down Expand Up @@ -184,6 +216,13 @@ class Table : public MappedFile {
the<StringTableBuilder> string_table_builder_;
};

// Work item that bundles a TableQuery with bookkeeping fields.
// NOTE(review): the field meanings below are inferred from their names only —
// confirm against the code that fills and consumes this struct.
struct QueryQueue {
// presumably the position this item refers to — TODO confirm.
size_t pos;
// Query state carried by this item (stored by value).
TableQuery query;
// presumably set when the path so far uses only regular spellings — TODO confirm.
bool isRegularSpelling;
// presumably set when no entry has been found for this item — TODO confirm.
bool hasNoEntry;
};

} // namespace rime

#endif // RIME_TABLE_H_
13 changes: 13 additions & 0 deletions tools/CMakeLists.txt
Expand Up @@ -37,7 +37,20 @@ target_link_libraries(rime_deployer
${rime_dict_library}
${rime_levers_library})

# rime_table_decompiler: command-line tool built from its own entry point plus
# four dictionary sources compiled directly into the executable.
# NOTE(review): the src/rime/dict files are presumably compiled in because not
# every symbol the tool needs is exported from the rime library — confirm
# before removing them from this list.
set(rime_table_decompiler_src
"rime_table_decompiler.cc"
${CMAKE_SOURCE_DIR}/src/rime/dict/table.cc
${CMAKE_SOURCE_DIR}/src/rime/dict/mapped_file.cc
${CMAKE_SOURCE_DIR}/src/rime/dict/string_table.cc
${CMAKE_SOURCE_DIR}/src/rime/dict/vocabulary.cc
)
add_executable(rime_table_decompiler ${rime_table_decompiler_src})
# The tool still links against the rime libraries for everything else.
target_link_libraries(rime_table_decompiler
${rime_library}
${rime_dict_library})

# Install the command-line tools into ${BIN_INSTALL_DIR}.
install(TARGETS rime_deployer DESTINATION ${BIN_INSTALL_DIR})
install(TARGETS rime_dict_manager DESTINATION ${BIN_INSTALL_DIR})
install(TARGETS rime_table_decompiler DESTINATION ${BIN_INSTALL_DIR})

install(TARGETS rime_patch DESTINATION ${BIN_INSTALL_DIR})
124 changes: 124 additions & 0 deletions tools/rime_table_decompiler.cc
@@ -0,0 +1,124 @@
// rime_table_decompiler.cc
// nopdan <me@nopdan.com>
//
#include <cmath>
#include <fstream>
#include <iomanip>
#include <ios>
#include <iostream>
#include <string>
#include <rime/dict/table.h>

// usage:
// rime_table_decompiler <rime-table-file> [save-path]
// example:
// rime_table_decompiler pinyin.table.bin pinyin.dict.yaml

// Writes the syllables of `code` to `fout`, separated by single spaces.
//
// Each syllable id in `code` is turned into text with
// `table->GetSyllableById`. Nothing is written when `code` is empty, and no
// trailing separator or newline is emitted.
//
// `code` is taken by const reference so that the code is not copied for every
// entry that gets written.
void outCode(rime::Table* table, const rime::Code& code, std::ofstream& fout) {
  bool first = true;
  for (const auto syllable_id : code) {
    if (!first) {
      fout << " ";
    }
    fout << table->GetSyllableById(syllable_id);
    first = false;
  }
}

// Writes every remaining entry of `accessor` to `fout`, one entry per line:
//
//   <entry text> TAB <space-separated syllables> [TAB <exp(weight)>]
//
// The weight column is written only when the stored weight is non-negative;
// its textual form follows whatever floating-point format is set on `fout`.
//
// `accessor` is taken by value on purpose: it is advanced until exhausted
// here, and the caller's object must not be affected.
void access(rime::Table* table,
            rime::TableAccessor accessor,
            std::ofstream& fout) {
  for (; !accessor.exhausted(); accessor.Next()) {
    const auto entry = accessor.entry();
    fout << table->GetEntryText(*entry) << "\t";
    outCode(table, accessor.code(), fout);

    const auto weight = entry->weight;
    if (weight >= 0) {
      // Compute in double explicitly so the result does not depend on which
      // exp overloads the included headers happen to expose.
      fout << "\t" << std::exp(static_cast<double>(weight));
    }
    // '\n' instead of std::endl: a table may hold a very large number of
    // entries and flushing the file after each one is needless work.
    fout << '\n';
  }
}

// recursively traverse table
// Depth-first walk over the table index, writing every entry found to `fout`.
//
// For each syllable id in [0, num_syllables):
//   1. the entries reachable with that id at the current level are written;
//   2. if the query can advance with that id, the walk either recurses (while
//      the new level is below 3) or, at level 3, writes the entries returned by
//      Access(0); afterwards the query is moved back up one level.
//
// `query` is modified during the walk but is back at its starting level when
// the function returns, since every successful Advance() is paired with a
// Backdate().
void recursion(rime::Table* table,
               rime::TableQuery* query,
               std::ofstream& fout) {
  // Read the syllable count once and give the loop index the same type that
  // Access()/Advance() take, avoiding a signed/unsigned comparison on every
  // iteration.
  const auto syllable_count =
      static_cast<rime::SyllableId>(table->metadata()->num_syllables);
  for (rime::SyllableId id = 0; id < syllable_count; ++id) {
    access(table, query->Access(id), fout);
    if (!query->Advance(id)) {
      continue;
    }
    if (query->level() < 3) {
      recursion(table, query, fout);
    } else {
      // NOTE(review): at the deepest level the syllable argument is presumably
      // ignored by Access() — confirm against TableQuery::Access.
      access(table, query->Access(0), fout);
    }
    query->Backdate();
  }
}

// Prints the table's syllable and entry counts to stdout, then dumps every
// entry of the table to `fout`.
//
// `fout` is switched to fixed notation with zero decimals before the dump, so
// floating-point values written afterwards appear as whole numbers.
void traversal(rime::Table* table, std::ofstream& fout) {
  auto meta = table->metadata();
  std::cout << "num_syllables: " << meta->num_syllables << std::endl
            << "num_entries: " << meta->num_entries << std::endl;

  fout << std::fixed << std::setprecision(0);

  // Start the walk at the root of the table index.
  rime::TableQuery root_query(meta->index.get());
  recursion(table, &root_query, fout);
}

// Entry point: rime_table_decompiler <rime-table-file> [save-path]
//
// Loads the given table file and writes its entries as a Rime dictionary YAML
// file. When no save path is given, the output name is the input name with a
// trailing ".table.bin" removed (if present) and ".yaml" appended. The
// dictionary `name` in the YAML header is the input file's base name without
// that suffix.
//
// Returns 0 on success and 1 on any failure: wrong number of arguments, table
// cannot be loaded, output cannot be opened, or output cannot be written.
int main(int argc, char* argv[]) {
  if (argc < 2 || argc > 3) {
    // An invalid invocation is an error: report it on stderr with a non-zero
    // status so that calling scripts can detect it.
    std::cerr << "Usage: rime_table_decompiler <rime-table-file> [save-path]"
              << std::endl;
    std::cerr << "Example: rime_table_decompiler pinyin.table.bin pinyin.dict.yaml"
              << std::endl;
    return 1;
  }

  std::string fileName(argv[1]);
  rime::Table table(fileName);
  if (!table.Load()) {
    std::cerr << "Failed to load table." << std::endl;
    return 1;
  }

  // Remove the extension ".table.bin" if present. Only a true suffix is
  // stripped; a match elsewhere in the path (for example inside a directory
  // name) must not truncate the path.
  const std::string tableSuffix(".table.bin");
  if (fileName.size() >= tableSuffix.size() &&
      fileName.compare(fileName.size() - tableSuffix.size(),
                       tableSuffix.size(), tableSuffix) == 0) {
    fileName.erase(fileName.size() - tableSuffix.size());
  }
  const std::string outputName =
      (argc == 3) ? argv[2] : fileName + ".yaml";

  std::ofstream fout(outputName);
  if (!fout.is_open()) {
    std::cerr << "Failed to open file " << outputName << std::endl;
    return 1;
  }

  // schema id: the base name of the input, without directories.
  const size_t last_slash_idx = fileName.find_last_of("\\/");
  if (std::string::npos != last_slash_idx) {
    fileName.erase(0, last_slash_idx + 1);
  }
  fout << "# Rime dictionary\n\n";
  fout << "---\n"
          "name: " << fileName << "\n"
          "version: \"1.0\"\n"
          "...\n\n";
  traversal(&table, fout);

  // Close before reporting: close() flushes, and any write error (for example
  // a full disk) shows up in the stream state afterwards.
  fout.close();
  if (fout.fail()) {
    std::cerr << "Failed to write file " << outputName << std::endl;
    return 1;
  }
  std::cout << "Save to: " << outputName << std::endl;
  return 0;
}

0 comments on commit 70ca155

Please sign in to comment.