forked from rime/librime
-
Notifications
You must be signed in to change notification settings - Fork 0
/
preset_vocabulary.cc
115 lines (100 loc) · 3.12 KB
/
preset_vocabulary.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
//
// Copyright RIME Developers
// Distributed under the BSD License
//
// 2011-11-27 GONG Chen <chen.sst@gmail.com>
//
#include <filesystem>
#include <boost/lexical_cast.hpp>
#include <utf8.h>
#include <rime/resource.h>
#include <rime/service.h>
#include <rime/dict/preset_vocabulary.h>
#include <rime/dict/text_db.h>
namespace rime {
// Resource type describing vocabulary files. Its `name` member is passed to
// TextDb when a VocabularyDb is constructed, and the whole value is handed to
// Service::CreateResourceResolver() to locate vocabulary files.
// NOTE(review): the second and third initialisers ("" and ".txt") are
// presumably the file-name prefix and suffix -- confirm against the
// ResourceType declaration.
static const ResourceType kVocabularyResourceType = {"vocabulary", "", ".txt"};
// Text database holding a preset vocabulary. It is a TextDb that uses the
// vocabulary row format defined in this file.
struct VocabularyDb : public TextDb {
// Forwards `path` and `name` to the TextDb base class.
VocabularyDb(const string& path, const string& name);
// Accessor used to walk the records. PresetVocabulary assigns it from
// QueryAll() once the database has been opened, and it stays empty if the
// open fails.
an<DbAccessor> cursor;
// Parser/formatter table shared by every VocabularyDb instance.
static const TextFormat format;
};
// Constructs the database by forwarding `path` and `name` to TextDb, together
// with the vocabulary resource type's name and the VocabularyDb row format.
// The constructor body itself does nothing.
VocabularyDb::VocabularyDb(const string& path, const string& name)
: TextDb(path, name, kVocabularyResourceType.name, VocabularyDb::format) {}
static bool rime_vocabulary_entry_parser(const Tsv& row,
string* key,
string* value) {
if (row.size() < 1 || row[0].empty()) {
return false;
}
*key = row[0];
*value = row.size() > 1 ? row[1] : "0";
return true;
}
// Row formatter slot of the vocabulary TextFormat.
//
// This implementation emits nothing: `key`, `value` and `tsv` are all left
// untouched and success is reported unconditionally. (A formatter that
// actually wrote rows would append `key` and `value` to `*tsv`.)
// NOTE(review): presumably a no-op because this file only ever opens the
// vocabulary read-only -- confirm that nothing else writes it.
static bool rime_vocabulary_entry_formatter(const string& key,
                                            const string& value,
                                            Tsv* tsv) {
  return true;
}
// TextFormat table for VocabularyDb: the row parser, the row formatter, and
// the string "Rime vocabulary".
// NOTE(review): the string is presumably a human-readable format/type label --
// confirm against the TextFormat declaration.
const TextFormat VocabularyDb::format = {
rime_vocabulary_entry_parser,
rime_vocabulary_entry_formatter,
"Rime vocabulary",
};
// Returns the file path of the vocabulary named `vocabulary`, as a string.
//
// A ResourceResolver for kVocabularyResourceType is created through
// Service::instance() and asked to resolve the name. The resolver is owned
// locally and released when this function returns.
string PresetVocabulary::DictFilePath(const string& vocabulary) {
  the<ResourceResolver> resolver(
      Service::instance().CreateResourceResolver(kVocabularyResourceType));
  const auto resolved = resolver->ResolvePath(vocabulary);
  return resolved.string();
}
// Creates the vocabulary database for `vocabulary` and tries to open it
// read-only. On success a cursor over all records is prepared for
// GetNextEntry(). On failure the cursor stays empty, so iteration yields
// nothing.
PresetVocabulary::PresetVocabulary(const string& vocabulary) {
  const string dict_path = DictFilePath(vocabulary);
  db_.reset(new VocabularyDb(dict_path, vocabulary));
  if (!db_ || !db_->OpenReadOnly())
    return;
  db_->cursor = db_->QueryAll();
}
// Closes the underlying database, if one was created.
PresetVocabulary::~PresetVocabulary() {
  if (!db_)
    return;
  db_->Close();
}
bool PresetVocabulary::GetWeightForEntry(const string& key, double* weight) {
string weight_str;
if (!db_ || !db_->Fetch(key, &weight_str))
return false;
try {
*weight = boost::lexical_cast<double>(weight_str);
} catch (...) {
return false;
}
return true;
}
// Resets the record cursor so that the next GetNextEntry() call starts over.
// Does nothing when there is no database or no cursor.
void PresetVocabulary::Reset() {
  if (!db_ || !db_->cursor)
    return;
  db_->cursor->Reset();
}
// Advances the cursor to the next record that passes IsQualifiedPhrase() and
// returns it through `*key` / `*value`.
//
// Returns false when there is no database or cursor, or when the cursor runs
// out of records. In the latter case `*key` / `*value` hold whatever the last
// GetNextRecord() call left in them.
bool PresetVocabulary::GetNextEntry(string* key, string* value) {
  if (!db_ || !db_->cursor)
    return false;

  while (db_->cursor->GetNextRecord(key, value)) {
    if (IsQualifiedPhrase(*key, *value))
      return true;
    // Record filtered out: keep scanning.
  }
  return false;
}
bool PresetVocabulary::IsQualifiedPhrase(const string& phrase,
const string& weight_str) {
if (max_phrase_length_ > 0) {
size_t length = utf8::unchecked::distance(phrase.c_str(),
phrase.c_str() + phrase.length());
if (static_cast<int>(length) > max_phrase_length_)
return false;
}
if (min_phrase_weight_ > 0.0) {
double weight = boost::lexical_cast<double>(weight_str);
if (weight < min_phrase_weight_)
return false;
}
return true;
}
} // namespace rime