Skip to content

Commit

Permalink
xapian: set up metadata filter in advance
Browse files Browse the repository at this point in the history
This avoids ever writing to a database once it has been compacted.

In theory this will let us switch to the Honey backend.
  • Loading branch information
brong committed Sep 4, 2020
1 parent 0fe8a75 commit 5ff26ec
Showing 1 changed file with 58 additions and 26 deletions.
84 changes: 58 additions & 26 deletions imap/xapian_wrap.cpp
Expand Up @@ -171,7 +171,7 @@ static int calculate_language_counts(const Xapian::Database& db,
return 0;
}

static void remove_legacy_metadata(Xapian::WritableDatabase& db)
static void remove_legacy_metadata(Xapian::WritableDatabase& metadb, Xapian::Database& db)
{
const std::string prefix{XAPIAN_LANG_DOC_KEYPREFIX "."};
for (Xapian::TermIterator key = db.metadata_keys_begin(prefix);
Expand All @@ -181,22 +181,10 @@ static void remove_legacy_metadata(Xapian::WritableDatabase& db)
// Remove legacy keys and values.
if ((*key).find('.') != std::string::npos ||
(!val.empty() && !isalpha(val[0]))) {
db.set_metadata(*key, "");
}
}
for (Xapian::docid docid = 1; docid <= db.get_lastdocid(); ++docid) {
try {
Xapian::Document doc = db.get_document(docid);
const std::string& val = doc.get_value(SLOT_DOCLANGS);
// Remove legacy doclang slot values.
if (!val.empty() && !isalpha(val[0])) {
doc.remove_value(SLOT_DOCLANGS);
}
}
catch (Xapian::DocNotFoundError e) {
// ignore
metadb.set_metadata(*key, "OVERWRITE:");
}
}
// NOTE: we're not removing DOCLANG slot because we can't tell the compactor to remove it
}

static void write_language_counts(Xapian::WritableDatabase& db,
Expand Down Expand Up @@ -452,10 +440,17 @@ class CyrusMetadataCompactor : public Xapian::Compactor
size_t num_tags,
const std::string tags[])
{
size_t i;
// allow overwriting of values!
for (i = 0; i < num_tags; i++) {
if (tags[i].rfind("OVERWRITE:", 0) == 0) {
return tags[i].substr(10);
}
}
if (key.rfind("cyrusid.", 0) == 0) {
uint8_t indexlevel = parse_indexlevel(tags[0]);
size_t bestpos = 0;
for (size_t i = 1; i < num_tags; i++) {
for (i = 1; i < num_tags; i++) {
uint8_t level = parse_indexlevel(tags[i]);
if (better_indexlevel(indexlevel, level) == level) {
indexlevel = level;
Expand Down Expand Up @@ -518,20 +513,57 @@ int xapian_compact_dbs(const char *dest, const char **sources)
}
}
thispath = "(unknown path)";
int flags = Xapian::DB_DANGEROUS|Xapian::DB_NO_SYNC|Xapian::DB_CREATE|Xapian::DB_BACKEND_GLASS;
std::string metadir = std::string(dest) + ".META";
Xapian::WritableDatabase metadb {metadir, flags};

// Compact database.
static CyrusMetadataCompactor comp;
// FULLER because we never write to compression targets again.
db.compact(dest, Xapian::Compactor::FULLER | Xapian::DBCOMPACT_MULTIPASS, 0, comp);
// set version metadata
std::ostringstream val;
for (std::set<int>::iterator it = db_versions.begin(); it != db_versions.end(); ++it) {
if (it != db_versions.begin()) val << ",";
val << *it;
}
if (db.get_metadata("cyrus.db_version").empty()) {
metadb.set_metadata("cyrus.db_version", val.str());
}
else {
metadb.set_metadata("cyrus.db_version", std::string("OVERWRITE:") + val.str());
}
if (db.get_metadata("cyrus.stem-version").empty()) {
metadb.set_metadata("cyrus.stem-version", "");
}
else {
metadb.set_metadata("cyrus.stem-version", "OVERWRITE:");
}

// Clean metadata
remove_legacy_metadata(metadb, db);

for (Xapian::TermIterator it = db.metadata_keys_begin(XAPIAN_LANG_COUNT_KEYPREFIX);
it != db.metadata_keys_end(XAPIAN_LANG_COUNT_KEYPREFIX); ++it) {
metadb.set_metadata(*it, "OVERWRITE:");
}
for (const std::pair<std::string, unsigned>& it : lang_counts) {
std::string key = lang_count_key(it.first);
if (db.get_metadata(key).empty()) {
metadb.set_metadata(key, std::to_string(it.second));
}
else {
metadb.set_metadata(key, std::string("OVERWRITE:") + std::to_string(it.second));
}
}

Xapian::WritableDatabase newdb(dest);
write_db_versions(newdb, db_versions);
// commit changes
metadb.commit();

// Clean metadata.
remove_legacy_metadata(newdb);
// and add it to the list
db.add_database(metadb);

// Reset language counts.
write_language_counts(newdb, lang_counts);
// Compact database.
static CyrusMetadataCompactor comp;
// FULLER because we never write to compression targets again.
db.compact(dest, Xapian::Compactor::FULLER | Xapian::DBCOMPACT_MULTIPASS | Xapian::DB_BACKEND_GLASS, 0, comp);
removedir(metadir.c_str());
}
catch (const Xapian::Error &err) {
syslog(LOG_ERR, "IOERROR: Xapian: caught exception compact_dbs: %s (%s)",
Expand Down

0 comments on commit 5ff26ec

Please sign in to comment.