Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

* mu-query/mu-msg-iter: when showing related messages (--include-related),
  favor the ones that were in the original set
  • Loading branch information...
commit 70356a62f5ca4c96fab1a7feafef87ee6f778d0c 1 parent a0d8d4f
@djcb authored
Showing with 108 additions and 43 deletions.
  1. +64 −33 lib/mu-msg-iter.cc
  2. +12 −2 lib/mu-msg-iter.h
  3. +32 −8 lib/mu-query.cc
View
97 lib/mu-msg-iter.cc
@@ -30,12 +30,22 @@
#include <string>
#include <set>
+#include <map>
#include "mu-util.h"
#include "mu-msg.h"
#include "mu-msg-iter.h"
#include "mu-threader.h"
+
+struct ltstr {
+ bool operator () (const std::string &s1,
+ const std::string &s2) const {
+ return g_strcmp0 (s1.c_str(), s2.c_str()) < 0;
+ }
+};
+typedef std::map <std::string, unsigned, ltstr> msgid_docid_map;
+
class ThreadKeyMaker: public Xapian::KeyMaker {
public:
ThreadKeyMaker (GHashTable *threadinfo): _threadinfo(threadinfo) {}
@@ -56,7 +66,7 @@ struct _MuMsgIter {
MuMsgFieldId sortfield, MuMsgIterFlags flags):
_enq(enq), _thread_hash (0), _msg(0), _flags(flags),
_skip_unreadable(flags & MU_MSG_ITER_FLAG_SKIP_UNREADABLE),
- _skip_dups (flags & MU_MSG_ITER_FLAG_SKIP_DUPS) {
+ _skip_dups (flags & MU_MSG_ITER_FLAG_SKIP_DUPS){
bool descending = (flags & MU_MSG_ITER_FLAG_DESCENDING);
bool threads = (flags & MU_MSG_ITER_FLAG_THREADS);
@@ -71,24 +81,22 @@ struct _MuMsgIter {
if (threads) {
_matches.fetch();
_cursor = _matches.begin();
- { // temporarily turn-off skipping dups
- _skip_dups = FALSE;
- _thread_hash = mu_threader_calculate
- (this, _matches.size(), sortfield, descending);
- _skip_dups = (flags & MU_MSG_ITER_FLAG_SKIP_DUPS);
- }
+ // NOTE: temporarily turn off skipping duplicates, since we
+ // need threadinfo for *all* matches, duplicates included
+ _skip_dups = false;
+ _thread_hash = mu_threader_calculate
+ (this, _matches.size(), sortfield, descending);
+ _skip_dups = (flags & MU_MSG_ITER_FLAG_SKIP_DUPS);
ThreadKeyMaker keymaker(_thread_hash);
enq.set_sort_by_key (&keymaker, false);
_matches = _enq.get_mset (0, maxnum);
-
} else if (sortfield != MU_MSG_FIELD_ID_NONE) {
enq.set_sort_by_value ((Xapian::valueno)sortfield,
descending);
_matches = _enq.get_mset (0, maxnum);
_cursor = _matches.begin();
}
-
_cursor = _matches.begin();
}
@@ -120,12 +128,30 @@ struct _MuMsgIter {
bool looks_like_dup () const {
try {
const Xapian::Document doc (cursor().get_document());
- const std::string msg_uid
- (doc.get_value(MU_MSG_FIELD_ID_MSGID));
- if (_msg_uid_set.find (msg_uid) != _msg_uid_set.end()) {
+ const std::string msgid (doc.get_value(MU_MSG_FIELD_ID_MSGID));
+ unsigned docid (doc.get_docid());
+
+ if (msgid.empty())
+ return false;
+
+ // is this message-id in the preferred map? if so, the
+ // message is a duplicate unless its docid is the
+ // preferred one for that message-id
+ msgid_docid_map::const_iterator pref_iter (_preferred_map.find (msgid));
+ if (pref_iter != _preferred_map.end()) {
+ //std::cerr << "in the set!" << std::endl;
+ if ((*pref_iter).second == docid)
+ return false; // in the set: not a dup!
+ else
+ return true;
+ }
+
+ // otherwise, simply check if we've already seen this message-id,
+ // and, if so, it's considered a dup
+ if (_msg_uid_set.find (msgid) != _msg_uid_set.end()) {
return true;
} else {
- _msg_uid_set.insert (msg_uid);
+ _msg_uid_set.insert (msgid);
return false;
}
} catch (...) {
@@ -133,6 +159,17 @@ struct _MuMsgIter {
}
}
+ static void each_preferred (const char *msgid, gpointer docidp, msgid_docid_map *preferred_map) {
+ (*preferred_map)[msgid] = GPOINTER_TO_SIZE(docidp);
+ }
+
+ void set_preferred_map (GHashTable *preferred_hash) {
+ if (!preferred_hash)
+ _preferred_map.clear();
+ else
+ g_hash_table_foreach (preferred_hash,
+ (GHFunc)each_preferred, &_preferred_map);
+ }
bool skip_dups () const { return _skip_dups; }
bool skip_unreadable () const { return _skip_unreadable; }
@@ -147,15 +184,15 @@ struct _MuMsgIter {
MuMsgIterFlags _flags;
- struct ltstr {
- bool operator () (const std::string &s1,
- const std::string &s2) const {
- return g_strcmp0 (s1.c_str(), s2.c_str()) < 0;
- }
- };
mutable std::set <std::string, ltstr> _msg_uid_set;
-
- bool _skip_unreadable, _skip_dups;
+ bool _skip_unreadable;
+
+ // the 'preferred map' (msgid->docid) is used when checking
+ // for duplicates; if a message is in the preferred map, it
+ // will not be excluded (but other messages with the same
+ // msgid will)
+ msgid_docid_map _preferred_map;
+ bool _skip_dups;
};
@@ -212,21 +249,17 @@ mu_msg_iter_destroy (MuMsgIter *iter)
try { delete iter; } MU_XAPIAN_CATCH_BLOCK;
}
+
+
void
-mu_msg_iter_set_skip_duplicates (MuMsgIter *iter, gboolean skip_duplicates,
- GHashTable *preferred_set)
+mu_msg_iter_set_preferred (MuMsgIter *iter, GHashTable *preferred_hash)
{
g_return_if_fail (iter);
- g_return_if_fail (!skip_duplicates && preferred_set);
-
-
-
-
+ iter->set_preferred_map (preferred_hash);
}
-
MuMsg*
mu_msg_iter_get_msg_floating (MuMsgIter *iter)
{
@@ -306,7 +339,7 @@ mu_msg_iter_is_done (MuMsgIter *iter)
/* hmmm.... is it impossible to get a 0 docid, or just very improbable? */
-unsigned int
+unsigned
mu_msg_iter_get_docid (MuMsgIter *iter)
{
g_return_val_if_fail (iter, (unsigned int)-1);
@@ -368,8 +401,6 @@ mu_msg_iter_get_thread_id (MuMsgIter *iter)
}
-
-
const MuMsgIterThreadInfo*
mu_msg_iter_get_thread_info (MuMsgIter *iter)
{
@@ -385,7 +416,7 @@ mu_msg_iter_get_thread_info (MuMsgIter *iter)
(iter->thread_hash(), GUINT_TO_POINTER(docid));
if (!ti)
- g_printerr ("no ti for %u\n", docid);
+ g_warning ("no ti for %u\n", docid);
return ti;
View
14 lib/mu-msg-iter.h
@@ -51,8 +51,6 @@ enum _MuMsgIterFlags {
};
typedef unsigned MuMsgIterFlags;
-
-
/**
* create a new MuMsgIter -- basically, an iterator over the search
* results
@@ -129,6 +127,18 @@ MuMsg* mu_msg_iter_get_msg_floating (MuMsgIter *iter)
/**
+ * Provide a preferred_hash, which is a hashtable msgid->docid to
+ * indicate the messages which should /not/ be seen as duplicates.
+ *
+ * @param iter a valid MuMsgIter iterator
+ * @param preferred_hash a hashtable msgid->docid of messages /not/ to
+ * mark as duplicates, or NULL to clear the preferred map
+ */
+void mu_msg_iter_set_preferred (MuMsgIter *iter, GHashTable *preferred_hash);
+
+
+
+/**
* get the document id for the current message
*
* @param iter a valid MuMsgIter iterator
View
40 lib/mu-query.cc
@@ -397,20 +397,34 @@ get_enquire (MuQuery *self, const char *searchexpr, MuMsgFieldId sortfieldid,
}
/*
- * record all threadids for the messages
+ * record all threadids for the messages; also 'orig_set' receives all
+ * original matches (a map msgid-->docid), so we can make sure the
+ * originals are not seen as 'duplicates' later (when skipping
+ * duplicates). We want to favor the originals over the related
+ * messages, when skipping duplicates.
*/
static GHashTable*
-get_thread_ids (MuMsgIter *iter)
+get_thread_ids (MuMsgIter *iter, GHashTable **orig_set)
{
GHashTable *ids;
- ids = g_hash_table_new_full (g_str_hash, g_str_equal,
- (GDestroyNotify)g_free, NULL);
+ ids = g_hash_table_new_full (g_str_hash, g_str_equal,
+ (GDestroyNotify)g_free, NULL);
+ *orig_set = g_hash_table_new_full (g_str_hash, g_str_equal,
+ (GDestroyNotify)g_free, NULL);
while (!mu_msg_iter_is_done (iter)) {
- const char *thread_id;
+ const char *thread_id, *msgid;
+ unsigned docid;
+ /* record the thread id for the message */
if ((thread_id = mu_msg_iter_get_thread_id (iter)))
g_hash_table_insert (ids, g_strdup (thread_id),
GSIZE_TO_POINTER(TRUE));
+ /* record the original set */
+ docid = mu_msg_iter_get_docid(iter);
+ if (docid != 0 && (msgid = mu_msg_iter_get_msgid (iter)))
+ g_hash_table_insert (*orig_set, g_strdup (msgid),
+ GSIZE_TO_POINTER(docid));
+
if (!mu_msg_iter_next (iter))
break;
}
@@ -420,7 +434,7 @@ get_thread_ids (MuMsgIter *iter)
static Xapian::Query
-get_related_query (MuMsgIter *iter)
+get_related_query (MuMsgIter *iter, GHashTable **orig_set)
{
GHashTable *hash;
GList *id_list, *cur;
@@ -428,7 +442,9 @@ get_related_query (MuMsgIter *iter)
static std::string pfx (1, mu_msg_field_xapian_prefix
(MU_MSG_FIELD_ID_THREAD_ID));
- hash = get_thread_ids (iter);
+ /* orig_set receives the hash msgid->docid of the set of
+ * original matches */
+ hash = get_thread_ids (iter, orig_set);
/* id_list now gets a list of all thread-ids seen in the query
* results; either in the Message-Id field or in
* References. */
@@ -451,10 +467,12 @@ static void
include_related (MuQuery *self, MuMsgIter **iter, int maxnum,
MuMsgFieldId sortfieldid, MuQueryFlags flags)
{
+ GHashTable *orig_set;
Xapian::Enquire enq (self->db());
MuMsgIter *rel_iter;
- enq.set_query(get_related_query (*iter));
+ orig_set = NULL;
+ enq.set_query(get_related_query (*iter, &orig_set));
enq.set_cutoff(0,0);
rel_iter= mu_msg_iter_new (
@@ -465,6 +483,12 @@ include_related (MuQuery *self, MuMsgIter **iter, int maxnum,
NULL);
mu_msg_iter_destroy (*iter);
+
+ // set the preferred set for the iterator (i.e., the set not
+ // considered to be duplicates) to be the original matches
+ mu_msg_iter_set_preferred (rel_iter, orig_set);
+ g_hash_table_destroy (orig_set);
+
*iter = rel_iter;
}
Please sign in to comment.
Something went wrong with that request. Please try again.