Navigation Menu

Skip to content

Commit

Permalink
FEATURE: IMAP detect spammed email and delete associated Discourse to…
Browse files Browse the repository at this point in the history
…pic (#11654)

This PR adds functionality for the IMAP sync code to detect if a UID that is missing from the mail group mailbox is in the Spam/Junk folder for the mail account, and if so delete the associated Discourse topic. This is identical to what we do for emails that are moved for Trash.

If an email is missing but not in Spam or Trash, then we mark the incoming email record with imap_missing: true. This may be used in future to further filter or identify these emails, and perhaps go hunting for them in the email account in bulk.

Note: This adds some code duplication because the trash and spam email detection and handling is very similar. I intend to do more refactors/improvements to the IMAP sync code in time because there is a lot of room for improvement.
  • Loading branch information
martin-brennan committed Jan 13, 2021
1 parent 8f9db38 commit 8796153
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 17 deletions.
@@ -0,0 +1,7 @@
# frozen_string_literal: true

class AddImapMissingColumnToIncomingEmail < ActiveRecord::Migration[6.0]
def change
add_column :incoming_emails, :imap_missing, :boolean, default: false, null: false
end
end
70 changes: 58 additions & 12 deletions lib/imap/providers/generic.rb
Expand Up @@ -10,6 +10,10 @@ class TrashedMailResponse
attr_accessor :trashed_emails, :trash_uid_validity
end

class SpamMailResponse
attr_accessor :spam_emails, :spam_uid_validity
end

class BasicMail
attr_accessor :uid, :message_id

Expand Down Expand Up @@ -172,14 +176,19 @@ def unarchive(uid)
end

# Look for the special Trash XLIST attribute.
# TODO: It might be more efficient to just store this against the group.
# Another table is looking more and more attractive....
def trash_mailbox
Discourse.cache.fetch("imap_trash_mailbox_#{account_digest}", expires_in: 30.minutes) do
list_mailboxes(:Trash).first
end
end

# Look for the special Junk XLIST attribute.
def spam_mailbox
Discourse.cache.fetch("imap_spam_mailbox_#{account_digest}", expires_in: 30.minutes) do
list_mailboxes(:Junk).first
end
end

# open the trash mailbox for inspection or writing. after the yield we
# close the trash and reopen the original mailbox to continue operations.
# the normal open_mailbox call can be made if more extensive trash ops
Expand All @@ -196,19 +205,26 @@ def open_trash_mailbox(write: false)
trash_uid_validity
end

# open the spam mailbox for inspection or writing. after the yield we
# close the spam and reopen the original mailbox to continue operations.
# the normal open_mailbox call can be made if more extensive spam ops
# need to be done.
def open_spam_mailbox(write: false)
open_mailbox_before_spam = @open_mailbox_name
open_mailbox_before_spam_write = @open_mailbox_write

spam_uid_validity = open_mailbox(spam_mailbox, write: write)[:uid_validity]

yield(spam_uid_validity) if block_given?

open_mailbox(open_mailbox_before_spam, write: open_mailbox_before_spam_write)
spam_uid_validity
end

def find_trashed_by_message_ids(message_ids)
trashed_emails = []
trash_uid_validity = open_trash_mailbox do
header_message_id_terms = message_ids.map do |msgid|
"HEADER Message-ID '#{Email.message_id_rfc_format(msgid)}'"
end

# OR clauses are written in Polish notation...so the query looks like this:
# OR OR HEADER Message-ID XXXX HEADER Message-ID XXXX HEADER Message-ID XXXX
or_clauses = 'OR ' * (header_message_id_terms.length - 1)
query = "#{or_clauses}#{header_message_id_terms.join(" ")}"

trashed_email_uids = imap.uid_search(query)
trashed_email_uids = find_uids_by_message_ids(message_ids)
if trashed_email_uids.any?
trashed_emails = emails(trashed_email_uids, ["UID", "ENVELOPE"]).map do |e|
BasicMail.new(message_id: Email.message_id_clean(e['ENVELOPE'].message_id), uid: e['UID'])
Expand All @@ -222,6 +238,36 @@ def find_trashed_by_message_ids(message_ids)
end
end

def find_spam_by_message_ids(message_ids)
spam_emails = []
spam_uid_validity = open_spam_mailbox do
spam_email_uids = find_uids_by_message_ids(message_ids)
if spam_email_uids.any?
spam_emails = emails(spam_email_uids, ["UID", "ENVELOPE"]).map do |e|
BasicMail.new(message_id: Email.message_id_clean(e['ENVELOPE'].message_id), uid: e['UID'])
end
end
end

SpamMailResponse.new.tap do |resp|
resp.spam_emails = spam_emails
resp.spam_uid_validity = spam_uid_validity
end
end

def find_uids_by_message_ids(message_ids)
header_message_id_terms = message_ids.map do |msgid|
"HEADER Message-ID '#{Email.message_id_rfc_format(msgid)}'"
end

# OR clauses are written in Polish notation...so the query looks like this:
# OR OR HEADER Message-ID XXXX HEADER Message-ID XXXX HEADER Message-ID XXXX
or_clauses = 'OR ' * (header_message_id_terms.length - 1)
query = "#{or_clauses}#{header_message_id_terms.join(" ")}"

imap.uid_search(query)
end

def trash(uid)
# MOVE is way easier than doing the COPY \Deleted EXPUNGE dance ourselves.
# It is supported by Gmail and Outlook.
Expand Down
38 changes: 33 additions & 5 deletions lib/imap/sync.rb
Expand Up @@ -180,15 +180,15 @@ def handle_missing_uids(old_uids)
# This can be done because Message-ID is unique on a mail server between mailboxes,
# where the UID will have changed when moving into the Trash mailbox. We need to get
# the new UID from the trash.
potential_spam = []
response = @provider.find_trashed_by_message_ids(missing_message_ids)
existing_incoming.each do |incoming|
matching_trashed = response.trashed_emails.find { |email| email.message_id == incoming.message_id }

# if the email is not in the trash then we don't know where it is... could
# be in any mailbox on the server or could be permanently deleted. TODO
# here would be some sort of more complex detection of "where in the world
# has this UID gone?"
next if !matching_trashed
if !matching_trashed
potential_spam << incoming
next
end

# if we deleted the topic/post ourselves in discourse then the post will
# not exist, and this sync is just updating the old UIDs to the new ones
Expand All @@ -202,6 +202,34 @@ def handle_missing_uids(old_uids)
ImapSyncLog.debug("Updating incoming ID #{incoming.id} uid data FROM [UID #{incoming.imap_uid} | UIDVALIDITY #{incoming.imap_uid_validity}] TO [UID #{matching_trashed.uid} | UIDVALIDITY #{response.trash_uid_validity}] (TRASHED)", @group)
incoming.update(imap_uid_validity: response.trash_uid_validity, imap_uid: matching_trashed.uid)
end

# This can be done because Message-ID is unique on a mail server between mailboxes,
# where the UID will have changed when moving into the Trash mailbox. We need to get
# the new UID from the spam.
response = @provider.find_spam_by_message_ids(missing_message_ids)
potential_spam.each do |incoming|
matching_spam = response.spam_emails.find { |email| email.message_id == incoming.message_id }

# if the email is not in the trash or spam then we don't know where it is... could
# be in any mailbox on the server or could be permanently deleted.
if !matching_spam
ImapSyncLog.debug("Email for incoming ID #{incoming.id} (#{incoming.message_id}) could not be found in the group mailbox, trash, or spam. It could be in another mailbox or permanently deleted.", @group)
incoming.update(imap_missing: true)
next
end

# if we deleted the topic/post ourselves in discourse then the post will
# not exist, and this sync is just updating the old UIDs to the new ones
# in the spam, and we don't need to re-destroy the post
if incoming.post
ImapSyncLog.debug("Deleting post ID #{incoming.post_id}, topic id #{incoming.topic_id}; email has been moved to spam on the IMAP server.", @group)
PostDestroyer.new(Discourse.system_user, incoming.post).destroy
end

# the email has moved mailboxes, we don't want to try marking as spam again next time
ImapSyncLog.debug("Updating incoming ID #{incoming.id} uid data FROM [UID #{incoming.imap_uid} | UIDVALIDITY #{incoming.imap_uid_validity}] TO [UID #{matching_spam.uid} | UIDVALIDITY #{response.spam_uid_validity}] (SPAM)", @group)
incoming.update(imap_uid_validity: response.spam_uid_validity, imap_uid: matching_spam.uid)
end
end

def process_new_uids(new_uids, import_mode, all_old_uids_size, all_new_uids_size)
Expand Down
45 changes: 45 additions & 0 deletions spec/components/imap/sync_spec.rb
Expand Up @@ -321,6 +321,7 @@

provider.stubs(:uids).with(to: 100).returns([])
provider.stubs(:uids).with(from: 101).returns([])
provider.stubs(:find_spam_by_message_ids).returns(stub(spam_emails: []))
provider.stubs(:find_trashed_by_message_ids).returns(
stub(
trashed_emails: [
Expand All @@ -341,6 +342,49 @@
expect(Topic.with_deleted.find(incoming_100.topic_id).deleted_at).not_to eq(nil)
end

it "detects previously synced UIDs are missing and deletes the posts if they are in the spam/junk mailbox" do
sync_handler.process
incoming_100 = IncomingEmail.find_by(imap_uid: 100)
provider.stubs(:uids).with.returns([])

provider.stubs(:uids).with(to: 100).returns([])
provider.stubs(:uids).with(from: 101).returns([])
provider.stubs(:find_trashed_by_message_ids).returns(stub(trashed_emails: []))
provider.stubs(:find_spam_by_message_ids).returns(
stub(
spam_emails: [
stub(
uid: 10,
message_id: incoming_100.message_id
)
],
spam_uid_validity: 99
)
)
sync_handler.process

incoming_100.reload
expect(incoming_100.imap_uid_validity).to eq(99)
expect(incoming_100.imap_uid).to eq(10)
expect(Post.with_deleted.find(incoming_100.post_id).deleted_at).not_to eq(nil)
expect(Topic.with_deleted.find(incoming_100.topic_id).deleted_at).not_to eq(nil)
end

it "marks the incoming email as IMAP missing if it cannot be found in spam or trash" do
sync_handler.process
incoming_100 = IncomingEmail.find_by(imap_uid: 100)
provider.stubs(:uids).with.returns([])

provider.stubs(:uids).with(to: 100).returns([])
provider.stubs(:uids).with(from: 101).returns([])
provider.stubs(:find_trashed_by_message_ids).returns(stub(trashed_emails: []))
provider.stubs(:find_spam_by_message_ids).returns(stub(spam_emails: []))
sync_handler.process

incoming_100.reload
expect(incoming_100.imap_missing).to eq(true)
end

it "detects the topic being deleted on the discourse site and deletes on the IMAP server and
does not attempt to delete again on discourse site when deleted already by us on the IMAP server" do
SiteSetting.enable_imap_write = true
Expand Down Expand Up @@ -385,6 +429,7 @@

provider.stubs(:uids).with(to: 100).returns([])
provider.stubs(:uids).with(from: 101).returns([])
provider.stubs(:find_spam_by_message_ids).returns(stub(spam_emails: []))
provider.stubs(:find_trashed_by_message_ids).returns(
stub(
trashed_emails: [
Expand Down

0 comments on commit 8796153

Please sign in to comment.