Skip to content

Commit

Permalink
fix: drop leading whitespace in Message-ID header. Fixes #3760 (#3761)
Browse files Browse the repository at this point in the history
  • Loading branch information
rpcross committed May 17, 2024
1 parent c211366 commit 8b7138d
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 1 deletion.
2 changes: 1 addition & 1 deletion backend/mlarchive/archive/mail.py
Original file line number Diff line number Diff line change
Expand Up @@ -850,7 +850,7 @@ def normalize(self, header_text):
# encode as UTF8 and compress whitespace
# normal = normal.encode('utf8') # this is unnecessary
normal = clean_spaces(normal)
return normal.rstrip()
return normal.strip()

def process(self):
"""Perform the rest of the parsing and construct the Message object. Note,
Expand Down
45 changes: 45 additions & 0 deletions backend/mlarchive/archive/migrations/0003_fix_message_msgid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Generated by Django 4.2.13 on 2024-05-16 22:18

from django.db import migrations
import hashlib
import base64


def get_hash(msg):
    """Return the hashcode for ``msg`` as URL-safe base64 text.

    The hashcode is the SHA-1 digest of the UTF-8 encoded Message-ID
    (``msg.msgid``) immediately followed by the UTF-8 encoded list name
    (``msg.email_list.name``). This is similar to the scheme used by the
    Web Email Archive, mail-archive.com;
    see: https://www.mail-archive.com/faq.html#msgid
    """
    # Hashing the concatenation is equivalent to hashing the Message-ID
    # and then updating the digest with the list name.
    payload = msg.msgid.encode('utf8') + msg.email_list.name.encode('utf8')
    digest = hashlib.sha1(payload).digest()
    return base64.urlsafe_b64encode(digest).decode('utf8')


def forward(apps, schema_editor):
    """Repair stored messages whose Message-ID begins with a space.

    For every ``archive.Message`` row whose ``msgid`` starts with a space,
    strip the leading whitespace and any leading ``<`` characters, then
    recompute ``hashcode`` from the corrected ``msgid`` and save the row.
    """
    message_model = apps.get_model('archive', 'Message')

    affected = message_model.objects.filter(msgid__startswith=' ')
    for message in affected:
        message.msgid = message.msgid.lstrip().lstrip('<')
        # NOTE(review): 20240516 looks like a date-style marker tagging the
        # rows touched by this migration -- confirm before relying on it.
        message.spam_score = 20240516
        # The hashcode is derived from msgid, so it has to be recomputed
        # after msgid has been corrected.
        message.hashcode = get_hash(message)
        message.save()

def reverse(apps, schema_editor):
    """Do nothing when the migration is reversed; rows changed by ``forward`` are kept."""
    return None


class Migration(migrations.Migration):
    """Data migration: run ``forward`` on apply and the no-op ``reverse`` on rollback."""

    # Applied after migration 0002_alter_message_msgid of the archive app.
    dependencies = [('archive', '0002_alter_message_msgid')]

    operations = [migrations.RunPython(forward, reverse)]
14 changes: 14 additions & 0 deletions backend/mlarchive/tests/archive/mail.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,20 @@ def test_MessageWrapper_get_to():
assert mw.get_cc() == 'ancp@ietf.org'


def test_MessageWrapper_normalize():
    """Check that MessageWrapper.normalize trims, compresses and decodes header text."""
    msg = message_from_file('mail_multipart.1')
    mw = MessageWrapper.from_message(msg, 'public')
    # leading space
    assert mw.normalize(' <LEADING@example.com>') == '<LEADING@example.com>'
    # trailing space
    assert mw.normalize('<LEADING@example.com> ') == '<LEADING@example.com>'
    # compress space: the input needs a run of several spaces, otherwise
    # the assertion passes without any whitespace compression taking place
    assert mw.normalize('Hello   there') == 'Hello there'
    # decode
    text = '=?utf-8?q?ngs=29_to_Proposed_Standard?='
    assert mw.normalize(text) == 'ngs) to Proposed Standard'


@pytest.mark.django_db(transaction=True)
def test_MessageWrapper_process_attachments():
msg = message_from_file('mail_multipart.1')
Expand Down

0 comments on commit 8b7138d

Please sign in to comment.