Skip to content

Commit

Permalink
fix: unknown charset windows-874 problem on incoming mail
Browse files Browse the repository at this point in the history
When the sender uses an email client with the windows-874 charset (e.g., Outlook with Thai), the incoming email (e.g., one that creates an Issue) is displayed with garbled characters.

This happens because Python does not recognize this charset name. The fix maps the problematic charset to an alias that Python does know.
  • Loading branch information
kittiu committed Apr 17, 2024
1 parent e020862 commit 69f9db6
Showing 1 changed file with 13 additions and 3 deletions.
16 changes: 13 additions & 3 deletions frappe/email/receive.py
Expand Up @@ -3,6 +3,7 @@

import datetime
import email
import email.charset
import email.utils
import imaplib
import json
Expand Down Expand Up @@ -38,6 +39,13 @@
from frappe.utils.html_utils import clean_email_html
from frappe.utils.user import is_system_user

# use alias charset for python unknown charset
email.charset.ALIASES.update(
{
"windows-874": "cp874",
}
)

# fix due to a python bug in poplib that limits it to 2048
poplib._MAXLINE = 1_00_000

Expand Down Expand Up @@ -405,10 +413,12 @@ def set_subject(self):
"""Parse and decode `Subject` header."""
_subject = decode_header(self.mail.get("Subject", "No Subject"))
self.subject = _subject[0][0] or ""
charset = _subject[0][1]

if _subject[0][1]:
if charset:
# Encoding is known by decode_header (might also be unknown-8bit)
self.subject = safe_decode(self.subject, _subject[0][1])
charset = email.charset.ALIASES.get(charset, charset)
self.subject = safe_decode(self.subject, charset)

if isinstance(self.subject, bytes):
# Fall back to utf-8 if the charset is unknown or decoding fails
Expand Down Expand Up @@ -502,7 +512,7 @@ def get_charset(self, part):

def get_payload(self, part):
charset = self.get_charset(part)

charset = email.charset.ALIASES.get(charset, charset)
try:
return str(part.get_payload(decode=True), str(charset), "ignore")
except LookupError:
Expand Down

0 comments on commit 69f9db6

Please sign in to comment.