Skip to content
This repository has been archived by the owner on Mar 17, 2022. It is now read-only.

Commit

Permalink
RFC5987 filename parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangyunhao116 committed Feb 1, 2019
1 parent 82e366d commit b7e47dc
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions zmail/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from email.header import decode_header
from quopri import decodestring
from typing import List
from urllib.parse import unquote

from .exceptions import ParseError
from .structures import CaseInsensitiveDict
Expand All @@ -20,6 +21,7 @@
TYPE_TEXT_HTML = ('text', 'html')
# Date string shapes, captured group by group:
#   DATE_PATTERN_1: "<word>, <digits> <word> <digits> <digits>:<digits>:<digits> <rest>"
#   DATE_PATTERN_2: same as above but without the leading "<word>," part.
DATE_PATTERN_1 = re.compile(r'(\w+),\s+([0-9]+)\s+(\w+)\s+([0-9]+)\s+([0-9]+):([0-9]+):([0-9]+)\s+(.+)')
DATE_PATTERN_2 = re.compile(r'([0-9]+)\s+([\w]+)\s+([0-9]+)\s+([0-9]+):([0-9]+):([0-9]+)\s+(.+)')
# RFC 5987 extended parameter value, e.g. "UTF-8'en'%E2%82%AC%20rates.txt".
# Groups: (charset, language tag -- may be empty, percent-encoded value).
# Compiled once; the original wrapped the pattern in a redundant second re.compile().
FILENAME_PATTERN = re.compile(r"([^']+)'([^']*)'(.+)")
MONTH_TO_INT = CaseInsensitiveDict({
'Jan': 1,
'Feb': 2,
Expand Down Expand Up @@ -297,6 +299,11 @@ def parse_one_part_body(headers: CaseInsensitiveDict, body: List[bytes], main_ty
log.warning('Can not decode Content-Disposition extra part:' + part + ' reason:' + str(e))
continue
filename = _extra_kv.get('filename')
if filename is None and _extra_kv.get('filename*'): # RFC5987 and ignore language tags
match = FILENAME_PATTERN.fullmatch(_extra_kv.get('filename*'))
if match:
_encoding, _language_tags, _name = match.groups()
filename = unquote(_name, _encoding)
else:
filename = None
attachment_name = filename or extra_kv.get('name') or headers.get('subject') or 'Untitled'
Expand Down Expand Up @@ -330,6 +337,11 @@ def parse_one_part_body(headers: CaseInsensitiveDict, body: List[bytes], main_ty
log.warning('Can not decode Content-Disposition extra part:' + part + ' reason:' + str(e))
continue
filename = _extra_kv.get('filename')
if filename is None and _extra_kv.get('filename*'): # RFC5987 and ignore language tags
match = FILENAME_PATTERN.fullmatch(_extra_kv.get('filename*'))
if match:
_encoding, _language_tags, _name = match.groups()
filename = unquote(_name, _encoding)
else:
filename = None
attachment_name = filename or extra_kv.get('name') or headers.get('subject') or 'Untitled'
Expand Down

0 comments on commit b7e47dc

Please sign in to comment.