Skip to content

Commit

Permalink
ParseMediaType tolerates unencoded 8bit characters (#201)
Browse files Browse the repository at this point in the history
* ParseMediaType tolerates unencoded 8bit characters
* Added ReadEnvelope test with unencoded 8bit attachment filename

Co-authored-by: Pavel Bazika <pavel.bazika@icewarp.com>
  • Loading branch information
pavelbazika and Pavel Bazika committed Jul 15, 2021
1 parent 6817b15 commit 0c598a7
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 58 deletions.
18 changes: 18 additions & 0 deletions envelope_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -938,6 +938,24 @@ func TestBadContentTransferEncodingInMime(t *testing.T) {
}
}

// TestBadMime8bitFilename verifies that ReadEnvelope parses the
// "mime-bad-8bit-filename.raw" fixture without error and yields the expected
// text part plus a single attachment whose file name is the non-ASCII
// "管理.doc".
func TestBadMime8bitFilename(t *testing.T) {
	msg := test.OpenTestData("mail", "mime-bad-8bit-filename.raw")
	e, err := enmime.ReadEnvelope(msg)
	if err != nil {
		t.Fatal("Failed to parse MIME:", err)
	}

	// Report the observed values on failure so a broken parse can be diagnosed
	// from the test output alone.
	if got, want := strings.TrimSpace(e.Text), "Text part"; got != want {
		t.Fatalf("Text part not parsed correctly: got %q, want %q", got, want)
	}

	// The length check must stay fatal: the index expression below would
	// panic on an empty attachment list.
	if got, want := len(e.Attachments), 1; got != want {
		t.Fatalf("Wrong number of attachments: got %d, want %d", got, want)
	}

	if got, want := e.Attachments[0].FileName, "管理.doc"; got != want {
		t.Fatalf("Wrong attachment name: got %q, want %q", got, want)
	}
}

func TestBlankMediaName(t *testing.T) {
msg := test.OpenTestData("mail", "mime-blank-media-name.raw")
e, err := enmime.ReadEnvelope(msg)
Expand Down
129 changes: 71 additions & 58 deletions header.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"mime"
"net/textproto"
"strings"
_utf8 "unicode/utf8"

"github.com/jhillyerd/enmime/internal/coding"
"github.com/jhillyerd/enmime/internal/stringutil"
Expand Down Expand Up @@ -358,6 +359,7 @@ func consumeParam(s string) (consumed, rest string) {
valueQuotedOriginally := false
valueQuoteAdded := false
valueQuoteNeeded := false
rfc2047Needed := false

var r rune
findValueStart:
Expand All @@ -369,7 +371,8 @@ findValueStart:
case '"':
valueQuotedOriginally = true
valueQuoteAdded = true
value.WriteRune(r)
valueQuoteNeeded = true
param.WriteRune(r)

break findValueStart

Expand All @@ -381,6 +384,10 @@ findValueStart:
break findValueStart

default:
if r > 127 {
rfc2047Needed = true
}

valueQuotedOriginally = false
valueQuoteAdded = false
value.WriteRune(r)
Expand All @@ -389,6 +396,18 @@ findValueStart:
}
}

quoteIfUnquoted := func() {
if !valueQuoteNeeded {
if !valueQuoteAdded {
param.WriteByte('"')

valueQuoteAdded = true
}

valueQuoteNeeded = true
}
}

if len(s)-i < 1 {
// parameter value starts at the end of the string, make empty
// quoted string to play nice with mime.ParseMediaType
Expand All @@ -397,108 +416,102 @@ findValueStart:
} else {
// The beginning of the value is not at the end of the string

quoteIfUnquoted := func() {
if !valueQuoteNeeded {
if !valueQuoteAdded {
param.WriteByte('"')

valueQuoteAdded = true
}

valueQuoteNeeded = true
}
}

for _, v := range []byte{'(', ')', '<', '>', '@', ',', ':', '/', '[', ']', '?', '='} {
if s[0] == v {
quoteIfUnquoted()
break
}
}

s = s[i+1:]
_, runeLength := _utf8.DecodeRuneInString(s[i:])
s = s[i+runeLength:]
escaped := false

findValueEnd:
for len(s) > 0 {
switch s[0] {
for i, r = range s {
if escaped {
value.WriteRune(r)
escaped = false
continue
}

switch r {
case ';', ' ', '\t':
if valueQuotedOriginally {
// We're in a quoted string, so whitespace is allowed.
value.WriteByte(s[0])
s = s[1:]
value.WriteRune(r)
break
}

// Otherwise, we've reached the end of an unquoted value.

param.WriteString(value.String())
value.Reset()

if valueQuoteNeeded {
param.WriteByte('"')
}

param.WriteByte(s[0])
s = s[1:]

rest = s[i:]
break findValueEnd

case '"':
if valueQuotedOriginally {
// We're in a quoted value. This is the end of that value.
param.WriteString(value.String())
value.Reset()

param.WriteByte(s[0])
s = s[1:]

rest = s[i:]
break findValueEnd
}

quoteIfUnquoted()

value.WriteByte('\\')
value.WriteByte(s[0])
s = s[1:]
value.WriteRune(r)

case '\\':
if len(s) > 1 {
value.WriteByte(s[0])
s = s[1:]

// Backslash escapes the next char. Consume that next char.
value.WriteByte(s[0])

if i < len(s)-1 {
// If next char is present, escape it with backslash
value.WriteRune(r)
escaped = true
quoteIfUnquoted()
}
// Else there is no next char to consume.
s = s[1:]

case '(', ')', '<', '>', '@', ',', ':', '/', '[', ']', '?', '=':
quoteIfUnquoted()

fallthrough

default:
value.WriteByte(s[0])
s = s[1:]
if r > 127 {
rfc2047Needed = true
}
value.WriteRune(r)
}
}
}

if value.Len() > 0 {
// There is a value that ends with the string. Capture it.
param.WriteString(value.String())

if valueQuotedOriginally || valueQuoteNeeded {
// If valueQuotedOriginally is true and we got here,
// that means there was no closing quote. So we'll add one.
// Otherwise, we're here because it was an unquoted value
// with a special char in it, and we had to quote it.
param.WriteByte('"')
// Convert the whole value to an RFC 2047 encoded-word if it contains non-ASCII characters (code points > 127)
val := value.String()
if rfc2047Needed {
val = mime.BEncoding.Encode(utf8, val)
// RFC 2047 must be quoted
quoteIfUnquoted()
}

// Write the value
param.WriteString(val)
}

// Add final quote if required
if valueQuoteNeeded {
param.WriteByte('"')
}

// Write last parsed char if any
if rest != "" {
if rest[0] != '"' {
// When last char is quote, valueQuotedOriginally is surely true and the quote was already written.
// Otherwise output the character (; for example)
param.WriteByte(rest[0])
}

// Focus the rest of the string
rest = rest[1:]
}

return param.String(), s
return param.String(), rest
}

// fixUnquotedSpecials as defined in RFC 2045, section 5.1:
Expand Down
30 changes: 30 additions & 0 deletions header_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,36 @@ func TestFixUnquotedSpecials(t *testing.T) {
input: `text/html;charset="`,
want: `text/html;charset=""`,
},
{
// Check unquoted 8bit is encoded
input: `application/msword;name=管理.doc`,
want: `application/msword;name="=?utf-8?b?566h55CGLmRvYw==?="`,
},
{
// Check mix of ascii and unquoted 8bit is encoded
input: `application/msword;name=15管理.doc`,
want: `application/msword;name="=?utf-8?b?MTXnrqHnkIYuZG9j?="`,
},
{
// Check quoted 8bit is encoded
input: `application/msword;name="15管理.doc"`,
want: `application/msword;name="=?utf-8?b?MTXnrqHnkIYuZG9j?="`,
},
{
// Check quoted 8bit with missing closing quote is encoded
input: `application/msword;name="15管理.doc`,
want: `application/msword;name="=?utf-8?b?MTXnrqHnkIYuZG9j?="`,
},
{
// Trailing quote without starting quote is considered as part of param text for simplicity
input: `application/msword;name=15管理.doc"`,
want: `application/msword;name="=?utf-8?b?MTXnrqHnkIYuZG9jXCI=?="`,
},
{
// Invalid UTF-8 sequence does not cause any fatal error
input: "application/msword;name=\xe2\x28\xa1.doc",
want: `application/msword;name="=?utf-8?b?77+9KO+/vS5kb2M=?="`,
},
}
for _, tc := range testCases {
t.Run(tc.input, func(t *testing.T) {
Expand Down
26 changes: 26 additions & 0 deletions testdata/mail/mime-bad-8bit-filename.raw
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
Date: Wed, 22 Feb 2021 13:29:24 +0800
From: "Pavel Bazika" <pavel.bazika@some.domain>
To: <test@dome.domain>,
Subject: Malformed test
Mime-Version: 1.0
Content-Type: multipart/mixed;
boundary="=====003_Dragon323481247347_====="

This is a multi-part message in MIME format.

--=====003_Dragon323481247347_=====
Content-Type: text/plain;
charset=us-ascii

Text part

--=====003_Dragon323481247347_=====
Content-Type: application/msword;
name=管理.doc
Content-Transfer-Encoding: base64
Content-Disposition: attachment;
filename=管理.doc

PGh0bWw+Cg==

--=====003_Dragon323481247347_=====--

0 comments on commit 0c598a7

Please sign in to comment.