diff --git a/envelope_test.go b/envelope_test.go index 48d873c5..508b4ac8 100644 --- a/envelope_test.go +++ b/envelope_test.go @@ -938,6 +938,24 @@ func TestBadContentTransferEncodingInMime(t *testing.T) { } } +func TestBadMime8bitFilename(t *testing.T) { + msg := test.OpenTestData("mail", "mime-bad-8bit-filename.raw") + e, err := enmime.ReadEnvelope(msg) + + if err != nil { + t.Fatal("Failed to parse MIME:", err) + } + if strings.TrimSpace(e.Text) != "Text part" { + t.Fatal("Text part not parsed correctly") + } + if len(e.Attachments) != 1 { + t.Fatal("Wrong number of attachments") + } + if e.Attachments[0].FileName != "管理.doc" { + t.Fatal("Wrong attachment name") + } +} + func TestBlankMediaName(t *testing.T) { msg := test.OpenTestData("mail", "mime-blank-media-name.raw") e, err := enmime.ReadEnvelope(msg) diff --git a/header.go b/header.go index f636881e..b83301e1 100644 --- a/header.go +++ b/header.go @@ -7,6 +7,7 @@ import ( "mime" "net/textproto" "strings" + _utf8 "unicode/utf8" "github.com/jhillyerd/enmime/internal/coding" "github.com/jhillyerd/enmime/internal/stringutil" @@ -358,6 +359,7 @@ func consumeParam(s string) (consumed, rest string) { valueQuotedOriginally := false valueQuoteAdded := false valueQuoteNeeded := false + rfc2047Needed := false var r rune findValueStart: @@ -369,7 +371,8 @@ findValueStart: case '"': valueQuotedOriginally = true valueQuoteAdded = true - value.WriteRune(r) + valueQuoteNeeded = true + param.WriteRune(r) break findValueStart @@ -381,6 +384,10 @@ findValueStart: break findValueStart default: + if r > 127 { + rfc2047Needed = true + } + valueQuotedOriginally = false valueQuoteAdded = false value.WriteRune(r) @@ -389,6 +396,18 @@ findValueStart: } } + quoteIfUnquoted := func() { + if !valueQuoteNeeded { + if !valueQuoteAdded { + param.WriteByte('"') + + valueQuoteAdded = true + } + + valueQuoteNeeded = true + } + } + if len(s)-i < 1 { // parameter value starts at the end of the string, make empty // quoted string to play nice 
with mime.ParseMediaType @@ -397,81 +416,56 @@ findValueStart: } else { // The beginning of the value is not at the end of the string - quoteIfUnquoted := func() { - if !valueQuoteNeeded { - if !valueQuoteAdded { - param.WriteByte('"') - - valueQuoteAdded = true - } - - valueQuoteNeeded = true - } - } - for _, v := range []byte{'(', ')', '<', '>', '@', ',', ':', '/', '[', ']', '?', '='} { if s[0] == v { quoteIfUnquoted() + break } } - s = s[i+1:] + _, runeLength := _utf8.DecodeRuneInString(s[i:]) + s = s[i+runeLength:] + escaped := false findValueEnd: - for len(s) > 0 { - switch s[0] { + for i, r = range s { + if escaped { + value.WriteRune(r) + escaped = false + continue + } + + switch r { case ';', ' ', '\t': if valueQuotedOriginally { // We're in a quoted string, so whitespace is allowed. - value.WriteByte(s[0]) - s = s[1:] + value.WriteRune(r) break } // Otherwise, we've reached the end of an unquoted value. - - param.WriteString(value.String()) - value.Reset() - - if valueQuoteNeeded { - param.WriteByte('"') - } - - param.WriteByte(s[0]) - s = s[1:] - + rest = s[i:] break findValueEnd case '"': if valueQuotedOriginally { // We're in a quoted value. This is the end of that value. - param.WriteString(value.String()) - value.Reset() - - param.WriteByte(s[0]) - s = s[1:] - + rest = s[i:] break findValueEnd } quoteIfUnquoted() value.WriteByte('\\') - value.WriteByte(s[0]) - s = s[1:] + value.WriteRune(r) case '\\': - if len(s) > 1 { - value.WriteByte(s[0]) - s = s[1:] - - // Backslash escapes the next char. Consume that next char. - value.WriteByte(s[0]) - + if i < len(s)-1 { + // If next char is present, escape it with backslash + value.WriteRune(r) + escaped = true quoteIfUnquoted() } - // Else there is no next char to consume. 
- s = s[1:] case '(', ')', '<', '>', '@', ',', ':', '/', '[', ']', '?', '=': quoteIfUnquoted() @@ -479,26 +473,45 @@ findValueStart: fallthrough default: - value.WriteByte(s[0]) - s = s[1:] + if r > 127 { + rfc2047Needed = true + } + value.WriteRune(r) } } } if value.Len() > 0 { - // There is a value that ends with the string. Capture it. - param.WriteString(value.String()) - - if valueQuotedOriginally || valueQuoteNeeded { - // If valueQuotedOriginally is true and we got here, - // that means there was no closing quote. So we'll add one. - // Otherwise, we're here because it was an unquoted value - // with a special char in it, and we had to quote it. - param.WriteByte('"') + // Convert the whole value to an RFC 2047 encoded-word if it contains non-ASCII characters (code points above 127) + val := value.String() + if rfc2047Needed { + val = mime.BEncoding.Encode(utf8, val) + // RFC 2047 must be quoted + quoteIfUnquoted() } + + // Write the value + param.WriteString(val) + } + + // Add final quote if required + if valueQuoteNeeded { + param.WriteByte('"') + } + + // Write last parsed char if any + if rest != "" { + if rest[0] != '"' { + // When last char is quote, valueQuotedOriginally is surely true and the quote was already written. 
+ // Otherwise output the character (; for example) + param.WriteByte(rest[0]) + } + + // Focus the rest of the string + rest = rest[1:] } - return param.String(), s + return param.String(), rest } // fixUnquotedSpecials as defined in RFC 2045, section 5.1: diff --git a/header_test.go b/header_test.go index 6d232d9d..3973dbc7 100644 --- a/header_test.go +++ b/header_test.go @@ -395,6 +395,36 @@ func TestFixUnquotedSpecials(t *testing.T) { input: `text/html;charset="`, want: `text/html;charset=""`, }, + { + // Check unquoted 8bit is encoded + input: `application/msword;name=管理.doc`, + want: `application/msword;name="=?utf-8?b?566h55CGLmRvYw==?="`, + }, + { + // Check mix of ascii and unquoted 8bit is encoded + input: `application/msword;name=15管理.doc`, + want: `application/msword;name="=?utf-8?b?MTXnrqHnkIYuZG9j?="`, + }, + { + // Check quoted 8bit is encoded + input: `application/msword;name="15管理.doc"`, + want: `application/msword;name="=?utf-8?b?MTXnrqHnkIYuZG9j?="`, + }, + { + // Check quoted 8bit with missing closing quote is encoded + input: `application/msword;name="15管理.doc`, + want: `application/msword;name="=?utf-8?b?MTXnrqHnkIYuZG9j?="`, + }, + { + // Trailing quote without starting quote is considered as part of param text for simplicity + input: `application/msword;name=15管理.doc"`, + want: `application/msword;name="=?utf-8?b?MTXnrqHnkIYuZG9jXCI=?="`, + }, + { + // Invalid UTF-8 sequence does not cause any fatal error + input: "application/msword;name=\xe2\x28\xa1.doc", + want: `application/msword;name="=?utf-8?b?77+9KO+/vS5kb2M=?="`, + }, } for _, tc := range testCases { t.Run(tc.input, func(t *testing.T) { diff --git a/testdata/mail/mime-bad-8bit-filename.raw b/testdata/mail/mime-bad-8bit-filename.raw new file mode 100644 index 00000000..d2bbeff1 --- /dev/null +++ b/testdata/mail/mime-bad-8bit-filename.raw @@ -0,0 +1,26 @@ +Date: Wed, 22 Feb 2021 13:29:24 +0800 +From: "Pavel Bazika" +To: , +Subject: Malformed test +Mime-Version: 1.0 +Content-Type: 
multipart/mixed; + boundary="=====003_Dragon323481247347_=====" + +This is a multi-part message in MIME format. + +--=====003_Dragon323481247347_===== +Content-Type: text/plain; + charset=us-ascii + +Text part + +--=====003_Dragon323481247347_===== +Content-Type: application/msword; + name=管理.doc +Content-Transfer-Encoding: base64 +Content-Disposition: attachment; + filename=管理.doc + +PGh0bWw+Cg== + +--=====003_Dragon323481247347_=====--